Source code for scibeam.core.peak

# peak.py ---
#
# Filename: peak.py
# Description:
#            peak analysis
# Author:    Yu Lu
# Email:     yulu@utexas.edu
# Github:    https://github.com/SuperYuLu
#
# Created: Tue Jun 26 16:50:12 2018 (-0500)
# Version:
# Last-Updated: Fri Aug 24 22:39:06 2018 (-0500)
#           By: yulu
#     Update #: 298
#


import pandas
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import quad

import scibeam
from . import base
from .gaussian import Gaussian
from . import tofseries


__all__ = [
    'SeriesPeak',
    'FramePeak',
    ]

[docs]class SeriesPeak(pandas.Series): """Peak analysis on 1D labeled / unlabeled data Build on top of pandas.Series, this adds more methods on peak analysis for pandas series data. The any class instance of SeriesPeak can still access all pandas sereis methods. By default, the indexes is treated as the time axis, while the values of series is the data value. Additionally, SeriesPeak is also designed as a mixin class which can be used as a method chain in other pandas dataframe / sereis based data formats. Attributes ---------- self : pandas series pandas series data """
[docs] def __init__(self, *args, **kwargs): """assign value to initialize SeriesPeak The initlization of this class can be done exactly as one initlize pandas series, for more information please pandas series documentation. """ super().__init__(*args, **kwargs)
@classmethod def _constructor(cls, *args, **kwargs): """class constructor The classmethod consturctor here are used to make sure the class instance element-wise operation still yields the object from the same class. Anther function is when it's used for a mixin class as for method chain, this will pass a copy of the class instance to make sure data is 'type safe' args, kwargs are the same as in __init__() """ return cls(*args, **kwargs)
[docs] def gausFit(self, plot = False, offset = False): """Fit series with gausssian function This gasussian fit function assumes the time or x-axis values is given by series index, while the measurement data or y-axis values is given by the values of sereis. Optionally the one can choose to plot the fitted gaussian curve together with the raw data to visuallize the fitting property. Parameters ---------- plot : bool If True a plot will be generated with raw data and fitted gaussian curve. Others no plot will be generated. Default False. offset : bool If True, the gaussian fitting will consider also fit the data offset. If False (default), the fitting procedure will assume that the data has 0 offset. Returns ------- popt : 1D array optimized parameters of gaussian fitting. [A, x0, sigma, y0(optional)] Where A: peak height of gaussian function x0: peak center x coordinates sigma: standard deviation y0: offset. Only exist if parameter offset is set to be 'True' pcov : 2D array Covariance matrix of fitted parameters corresponding to popt """ popt, pcov = Gaussian.gausFit(x = self.index, y = self.values, offset = offset, plot = plot) return popt, pcov
[docs] def idx(self, gauss_fit = False, offset = False): """find x-axis value corresponding to peak This funciton is to locate the corresponding x corrdinate or 'index' of the peak. Depend on the value of parameter 'gauss_fit', the x coordinate of peak can either come from the max value or gaussian fitting. The index of series is treated as the x coordinate of the data. Parameters ---------- gauss_fit : bool If true, the x coordinate corresponding to peak is get by performing gaussian fitting on the data, as in member method gausFit. If false, the maxmium value of data will be treated as the 'peak', and its corresponding x coordinate will be returend. offset : bool If True, the gaussian fitting will consider also fit the data offset. If False (default), the fitting procedure will assume that the data has 0 offset. Returns ------- float The x coordinate that corresponding to the peak """ if gauss_fit: return self.gausFit(offset = offset)[0][1] else: return self.idxmax()
[docs] def nidx(self, gauss_fit = False, offset = False): """number index of the peak Similar to member method idx, this one returns the number index rather than the real index, which means self.index[nidx] = idx """ if gauss_fit: height = self.height(gauss_fit = gauss_fit, offset = offset) nidx = np.argmin(abs(self.values - height)) else: nidx = np.argmax(self.values) return nidx
[docs] def height(self, gauss_fit = False, offset = False): """calculate peak height Calculated the peak height, either by gaussian fitting (if gauss_fit true), or simply return the maximium as the peak height (default) Parameters ---------- gauss_fit : bool If true, the peak height is get by performing a gaussian fit If false, simply the maximium value in the given data offset : bool If True, the gaussian fitting will consider also fit the data offset. If False (default), the fitting procedure will assume that the data has 0 offset. Returns ------- float Peak height """ if gauss_fit: return self.gausFit(offset = offset)[0][0] else: return self.max()
[docs] def sigma(self, n_sigmas = 1, gauss_fit = False, offset = False): """Find peak width Find the peak width, with specified mutiples of standard deviation. The width can be obtained by literally calculate the full-width-half-max or by gaussian fitting, depend on the parameter value 'gauss_fit' to be true of false. Parameters ---------- n_sigmas : integer Multiplies of standard deviations of the peak width is wanted gauss_fit : bool If true, the peak width is obtained from gaussian fitting If False (default), the peak width is calculated from literally full-width-half-max. offset : bool If True, the gaussian fitting will consider also fit the data offset. If False (default), the fitting procedure will assume that the data has 0 offset. Returns ------- float The peak width in terms of multiples of standard deviatioins """ if gauss_fit: return n_sigmas * self.gausFit(offset = offset)[0][2] else: peak_values = max(self) peak_idx = np.argmax(self.values) half_max = peak_values / 2 hwhm_idx_left = np.argmin(abs(self.iloc[:peak_idx].values - half_max)) hwhm_idx_right = np.argmin(abs(self.iloc[peak_idx : ].values - half_max)) + peak_idx fwhm = self.index[hwhm_idx_right] - self.index[hwhm_idx_left] sigma = fwhm / np.sqrt(8*np.log(2)) return sigma * n_sigmas
[docs] def fwhm(self, gauss_fit = False, offset = False): """Full-Width-Half-Maximium Find the Full-Width-Half-Maximium (FWHM) of the peak, from gaussian fitting or direction calculation. Parameters ---------- gauss_fit : bool If true, fwhm is get from gaussian fitting If false (default), fwhm is from direction calculation offset : bool If True, the gaussian fitting will consider also fit the data offset. If False (default), the fitting procedure will assume that the data has 0 offset. Returns ------- float peak full-width-half-maximium value """ sigma = self.sigma(n_sigmas = 1, gauss_fit = gauss_fit, offset = offset) return sigma * np.sqrt(8 * np.log(2))
[docs] def area(self, gauss_fit = False, offset = False): if gauss_fit: popt, pcov = self.gausFit() # if with offset #area = quad(lambda x:Gaussian.gaus(x, *popt) - popt[-1], min(data_label), max(data_label))[0] # assume no offset area = quad(lambda x:Gaussian.gaus(x, *popt), min(self.index), max(self.index))[0] else: # if with offset # area = np.trapz(x = data_label, y = self - np.min(self)) # assume no offset area = np.trapz(x = self.index, y = self.values) return area
[docs] def region(self, n_sigmas = 4, plot = False, offset = False): """Auto find the peak region Locate the region where there exists a peak and return the lower and upper bound index of the region. Parameters ---------- """ peak_nidx = self.nidx() peak_value = self.iloc[peak_nidx] hwhm_nidx_left = peak_nidx - np.argmin(abs(self.iloc[peak_nidx:0:-1].values - peak_value / 2)) hwhm_nidx_right = np.argmin(abs(self.iloc[peak_nidx:].values - peak_value / 2)) + peak_nidx fwhm_nidx_range = hwhm_nidx_right - hwhm_nidx_left delta_nidx = int(fwhm_nidx_range / np.sqrt(8 * np.log(2)) * n_sigmas) lb,ub = peak_nidx - delta_nidx, peak_nidx + delta_nidx lb = 0 if lb < 0 else lb ub = len(self) if ub > len(self) else ub if plot: plt.plot(self.index, self.values, '--', label = 'raw input') plt.plot(self.index[lb:ub], self.values[lb:ub], '-', label = 'detected peak') return lb, ub
[docs] def autocrop(self, n_sigmas = 4, offset = False): lb, ub = self.region(n_sigmas = n_sigmas, plot = False) return self._constructor(self.iloc[lb:ub])
[docs]class FramePeak(pandas.DataFrame): """ Peak analysis on 1D labeled / unlabeled data """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def _toTOFSeries(func): """ Decorator to wrap series returns for method chain """ def wrapper(*args, **kwargs): result = func(*args, **kwargs) if type(result) == pandas.core.series.Series: return tofseries.TOFSeries(result) else: return result return wrapper @classmethod def _constructor(cls, *args, **kwargs): return cls(*args, **kwargs)
[docs] @_toTOFSeries def idx(self, axis = 0, gauss_fit = False, normalize = False): selfCopy = self.copy() if axis == 0 else self.copy().T if gauss_fit: idx = selfCopy.apply(lambda s: Gaussian.gausFit(x = s.index, y = s.values, offset = False)[0][1]) else: idx = selfCopy.idxmax() if normalize: idx = idx / idx.max() return idx
[docs] @_toTOFSeries def nidx(self, axis = 0, gauss_fit = False, normalize = False): selfCopy = self.copy() if axis == 0 else self.copy().T if gauss_fit: height = selfCopy.height(gauss_fit) n_idx = [np.argmin(abs(selfCopy[x].values - h)) for x, h in zip(selfCopy.columns, height)] else: n_idx = [np.argmax(selfCopy[x].values) for x in selfCopy.columns] n_idx = pandas.Series(n_idx, selfCopy.columns) if normalize: n_idx = n_idx / n_idx.max() return n_idx
[docs] @_toTOFSeries def height(self, axis = 0, gauss_fit = False, normalize = False): selfCopy = self.copy() if axis == 0 else self.copy().T if gauss_fit: height = selfCopy.apply(lambda s: Gaussian.gausFit(x = s.index, y = s.values, offset = False)[0][0]) else: height = selfCopy.max() if normalize: height = height / height.max() return height
[docs] @_toTOFSeries def sigma(self, n_sigmas = 1, axis = 0, gauss_fit = False, normalize = False): selfCopy = self.copy() if axis == 0 else self.copy().T def col_sigma(series): if gauss_fit: return Gaussian.gausFit(x = series.index, y = series.values, offset = False)[0][2] else: peak_values = max(series) peak_idx = np.argmax(series.values) half_max = peak_values / 2 try: hwhm_idx_left = np.argmin(abs(series.iloc[:peak_idx].values - half_max)) hwhm_idx_right = np.argmin(abs(series.iloc[peak_idx : ].values - half_max)) + peak_idx except ValueError: # When there is no peak found return 0 fwhm = series.index[hwhm_idx_right] - series.index[hwhm_idx_left] sigma = fwhm / np.sqrt(8*np.log(2)) return sigma * n_sigmas sigma = selfCopy.apply(col_sigma) if normalize: sigma = sigma / sigma.max() return sigma
[docs] @_toTOFSeries def fwhm(self, axis = 0, gauss_fit = False, normalize = False): sigma = self.sigma(n_sigmas = 1, axis = axis, gauss_fit = gauss_fit, normalize = False) fwhm = sigma * np.sqrt(8 * np.log(2)) if normalize: fwhm = fwhm / fwhm.max() return fwhm
[docs] @_toTOFSeries def area(self, axis = 0, gauss_fit = False, normalize = False): selfCopy = self.copy() if axis == 0 else self.copy().T def col_area(series): if gauss_fit: for col in series.columns: popt, pcov = Gaussian.gausFit(x = series.index, y = series[col].values) area = quad(lambda x:Gaussian.gaus(x, *popt), min(series[col].index), max(series[col].index))[0] else: area = np.trapz(x = series.index, y = series.values) return area area = selfCopy.apply(col_area) if normalize: area = area / area.max() return area
[docs] @_toTOFSeries def region(self, axis = 0, n_sigmas = 4, plot = False): """ Locate the region where there exists a peak return the bound index of the region """ selfCopy = self.copy() def col_region(series, plot = False): series = SeriesPeak(series.copy()) peak_nidx = series.nidx() peak_value = series.iloc[peak_nidx] try: hwhm_nidx_left = peak_nidx - np.argmin(abs(series.iloc[peak_nidx:0:-1].values - peak_value / 2)) hwhm_nidx_right = np.argmin(abs(series.iloc[peak_nidx:].values - peak_value / 2)) + peak_nidx fwhm_nidx_range = hwhm_nidx_right - hwhm_nidx_left except ValueError: # incase the peak is on the edge, e.g. no peak in the region return (int(len(series) / 2), int(len(series) / 2)) delta_nidx = int(fwhm_nidx_range / np.sqrt(8 * np.log(2)) * n_sigmas) lb,ub = peak_nidx - delta_nidx, peak_nidx + delta_nidx lb = 0 if lb < 0 else lb ub = len(series) if ub > len(series) else ub if plot: plt.plot(series.index, series.values, '--', label = 'raw input') plt.plot(series.index[lb:ub], series.values[lb:ub], '-', label = 'detected peak') return lb, ub if axis == 0: return selfCopy.apply(col_region) elif axis == 1: return selfCopy.T.apply(col_region) else: # Currently the 2D peak detect doesn't work well lb0 = min([x[0] for x in selfCopy.apply(col_region)]) ub0 = max([x[1] for x in selfCopy.apply(col_region)]) lb1 = min([x[0] for x in selfCopy.T.apply(col_region)]) ub1 = max([x[1] for x in selfCopy.T.apply(col_region)]) return lb0, ub0, lb1, ub1