# peak.py ---
#
# Filename: peak.py
# Description:
# peak analysis
# Author: Yu Lu
# Email: yulu@utexas.edu
# Github: https://github.com/SuperYuLu
#
# Created: Tue Jun 26 16:50:12 2018 (-0500)
# Version:
# Last-Updated: Fri Aug 24 22:39:06 2018 (-0500)
# By: yulu
# Update #: 298
#
import pandas
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import quad
import scibeam
from . import base
from .gaussian import Gaussian
from . import tofseries
__all__ = [
'SeriesPeak',
'FramePeak',
]
[docs]class SeriesPeak(pandas.Series):
"""Peak analysis on 1D labeled / unlabeled data
Build on top of pandas.Series, this adds more methods on peak analysis for
pandas series data. The any class instance of SeriesPeak can still access
all pandas sereis methods.
By default, the indexes is treated as the time axis, while the values of
series is the data value.
Additionally, SeriesPeak is also designed as a mixin class which can be used
as a method chain in other pandas dataframe / sereis based data formats.
Attributes
----------
self : pandas series
pandas series data
"""
[docs] def __init__(self, *args, **kwargs):
"""assign value to initialize SeriesPeak
The initlization of this class can be done exactly as one initlize
pandas series, for more information please pandas series documentation.
"""
super().__init__(*args, **kwargs)
@classmethod
def _constructor(cls, *args, **kwargs):
"""class constructor
The classmethod consturctor here are used to make sure the class
instance element-wise operation still yields the object from the same
class. Anther function is when it's used for a mixin class as for method
chain, this will pass a copy of the class instance to make sure data
is 'type safe'
args, kwargs are the same as in __init__()
"""
return cls(*args, **kwargs)
[docs] def gausFit(self, plot = False, offset = False):
"""Fit series with gausssian function
This gasussian fit function assumes the time or x-axis values is given
by series index, while the measurement data or y-axis values is given by
the values of sereis.
Optionally the one can choose to plot the fitted gaussian curve together
with the raw data to visuallize the fitting property.
Parameters
----------
plot : bool
If True a plot will be generated with raw data and fitted gaussian
curve. Others no plot will be generated. Default False.
offset : bool
If True, the gaussian fitting will consider also fit the data
offset. If False (default), the fitting procedure will assume that
the data has 0 offset.
Returns
-------
popt : 1D array
optimized parameters of gaussian fitting.
[A, x0, sigma, y0(optional)] Where
A: peak height of gaussian function
x0: peak center x coordinates
sigma: standard deviation
y0: offset. Only exist if parameter offset is set to be 'True'
pcov : 2D array
Covariance matrix of fitted parameters corresponding to popt
"""
popt, pcov = Gaussian.gausFit(x = self.index, y = self.values, offset = offset, plot = plot)
return popt, pcov
[docs] def idx(self, gauss_fit = False, offset = False):
"""find x-axis value corresponding to peak
This funciton is to locate the corresponding x corrdinate or 'index' of
the peak. Depend on the value of parameter 'gauss_fit', the x coordinate
of peak can either come from the max value or gaussian fitting.
The index of series is treated as the x coordinate of the data.
Parameters
----------
gauss_fit : bool
If true, the x coordinate corresponding to peak is get by performing
gaussian fitting on the data, as in member method gausFit.
If false, the maxmium value of data will be treated as the 'peak',
and its corresponding x coordinate will be returend.
offset : bool
If True, the gaussian fitting will consider also fit the data
offset. If False (default), the fitting procedure will assume that
the data has 0 offset.
Returns
-------
float
The x coordinate that corresponding to the peak
"""
if gauss_fit:
return self.gausFit(offset = offset)[0][1]
else:
return self.idxmax()
[docs] def nidx(self, gauss_fit = False, offset = False):
"""number index of the peak
Similar to member method idx, this one returns the number index rather
than the real index, which means self.index[nidx] = idx
"""
if gauss_fit:
height = self.height(gauss_fit = gauss_fit, offset = offset)
nidx = np.argmin(abs(self.values - height))
else:
nidx = np.argmax(self.values)
return nidx
[docs] def height(self, gauss_fit = False, offset = False):
"""calculate peak height
Calculated the peak height, either by gaussian fitting (if gauss_fit
true), or simply return the maximium as the peak height (default)
Parameters
----------
gauss_fit : bool
If true, the peak height is get by performing a gaussian fit
If false, simply the maximium value in the given data
offset : bool
If True, the gaussian fitting will consider also fit the data
offset. If False (default), the fitting procedure will assume that
the data has 0 offset.
Returns
-------
float
Peak height
"""
if gauss_fit:
return self.gausFit(offset = offset)[0][0]
else:
return self.max()
[docs] def sigma(self, n_sigmas = 1, gauss_fit = False, offset = False):
"""Find peak width
Find the peak width, with specified mutiples of standard deviation.
The width can be obtained by literally calculate the full-width-half-max
or by gaussian fitting, depend on the parameter value 'gauss_fit' to be
true of false.
Parameters
----------
n_sigmas : integer
Multiplies of standard deviations of the peak width is wanted
gauss_fit : bool
If true, the peak width is obtained from gaussian fitting
If False (default), the peak width is calculated from literally
full-width-half-max.
offset : bool
If True, the gaussian fitting will consider also fit the data
offset. If False (default), the fitting procedure will assume that
the data has 0 offset.
Returns
-------
float
The peak width in terms of multiples of standard deviatioins
"""
if gauss_fit:
return n_sigmas * self.gausFit(offset = offset)[0][2]
else:
peak_values = max(self)
peak_idx = np.argmax(self.values)
half_max = peak_values / 2
hwhm_idx_left = np.argmin(abs(self.iloc[:peak_idx].values - half_max))
hwhm_idx_right = np.argmin(abs(self.iloc[peak_idx : ].values - half_max)) + peak_idx
fwhm = self.index[hwhm_idx_right] - self.index[hwhm_idx_left]
sigma = fwhm / np.sqrt(8*np.log(2))
return sigma * n_sigmas
[docs] def fwhm(self, gauss_fit = False, offset = False):
"""Full-Width-Half-Maximium
Find the Full-Width-Half-Maximium (FWHM) of the peak, from
gaussian fitting or direction calculation.
Parameters
----------
gauss_fit : bool
If true, fwhm is get from gaussian fitting
If false (default), fwhm is from direction calculation
offset : bool
If True, the gaussian fitting will consider also fit the data
offset. If False (default), the fitting procedure will assume that
the data has 0 offset.
Returns
-------
float
peak full-width-half-maximium value
"""
sigma = self.sigma(n_sigmas = 1, gauss_fit = gauss_fit, offset = offset)
return sigma * np.sqrt(8 * np.log(2))
[docs] def area(self, gauss_fit = False, offset = False):
if gauss_fit:
popt, pcov = self.gausFit()
# if with offset
#area = quad(lambda x:Gaussian.gaus(x, *popt) - popt[-1], min(data_label), max(data_label))[0]
# assume no offset
area = quad(lambda x:Gaussian.gaus(x, *popt), min(self.index), max(self.index))[0]
else:
# if with offset
# area = np.trapz(x = data_label, y = self - np.min(self))
# assume no offset
area = np.trapz(x = self.index, y = self.values)
return area
[docs] def region(self, n_sigmas = 4, plot = False, offset = False):
"""Auto find the peak region
Locate the region where there exists a peak and return
the lower and upper bound index of the region.
Parameters
----------
"""
peak_nidx = self.nidx()
peak_value = self.iloc[peak_nidx]
hwhm_nidx_left = peak_nidx - np.argmin(abs(self.iloc[peak_nidx:0:-1].values - peak_value / 2))
hwhm_nidx_right = np.argmin(abs(self.iloc[peak_nidx:].values - peak_value / 2)) + peak_nidx
fwhm_nidx_range = hwhm_nidx_right - hwhm_nidx_left
delta_nidx = int(fwhm_nidx_range / np.sqrt(8 * np.log(2)) * n_sigmas)
lb,ub = peak_nidx - delta_nidx, peak_nidx + delta_nidx
lb = 0 if lb < 0 else lb
ub = len(self) if ub > len(self) else ub
if plot:
plt.plot(self.index, self.values, '--', label = 'raw input')
plt.plot(self.index[lb:ub], self.values[lb:ub], '-', label = 'detected peak')
return lb, ub
[docs] def autocrop(self, n_sigmas = 4, offset = False):
lb, ub = self.region(n_sigmas = n_sigmas, plot = False)
return self._constructor(self.iloc[lb:ub])
[docs]class FramePeak(pandas.DataFrame):
"""
Peak analysis on 1D labeled / unlabeled data
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def _toTOFSeries(func):
"""
Decorator to wrap series returns for method chain
"""
def wrapper(*args, **kwargs):
result = func(*args, **kwargs)
if type(result) == pandas.core.series.Series:
return tofseries.TOFSeries(result)
else:
return result
return wrapper
@classmethod
def _constructor(cls, *args, **kwargs):
return cls(*args, **kwargs)
[docs] @_toTOFSeries
def idx(self, axis = 0, gauss_fit = False, normalize = False):
selfCopy = self.copy() if axis == 0 else self.copy().T
if gauss_fit:
idx = selfCopy.apply(lambda s: Gaussian.gausFit(x = s.index, y = s.values, offset = False)[0][1])
else:
idx = selfCopy.idxmax()
if normalize:
idx = idx / idx.max()
return idx
[docs] @_toTOFSeries
def nidx(self, axis = 0, gauss_fit = False, normalize = False):
selfCopy = self.copy() if axis == 0 else self.copy().T
if gauss_fit:
height = selfCopy.height(gauss_fit)
n_idx = [np.argmin(abs(selfCopy[x].values - h)) for x, h in zip(selfCopy.columns, height)]
else:
n_idx = [np.argmax(selfCopy[x].values) for x in selfCopy.columns]
n_idx = pandas.Series(n_idx, selfCopy.columns)
if normalize:
n_idx = n_idx / n_idx.max()
return n_idx
[docs] @_toTOFSeries
def height(self, axis = 0, gauss_fit = False, normalize = False):
selfCopy = self.copy() if axis == 0 else self.copy().T
if gauss_fit:
height = selfCopy.apply(lambda s: Gaussian.gausFit(x = s.index, y = s.values, offset = False)[0][0])
else:
height = selfCopy.max()
if normalize:
height = height / height.max()
return height
[docs] @_toTOFSeries
def sigma(self, n_sigmas = 1, axis = 0, gauss_fit = False, normalize = False):
selfCopy = self.copy() if axis == 0 else self.copy().T
def col_sigma(series):
if gauss_fit:
return Gaussian.gausFit(x = series.index, y = series.values, offset = False)[0][2]
else:
peak_values = max(series)
peak_idx = np.argmax(series.values)
half_max = peak_values / 2
try:
hwhm_idx_left = np.argmin(abs(series.iloc[:peak_idx].values - half_max))
hwhm_idx_right = np.argmin(abs(series.iloc[peak_idx : ].values - half_max)) + peak_idx
except ValueError: # When there is no peak found
return 0
fwhm = series.index[hwhm_idx_right] - series.index[hwhm_idx_left]
sigma = fwhm / np.sqrt(8*np.log(2))
return sigma * n_sigmas
sigma = selfCopy.apply(col_sigma)
if normalize:
sigma = sigma / sigma.max()
return sigma
[docs] @_toTOFSeries
def fwhm(self, axis = 0, gauss_fit = False, normalize = False):
sigma = self.sigma(n_sigmas = 1, axis = axis, gauss_fit = gauss_fit, normalize = False)
fwhm = sigma * np.sqrt(8 * np.log(2))
if normalize:
fwhm = fwhm / fwhm.max()
return fwhm
[docs] @_toTOFSeries
def area(self, axis = 0, gauss_fit = False, normalize = False):
selfCopy = self.copy() if axis == 0 else self.copy().T
def col_area(series):
if gauss_fit:
for col in series.columns:
popt, pcov = Gaussian.gausFit(x = series.index, y = series[col].values)
area = quad(lambda x:Gaussian.gaus(x, *popt), min(series[col].index), max(series[col].index))[0]
else:
area = np.trapz(x = series.index, y = series.values)
return area
area = selfCopy.apply(col_area)
if normalize:
area = area / area.max()
return area
[docs] @_toTOFSeries
def region(self, axis = 0, n_sigmas = 4, plot = False):
"""
Locate the region where there exists a peak
return the bound index of the region
"""
selfCopy = self.copy()
def col_region(series, plot = False):
series = SeriesPeak(series.copy())
peak_nidx = series.nidx()
peak_value = series.iloc[peak_nidx]
try:
hwhm_nidx_left = peak_nidx - np.argmin(abs(series.iloc[peak_nidx:0:-1].values - peak_value / 2))
hwhm_nidx_right = np.argmin(abs(series.iloc[peak_nidx:].values - peak_value / 2)) + peak_nidx
fwhm_nidx_range = hwhm_nidx_right - hwhm_nidx_left
except ValueError: # incase the peak is on the edge, e.g. no peak in the region
return (int(len(series) / 2), int(len(series) / 2))
delta_nidx = int(fwhm_nidx_range / np.sqrt(8 * np.log(2)) * n_sigmas)
lb,ub = peak_nidx - delta_nidx, peak_nidx + delta_nidx
lb = 0 if lb < 0 else lb
ub = len(series) if ub > len(series) else ub
if plot:
plt.plot(series.index, series.values, '--', label = 'raw input')
plt.plot(series.index[lb:ub], series.values[lb:ub], '-', label = 'detected peak')
return lb, ub
if axis == 0:
return selfCopy.apply(col_region)
elif axis == 1:
return selfCopy.T.apply(col_region)
else: # Currently the 2D peak detect doesn't work well
lb0 = min([x[0] for x in selfCopy.apply(col_region)])
ub0 = max([x[1] for x in selfCopy.apply(col_region)])
lb1 = min([x[0] for x in selfCopy.T.apply(col_region)])
ub1 = max([x[1] for x in selfCopy.T.apply(col_region)])
return lb0, ub0, lb1, ub1