# Source code for scibeam.core.tofframe

# tofframe.py ---
#
# Filename: tofframe.py
# Description:
#            DataFrame for  time-of-flight data frame analysis
# Author:    Yu Lu
# Email:     yulu@utexas.edu
# Github:    https://github.com/SuperYuLu
#
# Created: Fri May  4 10:53:40 2018 (-0500)
# Version:
# Last-Updated: Tue Jul 31 18:14:12 2018 (-0500)
#           By: yulu
#     Update #: 746
#




import functools
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas

from . import base
from . import tofseries
from .common import winPathHandler, loadFile
from .regexp import RegMatch
from .descriptor import DescriptorMixin
from .plot import PlotTOFFrame
from .peak import FramePeak


def read_folder(path, regStr, lowerBound=None, upperBound=None,
                removeOffset=True, offset_margin_how='outer',
                offset_margin_size=20, skiprows=0, sep='\t'):
    r"""
    Create a TOFFrame instance from the files in a folder whose names
    match a regular expression.

    Thin convenience wrapper: every argument is forwarded unchanged to
    ``TOFFrame.from_path``.

    Parameters
    ----------
    path: str
        folder path, linux style or windows style as "raw string",
        e.g. r"C:\User\Document\FolderName"
    regStr: str
        regular expression string used to match the files in the folder
    lowerBound: int or float
        time axis lower boundary limit for data
    upperBound: int or float
        time axis upper boundary limit for data
    removeOffset: bool
        if True (default) remove data offset (set floor to 0 in the
        no-signal region)
    offset_margin_how: {"outer", "outer left", "outer right", "inner", "inner left", "inner right"}, default "outer"
        Specify the way to handle the offset margin. The offset floor value
        is calculated by averaging the values in a given range relative to
        the data lower and upper boundary, with available options:

        * "outer" (default): from both left and right side outside of the
          [lowerBound, upperBound] region
        * "outer left": like "outer" but from only the left side
        * "outer right": like "outer" but from only the right side
        * "inner": from both left and right side inside of the
          [lowerBound, upperBound] region
        * "inner left": like "inner" but from only the left side
        * "inner right": like "inner" but from only the right side
    offset_margin_size: int
        Number of values to use for averaging when calculating the offset
    skiprows: int
        number of rows to skip when reading in data
    sep: str, default "\t"
        separator for columns in the data file

    Returns
    -------
    Instance of class TOFFrame
    """
    # Gather the optional settings once and hand them over as keywords.
    options = {
        'lowerBound': lowerBound,
        'upperBound': upperBound,
        'removeOffset': removeOffset,
        'offset_margin_how': offset_margin_how,
        'offset_margin_size': offset_margin_size,
        'skiprows': skiprows,
        'sep': sep,
    }
    return TOFFrame.from_path(path, regStr, **options)
def read_regexp_match(path, matchDict, lowerBound=None, upperBound=None,
                      removeOffset=True, offset_margin_how='outer',
                      offset_margin_size=20, skiprows=0, sep='\t'):
    r"""
    Create a TOFFrame instance from a regular expression match result
    dictionary, e.g. one produced by the scibeam class RegMatch.

    Thin convenience wrapper: every argument is forwarded unchanged to
    ``TOFFrame.from_matchResult``.

    Parameters
    ----------
    path: str
        path of the targeted data folder
    matchDict: dictionary
        result dictionary from scibeam.regexp.RegMatch, or a user specified
        dictionary with measurement label as key and file name string as
        value
    lowerBound: int or float
        time axis lower boundary limit for data
    upperBound: int or float
        time axis upper boundary limit for data
    removeOffset: bool
        if True (default) remove data offset (set floor to 0 in the
        no-signal region)
    offset_margin_how: {"outer", "outer left", "outer right", "inner", "inner left", "inner right"}, default "outer"
        Specify the way to handle the offset margin. The offset floor value
        is calculated by averaging the values in a given range relative to
        the data lower and upper boundary, with available options:

        * "outer" (default): from both left and right side outside of the
          [lowerBound, upperBound] region
        * "outer left": like "outer" but from only the left side
        * "outer right": like "outer" but from only the right side
        * "inner": from both left and right side inside of the
          [lowerBound, upperBound] region
        * "inner left": like "inner" but from only the left side
        * "inner right": like "inner" but from only the right side
    offset_margin_size: int
        Number of values to use for averaging when calculating the offset
    skiprows: int
        number of rows to skip when reading in data
    sep: str, default "\t"
        separator for columns in the data file

    Returns
    -------
    Instance of TOFFrame
    """
    # Gather the optional settings once and hand them over as keywords.
    options = {
        'lowerBound': lowerBound,
        'upperBound': upperBound,
        'removeOffset': removeOffset,
        'offset_margin_how': offset_margin_how,
        'offset_margin_size': offset_margin_size,
        'skiprows': skiprows,
        'sep': sep,
    }
    return TOFFrame.from_matchResult(path, matchDict, **options)
class TOFFrame(pandas.DataFrame):
    """
    Time-Of-Flight (TOF) DataFrame

    Subclass of pandas.DataFrame with extra methods / properties for
    time-series analysis. Frames built by the ``from_*`` constructors are
    indexed by time (rows) and carry one column per measurement label.

    Parameters
    ----------
    data: numpy ndarray (structured or homogeneous), dict, or DataFrame
        Dict can contain Series, arrays, constants, or list-like objects.
        Value of measurement, e.g. voltage, current, arbitrary unit signal.
    index: numpy ndarray, iterables
        Time axis for time-of-flight
    columns: str, int, or float
        label of different tof measurement, e.g. pressure, temperature, etc
    """

    # Module-wide side effect kept from the original implementation: show 9
    # significant digits when frames are printed. The fully qualified key is
    # required; the bare 'precision' pattern is ambiguous / rejected by
    # current pandas releases.
    pandas.set_option('display.precision', 9)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @property
    def _constructor(self):
        # Makes pandas operations on a TOFFrame return TOFFrame again.
        return TOFFrame

    @property
    def _constructor_sliced(self):
        # Makes single row / column selections come back as TOFSeries.
        return tofseries.TOFSeries

    @property
    def _make_mixin(self):
        # Hands a copy of the frame to the descriptor mixins declared at the
        # bottom of the class.
        return self.copy()

    def _toTOFSeries(func):
        """
        Decorator to wrap series returns for method chain

        A plain pandas.Series result is promoted to TOFSeries; anything else
        is returned untouched.
        """
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            result = func(*args, **kwargs)
            # Exact type check on purpose: subclasses pass through as they are.
            if type(result) is pandas.Series:
                return tofseries.TOFSeries(result)
            return result
        return wrapper

    def _toTOFFrame(func):
        """
        Decorator to wrap frame returns for method chain

        A plain pandas.DataFrame result is promoted to TOFFrame; anything
        else is returned untouched.
        """
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            result = func(*args, **kwargs)
            # Exact type check on purpose: subclasses pass through as they are.
            if type(result) is pandas.DataFrame:
                return TOFFrame(result)
            return result
        return wrapper

    @staticmethod
    def _select_trace(data, lowerBound, upperBound, removeOffset,
                      offset_margin_how, offset_margin_size):
        """
        Split a loaded two-column array into its (time, value) columns,
        restricted to [lowerBound, upperBound) when both bounds are given.

        ``data`` is indexed as ``data[:, 0]`` (time) and ``data[:, 1]``
        (value), i.e. it is assumed to be a 2D numpy array as returned by
        loadFile -- TODO confirm against loadFile.

        Offset removal is only applied when both bounds are given; without
        bounds the raw columns are returned even if removeOffset is True.
        """
        # `is None` rather than truthiness, so that a bound of 0 is honoured.
        if lowerBound is None or upperBound is None:
            return data[:, 0], data[:, 1]

        lb, ub = TOFFrame.find_time_idx(data[:, 0], lowerBound, upperBound)
        time = data[lb:ub, 0]
        if removeOffset:
            value = TOFFrame.remove_data_offset(
                data[:, 1],
                lowerBoundIdx=lb,
                upperBoundIdx=ub,
                how=offset_margin_how,
                margin_size=offset_margin_size,
            )
        else:
            value = data[lb:ub, 1]
        return time, value

    @classmethod
    def from_path(cls, path, regStr, lowerBound=None, upperBound=None,
                  removeOffset=True, offset_margin_how='outer',
                  offset_margin_size=20, skiprows=0, sep='\t'):
        """
        Build TOFFrame instance from the files of a folder matched by regStr

        Every matched file contributes one column, labelled by its key in the
        match result; columns are ordered by sorted key. The time axis of the
        last loaded file is used as index, so all files are expected to share
        the same time axis.

        Parameters
        ----------
        path: str
            folder path (passed through winPathHandler)
        regStr: str
            regular expression string used to match files in the folder
        lowerBound, upperBound: int or float, optional
            time axis limits; both must be given for the data to be cut
        removeOffset: bool
            remove the data offset (only applied when both bounds are given)
        offset_margin_how, offset_margin_size:
            forwarded to remove_data_offset as ``how`` and ``margin_size``
        skiprows, sep:
            forwarded to loadFile

        Returns
        -------
        Instance of cls

        Raises
        ------
        ValueError
            if regStr is not a string, or no file in the folder matched
        TypeError
            if a file cannot be loaded because its match entry is not a
            single file name
        """
        if not isinstance(regStr, str):
            raise ValueError("[*] Please provide regStr for file match in the path !")

        path = winPathHandler(path)
        matchDict = RegMatch(regStr).matchFolder(path)
        if not matchDict:
            raise ValueError("[*] No file matched by regStr in the given path !")

        values = {}
        for k, f in sorted(matchDict.items(), key=lambda item: item[0]):
            try:
                data = loadFile(path + f, skiprows=skiprows, sep=sep)
            except TypeError as err:
                # Presumably the match result holds several file names under
                # one key, which makes `path + f` fail -- TODO confirm against
                # RegMatch.matchFolder.
                raise TypeError(
                    "[*] Multiple files found under the same parameter, "
                    "please check below files:\n %s" % (f,)
                ) from err
            time, values[k] = cls._select_trace(
                data, lowerBound, upperBound, removeOffset,
                offset_margin_how, offset_margin_size)
        return cls(values, index=time)

    @classmethod
    def from_file(cls, filePath, lowerBound=None, upperBound=None,
                  removeOffset=True, offset_margin_how='outer',
                  offset_margin_size=20, skiprows=0, sep='\t'):
        """
        Generate TOFFrame object from a single given file

        The resulting frame has a single column named 'value', indexed by
        the time column of the file.

        Parameters
        ----------
        filePath: str
            path of the data file (passed through winPathHandler)
        lowerBound, upperBound: int or float, optional
            time axis limits; both must be given for the data to be cut
        removeOffset: bool
            remove the data offset (only applied when both bounds are given)
        offset_margin_how, offset_margin_size:
            forwarded to remove_data_offset as ``how`` and ``margin_size``
        skiprows, sep:
            forwarded to loadFile

        Returns
        -------
        Instance of cls
        """
        filePath = winPathHandler(filePath)
        data = loadFile(filePath, skiprows=skiprows, sep=sep)
        time, value = cls._select_trace(
            data, lowerBound, upperBound, removeOffset,
            offset_margin_how, offset_margin_size)
        return cls({'value': value}, index=time)

    @classmethod
    def from_matchResult(cls, path, matchDict, lowerBound=None,
                         upperBound=None, removeOffset=True,
                         offset_margin_how='outer', offset_margin_size=20,
                         skiprows=0, sep='\t'):
        """
        Create TOFFrame from a RegMatch result dictionary

        Every entry of matchDict contributes one column, labelled by its key;
        columns are ordered by sorted key. The time axis of the last loaded
        file is used as index, so all files are expected to share the same
        time axis.

        Parameters
        ----------
        path: str
            folder holding the files (passed through winPathHandler)
        matchDict: dict
            measurement label -> file name
        lowerBound, upperBound: int or float, optional
            time axis limits; both must be given for the data to be cut
        removeOffset: bool
            remove the data offset (only applied when both bounds are given)
        offset_margin_how, offset_margin_size:
            forwarded to remove_data_offset as ``how`` and ``margin_size``
        skiprows, sep:
            forwarded to loadFile

        Returns
        -------
        Instance of cls

        Raises
        ------
        IsADirectoryError
            if path is not an existing folder
        ValueError
            if matchDict is empty
        """
        path = winPathHandler(path)
        if not os.path.isdir(path):
            # NOTE: NotADirectoryError would describe this better, but the
            # exception type is kept so existing callers keep working.
            raise IsADirectoryError("[*] path not found!")
        if not matchDict:
            raise ValueError("[*] matchDict is empty, no file to load !")

        values = {}
        for k, f in sorted(matchDict.items(), key=lambda item: item[0]):
            data = loadFile(path + f, skiprows=skiprows, sep=sep)
            time, values[k] = cls._select_trace(
                data, lowerBound, upperBound, removeOffset,
                offset_margin_how, offset_margin_size)
        return cls(values, index=time)

    @staticmethod
    def find_time_idx(time, *args):
        """
        Generator of time index for a given time value

        For every element of args the position of the closest value in
        ``time`` is yielded: a single index for a scalar, a list of indices
        for an iterable. Being a generator, nothing is computed (and no error
        is raised) before the result is iterated or unpacked.

        Parameters
        ----------
        time: array-like
            time axis to search in
        args:
            can be 1,2,3, or [1,2] or [1,2,3]

        Raises
        ------
        ValueError
            if the closest match is further away from the requested time
            than the largest step of the time axis (typically a unit mix-up)
        """
        time = np.asarray(time)
        # Largest step of the axis is the tolerance for a "match". Absolute
        # value keeps this meaningful for a descending axis; an axis with a
        # single sample has no step, so only an exact match is accepted.
        gaps = np.abs(np.diff(time))
        t_max_gap = gaps.max() if gaps.size else 0

        def closest_idx(t):
            candi_idx = np.argmin(np.abs(t - time))
            if abs(t - time[candi_idx]) > t_max_gap:
                raise ValueError("[*] Error: find_time_idx didn't find closest match !\n" +
                                 "[!] Searching for time %f while the closest match is %f, you may consider check the unit!"
                                 % (t, time[candi_idx]))
            return candi_idx

        for arg_elem in args:
            if hasattr(arg_elem, '__iter__'):
                yield [closest_idx(t) for t in arg_elem]
            else:
                yield closest_idx(arg_elem)

    @staticmethod
    def remove_data_offset(data, lowerBoundIdx=None, upperBoundIdx=None,
                           how='outer', margin_size=10):
        """
        remove offset in 1D array data

        The offset is the mean of ``margin_size`` samples taken next to the
        bounds, and the returned data is
        ``data[lowerBoundIdx:upperBoundIdx] - offset``.

        Parameters
        ----------
        data: 1D array
            values to correct
        lowerBoundIdx, upperBoundIdx: int, optional
            index range of the signal region. A missing bound defaults to the
            start / end of data; as there is no data outside of a defaulted
            bound, an "outer" margin on that side is switched to "inner"
            (a notice is printed).
        how: {"outer", "outer left", "outer right", "inner", "inner left", "inner right"}
            where the margin samples are taken from: outside ("outer") or
            inside ("inner") of the bounds, on both sides or only on the
            left / right side. For the two-sided options the offset is the
            average of the per-side means.
        margin_size: int
            number of samples per side used for averaging

        Returns
        -------
        data restricted to [lowerBoundIdx, upperBoundIdx) with the offset
        subtracted

        Raises
        ------
        ValueError
            if how is not one of the listed options, or if no sample is
            available in the requested margin
        """
        if how not in ('outer', 'outer left', 'outer right',
                       'inner', 'inner left', 'inner right'):
            raise ValueError(("[*] how: %s not understood !\n" +
                              "[!] possible values of how: 'outer', 'outer left', 'outer right', 'inner', 'inner left', 'inner right'") % how)

        # Fill in missing bounds; an outer margin beyond a defaulted bound
        # does not exist, so fall back to the matching inner margin.
        if lowerBoundIdx is None and upperBoundIdx is None:
            lowerBoundIdx = 0
            upperBoundIdx = len(data)
            if 'outer' in how:
                how = how.replace('outer', 'inner')
                print("[*] No bound index specified, using default full range !")
                print("[*] Outer margin offset forced to be *inner* !")
        elif lowerBoundIdx is None:
            lowerBoundIdx = 0
            if how in ('outer', 'outer left'):
                how = how.replace('outer', 'inner')
                print("[*] No lower bound index specified, using default 0 !")
                print("[*] Outer margin offset forced to be *inner* !")
        elif upperBoundIdx is None:
            upperBoundIdx = len(data)
            if how in ('outer', 'outer right'):
                how = how.replace('outer', 'inner')
                print("[*] No upper bound index specified, using default max length !")
                print("[*] Outer margin offset forced to be *inner* !")

        # Slice starts are clamped at 0: a negative start would wrap around
        # to the end of the array and silently select the wrong samples.
        windows = {
            'outer left': data[max(lowerBoundIdx - margin_size, 0):lowerBoundIdx],
            'outer right': data[upperBoundIdx:upperBoundIdx + margin_size],
            'inner left': data[lowerBoundIdx:lowerBoundIdx + margin_size],
            'inner right': data[max(upperBoundIdx - margin_size, 0):upperBoundIdx],
        }
        if how in ('outer', 'inner'):
            selected = (windows[how + ' left'], windows[how + ' right'])
        else:
            selected = (windows[how],)

        # A side without samples (e.g. bound sitting on the edge of the data)
        # is skipped instead of turning the whole result into NaN.
        side_means = [np.mean(window) for window in selected if len(window) > 0]
        if not side_means:
            raise ValueError("[*] No data available in the '%s' margin to calculate offset !" % how)
        offset = sum(side_means) / len(side_means)

        return data[lowerBoundIdx:upperBoundIdx] - offset

    @_toTOFFrame
    def selectTimeSlice(self, *args, inplace=False):
        """
        Select rows at discrete time values; for a continuous range use
        selectTimeRange()

        Parameters
        ----------
        args:
            discrete time slicing values, can use selectTimeSlice(1,2,3,4)
            or selectTimeSlice([1,2,3,4]). For every value the row closest
            in time is picked (see find_time_idx).
        inplace: bool
            if True replace the content of this frame with the selection and
            return None, otherwise return the selection

        Returns
        -------
        Frame of the selected rows, or None if inplace
        """
        positions = []
        # args is unpacked so that scalars and lists are both understood by
        # find_time_idx (scalar -> index, list -> list of indices).
        for found in self.find_time_idx(self.index, *args):
            if hasattr(found, '__iter__'):
                positions.extend(found)
            else:
                positions.append(found)

        selected = self.iloc[positions]
        if inplace:
            self.__init__(selected)
        else:
            return selected

    @_toTOFFrame
    def selectTimeRange(self, lowerBound, upperBound, inplace=False):
        """
        Select continuous data in a provided time range

        Parameters
        ----------
        lowerBound, upperBound: int or float
            time limits; the rows from the one closest to lowerBound up to,
            but not including, the one closest to upperBound are selected
        inplace: bool
            if True replace the content of this frame with the selection and
            return None, otherwise return the selection

        Returns
        -------
        Frame of the selected rows (a copy), or None if inplace
        """
        lb, ub = TOFFrame.find_time_idx(self.index, lowerBound, upperBound)
        selected = self.iloc[lb:ub, :].copy()
        if inplace:
            self.__init__(selected)
        else:
            return selected

    @_toTOFSeries
    def sum(self, axis=1):
        """
        Sum the values along axis and return the result as TOFSeries

        Note that, unlike pandas.DataFrame.sum, the default is axis = 1
        (one sum per row / time point, indexed by the frame index);
        axis = 0 gives one sum per column, indexed by the column labels.
        The sum is computed with numpy.sum on the raw values.
        """
        index = self.columns if axis == 0 else self.index
        sum_result = np.sum(self.values, axis=axis)
        return tofseries.TOFSeries(sum_result, index=index)

    @_toTOFFrame
    def inch_to_mm(self, offset_inch=0, inplace=False):
        """
        convert inches to millimeters in the column names

        offset_inch is subtracted before the conversion. If inplace, the
        column labels are replaced and the frame itself is returned,
        otherwise only the converted labels are returned.
        """
        values = (self.columns - offset_inch) * 25.4
        if inplace:
            self.columns = values
            return self
        else:
            return values

    @_toTOFFrame
    def mm_to_inch(self, offset_mm=0, inplace=False):
        """
        convert millimeters to inches in the column names

        offset_mm is subtracted before the conversion. If inplace, the
        column labels are replaced and the frame itself is returned,
        otherwise only the converted labels are returned.
        """
        values = (self.columns - offset_mm) / 25.4
        if inplace:
            self.columns = values
            return self
        else:
            return values

    @_toTOFFrame
    def sec_to_microsec(self, offset_sec=0, inplace=False):
        """
        convert seconds in index to microseconds

        offset_sec is subtracted before the conversion. If inplace, the
        index is replaced and the frame itself is returned, otherwise only
        the converted index is returned.
        """
        times = (self.index - offset_sec) * 1e6
        if inplace:
            self.index = times
            return self
        else:
            return times

    @_toTOFFrame
    def microsec_to_sec(self, offset_microsec=0, inplace=False):
        """
        convert microseconds in index to seconds

        offset_microsec is subtracted before the conversion. If inplace, the
        index is replaced and the frame itself is returned, otherwise only
        the converted index is returned.
        """
        times = (self.index - offset_microsec) * 1e-6
        if inplace:
            self.index = times
            return self
        else:
            return times

    @_toTOFSeries
    def reduce(self, axis=0):
        """
        reduce dimension from 2D to 1D by sum along axis
        """
        return self.sum(axis=axis)

    # Descriptors:
    # single = DescriptorMixin(TimeSeries)
    plot2d = DescriptorMixin(PlotTOFFrame)
    peak = DescriptorMixin(FramePeak)