Source code for scibeam.core.regexp

# regexp.py ---
#
# Filename: regexp.py
# Description:
#            Python regex related functions
# Author:    Yu Lu
# Email:     yulu@utexas.edu
# Github:    https://github.com/SuperYuLu
#
# Created: Sat May  5 16:24:14 2018 (-0500)
# Version:
# Last-Updated: Sat Jul 28 15:01:30 2018 (-0500)
#           By: yulu
#     Update #: 216
#

from .common import winPathHandler
from .dictfunc import buildDict
from . import base
import os, re

class RegMatch:
    """
    Match one or several regular expressions against strings or folder content.

    The pattern(s) given at construction are compiled with ``re.compile`` and
    stored on ``regex``. Results are reported as dictionaries that map the
    value captured by a regex group to the matched text.
    """

    def __init__(self, regStr):
        """
        Store the pattern(s) to match with.

        regStr: a single regex pattern, or a list of regex patterns
        """
        self.regex = regStr

    @property
    def regex(self):
        """Compiled pattern, or list of compiled patterns."""
        return self._regex

    @regex.setter
    def regex(self, regStr):
        # A list is compiled element by element, anything else is handed
        # to re.compile as a single pattern.
        if isinstance(regStr, list):
            self._regex = [re.compile(s) for s in regStr]
        else:
            self._regex = re.compile(regStr)

    @staticmethod
    def _match_one(regStr, oneStr):
        """Match oneStr from its start, with a compiled or a plain pattern."""
        if hasattr(regStr, 'match'):
            return regStr.match(oneStr)
        return re.match(regStr, oneStr)

    @staticmethod
    def _to_number(value_string):
        """Convert to int when the text has no '.', otherwise to float."""
        if len(value_string.split('.')) == 1:
            return int(value_string)
        return float(value_string)

    @staticmethod
    def single_regex_match(regStr, strings, group=1, asNumber=False):
        """
        Match python regex pattern in a given string or list of strings.

        Based on python re package and uses group to locate the value.
        Matching is anchored at the start of each string (``re.match``).

        regStr:   compiled pattern or pattern string
        strings:  a single string, or a list of strings
        group:    index of the regex group holding the value
        asNumber: convert the captured value to int (no '.' in the text)
                  or float (otherwise)

        Returns a dictionary keyed by the captured value. For a single
        string the value is the full matched text. For a list, the pairs
        (captured value, full matched text) of every string that matched
        are folded into a dictionary with ``buildDict``; its exact merge
        behaviour is defined in ``dictfunc`` -- NOTE(review): presumably it
        groups texts sharing the same key, confirm there.

        Raises LookupError if nothing matched.
        """
        if isinstance(strings, list):
            # Strings that do not match are skipped silently; only the case
            # where no string at all matched is an error.
            matched = []
            for oneStr in strings:
                mch = RegMatch._match_one(regStr, oneStr)
                if mch:
                    matched.append(mch)
            if not matched:
                raise LookupError("No match found ! regex *{}* doesn't match string *{}*".format(regStr, strings))

            value_strings = [mch.group(group) for mch in matched]
            match_strings = [mch.group(0) for mch in matched]
            if asNumber:
                values = [RegMatch._to_number(value_string) for value_string in value_strings]
            else:
                values = value_strings

            resDict = {}
            for key, s in zip(values, match_strings):
                resDict = buildDict(resDict, key, s)
            return resDict

        # Single string to be matched
        mch = RegMatch._match_one(regStr, strings)
        if not mch:
            raise LookupError("No match found ! regex *{}* doesn't match string *{}*".format(regStr, strings))
        value_string = mch.group(group)
        match_string = mch.group(0)
        value = RegMatch._to_number(value_string) if asNumber else value_string
        return {value: match_string}

    def match(self, strings, group=1, asNumber=True):
        """
        Match a single or list of regular expressions to a single or list
        of strings.

        Returns one dictionary (see ``single_regex_match``) when ``regex``
        is a single pattern, or a list with one dictionary per pattern when
        ``regex`` is a list. Raises LookupError if a pattern matches nothing.
        """
        if isinstance(self.regex, list):
            return [
                self.single_regex_match(regex, strings, group=group, asNumber=asNumber)
                for regex in self.regex
            ]
        return self.single_regex_match(self.regex, strings, group=group, asNumber=asNumber)

    @staticmethod
    def _trace_dict_value(dictIn, trace_list):
        """Return the value reached by indexing dictIn with each key of trace_list in turn."""
        dictOut = dictIn
        for key in trace_list:
            dictOut = dictOut[key]
        return dictOut

    @staticmethod
    def _trace_dict_key(dictIn):
        """
        Trace the key path of a nested dictionary.

        Returns one list per key of dictIn: the key itself followed by the
        traces of the value stored under it (nothing is appended when that
        value is not a dictionary). Returns [] if dictIn is not a dictionary.
        Example: {'a': {'b': 1}} gives [['a', ['b']]].
        """
        if not isinstance(dictIn, dict):
            return []
        trace = []
        for key in dictIn:
            res = [key] + RegMatch._trace_dict_key(dictIn[key])
            trace.append(res)
        return trace

    def matchFolder(self, folder_path, asNumber=True, group=1):
        """
        Match files in the folder content with self.regex.

        If two regex are in self.regex, the match is done in a recursive
        way: the first regex is matched against the folder listing, and the
        2nd regex is applied to the match result stored under each key of
        the first result.

        folder_path is passed through ``winPathHandler`` before listing
        (NOTE(review): presumably normalises Windows style paths -- confirm
        in ``common``). Raises LookupError if a regex matches nothing.
        """
        path = winPathHandler(folder_path)
        searchList = os.listdir(path)

        if not isinstance(self.regex, list):
            return self.single_regex_match(self.regex, searchList, group=group, asNumber=asNumber)

        resDict = {}
        for i, regex in enumerate(self.regex):
            if i == 0:
                resDict = self.single_regex_match(regex, searchList, group=group, asNumber=asNumber)
            else:
                # Refine each entry of the previous level in place; keys are
                # only reassigned, never added or removed, while iterating.
                for key in resDict:
                    resDict[key] = self.single_regex_match(regex, resDict[key], group=group, asNumber=asNumber)
        return resDict