Source code for sovabids.parsers

"""Module with parser utilities."""
import re
from copy import deepcopy

from numpy import mat
from sovabids.misc import flat_paren_counter
from sovabids.dicts import deep_merge_N,nested_notation_to_tree

def placeholder_to_regex(placeholder,encloser='%',matcher='(.+)'):
    """Translate a placeholder pattern to a regex pattern.

    Backslashes in the placeholder are first normalised to forward slashes.
    Every ``<encloser>field<encloser>`` token is then replaced by ``matcher``
    and, finally, every forward slash is escaped with a backslash.

    Parameters
    ----------
    placeholder : str
        The placeholder pattern to translate.
    encloser : str, optional
        The symbol which encloses the fields of the placeholder pattern.
    matcher : str, optional
        The regex pattern to use for the placeholder, ie : (.*?),(.*),(.+).

    Returns
    -------
    pattern : str
        The regex pattern. Empty string if the placeholder has no enclosers
        or an odd (unbalanced) number of them.
    fields : list of str
        The fields as they appear in the regex pattern. Empty list if the
        placeholder has no enclosers or an unbalanced number of them.
    """
    pattern = placeholder.replace('\\', '/')

    # Count the *configured* encloser (not a hard-coded '%'), otherwise a
    # custom encloser is either rejected or validated on the wrong symbol.
    num_enclosers = pattern.count(encloser)
    if num_enclosers == 0 or num_enclosers % 2 != 0:
        return '', []

    # Splitting on the encloser alternates literal text / field name, so the
    # odd positions are the field names.
    fields = pattern.split(encloser)[1::2]
    for field in fields:
        # Replace one token at a time so repeated field names are each
        # substituted exactly once, in order of appearance.
        pattern = pattern.replace(encloser + field + encloser, matcher, 1)

    pattern = pattern.replace('/', '\\/')
    return pattern, fields
def parse_from_placeholder(string,pattern,encloser='%',matcher='(.+)'):
    """Parse a string using a placeholder pattern.

    The placeholder pattern is translated to a regex with
    ``placeholder_to_regex`` and the actual extraction is delegated to
    ``parse_from_regex``.

    Danger: underscores and hyphens are replaced with an empty string in all
    fields except for the ignore field. This is done to accommodate the bids
    standard restrictions automatically.

    Parameters
    ----------
    string : str
        The string to parse.
    pattern : str
        The placeholder pattern to use for parsing.
    encloser : str, optional
        The symbol which encloses the fields of the placeholder pattern.
    matcher : str, optional
        The regex pattern to use for the placeholder, ie : (.*?),(.*),(.+).

    Returns
    -------
    dict
        The dictionary with the fields and values requested.
    """
    regex, field_names = placeholder_to_regex(pattern, encloser, matcher)
    parsed = parse_from_regex(string, regex, field_names)
    return parsed
def parse_from_regex(string,pattern,fields,invalid_replace=''):
    """Parse a string using a regex pattern.

    Danger: underscores and hyphens found in the extracted values are
    replaced by ``invalid_replace`` in all fields except for the ignore
    field. This is done to accommodate the bids standard restrictions
    automatically.

    Parameters
    ----------
    string : str
        The string to parse. Backslashes are converted to forward slashes
        before matching.
    pattern : str
        The regex pattern to use for parsing.
    fields : str or list of str
        Fields in the same order as they appear in the regex pattern.
        A single string is treated as a one-element list.
    invalid_replace : str
        String that will replace '-' and '_' that appear on extracted fields.

    Returns
    -------
    dict
        The dictionary with the fields and values requested. An empty
        dictionary is returned when the number of fields does not agree with
        the group count reported by ``flat_paren_counter`` or with the number
        of groups of the match.

    Raises
    ------
    AttributeError
        If the pattern does not match the string.
    """
    # Work on posix-style separators only.
    string = string.replace('\\', '/')
    expected_groups = flat_paren_counter(pattern)

    if isinstance(fields, str):
        fields = [fields]
    if len(fields) != expected_groups:
        return {}

    found = re.search(pattern, string)
    if found is None:
        raise AttributeError(f"Couldn't find fields in the string {string} using the pattern {pattern}. Recheck the pattern for errors.")

    captured = found.groups()
    if expected_groups != len(captured):
        return {}

    trees = []
    for field, value in zip(fields, captured):
        # The 'ignore' field is kept verbatim; every other field gets its
        # underscores and hyphens substituted.
        if field != 'ignore' and ('_' in value or '-' in value):
            value = value.replace('_', invalid_replace)
            value = value.replace('-', invalid_replace)
        trees.append(nested_notation_to_tree(field, value))
    return deep_merge_N(trees)
def parse_entity_from_bidspath(path,entity,mode='r2l'):
    """Get the value of a bids-entity from a path.

    The value is the text between the first hyphen found at (or after) the
    located entity key and the next underscore, path separator ('/' or
    backslash) or the end of the string, whichever comes first.

    Parameters
    ----------
    path : str
        The bidspath we are going to derive the information on.
        Typically the complete path of a file of a modality (ie an _eeg
        file), but folder-only paths (ie ending in ``sub-01``) also work.
    entity : str
        The entity we are going to extract.
        SHOULD be one of sub|ses|task|acq|run. A trailing hyphen is appended
        when the given entity does not contain one.
    mode : str
        Direction of lookup. One of r2l|l2r .
        r2l (right to left)
        l2r (left to right)

    Returns
    -------
    value : str or None
        The extracted value of the entity as a string.
        If None, it means the entity was not found on the string.

    Raises
    ------
    ValueError
        If ``mode`` is not one of r2l|l2r.
    """
    entity = entity if '-' in entity else entity + '-'

    # Easier to find it from the tail of the bidspath
    if mode == 'r2l':
        entity_position = path.rfind(entity)
    elif mode == 'l2r':
        entity_position = path.find(entity)
    else:
        raise ValueError('Incorrect usage of the mode argument.')

    if entity_position == -1:
        return None

    little_path = path[entity_position:]
    # Stop at '_' (next entity / suffix) but also at a path separator or the
    # end of the string: the entity may have been located in a folder name
    # (e.g. 'sub-01/ses-V1/...'), where no underscore closes the value.
    # little_path always contains a hyphen here, so the search cannot fail.
    value = re.search(r'-([^_/\\]*)', little_path).group(1)
    return value
[docs]def _modify_entities_of_placeholder_pattern(pattern,mode='append'): """Convert between sovabids entities pattern notation and the shorter notation. The shorter notation is: %dataset%, %task%, %session%, %subject%, %run%, %acquisition% Parameters ---------- string : str The pattern string to convert. mode : str Whether to append 'entities' or cut it. One of {'append','cut'} Returns ------- str The converted pattern string. """ if mode == 'append': for keyword in ['%task%','%session%','%subject%','%run%','%acquisition%']: pattern = pattern.replace(keyword,'%entities.'+keyword[1:]) pattern = pattern.replace('%dataset%','%dataset_description.Name%') elif mode == 'cut': for keyword in ['%task%','%session%','%subject%','%run%','%acquisition%']: pattern = pattern.replace('%entities.'+keyword[1:],keyword) pattern = pattern.replace('%dataset_description.Name%','%dataset%') return pattern
def parse_entities_from_bidspath(targetpath,entities=['sub','ses','task','acq','run'],mode='r2l'):
    """Get the bids entities from a bidspath.

    Each requested entity is looked up with ``parse_entity_from_bidspath``;
    entities that are not found are left out of the result.

    Parameters
    ----------
    targetpath : str
        The bidspath we are going to derive the information on.
    entities : list of str
        The entities we are going to extract.
        Defaults to sub,ses,task,acq,run
    mode : str
        Direction of lookup. One of r2l|l2r .
        r2l (right to left)
        l2r (left to right)

    Returns
    -------
    dict
        A dictionary with the extracted entities, holding the values as
        returned by ``parse_entity_from_bidspath``.
        {'sub':'11','task':'resting','ses':'V1','acq':'A','run':'1'}
    """
    # Look every entity up first, then keep only the ones that were found.
    lookups = [
        (entity, parse_entity_from_bidspath(targetpath, entity, mode))
        for entity in entities
    ]
    return {entity: value for entity, value in lookups if value is not None}
def parse_path_pattern_from_entities(sourcepath,bids_entities):
    """Get the path pattern from a path and a dictionary of bids entities and their values.

    Every entity value is replaced in the path by its placeholder, the
    placeholders are converted to the ``entities.`` notation and the result
    is trimmed so that it starts at the path component which holds the first
    placeholder.

    Parameters
    ----------
    sourcepath : str
        The sourcepath that will be modified to get the path pattern.
        It must not contain the '%' character.
    bids_entities : dict
        Dictionary with the entities and their values on the path.
        Ie {'sub':'11','task':'resting','ses':'V1','acq':'A','run':'1'}
        The keys must be among sub, ses, task, acq and run.
        There should be no ambiguity between the sourcepath
        and each of the values. Otherwise an error will be raised.

    Returns
    -------
    str :
        The path pattern in placeholder format.

    Raises
    ------
    ValueError
        If a value appears more than once in the path, or cannot be found
        once the values that contain it have been masked out.
    """
    pattern = deepcopy(sourcepath)
    # Values still to be located; each one is swapped for its placeholder once
    # consumed, so that placeholders already written into the path get masked
    # too when a later value happens to be a substring of them.
    pending = list(bids_entities.values())
    placeholders = {
        'sub': '%subject%',
        'ses': '%session%',
        'task': '%task%',
        'acq': '%acquisition%',
        'run': '%run%',
    }
    assert '%' not in pattern # otherwise it will mess up the logic

    for entity, val in bids_entities.items():
        # Hide every pending item that strictly contains the current value,
        # so that only stand-alone occurrences are counted.
        masked = pattern
        for other in pending:
            if val in other and val != other:
                masked = masked.replace(other, '*' * len(other))

        occurrences = masked.count(val)
        if occurrences > 1:
            raise ValueError('Ambiguity: The path has multiple instances of {}'.format(val))
        if occurrences < 1:
            all_values = list(bids_entities.values())
            containing = [x for x in all_values if val in x and val != x]
            contained = [x for x in all_values if x in val and val != x]
            possible_ambiguity_with = set(containing + contained)
            raise ValueError(f'{val} seems to be ambiguous with any of the following values {possible_ambiguity_with}')

        pattern = pattern.replace(val, placeholders[entity])
        pending[pending.index(val)] = placeholders[entity]

    pattern = _modify_entities_of_placeholder_pattern(pattern)
    pattern = pattern.replace('\\', '/')

    # The pattern starts right after the last '/' that precedes the first
    # placeholder; rfind gives -1 when there is no such '/', i.e. start at 0.
    before_first_placeholder = pattern[:pattern.find('%')]
    start = before_first_placeholder.rfind('/') + 1
    return pattern[start:]
def find_bidsroot(path):
    """Get the bidsroot from an absolute path describing a bids file inside a subject subfolder.

    The bidsroot is taken to be everything that precedes the first
    occurrence of ``sub-<label>`` in the path, where ``<label>`` is the
    subject value extracted from the path (right to left lookup).

    Parameters
    ----------
    path : str
        The absolute path to any bids file inside a sub- folder.

    Returns
    -------
    str :
        The bidsroot absolute path (including its trailing separator).

    Raises
    ------
    KeyError
        If no ``sub`` entity could be found in the path.
    """
    sub = parse_entities_from_bidspath(path,entities=['sub'],mode='r2l')
    # Search for the whole 'sub-<label>' token rather than the bare label:
    # a short label such as '01' may already appear earlier in the path
    # (ie '/data/study01/sub-01/...'), which would cut the root too early.
    subject_token = 'sub-' + sub['sub']
    index = path.find(subject_token)
    bidsroot = path[:index]
    return bidsroot