"""Module with parser utilities."""importrefromcopyimportdeepcopyfromsovabids.miscimportflat_paren_counterfromsovabids.dictsimportdeep_merge_N,nested_notation_to_tree
def placeholder_to_regex(placeholder, encloser='%', matcher='(.+)'):
    """Translate a placeholder pattern to a regex pattern.

    Parameters
    ----------
    placeholder : str
        The placeholder pattern to translate.
    encloser : str, optional
        The symbol which encloses the fields of the placeholder pattern.
    matcher : str, optional
        The regex pattern to use for the placeholder, ie : (.*?),(.*),(.+).

    Returns
    -------
    pattern : str
        The regex pattern. Empty if the placeholder has no fields or the
        enclosers are unbalanced (odd count).
    fields : list of str
        The fields as they appear in the regex pattern.
    """
    # Work with forward slashes only so Windows and POSIX paths are alike.
    pattern = placeholder.replace('\\', '/')

    # Count the *configured* encloser (not a hard-coded '%'), otherwise any
    # custom encloser is rejected as "no fields".
    num_enclosers = pattern.count(encloser)
    if num_enclosers == 0 or num_enclosers % 2 != 0:
        return '', []

    # Splitting on the encloser alternates literal text and field names;
    # the odd positions are the field names.
    fields = pattern.split(encloser)[1::2]

    # Replace one occurrence per field so repeated names stay in order.
    for field in fields:
        pattern = pattern.replace(encloser + field + encloser, matcher, 1)

    # Escape the path separators for the regex engine.
    pattern = pattern.replace('/', '\\/')
    return pattern, fields
def parse_from_placeholder(string, pattern, encloser='%', matcher='(.+)'):
    """Parse a string according to a placeholder pattern.

    The placeholder pattern is first translated with
    ``placeholder_to_regex`` and the result is handed to
    ``parse_from_regex``.

    Parameters
    ----------
    string : str
        The string to parse.
    pattern : str
        The placeholder pattern to use for parsing.
    encloser : str, optional
        The symbol which encloses the fields of the placeholder pattern.
    matcher : str, optional
        The regex pattern to use for the placeholder, ie : (.*?),(.*),(.+).

    Returns
    -------
    dict
        The dictionary with the fields and values requested.
    """
    regex, field_names = placeholder_to_regex(pattern, encloser, matcher)
    parsed = parse_from_regex(string, regex, field_names)
    return parsed
def parse_from_regex(string, pattern, fields):
    """Parse string from regex pattern.

    Parameters
    ----------
    string : str
        The string to parse.
    pattern : str
        The regex pattern to use for parsing.
    fields : str or list of str
        Fields in the same order as they appear in the regex pattern.
        A single string is treated as a one-element list.

    Returns
    -------
    dict
        The dictionary with the fields and values requested. An empty
        dictionary is returned when the number of fields does not agree
        with the number of groups, or when the pattern does not match
        the string at all.
    """
    string = string.replace('\\', '/')  # USE POSIX PLEASE

    if isinstance(fields, str):
        fields = [fields]

    # The group count comes from the project's flat_paren_counter helper;
    # it must agree with the number of requested fields.
    num_groups = flat_paren_counter(pattern)
    if len(fields) != num_groups:
        return {}

    match = re.search(pattern, string)
    # re.search returns None when nothing matches; treat that like the
    # other mismatches instead of failing on ``None.groups()``.
    if match is None or len(match.groups()) != num_groups:
        return {}

    # One (possibly nested) dictionary per field, merged into the result.
    trees = [
        nested_notation_to_tree(field, value)
        for field, value in zip(fields, match.groups())
    ]
    return deep_merge_N(trees)
def parse_entity_from_bidspath(path, entity, mode='r2l'):
    """Get the value of a bids-entity from a path.

    Parameters
    ----------
    path : str
        The bidspath we are going to derive the information on.
        Should be the complete path of file of a modality (ie an _eeg file).
    entity : str
        The entity we are going to extract.
        SHOULD be one of sub|ses|task|acq|run
    mode : str
        Direction of lookup. One of r2l|l2r .
        r2l (right to left)
        l2r (left to right)

    Returns
    -------
    value : str or None
        The extracted value of the entity as a string. The value ends at
        the next underscore or path separator. None means the entity was
        not found on the string, or that its value is not terminated by
        an underscore or path separator.

    Raises
    ------
    ValueError
        If ``mode`` is neither 'r2l' nor 'l2r'.
    """
    # Search for the "key-" form so 'sub' does not match arbitrary text.
    entity = entity if '-' in entity else entity + '-'

    # Easier to find it from the tail of the bidspath
    if mode == 'r2l':
        entity_position = path.rfind(entity)
    elif mode == 'l2r':
        entity_position = path.find(entity)
    else:
        raise ValueError('Incorrect usage of the mode argument.')

    if entity_position == -1:
        return None

    little_path = path[entity_position:]

    # Stop at the first underscore OR path separator: an entity found in a
    # directory component (sub-01/...) must not swallow the rest of the path.
    match = re.search(r'-(.*?)[_/\\]', little_path)
    if match is None:
        # No terminator after the entity; previously this crashed with
        # AttributeError on ``None.group``.
        return None
    return match.group(1)
def _modify_entities_of_placeholder_pattern(pattern, mode='append'):
    """Convert between sovabids entities pattern notation and the shorter notation.

    The shorter notation is:
    %dataset%, %task%, %session%, %subject%, %run%, %acquisition%

    Parameters
    ----------
    pattern : str
        The pattern string to convert.
    mode : str
        Whether to append 'entities' or cut it. One of {'append','cut'}.
        Any other value leaves the pattern untouched.

    Returns
    -------
    str
        The converted pattern string.
    """
    entity_names = ('task', 'session', 'subject', 'run', 'acquisition')
    short_dataset = '%dataset%'
    long_dataset = '%dataset_description.Name%'

    if mode == 'append':
        # short -> long: %task% becomes %entities.task%
        for name in entity_names:
            pattern = pattern.replace('%' + name + '%', '%entities.' + name + '%')
        return pattern.replace(short_dataset, long_dataset)

    if mode == 'cut':
        # long -> short: %entities.task% becomes %task%
        for name in entity_names:
            pattern = pattern.replace('%entities.' + name + '%', '%' + name + '%')
        return pattern.replace(long_dataset, short_dataset)

    return pattern
def parse_entities_from_bidspath(targetpath, entities=('sub', 'ses', 'task', 'acq', 'run'), mode='r2l'):
    """Get the bids entities from a bidspath.

    Parameters
    ----------
    targetpath : str
        The bidspath we are going to derive the information on.
    entities : iterable of str
        The entities we are going to extract.
        Defaults to sub,ses,task,acq,run
    mode : str
        Direction of lookup. One of r2l|l2r .
        r2l (right to left)
        l2r (left to right)

    Returns
    -------
    dict
        A dictionary with the extracted entities; entities that are not
        found are left out. Values are whatever
        ``parse_entity_from_bidspath`` returns, ie
        {'sub':'11','task':'resting','ses':'V1','acq':'A','run':'1'}
    """
    # The default is an immutable tuple so it cannot be shared-and-mutated
    # across calls; strings are immutable too, so no copy of the path is made.
    found = {}
    for entity in entities:
        value = parse_entity_from_bidspath(targetpath, entity, mode)
        # Skip entities that do not exist on the path.
        if value is not None:
            found[entity] = value
    return found
def parse_path_pattern_from_entities(sourcepath, bids_entities):
    """Get the path pattern from a path and a dictionary of bids entities and their values.

    Parameters
    ----------
    sourcepath : str
        The sourcepath that will be modified to get the path pattern.
        It must not contain the '%' character.
    bids_entities : dict
        Dictionary with the entities and their values on the path.
        Ie {'sub':'11','task':'resting','ses':'V1','acq':'A','run':1}
        Values are converted to strings before being looked up.
        There should be no ambiguity between the sourcepath and each of
        the values. Otherwise an error will be raised.

    Returns
    -------
    str :
        The path pattern in placeholder format

    Raises
    ------
    AssertionError
        If the sourcepath already contains '%'.
    ValueError
        If a value is missing from the path or appears more than once.
    KeyError
        If an entity is not one of sub|ses|task|acq|run.
    """
    key_map = {
        'sub': '%subject%',
        'ses': '%session%',
        'task': '%task%',
        'acq': '%acquisition%',
        'run': '%run%',
    }

    path = sourcepath
    # A literal '%' would be confused with the placeholder enclosers.
    # Raised explicitly (instead of ``assert``) so the check survives -O.
    if '%' in path:
        raise AssertionError("The sourcepath must not contain '%'")

    keys = list(bids_entities)
    # Text still standing for each entity on the path: the literal value at
    # first, then its placeholder once it has been substituted.
    values = [str(bids_entities[key]) for key in keys]

    for index, key in enumerate(keys):
        val = values[index]

        # Mask every other value that contains the current one so its
        # occurrences inside them are not counted. The mask has the same
        # length as the masked text, so positions still line up with `path`.
        masked = path
        for other in values:
            if val in other and val != other:
                masked = masked.replace(other, '*' * len(other))

        # handle ambiguity
        occurrences = masked.count(val)
        if occurrences > 1:
            raise ValueError('Ambiguity: The path has multiple instances of {}'.format(val))
        if occurrences < 1:
            raise ValueError('There is no {} in path'.format(val))

        # Substitute only the occurrence validated above. A global
        # str.replace on the unmasked path would also rewrite the inside of
        # longer values (ie sub '1' within run '11').
        position = masked.find(val)
        placeholder = key_map[key]
        path = path[:position] + placeholder + path[position + len(val):]
        values[index] = placeholder

    path = _modify_entities_of_placeholder_pattern(path)
    path = path.replace('\\', '/')

    # Find first changing value and put the pattern from there
    first_placeholder = path.find('%')

    # The pattern starts right after the last '/' that precedes the first
    # placeholder (rfind gives -1 when there is none, hence start 0).
    start = path[:first_placeholder].rfind('/') + 1
    return path[start:]