"""Module with dataset utilities."""importosfrompandasimportread_csvimportshutilfromsovabids.filesimportdownload,_get_filesfromsovabids.miscimportget_num_digitsfromsovabids.parsersimportparse_from_regeximportmneimportnumpyasnpfrommne_bids.writeimport_write_raw_brainvision
def lemon_prepare():
    """Download and prepare a few files of the LEMON dataset.

    Downloads three subjects (tar archives) plus the ``name_match.csv``
    id-mapping file into ``../_data/lemon``, unpacks each archive, and
    renames the unpacked subject directories/files from their INDI ids
    to their initial ids. Already-prepared subjects are skipped.

    Notes
    -----
    See the `LEMON dataset <http://fcon_1000.projects.nitrc.org/indi/retro/MPI_LEMON.html>`_ .
    """
    # Path configuration
    this_dir = os.path.dirname(__file__)
    data_dir = os.path.join(this_dir, '..', '_data')
    root_path = os.path.abspath(os.path.join(data_dir, 'lemon'))
    os.makedirs(data_dir, exist_ok=True)

    # Download lemon Database (3 subjects + the id-matching csv)
    urls = ['https://fcp-indi.s3.amazonaws.com/data/Projects/INDI/MPI-LEMON/Compressed_tar/EEG_MPILMBB_LEMON/EEG_Raw_BIDS_ID/sub-032301.tar.gz',
            'https://fcp-indi.s3.amazonaws.com/data/Projects/INDI/MPI-LEMON/Compressed_tar/EEG_MPILMBB_LEMON/EEG_Raw_BIDS_ID/sub-032302.tar.gz',
            'https://fcp-indi.s3.amazonaws.com/data/Projects/INDI/MPI-LEMON/Compressed_tar/EEG_MPILMBB_LEMON/EEG_Raw_BIDS_ID/sub-032303.tar.gz',
            'https://fcp-indi.s3.amazonaws.com/data/Projects/INDI/MPI-LEMON/name_match.csv']

    for url in urls:
        download(url, os.path.join(data_dir, 'lemon'))

    # Generate all filepaths under the lemon root
    filepaths = _get_files(root_path)

    # Label correction: maps INDI ids to initial ids
    name_match = read_csv(os.path.join(root_path, 'name_match.csv'))

    # Unpack files

    # TAR FILES
    tars = [x for x in filepaths if 'tar.gz' in x]

    # SUBJECTS: extract the INDI id from each archive name, then look up
    # the corresponding initial id in the csv
    old_ids = [parse_from_regex(x, r'(sub-.*?).tar.gz', ['id']) for x in tars]
    old_ids = [x['id'] for x in old_ids]
    new_ids = [name_match.loc[(name_match.INDI_ID == x), 'Initial_ID']._values[0] for x in old_ids]

    # EEG FILES already unpacked (identified by their .vhdr header file)
    not_tars = [x for x in filepaths if '.vhdr' in x]
    not_tars_ids = [parse_from_regex(x, r'RSEEG/(sub-.*?).vhdr', ['id']) for x in not_tars]
    not_tars_ids = [x['id'] for x in not_tars_ids]

    assert len(tars) == len(old_ids) == len(new_ids)

    if set(new_ids) == set(not_tars_ids):  # all done
        return
    else:
        for file, old, new in zip(tars, old_ids, new_ids):
            if new not in not_tars_ids:  # skip already prepared files
                shutil.unpack_archive(file, root_path)
                olddir = os.path.join(root_path, old)
                subject_files = _get_files(olddir)
                for subfile in subject_files:  # fix sub-id in every path
                    new_path = subfile.replace(old, new)
                    # 'new_dir' instead of 'dir' to avoid shadowing the builtin
                    new_dir, _ = os.path.split(new_path)
                    os.makedirs(new_dir, exist_ok=True)
                    shutil.move(subfile, new_path)
                shutil.rmtree(olddir)
    print('LEMON PREPARE DONE!')
def lemon_bidscoin_prepare(src_path):
    """Download and prepare a few files of the LEMON dataset to be used with BIDSCOIN.

    Copies the prepared LEMON .eeg/.vmrk/.vhdr files into a
    ``<src_path>/<sub>/ses-001/resting/`` layout expected by BIDSCOIN,
    skipping files that already exist at the destination.

    Parameters
    ----------
    src_path : str
        The path where the BIDSCOIN-ready LEMON files will be

    See Also
    --------
    datasets.lemon_prepare
    """
    lemon_prepare()
    this_dir = os.path.dirname(__file__)
    data_dir = os.path.join(this_dir, '..', '_data')
    root_path = os.path.abspath(os.path.join(data_dir, 'lemon'))
    bidscoin_input_path = src_path

    os.makedirs(bidscoin_input_path, exist_ok=True)

    files = _get_files(root_path)
    # keep only the BrainVision triplet extensions
    files = [x for x in files if x.split('.')[-1] in ['eeg', 'vmrk', 'vhdr']]

    # session/task are the same for every file — hoisted out of the loop
    session = 'ses-001'
    task = 'resting'
    files_out = []
    for f in files:
        _, tail = os.path.split(f)  # head is unused
        sub = tail.split('.')[0]
        new_path = os.path.join(bidscoin_input_path, sub, session, task, tail)
        files_out.append(new_path)

    for old, new in zip(files, files_out):
        print(old, ' to ', new)
        os.makedirs(os.path.split(new)[0], exist_ok=True)
        if not os.path.isfile(new):
            shutil.copy2(old, new)
        else:
            print('already done, skipping...')
    print('finish')
def make_dummy_dataset(PATTERN='T%task%/S%session%/sub%subject%_%acquisition%_%run%',
                       DATASET='DUMMY',
                       NSUBS=2,
                       NSESSIONS=2,
                       NTASKS=2,
                       NACQS=2,
                       NRUNS=2,
                       NCHANNELS=2,
                       SFREQ=200,
                       STOP=10,
                       NUMEVENTS=10,
                       PREFIXES=None,
                       ROOT=None):
    """Create a dummy dataset given some parameters.

    Writes one all-zeros BrainVision recording per combination of
    task/session/run/subject/acquisition, at the paths produced by
    filling PATTERN's placeholders.

    Parameters
    ----------
    PATTERN : str, optional
        The pattern in placeholder notation using the following fields:
        %dataset%, %task%, %session%, %subject%, %run%, %acquisition%
    DATASET : str, optional
        Name of the dataset.
    NSUBS : int, optional
        Number of subjects.
    NSESSIONS : int, optional
        Number of sessions.
    NTASKS : int, optional
        Number of tasks.
    NACQS : int, optional
        Number of acquisitions.
    NRUNS : int, optional
        Number of runs.
    NCHANNELS : int, optional
        Number of channels.
    SFREQ : float, optional
        Sampling frequency of the data.
    STOP : float, optional
        Time duration of the data in seconds.
    NUMEVENTS : int, optional
        Number of events along the duration.
    PREFIXES : dict, optional
        Dictionary with the following keys: 'subject', 'session', 'task' and
        'acquisition'. The values are the corresponding prefix. RUN is not
        present because it has to be a number. If None, defaults to
        {'subject':'SU', 'session':'SE', 'task':'TA', 'acquisition':'AC', 'run':'RU'}.
    ROOT : str, optional
        Path where the files will be generated.
        If None, the _data subdir will be used.
    """
    # None sentinel instead of a mutable default argument (shared dict
    # across calls); same effective defaults as before.
    if PREFIXES is None:
        PREFIXES = {'subject': 'SU', 'session': 'SE', 'task': 'TA',
                    'acquisition': 'AC', 'run': 'RU'}

    if ROOT is None:
        this_dir = os.path.dirname(__file__)
        data_dir = os.path.abspath(os.path.join(this_dir, '..', '_data'))
    else:
        data_dir = ROOT
    os.makedirs(data_dir, exist_ok=True)

    # Zero-padded entity labels, padded to the number of digits of the count
    sub_zeros = get_num_digits(NSUBS)
    subs = [PREFIXES['subject'] + str(x).zfill(sub_zeros) for x in range(NSUBS)]

    task_zeros = get_num_digits(NTASKS)
    tasks = [PREFIXES['task'] + str(x).zfill(task_zeros) for x in range(NTASKS)]

    run_zeros = get_num_digits(NRUNS)
    runs = [str(x).zfill(run_zeros) for x in range(NRUNS)]

    ses_zeros = get_num_digits(NSESSIONS)
    sessions = [PREFIXES['session'] + str(x).zfill(ses_zeros) for x in range(NSESSIONS)]

    acq_zeros = get_num_digits(NACQS)
    acquisitions = [PREFIXES['acquisition'] + str(x).zfill(acq_zeros) for x in range(NACQS)]

    # Create some dummy metadata
    n_channels = NCHANNELS
    sampling_freq = SFREQ  # in Hertz
    info = mne.create_info(n_channels, sfreq=sampling_freq)

    # int() so a float SFREQ (documented as allowed) doesn't break linspace,
    # which requires an integer number of samples
    times = np.linspace(0, STOP, int(STOP * sampling_freq), endpoint=False)
    data = np.zeros((NCHANNELS, times.shape[0]))

    raw = mne.io.RawArray(data, info)
    raw.set_channel_types({x: 'eeg' for x in raw.ch_names})
    new_events = mne.make_fixed_length_events(raw, duration=STOP // NUMEVENTS)

    for task in tasks:
        for session in sessions:
            for run in runs:
                for sub in subs:
                    for acq in acquisitions:
                        # Fill each placeholder of the pattern
                        dummy = PATTERN.replace('%dataset%', DATASET)
                        dummy = dummy.replace('%task%', task)
                        dummy = dummy.replace('%session%', session)
                        dummy = dummy.replace('%subject%', sub)
                        dummy = dummy.replace('%run%', run)
                        dummy = dummy.replace('%acquisition%', acq)
                        path = [data_dir] + dummy.split('/')
                        fpath = os.path.join(*path)
                        _write_raw_brainvision(raw, fpath, new_events)