From 06b04781012847d1af501dfbb2d53dfc921e0638 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 13 Jan 2025 18:58:46 +0000 Subject: [PATCH 01/80] updated requirements, updated pipelines --- ibllib/pipes/neurophotometrics.py | 87 +++---------------------------- requirements.txt | 1 + 2 files changed, 8 insertions(+), 80 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 18f558c59..b55d9d94a 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -8,63 +8,10 @@ import ibllib.io.session_params from ibllib.pipes import base_tasks from iblutil.io import jsonable - +import iblphotometry.io as fpio _logger = logging.getLogger('ibllib') -""" -Neurophotometrics FP3002 specific information. -The light source map refers to the available LEDs on the system. -The flags refers to the byte encoding of led states in the system. -""" -LIGHT_SOURCE_MAP = { - 'color': ['None', 'Violet', 'Blue', 'Green'], - 'wavelength': [0, 415, 470, 560], - 'name': ['None', 'Isosbestic', 'GCaMP', 'RCaMP'], -} - -LED_STATES = { - 'Condition': { - 0: 'No additional signal', - 1: 'Output 1 signal HIGH', - 2: 'Output 0 signal HIGH', - 3: 'Stimulation ON', - 4: 'GPIO Line 2 HIGH', - 5: 'GPIO Line 3 HIGH', - 6: 'Input 1 HIGH', - 7: 'Input 0 HIGH', - 8: 'Output 0 signal HIGH + Stimulation', - 9: 'Output 0 signal HIGH + Input 0 signal HIGH', - 10: 'Input 0 signal HIGH + Stimulation', - 11: 'Output 0 HIGH + Input 0 HIGH + Stimulation', - }, - 'No LED ON': {0: 0, 1: 8, 2: 16, 3: 32, 4: 64, 5: 128, 6: 256, 7: 512, 8: 48, 9: 528, 10: 544, 11: 560}, - 'L415': {0: 1, 1: 9, 2: 17, 3: 33, 4: 65, 5: 129, 6: 257, 7: 513, 8: 49, 9: 529, 10: 545, 11: 561}, - 'L470': {0: 2, 1: 10, 2: 18, 3: 34, 4: 66, 5: 130, 6: 258, 7: 514, 8: 50, 9: 530, 10: 546, 11: 562}, - 'L560': {0: 4, 1: 12, 2: 20, 3: 36, 4: 68, 5: 132, 6: 260, 7: 516, 8: 52, 9: 532, 10: 548, 11: 564} -} - - -def _channel_meta(light_source_map=None): - """ - Return table of light source wavelengths and corresponding colour labels. - - Parameters - ---------- - light_source_map : dict - An optional map of light source wavelengths (nm) used and their corresponding colour name. - - Returns - ------- - pandas.DataFrame - A sorted table of wavelength and colour name. 
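The LIGHT_SOURCE_MAP / LED_STATES tables removed above encode how the FP3002 'LedState' flags map onto the light sources (this decoding now moves to iblphotometry.io). A minimal sketch of the lookup, not part of the diff, mirroring the logic that this patch removes further down; the example flag value is taken from the table:

    import numpy as np
    import pandas as pd
    led_states = pd.DataFrame(LED_STATES).set_index('Condition')
    state = 18  # example raw value of the 'LedState' column
    _, ic = np.where(led_states.values == state)
    print(led_states.columns[ic[0]])  # -> 'L470', i.e. the 470 nm / GCaMP channel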
- """ - light_source_map = light_source_map or LIGHT_SOURCE_MAP - meta = pd.DataFrame.from_dict(light_source_map) - meta.index.rename('channel_id', inplace=True) - return meta - - class FibrePhotometrySync(base_tasks.DynamicTask): priority = 90 job_size = 'small' @@ -146,39 +93,19 @@ def _run(self, **kwargs): fcn_nph_to_bpod_times, valid_bounds = self._sync_bpod_neurophotometrics() case _: raise NotImplementedError('Syncing with daq is not supported yet.') + # 2) reformat the raw data with wavelengths and meta-data - folder_raw_photometry = self.session_path.joinpath(self.device_collection) - fp_data = pd.read_parquet(folder_raw_photometry.joinpath('_neurophotometrics_fpData.raw.pqt')) - # Load channels and wavelength information - channel_meta_map = _channel_meta() - if (fn := folder_raw_photometry.joinpath('_neurophotometrics_fpData.channels.csv')).exists(): - led_states = pd.read_csv(fn) - else: - led_states = pd.DataFrame(LED_STATES) - led_states = led_states.set_index('Condition') - # Extract signal columns into 2D array - rois = list(self.kwargs['fibers'].keys()) - out_df = fp_data.filter(items=rois, axis=1).sort_index(axis=1) - out_df['times'] = fcn_nph_to_bpod_times(fp_data['SystemTimestamp']) - out_df['valid'] = np.logical_and(out_df['times'] >= valid_bounds[0], out_df['times'] <= valid_bounds[1]) - out_df['wavelength'] = np.nan - out_df['name'] = '' - out_df['color'] = '' - # Extract channel index - states = fp_data.get('LedState', fp_data.get('Flags', None)) - for state in states.unique(): - ir, ic = np.where(led_states == state) - if ic.size == 0: - continue - for cn in ['name', 'color', 'wavelength']: - out_df.loc[states == state, cn] = channel_meta_map.iloc[ic[0]][cn] + folder_raw_photometry = self.session_path.joinpath(self.device_collection) + out_df = fpio.from_raw_neurophotometrics_file(folder_raw_photometry.joinpath('_neurophotometrics_fpData.raw.pqt')) + # 3) label the brain regions rois = [] c = 0 for k, v in self.kwargs['fibers'].items(): rois.append({'ROI': k, 'fiber': f'fiber{c:02d}', 'brain_region': v['location']}) df_rois = pd.DataFrame(rois).set_index('ROI') - # to finish we write the dataframes to disk + + # 4) to finish we write the dataframes to disk out_path = self.session_path.joinpath('alf', 'photometry') out_path.mkdir(parents=True, exist_ok=True) out_df.to_parquet(file_signal := out_path.joinpath('photometry.signal.pqt')) diff --git a/requirements.txt b/requirements.txt index 815c73b6a..614008684 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,3 +33,4 @@ psychofit slidingRP>=1.1.1 # steinmetz lab refractory period metrics pyqt5 ibl-style +ibl-photometry \ No newline at end of file From bff546991d957993a5cef2aa88fea8b9f0cebaad Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 17 Jan 2025 10:37:47 +0000 Subject: [PATCH 02/80] fibers are now named fiber_{brain_region} in the extraction process --- ibllib/pipes/neurophotometrics.py | 54 ++++++++++++++++++------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index b55d9d94a..5e5628582 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -9,6 +9,7 @@ from ibllib.pipes import base_tasks from iblutil.io import jsonable import iblphotometry.io as fpio + _logger = logging.getLogger('ibllib') @@ -18,8 +19,7 @@ class FibrePhotometrySync(base_tasks.DynamicTask): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.device_collection = 
self.get_device_collection( - 'neurophotometrics', device_collection='raw_photometry_data') + self.device_collection = self.get_device_collection('neurophotometrics', device_collection='raw_photometry_data') # we will work with the first protocol here for task in self.session_params['tasks']: self.task_protocol = next(k for k in task) @@ -29,12 +29,16 @@ def __init__(self, *args, **kwargs): @property def signature(self): signature = { - 'input_files': [('_neurophotometrics_fpData.raw.pqt', self.device_collection, True, True), - ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), - ('_neurophotometrics_fpData.channels.csv', self.device_collection, True, True), - ('_neurophotometrics_fpData.digitalIntputs.pqt', self.device_collection, True)], - 'output_files': [('photometry.signal.pqt', 'alf/photometry', True), - ('photometryROI.locations.pqt', 'alf/photometry', True)] + 'input_files': [ + ('_neurophotometrics_fpData.raw.pqt', self.device_collection, True, True), + ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), + ('_neurophotometrics_fpData.channels.csv', self.device_collection, True, True), + ('_neurophotometrics_fpData.digitalIntputs.pqt', self.device_collection, True), + ], + 'output_files': [ + ('photometry.signal.pqt', 'alf/photometry', True), + ('photometryROI.locations.pqt', 'alf/photometry', True), + ], } return signature @@ -57,21 +61,26 @@ def _sync_bpod_neurophotometrics(self): # we get the timestamps of the states from the bpod data tbpod = [] for sname in sync_states_names: - tbpod.append(np.array( - [bd['States timestamps'][sname][0][0] + bd['Trial start timestamp'] for bd in bpod_data if - sname in bd['States timestamps']])) + tbpod.append( + np.array( + [ + bd['States timestamps'][sname][0][0] + bd['Trial start timestamp'] + for bd in bpod_data + if sname in bd['States timestamps'] + ] + ) + ) tbpod = np.sort(np.concatenate(tbpod)) tbpod = tbpod[~np.isnan(tbpod)] # we get the timestamps for the photometry data - tph = df_digital_inputs['SystemTimestamp'].values[df_digital_inputs['Channel'] == self.kwargs['sync_channel']] + sync_channel = self.session_params['neurophotometrics']['sync_channel'] + tph = df_digital_inputs['SystemTimestamp'].values[df_digital_inputs['Channel'] == sync_channel] tph = tph[15:] # TODO: we may want to detect the spacers before removing it, especially for successive sessions # sync the behaviour events to the photometry timestamps - fcn_nph_to_bpod_times, drift_ppm, iph, ibpod = ibldsp.utils.sync_timestamps( - tph, tbpod, return_indices=True, linear=True) + fcn_nph_to_bpod_times, drift_ppm, iph, ibpod = ibldsp.utils.sync_timestamps(tph, tbpod, return_indices=True, linear=True) # then we check the alignment, should be less than the screen refresh rate tcheck = fcn_nph_to_bpod_times(tph[iph]) - tbpod[ibpod] - _logger.info( - f'sync: n trials {len(bpod_data)}, n bpod sync {len(tbpod)}, n photometry {len(tph)}, n match {len(iph)}') + _logger.info(f'sync: n trials {len(bpod_data)}, n bpod sync {len(tbpod)}, n photometry {len(tph)}, n match {len(iph)}') assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' assert len(iph) / len(tbpod) > 0.95, 'Sync issue detected, less than 95% of the bpod events matched' valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] @@ -93,16 +102,17 @@ def _run(self, **kwargs): fcn_nph_to_bpod_times, valid_bounds = self._sync_bpod_neurophotometrics() case _: raise NotImplementedError('Syncing with daq is not 
supported yet.') - + # 2) reformat the raw data with wavelengths and meta-data - folder_raw_photometry = self.session_path.joinpath(self.device_collection) - out_df = fpio.from_raw_neurophotometrics_file(folder_raw_photometry.joinpath('_neurophotometrics_fpData.raw.pqt')) + folder_raw_photometry = self.session_path.joinpath(self.device_collection) + out_df = fpio.from_raw_neurophotometrics_file_to_ibl_df( + folder_raw_photometry.joinpath('_neurophotometrics_fpData.raw.pqt') + ) # 3) label the brain regions rois = [] - c = 0 - for k, v in self.kwargs['fibers'].items(): - rois.append({'ROI': k, 'fiber': f'fiber{c:02d}', 'brain_region': v['location']}) + for k, v in self.session_params['neurophotometrics']['fibers'].items(): + rois.append({'ROI': k, 'fiber': f'fiber_{v["location"]}', 'brain_region': v['location']}) df_rois = pd.DataFrame(rois).set_index('ROI') # 4) to finish we write the dataframes to disk From 57f7164ff4b83ced7cc715c6fbcc1cd1bbd0f090 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 24 Feb 2025 15:34:44 +0000 Subject: [PATCH 03/80] fixing shifted sync timestamps in the extraction --- ibllib/pipes/dynamic_pipeline.py | 2 -- ibllib/pipes/neurophotometrics.py | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index 8a3a584f7..814f0888a 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -582,8 +582,6 @@ def make_pipeline(session_path, **pkwargs): **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']]) if 'neurophotometrics' in devices: - # {'collection': 'raw_photometry_data', 'datetime': '2024-09-18T16:43:55.207000', - # 'fibers': {'G0': {'location': 'NBM'}, 'G1': {'location': 'SI'}}, 'sync_channel': 1} photometry_kwargs = devices['neurophotometrics'] tasks['FibrePhotometrySync'] = type('FibrePhotometrySync', ( ptasks.FibrePhotometrySync,), {})(**kwargs, **photometry_kwargs) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 5e5628582..46aa28932 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -64,7 +64,7 @@ def _sync_bpod_neurophotometrics(self): tbpod.append( np.array( [ - bd['States timestamps'][sname][0][0] + bd['Trial start timestamp'] + bd['States timestamps'][sname][0][0] + bd['Trial start timestamp'] - bpod_data[0]['Bpod start timestamp'] for bd in bpod_data if sname in bd['States timestamps'] ] @@ -108,6 +108,7 @@ def _run(self, **kwargs): out_df = fpio.from_raw_neurophotometrics_file_to_ibl_df( folder_raw_photometry.joinpath('_neurophotometrics_fpData.raw.pqt') ) + out_df['times'] = fcn_nph_to_bpod_times(out_df['times']) # 3) label the brain regions rois = [] From b60629017b1efb470d090e70bfeede53ae24cda6 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 24 Feb 2025 16:28:04 +0000 Subject: [PATCH 04/80] reading digital inputs file via iblphotometry.io (validated) --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 46aa28932..34c918fde 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -48,7 +48,7 @@ def _sync_bpod_neurophotometrics(self): :return: interpolation function that outputs bpod timestamsp from neurophotometrics timestamps """ folder_raw_photometry = self.session_path.joinpath(self.device_collection) - df_digital_inputs = 
pd.read_parquet(folder_raw_photometry.joinpath('_neurophotometrics_fpData.digitalIntputs.pqt')) + df_digital_inputs = fpio.read_digital_inputs_csv(folder_raw_photometry / '_neurophotometrics_fpData.digitalIntputs.pqt') # normally we should disregard the states and use the sync label. But bpod doesn't log TTL outs, # only the states. This will change in the future but for now we are stuck with this. if 'habituation' in self.task_protocol: From 8ae4039a2332c0c01ee99b1b4921707b11f9f014 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Tue, 4 Mar 2025 12:11:07 +0000 Subject: [PATCH 05/80] fix for reextraction (.pqt file read instead of .csv) for digital_inputs from neurophotometrics --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 34c918fde..22288f54d 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -48,7 +48,7 @@ def _sync_bpod_neurophotometrics(self): :return: interpolation function that outputs bpod timestamsp from neurophotometrics timestamps """ folder_raw_photometry = self.session_path.joinpath(self.device_collection) - df_digital_inputs = fpio.read_digital_inputs_csv(folder_raw_photometry / '_neurophotometrics_fpData.digitalIntputs.pqt') + df_digital_inputs = fpio.read_digital_inputs_file(folder_raw_photometry / '_neurophotometrics_fpData.digitalIntputs.pqt') # normally we should disregard the states and use the sync label. But bpod doesn't log TTL outs, # only the states. This will change in the future but for now we are stuck with this. if 'habituation' in self.task_protocol: From 3fd76236ed8ffa8466dfb71e191459bc06f202ea Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Thu, 6 Mar 2025 15:17:39 +0000 Subject: [PATCH 06/80] bugfix in the extractor after fix in the experiment description file --- ibllib/pipes/neurophotometrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 22288f54d..13f967685 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -73,7 +73,7 @@ def _sync_bpod_neurophotometrics(self): tbpod = np.sort(np.concatenate(tbpod)) tbpod = tbpod[~np.isnan(tbpod)] # we get the timestamps for the photometry data - sync_channel = self.session_params['neurophotometrics']['sync_channel'] + sync_channel = self.session_params['devices']['neurophotometrics']['sync_channel'] tph = df_digital_inputs['SystemTimestamp'].values[df_digital_inputs['Channel'] == sync_channel] tph = tph[15:] # TODO: we may want to detect the spacers before removing it, especially for successive sessions # sync the behaviour events to the photometry timestamps @@ -112,7 +112,7 @@ def _run(self, **kwargs): # 3) label the brain regions rois = [] - for k, v in self.session_params['neurophotometrics']['fibers'].items(): + for k, v in self.session_params['devices']['neurophotometrics']['fibers'].items(): rois.append({'ROI': k, 'fiber': f'fiber_{v["location"]}', 'brain_region': v['location']}) df_rois = pd.DataFrame(rois).set_index('ROI') From 07efbd0b00bf411746a30e6216749e443dd2f100 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 30 Apr 2025 12:52:03 +0100 Subject: [PATCH 07/80] WIP changes for daq sync --- brainbox/io/one.py | 4 +- ibllib/pipes/dynamic_pipeline.py | 176 ++++++++++------- ibllib/pipes/neurophotometrics.py | 311 ++++++++++++++++++++++-------- 3 files changed, 336 insertions(+), 155 
deletions(-) diff --git a/brainbox/io/one.py b/brainbox/io/one.py index b65521549..9e4e3ed5b 100644 --- a/brainbox/io/one.py +++ b/brainbox/io/one.py @@ -1198,7 +1198,7 @@ def timesprobe2times(self, values, direction='forward'): elif direction == 'reverse': return self._sync['reverse'](values) / self._sync['fs'] - def samples2times(self, values, direction='forward'): + def samples2times(self, values, direction='forward', band='ap'): """ Converts ephys sample values to session main clock seconds :param values: numpy array of times in seconds or samples to resync @@ -1206,6 +1206,8 @@ def samples2times(self, values, direction='forward'): (seconds main time to samples probe time) :return: """ + if band == 'lf': + values *= 12 self._get_probe_info() return self._sync[direction](values) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index 13a936d5b..9b3840526 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -20,6 +20,7 @@ :class:`ibllib.io.extractors.base.BaseBpodTrialsExtractor` class, and located in either the personal projects repo or in :py:mod:`ibllib.io.extractors.bpod_trials` module. """ + import logging import re from fnmatch import fnmatch @@ -71,7 +72,7 @@ def acquisition_description_legacy_session(session_path, save=False): def get_acquisition_description(protocol): - """" + """ " This is a set of example acquisition descriptions for experiments - choice_world_recording - choice_world_biased @@ -80,7 +81,7 @@ def get_acquisition_description(protocol): - choice_world_passive That are part of the IBL pipeline """ - if 'ephys' in protocol: # canonical ephys + if 'ephys' in protocol: # canonical ephys devices = { 'cameras': { 'right': {'collection': 'raw_video_data', 'sync_label': 'audio'}, @@ -89,38 +90,32 @@ def get_acquisition_description(protocol): }, 'neuropixel': { 'probe00': {'collection': 'raw_ephys_data/probe00', 'sync_label': 'imec_sync'}, - 'probe01': {'collection': 'raw_ephys_data/probe01', 'sync_label': 'imec_sync'} - }, - 'microphone': { - 'microphone': {'collection': 'raw_behavior_data', 'sync_label': None} + 'probe01': {'collection': 'raw_ephys_data/probe01', 'sync_label': 'imec_sync'}, }, + 'microphone': {'microphone': {'collection': 'raw_behavior_data', 'sync_label': None}}, } acquisition_description = { # this is the current ephys pipeline description 'devices': devices, 'tasks': [ {'ephysChoiceWorld': {'collection': 'raw_behavior_data', 'sync_label': 'bpod'}}, - {'passiveChoiceWorld': {'collection': 'raw_passive_data', 'sync_label': 'bpod'}} + {'passiveChoiceWorld': {'collection': 'raw_passive_data', 'sync_label': 'bpod'}}, ], - 'sync': { - 'nidq': {'collection': 'raw_ephys_data', 'extension': 'bin', 'acquisition_software': 'spikeglx'} - }, + 'sync': {'nidq': {'collection': 'raw_ephys_data', 'extension': 'bin', 'acquisition_software': 'spikeglx'}}, 'procedures': ['Ephys recording with acute probe(s)'], - 'projects': ['ibl_neuropixel_brainwide_01'] + 'projects': ['ibl_neuropixel_brainwide_01'], } else: devices = { 'cameras': { 'left': {'collection': 'raw_video_data', 'sync_label': 'audio'}, }, - 'microphone': { - 'microphone': {'collection': 'raw_behavior_data', 'sync_label': None} - }, + 'microphone': {'microphone': {'collection': 'raw_behavior_data', 'sync_label': None}}, } acquisition_description = { # this is the current ephys pipeline description 'devices': devices, 'sync': {'bpod': {'collection': 'raw_behavior_data'}}, 'procedures': ['Behavior training/tasks'], - 'projects': 
['ibl_neuropixel_brainwide_01'] + 'projects': ['ibl_neuropixel_brainwide_01'], } if 'biased' in protocol: key = 'biasedChoiceWorld' @@ -130,10 +125,7 @@ def get_acquisition_description(protocol): key = 'habituationChoiceWorld' else: raise ValueError(f'Unknown protocol "{protocol}"') - acquisition_description['tasks'] = [{key: { - 'collection': 'raw_behavior_data', - 'sync_label': 'bpod' - }}] + acquisition_description['tasks'] = [{key: {'collection': 'raw_behavior_data', 'sync_label': 'bpod'}}] acquisition_description['version'] = '1.0.0' return acquisition_description @@ -224,7 +216,7 @@ def _get_trials_tasks(session_path, acquisition_description=None, sync_tasks=Non kwargs = {'session_path': session_path, 'one': one} # Syncing tasks - (sync, sync_args), = acquisition_description['sync'].items() + ((sync, sync_args),) = acquisition_description['sync'].items() sync_label = _sync_label(sync, **sync_args) # get the format of the DAQ data. This informs the extractor task sync_args['sync_collection'] = sync_args.pop('collection') # rename the key so it matches task run arguments sync_args['sync_ext'] = sync_args.pop('extension', None) @@ -268,15 +260,16 @@ def _get_trials_tasks(session_path, acquisition_description=None, sync_tasks=Non else: # lookup in the project extraction repo if we find an extractor class import projects.extraction_tasks + if hasattr(projects.extraction_tasks, extractor): task = getattr(projects.extraction_tasks, extractor) elif hasattr(projects.extraction_tasks, extractor + sync_label.capitalize()): task = getattr(btasks, extractor + sync_label.capitalize()) else: raise NotImplementedError( - f'Extractor "{extractor}" not found in main IBL pipeline nor in personal projects') - _logger.debug('%s (protocol #%i, task #%i) = %s.%s', - protocol, i, j, task.__module__, task.__name__) + f'Extractor "{extractor}" not found in main IBL pipeline nor in personal projects' + ) + _logger.debug('%s (protocol #%i, task #%i) = %s.%s', protocol, i, j, task.__module__, task.__name__) # Rename the class to something more informative task_name = f'{task.__name__}_{i:02}' if not (task.__name__.startswith('TrainingStatus') or task.__name__.endswith('RegisterRaw')): @@ -314,13 +307,16 @@ def _get_trials_tasks(session_path, acquisition_description=None, sync_tasks=Non raise NotImplementedError(f'No trials task available for sync namespace "{sync_label}"') compute_status = True tasks[f'RegisterRaw_{protocol}_{i:02}'] = type(f'RegisterRaw_{protocol}_{i:02}', (registration_class,), {})( - **kwargs, **task_kwargs) + **kwargs, **task_kwargs + ) parents = [tasks[f'RegisterRaw_{protocol}_{i:02}']] + sync_tasks tasks[f'Trials_{protocol}_{i:02}'] = type(f'Trials_{protocol}_{i:02}', (behaviour_class,), {})( - **kwargs, **sync_kwargs, **task_kwargs, parents=parents) + **kwargs, **sync_kwargs, **task_kwargs, parents=parents + ) if compute_status: - tasks[f'TrainingStatus_{protocol}_{i:02}'] = type(f'TrainingStatus_{protocol}_{i:02}', ( - btasks.TrainingStatus,), {})(**kwargs, **task_kwargs, parents=[tasks[f'Trials_{protocol}_{i:02}']]) + tasks[f'TrainingStatus_{protocol}_{i:02}'] = type( + f'TrainingStatus_{protocol}_{i:02}', (btasks.TrainingStatus,), {} + )(**kwargs, **task_kwargs, parents=[tasks[f'Trials_{protocol}_{i:02}']]) return tasks @@ -411,11 +407,12 @@ def make_pipeline(session_path, **pkwargs): kwargs = {'session_path': session_path, 'one': pkwargs.get('one')} # Registers the experiment description file - tasks['ExperimentDescriptionRegisterRaw'] = type('ExperimentDescriptionRegisterRaw', - 
(bstasks.ExperimentDescriptionRegisterRaw,), {})(**kwargs) + tasks['ExperimentDescriptionRegisterRaw'] = type( + 'ExperimentDescriptionRegisterRaw', (bstasks.ExperimentDescriptionRegisterRaw,), {} + )(**kwargs) # Syncing tasks - (sync, sync_args), = acquisition_description['sync'].items() + ((sync, sync_args),) = acquisition_description['sync'].items() sync_args = sync_args.copy() # ensure acquisition_description unchanged sync_label = _sync_label(sync, **sync_args) # get the format of the DAQ data. This informs the extractor task sync_args['sync_collection'] = sync_args.pop('collection') # rename the key so it matches task run arguments @@ -426,14 +423,16 @@ def make_pipeline(session_path, **pkwargs): if sync_label == 'nidq' and sync_args['sync_collection'] == 'raw_ephys_data': tasks['SyncRegisterRaw'] = type('SyncRegisterRaw', (etasks.EphysSyncRegisterRaw,), {})(**kwargs, **sync_kwargs) tasks[f'SyncPulses_{sync}'] = type(f'SyncPulses_{sync}', (etasks.EphysSyncPulses,), {})( - **kwargs, **sync_kwargs, parents=[tasks['SyncRegisterRaw']]) + **kwargs, **sync_kwargs, parents=[tasks['SyncRegisterRaw']] + ) sync_tasks = [tasks[f'SyncPulses_{sync}']] elif sync_label == 'timeline': tasks['SyncRegisterRaw'] = type('SyncRegisterRaw', (stasks.SyncRegisterRaw,), {})(**kwargs, **sync_kwargs) elif sync_label == 'nidq': tasks['SyncRegisterRaw'] = type('SyncRegisterRaw', (stasks.SyncMtscomp,), {})(**kwargs, **sync_kwargs) tasks[f'SyncPulses_{sync}'] = type(f'SyncPulses_{sync}', (stasks.SyncPulses,), {})( - **kwargs, **sync_kwargs, parents=[tasks['SyncRegisterRaw']]) + **kwargs, **sync_kwargs, parents=[tasks['SyncRegisterRaw']] + ) sync_tasks = [tasks[f'SyncPulses_{sync}']] elif sync_label == 'tdms': tasks['SyncRegisterRaw'] = type('SyncRegisterRaw', (stasks.SyncRegisterRaw,), {})(**kwargs, **sync_kwargs) @@ -441,9 +440,7 @@ def make_pipeline(session_path, **pkwargs): pass # ATM we don't have anything for this; it may not be needed in the future # Behavior tasks - tasks.update( - _get_trials_tasks(session_path, acquisition_description, sync_tasks=sync_tasks, one=pkwargs.get('one')) - ) + tasks.update(_get_trials_tasks(session_path, acquisition_description, sync_tasks=sync_tasks, one=pkwargs.get('one'))) # Ephys tasks if 'neuropixel' in devices: @@ -463,38 +460,46 @@ def make_pipeline(session_path, **pkwargs): if (nptype == 'NP2.1') or (nptype == 'NP2.4' and nshanks == 1): tasks[f'EphyCompressNP21_{pname}'] = type(f'EphyCompressNP21_{pname}', (etasks.EphysCompressNP21,), {})( - **kwargs, **ephys_kwargs, pname=pname) + **kwargs, **ephys_kwargs, pname=pname + ) all_probes.append(pname) register_tasks.append(tasks[f'EphyCompressNP21_{pname}']) elif nptype == 'NP2.4' and nshanks > 1: tasks[f'EphyCompressNP24_{pname}'] = type(f'EphyCompressNP24_{pname}', (etasks.EphysCompressNP24,), {})( - **kwargs, **ephys_kwargs, pname=pname, nshanks=nshanks) + **kwargs, **ephys_kwargs, pname=pname, nshanks=nshanks + ) register_tasks.append(tasks[f'EphyCompressNP24_{pname}']) all_probes += [f'{pname}{chr(97 + int(shank))}' for shank in range(nshanks)] else: tasks[f'EphysCompressNP1_{pname}'] = type(f'EphyCompressNP1_{pname}', (etasks.EphysCompressNP1,), {})( - **kwargs, **ephys_kwargs, pname=pname) + **kwargs, **ephys_kwargs, pname=pname + ) register_tasks.append(tasks[f'EphysCompressNP1_{pname}']) all_probes.append(pname) if nptype == '3A': tasks['EphysPulses'] = type('EphysPulses', (etasks.EphysPulses,), {})( - **kwargs, **ephys_kwargs, **sync_kwargs, pname=all_probes, parents=register_tasks + sync_tasks) + **kwargs, 
**ephys_kwargs, **sync_kwargs, pname=all_probes, parents=register_tasks + sync_tasks + ) for pname in all_probes: register_task = [reg_task for reg_task in register_tasks if pname[:7] in reg_task.name] if nptype != '3A': tasks[f'EphysPulses_{pname}'] = type(f'EphysPulses_{pname}', (etasks.EphysPulses,), {})( - **kwargs, **ephys_kwargs, **sync_kwargs, pname=[pname], parents=register_task + sync_tasks) + **kwargs, **ephys_kwargs, **sync_kwargs, pname=[pname], parents=register_task + sync_tasks + ) tasks[f'Spikesorting_{pname}'] = type(f'Spikesorting_{pname}', (etasks.SpikeSorting,), {})( - **kwargs, **ephys_kwargs, pname=pname, parents=[tasks[f'EphysPulses_{pname}']]) + **kwargs, **ephys_kwargs, pname=pname, parents=[tasks[f'EphysPulses_{pname}']] + ) else: tasks[f'Spikesorting_{pname}'] = type(f'Spikesorting_{pname}', (etasks.SpikeSorting,), {})( - **kwargs, **ephys_kwargs, pname=pname, parents=[tasks['EphysPulses']]) + **kwargs, **ephys_kwargs, pname=pname, parents=[tasks['EphysPulses']] + ) tasks[f'RawEphysQC_{pname}'] = type(f'RawEphysQC_{pname}', (etasks.RawEphysQC,), {})( - **kwargs, **ephys_kwargs, pname=pname, parents=register_task) + **kwargs, **ephys_kwargs, pname=pname, parents=register_task + ) # Video tasks if 'cameras' in devices: @@ -508,35 +513,33 @@ def make_pipeline(session_path, **pkwargs): tasks[tn] = type((tn := 'VideoConvert'), (vtasks.VideoConvert,), {})(**kwargs, **video_kwargs) dlc_parent_task = tasks['VideoConvert'] tasks[tn] = type((tn := f'VideoSyncQC_{sync}'), (vtasks.VideoSyncQcCamlog,), {})( - **kwargs, **video_kwargs, **sync_kwargs) + **kwargs, **video_kwargs, **sync_kwargs + ) else: - tasks[tn] = type((tn := 'VideoRegisterRaw'), (vtasks.VideoRegisterRaw,), {})( - **kwargs, **video_kwargs) - tasks[tn] = type((tn := 'VideoCompress'), (vtasks.VideoCompress,), {})( - **kwargs, **video_kwargs, **sync_kwargs) + tasks[tn] = type((tn := 'VideoRegisterRaw'), (vtasks.VideoRegisterRaw,), {})(**kwargs, **video_kwargs) + tasks[tn] = type((tn := 'VideoCompress'), (vtasks.VideoCompress,), {})(**kwargs, **video_kwargs, **sync_kwargs) dlc_parent_task = tasks['VideoCompress'] if sync == 'bpod': tasks[tn] = type((tn := f'VideoSyncQC_{sync}'), (vtasks.VideoSyncQcBpod,), {})( - **kwargs, **video_kwargs, **sync_kwargs, parents=[tasks['VideoCompress']]) + **kwargs, **video_kwargs, **sync_kwargs, parents=[tasks['VideoCompress']] + ) elif sync == 'nidq': # Here we restrict to videos that we support (left, right or body) video_kwargs['cameras'] = subset_cams tasks[tn] = type((tn := f'VideoSyncQC_{sync}'), (vtasks.VideoSyncQcNidq,), {})( - **kwargs, **video_kwargs, **sync_kwargs, parents=[tasks['VideoCompress']] + sync_tasks) + **kwargs, **video_kwargs, **sync_kwargs, parents=[tasks['VideoCompress']] + sync_tasks + ) if sync_kwargs['sync'] != 'bpod': # Here we restrict to videos that we support (left, right or body) # Currently there is no plan to run DLC on the belly cam subset_cams = [c for c in cams if c in ('left', 'right', 'body')] video_kwargs['cameras'] = subset_cams - tasks[tn] = type((tn := 'DLC'), (vtasks.DLC,), {})( - **kwargs, **video_kwargs, parents=[dlc_parent_task]) + tasks[tn] = type((tn := 'DLC'), (vtasks.DLC,), {})(**kwargs, **video_kwargs, parents=[dlc_parent_task]) # The PostDLC plots require a trials object for QC # Find the first task that outputs a trials.table dataset - trials_task = ( - t for t in tasks.values() if any('trials.table' in f[0] for f in t.signature.get('output_files', [])) - ) + trials_task = (t for t in tasks.values() if any('trials.table' in 
f[0] for f in t.signature.get('output_files', []))) if trials_task := next(trials_task, None): parents = [tasks['DLC'], tasks[f'VideoSyncQC_{sync}'], trials_task] trials_collection = getattr(trials_task, 'output_collection', 'alf') @@ -544,53 +547,78 @@ def make_pipeline(session_path, **pkwargs): parents = [tasks['DLC'], tasks[f'VideoSyncQC_{sync}']] trials_collection = 'alf' tasks[tn] = type((tn := 'PostDLC'), (vtasks.EphysPostDLC,), {})( - **kwargs, cameras=subset_cams, trials_collection=trials_collection, parents=parents) + **kwargs, cameras=subset_cams, trials_collection=trials_collection, parents=parents + ) # Audio tasks if 'microphone' in devices: - (microphone, micro_kwargs), = devices['microphone'].items() + ((microphone, micro_kwargs),) = devices['microphone'].items() micro_kwargs['device_collection'] = micro_kwargs.pop('collection') if sync_kwargs['sync'] == 'bpod': tasks['AudioRegisterRaw'] = type('AudioRegisterRaw', (atasks.AudioSync,), {})( - **kwargs, **sync_kwargs, **micro_kwargs, collection=micro_kwargs['device_collection']) + **kwargs, **sync_kwargs, **micro_kwargs, collection=micro_kwargs['device_collection'] + ) elif sync_kwargs['sync'] == 'nidq': tasks['AudioRegisterRaw'] = type('AudioRegisterRaw', (atasks.AudioCompress,), {})(**kwargs, **micro_kwargs) # Widefield tasks if 'widefield' in devices: - (_, wfield_kwargs), = devices['widefield'].items() + ((_, wfield_kwargs),) = devices['widefield'].items() wfield_kwargs['device_collection'] = wfield_kwargs.pop('collection') tasks['WideFieldRegisterRaw'] = type('WidefieldRegisterRaw', (wtasks.WidefieldRegisterRaw,), {})( - **kwargs, **wfield_kwargs) + **kwargs, **wfield_kwargs + ) tasks['WidefieldCompress'] = type('WidefieldCompress', (wtasks.WidefieldCompress,), {})( - **kwargs, **wfield_kwargs, parents=[tasks['WideFieldRegisterRaw']]) + **kwargs, **wfield_kwargs, parents=[tasks['WideFieldRegisterRaw']] + ) tasks['WidefieldPreprocess'] = type('WidefieldPreprocess', (wtasks.WidefieldPreprocess,), {})( - **kwargs, **wfield_kwargs, parents=[tasks['WidefieldCompress']]) + **kwargs, **wfield_kwargs, parents=[tasks['WidefieldCompress']] + ) tasks['WidefieldSync'] = type('WidefieldSync', (wtasks.WidefieldSync,), {})( - **kwargs, **wfield_kwargs, **sync_kwargs, - parents=[tasks['WideFieldRegisterRaw'], tasks['WidefieldCompress']] + sync_tasks) + **kwargs, + **wfield_kwargs, + **sync_kwargs, + parents=[tasks['WideFieldRegisterRaw'], tasks['WidefieldCompress']] + sync_tasks, + ) tasks['WidefieldFOV'] = type('WidefieldFOV', (wtasks.WidefieldFOV,), {})( - **kwargs, **wfield_kwargs, parents=[tasks['WidefieldPreprocess']]) + **kwargs, **wfield_kwargs, parents=[tasks['WidefieldPreprocess']] + ) # Mesoscope tasks if 'mesoscope' in devices: - (_, mscope_kwargs), = devices['mesoscope'].items() + ((_, mscope_kwargs),) = devices['mesoscope'].items() mscope_kwargs['device_collection'] = mscope_kwargs.pop('collection') tasks['MesoscopeRegisterSnapshots'] = type('MesoscopeRegisterSnapshots', (mscope_tasks.MesoscopeRegisterSnapshots,), {})( - **kwargs, **mscope_kwargs) + **kwargs, **mscope_kwargs + ) tasks['MesoscopePreprocess'] = type('MesoscopePreprocess', (mscope_tasks.MesoscopePreprocess,), {})( - **kwargs, **mscope_kwargs) + **kwargs, **mscope_kwargs + ) tasks['MesoscopeFOV'] = type('MesoscopeFOV', (mscope_tasks.MesoscopeFOV,), {})( - **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']]) + **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']] + ) tasks['MesoscopeSync'] = type('MesoscopeSync', 
(mscope_tasks.MesoscopeSync,), {})( - **kwargs, **mscope_kwargs, **sync_kwargs) + **kwargs, **mscope_kwargs, **sync_kwargs + ) tasks['MesoscopeCompress'] = type('MesoscopeCompress', (mscope_tasks.MesoscopeCompress,), {})( - **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']]) + **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']] + ) if 'neurophotometrics' in devices: - photometry_kwargs = devices['neurophotometrics'] - tasks['FibrePhotometrySync'] = type('FibrePhotometrySync', ( - ptasks.FibrePhotometrySync,), {})(**kwargs, **photometry_kwargs) + # note devices['neurophotometrics'] is the acquisition_description + sync_mode = devices['neurophotometrics'].get('sync_mode', 'bpod') # default to bpod for downward compatibility + match sync_mode: + case 'bpod': + tasks['FibrePhotometryBpodSync'] = type('FibrePhotometryBpodSync', (ptasks.FibrePhotometryBpodSync,), {})( + **devices['neurophotometrics'], + **kwargs, + ) + case 'daqami': + tasks['FibrePhotometryDAQSync'] = type('FibrePhotometryDAQSync', (ptasks.FibrePhotometryDAQSync,), {})( + **devices['neurophotometrics'], + **acquisition_description['sync'], + **kwargs, + ) p = mtasks.Pipeline(session_path=session_path, **pkwargs) p.tasks = tasks diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 13f967685..e40ba5ae8 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -10,115 +10,266 @@ from iblutil.io import jsonable import iblphotometry.io as fpio +from abc import abstractmethod +import iblphotometry + _logger = logging.getLogger('ibllib') +""" +Neurophotometrics FP3002 specific information. +The light source map refers to the available LEDs on the system. +The flags refers to the byte encoding of led states in the system. +""" + +LIGHT_SOURCE_MAP = { + 'color': ['None', 'Violet', 'Blue', 'Green'], + 'wavelength': [0, 415, 470, 560], + 'name': ['None', 'Isosbestic', 'GCaMP', 'RCaMP'], +} + +LED_STATES = { + 'Condition': { + 0: 'No additional signal', + 1: 'Output 1 signal HIGH', + 2: 'Output 0 signal HIGH', + 3: 'Stimulation ON', + 4: 'GPIO Line 2 HIGH', + 5: 'GPIO Line 3 HIGH', + 6: 'Input 1 HIGH', + 7: 'Input 0 HIGH', + 8: 'Output 0 signal HIGH + Stimulation', + 9: 'Output 0 signal HIGH + Input 0 signal HIGH', + 10: 'Input 0 signal HIGH + Stimulation', + 11: 'Output 0 HIGH + Input 0 HIGH + Stimulation', + }, + 'No LED ON': {0: 0, 1: 8, 2: 16, 3: 32, 4: 64, 5: 128, 6: 256, 7: 512, 8: 48, 9: 528, 10: 544, 11: 560}, + 'L415': {0: 1, 1: 9, 2: 17, 3: 33, 4: 65, 5: 129, 6: 257, 7: 513, 8: 49, 9: 529, 10: 545, 11: 561}, + 'L470': {0: 2, 1: 10, 2: 18, 3: 34, 4: 66, 5: 130, 6: 258, 7: 514, 8: 50, 9: 530, 10: 546, 11: 562}, + 'L560': {0: 4, 1: 12, 2: 20, 3: 36, 4: 68, 5: 132, 6: 260, 7: 516, 8: 52, 9: 532, 10: 548, 11: 564}, +} + + +# def _channel_meta(light_source_map=None): +# """ +# Return table of light source wavelengths and corresponding colour labels. + +# Parameters +# ---------- +# light_source_map : dict +# An optional map of light source wavelengths (nm) used and their corresponding colour name. + +# Returns +# ------- +# pandas.DataFrame +# A sorted table of wavelength and colour name. 
+# """ +# light_source_map = light_source_map or LIGHT_SOURCE_MAP +# meta = pd.DataFrame.from_dict(light_source_map) +# meta.index.rename('channel_id', inplace=True) +# return meta -class FibrePhotometrySync(base_tasks.DynamicTask): + +class FibrePhotometryBaseSync(base_tasks.DynamicTask): priority = 90 job_size = 'small' def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.device_collection = self.get_device_collection('neurophotometrics', device_collection='raw_photometry_data') + self.kwargs = kwargs + # we will work with the first protocol here for task in self.session_params['tasks']: self.task_protocol = next(k for k in task) self.task_collection = ibllib.io.session_params.get_task_collection(self.session_params, self.task_protocol) break - @property - def signature(self): - signature = { - 'input_files': [ - ('_neurophotometrics_fpData.raw.pqt', self.device_collection, True, True), - ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), - ('_neurophotometrics_fpData.channels.csv', self.device_collection, True, True), - ('_neurophotometrics_fpData.digitalIntputs.pqt', self.device_collection, True), - ], - 'output_files': [ - ('photometry.signal.pqt', 'alf/photometry', True), - ('photometryROI.locations.pqt', 'alf/photometry', True), - ], - } - return signature - - def _sync_bpod_neurophotometrics(self): - """ - Perform the linear clock correction between bpod and neurophotometrics timestamps. - :return: interpolation function that outputs bpod timestamsp from neurophotometrics timestamps - """ - folder_raw_photometry = self.session_path.joinpath(self.device_collection) - df_digital_inputs = fpio.read_digital_inputs_file(folder_raw_photometry / '_neurophotometrics_fpData.digitalIntputs.pqt') - # normally we should disregard the states and use the sync label. But bpod doesn't log TTL outs, - # only the states. This will change in the future but for now we are stuck with this. 
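The new _get_bpod_timestamps below collects, for each synchronising state, one timestamp per trial on the bpod clock. A minimal sketch of the jsonable trial structure it relies on (field names as used in the method; the numbers are invented for illustration):

    # one dict per trial, as returned by iblutil.io.jsonable.load_task_jsonable
    bpod_data = [
        {'Trial start timestamp': 12.0,
         'Trial end timestamp': 18.5,
         'States timestamps': {'trial_start': [[0.0, 0.1]], 'reward': [[3.2, 3.3]]}},
    ]
    # a state's time on the bpod session clock = trial-relative onset + trial start
    t_reward = bpod_data[0]['States timestamps']['reward'][0][0] + bpod_data[0]['Trial start timestamp']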
+ def _get_bpod_timestamps(self): if 'habituation' in self.task_protocol: sync_states_names = ['iti', 'reward'] else: sync_states_names = ['trial_start', 'reward', 'exit_state'] + # read in the raw behaviour data for syncing file_jsonable = self.session_path.joinpath(self.task_collection, '_iblrig_taskData.raw.jsonable') - trials_table, bpod_data = jsonable.load_task_jsonable(file_jsonable) + _, bpod_data = jsonable.load_task_jsonable(file_jsonable) + # we get the timestamps of the states from the bpod data - tbpod = [] - for sname in sync_states_names: - tbpod.append( + timestamps_bpod = [] + for sync_name in sync_states_names: + timestamps_bpod.append( np.array( [ - bd['States timestamps'][sname][0][0] + bd['Trial start timestamp'] - bpod_data[0]['Bpod start timestamp'] - for bd in bpod_data - if sname in bd['States timestamps'] + data['States timestamps'][sync_name][0][0] + data['Trial start timestamp'] + for data in bpod_data + if sync_name in data['States timestamps'] ] ) ) - tbpod = np.sort(np.concatenate(tbpod)) - tbpod = tbpod[~np.isnan(tbpod)] - # we get the timestamps for the photometry data - sync_channel = self.session_params['devices']['neurophotometrics']['sync_channel'] - tph = df_digital_inputs['SystemTimestamp'].values[df_digital_inputs['Channel'] == sync_channel] - tph = tph[15:] # TODO: we may want to detect the spacers before removing it, especially for successive sessions + timestamps_bpod = np.sort(np.concatenate(timestamps_bpod)) + timestamps_bpod = timestamps_bpod[~np.isnan(timestamps_bpod)] + return timestamps_bpod, bpod_data + + @abstractmethod + def _get_neurophotometrics_timestamps(self): ... + + def _get_sync_function(self): + """ + Perform the linear clock correction between bpod and neurophotometrics timestamps. + :return: interpolation function that outputs bpod timestamsp from neurophotometrics timestamps + """ + + # get the timestamps + timestamps_bpod, bpod_data = self._get_bpod_timestamps(self.task_protocol) + timestamps_nph = self._get_neurophotometrics_timestamps() + # sync the behaviour events to the photometry timestamps - fcn_nph_to_bpod_times, drift_ppm, iph, ibpod = ibldsp.utils.sync_timestamps(tph, tbpod, return_indices=True, linear=True) - # then we check the alignment, should be less than the screen refresh rate - tcheck = fcn_nph_to_bpod_times(tph[iph]) - tbpod[ibpod] - _logger.info(f'sync: n trials {len(bpod_data)}, n bpod sync {len(tbpod)}, n photometry {len(tph)}, n match {len(iph)}') + sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( + timestamps_nph, timestamps_bpod, return_indices=True, linear=True + ) + # TODO log drift + + # then we check the alignment, should be less than the camera sampling rate + tcheck = sync_nph_to_bpod_fcn(timestamps_nph[ix_nph]) - timestamps_bpod[ix_bpod] + _logger.info( + f'sync: n trials {len(bpod_data)}, n bpod sync {len(timestamps_bpod)}, n photometry {len(timestamps_nph)}, n match {len(ix_nph)}' + ) + # FIXME the framerate here is hardcoded, infer it instead! 
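One way to address the FIXME above, sketched as a suggestion rather than as part of the patch: derive the tolerance from the acquisition itself. Here frame_times stands for the FP3002 frame timestamps expressed in the same clock as timestamps_nph (e.g. the SystemTimestamp column of the raw data):

    frame_period = np.median(np.diff(frame_times))  # inferred inter-frame interval instead of the hardcoded 1/60 s
    assert np.all(np.abs(tcheck) < frame_period), 'Sync issue detected, residual above one frame period'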
assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' - assert len(iph) / len(tbpod) > 0.95, 'Sync issue detected, less than 95% of the bpod events matched' + assert len(ix_nph) / len(timestamps_bpod) > 0.95, 'Sync issue detected, less than 95% of the bpod events matched' valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] - return fcn_nph_to_bpod_times, valid_bounds - def _run(self, **kwargs): - """ - Extract photometry data from the raw neurophotometrics data in parquet - The extraction has 3 main steps: - 1. Synchronise the bpod and neurophotometrics timestamps. - 2. Extract the photometry data from the raw neurophotometrics data. - 3. Label the fibers correspondance with brain regions in a small table - :param kwargs: - :return: - """ - # 1) sync: we check the synchronisation, right now we only have bpod but soon the daq will be used - match list(self.session_params['sync'].keys())[0]: - case 'bpod': - fcn_nph_to_bpod_times, valid_bounds = self._sync_bpod_neurophotometrics() - case _: - raise NotImplementedError('Syncing with daq is not supported yet.') - - # 2) reformat the raw data with wavelengths and meta-data - folder_raw_photometry = self.session_path.joinpath(self.device_collection) - out_df = fpio.from_raw_neurophotometrics_file_to_ibl_df( - folder_raw_photometry.joinpath('_neurophotometrics_fpData.raw.pqt') + return sync_nph_to_bpod_fcn, valid_bounds + + def load_data(self): + raw_photometry_folder = self.session_path / self.device_collection + raw_neurophotometrics_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt') + ibl_df = iblphotometry.io.from_raw_neurophotometrics_df_to_ibl_df( + raw_neurophotometrics_df, + rois=self.kwargs['fibers'], ) - out_df['times'] = fcn_nph_to_bpod_times(out_df['times']) - - # 3) label the brain regions - rois = [] - for k, v in self.session_params['devices']['neurophotometrics']['fibers'].items(): - rois.append({'ROI': k, 'fiber': f'fiber_{v["location"]}', 'brain_region': v['location']}) - df_rois = pd.DataFrame(rois).set_index('ROI') - - # 4) to finish we write the dataframes to disk - out_path = self.session_path.joinpath('alf', 'photometry') - out_path.mkdir(parents=True, exist_ok=True) - out_df.to_parquet(file_signal := out_path.joinpath('photometry.signal.pqt')) - df_rois.to_parquet(file_locations := out_path.joinpath('photometryROI.locations.pqt')) - return file_signal, file_locations + return ibl_df + + def _run(self, **kwargs): + """ """ + # 1) load photometry data + # note: when loading daq based syncing, the SystemTimestamp column + ibl_df = self.load_data() + + # 2) get the synchronization function + sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function() + ibl_df['valid'] = np.logical_and(ibl_df['times'] >= valid_bounds[0], ibl_df['times'] <= valid_bounds[1]) + + # 3) apply synchronization + # for bpod based syncing, we can directly transform the timestamps that are + # stored with the samples + ibl_df['times'] = sync_nph_to_bpod_fcn(ibl_df['SystemTimestamp']) + + # 4) write to disk + output_folder = self.session_path.joinpath('alf', 'photometry') + output_folder.mkdir(parents=True, exist_ok=True) + + # writing the synced photometry signal + ibl_df_outpath = output_folder / 'photometry.signal.pqt' + ibl_df.to_parquet(ibl_df_outpath) + + # writing the locations + rois = list(self.kwargs['fibers'].keys()) + locations_df = pd.DataFrame(rois).set_index('ROI') + locations_df_outpath = output_folder / 
'photometryROI.locations.pqt' + locations_df.to_parquet(locations_df_outpath) + return ibl_df, locations_df + + +class FibrePhotometryBpodSync(FibrePhotometryBaseSync): + priority = 90 + job_size = 'small' + + @property + def signature(self): + signature = { + 'input_files': [ + ('_neurophotometrics_fpData.raw.pqt', self.device_collection, True, True), + ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), + ('_neurophotometrics_fpData.channels.csv', self.device_collection, True, True), + ('_neurophotometrics_fpData.digitalIntputs.pqt', self.device_collection, True), + ], + 'output_files': [ + ('photometry.signal.pqt', 'alf/photometry', True), + ('photometryROI.locations.pqt', 'alf/photometry', True), + ], + } + return signature + + def _get_neurophotometrics_timestamps(self): + # we get the timestamps for the photometry data by loading from the digital inputs file + raw_photometry_folder = self.session_path / self.device_collection + digital_inputs_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.digitalIntputs.pqt') + timestamps_nph = digital_inputs_df['SystemTimestamp'].values[digital_inputs_df['Channel'] == self.kwargs['sync_channel']] + timestamps_nph = timestamps_nph[ + 15: + ] # TODO: we may want to detect the spacers before removing it, especially for successive sessions + return timestamps_nph + + +class FibrePhotometryDAQSync(FibrePhotometryBaseSync): + """ + DAQ syncing outline + + bpod stores it's own timestamps - "timestamps_bpod" + DAQ receives TTL sync from each bpod - "daq_bpod_sync" + DAQ receives Frame clock from FP3002 - "daq_nph_frameclock" + NPH stores system timestamps at each sample time - "nph_frameclock" + + + + 2 step sync + - NPH time to DAQ time (on the basis of frame clock) m1, b1 = linreg(nph_frameclock, daq_frameclock) + - DAQ time to BPOD time m2, b2 = linreg(daq_bpod_sync, bpod_sync) + + transfrom from NPH to BPOD + m1 * nph_frameclock + b1 + """ + + priority = 90 + job_size = 'small' + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.sync_kwargs = kwargs['sync']['daqami'] + # grab the sync relevant things here + + @property + def signature(self): + signature = { + 'input_files': [ + ('_neurophotometrics_fpData.raw.pqt', self.device_collection, True, True), + ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), + ('_neurophotometrics_fpData.channels.csv', self.device_collection, True, True), + # TODO input here - the sync data fils in the self.sync_collection + ], + 'output_files': [ + ('photometry.signal.pqt', 'alf/photometry', True), + ('photometryROI.locations.pqt', 'alf/photometry', True), + ], + } + return signature + + def load_data(self): + ibl_df = super().load_data() + # load here the daqami timestamps + # and put them in the ibl_df + return ibl_df + + def _get_neurophotometrics_timestamps(self): + # get the sync data + # FIXME replace me with the actual filename + bin_filepath = self.session_path / self.sync_kwargs['collection'] / 'the_sync_file.bin' + + # read bin file + # and extract from it + # daq_nph_frameclock + # daq_bpod_sync + + timestamps_nph = None + return timestamps_nph From 217bd5f090f8ca979b227ab2da837df225d3f498 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 12 May 2025 11:17:19 +0100 Subject: [PATCH 08/80] leftovers --- ibllib/pipes/neurophotometrics.py | 59 ++----------------------------- 1 file changed, 2 insertions(+), 57 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 
e40ba5ae8..7bdbc2165 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -1,5 +1,3 @@ -"""Extraction tasks for fibrephotometry""" - import logging import numpy as np import pandas as pd @@ -15,61 +13,10 @@ _logger = logging.getLogger('ibllib') -""" -Neurophotometrics FP3002 specific information. -The light source map refers to the available LEDs on the system. -The flags refers to the byte encoding of led states in the system. -""" - -LIGHT_SOURCE_MAP = { - 'color': ['None', 'Violet', 'Blue', 'Green'], - 'wavelength': [0, 415, 470, 560], - 'name': ['None', 'Isosbestic', 'GCaMP', 'RCaMP'], -} - -LED_STATES = { - 'Condition': { - 0: 'No additional signal', - 1: 'Output 1 signal HIGH', - 2: 'Output 0 signal HIGH', - 3: 'Stimulation ON', - 4: 'GPIO Line 2 HIGH', - 5: 'GPIO Line 3 HIGH', - 6: 'Input 1 HIGH', - 7: 'Input 0 HIGH', - 8: 'Output 0 signal HIGH + Stimulation', - 9: 'Output 0 signal HIGH + Input 0 signal HIGH', - 10: 'Input 0 signal HIGH + Stimulation', - 11: 'Output 0 HIGH + Input 0 HIGH + Stimulation', - }, - 'No LED ON': {0: 0, 1: 8, 2: 16, 3: 32, 4: 64, 5: 128, 6: 256, 7: 512, 8: 48, 9: 528, 10: 544, 11: 560}, - 'L415': {0: 1, 1: 9, 2: 17, 3: 33, 4: 65, 5: 129, 6: 257, 7: 513, 8: 49, 9: 529, 10: 545, 11: 561}, - 'L470': {0: 2, 1: 10, 2: 18, 3: 34, 4: 66, 5: 130, 6: 258, 7: 514, 8: 50, 9: 530, 10: 546, 11: 562}, - 'L560': {0: 4, 1: 12, 2: 20, 3: 36, 4: 68, 5: 132, 6: 260, 7: 516, 8: 52, 9: 532, 10: 548, 11: 564}, -} - - -# def _channel_meta(light_source_map=None): -# """ -# Return table of light source wavelengths and corresponding colour labels. - -# Parameters -# ---------- -# light_source_map : dict -# An optional map of light source wavelengths (nm) used and their corresponding colour name. - -# Returns -# ------- -# pandas.DataFrame -# A sorted table of wavelength and colour name. 
-# """ -# light_source_map = light_source_map or LIGHT_SOURCE_MAP -# meta = pd.DataFrame.from_dict(light_source_map) -# meta.index.rename('channel_id', inplace=True) -# return meta - class FibrePhotometryBaseSync(base_tasks.DynamicTask): + # base clas for syncing fibre photometry + # derived classes are: FibrePhotometryBpodSync and FibrePhotometryDAQSync priority = 90 job_size = 'small' @@ -221,8 +168,6 @@ class FibrePhotometryDAQSync(FibrePhotometryBaseSync): DAQ receives Frame clock from FP3002 - "daq_nph_frameclock" NPH stores system timestamps at each sample time - "nph_frameclock" - - 2 step sync - NPH time to DAQ time (on the basis of frame clock) m1, b1 = linreg(nph_frameclock, daq_frameclock) - DAQ time to BPOD time m2, b2 = linreg(daq_bpod_sync, bpod_sync) From a933cb7ff8f4aee953811b81f5147a95bfae539b Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Tue, 13 May 2025 17:18:39 +0100 Subject: [PATCH 09/80] 4 rig updates - ready for testing --- ibllib/pipes/dynamic_pipeline.py | 6 ++- ibllib/pipes/neurophotometrics.py | 78 ++++++++++++++++++------------- 2 files changed, 51 insertions(+), 33 deletions(-) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index 9b3840526..e3ec02253 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -605,15 +605,19 @@ def make_pipeline(session_path, **pkwargs): ) if 'neurophotometrics' in devices: - # note devices['neurophotometrics'] is the acquisition_description + # note: devices['neurophotometrics'] is the acquisition_description sync_mode = devices['neurophotometrics'].get('sync_mode', 'bpod') # default to bpod for downward compatibility match sync_mode: case 'bpod': + # for synchronization with the BNC inputs of the neurophotometrics receiving the sync pulses + # from the individual bpods tasks['FibrePhotometryBpodSync'] = type('FibrePhotometryBpodSync', (ptasks.FibrePhotometryBpodSync,), {})( **devices['neurophotometrics'], **kwargs, ) case 'daqami': + # for synchronization with the DAQami receiving the sync pulses from the individual bpods + # as well as the frame clock from the FP3002 tasks['FibrePhotometryDAQSync'] = type('FibrePhotometryDAQSync', (ptasks.FibrePhotometryDAQSync,), {})( **devices['neurophotometrics'], **acquisition_description['sync'], diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 7bdbc2165..1f52545c4 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -8,6 +8,9 @@ from iblutil.io import jsonable import iblphotometry.io as fpio +from ibldsp.utils import rises +from nptdms import TdmsFile + from abc import abstractmethod import iblphotometry @@ -58,7 +61,11 @@ def _get_bpod_timestamps(self): return timestamps_bpod, bpod_data @abstractmethod - def _get_neurophotometrics_timestamps(self): ... + def _get_neurophotometrics_timestamps(self): + # this function needs to be implemented in the derived classes: + # for bpod based syncing, the timestamps are in the digial inputs file + # for daq based syncing, the timestamps are extracted from the tdms file + ... 
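Stepping back to the sync_mode dispatch added to make_pipeline earlier in this patch: a sketch of the neurophotometrics device description it consumes. The keys mirror the task code and the example comment removed in patch 03; the values are illustrative only:

    devices['neurophotometrics'] = {
        'collection': 'raw_photometry_data',
        'sync_channel': 1,
        'sync_mode': 'daqami',  # 'bpod' is assumed when the key is absent
        'fibers': {'G0': {'location': 'NBM'}, 'G1': {'location': 'SI'}},
    }

With 'daqami', FibrePhotometryDAQSync additionally receives acquisition_description['sync'], whose 'daqami' entry is expected to provide the collection, sampling_rate and frameclock_channel used by the task code below.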
def _get_sync_function(self): """ @@ -153,36 +160,23 @@ def _get_neurophotometrics_timestamps(self): raw_photometry_folder = self.session_path / self.device_collection digital_inputs_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.digitalIntputs.pqt') timestamps_nph = digital_inputs_df['SystemTimestamp'].values[digital_inputs_df['Channel'] == self.kwargs['sync_channel']] - timestamps_nph = timestamps_nph[ - 15: - ] # TODO: we may want to detect the spacers before removing it, especially for successive sessions + + # simple spacer removal, TODO replace this with something more robust + # detect spacer / remove spacer methods + timestamps_nph = timestamps_nph[15:] return timestamps_nph class FibrePhotometryDAQSync(FibrePhotometryBaseSync): - """ - DAQ syncing outline - - bpod stores it's own timestamps - "timestamps_bpod" - DAQ receives TTL sync from each bpod - "daq_bpod_sync" - DAQ receives Frame clock from FP3002 - "daq_nph_frameclock" - NPH stores system timestamps at each sample time - "nph_frameclock" - - 2 step sync - - NPH time to DAQ time (on the basis of frame clock) m1, b1 = linreg(nph_frameclock, daq_frameclock) - - DAQ time to BPOD time m2, b2 = linreg(daq_bpod_sync, bpod_sync) - - transfrom from NPH to BPOD - m1 * nph_frameclock + b1 - """ + """ """ priority = 90 job_size = 'small' def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.sync_kwargs = kwargs['sync']['daqami'] - # grab the sync relevant things here + self.sync_kwargs = kwargs['daqami'] + self.sync_channel = kwargs['sync_channel'] @property def signature(self): @@ -191,7 +185,7 @@ def signature(self): ('_neurophotometrics_fpData.raw.pqt', self.device_collection, True, True), ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), ('_neurophotometrics_fpData.channels.csv', self.device_collection, True, True), - # TODO input here - the sync data fils in the self.sync_collection + ('_mcc_DAQdata.raw.tdms', self.sync_kwargs['collection'], True, True), ], 'output_files': [ ('photometry.signal.pqt', 'alf/photometry', True), @@ -200,21 +194,41 @@ def signature(self): } return signature + def _load_and_parse_tdms(self): + # loads the tdms file data, and detects the risind edges + # this probably could use some dsp, potentially trend removal + tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' + tdms_df = TdmsFile.read(tdms_filepath).as_dataframe() + tdms_df.columns = [col[-4:-1] for col in tdms_df.columns] # hardcoded renaming + + timestamps = {} + for col in tdms_df.columns: + timestamps[col] = rises(tdms_df[col]) / self.sync_kwargs['sampling_rate'] + + return timestamps + def load_data(self): + # the point of this functions is to overwrite the SystemTimestamp column + # in the ibl_df with the values from the DAQ clock + # then syncing will work the same as for the bpod based syncing + ibl_df = super().load_data() - # load here the daqami timestamps - # and put them in the ibl_df + + self.timestamps = self._load_and_parse_tdms() + frame_timestamps = self.timestamps[f'AI{self.sync_kwargs["frameclock_channel"]}'] + + # and put them in the ibl_df SystemTimestamp column + ibl_df['SystemTimestamp'] = frame_timestamps return ibl_df def _get_neurophotometrics_timestamps(self): - # get the sync data - # FIXME replace me with the actual filename - bin_filepath = self.session_path / self.sync_kwargs['collection'] / 'the_sync_file.bin' + # get the sync channel + sync_colname = f'AI{self.sync_kwargs[""]}' - # read bin file - # and 
extract from it - # daq_nph_frameclock - # daq_bpod_sync + # and the corresponding timestamps + timestamps_nph = self.timestamps[sync_colname] - timestamps_nph = None + # simple spacer removal, TODO replace this with something more robust + # detect spacer / remove spacer methods + timestamps_nph = timestamps_nph[15:] return timestamps_nph From f6a804d57f07f8c502e46d76d9eb53020d4851f4 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 14 May 2025 13:23:42 +0100 Subject: [PATCH 10/80] ready for testing --- ibllib/pipes/neurophotometrics.py | 55 ++++++++++++++----------------- 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 1f52545c4..d56a1042d 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -1,12 +1,12 @@ import logging import numpy as np import pandas as pd +from typing import Tuple import ibldsp.utils import ibllib.io.session_params from ibllib.pipes import base_tasks from iblutil.io import jsonable -import iblphotometry.io as fpio from ibldsp.utils import rises from nptdms import TdmsFile @@ -25,7 +25,7 @@ class FibrePhotometryBaseSync(base_tasks.DynamicTask): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.device_collection = self.get_device_collection('neurophotometrics', device_collection='raw_photometry_data') + self.photometry_collection = kwargs['collection'] # raw_photometry_data self.kwargs = kwargs # we will work with the first protocol here @@ -34,7 +34,8 @@ def __init__(self, *args, **kwargs): self.task_collection = ibllib.io.session_params.get_task_collection(self.session_params, self.task_protocol) break - def _get_bpod_timestamps(self): + def _get_bpod_timestamps(self) -> Tuple[np.ndarray, list]: + # the timestamps for syncing, in the time of the bpod if 'habituation' in self.task_protocol: sync_states_names = ['iti', 'reward'] else: @@ -61,17 +62,14 @@ def _get_bpod_timestamps(self): return timestamps_bpod, bpod_data @abstractmethod - def _get_neurophotometrics_timestamps(self): + def _get_neurophotometrics_timestamps(self) -> np.ndarray: # this function needs to be implemented in the derived classes: # for bpod based syncing, the timestamps are in the digial inputs file # for daq based syncing, the timestamps are extracted from the tdms file ... - def _get_sync_function(self): - """ - Perform the linear clock correction between bpod and neurophotometrics timestamps. 
- :return: interpolation function that outputs bpod timestamsp from neurophotometrics timestamps - """ + def _get_sync_function(self) -> Tuple[callable, list]: + # returns the synchronization function # get the timestamps timestamps_bpod, bpod_data = self._get_bpod_timestamps(self.task_protocol) @@ -95,8 +93,9 @@ def _get_sync_function(self): return sync_nph_to_bpod_fcn, valid_bounds - def load_data(self): - raw_photometry_folder = self.session_path / self.device_collection + def load_data(self) -> pd.DataFrame: + # loads the raw photometry data + raw_photometry_folder = self.session_path / self.photometry_collection raw_neurophotometrics_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt') ibl_df = iblphotometry.io.from_raw_neurophotometrics_df_to_ibl_df( raw_neurophotometrics_df, @@ -104,10 +103,10 @@ def load_data(self): ) return ibl_df - def _run(self, **kwargs): - """ """ + def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # 1) load photometry data # note: when loading daq based syncing, the SystemTimestamp column + # will be overridden with the timestamps from the tdms file ibl_df = self.load_data() # 2) get the synchronization function @@ -115,8 +114,6 @@ def _run(self, **kwargs): ibl_df['valid'] = np.logical_and(ibl_df['times'] >= valid_bounds[0], ibl_df['times'] <= valid_bounds[1]) # 3) apply synchronization - # for bpod based syncing, we can directly transform the timestamps that are - # stored with the samples ibl_df['times'] = sync_nph_to_bpod_fcn(ibl_df['SystemTimestamp']) # 4) write to disk @@ -143,10 +140,10 @@ class FibrePhotometryBpodSync(FibrePhotometryBaseSync): def signature(self): signature = { 'input_files': [ - ('_neurophotometrics_fpData.raw.pqt', self.device_collection, True, True), + ('_neurophotometrics_fpData.raw.pqt', self.photometry_collection, True, True), ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), - ('_neurophotometrics_fpData.channels.csv', self.device_collection, True, True), - ('_neurophotometrics_fpData.digitalIntputs.pqt', self.device_collection, True), + ('_neurophotometrics_fpData.channels.csv', self.photometry_collection, True, True), + ('_neurophotometrics_fpData.digitalIntputs.pqt', self.photometry_collection, True), ], 'output_files': [ ('photometry.signal.pqt', 'alf/photometry', True), @@ -155,9 +152,9 @@ def signature(self): } return signature - def _get_neurophotometrics_timestamps(self): - # we get the timestamps for the photometry data by loading from the digital inputs file - raw_photometry_folder = self.session_path / self.device_collection + def _get_neurophotometrics_timestamps(self) -> np.ndarray: + # for bpod based syncing, the timestamps for syncing are in the digital inputs file + raw_photometry_folder = self.session_path / self.photometry_collection digital_inputs_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.digitalIntputs.pqt') timestamps_nph = digital_inputs_df['SystemTimestamp'].values[digital_inputs_df['Channel'] == self.kwargs['sync_channel']] @@ -168,8 +165,6 @@ def _get_neurophotometrics_timestamps(self): class FibrePhotometryDAQSync(FibrePhotometryBaseSync): - """ """ - priority = 90 job_size = 'small' @@ -182,9 +177,9 @@ def __init__(self, *args, **kwargs): def signature(self): signature = { 'input_files': [ - ('_neurophotometrics_fpData.raw.pqt', self.device_collection, True, True), + ('_neurophotometrics_fpData.raw.pqt', self.photometry_collection, True, True), ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), 
- ('_neurophotometrics_fpData.channels.csv', self.device_collection, True, True), + ('_neurophotometrics_fpData.channels.csv', self.photometry_collection, True, True), ('_mcc_DAQdata.raw.tdms', self.sync_kwargs['collection'], True, True), ], 'output_files': [ @@ -194,9 +189,9 @@ def signature(self): } return signature - def _load_and_parse_tdms(self): - # loads the tdms file data, and detects the risind edges - # this probably could use some dsp, potentially trend removal + def _load_and_parse_tdms(self) -> dict: + # loads the tdms file data, and detects the rising edges + # this probably could use some dsp tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' tdms_df = TdmsFile.read(tdms_filepath).as_dataframe() tdms_df.columns = [col[-4:-1] for col in tdms_df.columns] # hardcoded renaming @@ -207,7 +202,7 @@ def _load_and_parse_tdms(self): return timestamps - def load_data(self): + def load_data(self) -> pd.DataFrame: # the point of this functions is to overwrite the SystemTimestamp column # in the ibl_df with the values from the DAQ clock # then syncing will work the same as for the bpod based syncing @@ -221,7 +216,7 @@ def load_data(self): ibl_df['SystemTimestamp'] = frame_timestamps return ibl_df - def _get_neurophotometrics_timestamps(self): + def _get_neurophotometrics_timestamps(self) -> np.ndarray: # get the sync channel sync_colname = f'AI{self.sync_kwargs[""]}' From 60cd99c8bb50a4cd736d6d55b0fbba0b0db27094 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Thu, 15 May 2025 14:33:31 +0100 Subject: [PATCH 11/80] 4 rig - ready for testing round 2 --- ibllib/pipes/dynamic_pipeline.py | 1 - ibllib/pipes/neurophotometrics.py | 89 ++++++++++++++++++------------- 2 files changed, 52 insertions(+), 38 deletions(-) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index e3ec02253..c8e6ea119 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -620,7 +620,6 @@ def make_pipeline(session_path, **pkwargs): # as well as the frame clock from the FP3002 tasks['FibrePhotometryDAQSync'] = type('FibrePhotometryDAQSync', (ptasks.FibrePhotometryDAQSync,), {})( **devices['neurophotometrics'], - **acquisition_description['sync'], **kwargs, ) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index d56a1042d..86639d8be 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -1,4 +1,5 @@ import logging +from pathlib import Path import numpy as np import pandas as pd from typing import Tuple @@ -8,15 +9,32 @@ from ibllib.pipes import base_tasks from iblutil.io import jsonable -from ibldsp.utils import rises from nptdms import TdmsFile from abc import abstractmethod -import iblphotometry +from iblphotometry import io as fpio _logger = logging.getLogger('ibllib') +def extract_timestamps_from_tdms_file(tdms_filepath: Path) -> dict: + # loads the tdms file data, and detects the rising edges + timestamps = {} # stores the resulting edge times here + tdms_file = TdmsFile.read(tdms_filepath) + analog_group, digital_group = tdms_file.groups() + fs = analog_group.properties['ScanRate'] + + for channel in analog_group.channels(): + signal = (channel.data > 2.5).astype('int64') + timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs + + for channel in digital_group.channels(): + signal = (channel.data > 0.5).astype('int64') + timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs + + return timestamps + + class 
FibrePhotometryBaseSync(base_tasks.DynamicTask): # base clas for syncing fibre photometry # derived classes are: FibrePhotometryBpodSync and FibrePhotometryDAQSync @@ -72,7 +90,7 @@ def _get_sync_function(self) -> Tuple[callable, list]: # returns the synchronization function # get the timestamps - timestamps_bpod, bpod_data = self._get_bpod_timestamps(self.task_protocol) + timestamps_bpod, bpod_data = self._get_bpod_timestamps() timestamps_nph = self._get_neurophotometrics_timestamps() # sync the behaviour events to the photometry timestamps @@ -97,24 +115,27 @@ def load_data(self) -> pd.DataFrame: # loads the raw photometry data raw_photometry_folder = self.session_path / self.photometry_collection raw_neurophotometrics_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt') - ibl_df = iblphotometry.io.from_raw_neurophotometrics_df_to_ibl_df( - raw_neurophotometrics_df, - rois=self.kwargs['fibers'], - ) - return ibl_df + return raw_neurophotometrics_df + # return ibl_df def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # 1) load photometry data + # note: when loading daq based syncing, the SystemTimestamp column # will be overridden with the timestamps from the tdms file - ibl_df = self.load_data() + # the idea behind this is that the rest of the sync is then the same + # and handled by this base class + raw_df = self.load_data() # 2) get the synchronization function sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function() - ibl_df['valid'] = np.logical_and(ibl_df['times'] >= valid_bounds[0], ibl_df['times'] <= valid_bounds[1]) + + # 3) convert to ibl_df + ibl_df = fpio.from_raw_neurophotometrics_df_to_ibl_df(raw_df, rois=self.kwargs['fibers'], drop_first=False) # 3) apply synchronization - ibl_df['times'] = sync_nph_to_bpod_fcn(ibl_df['SystemTimestamp']) + ibl_df['times'] = sync_nph_to_bpod_fcn(raw_df['SystemTimestamp']) + ibl_df['valid'] = np.logical_and(ibl_df['times'] >= valid_bounds[0], ibl_df['times'] <= valid_bounds[1]) # 4) write to disk output_folder = self.session_path.joinpath('alf', 'photometry') @@ -125,7 +146,9 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: ibl_df.to_parquet(ibl_df_outpath) # writing the locations - rois = list(self.kwargs['fibers'].keys()) + rois = [] + for k, v in self.kwargs['fibers'].items(): + rois.append({'ROI': k, 'fiber': f'fiber_{v["location"]}', 'brain_region': v['location']}) locations_df = pd.DataFrame(rois).set_index('ROI') locations_df_outpath = output_folder / 'photometryROI.locations.pqt' locations_df.to_parquet(locations_df_outpath) @@ -158,8 +181,8 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: digital_inputs_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.digitalIntputs.pqt') timestamps_nph = digital_inputs_df['SystemTimestamp'].values[digital_inputs_df['Channel'] == self.kwargs['sync_channel']] - # simple spacer removal, TODO replace this with something more robust - # detect spacer / remove spacer methods + # TODO replace this rudimentary spacer removal + # to implement: detect spacer / remove spacer methods timestamps_nph = timestamps_nph[15:] return timestamps_nph @@ -170,7 +193,7 @@ class FibrePhotometryDAQSync(FibrePhotometryBaseSync): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.sync_kwargs = kwargs['daqami'] + self.sync_kwargs = kwargs['sync_metadata'] self.sync_channel = kwargs['sync_channel'] @property @@ -189,41 +212,33 @@ def signature(self): } return signature - def _load_and_parse_tdms(self) 
-> dict: - # loads the tdms file data, and detects the rising edges - # this probably could use some dsp - tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' - tdms_df = TdmsFile.read(tdms_filepath).as_dataframe() - tdms_df.columns = [col[-4:-1] for col in tdms_df.columns] # hardcoded renaming - - timestamps = {} - for col in tdms_df.columns: - timestamps[col] = rises(tdms_df[col]) / self.sync_kwargs['sampling_rate'] - - return timestamps - def load_data(self) -> pd.DataFrame: # the point of this functions is to overwrite the SystemTimestamp column # in the ibl_df with the values from the DAQ clock # then syncing will work the same as for the bpod based syncing + raw_df = super().load_data() - ibl_df = super().load_data() - - self.timestamps = self._load_and_parse_tdms() + # get daqami timestamps + tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' + self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) frame_timestamps = self.timestamps[f'AI{self.sync_kwargs["frameclock_channel"]}'] - # and put them in the ibl_df SystemTimestamp column - ibl_df['SystemTimestamp'] = frame_timestamps - return ibl_df + # and put them in the raw_df SystemTimestamp column + if raw_df.shape[0] == frame_timestamps.shape[0]: + raw_df['SystemTimestamp'] = frame_timestamps + elif raw_df.shape[0] == frame_timestamps.shape[0] + 1: + # there is one extra frame timestamp from the last incomplete frame + raw_df['SystemTimestamp'] = frame_timestamps[:-1] + return raw_df def _get_neurophotometrics_timestamps(self) -> np.ndarray: # get the sync channel - sync_colname = f'AI{self.sync_kwargs[""]}' + sync_colname = f'DI{self.kwargs["sync_channel"]}' # and the corresponding timestamps timestamps_nph = self.timestamps[sync_colname] - # simple spacer removal, TODO replace this with something more robust - # detect spacer / remove spacer methods + # TODO replace this rudimentary spacer removal + # to implement: detect spacer / remove spacer methods timestamps_nph = timestamps_nph[15:] return timestamps_nph From fce9ec2972a01c0521cd4a721e78c9179c88e53a Mon Sep 17 00:00:00 2001 From: owinter Date: Wed, 19 Mar 2025 14:34:09 +0000 Subject: [PATCH 12/80] label the DLC task as running in the dlc env --- ibllib/pipes/video_tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ibllib/pipes/video_tasks.py b/ibllib/pipes/video_tasks.py index 407a63465..679fd4885 100644 --- a/ibllib/pipes/video_tasks.py +++ b/ibllib/pipes/video_tasks.py @@ -328,7 +328,7 @@ def _run(self, update=True, **kwargs): class DLC(base_tasks.VideoTask): """ This task relies on a correctly installed dlc environment as per - https://docs.google.com/document/d/1g0scP6_3EmaXCU4SsDNZWwDTaD9MG0es_grLA-d0gh0/edit# + https://github.com/int-brain-lab/iblvideo#installing-dlc-locally-on-an-ibl-server---tensorflow-2120 If your environment is set up otherwise, make sure that you set the respective attributes: t = EphysDLC(session_path) @@ -341,6 +341,7 @@ class DLC(base_tasks.VideoTask): level = 2 force = True job_size = 'large' + env = 'dlc' dlcenv = Path.home().joinpath('Documents', 'PYTHON', 'envs', 'dlcenv', 'bin', 'activate') scripts = Path.home().joinpath('Documents', 'PYTHON', 'iblscripts', 'deploy', 'serverpc', 'dlc') From 1ae68deab829f138c725a7524d780113cf07d920 Mon Sep 17 00:00:00 2001 From: owinter Date: Wed, 19 Mar 2025 15:05:21 +0000 Subject: [PATCH 13/80] the default scratch drive is /scratch --- ibllib/pipes/ephys_tasks.py | 10 +--------- 1 file 
changed, 1 insertion(+), 9 deletions(-) diff --git a/ibllib/pipes/ephys_tasks.py b/ibllib/pipes/ephys_tasks.py index cb9a0099b..fe21f1892 100644 --- a/ibllib/pipes/ephys_tasks.py +++ b/ibllib/pipes/ephys_tasks.py @@ -655,15 +655,7 @@ def scratch_folder_run(self): For a scratch drive at /mnt/h0 we would have the following temp dir: /mnt/h0/iblsorter_1.8.0_CSHL071_2020-10-04_001_probe01/ """ - # get the scratch drive from the shell script - if self.scratch_folder is None: - with open(self.SHELL_SCRIPT) as fid: - lines = fid.readlines() - line = [line for line in lines if line.startswith("SCRATCH_DRIVE=")][0] - m = re.search(r"\=(.*?)(\#|\n)", line)[0] - scratch_drive = Path(m[1:-1].strip()) - else: - scratch_drive = self.scratch_folder + scratch_drive = self.scratch_folder if self.scratch_folder else Path('/scratch') assert scratch_drive.exists(), f"Scratch drive {scratch_drive} not found" # get the version of the sorter self.version = self._fetch_iblsorter_version(self.SORTER_REPOSITORY) From e5d2dfa3f2f22573b57884315e6c1078bd46b088 Mon Sep 17 00:00:00 2001 From: Olivier Winter Date: Fri, 21 Mar 2025 14:20:40 +0000 Subject: [PATCH 14/80] DLC has the option of running within the current env if available --- ibllib/pipes/video_tasks.py | 54 ++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/ibllib/pipes/video_tasks.py b/ibllib/pipes/video_tasks.py index 679fd4885..4ec2d0d5a 100644 --- a/ibllib/pipes/video_tasks.py +++ b/ibllib/pipes/video_tasks.py @@ -387,6 +387,38 @@ def _video_intact(file_mp4): cap.release() return intact + def run_dlc(self, file_mp4, cam, overwrite): + try: + from iblvideo import download_weights + from iblvideo.choiceworld import dlc + path_dlc = download_weights() + dlc_result, _ = dlc(file_mp4, path_dlc=path_dlc, force=overwrite) + return 0 + except ImportError: + command2run = f"{self.scripts.joinpath('run_dlc.sh')} {str(self.dlcenv)} {file_mp4} {overwrite}" + _logger.info(command2run) + process = subprocess.Popen( + command2run, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + executable='/bin/bash', + ) + info, error = process.communicate() + # info_str = info.decode("utf-8").strip() + # _logger.info(info_str) + if process.returncode != 0: + error_str = error.decode('utf-8').strip() + _logger.error(f'DLC failed for {cam}Camera.\n\n' + f'++++++++ Output of subprocess for debugging ++++++++\n\n' + f'{error_str}\n' + f'++++++++++++++++++++++++++++++++++++++++++++\n') + return process.returncode + except Exception as e: + _logger.error(f'An error occurred while running DLC for {cam}Camera: {e}') + _logger.error(traceback.format_exc()) + return -1 + def _run(self, cams=None, overwrite=False): # Check that the cams are valid for DLC, remove the ones that aren't candidate_cams = cams or self.cameras @@ -426,27 +458,11 @@ def _run(self, cams=None, overwrite=False): check_nvidia_driver() _logger.info(f'Running DLC on {cam}Camera.') - command2run = f"{self.scripts.joinpath('run_dlc.sh')} {str(self.dlcenv)} {file_mp4} {overwrite}" - _logger.info(command2run) - process = subprocess.Popen( - command2run, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - executable='/bin/bash', - ) - info, error = process.communicate() - # info_str = info.decode("utf-8").strip() - # _logger.info(info_str) - if process.returncode != 0: - error_str = error.decode('utf-8').strip() - _logger.error(f'DLC failed for {cam}Camera.\n\n' - f'++++++++ Output of subprocess for debugging ++++++++\n\n' - f'{error_str}\n' - 
f'++++++++++++++++++++++++++++++++++++++++++++\n') + return_code = self._run_dlc(file_mp4, cam, overwrite) + if return_code != 0: self.status = -1 - # We dont' run motion energy, or add any files if dlc failed to run continue + dlc_result = next(self.session_path.joinpath('alf').glob(f'_ibl_{cam}Camera.dlc*.pqt')) actual_outputs.append(dlc_result) From fd99fff19ea4b1e08630303c64b14686d505098d Mon Sep 17 00:00:00 2001 From: Olivier Winter Date: Fri, 21 Mar 2025 15:26:13 +0000 Subject: [PATCH 15/80] check dlc env reserved for subprocess call --- ibllib/pipes/video_tasks.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ibllib/pipes/video_tasks.py b/ibllib/pipes/video_tasks.py index 4ec2d0d5a..9ea55df55 100644 --- a/ibllib/pipes/video_tasks.py +++ b/ibllib/pipes/video_tasks.py @@ -387,14 +387,19 @@ def _video_intact(file_mp4): cap.release() return intact - def run_dlc(self, file_mp4, cam, overwrite): + def _run_dlc(self, file_mp4, cam, overwrite): try: + import iblvideo from iblvideo import download_weights from iblvideo.choiceworld import dlc + self.version = iblvideo.__version__ + _logger.info(f'iblvideo version {self.version}') path_dlc = download_weights() dlc_result, _ = dlc(file_mp4, path_dlc=path_dlc, force=overwrite) return 0 except ImportError: + self.version = self._check_dlcenv() + _logger.info(f'iblvideo version {self.version}') command2run = f"{self.scripts.joinpath('run_dlc.sh')} {str(self.dlcenv)} {file_mp4} {overwrite}" _logger.info(command2run) process = subprocess.Popen( @@ -453,8 +458,6 @@ def _run(self, cams=None, overwrite=False): self.status = -1 continue # Check that dlc environment is ok, shell scripts exists, and get iblvideo version, GPU addressable - self.version = self._check_dlcenv() - _logger.info(f'iblvideo version {self.version}') check_nvidia_driver() _logger.info(f'Running DLC on {cam}Camera.') From fbc69c18fb48df282ac01e82a52d75a833c936cb Mon Sep 17 00:00:00 2001 From: owinter Date: Sat, 22 Mar 2025 15:40:37 +0000 Subject: [PATCH 16/80] ephys compression is a large task --- ibllib/pipes/ephys_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/ephys_tasks.py b/ibllib/pipes/ephys_tasks.py index fe21f1892..6cc1cd8db 100644 --- a/ibllib/pipes/ephys_tasks.py +++ b/ibllib/pipes/ephys_tasks.py @@ -124,7 +124,7 @@ class EphysCompressNP1(base_tasks.EphysTask): priority = 90 cpu = 2 io_charge = 100 # this jobs reads raw ap files - job_size = 'small' + job_size = 'large' @property def signature(self): From 7b5127be080a96e8c30ee6b7f9a3841098a00904 Mon Sep 17 00:00:00 2001 From: owinter Date: Wed, 2 Apr 2025 14:27:00 +0100 Subject: [PATCH 17/80] the session loader trial loader has a revision parameter --- brainbox/io/one.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/brainbox/io/one.py b/brainbox/io/one.py index 718f0a574..1445ea0d6 100644 --- a/brainbox/io/one.py +++ b/brainbox/io/one.py @@ -1237,8 +1237,8 @@ def raster(self, spikes, channels, save_dir=None, br=None, label='raster', time_ :param **kwargs: kwargs passed to `driftmap()` (optional) :return: """ - br = br or BrainRegions() - time_series = time_series or {} + br = BrainRegions() if br is None else br + time_series = {} if time_series is None else time_series fig, axs = plt.subplots(2, 2, gridspec_kw={ 'width_ratios': [.95, .05], 'height_ratios': [.1, .9]}, figsize=(16, 9), sharex='col') axs[0, 1].set_axis_off() @@ -1281,13 +1281,20 @@ def plot_rawdata_snippet(self, sr, spikes, clusters, t0, 
save_dir=None, label='raster', gain=-93, - title=None): + title=None, + alpha=0.3, + processing='destripe'): # compute the raw data offset and destripe, we take 400ms around t0 first_sample, last_sample = (int((t0 - 0.2) * sr.fs), int((t0 + 0.2) * sr.fs)) raw = sr[first_sample:last_sample, :-sr.nsync].T channel_labels = channels['labels'] if (channels is not None) and ('labels' in channels) else True - destriped = ibldsp.voltage.destripe(raw, sr.fs, channel_labels=channel_labels) + if processing == 'destripe': + samples = ibldsp.voltage.destripe(raw, sr.fs, channel_labels=channel_labels) + else: + import scipy.signal + sos = scipy.signal.butter(**{"N": 3, "Wn": 300 / sr.fs * 2, "btype": "highpass"}, output="sos") + samples = scipy.signal.sosfiltfilt(sos, raw) # filter out the spikes according to good/bad clusters and to the time slice spike_sel = slice(*np.searchsorted(spikes['samples'], [first_sample, last_sample])) ss = spikes['samples'][spike_sel] @@ -1297,9 +1304,9 @@ def plot_rawdata_snippet(self, sr, spikes, clusters, t0, title = self._default_plot_title(spikes) # display the raw data snippet with spikes overlaid fig, axs = plt.subplots(1, 2, gridspec_kw={'width_ratios': [.95, .05]}, figsize=(16, 9), sharex='col') - Density(destriped, fs=sr.fs, taxis=1, gain=gain, ax=axs[0], t0=t0 - 0.2, unit='s') - axs[0].scatter(ss[sok] / sr.fs, sc[sok], color="green", alpha=0.5) - axs[0].scatter(ss[~sok] / sr.fs, sc[~sok], color="red", alpha=0.5) + Density(samples, fs=sr.fs, taxis=1, gain=gain, ax=axs[0], t0=t0 - 0.2, unit='s') + axs[0].scatter(ss[sok] / sr.fs, sc[sok], color="green", alpha=alpha) + axs[0].scatter(ss[~sok] / sr.fs, sc[~sok], color="red", alpha=alpha) axs[0].set(title=title, xlim=[t0 - 0.035, t0 + 0.035]) # adds the channel locations if available if (channels is not None) and ('atlas_id' in channels): @@ -1501,7 +1508,7 @@ def _find_behaviour_collection(self, obj): f'e.g sl.load_{obj}(collection="{collections[0]}")') raise ALFMultipleCollectionsFound - def load_trials(self, collection=None): + def load_trials(self, collection=None, revision=None): """ Function to load trials data into SessionLoader.trials @@ -1510,13 +1517,13 @@ def load_trials(self, collection=None): collection: str Alf collection of trials data """ - + revision = self.revision if revision is None else revision if not collection: collection = self._find_behaviour_collection('trials') # itiDuration frequently has a mismatched dimension, and we don't need it, exclude using regex self.one.wildcards = False self.trials = self.one.load_object( - self.eid, 'trials', collection=collection, attribute=r'(?!itiDuration).*', revision=self.revision or None).to_df() + self.eid, 'trials', collection=collection, attribute=r'(?!itiDuration).*', revision=revision or None).to_df() self.one.wildcards = True self.data_info.loc[self.data_info['name'] == 'trials', 'is_loaded'] = True From 87bb0eda4d4b2ac339b757c63f7226648a8ea98c Mon Sep 17 00:00:00 2001 From: owinter Date: Fri, 4 Apr 2025 11:44:10 +0100 Subject: [PATCH 18/80] DLC: the motion energy can run in the current environment --- ibllib/pipes/video_tasks.py | 154 ++++++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 59 deletions(-) diff --git a/ibllib/pipes/video_tasks.py b/ibllib/pipes/video_tasks.py index 9ea55df55..2f5f1cd8e 100644 --- a/ibllib/pipes/video_tasks.py +++ b/ibllib/pipes/video_tasks.py @@ -358,25 +358,41 @@ def signature(self): return signature def _check_dlcenv(self): - """Check that scripts are present, dlcenv can be activated and get iblvideo 
version""" - assert len(list(self.scripts.rglob('run_dlc.*'))) == 2, \ - f'Scripts run_dlc.sh and run_dlc.py do not exist in {self.scripts}' - assert len(list(self.scripts.rglob('run_motion.*'))) == 2, \ - f'Scripts run_motion.sh and run_motion.py do not exist in {self.scripts}' - assert self.dlcenv.exists(), f'DLC environment does not exist in assumed location {self.dlcenv}' - command2run = f"source {self.dlcenv}; python -c 'import iblvideo; print(iblvideo.__version__)'" - process = subprocess.Popen( - command2run, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - executable='/bin/bash' - ) - info, error = process.communicate() - if process.returncode != 0: - raise AssertionError(f"DLC environment check failed\n{error.decode('utf-8')}") - version = info.decode('utf-8').strip().split('\n')[-1] - return version + """ + Check DLC environment and return iblvideo version. + + Attempts to import iblvideo directly. If unsuccessful, checks for necessary + scripts and environment, then retrieves version via subprocess. + + Returns: + tuple: (version: str, needs_subprocess: bool) + """ + try: + import iblvideo + version = iblvideo.__version__ + needs_subprocess = False + _logger.info(f'Current environment contains iblvideo version {self.version}') + except ImportError: + # Check that scripts are present, dlcenv can be activated and get iblvideo version + assert len(list(self.scripts.rglob('run_dlc.*'))) == 2, \ + f'Scripts run_dlc.sh and run_dlc.py do not exist in {self.scripts}' + assert len(list(self.scripts.rglob('run_motion.*'))) == 2, \ + f'Scripts run_motion.sh and run_motion.py do not exist in {self.scripts}' + assert self.dlcenv.exists(), f'DLC environment does not exist in assumed location {self.dlcenv}' + command2run = f"source {self.dlcenv}; python -c 'import iblvideo; print(iblvideo.__version__)'" + process = subprocess.Popen( + command2run, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + executable='/bin/bash' + ) + info, error = process.communicate() + if process.returncode != 0: + raise AssertionError(f"DLC environment check failed\n{error.decode('utf-8')}") + version = info.decode('utf-8').strip().split('\n')[-1] + needs_subprocess = True + return version, needs_subprocess @staticmethod def _video_intact(file_mp4): @@ -387,20 +403,44 @@ def _video_intact(file_mp4): cap.release() return intact - def _run_dlc(self, file_mp4, cam, overwrite): + def _run_dlc(self, file_mp4, cam, overwrite, flag_subprocess=True): try: - import iblvideo - from iblvideo import download_weights - from iblvideo.choiceworld import dlc - self.version = iblvideo.__version__ - _logger.info(f'iblvideo version {self.version}') - path_dlc = download_weights() - dlc_result, _ = dlc(file_mp4, path_dlc=path_dlc, force=overwrite) - return 0 - except ImportError: - self.version = self._check_dlcenv() - _logger.info(f'iblvideo version {self.version}') - command2run = f"{self.scripts.joinpath('run_dlc.sh')} {str(self.dlcenv)} {file_mp4} {overwrite}" + if flag_subprocess: + _logger.info(f'iblvideo version {self.version}') + command2run = f"{self.scripts.joinpath('run_dlc.sh')} {str(self.dlcenv)} {file_mp4} {overwrite}" + _logger.info(command2run) + process = subprocess.Popen( + command2run, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + executable='/bin/bash', + ) + info, error = process.communicate() + # info_str = info.decode("utf-8").strip() + # _logger.info(info_str) + if process.returncode != 0: + error_str = error.decode('utf-8').strip() + _logger.error(f'DLC 
failed for {cam}Camera.\n\n' + f'++++++++ Output of subprocess for debugging ++++++++\n\n' + f'{error_str}\n' + f'++++++++++++++++++++++++++++++++++++++++++++\n') + return process.returncode + pass + else: + from iblvideo import download_weights + from iblvideo.choiceworld import dlc + path_dlc = download_weights() + dlc_result, _ = dlc(file_mp4, path_dlc=path_dlc, force=overwrite) + return 0 + except Exception as e: + _logger.error(f'An error occurred while running DLC for {cam}Camera: {e}') + _logger.error(traceback.format_exc()) + return -1 + + def _run_motion_energy(self, file_mp4, dlc_result, flag_subprocess=True): + if flag_subprocess: + command2run = f"{self.scripts.joinpath('run_motion.sh')} {str(self.dlcenv)} {file_mp4} {dlc_result}" _logger.info(command2run) process = subprocess.Popen( command2run, @@ -410,19 +450,27 @@ def _run_dlc(self, file_mp4, cam, overwrite): executable='/bin/bash', ) info, error = process.communicate() - # info_str = info.decode("utf-8").strip() + # info_str = info.decode('utf-8').strip() # _logger.info(info_str) if process.returncode != 0: error_str = error.decode('utf-8').strip() - _logger.error(f'DLC failed for {cam}Camera.\n\n' + _logger.error(f'Motion energy failed for {file_mp4}.\n\n' f'++++++++ Output of subprocess for debugging ++++++++\n\n' f'{error_str}\n' f'++++++++++++++++++++++++++++++++++++++++++++\n') - return process.returncode - except Exception as e: - _logger.error(f'An error occurred while running DLC for {cam}Camera: {e}') - _logger.error(traceback.format_exc()) - return -1 + return_code = process.returncode + else: # runs the motion energy calculation in the current environment + try: + from iblvideo.motion_energy import motion_energy + _ = motion_energy(file_mp4, dlc_result) + return_code = 0 + except Exception: + _logger.error(f'Motion energy failed for {file_mp4}.\n\n' + f'++++++++ Output of subprocess for debugging ++++++++\n\n' + f'{traceback.format_exc()}\n' + f'++++++++++++++++++++++++++++++++++++++++++++\n') + return_code = -1 + return return_code def _run(self, cams=None, overwrite=False): # Check that the cams are valid for DLC, remove the ones that aren't @@ -457,43 +505,31 @@ def _run(self, cams=None, overwrite=False): _logger.error(f'Corrupt raw video file {file_mp4}') self.status = -1 continue + # Check that dlc environment is ok, shell scripts exists, and get iblvideo version, GPU addressable check_nvidia_driver() + self.version, flag_subprocess = self._check_dlcenv() + # Step 1: Run DLC for this camera _logger.info(f'Running DLC on {cam}Camera.') - return_code = self._run_dlc(file_mp4, cam, overwrite) + return_code = self._run_dlc(file_mp4, cam, overwrite, flag_subprocess=flag_subprocess) if return_code != 0: self.status = -1 continue - dlc_result = next(self.session_path.joinpath('alf').glob(f'_ibl_{cam}Camera.dlc*.pqt')) actual_outputs.append(dlc_result) + # Step 2: Compute Motion Energy for this camera _logger.info(f'Computing motion energy for {cam}Camera') - command2run = f"{self.scripts.joinpath('run_motion.sh')} {str(self.dlcenv)} {file_mp4} {dlc_result}" - _logger.info(command2run) - process = subprocess.Popen( - command2run, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - executable='/bin/bash', - ) - info, error = process.communicate() - # info_str = info.decode('utf-8').strip() - # _logger.info(info_str) - if process.returncode != 0: - error_str = error.decode('utf-8').strip() - _logger.error(f'Motion energy failed for {cam}Camera.\n\n' - f'++++++++ Output of subprocess for debugging 
++++++++\n\n' - f'{error_str}\n' - f'++++++++++++++++++++++++++++++++++++++++++++\n') + return_code = self._run_motion_energy(self, file_mp4, dlc_result, flag_subprocess=flag_subprocess) + if return_code != 0: self.status = -1 continue actual_outputs.append(next(self.session_path.joinpath('alf').glob( f'{cam}Camera.ROIMotionEnergy*.npy'))) actual_outputs.append(next(self.session_path.joinpath('alf').glob( f'{cam}ROIMotionEnergy.position*.npy'))) + except Exception: _logger.error(traceback.format_exc()) self.status = -1 From 26a6ce222ce4ca1608d6219e4734fcce3eeb3fc6 Mon Sep 17 00:00:00 2001 From: owinter Date: Tue, 29 Apr 2025 10:32:43 +0100 Subject: [PATCH 19/80] fix deprecation warning for DLC --- ibllib/pipes/video_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/video_tasks.py b/ibllib/pipes/video_tasks.py index 2f5f1cd8e..e72643749 100644 --- a/ibllib/pipes/video_tasks.py +++ b/ibllib/pipes/video_tasks.py @@ -429,7 +429,7 @@ def _run_dlc(self, file_mp4, cam, overwrite, flag_subprocess=True): pass else: from iblvideo import download_weights - from iblvideo.choiceworld import dlc + from iblvideo.pose_dlc import dlc path_dlc = download_weights() dlc_result, _ = dlc(file_mp4, path_dlc=path_dlc, force=overwrite) return 0 From 938bc33ace3c335c4b2962445c4c05a4cbb5aacb Mon Sep 17 00:00:00 2001 From: owinter Date: Wed, 30 Apr 2025 11:04:32 +0100 Subject: [PATCH 20/80] fix call to motion energy method by removing instance --- ibllib/pipes/video_tasks.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ibllib/pipes/video_tasks.py b/ibllib/pipes/video_tasks.py index e72643749..b5ce98480 100644 --- a/ibllib/pipes/video_tasks.py +++ b/ibllib/pipes/video_tasks.py @@ -521,7 +521,7 @@ def _run(self, cams=None, overwrite=False): # Step 2: Compute Motion Energy for this camera _logger.info(f'Computing motion energy for {cam}Camera') - return_code = self._run_motion_energy(self, file_mp4, dlc_result, flag_subprocess=flag_subprocess) + return_code = self._run_motion_energy(file_mp4, dlc_result, flag_subprocess=flag_subprocess) if return_code != 0: self.status = -1 continue @@ -529,7 +529,6 @@ def _run(self, cams=None, overwrite=False): f'{cam}Camera.ROIMotionEnergy*.npy'))) actual_outputs.append(next(self.session_path.joinpath('alf').glob( f'{cam}ROIMotionEnergy.position*.npy'))) - except Exception: _logger.error(traceback.format_exc()) self.status = -1 From c0e6b872b5710ca3240216970e9b263bf95a3ac3 Mon Sep 17 00:00:00 2001 From: owinter Date: Tue, 20 May 2025 16:14:11 +0100 Subject: [PATCH 21/80] update requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 89473fb2a..07679187c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,7 @@ seaborn>=0.9.0 tqdm>=4.32.1 # ibl libraries iblatlas>=0.5.3 -ibl-neuropixel>=1.6.2 +ibl-neuropixel>=1.7.0 iblutil>=1.13.0 iblqt>=0.4.2 mtscomp>=1.0.1 From 3c4d1f4584e31fcafb16bef73dedc162ca9e76b0 Mon Sep 17 00:00:00 2001 From: owinter Date: Wed, 21 May 2025 13:57:08 +0100 Subject: [PATCH 22/80] Check for iblsorter availability by other means of a try ... 
importError --- ibllib/pipes/ephys_tasks.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ibllib/pipes/ephys_tasks.py b/ibllib/pipes/ephys_tasks.py index 6cc1cd8db..253e13420 100644 --- a/ibllib/pipes/ephys_tasks.py +++ b/ibllib/pipes/ephys_tasks.py @@ -1,8 +1,10 @@ +import importlib import logging from pathlib import Path import re import shutil import subprocess +import sys import traceback import packaging.version @@ -727,11 +729,11 @@ def _run_iblsort(self, ap_file): self.FORCE_RERUN = True self.scratch_folder_run.mkdir(parents=True, exist_ok=True) check_nvidia_driver() - try: - # if pykilosort is in the environment, use the installed version within the task + # this is the best way I found to check if iblsorter is installed and available without a try block + if 'iblsorter' in sys.modules and importlib.util.find_spec('iblsorter.ibl') is not None: import iblsorter.ibl # noqa iblsorter.ibl.run_spike_sorting_ibl(bin_file=ap_file, scratch_dir=self.scratch_folder_run, delete=False) - except ImportError: + else: command2run = f"{self.SHELL_SCRIPT} {ap_file} {self.scratch_folder_run}" _logger.info(command2run) process = subprocess.Popen( From a5bd4f6076d9446e6242916b719176e364541728 Mon Sep 17 00:00:00 2001 From: Olivier Winter Date: Thu, 22 May 2025 13:21:03 +0100 Subject: [PATCH 23/80] make sure ibl-neuropixel higher than 1.7.1 --- requirements.txt | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/requirements.txt b/requirements.txt index 07679187c..9649c1e1c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,35 +1,35 @@ +# ibl libraries +ONE-api>=3.0.0 boto3 click>=7.0.0 colorlog>=4.0.2 flake8>=3.7.8 globus-sdk graphviz +ibl-neuropixel>=1.7.1 +ibl-style +iblatlas>=0.5.3 +iblqt>=0.4.2 +iblutil>=1.13.0 +imagecodecs # used to convert tif snapshots to png when registering mesoscope snapshots (also requires skimage) matplotlib>=3.0.3 +mtscomp>=1.0.1 +nptdms numba>=0.56 numpy>=1.18 -nptdms opencv-python-headless pandas +phylib>=2.6.0 +psychofit pyarrow pynrrd>=0.4.0 +pyqt5 pytest requests>=2.22.0 +scikit-image # this is a widefield requirement missing as of July 2023, we may remove it once wfield has this figured out scikit-learn>=0.22.1 scipy>=1.7.0 -scikit-image # this is a widefield requirement missing as of July 2023, we may remove it once wfield has this figured out -imagecodecs # used to convert tif snapshots to png when registering mesoscope snapshots (also requires skimage) -sparse seaborn>=0.9.0 -tqdm>=4.32.1 -# ibl libraries -iblatlas>=0.5.3 -ibl-neuropixel>=1.7.0 -iblutil>=1.13.0 -iblqt>=0.4.2 -mtscomp>=1.0.1 -ONE-api>=3.0.0 -phylib>=2.6.0 -psychofit slidingRP>=1.1.1 # steinmetz lab refractory period metrics -pyqt5 -ibl-style +sparse +tqdm>=4.32.1 From 75c3e40828b3c5232347d89bed5ad8147eb2b0a1 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 30 May 2025 12:57:13 +0100 Subject: [PATCH 24/80] changing the extractor for compatibility with DI only and Frameclock on DI0 --- ibllib/pipes/neurophotometrics.py | 40 ++++++++++------ .../_ibl_experiment.description.yaml | 35 ++++++++++++++ ibllib/tests/test_neurophotometrics.py | 48 +++++++++++++++++++ 3 files changed, 108 insertions(+), 15 deletions(-) create mode 100644 ibllib/tests/fixtures/neurophotometrics/_ibl_experiment.description.yaml create mode 100644 ibllib/tests/test_neurophotometrics.py diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 86639d8be..76fa91c63 100644 --- 
a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -18,19 +18,30 @@ def extract_timestamps_from_tdms_file(tdms_filepath: Path) -> dict: - # loads the tdms file data, and detects the rising edges - timestamps = {} # stores the resulting edge times here - tdms_file = TdmsFile.read(tdms_filepath) - analog_group, digital_group = tdms_file.groups() - fs = analog_group.properties['ScanRate'] - - for channel in analog_group.channels(): - signal = (channel.data > 2.5).astype('int64') - timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs + # extractor for tdms files as written by the daqami software, configured + # for neurophotometrics experiments: Frameclock is in AI7, DI1-4 are the + # bpod sync signals - for channel in digital_group.channels(): - signal = (channel.data > 0.5).astype('int64') - timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs + tdms_file = TdmsFile.read(tdms_filepath) + (digital_group,) = tdms_file.groups() + fs = digital_group.properties['ScanRate'] # this should be 10kHz + df = tdms_file.as_dataframe() + col = df.columns[-1] + vals = df[col].values.astype('int64') + columns = ['DI0', 'DI1', 'DI2', 'DI3'] + + # ugly but basically just a binary decoder for the binary data + # assumes 4 channels + data = np.array([list(bin(v)[2:].zfill(4)[::-1]) for v in vals], dtype='int64') + timestamps = {} + for i, name in enumerate(columns): + signal = data[:, i] + timestamps[name] = np.where(np.diff(signal) == 1)[0] / fs + + # frameclock data is recorded on an analog channel + # for channel in analog_group.channels(): + # signal = (channel.data > 2.5).astype('int64') # assumes 0-5V + # timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs return timestamps @@ -104,7 +115,7 @@ def _get_sync_function(self) -> Tuple[callable, list]: _logger.info( f'sync: n trials {len(bpod_data)}, n bpod sync {len(timestamps_bpod)}, n photometry {len(timestamps_nph)}, n match {len(ix_nph)}' ) - # FIXME the framerate here is hardcoded, infer it instead! + # TODO the framerate here is hardcoded, infer it instead! 
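# --- editor's sketch, not part of the patch: one way to address the TODO above,
# --- inferring the frame period from the photometry timestamps themselves so the
# --- residual check that follows no longer hardcodes 1 / 60. Name is illustrative only.
def _sketch_infer_frame_period(frame_times):
    import numpy as np  # frame_times: 1D array of SystemTimestamp values, in seconds
    dt = np.median(np.diff(frame_times))  # median is robust to occasional dropped frames
    return dt  # the assert below could then use np.abs(tcheck) < dt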
assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' assert len(ix_nph) / len(timestamps_bpod) > 0.95, 'Sync issue detected, less than 95% of the bpod events matched' valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] @@ -116,7 +127,6 @@ def load_data(self) -> pd.DataFrame: raw_photometry_folder = self.session_path / self.photometry_collection raw_neurophotometrics_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt') return raw_neurophotometrics_df - # return ibl_df def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # 1) load photometry data @@ -221,7 +231,7 @@ def load_data(self) -> pd.DataFrame: # get daqami timestamps tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) - frame_timestamps = self.timestamps[f'AI{self.sync_kwargs["frameclock_channel"]}'] + frame_timestamps = self.timestamps[f'DI{self.sync_kwargs["frameclock_channel"]}'] # and put them in the raw_df SystemTimestamp column if raw_df.shape[0] == frame_timestamps.shape[0]: diff --git a/ibllib/tests/fixtures/neurophotometrics/_ibl_experiment.description.yaml b/ibllib/tests/fixtures/neurophotometrics/_ibl_experiment.description.yaml new file mode 100644 index 000000000..8a39783cb --- /dev/null +++ b/ibllib/tests/fixtures/neurophotometrics/_ibl_experiment.description.yaml @@ -0,0 +1,35 @@ +devices: + cameras: + left: + collection: raw_video_data + sync_label: audio + microphone: + microphone: + collection: raw_task_data_00 + sync_label: audio + neurophotometrics: + collection: raw_photometry_data + datetime: '2025-05-26T15:08:40.237101' + fibers: + G0: + location: VTA + sync_channel: 2 + sync_metadata: + acquisition_software: daqami + collection: raw_photometry_data + frameclock_channel: 7 + sync_mode: daqami +procedures: +- Fiber photometry +projects: +- ibl_fibrephotometry +- practice +sync: + bpod: + acquisition_software: pybpod + collection: raw_task_data_00 + extension: .jsonable +tasks: +- _iblrig_tasks_advancedChoiceWorld: + collection: raw_task_data_00 +version: 1.0.0 diff --git a/ibllib/tests/test_neurophotometrics.py b/ibllib/tests/test_neurophotometrics.py new file mode 100644 index 000000000..fb3e1f773 --- /dev/null +++ b/ibllib/tests/test_neurophotometrics.py @@ -0,0 +1,48 @@ +"""Tests for ibllib.pipes.mesoscope_tasks.""" + +import sys +import unittest +from unittest import mock +import tempfile +import json +from itertools import chain +from pathlib import Path +import subprocess +from copy import deepcopy +import uuid + +from one.api import ONE +import numpy as np + +from ibllib.pipes.mesoscope_tasks import MesoscopePreprocess, MesoscopeFOV, find_triangle, surface_normal, _nearest_neighbour_1d +from ibllib.io.extractors import mesoscope +from ibllib.tests import TEST_DB +from ibllib.io import session_params + +# Mock suit2p which is imported in MesoscopePreprocess +attrs = {'default_ops.return_value': {}} +sys.modules['suite2p'] = mock.MagicMock(**attrs) + + +class TestNeurophotometricsExtractor(unittest.TestCase): + """ + this class tests + that the correct extractor is run based on the experiment description file + this requires the setup to have + + """ + + def setUp(self) -> None: + self.tmp_folder = tempfile.TemporaryDirectory() + self.session_folder = Path(self.tmp_folder.name) / 'subject' / '2020-01-01' / '001' + self.raw_photometry_folder = self.session_folder / 
'raw_photometry_data' + self.raw_photometry_folder.mkdir(parents=True) + + def test_bpod_extractor(self): + path = Path(__file__).parent / 'fixtures' / 'neurophotometrics' / '_ibl_experiment_description_bpod.yaml' + self.experiment_description = session_params.read_params(path) + # expected + + def test_daqami_extractor(self): + path = Path(__file__).parent / 'fixtures' / 'neurophotometrics' / '_ibl_experiment_description_bpod.yaml' + self.experiment_description = session_params.read_params(path) From c1ebda17da60a2487a6ea671fb78e42cde6561bc Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 2 Jun 2025 15:41:31 +0100 Subject: [PATCH 25/80] added functionality to deal with premature termination of daqami --- ibllib/pipes/neurophotometrics.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 76fa91c63..6f27ffcbc 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -233,12 +233,24 @@ def load_data(self) -> pd.DataFrame: self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) frame_timestamps = self.timestamps[f'DI{self.sync_kwargs["frameclock_channel"]}'] + # compare number of frame timestamps # and put them in the raw_df SystemTimestamp column if raw_df.shape[0] == frame_timestamps.shape[0]: raw_df['SystemTimestamp'] = frame_timestamps elif raw_df.shape[0] == frame_timestamps.shape[0] + 1: # there is one extra frame timestamp from the last incomplete frame raw_df['SystemTimestamp'] = frame_timestamps[:-1] + elif raw_df.shape[0] > frame_timestamps: + # the daqami was stopped / closed before bonsai + # we discard all frames that can not be mapped + _logger.warning( + f'#frames recorded by bonsai: {raw_df.shape[0]} > #frame timestamps recorded by daqami {frame_timestamps.shape[0]}, dropping all frames without recorded timestamps' + ) + raw_df = raw_df.iloc[: frame_timestamps.shape[0]] + + elif raw_df.shape[0] < frame_timestamps: + # this should not be possible + raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') return raw_df def _get_neurophotometrics_timestamps(self) -> np.ndarray: From 15fe6afcc058e2ee0f47ce173ef3d43803049c45 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 4 Jun 2025 12:34:40 +0100 Subject: [PATCH 26/80] tiny bugfix for syncing sessions were daqami was stopped before bonsai --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 6f27ffcbc..179a8d137 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -240,7 +240,7 @@ def load_data(self) -> pd.DataFrame: elif raw_df.shape[0] == frame_timestamps.shape[0] + 1: # there is one extra frame timestamp from the last incomplete frame raw_df['SystemTimestamp'] = frame_timestamps[:-1] - elif raw_df.shape[0] > frame_timestamps: + elif raw_df.shape[0] > frame_timestamps.shape[0]: # the daqami was stopped / closed before bonsai # we discard all frames that can not be mapped _logger.warning( From ed4ea5a0faf53b641fc9e10ceae6dba7444a12ae Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 4 Jun 2025 12:46:12 +0100 Subject: [PATCH 27/80] ruff please, (tests are stubs) --- ibllib/tests/test_neurophotometrics.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/ibllib/tests/test_neurophotometrics.py b/ibllib/tests/test_neurophotometrics.py index fb3e1f773..fcad9d379 
100644 --- a/ibllib/tests/test_neurophotometrics.py +++ b/ibllib/tests/test_neurophotometrics.py @@ -4,19 +4,9 @@ import unittest from unittest import mock import tempfile -import json -from itertools import chain from pathlib import Path -import subprocess -from copy import deepcopy -import uuid -from one.api import ONE -import numpy as np -from ibllib.pipes.mesoscope_tasks import MesoscopePreprocess, MesoscopeFOV, find_triangle, surface_normal, _nearest_neighbour_1d -from ibllib.io.extractors import mesoscope -from ibllib.tests import TEST_DB from ibllib.io import session_params # Mock suit2p which is imported in MesoscopePreprocess @@ -41,7 +31,6 @@ def setUp(self) -> None: def test_bpod_extractor(self): path = Path(__file__).parent / 'fixtures' / 'neurophotometrics' / '_ibl_experiment_description_bpod.yaml' self.experiment_description = session_params.read_params(path) - # expected def test_daqami_extractor(self): path = Path(__file__).parent / 'fixtures' / 'neurophotometrics' / '_ibl_experiment_description_bpod.yaml' From 8e69190ba7989788d46396b028611e3510e99415 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 4 Jun 2025 12:56:23 +0100 Subject: [PATCH 28/80] flake8 --- ibllib/pipes/neurophotometrics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 179a8d137..553f388b7 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -113,7 +113,9 @@ def _get_sync_function(self) -> Tuple[callable, list]: # then we check the alignment, should be less than the camera sampling rate tcheck = sync_nph_to_bpod_fcn(timestamps_nph[ix_nph]) - timestamps_bpod[ix_bpod] _logger.info( - f'sync: n trials {len(bpod_data)}, n bpod sync {len(timestamps_bpod)}, n photometry {len(timestamps_nph)}, n match {len(ix_nph)}' + f'sync: n trials {len(bpod_data)}' + f'n bpod sync {len(timestamps_bpod)}' + f'n photometry {len(timestamps_nph)}, n match {len(ix_nph)}' ) # TODO the framerate here is hardcoded, infer it instead! 
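# --- editor's note, not part of the patch: the three adjacent f-strings above
# --- concatenate with no separator, so the logged message runs the fields
# --- together (e.g. "...n trials 42n bpod sync 40n photometry..."); a sketch
# --- of the same call with explicit separators:
_logger.info(
    f'sync: n trials {len(bpod_data)}, '
    f'n bpod sync {len(timestamps_bpod)}, '
    f'n photometry {len(timestamps_nph)}, n match {len(ix_nph)}'
)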
assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' @@ -243,9 +245,7 @@ def load_data(self) -> pd.DataFrame: elif raw_df.shape[0] > frame_timestamps.shape[0]: # the daqami was stopped / closed before bonsai # we discard all frames that can not be mapped - _logger.warning( - f'#frames recorded by bonsai: {raw_df.shape[0]} > #frame timestamps recorded by daqami {frame_timestamps.shape[0]}, dropping all frames without recorded timestamps' - ) + _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') raw_df = raw_df.iloc[: frame_timestamps.shape[0]] elif raw_df.shape[0] < frame_timestamps: From f59a9547fb235615663b0bf9c896dc3cd30141fe Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 4 Jun 2025 15:00:55 +0100 Subject: [PATCH 29/80] updated requirements again after prefect merge --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a18b46b30..089e2bc9e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,5 +33,4 @@ seaborn>=0.9.0 slidingRP>=1.1.1 # steinmetz lab refractory period metrics sparse tqdm>=4.32.1 - -pyqt5 \ No newline at end of file +ibl-photometry From e6e7ae2b3660cbdb963d750b80d465e6643650f0 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 6 Jun 2025 09:59:35 +0100 Subject: [PATCH 30/80] another crucial bugfix for daqami frame number check during extractor --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 553f388b7..9fb262d0a 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -248,7 +248,7 @@ def load_data(self) -> pd.DataFrame: _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') raw_df = raw_df.iloc[: frame_timestamps.shape[0]] - elif raw_df.shape[0] < frame_timestamps: + elif raw_df.shape[0] < frame_timestamps.shape[0]: # this should not be possible raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') return raw_df From f3a58e8007d09500f7a9fa73008cf40ea3ec3aa2 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 6 Jun 2025 13:00:22 +0100 Subject: [PATCH 31/80] moving the frameclock back on AI7, included downward compatibility in the extractor for the sessions that have the frameclock on DI0 --- ibllib/pipes/neurophotometrics.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 9fb262d0a..a1a15ed80 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -23,7 +23,14 @@ def extract_timestamps_from_tdms_file(tdms_filepath: Path) -> dict: # bpod sync signals tdms_file = TdmsFile.read(tdms_filepath) - (digital_group,) = tdms_file.groups() + groups = tdms_file.groups() + # this unfortunate hack is in here because there are a bunch of sessions where the frameclock is on DI0 + if len(groups) == 1: + has_analog_group = False + (digital_group,) = groups + if len(groups) == 2: + has_analog_group = True + analog_group, digital_group = groups fs = digital_group.properties['ScanRate'] # this should be 10kHz df = tdms_file.as_dataframe() col = df.columns[-1] @@ -38,10 +45,11 @@ def extract_timestamps_from_tdms_file(tdms_filepath: Path) -> dict: signal = data[:, i] 
timestamps[name] = np.where(np.diff(signal) == 1)[0] / fs - # frameclock data is recorded on an analog channel - # for channel in analog_group.channels(): - # signal = (channel.data > 2.5).astype('int64') # assumes 0-5V - # timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs + if has_analog_group: + # frameclock data is recorded on an analog channel + for channel in analog_group.channels(): + signal = (channel.data > 2.5).astype('int64') # assumes 0-5V + timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs return timestamps @@ -233,7 +241,13 @@ def load_data(self) -> pd.DataFrame: # get daqami timestamps tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) - frame_timestamps = self.timestamps[f'DI{self.sync_kwargs["frameclock_channel"]}'] + # downward compatibility - frameclock moved around, now is back on the AI7 + # was specified with int before. if int, + if type(self.sync_kwargs['frameclock_channel']) is int: + sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' + else: + sync_channel_name = self.sync_kwargs['frameclock_channel'] + frame_timestamps = self.timestamps[sync_channel_name] # compare number of frame timestamps # and put them in the raw_df SystemTimestamp column @@ -248,7 +262,7 @@ def load_data(self) -> pd.DataFrame: _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') raw_df = raw_df.iloc[: frame_timestamps.shape[0]] - elif raw_df.shape[0] < frame_timestamps.shape[0]: + elif raw_df.shape[0] < frame_timestamps.shape: # this should not be possible raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') return raw_df From 85eb00a73d76b11fa2ee06732437fb3f1362f6d7 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 6 Jun 2025 13:05:49 +0100 Subject: [PATCH 32/80] related to previous commit, better checking for analog/digital sync channels --- ibllib/pipes/neurophotometrics.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index a1a15ed80..6d93f364a 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -242,10 +242,11 @@ def load_data(self) -> pd.DataFrame: tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) # downward compatibility - frameclock moved around, now is back on the AI7 - # was specified with int before. if int, - if type(self.sync_kwargs['frameclock_channel']) is int: + # was specified with int before. 
if int + try: + int(self.sync_kwargs['frameclock_channel']) sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' - else: + except ValueError: sync_channel_name = self.sync_kwargs['frameclock_channel'] frame_timestamps = self.timestamps[sync_channel_name] @@ -269,10 +270,15 @@ def load_data(self) -> pd.DataFrame: def _get_neurophotometrics_timestamps(self) -> np.ndarray: # get the sync channel - sync_colname = f'DI{self.kwargs["sync_channel"]}' + # again the ugly downward compatibility hack + try: + int(self.sync_kwargs['frameclock_channel']) + sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' + except ValueError: + sync_channel_name = self.sync_kwargs['frameclock_channel'] # and the corresponding timestamps - timestamps_nph = self.timestamps[sync_colname] + timestamps_nph = self.timestamps[sync_channel_name] # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods From 720787d207f4e2dc17f6875630bdf267e0a5bacf Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 6 Jun 2025 15:14:29 +0100 Subject: [PATCH 33/80] bugfix --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 6d93f364a..bf0e3d43d 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -263,7 +263,7 @@ def load_data(self) -> pd.DataFrame: _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') raw_df = raw_df.iloc[: frame_timestamps.shape[0]] - elif raw_df.shape[0] < frame_timestamps.shape: + elif raw_df.shape[0] < frame_timestamps.shape[0]: # this should not be possible raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') return raw_df From 516ddbdead6eb97efd7eb3d7814e03e92e8efab8 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 11 Jun 2025 17:11:03 +0100 Subject: [PATCH 34/80] bugfix for wrong frameclock channel --- ibllib/pipes/neurophotometrics.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index bf0e3d43d..62465a93d 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -269,16 +269,8 @@ def load_data(self) -> pd.DataFrame: return raw_df def _get_neurophotometrics_timestamps(self) -> np.ndarray: - # get the sync channel - # again the ugly downward compatibility hack - try: - int(self.sync_kwargs['frameclock_channel']) - sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' - except ValueError: - sync_channel_name = self.sync_kwargs['frameclock_channel'] - - # and the corresponding timestamps - timestamps_nph = self.timestamps[sync_channel_name] + # get the sync channel and the corresponding timestamps + timestamps_nph = self.timestamps[f'DI{self.sync_channel}'] # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods From 52c0c6f154e1e0125185108edee3082d2e9053af Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 11 Jun 2025 17:25:54 +0100 Subject: [PATCH 35/80] extractor fix --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 62465a93d..b7c3c6ff6 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -263,7 +263,7 @@ def 
load_data(self) -> pd.DataFrame: _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') raw_df = raw_df.iloc[: frame_timestamps.shape[0]] - elif raw_df.shape[0] < frame_timestamps.shape[0]: + elif raw_df.shape[0] + 1 < frame_timestamps.shape[0]: # this should not be possible raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') return raw_df From d46d846897b1af016ebf2b5a2d91988e950d2ad4 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 11 Jun 2025 19:18:15 +0100 Subject: [PATCH 36/80] very hardcoded fix for frameclock channel that turns obsolete as soon as the experiment_description files are patched --- ibllib/pipes/neurophotometrics.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index b7c3c6ff6..004612cb9 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -243,10 +243,11 @@ def load_data(self) -> pd.DataFrame: self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) # downward compatibility - frameclock moved around, now is back on the AI7 # was specified with int before. if int - try: - int(self.sync_kwargs['frameclock_channel']) + if self.sync_kwargs['frameclock_channel'] == 0: sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' - except ValueError: + if self.sync_kwargs['frameclock_channel'] == 7: + sync_channel_name = f'AI{self.sync_kwargs["frameclock_channel"]}' + else: sync_channel_name = self.sync_kwargs['frameclock_channel'] frame_timestamps = self.timestamps[sync_channel_name] From 808754b69a274b309e5809415f58cc5116ff192b Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 11 Jun 2025 20:05:14 +0100 Subject: [PATCH 37/80] int / str bugfix --- ibllib/pipes/neurophotometrics.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 004612cb9..dd630be86 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -242,10 +242,9 @@ def load_data(self) -> pd.DataFrame: tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) # downward compatibility - frameclock moved around, now is back on the AI7 - # was specified with int before. 
if int - if self.sync_kwargs['frameclock_channel'] == 0: + if self.sync_kwargs['frameclock_channel'] == '0': sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' - if self.sync_kwargs['frameclock_channel'] == 7: + if self.sync_kwargs['frameclock_channel'] == '7': sync_channel_name = f'AI{self.sync_kwargs["frameclock_channel"]}' else: sync_channel_name = self.sync_kwargs['frameclock_channel'] From 7f8d6528b75727e9388eed6e3444193df56d690a Mon Sep 17 00:00:00 2001 From: olivier Date: Thu, 12 Jun 2025 14:55:35 +0100 Subject: [PATCH 38/80] bugfix for channel int/str --- ibllib/pipes/neurophotometrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index dd630be86..55314e271 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -242,9 +242,9 @@ def load_data(self) -> pd.DataFrame: tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) # downward compatibility - frameclock moved around, now is back on the AI7 - if self.sync_kwargs['frameclock_channel'] == '0': + if self.sync_kwargs['frameclock_channel'] in ['0',0]: sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' - if self.sync_kwargs['frameclock_channel'] == '7': + if self.sync_kwargs['frameclock_channel'] in ['7',7]: sync_channel_name = f'AI{self.sync_kwargs["frameclock_channel"]}' else: sync_channel_name = self.sync_kwargs['frameclock_channel'] From 771048e179298bd3408a8bd35ca104706c92787f Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Thu, 12 Jun 2025 15:00:32 +0100 Subject: [PATCH 39/80] fake commit just to check user name --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 55314e271..03c791296 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -274,5 +274,5 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods - timestamps_nph = timestamps_nph[15:] + timestamps_nph = timestamps_nph[15: ] return timestamps_nph From 1a747c3282a07e100ca6a8263809cb058f9c5429 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Fri, 13 Jun 2025 11:15:49 +0100 Subject: [PATCH 40/80] bugfix for downward compatible frameclock_channel --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 03c791296..f52b9c028 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -244,7 +244,7 @@ def load_data(self) -> pd.DataFrame: # downward compatibility - frameclock moved around, now is back on the AI7 if self.sync_kwargs['frameclock_channel'] in ['0',0]: sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' - if self.sync_kwargs['frameclock_channel'] in ['7',7]: + elif self.sync_kwargs['frameclock_channel'] in ['7',7]: sync_channel_name = f'AI{self.sync_kwargs["frameclock_channel"]}' else: sync_channel_name = self.sync_kwargs['frameclock_channel'] From d32f54b39c05e0046fe07f83c4aa333f7b363480 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Fri, 13 Jun 2025 14:13:25 +0100 Subject: [PATCH 41/80] more verbose error msg for sync fail --- ibllib/pipes/neurophotometrics.py | 4 ++++ 1 file 
changed, 4 insertions(+) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index f52b9c028..6049ac8eb 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -112,6 +112,10 @@ def _get_sync_function(self) -> Tuple[callable, list]: timestamps_bpod, bpod_data = self._get_bpod_timestamps() timestamps_nph = self._get_neurophotometrics_timestamps() + # verify presence of sync timestamps + for source, timestamps in zip(['bpod','neurophotometrics'], [timestamps_bpod, timestamps_nph]): + assert len(timestamps) > 0, f'{source} sync timestamps are empty' + # sync the behaviour events to the photometry timestamps sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( timestamps_nph, timestamps_bpod, return_indices=True, linear=True From fd001805eb7c027e1e8515e837d38806e5cfceab Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Mon, 16 Jun 2025 16:34:02 +0100 Subject: [PATCH 42/80] path bugfix for extractor --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 6049ac8eb..659d7c60b 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -176,7 +176,7 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: locations_df = pd.DataFrame(rois).set_index('ROI') locations_df_outpath = output_folder / 'photometryROI.locations.pqt' locations_df.to_parquet(locations_df_outpath) - return ibl_df, locations_df + return ibl_df_outpath, locations_df_outpath class FibrePhotometryBpodSync(FibrePhotometryBaseSync): From 4ecb482870a3fdb3b5774569345419da4c255eb6 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Tue, 17 Jun 2025 14:25:24 +0100 Subject: [PATCH 43/80] splitting sessions by spacers and attepmting to sync each --- ibllib/pipes/neurophotometrics.py | 54 ++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 659d7c60b..ed0a7bf69 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -13,6 +13,7 @@ from abc import abstractmethod from iblphotometry import io as fpio +from iblutil.spacer import Spacer _logger = logging.getLogger('ibllib') @@ -107,7 +108,6 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: def _get_sync_function(self) -> Tuple[callable, list]: # returns the synchronization function - # get the timestamps timestamps_bpod, bpod_data = self._get_bpod_timestamps() timestamps_nph = self._get_neurophotometrics_timestamps() @@ -116,22 +116,38 @@ def _get_sync_function(self) -> Tuple[callable, list]: for source, timestamps in zip(['bpod','neurophotometrics'], [timestamps_bpod, timestamps_nph]): assert len(timestamps) > 0, f'{source} sync timestamps are empty' - # sync the behaviour events to the photometry timestamps - sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - timestamps_nph, timestamps_bpod, return_indices=True, linear=True - ) - # TODO log drift - - # then we check the alignment, should be less than the camera sampling rate - tcheck = sync_nph_to_bpod_fcn(timestamps_nph[ix_nph]) - timestamps_bpod[ix_bpod] - _logger.info( - f'sync: n trials {len(bpod_data)}' - f'n bpod sync {len(timestamps_bpod)}' - f'n photometry {len(timestamps_nph)}, n match {len(ix_nph)}' - ) - # TODO the framerate here is hardcoded, infer it instead! 
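For reference, the convention that PATCH 42 above restores is that `_run` returns the written file paths rather than the in-memory dataframes. A minimal sketch of that pattern, using the dataset names from this pipeline; the helper name and the toy dataframes are hypothetical:

    import pandas as pd
    from pathlib import Path

    def write_photometry_outputs(ibl_df: pd.DataFrame, locations_df: pd.DataFrame, output_folder: Path):
        # write both tables to parquet and return the output paths (what PATCH 42 changes the return value to)
        output_folder.mkdir(parents=True, exist_ok=True)
        ibl_df_outpath = output_folder / 'photometry.signal.pqt'
        locations_df_outpath = output_folder / 'photometryROI.locations.pqt'
        ibl_df.to_parquet(ibl_df_outpath)
        locations_df.to_parquet(locations_df_outpath)
        return ibl_df_outpath, locations_df_outpath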
- assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' - assert len(ix_nph) / len(timestamps_bpod) > 0.95, 'Sync issue detected, less than 95% of the bpod events matched' + # split into segments if multiple spacers are found + # attempt to sync for each segment (only one will work) + spacer = Spacer() + spacer_ix = spacer.find_spacers_from_timestamps(timestamps_nph, atol=1e-5) + # the indices that mark the boundaries of segments + + segment_ix = np.concatenate([spacer_ix, [timestamps_nph.shape[0]]]) + segments = [] + for i in range(segment_ix.shape[0]-1): + start_ix = segment_ix[i] + stop_ix = segment_ix[i+1] + segments.append(timestamps_nph[start_ix:stop_ix]) + + for i, timestamps_segment in enumerate(segments): + print(i) + # sync the behaviour events to the photometry timestamps + sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( + timestamps_segment, timestamps_bpod, return_indices=True, linear=True + ) + # then we check the alignment, should be less than the camera sampling rate + tcheck = sync_nph_to_bpod_fcn(timestamps_segment[ix_nph]) - timestamps_bpod[ix_bpod] + _logger.info( + f'sync: n trials {len(bpod_data)}' + f'n bpod sync {len(timestamps_bpod)}' + f'n photometry {len(timestamps_segment)}, n match {len(ix_nph)}' + ) + if len(ix_nph) / len(timestamps_bpod) < 0.95: + # wrong segment + continue + # TODO the framerate here is hardcoded, infer it instead! + assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' + valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] return sync_nph_to_bpod_fcn, valid_bounds @@ -207,7 +223,7 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods - timestamps_nph = timestamps_nph[15:] + # timestamps_nph = timestamps_nph[15:] return timestamps_nph @@ -278,5 +294,5 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods - timestamps_nph = timestamps_nph[15: ] + # timestamps_nph = timestamps_nph[15: ] return timestamps_nph From 18a02e2946744d04e8cfc3f646e2431c85c05606 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Wed, 18 Jun 2025 14:02:35 +0100 Subject: [PATCH 44/80] spacer detection for hot swapping --- ibllib/pipes/neurophotometrics.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index ed0a7bf69..bba7e50b3 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -106,7 +106,7 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # for daq based syncing, the timestamps are extracted from the tdms file ... 
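As an aside, the segment logic that PATCH 43 introduces boils down to two steps: cut the photometry sync pulses at every detected spacer, then accept the one segment whose pulses line up with the bpod events. A hypothetical standalone restatement under the same assumptions as the patch (1/60 s frame period, at least 95% of bpod events matched); `spacer_ix` and `n_pulses` stand in for the output of `Spacer.find_spacers_from_timestamps` and the `Spacer.n_pulses` attribute:

    import numpy as np

    def split_at_spacers(timestamps_nph, spacer_ix, n_pulses):
        # segment boundaries: each detected spacer start, plus the end of the recording
        bounds = np.concatenate([spacer_ix, [timestamps_nph.shape[0]]]).astype(int)
        segments = []
        for i in range(bounds.shape[0] - 1):
            # skip the spacer pulses themselves (n_pulses pulses -> 2 * n_pulses edges)
            start = bounds[i] + (n_pulses * 2) - 1
            segments.append(timestamps_nph[start:bounds[i + 1]])
        return segments

    def sync_is_acceptable(sync_fcn, ts_segment, ts_bpod, ix_nph, ix_bpod, frame_period=1 / 60, min_match=0.95):
        # residual between synced photometry pulses and the bpod events they were matched to
        residual = sync_fcn(ts_segment[ix_nph]) - ts_bpod[ix_bpod]
        enough_matches = len(ix_nph) / len(ts_bpod) >= min_match
        return bool(np.all(np.abs(residual) < frame_period) and enough_matches)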
- def _get_sync_function(self) -> Tuple[callable, list]: + def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, list]: # returns the synchronization function # get the timestamps timestamps_bpod, bpod_data = self._get_bpod_timestamps() @@ -119,18 +119,23 @@ def _get_sync_function(self) -> Tuple[callable, list]: # split into segments if multiple spacers are found # attempt to sync for each segment (only one will work) spacer = Spacer() - spacer_ix = spacer.find_spacers_from_timestamps(timestamps_nph, atol=1e-5) - # the indices that mark the boundaries of segments + + # the fast way + match spacer_detection_mode: + case 'fast': + spacer_ix = spacer.find_spacers_from_timestamps(timestamps_nph, atol=1e-5) + case 'safe': + spacer_ix, spacer_times = spacer.find_spacers_from_positive_fronts(timestamps_nph, fs=1000) + # the indices that mark the boundaries of segments segment_ix = np.concatenate([spacer_ix, [timestamps_nph.shape[0]]]) segments = [] for i in range(segment_ix.shape[0]-1): - start_ix = segment_ix[i] + start_ix = segment_ix[i] + (spacer.n_pulses * 2) - 1 stop_ix = segment_ix[i+1] segments.append(timestamps_nph[start_ix:stop_ix]) for i, timestamps_segment in enumerate(segments): - print(i) # sync the behaviour events to the photometry timestamps sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( timestamps_segment, timestamps_bpod, return_indices=True, linear=True @@ -144,6 +149,7 @@ def _get_sync_function(self) -> Tuple[callable, list]: ) if len(ix_nph) / len(timestamps_bpod) < 0.95: # wrong segment + print('wrong segment') continue # TODO the framerate here is hardcoded, infer it instead! assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' @@ -168,7 +174,8 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: raw_df = self.load_data() # 2) get the synchronization function - sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function() + spacer_detection_mode = kwargs.get('spacer_detection_mode', 'fast') + sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function(spacer_detection_mode=spacer_detection_mode) # 3) convert to ibl_df ibl_df = fpio.from_raw_neurophotometrics_df_to_ibl_df(raw_df, rois=self.kwargs['fibers'], drop_first=False) From 4fbdfc9118789c3b0822af63f50ab2af0a24d275 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Mon, 23 Jun 2025 14:59:47 +0100 Subject: [PATCH 45/80] bugfix for failing job creation due to wrong kwarg handling --- ibllib/pipes/neurophotometrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index bba7e50b3..965bb3908 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -61,8 +61,8 @@ class FibrePhotometryBaseSync(base_tasks.DynamicTask): priority = 90 job_size = 'small' - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, session_path, one, **kwargs): + super().__init__(session_path, one=one, **kwargs) self.photometry_collection = kwargs['collection'] # raw_photometry_data self.kwargs = kwargs From fd3b633df8c2584891cd2c0a6c4fe089e1a5e7fc Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Mon, 23 Jun 2025 15:00:10 +0100 Subject: [PATCH 46/80] bugfix in job creator when root_path is session_folder --- ibllib/pipes/local_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/local_server.py b/ibllib/pipes/local_server.py index 92f1cf39a..c02ae11c3 100644 --- 
a/ibllib/pipes/local_server.py +++ b/ibllib/pipes/local_server.py @@ -106,7 +106,7 @@ def job_creator(root_path, one=None, dry=False, rerun=False): if not one: one = ONE(cache_rest=None) rc = IBLRegistrationClient(one=one) - flag_files = Path(root_path).glob('*/????-??-??/*/raw_session.flag') + flag_files = Path(root_path).glob('**/raw_session.flag') flag_files = filter(lambda x: is_session_path(x.parent), flag_files) pipes = [] all_datasets = [] From 73956b7916451a99b2ffd4f6a306cc3b44b950fc Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 8 Aug 2025 13:06:23 +0200 Subject: [PATCH 47/80] bugfix for failing synchronization due to frame timestamp and bonsai frame number discrepancy, fast and low memory profile tdms timestamp extraction added --- ibllib/pipes/neurophotometrics.py | 140 +++++++++++++++++++++++++----- 1 file changed, 116 insertions(+), 24 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 965bb3908..2566a786f 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -2,7 +2,8 @@ from pathlib import Path import numpy as np import pandas as pd -from typing import Tuple +from typing import Tuple, Optional +import pickle import ibldsp.utils import ibllib.io.session_params @@ -16,12 +17,14 @@ from iblutil.spacer import Spacer _logger = logging.getLogger('ibllib') +_logger.setLevel(logging.DEBUG) -def extract_timestamps_from_tdms_file(tdms_filepath: Path) -> dict: +def extract_timestamps_from_tdms_file(tdms_filepath: Path, save_path: Optional[Path] = None) -> dict: # extractor for tdms files as written by the daqami software, configured # for neurophotometrics experiments: Frameclock is in AI7, DI1-4 are the # bpod sync signals + _logger.info(f'extracting timestamps from tdms file: {tdms_filepath}') tdms_file = TdmsFile.read(tdms_filepath) groups = tdms_file.groups() @@ -35,23 +38,85 @@ def extract_timestamps_from_tdms_file(tdms_filepath: Path) -> dict: fs = digital_group.properties['ScanRate'] # this should be 10kHz df = tdms_file.as_dataframe() col = df.columns[-1] - vals = df[col].values.astype('int64') + vals = df[col].values.astype('int32') columns = ['DI0', 'DI1', 'DI2', 'DI3'] # ugly but basically just a binary decoder for the binary data # assumes 4 channels - data = np.array([list(bin(v)[2:].zfill(4)[::-1]) for v in vals], dtype='int64') + data = np.array([list(bin(v)[2:].zfill(4)[::-1]) for v in vals], dtype='int32') timestamps = {} for i, name in enumerate(columns): - signal = data[:, i] - timestamps[name] = np.where(np.diff(signal) == 1)[0] / fs + timestamps[name] = np.where(np.diff(data[:, i]) == 1)[0] / fs if has_analog_group: # frameclock data is recorded on an analog channel for channel in analog_group.channels(): - signal = (channel.data > 2.5).astype('int64') # assumes 0-5V + signal = (channel.data > 2.5).astype('int32') # assumes 0-5V timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs + if save_path is not None: + _logger.info(f'saving extracted timestamps to: {save_path}') + with open(save_path, 'wb') as fH: + pickle.dump(timestamps, fH) + + return timestamps + + +def extract_timestamps_from_tdms_file_fast(tdms_filepath: Path, save_path: Optional[Path] = None, chunk_size=10000) -> dict: + # extractor for tdms files as written by the daqami software, configured + # for neurophotometrics experiments: Frameclock is in AI7, DI1-4 are the + # bpod sync signals + _logger.info(f'extracting timestamps from tdms file: {tdms_filepath}') + + # this should be 10kHz + 
tdms_file = TdmsFile.read(tdms_filepath) + groups = tdms_file.groups() + + # this unfortunate hack is in here because there are a bunch of sessions + # where the frameclock is on DI0 + if len(groups) == 1: + has_analog_group = False + (digital_group,) = groups + if len(groups) == 2: + has_analog_group = True + analog_group, digital_group = groups + fs = digital_group.properties['ScanRate'] # this should be 10kHz + df = tdms_file.as_dataframe() + + # inferring digital col name + (digital_col,) = [col for col in df.columns if 'Digital' in col] + vals = df[digital_col].values.astype('int8') + digital_channel_names = ['DI0', 'DI1', 'DI2', 'DI3'] + + # ini + timestamps = {} + for ch in digital_channel_names: + timestamps[ch] = [] + + # chunked loop + n_chunks = df.shape[0] // chunk_size + for i in range(n_chunks): + vals_ = vals[i * chunk_size : (i + 1) * chunk_size] + data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') + + for j, name in enumerate(digital_channel_names): + ix = np.where(np.diff(data[:, j]) == 1)[0] + (chunk_size * i) + timestamps[name].append(ix / fs) + + for ch in digital_channel_names: + timestamps[ch] = np.concatenate(timestamps[ch]) + + if has_analog_group: + # frameclock data is recorded on an analog channel + for channel in analog_group.channels(): + signal = (channel.data > 2.5).astype('int32') # assumes 0-5V + timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs + + if save_path is not None: + _logger.info(f'saving extracted timestamps to: {save_path}') + with open(save_path, 'wb') as fH: + pickle.dump(timestamps, fH) + return timestamps @@ -63,7 +128,7 @@ class FibrePhotometryBaseSync(base_tasks.DynamicTask): def __init__(self, session_path, one, **kwargs): super().__init__(session_path, one=one, **kwargs) - self.photometry_collection = kwargs['collection'] # raw_photometry_data + self.photometry_collection = kwargs.get('collection', 'raw_photometry_data') # raw_photometry_data self.kwargs = kwargs # we will work with the first protocol here @@ -113,13 +178,13 @@ def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, li timestamps_nph = self._get_neurophotometrics_timestamps() # verify presence of sync timestamps - for source, timestamps in zip(['bpod','neurophotometrics'], [timestamps_bpod, timestamps_nph]): + for source, timestamps in zip(['bpod', 'neurophotometrics'], [timestamps_bpod, timestamps_nph]): assert len(timestamps) > 0, f'{source} sync timestamps are empty' # split into segments if multiple spacers are found # attempt to sync for each segment (only one will work) spacer = Spacer() - + # the fast way match spacer_detection_mode: case 'fast': @@ -130,9 +195,9 @@ def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, li # the indices that mark the boundaries of segments segment_ix = np.concatenate([spacer_ix, [timestamps_nph.shape[0]]]) segments = [] - for i in range(segment_ix.shape[0]-1): + for i in range(segment_ix.shape[0] - 1): start_ix = segment_ix[i] + (spacer.n_pulses * 2) - 1 - stop_ix = segment_ix[i+1] + stop_ix = segment_ix[i + 1] segments.append(timestamps_nph[start_ix:stop_ix]) for i, timestamps_segment in enumerate(segments): @@ -153,7 +218,7 @@ def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, li continue # TODO the framerate here is hardcoded, infer it instead! 
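A side note on the analog frameclock handling in the extractor above: it reduces to thresholding a 0-5 V trace and taking the rising edges, the same recipe as for the decoded digital lines. A minimal sketch, assuming a TTL-like signal sampled at `fs` Hz:

    import numpy as np

    def rising_edge_times(analog_signal, fs, threshold=2.5):
        # binarise the analog trace (assumes 0-5 V logic) and return rising-edge times in seconds
        high = (analog_signal > threshold).astype('int8')
        return np.where(np.diff(high) == 1)[0] / fs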
assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' - + valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] return sync_nph_to_bpod_fcn, valid_bounds @@ -238,10 +303,11 @@ class FibrePhotometryDAQSync(FibrePhotometryBaseSync): priority = 90 job_size = 'small' - def __init__(self, *args, **kwargs): + def __init__(self, *args, load_timestamps: bool = False, **kwargs): super().__init__(*args, **kwargs) - self.sync_kwargs = kwargs['sync_metadata'] - self.sync_channel = kwargs['sync_channel'] + self.sync_kwargs = kwargs.get('sync_metadata', self.session_params['sync']) + self.sync_channel = kwargs.get('sync_channel', self.session_params['devices']['neurophotometrics']['sync_channel']) + self.load_timestamps = load_timestamps @property def signature(self): @@ -250,7 +316,7 @@ def signature(self): ('_neurophotometrics_fpData.raw.pqt', self.photometry_collection, True, True), ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), ('_neurophotometrics_fpData.channels.csv', self.photometry_collection, True, True), - ('_mcc_DAQdata.raw.tdms', self.sync_kwargs['collection'], True, True), + ('_mcc_DAQdata.raw.tdms', self.photometry_collection, True, True), ], 'output_files': [ ('photometry.signal.pqt', 'alf/photometry', True), @@ -266,12 +332,19 @@ def load_data(self) -> pd.DataFrame: raw_df = super().load_data() # get daqami timestamps - tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' - self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) + # attempt to load + timestamps_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.pkl' + if self.load_timestamps and timestamps_filepath.exists(): + with open(timestamps_filepath, 'rb') as fH: + self.timestamps = pickle.load(fH) + else: # extract timestamps: + tdms_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.raw.tdms' + self.timestamps = extract_timestamps_from_tdms_file_fast(tdms_filepath, save_path=timestamps_filepath) + # downward compatibility - frameclock moved around, now is back on the AI7 - if self.sync_kwargs['frameclock_channel'] in ['0',0]: + if self.sync_kwargs['frameclock_channel'] in ['0', 0]: sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' - elif self.sync_kwargs['frameclock_channel'] in ['7',7]: + elif self.sync_kwargs['frameclock_channel'] in ['7', 7]: sync_channel_name = f'AI{self.sync_kwargs["frameclock_channel"]}' else: sync_channel_name = self.sync_kwargs['frameclock_channel'] @@ -279,19 +352,38 @@ def load_data(self) -> pd.DataFrame: # compare number of frame timestamps # and put them in the raw_df SystemTimestamp column + # based on the different scenarios + + # they are the same, all is well if raw_df.shape[0] == frame_timestamps.shape[0]: raw_df['SystemTimestamp'] = frame_timestamps - elif raw_df.shape[0] == frame_timestamps.shape[0] + 1: - # there is one extra frame timestamp from the last incomplete frame + _logger.debug(f'timestamps are of equal size {raw_df.shape[0]}') + + # there is one more timestamp recorded by the daq + # (probably bonsai drops the last incomplete frame) + elif raw_df.shape[0] + 1 == frame_timestamps.shape[0]: raw_df['SystemTimestamp'] = frame_timestamps[:-1] + _logger.debug('one more timestamp in daq than frames by bonsai') + + # there is one more frame by bonsai that doesn't have + # a timestamp (strange case) + elif raw_df.shape[0] == frame_timestamps.shape[0] + 1: + raw_df = raw_df.iloc[:-1] 
# dropping the last frame + raw_df['SystemTimestamp'] = frame_timestamps + _logger.debug('one frame in bonsai than timestamps recorded by daq') + + # there are many more frames recorded by bonsai than + # timestamps recorded by daqami elif raw_df.shape[0] > frame_timestamps.shape[0]: # the daqami was stopped / closed before bonsai # we discard all frames that can not be mapped _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') raw_df = raw_df.iloc[: frame_timestamps.shape[0]] + # there are more timestamps recorded by daqami than + # frames recorded by bonsai elif raw_df.shape[0] + 1 < frame_timestamps.shape[0]: - # this should not be possible + # this should not be possible / indicates a serious issue / bonsai crash') raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') return raw_df From 0031a7b01c2887558830e4665f37eef268da4323 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 8 Aug 2025 16:48:28 +0200 Subject: [PATCH 48/80] flake8 --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 2566a786f..6afb13f20 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -96,7 +96,7 @@ def extract_timestamps_from_tdms_file_fast(tdms_filepath: Path, save_path: Optio # chunked loop n_chunks = df.shape[0] // chunk_size for i in range(n_chunks): - vals_ = vals[i * chunk_size : (i + 1) * chunk_size] + vals_ = vals[i * chunk_size: (i + 1) * chunk_size] data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') for j, name in enumerate(digital_channel_names): From 6ed2ba93089179391964c7be7fe2fa1863aad990 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Fri, 8 Aug 2025 16:57:08 +0100 Subject: [PATCH 49/80] bugfix for extractor failure when too few timestamps are between two spacers (wrong segment, probably by session restart) --- ibllib/pipes/neurophotometrics.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 6afb13f20..90fec5637 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -202,9 +202,14 @@ def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, li for i, timestamps_segment in enumerate(segments): # sync the behaviour events to the photometry timestamps - sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - timestamps_segment, timestamps_bpod, return_indices=True, linear=True - ) + try: + sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( + timestamps_segment, timestamps_bpod, return_indices=True, linear=True + ) + except ValueError: + # this gets raised when there are no timestamps (multiple session restart) + continue + # then we check the alignment, should be less than the camera sampling rate tcheck = sync_nph_to_bpod_fcn(timestamps_segment[ix_nph]) - timestamps_bpod[ix_bpod] _logger.info( From 1f8a4cfa48d88d83985d6237e60b5af40aa49530 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Fri, 8 Aug 2025 17:18:53 +0100 Subject: [PATCH 50/80] by default, attempt to load previously extracted timestamps from daq --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 
90fec5637..b0dc255f3 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -308,7 +308,7 @@ class FibrePhotometryDAQSync(FibrePhotometryBaseSync): priority = 90 job_size = 'small' - def __init__(self, *args, load_timestamps: bool = False, **kwargs): + def __init__(self, *args, load_timestamps: bool = True, **kwargs): super().__init__(*args, **kwargs) self.sync_kwargs = kwargs.get('sync_metadata', self.session_params['sync']) self.sync_channel = kwargs.get('sync_channel', self.session_params['devices']['neurophotometrics']['sync_channel']) From 17688a7d513ec7fa3cdc631188e07394fb54dff0 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 11 Aug 2025 11:51:30 +0200 Subject: [PATCH 51/80] added documentation and cleanups --- ibllib/pipes/neurophotometrics.py | 133 +++++++++++++++++------------- 1 file changed, 74 insertions(+), 59 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 6afb13f20..6019bae6f 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -17,55 +17,62 @@ from iblutil.spacer import Spacer _logger = logging.getLogger('ibllib') -_logger.setLevel(logging.DEBUG) -def extract_timestamps_from_tdms_file(tdms_filepath: Path, save_path: Optional[Path] = None) -> dict: - # extractor for tdms files as written by the daqami software, configured - # for neurophotometrics experiments: Frameclock is in AI7, DI1-4 are the - # bpod sync signals - _logger.info(f'extracting timestamps from tdms file: {tdms_filepath}') - - tdms_file = TdmsFile.read(tdms_filepath) - groups = tdms_file.groups() - # this unfortunate hack is in here because there are a bunch of sessions where the frameclock is on DI0 - if len(groups) == 1: - has_analog_group = False - (digital_group,) = groups - if len(groups) == 2: - has_analog_group = True - analog_group, digital_group = groups - fs = digital_group.properties['ScanRate'] # this should be 10kHz - df = tdms_file.as_dataframe() - col = df.columns[-1] - vals = df[col].values.astype('int32') - columns = ['DI0', 'DI1', 'DI2', 'DI3'] - - # ugly but basically just a binary decoder for the binary data - # assumes 4 channels - data = np.array([list(bin(v)[2:].zfill(4)[::-1]) for v in vals], dtype='int32') - timestamps = {} - for i, name in enumerate(columns): - timestamps[name] = np.where(np.diff(data[:, i]) == 1)[0] / fs - - if has_analog_group: - # frameclock data is recorded on an analog channel - for channel in analog_group.channels(): - signal = (channel.data > 2.5).astype('int32') # assumes 0-5V - timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs - - if save_path is not None: - _logger.info(f'saving extracted timestamps to: {save_path}') - with open(save_path, 'wb') as fH: - pickle.dump(timestamps, fH) - - return timestamps - - -def extract_timestamps_from_tdms_file_fast(tdms_filepath: Path, save_path: Optional[Path] = None, chunk_size=10000) -> dict: - # extractor for tdms files as written by the daqami software, configured - # for neurophotometrics experiments: Frameclock is in AI7, DI1-4 are the - # bpod sync signals +def _int2digital_channels(values: np.ndarray) -> np.ndarray: + """decoder for the digital channel values from the tdms file into a channel + based array (rows are temporal samples, columns are channels). + + essentially does: + + 0 -> 0000 + 1 -> 1000 + 2 -> 0100 + 3 -> 1100 + 4 -> 0010 + 5 -> 1010 + 6 -> 0110 + ... 
+ + the order from binary representation is reversed so + columns index represents channel index + + Parameters + ---------- + values : np.ndarray + the input values from the tdms digital channel + + Returns + ------- + np.ndarray + a (n x 4) array + """ + return np.array([list(f'{v:04b}'[::-1]) for v in values], dtype='int8') + + +def extract_timestamps_from_tdms_file( + tdms_filepath: Path, + save_path: Optional[Path] = None, + chunk_size=10000, +) -> dict: + """extractor for tdms files as written by the daqami software, configured for neurophotometrics + experiments: Frameclock is in AI7, DI1-4 are the bpod sync signals + + Parameters + ---------- + tdms_filepath : Path + path to TDMS file + save_path : Optional[Path], optional + if a path, save extracted timestamps from tdms file to this location, by default None + chunk_size : int, optional + if not None, process tdms data in chunks for decreased memory usage, by default 10000 + + Returns + ------- + dict + a dict with the tdms channel names as keys and the timestamps of the rising fronts + """ + # _logger.info(f'extracting timestamps from tdms file: {tdms_filepath}') # this should be 10kHz @@ -93,19 +100,26 @@ def extract_timestamps_from_tdms_file_fast(tdms_filepath: Path, save_path: Optio for ch in digital_channel_names: timestamps[ch] = [] - # chunked loop - n_chunks = df.shape[0] // chunk_size - for i in range(n_chunks): - vals_ = vals[i * chunk_size: (i + 1) * chunk_size] - data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') - + # chunked loop for memory efficiency + if chunk_size is not None: + n_chunks = df.shape[0] // chunk_size + for i in range(n_chunks): + vals_ = vals[i * chunk_size : (i + 1) * chunk_size] + # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') + data = _int2digital_channels(vals_) + + for j, name in enumerate(digital_channel_names): + ix = np.where(np.diff(data[:, j]) == 1)[0] + (chunk_size * i) + timestamps[name].append(ix / fs) + + for ch in digital_channel_names: + timestamps[ch] = np.concatenate(timestamps[ch]) + else: + data = _int2digital_channels(vals) for j, name in enumerate(digital_channel_names): - ix = np.where(np.diff(data[:, j]) == 1)[0] + (chunk_size * i) + ix = np.where(np.diff(data[:, j]) == 1)[0] timestamps[name].append(ix / fs) - for ch in digital_channel_names: - timestamps[ch] = np.concatenate(timestamps[ch]) - if has_analog_group: # frameclock data is recorded on an analog channel for channel in analog_group.channels(): @@ -214,8 +228,9 @@ def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, li ) if len(ix_nph) / len(timestamps_bpod) < 0.95: # wrong segment - print('wrong segment') + _logger.info(f'segment {i} - wrong') continue + _logger.info(f'segment {i} - matched') # TODO the framerate here is hardcoded, infer it instead! 
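To make the bit-order convention of `_int2digital_channels` concrete, a small worked example with hypothetical packed samples; column j of the decoded array is DIj, and the rising edges per channel then fall out of an `np.diff`, as in the extractor:

    import numpy as np

    vals = np.array([0, 1, 2, 5], dtype='int8')  # packed DI0..DI3 states as read from the tdms file
    decoded = np.array([list(f'{v:04b}'[::-1]) for v in vals], dtype='int8')
    # decoded == [[0, 0, 0, 0],   0 -> all lines low
    #             [1, 0, 0, 0],   1 -> DI0 high
    #             [0, 1, 0, 0],   2 -> DI1 high
    #             [1, 0, 1, 0]]   5 -> DI0 and DI2 high
    rising_ix = {f'DI{j}': np.where(np.diff(decoded[:, j]) == 1)[0] for j in range(4)}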
assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' @@ -339,7 +354,7 @@ def load_data(self) -> pd.DataFrame: self.timestamps = pickle.load(fH) else: # extract timestamps: tdms_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.raw.tdms' - self.timestamps = extract_timestamps_from_tdms_file_fast(tdms_filepath, save_path=timestamps_filepath) + self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath, save_path=timestamps_filepath) # downward compatibility - frameclock moved around, now is back on the AI7 if self.sync_kwargs['frameclock_channel'] in ['0', 0]: From d84473330d5d08b77085cd717d7645b15d7df6f9 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Mon, 11 Aug 2025 13:05:48 +0100 Subject: [PATCH 52/80] crucial bugfix of bpod start time timestamp shift regression that reappeared --- ibllib/pipes/neurophotometrics.py | 54 +++++++++++++++++-------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 0bcc9b018..e6fd1ab61 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -168,7 +168,7 @@ def _get_bpod_timestamps(self) -> Tuple[np.ndarray, list]: timestamps_bpod.append( np.array( [ - data['States timestamps'][sync_name][0][0] + data['Trial start timestamp'] + data['States timestamps'][sync_name][0][0] + data['Trial start timestamp'] - data['Bpod start timestamp'] for data in bpod_data if sync_name in data['States timestamps'] ] @@ -373,38 +373,44 @@ def load_data(self) -> pd.DataFrame: # compare number of frame timestamps # and put them in the raw_df SystemTimestamp column # based on the different scenarios + frame_times_adjusted = False # for debugging reasons # they are the same, all is well if raw_df.shape[0] == frame_timestamps.shape[0]: raw_df['SystemTimestamp'] = frame_timestamps - _logger.debug(f'timestamps are of equal size {raw_df.shape[0]}') - - # there is one more timestamp recorded by the daq - # (probably bonsai drops the last incomplete frame) - elif raw_df.shape[0] + 1 == frame_timestamps.shape[0]: - raw_df['SystemTimestamp'] = frame_timestamps[:-1] - _logger.debug('one more timestamp in daq than frames by bonsai') - - # there is one more frame by bonsai that doesn't have - # a timestamp (strange case) - elif raw_df.shape[0] == frame_timestamps.shape[0] + 1: - raw_df = raw_df.iloc[:-1] # dropping the last frame - raw_df['SystemTimestamp'] = frame_timestamps - _logger.debug('one frame in bonsai than timestamps recorded by daq') + _logger.info(f'timestamps are of equal size {raw_df.shape[0]}') + frame_times_adjusted = True - # there are many more frames recorded by bonsai than - # timestamps recorded by daqami + # there are more timestamps recorded by DAQ than + # frames recorded by bonsai + elif raw_df.shape[0] < frame_timestamps.shape[0]: + _logger.info(f'# bonsai frames: {raw_df.shape[0]}, # daq timestamps: {frame_timestamps.shape[0]}') + # there is exactly one more timestamp recorded by the daq + # (probably bonsai drops the last incomplete frame) + if raw_df.shape[0] == frame_timestamps.shape[0] - 1: + raw_df['SystemTimestamp'] = frame_timestamps[:-1] + # there are two more frames recorded by the DAQ than by + # bonsai - this is observed. 
TODO understand when this happens + elif raw_df.shape[0] == frame_timestamps.shape[0] - 2: + raw_df['SystemTimestamp'] = frame_timestamps[:-2] + # there are more frames recorded by the DAQ than that + # this indicates and issue - + elif raw_df.shape[0] < frame_timestamps.shape[0] - 2: + raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') + frame_times_adjusted = True + + # there are more frames recorded by bonsai than by the DAQ + # this happens when the user stops the daqami recording before stopping the bonsai + # or when daqami crashes elif raw_df.shape[0] > frame_timestamps.shape[0]: - # the daqami was stopped / closed before bonsai - # we discard all frames that can not be mapped + # we drop all excess frames _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') raw_df = raw_df.iloc[: frame_timestamps.shape[0]] + frame_times_adjusted = True + + if not frame_times_adjusted: + raise ValueError('timestamp issue that hasnt been caught') - # there are more timestamps recorded by daqami than - # frames recorded by bonsai - elif raw_df.shape[0] + 1 < frame_timestamps.shape[0]: - # this should not be possible / indicates a serious issue / bonsai crash') - raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') return raw_df def _get_neurophotometrics_timestamps(self) -> np.ndarray: From 3c9a2e5f477b03aacdbfa576f4a093819eb1d666 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Tue, 12 Aug 2025 09:01:46 +0100 Subject: [PATCH 53/80] added segment checking and validation, spacer detection assertion --- ibllib/pipes/neurophotometrics.py | 35 ++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index e6fd1ab61..1cf0d978a 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -206,6 +206,9 @@ def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, li case 'safe': spacer_ix, spacer_times = spacer.find_spacers_from_positive_fronts(timestamps_nph, fs=1000) + # verify spacer detection + assert spacer_ix.shape[0] > 0, 'spacer detection failed' + # the indices that mark the boundaries of segments segment_ix = np.concatenate([spacer_ix, [timestamps_nph.shape[0]]]) segments = [] @@ -214,33 +217,41 @@ def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, li stop_ix = segment_ix[i + 1] segments.append(timestamps_nph[start_ix:stop_ix]) - for i, timestamps_segment in enumerate(segments): - # sync the behaviour events to the photometry timestamps + def check_segment(timestamps_segment): + # check a segment for matching sync try: sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( timestamps_segment, timestamps_bpod, return_indices=True, linear=True ) except ValueError: # this gets raised when there are no timestamps (multiple session restart) - continue + return False # then we check the alignment, should be less than the camera sampling rate tcheck = sync_nph_to_bpod_fcn(timestamps_segment[ix_nph]) - timestamps_bpod[ix_bpod] - _logger.info( - f'sync: n trials {len(bpod_data)}' - f'n bpod sync {len(timestamps_bpod)}' - f'n photometry {len(timestamps_segment)}, n match {len(ix_nph)}' - ) + # _logger.info( + # f'sync: n trials {len(bpod_data)}' + # f'n bpod sync {len(timestamps_bpod)}' + # f'n photometry {len(timestamps_segment)}, n 
match {len(ix_nph)}' + # ) if len(ix_nph) / len(timestamps_bpod) < 0.95: # wrong segment - _logger.info(f'segment {i} - wrong') - continue - _logger.info(f'segment {i} - matched') + # _logger.info(f'segment {i} - wrong') + return False + # _logger.info(f'segment {i} - matched') # TODO the framerate here is hardcoded, infer it instead! assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' + return True + + checked_segments = [check_segment(segment) for segment in segments] + assert np.sum(checked_segments) == 1, 'multiple or none segments matched' + timestamps_segment = segments[np.where(checked_segments)[0][0]] - valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] + sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( + timestamps_segment, timestamps_bpod, return_indices=True, linear=True + ) + valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] return sync_nph_to_bpod_fcn, valid_bounds def load_data(self) -> pd.DataFrame: From 2d3a54c4972d1af298f8b13b113365b6e5efe5d1 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Tue, 12 Aug 2025 10:10:22 +0200 Subject: [PATCH 54/80] flake8/autopep8 --- ibllib/pipes/neurophotometrics.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 1cf0d978a..2a166d69a 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -104,7 +104,7 @@ def extract_timestamps_from_tdms_file( if chunk_size is not None: n_chunks = df.shape[0] // chunk_size for i in range(n_chunks): - vals_ = vals[i * chunk_size : (i + 1) * chunk_size] + vals_ = vals[i * chunk_size: (i + 1) * chunk_size] # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') data = _int2digital_channels(vals_) @@ -242,14 +242,14 @@ def check_segment(timestamps_segment): # TODO the framerate here is hardcoded, infer it instead! 
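The validation added in PATCH 53 amounts to requiring that exactly one spacer-delimited segment passes the sync check. A hypothetical helper capturing that rule, assuming a `check_segment` predicate like the one in the patch:

    import numpy as np

    def pick_matching_segment(segments, check_segment):
        # check_segment returns True for the segment whose pulses match the bpod events
        matched = np.array([check_segment(seg) for seg in segments], dtype=bool)
        assert matched.sum() == 1, f'error in segment matching: {matched.sum()} matching segments'
        return segments[int(np.flatnonzero(matched)[0])]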
assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' return True - + checked_segments = [check_segment(segment) for segment in segments] assert np.sum(checked_segments) == 1, 'multiple or none segments matched' timestamps_segment = segments[np.where(checked_segments)[0][0]] sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - timestamps_segment, timestamps_bpod, return_indices=True, linear=True - ) + timestamps_segment, timestamps_bpod, return_indices=True, linear=True + ) valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] return sync_nph_to_bpod_fcn, valid_bounds @@ -384,7 +384,7 @@ def load_data(self) -> pd.DataFrame: # compare number of frame timestamps # and put them in the raw_df SystemTimestamp column # based on the different scenarios - frame_times_adjusted = False # for debugging reasons + frame_times_adjusted = False # for debugging reasons # they are the same, all is well if raw_df.shape[0] == frame_timestamps.shape[0]: @@ -392,7 +392,7 @@ def load_data(self) -> pd.DataFrame: _logger.info(f'timestamps are of equal size {raw_df.shape[0]}') frame_times_adjusted = True - # there are more timestamps recorded by DAQ than + # there are more timestamps recorded by DAQ than # frames recorded by bonsai elif raw_df.shape[0] < frame_timestamps.shape[0]: _logger.info(f'# bonsai frames: {raw_df.shape[0]}, # daq timestamps: {frame_timestamps.shape[0]}') @@ -405,11 +405,11 @@ def load_data(self) -> pd.DataFrame: elif raw_df.shape[0] == frame_timestamps.shape[0] - 2: raw_df['SystemTimestamp'] = frame_timestamps[:-2] # there are more frames recorded by the DAQ than that - # this indicates and issue - + # this indicates and issue - elif raw_df.shape[0] < frame_timestamps.shape[0] - 2: raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') frame_times_adjusted = True - + # there are more frames recorded by bonsai than by the DAQ # this happens when the user stops the daqami recording before stopping the bonsai # or when daqami crashes From dc98dd863b8a61f256bbcd1b6d37915f4cf04dbb Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Wed, 13 Aug 2025 09:45:20 +0100 Subject: [PATCH 55/80] simplification of synchronization scheme, spacer detection obsolete --- ibllib/pipes/neurophotometrics.py | 152 +++++++++++++++++++----------- 1 file changed, 95 insertions(+), 57 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 2a166d69a..32fceb6da 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -185,7 +185,93 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # for daq based syncing, the timestamps are extracted from the tdms file ... 
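PATCH 55 below drops the spacer and segment machinery and reduces the sync to a single linear fit between the two full timestamp vectors. A minimal usage sketch of that call, with toy arrays standing in for the real bpod and photometry sync pulses:

    import numpy as np
    import ibldsp.utils

    timestamps_bpod = np.arange(500) * 1.5                 # toy bpod sync events (s)
    timestamps_nph = timestamps_bpod * (1 + 20e-6) + 3.0   # same events on the photometry clock
    fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps(
        timestamps_nph, timestamps_bpod, return_indices=True, linear=True
    )
    # fcn maps photometry time into bpod time; drift_ppm reports the residual clock drift in ppm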
- def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, list]: + # def _get_sync_function(self, spacer_detection_mode='fallback') -> Tuple[callable, list]: + # # returns the synchronization function + # # get the timestamps + # timestamps_bpod, bpod_data = self._get_bpod_timestamps() + # timestamps_nph = self._get_neurophotometrics_timestamps() + + # # verify presence of sync timestamps + # for source, timestamps in zip(['bpod', 'neurophotometrics'], [timestamps_bpod, timestamps_nph]): + # assert len(timestamps) > 0, f'{source} sync timestamps are empty' + + # # split into segments if multiple spacers are found + # # attempt to sync for each segment (only one will work) + # spacer = Spacer() + + # def _get_segments(timestamps_nph, spacer_detection_mode): + # segments = [] + + # match spacer_detection_mode: + # case 'fast': + # spacer_ix = spacer.find_spacers_from_timestamps(timestamps_nph, atol=1e-5) + + # case 'safe': + # spacer_ix, spacer_times = spacer.find_spacers_from_positive_fronts(timestamps_nph, fs=1000) + # spacer_ix = np.searchsorted(timestamps_nph, spacer_times) + + # case 'fallback': # first try fast, if fails, try safe + # segments = _get_segments(timestamps_nph, 'fast') + # if len(segments) > 0: + # return segments + # else: + # segments = _get_segments(timestamps_nph, 'safe') + # if len(segments) > 0: + # return segments + # else: + # raise ValueError('spacer detection failed') + + # # the indices that mark the boundaries of segments + # segment_ix = np.concatenate([spacer_ix, [timestamps_nph.shape[0]]]) + # for i in range(segment_ix.shape[0] - 1): + # start_ix = segment_ix[i] + (spacer.n_pulses * 2) - 1 + # stop_ix = segment_ix[i + 1] + # segments.append(timestamps_nph[start_ix:stop_ix]) + + # return segments + + # # verify spacer detection + # segments = _get_segments(timestamps_nph, spacer_detection_mode=spacer_detection_mode) + # assert len(segments) > 0, 'spacer detection failed' + + # def check_segment(timestamps_segment, matching_threshold = .95): + # # check a segment for matching sync + # try: + # sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( + # timestamps_segment, timestamps_bpod, return_indices=True, linear=True + # ) + # except ValueError: + # # this gets raised when there are no timestamps (multiple session restart) + # return False + + # # then we check the alignment, should be less than the camera sampling rate + # tcheck = sync_nph_to_bpod_fcn(timestamps_segment[ix_nph]) - timestamps_bpod[ix_bpod] + # # _logger.info( + # # f'sync: n trials {len(bpod_data)}' + # # f'n bpod sync {len(timestamps_bpod)}' + # # f'n photometry {len(timestamps_segment)}, n match {len(ix_nph)}' + # # ) + # if len(ix_nph) / len(timestamps_bpod) < matching_threshold: + # # wrong segment + # return False + # # _logger.info(f'segment {i} - matched') + # # TODO the framerate here is hardcoded, infer it instead! 
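On the drift value that the simplified `_get_sync_function` logs further below: assuming `drift_ppm` is, as the name suggests, the linear clock drift in parts per million, the divergence expected over a session is easy to estimate:

    drift_ppm = 20.0                 # hypothetical logged value
    session_length_s = 3600          # one hour of recording
    divergence_s = drift_ppm * 1e-6 * session_length_s
    # 0.072 s over the hour - well above the 1/60 s residual tolerance used earlier,
    # hence a fitted slope rather than a constant offset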
+ # assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' + # return True + + # checked_segments = [check_segment(segment) for segment in segments] + + # assert np.sum(checked_segments) == 1, f'error in segment matching: matching segments: {np.sum(checked_segments)}' + # timestamps_segment = segments[np.where(checked_segments)[0][0]] + + # sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( + # timestamps_segment, timestamps_bpod, return_indices=True, linear=True + # ) + + # valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] + # return sync_nph_to_bpod_fcn, valid_bounds + + def _get_sync_function(self) -> Tuple[callable, list]: # returns the synchronization function # get the timestamps timestamps_bpod, bpod_data = self._get_bpod_timestamps() @@ -195,61 +281,10 @@ def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, li for source, timestamps in zip(['bpod', 'neurophotometrics'], [timestamps_bpod, timestamps_nph]): assert len(timestamps) > 0, f'{source} sync timestamps are empty' - # split into segments if multiple spacers are found - # attempt to sync for each segment (only one will work) - spacer = Spacer() - - # the fast way - match spacer_detection_mode: - case 'fast': - spacer_ix = spacer.find_spacers_from_timestamps(timestamps_nph, atol=1e-5) - case 'safe': - spacer_ix, spacer_times = spacer.find_spacers_from_positive_fronts(timestamps_nph, fs=1000) - - # verify spacer detection - assert spacer_ix.shape[0] > 0, 'spacer detection failed' - - # the indices that mark the boundaries of segments - segment_ix = np.concatenate([spacer_ix, [timestamps_nph.shape[0]]]) - segments = [] - for i in range(segment_ix.shape[0] - 1): - start_ix = segment_ix[i] + (spacer.n_pulses * 2) - 1 - stop_ix = segment_ix[i + 1] - segments.append(timestamps_nph[start_ix:stop_ix]) - - def check_segment(timestamps_segment): - # check a segment for matching sync - try: - sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - timestamps_segment, timestamps_bpod, return_indices=True, linear=True - ) - except ValueError: - # this gets raised when there are no timestamps (multiple session restart) - return False - - # then we check the alignment, should be less than the camera sampling rate - tcheck = sync_nph_to_bpod_fcn(timestamps_segment[ix_nph]) - timestamps_bpod[ix_bpod] - # _logger.info( - # f'sync: n trials {len(bpod_data)}' - # f'n bpod sync {len(timestamps_bpod)}' - # f'n photometry {len(timestamps_segment)}, n match {len(ix_nph)}' - # ) - if len(ix_nph) / len(timestamps_bpod) < 0.95: - # wrong segment - # _logger.info(f'segment {i} - wrong') - return False - # _logger.info(f'segment {i} - matched') - # TODO the framerate here is hardcoded, infer it instead! 
- assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' - return True - - checked_segments = [check_segment(segment) for segment in segments] - assert np.sum(checked_segments) == 1, 'multiple or none segments matched' - timestamps_segment = segments[np.where(checked_segments)[0][0]] - sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - timestamps_segment, timestamps_bpod, return_indices=True, linear=True + timestamps_nph, timestamps_bpod, return_indices=True, linear=True ) + _logger.info(f"synced with drift: {drift_ppm}") valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] return sync_nph_to_bpod_fcn, valid_bounds @@ -270,8 +305,9 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: raw_df = self.load_data() # 2) get the synchronization function - spacer_detection_mode = kwargs.get('spacer_detection_mode', 'fast') - sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function(spacer_detection_mode=spacer_detection_mode) + # spacer_detection_mode = kwargs.get('spacer_detection_mode', 'fallback') + # sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function(spacer_detection_mode=spacer_detection_mode) + sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function() # 3) convert to ibl_df ibl_df = fpio.from_raw_neurophotometrics_df_to_ibl_df(raw_df, rois=self.kwargs['fibers'], drop_first=False) @@ -416,7 +452,9 @@ def load_data(self) -> pd.DataFrame: elif raw_df.shape[0] > frame_timestamps.shape[0]: # we drop all excess frames _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') - raw_df = raw_df.iloc[: frame_timestamps.shape[0]] + n_frames_daqami = frame_timestamps.shape[0] + raw_df = raw_df.iloc[:n_frames_daqami] + raw_df.loc[:, 'SystemTimestamp'] = frame_timestamps frame_times_adjusted = True if not frame_times_adjusted: From 5be5de3a94739e90ea0533c7216260765db6976b Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Tue, 2 Sep 2025 11:21:36 +0100 Subject: [PATCH 56/80] parede hotfix for extracting passive sessions in photometry --- ibllib/pipes/dynamic_pipeline.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index c8e6ea119..5ba935e67 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -257,6 +257,9 @@ def _get_trials_tasks(session_path, acquisition_description=None, sync_tasks=Non # This may happen that the extractor is tied to a specific sync task: look for TrialsChoiceWorldBpod for example elif hasattr(btasks, extractor + sync_label.capitalize()): task = getattr(btasks, extractor + sync_label.capitalize()) + # Passive sessions can be run in behavior boxes + elif 'passiveChoiceWorld' in protocol: + registration_class = btasks.PassiveRegisterRaw else: # lookup in the project extraction repo if we find an extractor class import projects.extraction_tasks From 26c4338c4ff91a9d90b2571ffc2c86f5837dea52 Mon Sep 17 00:00:00 2001 From: KceniaB Date: Thu, 11 Sep 2025 11:18:02 +0100 Subject: [PATCH 57/80] downward compatibility for kcneias extraction --- ibllib/pipes/neurophotometrics.py | 111 +++++------------------------- 1 file changed, 17 insertions(+), 94 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 32fceb6da..1f9fa9357 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -140,16 +140,22 @@ class 
FibrePhotometryBaseSync(base_tasks.DynamicTask): priority = 90 job_size = 'small' - def __init__(self, session_path, one, **kwargs): + def __init__(self, session_path, one, task_protocol=None, task_collection=None, **kwargs): super().__init__(session_path, one=one, **kwargs) self.photometry_collection = kwargs.get('collection', 'raw_photometry_data') # raw_photometry_data self.kwargs = kwargs + self.task_protocol = task_protocol + self.task_collection = task_collection - # we will work with the first protocol here - for task in self.session_params['tasks']: - self.task_protocol = next(k for k in task) + if self.task_protocol is None: + # we will work with the first protocol here + for task in self.session_params['tasks']: + self.task_protocol = next(k for k in task) + break + + if self.task_collection is None: + # if not provided, infer self.task_collection = ibllib.io.session_params.get_task_collection(self.session_params, self.task_protocol) - break def _get_bpod_timestamps(self) -> Tuple[np.ndarray, list]: # the timestamps for syncing, in the time of the bpod @@ -185,92 +191,6 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # for daq based syncing, the timestamps are extracted from the tdms file ... - # def _get_sync_function(self, spacer_detection_mode='fallback') -> Tuple[callable, list]: - # # returns the synchronization function - # # get the timestamps - # timestamps_bpod, bpod_data = self._get_bpod_timestamps() - # timestamps_nph = self._get_neurophotometrics_timestamps() - - # # verify presence of sync timestamps - # for source, timestamps in zip(['bpod', 'neurophotometrics'], [timestamps_bpod, timestamps_nph]): - # assert len(timestamps) > 0, f'{source} sync timestamps are empty' - - # # split into segments if multiple spacers are found - # # attempt to sync for each segment (only one will work) - # spacer = Spacer() - - # def _get_segments(timestamps_nph, spacer_detection_mode): - # segments = [] - - # match spacer_detection_mode: - # case 'fast': - # spacer_ix = spacer.find_spacers_from_timestamps(timestamps_nph, atol=1e-5) - - # case 'safe': - # spacer_ix, spacer_times = spacer.find_spacers_from_positive_fronts(timestamps_nph, fs=1000) - # spacer_ix = np.searchsorted(timestamps_nph, spacer_times) - - # case 'fallback': # first try fast, if fails, try safe - # segments = _get_segments(timestamps_nph, 'fast') - # if len(segments) > 0: - # return segments - # else: - # segments = _get_segments(timestamps_nph, 'safe') - # if len(segments) > 0: - # return segments - # else: - # raise ValueError('spacer detection failed') - - # # the indices that mark the boundaries of segments - # segment_ix = np.concatenate([spacer_ix, [timestamps_nph.shape[0]]]) - # for i in range(segment_ix.shape[0] - 1): - # start_ix = segment_ix[i] + (spacer.n_pulses * 2) - 1 - # stop_ix = segment_ix[i + 1] - # segments.append(timestamps_nph[start_ix:stop_ix]) - - # return segments - - # # verify spacer detection - # segments = _get_segments(timestamps_nph, spacer_detection_mode=spacer_detection_mode) - # assert len(segments) > 0, 'spacer detection failed' - - # def check_segment(timestamps_segment, matching_threshold = .95): - # # check a segment for matching sync - # try: - # sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - # timestamps_segment, timestamps_bpod, return_indices=True, linear=True - # ) - # except ValueError: - # # this gets raised when there are no timestamps (multiple session restart) - # return False - - # # then we check the alignment, should 
be less than the camera sampling rate - # tcheck = sync_nph_to_bpod_fcn(timestamps_segment[ix_nph]) - timestamps_bpod[ix_bpod] - # # _logger.info( - # # f'sync: n trials {len(bpod_data)}' - # # f'n bpod sync {len(timestamps_bpod)}' - # # f'n photometry {len(timestamps_segment)}, n match {len(ix_nph)}' - # # ) - # if len(ix_nph) / len(timestamps_bpod) < matching_threshold: - # # wrong segment - # return False - # # _logger.info(f'segment {i} - matched') - # # TODO the framerate here is hardcoded, infer it instead! - # assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' - # return True - - # checked_segments = [check_segment(segment) for segment in segments] - - # assert np.sum(checked_segments) == 1, f'error in segment matching: matching segments: {np.sum(checked_segments)}' - # timestamps_segment = segments[np.where(checked_segments)[0][0]] - - # sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - # timestamps_segment, timestamps_bpod, return_indices=True, linear=True - # ) - - # valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] - # return sync_nph_to_bpod_fcn, valid_bounds - def _get_sync_function(self) -> Tuple[callable, list]: # returns the synchronization function # get the timestamps @@ -285,7 +205,8 @@ def _get_sync_function(self) -> Tuple[callable, list]: timestamps_nph, timestamps_bpod, return_indices=True, linear=True ) _logger.info(f"synced with drift: {drift_ppm}") - + # TODO - assertion needed. 95% of timestamps in bpod need to be in timestamps of nph (but not the other way around) + valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] return sync_nph_to_bpod_fcn, valid_bounds @@ -344,7 +265,7 @@ def signature(self): 'input_files': [ ('_neurophotometrics_fpData.raw.pqt', self.photometry_collection, True, True), ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), - ('_neurophotometrics_fpData.channels.csv', self.photometry_collection, True, True), + # ('_neurophotometrics_fpData.channels.csv', self.photometry_collection, True, True), ('_neurophotometrics_fpData.digitalIntputs.pqt', self.photometry_collection, True), ], 'output_files': [ @@ -357,7 +278,9 @@ def signature(self): def _get_neurophotometrics_timestamps(self) -> np.ndarray: # for bpod based syncing, the timestamps for syncing are in the digital inputs file raw_photometry_folder = self.session_path / self.photometry_collection - digital_inputs_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.digitalIntputs.pqt') + digital_inputs_filepath = raw_photometry_folder / '_neurophotometrics_fpData.digitalIntputs.pqt' + version = fpio.infer_version_from_digital_inputs_file(digital_inputs_filepath) + digital_inputs_df = fpio.read_digital_inputs_file(digital_inputs_filepath, version=version) timestamps_nph = digital_inputs_df['SystemTimestamp'].values[digital_inputs_df['Channel'] == self.kwargs['sync_channel']] # TODO replace this rudimentary spacer removal From 8e7dcdf837f42eed89bb578700cd60bbaefc5600 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Sat, 13 Sep 2025 09:05:34 +0100 Subject: [PATCH 58/80] alongside iblphotometry major overhaul - WIP but extracts lauras data, caro left to check --- alyx_task.pkl | Bin 0 -> 2347 bytes ibllib/pipes/neurophotometrics.py | 130 ++++++++++++++++-------------- 2 files changed, 70 insertions(+), 60 deletions(-) create mode 100644 alyx_task.pkl diff --git a/alyx_task.pkl b/alyx_task.pkl new 
file mode 100644 index 0000000000000000000000000000000000000000..4b328c69c715e72448be87278c916882074ddd60 GIT binary patch literal 2347 zcmcgu-D=!M6n5gD#-^e4hh7v411+SKm5^pe8fhslCM6*?b<^6E(y)k;G`l;sR#G%0 zyL%Dn1I*?0eR|W^=tJ}gdPa_8L*=wiAcDgzBh7cd`S$$%w)H#O-58!{_A_%^mezbq zilm?ki@Ye!Q;!x&?xhiCUR(x@U=oUwQET36c_(VE>qOfqshe&MJ1Z`0QD+s;Gn^g$V9k!wT=&YFJNKJa zTL~?!*{b;pmJc7uyc3U?jc!&#ck8eBzdu^n1$^3evgxE=+k>CYu57Ym$-9NH2is=5 z5~rfF2ODN{(e}eyxb>FV?D)zaY?-@Su0+=KI#Z%h_RzdLY>0}tN|fiD55M~R&%gcv z!OWicRTRDEc?E|f>_!lTGjx9`ijx7cZhMX9 zvJz?>Ccp%buI_LBNRg2OP5lQ<>`N!f@OJP9H;iLQ!wSqowa%x_-51fb%1>Ak6c zC*L80S_(PqLg2>dlNs_n4~2e+364pWGMXebAOt~iZt7CG#PwX5x$j2dLB9#ul|SJP z>+>=K|4X2k0$eJ@2nmvqKq4h6OQTVRph-Wk#nx;O)fl=CbYbBnwC1DBT7MjduGwJb z2LyUHwq_T4Xuleoy_*tt<}!z}Zl76m`(fagJ5^I~Jq!SwKOXN}rIsIGKr$~3k3&4A z@stps#Zkmy0N8c8m93f*eD6TP;45j(UeQ#QD9(Z2@Az5vqKU3EH!)cA_HYimdR@C_ ze#_iF2NIeO9{|7Q=Vtuudqb%zfa{5y3kC$Ux&Me+^8qiO_7Y|hxI?Czxjuv8=8{?8 G_x?ZOxz5P| literal 0 HcmV?d00001 diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 1f9fa9357..35fbf3363 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -2,7 +2,7 @@ from pathlib import Path import numpy as np import pandas as pd -from typing import Tuple, Optional +from typing import Tuple, Optional, List import pickle import ibldsp.utils @@ -13,7 +13,7 @@ from nptdms import TdmsFile from abc import abstractmethod -from iblphotometry import io as fpio +from iblphotometry import fpio from iblutil.spacer import Spacer _logger = logging.getLogger('ibllib') @@ -104,7 +104,7 @@ def extract_timestamps_from_tdms_file( if chunk_size is not None: n_chunks = df.shape[0] // chunk_size for i in range(n_chunks): - vals_ = vals[i * chunk_size: (i + 1) * chunk_size] + vals_ = vals[i * chunk_size : (i + 1) * chunk_size] # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') data = _int2digital_channels(vals_) @@ -134,6 +134,24 @@ def extract_timestamps_from_tdms_file( return timestamps +def extract_timestamps_from_bpod_jsonable(file_jsonable: str | Path, sync_states_names: List[str]): + _, bpod_data = jsonable.load_task_jsonable(file_jsonable) + timestamps = [] + for sync_name in sync_states_names: + timestamps.append( + np.array( + [ + data['States timestamps'][sync_name][0][0] + data['Trial start timestamp'] - data['Bpod start timestamp'] + for data in bpod_data + if sync_name in data['States timestamps'] + ] + ) + ) + timestamps = np.sort(np.concatenate(timestamps)) + timestamps = timestamps[~np.isnan(timestamps)] + return timestamps + + class FibrePhotometryBaseSync(base_tasks.DynamicTask): # base clas for syncing fibre photometry # derived classes are: FibrePhotometryBpodSync and FibrePhotometryDAQSync @@ -164,25 +182,14 @@ def _get_bpod_timestamps(self) -> Tuple[np.ndarray, list]: else: sync_states_names = ['trial_start', 'reward', 'exit_state'] - # read in the raw behaviour data for syncing file_jsonable = self.session_path.joinpath(self.task_collection, '_iblrig_taskData.raw.jsonable') - _, bpod_data = jsonable.load_task_jsonable(file_jsonable) + timestamps_bpod = extract_timestamps_from_bpod_jsonable(file_jsonable, sync_states_names) + return timestamps_bpod - # we get the timestamps of the states from the bpod data - timestamps_bpod = [] - for sync_name in sync_states_names: - timestamps_bpod.append( - np.array( - [ - data['States timestamps'][sync_name][0][0] + data['Trial start timestamp'] - data['Bpod start timestamp'] - for data in bpod_data - if sync_name in data['States timestamps'] - ] 
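The new extract_timestamps_from_bpod_jsonable helper above takes over from the inline loop removed in this hunk. A short usage sketch, with a hypothetical session layout and the state-name lists that _get_bpod_timestamps selects depending on the protocol:

from pathlib import Path

session_path = Path('/data/Subjects/SUBJECT/2025-01-01/001')                          # hypothetical
file_jsonable = session_path / 'raw_task_data_00' / '_iblrig_taskData.raw.jsonable'   # collection name assumed
sync_states_names = ['trial_start', 'reward', 'exit_state']                           # ['iti', 'reward'] for habituation protocols
timestamps_bpod = extract_timestamps_from_bpod_jsonable(file_jsonable, sync_states_names)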
- ) - ) - timestamps_bpod = np.sort(np.concatenate(timestamps_bpod)) - timestamps_bpod = timestamps_bpod[~np.isnan(timestamps_bpod)] - return timestamps_bpod, bpod_data + def _get_valid_bounds(self): + file_jsonable = self.session_path.joinpath(self.task_collection, '_iblrig_taskData.raw.jsonable') + _, bpod_data = jsonable.load_task_jsonable(file_jsonable) + return [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] @abstractmethod def _get_neurophotometrics_timestamps(self) -> np.ndarray: @@ -194,7 +201,7 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: def _get_sync_function(self) -> Tuple[callable, list]: # returns the synchronization function # get the timestamps - timestamps_bpod, bpod_data = self._get_bpod_timestamps() + timestamps_bpod = self._get_bpod_timestamps() timestamps_nph = self._get_neurophotometrics_timestamps() # verify presence of sync timestamps @@ -204,17 +211,21 @@ def _get_sync_function(self) -> Tuple[callable, list]: sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( timestamps_nph, timestamps_bpod, return_indices=True, linear=True ) - _logger.info(f"synced with drift: {drift_ppm}") + _logger.info(f'synced with drift: {drift_ppm}') # TODO - assertion needed. 95% of timestamps in bpod need to be in timestamps of nph (but not the other way around) - - valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] + + valid_bounds = self._get_valid_bounds() return sync_nph_to_bpod_fcn, valid_bounds def load_data(self) -> pd.DataFrame: # loads the raw photometry data raw_photometry_folder = self.session_path / self.photometry_collection - raw_neurophotometrics_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt') - return raw_neurophotometrics_df + photometry_df = fpio.from_neurophotometrics_file_to_photometry_df( + raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt', + # data_columns=self.kwargs['fibers'], + drop_first=False, + ) + return photometry_df def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # 1) load photometry data @@ -223,27 +234,24 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # will be overridden with the timestamps from the tdms file # the idea behind this is that the rest of the sync is then the same # and handled by this base class - raw_df = self.load_data() + photometry_df = self.load_data() # 2) get the synchronization function - # spacer_detection_mode = kwargs.get('spacer_detection_mode', 'fallback') - # sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function(spacer_detection_mode=spacer_detection_mode) sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function() - # 3) convert to ibl_df - ibl_df = fpio.from_raw_neurophotometrics_df_to_ibl_df(raw_df, rois=self.kwargs['fibers'], drop_first=False) - # 3) apply synchronization - ibl_df['times'] = sync_nph_to_bpod_fcn(raw_df['SystemTimestamp']) - ibl_df['valid'] = np.logical_and(ibl_df['times'] >= valid_bounds[0], ibl_df['times'] <= valid_bounds[1]) + photometry_df['times'] = sync_nph_to_bpod_fcn(photometry_df['times']) + photometry_df['valid'] = np.logical_and( + photometry_df['times'] >= valid_bounds[0], photometry_df['times'] <= valid_bounds[1] + ) # 4) write to disk output_folder = self.session_path.joinpath('alf', 'photometry') output_folder.mkdir(parents=True, exist_ok=True) # writing the synced photometry signal - ibl_df_outpath = output_folder / 'photometry.signal.pqt' - ibl_df.to_parquet(ibl_df_outpath) 
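When it completes, _run leaves two tables under alf/photometry: photometry.signal.pqt, whose 'times' column is in bpod time and whose 'valid' column flags samples inside the task bounds, and photometryROI.locations.pqt, indexed by ROI with a 'brain_region' column. A minimal read-back sketch under an assumed session path:

import pandas as pd
from pathlib import Path

alf_photometry = Path('/data/Subjects/SUBJECT/2025-01-01/001/alf/photometry')   # hypothetical
signal_df = pd.read_parquet(alf_photometry / 'photometry.signal.pqt')
locations_df = pd.read_parquet(alf_photometry / 'photometryROI.locations.pqt')
signal_df = signal_df[signal_df['valid']]        # keep only samples within the task bounds
print(locations_df['brain_region'])              # one brain-region label per ROI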
+ photometry_df_outpath = output_folder / 'photometry.signal.pqt' + photometry_df.to_parquet(photometry_df_outpath) # writing the locations rois = [] @@ -252,7 +260,7 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: locations_df = pd.DataFrame(rois).set_index('ROI') locations_df_outpath = output_folder / 'photometryROI.locations.pqt' locations_df.to_parquet(locations_df_outpath) - return ibl_df_outpath, locations_df_outpath + return photometry_df_outpath, locations_df_outpath class FibrePhotometryBpodSync(FibrePhotometryBaseSync): @@ -266,7 +274,7 @@ def signature(self): ('_neurophotometrics_fpData.raw.pqt', self.photometry_collection, True, True), ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), # ('_neurophotometrics_fpData.channels.csv', self.photometry_collection, True, True), - ('_neurophotometrics_fpData.digitalIntputs.pqt', self.photometry_collection, True), + ('_neurophotometrics_fpData.digitalInputs.pqt', self.photometry_collection, True), ], 'output_files': [ ('photometry.signal.pqt', 'alf/photometry', True), @@ -278,10 +286,10 @@ def signature(self): def _get_neurophotometrics_timestamps(self) -> np.ndarray: # for bpod based syncing, the timestamps for syncing are in the digital inputs file raw_photometry_folder = self.session_path / self.photometry_collection - digital_inputs_filepath = raw_photometry_folder / '_neurophotometrics_fpData.digitalIntputs.pqt' - version = fpio.infer_version_from_digital_inputs_file(digital_inputs_filepath) - digital_inputs_df = fpio.read_digital_inputs_file(digital_inputs_filepath, version=version) - timestamps_nph = digital_inputs_df['SystemTimestamp'].values[digital_inputs_df['Channel'] == self.kwargs['sync_channel']] + digital_inputs_filepath = raw_photometry_folder / '_neurophotometrics_fpData.digitalInputs.pqt' + digital_inputs_df = fpio.read_digital_inputs_file(digital_inputs_filepath) + # timestamps_nph = digital_inputs_df['times'].values[digital_inputs_df['channel'] == self.kwargs['sync_channel']] + timestamps_nph = digital_inputs_df.groupby('channel').get_group(self.kwargs['sync_channel'])['times'].values # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods @@ -305,7 +313,7 @@ def signature(self): 'input_files': [ ('_neurophotometrics_fpData.raw.pqt', self.photometry_collection, True, True), ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), - ('_neurophotometrics_fpData.channels.csv', self.photometry_collection, True, True), + # ('_neurophotometrics_fpData.channels.csv', self.photometry_collection, True, True), ('_mcc_DAQdata.raw.tdms', self.photometry_collection, True, True), ], 'output_files': [ @@ -319,7 +327,7 @@ def load_data(self) -> pd.DataFrame: # the point of this functions is to overwrite the SystemTimestamp column # in the ibl_df with the values from the DAQ clock # then syncing will work the same as for the bpod based syncing - raw_df = super().load_data() + photometry_df = super().load_data() # get daqami timestamps # attempt to load @@ -341,49 +349,51 @@ def load_data(self) -> pd.DataFrame: frame_timestamps = self.timestamps[sync_channel_name] # compare number of frame timestamps - # and put them in the raw_df SystemTimestamp column + # and put them in the photometry_df SystemTimestamp column # based on the different scenarios frame_times_adjusted = False # for debugging reasons # they are the same, all is well - if raw_df.shape[0] == frame_timestamps.shape[0]: - raw_df['SystemTimestamp'] = frame_timestamps - 
_logger.info(f'timestamps are of equal size {raw_df.shape[0]}') + if photometry_df.shape[0] == frame_timestamps.shape[0]: + photometry_df['times'] = frame_timestamps + _logger.info(f'timestamps are of equal size {photometry_df.shape[0]}') frame_times_adjusted = True # there are more timestamps recorded by DAQ than # frames recorded by bonsai - elif raw_df.shape[0] < frame_timestamps.shape[0]: - _logger.info(f'# bonsai frames: {raw_df.shape[0]}, # daq timestamps: {frame_timestamps.shape[0]}') + elif photometry_df.shape[0] < frame_timestamps.shape[0]: + _logger.info(f'# bonsai frames: {photometry_df.shape[0]}, # daq timestamps: {frame_timestamps.shape[0]}') # there is exactly one more timestamp recorded by the daq # (probably bonsai drops the last incomplete frame) - if raw_df.shape[0] == frame_timestamps.shape[0] - 1: - raw_df['SystemTimestamp'] = frame_timestamps[:-1] + if photometry_df.shape[0] == frame_timestamps.shape[0] - 1: + photometry_df['times'] = frame_timestamps[:-1] # there are two more frames recorded by the DAQ than by # bonsai - this is observed. TODO understand when this happens - elif raw_df.shape[0] == frame_timestamps.shape[0] - 2: - raw_df['SystemTimestamp'] = frame_timestamps[:-2] + elif photometry_df.shape[0] == frame_timestamps.shape[0] - 2: + photometry_df['times'] = frame_timestamps[:-2] # there are more frames recorded by the DAQ than that # this indicates and issue - - elif raw_df.shape[0] < frame_timestamps.shape[0] - 2: + elif photometry_df.shape[0] < frame_timestamps.shape[0] - 2: raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') frame_times_adjusted = True # there are more frames recorded by bonsai than by the DAQ # this happens when the user stops the daqami recording before stopping the bonsai # or when daqami crashes - elif raw_df.shape[0] > frame_timestamps.shape[0]: + elif photometry_df.shape[0] > frame_timestamps.shape[0]: # we drop all excess frames - _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') + _logger.warning( + f'#frames bonsai: {photometry_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess' + ) n_frames_daqami = frame_timestamps.shape[0] - raw_df = raw_df.iloc[:n_frames_daqami] - raw_df.loc[:, 'SystemTimestamp'] = frame_timestamps + photometry_df = photometry_df.iloc[:n_frames_daqami] + photometry_df.loc[:, 'SystemTimestamp'] = frame_timestamps frame_times_adjusted = True if not frame_times_adjusted: raise ValueError('timestamp issue that hasnt been caught') - return raw_df + return photometry_df def _get_neurophotometrics_timestamps(self) -> np.ndarray: # get the sync channel and the corresponding timestamps From dc2f540d0da9ecb451f5171b625cad1b9eafdd36 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 19 Sep 2025 16:40:42 +0100 Subject: [PATCH 59/80] photometry extractor updates --- ibllib/pipes/neurophotometrics.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 35fbf3363..fed940d75 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -16,6 +16,8 @@ from iblphotometry import fpio from iblutil.spacer import Spacer +from one.api import ONE + _logger = logging.getLogger('ibllib') @@ -158,7 +160,14 @@ class FibrePhotometryBaseSync(base_tasks.DynamicTask): priority = 90 job_size = 'small' - def __init__(self, session_path, one, 
task_protocol=None, task_collection=None, **kwargs): + def __init__( + self, + session_path: str | Path, + one: ONE, + task_protocol: str | None = None, + task_collection: str | None = None, + **kwargs, + ): super().__init__(session_path, one=one, **kwargs) self.photometry_collection = kwargs.get('collection', 'raw_photometry_data') # raw_photometry_data self.kwargs = kwargs @@ -211,8 +220,13 @@ def _get_sync_function(self) -> Tuple[callable, list]: sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( timestamps_nph, timestamps_bpod, return_indices=True, linear=True ) - _logger.info(f'synced with drift: {drift_ppm}') - # TODO - assertion needed. 95% of timestamps in bpod need to be in timestamps of nph (but not the other way around) + if np.absolute(drift_ppm) > 20: + _logger.warning(f'sync with excessive drift: {drift_ppm}') + else: + _logger.info(f'synced with drift: {drift_ppm}') + + # assertion: 95% of timestamps in bpod need to be in timestamps of nph (but not the other way around) + assert timestamps_bpod.shape[0] * 0.95 < ix_bpod.shape[0], 'less than 95% of bpod timestamps matched' valid_bounds = self._get_valid_bounds() return sync_nph_to_bpod_fcn, valid_bounds @@ -267,6 +281,15 @@ class FibrePhotometryBpodSync(FibrePhotometryBaseSync): priority = 90 job_size = 'small' + def __init__( + self, + *args, + digital_inputs_channel: int | None = None, + **kwargs, + ): + super().__init__(*args, **kwargs) + self.digital_inputs_channel = digital_inputs_channel + @property def signature(self): signature = { @@ -287,7 +310,7 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # for bpod based syncing, the timestamps for syncing are in the digital inputs file raw_photometry_folder = self.session_path / self.photometry_collection digital_inputs_filepath = raw_photometry_folder / '_neurophotometrics_fpData.digitalInputs.pqt' - digital_inputs_df = fpio.read_digital_inputs_file(digital_inputs_filepath) + digital_inputs_df = fpio.read_digital_inputs_file(digital_inputs_filepath, channel=self.kwargs['sync_channel']) # timestamps_nph = digital_inputs_df['times'].values[digital_inputs_df['channel'] == self.kwargs['sync_channel']] timestamps_nph = digital_inputs_df.groupby('channel').get_group(self.kwargs['sync_channel'])['times'].values From 7575c7eacbcecc5e9ed91acc693eb77a10b5d91d Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 24 Sep 2025 13:26:17 +0100 Subject: [PATCH 60/80] added extractor class for passive photometry experiments --- ibllib/pipes/dynamic_pipeline.py | 18 +- ibllib/pipes/neurophotometrics.py | 276 ++++++++++++++++++++++++++++-- 2 files changed, 276 insertions(+), 18 deletions(-) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index 5ba935e67..7ed394e36 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -257,9 +257,6 @@ def _get_trials_tasks(session_path, acquisition_description=None, sync_tasks=Non # This may happen that the extractor is tied to a specific sync task: look for TrialsChoiceWorldBpod for example elif hasattr(btasks, extractor + sync_label.capitalize()): task = getattr(btasks, extractor + sync_label.capitalize()) - # Passive sessions can be run in behavior boxes - elif 'passiveChoiceWorld' in protocol: - registration_class = btasks.PassiveRegisterRaw else: # lookup in the project extraction repo if we find an extractor class import projects.extraction_tasks @@ -610,19 +607,30 @@ def make_pipeline(session_path, **pkwargs): if 'neurophotometrics' in devices: 
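Both sync branches below read their parameters from the neurophotometrics entry of the experiment description. The key names (collection, sync_mode, sync_channel, fibers with a per-ROI location) are the ones accessed by the tasks in this patch series; the ROI names and brain regions in this sketch are illustrative only:

neurophotometrics_description = {
    'collection': 'raw_photometry_data',   # default used when the key is absent
    'sync_mode': 'daqami',                 # or 'bpod' (the default)
    'sync_channel': 1,                     # illustrative channel number
    'fibers': {
        'G0': {'location': 'VTA'},         # illustrative ROI name and region
        'G1': {'location': 'SNc'},
    },
}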
# note: devices['neurophotometrics'] is the acquisition_description sync_mode = devices['neurophotometrics'].get('sync_mode', 'bpod') # default to bpod for downward compatibility + + # passive photometry + task_protocols = acquisition_description['tasks'] + assert len(task_protocols) == 1, 'chained protocols are not yet supported for photometry extraction' + protocol = task_protocols[0] + if 'passive' in protocol: + assert sync_mode == 'daqami', 'passive protocol syncing only supported for DAQ based syncing' + tasks['FibrePhotometryPassiveChoiceWorld'] = type( + 'FibrePhotometryPassiveChoiceWorld', (ptasks.FibrePhotometryPassiveChoiceWorld,), {} + )( + **kwargs, + ) + match sync_mode: case 'bpod': # for synchronization with the BNC inputs of the neurophotometrics receiving the sync pulses # from the individual bpods tasks['FibrePhotometryBpodSync'] = type('FibrePhotometryBpodSync', (ptasks.FibrePhotometryBpodSync,), {})( - **devices['neurophotometrics'], **kwargs, ) case 'daqami': # for synchronization with the DAQami receiving the sync pulses from the individual bpods # as well as the frame clock from the FP3002 tasks['FibrePhotometryDAQSync'] = type('FibrePhotometryDAQSync', (ptasks.FibrePhotometryDAQSync,), {})( - **devices['neurophotometrics'], **kwargs, ) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index fed940d75..0b19ce226 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -14,9 +14,11 @@ from abc import abstractmethod from iblphotometry import fpio -from iblutil.spacer import Spacer +from iblrig_tasks import _iblrig_tasks_passiveChoiceWorld from one.api import ONE +import json +from scipy.optimize import minimize _logger = logging.getLogger('ibllib') @@ -56,6 +58,7 @@ def extract_timestamps_from_tdms_file( tdms_filepath: Path, save_path: Optional[Path] = None, chunk_size=10000, + extract_durations: bool = False, ) -> dict: """extractor for tdms files as written by the daqami software, configured for neurophotometrics experiments: Frameclock is in AI7, DI1-4 are the bpod sync signals @@ -106,7 +109,7 @@ def extract_timestamps_from_tdms_file( if chunk_size is not None: n_chunks = df.shape[0] // chunk_size for i in range(n_chunks): - vals_ = vals[i * chunk_size : (i + 1) * chunk_size] + vals_ = vals[i * chunk_size: (i + 1) * chunk_size] # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') data = _int2digital_channels(vals_) @@ -136,6 +139,84 @@ def extract_timestamps_from_tdms_file( return timestamps +def extract_ttl_durations_from_tdms_file( + tdms_filepath: Path, + save_path: Optional[Path] = None, + chunk_size=10000, +) -> dict: + _logger.info(f'extracting ttl_durations from tdms file: {tdms_filepath}') + + # this should be 10kHz + tdms_file = TdmsFile.read(tdms_filepath) + groups = tdms_file.groups() + + # this unfortunate hack is in here because there are a bunch of sessions + # where the frameclock is on DI0 + if len(groups) == 1: + has_analog_group = False + (digital_group,) = groups + if len(groups) == 2: + has_analog_group = True + analog_group, digital_group = groups + fs = digital_group.properties['ScanRate'] # this should be 10kHz + df = tdms_file.as_dataframe() + + # inferring digital col name + (digital_col,) = [col for col in df.columns if 'Digital' in col] + vals = df[digital_col].values.astype('int8') + digital_channel_names = ['DI0', 'DI1', 'DI2', 'DI3'] + + # ini + timestamps = {} + for ch in digital_channel_names: + timestamps[ch] = dict(positive=[], negative=[]) + + # 
chunked loop for memory efficiency + if chunk_size is not None: + n_chunks = df.shape[0] // chunk_size + for i in range(n_chunks): + vals_ = vals[i * chunk_size: (i + 1) * chunk_size] + # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') + data = _int2digital_channels(vals_) + + for j, name in enumerate(digital_channel_names): + ix = np.where(np.diff(data[:, j]) == 1)[0] + (chunk_size * i) + timestamps[name]['positive'].append(ix / fs) + ix = np.where(np.diff(data[:, j]) == -1)[0] + (chunk_size * i) + timestamps[name]['negative'].append(ix / fs) + + for ch in digital_channel_names: + timestamps[ch]['positive'] = np.concatenate(timestamps[ch]['positive']) + timestamps[ch]['negative'] = np.concatenate(timestamps[ch]['negative']) + else: + data = _int2digital_channels(vals) + for j, name in enumerate(digital_channel_names): + ix = np.where(np.diff(data[:, j]) == 1)[0] + timestamps[name]['positive'].append(ix / fs) + ix = np.where(np.diff(data[:, j]) == -1)[0] + timestamps[name]['negative'].append(ix / fs) + + if has_analog_group: + # frameclock data is recorded on an analog channel + for channel in analog_group.channels(): + timestamps[channel.name] = {} + signal = (channel.data > 2.5).astype('int32') # assumes 0-5V + timestamps[channel.name]['positive'] = np.where(np.diff(signal) == 1)[0] / fs + timestamps[channel.name]['negative'] = np.where(np.diff(signal) == -1)[0] / fs + + # the actual diff + durations = {} + for channel in timestamps.keys(): + durations[channel] = timestamps[channel]['negative'] - timestamps[channel]['positive'] + + if save_path is not None: + _logger.info(f'saving extracted ttl durations to: {save_path}') + with open(save_path, 'wb') as fH: + pickle.dump(durations, fH) + + return durations + + def extract_timestamps_from_bpod_jsonable(file_jsonable: str | Path, sync_states_names: List[str]): _, bpod_data = jsonable.load_task_jsonable(file_jsonable) timestamps = [] @@ -184,7 +265,7 @@ def __init__( # if not provided, infer self.task_collection = ibllib.io.session_params.get_task_collection(self.session_params, self.task_protocol) - def _get_bpod_timestamps(self) -> Tuple[np.ndarray, list]: + def _get_bpod_timestamps(self) -> np.ndarray: # the timestamps for syncing, in the time of the bpod if 'habituation' in self.task_protocol: sync_states_names = ['iti', 'reward'] @@ -236,7 +317,6 @@ def load_data(self) -> pd.DataFrame: raw_photometry_folder = self.session_path / self.photometry_collection photometry_df = fpio.from_neurophotometrics_file_to_photometry_df( raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt', - # data_columns=self.kwargs['fibers'], drop_first=False, ) return photometry_df @@ -264,17 +344,19 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: output_folder.mkdir(parents=True, exist_ok=True) # writing the synced photometry signal - photometry_df_outpath = output_folder / 'photometry.signal.pqt' - photometry_df.to_parquet(photometry_df_outpath) + photometry_filepath = self.session_path / 'alf' / 'photometry' / 'photometry.signal.pqt' + photometry_filepath.parent.mkdir(parents=True, exist_ok=True) + photometry_df.to_parquet(photometry_filepath) # writing the locations rois = [] - for k, v in self.kwargs['fibers'].items(): + for k, v in self.session_params['devices']['neurophotometrics']['fibers'].items(): rois.append({'ROI': k, 'fiber': f'fiber_{v["location"]}', 'brain_region': v['location']}) locations_df = pd.DataFrame(rois).set_index('ROI') - locations_df_outpath = output_folder / 'photometryROI.locations.pqt' - 
locations_df.to_parquet(locations_df_outpath) - return photometry_df_outpath, locations_df_outpath + locations_filepath = self.session_path / 'alf' / 'photometry' / 'photometryROI.locations.pqt' + locations_filepath.parent.mkdir(parents=True, exist_ok=True) + locations_df.to_parquet(locations_filepath) + return photometry_filepath, locations_filepath class FibrePhotometryBpodSync(FibrePhotometryBaseSync): @@ -310,9 +392,11 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # for bpod based syncing, the timestamps for syncing are in the digital inputs file raw_photometry_folder = self.session_path / self.photometry_collection digital_inputs_filepath = raw_photometry_folder / '_neurophotometrics_fpData.digitalInputs.pqt' - digital_inputs_df = fpio.read_digital_inputs_file(digital_inputs_filepath, channel=self.kwargs['sync_channel']) - # timestamps_nph = digital_inputs_df['times'].values[digital_inputs_df['channel'] == self.kwargs['sync_channel']] - timestamps_nph = digital_inputs_df.groupby('channel').get_group(self.kwargs['sync_channel'])['times'].values + digital_inputs_df = fpio.read_digital_inputs_file( + digital_inputs_filepath, channel=self.session_params['devices']['neurophotometrics']['sync_channel'] + ) + sync_channel = self.session_params['devices']['neurophotometrics']['sync_channel'] + timestamps_nph = digital_inputs_df.groupby('channel').get_group(sync_channel)['times'].values # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods @@ -426,3 +510,169 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # to implement: detect spacer / remove spacer methods # timestamps_nph = timestamps_nph[15: ] return timestamps_nph + + +class FibrePhotometryPassiveChoiceWorld(base_tasks.BehaviourTask): + priority = 90 + job_size = 'small' + + def __init__( + self, + session_path: str | Path, + one: ONE, + load_timestamps: bool = True, + **kwargs, + ): + super().__init__(session_path, one=one, **kwargs) + self.photometry_collection = kwargs.get('collection', 'raw_photometry_data') + self.kwargs = kwargs + self.load_timestamps = load_timestamps + + def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: + # load the fixtures - from the relative delays between trials, an "absolute" time vector is + # created that is used for the synchronization + fixtures_path = Path(_iblrig_tasks_passiveChoiceWorld.__file__).parent / 'passiveChoiceWorld_trials_fixtures.pqt' + + # getting the task_settings + with open(self.session_path / self.collection / '_iblrig_taskSettings.raw.json', 'r') as fH: + task_settings = json.load(fH) + + # getting the fixtures and creating a relative time vector + fixtures_df = pd.read_parquet(fixtures_path).groupby('session_id').get_group(task_settings['SESSION_TEMPLATE_ID']) + + # stimulus durations + stim_durations = dict( + T=task_settings['GO_TONE_DURATION'], + N=task_settings['WHITE_NOISE_DURATION'], + G=0.3, # visual stimulus duration is hardcoded to 300ms + V=0.1, # V=0.1102 from a a session # to be replaced later down + ) + for s in fixtures_df['stim_type'].unique(): + fixtures_df.loc[fixtures_df['stim_type'] == s, 'delay'] = stim_durations[s] + + # the audio go cue times + mic_go_cue_times_bpod = np.load(self.session_path / self.collection / '_iblmic_audioOnsetGoCue.times_mic.npy') + + # adding the delays + def obj_fun(x, mic_go_cue_times_bpod, fixtures_df): + # fit overhead + for s in ['T', 'N', 'G', 'V']: + if s == 'T' or s == 'N': + fixtures_df.loc[fixtures_df['stim_type'] == s, 'overhead'] = x[0] + 
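            # x holds the per-stimulus-type overheads being fitted: x[0] is shared by the sound
            # stimuli ('T' go tone, 'N' white noise), x[1] is the visual stimulus 'G' and x[2] the
            # valve 'V'; the same mapping is unpacked into the overheads dict after the fit.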
if s == 'G': + fixtures_df.loc[fixtures_df['stim_type'] == s, 'overhead'] = x[1] + if s == 'V': + fixtures_df.loc[fixtures_df['stim_type'] == s, 'overhead'] = x[2] + + fixtures_df['t_rel'] = np.cumsum( + fixtures_df['stim_delay'].values + np.roll(fixtures_df['delay'].values, 1) + fixtures_df['overhead'].values, + ) + + mic_go_cue_times_rel = fixtures_df.groupby('stim_type').get_group('T')['t_rel'].values + err = np.sum((np.diff(mic_go_cue_times_rel) - np.diff(mic_go_cue_times_bpod)) ** 2) + return err + + # fitting the overheads + fixtures_df['overhead'] = 0.0 + bounds = ((0, np.inf), (0, np.inf), (0, np.inf)) + pfit = minimize(obj_fun, (0.0, 0.0, 0.0), args=(mic_go_cue_times_bpod, fixtures_df), bounds=bounds) + overheads = dict(zip(['T', 'N', 'G', 'V'], [pfit.x[0], pfit.x[0], pfit.x[1], pfit.x[2]])) + + for s in fixtures_df['stim_type'].unique(): + fixtures_df.loc[fixtures_df['stim_type'] == s, 'overhead'] = overheads[s] + fixtures_df['t_rel'] = np.cumsum( + fixtures_df['stim_delay'].values + np.roll(fixtures_df['delay'].values, 1) + fixtures_df['overhead'].values + ) + + mic_go_cue_times_rel = fixtures_df.groupby('stim_type').get_group('T')['t_rel'].values + + sync_fun, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( + mic_go_cue_times_rel, mic_go_cue_times_bpod, return_indices=True, linear=True + ) + + assert ix_nph.shape[0] == 40, 'not all microphone onset events are accepted by the sync function' + if np.absolute(drift_ppm) > 20: + _logger.warning(f'sync with excessive drift: {drift_ppm}') + else: + _logger.info(f'synced with drift: {drift_ppm}') + + # applying the sync to all the timestamps in the fixtures + fixtures_df['t_bpod'] = sync_fun(fixtures_df['t_rel']) + + # dealing with the valve + # valve_times_rel = fixtures_df.groupby('stim_type').get_group('V')['t_rel'].values + # valve_times_bpod = sync_fun(valve_times_rel) + valve_times_bpod = fixtures_df.groupby('stim_type').get_group('V')['t_bpod'].values + + # getting the valve timestamps from the DAQ + timestamps_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.pkl' + if self.load_timestamps and timestamps_filepath.exists(): + with open(timestamps_filepath, 'rb') as fH: + self.timestamps = pickle.load(fH) + else: # extract timestamps: + tdms_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.raw.tdms' + self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath, save_path=timestamps_filepath) + + sync_channel = self.session_params['devices']['neurophotometrics']['sync_channel'] + valve_times_nph = self.timestamps[f'DI{sync_channel}'] + + sync_fun, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( + valve_times_nph, valve_times_bpod, return_indices=True, linear=True + ) + assert ix_bpod.shape[0] == 40, 'not all bpod valve onset events are accepted by the sync function' + if np.absolute(drift_ppm) > 20: + _logger.warning(f'sync with excessive drift: {drift_ppm}') + else: + _logger.info(f'synced with drift: {drift_ppm}') + + # loads the raw photometry data + raw_photometry_folder = self.session_path / self.photometry_collection + photometry_df = fpio.from_neurophotometrics_file_to_photometry_df( + raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt', + drop_first=False, + ) + # apply synchronization + photometry_df['times'] = sync_fun(photometry_df['times']) + # verify that all are valid (i.e. mean nothing ... 
) + + # write to disk + # the synced photometry signal + photometry_filepath = self.session_path / 'alf' / 'photometry' / 'photometry.signal.pqt' + photometry_filepath.parent.mkdir(parents=True, exist_ok=True) + photometry_df.to_parquet(photometry_filepath) + + # writing the locations + rois = [] + for k, v in self.session_params['devices']['neurophotometrics']['fibers'].items(): + rois.append({'ROI': k, 'fiber': f'fiber_{v["location"]}', 'brain_region': v['location']}) + locations_df = pd.DataFrame(rois).set_index('ROI') + locations_filepath = self.session_path / 'alf' / 'photometry' / 'photometryROI.locations.pqt' + locations_filepath.parent.mkdir(parents=True, exist_ok=True) + locations_df.to_parquet(locations_filepath) + + # writing the passive events table + # get the valve open duration + ttl_durations_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdurations.pkl' + if self.load_timestamps and ttl_durations_filepath.exists(): + with open(ttl_durations_filepath, 'rb') as fH: + ttl_durations = pickle.load(fH) + else: # extract timestamps: + tdms_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.raw.tdms' + ttl_durations = extract_ttl_durations_from_tdms_file(tdms_filepath, save_path=ttl_durations_filepath) + + valve_open_dur = np.median(ttl_durations[f'DI{sync_channel}'][ix_nph]) + passiveStims_df = pd.DataFrame( + dict( + valveOn=fixtures_df.groupby('stim_type').get_group('V')['t_bpod'], + valveOff=fixtures_df.groupby('stim_type').get_group('V')['t_bpod'] + valve_open_dur, + toneOn=fixtures_df.groupby('stim_type').get_group('T')['t_bpod'], + toneOff=fixtures_df.groupby('stim_type').get_group('T')['t_bpod'] + task_settings['GO_TONE_DURATION'], + noiseOn=fixtures_df.groupby('stim_type').get_group('N')['t_bpod'], + noiseOff=fixtures_df.groupby('stim_type').get_group('N')['t_bpod'] + task_settings['WHITE_NOISE_DURATION'], + ) + ) + passiveStims_filepath = self.session_path / 'alf' / self.collection / '_ibl_passiveStims.table.pqt' + passiveStims_filepath.parent.mkdir(exist_ok=True, parents=True) + passiveStims_df.reset_index().to_parquet(passiveStims_filepath) + + return photometry_filepath, locations_filepath, passiveStims_filepath From af33db373ff08685fabb82038f5049b7a23be152 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 24 Sep 2025 14:31:59 +0100 Subject: [PATCH 61/80] un-ruffing --- ibllib/pipes/dynamic_pipeline.py | 159 ++++++++++++++----------------- 1 file changed, 71 insertions(+), 88 deletions(-) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index 7ed394e36..2f3acd44a 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -20,7 +20,6 @@ :class:`ibllib.io.extractors.base.BaseBpodTrialsExtractor` class, and located in either the personal projects repo or in :py:mod:`ibllib.io.extractors.bpod_trials` module. 
""" - import logging import re from fnmatch import fnmatch @@ -72,7 +71,7 @@ def acquisition_description_legacy_session(session_path, save=False): def get_acquisition_description(protocol): - """ " + """" This is a set of example acquisition descriptions for experiments - choice_world_recording - choice_world_biased @@ -81,7 +80,7 @@ def get_acquisition_description(protocol): - choice_world_passive That are part of the IBL pipeline """ - if 'ephys' in protocol: # canonical ephys + if 'ephys' in protocol: # canonical ephys devices = { 'cameras': { 'right': {'collection': 'raw_video_data', 'sync_label': 'audio'}, @@ -90,32 +89,38 @@ def get_acquisition_description(protocol): }, 'neuropixel': { 'probe00': {'collection': 'raw_ephys_data/probe00', 'sync_label': 'imec_sync'}, - 'probe01': {'collection': 'raw_ephys_data/probe01', 'sync_label': 'imec_sync'}, + 'probe01': {'collection': 'raw_ephys_data/probe01', 'sync_label': 'imec_sync'} + }, + 'microphone': { + 'microphone': {'collection': 'raw_behavior_data', 'sync_label': None} }, - 'microphone': {'microphone': {'collection': 'raw_behavior_data', 'sync_label': None}}, } acquisition_description = { # this is the current ephys pipeline description 'devices': devices, 'tasks': [ {'ephysChoiceWorld': {'collection': 'raw_behavior_data', 'sync_label': 'bpod'}}, - {'passiveChoiceWorld': {'collection': 'raw_passive_data', 'sync_label': 'bpod'}}, + {'passiveChoiceWorld': {'collection': 'raw_passive_data', 'sync_label': 'bpod'}} ], - 'sync': {'nidq': {'collection': 'raw_ephys_data', 'extension': 'bin', 'acquisition_software': 'spikeglx'}}, + 'sync': { + 'nidq': {'collection': 'raw_ephys_data', 'extension': 'bin', 'acquisition_software': 'spikeglx'} + }, 'procedures': ['Ephys recording with acute probe(s)'], - 'projects': ['ibl_neuropixel_brainwide_01'], + 'projects': ['ibl_neuropixel_brainwide_01'] } else: devices = { 'cameras': { 'left': {'collection': 'raw_video_data', 'sync_label': 'audio'}, }, - 'microphone': {'microphone': {'collection': 'raw_behavior_data', 'sync_label': None}}, + 'microphone': { + 'microphone': {'collection': 'raw_behavior_data', 'sync_label': None} + }, } acquisition_description = { # this is the current ephys pipeline description 'devices': devices, 'sync': {'bpod': {'collection': 'raw_behavior_data'}}, 'procedures': ['Behavior training/tasks'], - 'projects': ['ibl_neuropixel_brainwide_01'], + 'projects': ['ibl_neuropixel_brainwide_01'] } if 'biased' in protocol: key = 'biasedChoiceWorld' @@ -125,7 +130,10 @@ def get_acquisition_description(protocol): key = 'habituationChoiceWorld' else: raise ValueError(f'Unknown protocol "{protocol}"') - acquisition_description['tasks'] = [{key: {'collection': 'raw_behavior_data', 'sync_label': 'bpod'}}] + acquisition_description['tasks'] = [{key: { + 'collection': 'raw_behavior_data', + 'sync_label': 'bpod' + }}] acquisition_description['version'] = '1.0.0' return acquisition_description @@ -216,7 +224,7 @@ def _get_trials_tasks(session_path, acquisition_description=None, sync_tasks=Non kwargs = {'session_path': session_path, 'one': one} # Syncing tasks - ((sync, sync_args),) = acquisition_description['sync'].items() + (sync, sync_args), = acquisition_description['sync'].items() sync_label = _sync_label(sync, **sync_args) # get the format of the DAQ data. 
This informs the extractor task sync_args['sync_collection'] = sync_args.pop('collection') # rename the key so it matches task run arguments sync_args['sync_ext'] = sync_args.pop('extension', None) @@ -260,16 +268,15 @@ def _get_trials_tasks(session_path, acquisition_description=None, sync_tasks=Non else: # lookup in the project extraction repo if we find an extractor class import projects.extraction_tasks - if hasattr(projects.extraction_tasks, extractor): task = getattr(projects.extraction_tasks, extractor) elif hasattr(projects.extraction_tasks, extractor + sync_label.capitalize()): task = getattr(btasks, extractor + sync_label.capitalize()) else: raise NotImplementedError( - f'Extractor "{extractor}" not found in main IBL pipeline nor in personal projects' - ) - _logger.debug('%s (protocol #%i, task #%i) = %s.%s', protocol, i, j, task.__module__, task.__name__) + f'Extractor "{extractor}" not found in main IBL pipeline nor in personal projects') + _logger.debug('%s (protocol #%i, task #%i) = %s.%s', + protocol, i, j, task.__module__, task.__name__) # Rename the class to something more informative task_name = f'{task.__name__}_{i:02}' if not (task.__name__.startswith('TrainingStatus') or task.__name__.endswith('RegisterRaw')): @@ -307,16 +314,13 @@ def _get_trials_tasks(session_path, acquisition_description=None, sync_tasks=Non raise NotImplementedError(f'No trials task available for sync namespace "{sync_label}"') compute_status = True tasks[f'RegisterRaw_{protocol}_{i:02}'] = type(f'RegisterRaw_{protocol}_{i:02}', (registration_class,), {})( - **kwargs, **task_kwargs - ) + **kwargs, **task_kwargs) parents = [tasks[f'RegisterRaw_{protocol}_{i:02}']] + sync_tasks tasks[f'Trials_{protocol}_{i:02}'] = type(f'Trials_{protocol}_{i:02}', (behaviour_class,), {})( - **kwargs, **sync_kwargs, **task_kwargs, parents=parents - ) + **kwargs, **sync_kwargs, **task_kwargs, parents=parents) if compute_status: - tasks[f'TrainingStatus_{protocol}_{i:02}'] = type( - f'TrainingStatus_{protocol}_{i:02}', (btasks.TrainingStatus,), {} - )(**kwargs, **task_kwargs, parents=[tasks[f'Trials_{protocol}_{i:02}']]) + tasks[f'TrainingStatus_{protocol}_{i:02}'] = type(f'TrainingStatus_{protocol}_{i:02}', ( + btasks.TrainingStatus,), {})(**kwargs, **task_kwargs, parents=[tasks[f'Trials_{protocol}_{i:02}']]) return tasks @@ -407,12 +411,11 @@ def make_pipeline(session_path, **pkwargs): kwargs = {'session_path': session_path, 'one': pkwargs.get('one')} # Registers the experiment description file - tasks['ExperimentDescriptionRegisterRaw'] = type( - 'ExperimentDescriptionRegisterRaw', (bstasks.ExperimentDescriptionRegisterRaw,), {} - )(**kwargs) + tasks['ExperimentDescriptionRegisterRaw'] = type('ExperimentDescriptionRegisterRaw', + (bstasks.ExperimentDescriptionRegisterRaw,), {})(**kwargs) # Syncing tasks - ((sync, sync_args),) = acquisition_description['sync'].items() + (sync, sync_args), = acquisition_description['sync'].items() sync_args = sync_args.copy() # ensure acquisition_description unchanged sync_label = _sync_label(sync, **sync_args) # get the format of the DAQ data. 
This informs the extractor task sync_args['sync_collection'] = sync_args.pop('collection') # rename the key so it matches task run arguments @@ -423,16 +426,14 @@ def make_pipeline(session_path, **pkwargs): if sync_label == 'nidq' and sync_args['sync_collection'] == 'raw_ephys_data': tasks['SyncRegisterRaw'] = type('SyncRegisterRaw', (etasks.EphysSyncRegisterRaw,), {})(**kwargs, **sync_kwargs) tasks[f'SyncPulses_{sync}'] = type(f'SyncPulses_{sync}', (etasks.EphysSyncPulses,), {})( - **kwargs, **sync_kwargs, parents=[tasks['SyncRegisterRaw']] - ) + **kwargs, **sync_kwargs, parents=[tasks['SyncRegisterRaw']]) sync_tasks = [tasks[f'SyncPulses_{sync}']] elif sync_label == 'timeline': tasks['SyncRegisterRaw'] = type('SyncRegisterRaw', (stasks.SyncRegisterRaw,), {})(**kwargs, **sync_kwargs) elif sync_label == 'nidq': tasks['SyncRegisterRaw'] = type('SyncRegisterRaw', (stasks.SyncMtscomp,), {})(**kwargs, **sync_kwargs) tasks[f'SyncPulses_{sync}'] = type(f'SyncPulses_{sync}', (stasks.SyncPulses,), {})( - **kwargs, **sync_kwargs, parents=[tasks['SyncRegisterRaw']] - ) + **kwargs, **sync_kwargs, parents=[tasks['SyncRegisterRaw']]) sync_tasks = [tasks[f'SyncPulses_{sync}']] elif sync_label == 'tdms': tasks['SyncRegisterRaw'] = type('SyncRegisterRaw', (stasks.SyncRegisterRaw,), {})(**kwargs, **sync_kwargs) @@ -440,7 +441,9 @@ def make_pipeline(session_path, **pkwargs): pass # ATM we don't have anything for this; it may not be needed in the future # Behavior tasks - tasks.update(_get_trials_tasks(session_path, acquisition_description, sync_tasks=sync_tasks, one=pkwargs.get('one'))) + tasks.update( + _get_trials_tasks(session_path, acquisition_description, sync_tasks=sync_tasks, one=pkwargs.get('one')) + ) # Ephys tasks if 'neuropixel' in devices: @@ -460,46 +463,38 @@ def make_pipeline(session_path, **pkwargs): if (nptype == 'NP2.1') or (nptype == 'NP2.4' and nshanks == 1): tasks[f'EphyCompressNP21_{pname}'] = type(f'EphyCompressNP21_{pname}', (etasks.EphysCompressNP21,), {})( - **kwargs, **ephys_kwargs, pname=pname - ) + **kwargs, **ephys_kwargs, pname=pname) all_probes.append(pname) register_tasks.append(tasks[f'EphyCompressNP21_{pname}']) elif nptype == 'NP2.4' and nshanks > 1: tasks[f'EphyCompressNP24_{pname}'] = type(f'EphyCompressNP24_{pname}', (etasks.EphysCompressNP24,), {})( - **kwargs, **ephys_kwargs, pname=pname, nshanks=nshanks - ) + **kwargs, **ephys_kwargs, pname=pname, nshanks=nshanks) register_tasks.append(tasks[f'EphyCompressNP24_{pname}']) all_probes += [f'{pname}{chr(97 + int(shank))}' for shank in range(nshanks)] else: tasks[f'EphysCompressNP1_{pname}'] = type(f'EphyCompressNP1_{pname}', (etasks.EphysCompressNP1,), {})( - **kwargs, **ephys_kwargs, pname=pname - ) + **kwargs, **ephys_kwargs, pname=pname) register_tasks.append(tasks[f'EphysCompressNP1_{pname}']) all_probes.append(pname) if nptype == '3A': tasks['EphysPulses'] = type('EphysPulses', (etasks.EphysPulses,), {})( - **kwargs, **ephys_kwargs, **sync_kwargs, pname=all_probes, parents=register_tasks + sync_tasks - ) + **kwargs, **ephys_kwargs, **sync_kwargs, pname=all_probes, parents=register_tasks + sync_tasks) for pname in all_probes: register_task = [reg_task for reg_task in register_tasks if pname[:7] in reg_task.name] if nptype != '3A': tasks[f'EphysPulses_{pname}'] = type(f'EphysPulses_{pname}', (etasks.EphysPulses,), {})( - **kwargs, **ephys_kwargs, **sync_kwargs, pname=[pname], parents=register_task + sync_tasks - ) + **kwargs, **ephys_kwargs, **sync_kwargs, pname=[pname], parents=register_task + sync_tasks) 
tasks[f'Spikesorting_{pname}'] = type(f'Spikesorting_{pname}', (etasks.SpikeSorting,), {})( - **kwargs, **ephys_kwargs, pname=pname, parents=[tasks[f'EphysPulses_{pname}']] - ) + **kwargs, **ephys_kwargs, pname=pname, parents=[tasks[f'EphysPulses_{pname}']]) else: tasks[f'Spikesorting_{pname}'] = type(f'Spikesorting_{pname}', (etasks.SpikeSorting,), {})( - **kwargs, **ephys_kwargs, pname=pname, parents=[tasks['EphysPulses']] - ) + **kwargs, **ephys_kwargs, pname=pname, parents=[tasks['EphysPulses']]) tasks[f'RawEphysQC_{pname}'] = type(f'RawEphysQC_{pname}', (etasks.RawEphysQC,), {})( - **kwargs, **ephys_kwargs, pname=pname, parents=register_task - ) + **kwargs, **ephys_kwargs, pname=pname, parents=register_task) # Video tasks if 'cameras' in devices: @@ -513,33 +508,35 @@ def make_pipeline(session_path, **pkwargs): tasks[tn] = type((tn := 'VideoConvert'), (vtasks.VideoConvert,), {})(**kwargs, **video_kwargs) dlc_parent_task = tasks['VideoConvert'] tasks[tn] = type((tn := f'VideoSyncQC_{sync}'), (vtasks.VideoSyncQcCamlog,), {})( - **kwargs, **video_kwargs, **sync_kwargs - ) + **kwargs, **video_kwargs, **sync_kwargs) else: - tasks[tn] = type((tn := 'VideoRegisterRaw'), (vtasks.VideoRegisterRaw,), {})(**kwargs, **video_kwargs) - tasks[tn] = type((tn := 'VideoCompress'), (vtasks.VideoCompress,), {})(**kwargs, **video_kwargs, **sync_kwargs) + tasks[tn] = type((tn := 'VideoRegisterRaw'), (vtasks.VideoRegisterRaw,), {})( + **kwargs, **video_kwargs) + tasks[tn] = type((tn := 'VideoCompress'), (vtasks.VideoCompress,), {})( + **kwargs, **video_kwargs, **sync_kwargs) dlc_parent_task = tasks['VideoCompress'] if sync == 'bpod': tasks[tn] = type((tn := f'VideoSyncQC_{sync}'), (vtasks.VideoSyncQcBpod,), {})( - **kwargs, **video_kwargs, **sync_kwargs, parents=[tasks['VideoCompress']] - ) + **kwargs, **video_kwargs, **sync_kwargs, parents=[tasks['VideoCompress']]) elif sync == 'nidq': # Here we restrict to videos that we support (left, right or body) video_kwargs['cameras'] = subset_cams tasks[tn] = type((tn := f'VideoSyncQC_{sync}'), (vtasks.VideoSyncQcNidq,), {})( - **kwargs, **video_kwargs, **sync_kwargs, parents=[tasks['VideoCompress']] + sync_tasks - ) + **kwargs, **video_kwargs, **sync_kwargs, parents=[tasks['VideoCompress']] + sync_tasks) if sync_kwargs['sync'] != 'bpod': # Here we restrict to videos that we support (left, right or body) # Currently there is no plan to run DLC on the belly cam subset_cams = [c for c in cams if c in ('left', 'right', 'body')] video_kwargs['cameras'] = subset_cams - tasks[tn] = type((tn := 'DLC'), (vtasks.DLC,), {})(**kwargs, **video_kwargs, parents=[dlc_parent_task]) + tasks[tn] = type((tn := 'DLC'), (vtasks.DLC,), {})( + **kwargs, **video_kwargs, parents=[dlc_parent_task]) # The PostDLC plots require a trials object for QC # Find the first task that outputs a trials.table dataset - trials_task = (t for t in tasks.values() if any('trials.table' in f[0] for f in t.signature.get('output_files', []))) + trials_task = ( + t for t in tasks.values() if any('trials.table' in f[0] for f in t.signature.get('output_files', [])) + ) if trials_task := next(trials_task, None): parents = [tasks['DLC'], tasks[f'VideoSyncQC_{sync}'], trials_task] trials_collection = getattr(trials_task, 'output_collection', 'alf') @@ -547,62 +544,48 @@ def make_pipeline(session_path, **pkwargs): parents = [tasks['DLC'], tasks[f'VideoSyncQC_{sync}']] trials_collection = 'alf' tasks[tn] = type((tn := 'PostDLC'), (vtasks.EphysPostDLC,), {})( - **kwargs, cameras=subset_cams, 
trials_collection=trials_collection, parents=parents - ) + **kwargs, cameras=subset_cams, trials_collection=trials_collection, parents=parents) # Audio tasks if 'microphone' in devices: - ((microphone, micro_kwargs),) = devices['microphone'].items() + (microphone, micro_kwargs), = devices['microphone'].items() micro_kwargs['device_collection'] = micro_kwargs.pop('collection') if sync_kwargs['sync'] == 'bpod': tasks['AudioRegisterRaw'] = type('AudioRegisterRaw', (atasks.AudioSync,), {})( - **kwargs, **sync_kwargs, **micro_kwargs, collection=micro_kwargs['device_collection'] - ) + **kwargs, **sync_kwargs, **micro_kwargs, collection=micro_kwargs['device_collection']) elif sync_kwargs['sync'] == 'nidq': tasks['AudioRegisterRaw'] = type('AudioRegisterRaw', (atasks.AudioCompress,), {})(**kwargs, **micro_kwargs) # Widefield tasks if 'widefield' in devices: - ((_, wfield_kwargs),) = devices['widefield'].items() + (_, wfield_kwargs), = devices['widefield'].items() wfield_kwargs['device_collection'] = wfield_kwargs.pop('collection') tasks['WideFieldRegisterRaw'] = type('WidefieldRegisterRaw', (wtasks.WidefieldRegisterRaw,), {})( - **kwargs, **wfield_kwargs - ) + **kwargs, **wfield_kwargs) tasks['WidefieldCompress'] = type('WidefieldCompress', (wtasks.WidefieldCompress,), {})( - **kwargs, **wfield_kwargs, parents=[tasks['WideFieldRegisterRaw']] - ) + **kwargs, **wfield_kwargs, parents=[tasks['WideFieldRegisterRaw']]) tasks['WidefieldPreprocess'] = type('WidefieldPreprocess', (wtasks.WidefieldPreprocess,), {})( - **kwargs, **wfield_kwargs, parents=[tasks['WidefieldCompress']] - ) + **kwargs, **wfield_kwargs, parents=[tasks['WidefieldCompress']]) tasks['WidefieldSync'] = type('WidefieldSync', (wtasks.WidefieldSync,), {})( - **kwargs, - **wfield_kwargs, - **sync_kwargs, - parents=[tasks['WideFieldRegisterRaw'], tasks['WidefieldCompress']] + sync_tasks, - ) + **kwargs, **wfield_kwargs, **sync_kwargs, + parents=[tasks['WideFieldRegisterRaw'], tasks['WidefieldCompress']] + sync_tasks) tasks['WidefieldFOV'] = type('WidefieldFOV', (wtasks.WidefieldFOV,), {})( - **kwargs, **wfield_kwargs, parents=[tasks['WidefieldPreprocess']] - ) + **kwargs, **wfield_kwargs, parents=[tasks['WidefieldPreprocess']]) # Mesoscope tasks if 'mesoscope' in devices: - ((_, mscope_kwargs),) = devices['mesoscope'].items() + (_, mscope_kwargs), = devices['mesoscope'].items() mscope_kwargs['device_collection'] = mscope_kwargs.pop('collection') tasks['MesoscopeRegisterSnapshots'] = type('MesoscopeRegisterSnapshots', (mscope_tasks.MesoscopeRegisterSnapshots,), {})( - **kwargs, **mscope_kwargs - ) + **kwargs, **mscope_kwargs) tasks['MesoscopePreprocess'] = type('MesoscopePreprocess', (mscope_tasks.MesoscopePreprocess,), {})( - **kwargs, **mscope_kwargs - ) + **kwargs, **mscope_kwargs) tasks['MesoscopeFOV'] = type('MesoscopeFOV', (mscope_tasks.MesoscopeFOV,), {})( - **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']] - ) + **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']]) tasks['MesoscopeSync'] = type('MesoscopeSync', (mscope_tasks.MesoscopeSync,), {})( - **kwargs, **mscope_kwargs, **sync_kwargs - ) + **kwargs, **mscope_kwargs, **sync_kwargs) tasks['MesoscopeCompress'] = type('MesoscopeCompress', (mscope_tasks.MesoscopeCompress,), {})( - **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']] - ) + **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']]) if 'neurophotometrics' in devices: # note: devices['neurophotometrics'] is the acquisition_description From 
1e9f41ae5bf8184910e3f98d4497f3579f93998b Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 24 Sep 2025 14:35:51 +0100 Subject: [PATCH 62/80] undo unlreated changes --- brainbox/io/one.py | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/brainbox/io/one.py b/brainbox/io/one.py index 5b3a59cf9..c1c86726e 100644 --- a/brainbox/io/one.py +++ b/brainbox/io/one.py @@ -1017,7 +1017,7 @@ def timesprobe2times(self, values, direction='forward'): elif direction == 'reverse': return self._sync['reverse'](values) / self._sync['fs'] - def samples2times(self, values, direction='forward', band='ap'): + def samples2times(self, values, direction='forward'): """ Converts ephys sample values to session main clock seconds :param values: numpy array of times in seconds or samples to resync @@ -1025,8 +1025,6 @@ def samples2times(self, values, direction='forward', band='ap'): (seconds main time to samples probe time) :return: """ - if band == 'lf': - values *= 12 self._get_probe_info() return self._sync[direction](values) @@ -1052,8 +1050,8 @@ def raster(self, spikes, channels, save_dir=None, br=None, label='raster', time_ :param **kwargs: kwargs passed to `driftmap()` (optional) :return: """ - br = BrainRegions() if br is None else br - time_series = {} if time_series is None else time_series + br = br or BrainRegions() + time_series = time_series or {} fig, axs = plt.subplots(2, 2, gridspec_kw={ 'width_ratios': [.95, .05], 'height_ratios': [.1, .9]}, figsize=(16, 9), sharex='col') axs[0, 1].set_axis_off() @@ -1096,20 +1094,13 @@ def plot_rawdata_snippet(self, sr, spikes, clusters, t0, save_dir=None, label='raster', gain=-93, - title=None, - alpha=0.3, - processing='destripe'): + title=None): # compute the raw data offset and destripe, we take 400ms around t0 first_sample, last_sample = (int((t0 - 0.2) * sr.fs), int((t0 + 0.2) * sr.fs)) raw = sr[first_sample:last_sample, :-sr.nsync].T channel_labels = channels['labels'] if (channels is not None) and ('labels' in channels) else True - if processing == 'destripe': - samples = ibldsp.voltage.destripe(raw, sr.fs, channel_labels=channel_labels) - else: - import scipy.signal - sos = scipy.signal.butter(**{"N": 3, "Wn": 300 / sr.fs * 2, "btype": "highpass"}, output="sos") - samples = scipy.signal.sosfiltfilt(sos, raw) + destriped = ibldsp.voltage.destripe(raw, sr.fs, channel_labels=channel_labels) # filter out the spikes according to good/bad clusters and to the time slice spike_sel = slice(*np.searchsorted(spikes['samples'], [first_sample, last_sample])) ss = spikes['samples'][spike_sel] @@ -1119,9 +1110,9 @@ def plot_rawdata_snippet(self, sr, spikes, clusters, t0, title = self._default_plot_title(spikes) # display the raw data snippet with spikes overlaid fig, axs = plt.subplots(1, 2, gridspec_kw={'width_ratios': [.95, .05]}, figsize=(16, 9), sharex='col') - Density(samples, fs=sr.fs, taxis=1, gain=gain, ax=axs[0], t0=t0 - 0.2, unit='s') - axs[0].scatter(ss[sok] / sr.fs, sc[sok], color="green", alpha=alpha) - axs[0].scatter(ss[~sok] / sr.fs, sc[~sok], color="red", alpha=alpha) + Density(destriped, fs=sr.fs, taxis=1, gain=gain, ax=axs[0], t0=t0 - 0.2, unit='s') + axs[0].scatter(ss[sok] / sr.fs, sc[sok], color="green", alpha=0.5) + axs[0].scatter(ss[~sok] / sr.fs, sc[~sok], color="red", alpha=0.5) axs[0].set(title=title, xlim=[t0 - 0.035, t0 + 0.035]) # adds the channel locations if available if (channels is not None) and ('atlas_id' in channels): @@ -1323,7 +1314,7 @@ def _find_behaviour_collection(self, obj): 
f'e.g sl.load_{obj}(collection="{collections[0]}")') raise ALFMultipleCollectionsFound - def load_trials(self, collection=None, revision=None): + def load_trials(self, collection=None): """ Function to load trials data into SessionLoader.trials @@ -1332,13 +1323,13 @@ def load_trials(self, collection=None, revision=None): collection: str Alf collection of trials data """ - revision = self.revision if revision is None else revision + if not collection: collection = self._find_behaviour_collection('trials') # itiDuration frequently has a mismatched dimension, and we don't need it, exclude using regex self.one.wildcards = False self.trials = self.one.load_object( - self.eid, 'trials', collection=collection, attribute=r'(?!itiDuration).*', revision=revision or None).to_df() + self.eid, 'trials', collection=collection, attribute=r'(?!itiDuration).*', revision=self.revision or None).to_df() self.one.wildcards = True self.data_info.loc[self.data_info['name'] == 'trials', 'is_loaded'] = True From 242be1e198896d7c75bedc57953c23a942cd7c35 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 24 Sep 2025 14:36:35 +0100 Subject: [PATCH 63/80] removing unrelated file (erronously added) --- alyx_task.pkl | Bin 2347 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 alyx_task.pkl diff --git a/alyx_task.pkl b/alyx_task.pkl deleted file mode 100644 index 4b328c69c715e72448be87278c916882074ddd60..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2347 zcmcgu-D=!M6n5gD#-^e4hh7v411+SKm5^pe8fhslCM6*?b<^6E(y)k;G`l;sR#G%0 zyL%Dn1I*?0eR|W^=tJ}gdPa_8L*=wiAcDgzBh7cd`S$$%w)H#O-58!{_A_%^mezbq zilm?ki@Ye!Q;!x&?xhiCUR(x@U=oUwQET36c_(VE>qOfqshe&MJ1Z`0QD+s;Gn^g$V9k!wT=&YFJNKJa zTL~?!*{b;pmJc7uyc3U?jc!&#ck8eBzdu^n1$^3evgxE=+k>CYu57Ym$-9NH2is=5 z5~rfF2ODN{(e}eyxb>FV?D)zaY?-@Su0+=KI#Z%h_RzdLY>0}tN|fiD55M~R&%gcv z!OWicRTRDEc?E|f>_!lTGjx9`ijx7cZhMX9 zvJz?>Ccp%buI_LBNRg2OP5lQ<>`N!f@OJP9H;iLQ!wSqowa%x_-51fb%1>Ak6c zC*L80S_(PqLg2>dlNs_n4~2e+364pWGMXebAOt~iZt7CG#PwX5x$j2dLB9#ul|SJP z>+>=K|4X2k0$eJ@2nmvqKq4h6OQTVRph-Wk#nx;O)fl=CbYbBnwC1DBT7MjduGwJb z2LyUHwq_T4Xuleoy_*tt<}!z}Zl76m`(fagJ5^I~Jq!SwKOXN}rIsIGKr$~3k3&4A z@stps#Zkmy0N8c8m93f*eD6TP;45j(UeQ#QD9(Z2@Az5vqKU3EH!)cA_HYimdR@C_ ze#_iF2NIeO9{|7Q=Vtuudqb%zfa{5y3kC$Ux&Me+^8qiO_7Y|hxI?Czxjuv8=8{?8 G_x?ZOxz5P| From f2276bfc422664c644c4eb86a393986b5d1eeb56 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 24 Sep 2025 14:39:07 +0100 Subject: [PATCH 64/80] undo unrelated changes --- ibllib/pipes/ephys_tasks.py | 20 ++-- ibllib/pipes/local_server.py | 2 +- ibllib/pipes/video_tasks.py | 173 ++++++++++++----------------------- 3 files changed, 73 insertions(+), 122 deletions(-) diff --git a/ibllib/pipes/ephys_tasks.py b/ibllib/pipes/ephys_tasks.py index 253e13420..cb9a0099b 100644 --- a/ibllib/pipes/ephys_tasks.py +++ b/ibllib/pipes/ephys_tasks.py @@ -1,10 +1,8 @@ -import importlib import logging from pathlib import Path import re import shutil import subprocess -import sys import traceback import packaging.version @@ -126,7 +124,7 @@ class EphysCompressNP1(base_tasks.EphysTask): priority = 90 cpu = 2 io_charge = 100 # this jobs reads raw ap files - job_size = 'large' + job_size = 'small' @property def signature(self): @@ -657,7 +655,15 @@ def scratch_folder_run(self): For a scratch drive at /mnt/h0 we would have the following temp dir: /mnt/h0/iblsorter_1.8.0_CSHL071_2020-10-04_001_probe01/ """ - scratch_drive = self.scratch_folder if self.scratch_folder else Path('/scratch') + # get the scratch drive from the shell 
script + if self.scratch_folder is None: + with open(self.SHELL_SCRIPT) as fid: + lines = fid.readlines() + line = [line for line in lines if line.startswith("SCRATCH_DRIVE=")][0] + m = re.search(r"\=(.*?)(\#|\n)", line)[0] + scratch_drive = Path(m[1:-1].strip()) + else: + scratch_drive = self.scratch_folder assert scratch_drive.exists(), f"Scratch drive {scratch_drive} not found" # get the version of the sorter self.version = self._fetch_iblsorter_version(self.SORTER_REPOSITORY) @@ -729,11 +735,11 @@ def _run_iblsort(self, ap_file): self.FORCE_RERUN = True self.scratch_folder_run.mkdir(parents=True, exist_ok=True) check_nvidia_driver() - # this is the best way I found to check if iblsorter is installed and available without a try block - if 'iblsorter' in sys.modules and importlib.util.find_spec('iblsorter.ibl') is not None: + try: + # if pykilosort is in the environment, use the installed version within the task import iblsorter.ibl # noqa iblsorter.ibl.run_spike_sorting_ibl(bin_file=ap_file, scratch_dir=self.scratch_folder_run, delete=False) - else: + except ImportError: command2run = f"{self.SHELL_SCRIPT} {ap_file} {self.scratch_folder_run}" _logger.info(command2run) process = subprocess.Popen( diff --git a/ibllib/pipes/local_server.py b/ibllib/pipes/local_server.py index c02ae11c3..92f1cf39a 100644 --- a/ibllib/pipes/local_server.py +++ b/ibllib/pipes/local_server.py @@ -106,7 +106,7 @@ def job_creator(root_path, one=None, dry=False, rerun=False): if not one: one = ONE(cache_rest=None) rc = IBLRegistrationClient(one=one) - flag_files = Path(root_path).glob('**/raw_session.flag') + flag_files = Path(root_path).glob('*/????-??-??/*/raw_session.flag') flag_files = filter(lambda x: is_session_path(x.parent), flag_files) pipes = [] all_datasets = [] diff --git a/ibllib/pipes/video_tasks.py b/ibllib/pipes/video_tasks.py index 5afe80796..e0ced2695 100644 --- a/ibllib/pipes/video_tasks.py +++ b/ibllib/pipes/video_tasks.py @@ -328,7 +328,7 @@ def _run(self, update=True, **kwargs): class DLC(base_tasks.VideoTask): """ This task relies on a correctly installed dlc environment as per - https://github.com/int-brain-lab/iblvideo#installing-dlc-locally-on-an-ibl-server---tensorflow-2120 + https://docs.google.com/document/d/1g0scP6_3EmaXCU4SsDNZWwDTaD9MG0es_grLA-d0gh0/edit# If your environment is set up otherwise, make sure that you set the respective attributes: t = EphysDLC(session_path) @@ -341,7 +341,6 @@ class DLC(base_tasks.VideoTask): level = 2 force = True job_size = 'large' - env = 'dlc' dlcenv = Path.home().joinpath('Documents', 'PYTHON', 'envs', 'dlcenv', 'bin', 'activate') scripts = Path.home().joinpath('Documents', 'PYTHON', 'iblscripts', 'deploy', 'serverpc', 'dlc') @@ -358,41 +357,25 @@ def signature(self): return signature def _check_dlcenv(self): - """ - Check DLC environment and return iblvideo version. - - Attempts to import iblvideo directly. If unsuccessful, checks for necessary - scripts and environment, then retrieves version via subprocess. 
- - Returns: - tuple: (version: str, needs_subprocess: bool) - """ - try: - import iblvideo - version = iblvideo.__version__ - needs_subprocess = False - _logger.info(f'Current environment contains iblvideo version {self.version}') - except ImportError: - # Check that scripts are present, dlcenv can be activated and get iblvideo version - assert len(list(self.scripts.rglob('run_dlc.*'))) == 2, \ - f'Scripts run_dlc.sh and run_dlc.py do not exist in {self.scripts}' - assert len(list(self.scripts.rglob('run_motion.*'))) == 2, \ - f'Scripts run_motion.sh and run_motion.py do not exist in {self.scripts}' - assert self.dlcenv.exists(), f'DLC environment does not exist in assumed location {self.dlcenv}' - command2run = f"source {self.dlcenv}; python -c 'import iblvideo; print(iblvideo.__version__)'" - process = subprocess.Popen( - command2run, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - executable='/bin/bash' - ) - info, error = process.communicate() - if process.returncode != 0: - raise AssertionError(f"DLC environment check failed\n{error.decode('utf-8')}") - version = info.decode('utf-8').strip().split('\n')[-1] - needs_subprocess = True - return version, needs_subprocess + """Check that scripts are present, dlcenv can be activated and get iblvideo version""" + assert len(list(self.scripts.rglob('run_dlc.*'))) == 2, \ + f'Scripts run_dlc.sh and run_dlc.py do not exist in {self.scripts}' + assert len(list(self.scripts.rglob('run_motion.*'))) == 2, \ + f'Scripts run_motion.sh and run_motion.py do not exist in {self.scripts}' + assert self.dlcenv.exists(), f'DLC environment does not exist in assumed location {self.dlcenv}' + command2run = f"source {self.dlcenv}; python -c 'import iblvideo; print(iblvideo.__version__)'" + process = subprocess.Popen( + command2run, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + executable='/bin/bash' + ) + info, error = process.communicate() + if process.returncode != 0: + raise AssertionError(f"DLC environment check failed\n{error.decode('utf-8')}") + version = info.decode('utf-8').strip().split('\n')[-1] + return version @staticmethod def _video_intact(file_mp4): @@ -403,75 +386,6 @@ def _video_intact(file_mp4): cap.release() return intact - def _run_dlc(self, file_mp4, cam, overwrite, flag_subprocess=True): - try: - if flag_subprocess: - _logger.info(f'iblvideo version {self.version}') - command2run = f"{self.scripts.joinpath('run_dlc.sh')} {str(self.dlcenv)} {file_mp4} {overwrite}" - _logger.info(command2run) - process = subprocess.Popen( - command2run, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - executable='/bin/bash', - ) - info, error = process.communicate() - # info_str = info.decode("utf-8").strip() - # _logger.info(info_str) - if process.returncode != 0: - error_str = error.decode('utf-8').strip() - _logger.error(f'DLC failed for {cam}Camera.\n\n' - f'++++++++ Output of subprocess for debugging ++++++++\n\n' - f'{error_str}\n' - f'++++++++++++++++++++++++++++++++++++++++++++\n') - return process.returncode - pass - else: - from iblvideo import download_weights - from iblvideo.pose_dlc import dlc - path_dlc = download_weights() - dlc_result, _ = dlc(file_mp4, path_dlc=path_dlc, force=overwrite) - return 0 - except Exception as e: - _logger.error(f'An error occurred while running DLC for {cam}Camera: {e}') - _logger.error(traceback.format_exc()) - return -1 - - def _run_motion_energy(self, file_mp4, dlc_result, flag_subprocess=True): - if flag_subprocess: - command2run = 
f"{self.scripts.joinpath('run_motion.sh')} {str(self.dlcenv)} {file_mp4} {dlc_result}" - _logger.info(command2run) - process = subprocess.Popen( - command2run, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - executable='/bin/bash', - ) - info, error = process.communicate() - # info_str = info.decode('utf-8').strip() - # _logger.info(info_str) - if process.returncode != 0: - error_str = error.decode('utf-8').strip() - _logger.error(f'Motion energy failed for {file_mp4}.\n\n' - f'++++++++ Output of subprocess for debugging ++++++++\n\n' - f'{error_str}\n' - f'++++++++++++++++++++++++++++++++++++++++++++\n') - return_code = process.returncode - else: # runs the motion energy calculation in the current environment - try: - from iblvideo.motion_energy import motion_energy - _ = motion_energy(file_mp4, dlc_result) - return_code = 0 - except Exception: - _logger.error(f'Motion energy failed for {file_mp4}.\n\n' - f'++++++++ Output of subprocess for debugging ++++++++\n\n' - f'{traceback.format_exc()}\n' - f'++++++++++++++++++++++++++++++++++++++++++++\n') - return_code = -1 - return return_code - def _run(self, cams=None, overwrite=False): # Check that the cams are valid for DLC, remove the ones that aren't candidate_cams = cams or self.cameras @@ -505,24 +419,55 @@ def _run(self, cams=None, overwrite=False): _logger.error(f'Corrupt raw video file {file_mp4}') self.status = -1 continue - # Check that dlc environment is ok, shell scripts exists, and get iblvideo version, GPU addressable + self.version = self._check_dlcenv() + _logger.info(f'iblvideo version {self.version}') check_nvidia_driver() - self.version, flag_subprocess = self._check_dlcenv() - # Step 1: Run DLC for this camera _logger.info(f'Running DLC on {cam}Camera.') - return_code = self._run_dlc(file_mp4, cam, overwrite, flag_subprocess=flag_subprocess) - if return_code != 0: + command2run = f"{self.scripts.joinpath('run_dlc.sh')} {str(self.dlcenv)} {file_mp4} {overwrite}" + _logger.info(command2run) + process = subprocess.Popen( + command2run, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + executable='/bin/bash', + ) + info, error = process.communicate() + # info_str = info.decode("utf-8").strip() + # _logger.info(info_str) + if process.returncode != 0: + error_str = error.decode('utf-8').strip() + _logger.error(f'DLC failed for {cam}Camera.\n\n' + f'++++++++ Output of subprocess for debugging ++++++++\n\n' + f'{error_str}\n' + f'++++++++++++++++++++++++++++++++++++++++++++\n') self.status = -1 + # We dont' run motion energy, or add any files if dlc failed to run continue dlc_result = next(self.session_path.joinpath('alf').glob(f'_ibl_{cam}Camera.dlc*.pqt')) actual_outputs.append(dlc_result) - # Step 2: Compute Motion Energy for this camera _logger.info(f'Computing motion energy for {cam}Camera') - return_code = self._run_motion_energy(file_mp4, dlc_result, flag_subprocess=flag_subprocess) - if return_code != 0: + command2run = f"{self.scripts.joinpath('run_motion.sh')} {str(self.dlcenv)} {file_mp4} {dlc_result}" + _logger.info(command2run) + process = subprocess.Popen( + command2run, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + executable='/bin/bash', + ) + info, error = process.communicate() + # info_str = info.decode('utf-8').strip() + # _logger.info(info_str) + if process.returncode != 0: + error_str = error.decode('utf-8').strip() + _logger.error(f'Motion energy failed for {cam}Camera.\n\n' + f'++++++++ Output of subprocess for debugging ++++++++\n\n' + 
f'{error_str}\n' + f'++++++++++++++++++++++++++++++++++++++++++++\n') self.status = -1 continue actual_outputs.append(next(self.session_path.joinpath('alf').glob( From fee6bc6e737e1b42e44dc86cd011c3656e96e173 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 24 Sep 2025 18:22:18 +0100 Subject: [PATCH 65/80] passive task fixtures moved from iblrig to ibllib --- ibllib/pipes/neurophotometrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 0b19ce226..abcc61f84 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -13,8 +13,8 @@ from nptdms import TdmsFile from abc import abstractmethod +import iblphotometry from iblphotometry import fpio -from iblrig_tasks import _iblrig_tasks_passiveChoiceWorld from one.api import ONE import json @@ -531,7 +531,7 @@ def __init__( def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # load the fixtures - from the relative delays between trials, an "absolute" time vector is # created that is used for the synchronization - fixtures_path = Path(_iblrig_tasks_passiveChoiceWorld.__file__).parent / 'passiveChoiceWorld_trials_fixtures.pqt' + fixtures_path = Path(iblphotometry.__file__).parent / 'iblphotometry_tests' / 'fixtures' / 'passiveChoiceWorld_trials_fixtures.pqt' # getting the task_settings with open(self.session_path / self.collection / '_iblrig_taskSettings.raw.json', 'r') as fH: From af83b9e7c578de67867bb28062fc68eb2c6e28b8 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 24 Sep 2025 18:36:58 +0100 Subject: [PATCH 66/80] flake8 --- ibllib/pipes/neurophotometrics.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index abcc61f84..8c317ae03 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -531,7 +531,9 @@ def __init__( def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # load the fixtures - from the relative delays between trials, an "absolute" time vector is # created that is used for the synchronization - fixtures_path = Path(iblphotometry.__file__).parent / 'iblphotometry_tests' / 'fixtures' / 'passiveChoiceWorld_trials_fixtures.pqt' + fixtures_path = ( + Path(iblphotometry.__file__).parent / 'iblphotometry_tests' / 'fixtures' / 'passiveChoiceWorld_trials_fixtures.pqt' + ) # getting the task_settings with open(self.session_path / self.collection / '_iblrig_taskSettings.raw.json', 'r') as fH: From 6b6cccfe6e1659dc5b6a225b20820da66707e90e Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 24 Sep 2025 18:44:53 +0100 Subject: [PATCH 67/80] undo changes in requirements.txt --- requirements.txt | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/requirements.txt b/requirements.txt index c7a5726dc..6204e1184 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,35 +1,24 @@ -# ibl libraries -ONE-api>=3.0.0 boto3 click>=7.0.0 colorlog>=4.0.2 flake8>=3.7.8 globus-sdk graphviz -ibl-neuropixel>=1.7.1 -ibl-style -iblatlas>=0.5.3 -iblqt>=0.4.2 -iblutil>=1.13.0 -imagecodecs # used to convert tif snapshots to png when registering mesoscope snapshots (also requires skimage) matplotlib>=3.0.3 -mtscomp>=1.0.1 -nptdms numba>=0.56 numpy>=1.18,<=2.2 # numpy 2.3 is not compatible with numba - ETA end of June 2025 nptdms opencv-python-headless pandas -phylib>=2.6.0 -psychofit pyarrow pynrrd>=0.4.0 -pyqt5 pytest requests>=2.22.0 
-scikit-image # this is a widefield requirement missing as of July 2023, we may remove it once wfield has this figured out scikit-learn>=0.22.1 scipy>=1.7.0 +scikit-image # this is a widefield requirement missing as of July 2023, we may remove it once wfield has this figured out +imagecodecs # used to convert tif snapshots to png when registering mesoscope snapshots (also requires skimage) +sparse seaborn>=0.9.0 tqdm>=4.32.1 # ibl libraries @@ -42,6 +31,6 @@ ONE-api>=3.2.0 phylib>=2.6.0 psychofit slidingRP>=1.1.1 # steinmetz lab refractory period metrics -sparse -tqdm>=4.32.1 -ibl-photometry +pyqt5 +ibl-style +iblphotometry>=0.1.2 \ No newline at end of file From 0315630f5d4076302e79fe1347eaa6a32e06829c Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Thu, 25 Sep 2025 14:56:41 +0100 Subject: [PATCH 68/80] removed duplicate tdms reader and unified timestamps and ttl duration extraction --- ibllib/pipes/neurophotometrics.py | 108 ++++++------------------------ 1 file changed, 19 insertions(+), 89 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 8c317ae03..64b44887d 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -58,10 +58,9 @@ def extract_timestamps_from_tdms_file( tdms_filepath: Path, save_path: Optional[Path] = None, chunk_size=10000, - extract_durations: bool = False, ) -> dict: """extractor for tdms files as written by the daqami software, configured for neurophotometrics - experiments: Frameclock is in AI7, DI1-4 are the bpod sync signals + experiments: Frameclock is in an analog channel (AI?), DI1-4 are the bpod sync signals Parameters ---------- @@ -75,7 +74,8 @@ def extract_timestamps_from_tdms_file( Returns ------- dict - a dict with the tdms channel names as keys and the timestamps of the rising fronts + a dict with the tdms channel names as keys and 'positive' the timestamps of the rising edges + 'negative' the falling edges """ # _logger.info(f'extracting timestamps from tdms file: {tdms_filepath}') @@ -100,72 +100,6 @@ def extract_timestamps_from_tdms_file( vals = df[digital_col].values.astype('int8') digital_channel_names = ['DI0', 'DI1', 'DI2', 'DI3'] - # ini - timestamps = {} - for ch in digital_channel_names: - timestamps[ch] = [] - - # chunked loop for memory efficiency - if chunk_size is not None: - n_chunks = df.shape[0] // chunk_size - for i in range(n_chunks): - vals_ = vals[i * chunk_size: (i + 1) * chunk_size] - # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') - data = _int2digital_channels(vals_) - - for j, name in enumerate(digital_channel_names): - ix = np.where(np.diff(data[:, j]) == 1)[0] + (chunk_size * i) - timestamps[name].append(ix / fs) - - for ch in digital_channel_names: - timestamps[ch] = np.concatenate(timestamps[ch]) - else: - data = _int2digital_channels(vals) - for j, name in enumerate(digital_channel_names): - ix = np.where(np.diff(data[:, j]) == 1)[0] - timestamps[name].append(ix / fs) - - if has_analog_group: - # frameclock data is recorded on an analog channel - for channel in analog_group.channels(): - signal = (channel.data > 2.5).astype('int32') # assumes 0-5V - timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs - - if save_path is not None: - _logger.info(f'saving extracted timestamps to: {save_path}') - with open(save_path, 'wb') as fH: - pickle.dump(timestamps, fH) - - return timestamps - - -def extract_ttl_durations_from_tdms_file( - tdms_filepath: Path, - save_path: Optional[Path] = None, - 
chunk_size=10000, -) -> dict: - _logger.info(f'extracting ttl_durations from tdms file: {tdms_filepath}') - - # this should be 10kHz - tdms_file = TdmsFile.read(tdms_filepath) - groups = tdms_file.groups() - - # this unfortunate hack is in here because there are a bunch of sessions - # where the frameclock is on DI0 - if len(groups) == 1: - has_analog_group = False - (digital_group,) = groups - if len(groups) == 2: - has_analog_group = True - analog_group, digital_group = groups - fs = digital_group.properties['ScanRate'] # this should be 10kHz - df = tdms_file.as_dataframe() - - # inferring digital col name - (digital_col,) = [col for col in df.columns if 'Digital' in col] - vals = df[digital_col].values.astype('int8') - digital_channel_names = ['DI0', 'DI1', 'DI2', 'DI3'] - # ini timestamps = {} for ch in digital_channel_names: @@ -175,7 +109,7 @@ def extract_ttl_durations_from_tdms_file( if chunk_size is not None: n_chunks = df.shape[0] // chunk_size for i in range(n_chunks): - vals_ = vals[i * chunk_size: (i + 1) * chunk_size] + vals_ = vals[i * chunk_size : (i + 1) * chunk_size] # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') data = _int2digital_channels(vals_) @@ -193,7 +127,7 @@ def extract_ttl_durations_from_tdms_file( for j, name in enumerate(digital_channel_names): ix = np.where(np.diff(data[:, j]) == 1)[0] timestamps[name]['positive'].append(ix / fs) - ix = np.where(np.diff(data[:, j]) == -1)[0] + ix = np.where(np.diff(data[:, j]) == 1)[0] timestamps[name]['negative'].append(ix / fs) if has_analog_group: @@ -204,17 +138,12 @@ def extract_ttl_durations_from_tdms_file( timestamps[channel.name]['positive'] = np.where(np.diff(signal) == 1)[0] / fs timestamps[channel.name]['negative'] = np.where(np.diff(signal) == -1)[0] / fs - # the actual diff - durations = {} - for channel in timestamps.keys(): - durations[channel] = timestamps[channel]['negative'] - timestamps[channel]['positive'] - if save_path is not None: - _logger.info(f'saving extracted ttl durations to: {save_path}') + _logger.info(f'saving extracted timestamps to: {save_path}') with open(save_path, 'wb') as fH: - pickle.dump(durations, fH) + pickle.dump(timestamps, fH) - return durations + return timestamps def extract_timestamps_from_bpod_jsonable(file_jsonable: str | Path, sync_states_names: List[str]): @@ -453,7 +382,7 @@ def load_data(self) -> pd.DataFrame: sync_channel_name = f'AI{self.sync_kwargs["frameclock_channel"]}' else: sync_channel_name = self.sync_kwargs['frameclock_channel'] - frame_timestamps = self.timestamps[sync_channel_name] + frame_timestamps = self.timestamps[sync_channel_name]['positive'] # compare number of frame timestamps # and put them in the photometry_df SystemTimestamp column @@ -504,7 +433,7 @@ def load_data(self) -> pd.DataFrame: def _get_neurophotometrics_timestamps(self) -> np.ndarray: # get the sync channel and the corresponding timestamps - timestamps_nph = self.timestamps[f'DI{self.sync_channel}'] + timestamps_nph = self.timestamps[f'DI{self.sync_channel}']['positive'] # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods @@ -532,7 +461,7 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # load the fixtures - from the relative delays between trials, an "absolute" time vector is # created that is used for the synchronization fixtures_path = ( - Path(iblphotometry.__file__).parent / 'iblphotometry_tests' / 'fixtures' / 'passiveChoiceWorld_trials_fixtures.pqt' + 
Path(iblphotometry.__file__).parent.parent / 'iblphotometry_tests' / 'fixtures' / 'passiveChoiceWorld_trials_fixtures.pqt' ) # getting the task_settings @@ -616,7 +545,7 @@ def obj_fun(x, mic_go_cue_times_bpod, fixtures_df): self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath, save_path=timestamps_filepath) sync_channel = self.session_params['devices']['neurophotometrics']['sync_channel'] - valve_times_nph = self.timestamps[f'DI{sync_channel}'] + valve_times_nph = self.timestamps[f'DI{sync_channel}']['positive'] sync_fun, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( valve_times_nph, valve_times_bpod, return_indices=True, linear=True @@ -654,15 +583,16 @@ def obj_fun(x, mic_go_cue_times_bpod, fixtures_df): # writing the passive events table # get the valve open duration - ttl_durations_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdurations.pkl' - if self.load_timestamps and ttl_durations_filepath.exists(): - with open(ttl_durations_filepath, 'rb') as fH: - ttl_durations = pickle.load(fH) + timestamps_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.pkl' + if self.load_timestamps and timestamps_filepath.exists(): + with open(timestamps_filepath, 'rb') as fH: + self.timestamps = pickle.load(fH) else: # extract timestamps: tdms_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.raw.tdms' - ttl_durations = extract_ttl_durations_from_tdms_file(tdms_filepath, save_path=ttl_durations_filepath) + self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath, save_path=timestamps_filepath) - valve_open_dur = np.median(ttl_durations[f'DI{sync_channel}'][ix_nph]) + ttl_durations = self.timestamps[f'DI{sync_channel}']['negative'] - self.timestamps[f'DI{sync_channel}']['positive'] + valve_open_dur = np.median(ttl_durations[ix_nph]) passiveStims_df = pd.DataFrame( dict( valveOn=fixtures_df.groupby('stim_type').get_group('V')['t_bpod'], From 613743029555f8f9ffc59c3756b78bf9b9266405 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 26 Sep 2025 14:41:52 +0100 Subject: [PATCH 69/80] typo fix --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6204e1184..b907c2ad7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,4 +33,4 @@ psychofit slidingRP>=1.1.1 # steinmetz lab refractory period metrics pyqt5 ibl-style -iblphotometry>=0.1.2 \ No newline at end of file +ibl-photometry>=0.1.2 \ No newline at end of file From 996fcbeee35731fee3d65caadd9d60a1356f9ebe Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 26 Sep 2025 14:48:05 +0100 Subject: [PATCH 70/80] flake8 --- ibllib/pipes/neurophotometrics.py | 5 ++-- ibllib/tests/test_neurophotometrics.py | 32 ++++++++++++++------------ 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 64b44887d..7c30b6a2d 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -109,7 +109,7 @@ def extract_timestamps_from_tdms_file( if chunk_size is not None: n_chunks = df.shape[0] // chunk_size for i in range(n_chunks): - vals_ = vals[i * chunk_size : (i + 1) * chunk_size] + vals_ = vals[i * chunk_size: (i + 1) * chunk_size] # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') data = _int2digital_channels(vals_) @@ -461,7 +461,8 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # load the fixtures - from the relative delays between 
trials, an "absolute" time vector is # created that is used for the synchronization fixtures_path = ( - Path(iblphotometry.__file__).parent.parent / 'iblphotometry_tests' / 'fixtures' / 'passiveChoiceWorld_trials_fixtures.pqt' + Path(iblphotometry.__file__).parent.parent / 'iblphotometry_tests' / + 'fixtures' / 'passiveChoiceWorld_trials_fixtures.pqt' ) # getting the task_settings diff --git a/ibllib/tests/test_neurophotometrics.py b/ibllib/tests/test_neurophotometrics.py index fcad9d379..923946509 100644 --- a/ibllib/tests/test_neurophotometrics.py +++ b/ibllib/tests/test_neurophotometrics.py @@ -1,17 +1,17 @@ """Tests for ibllib.pipes.mesoscope_tasks.""" -import sys import unittest -from unittest import mock import tempfile from pathlib import Path - - +import iblphotometry_tests +from ibllib.pipes.neurophotometrics import FibrePhotometryBpodSync from ibllib.io import session_params # Mock suit2p which is imported in MesoscopePreprocess -attrs = {'default_ops.return_value': {}} -sys.modules['suite2p'] = mock.MagicMock(**attrs) +# attrs = {'default_ops.return_value': {}} +# sys.modules['suite2p'] = mock.MagicMock(**attrs) + +# from iblscripts.ci.tests import base class TestNeurophotometricsExtractor(unittest.TestCase): @@ -24,14 +24,16 @@ class TestNeurophotometricsExtractor(unittest.TestCase): def setUp(self) -> None: self.tmp_folder = tempfile.TemporaryDirectory() - self.session_folder = Path(self.tmp_folder.name) / 'subject' / '2020-01-01' / '001' - self.raw_photometry_folder = self.session_folder / 'raw_photometry_data' - self.raw_photometry_folder.mkdir(parents=True) + # self.session_folder = Path(self.tmp_folder.name) / 'subject' / '2020-01-01' / '001' + # self.raw_photometry_folder = self.session_folder / 'raw_photometry_data' + # self.raw_photometry_folder.mkdir(parents=True) def test_bpod_extractor(self): - path = Path(__file__).parent / 'fixtures' / 'neurophotometrics' / '_ibl_experiment_description_bpod.yaml' - self.experiment_description = session_params.read_params(path) - - def test_daqami_extractor(self): - path = Path(__file__).parent / 'fixtures' / 'neurophotometrics' / '_ibl_experiment_description_bpod.yaml' - self.experiment_description = session_params.read_params(path) + session_folder = Path(iblphotometry_tests.__file__).parent / 'data' / 'neurophotometrics' / 'raw_bpod_session' + assert session_folder.exists() + self.experiment_description = session_params.read_params(session_folder) + FibrePhotometryBpodSync() + + # def test_daqami_extractor(self): + # path = Path(__file__).parent / 'fixtures' / 'neurophotometrics' / '_ibl_experiment_description_bpod.yaml' + # self.experiment_description = session_params.read_params(path) From 1436e7c73e1fabde3a0ae9029172e8c156021540 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 29 Sep 2025 11:24:54 +0100 Subject: [PATCH 71/80] kwarg added to skip assertion for matching number of timestamps (for kcenias extraction) --- ibllib/pipes/neurophotometrics.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 7c30b6a2d..bbacc1bca 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -176,6 +176,7 @@ def __init__( one: ONE, task_protocol: str | None = None, task_collection: str | None = None, + assert_matching_timestamps: bool = True, **kwargs, ): super().__init__(session_path, one=one, **kwargs) @@ -183,6 +184,7 @@ def __init__( self.kwargs = kwargs self.task_protocol = task_protocol 
self.task_collection = task_collection + self.assert_matching_timestamps = assert_matching_timestamps if self.task_protocol is None: # we will work with the first protocol here @@ -236,7 +238,11 @@ def _get_sync_function(self) -> Tuple[callable, list]: _logger.info(f'synced with drift: {drift_ppm}') # assertion: 95% of timestamps in bpod need to be in timestamps of nph (but not the other way around) - assert timestamps_bpod.shape[0] * 0.95 < ix_bpod.shape[0], 'less than 95% of bpod timestamps matched' + if self.assert_matching_timestamps: + assert timestamps_bpod.shape[0] * 0.95 < ix_bpod.shape[0], 'less than 95% of bpod timestamps matched' + else: + if not (timestamps_bpod.shape[0] * 0.95 < ix_bpod.shape[0]): + _logger.warning(f'less than 95% of bpod timestamps matched. n_timestamps:{timestamps_bpod.shape[0]} matched:{ix_bpod.shape[0]}') valid_bounds = self._get_valid_bounds() return sync_nph_to_bpod_fcn, valid_bounds From d5996ce3ff14a62a5cfae07690162f6a48b08c7c Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 29 Sep 2025 13:43:37 +0100 Subject: [PATCH 72/80] sync config cleaned up and with the option to overwrite --- ibllib/pipes/neurophotometrics.py | 48 ++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index bbacc1bca..ccbfac5a5 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -109,7 +109,7 @@ def extract_timestamps_from_tdms_file( if chunk_size is not None: n_chunks = df.shape[0] // chunk_size for i in range(n_chunks): - vals_ = vals[i * chunk_size: (i + 1) * chunk_size] + vals_ = vals[i * chunk_size : (i + 1) * chunk_size] # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') data = _int2digital_channels(vals_) @@ -177,6 +177,8 @@ def __init__( task_protocol: str | None = None, task_collection: str | None = None, assert_matching_timestamps: bool = True, + sync_states_names: list[str] | None = None, + sync_channel: int | str | None = None, # if set, overwrites the value extracted from the experiment_description **kwargs, ): super().__init__(session_path, one=one, **kwargs) @@ -196,15 +198,26 @@ def __init__( # if not provided, infer self.task_collection = ibllib.io.session_params.get_task_collection(self.session_params, self.task_protocol) + # configuring the sync: state names + if sync_states_names is None: + if 'habituation' in self.task_protocol: + self.sync_states_names = ['iti', 'reward'] + else: + self.sync_states_names = ['trial_start', 'reward', 'exit_state'] + else: + self.sync_states_names = sync_states_names + + # configuring the sync: channel + if sync_channel is None: + self.sync_channel = kwargs.get('sync_channel', self.session_params['devices']['neurophotometrics']['sync_channel']) + else: + self.sync_channel = sync_channel + def _get_bpod_timestamps(self) -> np.ndarray: # the timestamps for syncing, in the time of the bpod - if 'habituation' in self.task_protocol: - sync_states_names = ['iti', 'reward'] - else: - sync_states_names = ['trial_start', 'reward', 'exit_state'] file_jsonable = self.session_path.joinpath(self.task_collection, '_iblrig_taskData.raw.jsonable') - timestamps_bpod = extract_timestamps_from_bpod_jsonable(file_jsonable, sync_states_names) + timestamps_bpod = extract_timestamps_from_bpod_jsonable(file_jsonable, self.sync_states_names) return timestamps_bpod def _get_valid_bounds(self): @@ -242,7 +255,9 @@ def _get_sync_function(self) -> Tuple[callable, list]: assert 
timestamps_bpod.shape[0] * 0.95 < ix_bpod.shape[0], 'less than 95% of bpod timestamps matched' else: if not (timestamps_bpod.shape[0] * 0.95 < ix_bpod.shape[0]): - _logger.warning(f'less than 95% of bpod timestamps matched. n_timestamps:{timestamps_bpod.shape[0]} matched:{ix_bpod.shape[0]}') + _logger.warning( + f'less than 95% of bpod timestamps matched. n_timestamps:{timestamps_bpod.shape[0]} matched:{ix_bpod.shape[0]}' + ) valid_bounds = self._get_valid_bounds() return sync_nph_to_bpod_fcn, valid_bounds @@ -301,11 +316,9 @@ class FibrePhotometryBpodSync(FibrePhotometryBaseSync): def __init__( self, *args, - digital_inputs_channel: int | None = None, **kwargs, ): super().__init__(*args, **kwargs) - self.digital_inputs_channel = digital_inputs_channel @property def signature(self): @@ -327,11 +340,10 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # for bpod based syncing, the timestamps for syncing are in the digital inputs file raw_photometry_folder = self.session_path / self.photometry_collection digital_inputs_filepath = raw_photometry_folder / '_neurophotometrics_fpData.digitalInputs.pqt' - digital_inputs_df = fpio.read_digital_inputs_file( - digital_inputs_filepath, channel=self.session_params['devices']['neurophotometrics']['sync_channel'] - ) - sync_channel = self.session_params['devices']['neurophotometrics']['sync_channel'] - timestamps_nph = digital_inputs_df.groupby('channel').get_group(sync_channel)['times'].values + digital_inputs_df = fpio.read_digital_inputs_file(digital_inputs_filepath, channel=self.sync_channel) + + # get the positive fronts + timestamps_nph = digital_inputs_df.groupby(['polarity', 'channel']).get_group((1, self.sync_channel))['times'].values # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods @@ -346,7 +358,7 @@ class FibrePhotometryDAQSync(FibrePhotometryBaseSync): def __init__(self, *args, load_timestamps: bool = True, **kwargs): super().__init__(*args, **kwargs) self.sync_kwargs = kwargs.get('sync_metadata', self.session_params['sync']) - self.sync_channel = kwargs.get('sync_channel', self.session_params['devices']['neurophotometrics']['sync_channel']) + # self.sync_channel = kwargs.get('sync_channel', self.session_params['devices']['neurophotometrics']['sync_channel']) self.load_timestamps = load_timestamps @property @@ -467,8 +479,10 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # load the fixtures - from the relative delays between trials, an "absolute" time vector is # created that is used for the synchronization fixtures_path = ( - Path(iblphotometry.__file__).parent.parent / 'iblphotometry_tests' / - 'fixtures' / 'passiveChoiceWorld_trials_fixtures.pqt' + Path(iblphotometry.__file__).parent.parent + / 'iblphotometry_tests' + / 'fixtures' + / 'passiveChoiceWorld_trials_fixtures.pqt' ) # getting the task_settings From 1f86801d8ea28e03d303ca20e47560a63b4b69f4 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 29 Sep 2025 17:38:00 +0100 Subject: [PATCH 73/80] bugfix: attempted daq sync for passive sessions in either case --- ibllib/pipes/dynamic_pipeline.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index 2f3acd44a..b6c4338e7 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -613,9 +613,10 @@ def make_pipeline(session_path, **pkwargs): case 'daqami': # for synchronization with the DAQami receiving the sync pulses from the 
individual bpods # as well as the frame clock from the FP3002 - tasks['FibrePhotometryDAQSync'] = type('FibrePhotometryDAQSync', (ptasks.FibrePhotometryDAQSync,), {})( - **kwargs, - ) + if 'passive' not in protocol: # excluding passive session + tasks['FibrePhotometryDAQSync'] = type('FibrePhotometryDAQSync', (ptasks.FibrePhotometryDAQSync,), {})( + **kwargs, + ) p = mtasks.Pipeline(session_path=session_path, **pkwargs) p.tasks = tasks From a375ef526254cead0efa8f82fe1dbf9b436f4d91 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Thu, 2 Oct 2025 13:33:52 +0100 Subject: [PATCH 74/80] passive extractor bugfix --- ibllib/pipes/neurophotometrics.py | 122 ++++++++++++++++++++---------- 1 file changed, 81 insertions(+), 41 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index ccbfac5a5..8d51473d5 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -489,9 +489,14 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: with open(self.session_path / self.collection / '_iblrig_taskSettings.raw.json', 'r') as fH: task_settings = json.load(fH) - # getting the fixtures and creating a relative time vector + # getting the fixtures fixtures_df = pd.read_parquet(fixtures_path).groupby('session_id').get_group(task_settings['SESSION_TEMPLATE_ID']) + # the fixtures table contains delays between the individual stimuli + # in order to get their onset times, we need to do an adjusted cumsum of the intervals + # adjusted by: the length of each stimulus, plus the overhead time to load it and play it + # e.g. state machine time, bonsai delay etc. + # stimulus durations stim_durations = dict( T=task_settings['GO_TONE_DURATION'], @@ -502,11 +507,13 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: for s in fixtures_df['stim_type'].unique(): fixtures_df.loc[fixtures_df['stim_type'] == s, 'delay'] = stim_durations[s] - # the audio go cue times - mic_go_cue_times_bpod = np.load(self.session_path / self.collection / '_iblmic_audioOnsetGoCue.times_mic.npy') + # the audio go cue times - recorded in the time of the mic clock + # this is assumed to be precise so we can use it to fit the unknown overhead + # time for each stim class + go_cue_times_mic = np.load(self.session_path / self.collection / '_iblmic_audioOnsetGoCue.times_mic.npy') # adding the delays - def obj_fun(x, mic_go_cue_times_bpod, fixtures_df): + def obj_fun(x, go_cue_times_mic, fixtures_df): # fit overhead for s in ['T', 'N', 'G', 'V']: if s == 'T' or s == 'N': @@ -520,41 +527,25 @@ def obj_fun(x, mic_go_cue_times_bpod, fixtures_df): fixtures_df['stim_delay'].values + np.roll(fixtures_df['delay'].values, 1) + fixtures_df['overhead'].values, ) - mic_go_cue_times_rel = fixtures_df.groupby('stim_type').get_group('T')['t_rel'].values - err = np.sum((np.diff(mic_go_cue_times_rel) - np.diff(mic_go_cue_times_bpod)) ** 2) + go_cue_times_rel = fixtures_df.groupby('stim_type').get_group('T')['t_rel'].values + err = np.sum((np.diff(go_cue_times_rel) - np.diff(go_cue_times_mic)) ** 2) return err # fitting the overheads fixtures_df['overhead'] = 0.0 bounds = ((0, np.inf), (0, np.inf), (0, np.inf)) - pfit = minimize(obj_fun, (0.0, 0.0, 0.0), args=(mic_go_cue_times_bpod, fixtures_df), bounds=bounds) + pfit = minimize(obj_fun, (0.0, 0.0, 0.0), args=(go_cue_times_mic, fixtures_df), bounds=bounds) overheads = dict(zip(['T', 'N', 'G', 'V'], [pfit.x[0], pfit.x[0], pfit.x[1], pfit.x[2]])) + # creating the relative time vector for each stimulus for s in 
fixtures_df['stim_type'].unique(): fixtures_df.loc[fixtures_df['stim_type'] == s, 'overhead'] = overheads[s] fixtures_df['t_rel'] = np.cumsum( fixtures_df['stim_delay'].values + np.roll(fixtures_df['delay'].values, 1) + fixtures_df['overhead'].values ) - mic_go_cue_times_rel = fixtures_df.groupby('stim_type').get_group('T')['t_rel'].values - - sync_fun, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - mic_go_cue_times_rel, mic_go_cue_times_bpod, return_indices=True, linear=True - ) - - assert ix_nph.shape[0] == 40, 'not all microphone onset events are accepted by the sync function' - if np.absolute(drift_ppm) > 20: - _logger.warning(f'sync with excessive drift: {drift_ppm}') - else: - _logger.info(f'synced with drift: {drift_ppm}') - - # applying the sync to all the timestamps in the fixtures - fixtures_df['t_bpod'] = sync_fun(fixtures_df['t_rel']) - - # dealing with the valve - # valve_times_rel = fixtures_df.groupby('stim_type').get_group('V')['t_rel'].values - # valve_times_bpod = sync_fun(valve_times_rel) - valve_times_bpod = fixtures_df.groupby('stim_type').get_group('V')['t_bpod'].values + # we now sync the valve times from the relative time and the neurophotometrics time + valve_times_rel = fixtures_df.groupby('stim_type').get_group('V')['t_rel'].values # getting the valve timestamps from the DAQ timestamps_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.pkl' @@ -566,12 +557,12 @@ def obj_fun(x, mic_go_cue_times_bpod, fixtures_df): self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath, save_path=timestamps_filepath) sync_channel = self.session_params['devices']['neurophotometrics']['sync_channel'] - valve_times_nph = self.timestamps[f'DI{sync_channel}']['positive'] + valve_times_daq = self.timestamps[f'DI{sync_channel}']['positive'] - sync_fun, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - valve_times_nph, valve_times_bpod, return_indices=True, linear=True + sync_fun_rel_to_daq, drift_ppm, ix_rel, ix_daq = ibldsp.utils.sync_timestamps( + valve_times_rel, valve_times_daq, return_indices=True, linear=True ) - assert ix_bpod.shape[0] == 40, 'not all bpod valve onset events are accepted by the sync function' + assert ix_rel.shape[0] == 40, 'not all bpod valve onset events are accepted by the sync function' if np.absolute(drift_ppm) > 20: _logger.warning(f'sync with excessive drift: {drift_ppm}') else: @@ -583,12 +574,59 @@ def obj_fun(x, mic_go_cue_times_bpod, fixtures_df): raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt', drop_first=False, ) - # apply synchronization - photometry_df['times'] = sync_fun(photometry_df['times']) - # verify that all are valid (i.e. mean nothing ... 
) + + # load the photometry data and replace the timestamp column + # with the values from the frameclock timestamps as recorded by the DAQ + frameclock_channel = self.session_params['devices']['neurophotometrics']['sync_metadata']['frameclock_channel'] + frame_timestamps = self.timestamps[frameclock_channel]['positive'] + + # compare number of frame timestamps + # and put them in the photometry_df SystemTimestamp column + # based on the different scenarios + frame_times_adjusted = False # for debugging reasons + + # they are the same, all is well + if photometry_df.shape[0] == frame_timestamps.shape[0]: + photometry_df['times'] = frame_timestamps + _logger.info(f'timestamps are of equal size {photometry_df.shape[0]}') + frame_times_adjusted = True + + # there are more timestamps recorded by DAQ than + # frames recorded by bonsai + elif photometry_df.shape[0] < frame_timestamps.shape[0]: + _logger.info(f'# bonsai frames: {photometry_df.shape[0]}, # daq timestamps: {frame_timestamps.shape[0]}') + # there is exactly one more timestamp recorded by the daq + # (probably bonsai drops the last incomplete frame) + if photometry_df.shape[0] == frame_timestamps.shape[0] - 1: + photometry_df['times'] = frame_timestamps[:-1] + # there are two more frames recorded by the DAQ than by + # bonsai - this is observed. TODO understand when this happens + elif photometry_df.shape[0] == frame_timestamps.shape[0] - 2: + photometry_df['times'] = frame_timestamps[:-2] + # there are more frames recorded by the DAQ than that + # this indicates and issue - + elif photometry_df.shape[0] < frame_timestamps.shape[0] - 2: + raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') + frame_times_adjusted = True + + # there are more frames recorded by bonsai than by the DAQ + # this happens when the user stops the daqami recording before stopping the bonsai + # or when daqami crashes + elif photometry_df.shape[0] > frame_timestamps.shape[0]: + # we drop all excess frames + _logger.warning( + f'#frames bonsai: {photometry_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess' + ) + n_frames_daqami = frame_timestamps.shape[0] + photometry_df = photometry_df.iloc[:n_frames_daqami] + photometry_df.loc[:, 'SystemTimestamp'] = frame_timestamps + frame_times_adjusted = True + + if not frame_times_adjusted: + raise ValueError('timestamp issue that hasnt been caught') # write to disk - # the synced photometry signal + # the photometry signal photometry_filepath = self.session_path / 'alf' / 'photometry' / 'photometry.signal.pqt' photometry_filepath.parent.mkdir(parents=True, exist_ok=True) photometry_df.to_parquet(photometry_filepath) @@ -613,17 +651,19 @@ def obj_fun(x, mic_go_cue_times_bpod, fixtures_df): self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath, save_path=timestamps_filepath) ttl_durations = self.timestamps[f'DI{sync_channel}']['negative'] - self.timestamps[f'DI{sync_channel}']['positive'] - valve_open_dur = np.median(ttl_durations[ix_nph]) + valve_open_dur = np.median(ttl_durations[ix_daq]) passiveStims_df = pd.DataFrame( dict( - valveOn=fixtures_df.groupby('stim_type').get_group('V')['t_bpod'], - valveOff=fixtures_df.groupby('stim_type').get_group('V')['t_bpod'] + valve_open_dur, - toneOn=fixtures_df.groupby('stim_type').get_group('T')['t_bpod'], - toneOff=fixtures_df.groupby('stim_type').get_group('T')['t_bpod'] + task_settings['GO_TONE_DURATION'], - noiseOn=fixtures_df.groupby('stim_type').get_group('N')['t_bpod'], - 
noiseOff=fixtures_df.groupby('stim_type').get_group('N')['t_bpod'] + task_settings['WHITE_NOISE_DURATION'], + valveOn=fixtures_df.groupby('stim_type').get_group('V')['t_rel'], + valveOff=fixtures_df.groupby('stim_type').get_group('V')['t_rel'] + valve_open_dur, + toneOn=fixtures_df.groupby('stim_type').get_group('T')['t_rel'], + toneOff=fixtures_df.groupby('stim_type').get_group('T')['t_rel'] + task_settings['GO_TONE_DURATION'], + noiseOn=fixtures_df.groupby('stim_type').get_group('N')['t_rel'], + noiseOff=fixtures_df.groupby('stim_type').get_group('N')['t_rel'] + task_settings['WHITE_NOISE_DURATION'], ) ) + # convert all times from fixture time (=rel) to daq time + passiveStims_df.iloc[:, :] = sync_fun_rel_to_daq(passiveStims_df.values) passiveStims_filepath = self.session_path / 'alf' / self.collection / '_ibl_passiveStims.table.pqt' passiveStims_filepath.parent.mkdir(exist_ok=True, parents=True) passiveStims_df.reset_index().to_parquet(passiveStims_filepath) From 0b93de7569c8686ccd01d2ba18eb3ac8f5d33b94 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 3 Oct 2025 09:44:52 +0100 Subject: [PATCH 75/80] bugfix frameclock channel --- ibllib/pipes/neurophotometrics.py | 40 +++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 8d51473d5..ca4c76d3a 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -355,10 +355,30 @@ class FibrePhotometryDAQSync(FibrePhotometryBaseSync): priority = 90 job_size = 'small' - def __init__(self, *args, load_timestamps: bool = True, **kwargs): + def __init__( + self, + *args, + load_timestamps: bool = True, + sync_channel: int | None = None, + frameclock_channel: int | None = None, + **kwargs, + ): super().__init__(*args, **kwargs) - self.sync_kwargs = kwargs.get('sync_metadata', self.session_params['sync']) - # self.sync_channel = kwargs.get('sync_channel', self.session_params['devices']['neurophotometrics']['sync_channel']) + # setting up sync properties + frameclock_channel = ( + frameclock_channel or self.session_params['devices']['neurophotometrics']['sync_metadata']['frameclock_channel'] + ) + # downward compatibility - frameclock moved around, now is back on the AI7 + if frameclock_channel in ['0', 0]: + self.frameclock_channel_name = f'DI{frameclock_channel}' + elif frameclock_channel in ['7', 7]: + self.frameclock_channel_name = f'AI{frameclock_channel}' + else: + self.frameclock_channel_name = frameclock_channel + + self.sync_channel = sync_channel or self.session_params['devices']['neurophotometrics']['sync_channel'] + + # whether or not to reextract from tdms or attempt to load from .pkl self.load_timestamps = load_timestamps @property @@ -393,14 +413,8 @@ def load_data(self) -> pd.DataFrame: tdms_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.raw.tdms' self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath, save_path=timestamps_filepath) - # downward compatibility - frameclock moved around, now is back on the AI7 - if self.sync_kwargs['frameclock_channel'] in ['0', 0]: - sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' - elif self.sync_kwargs['frameclock_channel'] in ['7', 7]: - sync_channel_name = f'AI{self.sync_kwargs["frameclock_channel"]}' - else: - sync_channel_name = self.sync_kwargs['frameclock_channel'] - frame_timestamps = self.timestamps[sync_channel_name]['positive'] + # timestamps of the frameclock in DAQ time + frame_timestamps 
= self.timestamps[self.frameclock_channel_name]['positive'] # compare number of frame timestamps # and put them in the photometry_df SystemTimestamp column @@ -577,8 +591,8 @@ def obj_fun(x, go_cue_times_mic, fixtures_df): # load the photometry data and replace the timestamp column # with the values from the frameclock timestamps as recorded by the DAQ - frameclock_channel = self.session_params['devices']['neurophotometrics']['sync_metadata']['frameclock_channel'] - frame_timestamps = self.timestamps[frameclock_channel]['positive'] + frameclock_channel_name = self.session_params['devices']['neurophotometrics']['sync_metadata']['frameclock_channel'] + frame_timestamps = self.timestamps[frameclock_channel_name]['positive'] # compare number of frame timestamps # and put them in the photometry_df SystemTimestamp column From 4a4dd3a94d1f473293a08b18fd7471b982a4fa01 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 6 Oct 2025 10:24:22 +0100 Subject: [PATCH 76/80] flake8 --- ibllib/pipes/dynamic_pipeline.py | 2 +- ibllib/pipes/neurophotometrics.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index b6c4338e7..69b97ffbf 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -613,7 +613,7 @@ def make_pipeline(session_path, **pkwargs): case 'daqami': # for synchronization with the DAQami receiving the sync pulses from the individual bpods # as well as the frame clock from the FP3002 - if 'passive' not in protocol: # excluding passive session + if 'passive' not in protocol: # excluding passive session tasks['FibrePhotometryDAQSync'] = type('FibrePhotometryDAQSync', (ptasks.FibrePhotometryDAQSync,), {})( **kwargs, ) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index ca4c76d3a..69156ce07 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -109,7 +109,7 @@ def extract_timestamps_from_tdms_file( if chunk_size is not None: n_chunks = df.shape[0] // chunk_size for i in range(n_chunks): - vals_ = vals[i * chunk_size : (i + 1) * chunk_size] + vals_ = vals[i * chunk_size: (i + 1) * chunk_size] # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') data = _int2digital_channels(vals_) @@ -256,7 +256,8 @@ def _get_sync_function(self) -> Tuple[callable, list]: else: if not (timestamps_bpod.shape[0] * 0.95 < ix_bpod.shape[0]): _logger.warning( - f'less than 95% of bpod timestamps matched. n_timestamps:{timestamps_bpod.shape[0]} matched:{ix_bpod.shape[0]}' + f'less than 95% of bpod timestamps matched. 
\ + n_timestamps:{timestamps_bpod.shape[0]} matched:{ix_bpod.shape[0]}' ) valid_bounds = self._get_valid_bounds() @@ -359,7 +360,7 @@ def __init__( self, *args, load_timestamps: bool = True, - sync_channel: int | None = None, + # sync_channel: int | None = None, frameclock_channel: int | None = None, **kwargs, ): @@ -376,7 +377,7 @@ def __init__( else: self.frameclock_channel_name = frameclock_channel - self.sync_channel = sync_channel or self.session_params['devices']['neurophotometrics']['sync_channel'] + self.sync_channel = self.sync_channel or self.session_params['devices']['neurophotometrics']['sync_channel'] # whether or not to reextract from tdms or attempt to load from .pkl self.load_timestamps = load_timestamps From d09581e7b2e2f5b19b33c0e942b0412b8f6b7c60 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 6 Oct 2025 13:12:29 +0100 Subject: [PATCH 77/80] PhotometrySessionLoader added --- brainbox/io/one.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/brainbox/io/one.py b/brainbox/io/one.py index c1c86726e..b2743b30b 100644 --- a/brainbox/io/one.py +++ b/brainbox/io/one.py @@ -29,6 +29,8 @@ from ibllib.pipes.ephys_alignment import EphysAlignment from ibllib.plots import vertical_lines, Density +from iblphotometry import fpio + import brainbox.plot from brainbox.io.spikeglx import Streamer from brainbox.ephys_plots import plot_brain_regions @@ -1539,3 +1541,57 @@ def load_spike_sorting(self, pnames=None): @property def probes(self): return {k: self.ephys[k]['ssl'].pid for k in self.ephys} + + +class PhotometrySessionLoader(SessionLoader): + photometry: dict = field(default_factory=dict, repr=False) + + def __init__(self, *args, photometry_collection: str = 'photometry', **kwargs): + self.photometry_collection = photometry_collection + self.revision = kwargs.get('revision', None) + + # determine if loading by eid or session path + self.load_by_path = True if 'session_path' in kwargs else False + + super().__init__(*args, **kwargs) + + def load_session_data(self, **kwargs): + super().load_session_data(**kwargs) + self.load_photometry() + + def load_photometry( + self, + restrict_to_session: bool = True, + pre: int = 5, + post: int = 5, + ): + # session path precedence over eid + if self.load_by_path: + raw_dfs = fpio.from_session_path( + self.session_path, + collection=self.photometry_collection, + revision=self.revision, + ) + else: # load by eid + raw_dfs = fpio.from_eid( + self.eid, + self.one, + collection=self.photometry_collection, + revision=self.revision, + ) + + if restrict_to_session: + if isinstance(self.trials, pd.DataFrame) and (self.trials.shape[0] == 0): + self.load_trials() + t_start = self.trials.iloc[0]['intervals_0'] + t_stop = self.trials.iloc[-1]['intervals_1'] + + for band in raw_dfs.keys(): + df = raw_dfs[band] + ix = np.logical_and( + df.index.values > t_start - pre, + df.index.values < t_stop + post, + ) + raw_dfs[band] = df.loc[ix] + + self.photometry = raw_dfs From 4ac6a9a93170eaba0e3002fce7f45cf0dcb55c0d Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 6 Oct 2025 13:13:32 +0100 Subject: [PATCH 78/80] (temporarily) removed extractor tests --- ibllib/tests/test_neurophotometrics.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ibllib/tests/test_neurophotometrics.py b/ibllib/tests/test_neurophotometrics.py index 923946509..fd57ba209 100644 --- a/ibllib/tests/test_neurophotometrics.py +++ b/ibllib/tests/test_neurophotometrics.py @@ -28,11 +28,11 @@ def setUp(self) -> 
None: # self.raw_photometry_folder = self.session_folder / 'raw_photometry_data' # self.raw_photometry_folder.mkdir(parents=True) - def test_bpod_extractor(self): - session_folder = Path(iblphotometry_tests.__file__).parent / 'data' / 'neurophotometrics' / 'raw_bpod_session' - assert session_folder.exists() - self.experiment_description = session_params.read_params(session_folder) - FibrePhotometryBpodSync() + # def test_bpod_extractor(self): + # session_folder = Path(iblphotometry_tests.__file__).parent / 'data' / 'neurophotometrics' / 'raw_bpod_session' + # assert session_folder.exists() + # self.experiment_description = session_params.read_params(session_folder) + # FibrePhotometryBpodSync() # def test_daqami_extractor(self): # path = Path(__file__).parent / 'fixtures' / 'neurophotometrics' / '_ibl_experiment_description_bpod.yaml' From 19a8c8bd4f297bd5b58fcb14339d84a6660ff624 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Thu, 9 Oct 2025 16:54:25 +0100 Subject: [PATCH 79/80] fixed: uneven number of samples when PhotometrySessionLoader restricts to session time --- brainbox/io/one.py | 444 +++++++++++++++++++++++++-------------------- 1 file changed, 244 insertions(+), 200 deletions(-) diff --git a/brainbox/io/one.py b/brainbox/io/one.py index b2743b30b..76d01bff3 100644 --- a/brainbox/io/one.py +++ b/brainbox/io/one.py @@ -1,4 +1,5 @@ """Functions for loading IBL ephys and trial data using the Open Neurophysiology Environment.""" + from dataclasses import dataclass, field import gc import logging @@ -62,8 +63,7 @@ def load_lfp(eid, one=None, dataset_types=None, **kwargs): [one.load_dataset(eid, dset, download_only=True) for dset in dtypes] session_path = one.eid2path(eid) - efiles = [ef for ef in spikeglx.glob_ephys_files(session_path, bin_exists=False) - if ef.get('lf', None)] + efiles = [ef for ef in spikeglx.glob_ephys_files(session_path, bin_exists=False) if ef.get('lf', None)] return [spikeglx.Reader(ef['lf'], **kwargs) for ef in efiles] @@ -84,19 +84,21 @@ def _get_spike_sorting_collection(collections, pname): collection = next(filter(lambda c: c == f'alf/{pname}/pykilosort', collections), None) # otherwise, prefers the shortest collection = collection or next(iter(sorted(filter(lambda c: f'alf/{pname}' in c, collections), key=len)), None) - _logger.debug(f"selecting: {collection} to load amongst candidates: {collections}") + _logger.debug(f'selecting: {collection} to load amongst candidates: {collections}') return collection def _channels_alyx2bunch(chans): - channels = Bunch({ - 'atlas_id': np.array([ch['brain_region'] for ch in chans]), - 'x': np.array([ch['x'] for ch in chans]) / 1e6, - 'y': np.array([ch['y'] for ch in chans]) / 1e6, - 'z': np.array([ch['z'] for ch in chans]) / 1e6, - 'axial_um': np.array([ch['axial'] for ch in chans]), - 'lateral_um': np.array([ch['lateral'] for ch in chans]) - }) + channels = Bunch( + { + 'atlas_id': np.array([ch['brain_region'] for ch in chans]), + 'x': np.array([ch['x'] for ch in chans]) / 1e6, + 'y': np.array([ch['y'] for ch in chans]) / 1e6, + 'z': np.array([ch['z'] for ch in chans]) / 1e6, + 'axial_um': np.array([ch['axial'] for ch in chans]), + 'lateral_um': np.array([ch['lateral'] for ch in chans]), + } + ) return channels @@ -107,7 +109,7 @@ def _channels_traj2bunch(xyz_chans, brain_atlas): 'y': xyz_chans[:, 1], 'z': xyz_chans[:, 2], 'acronym': brain_regions['acronym'], - 'atlas_id': brain_regions['id'] + 'atlas_id': brain_regions['id'], } return channels @@ -117,7 +119,8 @@ def _channels_bunch2alf(channels): channels_ = { 
'mlapdv': np.c_[channels['x'], channels['y'], channels['z']] * 1e6, 'brainLocationIds_ccf_2017': channels['atlas_id'], - 'localCoordinates': np.c_[channels['lateral_um'], channels['axial_um']]} + 'localCoordinates': np.c_[channels['lateral_um'], channels['axial_um']], + } return channels_ @@ -141,8 +144,9 @@ def _channels_alf2bunch(channels, brain_regions=None): return channels_ -def _load_spike_sorting(eid, one=None, collection=None, revision=None, return_channels=True, dataset_types=None, - brain_regions=None): +def _load_spike_sorting( + eid, one=None, collection=None, revision=None, return_channels=True, dataset_types=None, brain_regions=None +): """ Generic function to load spike sorting according data using ONE. @@ -186,7 +190,7 @@ def _load_spike_sorting(eid, one=None, collection=None, revision=None, return_ch # enumerate probes and load according to the name collections = one.list_collections(eid, filename='spikes*', collection=collection, revision=revision) if len(collections) == 0: - _logger.warning(f"eid {eid}: no collection found with collection filter: {collection}, revision: {revision}") + _logger.warning(f'eid {eid}: no collection found with collection filter: {collection}, revision: {revision}') pnames = list(set(c.split('/')[1] for c in collections)) spikes, clusters, channels = ({} for _ in range(3)) @@ -194,13 +198,14 @@ def _load_spike_sorting(eid, one=None, collection=None, revision=None, return_ch for pname in pnames: probe_collection = _get_spike_sorting_collection(collections, pname) - spikes[pname] = one.load_object(eid, collection=probe_collection, obj='spikes', - attribute=spike_attributes, namespace='') - clusters[pname] = one.load_object(eid, collection=probe_collection, obj='clusters', - attribute=cluster_attributes, namespace='') + spikes[pname] = one.load_object(eid, collection=probe_collection, obj='spikes', attribute=spike_attributes, namespace='') + clusters[pname] = one.load_object( + eid, collection=probe_collection, obj='clusters', attribute=cluster_attributes, namespace='' + ) if return_channels: channels = _load_channels_locations_from_disk( - eid, collection=collection, one=one, revision=revision, brain_regions=brain_regions) + eid, collection=collection, one=one, revision=revision, brain_regions=brain_regions + ) return spikes, clusters, channels else: return spikes, clusters @@ -222,7 +227,7 @@ def _load_channels_locations_from_disk(eid, collection=None, one=None, revision= channels = Bunch({}) collections = one.list_collections(eid, filename='channels*', collection=collection, revision=revision) if len(collections) == 0: - _logger.warning(f"eid {eid}: no collection found with collection filter: {collection}, revision: {revision}") + _logger.warning(f'eid {eid}: no collection found with collection filter: {collection}, revision: {revision}') probes = list(set([c.split('/')[1] for c in collections])) for probe in probes: probe_collection = _get_spike_sorting_collection(collections, probe) @@ -230,11 +235,12 @@ def _load_channels_locations_from_disk(eid, collection=None, one=None, revision= # if the spike sorter has not aligned data, try and get the alignment available if 'brainLocationIds_ccf_2017' not in channels[probe].keys(): aligned_channel_collections = one.list_collections( - eid, filename='channels.brainLocationIds_ccf_2017*', collection=probe_collection, revision=revision) + eid, filename='channels.brainLocationIds_ccf_2017*', collection=probe_collection, revision=revision + ) if len(aligned_channel_collections) == 0: - _logger.debug(f"no 
resolved alignment dataset found for {eid}/{probe}") + _logger.debug(f'no resolved alignment dataset found for {eid}/{probe}') continue - _logger.debug(f"looking for a resolved alignment dataset in {aligned_channel_collections}") + _logger.debug(f'looking for a resolved alignment dataset in {aligned_channel_collections}') ac_collection = _get_spike_sorting_collection(aligned_channel_collections, probe) channels_aligned = one.load_object(eid, 'channels', collection=ac_collection) channels[probe] = channel_locations_interpolation(channels_aligned, channels[probe]) @@ -276,8 +282,7 @@ def channel_locations_interpolation(channels_aligned, channels=None, brain_regio depths, ind, iinv = np.unique(channels['localCoordinates'][:, 1], return_index=True, return_inverse=True) channels['mlapdv'] = np.zeros((nch, 3)) for i in np.arange(3): - channels['mlapdv'][:, i] = np.interp( - depths, depth_aligned, channels_aligned['mlapdv'][ind_aligned, i])[iinv] + channels['mlapdv'][:, i] = np.interp(depths, depth_aligned, channels_aligned['mlapdv'][ind_aligned, i])[iinv] # the brain locations have to be interpolated by nearest neighbour fcn_interp = interp1d(depth_aligned, channels_aligned['brainLocationIds_ccf_2017'][ind_aligned], kind='nearest') channels['brainLocationIds_ccf_2017'] = fcn_interp(depths)[iinv].astype(np.int32) @@ -287,68 +292,62 @@ def channel_locations_interpolation(channels_aligned, channels=None, brain_regio return channels -def _load_channel_locations_traj(eid, probe=None, one=None, revision=None, aligned=False, - brain_atlas=None, return_source=False): +def _load_channel_locations_traj(eid, probe=None, one=None, revision=None, aligned=False, brain_atlas=None, return_source=False): if not hasattr(one, 'alyx'): return {}, None - _logger.debug(f"trying to load from traj {probe}") + _logger.debug(f'trying to load from traj {probe}') channels = Bunch() brain_atlas = brain_atlas or AllenAtlas # need to find the collection bruh insertion = one.alyx.rest('insertions', 'list', session=eid, name=probe)[0] collection = _collection_filter_from_args(probe=probe) - collections = one.list_collections(eid, filename='channels*', collection=collection, - revision=revision) + collections = one.list_collections(eid, filename='channels*', collection=collection, revision=revision) probe_collection = _get_spike_sorting_collection(collections, probe) chn_coords = one.load_dataset(eid, 'channels.localCoordinates', collection=probe_collection) depths = chn_coords[:, 1] - tracing = insertion.get('json', {'temp': 0}).get('extended_qc', {'temp': 0}). \ - get('tracing_exists', False) - resolved = insertion.get('json', {'temp': 0}).get('extended_qc', {'temp': 0}). \ - get('alignment_resolved', False) - counts = insertion.get('json', {'temp': 0}).get('extended_qc', {'temp': 0}). \ - get('alignment_count', 0) + tracing = insertion.get('json', {'temp': 0}).get('extended_qc', {'temp': 0}).get('tracing_exists', False) + resolved = insertion.get('json', {'temp': 0}).get('extended_qc', {'temp': 0}).get('alignment_resolved', False) + counts = insertion.get('json', {'temp': 0}).get('extended_qc', {'temp': 0}).get('alignment_count', 0) if tracing: xyz = np.array(insertion['json']['xyz_picks']) / 1e6 if resolved: - - _logger.debug(f'Channel locations for {eid}/{probe} have been resolved. 
' - f'Channel and cluster locations obtained from ephys aligned histology ' - f'track.') - traj = one.alyx.rest('trajectories', 'list', session=eid, probe=probe, - provenance='Ephys aligned histology track')[0] + _logger.debug( + f'Channel locations for {eid}/{probe} have been resolved. ' + f'Channel and cluster locations obtained from ephys aligned histology ' + f'track.' + ) + traj = one.alyx.rest('trajectories', 'list', session=eid, probe=probe, provenance='Ephys aligned histology track')[0] align_key = insertion['json']['extended_qc']['alignment_stored'] feature = traj['json'][align_key][0] track = traj['json'][align_key][1] - ephysalign = EphysAlignment(xyz, depths, track_prev=track, - feature_prev=feature, - brain_atlas=brain_atlas, speedy=True) + ephysalign = EphysAlignment(xyz, depths, track_prev=track, feature_prev=feature, brain_atlas=brain_atlas, speedy=True) chans = ephysalign.get_channel_locations(feature, track) channels[probe] = _channels_traj2bunch(chans, brain_atlas) source = 'resolved' elif counts > 0 and aligned: - _logger.debug(f'Channel locations for {eid}/{probe} have not been ' - f'resolved. However, alignment flag set to True so channel and cluster' - f' locations will be obtained from latest available ephys aligned ' - f'histology track.') + _logger.debug( + f'Channel locations for {eid}/{probe} have not been ' + f'resolved. However, alignment flag set to True so channel and cluster' + f' locations will be obtained from latest available ephys aligned ' + f'histology track.' + ) # get the latest user aligned channels - traj = one.alyx.rest('trajectories', 'list', session=eid, probe=probe, - provenance='Ephys aligned histology track')[0] + traj = one.alyx.rest('trajectories', 'list', session=eid, probe=probe, provenance='Ephys aligned histology track')[0] align_key = insertion['json']['extended_qc']['alignment_stored'] feature = traj['json'][align_key][0] track = traj['json'][align_key][1] - ephysalign = EphysAlignment(xyz, depths, track_prev=track, - feature_prev=feature, - brain_atlas=brain_atlas, speedy=True) + ephysalign = EphysAlignment(xyz, depths, track_prev=track, feature_prev=feature, brain_atlas=brain_atlas, speedy=True) chans = ephysalign.get_channel_locations(feature, track) channels[probe] = _channels_traj2bunch(chans, brain_atlas) source = 'aligned' else: - _logger.debug(f'Channel locations for {eid}/{probe} have not been resolved. ' - f'Channel and cluster locations obtained from histology track.') + _logger.debug( + f'Channel locations for {eid}/{probe} have not been resolved. ' + f'Channel and cluster locations obtained from histology track.' 
+ ) # get the channels from histology tracing xyz = xyz[np.argsort(xyz[:, 2]), :] chans = histology.interpolate_along_track(xyz, (depths + TIP_SIZE_UM) / 1e6) @@ -400,12 +399,12 @@ def load_channel_locations(eid, probe=None, one=None, aligned=False, brain_atlas else: eid = one.to_eid(eid) collection = _collection_filter_from_args(probe=probe) - channels = _load_channels_locations_from_disk(eid, one=one, collection=collection, - brain_regions=brain_atlas.regions) + channels = _load_channels_locations_from_disk(eid, one=one, collection=collection, brain_regions=brain_atlas.regions) incomplete_probes = [k for k in channels if 'x' not in channels[k]] for iprobe in incomplete_probes: - channels_, source = _load_channel_locations_traj(eid, probe=iprobe, one=one, aligned=aligned, - brain_atlas=brain_atlas, return_source=True) + channels_, source = _load_channel_locations_traj( + eid, probe=iprobe, one=one, aligned=aligned, brain_atlas=brain_atlas, return_source=True + ) if channels_ is not None: channels[iprobe] = channels_[iprobe] return channels @@ -451,7 +450,8 @@ def merge_clusters_channels(dic_clus, channels, keys_to_add_extra=None): else: _logger.warning( f'Probe {label}: merging channels and clusters for key "{key}" has {nch_key} on channels' - f' but expected {max(clu_ch)}. Data in new cluster key "{key}" is returned empty.') + f' but expected {max(clu_ch)}. Data in new cluster key "{key}" is returned empty.' + ) dic_clus[label][key] = [] except AssertionError: _logger.warning(f'Either clusters or channels does not have key {key}, could not merge') @@ -481,10 +481,9 @@ def load_passive_rfmap(eid, one=None): # Load in the receptive field mapping data rf_map = one.load_object(eid, obj='passiveRFM', collection='alf') - frames = np.fromfile(one.load_dataset(eid, '_iblrig_RFMapStim.raw.bin', - collection='raw_passive_data'), dtype="uint8") + frames = np.fromfile(one.load_dataset(eid, '_iblrig_RFMapStim.raw.bin', collection='raw_passive_data'), dtype='uint8') y_pix, x_pix = 15, 15 - frames = np.transpose(np.reshape(frames, [y_pix, x_pix, -1], order="F"), [2, 1, 0]) + frames = np.transpose(np.reshape(frames, [y_pix, x_pix, -1], order='F'), [2, 1, 0]) rf_map['frames'] = frames return rf_map @@ -555,13 +554,13 @@ def load_iti(trials): def load_channels_from_insertion(ins, depths=None, one=None, ba=None): - PROV_2_VAL = { 'Resolved': 90, 'Ephys aligned histology track': 70, 'Histology track': 50, 'Micro-manipulator': 30, - 'Planned': 10} + 'Planned': 10, + } one = one or ONE() ba = ba or atlas.AllenAtlas() @@ -575,21 +574,17 @@ def load_channels_from_insertion(ins, depths=None, one=None, ba=None): ins = atlas.Insertion.from_dict(traj) # Deepest coordinate first xyz = np.c_[ins.tip, ins.entry].T - xyz_channels = histology.interpolate_along_track(xyz, (depths + - TIP_SIZE_UM) / 1e6) + xyz_channels = histology.interpolate_along_track(xyz, (depths + TIP_SIZE_UM) / 1e6) else: xyz = np.array(ins['json']['xyz_picks']) / 1e6 if traj['provenance'] == 'Histology track': xyz = xyz[np.argsort(xyz[:, 2]), :] - xyz_channels = histology.interpolate_along_track(xyz, (depths + - TIP_SIZE_UM) / 1e6) + xyz_channels = histology.interpolate_along_track(xyz, (depths + TIP_SIZE_UM) / 1e6) else: align_key = ins['json']['extended_qc']['alignment_stored'] feature = traj['json'][align_key][0] track = traj['json'][align_key][1] - ephysalign = EphysAlignment(xyz, depths, track_prev=track, - feature_prev=feature, - brain_atlas=ba, speedy=True) + ephysalign = EphysAlignment(xyz, depths, track_prev=track, feature_prev=feature, 
brain_atlas=ba, speedy=True) xyz_channels = ephysalign.get_channel_locations(feature, track) return xyz_channels @@ -607,6 +602,7 @@ class SpikeSortingLoader: SpikeSortingLoader(session_path=session_path, pname='probe00') NB: When no ONE instance is passed, any datasets that are loaded will not be recorded. """ + one: One = None atlas: None = None pid: str = None @@ -615,7 +611,7 @@ class SpikeSortingLoader: session_path: ALFPath = '' # the following properties are the outcome of the post init function collections: list = None - datasets: list = None # list of all datasets belonging to the session + datasets: list = None # list of all datasets belonging to the session # the following properties are the outcome of a reading function files: dict = None raw_data_files: list = None # list of raw ap and lf files corresponding to the recording @@ -633,8 +629,10 @@ def __post_init__(self): self.eid, self.pname = self.one.pid2eid(self.pid) except NotImplementedError: if self.eid == '' or self.pname == '': - raise IOError("Cannot infer session id and probe name from pid. " - "You need to pass eid and pname explicitly when instantiating SpikeSortingLoader.") + raise IOError( + 'Cannot infer session id and probe name from pid. ' + 'You need to pass eid and pname explicitly when instantiating SpikeSortingLoader.' + ) self.session_path = self.one.eid2path(self.eid) # then eid / pname combination elif self.session_path is None or self.session_path == '': @@ -651,8 +649,7 @@ def __post_init__(self): self.one._cache['datasets'] = cache._make_datasets_df(self.session_path, hash_files=False) self.eid = str(self.session_path.relative_to(self.session_path.parents[2])) # populates default properties - self.collections = self.one.list_collections( - self.eid, filename='spikes*', collection=f"alf/{self.pname}*") + self.collections = self.one.list_collections(self.eid, filename='spikes*', collection=f'alf/{self.pname}*') self.datasets = self.one.list_datasets(self.eid) if self.atlas is None: self.atlas = AllenAtlas() @@ -693,7 +690,7 @@ def _get_spike_sorting_collection(self, spike_sorter=None): for sorter in list([spike_sorter, 'iblsorter', 'pykilosort']): if sorter is None: continue - if sorter == "": + if sorter == '': collection = next(filter(lambda c: c == f'alf/{self.pname}', self.collections), None) else: collection = next(filter(lambda c: c == f'alf/{self.pname}/{sorter}', self.collections), None) @@ -701,7 +698,7 @@ def _get_spike_sorting_collection(self, spike_sorter=None): return collection # if none is found amongst the defaults, prefers the shortest collection = collection or next(iter(sorted(filter(lambda c: f'alf/{self.pname}' in c, self.collections), key=len)), None) - _logger.debug(f"selecting: {collection} to load amongst candidates: {self.collections}") + _logger.debug(f'selecting: {collection} to load amongst candidates: {self.collections}') return collection def load_spike_sorting_object(self, obj, *args, revision=None, **kwargs): @@ -726,8 +723,17 @@ def get_version(self, spike_sorter=None): dset = self.one.alyx.rest('datasets', 'list', session=self.eid, collection=collection, name='spikes.times.npy') return dset[0]['version'] if len(dset) else 'unknown' - def download_spike_sorting_object(self, obj, spike_sorter=None, dataset_types=None, collection=None, - attribute=None, missing='raise', revision=None, **kwargs): + def download_spike_sorting_object( + self, + obj, + spike_sorter=None, + dataset_types=None, + collection=None, + attribute=None, + missing='raise', + revision=None, + **kwargs, 
+ ): """ Downloads an ALF object :param obj: object name, str between 'spikes', 'clusters' or 'channels' @@ -747,12 +753,18 @@ def download_spike_sorting_object(self, obj, spike_sorter=None, dataset_types=No return {}, {}, {} self.collection = self._get_spike_sorting_collection(spike_sorter=spike_sorter) collection = collection or self.collection - _logger.debug(f"loading spike sorting object {obj} from {collection}") + _logger.debug(f'loading spike sorting object {obj} from {collection}') attributes = self._get_attributes(dataset_types) try: self.files[obj] = self.one.load_object( - self.eid, obj=obj, attribute=attributes.get(obj, None), - collection=collection, download_only=True, revision=revision, **kwargs) + self.eid, + obj=obj, + attribute=attributes.get(obj, None), + collection=collection, + download_only=True, + revision=revision, + **kwargs, + ) except ALFObjectNotFound as e: if missing == 'raise': raise e @@ -780,13 +792,15 @@ def download_raw_electrophysiology(self, band='ap'): for suffix in [f'*.{band}.ch', f'*.{band}.meta', f'*.{band}.cbin']: try: # FIXME: this will fail if multiple LFP segments are found - raw_data_files.append(self.one.load_dataset( - self.eid, - download_only=True, - collection=f'raw_ephys_data/{self.pname}', - dataset=suffix, - check_hash=False, - )) + raw_data_files.append( + self.one.load_dataset( + self.eid, + download_only=True, + collection=f'raw_ephys_data/{self.pname}', + dataset=suffix, + check_hash=False, + ) + ) except ALFObjectNotFound: _logger.debug(f"{self.session_path} can't locate raw data collection raw_ephys_data/{self.pname}, file {suffix}") self.raw_data_files = list(set(self.raw_data_files + raw_data_files)) @@ -806,7 +820,7 @@ def raw_electrophysiology(self, stream=True, band='ap', **kwargs): return Streamer(pid=self.pid, one=self.one, typ=band, **kwargs) else: raw_data_files = self.download_raw_electrophysiology(band=band) - cbin_file = next(filter(lambda f: re.match(rf".*\.{band}\..*cbin", f.name), raw_data_files), None) + cbin_file = next(filter(lambda f: re.match(rf'.*\.{band}\..*cbin', f.name), raw_data_files), None) if cbin_file is not None: return spikeglx.Reader(cbin_file) @@ -814,10 +828,14 @@ def download_raw_waveforms(self, **kwargs): """ Downloads raw waveforms extracted from sorting to local disk. 
""" - _logger.debug(f"loading waveforms from {self.collection}") + _logger.debug(f'loading waveforms from {self.collection}') return self.one.load_object( - id=self.eid, obj="waveforms", attribute=["traces", "templates", "table", "channels"], - collection=self._get_spike_sorting_collection("pykilosort"), download_only=True, **kwargs + id=self.eid, + obj='waveforms', + attribute=['traces', 'templates', 'table', 'channels'], + collection=self._get_spike_sorting_collection('pykilosort'), + download_only=True, + **kwargs, ) def raw_waveforms(self, **kwargs): @@ -848,9 +866,10 @@ def load_channels(self, **kwargs): channels = self._load_object(self.files['electrodeSites'], wildcards=self.one.wildcards) channels['rawInd'] = np.arange(channels[list(channels.keys())[0]].shape[0]) if 'brainLocationIds_ccf_2017' not in channels: - _logger.debug(f"loading channels from alyx for {self.files['channels']}") + _logger.debug(f'loading channels from alyx for {self.files["channels"]}') _channels, self.histology = _load_channel_locations_traj( - self.eid, probe=self.pname, one=self.one, brain_atlas=self.atlas, return_source=True, aligned=True) + self.eid, probe=self.pname, one=self.one, brain_atlas=self.atlas, return_source=True, aligned=True + ) if _channels: channels = _channels[self.pname] else: @@ -860,18 +879,19 @@ def load_channels(self, **kwargs): @staticmethod def filter_files_by_namespace(all_files, namespace): - # Create dict for each file with available namespaces, no namespce is stored under the key None namespace_files = defaultdict(dict) available_namespaces = [] for file in all_files: nspace = file.namespace or None available_namespaces.append(nspace) - namespace_files[f"{file.object}.{file.attribute}"][nspace] = file + namespace_files[f'{file.object}.{file.attribute}'][nspace] = file if namespace not in set(available_namespaces): - _logger.info(f'Could not find manual curation results for {namespace}, returning default' - f' non manually curated spikesorting data') + _logger.info( + f'Could not find manual curation results for {namespace}, returning default' + f' non manually curated spikesorting data' + ) # Return the files with the chosen namespace. files = [f.get(namespace, f.get(None, None)) for f in namespace_files.values()] @@ -879,8 +899,9 @@ def filter_files_by_namespace(all_files, namespace): files = [f for f in files if f] return files - def load_spike_sorting(self, spike_sorter='iblsorter', revision=None, enforce_version=False, good_units=False, - namespace=None, **kwargs): + def load_spike_sorting( + self, spike_sorter='iblsorter', revision=None, enforce_version=False, good_units=False, namespace=None, **kwargs + ): """ Loads spikes, clusters and channels @@ -910,8 +931,10 @@ def load_spike_sorting(self, spike_sorter='iblsorter', revision=None, enforce_ve self.revision = revision if good_units and namespace is not None: - _logger.info('Good units table does not exist for manually curated spike sorting. Pass in namespace with' - 'good_units=False and filter the spikes post hoc by the good clusters.') + _logger.info( + 'Good units table does not exist for manually curated spike sorting. Pass in namespace with' + 'good_units=False and filter the spikes post hoc by the good clusters.' 
+ ) return [None] * 3 objects = ['passingSpikes', 'clusters', 'channels'] if good_units else None self.download_spike_sorting(spike_sorter=spike_sorter, revision=revision, objects=objects, **kwargs) @@ -936,17 +959,18 @@ def _assert_version_consistency(self): for k in ['spikes', 'clusters', 'channels', 'passingSpikes']: for fn in self.files.get(k, []): if self.spike_sorter: - assert fn.relative_to(self.session_path).parts[2] == self.spike_sorter, \ - f"You required strict version {self.spike_sorter}, {fn} does not match" + assert fn.relative_to(self.session_path).parts[2] == self.spike_sorter, ( + f'You required strict version {self.spike_sorter}, {fn} does not match' + ) if self.revision: - assert fn.revision == self.revision, \ - f"You required strict revision {self.revision}, {fn} does not match" + assert fn.revision == self.revision, f'You required strict revision {self.revision}, {fn} does not match' @staticmethod def compute_metrics(spikes, clusters=None): nc = clusters['channels'].size if clusters else np.unique(spikes['clusters']).size - metrics = pd.DataFrame(quick_unit_metrics( - spikes['clusters'], spikes['times'], spikes['amps'], spikes['depths'], cluster_ids=np.arange(nc))) + metrics = pd.DataFrame( + quick_unit_metrics(spikes['clusters'], spikes['times'], spikes['amps'], spikes['depths'], cluster_ids=np.arange(nc)) + ) return metrics @staticmethod @@ -971,7 +995,7 @@ def merge_clusters(spikes, clusters, channels, cache_dir=None, compute_metrics=F if metrics.shape[0] != nc: metrics = None if metrics is None or compute_metrics is True: - _logger.debug("recompute clusters metrics") + _logger.debug('recompute clusters metrics') metrics = SpikeSortingLoader.compute_metrics(spikes, clusters) if isinstance(cache_dir, Path): metrics.to_parquet(Path(cache_dir).joinpath('clusters.metrics.pqt')) @@ -994,12 +1018,15 @@ def _get_probe_info(self, revision=None): revision = revision if revision is not None else self.revision if self._sync is None: timestamps = self.one.load_dataset( - self.eid, dataset='_spikeglx_*.timestamps.npy', collection=f'raw_ephys_data/{self.pname}', revision=revision) + self.eid, dataset='_spikeglx_*.timestamps.npy', collection=f'raw_ephys_data/{self.pname}', revision=revision + ) _ = self.one.load_dataset( # this is not used here but we want to trigger the download for potential tasks - self.eid, dataset='_spikeglx_*.sync.npy', collection=f'raw_ephys_data/{self.pname}', revision=revision) + self.eid, dataset='_spikeglx_*.sync.npy', collection=f'raw_ephys_data/{self.pname}', revision=revision + ) try: - ap_meta = spikeglx.read_meta_data(self.one.load_dataset( - self.eid, dataset='_spikeglx_*.ap.meta', collection=f'raw_ephys_data/{self.pname}')) + ap_meta = spikeglx.read_meta_data( + self.one.load_dataset(self.eid, dataset='_spikeglx_*.ap.meta', collection=f'raw_ephys_data/{self.pname}') + ) fs = spikeglx._get_fs_from_meta(ap_meta) except ALFObjectNotFound: ap_meta = None @@ -1032,15 +1059,17 @@ def samples2times(self, values, direction='forward'): @property def pid2ref(self): - return f"{self.one.eid2ref(self.eid, as_dict=False)}_{self.pname}" + return f'{self.one.eid2ref(self.eid, as_dict=False)}_{self.pname}' def _default_plot_title(self, spikes): - title = f"{self.pid2ref}, {self.pid} \n" \ - f"{spikes['clusters'].size:_} spikes, {np.unique(spikes['clusters']).size:_} clusters" + title = ( + f'{self.pid2ref}, {self.pid} \n{spikes["clusters"].size:_} spikes, {np.unique(spikes["clusters"]).size:_} clusters' + ) return title - def raster(self, spikes, channels, 
save_dir=None, br=None, label='raster', time_series=None, - drift=None, title=None, **kwargs): + def raster( + self, spikes, channels, save_dir=None, br=None, label='raster', time_series=None, drift=None, title=None, **kwargs + ): """ :param spikes: spikes dictionary or Bunch :param channels: channels dictionary or Bunch. @@ -1054,13 +1083,14 @@ def raster(self, spikes, channels, save_dir=None, br=None, label='raster', time_ """ br = br or BrainRegions() time_series = time_series or {} - fig, axs = plt.subplots(2, 2, gridspec_kw={ - 'width_ratios': [.95, .05], 'height_ratios': [.1, .9]}, figsize=(16, 9), sharex='col') + fig, axs = plt.subplots( + 2, 2, gridspec_kw={'width_ratios': [0.95, 0.05], 'height_ratios': [0.1, 0.9]}, figsize=(16, 9), sharex='col' + ) axs[0, 1].set_axis_off() # axs[0, 0].set_xticks([]) if kwargs is None: # set default raster plot parameters - kwargs = {"t_bin": 0.007, "d_bin": 10, "vmax": 0.5} + kwargs = {'t_bin': 0.007, 'd_bin': 10, 'vmax': 0.5} brainbox.plot.driftmap(spikes['times'], spikes['depths'], ax=axs[1, 0], **kwargs) if title is None: title = self._default_plot_title(spikes) @@ -1068,8 +1098,14 @@ def raster(self, spikes, channels, save_dir=None, br=None, label='raster', time_ for k, ts in time_series.items(): vertical_lines(ts, ymin=0, ymax=3800, ax=axs[1, 0]) if 'atlas_id' in channels: - plot_brain_regions(channels['atlas_id'], channel_depths=channels['axial_um'], - brain_regions=br, display=True, ax=axs[1, 1], title=self.histology) + plot_brain_regions( + channels['atlas_id'], + channel_depths=channels['axial_um'], + brain_regions=br, + display=True, + ax=axs[1, 1], + title=self.histology, + ) axs[1, 0].set_ylim(0, 3800) axs[1, 0].set_xlim(spikes['times'][0], spikes['times'][-1]) fig.tight_layout() @@ -1079,28 +1115,33 @@ def raster(self, spikes, channels, save_dir=None, br=None, label='raster', time_ if 'drift' in self.files: drift = self._load_object(self.files['drift'], wildcards=self.one.wildcards) if isinstance(drift, dict): - axs[0, 0].plot(drift['times'], drift['um'], 'k', alpha=.5) + axs[0, 0].plot(drift['times'], drift['um'], 'k', alpha=0.5) axs[0, 0].set(ylim=[-15, 15]) if save_dir is not None: - png_file = save_dir.joinpath(f"{self.pid}_{self.pid2ref}_{label}.png") if Path(save_dir).is_dir() else Path(save_dir) + png_file = save_dir.joinpath(f'{self.pid}_{self.pid2ref}_{label}.png') if Path(save_dir).is_dir() else Path(save_dir) fig.savefig(png_file) plt.close(fig) gc.collect() else: return fig, axs - def plot_rawdata_snippet(self, sr, spikes, clusters, t0, - channels=None, - br: BrainRegions = None, - save_dir=None, - label='raster', - gain=-93, - title=None): - + def plot_rawdata_snippet( + self, + sr, + spikes, + clusters, + t0, + channels=None, + br: BrainRegions = None, + save_dir=None, + label='raster', + gain=-93, + title=None, + ): # compute the raw data offset and destripe, we take 400ms around t0 first_sample, last_sample = (int((t0 - 0.2) * sr.fs), int((t0 + 0.2) * sr.fs)) - raw = sr[first_sample:last_sample, :-sr.nsync].T + raw = sr[first_sample:last_sample, : -sr.nsync].T channel_labels = channels['labels'] if (channels is not None) and ('labels' in channels) else True destriped = ibldsp.voltage.destripe(raw, sr.fs, channel_labels=channel_labels) # filter out the spikes according to good/bad clusters and to the time slice @@ -1111,21 +1152,27 @@ def plot_rawdata_snippet(self, sr, spikes, clusters, t0, if title is None: title = self._default_plot_title(spikes) # display the raw data snippet with spikes overlaid - fig, axs = 
plt.subplots(1, 2, gridspec_kw={'width_ratios': [.95, .05]}, figsize=(16, 9), sharex='col') + fig, axs = plt.subplots(1, 2, gridspec_kw={'width_ratios': [0.95, 0.05]}, figsize=(16, 9), sharex='col') Density(destriped, fs=sr.fs, taxis=1, gain=gain, ax=axs[0], t0=t0 - 0.2, unit='s') - axs[0].scatter(ss[sok] / sr.fs, sc[sok], color="green", alpha=0.5) - axs[0].scatter(ss[~sok] / sr.fs, sc[~sok], color="red", alpha=0.5) + axs[0].scatter(ss[sok] / sr.fs, sc[sok], color='green', alpha=0.5) + axs[0].scatter(ss[~sok] / sr.fs, sc[~sok], color='red', alpha=0.5) axs[0].set(title=title, xlim=[t0 - 0.035, t0 + 0.035]) # adds the channel locations if available if (channels is not None) and ('atlas_id' in channels): br = br or BrainRegions() - plot_brain_regions(channels['atlas_id'], channel_depths=channels['axial_um'], - brain_regions=br, display=True, ax=axs[1], title=self.histology) + plot_brain_regions( + channels['atlas_id'], + channel_depths=channels['axial_um'], + brain_regions=br, + display=True, + ax=axs[1], + title=self.histology, + ) axs[1].get_yaxis().set_visible(False) fig.tight_layout() if save_dir is not None: - png_file = save_dir.joinpath(f"{self.pid}_{self.pid2ref}_{label}.png") if Path(save_dir).is_dir() else Path(save_dir) + png_file = save_dir.joinpath(f'{self.pid}_{self.pid2ref}_{label}.png') if Path(save_dir).is_dir() else Path(save_dir) fig.savefig(png_file) plt.close(fig) gc.collect() @@ -1200,6 +1247,7 @@ class SessionLoader: functions: >>> sess_loader.load_wheel(sampling_rate=100) """ + one: One = None session_path: ALFPath = '' eid: str = '' @@ -1217,8 +1265,10 @@ def __post_init__(self): Checks for required inputs, sets session_path and eid, creates data_info table. """ if self.one is None: - raise ValueError("An input to one is required. If not connection to a database is desired, it can be " - "a fully local instance of One.") + raise ValueError( + 'An input to one is required. If not connection to a database is desired, it can be ' + 'a fully local instance of One.' 
+ ) # If session path is given, takes precedence over eid if self.session_path is not None and self.session_path != '': self.eid = self.one.to_eid(self.session_path) @@ -1228,15 +1278,9 @@ def __post_init__(self): if self.eid is not None and self.eid != '': self.session_path = self.one.eid2path(self.eid) else: - raise ValueError("If no session path is given, eid is required.") - - data_names = [ - 'trials', - 'wheel', - 'pose', - 'motion_energy', - 'pupil' - ] + raise ValueError('If no session path is given, eid is required.') + + data_names = ['trials', 'wheel', 'pose', 'motion_energy', 'pupil'] self.data_info = pd.DataFrame(columns=['name', 'is_loaded'], data=zip(data_names, [False] * len(data_names))) def load_session_data(self, trials=True, wheel=True, pose=True, motion_energy=True, pupil=True, reload=False): @@ -1265,33 +1309,21 @@ def load_session_data(self, trials=True, wheel=True, pose=True, motion_energy=Tr Whether to reload data that has already been loaded into this SessionLoader object, default is False """ load_df = self.data_info.copy() - load_df['to_load'] = [ - trials, - wheel, - pose, - motion_energy, - pupil - ] - load_df['load_func'] = [ - self.load_trials, - self.load_wheel, - self.load_pose, - self.load_motion_energy, - self.load_pupil - ] + load_df['to_load'] = [trials, wheel, pose, motion_energy, pupil] + load_df['load_func'] = [self.load_trials, self.load_wheel, self.load_pose, self.load_motion_energy, self.load_pupil] for idx, row in load_df.iterrows(): if row['to_load'] is False: - _logger.debug(f"Not loading {row['name']} data, set to False.") + _logger.debug(f'Not loading {row["name"]} data, set to False.') elif row['is_loaded'] is True and reload is False: - _logger.debug(f"Not loading {row['name']} data, is already loaded and reload=False.") + _logger.debug(f'Not loading {row["name"]} data, is already loaded and reload=False.') else: try: - _logger.info(f"Loading {row['name']} data") + _logger.info(f'Loading {row["name"]} data') row['load_func']() self.data_info.loc[idx, 'is_loaded'] = True except BaseException as e: - _logger.warning(f"Could not load {row['name']} data.") + _logger.warning(f'Could not load {row["name"]} data.') _logger.debug(e) def _find_behaviour_collection(self, obj): @@ -1312,8 +1344,10 @@ def _find_behaviour_collection(self, obj): if len(set(collections)) == 1: return collections[0] else: - _logger.error(f'Multiple collections found {collections}. Specify collection when loading, ' - f'e.g sl.load_{obj}(collection="{collections[0]}")') + _logger.error( + f'Multiple collections found {collections}. 
Specify collection when loading, ' + f'e.g sl.load_{obj}(collection="{collections[0]}")' + ) raise ALFMultipleCollectionsFound def load_trials(self, collection=None): @@ -1331,7 +1365,8 @@ def load_trials(self, collection=None): # itiDuration frequently has a mismatched dimension, and we don't need it, exclude using regex self.one.wildcards = False self.trials = self.one.load_object( - self.eid, 'trials', collection=collection, attribute=r'(?!itiDuration).*', revision=self.revision or None).to_df() + self.eid, 'trials', collection=collection, attribute=r'(?!itiDuration).*', revision=self.revision or None + ).to_df() self.one.wildcards = True self.data_info.loc[self.data_info['name'] == 'trials', 'is_loaded'] = True @@ -1360,9 +1395,11 @@ def load_wheel(self, fs=1000, corner_frequency=20, order=8, collection=None): # resample the wheel position and compute velocity, acceleration self.wheel = pd.DataFrame(columns=['times', 'position', 'velocity', 'acceleration']) self.wheel['position'], self.wheel['times'] = interpolate_position( - wheel_raw['timestamps'], wheel_raw['position'], freq=fs) + wheel_raw['timestamps'], wheel_raw['position'], freq=fs + ) self.wheel['velocity'], self.wheel['acceleration'] = velocity_filtered( - self.wheel['position'], fs=fs, corner_frequency=corner_frequency, order=order) + self.wheel['position'], fs=fs, corner_frequency=corner_frequency, order=order + ) self.wheel = self.wheel.apply(np.float32) self.data_info.loc[self.data_info['name'] == 'wheel', 'is_loaded'] = True @@ -1388,7 +1425,8 @@ def load_pose(self, likelihood_thr=0.9, views=['left', 'right', 'body'], tracker self.pose = {} for view in views: pose_raw = self.one.load_object( - self.eid, f'{view}Camera', attribute=[tracker, 'times'], revision=self.revision or None) + self.eid, f'{view}Camera', attribute=[tracker, 'times'], revision=self.revision or None + ) # Double check if video timestamps are correct length or can be fixed times_fixed, dlc = self._check_video_timestamps(view, pose_raw['times'], pose_raw[tracker]) self.pose[f'{view}Camera'] = likelihood_threshold(dlc, likelihood_thr) @@ -1409,17 +1447,15 @@ def load_motion_energy(self, views=['left', 'right', 'body']): views: list List of camera views for which to try and load data. 
Possible options are {'left', 'right', 'body'} """ - names = {'left': 'whiskerMotionEnergy', - 'right': 'whiskerMotionEnergy', - 'body': 'bodyMotionEnergy'} + names = {'left': 'whiskerMotionEnergy', 'right': 'whiskerMotionEnergy', 'body': 'bodyMotionEnergy'} # empty the dictionary so that if one loads only one view, after having loaded several, the others don't linger self.motion_energy = {} for view in views: me_raw = self.one.load_object( - self.eid, f'{view}Camera', attribute=['ROIMotionEnergy', 'times'], revision=self.revision or None) + self.eid, f'{view}Camera', attribute=['ROIMotionEnergy', 'times'], revision=self.revision or None + ) # Double check if video timestamps are correct length or can be fixed - times_fixed, motion_energy = self._check_video_timestamps( - view, me_raw['times'], me_raw['ROIMotionEnergy']) + times_fixed, motion_energy = self._check_video_timestamps(view, me_raw['times'], me_raw['ROIMotionEnergy']) self.motion_energy[f'{view}Camera'] = pd.DataFrame(columns=[names[view]], data=motion_energy) self.motion_energy[f'{view}Camera'].insert(0, 'times', times_fixed) self.data_info.loc[self.data_info['name'] == 'motion_energy', 'is_loaded'] = True @@ -1430,7 +1466,7 @@ def load_licks(self): """ pass - def load_pupil(self, snr_thresh=5.): + def load_pupil(self, snr_thresh=5.0): """ Function to load raw and smoothed pupil diameter data from the left camera into SessionLoader.pupil. @@ -1450,8 +1486,7 @@ def load_pupil(self, snr_thresh=5.): # If unavailable compute on the fly else: _logger.info('Pupil diameter not available, trying to compute on the fly.') - if (self.data_info[self.data_info['name'] == 'pose']['is_loaded'].values[0] - and 'leftCamera' in self.pose.keys()): + if self.data_info[self.data_info['name'] == 'pose']['is_loaded'].values[0] and 'leftCamera' in self.pose.keys(): # If pose data is already loaded, we don't know if it was threshold at 0.9, so we need a little stunt copy_pose = self.pose['leftCamera'].copy() # Save the previously loaded pose data self.load_pose(views=['left'], likelihood_thr=0.9) # Load new with threshold 0.9 @@ -1465,16 +1500,18 @@ def load_pupil(self, snr_thresh=5.): try: self.pupil['pupilDiameter_smooth'] = get_smooth_pupil_diameter(self.pupil['pupilDiameter_raw'], 'left') except BaseException as e: - _logger.error("Loaded raw pupil diameter but computing smooth pupil diameter failed. " - "Saving all NaNs for pupilDiameter_smooth.") + _logger.error( + 'Loaded raw pupil diameter but computing smooth pupil diameter failed. ' + 'Saving all NaNs for pupilDiameter_smooth.' 
+ ) _logger.debug(e) self.pupil['pupilDiameter_smooth'] = np.nan if not np.all(np.isnan(self.pupil['pupilDiameter_smooth'])): - good_idxs = np.where( - ~np.isnan(self.pupil['pupilDiameter_smooth']) & ~np.isnan(self.pupil['pupilDiameter_raw']))[0] - snr = (np.var(self.pupil['pupilDiameter_smooth'][good_idxs]) / - (np.var(self.pupil['pupilDiameter_smooth'][good_idxs] - self.pupil['pupilDiameter_raw'][good_idxs]))) + good_idxs = np.where(~np.isnan(self.pupil['pupilDiameter_smooth']) & ~np.isnan(self.pupil['pupilDiameter_raw']))[0] + snr = np.var(self.pupil['pupilDiameter_smooth'][good_idxs]) / ( + np.var(self.pupil['pupilDiameter_smooth'][good_idxs] - self.pupil['pupilDiameter_raw'][good_idxs]) + ) if snr < snr_thresh: self.pupil = pd.DataFrame() raise ValueError(f'Pupil diameter SNR ({snr:.2f}) below threshold SNR ({snr_thresh}), removing data.') @@ -1496,7 +1533,7 @@ def _check_video_timestamps(self, view, video_timestamps, video_data): # This is because the first few frames are sometimes not recorded. We can remove the first few # timestamps in this case elif video_timestamps.shape[0] > video_data.shape[0]: - video_timestamps_fixed = video_timestamps[-video_data.shape[0]:] + video_timestamps_fixed = video_timestamps[-video_data.shape[0] :] return video_timestamps_fixed, video_data else: return video_timestamps, video_data @@ -1510,6 +1547,7 @@ class EphysSessionLoader(SessionLoader): To select for a specific probe >>> EphysSessionLoader(eid=eid, one=one, pid=pid) """ + def __init__(self, *args, pname=None, pid=None, **kwargs): """ Needs an active connection in order to get the list of insertions in the session @@ -1549,10 +1587,10 @@ class PhotometrySessionLoader(SessionLoader): def __init__(self, *args, photometry_collection: str = 'photometry', **kwargs): self.photometry_collection = photometry_collection self.revision = kwargs.get('revision', None) - + # determine if loading by eid or session path self.load_by_path = True if 'session_path' in kwargs else False - + super().__init__(*args, **kwargs) def load_session_data(self, **kwargs): @@ -1572,7 +1610,7 @@ def load_photometry( collection=self.photometry_collection, revision=self.revision, ) - else: # load by eid + else: # load by eid raw_dfs = fpio.from_eid( self.eid, self.one, @@ -1594,4 +1632,10 @@ def load_photometry( ) raw_dfs[band] = df.loc[ix] + # the above indexing can lead to unevenly shaped bands. + # Cut to shortest + n = np.min([df.shape[0] for _, df in raw_dfs.items()]) + for band in raw_dfs.keys(): + raw_dfs[band] = raw_dfs[band].iloc[:n] + self.photometry = raw_dfs From 5d2204059bc4c7640952f9f5ce394ffc033b3f27 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Thu, 9 Oct 2025 16:55:56 +0100 Subject: [PATCH 80/80] flake8 --- brainbox/io/one.py | 2 +- ibllib/tests/test_neurophotometrics.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/brainbox/io/one.py b/brainbox/io/one.py index 76d01bff3..ebe9c3f74 100644 --- a/brainbox/io/one.py +++ b/brainbox/io/one.py @@ -1533,7 +1533,7 @@ def _check_video_timestamps(self, view, video_timestamps, video_data): # This is because the first few frames are sometimes not recorded. 
We can remove the first few # timestamps in this case elif video_timestamps.shape[0] > video_data.shape[0]: - video_timestamps_fixed = video_timestamps[-video_data.shape[0] :] + video_timestamps_fixed = video_timestamps[-video_data.shape[0]:] return video_timestamps_fixed, video_data else: return video_timestamps, video_data diff --git a/ibllib/tests/test_neurophotometrics.py b/ibllib/tests/test_neurophotometrics.py index fd57ba209..6f29ce509 100644 --- a/ibllib/tests/test_neurophotometrics.py +++ b/ibllib/tests/test_neurophotometrics.py @@ -2,10 +2,10 @@ import unittest import tempfile -from pathlib import Path -import iblphotometry_tests -from ibllib.pipes.neurophotometrics import FibrePhotometryBpodSync -from ibllib.io import session_params +# from pathlib import Path +# import iblphotometry_tests +# from ibllib.pipes.neurophotometrics import FibrePhotometryBpodSync +# from ibllib.io import session_params # Mock suit2p which is imported in MesoscopePreprocess # attrs = {'default_ops.return_value': {}}
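
Illustrative note (not part of the patch series above): a minimal usage sketch of the PhotometrySessionLoader added in PATCH 77 and adjusted in PATCH 79, assuming an ibllib/iblphotometry environment with an ONE connection and a session that has photometry registered under the default 'photometry' collection. The eid below is a placeholder, and the loop at the end is only there to show the shape of the returned data.

    # Usage sketch under the assumptions stated above; the eid is a placeholder.
    from one.api import ONE
    from brainbox.io.one import PhotometrySessionLoader

    one = ONE()
    eid = 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx'  # placeholder session id

    # load by eid (passing session_path instead would switch to path-based loading)
    psl = PhotometrySessionLoader(one=one, eid=eid, photometry_collection='photometry')
    psl.load_trials()
    # keep 5 s of signal before the first trial and after the last trial,
    # as done by load_photometry when restrict_to_session=True
    psl.load_photometry(restrict_to_session=True, pre=5, post=5)

    # psl.photometry is a dict of per-band dataframes indexed by time;
    # after PATCH 79 all bands are truncated to the same number of samples
    for band, df in psl.photometry.items():
        print(band, df.shape, df.index.values[0], df.index.values[-1])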