From 06b04781012847d1af501dfbb2d53dfc921e0638 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 13 Jan 2025 18:58:46 +0000 Subject: [PATCH 01/80] updated requirements, updated pipelines --- ibllib/pipes/neurophotometrics.py | 87 +++---------------------------- requirements.txt | 1 + 2 files changed, 8 insertions(+), 80 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 18f558c59..b55d9d94a 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -8,63 +8,10 @@ import ibllib.io.session_params from ibllib.pipes import base_tasks from iblutil.io import jsonable - +import iblphotometry.io as fpio _logger = logging.getLogger('ibllib') -""" -Neurophotometrics FP3002 specific information. -The light source map refers to the available LEDs on the system. -The flags refers to the byte encoding of led states in the system. -""" -LIGHT_SOURCE_MAP = { - 'color': ['None', 'Violet', 'Blue', 'Green'], - 'wavelength': [0, 415, 470, 560], - 'name': ['None', 'Isosbestic', 'GCaMP', 'RCaMP'], -} - -LED_STATES = { - 'Condition': { - 0: 'No additional signal', - 1: 'Output 1 signal HIGH', - 2: 'Output 0 signal HIGH', - 3: 'Stimulation ON', - 4: 'GPIO Line 2 HIGH', - 5: 'GPIO Line 3 HIGH', - 6: 'Input 1 HIGH', - 7: 'Input 0 HIGH', - 8: 'Output 0 signal HIGH + Stimulation', - 9: 'Output 0 signal HIGH + Input 0 signal HIGH', - 10: 'Input 0 signal HIGH + Stimulation', - 11: 'Output 0 HIGH + Input 0 HIGH + Stimulation', - }, - 'No LED ON': {0: 0, 1: 8, 2: 16, 3: 32, 4: 64, 5: 128, 6: 256, 7: 512, 8: 48, 9: 528, 10: 544, 11: 560}, - 'L415': {0: 1, 1: 9, 2: 17, 3: 33, 4: 65, 5: 129, 6: 257, 7: 513, 8: 49, 9: 529, 10: 545, 11: 561}, - 'L470': {0: 2, 1: 10, 2: 18, 3: 34, 4: 66, 5: 130, 6: 258, 7: 514, 8: 50, 9: 530, 10: 546, 11: 562}, - 'L560': {0: 4, 1: 12, 2: 20, 3: 36, 4: 68, 5: 132, 6: 260, 7: 516, 8: 52, 9: 532, 10: 548, 11: 564} -} - - -def _channel_meta(light_source_map=None): - """ - Return table of light source wavelengths and corresponding colour labels. - - Parameters - ---------- - light_source_map : dict - An optional map of light source wavelengths (nm) used and their corresponding colour name. - - Returns - ------- - pandas.DataFrame - A sorted table of wavelength and colour name. 
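The LIGHT_SOURCE_MAP / LED_STATES tables removed above encode how the FP3002 'LedState' flags map onto the light sources (this decoding now moves to iblphotometry.io). A minimal sketch of the lookup, not part of the diff, mirroring the logic that this patch removes further down; the example flag value is taken from the table:

    import numpy as np
    import pandas as pd
    led_states = pd.DataFrame(LED_STATES).set_index('Condition')
    state = 18  # example raw value of the 'LedState' column
    _, ic = np.where(led_states.values == state)
    print(led_states.columns[ic[0]])  # -> 'L470', i.e. the 470 nm / GCaMP channel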
- """ - light_source_map = light_source_map or LIGHT_SOURCE_MAP - meta = pd.DataFrame.from_dict(light_source_map) - meta.index.rename('channel_id', inplace=True) - return meta - - class FibrePhotometrySync(base_tasks.DynamicTask): priority = 90 job_size = 'small' @@ -146,39 +93,19 @@ def _run(self, **kwargs): fcn_nph_to_bpod_times, valid_bounds = self._sync_bpod_neurophotometrics() case _: raise NotImplementedError('Syncing with daq is not supported yet.') + # 2) reformat the raw data with wavelengths and meta-data - folder_raw_photometry = self.session_path.joinpath(self.device_collection) - fp_data = pd.read_parquet(folder_raw_photometry.joinpath('_neurophotometrics_fpData.raw.pqt')) - # Load channels and wavelength information - channel_meta_map = _channel_meta() - if (fn := folder_raw_photometry.joinpath('_neurophotometrics_fpData.channels.csv')).exists(): - led_states = pd.read_csv(fn) - else: - led_states = pd.DataFrame(LED_STATES) - led_states = led_states.set_index('Condition') - # Extract signal columns into 2D array - rois = list(self.kwargs['fibers'].keys()) - out_df = fp_data.filter(items=rois, axis=1).sort_index(axis=1) - out_df['times'] = fcn_nph_to_bpod_times(fp_data['SystemTimestamp']) - out_df['valid'] = np.logical_and(out_df['times'] >= valid_bounds[0], out_df['times'] <= valid_bounds[1]) - out_df['wavelength'] = np.nan - out_df['name'] = '' - out_df['color'] = '' - # Extract channel index - states = fp_data.get('LedState', fp_data.get('Flags', None)) - for state in states.unique(): - ir, ic = np.where(led_states == state) - if ic.size == 0: - continue - for cn in ['name', 'color', 'wavelength']: - out_df.loc[states == state, cn] = channel_meta_map.iloc[ic[0]][cn] + folder_raw_photometry = self.session_path.joinpath(self.device_collection) + out_df = fpio.from_raw_neurophotometrics_file(folder_raw_photometry.joinpath('_neurophotometrics_fpData.raw.pqt')) + # 3) label the brain regions rois = [] c = 0 for k, v in self.kwargs['fibers'].items(): rois.append({'ROI': k, 'fiber': f'fiber{c:02d}', 'brain_region': v['location']}) df_rois = pd.DataFrame(rois).set_index('ROI') - # to finish we write the dataframes to disk + + # 4) to finish we write the dataframes to disk out_path = self.session_path.joinpath('alf', 'photometry') out_path.mkdir(parents=True, exist_ok=True) out_df.to_parquet(file_signal := out_path.joinpath('photometry.signal.pqt')) diff --git a/requirements.txt b/requirements.txt index 815c73b6a..614008684 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,3 +33,4 @@ psychofit slidingRP>=1.1.1 # steinmetz lab refractory period metrics pyqt5 ibl-style +ibl-photometry \ No newline at end of file From bff546991d957993a5cef2aa88fea8b9f0cebaad Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 17 Jan 2025 10:37:47 +0000 Subject: [PATCH 02/80] fibers are now named fiber_{brain_region} in the extraction process --- ibllib/pipes/neurophotometrics.py | 54 ++++++++++++++++++------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index b55d9d94a..5e5628582 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -9,6 +9,7 @@ from ibllib.pipes import base_tasks from iblutil.io import jsonable import iblphotometry.io as fpio + _logger = logging.getLogger('ibllib') @@ -18,8 +19,7 @@ class FibrePhotometrySync(base_tasks.DynamicTask): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.device_collection = 
self.get_device_collection( - 'neurophotometrics', device_collection='raw_photometry_data') + self.device_collection = self.get_device_collection('neurophotometrics', device_collection='raw_photometry_data') # we will work with the first protocol here for task in self.session_params['tasks']: self.task_protocol = next(k for k in task) @@ -29,12 +29,16 @@ def __init__(self, *args, **kwargs): @property def signature(self): signature = { - 'input_files': [('_neurophotometrics_fpData.raw.pqt', self.device_collection, True, True), - ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), - ('_neurophotometrics_fpData.channels.csv', self.device_collection, True, True), - ('_neurophotometrics_fpData.digitalIntputs.pqt', self.device_collection, True)], - 'output_files': [('photometry.signal.pqt', 'alf/photometry', True), - ('photometryROI.locations.pqt', 'alf/photometry', True)] + 'input_files': [ + ('_neurophotometrics_fpData.raw.pqt', self.device_collection, True, True), + ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), + ('_neurophotometrics_fpData.channels.csv', self.device_collection, True, True), + ('_neurophotometrics_fpData.digitalIntputs.pqt', self.device_collection, True), + ], + 'output_files': [ + ('photometry.signal.pqt', 'alf/photometry', True), + ('photometryROI.locations.pqt', 'alf/photometry', True), + ], } return signature @@ -57,21 +61,26 @@ def _sync_bpod_neurophotometrics(self): # we get the timestamps of the states from the bpod data tbpod = [] for sname in sync_states_names: - tbpod.append(np.array( - [bd['States timestamps'][sname][0][0] + bd['Trial start timestamp'] for bd in bpod_data if - sname in bd['States timestamps']])) + tbpod.append( + np.array( + [ + bd['States timestamps'][sname][0][0] + bd['Trial start timestamp'] + for bd in bpod_data + if sname in bd['States timestamps'] + ] + ) + ) tbpod = np.sort(np.concatenate(tbpod)) tbpod = tbpod[~np.isnan(tbpod)] # we get the timestamps for the photometry data - tph = df_digital_inputs['SystemTimestamp'].values[df_digital_inputs['Channel'] == self.kwargs['sync_channel']] + sync_channel = self.session_params['neurophotometrics']['sync_channel'] + tph = df_digital_inputs['SystemTimestamp'].values[df_digital_inputs['Channel'] == sync_channel] tph = tph[15:] # TODO: we may want to detect the spacers before removing it, especially for successive sessions # sync the behaviour events to the photometry timestamps - fcn_nph_to_bpod_times, drift_ppm, iph, ibpod = ibldsp.utils.sync_timestamps( - tph, tbpod, return_indices=True, linear=True) + fcn_nph_to_bpod_times, drift_ppm, iph, ibpod = ibldsp.utils.sync_timestamps(tph, tbpod, return_indices=True, linear=True) # then we check the alignment, should be less than the screen refresh rate tcheck = fcn_nph_to_bpod_times(tph[iph]) - tbpod[ibpod] - _logger.info( - f'sync: n trials {len(bpod_data)}, n bpod sync {len(tbpod)}, n photometry {len(tph)}, n match {len(iph)}') + _logger.info(f'sync: n trials {len(bpod_data)}, n bpod sync {len(tbpod)}, n photometry {len(tph)}, n match {len(iph)}') assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' assert len(iph) / len(tbpod) > 0.95, 'Sync issue detected, less than 95% of the bpod events matched' valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] @@ -93,16 +102,17 @@ def _run(self, **kwargs): fcn_nph_to_bpod_times, valid_bounds = self._sync_bpod_neurophotometrics() case _: raise NotImplementedError('Syncing with daq is not 
supported yet.') - + # 2) reformat the raw data with wavelengths and meta-data - folder_raw_photometry = self.session_path.joinpath(self.device_collection) - out_df = fpio.from_raw_neurophotometrics_file(folder_raw_photometry.joinpath('_neurophotometrics_fpData.raw.pqt')) + folder_raw_photometry = self.session_path.joinpath(self.device_collection) + out_df = fpio.from_raw_neurophotometrics_file_to_ibl_df( + folder_raw_photometry.joinpath('_neurophotometrics_fpData.raw.pqt') + ) # 3) label the brain regions rois = [] - c = 0 - for k, v in self.kwargs['fibers'].items(): - rois.append({'ROI': k, 'fiber': f'fiber{c:02d}', 'brain_region': v['location']}) + for k, v in self.session_params['neurophotometrics']['fibers'].items(): + rois.append({'ROI': k, 'fiber': f'fiber_{v["location"]}', 'brain_region': v['location']}) df_rois = pd.DataFrame(rois).set_index('ROI') # 4) to finish we write the dataframes to disk From 57f7164ff4b83ced7cc715c6fbcc1cd1bbd0f090 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 24 Feb 2025 15:34:44 +0000 Subject: [PATCH 03/80] fixing shifted sync timestamps in the extraction --- ibllib/pipes/dynamic_pipeline.py | 2 -- ibllib/pipes/neurophotometrics.py | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index 8a3a584f7..814f0888a 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -582,8 +582,6 @@ def make_pipeline(session_path, **pkwargs): **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']]) if 'neurophotometrics' in devices: - # {'collection': 'raw_photometry_data', 'datetime': '2024-09-18T16:43:55.207000', - # 'fibers': {'G0': {'location': 'NBM'}, 'G1': {'location': 'SI'}}, 'sync_channel': 1} photometry_kwargs = devices['neurophotometrics'] tasks['FibrePhotometrySync'] = type('FibrePhotometrySync', ( ptasks.FibrePhotometrySync,), {})(**kwargs, **photometry_kwargs) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 5e5628582..46aa28932 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -64,7 +64,7 @@ def _sync_bpod_neurophotometrics(self): tbpod.append( np.array( [ - bd['States timestamps'][sname][0][0] + bd['Trial start timestamp'] + bd['States timestamps'][sname][0][0] + bd['Trial start timestamp'] - bpod_data[0]['Bpod start timestamp'] for bd in bpod_data if sname in bd['States timestamps'] ] @@ -108,6 +108,7 @@ def _run(self, **kwargs): out_df = fpio.from_raw_neurophotometrics_file_to_ibl_df( folder_raw_photometry.joinpath('_neurophotometrics_fpData.raw.pqt') ) + out_df['times'] = fcn_nph_to_bpod_times(out_df['times']) # 3) label the brain regions rois = [] From b60629017b1efb470d090e70bfeede53ae24cda6 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 24 Feb 2025 16:28:04 +0000 Subject: [PATCH 04/80] reading digital inputs file via iblphotometry.io (validated) --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 46aa28932..34c918fde 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -48,7 +48,7 @@ def _sync_bpod_neurophotometrics(self): :return: interpolation function that outputs bpod timestamsp from neurophotometrics timestamps """ folder_raw_photometry = self.session_path.joinpath(self.device_collection) - df_digital_inputs = 
pd.read_parquet(folder_raw_photometry.joinpath('_neurophotometrics_fpData.digitalIntputs.pqt')) + df_digital_inputs = fpio.read_digital_inputs_csv(folder_raw_photometry / '_neurophotometrics_fpData.digitalIntputs.pqt') # normally we should disregard the states and use the sync label. But bpod doesn't log TTL outs, # only the states. This will change in the future but for now we are stuck with this. if 'habituation' in self.task_protocol: From 8ae4039a2332c0c01ee99b1b4921707b11f9f014 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Tue, 4 Mar 2025 12:11:07 +0000 Subject: [PATCH 05/80] fix for reextraction (.pqt file read instead of .csv) for digital_inputs from neurophotometrics --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 34c918fde..22288f54d 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -48,7 +48,7 @@ def _sync_bpod_neurophotometrics(self): :return: interpolation function that outputs bpod timestamsp from neurophotometrics timestamps """ folder_raw_photometry = self.session_path.joinpath(self.device_collection) - df_digital_inputs = fpio.read_digital_inputs_csv(folder_raw_photometry / '_neurophotometrics_fpData.digitalIntputs.pqt') + df_digital_inputs = fpio.read_digital_inputs_file(folder_raw_photometry / '_neurophotometrics_fpData.digitalIntputs.pqt') # normally we should disregard the states and use the sync label. But bpod doesn't log TTL outs, # only the states. This will change in the future but for now we are stuck with this. if 'habituation' in self.task_protocol: From 3fd76236ed8ffa8466dfb71e191459bc06f202ea Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Thu, 6 Mar 2025 15:17:39 +0000 Subject: [PATCH 06/80] bugfix in the extractor after fix in the experiment description file --- ibllib/pipes/neurophotometrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 22288f54d..13f967685 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -73,7 +73,7 @@ def _sync_bpod_neurophotometrics(self): tbpod = np.sort(np.concatenate(tbpod)) tbpod = tbpod[~np.isnan(tbpod)] # we get the timestamps for the photometry data - sync_channel = self.session_params['neurophotometrics']['sync_channel'] + sync_channel = self.session_params['devices']['neurophotometrics']['sync_channel'] tph = df_digital_inputs['SystemTimestamp'].values[df_digital_inputs['Channel'] == sync_channel] tph = tph[15:] # TODO: we may want to detect the spacers before removing it, especially for successive sessions # sync the behaviour events to the photometry timestamps @@ -112,7 +112,7 @@ def _run(self, **kwargs): # 3) label the brain regions rois = [] - for k, v in self.session_params['neurophotometrics']['fibers'].items(): + for k, v in self.session_params['devices']['neurophotometrics']['fibers'].items(): rois.append({'ROI': k, 'fiber': f'fiber_{v["location"]}', 'brain_region': v['location']}) df_rois = pd.DataFrame(rois).set_index('ROI') From 07efbd0b00bf411746a30e6216749e443dd2f100 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 30 Apr 2025 12:52:03 +0100 Subject: [PATCH 07/80] WIP changes for daq sync --- brainbox/io/one.py | 4 +- ibllib/pipes/dynamic_pipeline.py | 176 ++++++++++------- ibllib/pipes/neurophotometrics.py | 311 ++++++++++++++++++++++-------- 3 files changed, 336 insertions(+), 155 
deletions(-) diff --git a/brainbox/io/one.py b/brainbox/io/one.py index b65521549..9e4e3ed5b 100644 --- a/brainbox/io/one.py +++ b/brainbox/io/one.py @@ -1198,7 +1198,7 @@ def timesprobe2times(self, values, direction='forward'): elif direction == 'reverse': return self._sync['reverse'](values) / self._sync['fs'] - def samples2times(self, values, direction='forward'): + def samples2times(self, values, direction='forward', band='ap'): """ Converts ephys sample values to session main clock seconds :param values: numpy array of times in seconds or samples to resync @@ -1206,6 +1206,8 @@ def samples2times(self, values, direction='forward'): (seconds main time to samples probe time) :return: """ + if band == 'lf': + values *= 12 self._get_probe_info() return self._sync[direction](values) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index 13a936d5b..9b3840526 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -20,6 +20,7 @@ :class:`ibllib.io.extractors.base.BaseBpodTrialsExtractor` class, and located in either the personal projects repo or in :py:mod:`ibllib.io.extractors.bpod_trials` module. """ + import logging import re from fnmatch import fnmatch @@ -71,7 +72,7 @@ def acquisition_description_legacy_session(session_path, save=False): def get_acquisition_description(protocol): - """" + """ " This is a set of example acquisition descriptions for experiments - choice_world_recording - choice_world_biased @@ -80,7 +81,7 @@ def get_acquisition_description(protocol): - choice_world_passive That are part of the IBL pipeline """ - if 'ephys' in protocol: # canonical ephys + if 'ephys' in protocol: # canonical ephys devices = { 'cameras': { 'right': {'collection': 'raw_video_data', 'sync_label': 'audio'}, @@ -89,38 +90,32 @@ def get_acquisition_description(protocol): }, 'neuropixel': { 'probe00': {'collection': 'raw_ephys_data/probe00', 'sync_label': 'imec_sync'}, - 'probe01': {'collection': 'raw_ephys_data/probe01', 'sync_label': 'imec_sync'} - }, - 'microphone': { - 'microphone': {'collection': 'raw_behavior_data', 'sync_label': None} + 'probe01': {'collection': 'raw_ephys_data/probe01', 'sync_label': 'imec_sync'}, }, + 'microphone': {'microphone': {'collection': 'raw_behavior_data', 'sync_label': None}}, } acquisition_description = { # this is the current ephys pipeline description 'devices': devices, 'tasks': [ {'ephysChoiceWorld': {'collection': 'raw_behavior_data', 'sync_label': 'bpod'}}, - {'passiveChoiceWorld': {'collection': 'raw_passive_data', 'sync_label': 'bpod'}} + {'passiveChoiceWorld': {'collection': 'raw_passive_data', 'sync_label': 'bpod'}}, ], - 'sync': { - 'nidq': {'collection': 'raw_ephys_data', 'extension': 'bin', 'acquisition_software': 'spikeglx'} - }, + 'sync': {'nidq': {'collection': 'raw_ephys_data', 'extension': 'bin', 'acquisition_software': 'spikeglx'}}, 'procedures': ['Ephys recording with acute probe(s)'], - 'projects': ['ibl_neuropixel_brainwide_01'] + 'projects': ['ibl_neuropixel_brainwide_01'], } else: devices = { 'cameras': { 'left': {'collection': 'raw_video_data', 'sync_label': 'audio'}, }, - 'microphone': { - 'microphone': {'collection': 'raw_behavior_data', 'sync_label': None} - }, + 'microphone': {'microphone': {'collection': 'raw_behavior_data', 'sync_label': None}}, } acquisition_description = { # this is the current ephys pipeline description 'devices': devices, 'sync': {'bpod': {'collection': 'raw_behavior_data'}}, 'procedures': ['Behavior training/tasks'], - 'projects': 
['ibl_neuropixel_brainwide_01'] + 'projects': ['ibl_neuropixel_brainwide_01'], } if 'biased' in protocol: key = 'biasedChoiceWorld' @@ -130,10 +125,7 @@ def get_acquisition_description(protocol): key = 'habituationChoiceWorld' else: raise ValueError(f'Unknown protocol "{protocol}"') - acquisition_description['tasks'] = [{key: { - 'collection': 'raw_behavior_data', - 'sync_label': 'bpod' - }}] + acquisition_description['tasks'] = [{key: {'collection': 'raw_behavior_data', 'sync_label': 'bpod'}}] acquisition_description['version'] = '1.0.0' return acquisition_description @@ -224,7 +216,7 @@ def _get_trials_tasks(session_path, acquisition_description=None, sync_tasks=Non kwargs = {'session_path': session_path, 'one': one} # Syncing tasks - (sync, sync_args), = acquisition_description['sync'].items() + ((sync, sync_args),) = acquisition_description['sync'].items() sync_label = _sync_label(sync, **sync_args) # get the format of the DAQ data. This informs the extractor task sync_args['sync_collection'] = sync_args.pop('collection') # rename the key so it matches task run arguments sync_args['sync_ext'] = sync_args.pop('extension', None) @@ -268,15 +260,16 @@ def _get_trials_tasks(session_path, acquisition_description=None, sync_tasks=Non else: # lookup in the project extraction repo if we find an extractor class import projects.extraction_tasks + if hasattr(projects.extraction_tasks, extractor): task = getattr(projects.extraction_tasks, extractor) elif hasattr(projects.extraction_tasks, extractor + sync_label.capitalize()): task = getattr(btasks, extractor + sync_label.capitalize()) else: raise NotImplementedError( - f'Extractor "{extractor}" not found in main IBL pipeline nor in personal projects') - _logger.debug('%s (protocol #%i, task #%i) = %s.%s', - protocol, i, j, task.__module__, task.__name__) + f'Extractor "{extractor}" not found in main IBL pipeline nor in personal projects' + ) + _logger.debug('%s (protocol #%i, task #%i) = %s.%s', protocol, i, j, task.__module__, task.__name__) # Rename the class to something more informative task_name = f'{task.__name__}_{i:02}' if not (task.__name__.startswith('TrainingStatus') or task.__name__.endswith('RegisterRaw')): @@ -314,13 +307,16 @@ def _get_trials_tasks(session_path, acquisition_description=None, sync_tasks=Non raise NotImplementedError(f'No trials task available for sync namespace "{sync_label}"') compute_status = True tasks[f'RegisterRaw_{protocol}_{i:02}'] = type(f'RegisterRaw_{protocol}_{i:02}', (registration_class,), {})( - **kwargs, **task_kwargs) + **kwargs, **task_kwargs + ) parents = [tasks[f'RegisterRaw_{protocol}_{i:02}']] + sync_tasks tasks[f'Trials_{protocol}_{i:02}'] = type(f'Trials_{protocol}_{i:02}', (behaviour_class,), {})( - **kwargs, **sync_kwargs, **task_kwargs, parents=parents) + **kwargs, **sync_kwargs, **task_kwargs, parents=parents + ) if compute_status: - tasks[f'TrainingStatus_{protocol}_{i:02}'] = type(f'TrainingStatus_{protocol}_{i:02}', ( - btasks.TrainingStatus,), {})(**kwargs, **task_kwargs, parents=[tasks[f'Trials_{protocol}_{i:02}']]) + tasks[f'TrainingStatus_{protocol}_{i:02}'] = type( + f'TrainingStatus_{protocol}_{i:02}', (btasks.TrainingStatus,), {} + )(**kwargs, **task_kwargs, parents=[tasks[f'Trials_{protocol}_{i:02}']]) return tasks @@ -411,11 +407,12 @@ def make_pipeline(session_path, **pkwargs): kwargs = {'session_path': session_path, 'one': pkwargs.get('one')} # Registers the experiment description file - tasks['ExperimentDescriptionRegisterRaw'] = type('ExperimentDescriptionRegisterRaw', - 
(bstasks.ExperimentDescriptionRegisterRaw,), {})(**kwargs) + tasks['ExperimentDescriptionRegisterRaw'] = type( + 'ExperimentDescriptionRegisterRaw', (bstasks.ExperimentDescriptionRegisterRaw,), {} + )(**kwargs) # Syncing tasks - (sync, sync_args), = acquisition_description['sync'].items() + ((sync, sync_args),) = acquisition_description['sync'].items() sync_args = sync_args.copy() # ensure acquisition_description unchanged sync_label = _sync_label(sync, **sync_args) # get the format of the DAQ data. This informs the extractor task sync_args['sync_collection'] = sync_args.pop('collection') # rename the key so it matches task run arguments @@ -426,14 +423,16 @@ def make_pipeline(session_path, **pkwargs): if sync_label == 'nidq' and sync_args['sync_collection'] == 'raw_ephys_data': tasks['SyncRegisterRaw'] = type('SyncRegisterRaw', (etasks.EphysSyncRegisterRaw,), {})(**kwargs, **sync_kwargs) tasks[f'SyncPulses_{sync}'] = type(f'SyncPulses_{sync}', (etasks.EphysSyncPulses,), {})( - **kwargs, **sync_kwargs, parents=[tasks['SyncRegisterRaw']]) + **kwargs, **sync_kwargs, parents=[tasks['SyncRegisterRaw']] + ) sync_tasks = [tasks[f'SyncPulses_{sync}']] elif sync_label == 'timeline': tasks['SyncRegisterRaw'] = type('SyncRegisterRaw', (stasks.SyncRegisterRaw,), {})(**kwargs, **sync_kwargs) elif sync_label == 'nidq': tasks['SyncRegisterRaw'] = type('SyncRegisterRaw', (stasks.SyncMtscomp,), {})(**kwargs, **sync_kwargs) tasks[f'SyncPulses_{sync}'] = type(f'SyncPulses_{sync}', (stasks.SyncPulses,), {})( - **kwargs, **sync_kwargs, parents=[tasks['SyncRegisterRaw']]) + **kwargs, **sync_kwargs, parents=[tasks['SyncRegisterRaw']] + ) sync_tasks = [tasks[f'SyncPulses_{sync}']] elif sync_label == 'tdms': tasks['SyncRegisterRaw'] = type('SyncRegisterRaw', (stasks.SyncRegisterRaw,), {})(**kwargs, **sync_kwargs) @@ -441,9 +440,7 @@ def make_pipeline(session_path, **pkwargs): pass # ATM we don't have anything for this; it may not be needed in the future # Behavior tasks - tasks.update( - _get_trials_tasks(session_path, acquisition_description, sync_tasks=sync_tasks, one=pkwargs.get('one')) - ) + tasks.update(_get_trials_tasks(session_path, acquisition_description, sync_tasks=sync_tasks, one=pkwargs.get('one'))) # Ephys tasks if 'neuropixel' in devices: @@ -463,38 +460,46 @@ def make_pipeline(session_path, **pkwargs): if (nptype == 'NP2.1') or (nptype == 'NP2.4' and nshanks == 1): tasks[f'EphyCompressNP21_{pname}'] = type(f'EphyCompressNP21_{pname}', (etasks.EphysCompressNP21,), {})( - **kwargs, **ephys_kwargs, pname=pname) + **kwargs, **ephys_kwargs, pname=pname + ) all_probes.append(pname) register_tasks.append(tasks[f'EphyCompressNP21_{pname}']) elif nptype == 'NP2.4' and nshanks > 1: tasks[f'EphyCompressNP24_{pname}'] = type(f'EphyCompressNP24_{pname}', (etasks.EphysCompressNP24,), {})( - **kwargs, **ephys_kwargs, pname=pname, nshanks=nshanks) + **kwargs, **ephys_kwargs, pname=pname, nshanks=nshanks + ) register_tasks.append(tasks[f'EphyCompressNP24_{pname}']) all_probes += [f'{pname}{chr(97 + int(shank))}' for shank in range(nshanks)] else: tasks[f'EphysCompressNP1_{pname}'] = type(f'EphyCompressNP1_{pname}', (etasks.EphysCompressNP1,), {})( - **kwargs, **ephys_kwargs, pname=pname) + **kwargs, **ephys_kwargs, pname=pname + ) register_tasks.append(tasks[f'EphysCompressNP1_{pname}']) all_probes.append(pname) if nptype == '3A': tasks['EphysPulses'] = type('EphysPulses', (etasks.EphysPulses,), {})( - **kwargs, **ephys_kwargs, **sync_kwargs, pname=all_probes, parents=register_tasks + sync_tasks) + **kwargs, 
**ephys_kwargs, **sync_kwargs, pname=all_probes, parents=register_tasks + sync_tasks + ) for pname in all_probes: register_task = [reg_task for reg_task in register_tasks if pname[:7] in reg_task.name] if nptype != '3A': tasks[f'EphysPulses_{pname}'] = type(f'EphysPulses_{pname}', (etasks.EphysPulses,), {})( - **kwargs, **ephys_kwargs, **sync_kwargs, pname=[pname], parents=register_task + sync_tasks) + **kwargs, **ephys_kwargs, **sync_kwargs, pname=[pname], parents=register_task + sync_tasks + ) tasks[f'Spikesorting_{pname}'] = type(f'Spikesorting_{pname}', (etasks.SpikeSorting,), {})( - **kwargs, **ephys_kwargs, pname=pname, parents=[tasks[f'EphysPulses_{pname}']]) + **kwargs, **ephys_kwargs, pname=pname, parents=[tasks[f'EphysPulses_{pname}']] + ) else: tasks[f'Spikesorting_{pname}'] = type(f'Spikesorting_{pname}', (etasks.SpikeSorting,), {})( - **kwargs, **ephys_kwargs, pname=pname, parents=[tasks['EphysPulses']]) + **kwargs, **ephys_kwargs, pname=pname, parents=[tasks['EphysPulses']] + ) tasks[f'RawEphysQC_{pname}'] = type(f'RawEphysQC_{pname}', (etasks.RawEphysQC,), {})( - **kwargs, **ephys_kwargs, pname=pname, parents=register_task) + **kwargs, **ephys_kwargs, pname=pname, parents=register_task + ) # Video tasks if 'cameras' in devices: @@ -508,35 +513,33 @@ def make_pipeline(session_path, **pkwargs): tasks[tn] = type((tn := 'VideoConvert'), (vtasks.VideoConvert,), {})(**kwargs, **video_kwargs) dlc_parent_task = tasks['VideoConvert'] tasks[tn] = type((tn := f'VideoSyncQC_{sync}'), (vtasks.VideoSyncQcCamlog,), {})( - **kwargs, **video_kwargs, **sync_kwargs) + **kwargs, **video_kwargs, **sync_kwargs + ) else: - tasks[tn] = type((tn := 'VideoRegisterRaw'), (vtasks.VideoRegisterRaw,), {})( - **kwargs, **video_kwargs) - tasks[tn] = type((tn := 'VideoCompress'), (vtasks.VideoCompress,), {})( - **kwargs, **video_kwargs, **sync_kwargs) + tasks[tn] = type((tn := 'VideoRegisterRaw'), (vtasks.VideoRegisterRaw,), {})(**kwargs, **video_kwargs) + tasks[tn] = type((tn := 'VideoCompress'), (vtasks.VideoCompress,), {})(**kwargs, **video_kwargs, **sync_kwargs) dlc_parent_task = tasks['VideoCompress'] if sync == 'bpod': tasks[tn] = type((tn := f'VideoSyncQC_{sync}'), (vtasks.VideoSyncQcBpod,), {})( - **kwargs, **video_kwargs, **sync_kwargs, parents=[tasks['VideoCompress']]) + **kwargs, **video_kwargs, **sync_kwargs, parents=[tasks['VideoCompress']] + ) elif sync == 'nidq': # Here we restrict to videos that we support (left, right or body) video_kwargs['cameras'] = subset_cams tasks[tn] = type((tn := f'VideoSyncQC_{sync}'), (vtasks.VideoSyncQcNidq,), {})( - **kwargs, **video_kwargs, **sync_kwargs, parents=[tasks['VideoCompress']] + sync_tasks) + **kwargs, **video_kwargs, **sync_kwargs, parents=[tasks['VideoCompress']] + sync_tasks + ) if sync_kwargs['sync'] != 'bpod': # Here we restrict to videos that we support (left, right or body) # Currently there is no plan to run DLC on the belly cam subset_cams = [c for c in cams if c in ('left', 'right', 'body')] video_kwargs['cameras'] = subset_cams - tasks[tn] = type((tn := 'DLC'), (vtasks.DLC,), {})( - **kwargs, **video_kwargs, parents=[dlc_parent_task]) + tasks[tn] = type((tn := 'DLC'), (vtasks.DLC,), {})(**kwargs, **video_kwargs, parents=[dlc_parent_task]) # The PostDLC plots require a trials object for QC # Find the first task that outputs a trials.table dataset - trials_task = ( - t for t in tasks.values() if any('trials.table' in f[0] for f in t.signature.get('output_files', [])) - ) + trials_task = (t for t in tasks.values() if any('trials.table' in 
f[0] for f in t.signature.get('output_files', []))) if trials_task := next(trials_task, None): parents = [tasks['DLC'], tasks[f'VideoSyncQC_{sync}'], trials_task] trials_collection = getattr(trials_task, 'output_collection', 'alf') @@ -544,53 +547,78 @@ def make_pipeline(session_path, **pkwargs): parents = [tasks['DLC'], tasks[f'VideoSyncQC_{sync}']] trials_collection = 'alf' tasks[tn] = type((tn := 'PostDLC'), (vtasks.EphysPostDLC,), {})( - **kwargs, cameras=subset_cams, trials_collection=trials_collection, parents=parents) + **kwargs, cameras=subset_cams, trials_collection=trials_collection, parents=parents + ) # Audio tasks if 'microphone' in devices: - (microphone, micro_kwargs), = devices['microphone'].items() + ((microphone, micro_kwargs),) = devices['microphone'].items() micro_kwargs['device_collection'] = micro_kwargs.pop('collection') if sync_kwargs['sync'] == 'bpod': tasks['AudioRegisterRaw'] = type('AudioRegisterRaw', (atasks.AudioSync,), {})( - **kwargs, **sync_kwargs, **micro_kwargs, collection=micro_kwargs['device_collection']) + **kwargs, **sync_kwargs, **micro_kwargs, collection=micro_kwargs['device_collection'] + ) elif sync_kwargs['sync'] == 'nidq': tasks['AudioRegisterRaw'] = type('AudioRegisterRaw', (atasks.AudioCompress,), {})(**kwargs, **micro_kwargs) # Widefield tasks if 'widefield' in devices: - (_, wfield_kwargs), = devices['widefield'].items() + ((_, wfield_kwargs),) = devices['widefield'].items() wfield_kwargs['device_collection'] = wfield_kwargs.pop('collection') tasks['WideFieldRegisterRaw'] = type('WidefieldRegisterRaw', (wtasks.WidefieldRegisterRaw,), {})( - **kwargs, **wfield_kwargs) + **kwargs, **wfield_kwargs + ) tasks['WidefieldCompress'] = type('WidefieldCompress', (wtasks.WidefieldCompress,), {})( - **kwargs, **wfield_kwargs, parents=[tasks['WideFieldRegisterRaw']]) + **kwargs, **wfield_kwargs, parents=[tasks['WideFieldRegisterRaw']] + ) tasks['WidefieldPreprocess'] = type('WidefieldPreprocess', (wtasks.WidefieldPreprocess,), {})( - **kwargs, **wfield_kwargs, parents=[tasks['WidefieldCompress']]) + **kwargs, **wfield_kwargs, parents=[tasks['WidefieldCompress']] + ) tasks['WidefieldSync'] = type('WidefieldSync', (wtasks.WidefieldSync,), {})( - **kwargs, **wfield_kwargs, **sync_kwargs, - parents=[tasks['WideFieldRegisterRaw'], tasks['WidefieldCompress']] + sync_tasks) + **kwargs, + **wfield_kwargs, + **sync_kwargs, + parents=[tasks['WideFieldRegisterRaw'], tasks['WidefieldCompress']] + sync_tasks, + ) tasks['WidefieldFOV'] = type('WidefieldFOV', (wtasks.WidefieldFOV,), {})( - **kwargs, **wfield_kwargs, parents=[tasks['WidefieldPreprocess']]) + **kwargs, **wfield_kwargs, parents=[tasks['WidefieldPreprocess']] + ) # Mesoscope tasks if 'mesoscope' in devices: - (_, mscope_kwargs), = devices['mesoscope'].items() + ((_, mscope_kwargs),) = devices['mesoscope'].items() mscope_kwargs['device_collection'] = mscope_kwargs.pop('collection') tasks['MesoscopeRegisterSnapshots'] = type('MesoscopeRegisterSnapshots', (mscope_tasks.MesoscopeRegisterSnapshots,), {})( - **kwargs, **mscope_kwargs) + **kwargs, **mscope_kwargs + ) tasks['MesoscopePreprocess'] = type('MesoscopePreprocess', (mscope_tasks.MesoscopePreprocess,), {})( - **kwargs, **mscope_kwargs) + **kwargs, **mscope_kwargs + ) tasks['MesoscopeFOV'] = type('MesoscopeFOV', (mscope_tasks.MesoscopeFOV,), {})( - **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']]) + **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']] + ) tasks['MesoscopeSync'] = type('MesoscopeSync', 
(mscope_tasks.MesoscopeSync,), {})( - **kwargs, **mscope_kwargs, **sync_kwargs) + **kwargs, **mscope_kwargs, **sync_kwargs + ) tasks['MesoscopeCompress'] = type('MesoscopeCompress', (mscope_tasks.MesoscopeCompress,), {})( - **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']]) + **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']] + ) if 'neurophotometrics' in devices: - photometry_kwargs = devices['neurophotometrics'] - tasks['FibrePhotometrySync'] = type('FibrePhotometrySync', ( - ptasks.FibrePhotometrySync,), {})(**kwargs, **photometry_kwargs) + # note devices['neurophotometrics'] is the acquisition_description + sync_mode = devices['neurophotometrics'].get('sync_mode', 'bpod') # default to bpod for downward compatibility + match sync_mode: + case 'bpod': + tasks['FibrePhotometryBpodSync'] = type('FibrePhotometryBpodSync', (ptasks.FibrePhotometryBpodSync,), {})( + **devices['neurophotometrics'], + **kwargs, + ) + case 'daqami': + tasks['FibrePhotometryDAQSync'] = type('FibrePhotometryDAQSync', (ptasks.FibrePhotometryDAQSync,), {})( + **devices['neurophotometrics'], + **acquisition_description['sync'], + **kwargs, + ) p = mtasks.Pipeline(session_path=session_path, **pkwargs) p.tasks = tasks diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 13f967685..e40ba5ae8 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -10,115 +10,266 @@ from iblutil.io import jsonable import iblphotometry.io as fpio +from abc import abstractmethod +import iblphotometry + _logger = logging.getLogger('ibllib') +""" +Neurophotometrics FP3002 specific information. +The light source map refers to the available LEDs on the system. +The flags refers to the byte encoding of led states in the system. +""" + +LIGHT_SOURCE_MAP = { + 'color': ['None', 'Violet', 'Blue', 'Green'], + 'wavelength': [0, 415, 470, 560], + 'name': ['None', 'Isosbestic', 'GCaMP', 'RCaMP'], +} + +LED_STATES = { + 'Condition': { + 0: 'No additional signal', + 1: 'Output 1 signal HIGH', + 2: 'Output 0 signal HIGH', + 3: 'Stimulation ON', + 4: 'GPIO Line 2 HIGH', + 5: 'GPIO Line 3 HIGH', + 6: 'Input 1 HIGH', + 7: 'Input 0 HIGH', + 8: 'Output 0 signal HIGH + Stimulation', + 9: 'Output 0 signal HIGH + Input 0 signal HIGH', + 10: 'Input 0 signal HIGH + Stimulation', + 11: 'Output 0 HIGH + Input 0 HIGH + Stimulation', + }, + 'No LED ON': {0: 0, 1: 8, 2: 16, 3: 32, 4: 64, 5: 128, 6: 256, 7: 512, 8: 48, 9: 528, 10: 544, 11: 560}, + 'L415': {0: 1, 1: 9, 2: 17, 3: 33, 4: 65, 5: 129, 6: 257, 7: 513, 8: 49, 9: 529, 10: 545, 11: 561}, + 'L470': {0: 2, 1: 10, 2: 18, 3: 34, 4: 66, 5: 130, 6: 258, 7: 514, 8: 50, 9: 530, 10: 546, 11: 562}, + 'L560': {0: 4, 1: 12, 2: 20, 3: 36, 4: 68, 5: 132, 6: 260, 7: 516, 8: 52, 9: 532, 10: 548, 11: 564}, +} + + +# def _channel_meta(light_source_map=None): +# """ +# Return table of light source wavelengths and corresponding colour labels. + +# Parameters +# ---------- +# light_source_map : dict +# An optional map of light source wavelengths (nm) used and their corresponding colour name. + +# Returns +# ------- +# pandas.DataFrame +# A sorted table of wavelength and colour name. 
+# """ +# light_source_map = light_source_map or LIGHT_SOURCE_MAP +# meta = pd.DataFrame.from_dict(light_source_map) +# meta.index.rename('channel_id', inplace=True) +# return meta -class FibrePhotometrySync(base_tasks.DynamicTask): + +class FibrePhotometryBaseSync(base_tasks.DynamicTask): priority = 90 job_size = 'small' def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.device_collection = self.get_device_collection('neurophotometrics', device_collection='raw_photometry_data') + self.kwargs = kwargs + # we will work with the first protocol here for task in self.session_params['tasks']: self.task_protocol = next(k for k in task) self.task_collection = ibllib.io.session_params.get_task_collection(self.session_params, self.task_protocol) break - @property - def signature(self): - signature = { - 'input_files': [ - ('_neurophotometrics_fpData.raw.pqt', self.device_collection, True, True), - ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), - ('_neurophotometrics_fpData.channels.csv', self.device_collection, True, True), - ('_neurophotometrics_fpData.digitalIntputs.pqt', self.device_collection, True), - ], - 'output_files': [ - ('photometry.signal.pqt', 'alf/photometry', True), - ('photometryROI.locations.pqt', 'alf/photometry', True), - ], - } - return signature - - def _sync_bpod_neurophotometrics(self): - """ - Perform the linear clock correction between bpod and neurophotometrics timestamps. - :return: interpolation function that outputs bpod timestamsp from neurophotometrics timestamps - """ - folder_raw_photometry = self.session_path.joinpath(self.device_collection) - df_digital_inputs = fpio.read_digital_inputs_file(folder_raw_photometry / '_neurophotometrics_fpData.digitalIntputs.pqt') - # normally we should disregard the states and use the sync label. But bpod doesn't log TTL outs, - # only the states. This will change in the future but for now we are stuck with this. 
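The new _get_bpod_timestamps below collects, for each synchronising state, one timestamp per trial on the bpod clock. A minimal sketch of the jsonable trial structure it relies on (field names as used in the method; the numbers are invented for illustration):

    # one dict per trial, as returned by iblutil.io.jsonable.load_task_jsonable
    bpod_data = [
        {'Trial start timestamp': 12.0,
         'Trial end timestamp': 18.5,
         'States timestamps': {'trial_start': [[0.0, 0.1]], 'reward': [[3.2, 3.3]]}},
    ]
    # a state's time on the bpod session clock = trial-relative onset + trial start
    t_reward = bpod_data[0]['States timestamps']['reward'][0][0] + bpod_data[0]['Trial start timestamp']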
+ def _get_bpod_timestamps(self): if 'habituation' in self.task_protocol: sync_states_names = ['iti', 'reward'] else: sync_states_names = ['trial_start', 'reward', 'exit_state'] + # read in the raw behaviour data for syncing file_jsonable = self.session_path.joinpath(self.task_collection, '_iblrig_taskData.raw.jsonable') - trials_table, bpod_data = jsonable.load_task_jsonable(file_jsonable) + _, bpod_data = jsonable.load_task_jsonable(file_jsonable) + # we get the timestamps of the states from the bpod data - tbpod = [] - for sname in sync_states_names: - tbpod.append( + timestamps_bpod = [] + for sync_name in sync_states_names: + timestamps_bpod.append( np.array( [ - bd['States timestamps'][sname][0][0] + bd['Trial start timestamp'] - bpod_data[0]['Bpod start timestamp'] - for bd in bpod_data - if sname in bd['States timestamps'] + data['States timestamps'][sync_name][0][0] + data['Trial start timestamp'] + for data in bpod_data + if sync_name in data['States timestamps'] ] ) ) - tbpod = np.sort(np.concatenate(tbpod)) - tbpod = tbpod[~np.isnan(tbpod)] - # we get the timestamps for the photometry data - sync_channel = self.session_params['devices']['neurophotometrics']['sync_channel'] - tph = df_digital_inputs['SystemTimestamp'].values[df_digital_inputs['Channel'] == sync_channel] - tph = tph[15:] # TODO: we may want to detect the spacers before removing it, especially for successive sessions + timestamps_bpod = np.sort(np.concatenate(timestamps_bpod)) + timestamps_bpod = timestamps_bpod[~np.isnan(timestamps_bpod)] + return timestamps_bpod, bpod_data + + @abstractmethod + def _get_neurophotometrics_timestamps(self): ... + + def _get_sync_function(self): + """ + Perform the linear clock correction between bpod and neurophotometrics timestamps. + :return: interpolation function that outputs bpod timestamsp from neurophotometrics timestamps + """ + + # get the timestamps + timestamps_bpod, bpod_data = self._get_bpod_timestamps(self.task_protocol) + timestamps_nph = self._get_neurophotometrics_timestamps() + # sync the behaviour events to the photometry timestamps - fcn_nph_to_bpod_times, drift_ppm, iph, ibpod = ibldsp.utils.sync_timestamps(tph, tbpod, return_indices=True, linear=True) - # then we check the alignment, should be less than the screen refresh rate - tcheck = fcn_nph_to_bpod_times(tph[iph]) - tbpod[ibpod] - _logger.info(f'sync: n trials {len(bpod_data)}, n bpod sync {len(tbpod)}, n photometry {len(tph)}, n match {len(iph)}') + sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( + timestamps_nph, timestamps_bpod, return_indices=True, linear=True + ) + # TODO log drift + + # then we check the alignment, should be less than the camera sampling rate + tcheck = sync_nph_to_bpod_fcn(timestamps_nph[ix_nph]) - timestamps_bpod[ix_bpod] + _logger.info( + f'sync: n trials {len(bpod_data)}, n bpod sync {len(timestamps_bpod)}, n photometry {len(timestamps_nph)}, n match {len(ix_nph)}' + ) + # FIXME the framerate here is hardcoded, infer it instead! 
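One way to address the FIXME above, sketched as a suggestion rather than as part of the patch: derive the tolerance from the acquisition itself. Here frame_times stands for the FP3002 frame timestamps expressed in the same clock as timestamps_nph (e.g. the SystemTimestamp column of the raw data):

    frame_period = np.median(np.diff(frame_times))  # inferred inter-frame interval instead of the hardcoded 1/60 s
    assert np.all(np.abs(tcheck) < frame_period), 'Sync issue detected, residual above one frame period'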
assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' - assert len(iph) / len(tbpod) > 0.95, 'Sync issue detected, less than 95% of the bpod events matched' + assert len(ix_nph) / len(timestamps_bpod) > 0.95, 'Sync issue detected, less than 95% of the bpod events matched' valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] - return fcn_nph_to_bpod_times, valid_bounds - def _run(self, **kwargs): - """ - Extract photometry data from the raw neurophotometrics data in parquet - The extraction has 3 main steps: - 1. Synchronise the bpod and neurophotometrics timestamps. - 2. Extract the photometry data from the raw neurophotometrics data. - 3. Label the fibers correspondance with brain regions in a small table - :param kwargs: - :return: - """ - # 1) sync: we check the synchronisation, right now we only have bpod but soon the daq will be used - match list(self.session_params['sync'].keys())[0]: - case 'bpod': - fcn_nph_to_bpod_times, valid_bounds = self._sync_bpod_neurophotometrics() - case _: - raise NotImplementedError('Syncing with daq is not supported yet.') - - # 2) reformat the raw data with wavelengths and meta-data - folder_raw_photometry = self.session_path.joinpath(self.device_collection) - out_df = fpio.from_raw_neurophotometrics_file_to_ibl_df( - folder_raw_photometry.joinpath('_neurophotometrics_fpData.raw.pqt') + return sync_nph_to_bpod_fcn, valid_bounds + + def load_data(self): + raw_photometry_folder = self.session_path / self.device_collection + raw_neurophotometrics_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt') + ibl_df = iblphotometry.io.from_raw_neurophotometrics_df_to_ibl_df( + raw_neurophotometrics_df, + rois=self.kwargs['fibers'], ) - out_df['times'] = fcn_nph_to_bpod_times(out_df['times']) - - # 3) label the brain regions - rois = [] - for k, v in self.session_params['devices']['neurophotometrics']['fibers'].items(): - rois.append({'ROI': k, 'fiber': f'fiber_{v["location"]}', 'brain_region': v['location']}) - df_rois = pd.DataFrame(rois).set_index('ROI') - - # 4) to finish we write the dataframes to disk - out_path = self.session_path.joinpath('alf', 'photometry') - out_path.mkdir(parents=True, exist_ok=True) - out_df.to_parquet(file_signal := out_path.joinpath('photometry.signal.pqt')) - df_rois.to_parquet(file_locations := out_path.joinpath('photometryROI.locations.pqt')) - return file_signal, file_locations + return ibl_df + + def _run(self, **kwargs): + """ """ + # 1) load photometry data + # note: when loading daq based syncing, the SystemTimestamp column + ibl_df = self.load_data() + + # 2) get the synchronization function + sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function() + ibl_df['valid'] = np.logical_and(ibl_df['times'] >= valid_bounds[0], ibl_df['times'] <= valid_bounds[1]) + + # 3) apply synchronization + # for bpod based syncing, we can directly transform the timestamps that are + # stored with the samples + ibl_df['times'] = sync_nph_to_bpod_fcn(ibl_df['SystemTimestamp']) + + # 4) write to disk + output_folder = self.session_path.joinpath('alf', 'photometry') + output_folder.mkdir(parents=True, exist_ok=True) + + # writing the synced photometry signal + ibl_df_outpath = output_folder / 'photometry.signal.pqt' + ibl_df.to_parquet(ibl_df_outpath) + + # writing the locations + rois = list(self.kwargs['fibers'].keys()) + locations_df = pd.DataFrame(rois).set_index('ROI') + locations_df_outpath = output_folder / 
'photometryROI.locations.pqt' + locations_df.to_parquet(locations_df_outpath) + return ibl_df, locations_df + + +class FibrePhotometryBpodSync(FibrePhotometryBaseSync): + priority = 90 + job_size = 'small' + + @property + def signature(self): + signature = { + 'input_files': [ + ('_neurophotometrics_fpData.raw.pqt', self.device_collection, True, True), + ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), + ('_neurophotometrics_fpData.channels.csv', self.device_collection, True, True), + ('_neurophotometrics_fpData.digitalIntputs.pqt', self.device_collection, True), + ], + 'output_files': [ + ('photometry.signal.pqt', 'alf/photometry', True), + ('photometryROI.locations.pqt', 'alf/photometry', True), + ], + } + return signature + + def _get_neurophotometrics_timestamps(self): + # we get the timestamps for the photometry data by loading from the digital inputs file + raw_photometry_folder = self.session_path / self.device_collection + digital_inputs_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.digitalIntputs.pqt') + timestamps_nph = digital_inputs_df['SystemTimestamp'].values[digital_inputs_df['Channel'] == self.kwargs['sync_channel']] + timestamps_nph = timestamps_nph[ + 15: + ] # TODO: we may want to detect the spacers before removing it, especially for successive sessions + return timestamps_nph + + +class FibrePhotometryDAQSync(FibrePhotometryBaseSync): + """ + DAQ syncing outline + + bpod stores it's own timestamps - "timestamps_bpod" + DAQ receives TTL sync from each bpod - "daq_bpod_sync" + DAQ receives Frame clock from FP3002 - "daq_nph_frameclock" + NPH stores system timestamps at each sample time - "nph_frameclock" + + + + 2 step sync + - NPH time to DAQ time (on the basis of frame clock) m1, b1 = linreg(nph_frameclock, daq_frameclock) + - DAQ time to BPOD time m2, b2 = linreg(daq_bpod_sync, bpod_sync) + + transfrom from NPH to BPOD + m1 * nph_frameclock + b1 + """ + + priority = 90 + job_size = 'small' + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.sync_kwargs = kwargs['sync']['daqami'] + # grab the sync relevant things here + + @property + def signature(self): + signature = { + 'input_files': [ + ('_neurophotometrics_fpData.raw.pqt', self.device_collection, True, True), + ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), + ('_neurophotometrics_fpData.channels.csv', self.device_collection, True, True), + # TODO input here - the sync data fils in the self.sync_collection + ], + 'output_files': [ + ('photometry.signal.pqt', 'alf/photometry', True), + ('photometryROI.locations.pqt', 'alf/photometry', True), + ], + } + return signature + + def load_data(self): + ibl_df = super().load_data() + # load here the daqami timestamps + # and put them in the ibl_df + return ibl_df + + def _get_neurophotometrics_timestamps(self): + # get the sync data + # FIXME replace me with the actual filename + bin_filepath = self.session_path / self.sync_kwargs['collection'] / 'the_sync_file.bin' + + # read bin file + # and extract from it + # daq_nph_frameclock + # daq_bpod_sync + + timestamps_nph = None + return timestamps_nph From 217bd5f090f8ca979b227ab2da837df225d3f498 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 12 May 2025 11:17:19 +0100 Subject: [PATCH 08/80] leftovers --- ibllib/pipes/neurophotometrics.py | 59 ++----------------------------- 1 file changed, 2 insertions(+), 57 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 
e40ba5ae8..7bdbc2165 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -1,5 +1,3 @@ -"""Extraction tasks for fibrephotometry""" - import logging import numpy as np import pandas as pd @@ -15,61 +13,10 @@ _logger = logging.getLogger('ibllib') -""" -Neurophotometrics FP3002 specific information. -The light source map refers to the available LEDs on the system. -The flags refers to the byte encoding of led states in the system. -""" - -LIGHT_SOURCE_MAP = { - 'color': ['None', 'Violet', 'Blue', 'Green'], - 'wavelength': [0, 415, 470, 560], - 'name': ['None', 'Isosbestic', 'GCaMP', 'RCaMP'], -} - -LED_STATES = { - 'Condition': { - 0: 'No additional signal', - 1: 'Output 1 signal HIGH', - 2: 'Output 0 signal HIGH', - 3: 'Stimulation ON', - 4: 'GPIO Line 2 HIGH', - 5: 'GPIO Line 3 HIGH', - 6: 'Input 1 HIGH', - 7: 'Input 0 HIGH', - 8: 'Output 0 signal HIGH + Stimulation', - 9: 'Output 0 signal HIGH + Input 0 signal HIGH', - 10: 'Input 0 signal HIGH + Stimulation', - 11: 'Output 0 HIGH + Input 0 HIGH + Stimulation', - }, - 'No LED ON': {0: 0, 1: 8, 2: 16, 3: 32, 4: 64, 5: 128, 6: 256, 7: 512, 8: 48, 9: 528, 10: 544, 11: 560}, - 'L415': {0: 1, 1: 9, 2: 17, 3: 33, 4: 65, 5: 129, 6: 257, 7: 513, 8: 49, 9: 529, 10: 545, 11: 561}, - 'L470': {0: 2, 1: 10, 2: 18, 3: 34, 4: 66, 5: 130, 6: 258, 7: 514, 8: 50, 9: 530, 10: 546, 11: 562}, - 'L560': {0: 4, 1: 12, 2: 20, 3: 36, 4: 68, 5: 132, 6: 260, 7: 516, 8: 52, 9: 532, 10: 548, 11: 564}, -} - - -# def _channel_meta(light_source_map=None): -# """ -# Return table of light source wavelengths and corresponding colour labels. - -# Parameters -# ---------- -# light_source_map : dict -# An optional map of light source wavelengths (nm) used and their corresponding colour name. - -# Returns -# ------- -# pandas.DataFrame -# A sorted table of wavelength and colour name. 
-# """ -# light_source_map = light_source_map or LIGHT_SOURCE_MAP -# meta = pd.DataFrame.from_dict(light_source_map) -# meta.index.rename('channel_id', inplace=True) -# return meta - class FibrePhotometryBaseSync(base_tasks.DynamicTask): + # base clas for syncing fibre photometry + # derived classes are: FibrePhotometryBpodSync and FibrePhotometryDAQSync priority = 90 job_size = 'small' @@ -221,8 +168,6 @@ class FibrePhotometryDAQSync(FibrePhotometryBaseSync): DAQ receives Frame clock from FP3002 - "daq_nph_frameclock" NPH stores system timestamps at each sample time - "nph_frameclock" - - 2 step sync - NPH time to DAQ time (on the basis of frame clock) m1, b1 = linreg(nph_frameclock, daq_frameclock) - DAQ time to BPOD time m2, b2 = linreg(daq_bpod_sync, bpod_sync) From a933cb7ff8f4aee953811b81f5147a95bfae539b Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Tue, 13 May 2025 17:18:39 +0100 Subject: [PATCH 09/80] 4 rig updates - ready for testing --- ibllib/pipes/dynamic_pipeline.py | 6 ++- ibllib/pipes/neurophotometrics.py | 78 ++++++++++++++++++------------- 2 files changed, 51 insertions(+), 33 deletions(-) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index 9b3840526..e3ec02253 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -605,15 +605,19 @@ def make_pipeline(session_path, **pkwargs): ) if 'neurophotometrics' in devices: - # note devices['neurophotometrics'] is the acquisition_description + # note: devices['neurophotometrics'] is the acquisition_description sync_mode = devices['neurophotometrics'].get('sync_mode', 'bpod') # default to bpod for downward compatibility match sync_mode: case 'bpod': + # for synchronization with the BNC inputs of the neurophotometrics receiving the sync pulses + # from the individual bpods tasks['FibrePhotometryBpodSync'] = type('FibrePhotometryBpodSync', (ptasks.FibrePhotometryBpodSync,), {})( **devices['neurophotometrics'], **kwargs, ) case 'daqami': + # for synchronization with the DAQami receiving the sync pulses from the individual bpods + # as well as the frame clock from the FP3002 tasks['FibrePhotometryDAQSync'] = type('FibrePhotometryDAQSync', (ptasks.FibrePhotometryDAQSync,), {})( **devices['neurophotometrics'], **acquisition_description['sync'], diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 7bdbc2165..1f52545c4 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -8,6 +8,9 @@ from iblutil.io import jsonable import iblphotometry.io as fpio +from ibldsp.utils import rises +from nptdms import TdmsFile + from abc import abstractmethod import iblphotometry @@ -58,7 +61,11 @@ def _get_bpod_timestamps(self): return timestamps_bpod, bpod_data @abstractmethod - def _get_neurophotometrics_timestamps(self): ... + def _get_neurophotometrics_timestamps(self): + # this function needs to be implemented in the derived classes: + # for bpod based syncing, the timestamps are in the digial inputs file + # for daq based syncing, the timestamps are extracted from the tdms file + ... 
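Stepping back to the sync_mode dispatch added to make_pipeline earlier in this patch: a sketch of the neurophotometrics device description it consumes. The keys mirror the task code and the example comment removed in patch 03; the values are illustrative only:

    devices['neurophotometrics'] = {
        'collection': 'raw_photometry_data',
        'sync_channel': 1,
        'sync_mode': 'daqami',  # 'bpod' is assumed when the key is absent
        'fibers': {'G0': {'location': 'NBM'}, 'G1': {'location': 'SI'}},
    }

With 'daqami', FibrePhotometryDAQSync additionally receives acquisition_description['sync'], whose 'daqami' entry is expected to provide the collection, sampling_rate and frameclock_channel used by the task code below.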
def _get_sync_function(self): """ @@ -153,36 +160,23 @@ def _get_neurophotometrics_timestamps(self): raw_photometry_folder = self.session_path / self.device_collection digital_inputs_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.digitalIntputs.pqt') timestamps_nph = digital_inputs_df['SystemTimestamp'].values[digital_inputs_df['Channel'] == self.kwargs['sync_channel']] - timestamps_nph = timestamps_nph[ - 15: - ] # TODO: we may want to detect the spacers before removing it, especially for successive sessions + + # simple spacer removal, TODO replace this with something more robust + # detect spacer / remove spacer methods + timestamps_nph = timestamps_nph[15:] return timestamps_nph class FibrePhotometryDAQSync(FibrePhotometryBaseSync): - """ - DAQ syncing outline - - bpod stores it's own timestamps - "timestamps_bpod" - DAQ receives TTL sync from each bpod - "daq_bpod_sync" - DAQ receives Frame clock from FP3002 - "daq_nph_frameclock" - NPH stores system timestamps at each sample time - "nph_frameclock" - - 2 step sync - - NPH time to DAQ time (on the basis of frame clock) m1, b1 = linreg(nph_frameclock, daq_frameclock) - - DAQ time to BPOD time m2, b2 = linreg(daq_bpod_sync, bpod_sync) - - transfrom from NPH to BPOD - m1 * nph_frameclock + b1 - """ + """ """ priority = 90 job_size = 'small' def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.sync_kwargs = kwargs['sync']['daqami'] - # grab the sync relevant things here + self.sync_kwargs = kwargs['daqami'] + self.sync_channel = kwargs['sync_channel'] @property def signature(self): @@ -191,7 +185,7 @@ def signature(self): ('_neurophotometrics_fpData.raw.pqt', self.device_collection, True, True), ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), ('_neurophotometrics_fpData.channels.csv', self.device_collection, True, True), - # TODO input here - the sync data fils in the self.sync_collection + ('_mcc_DAQdata.raw.tdms', self.sync_kwargs['collection'], True, True), ], 'output_files': [ ('photometry.signal.pqt', 'alf/photometry', True), @@ -200,21 +194,41 @@ def signature(self): } return signature + def _load_and_parse_tdms(self): + # loads the tdms file data, and detects the risind edges + # this probably could use some dsp, potentially trend removal + tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' + tdms_df = TdmsFile.read(tdms_filepath).as_dataframe() + tdms_df.columns = [col[-4:-1] for col in tdms_df.columns] # hardcoded renaming + + timestamps = {} + for col in tdms_df.columns: + timestamps[col] = rises(tdms_df[col]) / self.sync_kwargs['sampling_rate'] + + return timestamps + def load_data(self): + # the point of this functions is to overwrite the SystemTimestamp column + # in the ibl_df with the values from the DAQ clock + # then syncing will work the same as for the bpod based syncing + ibl_df = super().load_data() - # load here the daqami timestamps - # and put them in the ibl_df + + self.timestamps = self._load_and_parse_tdms() + frame_timestamps = self.timestamps[f'AI{self.sync_kwargs["frameclock_channel"]}'] + + # and put them in the ibl_df SystemTimestamp column + ibl_df['SystemTimestamp'] = frame_timestamps return ibl_df def _get_neurophotometrics_timestamps(self): - # get the sync data - # FIXME replace me with the actual filename - bin_filepath = self.session_path / self.sync_kwargs['collection'] / 'the_sync_file.bin' + # get the sync channel + sync_colname = f'AI{self.sync_kwargs[""]}' - # read bin file - # and 
extract from it - # daq_nph_frameclock - # daq_bpod_sync + # and the corresponding timestamps + timestamps_nph = self.timestamps[sync_colname] - timestamps_nph = None + # simple spacer removal, TODO replace this with something more robust + # detect spacer / remove spacer methods + timestamps_nph = timestamps_nph[15:] return timestamps_nph From f6a804d57f07f8c502e46d76d9eb53020d4851f4 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 14 May 2025 13:23:42 +0100 Subject: [PATCH 10/80] ready for testing --- ibllib/pipes/neurophotometrics.py | 55 ++++++++++++++----------------- 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 1f52545c4..d56a1042d 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -1,12 +1,12 @@ import logging import numpy as np import pandas as pd +from typing import Tuple import ibldsp.utils import ibllib.io.session_params from ibllib.pipes import base_tasks from iblutil.io import jsonable -import iblphotometry.io as fpio from ibldsp.utils import rises from nptdms import TdmsFile @@ -25,7 +25,7 @@ class FibrePhotometryBaseSync(base_tasks.DynamicTask): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.device_collection = self.get_device_collection('neurophotometrics', device_collection='raw_photometry_data') + self.photometry_collection = kwargs['collection'] # raw_photometry_data self.kwargs = kwargs # we will work with the first protocol here @@ -34,7 +34,8 @@ def __init__(self, *args, **kwargs): self.task_collection = ibllib.io.session_params.get_task_collection(self.session_params, self.task_protocol) break - def _get_bpod_timestamps(self): + def _get_bpod_timestamps(self) -> Tuple[np.ndarray, list]: + # the timestamps for syncing, in the time of the bpod if 'habituation' in self.task_protocol: sync_states_names = ['iti', 'reward'] else: @@ -61,17 +62,14 @@ def _get_bpod_timestamps(self): return timestamps_bpod, bpod_data @abstractmethod - def _get_neurophotometrics_timestamps(self): + def _get_neurophotometrics_timestamps(self) -> np.ndarray: # this function needs to be implemented in the derived classes: # for bpod based syncing, the timestamps are in the digial inputs file # for daq based syncing, the timestamps are extracted from the tdms file ... - def _get_sync_function(self): - """ - Perform the linear clock correction between bpod and neurophotometrics timestamps. 
- :return: interpolation function that outputs bpod timestamsp from neurophotometrics timestamps - """ + def _get_sync_function(self) -> Tuple[callable, list]: + # returns the synchronization function # get the timestamps timestamps_bpod, bpod_data = self._get_bpod_timestamps(self.task_protocol) @@ -95,8 +93,9 @@ def _get_sync_function(self): return sync_nph_to_bpod_fcn, valid_bounds - def load_data(self): - raw_photometry_folder = self.session_path / self.device_collection + def load_data(self) -> pd.DataFrame: + # loads the raw photometry data + raw_photometry_folder = self.session_path / self.photometry_collection raw_neurophotometrics_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt') ibl_df = iblphotometry.io.from_raw_neurophotometrics_df_to_ibl_df( raw_neurophotometrics_df, @@ -104,10 +103,10 @@ def load_data(self): ) return ibl_df - def _run(self, **kwargs): - """ """ + def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # 1) load photometry data # note: when loading daq based syncing, the SystemTimestamp column + # will be overridden with the timestamps from the tdms file ibl_df = self.load_data() # 2) get the synchronization function @@ -115,8 +114,6 @@ def _run(self, **kwargs): ibl_df['valid'] = np.logical_and(ibl_df['times'] >= valid_bounds[0], ibl_df['times'] <= valid_bounds[1]) # 3) apply synchronization - # for bpod based syncing, we can directly transform the timestamps that are - # stored with the samples ibl_df['times'] = sync_nph_to_bpod_fcn(ibl_df['SystemTimestamp']) # 4) write to disk @@ -143,10 +140,10 @@ class FibrePhotometryBpodSync(FibrePhotometryBaseSync): def signature(self): signature = { 'input_files': [ - ('_neurophotometrics_fpData.raw.pqt', self.device_collection, True, True), + ('_neurophotometrics_fpData.raw.pqt', self.photometry_collection, True, True), ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), - ('_neurophotometrics_fpData.channels.csv', self.device_collection, True, True), - ('_neurophotometrics_fpData.digitalIntputs.pqt', self.device_collection, True), + ('_neurophotometrics_fpData.channels.csv', self.photometry_collection, True, True), + ('_neurophotometrics_fpData.digitalIntputs.pqt', self.photometry_collection, True), ], 'output_files': [ ('photometry.signal.pqt', 'alf/photometry', True), @@ -155,9 +152,9 @@ def signature(self): } return signature - def _get_neurophotometrics_timestamps(self): - # we get the timestamps for the photometry data by loading from the digital inputs file - raw_photometry_folder = self.session_path / self.device_collection + def _get_neurophotometrics_timestamps(self) -> np.ndarray: + # for bpod based syncing, the timestamps for syncing are in the digital inputs file + raw_photometry_folder = self.session_path / self.photometry_collection digital_inputs_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.digitalIntputs.pqt') timestamps_nph = digital_inputs_df['SystemTimestamp'].values[digital_inputs_df['Channel'] == self.kwargs['sync_channel']] @@ -168,8 +165,6 @@ def _get_neurophotometrics_timestamps(self): class FibrePhotometryDAQSync(FibrePhotometryBaseSync): - """ """ - priority = 90 job_size = 'small' @@ -182,9 +177,9 @@ def __init__(self, *args, **kwargs): def signature(self): signature = { 'input_files': [ - ('_neurophotometrics_fpData.raw.pqt', self.device_collection, True, True), + ('_neurophotometrics_fpData.raw.pqt', self.photometry_collection, True, True), ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), 
- ('_neurophotometrics_fpData.channels.csv', self.device_collection, True, True), + ('_neurophotometrics_fpData.channels.csv', self.photometry_collection, True, True), ('_mcc_DAQdata.raw.tdms', self.sync_kwargs['collection'], True, True), ], 'output_files': [ @@ -194,9 +189,9 @@ def signature(self): } return signature - def _load_and_parse_tdms(self): - # loads the tdms file data, and detects the risind edges - # this probably could use some dsp, potentially trend removal + def _load_and_parse_tdms(self) -> dict: + # loads the tdms file data, and detects the rising edges + # this probably could use some dsp tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' tdms_df = TdmsFile.read(tdms_filepath).as_dataframe() tdms_df.columns = [col[-4:-1] for col in tdms_df.columns] # hardcoded renaming @@ -207,7 +202,7 @@ def _load_and_parse_tdms(self): return timestamps - def load_data(self): + def load_data(self) -> pd.DataFrame: # the point of this functions is to overwrite the SystemTimestamp column # in the ibl_df with the values from the DAQ clock # then syncing will work the same as for the bpod based syncing @@ -221,7 +216,7 @@ def load_data(self): ibl_df['SystemTimestamp'] = frame_timestamps return ibl_df - def _get_neurophotometrics_timestamps(self): + def _get_neurophotometrics_timestamps(self) -> np.ndarray: # get the sync channel sync_colname = f'AI{self.sync_kwargs[""]}' From 60cd99c8bb50a4cd736d6d55b0fbba0b0db27094 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Thu, 15 May 2025 14:33:31 +0100 Subject: [PATCH 11/80] 4 rig - ready for testing round 2 --- ibllib/pipes/dynamic_pipeline.py | 1 - ibllib/pipes/neurophotometrics.py | 89 ++++++++++++++++++------------- 2 files changed, 52 insertions(+), 38 deletions(-) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index e3ec02253..c8e6ea119 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -620,7 +620,6 @@ def make_pipeline(session_path, **pkwargs): # as well as the frame clock from the FP3002 tasks['FibrePhotometryDAQSync'] = type('FibrePhotometryDAQSync', (ptasks.FibrePhotometryDAQSync,), {})( **devices['neurophotometrics'], - **acquisition_description['sync'], **kwargs, ) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index d56a1042d..86639d8be 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -1,4 +1,5 @@ import logging +from pathlib import Path import numpy as np import pandas as pd from typing import Tuple @@ -8,15 +9,32 @@ from ibllib.pipes import base_tasks from iblutil.io import jsonable -from ibldsp.utils import rises from nptdms import TdmsFile from abc import abstractmethod -import iblphotometry +from iblphotometry import io as fpio _logger = logging.getLogger('ibllib') +def extract_timestamps_from_tdms_file(tdms_filepath: Path) -> dict: + # loads the tdms file data, and detects the rising edges + timestamps = {} # stores the resulting edge times here + tdms_file = TdmsFile.read(tdms_filepath) + analog_group, digital_group = tdms_file.groups() + fs = analog_group.properties['ScanRate'] + + for channel in analog_group.channels(): + signal = (channel.data > 2.5).astype('int64') + timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs + + for channel in digital_group.channels(): + signal = (channel.data > 0.5).astype('int64') + timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs + + return timestamps + + class 
FibrePhotometryBaseSync(base_tasks.DynamicTask): # base clas for syncing fibre photometry # derived classes are: FibrePhotometryBpodSync and FibrePhotometryDAQSync @@ -72,7 +90,7 @@ def _get_sync_function(self) -> Tuple[callable, list]: # returns the synchronization function # get the timestamps - timestamps_bpod, bpod_data = self._get_bpod_timestamps(self.task_protocol) + timestamps_bpod, bpod_data = self._get_bpod_timestamps() timestamps_nph = self._get_neurophotometrics_timestamps() # sync the behaviour events to the photometry timestamps @@ -97,24 +115,27 @@ def load_data(self) -> pd.DataFrame: # loads the raw photometry data raw_photometry_folder = self.session_path / self.photometry_collection raw_neurophotometrics_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt') - ibl_df = iblphotometry.io.from_raw_neurophotometrics_df_to_ibl_df( - raw_neurophotometrics_df, - rois=self.kwargs['fibers'], - ) - return ibl_df + return raw_neurophotometrics_df + # return ibl_df def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # 1) load photometry data + # note: when loading daq based syncing, the SystemTimestamp column # will be overridden with the timestamps from the tdms file - ibl_df = self.load_data() + # the idea behind this is that the rest of the sync is then the same + # and handled by this base class + raw_df = self.load_data() # 2) get the synchronization function sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function() - ibl_df['valid'] = np.logical_and(ibl_df['times'] >= valid_bounds[0], ibl_df['times'] <= valid_bounds[1]) + + # 3) convert to ibl_df + ibl_df = fpio.from_raw_neurophotometrics_df_to_ibl_df(raw_df, rois=self.kwargs['fibers'], drop_first=False) # 3) apply synchronization - ibl_df['times'] = sync_nph_to_bpod_fcn(ibl_df['SystemTimestamp']) + ibl_df['times'] = sync_nph_to_bpod_fcn(raw_df['SystemTimestamp']) + ibl_df['valid'] = np.logical_and(ibl_df['times'] >= valid_bounds[0], ibl_df['times'] <= valid_bounds[1]) # 4) write to disk output_folder = self.session_path.joinpath('alf', 'photometry') @@ -125,7 +146,9 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: ibl_df.to_parquet(ibl_df_outpath) # writing the locations - rois = list(self.kwargs['fibers'].keys()) + rois = [] + for k, v in self.kwargs['fibers'].items(): + rois.append({'ROI': k, 'fiber': f'fiber_{v["location"]}', 'brain_region': v['location']}) locations_df = pd.DataFrame(rois).set_index('ROI') locations_df_outpath = output_folder / 'photometryROI.locations.pqt' locations_df.to_parquet(locations_df_outpath) @@ -158,8 +181,8 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: digital_inputs_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.digitalIntputs.pqt') timestamps_nph = digital_inputs_df['SystemTimestamp'].values[digital_inputs_df['Channel'] == self.kwargs['sync_channel']] - # simple spacer removal, TODO replace this with something more robust - # detect spacer / remove spacer methods + # TODO replace this rudimentary spacer removal + # to implement: detect spacer / remove spacer methods timestamps_nph = timestamps_nph[15:] return timestamps_nph @@ -170,7 +193,7 @@ class FibrePhotometryDAQSync(FibrePhotometryBaseSync): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.sync_kwargs = kwargs['daqami'] + self.sync_kwargs = kwargs['sync_metadata'] self.sync_channel = kwargs['sync_channel'] @property @@ -189,41 +212,33 @@ def signature(self): } return signature - def _load_and_parse_tdms(self) 
-> dict: - # loads the tdms file data, and detects the rising edges - # this probably could use some dsp - tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' - tdms_df = TdmsFile.read(tdms_filepath).as_dataframe() - tdms_df.columns = [col[-4:-1] for col in tdms_df.columns] # hardcoded renaming - - timestamps = {} - for col in tdms_df.columns: - timestamps[col] = rises(tdms_df[col]) / self.sync_kwargs['sampling_rate'] - - return timestamps - def load_data(self) -> pd.DataFrame: # the point of this functions is to overwrite the SystemTimestamp column # in the ibl_df with the values from the DAQ clock # then syncing will work the same as for the bpod based syncing + raw_df = super().load_data() - ibl_df = super().load_data() - - self.timestamps = self._load_and_parse_tdms() + # get daqami timestamps + tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' + self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) frame_timestamps = self.timestamps[f'AI{self.sync_kwargs["frameclock_channel"]}'] - # and put them in the ibl_df SystemTimestamp column - ibl_df['SystemTimestamp'] = frame_timestamps - return ibl_df + # and put them in the raw_df SystemTimestamp column + if raw_df.shape[0] == frame_timestamps.shape[0]: + raw_df['SystemTimestamp'] = frame_timestamps + elif raw_df.shape[0] == frame_timestamps.shape[0] + 1: + # there is one extra frame timestamp from the last incomplete frame + raw_df['SystemTimestamp'] = frame_timestamps[:-1] + return raw_df def _get_neurophotometrics_timestamps(self) -> np.ndarray: # get the sync channel - sync_colname = f'AI{self.sync_kwargs[""]}' + sync_colname = f'DI{self.kwargs["sync_channel"]}' # and the corresponding timestamps timestamps_nph = self.timestamps[sync_colname] - # simple spacer removal, TODO replace this with something more robust - # detect spacer / remove spacer methods + # TODO replace this rudimentary spacer removal + # to implement: detect spacer / remove spacer methods timestamps_nph = timestamps_nph[15:] return timestamps_nph From fce9ec2972a01c0521cd4a721e78c9179c88e53a Mon Sep 17 00:00:00 2001 From: owinter Date: Wed, 19 Mar 2025 14:34:09 +0000 Subject: [PATCH 12/80] label the DLC task as running in the dlc env --- ibllib/pipes/video_tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ibllib/pipes/video_tasks.py b/ibllib/pipes/video_tasks.py index 407a63465..679fd4885 100644 --- a/ibllib/pipes/video_tasks.py +++ b/ibllib/pipes/video_tasks.py @@ -328,7 +328,7 @@ def _run(self, update=True, **kwargs): class DLC(base_tasks.VideoTask): """ This task relies on a correctly installed dlc environment as per - https://docs.google.com/document/d/1g0scP6_3EmaXCU4SsDNZWwDTaD9MG0es_grLA-d0gh0/edit# + https://github.com/int-brain-lab/iblvideo#installing-dlc-locally-on-an-ibl-server---tensorflow-2120 If your environment is set up otherwise, make sure that you set the respective attributes: t = EphysDLC(session_path) @@ -341,6 +341,7 @@ class DLC(base_tasks.VideoTask): level = 2 force = True job_size = 'large' + env = 'dlc' dlcenv = Path.home().joinpath('Documents', 'PYTHON', 'envs', 'dlcenv', 'bin', 'activate') scripts = Path.home().joinpath('Documents', 'PYTHON', 'iblscripts', 'deploy', 'serverpc', 'dlc') From 1ae68deab829f138c725a7524d780113cf07d920 Mon Sep 17 00:00:00 2001 From: owinter Date: Wed, 19 Mar 2025 15:05:21 +0000 Subject: [PATCH 13/80] the default scratch drive is /scratch --- ibllib/pipes/ephys_tasks.py | 10 +--------- 1 file 
changed, 1 insertion(+), 9 deletions(-) diff --git a/ibllib/pipes/ephys_tasks.py b/ibllib/pipes/ephys_tasks.py index cb9a0099b..fe21f1892 100644 --- a/ibllib/pipes/ephys_tasks.py +++ b/ibllib/pipes/ephys_tasks.py @@ -655,15 +655,7 @@ def scratch_folder_run(self): For a scratch drive at /mnt/h0 we would have the following temp dir: /mnt/h0/iblsorter_1.8.0_CSHL071_2020-10-04_001_probe01/ """ - # get the scratch drive from the shell script - if self.scratch_folder is None: - with open(self.SHELL_SCRIPT) as fid: - lines = fid.readlines() - line = [line for line in lines if line.startswith("SCRATCH_DRIVE=")][0] - m = re.search(r"\=(.*?)(\#|\n)", line)[0] - scratch_drive = Path(m[1:-1].strip()) - else: - scratch_drive = self.scratch_folder + scratch_drive = self.scratch_folder if self.scratch_folder else Path('/scratch') assert scratch_drive.exists(), f"Scratch drive {scratch_drive} not found" # get the version of the sorter self.version = self._fetch_iblsorter_version(self.SORTER_REPOSITORY) From e5d2dfa3f2f22573b57884315e6c1078bd46b088 Mon Sep 17 00:00:00 2001 From: Olivier Winter Date: Fri, 21 Mar 2025 14:20:40 +0000 Subject: [PATCH 14/80] DLC has the option of running within the current env if available --- ibllib/pipes/video_tasks.py | 54 ++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/ibllib/pipes/video_tasks.py b/ibllib/pipes/video_tasks.py index 679fd4885..4ec2d0d5a 100644 --- a/ibllib/pipes/video_tasks.py +++ b/ibllib/pipes/video_tasks.py @@ -387,6 +387,38 @@ def _video_intact(file_mp4): cap.release() return intact + def run_dlc(self, file_mp4, cam, overwrite): + try: + from iblvideo import download_weights + from iblvideo.choiceworld import dlc + path_dlc = download_weights() + dlc_result, _ = dlc(file_mp4, path_dlc=path_dlc, force=overwrite) + return 0 + except ImportError: + command2run = f"{self.scripts.joinpath('run_dlc.sh')} {str(self.dlcenv)} {file_mp4} {overwrite}" + _logger.info(command2run) + process = subprocess.Popen( + command2run, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + executable='/bin/bash', + ) + info, error = process.communicate() + # info_str = info.decode("utf-8").strip() + # _logger.info(info_str) + if process.returncode != 0: + error_str = error.decode('utf-8').strip() + _logger.error(f'DLC failed for {cam}Camera.\n\n' + f'++++++++ Output of subprocess for debugging ++++++++\n\n' + f'{error_str}\n' + f'++++++++++++++++++++++++++++++++++++++++++++\n') + return process.returncode + except Exception as e: + _logger.error(f'An error occurred while running DLC for {cam}Camera: {e}') + _logger.error(traceback.format_exc()) + return -1 + def _run(self, cams=None, overwrite=False): # Check that the cams are valid for DLC, remove the ones that aren't candidate_cams = cams or self.cameras @@ -426,27 +458,11 @@ def _run(self, cams=None, overwrite=False): check_nvidia_driver() _logger.info(f'Running DLC on {cam}Camera.') - command2run = f"{self.scripts.joinpath('run_dlc.sh')} {str(self.dlcenv)} {file_mp4} {overwrite}" - _logger.info(command2run) - process = subprocess.Popen( - command2run, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - executable='/bin/bash', - ) - info, error = process.communicate() - # info_str = info.decode("utf-8").strip() - # _logger.info(info_str) - if process.returncode != 0: - error_str = error.decode('utf-8').strip() - _logger.error(f'DLC failed for {cam}Camera.\n\n' - f'++++++++ Output of subprocess for debugging ++++++++\n\n' - f'{error_str}\n' - 
f'++++++++++++++++++++++++++++++++++++++++++++\n') + return_code = self._run_dlc(file_mp4, cam, overwrite) + if return_code != 0: self.status = -1 - # We dont' run motion energy, or add any files if dlc failed to run continue + dlc_result = next(self.session_path.joinpath('alf').glob(f'_ibl_{cam}Camera.dlc*.pqt')) actual_outputs.append(dlc_result) From fd99fff19ea4b1e08630303c64b14686d505098d Mon Sep 17 00:00:00 2001 From: Olivier Winter Date: Fri, 21 Mar 2025 15:26:13 +0000 Subject: [PATCH 15/80] check dlc env reserved for subprocess call --- ibllib/pipes/video_tasks.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ibllib/pipes/video_tasks.py b/ibllib/pipes/video_tasks.py index 4ec2d0d5a..9ea55df55 100644 --- a/ibllib/pipes/video_tasks.py +++ b/ibllib/pipes/video_tasks.py @@ -387,14 +387,19 @@ def _video_intact(file_mp4): cap.release() return intact - def run_dlc(self, file_mp4, cam, overwrite): + def _run_dlc(self, file_mp4, cam, overwrite): try: + import iblvideo from iblvideo import download_weights from iblvideo.choiceworld import dlc + self.version = iblvideo.__version__ + _logger.info(f'iblvideo version {self.version}') path_dlc = download_weights() dlc_result, _ = dlc(file_mp4, path_dlc=path_dlc, force=overwrite) return 0 except ImportError: + self.version = self._check_dlcenv() + _logger.info(f'iblvideo version {self.version}') command2run = f"{self.scripts.joinpath('run_dlc.sh')} {str(self.dlcenv)} {file_mp4} {overwrite}" _logger.info(command2run) process = subprocess.Popen( @@ -453,8 +458,6 @@ def _run(self, cams=None, overwrite=False): self.status = -1 continue # Check that dlc environment is ok, shell scripts exists, and get iblvideo version, GPU addressable - self.version = self._check_dlcenv() - _logger.info(f'iblvideo version {self.version}') check_nvidia_driver() _logger.info(f'Running DLC on {cam}Camera.') From fbc69c18fb48df282ac01e82a52d75a833c936cb Mon Sep 17 00:00:00 2001 From: owinter Date: Sat, 22 Mar 2025 15:40:37 +0000 Subject: [PATCH 16/80] ephys compression is a large task --- ibllib/pipes/ephys_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/ephys_tasks.py b/ibllib/pipes/ephys_tasks.py index fe21f1892..6cc1cd8db 100644 --- a/ibllib/pipes/ephys_tasks.py +++ b/ibllib/pipes/ephys_tasks.py @@ -124,7 +124,7 @@ class EphysCompressNP1(base_tasks.EphysTask): priority = 90 cpu = 2 io_charge = 100 # this jobs reads raw ap files - job_size = 'small' + job_size = 'large' @property def signature(self): From 7b5127be080a96e8c30ee6b7f9a3841098a00904 Mon Sep 17 00:00:00 2001 From: owinter Date: Wed, 2 Apr 2025 14:27:00 +0100 Subject: [PATCH 17/80] the session loader trial loader has a revision parameter --- brainbox/io/one.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/brainbox/io/one.py b/brainbox/io/one.py index 718f0a574..1445ea0d6 100644 --- a/brainbox/io/one.py +++ b/brainbox/io/one.py @@ -1237,8 +1237,8 @@ def raster(self, spikes, channels, save_dir=None, br=None, label='raster', time_ :param **kwargs: kwargs passed to `driftmap()` (optional) :return: """ - br = br or BrainRegions() - time_series = time_series or {} + br = BrainRegions() if br is None else br + time_series = {} if time_series is None else time_series fig, axs = plt.subplots(2, 2, gridspec_kw={ 'width_ratios': [.95, .05], 'height_ratios': [.1, .9]}, figsize=(16, 9), sharex='col') axs[0, 1].set_axis_off() @@ -1281,13 +1281,20 @@ def plot_rawdata_snippet(self, sr, spikes, clusters, t0, 
save_dir=None, label='raster', gain=-93, - title=None): + title=None, + alpha=0.3, + processing='destripe'): # compute the raw data offset and destripe, we take 400ms around t0 first_sample, last_sample = (int((t0 - 0.2) * sr.fs), int((t0 + 0.2) * sr.fs)) raw = sr[first_sample:last_sample, :-sr.nsync].T channel_labels = channels['labels'] if (channels is not None) and ('labels' in channels) else True - destriped = ibldsp.voltage.destripe(raw, sr.fs, channel_labels=channel_labels) + if processing == 'destripe': + samples = ibldsp.voltage.destripe(raw, sr.fs, channel_labels=channel_labels) + else: + import scipy.signal + sos = scipy.signal.butter(**{"N": 3, "Wn": 300 / sr.fs * 2, "btype": "highpass"}, output="sos") + samples = scipy.signal.sosfiltfilt(sos, raw) # filter out the spikes according to good/bad clusters and to the time slice spike_sel = slice(*np.searchsorted(spikes['samples'], [first_sample, last_sample])) ss = spikes['samples'][spike_sel] @@ -1297,9 +1304,9 @@ def plot_rawdata_snippet(self, sr, spikes, clusters, t0, title = self._default_plot_title(spikes) # display the raw data snippet with spikes overlaid fig, axs = plt.subplots(1, 2, gridspec_kw={'width_ratios': [.95, .05]}, figsize=(16, 9), sharex='col') - Density(destriped, fs=sr.fs, taxis=1, gain=gain, ax=axs[0], t0=t0 - 0.2, unit='s') - axs[0].scatter(ss[sok] / sr.fs, sc[sok], color="green", alpha=0.5) - axs[0].scatter(ss[~sok] / sr.fs, sc[~sok], color="red", alpha=0.5) + Density(samples, fs=sr.fs, taxis=1, gain=gain, ax=axs[0], t0=t0 - 0.2, unit='s') + axs[0].scatter(ss[sok] / sr.fs, sc[sok], color="green", alpha=alpha) + axs[0].scatter(ss[~sok] / sr.fs, sc[~sok], color="red", alpha=alpha) axs[0].set(title=title, xlim=[t0 - 0.035, t0 + 0.035]) # adds the channel locations if available if (channels is not None) and ('atlas_id' in channels): @@ -1501,7 +1508,7 @@ def _find_behaviour_collection(self, obj): f'e.g sl.load_{obj}(collection="{collections[0]}")') raise ALFMultipleCollectionsFound - def load_trials(self, collection=None): + def load_trials(self, collection=None, revision=None): """ Function to load trials data into SessionLoader.trials @@ -1510,13 +1517,13 @@ def load_trials(self, collection=None): collection: str Alf collection of trials data """ - + revision = self.revision if revision is None else revision if not collection: collection = self._find_behaviour_collection('trials') # itiDuration frequently has a mismatched dimension, and we don't need it, exclude using regex self.one.wildcards = False self.trials = self.one.load_object( - self.eid, 'trials', collection=collection, attribute=r'(?!itiDuration).*', revision=self.revision or None).to_df() + self.eid, 'trials', collection=collection, attribute=r'(?!itiDuration).*', revision=revision or None).to_df() self.one.wildcards = True self.data_info.loc[self.data_info['name'] == 'trials', 'is_loaded'] = True From 87bb0eda4d4b2ac339b757c63f7226648a8ea98c Mon Sep 17 00:00:00 2001 From: owinter Date: Fri, 4 Apr 2025 11:44:10 +0100 Subject: [PATCH 18/80] DLC: the motion energy can run in the current environment --- ibllib/pipes/video_tasks.py | 154 ++++++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 59 deletions(-) diff --git a/ibllib/pipes/video_tasks.py b/ibllib/pipes/video_tasks.py index 9ea55df55..2f5f1cd8e 100644 --- a/ibllib/pipes/video_tasks.py +++ b/ibllib/pipes/video_tasks.py @@ -358,25 +358,41 @@ def signature(self): return signature def _check_dlcenv(self): - """Check that scripts are present, dlcenv can be activated and get iblvideo 
version""" - assert len(list(self.scripts.rglob('run_dlc.*'))) == 2, \ - f'Scripts run_dlc.sh and run_dlc.py do not exist in {self.scripts}' - assert len(list(self.scripts.rglob('run_motion.*'))) == 2, \ - f'Scripts run_motion.sh and run_motion.py do not exist in {self.scripts}' - assert self.dlcenv.exists(), f'DLC environment does not exist in assumed location {self.dlcenv}' - command2run = f"source {self.dlcenv}; python -c 'import iblvideo; print(iblvideo.__version__)'" - process = subprocess.Popen( - command2run, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - executable='/bin/bash' - ) - info, error = process.communicate() - if process.returncode != 0: - raise AssertionError(f"DLC environment check failed\n{error.decode('utf-8')}") - version = info.decode('utf-8').strip().split('\n')[-1] - return version + """ + Check DLC environment and return iblvideo version. + + Attempts to import iblvideo directly. If unsuccessful, checks for necessary + scripts and environment, then retrieves version via subprocess. + + Returns: + tuple: (version: str, needs_subprocess: bool) + """ + try: + import iblvideo + version = iblvideo.__version__ + needs_subprocess = False + _logger.info(f'Current environment contains iblvideo version {self.version}') + except ImportError: + # Check that scripts are present, dlcenv can be activated and get iblvideo version + assert len(list(self.scripts.rglob('run_dlc.*'))) == 2, \ + f'Scripts run_dlc.sh and run_dlc.py do not exist in {self.scripts}' + assert len(list(self.scripts.rglob('run_motion.*'))) == 2, \ + f'Scripts run_motion.sh and run_motion.py do not exist in {self.scripts}' + assert self.dlcenv.exists(), f'DLC environment does not exist in assumed location {self.dlcenv}' + command2run = f"source {self.dlcenv}; python -c 'import iblvideo; print(iblvideo.__version__)'" + process = subprocess.Popen( + command2run, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + executable='/bin/bash' + ) + info, error = process.communicate() + if process.returncode != 0: + raise AssertionError(f"DLC environment check failed\n{error.decode('utf-8')}") + version = info.decode('utf-8').strip().split('\n')[-1] + needs_subprocess = True + return version, needs_subprocess @staticmethod def _video_intact(file_mp4): @@ -387,20 +403,44 @@ def _video_intact(file_mp4): cap.release() return intact - def _run_dlc(self, file_mp4, cam, overwrite): + def _run_dlc(self, file_mp4, cam, overwrite, flag_subprocess=True): try: - import iblvideo - from iblvideo import download_weights - from iblvideo.choiceworld import dlc - self.version = iblvideo.__version__ - _logger.info(f'iblvideo version {self.version}') - path_dlc = download_weights() - dlc_result, _ = dlc(file_mp4, path_dlc=path_dlc, force=overwrite) - return 0 - except ImportError: - self.version = self._check_dlcenv() - _logger.info(f'iblvideo version {self.version}') - command2run = f"{self.scripts.joinpath('run_dlc.sh')} {str(self.dlcenv)} {file_mp4} {overwrite}" + if flag_subprocess: + _logger.info(f'iblvideo version {self.version}') + command2run = f"{self.scripts.joinpath('run_dlc.sh')} {str(self.dlcenv)} {file_mp4} {overwrite}" + _logger.info(command2run) + process = subprocess.Popen( + command2run, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + executable='/bin/bash', + ) + info, error = process.communicate() + # info_str = info.decode("utf-8").strip() + # _logger.info(info_str) + if process.returncode != 0: + error_str = error.decode('utf-8').strip() + _logger.error(f'DLC 
failed for {cam}Camera.\n\n' + f'++++++++ Output of subprocess for debugging ++++++++\n\n' + f'{error_str}\n' + f'++++++++++++++++++++++++++++++++++++++++++++\n') + return process.returncode + pass + else: + from iblvideo import download_weights + from iblvideo.choiceworld import dlc + path_dlc = download_weights() + dlc_result, _ = dlc(file_mp4, path_dlc=path_dlc, force=overwrite) + return 0 + except Exception as e: + _logger.error(f'An error occurred while running DLC for {cam}Camera: {e}') + _logger.error(traceback.format_exc()) + return -1 + + def _run_motion_energy(self, file_mp4, dlc_result, flag_subprocess=True): + if flag_subprocess: + command2run = f"{self.scripts.joinpath('run_motion.sh')} {str(self.dlcenv)} {file_mp4} {dlc_result}" _logger.info(command2run) process = subprocess.Popen( command2run, @@ -410,19 +450,27 @@ def _run_dlc(self, file_mp4, cam, overwrite): executable='/bin/bash', ) info, error = process.communicate() - # info_str = info.decode("utf-8").strip() + # info_str = info.decode('utf-8').strip() # _logger.info(info_str) if process.returncode != 0: error_str = error.decode('utf-8').strip() - _logger.error(f'DLC failed for {cam}Camera.\n\n' + _logger.error(f'Motion energy failed for {file_mp4}.\n\n' f'++++++++ Output of subprocess for debugging ++++++++\n\n' f'{error_str}\n' f'++++++++++++++++++++++++++++++++++++++++++++\n') - return process.returncode - except Exception as e: - _logger.error(f'An error occurred while running DLC for {cam}Camera: {e}') - _logger.error(traceback.format_exc()) - return -1 + return_code = process.returncode + else: # runs the motion energy calculation in the current environment + try: + from iblvideo.motion_energy import motion_energy + _ = motion_energy(file_mp4, dlc_result) + return_code = 0 + except Exception: + _logger.error(f'Motion energy failed for {file_mp4}.\n\n' + f'++++++++ Output of subprocess for debugging ++++++++\n\n' + f'{traceback.format_exc()}\n' + f'++++++++++++++++++++++++++++++++++++++++++++\n') + return_code = -1 + return return_code def _run(self, cams=None, overwrite=False): # Check that the cams are valid for DLC, remove the ones that aren't @@ -457,43 +505,31 @@ def _run(self, cams=None, overwrite=False): _logger.error(f'Corrupt raw video file {file_mp4}') self.status = -1 continue + # Check that dlc environment is ok, shell scripts exists, and get iblvideo version, GPU addressable check_nvidia_driver() + self.version, flag_subprocess = self._check_dlcenv() + # Step 1: Run DLC for this camera _logger.info(f'Running DLC on {cam}Camera.') - return_code = self._run_dlc(file_mp4, cam, overwrite) + return_code = self._run_dlc(file_mp4, cam, overwrite, flag_subprocess=flag_subprocess) if return_code != 0: self.status = -1 continue - dlc_result = next(self.session_path.joinpath('alf').glob(f'_ibl_{cam}Camera.dlc*.pqt')) actual_outputs.append(dlc_result) + # Step 2: Compute Motion Energy for this camera _logger.info(f'Computing motion energy for {cam}Camera') - command2run = f"{self.scripts.joinpath('run_motion.sh')} {str(self.dlcenv)} {file_mp4} {dlc_result}" - _logger.info(command2run) - process = subprocess.Popen( - command2run, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - executable='/bin/bash', - ) - info, error = process.communicate() - # info_str = info.decode('utf-8').strip() - # _logger.info(info_str) - if process.returncode != 0: - error_str = error.decode('utf-8').strip() - _logger.error(f'Motion energy failed for {cam}Camera.\n\n' - f'++++++++ Output of subprocess for debugging 
++++++++\n\n' - f'{error_str}\n' - f'++++++++++++++++++++++++++++++++++++++++++++\n') + return_code = self._run_motion_energy(self, file_mp4, dlc_result, flag_subprocess=flag_subprocess) + if return_code != 0: self.status = -1 continue actual_outputs.append(next(self.session_path.joinpath('alf').glob( f'{cam}Camera.ROIMotionEnergy*.npy'))) actual_outputs.append(next(self.session_path.joinpath('alf').glob( f'{cam}ROIMotionEnergy.position*.npy'))) + except Exception: _logger.error(traceback.format_exc()) self.status = -1 From 26a6ce222ce4ca1608d6219e4734fcce3eeb3fc6 Mon Sep 17 00:00:00 2001 From: owinter Date: Tue, 29 Apr 2025 10:32:43 +0100 Subject: [PATCH 19/80] fix deprecation warning for DLC --- ibllib/pipes/video_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/video_tasks.py b/ibllib/pipes/video_tasks.py index 2f5f1cd8e..e72643749 100644 --- a/ibllib/pipes/video_tasks.py +++ b/ibllib/pipes/video_tasks.py @@ -429,7 +429,7 @@ def _run_dlc(self, file_mp4, cam, overwrite, flag_subprocess=True): pass else: from iblvideo import download_weights - from iblvideo.choiceworld import dlc + from iblvideo.pose_dlc import dlc path_dlc = download_weights() dlc_result, _ = dlc(file_mp4, path_dlc=path_dlc, force=overwrite) return 0 From 938bc33ace3c335c4b2962445c4c05a4cbb5aacb Mon Sep 17 00:00:00 2001 From: owinter Date: Wed, 30 Apr 2025 11:04:32 +0100 Subject: [PATCH 20/80] fix call to motion energy method by removing instance --- ibllib/pipes/video_tasks.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ibllib/pipes/video_tasks.py b/ibllib/pipes/video_tasks.py index e72643749..b5ce98480 100644 --- a/ibllib/pipes/video_tasks.py +++ b/ibllib/pipes/video_tasks.py @@ -521,7 +521,7 @@ def _run(self, cams=None, overwrite=False): # Step 2: Compute Motion Energy for this camera _logger.info(f'Computing motion energy for {cam}Camera') - return_code = self._run_motion_energy(self, file_mp4, dlc_result, flag_subprocess=flag_subprocess) + return_code = self._run_motion_energy(file_mp4, dlc_result, flag_subprocess=flag_subprocess) if return_code != 0: self.status = -1 continue @@ -529,7 +529,6 @@ def _run(self, cams=None, overwrite=False): f'{cam}Camera.ROIMotionEnergy*.npy'))) actual_outputs.append(next(self.session_path.joinpath('alf').glob( f'{cam}ROIMotionEnergy.position*.npy'))) - except Exception: _logger.error(traceback.format_exc()) self.status = -1 From c0e6b872b5710ca3240216970e9b263bf95a3ac3 Mon Sep 17 00:00:00 2001 From: owinter Date: Tue, 20 May 2025 16:14:11 +0100 Subject: [PATCH 21/80] update requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 89473fb2a..07679187c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,7 @@ seaborn>=0.9.0 tqdm>=4.32.1 # ibl libraries iblatlas>=0.5.3 -ibl-neuropixel>=1.6.2 +ibl-neuropixel>=1.7.0 iblutil>=1.13.0 iblqt>=0.4.2 mtscomp>=1.0.1 From 3c4d1f4584e31fcafb16bef73dedc162ca9e76b0 Mon Sep 17 00:00:00 2001 From: owinter Date: Wed, 21 May 2025 13:57:08 +0100 Subject: [PATCH 22/80] Check for iblsorter availability by other means of a try ... 
importError --- ibllib/pipes/ephys_tasks.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ibllib/pipes/ephys_tasks.py b/ibllib/pipes/ephys_tasks.py index 6cc1cd8db..253e13420 100644 --- a/ibllib/pipes/ephys_tasks.py +++ b/ibllib/pipes/ephys_tasks.py @@ -1,8 +1,10 @@ +import importlib import logging from pathlib import Path import re import shutil import subprocess +import sys import traceback import packaging.version @@ -727,11 +729,11 @@ def _run_iblsort(self, ap_file): self.FORCE_RERUN = True self.scratch_folder_run.mkdir(parents=True, exist_ok=True) check_nvidia_driver() - try: - # if pykilosort is in the environment, use the installed version within the task + # this is the best way I found to check if iblsorter is installed and available without a try block + if 'iblsorter' in sys.modules and importlib.util.find_spec('iblsorter.ibl') is not None: import iblsorter.ibl # noqa iblsorter.ibl.run_spike_sorting_ibl(bin_file=ap_file, scratch_dir=self.scratch_folder_run, delete=False) - except ImportError: + else: command2run = f"{self.SHELL_SCRIPT} {ap_file} {self.scratch_folder_run}" _logger.info(command2run) process = subprocess.Popen( From a5bd4f6076d9446e6242916b719176e364541728 Mon Sep 17 00:00:00 2001 From: Olivier Winter Date: Thu, 22 May 2025 13:21:03 +0100 Subject: [PATCH 23/80] make sure ibl-neuropixel higher than 1.7.1 --- requirements.txt | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/requirements.txt b/requirements.txt index 07679187c..9649c1e1c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,35 +1,35 @@ +# ibl libraries +ONE-api>=3.0.0 boto3 click>=7.0.0 colorlog>=4.0.2 flake8>=3.7.8 globus-sdk graphviz +ibl-neuropixel>=1.7.1 +ibl-style +iblatlas>=0.5.3 +iblqt>=0.4.2 +iblutil>=1.13.0 +imagecodecs # used to convert tif snapshots to png when registering mesoscope snapshots (also requires skimage) matplotlib>=3.0.3 +mtscomp>=1.0.1 +nptdms numba>=0.56 numpy>=1.18 -nptdms opencv-python-headless pandas +phylib>=2.6.0 +psychofit pyarrow pynrrd>=0.4.0 +pyqt5 pytest requests>=2.22.0 +scikit-image # this is a widefield requirement missing as of July 2023, we may remove it once wfield has this figured out scikit-learn>=0.22.1 scipy>=1.7.0 -scikit-image # this is a widefield requirement missing as of July 2023, we may remove it once wfield has this figured out -imagecodecs # used to convert tif snapshots to png when registering mesoscope snapshots (also requires skimage) -sparse seaborn>=0.9.0 -tqdm>=4.32.1 -# ibl libraries -iblatlas>=0.5.3 -ibl-neuropixel>=1.7.0 -iblutil>=1.13.0 -iblqt>=0.4.2 -mtscomp>=1.0.1 -ONE-api>=3.0.0 -phylib>=2.6.0 -psychofit slidingRP>=1.1.1 # steinmetz lab refractory period metrics -pyqt5 -ibl-style +sparse +tqdm>=4.32.1 From 75c3e40828b3c5232347d89bed5ad8147eb2b0a1 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 30 May 2025 12:57:13 +0100 Subject: [PATCH 24/80] changing the extractor for compatibility with DI only and Frameclock on DI0 --- ibllib/pipes/neurophotometrics.py | 40 ++++++++++------ .../_ibl_experiment.description.yaml | 35 ++++++++++++++ ibllib/tests/test_neurophotometrics.py | 48 +++++++++++++++++++ 3 files changed, 108 insertions(+), 15 deletions(-) create mode 100644 ibllib/tests/fixtures/neurophotometrics/_ibl_experiment.description.yaml create mode 100644 ibllib/tests/test_neurophotometrics.py diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 86639d8be..76fa91c63 100644 --- 
a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -18,19 +18,30 @@ def extract_timestamps_from_tdms_file(tdms_filepath: Path) -> dict: - # loads the tdms file data, and detects the rising edges - timestamps = {} # stores the resulting edge times here - tdms_file = TdmsFile.read(tdms_filepath) - analog_group, digital_group = tdms_file.groups() - fs = analog_group.properties['ScanRate'] - - for channel in analog_group.channels(): - signal = (channel.data > 2.5).astype('int64') - timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs + # extractor for tdms files as written by the daqami software, configured + # for neurophotometrics experiments: Frameclock is in AI7, DI1-4 are the + # bpod sync signals - for channel in digital_group.channels(): - signal = (channel.data > 0.5).astype('int64') - timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs + tdms_file = TdmsFile.read(tdms_filepath) + (digital_group,) = tdms_file.groups() + fs = digital_group.properties['ScanRate'] # this should be 10kHz + df = tdms_file.as_dataframe() + col = df.columns[-1] + vals = df[col].values.astype('int64') + columns = ['DI0', 'DI1', 'DI2', 'DI3'] + + # ugly but basically just a binary decoder for the binary data + # assumes 4 channels + data = np.array([list(bin(v)[2:].zfill(4)[::-1]) for v in vals], dtype='int64') + timestamps = {} + for i, name in enumerate(columns): + signal = data[:, i] + timestamps[name] = np.where(np.diff(signal) == 1)[0] / fs + + # frameclock data is recorded on an analog channel + # for channel in analog_group.channels(): + # signal = (channel.data > 2.5).astype('int64') # assumes 0-5V + # timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs return timestamps @@ -104,7 +115,7 @@ def _get_sync_function(self) -> Tuple[callable, list]: _logger.info( f'sync: n trials {len(bpod_data)}, n bpod sync {len(timestamps_bpod)}, n photometry {len(timestamps_nph)}, n match {len(ix_nph)}' ) - # FIXME the framerate here is hardcoded, infer it instead! + # TODO the framerate here is hardcoded, infer it instead! 
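# --- editor's sketch, not part of the patch: one way to address the TODO above,
# --- inferring the frame period from the photometry timestamps themselves so the
# --- residual check that follows no longer hardcodes 1 / 60. Name is illustrative only.
def _sketch_infer_frame_period(frame_times):
    import numpy as np  # frame_times: 1D array of SystemTimestamp values, in seconds
    dt = np.median(np.diff(frame_times))  # median is robust to occasional dropped frames
    return dt  # the assert below could then use np.abs(tcheck) < dt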
assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' assert len(ix_nph) / len(timestamps_bpod) > 0.95, 'Sync issue detected, less than 95% of the bpod events matched' valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] @@ -116,7 +127,6 @@ def load_data(self) -> pd.DataFrame: raw_photometry_folder = self.session_path / self.photometry_collection raw_neurophotometrics_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt') return raw_neurophotometrics_df - # return ibl_df def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # 1) load photometry data @@ -221,7 +231,7 @@ def load_data(self) -> pd.DataFrame: # get daqami timestamps tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) - frame_timestamps = self.timestamps[f'AI{self.sync_kwargs["frameclock_channel"]}'] + frame_timestamps = self.timestamps[f'DI{self.sync_kwargs["frameclock_channel"]}'] # and put them in the raw_df SystemTimestamp column if raw_df.shape[0] == frame_timestamps.shape[0]: diff --git a/ibllib/tests/fixtures/neurophotometrics/_ibl_experiment.description.yaml b/ibllib/tests/fixtures/neurophotometrics/_ibl_experiment.description.yaml new file mode 100644 index 000000000..8a39783cb --- /dev/null +++ b/ibllib/tests/fixtures/neurophotometrics/_ibl_experiment.description.yaml @@ -0,0 +1,35 @@ +devices: + cameras: + left: + collection: raw_video_data + sync_label: audio + microphone: + microphone: + collection: raw_task_data_00 + sync_label: audio + neurophotometrics: + collection: raw_photometry_data + datetime: '2025-05-26T15:08:40.237101' + fibers: + G0: + location: VTA + sync_channel: 2 + sync_metadata: + acquisition_software: daqami + collection: raw_photometry_data + frameclock_channel: 7 + sync_mode: daqami +procedures: +- Fiber photometry +projects: +- ibl_fibrephotometry +- practice +sync: + bpod: + acquisition_software: pybpod + collection: raw_task_data_00 + extension: .jsonable +tasks: +- _iblrig_tasks_advancedChoiceWorld: + collection: raw_task_data_00 +version: 1.0.0 diff --git a/ibllib/tests/test_neurophotometrics.py b/ibllib/tests/test_neurophotometrics.py new file mode 100644 index 000000000..fb3e1f773 --- /dev/null +++ b/ibllib/tests/test_neurophotometrics.py @@ -0,0 +1,48 @@ +"""Tests for ibllib.pipes.mesoscope_tasks.""" + +import sys +import unittest +from unittest import mock +import tempfile +import json +from itertools import chain +from pathlib import Path +import subprocess +from copy import deepcopy +import uuid + +from one.api import ONE +import numpy as np + +from ibllib.pipes.mesoscope_tasks import MesoscopePreprocess, MesoscopeFOV, find_triangle, surface_normal, _nearest_neighbour_1d +from ibllib.io.extractors import mesoscope +from ibllib.tests import TEST_DB +from ibllib.io import session_params + +# Mock suit2p which is imported in MesoscopePreprocess +attrs = {'default_ops.return_value': {}} +sys.modules['suite2p'] = mock.MagicMock(**attrs) + + +class TestNeurophotometricsExtractor(unittest.TestCase): + """ + this class tests + that the correct extractor is run based on the experiment description file + this requires the setup to have + + """ + + def setUp(self) -> None: + self.tmp_folder = tempfile.TemporaryDirectory() + self.session_folder = Path(self.tmp_folder.name) / 'subject' / '2020-01-01' / '001' + self.raw_photometry_folder = self.session_folder / 
'raw_photometry_data' + self.raw_photometry_folder.mkdir(parents=True) + + def test_bpod_extractor(self): + path = Path(__file__).parent / 'fixtures' / 'neurophotometrics' / '_ibl_experiment_description_bpod.yaml' + self.experiment_description = session_params.read_params(path) + # expected + + def test_daqami_extractor(self): + path = Path(__file__).parent / 'fixtures' / 'neurophotometrics' / '_ibl_experiment_description_bpod.yaml' + self.experiment_description = session_params.read_params(path) From c1ebda17da60a2487a6ea671fb78e42cde6561bc Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 2 Jun 2025 15:41:31 +0100 Subject: [PATCH 25/80] added functionality to deal with premature termination of daqami --- ibllib/pipes/neurophotometrics.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 76fa91c63..6f27ffcbc 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -233,12 +233,24 @@ def load_data(self) -> pd.DataFrame: self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) frame_timestamps = self.timestamps[f'DI{self.sync_kwargs["frameclock_channel"]}'] + # compare number of frame timestamps # and put them in the raw_df SystemTimestamp column if raw_df.shape[0] == frame_timestamps.shape[0]: raw_df['SystemTimestamp'] = frame_timestamps elif raw_df.shape[0] == frame_timestamps.shape[0] + 1: # there is one extra frame timestamp from the last incomplete frame raw_df['SystemTimestamp'] = frame_timestamps[:-1] + elif raw_df.shape[0] > frame_timestamps: + # the daqami was stopped / closed before bonsai + # we discard all frames that can not be mapped + _logger.warning( + f'#frames recorded by bonsai: {raw_df.shape[0]} > #frame timestamps recorded by daqami {frame_timestamps.shape[0]}, dropping all frames without recorded timestamps' + ) + raw_df = raw_df.iloc[: frame_timestamps.shape[0]] + + elif raw_df.shape[0] < frame_timestamps: + # this should not be possible + raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') return raw_df def _get_neurophotometrics_timestamps(self) -> np.ndarray: From 15fe6afcc058e2ee0f47ce173ef3d43803049c45 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 4 Jun 2025 12:34:40 +0100 Subject: [PATCH 26/80] tiny bugfix for syncing sessions were daqami was stopped before bonsai --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 6f27ffcbc..179a8d137 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -240,7 +240,7 @@ def load_data(self) -> pd.DataFrame: elif raw_df.shape[0] == frame_timestamps.shape[0] + 1: # there is one extra frame timestamp from the last incomplete frame raw_df['SystemTimestamp'] = frame_timestamps[:-1] - elif raw_df.shape[0] > frame_timestamps: + elif raw_df.shape[0] > frame_timestamps.shape[0]: # the daqami was stopped / closed before bonsai # we discard all frames that can not be mapped _logger.warning( From ed4ea5a0faf53b641fc9e10ceae6dba7444a12ae Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 4 Jun 2025 12:46:12 +0100 Subject: [PATCH 27/80] ruff please, (tests are stubs) --- ibllib/tests/test_neurophotometrics.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/ibllib/tests/test_neurophotometrics.py b/ibllib/tests/test_neurophotometrics.py index fb3e1f773..fcad9d379 
100644 --- a/ibllib/tests/test_neurophotometrics.py +++ b/ibllib/tests/test_neurophotometrics.py @@ -4,19 +4,9 @@ import unittest from unittest import mock import tempfile -import json -from itertools import chain from pathlib import Path -import subprocess -from copy import deepcopy -import uuid -from one.api import ONE -import numpy as np -from ibllib.pipes.mesoscope_tasks import MesoscopePreprocess, MesoscopeFOV, find_triangle, surface_normal, _nearest_neighbour_1d -from ibllib.io.extractors import mesoscope -from ibllib.tests import TEST_DB from ibllib.io import session_params # Mock suit2p which is imported in MesoscopePreprocess @@ -41,7 +31,6 @@ def setUp(self) -> None: def test_bpod_extractor(self): path = Path(__file__).parent / 'fixtures' / 'neurophotometrics' / '_ibl_experiment_description_bpod.yaml' self.experiment_description = session_params.read_params(path) - # expected def test_daqami_extractor(self): path = Path(__file__).parent / 'fixtures' / 'neurophotometrics' / '_ibl_experiment_description_bpod.yaml' From 8e69190ba7989788d46396b028611e3510e99415 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 4 Jun 2025 12:56:23 +0100 Subject: [PATCH 28/80] flake8 --- ibllib/pipes/neurophotometrics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 179a8d137..553f388b7 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -113,7 +113,9 @@ def _get_sync_function(self) -> Tuple[callable, list]: # then we check the alignment, should be less than the camera sampling rate tcheck = sync_nph_to_bpod_fcn(timestamps_nph[ix_nph]) - timestamps_bpod[ix_bpod] _logger.info( - f'sync: n trials {len(bpod_data)}, n bpod sync {len(timestamps_bpod)}, n photometry {len(timestamps_nph)}, n match {len(ix_nph)}' + f'sync: n trials {len(bpod_data)}' + f'n bpod sync {len(timestamps_bpod)}' + f'n photometry {len(timestamps_nph)}, n match {len(ix_nph)}' ) # TODO the framerate here is hardcoded, infer it instead! 
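# --- editor's note, not part of the patch: the three adjacent f-strings above
# --- concatenate with no separator, so the logged message runs the fields
# --- together (e.g. "...n trials 42n bpod sync 40n photometry..."); a sketch
# --- of the same call with explicit separators:
_logger.info(
    f'sync: n trials {len(bpod_data)}, '
    f'n bpod sync {len(timestamps_bpod)}, '
    f'n photometry {len(timestamps_nph)}, n match {len(ix_nph)}'
)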
assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' @@ -243,9 +245,7 @@ def load_data(self) -> pd.DataFrame: elif raw_df.shape[0] > frame_timestamps.shape[0]: # the daqami was stopped / closed before bonsai # we discard all frames that can not be mapped - _logger.warning( - f'#frames recorded by bonsai: {raw_df.shape[0]} > #frame timestamps recorded by daqami {frame_timestamps.shape[0]}, dropping all frames without recorded timestamps' - ) + _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') raw_df = raw_df.iloc[: frame_timestamps.shape[0]] elif raw_df.shape[0] < frame_timestamps: From f59a9547fb235615663b0bf9c896dc3cd30141fe Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 4 Jun 2025 15:00:55 +0100 Subject: [PATCH 29/80] updated requirements again after prefect merge --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a18b46b30..089e2bc9e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,5 +33,4 @@ seaborn>=0.9.0 slidingRP>=1.1.1 # steinmetz lab refractory period metrics sparse tqdm>=4.32.1 - -pyqt5 \ No newline at end of file +ibl-photometry From e6e7ae2b3660cbdb963d750b80d465e6643650f0 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 6 Jun 2025 09:59:35 +0100 Subject: [PATCH 30/80] another crucial bugfix for daqami frame number check during extractor --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 553f388b7..9fb262d0a 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -248,7 +248,7 @@ def load_data(self) -> pd.DataFrame: _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') raw_df = raw_df.iloc[: frame_timestamps.shape[0]] - elif raw_df.shape[0] < frame_timestamps: + elif raw_df.shape[0] < frame_timestamps.shape[0]: # this should not be possible raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') return raw_df From f3a58e8007d09500f7a9fa73008cf40ea3ec3aa2 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 6 Jun 2025 13:00:22 +0100 Subject: [PATCH 31/80] moving the frameclock back on AI7, included downward compatibility in the extractor for the sessions that have the frameclock on DI0 --- ibllib/pipes/neurophotometrics.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 9fb262d0a..a1a15ed80 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -23,7 +23,14 @@ def extract_timestamps_from_tdms_file(tdms_filepath: Path) -> dict: # bpod sync signals tdms_file = TdmsFile.read(tdms_filepath) - (digital_group,) = tdms_file.groups() + groups = tdms_file.groups() + # this unfortunate hack is in here because there are a bunch of sessions where the frameclock is on DI0 + if len(groups) == 1: + has_analog_group = False + (digital_group,) = groups + if len(groups) == 2: + has_analog_group = True + analog_group, digital_group = groups fs = digital_group.properties['ScanRate'] # this should be 10kHz df = tdms_file.as_dataframe() col = df.columns[-1] @@ -38,10 +45,11 @@ def extract_timestamps_from_tdms_file(tdms_filepath: Path) -> dict: signal = data[:, i] 
timestamps[name] = np.where(np.diff(signal) == 1)[0] / fs - # frameclock data is recorded on an analog channel - # for channel in analog_group.channels(): - # signal = (channel.data > 2.5).astype('int64') # assumes 0-5V - # timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs + if has_analog_group: + # frameclock data is recorded on an analog channel + for channel in analog_group.channels(): + signal = (channel.data > 2.5).astype('int64') # assumes 0-5V + timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs return timestamps @@ -233,7 +241,13 @@ def load_data(self) -> pd.DataFrame: # get daqami timestamps tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) - frame_timestamps = self.timestamps[f'DI{self.sync_kwargs["frameclock_channel"]}'] + # downward compatibility - frameclock moved around, now is back on the AI7 + # was specified with int before. if int, + if type(self.sync_kwargs['frameclock_channel']) is int: + sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' + else: + sync_channel_name = self.sync_kwargs['frameclock_channel'] + frame_timestamps = self.timestamps[sync_channel_name] # compare number of frame timestamps # and put them in the raw_df SystemTimestamp column @@ -248,7 +262,7 @@ def load_data(self) -> pd.DataFrame: _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') raw_df = raw_df.iloc[: frame_timestamps.shape[0]] - elif raw_df.shape[0] < frame_timestamps.shape[0]: + elif raw_df.shape[0] < frame_timestamps.shape: # this should not be possible raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') return raw_df From 85eb00a73d76b11fa2ee06732437fb3f1362f6d7 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 6 Jun 2025 13:05:49 +0100 Subject: [PATCH 32/80] related to previous commit, better checking for analog/digital sync channels --- ibllib/pipes/neurophotometrics.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index a1a15ed80..6d93f364a 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -242,10 +242,11 @@ def load_data(self) -> pd.DataFrame: tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) # downward compatibility - frameclock moved around, now is back on the AI7 - # was specified with int before. if int, - if type(self.sync_kwargs['frameclock_channel']) is int: + # was specified with int before. 
if int + try: + int(self.sync_kwargs['frameclock_channel']) sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' - else: + except ValueError: sync_channel_name = self.sync_kwargs['frameclock_channel'] frame_timestamps = self.timestamps[sync_channel_name] @@ -269,10 +270,15 @@ def load_data(self) -> pd.DataFrame: def _get_neurophotometrics_timestamps(self) -> np.ndarray: # get the sync channel - sync_colname = f'DI{self.kwargs["sync_channel"]}' + # again the ugly downward compatibility hack + try: + int(self.sync_kwargs['frameclock_channel']) + sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' + except ValueError: + sync_channel_name = self.sync_kwargs['frameclock_channel'] # and the corresponding timestamps - timestamps_nph = self.timestamps[sync_colname] + timestamps_nph = self.timestamps[sync_channel_name] # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods From 720787d207f4e2dc17f6875630bdf267e0a5bacf Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 6 Jun 2025 15:14:29 +0100 Subject: [PATCH 33/80] bugfix --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 6d93f364a..bf0e3d43d 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -263,7 +263,7 @@ def load_data(self) -> pd.DataFrame: _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') raw_df = raw_df.iloc[: frame_timestamps.shape[0]] - elif raw_df.shape[0] < frame_timestamps.shape: + elif raw_df.shape[0] < frame_timestamps.shape[0]: # this should not be possible raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') return raw_df From 516ddbdead6eb97efd7eb3d7814e03e92e8efab8 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 11 Jun 2025 17:11:03 +0100 Subject: [PATCH 34/80] bugfix for wrong frameclock channel --- ibllib/pipes/neurophotometrics.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index bf0e3d43d..62465a93d 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -269,16 +269,8 @@ def load_data(self) -> pd.DataFrame: return raw_df def _get_neurophotometrics_timestamps(self) -> np.ndarray: - # get the sync channel - # again the ugly downward compatibility hack - try: - int(self.sync_kwargs['frameclock_channel']) - sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' - except ValueError: - sync_channel_name = self.sync_kwargs['frameclock_channel'] - - # and the corresponding timestamps - timestamps_nph = self.timestamps[sync_channel_name] + # get the sync channel and the corresponding timestamps + timestamps_nph = self.timestamps[f'DI{self.sync_channel}'] # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods From 52c0c6f154e1e0125185108edee3082d2e9053af Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 11 Jun 2025 17:25:54 +0100 Subject: [PATCH 35/80] extractor fix --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 62465a93d..b7c3c6ff6 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -263,7 +263,7 @@ def 
load_data(self) -> pd.DataFrame: _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') raw_df = raw_df.iloc[: frame_timestamps.shape[0]] - elif raw_df.shape[0] < frame_timestamps.shape[0]: + elif raw_df.shape[0] + 1 < frame_timestamps.shape[0]: # this should not be possible raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') return raw_df From d46d846897b1af016ebf2b5a2d91988e950d2ad4 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 11 Jun 2025 19:18:15 +0100 Subject: [PATCH 36/80] very hardcoded fix for frameclock channel that turns obsolete as soon as the experiment_description files are patched --- ibllib/pipes/neurophotometrics.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index b7c3c6ff6..004612cb9 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -243,10 +243,11 @@ def load_data(self) -> pd.DataFrame: self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) # downward compatibility - frameclock moved around, now is back on the AI7 # was specified with int before. if int - try: - int(self.sync_kwargs['frameclock_channel']) + if self.sync_kwargs['frameclock_channel'] == 0: sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' - except ValueError: + if self.sync_kwargs['frameclock_channel'] == 7: + sync_channel_name = f'AI{self.sync_kwargs["frameclock_channel"]}' + else: sync_channel_name = self.sync_kwargs['frameclock_channel'] frame_timestamps = self.timestamps[sync_channel_name] From 808754b69a274b309e5809415f58cc5116ff192b Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 11 Jun 2025 20:05:14 +0100 Subject: [PATCH 37/80] int / str bugfix --- ibllib/pipes/neurophotometrics.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 004612cb9..dd630be86 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -242,10 +242,9 @@ def load_data(self) -> pd.DataFrame: tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) # downward compatibility - frameclock moved around, now is back on the AI7 - # was specified with int before. 
if int - if self.sync_kwargs['frameclock_channel'] == 0: + if self.sync_kwargs['frameclock_channel'] == '0': sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' - if self.sync_kwargs['frameclock_channel'] == 7: + if self.sync_kwargs['frameclock_channel'] == '7': sync_channel_name = f'AI{self.sync_kwargs["frameclock_channel"]}' else: sync_channel_name = self.sync_kwargs['frameclock_channel'] From 7f8d6528b75727e9388eed6e3444193df56d690a Mon Sep 17 00:00:00 2001 From: olivier Date: Thu, 12 Jun 2025 14:55:35 +0100 Subject: [PATCH 38/80] bugfix for channel int/str --- ibllib/pipes/neurophotometrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index dd630be86..55314e271 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -242,9 +242,9 @@ def load_data(self) -> pd.DataFrame: tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) # downward compatibility - frameclock moved around, now is back on the AI7 - if self.sync_kwargs['frameclock_channel'] == '0': + if self.sync_kwargs['frameclock_channel'] in ['0',0]: sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' - if self.sync_kwargs['frameclock_channel'] == '7': + if self.sync_kwargs['frameclock_channel'] in ['7',7]: sync_channel_name = f'AI{self.sync_kwargs["frameclock_channel"]}' else: sync_channel_name = self.sync_kwargs['frameclock_channel'] From 771048e179298bd3408a8bd35ca104706c92787f Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Thu, 12 Jun 2025 15:00:32 +0100 Subject: [PATCH 39/80] fake commit just to check user name --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 55314e271..03c791296 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -274,5 +274,5 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods - timestamps_nph = timestamps_nph[15:] + timestamps_nph = timestamps_nph[15: ] return timestamps_nph From 1a747c3282a07e100ca6a8263809cb058f9c5429 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Fri, 13 Jun 2025 11:15:49 +0100 Subject: [PATCH 40/80] bugfix for downward compatible frameclock_channel --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 03c791296..f52b9c028 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -244,7 +244,7 @@ def load_data(self) -> pd.DataFrame: # downward compatibility - frameclock moved around, now is back on the AI7 if self.sync_kwargs['frameclock_channel'] in ['0',0]: sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' - if self.sync_kwargs['frameclock_channel'] in ['7',7]: + elif self.sync_kwargs['frameclock_channel'] in ['7',7]: sync_channel_name = f'AI{self.sync_kwargs["frameclock_channel"]}' else: sync_channel_name = self.sync_kwargs['frameclock_channel'] From d32f54b39c05e0046fe07f83c4aa333f7b363480 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Fri, 13 Jun 2025 14:13:25 +0100 Subject: [PATCH 41/80] more verbose error msg for sync fail --- ibllib/pipes/neurophotometrics.py | 4 ++++ 1 file 
changed, 4 insertions(+) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index f52b9c028..6049ac8eb 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -112,6 +112,10 @@ def _get_sync_function(self) -> Tuple[callable, list]: timestamps_bpod, bpod_data = self._get_bpod_timestamps() timestamps_nph = self._get_neurophotometrics_timestamps() + # verify presence of sync timestamps + for source, timestamps in zip(['bpod','neurophotometrics'], [timestamps_bpod, timestamps_nph]): + assert len(timestamps) > 0, f'{source} sync timestamps are empty' + # sync the behaviour events to the photometry timestamps sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( timestamps_nph, timestamps_bpod, return_indices=True, linear=True From fd001805eb7c027e1e8515e837d38806e5cfceab Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Mon, 16 Jun 2025 16:34:02 +0100 Subject: [PATCH 42/80] path bugfix for extractor --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 6049ac8eb..659d7c60b 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -176,7 +176,7 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: locations_df = pd.DataFrame(rois).set_index('ROI') locations_df_outpath = output_folder / 'photometryROI.locations.pqt' locations_df.to_parquet(locations_df_outpath) - return ibl_df, locations_df + return ibl_df_outpath, locations_df_outpath class FibrePhotometryBpodSync(FibrePhotometryBaseSync): From 4ecb482870a3fdb3b5774569345419da4c255eb6 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Tue, 17 Jun 2025 14:25:24 +0100 Subject: [PATCH 43/80] splitting sessions by spacers and attepmting to sync each --- ibllib/pipes/neurophotometrics.py | 54 ++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 659d7c60b..ed0a7bf69 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -13,6 +13,7 @@ from abc import abstractmethod from iblphotometry import io as fpio +from iblutil.spacer import Spacer _logger = logging.getLogger('ibllib') @@ -107,7 +108,6 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: def _get_sync_function(self) -> Tuple[callable, list]: # returns the synchronization function - # get the timestamps timestamps_bpod, bpod_data = self._get_bpod_timestamps() timestamps_nph = self._get_neurophotometrics_timestamps() @@ -116,22 +116,38 @@ def _get_sync_function(self) -> Tuple[callable, list]: for source, timestamps in zip(['bpod','neurophotometrics'], [timestamps_bpod, timestamps_nph]): assert len(timestamps) > 0, f'{source} sync timestamps are empty' - # sync the behaviour events to the photometry timestamps - sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - timestamps_nph, timestamps_bpod, return_indices=True, linear=True - ) - # TODO log drift - - # then we check the alignment, should be less than the camera sampling rate - tcheck = sync_nph_to_bpod_fcn(timestamps_nph[ix_nph]) - timestamps_bpod[ix_bpod] - _logger.info( - f'sync: n trials {len(bpod_data)}' - f'n bpod sync {len(timestamps_bpod)}' - f'n photometry {len(timestamps_nph)}, n match {len(ix_nph)}' - ) - # TODO the framerate here is hardcoded, infer it instead! 
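For reference, the convention that PATCH 42 above restores is that `_run` returns the written file paths rather than the in-memory dataframes. A minimal sketch of that pattern, using the dataset names from this pipeline; the helper name and the toy dataframes are hypothetical:

    import pandas as pd
    from pathlib import Path

    def write_photometry_outputs(ibl_df: pd.DataFrame, locations_df: pd.DataFrame, output_folder: Path):
        # write both tables to parquet and return the output paths (what PATCH 42 changes the return value to)
        output_folder.mkdir(parents=True, exist_ok=True)
        ibl_df_outpath = output_folder / 'photometry.signal.pqt'
        locations_df_outpath = output_folder / 'photometryROI.locations.pqt'
        ibl_df.to_parquet(ibl_df_outpath)
        locations_df.to_parquet(locations_df_outpath)
        return ibl_df_outpath, locations_df_outpath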
- assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' - assert len(ix_nph) / len(timestamps_bpod) > 0.95, 'Sync issue detected, less than 95% of the bpod events matched' + # split into segments if multiple spacers are found + # attempt to sync for each segment (only one will work) + spacer = Spacer() + spacer_ix = spacer.find_spacers_from_timestamps(timestamps_nph, atol=1e-5) + # the indices that mark the boundaries of segments + + segment_ix = np.concatenate([spacer_ix, [timestamps_nph.shape[0]]]) + segments = [] + for i in range(segment_ix.shape[0]-1): + start_ix = segment_ix[i] + stop_ix = segment_ix[i+1] + segments.append(timestamps_nph[start_ix:stop_ix]) + + for i, timestamps_segment in enumerate(segments): + print(i) + # sync the behaviour events to the photometry timestamps + sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( + timestamps_segment, timestamps_bpod, return_indices=True, linear=True + ) + # then we check the alignment, should be less than the camera sampling rate + tcheck = sync_nph_to_bpod_fcn(timestamps_segment[ix_nph]) - timestamps_bpod[ix_bpod] + _logger.info( + f'sync: n trials {len(bpod_data)}' + f'n bpod sync {len(timestamps_bpod)}' + f'n photometry {len(timestamps_segment)}, n match {len(ix_nph)}' + ) + if len(ix_nph) / len(timestamps_bpod) < 0.95: + # wrong segment + continue + # TODO the framerate here is hardcoded, infer it instead! + assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' + valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] return sync_nph_to_bpod_fcn, valid_bounds @@ -207,7 +223,7 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods - timestamps_nph = timestamps_nph[15:] + # timestamps_nph = timestamps_nph[15:] return timestamps_nph @@ -278,5 +294,5 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods - timestamps_nph = timestamps_nph[15: ] + # timestamps_nph = timestamps_nph[15: ] return timestamps_nph From 18a02e2946744d04e8cfc3f646e2431c85c05606 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Wed, 18 Jun 2025 14:02:35 +0100 Subject: [PATCH 44/80] spacer detection for hot swapping --- ibllib/pipes/neurophotometrics.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index ed0a7bf69..bba7e50b3 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -106,7 +106,7 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # for daq based syncing, the timestamps are extracted from the tdms file ... 
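As an aside, the segment logic that PATCH 43 introduces boils down to two steps: cut the photometry sync pulses at every detected spacer, then accept the one segment whose pulses line up with the bpod events. A hypothetical standalone restatement under the same assumptions as the patch (1/60 s frame period, at least 95% of bpod events matched); `spacer_ix` and `n_pulses` stand in for the output of `Spacer.find_spacers_from_timestamps` and the `Spacer.n_pulses` attribute:

    import numpy as np

    def split_at_spacers(timestamps_nph, spacer_ix, n_pulses):
        # segment boundaries: each detected spacer start, plus the end of the recording
        bounds = np.concatenate([spacer_ix, [timestamps_nph.shape[0]]]).astype(int)
        segments = []
        for i in range(bounds.shape[0] - 1):
            # skip the spacer pulses themselves (n_pulses pulses -> 2 * n_pulses edges)
            start = bounds[i] + (n_pulses * 2) - 1
            segments.append(timestamps_nph[start:bounds[i + 1]])
        return segments

    def sync_is_acceptable(sync_fcn, ts_segment, ts_bpod, ix_nph, ix_bpod, frame_period=1 / 60, min_match=0.95):
        # residual between synced photometry pulses and the bpod events they were matched to
        residual = sync_fcn(ts_segment[ix_nph]) - ts_bpod[ix_bpod]
        enough_matches = len(ix_nph) / len(ts_bpod) >= min_match
        return bool(np.all(np.abs(residual) < frame_period) and enough_matches)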
- def _get_sync_function(self) -> Tuple[callable, list]: + def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, list]: # returns the synchronization function # get the timestamps timestamps_bpod, bpod_data = self._get_bpod_timestamps() @@ -119,18 +119,23 @@ def _get_sync_function(self) -> Tuple[callable, list]: # split into segments if multiple spacers are found # attempt to sync for each segment (only one will work) spacer = Spacer() - spacer_ix = spacer.find_spacers_from_timestamps(timestamps_nph, atol=1e-5) - # the indices that mark the boundaries of segments + + # the fast way + match spacer_detection_mode: + case 'fast': + spacer_ix = spacer.find_spacers_from_timestamps(timestamps_nph, atol=1e-5) + case 'safe': + spacer_ix, spacer_times = spacer.find_spacers_from_positive_fronts(timestamps_nph, fs=1000) + # the indices that mark the boundaries of segments segment_ix = np.concatenate([spacer_ix, [timestamps_nph.shape[0]]]) segments = [] for i in range(segment_ix.shape[0]-1): - start_ix = segment_ix[i] + start_ix = segment_ix[i] + (spacer.n_pulses * 2) - 1 stop_ix = segment_ix[i+1] segments.append(timestamps_nph[start_ix:stop_ix]) for i, timestamps_segment in enumerate(segments): - print(i) # sync the behaviour events to the photometry timestamps sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( timestamps_segment, timestamps_bpod, return_indices=True, linear=True @@ -144,6 +149,7 @@ def _get_sync_function(self) -> Tuple[callable, list]: ) if len(ix_nph) / len(timestamps_bpod) < 0.95: # wrong segment + print('wrong segment') continue # TODO the framerate here is hardcoded, infer it instead! assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' @@ -168,7 +174,8 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: raw_df = self.load_data() # 2) get the synchronization function - sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function() + spacer_detection_mode = kwargs.get('spacer_detection_mode', 'fast') + sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function(spacer_detection_mode=spacer_detection_mode) # 3) convert to ibl_df ibl_df = fpio.from_raw_neurophotometrics_df_to_ibl_df(raw_df, rois=self.kwargs['fibers'], drop_first=False) From 4fbdfc9118789c3b0822af63f50ab2af0a24d275 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Mon, 23 Jun 2025 14:59:47 +0100 Subject: [PATCH 45/80] bugfix for failing job creation due to wrong kwarg handling --- ibllib/pipes/neurophotometrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index bba7e50b3..965bb3908 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -61,8 +61,8 @@ class FibrePhotometryBaseSync(base_tasks.DynamicTask): priority = 90 job_size = 'small' - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, session_path, one, **kwargs): + super().__init__(session_path, one=one, **kwargs) self.photometry_collection = kwargs['collection'] # raw_photometry_data self.kwargs = kwargs From fd3b633df8c2584891cd2c0a6c4fe089e1a5e7fc Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Mon, 23 Jun 2025 15:00:10 +0100 Subject: [PATCH 46/80] bugfix in job creator when root_path is session_folder --- ibllib/pipes/local_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/local_server.py b/ibllib/pipes/local_server.py index 92f1cf39a..c02ae11c3 100644 --- 
a/ibllib/pipes/local_server.py +++ b/ibllib/pipes/local_server.py @@ -106,7 +106,7 @@ def job_creator(root_path, one=None, dry=False, rerun=False): if not one: one = ONE(cache_rest=None) rc = IBLRegistrationClient(one=one) - flag_files = Path(root_path).glob('*/????-??-??/*/raw_session.flag') + flag_files = Path(root_path).glob('**/raw_session.flag') flag_files = filter(lambda x: is_session_path(x.parent), flag_files) pipes = [] all_datasets = [] From 73956b7916451a99b2ffd4f6a306cc3b44b950fc Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 8 Aug 2025 13:06:23 +0200 Subject: [PATCH 47/80] bugfix for failing synchronization due to frame timestamp and bonsai frame number discrepancy, fast and low memory profile tdms timestamp extraction added --- ibllib/pipes/neurophotometrics.py | 140 +++++++++++++++++++++++++----- 1 file changed, 116 insertions(+), 24 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 965bb3908..2566a786f 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -2,7 +2,8 @@ from pathlib import Path import numpy as np import pandas as pd -from typing import Tuple +from typing import Tuple, Optional +import pickle import ibldsp.utils import ibllib.io.session_params @@ -16,12 +17,14 @@ from iblutil.spacer import Spacer _logger = logging.getLogger('ibllib') +_logger.setLevel(logging.DEBUG) -def extract_timestamps_from_tdms_file(tdms_filepath: Path) -> dict: +def extract_timestamps_from_tdms_file(tdms_filepath: Path, save_path: Optional[Path] = None) -> dict: # extractor for tdms files as written by the daqami software, configured # for neurophotometrics experiments: Frameclock is in AI7, DI1-4 are the # bpod sync signals + _logger.info(f'extracting timestamps from tdms file: {tdms_filepath}') tdms_file = TdmsFile.read(tdms_filepath) groups = tdms_file.groups() @@ -35,23 +38,85 @@ def extract_timestamps_from_tdms_file(tdms_filepath: Path) -> dict: fs = digital_group.properties['ScanRate'] # this should be 10kHz df = tdms_file.as_dataframe() col = df.columns[-1] - vals = df[col].values.astype('int64') + vals = df[col].values.astype('int32') columns = ['DI0', 'DI1', 'DI2', 'DI3'] # ugly but basically just a binary decoder for the binary data # assumes 4 channels - data = np.array([list(bin(v)[2:].zfill(4)[::-1]) for v in vals], dtype='int64') + data = np.array([list(bin(v)[2:].zfill(4)[::-1]) for v in vals], dtype='int32') timestamps = {} for i, name in enumerate(columns): - signal = data[:, i] - timestamps[name] = np.where(np.diff(signal) == 1)[0] / fs + timestamps[name] = np.where(np.diff(data[:, i]) == 1)[0] / fs if has_analog_group: # frameclock data is recorded on an analog channel for channel in analog_group.channels(): - signal = (channel.data > 2.5).astype('int64') # assumes 0-5V + signal = (channel.data > 2.5).astype('int32') # assumes 0-5V timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs + if save_path is not None: + _logger.info(f'saving extracted timestamps to: {save_path}') + with open(save_path, 'wb') as fH: + pickle.dump(timestamps, fH) + + return timestamps + + +def extract_timestamps_from_tdms_file_fast(tdms_filepath: Path, save_path: Optional[Path] = None, chunk_size=10000) -> dict: + # extractor for tdms files as written by the daqami software, configured + # for neurophotometrics experiments: Frameclock is in AI7, DI1-4 are the + # bpod sync signals + _logger.info(f'extracting timestamps from tdms file: {tdms_filepath}') + + # this should be 10kHz + 
tdms_file = TdmsFile.read(tdms_filepath) + groups = tdms_file.groups() + + # this unfortunate hack is in here because there are a bunch of sessions + # where the frameclock is on DI0 + if len(groups) == 1: + has_analog_group = False + (digital_group,) = groups + if len(groups) == 2: + has_analog_group = True + analog_group, digital_group = groups + fs = digital_group.properties['ScanRate'] # this should be 10kHz + df = tdms_file.as_dataframe() + + # inferring digital col name + (digital_col,) = [col for col in df.columns if 'Digital' in col] + vals = df[digital_col].values.astype('int8') + digital_channel_names = ['DI0', 'DI1', 'DI2', 'DI3'] + + # ini + timestamps = {} + for ch in digital_channel_names: + timestamps[ch] = [] + + # chunked loop + n_chunks = df.shape[0] // chunk_size + for i in range(n_chunks): + vals_ = vals[i * chunk_size : (i + 1) * chunk_size] + data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') + + for j, name in enumerate(digital_channel_names): + ix = np.where(np.diff(data[:, j]) == 1)[0] + (chunk_size * i) + timestamps[name].append(ix / fs) + + for ch in digital_channel_names: + timestamps[ch] = np.concatenate(timestamps[ch]) + + if has_analog_group: + # frameclock data is recorded on an analog channel + for channel in analog_group.channels(): + signal = (channel.data > 2.5).astype('int32') # assumes 0-5V + timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs + + if save_path is not None: + _logger.info(f'saving extracted timestamps to: {save_path}') + with open(save_path, 'wb') as fH: + pickle.dump(timestamps, fH) + return timestamps @@ -63,7 +128,7 @@ class FibrePhotometryBaseSync(base_tasks.DynamicTask): def __init__(self, session_path, one, **kwargs): super().__init__(session_path, one=one, **kwargs) - self.photometry_collection = kwargs['collection'] # raw_photometry_data + self.photometry_collection = kwargs.get('collection', 'raw_photometry_data') # raw_photometry_data self.kwargs = kwargs # we will work with the first protocol here @@ -113,13 +178,13 @@ def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, li timestamps_nph = self._get_neurophotometrics_timestamps() # verify presence of sync timestamps - for source, timestamps in zip(['bpod','neurophotometrics'], [timestamps_bpod, timestamps_nph]): + for source, timestamps in zip(['bpod', 'neurophotometrics'], [timestamps_bpod, timestamps_nph]): assert len(timestamps) > 0, f'{source} sync timestamps are empty' # split into segments if multiple spacers are found # attempt to sync for each segment (only one will work) spacer = Spacer() - + # the fast way match spacer_detection_mode: case 'fast': @@ -130,9 +195,9 @@ def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, li # the indices that mark the boundaries of segments segment_ix = np.concatenate([spacer_ix, [timestamps_nph.shape[0]]]) segments = [] - for i in range(segment_ix.shape[0]-1): + for i in range(segment_ix.shape[0] - 1): start_ix = segment_ix[i] + (spacer.n_pulses * 2) - 1 - stop_ix = segment_ix[i+1] + stop_ix = segment_ix[i + 1] segments.append(timestamps_nph[start_ix:stop_ix]) for i, timestamps_segment in enumerate(segments): @@ -153,7 +218,7 @@ def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, li continue # TODO the framerate here is hardcoded, infer it instead! 
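A side note on the analog frameclock handling in the extractor above: it reduces to thresholding a 0-5 V trace and taking the rising edges, the same recipe as for the decoded digital lines. A minimal sketch, assuming a TTL-like signal sampled at `fs` Hz:

    import numpy as np

    def rising_edge_times(analog_signal, fs, threshold=2.5):
        # binarise the analog trace (assumes 0-5 V logic) and return rising-edge times in seconds
        high = (analog_signal > threshold).astype('int8')
        return np.where(np.diff(high) == 1)[0] / fs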
assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' - + valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] return sync_nph_to_bpod_fcn, valid_bounds @@ -238,10 +303,11 @@ class FibrePhotometryDAQSync(FibrePhotometryBaseSync): priority = 90 job_size = 'small' - def __init__(self, *args, **kwargs): + def __init__(self, *args, load_timestamps: bool = False, **kwargs): super().__init__(*args, **kwargs) - self.sync_kwargs = kwargs['sync_metadata'] - self.sync_channel = kwargs['sync_channel'] + self.sync_kwargs = kwargs.get('sync_metadata', self.session_params['sync']) + self.sync_channel = kwargs.get('sync_channel', self.session_params['devices']['neurophotometrics']['sync_channel']) + self.load_timestamps = load_timestamps @property def signature(self): @@ -250,7 +316,7 @@ def signature(self): ('_neurophotometrics_fpData.raw.pqt', self.photometry_collection, True, True), ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), ('_neurophotometrics_fpData.channels.csv', self.photometry_collection, True, True), - ('_mcc_DAQdata.raw.tdms', self.sync_kwargs['collection'], True, True), + ('_mcc_DAQdata.raw.tdms', self.photometry_collection, True, True), ], 'output_files': [ ('photometry.signal.pqt', 'alf/photometry', True), @@ -266,12 +332,19 @@ def load_data(self) -> pd.DataFrame: raw_df = super().load_data() # get daqami timestamps - tdms_filepath = self.session_path / self.sync_kwargs['collection'] / '_mcc_DAQdata.raw.tdms' - self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath) + # attempt to load + timestamps_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.pkl' + if self.load_timestamps and timestamps_filepath.exists(): + with open(timestamps_filepath, 'rb') as fH: + self.timestamps = pickle.load(fH) + else: # extract timestamps: + tdms_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.raw.tdms' + self.timestamps = extract_timestamps_from_tdms_file_fast(tdms_filepath, save_path=timestamps_filepath) + # downward compatibility - frameclock moved around, now is back on the AI7 - if self.sync_kwargs['frameclock_channel'] in ['0',0]: + if self.sync_kwargs['frameclock_channel'] in ['0', 0]: sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' - elif self.sync_kwargs['frameclock_channel'] in ['7',7]: + elif self.sync_kwargs['frameclock_channel'] in ['7', 7]: sync_channel_name = f'AI{self.sync_kwargs["frameclock_channel"]}' else: sync_channel_name = self.sync_kwargs['frameclock_channel'] @@ -279,19 +352,38 @@ def load_data(self) -> pd.DataFrame: # compare number of frame timestamps # and put them in the raw_df SystemTimestamp column + # based on the different scenarios + + # they are the same, all is well if raw_df.shape[0] == frame_timestamps.shape[0]: raw_df['SystemTimestamp'] = frame_timestamps - elif raw_df.shape[0] == frame_timestamps.shape[0] + 1: - # there is one extra frame timestamp from the last incomplete frame + _logger.debug(f'timestamps are of equal size {raw_df.shape[0]}') + + # there is one more timestamp recorded by the daq + # (probably bonsai drops the last incomplete frame) + elif raw_df.shape[0] + 1 == frame_timestamps.shape[0]: raw_df['SystemTimestamp'] = frame_timestamps[:-1] + _logger.debug('one more timestamp in daq than frames by bonsai') + + # there is one more frame by bonsai that doesn't have + # a timestamp (strange case) + elif raw_df.shape[0] == frame_timestamps.shape[0] + 1: + raw_df = raw_df.iloc[:-1] 
# dropping the last frame + raw_df['SystemTimestamp'] = frame_timestamps + _logger.debug('one frame in bonsai than timestamps recorded by daq') + + # there are many more frames recorded by bonsai than + # timestamps recorded by daqami elif raw_df.shape[0] > frame_timestamps.shape[0]: # the daqami was stopped / closed before bonsai # we discard all frames that can not be mapped _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') raw_df = raw_df.iloc[: frame_timestamps.shape[0]] + # there are more timestamps recorded by daqami than + # frames recorded by bonsai elif raw_df.shape[0] + 1 < frame_timestamps.shape[0]: - # this should not be possible + # this should not be possible / indicates a serious issue / bonsai crash') raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') return raw_df From 0031a7b01c2887558830e4665f37eef268da4323 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 8 Aug 2025 16:48:28 +0200 Subject: [PATCH 48/80] flake8 --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 2566a786f..6afb13f20 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -96,7 +96,7 @@ def extract_timestamps_from_tdms_file_fast(tdms_filepath: Path, save_path: Optio # chunked loop n_chunks = df.shape[0] // chunk_size for i in range(n_chunks): - vals_ = vals[i * chunk_size : (i + 1) * chunk_size] + vals_ = vals[i * chunk_size: (i + 1) * chunk_size] data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') for j, name in enumerate(digital_channel_names): From 6ed2ba93089179391964c7be7fe2fa1863aad990 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Fri, 8 Aug 2025 16:57:08 +0100 Subject: [PATCH 49/80] bugfix for extractor failure when too few timestamps are between two spacers (wrong segment, probably by session restart) --- ibllib/pipes/neurophotometrics.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 6afb13f20..90fec5637 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -202,9 +202,14 @@ def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, li for i, timestamps_segment in enumerate(segments): # sync the behaviour events to the photometry timestamps - sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - timestamps_segment, timestamps_bpod, return_indices=True, linear=True - ) + try: + sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( + timestamps_segment, timestamps_bpod, return_indices=True, linear=True + ) + except ValueError: + # this gets raised when there are no timestamps (multiple session restart) + continue + # then we check the alignment, should be less than the camera sampling rate tcheck = sync_nph_to_bpod_fcn(timestamps_segment[ix_nph]) - timestamps_bpod[ix_bpod] _logger.info( From 1f8a4cfa48d88d83985d6237e60b5af40aa49530 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Fri, 8 Aug 2025 17:18:53 +0100 Subject: [PATCH 50/80] by default, attempt to load previously extracted timestamps from daq --- ibllib/pipes/neurophotometrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 
90fec5637..b0dc255f3 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -308,7 +308,7 @@ class FibrePhotometryDAQSync(FibrePhotometryBaseSync): priority = 90 job_size = 'small' - def __init__(self, *args, load_timestamps: bool = False, **kwargs): + def __init__(self, *args, load_timestamps: bool = True, **kwargs): super().__init__(*args, **kwargs) self.sync_kwargs = kwargs.get('sync_metadata', self.session_params['sync']) self.sync_channel = kwargs.get('sync_channel', self.session_params['devices']['neurophotometrics']['sync_channel']) From 17688a7d513ec7fa3cdc631188e07394fb54dff0 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 11 Aug 2025 11:51:30 +0200 Subject: [PATCH 51/80] added documentation and cleanups --- ibllib/pipes/neurophotometrics.py | 133 +++++++++++++++++------------- 1 file changed, 74 insertions(+), 59 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 6afb13f20..6019bae6f 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -17,55 +17,62 @@ from iblutil.spacer import Spacer _logger = logging.getLogger('ibllib') -_logger.setLevel(logging.DEBUG) -def extract_timestamps_from_tdms_file(tdms_filepath: Path, save_path: Optional[Path] = None) -> dict: - # extractor for tdms files as written by the daqami software, configured - # for neurophotometrics experiments: Frameclock is in AI7, DI1-4 are the - # bpod sync signals - _logger.info(f'extracting timestamps from tdms file: {tdms_filepath}') - - tdms_file = TdmsFile.read(tdms_filepath) - groups = tdms_file.groups() - # this unfortunate hack is in here because there are a bunch of sessions where the frameclock is on DI0 - if len(groups) == 1: - has_analog_group = False - (digital_group,) = groups - if len(groups) == 2: - has_analog_group = True - analog_group, digital_group = groups - fs = digital_group.properties['ScanRate'] # this should be 10kHz - df = tdms_file.as_dataframe() - col = df.columns[-1] - vals = df[col].values.astype('int32') - columns = ['DI0', 'DI1', 'DI2', 'DI3'] - - # ugly but basically just a binary decoder for the binary data - # assumes 4 channels - data = np.array([list(bin(v)[2:].zfill(4)[::-1]) for v in vals], dtype='int32') - timestamps = {} - for i, name in enumerate(columns): - timestamps[name] = np.where(np.diff(data[:, i]) == 1)[0] / fs - - if has_analog_group: - # frameclock data is recorded on an analog channel - for channel in analog_group.channels(): - signal = (channel.data > 2.5).astype('int32') # assumes 0-5V - timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs - - if save_path is not None: - _logger.info(f'saving extracted timestamps to: {save_path}') - with open(save_path, 'wb') as fH: - pickle.dump(timestamps, fH) - - return timestamps - - -def extract_timestamps_from_tdms_file_fast(tdms_filepath: Path, save_path: Optional[Path] = None, chunk_size=10000) -> dict: - # extractor for tdms files as written by the daqami software, configured - # for neurophotometrics experiments: Frameclock is in AI7, DI1-4 are the - # bpod sync signals +def _int2digital_channels(values: np.ndarray) -> np.ndarray: + """decoder for the digital channel values from the tdms file into a channel + based array (rows are temporal samples, columns are channels). + + essentially does: + + 0 -> 0000 + 1 -> 1000 + 2 -> 0100 + 3 -> 1100 + 4 -> 0010 + 5 -> 1010 + 6 -> 0110 + ... 
+ + the order from binary representation is reversed so + columns index represents channel index + + Parameters + ---------- + values : np.ndarray + the input values from the tdms digital channel + + Returns + ------- + np.ndarray + a (n x 4) array + """ + return np.array([list(f'{v:04b}'[::-1]) for v in values], dtype='int8') + + +def extract_timestamps_from_tdms_file( + tdms_filepath: Path, + save_path: Optional[Path] = None, + chunk_size=10000, +) -> dict: + """extractor for tdms files as written by the daqami software, configured for neurophotometrics + experiments: Frameclock is in AI7, DI1-4 are the bpod sync signals + + Parameters + ---------- + tdms_filepath : Path + path to TDMS file + save_path : Optional[Path], optional + if a path, save extracted timestamps from tdms file to this location, by default None + chunk_size : int, optional + if not None, process tdms data in chunks for decreased memory usage, by default 10000 + + Returns + ------- + dict + a dict with the tdms channel names as keys and the timestamps of the rising fronts + """ + # _logger.info(f'extracting timestamps from tdms file: {tdms_filepath}') # this should be 10kHz @@ -93,19 +100,26 @@ def extract_timestamps_from_tdms_file_fast(tdms_filepath: Path, save_path: Optio for ch in digital_channel_names: timestamps[ch] = [] - # chunked loop - n_chunks = df.shape[0] // chunk_size - for i in range(n_chunks): - vals_ = vals[i * chunk_size: (i + 1) * chunk_size] - data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') - + # chunked loop for memory efficiency + if chunk_size is not None: + n_chunks = df.shape[0] // chunk_size + for i in range(n_chunks): + vals_ = vals[i * chunk_size : (i + 1) * chunk_size] + # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') + data = _int2digital_channels(vals_) + + for j, name in enumerate(digital_channel_names): + ix = np.where(np.diff(data[:, j]) == 1)[0] + (chunk_size * i) + timestamps[name].append(ix / fs) + + for ch in digital_channel_names: + timestamps[ch] = np.concatenate(timestamps[ch]) + else: + data = _int2digital_channels(vals) for j, name in enumerate(digital_channel_names): - ix = np.where(np.diff(data[:, j]) == 1)[0] + (chunk_size * i) + ix = np.where(np.diff(data[:, j]) == 1)[0] timestamps[name].append(ix / fs) - for ch in digital_channel_names: - timestamps[ch] = np.concatenate(timestamps[ch]) - if has_analog_group: # frameclock data is recorded on an analog channel for channel in analog_group.channels(): @@ -214,8 +228,9 @@ def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, li ) if len(ix_nph) / len(timestamps_bpod) < 0.95: # wrong segment - print('wrong segment') + _logger.info(f'segment {i} - wrong') continue + _logger.info(f'segment {i} - matched') # TODO the framerate here is hardcoded, infer it instead! 
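To make the bit-order convention of `_int2digital_channels` concrete, a small worked example with hypothetical packed samples; column j of the decoded array is DIj, and the rising edges per channel then fall out of an `np.diff`, as in the extractor:

    import numpy as np

    vals = np.array([0, 1, 2, 5], dtype='int8')  # packed DI0..DI3 states as read from the tdms file
    decoded = np.array([list(f'{v:04b}'[::-1]) for v in vals], dtype='int8')
    # decoded == [[0, 0, 0, 0],   0 -> all lines low
    #             [1, 0, 0, 0],   1 -> DI0 high
    #             [0, 1, 0, 0],   2 -> DI1 high
    #             [1, 0, 1, 0]]   5 -> DI0 and DI2 high
    rising_ix = {f'DI{j}': np.where(np.diff(decoded[:, j]) == 1)[0] for j in range(4)}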
assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' @@ -339,7 +354,7 @@ def load_data(self) -> pd.DataFrame: self.timestamps = pickle.load(fH) else: # extract timestamps: tdms_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.raw.tdms' - self.timestamps = extract_timestamps_from_tdms_file_fast(tdms_filepath, save_path=timestamps_filepath) + self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath, save_path=timestamps_filepath) # downward compatibility - frameclock moved around, now is back on the AI7 if self.sync_kwargs['frameclock_channel'] in ['0', 0]: From d84473330d5d08b77085cd717d7645b15d7df6f9 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Mon, 11 Aug 2025 13:05:48 +0100 Subject: [PATCH 52/80] crucial bugfix of bpod start time timestamp shift regression that reappeared --- ibllib/pipes/neurophotometrics.py | 54 +++++++++++++++++-------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 0bcc9b018..e6fd1ab61 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -168,7 +168,7 @@ def _get_bpod_timestamps(self) -> Tuple[np.ndarray, list]: timestamps_bpod.append( np.array( [ - data['States timestamps'][sync_name][0][0] + data['Trial start timestamp'] + data['States timestamps'][sync_name][0][0] + data['Trial start timestamp'] - data['Bpod start timestamp'] for data in bpod_data if sync_name in data['States timestamps'] ] @@ -373,38 +373,44 @@ def load_data(self) -> pd.DataFrame: # compare number of frame timestamps # and put them in the raw_df SystemTimestamp column # based on the different scenarios + frame_times_adjusted = False # for debugging reasons # they are the same, all is well if raw_df.shape[0] == frame_timestamps.shape[0]: raw_df['SystemTimestamp'] = frame_timestamps - _logger.debug(f'timestamps are of equal size {raw_df.shape[0]}') - - # there is one more timestamp recorded by the daq - # (probably bonsai drops the last incomplete frame) - elif raw_df.shape[0] + 1 == frame_timestamps.shape[0]: - raw_df['SystemTimestamp'] = frame_timestamps[:-1] - _logger.debug('one more timestamp in daq than frames by bonsai') - - # there is one more frame by bonsai that doesn't have - # a timestamp (strange case) - elif raw_df.shape[0] == frame_timestamps.shape[0] + 1: - raw_df = raw_df.iloc[:-1] # dropping the last frame - raw_df['SystemTimestamp'] = frame_timestamps - _logger.debug('one frame in bonsai than timestamps recorded by daq') + _logger.info(f'timestamps are of equal size {raw_df.shape[0]}') + frame_times_adjusted = True - # there are many more frames recorded by bonsai than - # timestamps recorded by daqami + # there are more timestamps recorded by DAQ than + # frames recorded by bonsai + elif raw_df.shape[0] < frame_timestamps.shape[0]: + _logger.info(f'# bonsai frames: {raw_df.shape[0]}, # daq timestamps: {frame_timestamps.shape[0]}') + # there is exactly one more timestamp recorded by the daq + # (probably bonsai drops the last incomplete frame) + if raw_df.shape[0] == frame_timestamps.shape[0] - 1: + raw_df['SystemTimestamp'] = frame_timestamps[:-1] + # there are two more frames recorded by the DAQ than by + # bonsai - this is observed. 
TODO understand when this happens + elif raw_df.shape[0] == frame_timestamps.shape[0] - 2: + raw_df['SystemTimestamp'] = frame_timestamps[:-2] + # there are more frames recorded by the DAQ than that + # this indicates and issue - + elif raw_df.shape[0] < frame_timestamps.shape[0] - 2: + raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') + frame_times_adjusted = True + + # there are more frames recorded by bonsai than by the DAQ + # this happens when the user stops the daqami recording before stopping the bonsai + # or when daqami crashes elif raw_df.shape[0] > frame_timestamps.shape[0]: - # the daqami was stopped / closed before bonsai - # we discard all frames that can not be mapped + # we drop all excess frames _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') raw_df = raw_df.iloc[: frame_timestamps.shape[0]] + frame_times_adjusted = True + + if not frame_times_adjusted: + raise ValueError('timestamp issue that hasnt been caught') - # there are more timestamps recorded by daqami than - # frames recorded by bonsai - elif raw_df.shape[0] + 1 < frame_timestamps.shape[0]: - # this should not be possible / indicates a serious issue / bonsai crash') - raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') return raw_df def _get_neurophotometrics_timestamps(self) -> np.ndarray: From 3c9a2e5f477b03aacdbfa576f4a093819eb1d666 Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Tue, 12 Aug 2025 09:01:46 +0100 Subject: [PATCH 53/80] added segment checking and validation, spacer detection assertion --- ibllib/pipes/neurophotometrics.py | 35 ++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index e6fd1ab61..1cf0d978a 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -206,6 +206,9 @@ def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, li case 'safe': spacer_ix, spacer_times = spacer.find_spacers_from_positive_fronts(timestamps_nph, fs=1000) + # verify spacer detection + assert spacer_ix.shape[0] > 0, 'spacer detection failed' + # the indices that mark the boundaries of segments segment_ix = np.concatenate([spacer_ix, [timestamps_nph.shape[0]]]) segments = [] @@ -214,33 +217,41 @@ def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, li stop_ix = segment_ix[i + 1] segments.append(timestamps_nph[start_ix:stop_ix]) - for i, timestamps_segment in enumerate(segments): - # sync the behaviour events to the photometry timestamps + def check_segment(timestamps_segment): + # check a segment for matching sync try: sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( timestamps_segment, timestamps_bpod, return_indices=True, linear=True ) except ValueError: # this gets raised when there are no timestamps (multiple session restart) - continue + return False # then we check the alignment, should be less than the camera sampling rate tcheck = sync_nph_to_bpod_fcn(timestamps_segment[ix_nph]) - timestamps_bpod[ix_bpod] - _logger.info( - f'sync: n trials {len(bpod_data)}' - f'n bpod sync {len(timestamps_bpod)}' - f'n photometry {len(timestamps_segment)}, n match {len(ix_nph)}' - ) + # _logger.info( + # f'sync: n trials {len(bpod_data)}' + # f'n bpod sync {len(timestamps_bpod)}' + # f'n photometry {len(timestamps_segment)}, n 
match {len(ix_nph)}' + # ) if len(ix_nph) / len(timestamps_bpod) < 0.95: # wrong segment - _logger.info(f'segment {i} - wrong') - continue - _logger.info(f'segment {i} - matched') + # _logger.info(f'segment {i} - wrong') + return False + # _logger.info(f'segment {i} - matched') # TODO the framerate here is hardcoded, infer it instead! assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' + return True + + checked_segments = [check_segment(segment) for segment in segments] + assert np.sum(checked_segments) == 1, 'multiple or none segments matched' + timestamps_segment = segments[np.where(checked_segments)[0][0]] - valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] + sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( + timestamps_segment, timestamps_bpod, return_indices=True, linear=True + ) + valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] return sync_nph_to_bpod_fcn, valid_bounds def load_data(self) -> pd.DataFrame: From 2d3a54c4972d1af298f8b13b113365b6e5efe5d1 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Tue, 12 Aug 2025 10:10:22 +0200 Subject: [PATCH 54/80] flake8/autopep8 --- ibllib/pipes/neurophotometrics.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 1cf0d978a..2a166d69a 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -104,7 +104,7 @@ def extract_timestamps_from_tdms_file( if chunk_size is not None: n_chunks = df.shape[0] // chunk_size for i in range(n_chunks): - vals_ = vals[i * chunk_size : (i + 1) * chunk_size] + vals_ = vals[i * chunk_size: (i + 1) * chunk_size] # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') data = _int2digital_channels(vals_) @@ -242,14 +242,14 @@ def check_segment(timestamps_segment): # TODO the framerate here is hardcoded, infer it instead! 
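The validation added in PATCH 53 amounts to requiring that exactly one spacer-delimited segment passes the sync check. A hypothetical helper capturing that rule, assuming a `check_segment` predicate like the one in the patch:

    import numpy as np

    def pick_matching_segment(segments, check_segment):
        # check_segment returns True for the segment whose pulses match the bpod events
        matched = np.array([check_segment(seg) for seg in segments], dtype=bool)
        assert matched.sum() == 1, f'error in segment matching: {matched.sum()} matching segments'
        return segments[int(np.flatnonzero(matched)[0])]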
assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' return True - + checked_segments = [check_segment(segment) for segment in segments] assert np.sum(checked_segments) == 1, 'multiple or none segments matched' timestamps_segment = segments[np.where(checked_segments)[0][0]] sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - timestamps_segment, timestamps_bpod, return_indices=True, linear=True - ) + timestamps_segment, timestamps_bpod, return_indices=True, linear=True + ) valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] return sync_nph_to_bpod_fcn, valid_bounds @@ -384,7 +384,7 @@ def load_data(self) -> pd.DataFrame: # compare number of frame timestamps # and put them in the raw_df SystemTimestamp column # based on the different scenarios - frame_times_adjusted = False # for debugging reasons + frame_times_adjusted = False # for debugging reasons # they are the same, all is well if raw_df.shape[0] == frame_timestamps.shape[0]: @@ -392,7 +392,7 @@ def load_data(self) -> pd.DataFrame: _logger.info(f'timestamps are of equal size {raw_df.shape[0]}') frame_times_adjusted = True - # there are more timestamps recorded by DAQ than + # there are more timestamps recorded by DAQ than # frames recorded by bonsai elif raw_df.shape[0] < frame_timestamps.shape[0]: _logger.info(f'# bonsai frames: {raw_df.shape[0]}, # daq timestamps: {frame_timestamps.shape[0]}') @@ -405,11 +405,11 @@ def load_data(self) -> pd.DataFrame: elif raw_df.shape[0] == frame_timestamps.shape[0] - 2: raw_df['SystemTimestamp'] = frame_timestamps[:-2] # there are more frames recorded by the DAQ than that - # this indicates and issue - + # this indicates and issue - elif raw_df.shape[0] < frame_timestamps.shape[0] - 2: raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') frame_times_adjusted = True - + # there are more frames recorded by bonsai than by the DAQ # this happens when the user stops the daqami recording before stopping the bonsai # or when daqami crashes From dc98dd863b8a61f256bbcd1b6d37915f4cf04dbb Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Wed, 13 Aug 2025 09:45:20 +0100 Subject: [PATCH 55/80] simplification of synchronization scheme, spacer detection obsolete --- ibllib/pipes/neurophotometrics.py | 152 +++++++++++++++++++----------- 1 file changed, 95 insertions(+), 57 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 2a166d69a..32fceb6da 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -185,7 +185,93 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # for daq based syncing, the timestamps are extracted from the tdms file ... 
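PATCH 55 below drops the spacer and segment machinery and reduces the sync to a single linear fit between the two full timestamp vectors. A minimal usage sketch of that call, with toy arrays standing in for the real bpod and photometry sync pulses:

    import numpy as np
    import ibldsp.utils

    timestamps_bpod = np.arange(500) * 1.5                 # toy bpod sync events (s)
    timestamps_nph = timestamps_bpod * (1 + 20e-6) + 3.0   # same events on the photometry clock
    fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps(
        timestamps_nph, timestamps_bpod, return_indices=True, linear=True
    )
    # fcn maps photometry time into bpod time; drift_ppm reports the residual clock drift in ppm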
- def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, list]: + # def _get_sync_function(self, spacer_detection_mode='fallback') -> Tuple[callable, list]: + # # returns the synchronization function + # # get the timestamps + # timestamps_bpod, bpod_data = self._get_bpod_timestamps() + # timestamps_nph = self._get_neurophotometrics_timestamps() + + # # verify presence of sync timestamps + # for source, timestamps in zip(['bpod', 'neurophotometrics'], [timestamps_bpod, timestamps_nph]): + # assert len(timestamps) > 0, f'{source} sync timestamps are empty' + + # # split into segments if multiple spacers are found + # # attempt to sync for each segment (only one will work) + # spacer = Spacer() + + # def _get_segments(timestamps_nph, spacer_detection_mode): + # segments = [] + + # match spacer_detection_mode: + # case 'fast': + # spacer_ix = spacer.find_spacers_from_timestamps(timestamps_nph, atol=1e-5) + + # case 'safe': + # spacer_ix, spacer_times = spacer.find_spacers_from_positive_fronts(timestamps_nph, fs=1000) + # spacer_ix = np.searchsorted(timestamps_nph, spacer_times) + + # case 'fallback': # first try fast, if fails, try safe + # segments = _get_segments(timestamps_nph, 'fast') + # if len(segments) > 0: + # return segments + # else: + # segments = _get_segments(timestamps_nph, 'safe') + # if len(segments) > 0: + # return segments + # else: + # raise ValueError('spacer detection failed') + + # # the indices that mark the boundaries of segments + # segment_ix = np.concatenate([spacer_ix, [timestamps_nph.shape[0]]]) + # for i in range(segment_ix.shape[0] - 1): + # start_ix = segment_ix[i] + (spacer.n_pulses * 2) - 1 + # stop_ix = segment_ix[i + 1] + # segments.append(timestamps_nph[start_ix:stop_ix]) + + # return segments + + # # verify spacer detection + # segments = _get_segments(timestamps_nph, spacer_detection_mode=spacer_detection_mode) + # assert len(segments) > 0, 'spacer detection failed' + + # def check_segment(timestamps_segment, matching_threshold = .95): + # # check a segment for matching sync + # try: + # sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( + # timestamps_segment, timestamps_bpod, return_indices=True, linear=True + # ) + # except ValueError: + # # this gets raised when there are no timestamps (multiple session restart) + # return False + + # # then we check the alignment, should be less than the camera sampling rate + # tcheck = sync_nph_to_bpod_fcn(timestamps_segment[ix_nph]) - timestamps_bpod[ix_bpod] + # # _logger.info( + # # f'sync: n trials {len(bpod_data)}' + # # f'n bpod sync {len(timestamps_bpod)}' + # # f'n photometry {len(timestamps_segment)}, n match {len(ix_nph)}' + # # ) + # if len(ix_nph) / len(timestamps_bpod) < matching_threshold: + # # wrong segment + # return False + # # _logger.info(f'segment {i} - matched') + # # TODO the framerate here is hardcoded, infer it instead! 
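On the drift value that the simplified `_get_sync_function` logs further below: assuming `drift_ppm` is, as the name suggests, the linear clock drift in parts per million, the divergence expected over a session is easy to estimate:

    drift_ppm = 20.0                 # hypothetical logged value
    session_length_s = 3600          # one hour of recording
    divergence_s = drift_ppm * 1e-6 * session_length_s
    # 0.072 s over the hour - well above the 1/60 s residual tolerance used earlier,
    # hence a fitted slope rather than a constant offset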
+ # assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' + # return True + + # checked_segments = [check_segment(segment) for segment in segments] + + # assert np.sum(checked_segments) == 1, f'error in segment matching: matching segments: {np.sum(checked_segments)}' + # timestamps_segment = segments[np.where(checked_segments)[0][0]] + + # sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( + # timestamps_segment, timestamps_bpod, return_indices=True, linear=True + # ) + + # valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] + # return sync_nph_to_bpod_fcn, valid_bounds + + def _get_sync_function(self) -> Tuple[callable, list]: # returns the synchronization function # get the timestamps timestamps_bpod, bpod_data = self._get_bpod_timestamps() @@ -195,61 +281,10 @@ def _get_sync_function(self, spacer_detection_mode='fast') -> Tuple[callable, li for source, timestamps in zip(['bpod', 'neurophotometrics'], [timestamps_bpod, timestamps_nph]): assert len(timestamps) > 0, f'{source} sync timestamps are empty' - # split into segments if multiple spacers are found - # attempt to sync for each segment (only one will work) - spacer = Spacer() - - # the fast way - match spacer_detection_mode: - case 'fast': - spacer_ix = spacer.find_spacers_from_timestamps(timestamps_nph, atol=1e-5) - case 'safe': - spacer_ix, spacer_times = spacer.find_spacers_from_positive_fronts(timestamps_nph, fs=1000) - - # verify spacer detection - assert spacer_ix.shape[0] > 0, 'spacer detection failed' - - # the indices that mark the boundaries of segments - segment_ix = np.concatenate([spacer_ix, [timestamps_nph.shape[0]]]) - segments = [] - for i in range(segment_ix.shape[0] - 1): - start_ix = segment_ix[i] + (spacer.n_pulses * 2) - 1 - stop_ix = segment_ix[i + 1] - segments.append(timestamps_nph[start_ix:stop_ix]) - - def check_segment(timestamps_segment): - # check a segment for matching sync - try: - sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - timestamps_segment, timestamps_bpod, return_indices=True, linear=True - ) - except ValueError: - # this gets raised when there are no timestamps (multiple session restart) - return False - - # then we check the alignment, should be less than the camera sampling rate - tcheck = sync_nph_to_bpod_fcn(timestamps_segment[ix_nph]) - timestamps_bpod[ix_bpod] - # _logger.info( - # f'sync: n trials {len(bpod_data)}' - # f'n bpod sync {len(timestamps_bpod)}' - # f'n photometry {len(timestamps_segment)}, n match {len(ix_nph)}' - # ) - if len(ix_nph) / len(timestamps_bpod) < 0.95: - # wrong segment - # _logger.info(f'segment {i} - wrong') - return False - # _logger.info(f'segment {i} - matched') - # TODO the framerate here is hardcoded, infer it instead! 
- assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' - return True - - checked_segments = [check_segment(segment) for segment in segments] - assert np.sum(checked_segments) == 1, 'multiple or none segments matched' - timestamps_segment = segments[np.where(checked_segments)[0][0]] - sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - timestamps_segment, timestamps_bpod, return_indices=True, linear=True + timestamps_nph, timestamps_bpod, return_indices=True, linear=True ) + _logger.info(f"synced with drift: {drift_ppm}") valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] return sync_nph_to_bpod_fcn, valid_bounds @@ -270,8 +305,9 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: raw_df = self.load_data() # 2) get the synchronization function - spacer_detection_mode = kwargs.get('spacer_detection_mode', 'fast') - sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function(spacer_detection_mode=spacer_detection_mode) + # spacer_detection_mode = kwargs.get('spacer_detection_mode', 'fallback') + # sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function(spacer_detection_mode=spacer_detection_mode) + sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function() # 3) convert to ibl_df ibl_df = fpio.from_raw_neurophotometrics_df_to_ibl_df(raw_df, rois=self.kwargs['fibers'], drop_first=False) @@ -416,7 +452,9 @@ def load_data(self) -> pd.DataFrame: elif raw_df.shape[0] > frame_timestamps.shape[0]: # we drop all excess frames _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') - raw_df = raw_df.iloc[: frame_timestamps.shape[0]] + n_frames_daqami = frame_timestamps.shape[0] + raw_df = raw_df.iloc[:n_frames_daqami] + raw_df.loc[:, 'SystemTimestamp'] = frame_timestamps frame_times_adjusted = True if not frame_times_adjusted: From 5be5de3a94739e90ea0533c7216260765db6976b Mon Sep 17 00:00:00 2001 From: grg2rsr Date: Tue, 2 Sep 2025 11:21:36 +0100 Subject: [PATCH 56/80] parede hotfix for extracting passive sessions in photometry --- ibllib/pipes/dynamic_pipeline.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index c8e6ea119..5ba935e67 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -257,6 +257,9 @@ def _get_trials_tasks(session_path, acquisition_description=None, sync_tasks=Non # This may happen that the extractor is tied to a specific sync task: look for TrialsChoiceWorldBpod for example elif hasattr(btasks, extractor + sync_label.capitalize()): task = getattr(btasks, extractor + sync_label.capitalize()) + # Passive sessions can be run in behavior boxes + elif 'passiveChoiceWorld' in protocol: + registration_class = btasks.PassiveRegisterRaw else: # lookup in the project extraction repo if we find an extractor class import projects.extraction_tasks From 26c4338c4ff91a9d90b2571ffc2c86f5837dea52 Mon Sep 17 00:00:00 2001 From: KceniaB Date: Thu, 11 Sep 2025 11:18:02 +0100 Subject: [PATCH 57/80] downward compatibility for kcneias extraction --- ibllib/pipes/neurophotometrics.py | 111 +++++------------------------- 1 file changed, 17 insertions(+), 94 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 32fceb6da..1f9fa9357 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -140,16 +140,22 @@ class 
FibrePhotometryBaseSync(base_tasks.DynamicTask): priority = 90 job_size = 'small' - def __init__(self, session_path, one, **kwargs): + def __init__(self, session_path, one, task_protocol=None, task_collection=None, **kwargs): super().__init__(session_path, one=one, **kwargs) self.photometry_collection = kwargs.get('collection', 'raw_photometry_data') # raw_photometry_data self.kwargs = kwargs + self.task_protocol = task_protocol + self.task_collection = task_collection - # we will work with the first protocol here - for task in self.session_params['tasks']: - self.task_protocol = next(k for k in task) + if self.task_protocol is None: + # we will work with the first protocol here + for task in self.session_params['tasks']: + self.task_protocol = next(k for k in task) + break + + if self.task_collection is None: + # if not provided, infer self.task_collection = ibllib.io.session_params.get_task_collection(self.session_params, self.task_protocol) - break def _get_bpod_timestamps(self) -> Tuple[np.ndarray, list]: # the timestamps for syncing, in the time of the bpod @@ -185,92 +191,6 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # for daq based syncing, the timestamps are extracted from the tdms file ... - # def _get_sync_function(self, spacer_detection_mode='fallback') -> Tuple[callable, list]: - # # returns the synchronization function - # # get the timestamps - # timestamps_bpod, bpod_data = self._get_bpod_timestamps() - # timestamps_nph = self._get_neurophotometrics_timestamps() - - # # verify presence of sync timestamps - # for source, timestamps in zip(['bpod', 'neurophotometrics'], [timestamps_bpod, timestamps_nph]): - # assert len(timestamps) > 0, f'{source} sync timestamps are empty' - - # # split into segments if multiple spacers are found - # # attempt to sync for each segment (only one will work) - # spacer = Spacer() - - # def _get_segments(timestamps_nph, spacer_detection_mode): - # segments = [] - - # match spacer_detection_mode: - # case 'fast': - # spacer_ix = spacer.find_spacers_from_timestamps(timestamps_nph, atol=1e-5) - - # case 'safe': - # spacer_ix, spacer_times = spacer.find_spacers_from_positive_fronts(timestamps_nph, fs=1000) - # spacer_ix = np.searchsorted(timestamps_nph, spacer_times) - - # case 'fallback': # first try fast, if fails, try safe - # segments = _get_segments(timestamps_nph, 'fast') - # if len(segments) > 0: - # return segments - # else: - # segments = _get_segments(timestamps_nph, 'safe') - # if len(segments) > 0: - # return segments - # else: - # raise ValueError('spacer detection failed') - - # # the indices that mark the boundaries of segments - # segment_ix = np.concatenate([spacer_ix, [timestamps_nph.shape[0]]]) - # for i in range(segment_ix.shape[0] - 1): - # start_ix = segment_ix[i] + (spacer.n_pulses * 2) - 1 - # stop_ix = segment_ix[i + 1] - # segments.append(timestamps_nph[start_ix:stop_ix]) - - # return segments - - # # verify spacer detection - # segments = _get_segments(timestamps_nph, spacer_detection_mode=spacer_detection_mode) - # assert len(segments) > 0, 'spacer detection failed' - - # def check_segment(timestamps_segment, matching_threshold = .95): - # # check a segment for matching sync - # try: - # sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - # timestamps_segment, timestamps_bpod, return_indices=True, linear=True - # ) - # except ValueError: - # # this gets raised when there are no timestamps (multiple session restart) - # return False - - # # then we check the alignment, should 
be less than the camera sampling rate - # tcheck = sync_nph_to_bpod_fcn(timestamps_segment[ix_nph]) - timestamps_bpod[ix_bpod] - # # _logger.info( - # # f'sync: n trials {len(bpod_data)}' - # # f'n bpod sync {len(timestamps_bpod)}' - # # f'n photometry {len(timestamps_segment)}, n match {len(ix_nph)}' - # # ) - # if len(ix_nph) / len(timestamps_bpod) < matching_threshold: - # # wrong segment - # return False - # # _logger.info(f'segment {i} - matched') - # # TODO the framerate here is hardcoded, infer it instead! - # assert np.all(np.abs(tcheck) < 1 / 60), 'Sync issue detected, residual above 1/60s' - # return True - - # checked_segments = [check_segment(segment) for segment in segments] - - # assert np.sum(checked_segments) == 1, f'error in segment matching: matching segments: {np.sum(checked_segments)}' - # timestamps_segment = segments[np.where(checked_segments)[0][0]] - - # sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - # timestamps_segment, timestamps_bpod, return_indices=True, linear=True - # ) - - # valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] - # return sync_nph_to_bpod_fcn, valid_bounds - def _get_sync_function(self) -> Tuple[callable, list]: # returns the synchronization function # get the timestamps @@ -285,7 +205,8 @@ def _get_sync_function(self) -> Tuple[callable, list]: timestamps_nph, timestamps_bpod, return_indices=True, linear=True ) _logger.info(f"synced with drift: {drift_ppm}") - + # TODO - assertion needed. 95% of timestamps in bpod need to be in timestamps of nph (but not the other way around) + valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] return sync_nph_to_bpod_fcn, valid_bounds @@ -344,7 +265,7 @@ def signature(self): 'input_files': [ ('_neurophotometrics_fpData.raw.pqt', self.photometry_collection, True, True), ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), - ('_neurophotometrics_fpData.channels.csv', self.photometry_collection, True, True), + # ('_neurophotometrics_fpData.channels.csv', self.photometry_collection, True, True), ('_neurophotometrics_fpData.digitalIntputs.pqt', self.photometry_collection, True), ], 'output_files': [ @@ -357,7 +278,9 @@ def signature(self): def _get_neurophotometrics_timestamps(self) -> np.ndarray: # for bpod based syncing, the timestamps for syncing are in the digital inputs file raw_photometry_folder = self.session_path / self.photometry_collection - digital_inputs_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.digitalIntputs.pqt') + digital_inputs_filepath = raw_photometry_folder / '_neurophotometrics_fpData.digitalIntputs.pqt' + version = fpio.infer_version_from_digital_inputs_file(digital_inputs_filepath) + digital_inputs_df = fpio.read_digital_inputs_file(digital_inputs_filepath, version=version) timestamps_nph = digital_inputs_df['SystemTimestamp'].values[digital_inputs_df['Channel'] == self.kwargs['sync_channel']] # TODO replace this rudimentary spacer removal From 8e7dcdf837f42eed89bb578700cd60bbaefc5600 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Sat, 13 Sep 2025 09:05:34 +0100 Subject: [PATCH 58/80] alongside iblphotometry major overhaul - WIP but extracts lauras data, caro left to check --- alyx_task.pkl | Bin 0 -> 2347 bytes ibllib/pipes/neurophotometrics.py | 130 ++++++++++++++++-------------- 2 files changed, 70 insertions(+), 60 deletions(-) create mode 100644 alyx_task.pkl diff --git a/alyx_task.pkl b/alyx_task.pkl new 
file mode 100644 index 0000000000000000000000000000000000000000..4b328c69c715e72448be87278c916882074ddd60 GIT binary patch literal 2347 zcmcgu-D=!M6n5gD#-^e4hh7v411+SKm5^pe8fhslCM6*?b<^6E(y)k;G`l;sR#G%0 zyL%Dn1I*?0eR|W^=tJ}gdPa_8L*=wiAcDgzBh7cd`S$$%w)H#O-58!{_A_%^mezbq zilm?ki@Ye!Q;!x&?xhiCUR(x@U=oUwQET36c_(VE>qOfqshe&MJ1Z`0QD+s;Gn^g$V9k!wT=&YFJNKJa zTL~?!*{b;pmJc7uyc3U?jc!&#ck8eBzdu^n1$^3evgxE=+k>CYu57Ym$-9NH2is=5 z5~rfF2ODN{(e}eyxb>FV?D)zaY?-@Su0+=KI#Z%h_RzdLY>0}tN|fiD55M~R&%gcv z!OWicRTRDEc?E|f>_!lTGjx9`ijx7cZhMX9 zvJz?>Ccp%buI_LBNRg2OP5lQ<>`N!f@OJP9H;iLQ!wSqowa%x_-51fb%1>Ak6c zC*L80S_(PqLg2>dlNs_n4~2e+364pWGMXebAOt~iZt7CG#PwX5x$j2dLB9#ul|SJP z>+>=K|4X2k0$eJ@2nmvqKq4h6OQTVRph-Wk#nx;O)fl=CbYbBnwC1DBT7MjduGwJb z2LyUHwq_T4Xuleoy_*tt<}!z}Zl76m`(fagJ5^I~Jq!SwKOXN}rIsIGKr$~3k3&4A z@stps#Zkmy0N8c8m93f*eD6TP;45j(UeQ#QD9(Z2@Az5vqKU3EH!)cA_HYimdR@C_ ze#_iF2NIeO9{|7Q=Vtuudqb%zfa{5y3kC$Ux&Me+^8qiO_7Y|hxI?Czxjuv8=8{?8 G_x?ZOxz5P| literal 0 HcmV?d00001 diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 1f9fa9357..35fbf3363 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -2,7 +2,7 @@ from pathlib import Path import numpy as np import pandas as pd -from typing import Tuple, Optional +from typing import Tuple, Optional, List import pickle import ibldsp.utils @@ -13,7 +13,7 @@ from nptdms import TdmsFile from abc import abstractmethod -from iblphotometry import io as fpio +from iblphotometry import fpio from iblutil.spacer import Spacer _logger = logging.getLogger('ibllib') @@ -104,7 +104,7 @@ def extract_timestamps_from_tdms_file( if chunk_size is not None: n_chunks = df.shape[0] // chunk_size for i in range(n_chunks): - vals_ = vals[i * chunk_size: (i + 1) * chunk_size] + vals_ = vals[i * chunk_size : (i + 1) * chunk_size] # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') data = _int2digital_channels(vals_) @@ -134,6 +134,24 @@ def extract_timestamps_from_tdms_file( return timestamps +def extract_timestamps_from_bpod_jsonable(file_jsonable: str | Path, sync_states_names: List[str]): + _, bpod_data = jsonable.load_task_jsonable(file_jsonable) + timestamps = [] + for sync_name in sync_states_names: + timestamps.append( + np.array( + [ + data['States timestamps'][sync_name][0][0] + data['Trial start timestamp'] - data['Bpod start timestamp'] + for data in bpod_data + if sync_name in data['States timestamps'] + ] + ) + ) + timestamps = np.sort(np.concatenate(timestamps)) + timestamps = timestamps[~np.isnan(timestamps)] + return timestamps + + class FibrePhotometryBaseSync(base_tasks.DynamicTask): # base clas for syncing fibre photometry # derived classes are: FibrePhotometryBpodSync and FibrePhotometryDAQSync @@ -164,25 +182,14 @@ def _get_bpod_timestamps(self) -> Tuple[np.ndarray, list]: else: sync_states_names = ['trial_start', 'reward', 'exit_state'] - # read in the raw behaviour data for syncing file_jsonable = self.session_path.joinpath(self.task_collection, '_iblrig_taskData.raw.jsonable') - _, bpod_data = jsonable.load_task_jsonable(file_jsonable) + timestamps_bpod = extract_timestamps_from_bpod_jsonable(file_jsonable, sync_states_names) + return timestamps_bpod - # we get the timestamps of the states from the bpod data - timestamps_bpod = [] - for sync_name in sync_states_names: - timestamps_bpod.append( - np.array( - [ - data['States timestamps'][sync_name][0][0] + data['Trial start timestamp'] - data['Bpod start timestamp'] - for data in bpod_data - if sync_name in data['States timestamps'] - ] 
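The new extract_timestamps_from_bpod_jsonable helper above takes over from the inline loop removed in this hunk. A short usage sketch, with a hypothetical session layout and the state-name lists that _get_bpod_timestamps selects depending on the protocol:

from pathlib import Path

session_path = Path('/data/Subjects/SUBJECT/2025-01-01/001')                          # hypothetical
file_jsonable = session_path / 'raw_task_data_00' / '_iblrig_taskData.raw.jsonable'   # collection name assumed
sync_states_names = ['trial_start', 'reward', 'exit_state']                           # ['iti', 'reward'] for habituation protocols
timestamps_bpod = extract_timestamps_from_bpod_jsonable(file_jsonable, sync_states_names)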
- ) - ) - timestamps_bpod = np.sort(np.concatenate(timestamps_bpod)) - timestamps_bpod = timestamps_bpod[~np.isnan(timestamps_bpod)] - return timestamps_bpod, bpod_data + def _get_valid_bounds(self): + file_jsonable = self.session_path.joinpath(self.task_collection, '_iblrig_taskData.raw.jsonable') + _, bpod_data = jsonable.load_task_jsonable(file_jsonable) + return [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] @abstractmethod def _get_neurophotometrics_timestamps(self) -> np.ndarray: @@ -194,7 +201,7 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: def _get_sync_function(self) -> Tuple[callable, list]: # returns the synchronization function # get the timestamps - timestamps_bpod, bpod_data = self._get_bpod_timestamps() + timestamps_bpod = self._get_bpod_timestamps() timestamps_nph = self._get_neurophotometrics_timestamps() # verify presence of sync timestamps @@ -204,17 +211,21 @@ def _get_sync_function(self) -> Tuple[callable, list]: sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( timestamps_nph, timestamps_bpod, return_indices=True, linear=True ) - _logger.info(f"synced with drift: {drift_ppm}") + _logger.info(f'synced with drift: {drift_ppm}') # TODO - assertion needed. 95% of timestamps in bpod need to be in timestamps of nph (but not the other way around) - - valid_bounds = [bpod_data[0]['Trial start timestamp'] - 2, bpod_data[-1]['Trial end timestamp'] + 2] + + valid_bounds = self._get_valid_bounds() return sync_nph_to_bpod_fcn, valid_bounds def load_data(self) -> pd.DataFrame: # loads the raw photometry data raw_photometry_folder = self.session_path / self.photometry_collection - raw_neurophotometrics_df = pd.read_parquet(raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt') - return raw_neurophotometrics_df + photometry_df = fpio.from_neurophotometrics_file_to_photometry_df( + raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt', + # data_columns=self.kwargs['fibers'], + drop_first=False, + ) + return photometry_df def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # 1) load photometry data @@ -223,27 +234,24 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # will be overridden with the timestamps from the tdms file # the idea behind this is that the rest of the sync is then the same # and handled by this base class - raw_df = self.load_data() + photometry_df = self.load_data() # 2) get the synchronization function - # spacer_detection_mode = kwargs.get('spacer_detection_mode', 'fallback') - # sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function(spacer_detection_mode=spacer_detection_mode) sync_nph_to_bpod_fcn, valid_bounds = self._get_sync_function() - # 3) convert to ibl_df - ibl_df = fpio.from_raw_neurophotometrics_df_to_ibl_df(raw_df, rois=self.kwargs['fibers'], drop_first=False) - # 3) apply synchronization - ibl_df['times'] = sync_nph_to_bpod_fcn(raw_df['SystemTimestamp']) - ibl_df['valid'] = np.logical_and(ibl_df['times'] >= valid_bounds[0], ibl_df['times'] <= valid_bounds[1]) + photometry_df['times'] = sync_nph_to_bpod_fcn(photometry_df['times']) + photometry_df['valid'] = np.logical_and( + photometry_df['times'] >= valid_bounds[0], photometry_df['times'] <= valid_bounds[1] + ) # 4) write to disk output_folder = self.session_path.joinpath('alf', 'photometry') output_folder.mkdir(parents=True, exist_ok=True) # writing the synced photometry signal - ibl_df_outpath = output_folder / 'photometry.signal.pqt' - ibl_df.to_parquet(ibl_df_outpath) 
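When it completes, _run leaves two tables under alf/photometry: photometry.signal.pqt, whose 'times' column is in bpod time and whose 'valid' column flags samples inside the task bounds, and photometryROI.locations.pqt, indexed by ROI with a 'brain_region' column. A minimal read-back sketch under an assumed session path:

import pandas as pd
from pathlib import Path

alf_photometry = Path('/data/Subjects/SUBJECT/2025-01-01/001/alf/photometry')   # hypothetical
signal_df = pd.read_parquet(alf_photometry / 'photometry.signal.pqt')
locations_df = pd.read_parquet(alf_photometry / 'photometryROI.locations.pqt')
signal_df = signal_df[signal_df['valid']]        # keep only samples within the task bounds
print(locations_df['brain_region'])              # one brain-region label per ROI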
+ photometry_df_outpath = output_folder / 'photometry.signal.pqt' + photometry_df.to_parquet(photometry_df_outpath) # writing the locations rois = [] @@ -252,7 +260,7 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: locations_df = pd.DataFrame(rois).set_index('ROI') locations_df_outpath = output_folder / 'photometryROI.locations.pqt' locations_df.to_parquet(locations_df_outpath) - return ibl_df_outpath, locations_df_outpath + return photometry_df_outpath, locations_df_outpath class FibrePhotometryBpodSync(FibrePhotometryBaseSync): @@ -266,7 +274,7 @@ def signature(self): ('_neurophotometrics_fpData.raw.pqt', self.photometry_collection, True, True), ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), # ('_neurophotometrics_fpData.channels.csv', self.photometry_collection, True, True), - ('_neurophotometrics_fpData.digitalIntputs.pqt', self.photometry_collection, True), + ('_neurophotometrics_fpData.digitalInputs.pqt', self.photometry_collection, True), ], 'output_files': [ ('photometry.signal.pqt', 'alf/photometry', True), @@ -278,10 +286,10 @@ def signature(self): def _get_neurophotometrics_timestamps(self) -> np.ndarray: # for bpod based syncing, the timestamps for syncing are in the digital inputs file raw_photometry_folder = self.session_path / self.photometry_collection - digital_inputs_filepath = raw_photometry_folder / '_neurophotometrics_fpData.digitalIntputs.pqt' - version = fpio.infer_version_from_digital_inputs_file(digital_inputs_filepath) - digital_inputs_df = fpio.read_digital_inputs_file(digital_inputs_filepath, version=version) - timestamps_nph = digital_inputs_df['SystemTimestamp'].values[digital_inputs_df['Channel'] == self.kwargs['sync_channel']] + digital_inputs_filepath = raw_photometry_folder / '_neurophotometrics_fpData.digitalInputs.pqt' + digital_inputs_df = fpio.read_digital_inputs_file(digital_inputs_filepath) + # timestamps_nph = digital_inputs_df['times'].values[digital_inputs_df['channel'] == self.kwargs['sync_channel']] + timestamps_nph = digital_inputs_df.groupby('channel').get_group(self.kwargs['sync_channel'])['times'].values # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods @@ -305,7 +313,7 @@ def signature(self): 'input_files': [ ('_neurophotometrics_fpData.raw.pqt', self.photometry_collection, True, True), ('_iblrig_taskData.raw.jsonable', self.task_collection, True, True), - ('_neurophotometrics_fpData.channels.csv', self.photometry_collection, True, True), + # ('_neurophotometrics_fpData.channels.csv', self.photometry_collection, True, True), ('_mcc_DAQdata.raw.tdms', self.photometry_collection, True, True), ], 'output_files': [ @@ -319,7 +327,7 @@ def load_data(self) -> pd.DataFrame: # the point of this functions is to overwrite the SystemTimestamp column # in the ibl_df with the values from the DAQ clock # then syncing will work the same as for the bpod based syncing - raw_df = super().load_data() + photometry_df = super().load_data() # get daqami timestamps # attempt to load @@ -341,49 +349,51 @@ def load_data(self) -> pd.DataFrame: frame_timestamps = self.timestamps[sync_channel_name] # compare number of frame timestamps - # and put them in the raw_df SystemTimestamp column + # and put them in the photometry_df SystemTimestamp column # based on the different scenarios frame_times_adjusted = False # for debugging reasons # they are the same, all is well - if raw_df.shape[0] == frame_timestamps.shape[0]: - raw_df['SystemTimestamp'] = frame_timestamps - 
_logger.info(f'timestamps are of equal size {raw_df.shape[0]}') + if photometry_df.shape[0] == frame_timestamps.shape[0]: + photometry_df['times'] = frame_timestamps + _logger.info(f'timestamps are of equal size {photometry_df.shape[0]}') frame_times_adjusted = True # there are more timestamps recorded by DAQ than # frames recorded by bonsai - elif raw_df.shape[0] < frame_timestamps.shape[0]: - _logger.info(f'# bonsai frames: {raw_df.shape[0]}, # daq timestamps: {frame_timestamps.shape[0]}') + elif photometry_df.shape[0] < frame_timestamps.shape[0]: + _logger.info(f'# bonsai frames: {photometry_df.shape[0]}, # daq timestamps: {frame_timestamps.shape[0]}') # there is exactly one more timestamp recorded by the daq # (probably bonsai drops the last incomplete frame) - if raw_df.shape[0] == frame_timestamps.shape[0] - 1: - raw_df['SystemTimestamp'] = frame_timestamps[:-1] + if photometry_df.shape[0] == frame_timestamps.shape[0] - 1: + photometry_df['times'] = frame_timestamps[:-1] # there are two more frames recorded by the DAQ than by # bonsai - this is observed. TODO understand when this happens - elif raw_df.shape[0] == frame_timestamps.shape[0] - 2: - raw_df['SystemTimestamp'] = frame_timestamps[:-2] + elif photometry_df.shape[0] == frame_timestamps.shape[0] - 2: + photometry_df['times'] = frame_timestamps[:-2] # there are more frames recorded by the DAQ than that # this indicates and issue - - elif raw_df.shape[0] < frame_timestamps.shape[0] - 2: + elif photometry_df.shape[0] < frame_timestamps.shape[0] - 2: raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') frame_times_adjusted = True # there are more frames recorded by bonsai than by the DAQ # this happens when the user stops the daqami recording before stopping the bonsai # or when daqami crashes - elif raw_df.shape[0] > frame_timestamps.shape[0]: + elif photometry_df.shape[0] > frame_timestamps.shape[0]: # we drop all excess frames - _logger.warning(f'#frames bonsai: {raw_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess') + _logger.warning( + f'#frames bonsai: {photometry_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess' + ) n_frames_daqami = frame_timestamps.shape[0] - raw_df = raw_df.iloc[:n_frames_daqami] - raw_df.loc[:, 'SystemTimestamp'] = frame_timestamps + photometry_df = photometry_df.iloc[:n_frames_daqami] + photometry_df.loc[:, 'SystemTimestamp'] = frame_timestamps frame_times_adjusted = True if not frame_times_adjusted: raise ValueError('timestamp issue that hasnt been caught') - return raw_df + return photometry_df def _get_neurophotometrics_timestamps(self) -> np.ndarray: # get the sync channel and the corresponding timestamps From dc2f540d0da9ecb451f5171b625cad1b9eafdd36 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 19 Sep 2025 16:40:42 +0100 Subject: [PATCH 59/80] photometry extractor updates --- ibllib/pipes/neurophotometrics.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 35fbf3363..fed940d75 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -16,6 +16,8 @@ from iblphotometry import fpio from iblutil.spacer import Spacer +from one.api import ONE + _logger = logging.getLogger('ibllib') @@ -158,7 +160,14 @@ class FibrePhotometryBaseSync(base_tasks.DynamicTask): priority = 90 job_size = 'small' - def __init__(self, session_path, one, 
task_protocol=None, task_collection=None, **kwargs): + def __init__( + self, + session_path: str | Path, + one: ONE, + task_protocol: str | None = None, + task_collection: str | None = None, + **kwargs, + ): super().__init__(session_path, one=one, **kwargs) self.photometry_collection = kwargs.get('collection', 'raw_photometry_data') # raw_photometry_data self.kwargs = kwargs @@ -211,8 +220,13 @@ def _get_sync_function(self) -> Tuple[callable, list]: sync_nph_to_bpod_fcn, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( timestamps_nph, timestamps_bpod, return_indices=True, linear=True ) - _logger.info(f'synced with drift: {drift_ppm}') - # TODO - assertion needed. 95% of timestamps in bpod need to be in timestamps of nph (but not the other way around) + if np.absolute(drift_ppm) > 20: + _logger.warning(f'sync with excessive drift: {drift_ppm}') + else: + _logger.info(f'synced with drift: {drift_ppm}') + + # assertion: 95% of timestamps in bpod need to be in timestamps of nph (but not the other way around) + assert timestamps_bpod.shape[0] * 0.95 < ix_bpod.shape[0], 'less than 95% of bpod timestamps matched' valid_bounds = self._get_valid_bounds() return sync_nph_to_bpod_fcn, valid_bounds @@ -267,6 +281,15 @@ class FibrePhotometryBpodSync(FibrePhotometryBaseSync): priority = 90 job_size = 'small' + def __init__( + self, + *args, + digital_inputs_channel: int | None = None, + **kwargs, + ): + super().__init__(*args, **kwargs) + self.digital_inputs_channel = digital_inputs_channel + @property def signature(self): signature = { @@ -287,7 +310,7 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # for bpod based syncing, the timestamps for syncing are in the digital inputs file raw_photometry_folder = self.session_path / self.photometry_collection digital_inputs_filepath = raw_photometry_folder / '_neurophotometrics_fpData.digitalInputs.pqt' - digital_inputs_df = fpio.read_digital_inputs_file(digital_inputs_filepath) + digital_inputs_df = fpio.read_digital_inputs_file(digital_inputs_filepath, channel=self.kwargs['sync_channel']) # timestamps_nph = digital_inputs_df['times'].values[digital_inputs_df['channel'] == self.kwargs['sync_channel']] timestamps_nph = digital_inputs_df.groupby('channel').get_group(self.kwargs['sync_channel'])['times'].values From 7575c7eacbcecc5e9ed91acc693eb77a10b5d91d Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 24 Sep 2025 13:26:17 +0100 Subject: [PATCH 60/80] added extractor class for passive photometry experiments --- ibllib/pipes/dynamic_pipeline.py | 18 +- ibllib/pipes/neurophotometrics.py | 276 ++++++++++++++++++++++++++++-- 2 files changed, 276 insertions(+), 18 deletions(-) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index 5ba935e67..7ed394e36 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -257,9 +257,6 @@ def _get_trials_tasks(session_path, acquisition_description=None, sync_tasks=Non # This may happen that the extractor is tied to a specific sync task: look for TrialsChoiceWorldBpod for example elif hasattr(btasks, extractor + sync_label.capitalize()): task = getattr(btasks, extractor + sync_label.capitalize()) - # Passive sessions can be run in behavior boxes - elif 'passiveChoiceWorld' in protocol: - registration_class = btasks.PassiveRegisterRaw else: # lookup in the project extraction repo if we find an extractor class import projects.extraction_tasks @@ -610,19 +607,30 @@ def make_pipeline(session_path, **pkwargs): if 'neurophotometrics' in devices: 
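Both sync branches below read their parameters from the neurophotometrics entry of the experiment description. The key names (collection, sync_mode, sync_channel, fibers with a per-ROI location) are the ones accessed by the tasks in this patch series; the ROI names and brain regions in this sketch are illustrative only:

neurophotometrics_description = {
    'collection': 'raw_photometry_data',   # default used when the key is absent
    'sync_mode': 'daqami',                 # or 'bpod' (the default)
    'sync_channel': 1,                     # illustrative channel number
    'fibers': {
        'G0': {'location': 'VTA'},         # illustrative ROI name and region
        'G1': {'location': 'SNc'},
    },
}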
# note: devices['neurophotometrics'] is the acquisition_description sync_mode = devices['neurophotometrics'].get('sync_mode', 'bpod') # default to bpod for downward compatibility + + # passive photometry + task_protocols = acquisition_description['tasks'] + assert len(task_protocols) == 1, 'chained protocols are not yet supported for photometry extraction' + protocol = task_protocols[0] + if 'passive' in protocol: + assert sync_mode == 'daqami', 'passive protocol syncing only supported for DAQ based syncing' + tasks['FibrePhotometryPassiveChoiceWorld'] = type( + 'FibrePhotometryPassiveChoiceWorld', (ptasks.FibrePhotometryPassiveChoiceWorld,), {} + )( + **kwargs, + ) + match sync_mode: case 'bpod': # for synchronization with the BNC inputs of the neurophotometrics receiving the sync pulses # from the individual bpods tasks['FibrePhotometryBpodSync'] = type('FibrePhotometryBpodSync', (ptasks.FibrePhotometryBpodSync,), {})( - **devices['neurophotometrics'], **kwargs, ) case 'daqami': # for synchronization with the DAQami receiving the sync pulses from the individual bpods # as well as the frame clock from the FP3002 tasks['FibrePhotometryDAQSync'] = type('FibrePhotometryDAQSync', (ptasks.FibrePhotometryDAQSync,), {})( - **devices['neurophotometrics'], **kwargs, ) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index fed940d75..0b19ce226 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -14,9 +14,11 @@ from abc import abstractmethod from iblphotometry import fpio -from iblutil.spacer import Spacer +from iblrig_tasks import _iblrig_tasks_passiveChoiceWorld from one.api import ONE +import json +from scipy.optimize import minimize _logger = logging.getLogger('ibllib') @@ -56,6 +58,7 @@ def extract_timestamps_from_tdms_file( tdms_filepath: Path, save_path: Optional[Path] = None, chunk_size=10000, + extract_durations: bool = False, ) -> dict: """extractor for tdms files as written by the daqami software, configured for neurophotometrics experiments: Frameclock is in AI7, DI1-4 are the bpod sync signals @@ -106,7 +109,7 @@ def extract_timestamps_from_tdms_file( if chunk_size is not None: n_chunks = df.shape[0] // chunk_size for i in range(n_chunks): - vals_ = vals[i * chunk_size : (i + 1) * chunk_size] + vals_ = vals[i * chunk_size: (i + 1) * chunk_size] # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') data = _int2digital_channels(vals_) @@ -136,6 +139,84 @@ def extract_timestamps_from_tdms_file( return timestamps +def extract_ttl_durations_from_tdms_file( + tdms_filepath: Path, + save_path: Optional[Path] = None, + chunk_size=10000, +) -> dict: + _logger.info(f'extracting ttl_durations from tdms file: {tdms_filepath}') + + # this should be 10kHz + tdms_file = TdmsFile.read(tdms_filepath) + groups = tdms_file.groups() + + # this unfortunate hack is in here because there are a bunch of sessions + # where the frameclock is on DI0 + if len(groups) == 1: + has_analog_group = False + (digital_group,) = groups + if len(groups) == 2: + has_analog_group = True + analog_group, digital_group = groups + fs = digital_group.properties['ScanRate'] # this should be 10kHz + df = tdms_file.as_dataframe() + + # inferring digital col name + (digital_col,) = [col for col in df.columns if 'Digital' in col] + vals = df[digital_col].values.astype('int8') + digital_channel_names = ['DI0', 'DI1', 'DI2', 'DI3'] + + # ini + timestamps = {} + for ch in digital_channel_names: + timestamps[ch] = dict(positive=[], negative=[]) + + # 
chunked loop for memory efficiency + if chunk_size is not None: + n_chunks = df.shape[0] // chunk_size + for i in range(n_chunks): + vals_ = vals[i * chunk_size: (i + 1) * chunk_size] + # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') + data = _int2digital_channels(vals_) + + for j, name in enumerate(digital_channel_names): + ix = np.where(np.diff(data[:, j]) == 1)[0] + (chunk_size * i) + timestamps[name]['positive'].append(ix / fs) + ix = np.where(np.diff(data[:, j]) == -1)[0] + (chunk_size * i) + timestamps[name]['negative'].append(ix / fs) + + for ch in digital_channel_names: + timestamps[ch]['positive'] = np.concatenate(timestamps[ch]['positive']) + timestamps[ch]['negative'] = np.concatenate(timestamps[ch]['negative']) + else: + data = _int2digital_channels(vals) + for j, name in enumerate(digital_channel_names): + ix = np.where(np.diff(data[:, j]) == 1)[0] + timestamps[name]['positive'].append(ix / fs) + ix = np.where(np.diff(data[:, j]) == -1)[0] + timestamps[name]['negative'].append(ix / fs) + + if has_analog_group: + # frameclock data is recorded on an analog channel + for channel in analog_group.channels(): + timestamps[channel.name] = {} + signal = (channel.data > 2.5).astype('int32') # assumes 0-5V + timestamps[channel.name]['positive'] = np.where(np.diff(signal) == 1)[0] / fs + timestamps[channel.name]['negative'] = np.where(np.diff(signal) == -1)[0] / fs + + # the actual diff + durations = {} + for channel in timestamps.keys(): + durations[channel] = timestamps[channel]['negative'] - timestamps[channel]['positive'] + + if save_path is not None: + _logger.info(f'saving extracted ttl durations to: {save_path}') + with open(save_path, 'wb') as fH: + pickle.dump(durations, fH) + + return durations + + def extract_timestamps_from_bpod_jsonable(file_jsonable: str | Path, sync_states_names: List[str]): _, bpod_data = jsonable.load_task_jsonable(file_jsonable) timestamps = [] @@ -184,7 +265,7 @@ def __init__( # if not provided, infer self.task_collection = ibllib.io.session_params.get_task_collection(self.session_params, self.task_protocol) - def _get_bpod_timestamps(self) -> Tuple[np.ndarray, list]: + def _get_bpod_timestamps(self) -> np.ndarray: # the timestamps for syncing, in the time of the bpod if 'habituation' in self.task_protocol: sync_states_names = ['iti', 'reward'] @@ -236,7 +317,6 @@ def load_data(self) -> pd.DataFrame: raw_photometry_folder = self.session_path / self.photometry_collection photometry_df = fpio.from_neurophotometrics_file_to_photometry_df( raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt', - # data_columns=self.kwargs['fibers'], drop_first=False, ) return photometry_df @@ -264,17 +344,19 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: output_folder.mkdir(parents=True, exist_ok=True) # writing the synced photometry signal - photometry_df_outpath = output_folder / 'photometry.signal.pqt' - photometry_df.to_parquet(photometry_df_outpath) + photometry_filepath = self.session_path / 'alf' / 'photometry' / 'photometry.signal.pqt' + photometry_filepath.parent.mkdir(parents=True, exist_ok=True) + photometry_df.to_parquet(photometry_filepath) # writing the locations rois = [] - for k, v in self.kwargs['fibers'].items(): + for k, v in self.session_params['devices']['neurophotometrics']['fibers'].items(): rois.append({'ROI': k, 'fiber': f'fiber_{v["location"]}', 'brain_region': v['location']}) locations_df = pd.DataFrame(rois).set_index('ROI') - locations_df_outpath = output_folder / 'photometryROI.locations.pqt' - 
locations_df.to_parquet(locations_df_outpath) - return photometry_df_outpath, locations_df_outpath + locations_filepath = self.session_path / 'alf' / 'photometry' / 'photometryROI.locations.pqt' + locations_filepath.parent.mkdir(parents=True, exist_ok=True) + locations_df.to_parquet(locations_filepath) + return photometry_filepath, locations_filepath class FibrePhotometryBpodSync(FibrePhotometryBaseSync): @@ -310,9 +392,11 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # for bpod based syncing, the timestamps for syncing are in the digital inputs file raw_photometry_folder = self.session_path / self.photometry_collection digital_inputs_filepath = raw_photometry_folder / '_neurophotometrics_fpData.digitalInputs.pqt' - digital_inputs_df = fpio.read_digital_inputs_file(digital_inputs_filepath, channel=self.kwargs['sync_channel']) - # timestamps_nph = digital_inputs_df['times'].values[digital_inputs_df['channel'] == self.kwargs['sync_channel']] - timestamps_nph = digital_inputs_df.groupby('channel').get_group(self.kwargs['sync_channel'])['times'].values + digital_inputs_df = fpio.read_digital_inputs_file( + digital_inputs_filepath, channel=self.session_params['devices']['neurophotometrics']['sync_channel'] + ) + sync_channel = self.session_params['devices']['neurophotometrics']['sync_channel'] + timestamps_nph = digital_inputs_df.groupby('channel').get_group(sync_channel)['times'].values # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods @@ -426,3 +510,169 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # to implement: detect spacer / remove spacer methods # timestamps_nph = timestamps_nph[15: ] return timestamps_nph + + +class FibrePhotometryPassiveChoiceWorld(base_tasks.BehaviourTask): + priority = 90 + job_size = 'small' + + def __init__( + self, + session_path: str | Path, + one: ONE, + load_timestamps: bool = True, + **kwargs, + ): + super().__init__(session_path, one=one, **kwargs) + self.photometry_collection = kwargs.get('collection', 'raw_photometry_data') + self.kwargs = kwargs + self.load_timestamps = load_timestamps + + def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: + # load the fixtures - from the relative delays between trials, an "absolute" time vector is + # created that is used for the synchronization + fixtures_path = Path(_iblrig_tasks_passiveChoiceWorld.__file__).parent / 'passiveChoiceWorld_trials_fixtures.pqt' + + # getting the task_settings + with open(self.session_path / self.collection / '_iblrig_taskSettings.raw.json', 'r') as fH: + task_settings = json.load(fH) + + # getting the fixtures and creating a relative time vector + fixtures_df = pd.read_parquet(fixtures_path).groupby('session_id').get_group(task_settings['SESSION_TEMPLATE_ID']) + + # stimulus durations + stim_durations = dict( + T=task_settings['GO_TONE_DURATION'], + N=task_settings['WHITE_NOISE_DURATION'], + G=0.3, # visual stimulus duration is hardcoded to 300ms + V=0.1, # V=0.1102 from a a session # to be replaced later down + ) + for s in fixtures_df['stim_type'].unique(): + fixtures_df.loc[fixtures_df['stim_type'] == s, 'delay'] = stim_durations[s] + + # the audio go cue times + mic_go_cue_times_bpod = np.load(self.session_path / self.collection / '_iblmic_audioOnsetGoCue.times_mic.npy') + + # adding the delays + def obj_fun(x, mic_go_cue_times_bpod, fixtures_df): + # fit overhead + for s in ['T', 'N', 'G', 'V']: + if s == 'T' or s == 'N': + fixtures_df.loc[fixtures_df['stim_type'] == s, 'overhead'] = x[0] + 
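            # x holds the per-stimulus-type overheads being fitted: x[0] is shared by the sound
            # stimuli ('T' go tone, 'N' white noise), x[1] is the visual stimulus 'G' and x[2] the
            # valve 'V'; the same mapping is unpacked into the overheads dict after the fit.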
if s == 'G': + fixtures_df.loc[fixtures_df['stim_type'] == s, 'overhead'] = x[1] + if s == 'V': + fixtures_df.loc[fixtures_df['stim_type'] == s, 'overhead'] = x[2] + + fixtures_df['t_rel'] = np.cumsum( + fixtures_df['stim_delay'].values + np.roll(fixtures_df['delay'].values, 1) + fixtures_df['overhead'].values, + ) + + mic_go_cue_times_rel = fixtures_df.groupby('stim_type').get_group('T')['t_rel'].values + err = np.sum((np.diff(mic_go_cue_times_rel) - np.diff(mic_go_cue_times_bpod)) ** 2) + return err + + # fitting the overheads + fixtures_df['overhead'] = 0.0 + bounds = ((0, np.inf), (0, np.inf), (0, np.inf)) + pfit = minimize(obj_fun, (0.0, 0.0, 0.0), args=(mic_go_cue_times_bpod, fixtures_df), bounds=bounds) + overheads = dict(zip(['T', 'N', 'G', 'V'], [pfit.x[0], pfit.x[0], pfit.x[1], pfit.x[2]])) + + for s in fixtures_df['stim_type'].unique(): + fixtures_df.loc[fixtures_df['stim_type'] == s, 'overhead'] = overheads[s] + fixtures_df['t_rel'] = np.cumsum( + fixtures_df['stim_delay'].values + np.roll(fixtures_df['delay'].values, 1) + fixtures_df['overhead'].values + ) + + mic_go_cue_times_rel = fixtures_df.groupby('stim_type').get_group('T')['t_rel'].values + + sync_fun, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( + mic_go_cue_times_rel, mic_go_cue_times_bpod, return_indices=True, linear=True + ) + + assert ix_nph.shape[0] == 40, 'not all microphone onset events are accepted by the sync function' + if np.absolute(drift_ppm) > 20: + _logger.warning(f'sync with excessive drift: {drift_ppm}') + else: + _logger.info(f'synced with drift: {drift_ppm}') + + # applying the sync to all the timestamps in the fixtures + fixtures_df['t_bpod'] = sync_fun(fixtures_df['t_rel']) + + # dealing with the valve + # valve_times_rel = fixtures_df.groupby('stim_type').get_group('V')['t_rel'].values + # valve_times_bpod = sync_fun(valve_times_rel) + valve_times_bpod = fixtures_df.groupby('stim_type').get_group('V')['t_bpod'].values + + # getting the valve timestamps from the DAQ + timestamps_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.pkl' + if self.load_timestamps and timestamps_filepath.exists(): + with open(timestamps_filepath, 'rb') as fH: + self.timestamps = pickle.load(fH) + else: # extract timestamps: + tdms_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.raw.tdms' + self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath, save_path=timestamps_filepath) + + sync_channel = self.session_params['devices']['neurophotometrics']['sync_channel'] + valve_times_nph = self.timestamps[f'DI{sync_channel}'] + + sync_fun, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( + valve_times_nph, valve_times_bpod, return_indices=True, linear=True + ) + assert ix_bpod.shape[0] == 40, 'not all bpod valve onset events are accepted by the sync function' + if np.absolute(drift_ppm) > 20: + _logger.warning(f'sync with excessive drift: {drift_ppm}') + else: + _logger.info(f'synced with drift: {drift_ppm}') + + # loads the raw photometry data + raw_photometry_folder = self.session_path / self.photometry_collection + photometry_df = fpio.from_neurophotometrics_file_to_photometry_df( + raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt', + drop_first=False, + ) + # apply synchronization + photometry_df['times'] = sync_fun(photometry_df['times']) + # verify that all are valid (i.e. mean nothing ... 
) + + # write to disk + # the synced photometry signal + photometry_filepath = self.session_path / 'alf' / 'photometry' / 'photometry.signal.pqt' + photometry_filepath.parent.mkdir(parents=True, exist_ok=True) + photometry_df.to_parquet(photometry_filepath) + + # writing the locations + rois = [] + for k, v in self.session_params['devices']['neurophotometrics']['fibers'].items(): + rois.append({'ROI': k, 'fiber': f'fiber_{v["location"]}', 'brain_region': v['location']}) + locations_df = pd.DataFrame(rois).set_index('ROI') + locations_filepath = self.session_path / 'alf' / 'photometry' / 'photometryROI.locations.pqt' + locations_filepath.parent.mkdir(parents=True, exist_ok=True) + locations_df.to_parquet(locations_filepath) + + # writing the passive events table + # get the valve open duration + ttl_durations_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdurations.pkl' + if self.load_timestamps and ttl_durations_filepath.exists(): + with open(ttl_durations_filepath, 'rb') as fH: + ttl_durations = pickle.load(fH) + else: # extract timestamps: + tdms_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.raw.tdms' + ttl_durations = extract_ttl_durations_from_tdms_file(tdms_filepath, save_path=ttl_durations_filepath) + + valve_open_dur = np.median(ttl_durations[f'DI{sync_channel}'][ix_nph]) + passiveStims_df = pd.DataFrame( + dict( + valveOn=fixtures_df.groupby('stim_type').get_group('V')['t_bpod'], + valveOff=fixtures_df.groupby('stim_type').get_group('V')['t_bpod'] + valve_open_dur, + toneOn=fixtures_df.groupby('stim_type').get_group('T')['t_bpod'], + toneOff=fixtures_df.groupby('stim_type').get_group('T')['t_bpod'] + task_settings['GO_TONE_DURATION'], + noiseOn=fixtures_df.groupby('stim_type').get_group('N')['t_bpod'], + noiseOff=fixtures_df.groupby('stim_type').get_group('N')['t_bpod'] + task_settings['WHITE_NOISE_DURATION'], + ) + ) + passiveStims_filepath = self.session_path / 'alf' / self.collection / '_ibl_passiveStims.table.pqt' + passiveStims_filepath.parent.mkdir(exist_ok=True, parents=True) + passiveStims_df.reset_index().to_parquet(passiveStims_filepath) + + return photometry_filepath, locations_filepath, passiveStims_filepath From af33db373ff08685fabb82038f5049b7a23be152 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 24 Sep 2025 14:31:59 +0100 Subject: [PATCH 61/80] un-ruffing --- ibllib/pipes/dynamic_pipeline.py | 159 ++++++++++++++----------------- 1 file changed, 71 insertions(+), 88 deletions(-) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index 7ed394e36..2f3acd44a 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -20,7 +20,6 @@ :class:`ibllib.io.extractors.base.BaseBpodTrialsExtractor` class, and located in either the personal projects repo or in :py:mod:`ibllib.io.extractors.bpod_trials` module. 
""" - import logging import re from fnmatch import fnmatch @@ -72,7 +71,7 @@ def acquisition_description_legacy_session(session_path, save=False): def get_acquisition_description(protocol): - """ " + """" This is a set of example acquisition descriptions for experiments - choice_world_recording - choice_world_biased @@ -81,7 +80,7 @@ def get_acquisition_description(protocol): - choice_world_passive That are part of the IBL pipeline """ - if 'ephys' in protocol: # canonical ephys + if 'ephys' in protocol: # canonical ephys devices = { 'cameras': { 'right': {'collection': 'raw_video_data', 'sync_label': 'audio'}, @@ -90,32 +89,38 @@ def get_acquisition_description(protocol): }, 'neuropixel': { 'probe00': {'collection': 'raw_ephys_data/probe00', 'sync_label': 'imec_sync'}, - 'probe01': {'collection': 'raw_ephys_data/probe01', 'sync_label': 'imec_sync'}, + 'probe01': {'collection': 'raw_ephys_data/probe01', 'sync_label': 'imec_sync'} + }, + 'microphone': { + 'microphone': {'collection': 'raw_behavior_data', 'sync_label': None} }, - 'microphone': {'microphone': {'collection': 'raw_behavior_data', 'sync_label': None}}, } acquisition_description = { # this is the current ephys pipeline description 'devices': devices, 'tasks': [ {'ephysChoiceWorld': {'collection': 'raw_behavior_data', 'sync_label': 'bpod'}}, - {'passiveChoiceWorld': {'collection': 'raw_passive_data', 'sync_label': 'bpod'}}, + {'passiveChoiceWorld': {'collection': 'raw_passive_data', 'sync_label': 'bpod'}} ], - 'sync': {'nidq': {'collection': 'raw_ephys_data', 'extension': 'bin', 'acquisition_software': 'spikeglx'}}, + 'sync': { + 'nidq': {'collection': 'raw_ephys_data', 'extension': 'bin', 'acquisition_software': 'spikeglx'} + }, 'procedures': ['Ephys recording with acute probe(s)'], - 'projects': ['ibl_neuropixel_brainwide_01'], + 'projects': ['ibl_neuropixel_brainwide_01'] } else: devices = { 'cameras': { 'left': {'collection': 'raw_video_data', 'sync_label': 'audio'}, }, - 'microphone': {'microphone': {'collection': 'raw_behavior_data', 'sync_label': None}}, + 'microphone': { + 'microphone': {'collection': 'raw_behavior_data', 'sync_label': None} + }, } acquisition_description = { # this is the current ephys pipeline description 'devices': devices, 'sync': {'bpod': {'collection': 'raw_behavior_data'}}, 'procedures': ['Behavior training/tasks'], - 'projects': ['ibl_neuropixel_brainwide_01'], + 'projects': ['ibl_neuropixel_brainwide_01'] } if 'biased' in protocol: key = 'biasedChoiceWorld' @@ -125,7 +130,10 @@ def get_acquisition_description(protocol): key = 'habituationChoiceWorld' else: raise ValueError(f'Unknown protocol "{protocol}"') - acquisition_description['tasks'] = [{key: {'collection': 'raw_behavior_data', 'sync_label': 'bpod'}}] + acquisition_description['tasks'] = [{key: { + 'collection': 'raw_behavior_data', + 'sync_label': 'bpod' + }}] acquisition_description['version'] = '1.0.0' return acquisition_description @@ -216,7 +224,7 @@ def _get_trials_tasks(session_path, acquisition_description=None, sync_tasks=Non kwargs = {'session_path': session_path, 'one': one} # Syncing tasks - ((sync, sync_args),) = acquisition_description['sync'].items() + (sync, sync_args), = acquisition_description['sync'].items() sync_label = _sync_label(sync, **sync_args) # get the format of the DAQ data. 
This informs the extractor task sync_args['sync_collection'] = sync_args.pop('collection') # rename the key so it matches task run arguments sync_args['sync_ext'] = sync_args.pop('extension', None) @@ -260,16 +268,15 @@ def _get_trials_tasks(session_path, acquisition_description=None, sync_tasks=Non else: # lookup in the project extraction repo if we find an extractor class import projects.extraction_tasks - if hasattr(projects.extraction_tasks, extractor): task = getattr(projects.extraction_tasks, extractor) elif hasattr(projects.extraction_tasks, extractor + sync_label.capitalize()): task = getattr(btasks, extractor + sync_label.capitalize()) else: raise NotImplementedError( - f'Extractor "{extractor}" not found in main IBL pipeline nor in personal projects' - ) - _logger.debug('%s (protocol #%i, task #%i) = %s.%s', protocol, i, j, task.__module__, task.__name__) + f'Extractor "{extractor}" not found in main IBL pipeline nor in personal projects') + _logger.debug('%s (protocol #%i, task #%i) = %s.%s', + protocol, i, j, task.__module__, task.__name__) # Rename the class to something more informative task_name = f'{task.__name__}_{i:02}' if not (task.__name__.startswith('TrainingStatus') or task.__name__.endswith('RegisterRaw')): @@ -307,16 +314,13 @@ def _get_trials_tasks(session_path, acquisition_description=None, sync_tasks=Non raise NotImplementedError(f'No trials task available for sync namespace "{sync_label}"') compute_status = True tasks[f'RegisterRaw_{protocol}_{i:02}'] = type(f'RegisterRaw_{protocol}_{i:02}', (registration_class,), {})( - **kwargs, **task_kwargs - ) + **kwargs, **task_kwargs) parents = [tasks[f'RegisterRaw_{protocol}_{i:02}']] + sync_tasks tasks[f'Trials_{protocol}_{i:02}'] = type(f'Trials_{protocol}_{i:02}', (behaviour_class,), {})( - **kwargs, **sync_kwargs, **task_kwargs, parents=parents - ) + **kwargs, **sync_kwargs, **task_kwargs, parents=parents) if compute_status: - tasks[f'TrainingStatus_{protocol}_{i:02}'] = type( - f'TrainingStatus_{protocol}_{i:02}', (btasks.TrainingStatus,), {} - )(**kwargs, **task_kwargs, parents=[tasks[f'Trials_{protocol}_{i:02}']]) + tasks[f'TrainingStatus_{protocol}_{i:02}'] = type(f'TrainingStatus_{protocol}_{i:02}', ( + btasks.TrainingStatus,), {})(**kwargs, **task_kwargs, parents=[tasks[f'Trials_{protocol}_{i:02}']]) return tasks @@ -407,12 +411,11 @@ def make_pipeline(session_path, **pkwargs): kwargs = {'session_path': session_path, 'one': pkwargs.get('one')} # Registers the experiment description file - tasks['ExperimentDescriptionRegisterRaw'] = type( - 'ExperimentDescriptionRegisterRaw', (bstasks.ExperimentDescriptionRegisterRaw,), {} - )(**kwargs) + tasks['ExperimentDescriptionRegisterRaw'] = type('ExperimentDescriptionRegisterRaw', + (bstasks.ExperimentDescriptionRegisterRaw,), {})(**kwargs) # Syncing tasks - ((sync, sync_args),) = acquisition_description['sync'].items() + (sync, sync_args), = acquisition_description['sync'].items() sync_args = sync_args.copy() # ensure acquisition_description unchanged sync_label = _sync_label(sync, **sync_args) # get the format of the DAQ data. 
This informs the extractor task sync_args['sync_collection'] = sync_args.pop('collection') # rename the key so it matches task run arguments @@ -423,16 +426,14 @@ def make_pipeline(session_path, **pkwargs): if sync_label == 'nidq' and sync_args['sync_collection'] == 'raw_ephys_data': tasks['SyncRegisterRaw'] = type('SyncRegisterRaw', (etasks.EphysSyncRegisterRaw,), {})(**kwargs, **sync_kwargs) tasks[f'SyncPulses_{sync}'] = type(f'SyncPulses_{sync}', (etasks.EphysSyncPulses,), {})( - **kwargs, **sync_kwargs, parents=[tasks['SyncRegisterRaw']] - ) + **kwargs, **sync_kwargs, parents=[tasks['SyncRegisterRaw']]) sync_tasks = [tasks[f'SyncPulses_{sync}']] elif sync_label == 'timeline': tasks['SyncRegisterRaw'] = type('SyncRegisterRaw', (stasks.SyncRegisterRaw,), {})(**kwargs, **sync_kwargs) elif sync_label == 'nidq': tasks['SyncRegisterRaw'] = type('SyncRegisterRaw', (stasks.SyncMtscomp,), {})(**kwargs, **sync_kwargs) tasks[f'SyncPulses_{sync}'] = type(f'SyncPulses_{sync}', (stasks.SyncPulses,), {})( - **kwargs, **sync_kwargs, parents=[tasks['SyncRegisterRaw']] - ) + **kwargs, **sync_kwargs, parents=[tasks['SyncRegisterRaw']]) sync_tasks = [tasks[f'SyncPulses_{sync}']] elif sync_label == 'tdms': tasks['SyncRegisterRaw'] = type('SyncRegisterRaw', (stasks.SyncRegisterRaw,), {})(**kwargs, **sync_kwargs) @@ -440,7 +441,9 @@ def make_pipeline(session_path, **pkwargs): pass # ATM we don't have anything for this; it may not be needed in the future # Behavior tasks - tasks.update(_get_trials_tasks(session_path, acquisition_description, sync_tasks=sync_tasks, one=pkwargs.get('one'))) + tasks.update( + _get_trials_tasks(session_path, acquisition_description, sync_tasks=sync_tasks, one=pkwargs.get('one')) + ) # Ephys tasks if 'neuropixel' in devices: @@ -460,46 +463,38 @@ def make_pipeline(session_path, **pkwargs): if (nptype == 'NP2.1') or (nptype == 'NP2.4' and nshanks == 1): tasks[f'EphyCompressNP21_{pname}'] = type(f'EphyCompressNP21_{pname}', (etasks.EphysCompressNP21,), {})( - **kwargs, **ephys_kwargs, pname=pname - ) + **kwargs, **ephys_kwargs, pname=pname) all_probes.append(pname) register_tasks.append(tasks[f'EphyCompressNP21_{pname}']) elif nptype == 'NP2.4' and nshanks > 1: tasks[f'EphyCompressNP24_{pname}'] = type(f'EphyCompressNP24_{pname}', (etasks.EphysCompressNP24,), {})( - **kwargs, **ephys_kwargs, pname=pname, nshanks=nshanks - ) + **kwargs, **ephys_kwargs, pname=pname, nshanks=nshanks) register_tasks.append(tasks[f'EphyCompressNP24_{pname}']) all_probes += [f'{pname}{chr(97 + int(shank))}' for shank in range(nshanks)] else: tasks[f'EphysCompressNP1_{pname}'] = type(f'EphyCompressNP1_{pname}', (etasks.EphysCompressNP1,), {})( - **kwargs, **ephys_kwargs, pname=pname - ) + **kwargs, **ephys_kwargs, pname=pname) register_tasks.append(tasks[f'EphysCompressNP1_{pname}']) all_probes.append(pname) if nptype == '3A': tasks['EphysPulses'] = type('EphysPulses', (etasks.EphysPulses,), {})( - **kwargs, **ephys_kwargs, **sync_kwargs, pname=all_probes, parents=register_tasks + sync_tasks - ) + **kwargs, **ephys_kwargs, **sync_kwargs, pname=all_probes, parents=register_tasks + sync_tasks) for pname in all_probes: register_task = [reg_task for reg_task in register_tasks if pname[:7] in reg_task.name] if nptype != '3A': tasks[f'EphysPulses_{pname}'] = type(f'EphysPulses_{pname}', (etasks.EphysPulses,), {})( - **kwargs, **ephys_kwargs, **sync_kwargs, pname=[pname], parents=register_task + sync_tasks - ) + **kwargs, **ephys_kwargs, **sync_kwargs, pname=[pname], parents=register_task + sync_tasks) 
tasks[f'Spikesorting_{pname}'] = type(f'Spikesorting_{pname}', (etasks.SpikeSorting,), {})( - **kwargs, **ephys_kwargs, pname=pname, parents=[tasks[f'EphysPulses_{pname}']] - ) + **kwargs, **ephys_kwargs, pname=pname, parents=[tasks[f'EphysPulses_{pname}']]) else: tasks[f'Spikesorting_{pname}'] = type(f'Spikesorting_{pname}', (etasks.SpikeSorting,), {})( - **kwargs, **ephys_kwargs, pname=pname, parents=[tasks['EphysPulses']] - ) + **kwargs, **ephys_kwargs, pname=pname, parents=[tasks['EphysPulses']]) tasks[f'RawEphysQC_{pname}'] = type(f'RawEphysQC_{pname}', (etasks.RawEphysQC,), {})( - **kwargs, **ephys_kwargs, pname=pname, parents=register_task - ) + **kwargs, **ephys_kwargs, pname=pname, parents=register_task) # Video tasks if 'cameras' in devices: @@ -513,33 +508,35 @@ def make_pipeline(session_path, **pkwargs): tasks[tn] = type((tn := 'VideoConvert'), (vtasks.VideoConvert,), {})(**kwargs, **video_kwargs) dlc_parent_task = tasks['VideoConvert'] tasks[tn] = type((tn := f'VideoSyncQC_{sync}'), (vtasks.VideoSyncQcCamlog,), {})( - **kwargs, **video_kwargs, **sync_kwargs - ) + **kwargs, **video_kwargs, **sync_kwargs) else: - tasks[tn] = type((tn := 'VideoRegisterRaw'), (vtasks.VideoRegisterRaw,), {})(**kwargs, **video_kwargs) - tasks[tn] = type((tn := 'VideoCompress'), (vtasks.VideoCompress,), {})(**kwargs, **video_kwargs, **sync_kwargs) + tasks[tn] = type((tn := 'VideoRegisterRaw'), (vtasks.VideoRegisterRaw,), {})( + **kwargs, **video_kwargs) + tasks[tn] = type((tn := 'VideoCompress'), (vtasks.VideoCompress,), {})( + **kwargs, **video_kwargs, **sync_kwargs) dlc_parent_task = tasks['VideoCompress'] if sync == 'bpod': tasks[tn] = type((tn := f'VideoSyncQC_{sync}'), (vtasks.VideoSyncQcBpod,), {})( - **kwargs, **video_kwargs, **sync_kwargs, parents=[tasks['VideoCompress']] - ) + **kwargs, **video_kwargs, **sync_kwargs, parents=[tasks['VideoCompress']]) elif sync == 'nidq': # Here we restrict to videos that we support (left, right or body) video_kwargs['cameras'] = subset_cams tasks[tn] = type((tn := f'VideoSyncQC_{sync}'), (vtasks.VideoSyncQcNidq,), {})( - **kwargs, **video_kwargs, **sync_kwargs, parents=[tasks['VideoCompress']] + sync_tasks - ) + **kwargs, **video_kwargs, **sync_kwargs, parents=[tasks['VideoCompress']] + sync_tasks) if sync_kwargs['sync'] != 'bpod': # Here we restrict to videos that we support (left, right or body) # Currently there is no plan to run DLC on the belly cam subset_cams = [c for c in cams if c in ('left', 'right', 'body')] video_kwargs['cameras'] = subset_cams - tasks[tn] = type((tn := 'DLC'), (vtasks.DLC,), {})(**kwargs, **video_kwargs, parents=[dlc_parent_task]) + tasks[tn] = type((tn := 'DLC'), (vtasks.DLC,), {})( + **kwargs, **video_kwargs, parents=[dlc_parent_task]) # The PostDLC plots require a trials object for QC # Find the first task that outputs a trials.table dataset - trials_task = (t for t in tasks.values() if any('trials.table' in f[0] for f in t.signature.get('output_files', []))) + trials_task = ( + t for t in tasks.values() if any('trials.table' in f[0] for f in t.signature.get('output_files', [])) + ) if trials_task := next(trials_task, None): parents = [tasks['DLC'], tasks[f'VideoSyncQC_{sync}'], trials_task] trials_collection = getattr(trials_task, 'output_collection', 'alf') @@ -547,62 +544,48 @@ def make_pipeline(session_path, **pkwargs): parents = [tasks['DLC'], tasks[f'VideoSyncQC_{sync}']] trials_collection = 'alf' tasks[tn] = type((tn := 'PostDLC'), (vtasks.EphysPostDLC,), {})( - **kwargs, cameras=subset_cams, 
trials_collection=trials_collection, parents=parents - ) + **kwargs, cameras=subset_cams, trials_collection=trials_collection, parents=parents) # Audio tasks if 'microphone' in devices: - ((microphone, micro_kwargs),) = devices['microphone'].items() + (microphone, micro_kwargs), = devices['microphone'].items() micro_kwargs['device_collection'] = micro_kwargs.pop('collection') if sync_kwargs['sync'] == 'bpod': tasks['AudioRegisterRaw'] = type('AudioRegisterRaw', (atasks.AudioSync,), {})( - **kwargs, **sync_kwargs, **micro_kwargs, collection=micro_kwargs['device_collection'] - ) + **kwargs, **sync_kwargs, **micro_kwargs, collection=micro_kwargs['device_collection']) elif sync_kwargs['sync'] == 'nidq': tasks['AudioRegisterRaw'] = type('AudioRegisterRaw', (atasks.AudioCompress,), {})(**kwargs, **micro_kwargs) # Widefield tasks if 'widefield' in devices: - ((_, wfield_kwargs),) = devices['widefield'].items() + (_, wfield_kwargs), = devices['widefield'].items() wfield_kwargs['device_collection'] = wfield_kwargs.pop('collection') tasks['WideFieldRegisterRaw'] = type('WidefieldRegisterRaw', (wtasks.WidefieldRegisterRaw,), {})( - **kwargs, **wfield_kwargs - ) + **kwargs, **wfield_kwargs) tasks['WidefieldCompress'] = type('WidefieldCompress', (wtasks.WidefieldCompress,), {})( - **kwargs, **wfield_kwargs, parents=[tasks['WideFieldRegisterRaw']] - ) + **kwargs, **wfield_kwargs, parents=[tasks['WideFieldRegisterRaw']]) tasks['WidefieldPreprocess'] = type('WidefieldPreprocess', (wtasks.WidefieldPreprocess,), {})( - **kwargs, **wfield_kwargs, parents=[tasks['WidefieldCompress']] - ) + **kwargs, **wfield_kwargs, parents=[tasks['WidefieldCompress']]) tasks['WidefieldSync'] = type('WidefieldSync', (wtasks.WidefieldSync,), {})( - **kwargs, - **wfield_kwargs, - **sync_kwargs, - parents=[tasks['WideFieldRegisterRaw'], tasks['WidefieldCompress']] + sync_tasks, - ) + **kwargs, **wfield_kwargs, **sync_kwargs, + parents=[tasks['WideFieldRegisterRaw'], tasks['WidefieldCompress']] + sync_tasks) tasks['WidefieldFOV'] = type('WidefieldFOV', (wtasks.WidefieldFOV,), {})( - **kwargs, **wfield_kwargs, parents=[tasks['WidefieldPreprocess']] - ) + **kwargs, **wfield_kwargs, parents=[tasks['WidefieldPreprocess']]) # Mesoscope tasks if 'mesoscope' in devices: - ((_, mscope_kwargs),) = devices['mesoscope'].items() + (_, mscope_kwargs), = devices['mesoscope'].items() mscope_kwargs['device_collection'] = mscope_kwargs.pop('collection') tasks['MesoscopeRegisterSnapshots'] = type('MesoscopeRegisterSnapshots', (mscope_tasks.MesoscopeRegisterSnapshots,), {})( - **kwargs, **mscope_kwargs - ) + **kwargs, **mscope_kwargs) tasks['MesoscopePreprocess'] = type('MesoscopePreprocess', (mscope_tasks.MesoscopePreprocess,), {})( - **kwargs, **mscope_kwargs - ) + **kwargs, **mscope_kwargs) tasks['MesoscopeFOV'] = type('MesoscopeFOV', (mscope_tasks.MesoscopeFOV,), {})( - **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']] - ) + **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']]) tasks['MesoscopeSync'] = type('MesoscopeSync', (mscope_tasks.MesoscopeSync,), {})( - **kwargs, **mscope_kwargs, **sync_kwargs - ) + **kwargs, **mscope_kwargs, **sync_kwargs) tasks['MesoscopeCompress'] = type('MesoscopeCompress', (mscope_tasks.MesoscopeCompress,), {})( - **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']] - ) + **kwargs, **mscope_kwargs, parents=[tasks['MesoscopePreprocess']]) if 'neurophotometrics' in devices: # note: devices['neurophotometrics'] is the acquisition_description From 
1e9f41ae5bf8184910e3f98d4497f3579f93998b Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 24 Sep 2025 14:35:51 +0100 Subject: [PATCH 62/80] undo unlreated changes --- brainbox/io/one.py | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/brainbox/io/one.py b/brainbox/io/one.py index 5b3a59cf9..c1c86726e 100644 --- a/brainbox/io/one.py +++ b/brainbox/io/one.py @@ -1017,7 +1017,7 @@ def timesprobe2times(self, values, direction='forward'): elif direction == 'reverse': return self._sync['reverse'](values) / self._sync['fs'] - def samples2times(self, values, direction='forward', band='ap'): + def samples2times(self, values, direction='forward'): """ Converts ephys sample values to session main clock seconds :param values: numpy array of times in seconds or samples to resync @@ -1025,8 +1025,6 @@ def samples2times(self, values, direction='forward', band='ap'): (seconds main time to samples probe time) :return: """ - if band == 'lf': - values *= 12 self._get_probe_info() return self._sync[direction](values) @@ -1052,8 +1050,8 @@ def raster(self, spikes, channels, save_dir=None, br=None, label='raster', time_ :param **kwargs: kwargs passed to `driftmap()` (optional) :return: """ - br = BrainRegions() if br is None else br - time_series = {} if time_series is None else time_series + br = br or BrainRegions() + time_series = time_series or {} fig, axs = plt.subplots(2, 2, gridspec_kw={ 'width_ratios': [.95, .05], 'height_ratios': [.1, .9]}, figsize=(16, 9), sharex='col') axs[0, 1].set_axis_off() @@ -1096,20 +1094,13 @@ def plot_rawdata_snippet(self, sr, spikes, clusters, t0, save_dir=None, label='raster', gain=-93, - title=None, - alpha=0.3, - processing='destripe'): + title=None): # compute the raw data offset and destripe, we take 400ms around t0 first_sample, last_sample = (int((t0 - 0.2) * sr.fs), int((t0 + 0.2) * sr.fs)) raw = sr[first_sample:last_sample, :-sr.nsync].T channel_labels = channels['labels'] if (channels is not None) and ('labels' in channels) else True - if processing == 'destripe': - samples = ibldsp.voltage.destripe(raw, sr.fs, channel_labels=channel_labels) - else: - import scipy.signal - sos = scipy.signal.butter(**{"N": 3, "Wn": 300 / sr.fs * 2, "btype": "highpass"}, output="sos") - samples = scipy.signal.sosfiltfilt(sos, raw) + destriped = ibldsp.voltage.destripe(raw, sr.fs, channel_labels=channel_labels) # filter out the spikes according to good/bad clusters and to the time slice spike_sel = slice(*np.searchsorted(spikes['samples'], [first_sample, last_sample])) ss = spikes['samples'][spike_sel] @@ -1119,9 +1110,9 @@ def plot_rawdata_snippet(self, sr, spikes, clusters, t0, title = self._default_plot_title(spikes) # display the raw data snippet with spikes overlaid fig, axs = plt.subplots(1, 2, gridspec_kw={'width_ratios': [.95, .05]}, figsize=(16, 9), sharex='col') - Density(samples, fs=sr.fs, taxis=1, gain=gain, ax=axs[0], t0=t0 - 0.2, unit='s') - axs[0].scatter(ss[sok] / sr.fs, sc[sok], color="green", alpha=alpha) - axs[0].scatter(ss[~sok] / sr.fs, sc[~sok], color="red", alpha=alpha) + Density(destriped, fs=sr.fs, taxis=1, gain=gain, ax=axs[0], t0=t0 - 0.2, unit='s') + axs[0].scatter(ss[sok] / sr.fs, sc[sok], color="green", alpha=0.5) + axs[0].scatter(ss[~sok] / sr.fs, sc[~sok], color="red", alpha=0.5) axs[0].set(title=title, xlim=[t0 - 0.035, t0 + 0.035]) # adds the channel locations if available if (channels is not None) and ('atlas_id' in channels): @@ -1323,7 +1314,7 @@ def _find_behaviour_collection(self, obj): 
f'e.g sl.load_{obj}(collection="{collections[0]}")') raise ALFMultipleCollectionsFound - def load_trials(self, collection=None, revision=None): + def load_trials(self, collection=None): """ Function to load trials data into SessionLoader.trials @@ -1332,13 +1323,13 @@ def load_trials(self, collection=None, revision=None): collection: str Alf collection of trials data """ - revision = self.revision if revision is None else revision + if not collection: collection = self._find_behaviour_collection('trials') # itiDuration frequently has a mismatched dimension, and we don't need it, exclude using regex self.one.wildcards = False self.trials = self.one.load_object( - self.eid, 'trials', collection=collection, attribute=r'(?!itiDuration).*', revision=revision or None).to_df() + self.eid, 'trials', collection=collection, attribute=r'(?!itiDuration).*', revision=self.revision or None).to_df() self.one.wildcards = True self.data_info.loc[self.data_info['name'] == 'trials', 'is_loaded'] = True From 242be1e198896d7c75bedc57953c23a942cd7c35 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 24 Sep 2025 14:36:35 +0100 Subject: [PATCH 63/80] removing unrelated file (erronously added) --- alyx_task.pkl | Bin 2347 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 alyx_task.pkl diff --git a/alyx_task.pkl b/alyx_task.pkl deleted file mode 100644 index 4b328c69c715e72448be87278c916882074ddd60..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2347 zcmcgu-D=!M6n5gD#-^e4hh7v411+SKm5^pe8fhslCM6*?b<^6E(y)k;G`l;sR#G%0 zyL%Dn1I*?0eR|W^=tJ}gdPa_8L*=wiAcDgzBh7cd`S$$%w)H#O-58!{_A_%^mezbq zilm?ki@Ye!Q;!x&?xhiCUR(x@U=oUwQET36c_(VE>qOfqshe&MJ1Z`0QD+s;Gn^g$V9k!wT=&YFJNKJa zTL~?!*{b;pmJc7uyc3U?jc!&#ck8eBzdu^n1$^3evgxE=+k>CYu57Ym$-9NH2is=5 z5~rfF2ODN{(e}eyxb>FV?D)zaY?-@Su0+=KI#Z%h_RzdLY>0}tN|fiD55M~R&%gcv z!OWicRTRDEc?E|f>_!lTGjx9`ijx7cZhMX9 zvJz?>Ccp%buI_LBNRg2OP5lQ<>`N!f@OJP9H;iLQ!wSqowa%x_-51fb%1>Ak6c zC*L80S_(PqLg2>dlNs_n4~2e+364pWGMXebAOt~iZt7CG#PwX5x$j2dLB9#ul|SJP z>+>=K|4X2k0$eJ@2nmvqKq4h6OQTVRph-Wk#nx;O)fl=CbYbBnwC1DBT7MjduGwJb z2LyUHwq_T4Xuleoy_*tt<}!z}Zl76m`(fagJ5^I~Jq!SwKOXN}rIsIGKr$~3k3&4A z@stps#Zkmy0N8c8m93f*eD6TP;45j(UeQ#QD9(Z2@Az5vqKU3EH!)cA_HYimdR@C_ ze#_iF2NIeO9{|7Q=Vtuudqb%zfa{5y3kC$Ux&Me+^8qiO_7Y|hxI?Czxjuv8=8{?8 G_x?ZOxz5P| From f2276bfc422664c644c4eb86a393986b5d1eeb56 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 24 Sep 2025 14:39:07 +0100 Subject: [PATCH 64/80] undo unrelated changes --- ibllib/pipes/ephys_tasks.py | 20 ++-- ibllib/pipes/local_server.py | 2 +- ibllib/pipes/video_tasks.py | 173 ++++++++++++----------------------- 3 files changed, 73 insertions(+), 122 deletions(-) diff --git a/ibllib/pipes/ephys_tasks.py b/ibllib/pipes/ephys_tasks.py index 253e13420..cb9a0099b 100644 --- a/ibllib/pipes/ephys_tasks.py +++ b/ibllib/pipes/ephys_tasks.py @@ -1,10 +1,8 @@ -import importlib import logging from pathlib import Path import re import shutil import subprocess -import sys import traceback import packaging.version @@ -126,7 +124,7 @@ class EphysCompressNP1(base_tasks.EphysTask): priority = 90 cpu = 2 io_charge = 100 # this jobs reads raw ap files - job_size = 'large' + job_size = 'small' @property def signature(self): @@ -657,7 +655,15 @@ def scratch_folder_run(self): For a scratch drive at /mnt/h0 we would have the following temp dir: /mnt/h0/iblsorter_1.8.0_CSHL071_2020-10-04_001_probe01/ """ - scratch_drive = self.scratch_folder if self.scratch_folder else Path('/scratch') + # get the scratch drive from the shell 
script + if self.scratch_folder is None: + with open(self.SHELL_SCRIPT) as fid: + lines = fid.readlines() + line = [line for line in lines if line.startswith("SCRATCH_DRIVE=")][0] + m = re.search(r"\=(.*?)(\#|\n)", line)[0] + scratch_drive = Path(m[1:-1].strip()) + else: + scratch_drive = self.scratch_folder assert scratch_drive.exists(), f"Scratch drive {scratch_drive} not found" # get the version of the sorter self.version = self._fetch_iblsorter_version(self.SORTER_REPOSITORY) @@ -729,11 +735,11 @@ def _run_iblsort(self, ap_file): self.FORCE_RERUN = True self.scratch_folder_run.mkdir(parents=True, exist_ok=True) check_nvidia_driver() - # this is the best way I found to check if iblsorter is installed and available without a try block - if 'iblsorter' in sys.modules and importlib.util.find_spec('iblsorter.ibl') is not None: + try: + # if pykilosort is in the environment, use the installed version within the task import iblsorter.ibl # noqa iblsorter.ibl.run_spike_sorting_ibl(bin_file=ap_file, scratch_dir=self.scratch_folder_run, delete=False) - else: + except ImportError: command2run = f"{self.SHELL_SCRIPT} {ap_file} {self.scratch_folder_run}" _logger.info(command2run) process = subprocess.Popen( diff --git a/ibllib/pipes/local_server.py b/ibllib/pipes/local_server.py index c02ae11c3..92f1cf39a 100644 --- a/ibllib/pipes/local_server.py +++ b/ibllib/pipes/local_server.py @@ -106,7 +106,7 @@ def job_creator(root_path, one=None, dry=False, rerun=False): if not one: one = ONE(cache_rest=None) rc = IBLRegistrationClient(one=one) - flag_files = Path(root_path).glob('**/raw_session.flag') + flag_files = Path(root_path).glob('*/????-??-??/*/raw_session.flag') flag_files = filter(lambda x: is_session_path(x.parent), flag_files) pipes = [] all_datasets = [] diff --git a/ibllib/pipes/video_tasks.py b/ibllib/pipes/video_tasks.py index 5afe80796..e0ced2695 100644 --- a/ibllib/pipes/video_tasks.py +++ b/ibllib/pipes/video_tasks.py @@ -328,7 +328,7 @@ def _run(self, update=True, **kwargs): class DLC(base_tasks.VideoTask): """ This task relies on a correctly installed dlc environment as per - https://github.com/int-brain-lab/iblvideo#installing-dlc-locally-on-an-ibl-server---tensorflow-2120 + https://docs.google.com/document/d/1g0scP6_3EmaXCU4SsDNZWwDTaD9MG0es_grLA-d0gh0/edit# If your environment is set up otherwise, make sure that you set the respective attributes: t = EphysDLC(session_path) @@ -341,7 +341,6 @@ class DLC(base_tasks.VideoTask): level = 2 force = True job_size = 'large' - env = 'dlc' dlcenv = Path.home().joinpath('Documents', 'PYTHON', 'envs', 'dlcenv', 'bin', 'activate') scripts = Path.home().joinpath('Documents', 'PYTHON', 'iblscripts', 'deploy', 'serverpc', 'dlc') @@ -358,41 +357,25 @@ def signature(self): return signature def _check_dlcenv(self): - """ - Check DLC environment and return iblvideo version. - - Attempts to import iblvideo directly. If unsuccessful, checks for necessary - scripts and environment, then retrieves version via subprocess. 
- - Returns: - tuple: (version: str, needs_subprocess: bool) - """ - try: - import iblvideo - version = iblvideo.__version__ - needs_subprocess = False - _logger.info(f'Current environment contains iblvideo version {self.version}') - except ImportError: - # Check that scripts are present, dlcenv can be activated and get iblvideo version - assert len(list(self.scripts.rglob('run_dlc.*'))) == 2, \ - f'Scripts run_dlc.sh and run_dlc.py do not exist in {self.scripts}' - assert len(list(self.scripts.rglob('run_motion.*'))) == 2, \ - f'Scripts run_motion.sh and run_motion.py do not exist in {self.scripts}' - assert self.dlcenv.exists(), f'DLC environment does not exist in assumed location {self.dlcenv}' - command2run = f"source {self.dlcenv}; python -c 'import iblvideo; print(iblvideo.__version__)'" - process = subprocess.Popen( - command2run, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - executable='/bin/bash' - ) - info, error = process.communicate() - if process.returncode != 0: - raise AssertionError(f"DLC environment check failed\n{error.decode('utf-8')}") - version = info.decode('utf-8').strip().split('\n')[-1] - needs_subprocess = True - return version, needs_subprocess + """Check that scripts are present, dlcenv can be activated and get iblvideo version""" + assert len(list(self.scripts.rglob('run_dlc.*'))) == 2, \ + f'Scripts run_dlc.sh and run_dlc.py do not exist in {self.scripts}' + assert len(list(self.scripts.rglob('run_motion.*'))) == 2, \ + f'Scripts run_motion.sh and run_motion.py do not exist in {self.scripts}' + assert self.dlcenv.exists(), f'DLC environment does not exist in assumed location {self.dlcenv}' + command2run = f"source {self.dlcenv}; python -c 'import iblvideo; print(iblvideo.__version__)'" + process = subprocess.Popen( + command2run, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + executable='/bin/bash' + ) + info, error = process.communicate() + if process.returncode != 0: + raise AssertionError(f"DLC environment check failed\n{error.decode('utf-8')}") + version = info.decode('utf-8').strip().split('\n')[-1] + return version @staticmethod def _video_intact(file_mp4): @@ -403,75 +386,6 @@ def _video_intact(file_mp4): cap.release() return intact - def _run_dlc(self, file_mp4, cam, overwrite, flag_subprocess=True): - try: - if flag_subprocess: - _logger.info(f'iblvideo version {self.version}') - command2run = f"{self.scripts.joinpath('run_dlc.sh')} {str(self.dlcenv)} {file_mp4} {overwrite}" - _logger.info(command2run) - process = subprocess.Popen( - command2run, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - executable='/bin/bash', - ) - info, error = process.communicate() - # info_str = info.decode("utf-8").strip() - # _logger.info(info_str) - if process.returncode != 0: - error_str = error.decode('utf-8').strip() - _logger.error(f'DLC failed for {cam}Camera.\n\n' - f'++++++++ Output of subprocess for debugging ++++++++\n\n' - f'{error_str}\n' - f'++++++++++++++++++++++++++++++++++++++++++++\n') - return process.returncode - pass - else: - from iblvideo import download_weights - from iblvideo.pose_dlc import dlc - path_dlc = download_weights() - dlc_result, _ = dlc(file_mp4, path_dlc=path_dlc, force=overwrite) - return 0 - except Exception as e: - _logger.error(f'An error occurred while running DLC for {cam}Camera: {e}') - _logger.error(traceback.format_exc()) - return -1 - - def _run_motion_energy(self, file_mp4, dlc_result, flag_subprocess=True): - if flag_subprocess: - command2run = 
f"{self.scripts.joinpath('run_motion.sh')} {str(self.dlcenv)} {file_mp4} {dlc_result}" - _logger.info(command2run) - process = subprocess.Popen( - command2run, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - executable='/bin/bash', - ) - info, error = process.communicate() - # info_str = info.decode('utf-8').strip() - # _logger.info(info_str) - if process.returncode != 0: - error_str = error.decode('utf-8').strip() - _logger.error(f'Motion energy failed for {file_mp4}.\n\n' - f'++++++++ Output of subprocess for debugging ++++++++\n\n' - f'{error_str}\n' - f'++++++++++++++++++++++++++++++++++++++++++++\n') - return_code = process.returncode - else: # runs the motion energy calculation in the current environment - try: - from iblvideo.motion_energy import motion_energy - _ = motion_energy(file_mp4, dlc_result) - return_code = 0 - except Exception: - _logger.error(f'Motion energy failed for {file_mp4}.\n\n' - f'++++++++ Output of subprocess for debugging ++++++++\n\n' - f'{traceback.format_exc()}\n' - f'++++++++++++++++++++++++++++++++++++++++++++\n') - return_code = -1 - return return_code - def _run(self, cams=None, overwrite=False): # Check that the cams are valid for DLC, remove the ones that aren't candidate_cams = cams or self.cameras @@ -505,24 +419,55 @@ def _run(self, cams=None, overwrite=False): _logger.error(f'Corrupt raw video file {file_mp4}') self.status = -1 continue - # Check that dlc environment is ok, shell scripts exists, and get iblvideo version, GPU addressable + self.version = self._check_dlcenv() + _logger.info(f'iblvideo version {self.version}') check_nvidia_driver() - self.version, flag_subprocess = self._check_dlcenv() - # Step 1: Run DLC for this camera _logger.info(f'Running DLC on {cam}Camera.') - return_code = self._run_dlc(file_mp4, cam, overwrite, flag_subprocess=flag_subprocess) - if return_code != 0: + command2run = f"{self.scripts.joinpath('run_dlc.sh')} {str(self.dlcenv)} {file_mp4} {overwrite}" + _logger.info(command2run) + process = subprocess.Popen( + command2run, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + executable='/bin/bash', + ) + info, error = process.communicate() + # info_str = info.decode("utf-8").strip() + # _logger.info(info_str) + if process.returncode != 0: + error_str = error.decode('utf-8').strip() + _logger.error(f'DLC failed for {cam}Camera.\n\n' + f'++++++++ Output of subprocess for debugging ++++++++\n\n' + f'{error_str}\n' + f'++++++++++++++++++++++++++++++++++++++++++++\n') self.status = -1 + # We dont' run motion energy, or add any files if dlc failed to run continue dlc_result = next(self.session_path.joinpath('alf').glob(f'_ibl_{cam}Camera.dlc*.pqt')) actual_outputs.append(dlc_result) - # Step 2: Compute Motion Energy for this camera _logger.info(f'Computing motion energy for {cam}Camera') - return_code = self._run_motion_energy(file_mp4, dlc_result, flag_subprocess=flag_subprocess) - if return_code != 0: + command2run = f"{self.scripts.joinpath('run_motion.sh')} {str(self.dlcenv)} {file_mp4} {dlc_result}" + _logger.info(command2run) + process = subprocess.Popen( + command2run, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + executable='/bin/bash', + ) + info, error = process.communicate() + # info_str = info.decode('utf-8').strip() + # _logger.info(info_str) + if process.returncode != 0: + error_str = error.decode('utf-8').strip() + _logger.error(f'Motion energy failed for {cam}Camera.\n\n' + f'++++++++ Output of subprocess for debugging ++++++++\n\n' + 
f'{error_str}\n' + f'++++++++++++++++++++++++++++++++++++++++++++\n') self.status = -1 continue actual_outputs.append(next(self.session_path.joinpath('alf').glob( From fee6bc6e737e1b42e44dc86cd011c3656e96e173 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 24 Sep 2025 18:22:18 +0100 Subject: [PATCH 65/80] passive task fixtures moved from iblrig to ibllib --- ibllib/pipes/neurophotometrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 0b19ce226..abcc61f84 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -13,8 +13,8 @@ from nptdms import TdmsFile from abc import abstractmethod +import iblphotometry from iblphotometry import fpio -from iblrig_tasks import _iblrig_tasks_passiveChoiceWorld from one.api import ONE import json @@ -531,7 +531,7 @@ def __init__( def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # load the fixtures - from the relative delays between trials, an "absolute" time vector is # created that is used for the synchronization - fixtures_path = Path(_iblrig_tasks_passiveChoiceWorld.__file__).parent / 'passiveChoiceWorld_trials_fixtures.pqt' + fixtures_path = Path(iblphotometry.__file__).parent / 'iblphotometry_tests' / 'fixtures' / 'passiveChoiceWorld_trials_fixtures.pqt' # getting the task_settings with open(self.session_path / self.collection / '_iblrig_taskSettings.raw.json', 'r') as fH: From af83b9e7c578de67867bb28062fc68eb2c6e28b8 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 24 Sep 2025 18:36:58 +0100 Subject: [PATCH 66/80] flake8 --- ibllib/pipes/neurophotometrics.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index abcc61f84..8c317ae03 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -531,7 +531,9 @@ def __init__( def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # load the fixtures - from the relative delays between trials, an "absolute" time vector is # created that is used for the synchronization - fixtures_path = Path(iblphotometry.__file__).parent / 'iblphotometry_tests' / 'fixtures' / 'passiveChoiceWorld_trials_fixtures.pqt' + fixtures_path = ( + Path(iblphotometry.__file__).parent / 'iblphotometry_tests' / 'fixtures' / 'passiveChoiceWorld_trials_fixtures.pqt' + ) # getting the task_settings with open(self.session_path / self.collection / '_iblrig_taskSettings.raw.json', 'r') as fH: From 6b6cccfe6e1659dc5b6a225b20820da66707e90e Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Wed, 24 Sep 2025 18:44:53 +0100 Subject: [PATCH 67/80] undo changes in requirements.txt --- requirements.txt | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/requirements.txt b/requirements.txt index c7a5726dc..6204e1184 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,35 +1,24 @@ -# ibl libraries -ONE-api>=3.0.0 boto3 click>=7.0.0 colorlog>=4.0.2 flake8>=3.7.8 globus-sdk graphviz -ibl-neuropixel>=1.7.1 -ibl-style -iblatlas>=0.5.3 -iblqt>=0.4.2 -iblutil>=1.13.0 -imagecodecs # used to convert tif snapshots to png when registering mesoscope snapshots (also requires skimage) matplotlib>=3.0.3 -mtscomp>=1.0.1 -nptdms numba>=0.56 numpy>=1.18,<=2.2 # numpy 2.3 is not compatible with numba - ETA end of June 2025 nptdms opencv-python-headless pandas -phylib>=2.6.0 -psychofit pyarrow pynrrd>=0.4.0 -pyqt5 pytest requests>=2.22.0 
-scikit-image # this is a widefield requirement missing as of July 2023, we may remove it once wfield has this figured out scikit-learn>=0.22.1 scipy>=1.7.0 +scikit-image # this is a widefield requirement missing as of July 2023, we may remove it once wfield has this figured out +imagecodecs # used to convert tif snapshots to png when registering mesoscope snapshots (also requires skimage) +sparse seaborn>=0.9.0 tqdm>=4.32.1 # ibl libraries @@ -42,6 +31,6 @@ ONE-api>=3.2.0 phylib>=2.6.0 psychofit slidingRP>=1.1.1 # steinmetz lab refractory period metrics -sparse -tqdm>=4.32.1 -ibl-photometry +pyqt5 +ibl-style +iblphotometry>=0.1.2 \ No newline at end of file From 0315630f5d4076302e79fe1347eaa6a32e06829c Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Thu, 25 Sep 2025 14:56:41 +0100 Subject: [PATCH 68/80] removed duplicate tdms reader and unified timestamps and ttl duration extraction --- ibllib/pipes/neurophotometrics.py | 108 ++++++------------------------ 1 file changed, 19 insertions(+), 89 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 8c317ae03..64b44887d 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -58,10 +58,9 @@ def extract_timestamps_from_tdms_file( tdms_filepath: Path, save_path: Optional[Path] = None, chunk_size=10000, - extract_durations: bool = False, ) -> dict: """extractor for tdms files as written by the daqami software, configured for neurophotometrics - experiments: Frameclock is in AI7, DI1-4 are the bpod sync signals + experiments: Frameclock is in an analog channel (AI?), DI1-4 are the bpod sync signals Parameters ---------- @@ -75,7 +74,8 @@ def extract_timestamps_from_tdms_file( Returns ------- dict - a dict with the tdms channel names as keys and the timestamps of the rising fronts + a dict with the tdms channel names as keys and 'positive' the timestamps of the rising edges + 'negative' the falling edges """ # _logger.info(f'extracting timestamps from tdms file: {tdms_filepath}') @@ -100,72 +100,6 @@ def extract_timestamps_from_tdms_file( vals = df[digital_col].values.astype('int8') digital_channel_names = ['DI0', 'DI1', 'DI2', 'DI3'] - # ini - timestamps = {} - for ch in digital_channel_names: - timestamps[ch] = [] - - # chunked loop for memory efficiency - if chunk_size is not None: - n_chunks = df.shape[0] // chunk_size - for i in range(n_chunks): - vals_ = vals[i * chunk_size: (i + 1) * chunk_size] - # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') - data = _int2digital_channels(vals_) - - for j, name in enumerate(digital_channel_names): - ix = np.where(np.diff(data[:, j]) == 1)[0] + (chunk_size * i) - timestamps[name].append(ix / fs) - - for ch in digital_channel_names: - timestamps[ch] = np.concatenate(timestamps[ch]) - else: - data = _int2digital_channels(vals) - for j, name in enumerate(digital_channel_names): - ix = np.where(np.diff(data[:, j]) == 1)[0] - timestamps[name].append(ix / fs) - - if has_analog_group: - # frameclock data is recorded on an analog channel - for channel in analog_group.channels(): - signal = (channel.data > 2.5).astype('int32') # assumes 0-5V - timestamps[channel.name] = np.where(np.diff(signal) == 1)[0] / fs - - if save_path is not None: - _logger.info(f'saving extracted timestamps to: {save_path}') - with open(save_path, 'wb') as fH: - pickle.dump(timestamps, fH) - - return timestamps - - -def extract_ttl_durations_from_tdms_file( - tdms_filepath: Path, - save_path: Optional[Path] = None, - 
chunk_size=10000, -) -> dict: - _logger.info(f'extracting ttl_durations from tdms file: {tdms_filepath}') - - # this should be 10kHz - tdms_file = TdmsFile.read(tdms_filepath) - groups = tdms_file.groups() - - # this unfortunate hack is in here because there are a bunch of sessions - # where the frameclock is on DI0 - if len(groups) == 1: - has_analog_group = False - (digital_group,) = groups - if len(groups) == 2: - has_analog_group = True - analog_group, digital_group = groups - fs = digital_group.properties['ScanRate'] # this should be 10kHz - df = tdms_file.as_dataframe() - - # inferring digital col name - (digital_col,) = [col for col in df.columns if 'Digital' in col] - vals = df[digital_col].values.astype('int8') - digital_channel_names = ['DI0', 'DI1', 'DI2', 'DI3'] - # ini timestamps = {} for ch in digital_channel_names: @@ -175,7 +109,7 @@ def extract_ttl_durations_from_tdms_file( if chunk_size is not None: n_chunks = df.shape[0] // chunk_size for i in range(n_chunks): - vals_ = vals[i * chunk_size: (i + 1) * chunk_size] + vals_ = vals[i * chunk_size : (i + 1) * chunk_size] # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') data = _int2digital_channels(vals_) @@ -193,7 +127,7 @@ def extract_ttl_durations_from_tdms_file( for j, name in enumerate(digital_channel_names): ix = np.where(np.diff(data[:, j]) == 1)[0] timestamps[name]['positive'].append(ix / fs) - ix = np.where(np.diff(data[:, j]) == -1)[0] + ix = np.where(np.diff(data[:, j]) == 1)[0] timestamps[name]['negative'].append(ix / fs) if has_analog_group: @@ -204,17 +138,12 @@ def extract_ttl_durations_from_tdms_file( timestamps[channel.name]['positive'] = np.where(np.diff(signal) == 1)[0] / fs timestamps[channel.name]['negative'] = np.where(np.diff(signal) == -1)[0] / fs - # the actual diff - durations = {} - for channel in timestamps.keys(): - durations[channel] = timestamps[channel]['negative'] - timestamps[channel]['positive'] - if save_path is not None: - _logger.info(f'saving extracted ttl durations to: {save_path}') + _logger.info(f'saving extracted timestamps to: {save_path}') with open(save_path, 'wb') as fH: - pickle.dump(durations, fH) + pickle.dump(timestamps, fH) - return durations + return timestamps def extract_timestamps_from_bpod_jsonable(file_jsonable: str | Path, sync_states_names: List[str]): @@ -453,7 +382,7 @@ def load_data(self) -> pd.DataFrame: sync_channel_name = f'AI{self.sync_kwargs["frameclock_channel"]}' else: sync_channel_name = self.sync_kwargs['frameclock_channel'] - frame_timestamps = self.timestamps[sync_channel_name] + frame_timestamps = self.timestamps[sync_channel_name]['positive'] # compare number of frame timestamps # and put them in the photometry_df SystemTimestamp column @@ -504,7 +433,7 @@ def load_data(self) -> pd.DataFrame: def _get_neurophotometrics_timestamps(self) -> np.ndarray: # get the sync channel and the corresponding timestamps - timestamps_nph = self.timestamps[f'DI{self.sync_channel}'] + timestamps_nph = self.timestamps[f'DI{self.sync_channel}']['positive'] # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods @@ -532,7 +461,7 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # load the fixtures - from the relative delays between trials, an "absolute" time vector is # created that is used for the synchronization fixtures_path = ( - Path(iblphotometry.__file__).parent / 'iblphotometry_tests' / 'fixtures' / 'passiveChoiceWorld_trials_fixtures.pqt' + 
Path(iblphotometry.__file__).parent.parent / 'iblphotometry_tests' / 'fixtures' / 'passiveChoiceWorld_trials_fixtures.pqt' ) # getting the task_settings @@ -616,7 +545,7 @@ def obj_fun(x, mic_go_cue_times_bpod, fixtures_df): self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath, save_path=timestamps_filepath) sync_channel = self.session_params['devices']['neurophotometrics']['sync_channel'] - valve_times_nph = self.timestamps[f'DI{sync_channel}'] + valve_times_nph = self.timestamps[f'DI{sync_channel}']['positive'] sync_fun, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( valve_times_nph, valve_times_bpod, return_indices=True, linear=True @@ -654,15 +583,16 @@ def obj_fun(x, mic_go_cue_times_bpod, fixtures_df): # writing the passive events table # get the valve open duration - ttl_durations_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdurations.pkl' - if self.load_timestamps and ttl_durations_filepath.exists(): - with open(ttl_durations_filepath, 'rb') as fH: - ttl_durations = pickle.load(fH) + timestamps_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.pkl' + if self.load_timestamps and timestamps_filepath.exists(): + with open(timestamps_filepath, 'rb') as fH: + self.timestamps = pickle.load(fH) else: # extract timestamps: tdms_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.raw.tdms' - ttl_durations = extract_ttl_durations_from_tdms_file(tdms_filepath, save_path=ttl_durations_filepath) + self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath, save_path=timestamps_filepath) - valve_open_dur = np.median(ttl_durations[f'DI{sync_channel}'][ix_nph]) + ttl_durations = self.timestamps[f'DI{sync_channel}']['negative'] - self.timestamps[f'DI{sync_channel}']['positive'] + valve_open_dur = np.median(ttl_durations[ix_nph]) passiveStims_df = pd.DataFrame( dict( valveOn=fixtures_df.groupby('stim_type').get_group('V')['t_bpod'], From 613743029555f8f9ffc59c3756b78bf9b9266405 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 26 Sep 2025 14:41:52 +0100 Subject: [PATCH 69/80] typo fix --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6204e1184..b907c2ad7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,4 +33,4 @@ psychofit slidingRP>=1.1.1 # steinmetz lab refractory period metrics pyqt5 ibl-style -iblphotometry>=0.1.2 \ No newline at end of file +ibl-photometry>=0.1.2 \ No newline at end of file From 996fcbeee35731fee3d65caadd9d60a1356f9ebe Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 26 Sep 2025 14:48:05 +0100 Subject: [PATCH 70/80] flake8 --- ibllib/pipes/neurophotometrics.py | 5 ++-- ibllib/tests/test_neurophotometrics.py | 32 ++++++++++++++------------ 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 64b44887d..7c30b6a2d 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -109,7 +109,7 @@ def extract_timestamps_from_tdms_file( if chunk_size is not None: n_chunks = df.shape[0] // chunk_size for i in range(n_chunks): - vals_ = vals[i * chunk_size : (i + 1) * chunk_size] + vals_ = vals[i * chunk_size: (i + 1) * chunk_size] # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') data = _int2digital_channels(vals_) @@ -461,7 +461,8 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # load the fixtures - from the relative delays between 
trials, an "absolute" time vector is # created that is used for the synchronization fixtures_path = ( - Path(iblphotometry.__file__).parent.parent / 'iblphotometry_tests' / 'fixtures' / 'passiveChoiceWorld_trials_fixtures.pqt' + Path(iblphotometry.__file__).parent.parent / 'iblphotometry_tests' / + 'fixtures' / 'passiveChoiceWorld_trials_fixtures.pqt' ) # getting the task_settings diff --git a/ibllib/tests/test_neurophotometrics.py b/ibllib/tests/test_neurophotometrics.py index fcad9d379..923946509 100644 --- a/ibllib/tests/test_neurophotometrics.py +++ b/ibllib/tests/test_neurophotometrics.py @@ -1,17 +1,17 @@ """Tests for ibllib.pipes.mesoscope_tasks.""" -import sys import unittest -from unittest import mock import tempfile from pathlib import Path - - +import iblphotometry_tests +from ibllib.pipes.neurophotometrics import FibrePhotometryBpodSync from ibllib.io import session_params # Mock suit2p which is imported in MesoscopePreprocess -attrs = {'default_ops.return_value': {}} -sys.modules['suite2p'] = mock.MagicMock(**attrs) +# attrs = {'default_ops.return_value': {}} +# sys.modules['suite2p'] = mock.MagicMock(**attrs) + +# from iblscripts.ci.tests import base class TestNeurophotometricsExtractor(unittest.TestCase): @@ -24,14 +24,16 @@ class TestNeurophotometricsExtractor(unittest.TestCase): def setUp(self) -> None: self.tmp_folder = tempfile.TemporaryDirectory() - self.session_folder = Path(self.tmp_folder.name) / 'subject' / '2020-01-01' / '001' - self.raw_photometry_folder = self.session_folder / 'raw_photometry_data' - self.raw_photometry_folder.mkdir(parents=True) + # self.session_folder = Path(self.tmp_folder.name) / 'subject' / '2020-01-01' / '001' + # self.raw_photometry_folder = self.session_folder / 'raw_photometry_data' + # self.raw_photometry_folder.mkdir(parents=True) def test_bpod_extractor(self): - path = Path(__file__).parent / 'fixtures' / 'neurophotometrics' / '_ibl_experiment_description_bpod.yaml' - self.experiment_description = session_params.read_params(path) - - def test_daqami_extractor(self): - path = Path(__file__).parent / 'fixtures' / 'neurophotometrics' / '_ibl_experiment_description_bpod.yaml' - self.experiment_description = session_params.read_params(path) + session_folder = Path(iblphotometry_tests.__file__).parent / 'data' / 'neurophotometrics' / 'raw_bpod_session' + assert session_folder.exists() + self.experiment_description = session_params.read_params(session_folder) + FibrePhotometryBpodSync() + + # def test_daqami_extractor(self): + # path = Path(__file__).parent / 'fixtures' / 'neurophotometrics' / '_ibl_experiment_description_bpod.yaml' + # self.experiment_description = session_params.read_params(path) From 1436e7c73e1fabde3a0ae9029172e8c156021540 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 29 Sep 2025 11:24:54 +0100 Subject: [PATCH 71/80] kwarg added to skip assertion for matching number of timestamps (for kcenias extraction) --- ibllib/pipes/neurophotometrics.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 7c30b6a2d..bbacc1bca 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -176,6 +176,7 @@ def __init__( one: ONE, task_protocol: str | None = None, task_collection: str | None = None, + assert_matching_timestamps: bool = True, **kwargs, ): super().__init__(session_path, one=one, **kwargs) @@ -183,6 +184,7 @@ def __init__( self.kwargs = kwargs self.task_protocol = task_protocol 
self.task_collection = task_collection + self.assert_matching_timestamps = assert_matching_timestamps if self.task_protocol is None: # we will work with the first protocol here @@ -236,7 +238,11 @@ def _get_sync_function(self) -> Tuple[callable, list]: _logger.info(f'synced with drift: {drift_ppm}') # assertion: 95% of timestamps in bpod need to be in timestamps of nph (but not the other way around) - assert timestamps_bpod.shape[0] * 0.95 < ix_bpod.shape[0], 'less than 95% of bpod timestamps matched' + if self.assert_matching_timestamps: + assert timestamps_bpod.shape[0] * 0.95 < ix_bpod.shape[0], 'less than 95% of bpod timestamps matched' + else: + if not (timestamps_bpod.shape[0] * 0.95 < ix_bpod.shape[0]): + _logger.warning(f'less than 95% of bpod timestamps matched. n_timestamps:{timestamps_bpod.shape[0]} matched:{ix_bpod.shape[0]}') valid_bounds = self._get_valid_bounds() return sync_nph_to_bpod_fcn, valid_bounds From d5996ce3ff14a62a5cfae07690162f6a48b08c7c Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 29 Sep 2025 13:43:37 +0100 Subject: [PATCH 72/80] sync config cleaned up and with the option to overwrite --- ibllib/pipes/neurophotometrics.py | 48 ++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index bbacc1bca..ccbfac5a5 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -109,7 +109,7 @@ def extract_timestamps_from_tdms_file( if chunk_size is not None: n_chunks = df.shape[0] // chunk_size for i in range(n_chunks): - vals_ = vals[i * chunk_size: (i + 1) * chunk_size] + vals_ = vals[i * chunk_size : (i + 1) * chunk_size] # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') data = _int2digital_channels(vals_) @@ -177,6 +177,8 @@ def __init__( task_protocol: str | None = None, task_collection: str | None = None, assert_matching_timestamps: bool = True, + sync_states_names: list[str] | None = None, + sync_channel: int | str | None = None, # if set, overwrites the value extracted from the experiment_description **kwargs, ): super().__init__(session_path, one=one, **kwargs) @@ -196,15 +198,26 @@ def __init__( # if not provided, infer self.task_collection = ibllib.io.session_params.get_task_collection(self.session_params, self.task_protocol) + # configuring the sync: state names + if sync_states_names is None: + if 'habituation' in self.task_protocol: + self.sync_states_names = ['iti', 'reward'] + else: + self.sync_states_names = ['trial_start', 'reward', 'exit_state'] + else: + self.sync_states_names = sync_states_names + + # configuring the sync: channel + if sync_channel is None: + self.sync_channel = kwargs.get('sync_channel', self.session_params['devices']['neurophotometrics']['sync_channel']) + else: + self.sync_channel = sync_channel + def _get_bpod_timestamps(self) -> np.ndarray: # the timestamps for syncing, in the time of the bpod - if 'habituation' in self.task_protocol: - sync_states_names = ['iti', 'reward'] - else: - sync_states_names = ['trial_start', 'reward', 'exit_state'] file_jsonable = self.session_path.joinpath(self.task_collection, '_iblrig_taskData.raw.jsonable') - timestamps_bpod = extract_timestamps_from_bpod_jsonable(file_jsonable, sync_states_names) + timestamps_bpod = extract_timestamps_from_bpod_jsonable(file_jsonable, self.sync_states_names) return timestamps_bpod def _get_valid_bounds(self): @@ -242,7 +255,9 @@ def _get_sync_function(self) -> Tuple[callable, list]: assert 
timestamps_bpod.shape[0] * 0.95 < ix_bpod.shape[0], 'less than 95% of bpod timestamps matched' else: if not (timestamps_bpod.shape[0] * 0.95 < ix_bpod.shape[0]): - _logger.warning(f'less than 95% of bpod timestamps matched. n_timestamps:{timestamps_bpod.shape[0]} matched:{ix_bpod.shape[0]}') + _logger.warning( + f'less than 95% of bpod timestamps matched. n_timestamps:{timestamps_bpod.shape[0]} matched:{ix_bpod.shape[0]}' + ) valid_bounds = self._get_valid_bounds() return sync_nph_to_bpod_fcn, valid_bounds @@ -301,11 +316,9 @@ class FibrePhotometryBpodSync(FibrePhotometryBaseSync): def __init__( self, *args, - digital_inputs_channel: int | None = None, **kwargs, ): super().__init__(*args, **kwargs) - self.digital_inputs_channel = digital_inputs_channel @property def signature(self): @@ -327,11 +340,10 @@ def _get_neurophotometrics_timestamps(self) -> np.ndarray: # for bpod based syncing, the timestamps for syncing are in the digital inputs file raw_photometry_folder = self.session_path / self.photometry_collection digital_inputs_filepath = raw_photometry_folder / '_neurophotometrics_fpData.digitalInputs.pqt' - digital_inputs_df = fpio.read_digital_inputs_file( - digital_inputs_filepath, channel=self.session_params['devices']['neurophotometrics']['sync_channel'] - ) - sync_channel = self.session_params['devices']['neurophotometrics']['sync_channel'] - timestamps_nph = digital_inputs_df.groupby('channel').get_group(sync_channel)['times'].values + digital_inputs_df = fpio.read_digital_inputs_file(digital_inputs_filepath, channel=self.sync_channel) + + # get the positive fronts + timestamps_nph = digital_inputs_df.groupby(['polarity', 'channel']).get_group((1, self.sync_channel))['times'].values # TODO replace this rudimentary spacer removal # to implement: detect spacer / remove spacer methods @@ -346,7 +358,7 @@ class FibrePhotometryDAQSync(FibrePhotometryBaseSync): def __init__(self, *args, load_timestamps: bool = True, **kwargs): super().__init__(*args, **kwargs) self.sync_kwargs = kwargs.get('sync_metadata', self.session_params['sync']) - self.sync_channel = kwargs.get('sync_channel', self.session_params['devices']['neurophotometrics']['sync_channel']) + # self.sync_channel = kwargs.get('sync_channel', self.session_params['devices']['neurophotometrics']['sync_channel']) self.load_timestamps = load_timestamps @property @@ -467,8 +479,10 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: # load the fixtures - from the relative delays between trials, an "absolute" time vector is # created that is used for the synchronization fixtures_path = ( - Path(iblphotometry.__file__).parent.parent / 'iblphotometry_tests' / - 'fixtures' / 'passiveChoiceWorld_trials_fixtures.pqt' + Path(iblphotometry.__file__).parent.parent + / 'iblphotometry_tests' + / 'fixtures' + / 'passiveChoiceWorld_trials_fixtures.pqt' ) # getting the task_settings From 1f86801d8ea28e03d303ca20e47560a63b4b69f4 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 29 Sep 2025 17:38:00 +0100 Subject: [PATCH 73/80] bugfix: attempted daq sync for passive sessions in either case --- ibllib/pipes/dynamic_pipeline.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index 2f3acd44a..b6c4338e7 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -613,9 +613,10 @@ def make_pipeline(session_path, **pkwargs): case 'daqami': # for synchronization with the DAQami receiving the sync pulses from the 
individual bpods # as well as the frame clock from the FP3002 - tasks['FibrePhotometryDAQSync'] = type('FibrePhotometryDAQSync', (ptasks.FibrePhotometryDAQSync,), {})( - **kwargs, - ) + if 'passive' not in protocol: # excluding passive session + tasks['FibrePhotometryDAQSync'] = type('FibrePhotometryDAQSync', (ptasks.FibrePhotometryDAQSync,), {})( + **kwargs, + ) p = mtasks.Pipeline(session_path=session_path, **pkwargs) p.tasks = tasks From a375ef526254cead0efa8f82fe1dbf9b436f4d91 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Thu, 2 Oct 2025 13:33:52 +0100 Subject: [PATCH 74/80] passive extractor bugfix --- ibllib/pipes/neurophotometrics.py | 122 ++++++++++++++++++++---------- 1 file changed, 81 insertions(+), 41 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index ccbfac5a5..8d51473d5 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -489,9 +489,14 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: with open(self.session_path / self.collection / '_iblrig_taskSettings.raw.json', 'r') as fH: task_settings = json.load(fH) - # getting the fixtures and creating a relative time vector + # getting the fixtures fixtures_df = pd.read_parquet(fixtures_path).groupby('session_id').get_group(task_settings['SESSION_TEMPLATE_ID']) + # the fixtures table contains delays between the individual stimuli + # in order to get their onset times, we need to do an adjusted cumsum of the intervals + # adjusted by: the length of each stimulus, plus the overhead time to load it and play it + # e.g. state machine time, bonsai delay etc. + # stimulus durations stim_durations = dict( T=task_settings['GO_TONE_DURATION'], @@ -502,11 +507,13 @@ def _run(self, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: for s in fixtures_df['stim_type'].unique(): fixtures_df.loc[fixtures_df['stim_type'] == s, 'delay'] = stim_durations[s] - # the audio go cue times - mic_go_cue_times_bpod = np.load(self.session_path / self.collection / '_iblmic_audioOnsetGoCue.times_mic.npy') + # the audio go cue times - recorded in the time of the mic clock + # this is assumed to be precise so we can use it to fit the unknown overhead + # time for each stim class + go_cue_times_mic = np.load(self.session_path / self.collection / '_iblmic_audioOnsetGoCue.times_mic.npy') # adding the delays - def obj_fun(x, mic_go_cue_times_bpod, fixtures_df): + def obj_fun(x, go_cue_times_mic, fixtures_df): # fit overhead for s in ['T', 'N', 'G', 'V']: if s == 'T' or s == 'N': @@ -520,41 +527,25 @@ def obj_fun(x, mic_go_cue_times_bpod, fixtures_df): fixtures_df['stim_delay'].values + np.roll(fixtures_df['delay'].values, 1) + fixtures_df['overhead'].values, ) - mic_go_cue_times_rel = fixtures_df.groupby('stim_type').get_group('T')['t_rel'].values - err = np.sum((np.diff(mic_go_cue_times_rel) - np.diff(mic_go_cue_times_bpod)) ** 2) + go_cue_times_rel = fixtures_df.groupby('stim_type').get_group('T')['t_rel'].values + err = np.sum((np.diff(go_cue_times_rel) - np.diff(go_cue_times_mic)) ** 2) return err # fitting the overheads fixtures_df['overhead'] = 0.0 bounds = ((0, np.inf), (0, np.inf), (0, np.inf)) - pfit = minimize(obj_fun, (0.0, 0.0, 0.0), args=(mic_go_cue_times_bpod, fixtures_df), bounds=bounds) + pfit = minimize(obj_fun, (0.0, 0.0, 0.0), args=(go_cue_times_mic, fixtures_df), bounds=bounds) overheads = dict(zip(['T', 'N', 'G', 'V'], [pfit.x[0], pfit.x[0], pfit.x[1], pfit.x[2]])) + # creating the relative time vector for each stimulus for s in 
fixtures_df['stim_type'].unique(): fixtures_df.loc[fixtures_df['stim_type'] == s, 'overhead'] = overheads[s] fixtures_df['t_rel'] = np.cumsum( fixtures_df['stim_delay'].values + np.roll(fixtures_df['delay'].values, 1) + fixtures_df['overhead'].values ) - mic_go_cue_times_rel = fixtures_df.groupby('stim_type').get_group('T')['t_rel'].values - - sync_fun, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - mic_go_cue_times_rel, mic_go_cue_times_bpod, return_indices=True, linear=True - ) - - assert ix_nph.shape[0] == 40, 'not all microphone onset events are accepted by the sync function' - if np.absolute(drift_ppm) > 20: - _logger.warning(f'sync with excessive drift: {drift_ppm}') - else: - _logger.info(f'synced with drift: {drift_ppm}') - - # applying the sync to all the timestamps in the fixtures - fixtures_df['t_bpod'] = sync_fun(fixtures_df['t_rel']) - - # dealing with the valve - # valve_times_rel = fixtures_df.groupby('stim_type').get_group('V')['t_rel'].values - # valve_times_bpod = sync_fun(valve_times_rel) - valve_times_bpod = fixtures_df.groupby('stim_type').get_group('V')['t_bpod'].values + # we now sync the valve times from the relative time and the neurophotometrics time + valve_times_rel = fixtures_df.groupby('stim_type').get_group('V')['t_rel'].values # getting the valve timestamps from the DAQ timestamps_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.pkl' @@ -566,12 +557,12 @@ def obj_fun(x, mic_go_cue_times_bpod, fixtures_df): self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath, save_path=timestamps_filepath) sync_channel = self.session_params['devices']['neurophotometrics']['sync_channel'] - valve_times_nph = self.timestamps[f'DI{sync_channel}']['positive'] + valve_times_daq = self.timestamps[f'DI{sync_channel}']['positive'] - sync_fun, drift_ppm, ix_nph, ix_bpod = ibldsp.utils.sync_timestamps( - valve_times_nph, valve_times_bpod, return_indices=True, linear=True + sync_fun_rel_to_daq, drift_ppm, ix_rel, ix_daq = ibldsp.utils.sync_timestamps( + valve_times_rel, valve_times_daq, return_indices=True, linear=True ) - assert ix_bpod.shape[0] == 40, 'not all bpod valve onset events are accepted by the sync function' + assert ix_rel.shape[0] == 40, 'not all bpod valve onset events are accepted by the sync function' if np.absolute(drift_ppm) > 20: _logger.warning(f'sync with excessive drift: {drift_ppm}') else: @@ -583,12 +574,59 @@ def obj_fun(x, mic_go_cue_times_bpod, fixtures_df): raw_photometry_folder / '_neurophotometrics_fpData.raw.pqt', drop_first=False, ) - # apply synchronization - photometry_df['times'] = sync_fun(photometry_df['times']) - # verify that all are valid (i.e. mean nothing ... 
) + + # load the photometry data and replace the timestamp column + # with the values from the frameclock timestamps as recorded by the DAQ + frameclock_channel = self.session_params['devices']['neurophotometrics']['sync_metadata']['frameclock_channel'] + frame_timestamps = self.timestamps[frameclock_channel]['positive'] + + # compare number of frame timestamps + # and put them in the photometry_df SystemTimestamp column + # based on the different scenarios + frame_times_adjusted = False # for debugging reasons + + # they are the same, all is well + if photometry_df.shape[0] == frame_timestamps.shape[0]: + photometry_df['times'] = frame_timestamps + _logger.info(f'timestamps are of equal size {photometry_df.shape[0]}') + frame_times_adjusted = True + + # there are more timestamps recorded by DAQ than + # frames recorded by bonsai + elif photometry_df.shape[0] < frame_timestamps.shape[0]: + _logger.info(f'# bonsai frames: {photometry_df.shape[0]}, # daq timestamps: {frame_timestamps.shape[0]}') + # there is exactly one more timestamp recorded by the daq + # (probably bonsai drops the last incomplete frame) + if photometry_df.shape[0] == frame_timestamps.shape[0] - 1: + photometry_df['times'] = frame_timestamps[:-1] + # there are two more frames recorded by the DAQ than by + # bonsai - this is observed. TODO understand when this happens + elif photometry_df.shape[0] == frame_timestamps.shape[0] - 2: + photometry_df['times'] = frame_timestamps[:-2] + # there are more frames recorded by the DAQ than that + # this indicates and issue - + elif photometry_df.shape[0] < frame_timestamps.shape[0] - 2: + raise ValueError('more timestamps for frames recorded by the daqami than frames were recorded by bonsai.') + frame_times_adjusted = True + + # there are more frames recorded by bonsai than by the DAQ + # this happens when the user stops the daqami recording before stopping the bonsai + # or when daqami crashes + elif photometry_df.shape[0] > frame_timestamps.shape[0]: + # we drop all excess frames + _logger.warning( + f'#frames bonsai: {photometry_df.shape[0]} > #frames daqami {frame_timestamps.shape[0]}, dropping excess' + ) + n_frames_daqami = frame_timestamps.shape[0] + photometry_df = photometry_df.iloc[:n_frames_daqami] + photometry_df.loc[:, 'SystemTimestamp'] = frame_timestamps + frame_times_adjusted = True + + if not frame_times_adjusted: + raise ValueError('timestamp issue that hasnt been caught') # write to disk - # the synced photometry signal + # the photometry signal photometry_filepath = self.session_path / 'alf' / 'photometry' / 'photometry.signal.pqt' photometry_filepath.parent.mkdir(parents=True, exist_ok=True) photometry_df.to_parquet(photometry_filepath) @@ -613,17 +651,19 @@ def obj_fun(x, mic_go_cue_times_bpod, fixtures_df): self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath, save_path=timestamps_filepath) ttl_durations = self.timestamps[f'DI{sync_channel}']['negative'] - self.timestamps[f'DI{sync_channel}']['positive'] - valve_open_dur = np.median(ttl_durations[ix_nph]) + valve_open_dur = np.median(ttl_durations[ix_daq]) passiveStims_df = pd.DataFrame( dict( - valveOn=fixtures_df.groupby('stim_type').get_group('V')['t_bpod'], - valveOff=fixtures_df.groupby('stim_type').get_group('V')['t_bpod'] + valve_open_dur, - toneOn=fixtures_df.groupby('stim_type').get_group('T')['t_bpod'], - toneOff=fixtures_df.groupby('stim_type').get_group('T')['t_bpod'] + task_settings['GO_TONE_DURATION'], - noiseOn=fixtures_df.groupby('stim_type').get_group('N')['t_bpod'], - 
noiseOff=fixtures_df.groupby('stim_type').get_group('N')['t_bpod'] + task_settings['WHITE_NOISE_DURATION'], + valveOn=fixtures_df.groupby('stim_type').get_group('V')['t_rel'], + valveOff=fixtures_df.groupby('stim_type').get_group('V')['t_rel'] + valve_open_dur, + toneOn=fixtures_df.groupby('stim_type').get_group('T')['t_rel'], + toneOff=fixtures_df.groupby('stim_type').get_group('T')['t_rel'] + task_settings['GO_TONE_DURATION'], + noiseOn=fixtures_df.groupby('stim_type').get_group('N')['t_rel'], + noiseOff=fixtures_df.groupby('stim_type').get_group('N')['t_rel'] + task_settings['WHITE_NOISE_DURATION'], ) ) + # convert all times from fixture time (=rel) to daq time + passiveStims_df.iloc[:, :] = sync_fun_rel_to_daq(passiveStims_df.values) passiveStims_filepath = self.session_path / 'alf' / self.collection / '_ibl_passiveStims.table.pqt' passiveStims_filepath.parent.mkdir(exist_ok=True, parents=True) passiveStims_df.reset_index().to_parquet(passiveStims_filepath) From 0b93de7569c8686ccd01d2ba18eb3ac8f5d33b94 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Fri, 3 Oct 2025 09:44:52 +0100 Subject: [PATCH 75/80] bugfix frameclock channel --- ibllib/pipes/neurophotometrics.py | 40 +++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index 8d51473d5..ca4c76d3a 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -355,10 +355,30 @@ class FibrePhotometryDAQSync(FibrePhotometryBaseSync): priority = 90 job_size = 'small' - def __init__(self, *args, load_timestamps: bool = True, **kwargs): + def __init__( + self, + *args, + load_timestamps: bool = True, + sync_channel: int | None = None, + frameclock_channel: int | None = None, + **kwargs, + ): super().__init__(*args, **kwargs) - self.sync_kwargs = kwargs.get('sync_metadata', self.session_params['sync']) - # self.sync_channel = kwargs.get('sync_channel', self.session_params['devices']['neurophotometrics']['sync_channel']) + # setting up sync properties + frameclock_channel = ( + frameclock_channel or self.session_params['devices']['neurophotometrics']['sync_metadata']['frameclock_channel'] + ) + # downward compatibility - frameclock moved around, now is back on the AI7 + if frameclock_channel in ['0', 0]: + self.frameclock_channel_name = f'DI{frameclock_channel}' + elif frameclock_channel in ['7', 7]: + self.frameclock_channel_name = f'AI{frameclock_channel}' + else: + self.frameclock_channel_name = frameclock_channel + + self.sync_channel = sync_channel or self.session_params['devices']['neurophotometrics']['sync_channel'] + + # whether or not to reextract from tdms or attempt to load from .pkl self.load_timestamps = load_timestamps @property @@ -393,14 +413,8 @@ def load_data(self) -> pd.DataFrame: tdms_filepath = self.session_path / self.photometry_collection / '_mcc_DAQdata.raw.tdms' self.timestamps = extract_timestamps_from_tdms_file(tdms_filepath, save_path=timestamps_filepath) - # downward compatibility - frameclock moved around, now is back on the AI7 - if self.sync_kwargs['frameclock_channel'] in ['0', 0]: - sync_channel_name = f'DI{self.sync_kwargs["frameclock_channel"]}' - elif self.sync_kwargs['frameclock_channel'] in ['7', 7]: - sync_channel_name = f'AI{self.sync_kwargs["frameclock_channel"]}' - else: - sync_channel_name = self.sync_kwargs['frameclock_channel'] - frame_timestamps = self.timestamps[sync_channel_name]['positive'] + # timestamps of the frameclock in DAQ time + frame_timestamps 
= self.timestamps[self.frameclock_channel_name]['positive'] # compare number of frame timestamps # and put them in the photometry_df SystemTimestamp column @@ -577,8 +591,8 @@ def obj_fun(x, go_cue_times_mic, fixtures_df): # load the photometry data and replace the timestamp column # with the values from the frameclock timestamps as recorded by the DAQ - frameclock_channel = self.session_params['devices']['neurophotometrics']['sync_metadata']['frameclock_channel'] - frame_timestamps = self.timestamps[frameclock_channel]['positive'] + frameclock_channel_name = self.session_params['devices']['neurophotometrics']['sync_metadata']['frameclock_channel'] + frame_timestamps = self.timestamps[frameclock_channel_name]['positive'] # compare number of frame timestamps # and put them in the photometry_df SystemTimestamp column From 4a4dd3a94d1f473293a08b18fd7471b982a4fa01 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 6 Oct 2025 10:24:22 +0100 Subject: [PATCH 76/80] flake8 --- ibllib/pipes/dynamic_pipeline.py | 2 +- ibllib/pipes/neurophotometrics.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ibllib/pipes/dynamic_pipeline.py b/ibllib/pipes/dynamic_pipeline.py index b6c4338e7..69b97ffbf 100644 --- a/ibllib/pipes/dynamic_pipeline.py +++ b/ibllib/pipes/dynamic_pipeline.py @@ -613,7 +613,7 @@ def make_pipeline(session_path, **pkwargs): case 'daqami': # for synchronization with the DAQami receiving the sync pulses from the individual bpods # as well as the frame clock from the FP3002 - if 'passive' not in protocol: # excluding passive session + if 'passive' not in protocol: # excluding passive session tasks['FibrePhotometryDAQSync'] = type('FibrePhotometryDAQSync', (ptasks.FibrePhotometryDAQSync,), {})( **kwargs, ) diff --git a/ibllib/pipes/neurophotometrics.py b/ibllib/pipes/neurophotometrics.py index ca4c76d3a..69156ce07 100644 --- a/ibllib/pipes/neurophotometrics.py +++ b/ibllib/pipes/neurophotometrics.py @@ -109,7 +109,7 @@ def extract_timestamps_from_tdms_file( if chunk_size is not None: n_chunks = df.shape[0] // chunk_size for i in range(n_chunks): - vals_ = vals[i * chunk_size : (i + 1) * chunk_size] + vals_ = vals[i * chunk_size: (i + 1) * chunk_size] # data = np.array([list(f'{v:04b}'[::-1]) for v in vals_], dtype='int8') data = _int2digital_channels(vals_) @@ -256,7 +256,8 @@ def _get_sync_function(self) -> Tuple[callable, list]: else: if not (timestamps_bpod.shape[0] * 0.95 < ix_bpod.shape[0]): _logger.warning( - f'less than 95% of bpod timestamps matched. n_timestamps:{timestamps_bpod.shape[0]} matched:{ix_bpod.shape[0]}' + f'less than 95% of bpod timestamps matched. 
\ + n_timestamps:{timestamps_bpod.shape[0]} matched:{ix_bpod.shape[0]}' ) valid_bounds = self._get_valid_bounds() @@ -359,7 +360,7 @@ def __init__( self, *args, load_timestamps: bool = True, - sync_channel: int | None = None, + # sync_channel: int | None = None, frameclock_channel: int | None = None, **kwargs, ): @@ -376,7 +377,7 @@ def __init__( else: self.frameclock_channel_name = frameclock_channel - self.sync_channel = sync_channel or self.session_params['devices']['neurophotometrics']['sync_channel'] + self.sync_channel = self.sync_channel or self.session_params['devices']['neurophotometrics']['sync_channel'] # whether or not to reextract from tdms or attempt to load from .pkl self.load_timestamps = load_timestamps From d09581e7b2e2f5b19b33c0e942b0412b8f6b7c60 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 6 Oct 2025 13:12:29 +0100 Subject: [PATCH 77/80] PhotometrySessionLoader added --- brainbox/io/one.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/brainbox/io/one.py b/brainbox/io/one.py index c1c86726e..b2743b30b 100644 --- a/brainbox/io/one.py +++ b/brainbox/io/one.py @@ -29,6 +29,8 @@ from ibllib.pipes.ephys_alignment import EphysAlignment from ibllib.plots import vertical_lines, Density +from iblphotometry import fpio + import brainbox.plot from brainbox.io.spikeglx import Streamer from brainbox.ephys_plots import plot_brain_regions @@ -1539,3 +1541,57 @@ def load_spike_sorting(self, pnames=None): @property def probes(self): return {k: self.ephys[k]['ssl'].pid for k in self.ephys} + + +class PhotometrySessionLoader(SessionLoader): + photometry: dict = field(default_factory=dict, repr=False) + + def __init__(self, *args, photometry_collection: str = 'photometry', **kwargs): + self.photometry_collection = photometry_collection + self.revision = kwargs.get('revision', None) + + # determine if loading by eid or session path + self.load_by_path = True if 'session_path' in kwargs else False + + super().__init__(*args, **kwargs) + + def load_session_data(self, **kwargs): + super().load_session_data(**kwargs) + self.load_photometry() + + def load_photometry( + self, + restrict_to_session: bool = True, + pre: int = 5, + post: int = 5, + ): + # session path precedence over eid + if self.load_by_path: + raw_dfs = fpio.from_session_path( + self.session_path, + collection=self.photometry_collection, + revision=self.revision, + ) + else: # load by eid + raw_dfs = fpio.from_eid( + self.eid, + self.one, + collection=self.photometry_collection, + revision=self.revision, + ) + + if restrict_to_session: + if isinstance(self.trials, pd.DataFrame) and (self.trials.shape[0] == 0): + self.load_trials() + t_start = self.trials.iloc[0]['intervals_0'] + t_stop = self.trials.iloc[-1]['intervals_1'] + + for band in raw_dfs.keys(): + df = raw_dfs[band] + ix = np.logical_and( + df.index.values > t_start - pre, + df.index.values < t_stop + post, + ) + raw_dfs[band] = df.loc[ix] + + self.photometry = raw_dfs From 4ac6a9a93170eaba0e3002fce7f45cf0dcb55c0d Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Mon, 6 Oct 2025 13:13:32 +0100 Subject: [PATCH 78/80] (temporarily) removed extractor tests --- ibllib/tests/test_neurophotometrics.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ibllib/tests/test_neurophotometrics.py b/ibllib/tests/test_neurophotometrics.py index 923946509..fd57ba209 100644 --- a/ibllib/tests/test_neurophotometrics.py +++ b/ibllib/tests/test_neurophotometrics.py @@ -28,11 +28,11 @@ def setUp(self) -> 
None: # self.raw_photometry_folder = self.session_folder / 'raw_photometry_data' # self.raw_photometry_folder.mkdir(parents=True) - def test_bpod_extractor(self): - session_folder = Path(iblphotometry_tests.__file__).parent / 'data' / 'neurophotometrics' / 'raw_bpod_session' - assert session_folder.exists() - self.experiment_description = session_params.read_params(session_folder) - FibrePhotometryBpodSync() + # def test_bpod_extractor(self): + # session_folder = Path(iblphotometry_tests.__file__).parent / 'data' / 'neurophotometrics' / 'raw_bpod_session' + # assert session_folder.exists() + # self.experiment_description = session_params.read_params(session_folder) + # FibrePhotometryBpodSync() # def test_daqami_extractor(self): # path = Path(__file__).parent / 'fixtures' / 'neurophotometrics' / '_ibl_experiment_description_bpod.yaml' From 19a8c8bd4f297bd5b58fcb14339d84a6660ff624 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Thu, 9 Oct 2025 16:54:25 +0100 Subject: [PATCH 79/80] fixed: uneven number of samples when PhotometrySessionLoader restricts to session time --- brainbox/io/one.py | 444 +++++++++++++++++++++++++-------------------- 1 file changed, 244 insertions(+), 200 deletions(-) diff --git a/brainbox/io/one.py b/brainbox/io/one.py index b2743b30b..76d01bff3 100644 --- a/brainbox/io/one.py +++ b/brainbox/io/one.py @@ -1,4 +1,5 @@ """Functions for loading IBL ephys and trial data using the Open Neurophysiology Environment.""" + from dataclasses import dataclass, field import gc import logging @@ -62,8 +63,7 @@ def load_lfp(eid, one=None, dataset_types=None, **kwargs): [one.load_dataset(eid, dset, download_only=True) for dset in dtypes] session_path = one.eid2path(eid) - efiles = [ef for ef in spikeglx.glob_ephys_files(session_path, bin_exists=False) - if ef.get('lf', None)] + efiles = [ef for ef in spikeglx.glob_ephys_files(session_path, bin_exists=False) if ef.get('lf', None)] return [spikeglx.Reader(ef['lf'], **kwargs) for ef in efiles] @@ -84,19 +84,21 @@ def _get_spike_sorting_collection(collections, pname): collection = next(filter(lambda c: c == f'alf/{pname}/pykilosort', collections), None) # otherwise, prefers the shortest collection = collection or next(iter(sorted(filter(lambda c: f'alf/{pname}' in c, collections), key=len)), None) - _logger.debug(f"selecting: {collection} to load amongst candidates: {collections}") + _logger.debug(f'selecting: {collection} to load amongst candidates: {collections}') return collection def _channels_alyx2bunch(chans): - channels = Bunch({ - 'atlas_id': np.array([ch['brain_region'] for ch in chans]), - 'x': np.array([ch['x'] for ch in chans]) / 1e6, - 'y': np.array([ch['y'] for ch in chans]) / 1e6, - 'z': np.array([ch['z'] for ch in chans]) / 1e6, - 'axial_um': np.array([ch['axial'] for ch in chans]), - 'lateral_um': np.array([ch['lateral'] for ch in chans]) - }) + channels = Bunch( + { + 'atlas_id': np.array([ch['brain_region'] for ch in chans]), + 'x': np.array([ch['x'] for ch in chans]) / 1e6, + 'y': np.array([ch['y'] for ch in chans]) / 1e6, + 'z': np.array([ch['z'] for ch in chans]) / 1e6, + 'axial_um': np.array([ch['axial'] for ch in chans]), + 'lateral_um': np.array([ch['lateral'] for ch in chans]), + } + ) return channels @@ -107,7 +109,7 @@ def _channels_traj2bunch(xyz_chans, brain_atlas): 'y': xyz_chans[:, 1], 'z': xyz_chans[:, 2], 'acronym': brain_regions['acronym'], - 'atlas_id': brain_regions['id'] + 'atlas_id': brain_regions['id'], } return channels @@ -117,7 +119,8 @@ def _channels_bunch2alf(channels): channels_ = { 
'mlapdv': np.c_[channels['x'], channels['y'], channels['z']] * 1e6, 'brainLocationIds_ccf_2017': channels['atlas_id'], - 'localCoordinates': np.c_[channels['lateral_um'], channels['axial_um']]} + 'localCoordinates': np.c_[channels['lateral_um'], channels['axial_um']], + } return channels_ @@ -141,8 +144,9 @@ def _channels_alf2bunch(channels, brain_regions=None): return channels_ -def _load_spike_sorting(eid, one=None, collection=None, revision=None, return_channels=True, dataset_types=None, - brain_regions=None): +def _load_spike_sorting( + eid, one=None, collection=None, revision=None, return_channels=True, dataset_types=None, brain_regions=None +): """ Generic function to load spike sorting according data using ONE. @@ -186,7 +190,7 @@ def _load_spike_sorting(eid, one=None, collection=None, revision=None, return_ch # enumerate probes and load according to the name collections = one.list_collections(eid, filename='spikes*', collection=collection, revision=revision) if len(collections) == 0: - _logger.warning(f"eid {eid}: no collection found with collection filter: {collection}, revision: {revision}") + _logger.warning(f'eid {eid}: no collection found with collection filter: {collection}, revision: {revision}') pnames = list(set(c.split('/')[1] for c in collections)) spikes, clusters, channels = ({} for _ in range(3)) @@ -194,13 +198,14 @@ def _load_spike_sorting(eid, one=None, collection=None, revision=None, return_ch for pname in pnames: probe_collection = _get_spike_sorting_collection(collections, pname) - spikes[pname] = one.load_object(eid, collection=probe_collection, obj='spikes', - attribute=spike_attributes, namespace='') - clusters[pname] = one.load_object(eid, collection=probe_collection, obj='clusters', - attribute=cluster_attributes, namespace='') + spikes[pname] = one.load_object(eid, collection=probe_collection, obj='spikes', attribute=spike_attributes, namespace='') + clusters[pname] = one.load_object( + eid, collection=probe_collection, obj='clusters', attribute=cluster_attributes, namespace='' + ) if return_channels: channels = _load_channels_locations_from_disk( - eid, collection=collection, one=one, revision=revision, brain_regions=brain_regions) + eid, collection=collection, one=one, revision=revision, brain_regions=brain_regions + ) return spikes, clusters, channels else: return spikes, clusters @@ -222,7 +227,7 @@ def _load_channels_locations_from_disk(eid, collection=None, one=None, revision= channels = Bunch({}) collections = one.list_collections(eid, filename='channels*', collection=collection, revision=revision) if len(collections) == 0: - _logger.warning(f"eid {eid}: no collection found with collection filter: {collection}, revision: {revision}") + _logger.warning(f'eid {eid}: no collection found with collection filter: {collection}, revision: {revision}') probes = list(set([c.split('/')[1] for c in collections])) for probe in probes: probe_collection = _get_spike_sorting_collection(collections, probe) @@ -230,11 +235,12 @@ def _load_channels_locations_from_disk(eid, collection=None, one=None, revision= # if the spike sorter has not aligned data, try and get the alignment available if 'brainLocationIds_ccf_2017' not in channels[probe].keys(): aligned_channel_collections = one.list_collections( - eid, filename='channels.brainLocationIds_ccf_2017*', collection=probe_collection, revision=revision) + eid, filename='channels.brainLocationIds_ccf_2017*', collection=probe_collection, revision=revision + ) if len(aligned_channel_collections) == 0: - _logger.debug(f"no 
resolved alignment dataset found for {eid}/{probe}") + _logger.debug(f'no resolved alignment dataset found for {eid}/{probe}') continue - _logger.debug(f"looking for a resolved alignment dataset in {aligned_channel_collections}") + _logger.debug(f'looking for a resolved alignment dataset in {aligned_channel_collections}') ac_collection = _get_spike_sorting_collection(aligned_channel_collections, probe) channels_aligned = one.load_object(eid, 'channels', collection=ac_collection) channels[probe] = channel_locations_interpolation(channels_aligned, channels[probe]) @@ -276,8 +282,7 @@ def channel_locations_interpolation(channels_aligned, channels=None, brain_regio depths, ind, iinv = np.unique(channels['localCoordinates'][:, 1], return_index=True, return_inverse=True) channels['mlapdv'] = np.zeros((nch, 3)) for i in np.arange(3): - channels['mlapdv'][:, i] = np.interp( - depths, depth_aligned, channels_aligned['mlapdv'][ind_aligned, i])[iinv] + channels['mlapdv'][:, i] = np.interp(depths, depth_aligned, channels_aligned['mlapdv'][ind_aligned, i])[iinv] # the brain locations have to be interpolated by nearest neighbour fcn_interp = interp1d(depth_aligned, channels_aligned['brainLocationIds_ccf_2017'][ind_aligned], kind='nearest') channels['brainLocationIds_ccf_2017'] = fcn_interp(depths)[iinv].astype(np.int32) @@ -287,68 +292,62 @@ def channel_locations_interpolation(channels_aligned, channels=None, brain_regio return channels -def _load_channel_locations_traj(eid, probe=None, one=None, revision=None, aligned=False, - brain_atlas=None, return_source=False): +def _load_channel_locations_traj(eid, probe=None, one=None, revision=None, aligned=False, brain_atlas=None, return_source=False): if not hasattr(one, 'alyx'): return {}, None - _logger.debug(f"trying to load from traj {probe}") + _logger.debug(f'trying to load from traj {probe}') channels = Bunch() brain_atlas = brain_atlas or AllenAtlas # need to find the collection bruh insertion = one.alyx.rest('insertions', 'list', session=eid, name=probe)[0] collection = _collection_filter_from_args(probe=probe) - collections = one.list_collections(eid, filename='channels*', collection=collection, - revision=revision) + collections = one.list_collections(eid, filename='channels*', collection=collection, revision=revision) probe_collection = _get_spike_sorting_collection(collections, probe) chn_coords = one.load_dataset(eid, 'channels.localCoordinates', collection=probe_collection) depths = chn_coords[:, 1] - tracing = insertion.get('json', {'temp': 0}).get('extended_qc', {'temp': 0}). \ - get('tracing_exists', False) - resolved = insertion.get('json', {'temp': 0}).get('extended_qc', {'temp': 0}). \ - get('alignment_resolved', False) - counts = insertion.get('json', {'temp': 0}).get('extended_qc', {'temp': 0}). \ - get('alignment_count', 0) + tracing = insertion.get('json', {'temp': 0}).get('extended_qc', {'temp': 0}).get('tracing_exists', False) + resolved = insertion.get('json', {'temp': 0}).get('extended_qc', {'temp': 0}).get('alignment_resolved', False) + counts = insertion.get('json', {'temp': 0}).get('extended_qc', {'temp': 0}).get('alignment_count', 0) if tracing: xyz = np.array(insertion['json']['xyz_picks']) / 1e6 if resolved: - - _logger.debug(f'Channel locations for {eid}/{probe} have been resolved. 
' - f'Channel and cluster locations obtained from ephys aligned histology ' - f'track.') - traj = one.alyx.rest('trajectories', 'list', session=eid, probe=probe, - provenance='Ephys aligned histology track')[0] + _logger.debug( + f'Channel locations for {eid}/{probe} have been resolved. ' + f'Channel and cluster locations obtained from ephys aligned histology ' + f'track.' + ) + traj = one.alyx.rest('trajectories', 'list', session=eid, probe=probe, provenance='Ephys aligned histology track')[0] align_key = insertion['json']['extended_qc']['alignment_stored'] feature = traj['json'][align_key][0] track = traj['json'][align_key][1] - ephysalign = EphysAlignment(xyz, depths, track_prev=track, - feature_prev=feature, - brain_atlas=brain_atlas, speedy=True) + ephysalign = EphysAlignment(xyz, depths, track_prev=track, feature_prev=feature, brain_atlas=brain_atlas, speedy=True) chans = ephysalign.get_channel_locations(feature, track) channels[probe] = _channels_traj2bunch(chans, brain_atlas) source = 'resolved' elif counts > 0 and aligned: - _logger.debug(f'Channel locations for {eid}/{probe} have not been ' - f'resolved. However, alignment flag set to True so channel and cluster' - f' locations will be obtained from latest available ephys aligned ' - f'histology track.') + _logger.debug( + f'Channel locations for {eid}/{probe} have not been ' + f'resolved. However, alignment flag set to True so channel and cluster' + f' locations will be obtained from latest available ephys aligned ' + f'histology track.' + ) # get the latest user aligned channels - traj = one.alyx.rest('trajectories', 'list', session=eid, probe=probe, - provenance='Ephys aligned histology track')[0] + traj = one.alyx.rest('trajectories', 'list', session=eid, probe=probe, provenance='Ephys aligned histology track')[0] align_key = insertion['json']['extended_qc']['alignment_stored'] feature = traj['json'][align_key][0] track = traj['json'][align_key][1] - ephysalign = EphysAlignment(xyz, depths, track_prev=track, - feature_prev=feature, - brain_atlas=brain_atlas, speedy=True) + ephysalign = EphysAlignment(xyz, depths, track_prev=track, feature_prev=feature, brain_atlas=brain_atlas, speedy=True) chans = ephysalign.get_channel_locations(feature, track) channels[probe] = _channels_traj2bunch(chans, brain_atlas) source = 'aligned' else: - _logger.debug(f'Channel locations for {eid}/{probe} have not been resolved. ' - f'Channel and cluster locations obtained from histology track.') + _logger.debug( + f'Channel locations for {eid}/{probe} have not been resolved. ' + f'Channel and cluster locations obtained from histology track.' 
+ ) # get the channels from histology tracing xyz = xyz[np.argsort(xyz[:, 2]), :] chans = histology.interpolate_along_track(xyz, (depths + TIP_SIZE_UM) / 1e6) @@ -400,12 +399,12 @@ def load_channel_locations(eid, probe=None, one=None, aligned=False, brain_atlas else: eid = one.to_eid(eid) collection = _collection_filter_from_args(probe=probe) - channels = _load_channels_locations_from_disk(eid, one=one, collection=collection, - brain_regions=brain_atlas.regions) + channels = _load_channels_locations_from_disk(eid, one=one, collection=collection, brain_regions=brain_atlas.regions) incomplete_probes = [k for k in channels if 'x' not in channels[k]] for iprobe in incomplete_probes: - channels_, source = _load_channel_locations_traj(eid, probe=iprobe, one=one, aligned=aligned, - brain_atlas=brain_atlas, return_source=True) + channels_, source = _load_channel_locations_traj( + eid, probe=iprobe, one=one, aligned=aligned, brain_atlas=brain_atlas, return_source=True + ) if channels_ is not None: channels[iprobe] = channels_[iprobe] return channels @@ -451,7 +450,8 @@ def merge_clusters_channels(dic_clus, channels, keys_to_add_extra=None): else: _logger.warning( f'Probe {label}: merging channels and clusters for key "{key}" has {nch_key} on channels' - f' but expected {max(clu_ch)}. Data in new cluster key "{key}" is returned empty.') + f' but expected {max(clu_ch)}. Data in new cluster key "{key}" is returned empty.' + ) dic_clus[label][key] = [] except AssertionError: _logger.warning(f'Either clusters or channels does not have key {key}, could not merge') @@ -481,10 +481,9 @@ def load_passive_rfmap(eid, one=None): # Load in the receptive field mapping data rf_map = one.load_object(eid, obj='passiveRFM', collection='alf') - frames = np.fromfile(one.load_dataset(eid, '_iblrig_RFMapStim.raw.bin', - collection='raw_passive_data'), dtype="uint8") + frames = np.fromfile(one.load_dataset(eid, '_iblrig_RFMapStim.raw.bin', collection='raw_passive_data'), dtype='uint8') y_pix, x_pix = 15, 15 - frames = np.transpose(np.reshape(frames, [y_pix, x_pix, -1], order="F"), [2, 1, 0]) + frames = np.transpose(np.reshape(frames, [y_pix, x_pix, -1], order='F'), [2, 1, 0]) rf_map['frames'] = frames return rf_map @@ -555,13 +554,13 @@ def load_iti(trials): def load_channels_from_insertion(ins, depths=None, one=None, ba=None): - PROV_2_VAL = { 'Resolved': 90, 'Ephys aligned histology track': 70, 'Histology track': 50, 'Micro-manipulator': 30, - 'Planned': 10} + 'Planned': 10, + } one = one or ONE() ba = ba or atlas.AllenAtlas() @@ -575,21 +574,17 @@ def load_channels_from_insertion(ins, depths=None, one=None, ba=None): ins = atlas.Insertion.from_dict(traj) # Deepest coordinate first xyz = np.c_[ins.tip, ins.entry].T - xyz_channels = histology.interpolate_along_track(xyz, (depths + - TIP_SIZE_UM) / 1e6) + xyz_channels = histology.interpolate_along_track(xyz, (depths + TIP_SIZE_UM) / 1e6) else: xyz = np.array(ins['json']['xyz_picks']) / 1e6 if traj['provenance'] == 'Histology track': xyz = xyz[np.argsort(xyz[:, 2]), :] - xyz_channels = histology.interpolate_along_track(xyz, (depths + - TIP_SIZE_UM) / 1e6) + xyz_channels = histology.interpolate_along_track(xyz, (depths + TIP_SIZE_UM) / 1e6) else: align_key = ins['json']['extended_qc']['alignment_stored'] feature = traj['json'][align_key][0] track = traj['json'][align_key][1] - ephysalign = EphysAlignment(xyz, depths, track_prev=track, - feature_prev=feature, - brain_atlas=ba, speedy=True) + ephysalign = EphysAlignment(xyz, depths, track_prev=track, feature_prev=feature, 
brain_atlas=ba, speedy=True) xyz_channels = ephysalign.get_channel_locations(feature, track) return xyz_channels @@ -607,6 +602,7 @@ class SpikeSortingLoader: SpikeSortingLoader(session_path=session_path, pname='probe00') NB: When no ONE instance is passed, any datasets that are loaded will not be recorded. """ + one: One = None atlas: None = None pid: str = None @@ -615,7 +611,7 @@ class SpikeSortingLoader: session_path: ALFPath = '' # the following properties are the outcome of the post init function collections: list = None - datasets: list = None # list of all datasets belonging to the session + datasets: list = None # list of all datasets belonging to the session # the following properties are the outcome of a reading function files: dict = None raw_data_files: list = None # list of raw ap and lf files corresponding to the recording @@ -633,8 +629,10 @@ def __post_init__(self): self.eid, self.pname = self.one.pid2eid(self.pid) except NotImplementedError: if self.eid == '' or self.pname == '': - raise IOError("Cannot infer session id and probe name from pid. " - "You need to pass eid and pname explicitly when instantiating SpikeSortingLoader.") + raise IOError( + 'Cannot infer session id and probe name from pid. ' + 'You need to pass eid and pname explicitly when instantiating SpikeSortingLoader.' + ) self.session_path = self.one.eid2path(self.eid) # then eid / pname combination elif self.session_path is None or self.session_path == '': @@ -651,8 +649,7 @@ def __post_init__(self): self.one._cache['datasets'] = cache._make_datasets_df(self.session_path, hash_files=False) self.eid = str(self.session_path.relative_to(self.session_path.parents[2])) # populates default properties - self.collections = self.one.list_collections( - self.eid, filename='spikes*', collection=f"alf/{self.pname}*") + self.collections = self.one.list_collections(self.eid, filename='spikes*', collection=f'alf/{self.pname}*') self.datasets = self.one.list_datasets(self.eid) if self.atlas is None: self.atlas = AllenAtlas() @@ -693,7 +690,7 @@ def _get_spike_sorting_collection(self, spike_sorter=None): for sorter in list([spike_sorter, 'iblsorter', 'pykilosort']): if sorter is None: continue - if sorter == "": + if sorter == '': collection = next(filter(lambda c: c == f'alf/{self.pname}', self.collections), None) else: collection = next(filter(lambda c: c == f'alf/{self.pname}/{sorter}', self.collections), None) @@ -701,7 +698,7 @@ def _get_spike_sorting_collection(self, spike_sorter=None): return collection # if none is found amongst the defaults, prefers the shortest collection = collection or next(iter(sorted(filter(lambda c: f'alf/{self.pname}' in c, self.collections), key=len)), None) - _logger.debug(f"selecting: {collection} to load amongst candidates: {self.collections}") + _logger.debug(f'selecting: {collection} to load amongst candidates: {self.collections}') return collection def load_spike_sorting_object(self, obj, *args, revision=None, **kwargs): @@ -726,8 +723,17 @@ def get_version(self, spike_sorter=None): dset = self.one.alyx.rest('datasets', 'list', session=self.eid, collection=collection, name='spikes.times.npy') return dset[0]['version'] if len(dset) else 'unknown' - def download_spike_sorting_object(self, obj, spike_sorter=None, dataset_types=None, collection=None, - attribute=None, missing='raise', revision=None, **kwargs): + def download_spike_sorting_object( + self, + obj, + spike_sorter=None, + dataset_types=None, + collection=None, + attribute=None, + missing='raise', + revision=None, + **kwargs, 
+ ): """ Downloads an ALF object :param obj: object name, str between 'spikes', 'clusters' or 'channels' @@ -747,12 +753,18 @@ def download_spike_sorting_object(self, obj, spike_sorter=None, dataset_types=No return {}, {}, {} self.collection = self._get_spike_sorting_collection(spike_sorter=spike_sorter) collection = collection or self.collection - _logger.debug(f"loading spike sorting object {obj} from {collection}") + _logger.debug(f'loading spike sorting object {obj} from {collection}') attributes = self._get_attributes(dataset_types) try: self.files[obj] = self.one.load_object( - self.eid, obj=obj, attribute=attributes.get(obj, None), - collection=collection, download_only=True, revision=revision, **kwargs) + self.eid, + obj=obj, + attribute=attributes.get(obj, None), + collection=collection, + download_only=True, + revision=revision, + **kwargs, + ) except ALFObjectNotFound as e: if missing == 'raise': raise e @@ -780,13 +792,15 @@ def download_raw_electrophysiology(self, band='ap'): for suffix in [f'*.{band}.ch', f'*.{band}.meta', f'*.{band}.cbin']: try: # FIXME: this will fail if multiple LFP segments are found - raw_data_files.append(self.one.load_dataset( - self.eid, - download_only=True, - collection=f'raw_ephys_data/{self.pname}', - dataset=suffix, - check_hash=False, - )) + raw_data_files.append( + self.one.load_dataset( + self.eid, + download_only=True, + collection=f'raw_ephys_data/{self.pname}', + dataset=suffix, + check_hash=False, + ) + ) except ALFObjectNotFound: _logger.debug(f"{self.session_path} can't locate raw data collection raw_ephys_data/{self.pname}, file {suffix}") self.raw_data_files = list(set(self.raw_data_files + raw_data_files)) @@ -806,7 +820,7 @@ def raw_electrophysiology(self, stream=True, band='ap', **kwargs): return Streamer(pid=self.pid, one=self.one, typ=band, **kwargs) else: raw_data_files = self.download_raw_electrophysiology(band=band) - cbin_file = next(filter(lambda f: re.match(rf".*\.{band}\..*cbin", f.name), raw_data_files), None) + cbin_file = next(filter(lambda f: re.match(rf'.*\.{band}\..*cbin', f.name), raw_data_files), None) if cbin_file is not None: return spikeglx.Reader(cbin_file) @@ -814,10 +828,14 @@ def download_raw_waveforms(self, **kwargs): """ Downloads raw waveforms extracted from sorting to local disk. 
""" - _logger.debug(f"loading waveforms from {self.collection}") + _logger.debug(f'loading waveforms from {self.collection}') return self.one.load_object( - id=self.eid, obj="waveforms", attribute=["traces", "templates", "table", "channels"], - collection=self._get_spike_sorting_collection("pykilosort"), download_only=True, **kwargs + id=self.eid, + obj='waveforms', + attribute=['traces', 'templates', 'table', 'channels'], + collection=self._get_spike_sorting_collection('pykilosort'), + download_only=True, + **kwargs, ) def raw_waveforms(self, **kwargs): @@ -848,9 +866,10 @@ def load_channels(self, **kwargs): channels = self._load_object(self.files['electrodeSites'], wildcards=self.one.wildcards) channels['rawInd'] = np.arange(channels[list(channels.keys())[0]].shape[0]) if 'brainLocationIds_ccf_2017' not in channels: - _logger.debug(f"loading channels from alyx for {self.files['channels']}") + _logger.debug(f'loading channels from alyx for {self.files["channels"]}') _channels, self.histology = _load_channel_locations_traj( - self.eid, probe=self.pname, one=self.one, brain_atlas=self.atlas, return_source=True, aligned=True) + self.eid, probe=self.pname, one=self.one, brain_atlas=self.atlas, return_source=True, aligned=True + ) if _channels: channels = _channels[self.pname] else: @@ -860,18 +879,19 @@ def load_channels(self, **kwargs): @staticmethod def filter_files_by_namespace(all_files, namespace): - # Create dict for each file with available namespaces, no namespce is stored under the key None namespace_files = defaultdict(dict) available_namespaces = [] for file in all_files: nspace = file.namespace or None available_namespaces.append(nspace) - namespace_files[f"{file.object}.{file.attribute}"][nspace] = file + namespace_files[f'{file.object}.{file.attribute}'][nspace] = file if namespace not in set(available_namespaces): - _logger.info(f'Could not find manual curation results for {namespace}, returning default' - f' non manually curated spikesorting data') + _logger.info( + f'Could not find manual curation results for {namespace}, returning default' + f' non manually curated spikesorting data' + ) # Return the files with the chosen namespace. files = [f.get(namespace, f.get(None, None)) for f in namespace_files.values()] @@ -879,8 +899,9 @@ def filter_files_by_namespace(all_files, namespace): files = [f for f in files if f] return files - def load_spike_sorting(self, spike_sorter='iblsorter', revision=None, enforce_version=False, good_units=False, - namespace=None, **kwargs): + def load_spike_sorting( + self, spike_sorter='iblsorter', revision=None, enforce_version=False, good_units=False, namespace=None, **kwargs + ): """ Loads spikes, clusters and channels @@ -910,8 +931,10 @@ def load_spike_sorting(self, spike_sorter='iblsorter', revision=None, enforce_ve self.revision = revision if good_units and namespace is not None: - _logger.info('Good units table does not exist for manually curated spike sorting. Pass in namespace with' - 'good_units=False and filter the spikes post hoc by the good clusters.') + _logger.info( + 'Good units table does not exist for manually curated spike sorting. Pass in namespace with' + 'good_units=False and filter the spikes post hoc by the good clusters.' 
+ ) return [None] * 3 objects = ['passingSpikes', 'clusters', 'channels'] if good_units else None self.download_spike_sorting(spike_sorter=spike_sorter, revision=revision, objects=objects, **kwargs) @@ -936,17 +959,18 @@ def _assert_version_consistency(self): for k in ['spikes', 'clusters', 'channels', 'passingSpikes']: for fn in self.files.get(k, []): if self.spike_sorter: - assert fn.relative_to(self.session_path).parts[2] == self.spike_sorter, \ - f"You required strict version {self.spike_sorter}, {fn} does not match" + assert fn.relative_to(self.session_path).parts[2] == self.spike_sorter, ( + f'You required strict version {self.spike_sorter}, {fn} does not match' + ) if self.revision: - assert fn.revision == self.revision, \ - f"You required strict revision {self.revision}, {fn} does not match" + assert fn.revision == self.revision, f'You required strict revision {self.revision}, {fn} does not match' @staticmethod def compute_metrics(spikes, clusters=None): nc = clusters['channels'].size if clusters else np.unique(spikes['clusters']).size - metrics = pd.DataFrame(quick_unit_metrics( - spikes['clusters'], spikes['times'], spikes['amps'], spikes['depths'], cluster_ids=np.arange(nc))) + metrics = pd.DataFrame( + quick_unit_metrics(spikes['clusters'], spikes['times'], spikes['amps'], spikes['depths'], cluster_ids=np.arange(nc)) + ) return metrics @staticmethod @@ -971,7 +995,7 @@ def merge_clusters(spikes, clusters, channels, cache_dir=None, compute_metrics=F if metrics.shape[0] != nc: metrics = None if metrics is None or compute_metrics is True: - _logger.debug("recompute clusters metrics") + _logger.debug('recompute clusters metrics') metrics = SpikeSortingLoader.compute_metrics(spikes, clusters) if isinstance(cache_dir, Path): metrics.to_parquet(Path(cache_dir).joinpath('clusters.metrics.pqt')) @@ -994,12 +1018,15 @@ def _get_probe_info(self, revision=None): revision = revision if revision is not None else self.revision if self._sync is None: timestamps = self.one.load_dataset( - self.eid, dataset='_spikeglx_*.timestamps.npy', collection=f'raw_ephys_data/{self.pname}', revision=revision) + self.eid, dataset='_spikeglx_*.timestamps.npy', collection=f'raw_ephys_data/{self.pname}', revision=revision + ) _ = self.one.load_dataset( # this is not used here but we want to trigger the download for potential tasks - self.eid, dataset='_spikeglx_*.sync.npy', collection=f'raw_ephys_data/{self.pname}', revision=revision) + self.eid, dataset='_spikeglx_*.sync.npy', collection=f'raw_ephys_data/{self.pname}', revision=revision + ) try: - ap_meta = spikeglx.read_meta_data(self.one.load_dataset( - self.eid, dataset='_spikeglx_*.ap.meta', collection=f'raw_ephys_data/{self.pname}')) + ap_meta = spikeglx.read_meta_data( + self.one.load_dataset(self.eid, dataset='_spikeglx_*.ap.meta', collection=f'raw_ephys_data/{self.pname}') + ) fs = spikeglx._get_fs_from_meta(ap_meta) except ALFObjectNotFound: ap_meta = None @@ -1032,15 +1059,17 @@ def samples2times(self, values, direction='forward'): @property def pid2ref(self): - return f"{self.one.eid2ref(self.eid, as_dict=False)}_{self.pname}" + return f'{self.one.eid2ref(self.eid, as_dict=False)}_{self.pname}' def _default_plot_title(self, spikes): - title = f"{self.pid2ref}, {self.pid} \n" \ - f"{spikes['clusters'].size:_} spikes, {np.unique(spikes['clusters']).size:_} clusters" + title = ( + f'{self.pid2ref}, {self.pid} \n{spikes["clusters"].size:_} spikes, {np.unique(spikes["clusters"]).size:_} clusters' + ) return title - def raster(self, spikes, channels, 
save_dir=None, br=None, label='raster', time_series=None, - drift=None, title=None, **kwargs): + def raster( + self, spikes, channels, save_dir=None, br=None, label='raster', time_series=None, drift=None, title=None, **kwargs + ): """ :param spikes: spikes dictionary or Bunch :param channels: channels dictionary or Bunch. @@ -1054,13 +1083,14 @@ def raster(self, spikes, channels, save_dir=None, br=None, label='raster', time_ """ br = br or BrainRegions() time_series = time_series or {} - fig, axs = plt.subplots(2, 2, gridspec_kw={ - 'width_ratios': [.95, .05], 'height_ratios': [.1, .9]}, figsize=(16, 9), sharex='col') + fig, axs = plt.subplots( + 2, 2, gridspec_kw={'width_ratios': [0.95, 0.05], 'height_ratios': [0.1, 0.9]}, figsize=(16, 9), sharex='col' + ) axs[0, 1].set_axis_off() # axs[0, 0].set_xticks([]) if kwargs is None: # set default raster plot parameters - kwargs = {"t_bin": 0.007, "d_bin": 10, "vmax": 0.5} + kwargs = {'t_bin': 0.007, 'd_bin': 10, 'vmax': 0.5} brainbox.plot.driftmap(spikes['times'], spikes['depths'], ax=axs[1, 0], **kwargs) if title is None: title = self._default_plot_title(spikes) @@ -1068,8 +1098,14 @@ def raster(self, spikes, channels, save_dir=None, br=None, label='raster', time_ for k, ts in time_series.items(): vertical_lines(ts, ymin=0, ymax=3800, ax=axs[1, 0]) if 'atlas_id' in channels: - plot_brain_regions(channels['atlas_id'], channel_depths=channels['axial_um'], - brain_regions=br, display=True, ax=axs[1, 1], title=self.histology) + plot_brain_regions( + channels['atlas_id'], + channel_depths=channels['axial_um'], + brain_regions=br, + display=True, + ax=axs[1, 1], + title=self.histology, + ) axs[1, 0].set_ylim(0, 3800) axs[1, 0].set_xlim(spikes['times'][0], spikes['times'][-1]) fig.tight_layout() @@ -1079,28 +1115,33 @@ def raster(self, spikes, channels, save_dir=None, br=None, label='raster', time_ if 'drift' in self.files: drift = self._load_object(self.files['drift'], wildcards=self.one.wildcards) if isinstance(drift, dict): - axs[0, 0].plot(drift['times'], drift['um'], 'k', alpha=.5) + axs[0, 0].plot(drift['times'], drift['um'], 'k', alpha=0.5) axs[0, 0].set(ylim=[-15, 15]) if save_dir is not None: - png_file = save_dir.joinpath(f"{self.pid}_{self.pid2ref}_{label}.png") if Path(save_dir).is_dir() else Path(save_dir) + png_file = save_dir.joinpath(f'{self.pid}_{self.pid2ref}_{label}.png') if Path(save_dir).is_dir() else Path(save_dir) fig.savefig(png_file) plt.close(fig) gc.collect() else: return fig, axs - def plot_rawdata_snippet(self, sr, spikes, clusters, t0, - channels=None, - br: BrainRegions = None, - save_dir=None, - label='raster', - gain=-93, - title=None): - + def plot_rawdata_snippet( + self, + sr, + spikes, + clusters, + t0, + channels=None, + br: BrainRegions = None, + save_dir=None, + label='raster', + gain=-93, + title=None, + ): # compute the raw data offset and destripe, we take 400ms around t0 first_sample, last_sample = (int((t0 - 0.2) * sr.fs), int((t0 + 0.2) * sr.fs)) - raw = sr[first_sample:last_sample, :-sr.nsync].T + raw = sr[first_sample:last_sample, : -sr.nsync].T channel_labels = channels['labels'] if (channels is not None) and ('labels' in channels) else True destriped = ibldsp.voltage.destripe(raw, sr.fs, channel_labels=channel_labels) # filter out the spikes according to good/bad clusters and to the time slice @@ -1111,21 +1152,27 @@ def plot_rawdata_snippet(self, sr, spikes, clusters, t0, if title is None: title = self._default_plot_title(spikes) # display the raw data snippet with spikes overlaid - fig, axs = 
plt.subplots(1, 2, gridspec_kw={'width_ratios': [.95, .05]}, figsize=(16, 9), sharex='col') + fig, axs = plt.subplots(1, 2, gridspec_kw={'width_ratios': [0.95, 0.05]}, figsize=(16, 9), sharex='col') Density(destriped, fs=sr.fs, taxis=1, gain=gain, ax=axs[0], t0=t0 - 0.2, unit='s') - axs[0].scatter(ss[sok] / sr.fs, sc[sok], color="green", alpha=0.5) - axs[0].scatter(ss[~sok] / sr.fs, sc[~sok], color="red", alpha=0.5) + axs[0].scatter(ss[sok] / sr.fs, sc[sok], color='green', alpha=0.5) + axs[0].scatter(ss[~sok] / sr.fs, sc[~sok], color='red', alpha=0.5) axs[0].set(title=title, xlim=[t0 - 0.035, t0 + 0.035]) # adds the channel locations if available if (channels is not None) and ('atlas_id' in channels): br = br or BrainRegions() - plot_brain_regions(channels['atlas_id'], channel_depths=channels['axial_um'], - brain_regions=br, display=True, ax=axs[1], title=self.histology) + plot_brain_regions( + channels['atlas_id'], + channel_depths=channels['axial_um'], + brain_regions=br, + display=True, + ax=axs[1], + title=self.histology, + ) axs[1].get_yaxis().set_visible(False) fig.tight_layout() if save_dir is not None: - png_file = save_dir.joinpath(f"{self.pid}_{self.pid2ref}_{label}.png") if Path(save_dir).is_dir() else Path(save_dir) + png_file = save_dir.joinpath(f'{self.pid}_{self.pid2ref}_{label}.png') if Path(save_dir).is_dir() else Path(save_dir) fig.savefig(png_file) plt.close(fig) gc.collect() @@ -1200,6 +1247,7 @@ class SessionLoader: functions: >>> sess_loader.load_wheel(sampling_rate=100) """ + one: One = None session_path: ALFPath = '' eid: str = '' @@ -1217,8 +1265,10 @@ def __post_init__(self): Checks for required inputs, sets session_path and eid, creates data_info table. """ if self.one is None: - raise ValueError("An input to one is required. If not connection to a database is desired, it can be " - "a fully local instance of One.") + raise ValueError( + 'An input to one is required. If not connection to a database is desired, it can be ' + 'a fully local instance of One.' 
+ ) # If session path is given, takes precedence over eid if self.session_path is not None and self.session_path != '': self.eid = self.one.to_eid(self.session_path) @@ -1228,15 +1278,9 @@ def __post_init__(self): if self.eid is not None and self.eid != '': self.session_path = self.one.eid2path(self.eid) else: - raise ValueError("If no session path is given, eid is required.") - - data_names = [ - 'trials', - 'wheel', - 'pose', - 'motion_energy', - 'pupil' - ] + raise ValueError('If no session path is given, eid is required.') + + data_names = ['trials', 'wheel', 'pose', 'motion_energy', 'pupil'] self.data_info = pd.DataFrame(columns=['name', 'is_loaded'], data=zip(data_names, [False] * len(data_names))) def load_session_data(self, trials=True, wheel=True, pose=True, motion_energy=True, pupil=True, reload=False): @@ -1265,33 +1309,21 @@ def load_session_data(self, trials=True, wheel=True, pose=True, motion_energy=Tr Whether to reload data that has already been loaded into this SessionLoader object, default is False """ load_df = self.data_info.copy() - load_df['to_load'] = [ - trials, - wheel, - pose, - motion_energy, - pupil - ] - load_df['load_func'] = [ - self.load_trials, - self.load_wheel, - self.load_pose, - self.load_motion_energy, - self.load_pupil - ] + load_df['to_load'] = [trials, wheel, pose, motion_energy, pupil] + load_df['load_func'] = [self.load_trials, self.load_wheel, self.load_pose, self.load_motion_energy, self.load_pupil] for idx, row in load_df.iterrows(): if row['to_load'] is False: - _logger.debug(f"Not loading {row['name']} data, set to False.") + _logger.debug(f'Not loading {row["name"]} data, set to False.') elif row['is_loaded'] is True and reload is False: - _logger.debug(f"Not loading {row['name']} data, is already loaded and reload=False.") + _logger.debug(f'Not loading {row["name"]} data, is already loaded and reload=False.') else: try: - _logger.info(f"Loading {row['name']} data") + _logger.info(f'Loading {row["name"]} data') row['load_func']() self.data_info.loc[idx, 'is_loaded'] = True except BaseException as e: - _logger.warning(f"Could not load {row['name']} data.") + _logger.warning(f'Could not load {row["name"]} data.') _logger.debug(e) def _find_behaviour_collection(self, obj): @@ -1312,8 +1344,10 @@ def _find_behaviour_collection(self, obj): if len(set(collections)) == 1: return collections[0] else: - _logger.error(f'Multiple collections found {collections}. Specify collection when loading, ' - f'e.g sl.load_{obj}(collection="{collections[0]}")') + _logger.error( + f'Multiple collections found {collections}. 
Specify collection when loading, ' + f'e.g sl.load_{obj}(collection="{collections[0]}")' + ) raise ALFMultipleCollectionsFound def load_trials(self, collection=None): @@ -1331,7 +1365,8 @@ def load_trials(self, collection=None): # itiDuration frequently has a mismatched dimension, and we don't need it, exclude using regex self.one.wildcards = False self.trials = self.one.load_object( - self.eid, 'trials', collection=collection, attribute=r'(?!itiDuration).*', revision=self.revision or None).to_df() + self.eid, 'trials', collection=collection, attribute=r'(?!itiDuration).*', revision=self.revision or None + ).to_df() self.one.wildcards = True self.data_info.loc[self.data_info['name'] == 'trials', 'is_loaded'] = True @@ -1360,9 +1395,11 @@ def load_wheel(self, fs=1000, corner_frequency=20, order=8, collection=None): # resample the wheel position and compute velocity, acceleration self.wheel = pd.DataFrame(columns=['times', 'position', 'velocity', 'acceleration']) self.wheel['position'], self.wheel['times'] = interpolate_position( - wheel_raw['timestamps'], wheel_raw['position'], freq=fs) + wheel_raw['timestamps'], wheel_raw['position'], freq=fs + ) self.wheel['velocity'], self.wheel['acceleration'] = velocity_filtered( - self.wheel['position'], fs=fs, corner_frequency=corner_frequency, order=order) + self.wheel['position'], fs=fs, corner_frequency=corner_frequency, order=order + ) self.wheel = self.wheel.apply(np.float32) self.data_info.loc[self.data_info['name'] == 'wheel', 'is_loaded'] = True @@ -1388,7 +1425,8 @@ def load_pose(self, likelihood_thr=0.9, views=['left', 'right', 'body'], tracker self.pose = {} for view in views: pose_raw = self.one.load_object( - self.eid, f'{view}Camera', attribute=[tracker, 'times'], revision=self.revision or None) + self.eid, f'{view}Camera', attribute=[tracker, 'times'], revision=self.revision or None + ) # Double check if video timestamps are correct length or can be fixed times_fixed, dlc = self._check_video_timestamps(view, pose_raw['times'], pose_raw[tracker]) self.pose[f'{view}Camera'] = likelihood_threshold(dlc, likelihood_thr) @@ -1409,17 +1447,15 @@ def load_motion_energy(self, views=['left', 'right', 'body']): views: list List of camera views for which to try and load data. 
Possible options are {'left', 'right', 'body'} """ - names = {'left': 'whiskerMotionEnergy', - 'right': 'whiskerMotionEnergy', - 'body': 'bodyMotionEnergy'} + names = {'left': 'whiskerMotionEnergy', 'right': 'whiskerMotionEnergy', 'body': 'bodyMotionEnergy'} # empty the dictionary so that if one loads only one view, after having loaded several, the others don't linger self.motion_energy = {} for view in views: me_raw = self.one.load_object( - self.eid, f'{view}Camera', attribute=['ROIMotionEnergy', 'times'], revision=self.revision or None) + self.eid, f'{view}Camera', attribute=['ROIMotionEnergy', 'times'], revision=self.revision or None + ) # Double check if video timestamps are correct length or can be fixed - times_fixed, motion_energy = self._check_video_timestamps( - view, me_raw['times'], me_raw['ROIMotionEnergy']) + times_fixed, motion_energy = self._check_video_timestamps(view, me_raw['times'], me_raw['ROIMotionEnergy']) self.motion_energy[f'{view}Camera'] = pd.DataFrame(columns=[names[view]], data=motion_energy) self.motion_energy[f'{view}Camera'].insert(0, 'times', times_fixed) self.data_info.loc[self.data_info['name'] == 'motion_energy', 'is_loaded'] = True @@ -1430,7 +1466,7 @@ def load_licks(self): """ pass - def load_pupil(self, snr_thresh=5.): + def load_pupil(self, snr_thresh=5.0): """ Function to load raw and smoothed pupil diameter data from the left camera into SessionLoader.pupil. @@ -1450,8 +1486,7 @@ def load_pupil(self, snr_thresh=5.): # If unavailable compute on the fly else: _logger.info('Pupil diameter not available, trying to compute on the fly.') - if (self.data_info[self.data_info['name'] == 'pose']['is_loaded'].values[0] - and 'leftCamera' in self.pose.keys()): + if self.data_info[self.data_info['name'] == 'pose']['is_loaded'].values[0] and 'leftCamera' in self.pose.keys(): # If pose data is already loaded, we don't know if it was threshold at 0.9, so we need a little stunt copy_pose = self.pose['leftCamera'].copy() # Save the previously loaded pose data self.load_pose(views=['left'], likelihood_thr=0.9) # Load new with threshold 0.9 @@ -1465,16 +1500,18 @@ def load_pupil(self, snr_thresh=5.): try: self.pupil['pupilDiameter_smooth'] = get_smooth_pupil_diameter(self.pupil['pupilDiameter_raw'], 'left') except BaseException as e: - _logger.error("Loaded raw pupil diameter but computing smooth pupil diameter failed. " - "Saving all NaNs for pupilDiameter_smooth.") + _logger.error( + 'Loaded raw pupil diameter but computing smooth pupil diameter failed. ' + 'Saving all NaNs for pupilDiameter_smooth.' 
+ ) _logger.debug(e) self.pupil['pupilDiameter_smooth'] = np.nan if not np.all(np.isnan(self.pupil['pupilDiameter_smooth'])): - good_idxs = np.where( - ~np.isnan(self.pupil['pupilDiameter_smooth']) & ~np.isnan(self.pupil['pupilDiameter_raw']))[0] - snr = (np.var(self.pupil['pupilDiameter_smooth'][good_idxs]) / - (np.var(self.pupil['pupilDiameter_smooth'][good_idxs] - self.pupil['pupilDiameter_raw'][good_idxs]))) + good_idxs = np.where(~np.isnan(self.pupil['pupilDiameter_smooth']) & ~np.isnan(self.pupil['pupilDiameter_raw']))[0] + snr = np.var(self.pupil['pupilDiameter_smooth'][good_idxs]) / ( + np.var(self.pupil['pupilDiameter_smooth'][good_idxs] - self.pupil['pupilDiameter_raw'][good_idxs]) + ) if snr < snr_thresh: self.pupil = pd.DataFrame() raise ValueError(f'Pupil diameter SNR ({snr:.2f}) below threshold SNR ({snr_thresh}), removing data.') @@ -1496,7 +1533,7 @@ def _check_video_timestamps(self, view, video_timestamps, video_data): # This is because the first few frames are sometimes not recorded. We can remove the first few # timestamps in this case elif video_timestamps.shape[0] > video_data.shape[0]: - video_timestamps_fixed = video_timestamps[-video_data.shape[0]:] + video_timestamps_fixed = video_timestamps[-video_data.shape[0] :] return video_timestamps_fixed, video_data else: return video_timestamps, video_data @@ -1510,6 +1547,7 @@ class EphysSessionLoader(SessionLoader): To select for a specific probe >>> EphysSessionLoader(eid=eid, one=one, pid=pid) """ + def __init__(self, *args, pname=None, pid=None, **kwargs): """ Needs an active connection in order to get the list of insertions in the session @@ -1549,10 +1587,10 @@ class PhotometrySessionLoader(SessionLoader): def __init__(self, *args, photometry_collection: str = 'photometry', **kwargs): self.photometry_collection = photometry_collection self.revision = kwargs.get('revision', None) - + # determine if loading by eid or session path self.load_by_path = True if 'session_path' in kwargs else False - + super().__init__(*args, **kwargs) def load_session_data(self, **kwargs): @@ -1572,7 +1610,7 @@ def load_photometry( collection=self.photometry_collection, revision=self.revision, ) - else: # load by eid + else: # load by eid raw_dfs = fpio.from_eid( self.eid, self.one, @@ -1594,4 +1632,10 @@ def load_photometry( ) raw_dfs[band] = df.loc[ix] + # the above indexing can lead to unevenly shaped bands. + # Cut to shortest + n = np.min([df.shape[0] for _, df in raw_dfs.items()]) + for band in raw_dfs.keys(): + raw_dfs[band] = raw_dfs[band].iloc[:n] + self.photometry = raw_dfs From 5d2204059bc4c7640952f9f5ce394ffc033b3f27 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Thu, 9 Oct 2025 16:55:56 +0100 Subject: [PATCH 80/80] flake8 --- brainbox/io/one.py | 2 +- ibllib/tests/test_neurophotometrics.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/brainbox/io/one.py b/brainbox/io/one.py index 76d01bff3..ebe9c3f74 100644 --- a/brainbox/io/one.py +++ b/brainbox/io/one.py @@ -1533,7 +1533,7 @@ def _check_video_timestamps(self, view, video_timestamps, video_data): # This is because the first few frames are sometimes not recorded. 
We can remove the first few # timestamps in this case elif video_timestamps.shape[0] > video_data.shape[0]: - video_timestamps_fixed = video_timestamps[-video_data.shape[0] :] + video_timestamps_fixed = video_timestamps[-video_data.shape[0]:] return video_timestamps_fixed, video_data else: return video_timestamps, video_data diff --git a/ibllib/tests/test_neurophotometrics.py b/ibllib/tests/test_neurophotometrics.py index fd57ba209..6f29ce509 100644 --- a/ibllib/tests/test_neurophotometrics.py +++ b/ibllib/tests/test_neurophotometrics.py @@ -2,10 +2,10 @@ import unittest import tempfile -from pathlib import Path -import iblphotometry_tests -from ibllib.pipes.neurophotometrics import FibrePhotometryBpodSync -from ibllib.io import session_params +# from pathlib import Path +# import iblphotometry_tests +# from ibllib.pipes.neurophotometrics import FibrePhotometryBpodSync +# from ibllib.io import session_params # Mock suit2p which is imported in MesoscopePreprocess # attrs = {'default_ops.return_value': {}}
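
Illustrative note (not part of the patch series above): a minimal usage sketch of the PhotometrySessionLoader added in PATCH 77 and adjusted in PATCH 79, assuming an ibllib/iblphotometry environment with an ONE connection and a session that has photometry registered under the default 'photometry' collection. The eid below is a placeholder, and the loop at the end is only there to show the shape of the returned data.

    # Usage sketch under the assumptions stated above; the eid is a placeholder.
    from one.api import ONE
    from brainbox.io.one import PhotometrySessionLoader

    one = ONE()
    eid = 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx'  # placeholder session id

    # load by eid (passing session_path instead would switch to path-based loading)
    psl = PhotometrySessionLoader(one=one, eid=eid, photometry_collection='photometry')
    psl.load_trials()
    # keep 5 s of signal before the first trial and after the last trial,
    # as done by load_photometry when restrict_to_session=True
    psl.load_photometry(restrict_to_session=True, pre=5, post=5)

    # psl.photometry is a dict of per-band dataframes indexed by time;
    # after PATCH 79 all bands are truncated to the same number of samples
    for band, df in psl.photometry.items():
        print(band, df.shape, df.index.values[0], df.index.values[-1])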