Skip to content

Commit e1eb551

Browse files
authored
Merge pull request #844 from int-brain-lab/aggregate_training
Aggregate training
2 parents d26b34e + f095bd7 commit e1eb551

File tree

4 files changed

+51
-41
lines changed

4 files changed

+51
-41
lines changed

ibllib/ephys/ephysqc.py

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -205,57 +205,65 @@ def run(self, update: bool = False, overwrite: bool = True, stream: bool = None,
205205
return qc_files
206206

207207

208-
def rmsmap(sglx):
208+
def rmsmap(sglx, spectra=True, nmod=1):
209209
"""
210210
Computes RMS map in time domain and spectra for each channel of Neuropixel probe
211211
212212
:param sglx: Open spikeglx reader
213+
:param spectra: Whether to compute the spectra
214+
:param nmod: take every nmod-th window, for cases where we don't want to compute over the whole signal
213215
:return: a dictionary with amplitudes in channeltime space, channelfrequency space, time
214216
and frequency scales
215217
"""
216218
rms_win_length_samples = 2 ** np.ceil(np.log2(sglx.fs * RMS_WIN_LENGTH_SECS))
217219
# the window generator will generates window indices
218220
wingen = utils.WindowGenerator(ns=sglx.ns, nswin=rms_win_length_samples, overlap=0)
221+
nwin = np.ceil(wingen.nwin / nmod).astype(int)
219222
# pre-allocate output dictionary of numpy arrays
220-
win = {'TRMS': np.zeros((wingen.nwin, sglx.nc)),
221-
'nsamples': np.zeros((wingen.nwin,)),
223+
win = {'TRMS': np.zeros((nwin, sglx.nc)),
224+
'nsamples': np.zeros((nwin,)),
222225
'fscale': fourier.fscale(WELCH_WIN_LENGTH_SAMPLES, 1 / sglx.fs, one_sided=True),
223-
'tscale': wingen.tscale(fs=sglx.fs)}
226+
'tscale': wingen.tscale(fs=sglx.fs)[::nmod]}
224227
win['spectral_density'] = np.zeros((len(win['fscale']), sglx.nc))
225228
# loop through the whole session
226229
with tqdm(total=wingen.nwin) as pbar:
227-
for first, last in wingen.firstlast:
230+
for iwindow, (first, last) in enumerate(wingen.firstlast):
231+
if np.mod(iwindow, nmod) != 0:
232+
continue
233+
228234
D = sglx.read_samples(first_sample=first, last_sample=last)[0].transpose()
229235
# remove low frequency noise below 1 Hz
230236
D = fourier.hp(D, 1 / sglx.fs, [0, 1])
231-
iw = wingen.iw
237+
iw = np.floor(wingen.iw / nmod).astype(int)
232238
win['TRMS'][iw, :] = utils.rms(D)
233239
win['nsamples'][iw] = D.shape[1]
234-
# the last window may be smaller than what is needed for welch
235-
if last - first < WELCH_WIN_LENGTH_SAMPLES:
236-
continue
237-
# compute a smoothed spectrum using welch method
238-
_, w = signal.welch(
239-
D, fs=sglx.fs, window='hann', nperseg=WELCH_WIN_LENGTH_SAMPLES,
240-
detrend='constant', return_onesided=True, scaling='density', axis=-1
241-
)
242-
win['spectral_density'] += w.T
240+
if spectra:
241+
# the last window may be smaller than what is needed for welch
242+
if last - first < WELCH_WIN_LENGTH_SAMPLES:
243+
continue
244+
# compute a smoothed spectrum using welch method
245+
_, w = signal.welch(
246+
D, fs=sglx.fs, window='hann', nperseg=WELCH_WIN_LENGTH_SAMPLES,
247+
detrend='constant', return_onesided=True, scaling='density', axis=-1
248+
)
249+
win['spectral_density'] += w.T
243250
# print at least every 20 windows
244251
if (iw % min(20, max(int(np.floor(wingen.nwin / 75)), 1))) == 0:
245252
pbar.update(iw)
246253
sglx.close()
247254
return win
248255

249256

250-
def extract_rmsmap(sglx, out_folder=None, overwrite=False):
257+
def extract_rmsmap(sglx, out_folder=None, overwrite=False, spectra=True, nmod=1):
251258
"""
252259
Wrapper for rmsmap that outputs _ibl_ephysRmsMap and _ibl_ephysSpectra ALF files
253260
254261
:param sglx: Open spikeglx Reader with data for which to compute rmsmap
255262
:param out_folder: folder in which to store output ALF files. Default uses the folder in which
256263
the `fbin` file lives.
257264
:param overwrite: do not re-extract if all ALF files already exist
258-
:param label: string or list of strings that will be appended to the filename before extension
265+
:param spectra: Whether to compute the spectral density across the signal
266+
:param nmod: take every nmod-th window, for cases where we don't want to compute over the whole signal
259267
:return: None
260268
"""
261269
if out_folder is None:
@@ -271,18 +279,19 @@ def extract_rmsmap(sglx, out_folder=None, overwrite=False):
271279
_logger.warning(f'RMS map already exists for .{sglx.type} data in {out_folder}, skipping. Use overwrite option.')
272280
return files_time + files_freq
273281
# crunch numbers
274-
rms = rmsmap(sglx)
282+
rms = rmsmap(sglx, spectra=spectra, nmod=nmod)
275283
# output ALF files, single precision with the optional label as suffix before extension
276284
if not out_folder.exists():
277285
out_folder.mkdir()
278286
tdict = {'rms': rms['TRMS'].astype(np.single), 'timestamps': rms['tscale'].astype(np.single)}
279-
fdict = {'power': rms['spectral_density'].astype(np.single),
280-
'freqs': rms['fscale'].astype(np.single)}
281287
out_time = alfio.save_object_npy(
282288
out_folder, object=alf_object_time, dico=tdict, namespace='iblqc')
283-
out_freq = alfio.save_object_npy(
284-
out_folder, object=alf_object_freq, dico=fdict, namespace='iblqc')
285-
return out_time + out_freq
289+
if spectra:
290+
fdict = {'power': rms['spectral_density'].astype(np.single),
291+
'freqs': rms['fscale'].astype(np.single)}
292+
out_freq = alfio.save_object_npy(
293+
out_folder, object=alf_object_freq, dico=fdict, namespace='iblqc')
294+
return out_time + out_freq if spectra else out_time
286295

287296

288297
def raw_qc_session(session_path, overwrite=False):

ibllib/io/extractors/ephys_fpga.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1083,7 +1083,7 @@ def get_audio_event_times(self, sync, chmap, audio_event_ttls=None, display=Fals
10831083
if audio_event_ttls is None:
10841084
# For training/biased/ephys protocols, the ready tone should be below 110 ms. The error
10851085
# tone should be between 400ms and 1200ms
1086-
audio_event_ttls = {'ready_tone': (0, 0.11), 'error_tone': (0.4, 1.2)}
1086+
audio_event_ttls = {'ready_tone': (0, 0.1101), 'error_tone': (0.4, 1.2)}
10871087
audio_event_intervals = self._assign_events(audio['times'], audio['polarities'], audio_event_ttls, display=display)
10881088

10891089
return audio, audio_event_intervals

ibllib/pipes/dynamic_pipeline.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,7 @@ def make_pipeline(session_path, **pkwargs):
532532
# The PostDLC plots require a trials object for QC
533533
# Find the first task that outputs a trials.table dataset
534534
trials_task = (
535-
t for t in tasks.values() if any('trials.table' in f for f in t.signature.get('output_files', []))
535+
t for t in tasks.values() if any('trials.table' in f[0] for f in t.signature.get('output_files', []))
536536
)
537537
if trials_task := next(trials_task, None):
538538
parents = [tasks['DLC'], tasks[f'VideoSyncQC_{sync}'], trials_task]

ibllib/pipes/training_status.py

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ def load_combined_trials(sess_paths, one, force=True):
215215
return training.concatenate_trials(trials_dict)
216216

217217

218-
def get_latest_training_information(sess_path, one):
218+
def get_latest_training_information(sess_path, one, save=True):
219219
"""
220220
Extracts the latest training status.
221221
@@ -262,7 +262,8 @@ def get_latest_training_information(sess_path, one):
262262
df = df.sort_values('date')
263263
df = df.reset_index(drop=True)
264264
# Save our dataframe
265-
save_dataframe(df, subj_path)
265+
if save:
266+
save_dataframe(df, subj_path)
266267

267268
# Now go through the backlog and compute the training status for sessions. If for example one was missing as it is cumulative
268269
# we need to go through and compute all the backlog
@@ -288,10 +289,10 @@ def get_latest_training_information(sess_path, one):
288289
if 'ready4ephysrig' not in tr_st:
289290
sess = un_df.iloc[39].session_path
290291
df.loc[df['session_path'] == sess, 'training_status'] = 'unbiasable'
292+
if save:
293+
save_dataframe(df, subj_path)
291294

292-
save_dataframe(df, subj_path)
293-
294-
if one.mode != 'local':
295+
if one.mode != 'local' and save:
295296
upload_training_table_to_aws(lab, sub)
296297

297298
return df
@@ -519,11 +520,11 @@ def get_sess_dict(session_path, one, protocol, alf_collections=None, raw_collect
519520
sess_dict['n_delay'] = np.nan
520521
sess_dict['location'] = np.nan
521522
sess_dict['training_status'] = 'habituation'
522-
sess_dict['bias_50'], sess_dict['thres_50'], sess_dict['lapsehigh_50'], sess_dict['lapselow_50'] = \
523+
sess_dict['bias_50'], sess_dict['thres_50'], sess_dict['lapselow_50'], sess_dict['lapsehigh_50'] = \
523524
(np.nan, np.nan, np.nan, np.nan)
524-
sess_dict['bias_20'], sess_dict['thres_20'], sess_dict['lapsehigh_20'], sess_dict['lapselow_20'] = \
525+
sess_dict['bias_20'], sess_dict['thres_20'], sess_dict['lapselow_20'], sess_dict['lapsehigh_20'] = \
525526
(np.nan, np.nan, np.nan, np.nan)
526-
sess_dict['bias_80'], sess_dict['thres_80'], sess_dict['lapsehigh_80'], sess_dict['lapselow_80'] = \
527+
sess_dict['bias_80'], sess_dict['thres_80'], sess_dict['lapselow_80'], sess_dict['lapsehigh_80'] = \
527528
(np.nan, np.nan, np.nan, np.nan)
528529

529530
else:
@@ -534,18 +535,18 @@ def get_sess_dict(session_path, one, protocol, alf_collections=None, raw_collect
534535

535536
sess_dict['performance'], sess_dict['contrasts'], _ = training.compute_performance(trials, prob_right=True)
536537
if sess_dict['task_protocol'] == 'training':
537-
sess_dict['bias_50'], sess_dict['thres_50'], sess_dict['lapsehigh_50'], sess_dict['lapselow_50'] = \
538+
sess_dict['bias_50'], sess_dict['thres_50'], sess_dict['lapselow_50'], sess_dict['lapsehigh_50'] = \
538539
training.compute_psychometric(trials)
539-
sess_dict['bias_20'], sess_dict['thres_20'], sess_dict['lapsehigh_20'], sess_dict['lapselow_20'] = \
540+
sess_dict['bias_20'], sess_dict['thres_20'], sess_dict['lapselow_20'], sess_dict['lapsehigh_20'] = \
540541
(np.nan, np.nan, np.nan, np.nan)
541-
sess_dict['bias_80'], sess_dict['thres_80'], sess_dict['lapsehigh_80'], sess_dict['lapselow_80'] = \
542+
sess_dict['bias_80'], sess_dict['thres_80'], sess_dict['lapselow_80'], sess_dict['lapsehigh_80'] = \
542543
(np.nan, np.nan, np.nan, np.nan)
543544
else:
544-
sess_dict['bias_50'], sess_dict['thres_50'], sess_dict['lapsehigh_50'], sess_dict['lapselow_50'] = \
545+
sess_dict['bias_50'], sess_dict['thres_50'], sess_dict['lapselow_50'], sess_dict['lapsehigh_50'] = \
545546
training.compute_psychometric(trials, block=0.5)
546-
sess_dict['bias_20'], sess_dict['thres_20'], sess_dict['lapsehigh_20'], sess_dict['lapselow_20'] = \
547+
sess_dict['bias_20'], sess_dict['thres_20'], sess_dict['lapselow_20'], sess_dict['lapsehigh_20'] = \
547548
training.compute_psychometric(trials, block=0.2)
548-
sess_dict['bias_80'], sess_dict['thres_80'], sess_dict['lapsehigh_80'], sess_dict['lapselow_80'] = \
549+
sess_dict['bias_80'], sess_dict['thres_80'], sess_dict['lapselow_80'], sess_dict['lapsehigh_80'] = \
549550
training.compute_psychometric(trials, block=0.8)
550551

551552
sess_dict['performance_easy'] = training.compute_performance_easy(trials)
@@ -646,8 +647,8 @@ def get_training_info_for_session(session_paths, one, force=True):
646647
for bias in [50, 20, 80]:
647648
sess_dict[f'combined_bias_{bias}'] = psychs[f'{bias}'][0]
648649
sess_dict[f'combined_thres_{bias}'] = psychs[f'{bias}'][1]
649-
sess_dict[f'combined_lapsehigh_{bias}'] = psychs[f'{bias}'][2]
650-
sess_dict[f'combined_lapselow_{bias}'] = psychs[f'{bias}'][3]
650+
sess_dict[f'combined_lapselow_{bias}'] = psychs[f'{bias}'][2]
651+
sess_dict[f'combined_lapsehigh_{bias}'] = psychs[f'{bias}'][3]
651652

652653
# Case where two sessions on same day with different number of contrasts! Oh boy
653654
if sess_dict['combined_performance'].size != sess_dict['performance'].size:

0 commit comments

Comments
 (0)