From c8f03bdd226d82b2899138da34874c2486b8ba42 Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 17 Jan 2025 17:50:59 +0000 Subject: [PATCH 01/36] Add PSTH based metrics --- src/iblphotometry/metrics.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 77f00d1..4c72e00 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -178,6 +178,31 @@ def has_responses( return np.any(res) +def response_variability_ratio(A: pd.Series, events: np.ndarray, window: tuple = (0, 1)): + signal = A.values.squeeze() + assert signal.ndim == 1 + tpts = A.index.values + dt = np.median(np.diff(tpts)) + events = events[events + window[1] < tpts.max()] + event_inds = tpts.searchsorted(events) + i0s = event_inds - int(window[0] / dt) + i1s = event_inds + int(window[1] / dt) + responses = np.row_stack([signal[i0:i1] for i0, i1 in zip(i0s, i1s)]) + responses = (responses.T - signal[event_inds]).T + return (responses).mean(axis=0).var() / (responses).var(axis=0).mean() + +def response_magnitude(A: pd.Series, events: np.ndarray, window: tuple = (0, 1)): + signal = A.values.squeeze() + assert signal.ndim == 1 + tpts = A.index.values + dt = np.median(np.diff(tpts)) + events = events[events + window[1] < tpts.max()] + event_inds = tpts.searchsorted(events) + i0s = event_inds - int(window[0] / dt) + i1s = event_inds + int(window[1] / dt) + responses = np.row_stack([signal[i0:i1] for i0, i1 in zip(i0s, i1s)]) + responses = (responses.T - signal[event_inds]).T + return np.abs(responses.mean(axis=0)).sum() def low_freq_power_ratio(A: pd.Series, f_cutoff: float = 3.18) -> float: """ From 0eae5b4be734d3c80dc9a5760a503391c8f3ae30 Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 17 Jan 2025 17:52:03 +0000 Subject: [PATCH 02/36] Allow flexible input dtype --- src/iblphotometry/metrics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 4c72e00..7dd5507 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -230,7 +230,7 @@ def low_freq_power_ratio(A: pd.Series, f_cutoff: float = 3.18) -> float: return psd[freqs <= f_cutoff].sum() / psd.sum() -def spectral_entropy(A: pd.Series, eps: float = np.finfo('float').eps) -> float: +def spectral_entropy(A: pd.Series | np.ndarray, eps: float = np.finfo('float').eps) -> float: """ Compute the normalized entropy of the signal power spectral density and return a metric (1 - entropy) that is low (0) if all frequency components @@ -245,7 +245,7 @@ def spectral_entropy(A: pd.Series, eps: float = np.finfo('float').eps) -> float: eps : small number added to the PSD for numerical stability """ - signal = A.copy() + signal = A.values if isinstance(A, pd.Series) else A assert signal.ndim == 1 # only 1D for now # Compute power spectral density psd = np.abs(np.fft.rfft(signal - signal.mean())) ** 2 @@ -259,7 +259,7 @@ def spectral_entropy(A: pd.Series, eps: float = np.finfo('float').eps) -> float: return 1 - norm_entropy -def ar_score(A: pd.Series) -> float: +def ar_score(A: pd.Series | np.ndarray) -> float: """ R-squared from an AR(1) model fit to the signal as a measure of the temporal structure present in the signal. 
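(A minimal usage sketch for the two PSTH-based metrics added in this patch; the sampling rate, event times, and synthetic signal are invented for illustration, and the import assumes the package is importable as iblphotometry.)

    import numpy as np
    import pandas as pd
    from iblphotometry.metrics import response_magnitude, response_variability_ratio

    fs = 30.0                                  # invented sampling rate (Hz)
    t = np.arange(0, 600, 1 / fs)              # 10 min recording
    events = np.arange(10.0, 590.0, 20.0)      # invented event times (s)
    signal = np.random.randn(t.size) * 0.1     # baseline noise
    n = int(fs)                                # 1 s of samples
    for i0 in np.searchsorted(t, events):      # add a stereotyped 1 s response after each event
        signal[i0:i0 + n] += np.hanning(n)
    F = pd.Series(signal, index=t)

    # With stereotyped responses, the variance of the mean response dominates the
    # across-event variance at each time point, so the ratio comes out well above 1;
    # for event-unrelated noise it falls toward 0.
    print(response_variability_ratio(F, events, window=(0, 1)))
    print(response_magnitude(F, events, window=(0, 1)))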
@@ -271,7 +271,7 @@ def ar_score(A: pd.Series) -> float: times in the index """ # Pull signal out of pandas series - signal = A.values + signal = A.values if isinstance(A, pd.Series) else A assert signal.ndim == 1 # only 1D for now X = signal[:-1] y = signal[1:] From 8ebb16d41b71b0beb94e1d0af9eb90611311a3aa Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 17 Jan 2025 17:52:25 +0000 Subject: [PATCH 03/36] Update n_unique_samples docstring --- src/iblphotometry/metrics.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 7dd5507..a5bf004 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -63,7 +63,11 @@ def signal_skew(A: pd.Series | np.ndarray, axis=-1) -> float: def n_unique_samples(A: pd.Series | np.ndarray) -> int: - """number of unique samples in the signal. Low values indicate that the signal during acquisition was not within the range of the digitizer.""" + """ + Number of unique samples in the signal, expressed as a fraction of the + total number of samples. Low values indicate that the signal was not within + the range of the digitizer during acquisition. + """ a = A.values if isinstance(A, pd.Series) else A return np.unique(a).shape[0] From a9750f73faf662e6187a8fdba05de04290ec126d Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 17 Jan 2025 17:52:44 +0000 Subject: [PATCH 04/36] Normalize n_unique_samples by total samples --- src/iblphotometry/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index a5bf004..dd57e72 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -69,7 +69,7 @@ def n_unique_samples(A: pd.Series | np.ndarray) -> int: the range of the digitizer during acquisition. 
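(A toy illustration of the under-digitization case this docstring describes; the numbers are invented. A signal spanning only a fraction of one digitizer step collapses onto a handful of levels, so the unique-sample count is low relative to the number of samples.)

    import numpy as np

    weak = np.random.randn(10_000) * 0.4      # signal much smaller than one ADC step
    digitized = np.round(weak).astype(int)    # quantized to a handful of levels
    print(np.unique(digitized).shape[0])      # only a few unique values out of 10_000 samples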
""" a = A.values if isinstance(A, pd.Series) else A - return np.unique(a).shape[0] + return np.unique(a).shape[0] / a.shape[0] def n_spikes(A: pd.Series | np.ndarray, sd: int = 5): From 8d4e5cc8ee3caf64d390f8c994ef688318127e05 Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 17 Jan 2025 17:53:18 +0000 Subject: [PATCH 05/36] Spike detection for both positive and negative jumps --- src/iblphotometry/metrics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index dd57e72..963c797 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -75,7 +75,8 @@ def n_unique_samples(A: pd.Series | np.ndarray) -> int: def n_spikes(A: pd.Series | np.ndarray, sd: int = 5): """count the number of spike artifacts in the recording.""" a = A.values if isinstance(A, pd.Series) else A - return detect_spikes(a, sd=sd).shape[0] + # return detect_spikes(a, sd=sd).shape[0] + return (np.abs(z(np.diff(a))) > sd).sum() def n_outliers( From 34ca19e6e6dbf182d2984f008161f383a2c29feb Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 17 Jan 2025 17:53:48 +0000 Subject: [PATCH 06/36] Add check for dead signals --- src/iblphotometry/metrics.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 963c797..596b75d 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -278,6 +278,8 @@ def ar_score(A: pd.Series | np.ndarray) -> float: # Pull signal out of pandas series signal = A.values if isinstance(A, pd.Series) else A assert signal.ndim == 1 # only 1D for now + if len(np.unique(signal)) == 1: + return np.nan X = signal[:-1] y = signal[1:] res = stats.linregress(X, y) From b2782ebe8857afb0c88d85658757ab3c7b96b87a Mon Sep 17 00:00:00 2001 From: davcrom Date: Wed, 12 Mar 2025 16:30:17 +0000 Subject: [PATCH 07/36] Allow AR(n) models in ar_score --- src/iblphotometry/metrics.py | 42 ++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 596b75d..54766f6 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -264,26 +264,46 @@ def spectral_entropy(A: pd.Series | np.ndarray, eps: float = np.finfo('float').e return 1 - norm_entropy -def ar_score(A: pd.Series | np.ndarray) -> float: +def ar_score(A: pd.Series | np.ndarray, order: int = 2) -> float: """ - R-squared from an AR(1) model fit to the signal as a measure of the temporal + R-squared from an AR(n) model fit to the signal as a measure of the temporal structure present in the signal. Parameters ---------- - A : - the signal time series with signal values in the columns and sample - times in the index + A : pd.Series or np.ndarray + The signal time series with signal values in the columns and sample + times in the index. + order : int, optional + The order of the AR model. Default is 2. + + Returns + ------- + float + The R-squared value indicating the variance explained by the AR model. + Returns NaN if the signal is constant. """ - # Pull signal out of pandas series + # Pull signal out of pandas Series if needed signal = A.values if isinstance(A, pd.Series) else A - assert signal.ndim == 1 # only 1D for now + assert signal.ndim == 1, "Signal must be 1-dimensional." 
+ + # Handle constant signal case if len(np.unique(signal)) == 1: return np.nan - X = signal[:-1] - y = signal[1:] - res = stats.linregress(X, y) - return res.rvalue**2 + + # Create design matrix X and target vector y based on AR order + X = np.column_stack([signal[i:len(signal) - order + i] for i in range(order)]) + y = signal[order:] + + # Fit linear regression using least squares + _, residual, _, _ = np.linalg.lstsq(X, y) + + # Calculate R-squared using residuals + ss_residual = residual[0] + ss_total = np.sum((y - np.mean(y)) ** 2) + r_squared = 1 - (ss_residual / ss_total) + + return r_squared def noise_simulation( From 80a3166a1d5231c7d6433fda3736a3788ef2843f Mon Sep 17 00:00:00 2001 From: davcrom Date: Wed, 12 Mar 2025 16:33:48 +0000 Subject: [PATCH 08/36] Revert to original spike detection --- src/iblphotometry/metrics.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 54766f6..d87faec 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -75,8 +75,7 @@ def n_unique_samples(A: pd.Series | np.ndarray) -> int: def n_spikes(A: pd.Series | np.ndarray, sd: int = 5): """count the number of spike artifacts in the recording.""" a = A.values if isinstance(A, pd.Series) else A - # return detect_spikes(a, sd=sd).shape[0] - return (np.abs(z(np.diff(a))) > sd).sum() + return detect_spikes(a, sd=sd).shape[0] def n_outliers( From 9f7c092ef1a057369bd6810dfdaac2a302b6829f Mon Sep 17 00:00:00 2001 From: davcrom Date: Thu, 27 Mar 2025 12:18:38 +0000 Subject: [PATCH 09/36] ruff format --- src/iblphotometry/metrics.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index d87faec..0f74c43 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -182,7 +182,10 @@ def has_responses( return np.any(res) -def response_variability_ratio(A: pd.Series, events: np.ndarray, window: tuple = (0, 1)): + +def response_variability_ratio( + A: pd.Series, events: np.ndarray, window: tuple = (0, 1) +): signal = A.values.squeeze() assert signal.ndim == 1 tpts = A.index.values @@ -195,6 +198,7 @@ def response_variability_ratio(A: pd.Series, events: np.ndarray, window: tuple = responses = (responses.T - signal[event_inds]).T return (responses).mean(axis=0).var() / (responses).var(axis=0).mean() + def response_magnitude(A: pd.Series, events: np.ndarray, window: tuple = (0, 1)): signal = A.values.squeeze() assert signal.ndim == 1 @@ -208,6 +212,7 @@ def response_magnitude(A: pd.Series, events: np.ndarray, window: tuple = (0, 1)) responses = (responses.T - signal[event_inds]).T return np.abs(responses.mean(axis=0)).sum() + def low_freq_power_ratio(A: pd.Series, f_cutoff: float = 3.18) -> float: """ Fraction of the total signal power contained below a given cutoff frequency. 
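(For reference, a toy worked example of the lag matrix built by the AR(n) version of ar_score introduced in PATCH 07 above; the signal values are invented.)

    import numpy as np

    signal = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
    order = 2
    X = np.column_stack([signal[i: len(signal) - order + i] for i in range(order)])
    y = signal[order:]
    # X -> [[1, 2], [2, 3], [3, 4], [4, 5]]   (lags t-2 and t-1)
    # y -> [3, 4, 5, 6]                       (value at t)
    coef, residual, rank, sv = np.linalg.lstsq(X, y, rcond=None)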
@@ -234,7 +239,9 @@ def low_freq_power_ratio(A: pd.Series, f_cutoff: float = 3.18) -> float: return psd[freqs <= f_cutoff].sum() / psd.sum() -def spectral_entropy(A: pd.Series | np.ndarray, eps: float = np.finfo('float').eps) -> float: +def spectral_entropy( + A: pd.Series | np.ndarray, eps: float = np.finfo('float').eps +) -> float: """ Compute the normalized entropy of the signal power spectral density and return a metric (1 - entropy) that is low (0) if all frequency components @@ -284,14 +291,14 @@ def ar_score(A: pd.Series | np.ndarray, order: int = 2) -> float: """ # Pull signal out of pandas Series if needed signal = A.values if isinstance(A, pd.Series) else A - assert signal.ndim == 1, "Signal must be 1-dimensional." + assert signal.ndim == 1, 'Signal must be 1-dimensional.' # Handle constant signal case if len(np.unique(signal)) == 1: return np.nan # Create design matrix X and target vector y based on AR order - X = np.column_stack([signal[i:len(signal) - order + i] for i in range(order)]) + X = np.column_stack([signal[i : len(signal) - order + i] for i in range(order)]) y = signal[order:] # Fit linear regression using least squares From 168445c6b0157ef0d0a9dea6383e81773e94334e Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 4 Apr 2025 17:21:52 +0100 Subject: [PATCH 10/36] Add dt_violations metric --- src/iblphotometry/metrics.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 0f74c43..d012b08 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -13,6 +13,23 @@ from iblphotometry.behavior import psth +def dt_violations(A: pd.DataFrame | pd.Series, atol: float = 1e-3) -> str: + t = A.index.values + dts = np.diff(t) + n_violations = np.sum((dts - np.median(dts)) > atol) + ## TODO: make ibllib wrapper to convert metrics to QC vals + # if n_violations == 0: + # outcome = QC.PASS + # elif n_violations <= 3: + # outcome = QC.WARNING + # elif n_violations <= 10: + # outcome = QC.CRITICAL + # else: + # outcome = QC.FAIL + # return outcome, n_violations + return n_violations + + def percentile_dist(A: pd.Series | np.ndarray, pc: tuple = (50, 95), axis=-1) -> float: """the distance between two percentiles in units of z. Captures the magnitude of transients. 
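(A small illustrative check of the dt_violations metric added in this patch; the 30 Hz time base and the dropped sample are fabricated, and the import assumes the module layout above.)

    import numpy as np
    import pandas as pd
    from iblphotometry.metrics import dt_violations

    t = np.delete(np.arange(0, 10, 1 / 30), 100)   # regular 30 Hz time base with one dropped sample
    F = pd.Series(np.random.randn(t.size), index=t)
    print(dt_violations(F, atol=1e-3))             # the single ~66 ms gap counts as one violation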
From aff9a50b7c95b9766cddcf9518414532ff113682 Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 4 Apr 2025 17:22:05 +0100 Subject: [PATCH 11/36] Add interleaved acquisition metric --- src/iblphotometry/metrics.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index d012b08..d0e918f 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -30,6 +30,26 @@ def dt_violations(A: pd.DataFrame | pd.Series, atol: float = 1e-3) -> str: return n_violations +def _fill_missing_channel_names(A: pd.Series) -> pd.Series: + a = A.copy() + missing_inds = np.where(a['name'] == '')[0] + missing_idxs = a.iloc[missing_inds].index + prev_idxs = a.iloc[missing_inds - 1].index + name_alternator = {'GCaMP':'Isosbestic', 'Isosbestic':'GCaMP', '':''} + while len(a[a['name'] == '']) > 0: + a.loc[missing_idxs, 'name'] = [name_alternator[prev] for prev in a.loc[prev_idxs, 'name']] + return a + + +def interleaved_acquisition(A: pd.DataFrame | pd.Series) -> float: + if sum(A['name'] == '') > 0: + A = _fill_missing_channel_names(A) + a = A['name'].values if isinstance(A, pd.DataFrame) else A.values + even_check = np.all(a[::2] == a[0]) + odd_check = np.all(a[1::2] == a[1]) + return bool(even_check & odd_check) + + def percentile_dist(A: pd.Series | np.ndarray, pc: tuple = (50, 95), axis=-1) -> float: """the distance between two percentiles in units of z. Captures the magnitude of transients. From 058e887a427ed76e2a6f6367c351b51f76309db8 Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 4 Apr 2025 17:22:37 +0100 Subject: [PATCH 12/36] Add sliding_deviance metric --- src/iblphotometry/metrics.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index d0e918f..41c29b1 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd from scipy import stats +from scipy.signal import medfilt from iblphotometry.processing import ( z, @@ -70,6 +71,15 @@ def percentile_dist(A: pd.Series | np.ndarray, pc: tuple = (50, 95), axis=-1) -> return P[1] - P[0] +def sliding_deviance( + A: pd.Series | np.ndarray, + w_len: int = 151, +) -> float: + a = A.values if isinstance(A, pd.Series) else A + running_median = medfilt(a, kernel_size=w_len) + return np.median(np.abs(a - running_median) / running_median) + + def signal_asymmetry(A: pd.Series | np.ndarray, pc_comp: int = 95, axis=-1) -> float: """the ratio between the distance of two percentiles to the median. Proportional to the the signal to noise. From e09eed3d22f30cc0c901bda2e75bc02b01cea64e Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 4 Apr 2025 17:23:53 +0100 Subject: [PATCH 13/36] Add f_unique_samples - revert n_unique_samples to number instead of fraction --- src/iblphotometry/metrics.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 41c29b1..a793eb6 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -111,15 +111,22 @@ def signal_skew(A: pd.Series | np.ndarray, axis=-1) -> float: def n_unique_samples(A: pd.Series | np.ndarray) -> int: """ - Number of unique samples in the signal, expressed as a fraction of the - total number of samples. Low values indicate that the signal was not within - the range of the digitizer during acquisition. + Number of unique samples in the signal. 
Low values indicate that the signal + was not within the range of the digitizer during acquisition. """ a = A.values if isinstance(A, pd.Series) else A - return np.unique(a).shape[0] / a.shape[0] + return np.unique(a).shape[0] def n_spikes(A: pd.Series | np.ndarray, sd: int = 5): +def f_unique_samples(A: pd.Series | np.ndarray) -> int: + """ + Wrapper that converts n_unique_samples to a fraction of the total number + of samples. + """ + return n_unique_samples(A) / len(A) + + """count the number of spike artifacts in the recording.""" a = A.values if isinstance(A, pd.Series) else A return detect_spikes(a, sd=sd).shape[0] From e0e332c6494ca793135264672a6b8ff0bfd0ac90 Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 4 Apr 2025 17:26:51 +0100 Subject: [PATCH 14/36] Fix bug in remove spikes - now pass t to detect_spikes rather than y --- src/iblphotometry/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py index 8ab3f48..765e559 100644 --- a/src/iblphotometry/processing.py +++ b/src/iblphotometry/processing.py @@ -603,7 +603,7 @@ def detect_spikes(t: np.ndarray, sd: int = 5): def remove_spikes(F: pd.Series, sd: int = 5, w: int = 25): y, t = F.values, F.index.values y = copy(y) - outliers = detect_spikes(y, sd=sd) + outliers = detect_spikes(t, sd=sd) y[outliers] = np.nan try: y = fillnan_kde(y, w=w) From 787b07bfdaffad0d00386c057f5792d1568e4247 Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 4 Apr 2025 17:27:07 +0100 Subject: [PATCH 15/36] Duplicate n_spikes for dt and dy --- src/iblphotometry/metrics.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index a793eb6..80848a8 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -118,7 +118,6 @@ def n_unique_samples(A: pd.Series | np.ndarray) -> int: return np.unique(a).shape[0] -def n_spikes(A: pd.Series | np.ndarray, sd: int = 5): def f_unique_samples(A: pd.Series | np.ndarray) -> int: """ Wrapper that converts n_unique_samples to a fraction of the total number @@ -127,9 +126,16 @@ def f_unique_samples(A: pd.Series | np.ndarray) -> int: return n_unique_samples(A) / len(A) +def n_spikes_dt(A: pd.Series | np.ndarray, sd: int = 5): """count the number of spike artifacts in the recording.""" - a = A.values if isinstance(A, pd.Series) else A - return detect_spikes(a, sd=sd).shape[0] + t = A.times if isinstance(A, pd.Series) else A + return detect_spikes(t, sd=sd).shape[0] + + +def n_spikes_dy(A: pd.Series | np.ndarray, sd: int = 5): + """count the number of spike artifacts in the recording.""" + y = A.values if isinstance(A, pd.Series) else A + return detect_spikes(y, sd=sd).shape[0] def n_outliers( From 51fa9c005f9edc3de584a629d604204e5efb5d7b Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 4 Apr 2025 17:27:35 +0100 Subject: [PATCH 16/36] Suggest simple nan fill method with local median --- src/iblphotometry/processing.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py index 765e559..90dcd4c 100644 --- a/src/iblphotometry/processing.py +++ b/src/iblphotometry/processing.py @@ -617,6 +617,17 @@ def remove_spikes(F: pd.Series, sd: int = 5, w: int = 25): return pd.Series(y, index=t) +## TODO: consider this simple interpolation method that uses the local median +# def remove_spikes(F: pd.Series, sd: int = 5, w: int = 5): +# f = F.copy() +# y, t = f.values, 
f.index.values +# outliers = detect_spikes(y, sd=sd) +# outliers = np.unique(np.concatenate([outliers - 1, outliers])) +# i0s = (outliers - w).clip(0) +# i1s = outliers + w +# y[outliers] = [np.nanmedian(y[i0:i1]) for i0, i1 in zip(i0s, i1s)] +# return pd.Series(y, index=t) + """ ###### ## #### ######## #### ## ## ###### ####### ######## ######## ######## ### ######## #### ####### ## ## ###### From 5f519b9aedc516cd61e7042268f4bd221261232c Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 4 Apr 2025 17:28:27 +0100 Subject: [PATCH 17/36] Add sliding_expmax_violation - takes sum of all points that are above the expected maximum value --- src/iblphotometry/metrics.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 80848a8..4839974 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -148,6 +148,22 @@ def n_outliers( return detect_outliers(a, w_size=w_size, alpha=alpha).shape[0] +def _expected_max_gauss(x): + """ + https://math.stackexchange.com/questions/89030/expectation-of-the-maximum-of-gaussian-random-variables + """ + return np.mean(x) + np.std(x) * np.sqrt(2 * np.log10(len(x))) + + +## TODO: this doesn't need to be sliding, qc.run_qc takes care of it +def sliding_expmax_violation( + A: pd.Series, + w: int = 151, +) -> float: + exp_max = A.rolling(window=w).apply(expected_max_gauss) + return sum(A[A > exp_max] - exp_max[A > exp_max]) / sum(A > exp_max) + + def bleaching_tau(A: pd.Series) -> float: """overall tau of bleaching.""" y, t = A.values, A.index.values From c4ddc8ac9de7d799963883c4dd52acac3503843e Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 4 Apr 2025 17:28:43 +0100 Subject: [PATCH 18/36] Add photobleaching_amp (WIP) --- src/iblphotometry/metrics.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 4839974..2c53b89 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -172,6 +172,17 @@ def bleaching_tau(A: pd.Series) -> float: return reg.popt[1] +def bleaching_amp(A: pd.Series) -> float: + """overall amplitude of bleaching.""" + # y, t = A.values, A.index.values + # reg = Regression(model=ExponDecay()) + # reg.fit(y, t) + # return reg.popt[0] + rolling_mean = A.rolling(window=1000).mean().dropna() + ## TODO: mean rolling std, not across whole signal + return (rolling_mean.iloc[0] - rolling_mean.iloc[-1]) / A.std() + + def ttest_pre_post( A: pd.Series, trials: pd.DataFrame, From 3c1fd7b6aa73f1ce8c33994e6c3fefa7281d3057 Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 4 Apr 2025 17:29:46 +0100 Subject: [PATCH 19/36] ruff format --- src/iblphotometry/metrics.py | 6 ++++-- src/iblphotometry/processing.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 2c53b89..9824616 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -36,9 +36,11 @@ def _fill_missing_channel_names(A: pd.Series) -> pd.Series: missing_inds = np.where(a['name'] == '')[0] missing_idxs = a.iloc[missing_inds].index prev_idxs = a.iloc[missing_inds - 1].index - name_alternator = {'GCaMP':'Isosbestic', 'Isosbestic':'GCaMP', '':''} + name_alternator = {'GCaMP': 'Isosbestic', 'Isosbestic': 'GCaMP', '': ''} while len(a[a['name'] == '']) > 0: - a.loc[missing_idxs, 'name'] = [name_alternator[prev] for prev in a.loc[prev_idxs, 'name']] + a.loc[missing_idxs, 'name'] = [ + name_alternator[prev] for 
prev in a.loc[prev_idxs, 'name'] + ] return a diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py index 90dcd4c..9d5020b 100644 --- a/src/iblphotometry/processing.py +++ b/src/iblphotometry/processing.py @@ -617,6 +617,7 @@ def remove_spikes(F: pd.Series, sd: int = 5, w: int = 25): return pd.Series(y, index=t) + ## TODO: consider this simple interpolation method that uses the local median # def remove_spikes(F: pd.Series, sd: int = 5, w: int = 5): # f = F.copy() From c07651fe67eb33f306920f04189b404cc29bd134 Mon Sep 17 00:00:00 2001 From: Georg Raiser Date: Tue, 8 Apr 2025 16:05:13 +0100 Subject: [PATCH 20/36] merge not present conflict? --- src/iblphotometry/qc.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/iblphotometry/qc.py b/src/iblphotometry/qc.py index 5276026..0f6266c 100644 --- a/src/iblphotometry/qc.py +++ b/src/iblphotometry/qc.py @@ -58,19 +58,27 @@ def eval_metric( metric: Callable, metric_kwargs: dict | None = None, sliding_kwargs: dict | None = None, + full_output=False, ): + result = {} m = metric(F, **metric_kwargs) if metric_kwargs is not None else metric(F) - + result['value'] = m if sliding_kwargs is not None: S = sliding_metric( F, metric=metric, **sliding_kwargs, metric_kwargs=metric_kwargs ) - r, p = linregress(S.times(), S.values)[2:4] + r, p = linregress(S.index.values, S.values)[2:4] + if full_output: + result['sliding_values'] = S.values + result['sliding_timepoints'] = S.index.values + else: r = np.nan p = np.nan - return dict(value=m, rval=r, pval=p) + result['r'] = r + result['p'] = p + return result def qc_series( From 9109b3813a9b225c41bba52703c44a56224a233c Mon Sep 17 00:00:00 2001 From: davcrom Date: Tue, 8 Apr 2025 16:20:27 +0100 Subject: [PATCH 21/36] Remove always True conditional - if KDE fails, always use global median --- src/iblphotometry/processing.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py index 9d5020b..9293347 100644 --- a/src/iblphotometry/processing.py +++ b/src/iblphotometry/processing.py @@ -608,12 +608,8 @@ def remove_spikes(F: pd.Series, sd: int = 5, w: int = 25): try: y = fillnan_kde(y, w=w) except np.linalg.LinAlgError: - if np.all(pd.isna(y[outliers])): # all are NaN! 
- y[:] = 0 - warnings.warn('all values NaN, setting to zeros') # TODO logger - else: - y[outliers] = np.nanmedian(y) - warnings.warn('KDE fillnan failed, using global median') # TODO logger + y[outliers] = np.nanmedian(y) + warnings.warn('KDE fillnan failed, using global median') # TODO logger return pd.Series(y, index=t) From 287b11f773c2ca995e6b82842341d761d40bcef6 Mon Sep 17 00:00:00 2001 From: davcrom Date: Tue, 8 Apr 2025 16:20:52 +0100 Subject: [PATCH 22/36] Fix bug in n_spikes_dt --- src/iblphotometry/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 9824616..cdb75c9 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -130,7 +130,7 @@ def f_unique_samples(A: pd.Series | np.ndarray) -> int: def n_spikes_dt(A: pd.Series | np.ndarray, sd: int = 5): """count the number of spike artifacts in the recording.""" - t = A.times if isinstance(A, pd.Series) else A + t = A.index.values if isinstance(A, pd.Series) else A return detect_spikes(t, sd=sd).shape[0] From db07a5a7907b03c6e1faa7e2154fa847f5514945 Mon Sep 17 00:00:00 2001 From: davcrom Date: Tue, 8 Apr 2025 16:21:24 +0100 Subject: [PATCH 23/36] Make expmax_violation safe for no outliers case --- src/iblphotometry/metrics.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index cdb75c9..93c9a68 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -158,12 +158,12 @@ def _expected_max_gauss(x): ## TODO: this doesn't need to be sliding, qc.run_qc takes care of it -def sliding_expmax_violation( - A: pd.Series, - w: int = 151, -) -> float: - exp_max = A.rolling(window=w).apply(expected_max_gauss) - return sum(A[A > exp_max] - exp_max[A > exp_max]) / sum(A > exp_max) +def sliding_expmax_violation(A: pd.Series, w: int = 151) -> float: + exp_max = A.rolling(window=w).apply(_expected_max_gauss) + if sum(A > exp_max) == 0: + return 0 + else: + return sum(A[A > exp_max] - exp_max[A > exp_max]) / sum(A > exp_max) def bleaching_tau(A: pd.Series) -> float: From b596663d52d2d4b1742c820a282081e366503482 Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 18 Apr 2025 12:21:37 +0100 Subject: [PATCH 24/36] Housekeeping --- src/iblphotometry/metrics.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 93c9a68..5843f21 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -17,8 +17,9 @@ def dt_violations(A: pd.DataFrame | pd.Series, atol: float = 1e-3) -> str: t = A.index.values dts = np.diff(t) - n_violations = np.sum((dts - np.median(dts)) > atol) - ## TODO: make ibllib wrapper to convert metrics to QC vals + dt = np.median(dts) + n_violations = np.sum(np.abs(dts - dt) > atol) + ## TODO: make ibllib wrappers to convert metrics to QC vals # if n_violations == 0: # outcome = QC.PASS # elif n_violations <= 3: @@ -128,10 +129,10 @@ def f_unique_samples(A: pd.Series | np.ndarray) -> int: return n_unique_samples(A) / len(A) -def n_spikes_dt(A: pd.Series | np.ndarray, sd: int = 5): - """count the number of spike artifacts in the recording.""" - t = A.index.values if isinstance(A, pd.Series) else A - return detect_spikes(t, sd=sd).shape[0] +# def n_spikes_dt(A: pd.Series | np.ndarray, sd: int = 5): +# """count the number of spike artifacts in the recording.""" +# t = A.index.values if isinstance(A, 
pd.Series) else A +# return detect_spikes(t, sd=sd).shape[0] def n_spikes_dy(A: pd.Series | np.ndarray, sd: int = 5): @@ -157,11 +158,6 @@ def _expected_max_gauss(x): return np.mean(x) + np.std(x) * np.sqrt(2 * np.log10(len(x))) -## TODO: this doesn't need to be sliding, qc.run_qc takes care of it -def sliding_expmax_violation(A: pd.Series, w: int = 151) -> float: - exp_max = A.rolling(window=w).apply(_expected_max_gauss) - if sum(A > exp_max) == 0: - return 0 else: return sum(A[A > exp_max] - exp_max[A > exp_max]) / sum(A > exp_max) From 8d41f343d91051b288beb4e263e02dae6219fb50 Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 18 Apr 2025 12:22:57 +0100 Subject: [PATCH 25/36] Add separate functions for dy and dt spike detection --- src/iblphotometry/metrics.py | 2 +- src/iblphotometry/processing.py | 27 +++++++++++++++++++-------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 5843f21..b8de5b6 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -138,7 +138,7 @@ def f_unique_samples(A: pd.Series | np.ndarray) -> int: def n_spikes_dy(A: pd.Series | np.ndarray, sd: int = 5): """count the number of spike artifacts in the recording.""" y = A.values if isinstance(A, pd.Series) else A - return detect_spikes(y, sd=sd).shape[0] + return detect_spikes_dy(y, sd=sd).shape[0] def n_outliers( diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py index e3c63e4..72958c5 100644 --- a/src/iblphotometry/processing.py +++ b/src/iblphotometry/processing.py @@ -594,16 +594,27 @@ def remove_outliers( return pd.Series(y, index=t) -def detect_spikes(t: np.ndarray, sd: int = 5): - dt = np.diff(t) - bad_inds = dt < np.average(dt) - sd * np.std(dt) - return np.where(bad_inds)[0] - - -def remove_spikes(F: pd.Series, sd: int = 5, w: int = 25): +def detect_spikes_dt(t: np.ndarray, atol: float = 0.001): + dts = np.diff(t) + dt = np.median(dts) + # bad_inds = dt < np.average(dt) - sd * np.std(dt) + # return np.where(bad_inds)[0] + return np.where(np.abs(dts - dt) > atol)[0] + +def detect_spikes_dy(y: np.ndarray, sd: float = 5.): + dy = np.abs(np.diff(y)) + # bad_inds = dt < np.average(dt) - sd * np.std(dt) + return np.where(dy > np.average(dy) + sd * np.std(dy))[0] + +def remove_spikes(F: pd.Series, delta: str = 't', sd: int = 5, w: int = 25): y, t = F.values, F.index.values y = copy(y) - outliers = detect_spikes(t, sd=sd) + if delta == 't': + outliers = detect_spikes_dt(t, atol=0.001) + elif delta == 'y': + outliers = detect_spikes_dy(y, sd=sd) + else: + raise ValueError('delta must be "t" or "y"') y[outliers] = np.nan try: y = fillnan_kde(y, w=w) From 6cd8811adde846ec80234c0f2fbdf5bb00a4b69b Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 18 Apr 2025 12:23:23 +0100 Subject: [PATCH 26/36] Replace global median with local median in remove_spikes --- src/iblphotometry/processing.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py index 72958c5..43852e3 100644 --- a/src/iblphotometry/processing.py +++ b/src/iblphotometry/processing.py @@ -618,9 +618,12 @@ def remove_spikes(F: pd.Series, delta: str = 't', sd: int = 5, w: int = 25): y[outliers] = np.nan try: y = fillnan_kde(y, w=w) - except np.linalg.LinAlgError: - y[outliers] = np.nanmedian(y) - warnings.warn('KDE fillnan failed, using global median') # TODO logger + # except np.linalg.LinAlgError: + except: + i0s = (outliers - w).clip(0) + i1s = 
outliers + w + y[outliers] = [np.nanmedian(y[i0:i1]) for i0, i1 in zip(i0s, i1s)] + warnings.warn('KDE fillnan failed, using local median') # TODO logger return pd.Series(y, index=t) From fa58dfaea46f32d2a2c7306cdf7cba97e0710f6e Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 18 Apr 2025 12:25:07 +0100 Subject: [PATCH 27/36] Update detection of "spikes" caused by early sampling - now based on fixing repeated sampling of the same channel --- src/iblphotometry/metrics.py | 43 ++++++++++++------------ src/iblphotometry/processing.py | 59 +++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 21 deletions(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index b8de5b6..97048ab 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -8,8 +8,11 @@ z, Regression, ExponDecay, - detect_spikes, + detect_spikes_dt, + detect_spikes_dy, detect_outliers, + find_early_samples, + find_repeated_samples ) from iblphotometry.behavior import psth @@ -32,26 +35,24 @@ def dt_violations(A: pd.DataFrame | pd.Series, atol: float = 1e-3) -> str: return n_violations -def _fill_missing_channel_names(A: pd.Series) -> pd.Series: - a = A.copy() - missing_inds = np.where(a['name'] == '')[0] - missing_idxs = a.iloc[missing_inds].index - prev_idxs = a.iloc[missing_inds - 1].index - name_alternator = {'GCaMP': 'Isosbestic', 'Isosbestic': 'GCaMP', '': ''} - while len(a[a['name'] == '']) > 0: - a.loc[missing_idxs, 'name'] = [ - name_alternator[prev] for prev in a.loc[prev_idxs, 'name'] - ] - return a - - -def interleaved_acquisition(A: pd.DataFrame | pd.Series) -> float: - if sum(A['name'] == '') > 0: - A = _fill_missing_channel_names(A) - a = A['name'].values if isinstance(A, pd.DataFrame) else A.values - even_check = np.all(a[::2] == a[0]) - odd_check = np.all(a[1::2] == a[1]) - return bool(even_check & odd_check) +# def interleaved_acquisition(A: pd.DataFrame | pd.Series) -> bool: +# if sum(A['name'] == '') > 0: +# A = _fill_missing_channel_names(A) +# a = A['name'].values if isinstance(A, pd.DataFrame) else A.values +# even_check = np.all(a[::2] == a[0]) +# odd_check = np.all(a[1::2] == a[1]) +# return bool(even_check & odd_check) + + +def n_early_samples(A: pd.DataFrame | pd.Series, dt_tol: float = 0.001) -> int: + return find_early_samples(A, dt_tol=dt_tol).sum() + + +def n_repeated_samples( + A: pd.DataFrame, + dt_tol: float = 0.001, +) -> int: + return find_repeated_samples(A, dt_tol=dt_tol).sum() def percentile_dist(A: pd.Series | np.ndarray, pc: tuple = (50, 95), axis=-1) -> float: diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py index 43852e3..5e48c48 100644 --- a/src/iblphotometry/processing.py +++ b/src/iblphotometry/processing.py @@ -639,6 +639,65 @@ def remove_spikes(F: pd.Series, delta: str = 't', sd: int = 5, w: int = 25): # return pd.Series(y, index=t) +def find_early_samples(A: pd.DataFrame | pd.Series, dt_tol: float = 0.001) -> np.ndarray: + dt = np.median(np.diff(A.index)) + return dt - A.index.diff() > dt_tol + + +def _fill_missing_channel_names(A: np.ndarray) -> np.ndarray: + missing_inds = np.where(A == '')[0] + name_alternator = {'GCaMP': 'Isosbestic', 'Isosbestic': 'GCaMP', '': ''} + for i in missing_inds: + if i == 0: + A[i] = name_alternator[A[i + 1]] + else: + A[i] = name_alternator[A[i - 1]] + return A + + +def find_repeated_samples( + A: pd.DataFrame, + dt_tol: float = 0.001, +) -> int: + if any(A['name'] == ''): + A['name'] = _fill_missing_channel_names(A['name'].values) + else: + A + 
repeated_sample_mask = A['name'].iloc[1:].values == A['name'].iloc[:-1].values + repeated_samples = A.iloc[1:][repeated_sample_mask] + dt = np.median(np.diff(A.index)) + early_samples = A[find_early_samples(A, dt_tol=dt_tol)] + if not all([idx in early_samples.index for idx in repeated_samples.index]): + print("WARNING: repeated samples found without early sampling") + return repeated_sample_mask + +def fix_repeated_sampling( + A: pd.DataFrame, + dt_tol: float = 0.001, + w_size: int = 10, + roi: str | None = None +) -> int: + ## TODO: avoid this by explicitly handling multiple channels + assert roi is not None + # Drop first samples if channel labels are missing + A.loc[A['name'].replace({'':np.nan}).first_valid_index():] + # Fix remaining missing channel labels + if any(A['name'] == ''): + A['name'] = _fill_missing_channel_names(A['name'].values) + repeated_sample_mask = find_repeated_samples(A, dt_tol=dt_tol) + name_alternator = {'GCaMP':'Isosbestic', 'Isosbestic': 'GCaMP'} + for i in (np.where(repeated_sample_mask)[0] + 1): + name = A.iloc[i]['name'] + value = A.iloc[i][roi] + i0, i1 = A.index[i - w_size], A.index[i] + same = A.loc[i0:i1].query('name == @name')[roi].mean() + other_name = name_alternator[name] + other = A.loc[i0:i1].query('name == @other_name')[roi].mean() + assert np.abs(value - same) > np.abs(value - other) + A.loc[A.index[i]:, 'name'] = [name_alternator[name] for name in A.loc[A.index[i]:, 'name']] + return A + + """ ###### ## #### ######## #### ## ## ###### ####### ######## ######## ######## ### ######## #### ####### ## ## ###### ## ## ## ## ## ## ## ### ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## ## ## From d5222ac861a8aa4ed623bb292061be48670dcb75 Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 18 Apr 2025 12:26:14 +0100 Subject: [PATCH 28/36] Add deviance metric --- src/iblphotometry/metrics.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 97048ab..8cc2198 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -75,6 +75,14 @@ def percentile_dist(A: pd.Series | np.ndarray, pc: tuple = (50, 95), axis=-1) -> return P[1] - P[0] +def deviance( + A: pd.Series | np.ndarray, + w_len: int = 151, +) -> float: + a = A.values if isinstance(A, pd.Series) else A + return np.median(np.abs(a - np.median(a)) / np.median(a)) + + def sliding_deviance( A: pd.Series | np.ndarray, w_len: int = 151, From 23d9790ac37851946e68dd98c99fa9f6fc8530fa Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 18 Apr 2025 12:26:51 +0100 Subject: [PATCH 29/36] Update expmax violation metrics --- src/iblphotometry/metrics.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 8cc2198..5012576 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -164,11 +164,23 @@ def _expected_max_gauss(x): """ https://math.stackexchange.com/questions/89030/expectation-of-the-maximum-of-gaussian-random-variables """ - return np.mean(x) + np.std(x) * np.sqrt(2 * np.log10(len(x))) + return np.mean(x) + np.std(x) * np.sqrt(2 * np.log(len(x))) +def n_expmax_violations(A: pd.Series | np.ndarray) -> int: + a = A.values if isinstance(A, pd.Series) else A + exp_max = _expected_max_gauss(a) + return sum(np.abs(a) > exp_max) + + +def expmax_violation(A: pd.Series | np.ndarray) -> float: + a = A.values if isinstance(A, pd.Series) else A + exp_max = _expected_max_gauss(a) + n_violations = 
sum(np.abs(a) > exp_max) + if n_violations == 0: + return 0. else: - return sum(A[A > exp_max] - exp_max[A > exp_max]) / sum(A > exp_max) + return np.sum(np.abs(a[np.abs(a) > exp_max]) - exp_max) / n_violations def bleaching_tau(A: pd.Series) -> float: From ae5c8cb9e55c5dac8cbb5159920f580a27401d23 Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 18 Apr 2025 12:27:31 +0100 Subject: [PATCH 30/36] Bugfix in model fitting --- src/iblphotometry/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 5012576..8626704 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -187,7 +187,7 @@ def bleaching_tau(A: pd.Series) -> float: """overall tau of bleaching.""" y, t = A.values, A.index.values reg = Regression(model=ExponDecay()) - reg.fit(y, t) + reg.fit(t, y) return reg.popt[1] From 63b1f41b45942ed4864a0e69e1aafa5cd3cf038c Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 18 Apr 2025 12:27:51 +0100 Subject: [PATCH 31/36] WIP: bleaching_amp metric --- src/iblphotometry/metrics.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 8626704..13e7e1f 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -191,15 +191,19 @@ def bleaching_tau(A: pd.Series) -> float: return reg.popt[1] -def bleaching_amp(A: pd.Series) -> float: +def bleaching_amp(A: pd.Series | np.ndarray) -> float: """overall amplitude of bleaching.""" - # y, t = A.values, A.index.values - # reg = Regression(model=ExponDecay()) - # reg.fit(y, t) - # return reg.popt[0] - rolling_mean = A.rolling(window=1000).mean().dropna() - ## TODO: mean rolling std, not across whole signal - return (rolling_mean.iloc[0] - rolling_mean.iloc[-1]) / A.std() + y = A.values if isinstance(A, pd.Series) else A + reg = Regression(model=LinearModel()) + try: + reg.fit(np.arange(len(a)), y) + slope = reg.popt[0] + except: + slope = np.nan + return slope + # rolling_mean = A.rolling(window=1000).mean().dropna() + # ## TODO: mean rolling std, not across whole signal + # return (rolling_mean.iloc[0] - rolling_mean.iloc[-1]) / A.std() def ttest_pre_post( From 1f111789afeae25b041fd0ab1eba37a0eda25f3e Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 18 Apr 2025 12:28:27 +0100 Subject: [PATCH 32/36] Update low_freq_power_ratio to accept dt as kwarg --- src/iblphotometry/metrics.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index 13e7e1f..afe0048 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -323,7 +323,11 @@ def response_magnitude(A: pd.Series, events: np.ndarray, window: tuple = (0, 1)) return np.abs(responses.mean(axis=0)).sum() -def low_freq_power_ratio(A: pd.Series, f_cutoff: float = 3.18) -> float: +def low_freq_power_ratio( + A: pd.Series | np.ndarray, + dt: float | None = None, + f_cutoff: float = 3.18 +) -> float: """ Fraction of the total signal power contained below a given cutoff frequency. @@ -336,16 +340,18 @@ def low_freq_power_ratio(A: pd.Series, f_cutoff: float = 3.18) -> float: cutoff frequency, default value of 3.18 esitmated using the formula 1 / (2 * pi * tau) and an approximate tau_rise for GCaMP6f of 0.05s. 
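(A short sketch of the ndarray code path added here, where dt must be given explicitly because there is no time index; the sampling interval and signal are invented. The first comment spells out where the default cutoff quoted above comes from.)

    import numpy as np
    from iblphotometry.metrics import low_freq_power_ratio

    # default cutoff: 1 / (2 * np.pi * 0.05) ~= 3.18 Hz for a GCaMP6f tau_rise of ~50 ms
    dt = 1 / 30                          # invented sampling interval (s)
    y = np.random.randn(18_000)          # 10 min of raw samples as a plain ndarray
    print(low_freq_power_ratio(y, dt=dt))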
""" - signal = A.copy() + if isinstance(A, pd.Series): + signal = A.values + dt = np.median(np.diff(A.index)) + else: + assert dt is not None + signal = A assert signal.ndim == 1 # only 1D for now # Get frequency bins - tpts = signal.index.values - dt = np.median(np.diff(tpts)) - n_pts = len(signal) - freqs = np.fft.rfftfreq(n_pts, dt) + freqs = np.fft.rfftfreq(len(signal), dt) # Compute power spectral density psd = np.abs(np.fft.rfft(signal - signal.mean())) ** 2 - # Return the ratio of power contained in low freqs + # Return proportion of total power in low freqs return psd[freqs <= f_cutoff].sum() / psd.sum() From 81d6c0baa38abdcf776c779c02f6980d00ca2d27 Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 18 Apr 2025 12:29:17 +0100 Subject: [PATCH 33/36] Handle edge cases with too few data points in ar_score --- src/iblphotometry/metrics.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/iblphotometry/metrics.py b/src/iblphotometry/metrics.py index afe0048..d608362 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -417,13 +417,19 @@ def ar_score(A: pd.Series | np.ndarray, order: int = 2) -> float: X = np.column_stack([signal[i : len(signal) - order + i] for i in range(order)]) y = signal[order:] - # Fit linear regression using least squares - _, residual, _, _ = np.linalg.lstsq(X, y) + try: + # Fit linear regression using least squares + _, residual, _, _ = np.linalg.lstsq(X, y) + except np.linalg.LinAlgError: + return np.nan - # Calculate R-squared using residuals - ss_residual = residual[0] - ss_total = np.sum((y - np.mean(y)) ** 2) - r_squared = 1 - (ss_residual / ss_total) + if residual: + # Calculate R-squared using residuals + ss_residual = residual[0] + ss_total = np.sum((y - np.mean(y)) ** 2) + r_squared = 1 - (ss_residual / ss_total) + else: + r_squared = np.nan return r_squared From a0248763849f30bf18d8998a1eaa71fcc62b3ece Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 18 Apr 2025 12:30:05 +0100 Subject: [PATCH 34/36] WIP: add two methods for sliding_robust_zscore --- src/iblphotometry/processing.py | 143 +++++++++++++++++++++++++++++++- 1 file changed, 142 insertions(+), 1 deletion(-) diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py index 5e48c48..2609e33 100644 --- a/src/iblphotometry/processing.py +++ b/src/iblphotometry/processing.py @@ -7,6 +7,7 @@ from iblutil.numerical import rcoeff from ibldsp.utils import WindowGenerator +from numpy.lib.stride_tricks import as_strided from scipy.optimize import minimize from scipy.stats.distributions import norm @@ -809,7 +810,7 @@ def sliding_z(F: pd.Series, w_len: float, fs=None, weights=None): return pd.Series(d, index=t) -def sliding_mad(F: pd.Series, w_len: float = None, fs=None, overlap=90): +def sliding_mad(F: pd.DataFrame, w_len: float = None, fs=None, overlap=90): y, t = F.values, F.index.values fs = 1 / np.median(np.diff(t)) if fs is None else fs w_size = int(w_len * fs) @@ -822,3 +823,143 @@ def sliding_mad(F: pd.Series, w_len: float = None, fs=None, overlap=90): gain = np.nanmedian(np.abs(y)) / np.nanmedian(np.abs(rmswin), axis=0) gain = np.interp(t, trms, gain) return pd.Series(y * gain, index=t) + + +def sliding_robust_zscore(F: pd.Series, w_len: float, scale: bool = True) -> pd.Series: + """ + Compute a robust z-score for each data point in a pandas Series using a sliding window. 
+ + For each data point at which a full window (centered around that point) + is available, compute the robust z-score: + + z = (x - median(window)) / MAD(window) + + where MAD is the median absolute deviation of the window. If scale=True, + the MAD is multiplied by 1.4826 to approximate the standard deviation under normality. + + Parameters + ---------- + F : pd.Series + Input time-series with a numeric index (time) and signal values. + w_len : float + Window length in seconds. + scale : bool, optional + Whether to scale the MAD by 1.4826. Default is True. + + Returns + ------- + pd.Series + A new Series of the same length as F containing the robust z-scores. + Data points near the boundaries without a full window are NaN. + """ + # Ensure the index is numeric (time in seconds) + times = F.index.values.astype(float) + dt = np.median(np.diff(times)) + + # Number of samples corresponding to w_len in seconds. + w_size = int(w_len // dt) + # Make window size odd so that a window can be centered + if w_size % 2 == 0: + w_size += 1 + half_win = w_size // 2 + + a = F.values # Underlying data + n = len(a) + + # We can only compute a full (centered) window where there's enough data on both sides. + # Valid center positions are indices half_win to n - half_win - 1. + n_valid = n - 2 * half_win + if n_valid <= 0: + raise ValueError("Window length is too long for the given series.") + + # Using step size of 1: each valid index gets its own window. + # Create a 2D view of the signal: + # windows shape: (n_valid, w_size) + windows = as_strided( + a[half_win:n - half_win], + shape=(n_valid, w_size), + strides=(a.strides[0], a.strides[0]) + ) + # However, the above would take contiguous blocks from a[half_win: n - half_win] only. + # To get a sliding window centered at each valid index, we need a trick: + # We'll use as_strided on the full array, starting at index 0, then select the valid windows: + windows_full = as_strided( + a, + shape=(n - w_size + 1, w_size), + strides=(a.strides[0], a.strides[0]) + ) + # The center of the k-th window in windows_full is at index k + half_win. + # We want windows centered at indices half_win, half_win+1, ..., n - half_win - 1. + # Thus, we select: + windows = windows_full[0 + 0 : 0 + n_valid] # shape (n_valid, w_size) + + # Compute the median for each window (row-wise). + medians = np.median(windows, axis=1) + # Compute the MAD for each window. + mads = np.median(np.abs(windows - medians[:, None]), axis=1) + if scale: + mads *= 1.4826 # Scale MAD to approximate standard deviation under a normal distribution. + + # Avoid division by zero: if MAD is zero, set those z-scores to 0. + safe_mads = np.where(mads == 0, np.nan, mads) + + # Compute robust z-scores for the center value of each window. + # The center value for the k-th window is at index: k + half_win in the original array. + centers = a[half_win: n - half_win] + z_scores_valid = (centers - medians) / safe_mads + + # Pre-allocate result (all values NaN) + robust_z = np.full(n, np.nan) + # Fill in the computed z-scores at valid indices. + valid_idx = np.arange(half_win, n - half_win) + robust_z[valid_idx] = z_scores_valid + + # Return as a Series with the original index + return pd.Series(robust_z, index=F.index) + +def sliding_robust_zscore_rolling(F: pd.Series, w_len: float, scale: bool = True) -> pd.Series: + """ + Compute a robust z-score for each data point using a sliding window via pandas’ rolling(). 
+ + For each point where a full, centered window is available, compute: + z = (x_center - median(window)) / MAD(window) + where MAD is the median absolute deviation and, if scale=True, MAD is scaled by 1.4826. + + Parameters + ---------- + F : pd.Series + Input time-series with a numeric index (time in seconds) and signal values. + w_len : float + The window length in seconds. + scale : bool, optional + If True, multiply MAD by 1.4826 (default is True). + + Returns + ------- + pd.Series + A Series containing the robust z-scores at the center of each window. + Points for which a full window cannot be computed will be NaN. + """ + # Get the sample interval from the index + times = F.index.values.astype(float) + dt = np.median(np.diff(times)) + # Compute window size in samples and ensure it is odd (so there's a unique center) + w_size = int(w_len / dt) + if w_size % 2 == 0: + w_size += 1 + + def robust_zscore(window): + # window is passed as a NumPy array (raw=True) + center = window[len(window) // 2] + med = np.median(window) + mad = np.median(np.abs(window - med)) + if mad == 0: + return np.nan + if scale: + mad *= 1.4826 + return (center - med) / mad + + # Use rolling window with center=True so that the result corresponds to the window center. + F_proc = F.rolling(window=w_size, center=True).apply(robust_zscore, raw=True) + # Return only valid (non-NaN) portions of the transformed signal + return F_proc.loc[F_proc.first_valid_index():F_proc.last_valid_index()] From 1d7d9dc390c83c2121e2f5c59493a6815f584b27 Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 18 Apr 2025 12:31:03 +0100 Subject: [PATCH 35/36] Major update to qc_series sliding metric handling --- src/iblphotometry/qc.py | 104 +++++++++++++++++++++++++++------------- 1 file changed, 71 insertions(+), 33 deletions(-) diff --git a/src/iblphotometry/qc.py b/src/iblphotometry/qc.py index 1f0c1fc..8b92684 100644 --- a/src/iblphotometry/qc.py +++ b/src/iblphotometry/qc.py @@ -5,11 +5,13 @@ import logging import numpy as np +from numpy.lib.stride_tricks import as_strided import pandas as pd from scipy.stats import linregress from iblphotometry.processing import make_sliding_window from iblphotometry.pipelines import run_pipeline +import iblphotometry.metrics as metrics logger = logging.getLogger() @@ -51,32 +53,48 @@ def sliding_metric( return pd.Series(m, index=t[inds + int(w_size / 2)]) +def _eval_metric_sliding( + F: pd.Series, + metric: Callable, + w_len: float = 60, + metric_kwargs: dict | None = None, +) -> pd.Series: + metric_kwargs = {} if metric_kwargs is None else metric_kwargs + dt = np.median(np.diff(F.index)) + w_size = int(w_len // dt) + step_size = int(w_size // 2) + n_windows = int((len(F) - w_size) // step_size + 1) + if n_windows <= 2: + return + a = F.values + windows = as_strided(a, shape=(n_windows, w_size), strides=(step_size * a.strides[0], a.strides[0])) + S_values = np.apply_along_axis(lambda w: metric(w, **metric_kwargs), axis=1, arr=windows) + S_times = F.index.values[np.linspace(step_size, n_windows * step_size, n_windows).astype(int)] + return pd.Series(S_values, index=S_times) + + # eval pipleline will be here def eval_metric( F: pd.Series, metric: Callable, metric_kwargs: dict | None = None, sliding_kwargs: dict | None = None, - full_output=False, + full_output=True, ): - result = {} - m = metric(F, **metric_kwargs) if metric_kwargs is not None else metric(F) - result['value'] = m - if sliding_kwargs is not None: - S = sliding_metric( - F, metric=metric, **sliding_kwargs, metric_kwargs=metric_kwargs - ) - 
r, p = linregress(S.index.values, S.values)[2:4] - if full_output: - result['sliding_values'] = S.values - result['sliding_timepoints'] = S.index.values - - else: - r = np.nan - p = np.nan - - result['r'] = r - result['p'] = p + results_vals = ['value', 'sliding_values', 'sliding_timepoints', 'r', 'p'] + result = {k:np.nan for k in results_vals} + metric_func = getattr(metrics, metric) + result['value'] = metric_func(F) if metric_kwargs is None else metric_func(F, **metric_kwargs) + sliding_kwargs = {} if sliding_kwargs is None else sliding_kwargs + if sliding_kwargs: + S = _eval_metric_sliding(F, metric_func, sliding_kwargs['w_len'], metric_kwargs) + if S is None: + pass + else: + result['r'], result['p'] = linregress(S.index.values, S.values)[2:4] + if full_output: + result['sliding_values'] = S.values + result['sliding_timepoints'] = S.index.values return result @@ -89,23 +107,43 @@ def qc_series( brain_region: str = None, # FIXME but left as is for now just to keep the logger happy ) -> dict: if isinstance(F, pd.DataFrame): - raise TypeError('F can not be a dataframe') + raise TypeError('F cannot be a dataframe') + + # if sliding_kwargs is None: # empty dicts indicate no sliding application + # sliding_kwargs = {metric:{} for metric in qc_metrics.keys()} + # elif ( + # isinstance(sliding_kwargs, dict) and + # not sorted(sliding_kwargs.keys()) == sorted(qc_metrics.keys()) + # ): # the same sliding kwargs will be applied to all metrics + # sliding_kwargs = {metric:sliding_kwargs for metric in qc_metrics.keys()} + # elif ( + # isinstance(sliding_kwargs, dict) and + # sorted(sliding_kwargs.keys()) == sorted(qc_metrics.keys()) + # ): # each metric has it's own sliding kwargs + # pass + # else: # is not None, a simple dict, or a nested dict + # raise TypeError( + # 'sliding_kwargs must be None, dict of kwargs, or nested dict with same keys as qc_metrics' + # ) + sliding_kwargs = {} if sliding_kwargs is None else sliding_kwargs # should cover all cases qc_results = {} - for metric, params in qc_metrics: - try: - if trials is not None: # if trials are passed - params['trials'] = trials - res = eval_metric(F, metric, params, sliding_kwargs) - qc_results[f'{metric.__name__}'] = res['value'] - if sliding_kwargs: - qc_results[f'{metric.__name__}_r'] = res['rval'] - qc_results[f'{metric.__name__}_p'] = res['pval'] - except Exception as e: - logger.warning( - f'{eid}, {brain_region}: metric {metric.__name__} failure: {type(e).__name__}:{e}' - ) + for metric, params in qc_metrics.items(): + # try: + if trials is not None: # if trials are passed + params['trials'] = trials + res = eval_metric(F, metric, metric_kwargs=params, sliding_kwargs=sliding_kwargs[metric]) + qc_results[f'{metric}'] = res['value'] + if sliding_kwargs[metric]: + qc_results[f'_{metric}_values'] = res['sliding_values'] + qc_results[f'_{metric}_times'] = res['sliding_timepoints'] + qc_results[f'_{metric}_r'] = res['r'] + qc_results[f'_{metric}_p'] = res['p'] + # except Exception as e: + # logger.warning( + # f'{eid}, {brain_region}: metric {metric.__name__} failure: {type(e).__name__}:{e}' + # ) return qc_results From 50995f987cb100f83036955592d5d3037162beb8 Mon Sep 17 00:00:00 2001 From: davcrom Date: Fri, 18 Apr 2025 12:37:12 +0100 Subject: [PATCH 36/36] Ruff format --- src/iblphotometry/metrics.py | 8 +++--- src/iblphotometry/processing.py | 45 ++++++++++++++++++--------------- src/iblphotometry/qc.py | 22 +++++++++++----- 3 files changed, 44 insertions(+), 31 deletions(-) diff --git a/src/iblphotometry/metrics.py 
b/src/iblphotometry/metrics.py index d608362..a7163e6 100644 --- a/src/iblphotometry/metrics.py +++ b/src/iblphotometry/metrics.py @@ -12,7 +12,7 @@ detect_spikes_dy, detect_outliers, find_early_samples, - find_repeated_samples + find_repeated_samples, ) from iblphotometry.behavior import psth @@ -178,7 +178,7 @@ def expmax_violation(A: pd.Series | np.ndarray) -> float: exp_max = _expected_max_gauss(a) n_violations = sum(np.abs(a) > exp_max) if n_violations == 0: - return 0. + return 0.0 else: return np.sum(np.abs(a[np.abs(a) > exp_max]) - exp_max) / n_violations @@ -324,9 +324,7 @@ def response_magnitude(A: pd.Series, events: np.ndarray, window: tuple = (0, 1)) def low_freq_power_ratio( - A: pd.Series | np.ndarray, - dt: float | None = None, - f_cutoff: float = 3.18 + A: pd.Series | np.ndarray, dt: float | None = None, f_cutoff: float = 3.18 ) -> float: """ Fraction of the total signal power contained below a given cutoff frequency. diff --git a/src/iblphotometry/processing.py b/src/iblphotometry/processing.py index 2609e33..dc2e5d9 100644 --- a/src/iblphotometry/processing.py +++ b/src/iblphotometry/processing.py @@ -602,11 +602,13 @@ def detect_spikes_dt(t: np.ndarray, atol: float = 0.001): # return np.where(bad_inds)[0] return np.where(np.abs(dts - dt) > atol)[0] -def detect_spikes_dy(y: np.ndarray, sd: float = 5.): + +def detect_spikes_dy(y: np.ndarray, sd: float = 5.0): dy = np.abs(np.diff(y)) # bad_inds = dt < np.average(dt) - sd * np.std(dt) return np.where(dy > np.average(dy) + sd * np.std(dy))[0] + def remove_spikes(F: pd.Series, delta: str = 't', sd: int = 5, w: int = 25): y, t = F.values, F.index.values y = copy(y) @@ -640,7 +642,9 @@ def remove_spikes(F: pd.Series, delta: str = 't', sd: int = 5, w: int = 25): # return pd.Series(y, index=t) -def find_early_samples(A: pd.DataFrame | pd.Series, dt_tol: float = 0.001) -> np.ndarray: +def find_early_samples( + A: pd.DataFrame | pd.Series, dt_tol: float = 0.001 +) -> np.ndarray: dt = np.median(np.diff(A.index)) return dt - A.index.diff() > dt_tol @@ -669,25 +673,23 @@ def find_repeated_samples( dt = np.median(np.diff(A.index)) early_samples = A[find_early_samples(A, dt_tol=dt_tol)] if not all([idx in early_samples.index for idx in repeated_samples.index]): - print("WARNING: repeated samples found without early sampling") + print('WARNING: repeated samples found without early sampling') return repeated_sample_mask + def fix_repeated_sampling( - A: pd.DataFrame, - dt_tol: float = 0.001, - w_size: int = 10, - roi: str | None = None + A: pd.DataFrame, dt_tol: float = 0.001, w_size: int = 10, roi: str | None = None ) -> int: ## TODO: avoid this by explicitly handling multiple channels assert roi is not None # Drop first samples if channel labels are missing - A.loc[A['name'].replace({'':np.nan}).first_valid_index():] + A.loc[A['name'].replace({'': np.nan}).first_valid_index() :] # Fix remaining missing channel labels if any(A['name'] == ''): A['name'] = _fill_missing_channel_names(A['name'].values) repeated_sample_mask = find_repeated_samples(A, dt_tol=dt_tol) - name_alternator = {'GCaMP':'Isosbestic', 'Isosbestic': 'GCaMP'} - for i in (np.where(repeated_sample_mask)[0] + 1): + name_alternator = {'GCaMP': 'Isosbestic', 'Isosbestic': 'GCaMP'} + for i in np.where(repeated_sample_mask)[0] + 1: name = A.iloc[i]['name'] value = A.iloc[i][roi] i0, i1 = A.index[i - w_size], A.index[i] @@ -695,7 +697,9 @@ def fix_repeated_sampling( other_name = name_alternator[name] other = A.loc[i0:i1].query('name == @other_name')[roi].mean() assert 
np.abs(value - same) > np.abs(value - other) - A.loc[A.index[i]:, 'name'] = [name_alternator[name] for name in A.loc[A.index[i]:, 'name']] + A.loc[A.index[i] :, 'name'] = [ + name_alternator[name] for name in A.loc[A.index[i] :, 'name'] + ] return A @@ -870,23 +874,21 @@ def sliding_robust_zscore(F: pd.Series, w_len: float, scale: bool = True) -> pd. # Valid center positions are indices half_win to n - half_win - 1. n_valid = n - 2 * half_win if n_valid <= 0: - raise ValueError("Window length is too long for the given series.") + raise ValueError('Window length is too long for the given series.') # Using step size of 1: each valid index gets its own window. # Create a 2D view of the signal: # windows shape: (n_valid, w_size) windows = as_strided( - a[half_win:n - half_win], + a[half_win : n - half_win], shape=(n_valid, w_size), - strides=(a.strides[0], a.strides[0]) + strides=(a.strides[0], a.strides[0]), ) # However, the above would take contiguous blocks from a[half_win: n - half_win] only. # To get a sliding window centered at each valid index, we need a trick: # We'll use as_strided on the full array, starting at index 0, then select the valid windows: windows_full = as_strided( - a, - shape=(n - w_size + 1, w_size), - strides=(a.strides[0], a.strides[0]) + a, shape=(n - w_size + 1, w_size), strides=(a.strides[0], a.strides[0]) ) # The center of the k-th window in windows_full is at index k + half_win. # We want windows centered at indices half_win, half_win+1, ..., n - half_win - 1. @@ -905,7 +907,7 @@ def sliding_robust_zscore(F: pd.Series, w_len: float, scale: bool = True) -> pd. # Compute robust z-scores for the center value of each window. # The center value for the k-th window is at index: k + half_win in the original array. - centers = a[half_win: n - half_win] + centers = a[half_win : n - half_win] z_scores_valid = (centers - medians) / safe_mads # Pre-allocate result (all values NaN) @@ -917,7 +919,10 @@ def sliding_robust_zscore(F: pd.Series, w_len: float, scale: bool = True) -> pd. # Return as a Series with the original index return pd.Series(robust_z, index=F.index) -def sliding_robust_zscore_rolling(F: pd.Series, w_len: float, scale: bool = True) -> pd.Series: + +def sliding_robust_zscore_rolling( + F: pd.Series, w_len: float, scale: bool = True +) -> pd.Series: """ Compute a robust z-score for each data point using a sliding window via pandas’ rolling(). @@ -962,4 +967,4 @@ def robust_zscore(window): # Use rolling window with center=True so that the result corresponds to the window center. 
F_proc = F.rolling(window=w_size, center=True).apply(robust_zscore, raw=True) # Return only valid (non-NaN) portions of the transformed signal - return F_proc.loc[F_proc.first_valid_index():F_proc.last_valid_index()] + return F_proc.loc[F_proc.first_valid_index() : F_proc.last_valid_index()] diff --git a/src/iblphotometry/qc.py b/src/iblphotometry/qc.py index 8b92684..6f7ea12 100644 --- a/src/iblphotometry/qc.py +++ b/src/iblphotometry/qc.py @@ -67,9 +67,15 @@ def _eval_metric_sliding( if n_windows <= 2: return a = F.values - windows = as_strided(a, shape=(n_windows, w_size), strides=(step_size * a.strides[0], a.strides[0])) - S_values = np.apply_along_axis(lambda w: metric(w, **metric_kwargs), axis=1, arr=windows) - S_times = F.index.values[np.linspace(step_size, n_windows * step_size, n_windows).astype(int)] + windows = as_strided( + a, shape=(n_windows, w_size), strides=(step_size * a.strides[0], a.strides[0]) + ) + S_values = np.apply_along_axis( + lambda w: metric(w, **metric_kwargs), axis=1, arr=windows + ) + S_times = F.index.values[ + np.linspace(step_size, n_windows * step_size, n_windows).astype(int) + ] return pd.Series(S_values, index=S_times) @@ -82,9 +88,11 @@ def eval_metric( full_output=True, ): results_vals = ['value', 'sliding_values', 'sliding_timepoints', 'r', 'p'] - result = {k:np.nan for k in results_vals} + result = {k: np.nan for k in results_vals} metric_func = getattr(metrics, metric) - result['value'] = metric_func(F) if metric_kwargs is None else metric_func(F, **metric_kwargs) + result['value'] = ( + metric_func(F) if metric_kwargs is None else metric_func(F, **metric_kwargs) + ) sliding_kwargs = {} if sliding_kwargs is None else sliding_kwargs if sliding_kwargs: S = _eval_metric_sliding(F, metric_func, sliding_kwargs['w_len'], metric_kwargs) @@ -133,7 +141,9 @@ def qc_series( # try: if trials is not None: # if trials are passed params['trials'] = trials - res = eval_metric(F, metric, metric_kwargs=params, sliding_kwargs=sliding_kwargs[metric]) + res = eval_metric( + F, metric, metric_kwargs=params, sliding_kwargs=sliding_kwargs[metric] + ) qc_results[f'{metric}'] = res['value'] if sliding_kwargs[metric]: qc_results[f'_{metric}_values'] = res['sliding_values']
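
After patch 35, `eval_metric` receives each metric as a name and resolves it with `getattr(metrics, metric)`, and `qc_series` looks up the sliding-window settings per metric via `sliding_kwargs[metric]`: an empty dict skips the sliding evaluation, while `{'w_len': ...}` evaluates the metric in half-overlapping windows of that length and summarizes the trend over windows with `linregress`. The following is a minimal sketch of a call under that convention; the synthetic trace, the choice of metrics, and the keyword-argument names for `qc_series` are assumptions inferred from the function body rather than a documented interface.

import numpy as np
import pandas as pd
from iblphotometry.qc import qc_series

# Synthetic ~30 Hz trace indexed by time in seconds (placeholder data only)
t = np.arange(0, 600, 1 / 30)
F = pd.Series(np.random.default_rng(0).standard_normal(t.size), index=t)

# Keys must name functions in iblphotometry.metrics; values are their kwargs
qc_metrics = {
    'n_unique_samples': {},
    'spectral_entropy': {},
}
# One entry per metric: {} disables the sliding evaluation for that metric,
# {'w_len': 60} evaluates it in 60 s windows with 50% overlap
sliding_kwargs = {
    'n_unique_samples': {},
    'spectral_entropy': {'w_len': 60},
}

results = qc_series(F, qc_metrics=qc_metrics, sliding_kwargs=sliding_kwargs)
# Scalar values land under the metric name; sliding outputs under
# '_<metric>_values', '_<metric>_times', '_<metric>_r' and '_<metric>_p'

Because the input-normalization block in `qc_series` is left commented out, `sliding_kwargs[metric]` is indexed directly, so the nested dict needs an entry (possibly empty) for every key in `qc_metrics`; passing `None` or a single flat dict of kwargs would currently raise a `KeyError` in this sketch.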
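
On the processing side, both `sliding_robust_zscore` variants apply the same per-window transform; the 1.4826 factor is the usual consistency constant that makes the MAD estimate the standard deviation of Gaussian data, so with `scale=True` the robust z-score is on the same scale as an ordinary z-score. A standalone check of that per-window arithmetic, not part of the patches and using synthetic data only:

import numpy as np

rng = np.random.default_rng(0)
window = rng.normal(loc=2.0, scale=0.5, size=601)  # one centered window of samples

center = window[len(window) // 2]
med = np.median(window)
mad = np.median(np.abs(window - med))

# 1.4826 ~ 1 / Phi^-1(0.75); scaling the MAD by it recovers an estimate of sigma
z_robust = (center - med) / (mad * 1.4826)
z_plain = (center - window.mean()) / window.std()
print(round(z_robust, 2), round(z_plain, 2))  # comparable values on clean Gaussian data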