Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,782 changes: 1,782 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pysatl_criterion/multiple_testing/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .fwer import Holm
from .fdr import BenjaminiYekutieli
27 changes: 18 additions & 9 deletions pysatl_criterion/multiple_testing/abstract_multiple_testing.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,37 @@
from abc import ABC, abstractmethod


class AbstractMultipleTesting(ABC):
    """Base class for multiple-testing correction procedures.

    Subclasses implement :meth:`adjust`; rejection decisions are derived
    generically in :meth:`test` by comparing adjusted p-values to a threshold.
    """

    @staticmethod
    def _validate_p_values(p_values: list[float]) -> None:
        """
        Validate that all p-values are in [0,1] range.

        :param p_values: List of p-values for hypothesis testing
        :raises ValueError: if any p-value lies outside [0, 1]
        """
        if not all(0 <= x <= 1 for x in p_values):
            raise ValueError("All p-values must be in range [0,1].")

    @classmethod
    def test(cls, p_values: list[float], threshold: float = 0.05) -> tuple[list[bool], list[float]]:
        """
        Perform multiple testing correction and return both rejection decisions
        and adjusted p-values.

        :param p_values: List of raw p-values for hypothesis testing
        :param threshold: Significance level for controlling FWER (Family-Wise Error Rate)
            or FDR (False Discovery Rate) (default is 0.05)
        :return: Tuple containing:
            - Boolean list indicating rejected hypotheses (True where rejected)
            - List of adjusted p-values after multiple testing correction
        """
        cls._validate_p_values(p_values)
        p_values_adjusted = cls.adjust(p_values)
        # Strict comparison: a hypothesis is rejected only when its adjusted
        # p-value falls strictly below the significance threshold.
        rejected = [p_value < threshold for p_value in p_values_adjusted]
        return rejected, p_values_adjusted

    @classmethod
    @abstractmethod
    def adjust(cls, p_values: list[float]) -> list[float]:
        """
        Compute adjusted p-values for multiple testing correction.

        :param p_values: List of raw p-values for hypothesis testing
        :return: List of adjusted p-values after multiple testing correction
        """
        raise NotImplementedError("Method is not implemented")
54 changes: 54 additions & 0 deletions pysatl_criterion/multiple_testing/fdr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from .abstract_multiple_testing import AbstractMultipleTesting
import math

class BenjaminiYekutieli(AbstractMultipleTesting):
    """
    Benjamini-Yekutieli correction for controlling the false discovery rate (FDR).

    Valid under arbitrary dependence between the tests; obtained by inflating
    the Benjamini-Hochberg adjustment with the harmonic factor
    c(n) = sum(1/i for i = 1..n).

    Steps:
    1. Compute harmonic factor c = sum(1/i) for i = 1..n
    2. Sort p-values and calculate adjusted values: p_adjusted = p * n * c / rank
    3. Enforce monotonicity using the step-up procedure (cumulative minimum
       taken from the largest p-value downwards)
    """

    @classmethod
    def adjust(cls, p_values: list[float]) -> list[float]:
        """
        Compute Benjamini-Yekutieli adjusted p-values.

        :param p_values: List of raw p-values between 0 and 1
        :return: Adjusted p-values in the original input order
        """
        n = len(p_values)
        if n == 0:
            return []

        # Exact harmonic number. This is O(n), negligible next to the
        # O(n log n) sort below, so no asymptotic approximation (ln n + gamma)
        # is needed even for very large n -- and exactness is preserved.
        c = sum(1.0 / i for i in range(1, n + 1))

        sorted_indices = sorted(range(n), key=lambda i: p_values[i])
        adjusted = [0.0] * n

        for rank, idx in enumerate(sorted_indices, start=1):
            # BY raw adjustment, capped at 1.
            adjusted[idx] = min(1.0, p_values[idx] * n * c / rank)

        # Step-up monotonicity: scanning from the largest p-value down, each
        # adjusted value is capped by the smallest value seen so far.
        current_min = adjusted[sorted_indices[-1]]
        for i in range(n - 2, -1, -1):
            idx = sorted_indices[i]
            if adjusted[idx] > current_min:
                adjusted[idx] = current_min
            else:
                current_min = adjusted[idx]

        return adjusted
65 changes: 65 additions & 0 deletions pysatl_criterion/multiple_testing/fwer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from .abstract_multiple_testing import AbstractMultipleTesting

class Holm(AbstractMultipleTesting):
    """
    Holm-Bonferroni step-down correction controlling the family-wise error
    rate (FWER).

    Uniformly more powerful than the plain Bonferroni correction.

    Steps:
    1. Sort p-values and keep original indices
    2. Calculate raw adjusted values: p_(i) * (n - i) for 0-based sorted position i
    3. Enforce monotonicity with a step-DOWN procedure: cumulative MAXIMUM
       from the smallest p-value upwards. (A cumulative minimum from the top,
       as in step-up methods, would understate the adjusted p-values and fail
       to control the FWER.)
    """

    @classmethod
    def adjust(cls, p_values: list[float]) -> list[float]:
        """
        Compute Holm-Bonferroni adjusted p-values.

        :param p_values: List of raw p-values between 0 and 1
        :return: Adjusted p-values in the original input order
        """
        n = len(p_values)
        if n == 0:
            return []

        sorted_indices = sorted(range(n), key=lambda i: p_values[i])
        adjusted = [0.0] * n

        # p~_(i) = min(1, max_{j <= i} p_(j) * (n - j)): the running maximum
        # keeps the adjusted sequence non-decreasing in sorted order.
        running_max = 0.0
        for i, idx in enumerate(sorted_indices):
            running_max = max(running_max, p_values[idx] * (n - i))
            adjusted[idx] = min(1.0, running_max)

        return adjusted


class SidakHolm(AbstractMultipleTesting):
    # NOTE(review): despite the class name, this implements the single-step
    # Šidák correction, not the Šidák-Holm step-down procedure (which would
    # use 1 - (1 - p_(i))^(n - i) on sorted p-values with a cumulative
    # maximum). Confirm whether the name or the implementation should change.
    @classmethod
    def adjust(cls, p_values: list[float]) -> list[float]:
        """
        Adjust p-values using the Šidák correction for multiple hypothesis testing.

        Each p-value is transformed independently as 1 - (1 - p) ** n, where n
        is the total number of hypotheses; the result is capped at 1.

        Parameters
        ----------
        p_values : list[float]
            List of raw p-values for hypothesis testing. Must be in range [0, 1].

        Returns
        -------
        list[float]
            List of adjusted p-values, each in range [0, 1].
        """
        n = len(p_values)
        if n == 0:
            return []
        return [min(1.0, 1 - (1 - p) ** n) for p in p_values]
50 changes: 50 additions & 0 deletions tests/multiple_testing/test_fdr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import pytest
from pysatl_criterion.multiple_testing.fdr import BenjaminiYekutieli

def test_benjamini_yekutieli_complex():
    """Only the two smallest of ten p-values survive BY at alpha = 0.05."""
    raw = [0.001, 0.002, 0.01, 0.03, 0.04, 0.1, 0.15, 0.2, 0.25, 0.5]

    decisions, _ = BenjaminiYekutieli.test(raw, 0.05)

    assert decisions == [True] * 2 + [False] * 8


def test_benjamini_yekutieli_adjust_complex():
    """Adjusted values match a hand-rolled BY computation (input pre-sorted)."""
    raw = [0.001, 0.005, 0.05, 0.1, 0.15]
    n = len(raw)

    harmonic = sum(1.0 / k for k in range(1, n + 1))
    expected = [
        min(1.0, p * n * harmonic / rank)
        for rank, p in enumerate(sorted(raw), start=1)
    ]
    # Step-up monotonicity: cap each value by its right-hand neighbour.
    for k in range(n - 2, -1, -1):
        expected[k] = min(expected[k], expected[k + 1])

    assert BenjaminiYekutieli.adjust(raw) == pytest.approx(expected, abs=1e-4)


def test_benjamini_yekutieli_edge_cases():
    """An extreme small/large pair splits cleanly into reject / keep."""
    decisions, _ = BenjaminiYekutieli.test([0.00001, 0.99999], 0.05)
    assert decisions == [True, False]


def test_benjamini_yekutieli_large_input():
    """A 1000-element seeded random input keeps its length; output stays in [0, 1]."""
    import numpy as np

    np.random.seed(42)
    raw = np.random.rand(1000).tolist()

    decisions, corrected = BenjaminiYekutieli.test(raw, 0.05)

    assert len(decisions) == 1000 and len(corrected) == 1000
    assert all(0.0 <= value <= 1.0 for value in corrected)
85 changes: 85 additions & 0 deletions tests/multiple_testing/test_fwer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import pytest
from pysatl_criterion.multiple_testing.fwer import Holm, SidakHolm

def test_holm_correction():
    """Holm on four p-values; only the smallest is rejected at alpha = 0.05."""
    raw = [0.04, 0.001, 0.7, 0.02]

    decisions, corrected = Holm.test(raw, 0.05)

    # Sorted raw adjustments 0.001*4, 0.02*3, 0.04*2, 0.7*1 are already monotone.
    assert decisions == [False, True, False, False]
    assert corrected == pytest.approx([0.08, 0.004, 0.7, 0.06], abs=1e-3)


def test_holm_adjust():
    """Holm step-down monotonicity: later adjusted values carry the running max."""
    p_values = [0.01, 0.04, 0.03]

    # Sorted raw adjustments: 0.01*3 = 0.03, 0.03*2 = 0.06, 0.04*1 = 0.04.
    # Holm is a step-DOWN procedure, so the cumulative MAXIMUM applies:
    # 0.03, 0.06, max(0.06, 0.04) = 0.06 -> original order [0.03, 0.06, 0.06].
    expected = [0.03, 0.06, 0.06]

    adjusted = Holm.adjust(p_values)
    assert adjusted == pytest.approx(expected, abs=1e-3)


def test_holm_with_early_stop():
    """No rejections when all adjusted p-values exceed the threshold."""
    p_values = [0.1, 0.2, 0.3]
    alpha = 0.05

    rejected, adjusted = Holm.test(p_values, alpha)

    # Raw step-down values: 0.1*3 = 0.3, 0.2*2 = 0.4, 0.3*1 = 0.3; the
    # cumulative maximum yields 0.3, 0.4, 0.4.
    expected_adjusted = [0.3, 0.4, 0.4]

    assert rejected == [False, False, False]
    assert adjusted == pytest.approx(expected_adjusted, abs=1e-3)


def test_holm_all_rejected():
    """All hypotheses rejected when every adjusted p-value stays tiny."""
    p_values = [0.001, 0.002, 0.003]
    alpha = 0.05

    rejected, adjusted = Holm.test(p_values, alpha)

    # Raw step-down values: 0.001*3 = 0.003, 0.002*2 = 0.004, 0.003*1 = 0.003;
    # the cumulative maximum yields 0.003, 0.004, 0.004.
    expected_adjusted = [0.003, 0.004, 0.004]

    assert rejected == [True, True, True]
    assert adjusted == pytest.approx(expected_adjusted, abs=1e-3)


def test_sidak_correction():
    """Šidák adjustment 1 - (1 - p)^n applied element-wise to three p-values."""
    raw = [0.01, 0.02, 0.1]
    count = len(raw)

    decisions, corrected = SidakHolm.test(raw, 0.05)

    assert decisions == [True, False, False]

    expected = [1 - (1 - p) ** count for p in raw]
    assert corrected == pytest.approx(expected, abs=1e-10)


def test_sidak_edge_cases():
    """Boundary and degenerate inputs for the Šidák correction."""
    threshold = 0.05001

    # All-zero p-values remain zero; everything is rejected.
    decisions, corrected = SidakHolm.test([0.0, 0.0, 0.0], threshold)
    assert corrected == [0.0, 0.0, 0.0]
    assert all(decisions)

    # All-one p-values remain one; nothing is rejected.
    decisions, corrected = SidakHolm.test([1.0, 1.0, 1.0], threshold)
    assert corrected == [1.0, 1.0, 1.0]
    assert not any(decisions)

    # A single hypothesis needs no correction: 1 - (1 - p)^1 == p.
    decisions, corrected = SidakHolm.test([0.05], threshold)
    assert corrected == pytest.approx([0.05], abs=1e-10)
    assert decisions == [True]

    # General case matches the element-wise formula.
    raw = [0.05, 0.1, 0.15]
    expected = [1 - (1 - p) ** len(raw) for p in raw]
    decisions, corrected = SidakHolm.test(raw, threshold)
    assert corrected == pytest.approx(expected, abs=1e-10)
Loading