Fixed edge cases: empty datasets in covers and vp-tree builders, plus extra edge-case tests

lucasimi · lucasimi · commit 5ff0566ab605 · 2025-07-18T18:28:00.000+02:00
diff --git a/src/tdamapper/core.py b/src/tdamapper/core.py
@@ -279,14 +279,24 @@ class TrivialCover(ParamsMixin, Generic[T]):
     dataset.
     """
 
+    def fit(self, X: ArrayRead[T]) -> TrivialCover[T]:
+        """
+        Fit the cover algorithm to the data (a no-op for the trivial cover).
+
+        :param X: A dataset of n points. Ignored.
+        :return: self
+        """
+        return self
+
     def apply(self, X: ArrayRead[T]) -> Iterator[list[int]]:
         """
         Covers the dataset with a single open set.
 
         :param X: A dataset of n points.
         :return: A generator of lists of ids.
         """
-        yield list(range(0, len(X)))
+        if len(X) > 0:
+            yield list(range(0, len(X)))
 
 
 class FailSafeClustering(ParamsMixin, Generic[T]):
diff --git a/src/tdamapper/cover.py b/src/tdamapper/cover.py
@@ -377,6 +377,8 @@ def fit(self, X: ArrayRead[NDArray[np.float_]]) -> BaseCubicalCover:
         :param X: A dataset of n points.
         :return: The object itself.
         """
+        if len(X) == 0:
+            return self
         X_ = np.asarray(X).reshape(len(X), -1).astype(float)
         if self.overlap_frac is None:
             dim = 1 if X_.ndim == 1 else X_.shape[1]
diff --git a/src/tdamapper/utils/vptree_flat/builder.py b/src/tdamapper/utils/vptree_flat/builder.py
@@ -91,7 +91,8 @@ def build(self) -> VPArray[T]:
 
         :return: A tuple containing the constructed vp-tree and the VPArray.
         """
-        self._build_iter()
+        if self._array.size() > 0:
+            self._build_iter()
         return self._array
 
     def _build_iter(self) -> None:
diff --git a/src/tdamapper/utils/vptree_hier/builder.py b/src/tdamapper/utils/vptree_hier/builder.py
@@ -94,7 +94,10 @@ def build(self) -> tuple[Tree[T], VPArray[T]]:
 
         :return: A tuple containing the constructed vp-tree and the VPArray.
         """
-        tree = self._build_rec(0, self._array.size())
+        if self._array.size() > 0:
+            tree = self._build_rec(0, self._array.size())
+        else:
+            tree = Leaf(0, 0)
         return tree, self._array
 
     def _build_rec(self, start: int, end: int) -> Tree[T]:
diff --git a/tests/test_unit_cover.py b/tests/test_unit_cover.py
@@ -4,6 +4,7 @@
 
 import math
 
+import numpy as np
 import pytest
 
 from tdamapper.core import TrivialCover
@@ -62,6 +63,26 @@ def count_components(charts):
     return len(unique_components)
 
 
+@pytest.mark.parametrize(
+    "cover",
+    [
+        TrivialCover(),
+        BallCover(radius=0.1, metric="euclidean"),
+        KNNCover(neighbors=1, metric="euclidean"),
+        StandardCubicalCover(n_intervals=2, overlap_frac=0.5),
+        ProximityCubicalCover(n_intervals=2, overlap_frac=0.5),
+    ],
+)
+def test_cover_empty(cover):
+    """
+    Test that the cover algorithms produce no charts on an empty dataset.
+    """
+    empty_data = np.array([])
+    cover.fit(empty_data)
+    charts = cover.apply(empty_data)
+    assert len(list(charts)) == 0
+
+
 @pytest.mark.parametrize(
     "dataset, cover, num_charts, num_components",
     [
@@ -115,7 +136,20 @@ def count_components(charts):
         (GRID, KNNCover(neighbors=1, metric="euclidean"), 100, 100),
         (GRID, KNNCover(neighbors=10, metric="euclidean"), None, 1),
         (GRID, StandardCubicalCover(n_intervals=2, overlap_frac=0.5), 4, 1),
+        (GRID, StandardCubicalCover(n_intervals=2), 4, 1),
         (GRID, ProximityCubicalCover(n_intervals=2, overlap_frac=0.5), 4, 1),
+        (
+            GRID,
+            CubicalCover(n_intervals=2, overlap_frac=0.5, algorithm="proximity"),
+            4,
+            1,
+        ),
+        (
+            GRID,
+            CubicalCover(n_intervals=2, overlap_frac=0.5, algorithm="standard"),
+            4,
+            1,
+        ),
     ],
 )
 def test_cover(dataset, cover, num_charts, num_components):
diff --git a/tests/test_unit_heap.py b/tests/test_unit_heap.py
@@ -34,3 +34,8 @@ def test_max_heap(data):
     for x in data:
         m.add(x, x)
     _check_heap_property(list(m))
+    assert len(m) == len(data)
+    if not data:
+        assert m.is_empty()
+        assert m.top() is None
+        assert m.pop() is None
diff --git a/tests/test_unit_metrics.py b/tests/test_unit_metrics.py
@@ -1,3 +1,7 @@
+"""
+Unit tests for the metrics module.
+"""
+
 import math
 
 import numpy as np
@@ -66,8 +70,11 @@ def _check_values(m1, m2, a, b):
         (euclidean(), get_metric("euclidean")),
         (manhattan(), get_metric("manhattan")),
         (chebyshev(), get_metric("chebyshev")),
-        (minkowski(p=3), get_metric("minkowski", p=3)),
+        (manhattan(), get_metric("minkowski", p=1)),
+        (euclidean(), get_metric("minkowski", p=2)),
         (minkowski(p=2.5), get_metric("minkowski", p=2.5)),
+        (minkowski(p=3), get_metric("minkowski", p=3)),
+        (chebyshev(), get_metric("minkowski", p=float("inf"))),
         (cosine(), get_metric("cosine")),
     ],
 )
@@ -87,3 +94,8 @@ def test_supported_metrics():
     ]
     supported_metrics = get_supported_metrics()
     assert set(supported_metrics) == set(expected_metrics)
+
+
+def test_non_existent_metric():
+    with pytest.raises(ValueError):
+        get_metric("non_existent_metric")
diff --git a/tests/test_unit_vptree.py b/tests/test_unit_vptree.py
@@ -7,6 +7,7 @@
 import pytest
 
 from tdamapper.utils.metrics import get_metric
+from tdamapper.utils.vptree import VPTree
 from tdamapper.utils.vptree_flat.vptree import VPTree as FVPT
 from tdamapper.utils.vptree_hier.vptree import VPTree as HVPT
 from tests.ball_tree import SkBallTree
@@ -117,6 +118,17 @@ def _check_rec(start, end):
     _check_rec(0, len(data))
 
 
+@pytest.mark.parametrize("builder", [HVPT, FVPT])
+@pytest.mark.parametrize("dataset", [[], [1], [1, 2]])
+def test_vptree_small_dataset(builder, dataset):
+    """
+    Test the vp-tree implementations with empty and very small datasets.
+    """
+    vpt = builder(dataset, metric=lambda x, y: abs(x - y))
+    array = vpt.array
+    assert array.size() == len(dataset)
+
+
 @pytest.mark.parametrize("pivoting", ["disabled", "random", "furthest"])
 @pytest.mark.parametrize("eps", [0.1, 0.5])
 @pytest.mark.parametrize("neighbors", [2, 10])
@@ -141,6 +153,30 @@ def test_vptree(builder, dataset, metric, eps, neighbors, pivoting):
     _test_nn_search(dataset, metric, vpt)
 
 
+@pytest.mark.parametrize("pivoting", ["disabled", "random", "furthest"])
+@pytest.mark.parametrize("eps", [0.1, 0.5])
+@pytest.mark.parametrize("neighbors", [2, 10])
+@pytest.mark.parametrize("kind", ["flat", "hierarchical"])
+@pytest.mark.parametrize("metric", ["euclidean", "manhattan"])
+@pytest.mark.parametrize("dataset", [SIMPLE, TWO_LINES])
+def test_vptree_public(kind, dataset, metric, eps, neighbors, pivoting):
+    """
+    Test the public VPTree wrapper (both kinds) with various datasets and metrics.
+    """
+    metric = get_metric(metric)
+    vpt = VPTree(
+        dataset,
+        kind=kind,
+        metric=metric,
+        leaf_radius=eps,
+        leaf_capacity=neighbors,
+        pivoting=pivoting,
+    )
+    _test_ball_search(dataset, metric, vpt, eps)
+    _test_knn_search(dataset, metric, vpt, neighbors)
+    _test_nn_search(dataset, metric, vpt)
+
+
 @pytest.mark.parametrize("pivoting", ["disabled", "random", "furthest"])
 @pytest.mark.parametrize("eps", [0.1, 0.5])
 @pytest.mark.parametrize("neighbors", [2, 10])