
Commit 233da76

Merge pull request #20 from mctigger/fix-mixed-shape-empty-container

refactor(container): centralize pytree mapping and fixes operations o…

2 parents 8a36497 + 4a6e86b

File tree

8 files changed: +109 -151 lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "tensorcontainer"
-version = "0.7.1"
+version = "0.8.0"
 description = "TensorDict-like functionality for PyTorch with PyTree compatibility and torch.compile support"
 authors = [{name="Tim Joseph", email="tim@mctigger.com"}]
 license = {text = "MIT"}

src/tensorcontainer/tensor_container.py

Lines changed: 24 additions & 2 deletions

@@ -312,7 +312,29 @@ def wrapped_func(keypath, x, *xs):
                 message = f"Error at path {path}: {type(e).__name__}: {e}"
                 raise type(e)(message) from e
 
-        return pytree.tree_map_with_path(wrapped_func, tree, *rests, is_leaf=is_leaf)
+        return cls.tree_map_with_path(wrapped_func, tree, *rests, is_leaf=is_leaf)
+
+    @classmethod
+    def tree_map_with_path(
+        cls,
+        func: Callable[..., Any],
+        tree: PyTree,
+        *rests: PyTree,
+        is_leaf: Optional[Callable[[PyTree], bool]] = None,
+    ) -> PyTree:
+        # This is copied from pytree.tree_map_with_path().
+        # We add the check for no leaves, as operations are currently not supported for
+        # empty TensorContainers.
+        keypath_leaves, treespec = pytree.tree_flatten_with_path(tree, is_leaf)
+
+        if len(keypath_leaves) == 0:
+            raise RuntimeError(
+                "TensorContainer does not allow operations on containers without leaves (i.e. not containing any tensors)."
+            )
+
+        keypath_leaves = list(zip(*keypath_leaves))
+        all_keypath_leaves = keypath_leaves + [treespec.flatten_up_to(r) for r in rests]
+        return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))
 
     @classmethod
     def _is_shape_compatible(cls, parent: TensorContainer, child: TCCompatible):

@@ -358,7 +380,7 @@ def ndim(self):
     # --- Overloaded methods leveraging PyTrees ---
 
     def copy(self) -> Self:
-        return pytree.tree_map(lambda x: x, self)
+        return self._tree_map(lambda x: x, self)
 
     def get_number_of_consuming_dims(self, item) -> int:
         if item is Ellipsis or item is None:
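
Note: the new tree_map_with_path classmethod mirrors torch.utils._pytree.tree_map_with_path but fails fast on leaf-free containers. A minimal sketch of the resulting behavior (hypothetical usage, assuming TensorDict is registered as a pytree and copy() routes through _tree_map as in this diff):

import torch
from tensorcontainer.tensor_dict import TensorDict

# A container with at least one tensor leaf maps normally.
td = TensorDict({"a": torch.zeros(4)}, shape=(4,))
td.copy()  # fine: there is one leaf to map over

# A container without tensor leaves now raises instead of passing through.
empty = TensorDict({}, shape=(4,))
try:
    empty.copy()
except RuntimeError as e:
    print(e)  # "TensorContainer does not allow operations on containers without leaves ..."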

src/tensorcontainer/tensor_dict.py

Lines changed: 3 additions & 13 deletions

@@ -49,7 +49,6 @@
 class TensorDictPytreeContext(NamedTuple):
     keys: Tuple[str, ...]
     event_ndims: Tuple[int, ...]
-    shape_context: torch.Size
     device_context: torch.device | None
     metadata: Dict[str, Any]

@@ -160,9 +159,7 @@ def _get_pytree_context(
         """
         batch_ndim = len(self.shape)
         event_ndims = tuple(leaf.ndim - batch_ndim for leaf in flat_leaves)
-        return TensorDictPytreeContext(
-            tuple(keys), event_ndims, self.shape, self.device, metadata
-        )
+        return TensorDictPytreeContext(tuple(keys), event_ndims, self.device, metadata)
 
     def _pytree_flatten(
         self,

@@ -229,7 +226,7 @@ def _pytree_unflatten(
         from the context. The device is restored from the context.
         """
         # Unpack context tuple
-        keys, event_ndims, shape_context, device_context, metadata = context
+        keys, event_ndims, device_context, metadata = context
 
         obj = cls.__new__(cls)
         obj.device = device_context

@@ -241,22 +238,15 @@
         data.update(metadata)
         obj.data = data
 
-        if not leaves_list:
-            # Empty case - use shape from context
-            obj.shape = shape_context
-            return obj
-
         first_leaf = leaves_list[0]
 
         # Infer batch shape from first leaf and event_ndims
         if (
             event_ndims and event_ndims[0] == 0
         ):  # Leaf was a scalar or had only batch dimensions originally
             reconstructed_shape = first_leaf.shape
-        elif event_ndims:  # Leaf had event dimensions originally
+        else:  # Leaf had event dimensions originally
             reconstructed_shape = first_leaf.shape[: -event_ndims[0]]
-        else:  # No leaves with event_ndims, use context
-            reconstructed_shape = shape_context
 
         obj.shape = reconstructed_shape
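
Note: with shape_context removed, the batch shape must be recoverable from the leaves themselves. A standalone sketch of the reconstruction rule the last hunk implements (variable names mirror the diff):

import torch

# Recorded at flatten time: event_ndims[i] = leaf.ndim - batch_ndim.
first_leaf = torch.rand(2, 3, 5)  # batch dims (2, 3) plus one event dim (5)
event_ndims = (1,)

if event_ndims[0] == 0:
    reconstructed_shape = first_leaf.shape  # leaf had batch dims only
else:
    reconstructed_shape = first_leaf.shape[: -event_ndims[0]]  # strip event dims

assert reconstructed_shape == torch.Size([2, 3])

This is also why the empty case had to become an error: with no leaves there is nothing left to infer the shape from.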

tests/conftest.py

Lines changed: 13 additions & 0 deletions

@@ -83,3 +83,16 @@ def pytest_configure(config):
 )
 def device(request):
     return torch.device(request.param)
+
+
+@pytest.fixture(autouse=True)
+def dynamo_reset():
+    """
+    A pytest fixture that automatically resets torch._dynamo state
+    before and after every test function.
+    """
+    # Code before the test runs
+    torch._dynamo.reset()
+    yield
+    # Code after the test runs (optional cleanup)
+    torch._dynamo.reset()
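
Note: torch._dynamo.reset() clears compiled graphs and guard state, so compilation artifacts from one test cannot leak into the next. A standalone illustration of what the fixture wraps around each test (not part of the diff):

import torch

@torch.compile
def double(x):
    return x * 2

double(torch.ones(2))   # first call triggers compilation
torch._dynamo.reset()   # drop cached graphs and guards, as the fixture does
double(torch.ones(2))   # compiles again from a clean slate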

tests/tensor_dict/test_cat.py

Lines changed: 26 additions & 38 deletions

@@ -3,29 +3,25 @@
 
 from tensorcontainer.tensor_dict import TensorDict  # adjust import as needed
 from tests.conftest import skipif_no_compile
-from tests.tensor_dict import common
-from tests.tensor_dict.common import compare_nested_dict, compute_cat_shape
 
-nested_dict = common.nested_dict
+
+def create_nested_dict(shape):
+    a = torch.rand(*shape)
+    b = torch.rand(*shape)
+    y = torch.rand(*shape)
+    return {"x": {"a": a, "b": b}, "y": y}
+
 
 # Define parameter sets
 SHAPE_DIM_PARAMS_VALID = [
     # 1D
-    ((4,), 0),
-    ((4,), -1),
+    ((4,), (4,), 0, (8,)),
+    ((4,), (4,), -1, (8,)),
     # 2D
-    ((2, 2), 0),
-    ((2, 2), 1),
-    ((2, 2), -1),
-    ((1, 4), 0),
-    ((1, 4), 1),
-    ((1, 4), -2),
-    # 3D
-    ((2, 1, 2), 0),
-    ((2, 1, 2), 1),
-    ((2, 1, 2), 2),
-    ((2, 1, 2), -1),
-    ((2, 1, 2), -3),
+    ((2, 2), (3, 2), 0, (5, 2)),
+    ((2, 2), (2, 3), 1, (2, 5)),
+    ((2, 2), (2, 3), -1, (2, 5)),
+    ((2, 2), (3, 2), -2, (5, 2)),
 ]
 
 SHAPE_DIM_PARAMS_INVALID = [

@@ -42,33 +38,24 @@
 
 
 # ——— Valid concatenation dims across several shapes ———
-@pytest.mark.parametrize("shape, dim", SHAPE_DIM_PARAMS_VALID)
-def test_cat_valid_eager(nested_dict, shape, dim):
-    data = nested_dict(shape)
-    td = TensorDict(data, shape)
+@pytest.mark.parametrize("shape1, shape2, dim, expected_shape", SHAPE_DIM_PARAMS_VALID)
+def test_cat_valid_eager(shape1, shape2, dim, expected_shape):
+    data1 = create_nested_dict(shape1)
+    data2 = create_nested_dict(shape2)
 
-    def cat_operation(tensor_dict_instance, cat_dimension):
-        return torch.cat(
-            [tensor_dict_instance, tensor_dict_instance], dim=cat_dimension
-        )
+    td1 = TensorDict(data1, shape1)
+    td2 = TensorDict(data2, shape2)
 
-    cat_td = cat_operation(td, dim)
+    cat_td = torch.cat([td1, td2], dim=dim)
 
-    # compute expected shape
-    expected_shape = compute_cat_shape(shape, dim)
     assert cat_td.shape == expected_shape
 
-    # Compare nested structure and values
-    # The lambda for comparison should always use eager torch.cat on original tensor data
-    compare_nested_dict(
-        data, cat_td, lambda orig_tensor: torch.cat([orig_tensor, orig_tensor], dim=dim)
-    )
-
 
 # ——— Error on invalid dims ———
 @pytest.mark.parametrize("shape, dim", SHAPE_DIM_PARAMS_INVALID)
-def test_cat_invalid_dim_raises_eager(shape, dim, nested_dict):
-    td = TensorDict(nested_dict(shape), shape)
+def test_cat_invalid_dim_raises_eager(shape, dim):
+    data = create_nested_dict(shape)
+    td = TensorDict(data, shape)
 
     def cat_operation(tensor_dict_instance, cat_dimension):
         # This is the operation that is expected to raise an error

@@ -82,8 +69,9 @@ def cat_operation(tensor_dict_instance, cat_dimension):
 
 @skipif_no_compile
 @pytest.mark.parametrize("shape, dim", SHAPE_DIM_PARAMS_INVALID)
-def test_cat_invalid_dim_raises_compile(shape, dim, nested_dict):
-    td = TensorDict(nested_dict(shape), shape)
+def test_cat_invalid_dim_raises_compile(shape, dim):
+    data = create_nested_dict(shape)
+    td = TensorDict(data, shape)
 
     def cat_operation(tensor_dict_instance, cat_dimension):
         # This is the operation that is expected to raise an error
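
Note: the reworked parameters now cover the mixed-shape case from the PR branch name: the two inputs may differ along the concatenation dimension. A quick sketch of one parameter row, ((2, 2), (3, 2), 0, (5, 2)), assuming the create_nested_dict helper above:

import torch
from tensorcontainer.tensor_dict import TensorDict

td1 = TensorDict(create_nested_dict((2, 2)), (2, 2))
td2 = TensorDict(create_nested_dict((3, 2)), (3, 2))

cat_td = torch.cat([td1, td2], dim=0)
assert cat_td.shape == (5, 2)  # 2 + 3 along dim 0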

tests/tensor_dict/test_copy.py

Lines changed: 0 additions & 28 deletions

@@ -152,34 +152,6 @@ def copy_fn(td):
     assert "c" not in td["x"]
 
 
-def test_copy_of_empty_tensor_dict(nested_dict):
-    # an empty dict should still copy correctly
-    td = TensorDict({}, shape=())
-    td_copy = td.copy()
-    assert isinstance(td_copy, TensorDict)
-    assert td_copy is not td
-    assert td_copy.shape == torch.Size([])
-    assert len(td_copy) == 0
-
-
-@skipif_no_compile
-def test_copy_of_empty_tensor_dict_compiled():
-    """Test that copying an empty TensorDict works with torch.compile."""
-
-    def copy_empty_td(td):
-        return td.copy()
-
-    td = TensorDict({}, shape=())
-
-    eager_result, compiled_result = run_and_compare_compiled(copy_empty_td, td)
-
-    # Additional checks specific to empty TensorDict
-    assert isinstance(eager_result, TensorDict)
-    assert eager_result is not td
-    assert eager_result.shape == torch.Size([])
-    assert len(eager_result) == 0
-
-
 def test_copy_with_pytree(nested_dict):
     data = nested_dict((2, 2))
     td = TensorDict(data, shape=(2, 2))

tests/tensor_dict/test_metadata.py

Lines changed: 0 additions & 21 deletions

@@ -84,24 +84,3 @@ def test_nested_tensordict_with_metadata(self):
             td_doubled["nested"]["nested_tensor"], torch.ones(4, 2) * 2
         )
         assert td_doubled["nested"]["nested_meta"] == "level2"
-
-    def test_metadata_only_tensordict(self):
-        """
-        Tests the edge case where a TensorDict contains no tensors at all, only
-        metadata. Pytree operations should not alter it.
-        """
-        td = TensorDict({"meta1": "a", "meta2": 123}, shape=(4,))
-        td_unchanged = tree_map(lambda x: x * 2, td)
-
-        assert td_unchanged.data == td.data
-
-    def test_empty_tensordict(self):
-        """
-        Tests that an empty TensorDict remains empty and handles pytree
-        operations gracefully without errors.
-        """
-        td = TensorDict({}, shape=(4,))
-        td_unchanged = tree_map(lambda x: x * 2, td)
-
-        assert len(td_unchanged) == 0
-        assert td_unchanged.shape == (4,)
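
Note: the deleted tests in test_copy.py and test_metadata.py all exercised empty or metadata-only containers. Their removal matches the new leaf check in tensor_container.py and the removal of shape_context from the pytree context: such containers no longer support pytree-based operations, so the old "gracefully unchanged" behavior is gone by design.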
