[BugFix] Better list assignment in tensorclasses

Vincent Moens · Vincent Moens · commit 6d8119cd9e69 · 2025-04-04T09:27:16.000+01:00
ghstack-source-id: 001b0c0 Pull Request resolved: #1284
diff --git a/.github/unittest/linux/scripts/run_test.sh b/.github/unittest/linux/scripts/run_test.sh
@@ -19,6 +19,7 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$lib_dir
 export MKL_THREADING_LAYER=GNU
 export TORCHDYNAMO_INLINE_INBUILT_NN_MODULES=1
 export TD_GET_DEFAULTS_TO_NONE=1
+export LIST_TO_STACK=1
 
 coverage run -m pytest test/smoke_test.py -v --durations 20
 coverage run -m pytest --runslow --instafail -v --durations 20 --timeout 120
diff --git a/tensordict/_lazy.py b/tensordict/_lazy.py
@@ -278,37 +278,14 @@ def __init__(
                 raise RuntimeError(
                     f"Couldn't infer stack dim from negative value, got stack_dim={stack_dim}"
                 )
-        _batch_size = td0.batch_size
-        if stack_dim > len(_batch_size):
+        self.stack_dim = stack_dim
+        self._reset_batch_size(td0, tensordicts, device, num_tds, strict_shape)
+        if stack_dim > len(self.batch_size):
             raise RuntimeError(
-                f"Stack dim {stack_dim} is too big for batch size {_batch_size}."
+                f"Stack dim {stack_dim} is too big for batch size {self.batch_size}."
             )
 
-        for td in tensordicts[1:]:
-            if not is_tensor_collection(td):
-                raise TypeError(
-                    "Expected all inputs to be TensorDictBase instances but got "
-                    f"{type(td)} instead."
-                )
-            _bs = td.batch_size
-            _device = td.device
-            if device != _device:
-                raise RuntimeError(f"devices differ, got {device} and {_device}")
-            if _bs != _batch_size:
-                if strict_shape or len(_bs) != len(_batch_size):
-                    raise RuntimeError(
-                        f"batch sizes in tensordicts differs, LazyStackedTensorDict "
-                        f"cannot be created. Got td[0].batch_size={_batch_size} "
-                        f"and td[i].batch_size={_bs}. If the length match and you wish "
-                        f"to stack these tensordicts, set strict_shape to False."
-                    )
-                else:
-                    _batch_size = torch.Size(
-                        [s if _bs[i] == s else -1 for i, s in enumerate(_batch_size)]
-                    )
         self.tensordicts: list[TensorDictBase] = list(tensordicts)
-        self.stack_dim = stack_dim
-        self._batch_size = self._compute_batch_size(_batch_size, stack_dim, num_tds)
         self.hook_out = hook_out
         self.hook_in = hook_in
         if batch_size is not None and batch_size != self.batch_size and num_tds != 0:
@@ -578,6 +555,41 @@ def is_memmap(self) -> bool:
             )
         return are_memmap[0]
 
+    def _reset_batch_size(
+        self,
+        td0: TensorDictBase,
+        tensordicts: list[TensorDictBase],
+        device: torch.device,
+        num_tds: int,
+        strict_shape: bool,
+    ):
+        """Validates the stacked tensordicts and sets ``self._batch_size``."""
+        shape = td0.batch_size
+        # Guard on the un-stacked shape (presumably one dim shorter once stacked).
+        if self.stack_dim > len(shape):
+            raise RuntimeError(
+                f"Stack dim {self.stack_dim} is too big for batch size {shape}."
+            )
+        for td in tensordicts[1:]:
+            if not is_tensor_collection(td):
+                raise TypeError(
+                    "Expected all inputs to be TensorDictBase instances but got "
+                    f"{type(td)} instead."
+                )
+            _bs = td.batch_size
+            if device != td.device:
+                raise RuntimeError(f"devices differ, got {device} and {td.device}")
+            if _bs != shape:
+                if strict_shape or len(_bs) != len(shape):
+                    raise RuntimeError(
+                        f"batch sizes in tensordicts differs, LazyStackedTensorDict "
+                        f"cannot be created. Got td[0].batch_size={shape} "
+                        f"and td[i].batch_size={_bs}. If the length match and you wish "
+                        f"to stack these tensordicts, set strict_shape to False."
+                    )
+                shape = torch.Size([s if s == o else -1 for s, o in zip(shape, _bs)])
+        self._batch_size = self._compute_batch_size(shape, self.stack_dim, num_tds)
+
     @staticmethod
     def _compute_batch_size(
         batch_size: torch.Size, stack_dim: int, num_tds: int
@@ -606,7 +618,9 @@ def _set_str(
             ) from e
         if not validated:
             value = self._validate_value(
-                value, non_blocking=non_blocking, check_shape=not list_to_stack()
+                value,
+                non_blocking=non_blocking,
+                check_shape=not (isinstance(value, list) and list_to_stack()),
             )
             validated = True
         if self._is_vmapped:
@@ -3147,6 +3161,42 @@ def append(self, tensordict: T) -> None:
         """
         self.insert(len(self.tensordicts), tensordict)
 
+    @lock_blocked
+    def extend(self, tensordict: list[T] | T) -> None:
+        """Extends the lazy stack with new tensordicts.
+
+        A tensor collection is unbound along ``stack_dim`` first. New items must
+        share the device and batch size of the first stack element (or of the
+        first new item if the stack is empty), else a ``ValueError`` is raised.
+        """
+        if _is_tensor_collection(type(tensordict)):
+            tensordict = tensordict.unbind(self.stack_dim)
+        new_tds = list(tensordict)
+        if not all(isinstance(td, TensorDictBase) for td in new_tds):
+            raise TypeError(
+                "Expected new value to be TensorDictBase instance but got "
+                f"{[type(td) for td in new_tds]} instead."
+            )
+        if not new_tds:
+            return  # nothing to add: stack and batch size stay as they are
+        # An empty stack has nothing to compare with: use the first new item.
+        ref = self.tensordicts[0] if self.tensordicts else new_tds[0]
+        batch_size, device = ref.batch_size, ref.device
+        for td in new_tds:
+            if device != td.device:
+                raise ValueError(
+                    f"Devices differ: stack has device={device}, new value has "
+                    f"device={td.device}."
+                )
+            if td.batch_size != batch_size:
+                raise ValueError(
+                    f"Batch sizes in tensordicts differs: stack has batch_size="
+                    f"{batch_size}, new_value has batch_size={td.batch_size}."
+                )
+        self.tensordicts.extend(new_tds)
+        N = len(self.tensordicts)
+        self._batch_size = self._compute_batch_size(batch_size, self.stack_dim, N)
+
     @property
     def is_locked(self) -> bool:
         if self._is_locked is not None:
diff --git a/tensordict/tensorclass.py b/tensordict/tensorclass.py
@@ -72,6 +72,7 @@
     IndexType,
     is_tensorclass,
     KeyDependentDefaultDict,
+    list_to_stack,
     set_capture_non_tensor_stack,
 )
 from torch import multiprocessing as mp, Tensor
@@ -363,6 +364,7 @@ def __subclasscheck__(self, subclass):
     "expand_as",
     "expm1",
     "expm1_",
+    "extend",
     "fill_",
     "filter_empty_",
     "filter_non_tensor_data",
@@ -1025,6 +1027,9 @@ def __torch_function__(
                 _wrap_td_method(method_name, copy_non_tensor=True),
             )
 
+    # if not hasattr(cls, "batch_size") and "batch_size" not in expected_keys:
+    #     cls.batch_size = property(_batch_size, _batch_size_setter)
+
     cls.__enter__ = __enter__
     cls.__exit__ = __exit__
 
@@ -1080,6 +1085,12 @@ def __torch_function__(
     return cls
 
 
+# def _batch_size(self):
+#     return self.__dict__["_tensordict"]._batch_size
+# def _batch_size_setter(self, value):
+#     self.__dict__["_tensordict"].batch_size = value
+
+
 def _arg_to_tensordict(arg):
     # if arg is a tensorclass or sequence of tensorclasses, extract the underlying
     # tensordicts and return those instead
@@ -2347,6 +2358,9 @@ def _is_castable(datatype):
             )
         ):
             return set_tensor()
+        elif issubclass(value_type, list) and list_to_stack():
+            # set() will take care of casting to non tensor
+            non_tensor = False
         else:
             non_tensor = True
 
diff --git a/test/test_tensorclass.py b/test/test_tensorclass.py
@@ -35,6 +35,7 @@
     LazyStackedTensorDict,
     MemoryMappedTensor,
     set_capture_non_tensor_stack,
+    set_list_to_stack,
     tensorclass,
     TensorClass,
     TensorDict,
@@ -1032,19 +1033,28 @@ class MyDataParent:
         assert data.y.v == "test_nested"
         assert data.y.batch_size == torch.Size(batch_size)
 
-    def test_indexing(self):
-        @tensorclass
-        class MyDataNested:
-            X: torch.Tensor
-            z: list
-            y: "MyDataNested" = None
-
-        X = torch.ones(3, 4, 5)
-        z = ["a", "b", "c"]
-        batch_size = [3, 4]
-        data_nest = MyDataNested(X=X, z=z, batch_size=batch_size)
-        data = MyDataNested(X=X, y=data_nest, z=z, batch_size=batch_size)
+    @pytest.mark.parametrize("list_to_stack", [True, False])
+    def test_indexing(self, list_to_stack):
+        with set_list_to_stack(list_to_stack):
 
+            @tensorclass
+            class MyDataNested:
+                X: torch.Tensor
+                z: list
+                y: "MyDataNested" = None
+
+            X = torch.ones(3, 4, 5)
+            z = ["a", "b", "c"]
+            batch_size = [3, 4]
+            with (
+                pytest.raises(RuntimeError, match="batch dimension mismatch")
+                if list_to_stack
+                else contextlib.nullcontext()
+            ):
+                data_nest = MyDataNested(X=X, z=z, batch_size=batch_size)
+                data = MyDataNested(X=X, y=data_nest, z=z, batch_size=batch_size)
+            if list_to_stack:
+                return
         assert data[:2].batch_size == torch.Size([2, 4])
         assert data[:2].X.shape == torch.Size([2, 4, 5])
         assert (data[:2].X == X[:2]).all()
@@ -1462,6 +1472,21 @@ class Data:
         assert (data_select == 1).all()
         assert "a" in data_select._tensordict
 
+    @set_list_to_stack(True)
+    def test_set_list_in_constructor(self):
+        # Each list entry must land at the matching batch index; `c` is shared.
+        strings = ["a string", "another string"]
+        tensors = [torch.randn(3), torch.zeros(3)]
+        obj = MyTensorClass(
+            a=strings, b=tensors, c="smth completly different", batch_size=2
+        )
+        assert obj.shape == (2,)
+        assert obj[0].a == "a string"
+        assert obj[1].a == "another string"
+        assert (obj[0].b != 0).all()
+        assert (obj[1].b == 0).all()
+        assert obj.c == obj[0].c
+
     def test_set_dict(self):
         @tensorclass(autocast=True)
         class MyClass:
@@ -1540,7 +1565,8 @@ class MyDataParent:
         # ensure optional fields are writable
         data.k = torch.zeros(3, 4, 5)
 
-    def test_setitem(self):
+    @pytest.mark.parametrize("list_to_stack", [True, False])
+    def test_setitem(self, list_to_stack):
         data = MyData(
             X=torch.ones(3, 4, 5),
             y=torch.zeros(3, 4, 5),
@@ -1599,26 +1625,34 @@ class MyDataNested:
         X = torch.randn(3, 4, 5)
         z = ["a", "b", "c"]
         batch_size = [3, 4]
-        data_nest = MyDataNested(X=X, z=z, batch_size=batch_size)
-        data = MyDataNested(X=X, y=data_nest, z=z, batch_size=batch_size)
-        X2 = torch.ones(3, 4, 5)
-        data_nest2 = MyDataNested(X=X2, z=z, batch_size=batch_size)
-        data2 = MyDataNested(X=X2, y=data_nest2, z=z, batch_size=batch_size)
-        data[:2] = data2[:2].clone()
-        assert (data[:2].X == data2[:2].X).all()
-        assert (data[:2].y.X == data2[:2].y.X).all()
-        assert data[:2].z == z
-
-        # Negative Scenario
-        data3 = MyDataNested(X=X2, y=data_nest2, z=["e", "f"], batch_size=batch_size)
-        data[:2] = data3[:2]
-        assert data[:2].z == data3[:2]._get_str("z", None).tolist()
+        with set_list_to_stack(list_to_stack), (
+            pytest.raises(RuntimeError, match="batch dimension mismatch")
+            if list_to_stack
+            else contextlib.nullcontext()
+        ):
+            data_nest = MyDataNested(X=X, z=z, batch_size=batch_size)
+            data = MyDataNested(X=X, y=data_nest, z=z, batch_size=batch_size)
+            X2 = torch.ones(3, 4, 5)
+            data_nest2 = MyDataNested(X=X2, z=z, batch_size=batch_size)
+            data2 = MyDataNested(X=X2, y=data_nest2, z=z, batch_size=batch_size)
+            data[:2] = data2[:2].clone()
+            assert (data[:2].X == data2[:2].X).all()
+            assert (data[:2].y.X == data2[:2].y.X).all()
+            assert data[:2].z == z
+
+            # Negative Scenario
+            data3 = MyDataNested(
+                X=X2, y=data_nest2, z=["e", "f"], batch_size=batch_size
+            )
+            data[:2] = data3[:2]
+            assert data[:2].z == data3[:2]._get_str("z", None).tolist()
 
     @pytest.mark.parametrize(
         "broadcast_type",
         ["scalar", "tensor", "tensordict", "maptensor"],
     )
-    def test_setitem_broadcast(self, broadcast_type):
+    @pytest.mark.parametrize("list_to_stack", [True, False])
+    def test_setitem_broadcast(self, broadcast_type, list_to_stack):
         @tensorclass
         class MyDataNested:
             X: torch.Tensor
@@ -1628,22 +1662,27 @@ class MyDataNested:
         X = torch.ones(3, 4, 5)
         z = ["a", "b", "c"]
         batch_size = [3, 4]
-        data_nest = MyDataNested(X=X, z=z, batch_size=batch_size)
-        data = MyDataNested(X=X, y=data_nest, z=z, batch_size=batch_size)
-
-        if broadcast_type == "scalar":
-            val = 0
-        elif broadcast_type == "tensor":
-            val = torch.zeros(4, 5)
-        elif broadcast_type == "tensordict":
-            val = TensorDict({"X": torch.zeros(2, 4, 5)}, batch_size=[2, 4])
-        elif broadcast_type == "maptensor":
-            val = MemoryMappedTensor.from_tensor(torch.zeros(4, 5))
-
-        data[:2] = val
-        assert (data[:2] == 0).all()
-        assert (data.X[:2] == 0).all()
-        assert (data.y.X[:2] == 0).all()
+        with set_list_to_stack(list_to_stack), (
+            pytest.raises(RuntimeError, match="batch dimension mismatch")
+            if list_to_stack
+            else contextlib.nullcontext()
+        ):
+            data_nest = MyDataNested(X=X, z=z, batch_size=batch_size)
+            data = MyDataNested(X=X, y=data_nest, z=z, batch_size=batch_size)
+
+            if broadcast_type == "scalar":
+                val = 0
+            elif broadcast_type == "tensor":
+                val = torch.zeros(4, 5)
+            elif broadcast_type == "tensordict":
+                val = TensorDict({"X": torch.zeros(2, 4, 5)}, batch_size=[2, 4])
+            elif broadcast_type == "maptensor":
+                val = MemoryMappedTensor.from_tensor(torch.zeros(4, 5))
+
+            data[:2] = val
+            assert (data[:2] == 0).all()
+            assert (data.X[:2] == 0).all()
+            assert (data.y.X[:2] == 0).all()
 
     def test_setitem_memmap(self):
         # regression test PR #203
diff --git a/test/test_tensordict.py b/test/test_tensordict.py
@@ -11864,16 +11864,15 @@ def test_shared_memmap_single(self, pair, strategy, update, tmpdir):
 
     @staticmethod
     def _run_worker(td, val1, update):
+        set_list_to_stack(True).set()
         # Update in place
         if update == "setitem":
-            td["val"] = val1
+            td["val"] = NonTensorData(val1)
         elif update == "update_":
-            td.get("val").update_(
-                NonTensorData(data=val1, batch_size=[]), non_blocking=False
-            )
+            td.get("val").update_(NonTensorData(data=val1), non_blocking=False)
         elif update == "update-inplace":
             td.get("val").update(
-                NonTensorData(data=val1, batch_size=[]),
+                NonTensorData(data=val1),
                 inplace=True,
                 non_blocking=False,
             )
@@ -11884,6 +11883,7 @@ def _run_worker(td, val1, update):
         assert td["val"] == val1
 
     @pytest.mark.slow
+    @set_list_to_stack(True)
     @pytest.mark.parametrize("pair", PAIRS)
     @pytest.mark.parametrize("strategy", ["shared", "memmap"])
     @pytest.mark.parametrize("update", ["update_", "update-inplace"])