pytorch
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
Lines changed: 2 additions & 2 deletions
diff --git a/docs/requirements.txt b/docs/requirements.txt
Lines changed: 1 addition & 1 deletion
diff --git a/docs/source/reference/config.rst b/docs/source/reference/config.rst
Lines changed: 1 addition & 1 deletion
diff --git a/docs/source/reference/envs.rst b/docs/source/reference/envs.rst
Lines changed: 0 additions & 1 deletion
diff --git a/docs/source/reference/llms.rst b/docs/source/reference/llms.rst
Lines changed: 3 additions & 3 deletions
diff --git a/docs/source/reference/utils.rst b/docs/source/reference/utils.rst
Lines changed: 1 addition & 1 deletion
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 1 addition & 1 deletion
diff --git a/test/_utils_internal.py b/test/_utils_internal.py
Lines changed: 2 additions & 8 deletions
diff --git a/test/llm/test_objectives.py b/test/llm/test_objectives.py
Lines changed: 69 additions & 2 deletions
diff --git a/test/llm/test_vllm.py b/test/llm/test_vllm.py
Lines changed: 2 additions & 2 deletions
@@ -26,7 +26,7 @@ jobs:
   build-docs:
     strategy:
       matrix:
-        python_version: [ "3.9" ]
+        python_version: [ "3.12" ]
         cuda_arch_version: [ "12.8" ]
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
@@ -60,7 +60,7 @@ jobs:
         bash ./miniconda.sh -b -f -p "${conda_dir}"
         eval "$(${conda_dir}/bin/conda shell.bash hook)"
         printf "* Creating a test environment\n"
-        conda create --prefix "${env_dir}" -y python=3.9
+        conda create --prefix "${env_dir}" -y python=3.12
         printf "* Activating\n"
         conda activate "${env_dir}"
 
 
@@ -16,7 +16,7 @@ sphinx_design
 torchvision
 dm_control
 mujoco<3.3.6
-gym[classic_control,accept-rom-license,ale-py,atari]
+gymnasium[classic_control,atari]
 pygame
 tqdm
 ipython
 
@@ -507,7 +507,7 @@ Training and Optimization Configurations
     SparseAdamConfig
 
 Logging Configurations
-~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~
 
 .. currentmodule:: torchrl.trainers.algorithms.configs.logging
 
 
@@ -1123,7 +1123,6 @@ to be able to create this other composition:
     ExcludeTransform
     FiniteTensorDictCheck
     FlattenObservation
-    FlattenTensorDict
     FrameSkipTransform
     GrayScale
     Hash
 
@@ -118,9 +118,9 @@ Usage
 Adding Custom Templates
 ^^^^^^^^^^^^^^^^^^^^^^^
 
-You can add custom chat templates for new model families using the :func:`torchrl.data.llm.chat.add_chat_template` function.
+You can add custom chat templates for new model families using the :func:`torchrl.data.llm.add_chat_template` function.
 
-.. autofunction:: torchrl.data.llm.chat.add_chat_template
+.. autofunction:: torchrl.data.llm.add_chat_template
 
 Usage Examples
 ^^^^^^^^^^^^^^
@@ -130,7 +130,7 @@ Adding a Llama Template
 
 .. code-block:: python
 
-    >>> from torchrl.data.llm.chat import add_chat_template, History
+    >>> from torchrl.data.llm import add_chat_template, History
     >>> from transformers import AutoTokenizer
     >>> 
     >>> # Define the Llama chat template
 
@@ -1,7 +1,7 @@
 .. currentmodule:: torchrl
 
 torchrl._utils package
-====================
+======================
 
 Set of utility methods that are used internally by the library.
 
 
@@ -149,4 +149,4 @@ first_party_detection = false
 [project.entry-points."vllm.general_plugins"]
 # Ensure FP32 overrides are registered in all vLLM processes (main, workers, and
 # the registry subprocess) before resolving model classes.
-fp32_overrides = "torchrl.modules.llm.backends.vllm_plugin:register_fp32_overrides"
+fp32_overrides = "torchrl.modules.llm.backends.vllm.vllm_plugin:register_fp32_overrides"
@@ -21,13 +21,7 @@
 from tensordict.nn import TensorDictModuleBase
 from torch import nn, vmap
 
-from torchrl._utils import (
-    implement_for,
-    logger,
-    logger as torchrl_logger,
-    RL_WARNINGS,
-    seed_generator,
-)
+from torchrl._utils import implement_for, logger, RL_WARNINGS, seed_generator
 from torchrl.data.utils import CloudpickleWrapper
 from torchrl.envs import MultiThreadedEnv, ObservationNorm
 from torchrl.envs.batched_envs import ParallelEnv, SerialEnv
@@ -230,7 +224,7 @@ def f_retry(*args, **kwargs):
                     return f(*args, **kwargs)
                 except ExceptionToCheck as e:
                     msg = "%s, Retrying in %d seconds..." % (str(e), mdelay)
-                    torchrl_logger.info(msg)
+                    logger.info(msg)
                     time.sleep(mdelay)
                     mtries -= 1
             try:
 
@@ -16,7 +16,13 @@
 from torchrl.envs.llm.transforms.kl import RetrieveLogProb
 from torchrl.modules.llm import TransformersWrapper, vLLMWrapper
 from torchrl.modules.llm.policies.common import ChatHistory, Masks, Text, Tokens
-from torchrl.objectives.llm.grpo import GRPOLoss, MCAdvantage
+from torchrl.objectives.llm.grpo import (
+    CISPO,
+    CISPOLossOutput,
+    GRPOLoss,
+    GRPOLossOutput,
+    MCAdvantage,
+)
 from torchrl.objectives.llm.sft import SFTLoss
 
 _has_transformers = importlib.util.find_spec("transformers") is not None
@@ -203,7 +209,6 @@ def test_grpo(self, mock_transformer_model, dapo):
         loss_vals = loss_fn(data)
 
         # Assertions: Check output type and structure
-        from torchrl.objectives.llm.grpo import GRPOLossOutput
 
         assert isinstance(
             loss_vals, GRPOLossOutput
@@ -240,6 +245,68 @@ def test_grpo(self, mock_transformer_model, dapo):
             0 <= loss_vals.clip_fraction <= 1
         ), f"clip_fraction out of range: {loss_vals.clip_fraction}"
 
+    def test_cispo(self, mock_transformer_model):
+        """Smoke-test CISPO on mock data: output type, fields, scalar shapes, ranges."""
+        vocab_size = 1024
+        device = torch.device("cpu")
+        eps = 0.20
+
+        # Build the mock model and wrap it in non-generating mode (generate=False)
+        model = mock_transformer_model(vocab_size=vocab_size, device=device)
+        actor_network = TransformersWrapper(
+            model,
+            generate=False,
+            pad_output=True,
+            input_mode="history",
+        )
+
+        # Create the loss module, passing eps as clip_epsilon
+
+        loss_fn = CISPO(actor_network, clip_epsilon=eps)
+
+        # Create fake data via the _mock_data_grpo helper
+        data = _mock_data_grpo(vocab_size=vocab_size, device=device)
+
+        # Compute loss
+        loss_vals = loss_fn(data)
+
+        # Assertions: check the output type first, then its structure
+
+        assert isinstance(
+            loss_vals, CISPOLossOutput
+        ), f"Expected CISPOLossOutput, got {type(loss_vals)}"
+
+        # Check that all expected attributes are present (same as GRPO)
+        assert hasattr(loss_vals, "loss_objective"), "Missing loss_objective"
+        assert hasattr(loss_vals, "clip_fraction"), "Missing clip_fraction"
+        assert hasattr(loss_vals, "kl_approx"), "Missing kl_approx"
+        assert hasattr(loss_vals, "ESS"), "Missing ESS"
+        assert hasattr(loss_vals, "entropy"), "Missing entropy"
+        assert hasattr(loss_vals, "loss_entropy"), "Missing loss_entropy"
+
+        # Check tensor shapes (each checked value should be a 0-dim scalar)
+        assert (
+            loss_vals.loss_objective.shape == ()
+        ), f"loss_objective should be scalar, got {loss_vals.loss_objective.shape}"
+        assert (
+            loss_vals.clip_fraction.shape == ()
+        ), f"clip_fraction should be scalar, got {loss_vals.clip_fraction.shape}"
+        assert (
+            loss_vals.kl_approx.shape == ()
+        ), f"kl_approx should be scalar, got {loss_vals.kl_approx.shape}"
+        assert (
+            loss_vals.ESS.shape == ()
+        ), f"ESS should be scalar, got {loss_vals.ESS.shape}"
+
+        # Check that loss_objective and ESS are finite
+        assert torch.isfinite(loss_vals.loss_objective), "loss_objective is not finite"
+        assert torch.isfinite(loss_vals.ESS), "ESS is not finite"
+
+        # Check that clip_fraction is in valid range [0, 1]
+        assert (
+            0 <= loss_vals.clip_fraction <= 1
+        ), f"clip_fraction out of range: {loss_vals.clip_fraction}"
+
 
 class TestSFT:
     @pytest.fixture(scope="class")
 
@@ -40,7 +40,7 @@ class TestAsyncVLLMIntegration:
     @pytest.mark.slow
     def test_vllm_api_compatibility(self, sampling_params):
         """Test that AsyncVLLM supports the same inputs as vLLM.LLM.generate()."""
-        from torchrl.modules.llm.backends.vllm_async import AsyncVLLM
+        from torchrl.modules.llm.backends import AsyncVLLM
 
         # Create AsyncVLLM service
         service = AsyncVLLM.from_pretrained(
@@ -113,7 +113,7 @@ def test_vllm_api_compatibility(self, sampling_params):
     def test_weight_updates_with_transformer(self, sampling_params):
         """Test weight updates using vLLMUpdater with a real transformer model."""
         from torchrl.collectors.llm.weight_update.vllm import vLLMUpdater
-        from torchrl.modules.llm.backends.vllm_async import AsyncVLLM
+        from torchrl.modules.llm.backends import AsyncVLLM
         from torchrl.modules.llm.policies.transformers_wrapper import (
             TransformersWrapper,
         )