Update

vmoens · vmoens · commit 15ff85b36454 · 2025-10-23T11:51:37.000-07:00
[ghstack-poisoned]
diff --git a/.github/unittest/llm/scripts_llm/environment.yml b/.github/unittest/llm/scripts_llm/environment.yml
diff --git a/.github/unittest/llm/scripts_llm/install.sh b/.github/unittest/llm/scripts_llm/install.sh
@@ -30,15 +30,15 @@ git submodule sync && git submodule update --init --recursive
 #printf "Installing PyTorch with cu128"
 #if [[ "$TORCH_VERSION" == "nightly" ]]; then
 #  if [ "${CU_VERSION:-}" == cpu ] ; then
-#      pip3 install --pre torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/nightly/cpu -U
+#      pip install --pre torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/nightly/cpu -U
 #  else
-#      pip3 install --pre torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/nightly/cu128 -U
+#      pip install --pre torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/nightly/cu128 -U
 #  fi
 #elif [[ "$TORCH_VERSION" == "stable" ]]; then
 #    if [ "${CU_VERSION:-}" == cpu ] ; then
-#      pip3 install torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/cpu
+#      pip install torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/cpu
 #  else
-#      pip3 install torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/cu128
+#      pip install torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/cu128
 #  fi
 #else
 #  printf "Failed to install pytorch"
@@ -47,9 +47,10 @@ git submodule sync && git submodule update --init --recursive
 
 # install tensordict
 if [[ "$RELEASE" == 0 ]]; then
-  pip3 install git+https://github.com/pytorch/tensordict.git
+  pip install "pybind11[global]" ninja
+  pip install git+https://github.com/pytorch/tensordict.git
 else
-  pip3 install tensordict
+  pip install tensordict
 fi
 
 # smoke test
diff --git a/.github/unittest/llm/scripts_llm/post_process.sh b/.github/unittest/llm/scripts_llm/post_process.sh
diff --git a/.github/unittest/llm/scripts_llm/run-clang-format.py b/.github/unittest/llm/scripts_llm/run-clang-format.py
diff --git a/.github/unittest/llm/scripts_llm/run_test.sh b/.github/unittest/llm/scripts_llm/run_test.sh
@@ -23,14 +23,4 @@ lib_dir="${env_dir}/lib"
 
 conda deactivate && conda activate ./env
 
-python -c "import transformers, datasets"
-
-pytest test/test_rlhf.py --instafail -v --durations 200 --capture no --error-for-skips
-
-python examples/rlhf/train_rlhf.py \
-  sys.device=cuda:0 sys.ref_device=cuda:0 \
-  model.name_or_path=gpt2 train.max_epochs=2 \
-  data.batch_size=2 train.ppo.ppo_batch_size=2 \
-  train.ppo.ppo_num_epochs=1 reward_model.name_or_path= \
-  train.ppo.episode_length=8 train.ppo.num_rollouts_per_epoch=4 \
-  data.block_size=110 io.logger=csv
+pytest test/llm -vvv --instafail --durations 600 --capture no --error-for-skips
diff --git a/.github/unittest/llm/scripts_llm/setup_env.sh b/.github/unittest/llm/scripts_llm/setup_env.sh
@@ -6,28 +6,19 @@
 # Do not install PyTorch and torchvision here, otherwise they also get cached.
 
 set -e
-apt-get update && apt-get upgrade -y && apt-get install -y git cmake
+export DEBIAN_FRONTEND=noninteractive
+export TZ=UTC
+apt-get update
+apt-get install -yq --no-install-recommends git wget unzip curl patchelf
 # Avoid error: "fatal: unsafe repository"
 git config --global --add safe.directory '*'
-apt-get install -y wget \
-    gcc \
-    g++ \
-    unzip \
-    curl \
-    patchelf \
-    libosmesa6-dev \
-    libgl1-mesa-glx \
-    libglfw3 \
-    swig3.0 \
-    libglew-dev \
-    libglvnd0 \
-    libgl1 \
-    libglx0 \
-    libegl1 \
-    libgles2
+# The base PyTorch devel image provides compilers, CMake >= 3.22, and most build deps.
+# Install only minimal utilities not guaranteed to be present.
 
-# Upgrade specific package
-apt-get upgrade -y libstdc++6
+# CMake available in the PyTorch devel image (Ubuntu 22.04) is sufficient.
+
+# Cleanup APT cache
+apt-get clean && rm -rf /var/lib/apt/lists/*
 
 this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 root_dir="$(git rev-parse --show-toplevel)"
diff --git a/.github/workflows/test-linux-llm.yml b/.github/workflows/test-linux-llm.yml
@@ -21,17 +21,18 @@ permissions:
 
 jobs:
   unittests:
+    if: ${{ github.event_name == 'push' || (github.event_name == 'pull_request' && contains(join(github.event.pull_request.labels.*.name, ', '), 'llm/')) }}
     strategy:
       matrix:
-        python_version: ["3.9"]
-        cuda_arch_version: ["12.8"]
+        python_version: ["3.12"]
+        cuda_arch_version: ["12.9"]
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       repository: pytorch/rl
-      runner: "linux.g5.4xlarge.nvidia.gpu"
+      runner: "linux.g6.4xlarge.experimental.nvidia.gpu"
       # gpu-arch-type: cuda
       # gpu-arch-version: "11.7"
-      docker-image: "nvidia/cudagl:11.4.0-base"
+      docker-image: "pytorch/pytorch:2.8.0-cuda12.9-cudnn9-devel"
       timeout: 120
       script: |
         if [[ "${{ github.ref }}" =~ release/* ]]; then
@@ -43,14 +44,14 @@ jobs:
         fi
 
         set -euo pipefail
-        export PYTHON_VERSION="3.9"
-        export CU_VERSION="cu117"
+        export PYTHON_VERSION="3.12"
+        export CU_VERSION="cu129"
         export TAR_OPTIONS="--no-same-owner"
         export UPLOAD_CHANNEL="nightly"
         export TF_CPP_MIN_LOG_LEVEL=0
         export TD_GET_DEFAULTS_TO_NONE=1
 
-        bash .github/unittest/linux_libs/scripts_llm/setup_env.sh
-        bash .github/unittest/linux_libs/scripts_llm/install.sh
-        bash .github/unittest/linux_libs/scripts_llm/run_test.sh
-        bash .github/unittest/linux_libs/scripts_llm/post_process.sh
+        bash .github/unittest/llm/scripts_llm/setup_env.sh
+        bash .github/unittest/llm/scripts_llm/install.sh
+        bash .github/unittest/llm/scripts_llm/run_test.sh
+        bash .github/unittest/llm/scripts_llm/post_process.sh
diff --git a/test/llm/test_updaters.py b/test/llm/test_updaters.py
@@ -2,7 +2,7 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
-
+from __future__ import annotations
 
 import argparse
 import gc
diff --git a/test/test_collector.py b/test/test_collector.py
@@ -162,7 +162,7 @@ def forward(self, observation):
         output = self.linear(observation)
         if self.multiple_outputs:
             return output, output.sum(), output.min(), output.max()
-        return self.linear(observation)
+        return output
 
 
 class UnwrappablePolicy(nn.Module):
@@ -1512,6 +1512,7 @@ def create_env():
             cudagraph_policy=cudagraph,
             weight_sync_schemes={"policy": MultiProcessWeightSyncScheme()},
         )
+        assert "policy" in collector._weight_senders, collector._weight_senders.keys()
         try:
             # collect state_dict
             state_dict = collector.state_dict()
diff --git a/test/test_env.py b/test/test_env.py
@@ -3836,6 +3836,8 @@ def test_parallel(self, bwad, use_buffers, maybe_fork_ParallelEnv):
         finally:
             env.close(raise_if_closed=False)
             del env
+            time.sleep(0.1)
+            gc.collect()
 
     class AddString(Transform):
         def __init__(self):
diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py
@@ -307,6 +307,19 @@ def _extract_weights_if_needed(self, weights: Any, model_id: str) -> Any:
             else None
         )
 
+        # If no weights were provided and a sync scheme exists, extract the latest
+        # weights from the current model using the scheme strategy (state_dict or tensordict).
+        # This ensures we don't return stale cached weights.
+        if weights is None and scheme is not None:
+            from torchrl.weight_update.weight_sync_schemes import (
+                _resolve_model,
+                WeightStrategy,
+            )
+
+            strategy = WeightStrategy(extract_as=scheme.strategy)
+            model = _resolve_model(self, model_id)
+            return strategy.extract_weights(model)
+
         if weights is None:
             if model_id == "policy" and hasattr(self, "policy_weights"):
                 return self.policy_weights
@@ -462,6 +475,21 @@ def update_policy_weights_(
                 # Apply to local policy
                 if hasattr(self, "policy") and isinstance(self.policy, nn.Module):
                     strategy.apply_weights(self.policy, weights)
+            elif (
+                hasattr(self, "_original_policy")
+                and isinstance(self._original_policy, nn.Module)
+                and hasattr(self, "policy")
+                and isinstance(self.policy, nn.Module)
+            ):
+                # If no weights were provided, mirror weights from the original (trainer) policy
+                from torchrl.weight_update.weight_sync_schemes import WeightStrategy
+
+                strategy = WeightStrategy(extract_as="tensordict")
+                weights = strategy.extract_weights(self._original_policy)
+                # Cast weights to the policy device before applying
+                if self.policy_device is not None:
+                    weights = weights.to(self.policy_device)
+                strategy.apply_weights(self.policy, weights)
             # Otherwise, no action needed - policy is local and changes are immediately visible
 
     def __iter__(self) -> Iterator[TensorDictBase]:
diff --git a/torchrl/envs/batched_envs.py b/torchrl/envs/batched_envs.py
@@ -2489,14 +2489,15 @@ def look_for_cuda(tensor, has_cuda=has_cuda):
             # Make sure the root is updated
             root_shared_tensordict.update_(env._step_mdp(input))
 
+            # Set event before sending non-tensor data so parent knows worker is done
+            # The recv() call itself will provide synchronization for the pipe
+            mp_event.set()
+
             if _non_tensor_keys:
                 child_pipe.send(
                     ("non_tensor", next_td.select(*_non_tensor_keys, strict=False))
                 )
 
-            # Set event only after non-tensor data is sent to avoid race condition
-            mp_event.set()
-
             del next_td
 
         elif cmd == "step_and_maybe_reset":
@@ -2530,14 +2531,15 @@ def look_for_cuda(tensor, has_cuda=has_cuda):
                 event.record()
                 event.synchronize()
 
+            # Set event before sending non-tensor data so parent knows worker is done
+            # The recv() call itself will provide synchronization for the pipe
+            mp_event.set()
+
             if _non_tensor_keys:
                 ntd = root_next_td.select(*_non_tensor_keys)
                 ntd.set("next", td_next.select(*_non_tensor_keys))
                 child_pipe.send(("non_tensor", ntd))
 
-            # Set event only after non-tensor data is sent to avoid race condition
-            mp_event.set()
-
             del td, root_next_td
 
         elif cmd == "close":
diff --git a/torchrl/modules/llm/backends/vllm/vllm_async.py b/torchrl/modules/llm/backends/vllm/vllm_async.py

@@ -2,7 +2,7 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
-
+from __future__ import annotations
 
 import argparse
 import gc