From f5920a56b2a79662401f0d3594ebeb0ba4c91b99 Mon Sep 17 00:00:00 2001
From: vmoens
Date: Wed, 22 Oct 2025 12:26:16 -0700
Subject: [PATCH 1/6] Update

[ghstack-poisoned]
---
 .../{linux_libs => llm}/scripts_llm/environment.yml     |  0
 .../{linux_libs => llm}/scripts_llm/install.sh          |  0
 .../{linux_libs => llm}/scripts_llm/post_process.sh     |  0
 .../{linux_libs => llm}/scripts_llm/run-clang-format.py |  0
 .../{linux_libs => llm}/scripts_llm/run_test.sh         | 12 +-----------
 .../{linux_libs => llm}/scripts_llm/setup_env.sh        |  0
 .github/workflows/test-linux-llm.yml                    |  1 +
 7 files changed, 2 insertions(+), 11 deletions(-)
 rename .github/unittest/{linux_libs => llm}/scripts_llm/environment.yml (100%)
 rename .github/unittest/{linux_libs => llm}/scripts_llm/install.sh (100%)
 rename .github/unittest/{linux_libs => llm}/scripts_llm/post_process.sh (100%)
 rename .github/unittest/{linux_libs => llm}/scripts_llm/run-clang-format.py (100%)
 rename .github/unittest/{linux_libs => llm}/scripts_llm/run_test.sh (59%)
 rename .github/unittest/{linux_libs => llm}/scripts_llm/setup_env.sh (100%)

diff --git a/.github/unittest/linux_libs/scripts_llm/environment.yml b/.github/unittest/llm/scripts_llm/environment.yml
similarity index 100%
rename from .github/unittest/linux_libs/scripts_llm/environment.yml
rename to .github/unittest/llm/scripts_llm/environment.yml
diff --git a/.github/unittest/linux_libs/scripts_llm/install.sh b/.github/unittest/llm/scripts_llm/install.sh
similarity index 100%
rename from .github/unittest/linux_libs/scripts_llm/install.sh
rename to .github/unittest/llm/scripts_llm/install.sh
diff --git a/.github/unittest/linux_libs/scripts_llm/post_process.sh b/.github/unittest/llm/scripts_llm/post_process.sh
similarity index 100%
rename from .github/unittest/linux_libs/scripts_llm/post_process.sh
rename to .github/unittest/llm/scripts_llm/post_process.sh
diff --git a/.github/unittest/linux_libs/scripts_llm/run-clang-format.py b/.github/unittest/llm/scripts_llm/run-clang-format.py
similarity index 100%
rename from .github/unittest/linux_libs/scripts_llm/run-clang-format.py
rename to .github/unittest/llm/scripts_llm/run-clang-format.py
diff --git a/.github/unittest/linux_libs/scripts_llm/run_test.sh b/.github/unittest/llm/scripts_llm/run_test.sh
similarity index 59%
rename from .github/unittest/linux_libs/scripts_llm/run_test.sh
rename to .github/unittest/llm/scripts_llm/run_test.sh
index ac60ae37f1e..bf811b01eb6 100644
--- a/.github/unittest/linux_libs/scripts_llm/run_test.sh
+++ b/.github/unittest/llm/scripts_llm/run_test.sh
@@ -23,14 +23,4 @@ lib_dir="${env_dir}/lib"
 
 conda deactivate && conda activate ./env
 
-python -c "import transformers, datasets"
-
-pytest test/test_rlhf.py --instafail -v --durations 200 --capture no --error-for-skips
-
-python examples/rlhf/train_rlhf.py \
-  sys.device=cuda:0 sys.ref_device=cuda:0 \
-  model.name_or_path=gpt2 train.max_epochs=2 \
-  data.batch_size=2 train.ppo.ppo_batch_size=2 \
-  train.ppo.ppo_num_epochs=1 reward_model.name_or_path= \
-  train.ppo.episode_length=8 train.ppo.num_rollouts_per_epoch=4 \
-  data.block_size=110 io.logger=csv
+pytest test/llm -vvv --instafail --durations 600 --capture no --error-for-skips
diff --git a/.github/unittest/linux_libs/scripts_llm/setup_env.sh b/.github/unittest/llm/scripts_llm/setup_env.sh
similarity index 100%
rename from .github/unittest/linux_libs/scripts_llm/setup_env.sh
rename to .github/unittest/llm/scripts_llm/setup_env.sh
diff --git a/.github/workflows/test-linux-llm.yml b/.github/workflows/test-linux-llm.yml
index 4de8b8165d9..0d040f756c9 100644
--- a/.github/workflows/test-linux-llm.yml
+++ b/.github/workflows/test-linux-llm.yml
@@ -21,6 +21,7 @@ permissions:
 
 jobs:
   unittests:
+    if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'llm/') }}
     strategy:
       matrix:
         python_version: ["3.9"]

From ec8b1cd2ea32d4a33bd3fd6e58b4bb7de1e74502 Mon Sep 17 00:00:00 2001
From: vmoens
Date: Wed, 22 Oct 2025 12:31:54 -0700
Subject: [PATCH 2/6] Update

[ghstack-poisoned]
---
 .github/workflows/test-linux-llm.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-linux-llm.yml b/.github/workflows/test-linux-llm.yml
index 0d040f756c9..e43a1522076 100644
--- a/.github/workflows/test-linux-llm.yml
+++ b/.github/workflows/test-linux-llm.yml
@@ -29,7 +29,7 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       repository: pytorch/rl
-      runner: "linux.g5.4xlarge.nvidia.gpu"
+      runner: "linux.g6.4xlarge.experimental.nvidia.gpu"
       # gpu-arch-type: cuda
       # gpu-arch-version: "11.7"
       docker-image: "nvidia/cudagl:11.4.0-base"

From 8f59fcde00b02c39410352c78dfad56ac81c4d8c Mon Sep 17 00:00:00 2001
From: vmoens
Date: Wed, 22 Oct 2025 12:35:04 -0700
Subject: [PATCH 3/6] Update

[ghstack-poisoned]
---
 .github/workflows/test-linux-llm.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-linux-llm.yml b/.github/workflows/test-linux-llm.yml
index e43a1522076..f738f66b76c 100644
--- a/.github/workflows/test-linux-llm.yml
+++ b/.github/workflows/test-linux-llm.yml
@@ -21,7 +21,7 @@ permissions:
 
 jobs:
   unittests:
-    if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'llm/') }}
+    if: ${{ github.event_name == 'push' || contains(join(github.event.pull_request.labels.*.name, ', '), 'llm/') }}
     strategy:
       matrix:
         python_version: ["3.9"]

From 44807c622511a4ef1ca7213525ed018619777d54 Mon Sep 17 00:00:00 2001
From: vmoens
Date: Wed, 22 Oct 2025 13:29:19 -0700
Subject: [PATCH 4/6] Update

[ghstack-poisoned]
---
 .github/workflows/test-linux-llm.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/test-linux-llm.yml b/.github/workflows/test-linux-llm.yml
index f738f66b76c..08d76736a70 100644
--- a/.github/workflows/test-linux-llm.yml
+++ b/.github/workflows/test-linux-llm.yml
@@ -21,7 +21,7 @@ permissions:
 
 jobs:
   unittests:
-    if: ${{ github.event_name == 'push' || contains(join(github.event.pull_request.labels.*.name, ', '), 'llm/') }}
+    if: ${{ github.event_name == 'push' || (github.event_name == 'pull_request' && contains(join(github.event.pull_request.labels.*.name, ', '), 'llm/')) }}
     strategy:
       matrix:
         python_version: ["3.9"]
@@ -51,7 +51,7 @@ jobs:
         export TF_CPP_MIN_LOG_LEVEL=0
         export TD_GET_DEFAULTS_TO_NONE=1
 
-        bash .github/unittest/linux_libs/scripts_llm/setup_env.sh
-        bash .github/unittest/linux_libs/scripts_llm/install.sh
-        bash .github/unittest/linux_libs/scripts_llm/run_test.sh
-        bash .github/unittest/linux_libs/scripts_llm/post_process.sh
+        bash .github/unittest/llm/scripts_llm/setup_env.sh
+        bash .github/unittest/llm/scripts_llm/install.sh
+        bash .github/unittest/llm/scripts_llm/run_test.sh
+        bash .github/unittest/llm/scripts_llm/post_process.sh
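
Note on the `if:` guard refined across patches 1, 3, and 4: GitHub Actions' `contains()` does whole-item matching on an array but substring matching on a string. The patch 1 form therefore never fires for labels such as "llm/vllm"; joining the label names first (patch 3) turns the check into a substring match, and patch 4 additionally restricts the label check to pull_request events, where the labels context is populated. A minimal Python sketch of the two semantics, with illustrative names that are not part of the workflow:

    def contains_array(labels, needle):
        # contains(github.event.pull_request.labels.*.name, 'llm/')
        # On an array, contains() only matches whole items.
        return needle in labels

    def contains_joined(labels, needle):
        # contains(join(github.event.pull_request.labels.*.name, ', '), 'llm/')
        # Joining first makes the check a substring match, so any label
        # containing "llm/" (e.g. "llm/vllm") triggers the job.
        return needle in ", ".join(labels)

    labels = ["llm/vllm", "ci"]
    assert not contains_array(labels, "llm/")  # patch 1 condition: never fires
    assert contains_joined(labels, "llm/")     # patch 3 condition: fires as intended
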
From 8ba64888b93a99d66985e6eb414e1715849208b4 Mon Sep 17 00:00:00 2001
From: vmoens
Date: Wed, 22 Oct 2025 17:30:30 -0700
Subject: [PATCH 5/6] Update

[ghstack-poisoned]
---
 .github/unittest/llm/scripts_llm/setup_env.sh   | 12 +++-
 .../modules/llm/backends/vllm/vllm_async.py     | 62 ++++++++++++++-----
 2 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/.github/unittest/llm/scripts_llm/setup_env.sh b/.github/unittest/llm/scripts_llm/setup_env.sh
index 53dfc0bd50b..345e520c195 100644
--- a/.github/unittest/llm/scripts_llm/setup_env.sh
+++ b/.github/unittest/llm/scripts_llm/setup_env.sh
@@ -6,10 +6,13 @@
 # Do not install PyTorch and torchvision here, otherwise they also get cached.
 
 set -e
-apt-get update && apt-get upgrade -y && apt-get install -y git cmake
+export DEBIAN_FRONTEND=noninteractive
+export TZ=UTC
+apt-get update
+apt-get install -yq --no-install-recommends git cmake
 # Avoid error: "fatal: unsafe repository"
 git config --global --add safe.directory '*'
-apt-get install -y wget \
+apt-get install -yq --no-install-recommends wget \
     gcc \
     g++ \
     unzip \
@@ -27,7 +30,10 @@ apt-get install -y wget \
     libgles2
 
 # Upgrade specific package
-apt-get upgrade -y libstdc++6
+apt-get install -yq --no-install-recommends --only-upgrade libstdc++6
+
+apt-get clean
+rm -rf /var/lib/apt/lists/*
 
 this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 root_dir="$(git rev-parse --show-toplevel)"
diff --git a/torchrl/modules/llm/backends/vllm/vllm_async.py b/torchrl/modules/llm/backends/vllm/vllm_async.py
index cc5dc0c9cd2..7b74018a7ca 100644
--- a/torchrl/modules/llm/backends/vllm/vllm_async.py
+++ b/torchrl/modules/llm/backends/vllm/vllm_async.py
@@ -20,12 +20,9 @@
 from concurrent.futures import ThreadPoolExecutor, wait
 from typing import Any, Literal, TYPE_CHECKING
 
-import ray
 import torch
 
-from ray.util.placement_group import placement_group, remove_placement_group
-from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
 
 from torchrl._utils import logger as torchrl_logger
 
 # Import RLvLLMEngine and shared utilities
@@ -43,6 +40,24 @@
 
 TIMEOUT_SECONDS = os.getenv("TORCHRL_VLLM_TIMEOUT_SECONDS", 300)
 
 
+def _get_ray():
+    """Import Ray on demand to avoid global import side-effects.
+
+    Returns:
+        ModuleType: The imported Ray module.
+
+    Raises:
+        ImportError: If Ray is not installed.
+    """
+    try:
+        import ray  # type: ignore
+
+        return ray
+    except Exception as e:  # pragma: no cover - surfaced to callers
+        raise ImportError(
+            "ray is not installed. Please install it with `pip install ray`."
+        ) from e
+
+
 class _AsyncvLLMWorker:
     """Async vLLM worker for Ray with weight update capabilities.
@@ -267,7 +282,7 @@ async def generate(
                 "vllm is not installed. Please install it with `pip install vllm`."
             )
 
-        from vllm import RequestOutput, SamplingParams, TokensPrompt
+        from vllm import SamplingParams, TokensPrompt
 
         # Track whether input was originally a single prompt
         single_prompt_input = False
@@ -474,11 +489,7 @@ def _gpus_per_replica(engine_args: AsyncEngineArgs) -> int:
     )
 
 
-# Create Ray remote versions
-if ray is not None and _has_vllm:
-    _AsyncLLMEngineActor = ray.remote(num_cpus=0, num_gpus=0)(_AsyncLLMEngine)
-else:
-    _AsyncLLMEngineActor = None
+# Ray actor wrapper is created lazily in __init__ to avoid global Ray import.
 
 
 class AsyncVLLM(RLvLLMEngine):
@@ -583,17 +594,18 @@ def __init__(
             raise ImportError(
                 "vllm is not installed. Please install it with `pip install vllm`."
             )
-        if ray is None:
-            raise ImportError(
-                "ray is not installed. Please install it with `pip install ray`."
-            )
+        # Lazily import ray only when constructing the actor class to avoid global import
 
         # Enable prefix caching by default for better performance
         engine_args.enable_prefix_caching = enable_prefix_caching
 
         self.engine_args = engine_args
         self.num_replicas = num_replicas
-        self.actor_class = actor_class or _AsyncLLMEngineActor
+        if actor_class is None:
+            ray = _get_ray()
+            self.actor_class = ray.remote(num_cpus=0, num_gpus=0)(_AsyncLLMEngine)
+        else:
+            self.actor_class = actor_class
         self.actors: list = []
         self._launched = False
         self._service_id = uuid.uuid4().hex[
@@ -608,6 +620,11 @@ def _launch(self):
             torchrl_logger.warning("AsyncVLLMEngineService already launched")
             return
 
+        # Local imports to avoid global Ray dependency
+        ray = _get_ray()
+        from ray.util.placement_group import placement_group
+        from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
+
         torchrl_logger.info(
             f"Launching {self.num_replicas} async vLLM engine actors..."
         )
@@ -938,6 +955,7 @@ def generate(
         Returns:
             RequestOutput | list[RequestOutput]: Generated outputs from vLLM.
         """
+        ray = _get_ray()
         # Check if this is a batch request
         if self._is_batch(prompts, prompt_token_ids):
             # Handle batched input by unbinding and sending individual requests
@@ -1062,6 +1080,9 @@ def shutdown(self):
             f"Shutting down {len(self.actors)} async vLLM engine actors..."
         )
 
+        ray = _get_ray()
+        from ray.util.placement_group import remove_placement_group
+
         # Kill all actors
         for i, actor in enumerate(self.actors):
             try:
@@ -1254,6 +1275,7 @@ def _update_weights_with_nccl_broadcast_simple(
         )
 
         updated_weights = 0
+        ray = _get_ray()
         with torch.cuda.device(0):  # Ensure we're on the correct CUDA device
             for name, weight in gpu_weights.items():
                 # Convert dtype to string name (like periodic-mono)
@@ -1330,6 +1352,7 @@ def get_num_unfinished_requests(
                 "AsyncVLLM service must be launched before getting request counts"
             )
 
+        ray = _get_ray()
         if actor_index is not None:
             if not (0 <= actor_index < len(self.actors)):
                 raise IndexError(
@@ -1360,6 +1383,7 @@ def get_cache_usage(self, actor_index: int | None = None) -> float | list[float]
                 "AsyncVLLM service must be launched before getting cache usage"
             )
 
+        ray = _get_ray()
        if actor_index is not None:
             if not (0 <= actor_index < len(self.actors)):
                 raise IndexError(
@@ -1672,6 +1696,7 @@ def _select_by_requests(self) -> int:
             futures = [
                 actor.get_num_unfinished_requests.remote() for actor in self.actors
             ]
+            ray = _get_ray()
             request_counts = ray.get(futures)
 
         # Find the actor with minimum pending requests
@@ -1699,6 +1724,7 @@ def _select_by_cache_usage(self) -> int:
         else:
             # Query actors directly
             futures = [actor.get_cache_usage.remote() for actor in self.actors]
+            ray = _get_ray()
             cache_usages = ray.get(futures)
 
         # Find the actor with minimum cache usage
@@ -1838,7 +1864,8 @@ def _is_actor_overloaded(self, actor_index: int) -> bool:
         futures = [
             actor.get_num_unfinished_requests.remote() for actor in self.actors
         ]
-        request_counts = ray.get(futures)
+        ray = _get_ray()
+        request_counts = ray.get(futures)
 
         if not request_counts:
             return False
@@ -1887,8 +1914,9 @@ def get_stats(self) -> dict[str, Any]:
             cache_futures = [
                 actor.get_cache_usage.remote() for actor in self.actors
             ]
-            request_counts = ray.get(request_futures)
-            cache_usages = ray.get(cache_futures)
+            ray = _get_ray()
+            request_counts = ray.get(request_futures)
+            cache_usages = ray.get(cache_futures)
 
             for i, (requests, cache_usage) in enumerate(
                 zip(request_counts, cache_usages)
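
Note on patch 5: the refactor replaces a module-level `import ray` and an import-time `ray.remote(...)` wrapper with on-demand imports, so `torchrl.modules.llm` stays importable on machines without Ray and the ImportError surfaces only when a Ray-backed service is actually constructed. A condensed sketch of the pattern under those assumptions (`_Worker` and `make_actor_class` are illustrative names, not TorchRL APIs):

    def _get_ray():
        # Deferred import: failure is raised at call time, not at module import.
        try:
            import ray
            return ray
        except Exception as e:
            raise ImportError(
                "ray is not installed. Please install it with `pip install ray`."
            ) from e

    class _Worker:
        def ping(self):
            return "pong"

    def make_actor_class():
        # ray.remote(...) runs only here, so importing the enclosing module
        # never touches Ray; only constructing the service does.
        ray = _get_ray()
        return ray.remote(num_cpus=0, num_gpus=0)(_Worker)
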
From 430f95bd1f0e99ceba80546f0b8e975628df99cf Mon Sep 17 00:00:00 2001
From: vmoens
Date: Thu, 23 Oct 2025 10:29:22 -0700
Subject: [PATCH 6/6] Update

[ghstack-poisoned]
---
 .github/workflows/test-linux-llm.yml | 10 +++++-----
 test/llm/test_updaters.py            |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/test-linux-llm.yml b/.github/workflows/test-linux-llm.yml
index 9078e37be36..5f2c4199515 100644
--- a/.github/workflows/test-linux-llm.yml
+++ b/.github/workflows/test-linux-llm.yml
@@ -24,15 +24,15 @@ jobs:
     if: ${{ github.event_name == 'push' || (github.event_name == 'pull_request' && contains(join(github.event.pull_request.labels.*.name, ', '), 'llm/')) }}
     strategy:
       matrix:
-        python_version: ["3.9"]
-        cuda_arch_version: ["12.8"]
+        python_version: ["3.12"]
+        cuda_arch_version: ["12.9"]
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       repository: pytorch/rl
       runner: "linux.g6.4xlarge.experimental.nvidia.gpu"
       # gpu-arch-type: cuda
       # gpu-arch-version: "11.7"
-      docker-image: "pytorch/pytorch:2.8.0-cuda12.8-cudnn9-devel"
+      docker-image: "pytorch/pytorch:2.8.0-cuda12.9-cudnn9-devel"
       timeout: 120
       script: |
         if [[ "${{ github.ref }}" =~ release/* ]]; then
@@ -44,8 +44,8 @@ jobs:
         fi
 
         set -euo pipefail
-        export PYTHON_VERSION="3.9"
-        export CU_VERSION="cu128"
+        export PYTHON_VERSION="3.12"
+        export CU_VERSION="cu129"
         export TAR_OPTIONS="--no-same-owner"
         export UPLOAD_CHANNEL="nightly"
         export TF_CPP_MIN_LOG_LEVEL=0
diff --git a/test/llm/test_updaters.py b/test/llm/test_updaters.py
index 02e2efed163..4e9c115f7ba 100644
--- a/test/llm/test_updaters.py
+++ b/test/llm/test_updaters.py
@@ -2,7 +2,7 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
-
+from __future__ import annotations
 import argparse
 import gc
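
Note on patch 6's one-line test change: `from __future__ import annotations` makes annotations lazily evaluated strings (PEP 563), so signatures in the `int | None` / `float | list[float]` style used in vllm_async.py above parse without evaluating the union at definition time, which also keeps such files importable on interpreters older than 3.10. A small illustrative sketch (the function body is a placeholder, not TorchRL code):

    from __future__ import annotations

    def get_cache_usage(actor_index: int | None = None) -> float | list[float]:
        usages = [0.25, 0.75]  # hypothetical per-actor KV-cache usage
        return usages if actor_index is None else usages[actor_index]

    # The annotation is stored as an unevaluated string:
    print(get_cache_usage.__annotations__["actor_index"])  # prints "int | None"
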