From f5920a56b2a79662401f0d3594ebeb0ba4c91b99 Mon Sep 17 00:00:00 2001
From: vmoens
Date: Wed, 22 Oct 2025 12:26:16 -0700
Subject: [PATCH 1/6] Update

[ghstack-poisoned]
---
 .../{linux_libs => llm}/scripts_llm/environment.yml     |  0
 .../{linux_libs => llm}/scripts_llm/install.sh          |  0
 .../{linux_libs => llm}/scripts_llm/post_process.sh     |  0
 .../{linux_libs => llm}/scripts_llm/run-clang-format.py |  0
 .../{linux_libs => llm}/scripts_llm/run_test.sh         | 12 +-----------
 .../{linux_libs => llm}/scripts_llm/setup_env.sh        |  0
 .github/workflows/test-linux-llm.yml                    |  1 +
 7 files changed, 2 insertions(+), 11 deletions(-)
 rename .github/unittest/{linux_libs => llm}/scripts_llm/environment.yml (100%)
 rename .github/unittest/{linux_libs => llm}/scripts_llm/install.sh (100%)
 rename .github/unittest/{linux_libs => llm}/scripts_llm/post_process.sh (100%)
 rename .github/unittest/{linux_libs => llm}/scripts_llm/run-clang-format.py (100%)
 rename .github/unittest/{linux_libs => llm}/scripts_llm/run_test.sh (59%)
 rename .github/unittest/{linux_libs => llm}/scripts_llm/setup_env.sh (100%)

diff --git a/.github/unittest/linux_libs/scripts_llm/environment.yml b/.github/unittest/llm/scripts_llm/environment.yml
similarity index 100%
rename from .github/unittest/linux_libs/scripts_llm/environment.yml
rename to .github/unittest/llm/scripts_llm/environment.yml
diff --git a/.github/unittest/linux_libs/scripts_llm/install.sh b/.github/unittest/llm/scripts_llm/install.sh
similarity index 100%
rename from .github/unittest/linux_libs/scripts_llm/install.sh
rename to .github/unittest/llm/scripts_llm/install.sh
diff --git a/.github/unittest/linux_libs/scripts_llm/post_process.sh b/.github/unittest/llm/scripts_llm/post_process.sh
similarity index 100%
rename from .github/unittest/linux_libs/scripts_llm/post_process.sh
rename to .github/unittest/llm/scripts_llm/post_process.sh
diff --git a/.github/unittest/linux_libs/scripts_llm/run-clang-format.py b/.github/unittest/llm/scripts_llm/run-clang-format.py
similarity index 100%
rename from .github/unittest/linux_libs/scripts_llm/run-clang-format.py
rename to .github/unittest/llm/scripts_llm/run-clang-format.py
diff --git a/.github/unittest/linux_libs/scripts_llm/run_test.sh b/.github/unittest/llm/scripts_llm/run_test.sh
similarity index 59%
rename from .github/unittest/linux_libs/scripts_llm/run_test.sh
rename to .github/unittest/llm/scripts_llm/run_test.sh
index ac60ae37f1e..bf811b01eb6 100644
--- a/.github/unittest/linux_libs/scripts_llm/run_test.sh
+++ b/.github/unittest/llm/scripts_llm/run_test.sh
@@ -23,14 +23,4 @@ lib_dir="${env_dir}/lib"
 
 conda deactivate && conda activate ./env
 
-python -c "import transformers, datasets"
-
-pytest test/test_rlhf.py --instafail -v --durations 200 --capture no --error-for-skips
-
-python examples/rlhf/train_rlhf.py \
-  sys.device=cuda:0 sys.ref_device=cuda:0 \
-  model.name_or_path=gpt2 train.max_epochs=2 \
-  data.batch_size=2 train.ppo.ppo_batch_size=2 \
-  train.ppo.ppo_num_epochs=1 reward_model.name_or_path= \
-  train.ppo.episode_length=8 train.ppo.num_rollouts_per_epoch=4 \
-  data.block_size=110 io.logger=csv
+pytest test/llm -vvv --instafail --durations 600 --capture no --error-for-skips
diff --git a/.github/unittest/linux_libs/scripts_llm/setup_env.sh b/.github/unittest/llm/scripts_llm/setup_env.sh
similarity index 100%
rename from .github/unittest/linux_libs/scripts_llm/setup_env.sh
rename to .github/unittest/llm/scripts_llm/setup_env.sh
diff --git a/.github/workflows/test-linux-llm.yml b/.github/workflows/test-linux-llm.yml
index 4de8b8165d9..0d040f756c9 100644
--- a/.github/workflows/test-linux-llm.yml
+++ b/.github/workflows/test-linux-llm.yml
@@ -21,6 +21,7 @@ permissions:
 
 jobs:
   unittests:
+    if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'llm/') }}
     strategy:
       matrix:
         python_version: ["3.9"]

From ec8b1cd2ea32d4a33bd3fd6e58b4bb7de1e74502 Mon Sep 17 00:00:00 2001
From: vmoens
Date: Wed, 22 Oct 2025 12:31:54 -0700
Subject: [PATCH 2/6] Update

[ghstack-poisoned]
---
 .github/workflows/test-linux-llm.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-linux-llm.yml b/.github/workflows/test-linux-llm.yml
index 0d040f756c9..e43a1522076 100644
--- a/.github/workflows/test-linux-llm.yml
+++ b/.github/workflows/test-linux-llm.yml
@@ -29,7 +29,7 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       repository: pytorch/rl
-      runner: "linux.g5.4xlarge.nvidia.gpu"
+      runner: "linux.g6.4xlarge.experimental.nvidia.gpu"
       # gpu-arch-type: cuda
       # gpu-arch-version: "11.7"
       docker-image: "nvidia/cudagl:11.4.0-base"

From 8f59fcde00b02c39410352c78dfad56ac81c4d8c Mon Sep 17 00:00:00 2001
From: vmoens
Date: Wed, 22 Oct 2025 12:35:04 -0700
Subject: [PATCH 3/6] Update

[ghstack-poisoned]
---
 .github/workflows/test-linux-llm.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-linux-llm.yml b/.github/workflows/test-linux-llm.yml
index e43a1522076..f738f66b76c 100644
--- a/.github/workflows/test-linux-llm.yml
+++ b/.github/workflows/test-linux-llm.yml
@@ -21,7 +21,7 @@ permissions:
 
 jobs:
   unittests:
-    if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'llm/') }}
+    if: ${{ github.event_name == 'push' || contains(join(github.event.pull_request.labels.*.name, ', '), 'llm/') }}
     strategy:
       matrix:
         python_version: ["3.9"]

From 44807c622511a4ef1ca7213525ed018619777d54 Mon Sep 17 00:00:00 2001
From: vmoens
Date: Wed, 22 Oct 2025 13:29:19 -0700
Subject: [PATCH 4/6] Update

[ghstack-poisoned]
---
 .github/workflows/test-linux-llm.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/test-linux-llm.yml b/.github/workflows/test-linux-llm.yml
index f738f66b76c..08d76736a70 100644
--- a/.github/workflows/test-linux-llm.yml
+++ b/.github/workflows/test-linux-llm.yml
@@ -21,7 +21,7 @@ permissions:
 
 jobs:
   unittests:
-    if: ${{ github.event_name == 'push' || contains(join(github.event.pull_request.labels.*.name, ', '), 'llm/') }}
+    if: ${{ github.event_name == 'push' || (github.event_name == 'pull_request' && contains(join(github.event.pull_request.labels.*.name, ', '), 'llm/')) }}
     strategy:
       matrix:
         python_version: ["3.9"]
@@ -51,7 +51,7 @@ jobs:
         export TF_CPP_MIN_LOG_LEVEL=0
         export TD_GET_DEFAULTS_TO_NONE=1
 
-        bash .github/unittest/linux_libs/scripts_llm/setup_env.sh
-        bash .github/unittest/linux_libs/scripts_llm/install.sh
-        bash .github/unittest/linux_libs/scripts_llm/run_test.sh
-        bash .github/unittest/linux_libs/scripts_llm/post_process.sh
+        bash .github/unittest/llm/scripts_llm/setup_env.sh
+        bash .github/unittest/llm/scripts_llm/install.sh
+        bash .github/unittest/llm/scripts_llm/run_test.sh
+        bash .github/unittest/llm/scripts_llm/post_process.sh
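
Note on the `if:` guard refined across patches 1, 3, and 4: GitHub Actions' `contains()` does whole-item matching on an array but substring matching on a string. The patch 1 form therefore never fires for labels such as "llm/vllm"; joining the label names first (patch 3) turns the check into a substring match, and patch 4 additionally restricts the label check to pull_request events, where the labels context is populated. A minimal Python sketch of the two semantics, with illustrative names that are not part of the workflow:

    def contains_array(labels, needle):
        # contains(github.event.pull_request.labels.*.name, 'llm/')
        # On an array, contains() only matches whole items.
        return needle in labels

    def contains_joined(labels, needle):
        # contains(join(github.event.pull_request.labels.*.name, ', '), 'llm/')
        # Joining first makes the check a substring match, so any label
        # containing "llm/" (e.g. "llm/vllm") triggers the job.
        return needle in ", ".join(labels)

    labels = ["llm/vllm", "ci"]
    assert not contains_array(labels, "llm/")  # patch 1 condition: never fires
    assert contains_joined(labels, "llm/")     # patch 3 condition: fires as intended
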
From 8ba64888b93a99d66985e6eb414e1715849208b4 Mon Sep 17 00:00:00 2001
From: vmoens
Date: Wed, 22 Oct 2025 17:30:30 -0700
Subject: [PATCH 5/6] Update

[ghstack-poisoned]
---
 .github/unittest/llm/scripts_llm/setup_env.sh   | 12 +++-
 .../modules/llm/backends/vllm/vllm_async.py     | 62 ++++++++++++++-----
 2 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/.github/unittest/llm/scripts_llm/setup_env.sh b/.github/unittest/llm/scripts_llm/setup_env.sh
index 53dfc0bd50b..345e520c195 100644
--- a/.github/unittest/llm/scripts_llm/setup_env.sh
+++ b/.github/unittest/llm/scripts_llm/setup_env.sh
@@ -6,10 +6,13 @@
 # Do not install PyTorch and torchvision here, otherwise they also get cached.
 
 set -e
-apt-get update && apt-get upgrade -y && apt-get install -y git cmake
+export DEBIAN_FRONTEND=noninteractive
+export TZ=UTC
+apt-get update
+apt-get install -yq --no-install-recommends git cmake
 # Avoid error: "fatal: unsafe repository"
 git config --global --add safe.directory '*'
-apt-get install -y wget \
+apt-get install -yq --no-install-recommends wget \
     gcc \
     g++ \
     unzip \
@@ -27,7 +30,10 @@ apt-get install -y wget \
     libgles2
 
 # Upgrade specific package
-apt-get upgrade -y libstdc++6
+apt-get install -yq --no-install-recommends --only-upgrade libstdc++6
+
+apt-get clean
+rm -rf /var/lib/apt/lists/*
 
 this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 root_dir="$(git rev-parse --show-toplevel)"
diff --git a/torchrl/modules/llm/backends/vllm/vllm_async.py b/torchrl/modules/llm/backends/vllm/vllm_async.py
index cc5dc0c9cd2..7b74018a7ca 100644
--- a/torchrl/modules/llm/backends/vllm/vllm_async.py
+++ b/torchrl/modules/llm/backends/vllm/vllm_async.py
@@ -20,12 +20,9 @@
 from concurrent.futures import ThreadPoolExecutor, wait
 from typing import Any, Literal, TYPE_CHECKING
 
-import ray
 import torch
 
-from ray.util.placement_group import placement_group, remove_placement_group
-from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
 
 from torchrl._utils import logger as torchrl_logger
 
 # Import RLvLLMEngine and shared utilities
@@ -43,6 +40,24 @@
 
 TIMEOUT_SECONDS = os.getenv("TORCHRL_VLLM_TIMEOUT_SECONDS", 300)
 
 
+def _get_ray():
+    """Import Ray on demand to avoid global import side-effects.
+
+    Returns:
+        ModuleType: The imported Ray module.
+
+    Raises:
+        ImportError: If Ray is not installed.
+    """
+    try:
+        import ray  # type: ignore
+
+        return ray
+    except Exception as e:  # pragma: no cover - surfaced to callers
+        raise ImportError(
+            "ray is not installed. Please install it with `pip install ray`."
+        ) from e
+
+
 class _AsyncvLLMWorker:
     """Async vLLM worker for Ray with weight update capabilities.
@@ -267,7 +282,7 @@ async def generate(
                 "vllm is not installed. Please install it with `pip install vllm`."
             )
 
-        from vllm import RequestOutput, SamplingParams, TokensPrompt
+        from vllm import SamplingParams, TokensPrompt
 
         # Track whether input was originally a single prompt
         single_prompt_input = False
@@ -474,11 +489,7 @@ def _gpus_per_replica(engine_args: AsyncEngineArgs) -> int:
     )
 
 
-# Create Ray remote versions
-if ray is not None and _has_vllm:
-    _AsyncLLMEngineActor = ray.remote(num_cpus=0, num_gpus=0)(_AsyncLLMEngine)
-else:
-    _AsyncLLMEngineActor = None
+# Ray actor wrapper is created lazily in __init__ to avoid global Ray import.
 
 
 class AsyncVLLM(RLvLLMEngine):
@@ -583,17 +594,18 @@ def __init__(
             raise ImportError(
                 "vllm is not installed. Please install it with `pip install vllm`."
             )
-        if ray is None:
-            raise ImportError(
-                "ray is not installed. Please install it with `pip install ray`."
-            )
+        # Lazily import ray only when constructing the actor class to avoid global import
 
         # Enable prefix caching by default for better performance
         engine_args.enable_prefix_caching = enable_prefix_caching
 
         self.engine_args = engine_args
         self.num_replicas = num_replicas
-        self.actor_class = actor_class or _AsyncLLMEngineActor
+        if actor_class is None:
+            ray = _get_ray()
+            self.actor_class = ray.remote(num_cpus=0, num_gpus=0)(_AsyncLLMEngine)
+        else:
+            self.actor_class = actor_class
         self.actors: list = []
         self._launched = False
         self._service_id = uuid.uuid4().hex[
@@ -608,6 +620,11 @@ def _launch(self):
             torchrl_logger.warning("AsyncVLLMEngineService already launched")
             return
 
+        # Local imports to avoid global Ray dependency
+        ray = _get_ray()
+        from ray.util.placement_group import placement_group
+        from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
+
         torchrl_logger.info(
             f"Launching {self.num_replicas} async vLLM engine actors..."
         )
@@ -938,6 +955,7 @@ def generate(
         Returns:
             RequestOutput | list[RequestOutput]: Generated outputs from vLLM.
         """
+        ray = _get_ray()
         # Check if this is a batch request
         if self._is_batch(prompts, prompt_token_ids):
             # Handle batched input by unbinding and sending individual requests
@@ -1062,6 +1080,9 @@ def shutdown(self):
             f"Shutting down {len(self.actors)} async vLLM engine actors..."
         )
 
+        ray = _get_ray()
+        from ray.util.placement_group import remove_placement_group
+
         # Kill all actors
         for i, actor in enumerate(self.actors):
             try:
@@ -1254,6 +1275,7 @@ def _update_weights_with_nccl_broadcast_simple(
         )
 
         updated_weights = 0
+        ray = _get_ray()
         with torch.cuda.device(0):  # Ensure we're on the correct CUDA device
             for name, weight in gpu_weights.items():
                 # Convert dtype to string name (like periodic-mono)
@@ -1330,6 +1352,7 @@ def get_num_unfinished_requests(
                 "AsyncVLLM service must be launched before getting request counts"
             )
 
+        ray = _get_ray()
         if actor_index is not None:
             if not (0 <= actor_index < len(self.actors)):
                 raise IndexError(
@@ -1360,6 +1383,7 @@ def get_cache_usage(self, actor_index: int | None = None) -> float | list[float]
                 "AsyncVLLM service must be launched before getting cache usage"
             )
 
+        ray = _get_ray()
        if actor_index is not None:
             if not (0 <= actor_index < len(self.actors)):
                 raise IndexError(
@@ -1672,6 +1696,7 @@ def _select_by_requests(self) -> int:
             futures = [
                 actor.get_num_unfinished_requests.remote() for actor in self.actors
             ]
+            ray = _get_ray()
             request_counts = ray.get(futures)
 
         # Find the actor with minimum pending requests
@@ -1699,6 +1724,7 @@ def _select_by_cache_usage(self) -> int:
         else:
             # Query actors directly
             futures = [actor.get_cache_usage.remote() for actor in self.actors]
+            ray = _get_ray()
             cache_usages = ray.get(futures)
 
         # Find the actor with minimum cache usage
@@ -1838,7 +1864,8 @@ def _is_actor_overloaded(self, actor_index: int) -> bool:
         futures = [
             actor.get_num_unfinished_requests.remote() for actor in self.actors
         ]
-        request_counts = ray.get(futures)
+        ray = _get_ray()
+        request_counts = ray.get(futures)
 
         if not request_counts:
             return False
@@ -1887,8 +1914,9 @@ def get_stats(self) -> dict[str, Any]:
             cache_futures = [
                 actor.get_cache_usage.remote() for actor in self.actors
             ]
-            request_counts = ray.get(request_futures)
-            cache_usages = ray.get(cache_futures)
+            ray = _get_ray()
+            request_counts = ray.get(request_futures)
+            cache_usages = ray.get(cache_futures)
 
             for i, (requests, cache_usage) in enumerate(
                 zip(request_counts, cache_usages)
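
Note on patch 5: the refactor replaces a module-level `import ray` and an import-time `ray.remote(...)` wrapper with on-demand imports, so `torchrl.modules.llm` stays importable on machines without Ray and the ImportError surfaces only when a Ray-backed service is actually constructed. A condensed sketch of the pattern under those assumptions (`_Worker` and `make_actor_class` are illustrative names, not TorchRL APIs):

    def _get_ray():
        # Deferred import: failure is raised at call time, not at module import.
        try:
            import ray
            return ray
        except Exception as e:
            raise ImportError(
                "ray is not installed. Please install it with `pip install ray`."
            ) from e

    class _Worker:
        def ping(self):
            return "pong"

    def make_actor_class():
        # ray.remote(...) runs only here, so importing the enclosing module
        # never touches Ray; only constructing the service does.
        ray = _get_ray()
        return ray.remote(num_cpus=0, num_gpus=0)(_Worker)
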
From 430f95bd1f0e99ceba80546f0b8e975628df99cf Mon Sep 17 00:00:00 2001
From: vmoens
Date: Thu, 23 Oct 2025 10:29:22 -0700
Subject: [PATCH 6/6] Update

[ghstack-poisoned]
---
 .github/workflows/test-linux-llm.yml | 10 +++++-----
 test/llm/test_updaters.py            |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/test-linux-llm.yml b/.github/workflows/test-linux-llm.yml
index 9078e37be36..5f2c4199515 100644
--- a/.github/workflows/test-linux-llm.yml
+++ b/.github/workflows/test-linux-llm.yml
@@ -24,15 +24,15 @@ jobs:
     if: ${{ github.event_name == 'push' || (github.event_name == 'pull_request' && contains(join(github.event.pull_request.labels.*.name, ', '), 'llm/')) }}
     strategy:
       matrix:
-        python_version: ["3.9"]
-        cuda_arch_version: ["12.8"]
+        python_version: ["3.12"]
+        cuda_arch_version: ["12.9"]
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       repository: pytorch/rl
       runner: "linux.g6.4xlarge.experimental.nvidia.gpu"
       # gpu-arch-type: cuda
       # gpu-arch-version: "11.7"
-      docker-image: "pytorch/pytorch:2.8.0-cuda12.8-cudnn9-devel"
+      docker-image: "pytorch/pytorch:2.8.0-cuda12.9-cudnn9-devel"
       timeout: 120
       script: |
         if [[ "${{ github.ref }}" =~ release/* ]]; then
@@ -44,8 +44,8 @@ jobs:
         fi
 
         set -euo pipefail
-        export PYTHON_VERSION="3.9"
-        export CU_VERSION="cu128"
+        export PYTHON_VERSION="3.12"
+        export CU_VERSION="cu129"
         export TAR_OPTIONS="--no-same-owner"
         export UPLOAD_CHANNEL="nightly"
         export TF_CPP_MIN_LOG_LEVEL=0
diff --git a/test/llm/test_updaters.py b/test/llm/test_updaters.py
index 02e2efed163..4e9c115f7ba 100644
--- a/test/llm/test_updaters.py
+++ b/test/llm/test_updaters.py
@@ -2,7 +2,7 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
-
+from __future__ import annotations
 import argparse
 import gc
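
Note on patch 6's one-line test change: `from __future__ import annotations` makes annotations lazily evaluated strings (PEP 563), so signatures in the `int | None` / `float | list[float]` style used in vllm_async.py above parse without evaluating the union at definition time, which also keeps such files importable on interpreters older than 3.10. A small illustrative sketch (the function body is a placeholder, not TorchRL code):

    from __future__ import annotations

    def get_cache_usage(actor_index: int | None = None) -> float | list[float]:
        usages = [0.25, 0.75]  # hypothetical per-actor KV-cache usage
        return usages if actor_index is None else usages[actor_index]

    # The annotation is stored as an unevaluated string:
    print(get_cache_usage.__annotations__["actor_index"])  # prints "int | None"
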