[ROCm][CI] Fixes tests for pytorch nightly and python only builds #28979
```diff
@@ -67,6 +67,7 @@ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
 # -----------------------
 # Test vLLM image
 FROM base AS test
+ARG PYTHON_VERSION=3.12
 
 RUN python3 -m pip install --upgrade pip && rm -rf /var/lib/apt/lists/*
@@ -86,10 +87,23 @@ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm /vllm-workspace
 
 # install development dependencies (for testing)
 RUN cd /vllm-workspace \
     && rm -rf vllm \
     && python3 -m pip install -e tests/vllm_test_utils \
     && python3 -m pip install pytest-shard
 
+# enable fast downloads from hf (for testing)
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system hf_transfer
+ENV HF_HUB_ENABLE_HF_TRANSFER=1
+
+# Copy in the v1 package for testing (it isn't distributed yet)
+COPY vllm/v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1
+
+# Source code is used in the `python_only_compile.sh` test
+# We hide it inside `src/` so that this source code
+# will not be imported by other tests
+RUN mkdir src
+RUN mv vllm src/vllm
 
 # -----------------------
 # Final vLLM image
 FROM base AS final
```
```diff
@@ -5,6 +5,8 @@ ARG PYTORCH_BRANCH="1c57644d"
 ARG PYTORCH_VISION_BRANCH="v0.23.0"
 ARG PYTORCH_REPO="https://github.com/ROCm/pytorch.git"
 ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git"
+ARG PYTORCH_AUDIO_BRANCH="v2.9.0"
+ARG PYTORCH_AUDIO_REPO="https://github.com/pytorch/audio.git"
 ARG FA_BRANCH="0e60e394"
 ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git"
 ARG AITER_BRANCH="59bd8ff2"
```
```diff
@@ -45,6 +47,7 @@ RUN apt-get update -y \
     && python3 --version && python3 -m pip --version
 
 RUN pip install -U packaging 'cmake<4' ninja wheel 'setuptools<80' pybind11 Cython
+RUN apt-get install -y libjpeg-dev libsox-dev libsox-fmt-all sox
 
 FROM base AS build_triton
 ARG TRITON_BRANCH
```
```diff
@@ -66,8 +69,11 @@ RUN mkdir -p /app/install && cp /opt/rocm/share/amd_smi/dist/*.whl /app/install
 
 FROM base AS build_pytorch
 ARG PYTORCH_BRANCH
 ARG PYTORCH_VISION_BRANCH
+ARG PYTORCH_AUDIO_BRANCH
 ARG PYTORCH_REPO
 ARG PYTORCH_VISION_REPO
+ARG PYTORCH_AUDIO_REPO
 
 RUN git clone ${PYTORCH_REPO} pytorch
 RUN cd pytorch && git checkout ${PYTORCH_BRANCH} && \
     pip install -r requirements.txt && git submodule update --init --recursive \
```
```diff
@@ -78,8 +84,15 @@ RUN git clone ${PYTORCH_VISION_REPO} vision
 RUN cd vision && git checkout ${PYTORCH_VISION_BRANCH} \
     && python3 setup.py bdist_wheel --dist-dir=dist \
     && pip install dist/*.whl
+RUN git clone ${PYTORCH_AUDIO_REPO} audio
+RUN cd audio && git checkout ${PYTORCH_AUDIO_BRANCH} \
+    && git submodule update --init --recursive \
+    && pip install -r requirements.txt \
+    && python3 setup.py bdist_wheel --dist-dir=dist \
+    && pip install dist/*.whl
 RUN mkdir -p /app/install && cp /app/pytorch/dist/*.whl /app/install \
-    && cp /app/vision/dist/*.whl /app/install
+    && cp /app/vision/dist/*.whl /app/install \
+    && cp /app/audio/dist/*.whl /app/install
 
 FROM base AS build_fa
 ARG FA_BRANCH
```
Comment on lines 88 to 93 (Contributor): To reduce the number of Docker image layers and improve build efficiency, it's recommended to combine the two `RUN` instructions above (the `git clone` and the `cd audio` build step) into a single `RUN`.
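A single-layer variant might look like the sketch below. This is an illustration of the review suggestion using the same commands as the diff above, not code from this PR:

```dockerfile
# Sketch of the reviewer's suggestion (not part of the diff): chaining the
# clone and the build into one RUN produces one image layer instead of two.
RUN git clone ${PYTORCH_AUDIO_REPO} audio \
    && cd audio && git checkout ${PYTORCH_AUDIO_BRANCH} \
    && git submodule update --init --recursive \
    && pip install -r requirements.txt \
    && python3 setup.py bdist_wheel --dist-dir=dist \
    && pip install dist/*.whl
```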
```diff
@@ -130,6 +143,8 @@ ARG PYTORCH_BRANCH
 ARG PYTORCH_VISION_BRANCH
 ARG PYTORCH_REPO
 ARG PYTORCH_VISION_REPO
+ARG PYTORCH_AUDIO_BRANCH
+ARG PYTORCH_AUDIO_REPO
 ARG FA_BRANCH
 ARG FA_REPO
 ARG AITER_BRANCH
@@ -141,6 +156,8 @@ RUN echo "BASE_IMAGE: ${BASE_IMAGE}" > /app/versions.txt \
     && echo "PYTORCH_VISION_BRANCH: ${PYTORCH_VISION_BRANCH}" >> /app/versions.txt \
     && echo "PYTORCH_REPO: ${PYTORCH_REPO}" >> /app/versions.txt \
     && echo "PYTORCH_VISION_REPO: ${PYTORCH_VISION_REPO}" >> /app/versions.txt \
+    && echo "PYTORCH_AUDIO_BRANCH: ${PYTORCH_AUDIO_BRANCH}" >> /app/versions.txt \
+    && echo "PYTORCH_AUDIO_REPO: ${PYTORCH_AUDIO_REPO}" >> /app/versions.txt \
     && echo "FA_BRANCH: ${FA_BRANCH}" >> /app/versions.txt \
     && echo "FA_REPO: ${FA_REPO}" >> /app/versions.txt \
     && echo "AITER_BRANCH: ${AITER_BRANCH}" >> /app/versions.txt \
```
```diff
@@ -49,15 +49,15 @@ def load_module_from_path(module_name, path):
         sys.platform,
     )
     VLLM_TARGET_DEVICE = "empty"
-elif (
-    sys.platform.startswith("linux")
-    and torch.version.cuda is None
-    and os.getenv("VLLM_TARGET_DEVICE") is None
-    and torch.version.hip is None
-):
-    # if cuda or hip is not available and VLLM_TARGET_DEVICE is not set,
-    # fallback to cpu
-    VLLM_TARGET_DEVICE = "cpu"
+elif sys.platform.startswith("linux") and os.getenv("VLLM_TARGET_DEVICE") is None:
+    if torch.version.hip is not None:
+        VLLM_TARGET_DEVICE = "rocm"
+        logger.info("Auto-detected ROCm")
+    elif torch.version.cuda is not None:
+        VLLM_TARGET_DEVICE = "cuda"
+        logger.info("Auto-detected CUDA")
+    else:
+        VLLM_TARGET_DEVICE = "cpu"
 
 
 def is_sccache_available() -> bool:
```
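For reference, the new branch keys off PyTorch's build metadata: `torch.version.hip` is non-None only in ROCm builds of PyTorch, and `torch.version.cuda` only in CUDA builds. A minimal standalone sketch of the same decision (not code from the diff; it assumes only that some PyTorch wheel is installed):

```python
# Standalone sketch of the auto-detection logic above (not part of setup.py).
import torch

if torch.version.hip is not None:      # ROCm wheels populate torch.version.hip
    target = "rocm"
elif torch.version.cuda is not None:   # CUDA wheels populate torch.version.cuda
    target = "cuda"
else:                                  # CPU-only wheels populate neither
    target = "cpu"

print(f"VLLM_TARGET_DEVICE would default to {target!r}")
```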
|
```diff
@@ -115,20 +115,26 @@ def compute_num_jobs(self):
                 num_jobs = os.cpu_count()
 
         nvcc_threads = None
-        if _is_cuda() and get_nvcc_cuda_version() >= Version("11.2"):
-            # `nvcc_threads` is either the value of the NVCC_THREADS
-            # environment variable (if defined) or 1.
-            # when it is set, we reduce `num_jobs` to avoid
-            # overloading the system.
-            nvcc_threads = envs.NVCC_THREADS
-            if nvcc_threads is not None:
-                nvcc_threads = int(nvcc_threads)
-                logger.info(
-                    "Using NVCC_THREADS=%d as the number of nvcc threads.", nvcc_threads
-                )
-            else:
-                nvcc_threads = 1
-            num_jobs = max(1, num_jobs // nvcc_threads)
+        if _is_cuda() and CUDA_HOME is not None:
+            try:
+                nvcc_version = get_nvcc_cuda_version()
+                if nvcc_version >= Version("11.2"):
+                    # `nvcc_threads` is either the value of the NVCC_THREADS
+                    # environment variable (if defined) or 1.
+                    # when it is set, we reduce `num_jobs` to avoid
+                    # overloading the system.
+                    nvcc_threads = envs.NVCC_THREADS
+                    if nvcc_threads is not None:
+                        nvcc_threads = int(nvcc_threads)
+                        logger.info(
+                            "Using NVCC_THREADS=%d as the number of nvcc threads.",
+                            nvcc_threads,
+                        )
+                    else:
+                        nvcc_threads = 1
+                    num_jobs = max(1, num_jobs // nvcc_threads)
+            except Exception as e:
+                logger.warning("Failed to get NVCC version: %s", e)
 
         return num_jobs, nvcc_threads
```
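As a worked example of the `num_jobs` reduction (same arithmetic as the code above): with `os.cpu_count() == 16` and `NVCC_THREADS=4`, the build runs `max(1, 16 // 4) == 4` parallel jobs with 4 nvcc threads each, keeping the total number of compiler threads near the core count instead of oversubscribing it.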
```diff
@@ -206,9 +212,9 @@ def configure(self, ext: CMakeExtension) -> None:
         # Default build tool to whatever cmake picks.
         build_tool = []
         # Make sure we use the nvcc from CUDA_HOME
-        if _is_cuda():
+        if _is_cuda() and CUDA_HOME is not None:
             cmake_args += [f"-DCMAKE_CUDA_COMPILER={CUDA_HOME}/bin/nvcc"]
-        elif _is_hip():
+        elif _is_hip() and ROCM_HOME is not None:
             cmake_args += [f"-DROCM_PATH={ROCM_HOME}"]
 
         other_cmake_args = os.environ.get("CMAKE_ARGS")
@@ -318,7 +324,9 @@ class precompiled_build_ext(build_ext):
     """Disables extension building when using precompiled binaries."""
 
     def run(self) -> None:
-        assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
+        assert _is_cuda() or _is_hip(), (
+            "VLLM_USE_PRECOMPILED is only supported for CUDA or ROCm builds."
+        )
 
     def build_extensions(self) -> None:
         print("Skipping build_ext: using precompiled extensions.")
@@ -490,6 +498,8 @@ def get_rocm_version():
     # Get the Rocm version from the ROCM_HOME/bin/librocm-core.so
     # see https://github.com/ROCm/rocm-core/blob/d11f5c20d500f729c393680a01fa902ebf92094b/rocm_version.cpp#L21
     try:
+        if ROCM_HOME is None:
+            return None
         librocm_core_file = Path(ROCM_HOME) / "lib" / "librocm-core.so"
         if not librocm_core_file.is_file():
             return None
@@ -656,7 +666,9 @@ def _read_requirements(filename: str) -> list[str]:
 
 if _is_cuda():
     ext_modules.append(CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa2_C"))
-    if envs.VLLM_USE_PRECOMPILED or get_nvcc_cuda_version() >= Version("12.3"):
+    if envs.VLLM_USE_PRECOMPILED or (
+        CUDA_HOME and get_nvcc_cuda_version() >= Version("12.3")
+    ):
         # FA3 requires CUDA 12.3 or later
         ext_modules.append(CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa3_C"))
         # Optional since this doesn't get built (produce an .so file) when
@@ -679,7 +691,9 @@ def _read_requirements(filename: str) -> list[str]:
 
 # If using precompiled, extract and patch package_data (in advance of setup)
 if envs.VLLM_USE_PRECOMPILED:
-    assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
+    assert _is_cuda() or _is_hip(), (
+        "VLLM_USE_PRECOMPILED is only supported for CUDA or ROCm builds."
+    )
     wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
     if wheel_location is not None:
         wheel_url = wheel_location
```
Comment: This is being set in the base Docker image. A better way would be to set it as an `ENV` there and inherit it in this image.
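Assuming the thread refers to `PYTHON_VERSION` (an assumption; the anchor of this comment isn't shown), the suggestion would look roughly like the sketch below: declare the value once as an `ENV` in the base stage so that stages built `FROM` it inherit it without redeclaring an `ARG`.

```dockerfile
# Hypothetical sketch of the suggestion (image and stage names illustrative):
# an ENV set in a parent stage is inherited by every stage built FROM it,
# while an ARG is scoped to the stage (or preamble) that declares it.
ARG BASE_IMAGE
FROM ${BASE_IMAGE} AS base
ENV PYTHON_VERSION=3.12

FROM base AS test
# No per-stage `ARG PYTHON_VERSION` needed: the ENV above is inherited here.
COPY vllm/v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1
```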