Commit 1a5eaec

Auto detect cuda in install script (#15027)
This pull request simplifies building ExecuTorch with CUDA support by removing the requirement to set the `CMAKE_ARGS` environment variable. CUDA support is now detected and handled automatically during installation, streamlining both CI workflows and the installation logic. Related documentation and error messages have also been updated for clarity.

**CI/CD and Installation Process Simplification**

* Removed the need to set `CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"` in CI scripts and workflow files; `install_executorch.sh` now handles CUDA detection automatically. (`.ci/scripts/test-cuda-build.sh`, `.github/workflows/cuda.yml`)
* Deleted the `_is_cuda_enabled()` function from `install_utils.py` and refactored the logic to rely solely on CUDA detection via `nvcc`.

**Error Handling and Messaging Improvements**

* Updated error messages in `install_utils.py` to remove references to "CUDA delegate" and to clarify the instructions shown when CUDA is not detected or not supported.

**Internal Refactoring**

* Applied `functools.lru_cache` to `_get_cuda_version()` so repeated CUDA version detection is not re-run.
These changes make the CUDA build process more user-friendly and reduce the risk of misconfiguration.
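As described above, detection now rests entirely on `nvcc`. A minimal standalone sketch of that approach (the helper names and the supported-version set below are illustrative, not the actual `install_utils.py` code):

```python
import re
import subprocess

# Illustrative only: the real supported set lives in ExecuTorch's install logic.
SUPPORTED_CUDA_VERSIONS = {(12, 6), (12, 8), (12, 9)}


def parse_nvcc_release(nvcc_output):
    """Extract (major, minor) from `nvcc --version` output, or None."""
    match = re.search(r"release (\d+)\.(\d+)", nvcc_output)
    if match is None:
        return None
    return (int(match.group(1)), int(match.group(2)))


def detect_cuda(supported=frozenset(SUPPORTED_CUDA_VERSIONS)):
    """Return a supported (major, minor) CUDA version, or None for CPU-only."""
    try:
        out = subprocess.run(
            ["nvcc", "--version"], capture_output=True, text=True, check=True
        ).stdout
    except (FileNotFoundError, subprocess.CalledProcessError):
        return None  # nvcc missing or broken: fall back to CPU-only PyTorch
    version = parse_nvcc_release(out)
    return version if version in supported else None
```

On a machine without the CUDA toolkit, `detect_cuda()` simply returns `None`, which is the behavior the PR relies on instead of an opt-in environment variable.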
1 parent a12219d commit 1a5eaec

File tree

3 files changed: +22 −29 lines changed


.ci/scripts/test-cuda-build.sh

Lines changed: 0 additions & 3 deletions
```diff
@@ -27,9 +27,6 @@ test_executorch_cuda_build() {
   nvcc --version || echo "nvcc not found"
   nvidia-smi || echo "nvidia-smi not found"
 
-  # Set CMAKE_ARGS to enable CUDA build - ExecuTorch will handle PyTorch installation automatically
-  export CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
-
   echo "=== Starting ExecuTorch Installation ==="
   # Install ExecuTorch with CUDA support with timeout and error handling
   timeout 5400 ./install_executorch.sh || {
```
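The `timeout 5400 ./install_executorch.sh || { … }` guard above bounds the installation step and reports failure without aborting the surrounding script. The same pattern in Python, as a hedged sketch (the command and limit are whatever the caller passes; nothing here is ExecuTorch API):

```python
import subprocess


def run_with_timeout(cmd, limit_seconds):
    """Run cmd; return True on success, False on failure or timeout."""
    try:
        subprocess.run(cmd, check=True, timeout=limit_seconds)
        return True
    except subprocess.TimeoutExpired:
        print(f"ERROR: {' '.join(cmd)} exceeded {limit_seconds}s")
        return False
    except subprocess.CalledProcessError as e:
        print(f"ERROR: {' '.join(cmd)} failed with exit code {e.returncode}")
        return False
```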

.github/workflows/cuda.yml

Lines changed: 4 additions & 4 deletions
```diff
@@ -1,7 +1,7 @@
 # Test ExecuTorch CUDA Build Compatibility
 # This workflow tests whether ExecuTorch can be successfully built with CUDA support
 # across different CUDA versions (12.6, 12.8, 12.9) using the command:
-# CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
+# ./install_executorch.sh
 #
 # Note: ExecuTorch automatically detects the system CUDA version using nvcc and
 # installs the appropriate PyTorch wheel. No manual CUDA/PyTorch installation needed.
@@ -43,7 +43,7 @@ jobs:
         set -eux
 
         # Test ExecuTorch CUDA build - ExecuTorch will automatically detect CUDA version
-        # and install the appropriate PyTorch wheel when CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
+        # and install the appropriate PyTorch wheel
         source .ci/scripts/test-cuda-build.sh "${{ matrix.cuda-version }}"
 
   # This job will fail if any of the CUDA versions fail
@@ -83,7 +83,7 @@ jobs:
       script: |
         set -eux
 
-        PYTHON_EXECUTABLE=python CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
+        PYTHON_EXECUTABLE=python ./install_executorch.sh
         export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
         PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda
 
@@ -110,7 +110,7 @@ jobs:
         set -eux
 
         echo "::group::Setup ExecuTorch"
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
+        ./install_executorch.sh
         echo "::endgroup::"
 
         echo "::group::Setup Huggingface"
```

install_utils.py

Lines changed: 18 additions & 22 deletions
```diff
@@ -6,19 +6,12 @@
 # LICENSE file in the root directory of this source tree.
 
 import functools
-import os
 import platform
 import re
 import subprocess
 import sys
 
 
-def _is_cuda_enabled():
-    """Check if CUDA delegate is enabled via CMAKE_ARGS environment variable."""
-    cmake_args = os.environ.get("CMAKE_ARGS", "")
-    return "-DEXECUTORCH_BUILD_CUDA=ON" in cmake_args
-
-
 def _cuda_version_to_pytorch_suffix(major, minor):
     """
     Generate PyTorch CUDA wheel suffix from CUDA version numbers.
@@ -33,6 +26,7 @@ def _cuda_version_to_pytorch_suffix(major, minor):
     return f"cu{major}{minor}"
 
 
+@functools.lru_cache(maxsize=1)
 def _get_cuda_version(supported_cuda_versions):
     """
     Get the CUDA version installed on the system using nvcc command.
@@ -62,25 +56,23 @@ def _get_cuda_version(supported_cuda_versions):
             )
             raise RuntimeError(
                 f"Detected CUDA version {major}.{minor} is not supported. "
-                f"Only the following CUDA versions are supported: {available_versions}. "
-                f"Please install a supported CUDA version or try on CPU-only delegates."
+                f"Supported versions: {available_versions}."
             )
 
             return (major, minor)
         else:
             raise RuntimeError(
-                "CUDA delegate is enabled but could not parse CUDA version from nvcc output. "
-                "Please ensure CUDA is properly installed or try on CPU-only delegates."
+                "Failed to parse CUDA version from nvcc output. "
+                "Ensure CUDA is properly installed."
             )
     except FileNotFoundError:
         raise RuntimeError(
-            "CUDA delegate is enabled but nvcc (CUDA compiler) is not found in PATH. "
-            "Please install CUDA toolkit or try on CPU-only delegates."
+            "nvcc (CUDA compiler) is not found in PATH. Install the CUDA toolkit."
         )
     except subprocess.CalledProcessError as e:
         raise RuntimeError(
-            f"CUDA delegate is enabled but nvcc command failed with error: {e}. "
-            "Please ensure CUDA is properly installed or try on CPU-only delegates."
+            f"nvcc command failed with error: {e}. "
+            "Ensure CUDA is properly installed."
         )
 
 
@@ -105,7 +97,7 @@ def _get_pytorch_cuda_url(cuda_version, torch_nightly_url_base):
 @functools.lru_cache(maxsize=1)
 def determine_torch_url(torch_nightly_url_base, supported_cuda_versions):
     """
-    Determine the appropriate PyTorch installation URL based on CUDA availability and CMAKE_ARGS.
+    Determine the appropriate PyTorch installation URL based on CUDA availability.
     Uses @functools.lru_cache to avoid redundant CUDA detection and print statements.
 
     Args:
@@ -115,15 +107,19 @@ def determine_torch_url(torch_nightly_url_base, supported_cuda_versions):
     Returns:
         URL string for PyTorch packages
     """
-    # Check if CUDA delegate is enabled
-    if not _is_cuda_enabled():
-        print("CUDA delegate not enabled, using CPU-only PyTorch")
+    if platform.system().lower() == "windows":
+        print(
+            "Windows detected, using CPU-only PyTorch until CUDA support is available"
+        )
         return f"{torch_nightly_url_base}/cpu"
 
-    print("CUDA delegate enabled, detecting CUDA version...")
+    print("Attempting to detect CUDA via nvcc...")
 
-    # Get CUDA version
-    cuda_version = _get_cuda_version(supported_cuda_versions)
+    try:
+        cuda_version = _get_cuda_version(supported_cuda_versions)
+    except Exception as err:
+        print(f"CUDA detection failed ({err}), using CPU-only PyTorch")
+        return f"{torch_nightly_url_base}/cpu"
 
     major, minor = cuda_version
     print(f"Detected CUDA version: {major}.{minor}")
```
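Putting the new `install_utils.py` flow together: a detected version maps to a wheel-index suffix via `cu{major}{minor}`, and any detection failure falls back to the `/cpu` index. A small sketch of that mapping (the base URL and the `pick_torch_url` name are illustrative, not the real ExecuTorch function):

```python
# Mirrors the suffix logic shown in the diff: a CUDA (major, minor) pair maps
# to a PyTorch wheel index suffix, and no CUDA means the /cpu index.
def cuda_version_to_pytorch_suffix(major, minor):
    return f"cu{major}{minor}"


def pick_torch_url(base, cuda_version):
    """cuda_version is (major, minor) or None (CPU-only fallback)."""
    if cuda_version is None:
        return f"{base}/cpu"
    major, minor = cuda_version
    return f"{base}/{cuda_version_to_pytorch_suffix(major, minor)}"
```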

0 commit comments
