VectorInstitute
diff --git a/.github/workflows/code_checks.yml b/.github/workflows/code_checks.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
Lines changed: 3 additions & 3 deletions
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
Lines changed: 4 additions & 4 deletions
diff --git a/Dockerfile b/Dockerfile
Lines changed: 12 additions & 8 deletions
diff --git a/MODEL_TRACKING.md b/MODEL_TRACKING.md
Lines changed: 1 addition & 0 deletions
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 4 additions & 3 deletions
diff --git a/tests/vec_inf/cli/test_cli.py b/tests/vec_inf/cli/test_cli.py
Lines changed: 1 addition & 0 deletions
diff --git a/tests/vec_inf/cli/test_helper.py b/tests/vec_inf/cli/test_helper.py
Lines changed: 2 additions & 0 deletions
diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py
Lines changed: 7 additions & 8 deletions
@@ -40,7 +40,7 @@ jobs:
         with:
           python-version-file: ".python-version"
       - name: Install the project
-        run: uv sync --dev
+        run: uv sync --dev --prerelease=allow
       - name: Install dependencies and check code
         run: |
           source .venv/bin/activate
 
@@ -67,10 +67,10 @@ jobs:
           python-version-file: ".python-version"
 
       - name: Install the project
-        run: uv sync --all-extras --group docs
+        run: uv sync --all-extras --group docs --prerelease=allow
 
       - name: Build docs
-        run: uv run mkdocs build
+        run: uv run --frozen mkdocs build
 
       - name: Create .nojekyll file
         run: touch site/.nojekyll
@@ -104,7 +104,7 @@ jobs:
           python-version-file: ".python-version"
 
       - name: Install the project
-        run: uv sync --all-extras --group docs
+        run: uv sync --all-extras --group docs --frozen
 
       - name: Configure Git Credentials
         run: |
 
@@ -58,18 +58,18 @@ jobs:
           python-version: ${{ matrix.python-version }}
 
       - name: Install the project
-        run: uv sync --dev
+        run: uv sync --dev --prerelease=allow
 
       - name: Install dependencies and check code
         run: |
-          uv run pytest -m "not integration_test" --cov vec_inf --cov-report=xml tests
+          uv run --frozen pytest -m "not integration_test" --cov vec_inf --cov-report=xml tests
 
       - name: Install the core package only
         run: uv sync --no-dev
 
       - name: Run package import tests
         run: |
-          uv run pytest tests/test_imports.py
+          uv run --frozen pytest tests/test_imports.py
 
       - name: Import Codecov GPG public key
         run: |
@@ -79,7 +79,7 @@ jobs:
         uses: codecov/codecov-action@v5.5.1
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
-          file: ./coverage.xml
+          files: ./coverage.xml
           name: codecov-umbrella
           fail_ci_if_error: true
           verbose: true
@@ -35,29 +35,33 @@ RUN wget https://bootstrap.pypa.io/get-pip.py && \
     rm get-pip.py && \
     python3.10 -m pip install --upgrade pip setuptools wheel uv
 
-# Install Infiniband/RDMA support
+# Install RDMA support
 RUN apt-get update && apt-get install -y \
     libibverbs1 libibverbs-dev ibverbs-utils \
     librdmacm1 librdmacm-dev rdmacm-utils \
+    rdma-core ibverbs-providers infiniband-diags perftest \
     && rm -rf /var/lib/apt/lists/*
 
 # Set up RDMA environment (these will persist in the final container)
 ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH"
-ENV UCX_NET_DEVICES=all
 ENV NCCL_IB_DISABLE=0
+ENV NCCL_SOCKET_IFNAME="^lo,docker0"
+ENV NCCL_NET_GDR_LEVEL=PHB
+ENV NCCL_IB_TIMEOUT=22
+ENV NCCL_IB_RETRY_CNT=7
+ENV NCCL_DEBUG=INFO
 
 # Set up project
 WORKDIR /vec-inf
 COPY . /vec-inf
 
 # Install project dependencies with build requirements
-RUN PIP_INDEX_URL="https://download.pytorch.org/whl/cu128" uv pip install --system -e .[dev]
+RUN uv pip install --system -e .[dev] --prerelease=allow
 
-# Final configuration
-RUN mkdir -p /vec-inf/nccl && \
-    mv /root/.config/vllm/nccl/cu12/libnccl.so.2.18.1 /vec-inf/nccl/libnccl.so.2.18.1
-ENV VLLM_NCCL_SO_PATH=/vec-inf/nccl/libnccl.so.2.18.1
-ENV NCCL_DEBUG=INFO
+# Install a single, system NCCL (from NVIDIA CUDA repo in base image)
+RUN apt-get update && apt-get install -y --allow-change-held-packages\
+    libnccl2 libnccl-dev \
+    && rm -rf /var/lib/apt/lists/*
 
 # Set the default command to start an interactive shell
 CMD ["bash"]
@@ -40,6 +40,7 @@ This document tracks all model weights available in the `/model-weights` directo
 | `gemma-2b-it` | ❌ |
 | `gemma-7b` | ❌ |
 | `gemma-7b-it` | ❌ |
+| `gemma-2-2b-it` | ✅ |
 | `gemma-2-9b` | ✅ |
 | `gemma-2-9b-it` | ✅ |
 | `gemma-2-27b` | ✅ |
 
@@ -42,9 +42,10 @@ dev = [
     "xgrammar>=0.1.11",
     "torch>=2.7.0",
     "vllm>=0.10.0",
-    "vllm-nccl-cu12>=2.18,<2.19",
-    "ray>=2.40.0",
-    "cupy-cuda12x==12.1.0"
+    "ray[default]>=2.50.0",
+    "cupy-cuda12x==12.1.0",
+    "flashinfer-python>=0.4.0",
+    "sglang>=0.5.0",
 ]
 
 [project.scripts]
 
@@ -39,6 +39,7 @@ def test_launch_command_success(runner):
             "mem_per_node": "32G",
             "model_weights_parent_dir": "/model-weights",
             "vocab_size": "128000",
+            "venv": "/path/to/venv",
             "vllm_args": {"max_model_len": 8192},
             "env": {"CACHE": "/cache"},
         }
 
@@ -35,6 +35,7 @@ def test_format_table_output(self):
             "mem_per_node": "32G",
             "model_weights_parent_dir": "/model-weights",
             "log_dir": "/tmp/logs",
+            "venv": "/path/to/venv",
             "vllm_args": {"max_model_len": 8192, "enable_prefix_caching": True},
             "env": {"CACHE": "/cache"},
         }
@@ -63,6 +64,7 @@ def test_format_table_output_with_minimal_params(self):
             "mem_per_node": "16G",
             "model_weights_parent_dir": "/weights",
             "log_dir": "/logs",
+            "venv": "/path/to/venv",
             "vllm_args": {},
             "env": {},
         }
 
@@ -53,7 +53,7 @@ def singularity_params(self, basic_params):
         singularity = basic_params.copy()
         singularity.update(
             {
-                "venv": "singularity",
+                "venv": "apptainer",
                 "bind": "/scratch:/scratch,/data:/data",
                 "env": {
                     "CACHE_DIR": "/cache",
@@ -109,7 +109,7 @@ def test_init_singularity(self, singularity_params):
     def test_init_singularity_no_bind(self, basic_params):
         """Test Singularity initialization without additional binds."""
         params = basic_params.copy()
-        params["venv"] = "singularity"
+        params["venv"] = "apptainer"
         generator = SlurmScriptGenerator(params)
 
         assert generator.params == params
@@ -173,7 +173,6 @@ def test_generate_launch_cmd_venv(self, basic_params):
         generator = SlurmScriptGenerator(basic_params)
         launch_cmd = generator._generate_launch_cmd()
 
-        assert "source /path/to/venv/bin/activate" in launch_cmd
         assert "vllm serve /path/to/model_weights/test-model" in launch_cmd
         assert "--served-model-name test-model" in launch_cmd
         assert "--tensor-parallel-size 4" in launch_cmd
@@ -185,7 +184,7 @@ def test_generate_launch_cmd_singularity(self, singularity_params):
         generator = SlurmScriptGenerator(singularity_params)
         launch_cmd = generator._generate_launch_cmd()
 
-        assert "exec --nv" in launch_cmd
+        assert "apptainer exec --nv" in launch_cmd
         assert "--bind /path/to/model_weights/test-model" in launch_cmd
         assert "--bind /scratch:/scratch,/data:/data" in launch_cmd
         assert "source" not in launch_cmd
@@ -306,9 +305,9 @@ def batch_params(self):
     def batch_singularity_params(self, batch_params):
         """Generate batch SLURM configuration parameters with Singularity."""
         singularity_params = batch_params.copy()
-        singularity_params["venv"] = "singularity"  # Set top-level venv to singularity
+        singularity_params["venv"] = "apptainer"  # Set top-level venv to apptainer
         for model_name in singularity_params["models"]:
-            singularity_params["models"][model_name]["venv"] = "singularity"
+            singularity_params["models"][model_name]["venv"] = "apptainer"
             singularity_params["models"][model_name]["bind"] = (
                 "/scratch:/scratch,/data:/data"
             )
@@ -341,9 +340,9 @@ def test_init_singularity(self, batch_singularity_params):
     def test_init_singularity_no_bind(self, batch_params):
         """Test Singularity initialization without additional binds."""
         params = batch_params.copy()
-        params["venv"] = "singularity"  # Set top-level venv to singularity
+        params["venv"] = "apptainer"  # Set top-level venv to apptainer
         for model_name in params["models"]:
-            params["models"][model_name]["venv"] = "singularity"
+            params["models"][model_name]["venv"] = "apptainer"
 
         generator = BatchSlurmScriptGenerator(params)
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -39,6 +39,7 @@ def test_launch_command_success(runner):` |
| `39` | `39` | `"mem_per_node": "32G",` |
| `40` | `40` | `"model_weights_parent_dir": "/model-weights",` |
| `41` | `41` | `"vocab_size": "128000",` |
| | `42` | `+ "venv": "/path/to/venv",` |
| `42` | `43` | `"vllm_args": {"max_model_len": 8192},` |
| `43` | `44` | `"env": {"CACHE": "/cache"},` |
| `44` | `45` | `}` |