From f4d8e385a4ddf023ddeda69b8dd008da530eddf3 Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Wed, 7 May 2025 10:38:06 +0100 Subject: [PATCH 1/7] Added Intel XPU support --- .github/workflows/build-push-vllm-xpu.yml | 43 +++++++++++++++++++ .../azimuth-llm/templates/api/deployment.yml | 7 ++- charts/azimuth-llm/values.yaml | 5 ++- 3 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/build-push-vllm-xpu.yml diff --git a/.github/workflows/build-push-vllm-xpu.yml b/.github/workflows/build-push-vllm-xpu.yml new file mode 100644 index 0000000..025c47f --- /dev/null +++ b/.github/workflows/build-push-vllm-xpu.yml @@ -0,0 +1,43 @@ +name: Publish vLLM XPU images + +on: + # NOTE(sd109): Since this is checking out an external repository + # it's probably safer to leave this as workflow dispatch + # only so that we can manually build images from specific + # refs rather than automatically pulling in the latest + # content from the remote repo. + workflow_dispatch: + inputs: + vllm_ref: + type: string + description: The vLLM GitHub ref (tag, branch or commit) to build. + required: true + +jobs: + build_push_xpu_image: + name: Build and push image + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write # needed for signing the images with GitHub OIDC Token + packages: write # required for pushing container images + security-events: write # required for pushing SARIF files + steps: + - name: Check out the vLLM repository + uses: actions/checkout@v4 + with: + repository: vllm-project/vllm + ref: ${{ inputs.vllm_ref }} + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push image + run: | + IMAGE=ghcr.io/stackhpc/vllm-xpu:${{ inputs.vllm_ref }} + docker build -f Dockerfile.xpu -t $IMAGE --shm-size=4g . 
+ docker push $IMAGE diff --git a/charts/azimuth-llm/templates/api/deployment.yml b/charts/azimuth-llm/templates/api/deployment.yml index 850b0f1..b4e7f09 100644 --- a/charts/azimuth-llm/templates/api/deployment.yml +++ b/charts/azimuth-llm/templates/api/deployment.yml @@ -19,7 +19,8 @@ spec: spec: containers: - name: {{ .Release.Name }}-api - {{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" "vllm/vllm-openai" (eq (.Values.api.gpus | int) 0)) -}} + {{ $gpuChart := ternary "ghrc.io/stackhpc/vllm-xpu" "vllm/vllm-openai" .Values.api.intelXPUsEnabled -}} + {{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" $gpuChart (eq (.Values.api.gpus | int) 0)) -}} image: {{ printf "%s:%s" $imageRepo .Values.api.image.version }} ports: - name: api @@ -61,7 +62,11 @@ spec: periodSeconds: 10 resources: limits: + {{- if .Values.api.intelXPUsEnabled }} + gpu.intel.com/i915: {{ .Values.api.gpus | int }} + {{- else }} nvidia.com/gpu: {{ .Values.api.gpus | int }} + {{- end }} volumes: - name: data {{- .Values.api.cacheVolume | toYaml | nindent 10 }} diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml index a82b567..48f996e 100644 --- a/charts/azimuth-llm/values.yaml +++ b/charts/azimuth-llm/values.yaml @@ -33,7 +33,8 @@ api: enabled: true # Container image config image: - # Defaults to vllm/vllm-openai when api.gpus > 0 + # Defaults to vllm/vllm-openai when api.gpus > 0, + # ghrc.io/stackhpc/vllm-xpu when api.gpus > 0 and intelXPUsEnabled is true, # or ghrc.io/stackhpc/vllm-cpu when api.gpus == 0 repository: version: v0.8.5.post1 @@ -80,6 +81,8 @@ api: # distributed / multi-GPU support should be available, though it # has not been tested against this app. 
gpus: 1 + # Whether pods should request Intel GPUs as opposed to the default Nvidia GPUs + intelXPUsEnabled: false # The update strategy to use for the deployment # See https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#updating-a-deployment # NOTE: The following RollingUpdate strategy offers a zero-downtime update but requires additional GPU worker nodes. From daf91c089c18dd97838b198b761db947df522b14 Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Wed, 7 May 2025 10:51:41 +0100 Subject: [PATCH 2/7] added on PR for dev --- .github/workflows/build-push-vllm-xpu.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-push-vllm-xpu.yml b/.github/workflows/build-push-vllm-xpu.yml index 025c47f..af21959 100644 --- a/.github/workflows/build-push-vllm-xpu.yml +++ b/.github/workflows/build-push-vllm-xpu.yml @@ -12,6 +12,7 @@ on: type: string description: The vLLM GitHub ref (tag, branch or commit) to build. required: true + pull_request: # REMOVE THIS jobs: build_push_xpu_image: From 87fbd55a3f73c90f17b08405c849aa1f3bac3f88 Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Fri, 9 May 2025 11:18:32 +0100 Subject: [PATCH 3/7] updated xpu build with new dockerfile location --- .github/workflows/build-push-vllm-xpu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-push-vllm-xpu.yml b/.github/workflows/build-push-vllm-xpu.yml index af21959..68a338a 100644 --- a/.github/workflows/build-push-vllm-xpu.yml +++ b/.github/workflows/build-push-vllm-xpu.yml @@ -40,5 +40,5 @@ jobs: - name: Build and push image run: | IMAGE=ghcr.io/stackhpc/vllm-xpu:${{ inputs.vllm_ref }} - docker build -f Dockerfile.xpu -t $IMAGE --shm-size=4g . + docker build -f docker/Dockerfile.xpu -t $IMAGE --shm-size=4g . 
docker push $IMAGE From 7e573086634feb80178563a4d086999f369ff5df Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Mon, 12 May 2025 09:37:00 +0100 Subject: [PATCH 4/7] fixed ghcr typo --- charts/azimuth-llm/templates/api/deployment.yml | 2 +- charts/azimuth-llm/values.yaml | 4 ++-- tst.yml | 3 +++ 3 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 tst.yml diff --git a/charts/azimuth-llm/templates/api/deployment.yml b/charts/azimuth-llm/templates/api/deployment.yml index b4e7f09..6ae3974 100644 --- a/charts/azimuth-llm/templates/api/deployment.yml +++ b/charts/azimuth-llm/templates/api/deployment.yml @@ -19,7 +19,7 @@ spec: spec: containers: - name: {{ .Release.Name }}-api - {{ $gpuChart := ternary "ghrc.io/stackhpc/vllm-xpu" "vllm/vllm-openai" .Values.api.intelXPUsEnabled -}} + {{ $gpuChart := ternary "ghcr.io/stackhpc/vllm-xpu" "vllm/vllm-openai" .Values.api.intelXPUsEnabled -}} {{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" $gpuChart (eq (.Values.api.gpus | int) 0)) -}} image: {{ printf "%s:%s" $imageRepo .Values.api.image.version }} ports: diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml index 48f996e..e42a316 100644 --- a/charts/azimuth-llm/values.yaml +++ b/charts/azimuth-llm/values.yaml @@ -34,8 +34,8 @@ api: # Container image config image: # Defaults to vllm/vllm-openai when api.gpus > 0, - # ghrc.io/stackhpc/vllm-xpu when api.gpus > 0 and intelXPUsEnabled is true, - # or ghrc.io/stackhpc/vllm-cpu when api.gpus == 0 + # ghcr.io/stackhpc/vllm-xpu when api.gpus > 0 and intelXPUsEnabled is true, + # or ghcr.io/stackhpc/vllm-cpu when api.gpus == 0 repository: version: v0.8.5.post1 monitoring: diff --git a/tst.yml b/tst.yml new file mode 100644 index 0000000..c4c76c3 --- /dev/null +++ b/tst.yml @@ -0,0 +1,3 @@ +azimuth-llm: + api: + intelXPUsEnabled: true From 4b1ca87f534c696712c156251636c671924e991a Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Mon, 12 May 2025 
13:31:57 +0100 Subject: [PATCH 5/7] removed on-PR for XPU builds --- .github/workflows/build-push-vllm-xpu.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build-push-vllm-xpu.yml b/.github/workflows/build-push-vllm-xpu.yml index 68a338a..5723333 100644 --- a/.github/workflows/build-push-vllm-xpu.yml +++ b/.github/workflows/build-push-vllm-xpu.yml @@ -12,7 +12,6 @@ on: type: string description: The vLLM GitHub ref (tag, branch or commit) to build. required: true - pull_request: # REMOVE THIS jobs: build_push_xpu_image: From 09e01a27558576e5199256127c29e2b0744dde23 Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Tue, 13 May 2025 08:19:21 +0100 Subject: [PATCH 6/7] refactored image templating --- charts/azimuth-llm/templates/api/deployment.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/charts/azimuth-llm/templates/api/deployment.yml b/charts/azimuth-llm/templates/api/deployment.yml index 6ae3974..68b0052 100644 --- a/charts/azimuth-llm/templates/api/deployment.yml +++ b/charts/azimuth-llm/templates/api/deployment.yml @@ -19,9 +19,13 @@ spec: spec: containers: - name: {{ .Release.Name }}-api - {{ $gpuChart := ternary "ghcr.io/stackhpc/vllm-xpu" "vllm/vllm-openai" .Values.api.intelXPUsEnabled -}} - {{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" $gpuChart (eq (.Values.api.gpus | int) 0)) -}} - image: {{ printf "%s:%s" $imageRepo .Values.api.image.version }} + {{- if eq (.Values.api.gpus | int) 0 }} + image: "ghcr.io/stackhpc/vllm-cpu" + {{- else if .Values.api.intelXPUsEnabled }} + image: "ghcr.io/stackhpc/vllm-xpu" + {{- else }} + image: "vllm/vllm-openai" + {{- end }} ports: - name: api containerPort: 8000 From ee1ded9a233d58094ae79ea5eec1b0ef8c5abfd4 Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Tue, 13 May 2025 08:45:25 +0100 Subject: [PATCH 7/7] fixed missing image version --- charts/azimuth-llm/templates/api/deployment.yml | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/charts/azimuth-llm/templates/api/deployment.yml b/charts/azimuth-llm/templates/api/deployment.yml index 68b0052..0e6206d 100644 --- a/charts/azimuth-llm/templates/api/deployment.yml +++ b/charts/azimuth-llm/templates/api/deployment.yml @@ -20,11 +20,11 @@ spec: containers: - name: {{ .Release.Name }}-api {{- if eq (.Values.api.gpus | int) 0 }} - image: "ghcr.io/stackhpc/vllm-cpu" + image: "{{ .Values.api.image.repository | default "ghcr.io/stackhpc/vllm-cpu" }}:{{ .Values.api.image.version }}" # each branch honours api.image.repository if set {{- else if .Values.api.intelXPUsEnabled }} - image: "ghcr.io/stackhpc/vllm-xpu" + image: "{{ .Values.api.image.repository | default "ghcr.io/stackhpc/vllm-xpu" }}:{{ .Values.api.image.version }}" {{- else }} - image: "vllm/vllm-openai" + image: "{{ .Values.api.image.repository | default "vllm/vllm-openai" }}:{{ .Values.api.image.version }}" {{- end }} ports: - name: api