Dockerfile.gpu → Dockerfile.cuda
File renamed without changes.
42 changes: 42 additions & 0 deletions Dockerfile.intel
@@ -0,0 +1,42 @@
FROM onerahmet/ffmpeg:n7.1 AS ffmpeg

FROM swaggerapi/swagger-ui:v5.9.1 AS swagger-ui

FROM intel/intel-extension-for-pytorch:2.8.10-xpu

LABEL org.opencontainers.image.source="https://github.com/ahmetoner/whisper-asr-webservice"

ENV PYTHON_VERSION=3.11

ENV POETRY_VENV=/app/.venv

RUN export DEBIAN_FRONTEND=noninteractive \
    && apt-get -qq update \
    && apt-get -qq install --no-install-recommends \
        python${PYTHON_VERSION}-venv \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

RUN ln -s -f /usr/bin/python${PYTHON_VERSION} /usr/bin/python3 && \
    ln -s -f /usr/bin/python${PYTHON_VERSION} /usr/bin/python && \
    ln -s -f /usr/bin/pip3 /usr/bin/pip

RUN python3 -m venv $POETRY_VENV \
    && $POETRY_VENV/bin/pip install -U pip setuptools \
    && $POETRY_VENV/bin/pip install poetry==2.1.3

ENV PATH="${PATH}:${POETRY_VENV}/bin"

WORKDIR /app

COPY . .
COPY --from=ffmpeg /usr/local/bin/ffmpeg /usr/local/bin/ffmpeg
COPY --from=swagger-ui /usr/share/nginx/html/swagger-ui.css swagger-ui-assets/swagger-ui.css
COPY --from=swagger-ui /usr/share/nginx/html/swagger-ui-bundle.js swagger-ui-assets/swagger-ui-bundle.js

RUN poetry config virtualenvs.in-project true
RUN poetry install --extras xpu

EXPOSE 9000

ENTRYPOINT ["whisper-asr-webservice"]
20 changes: 17 additions & 3 deletions README.md
@@ -28,7 +28,7 @@ docker run -d -p 9000:9000 \
onerahmet/openai-whisper-asr-webservice:latest
```

### GPU
### GPU (cuda)

```shell
docker run -d --gpus all -p 9000:9000 \
@@ -37,6 +37,17 @@ docker run -d --gpus all -p 9000:9000 \
onerahmet/openai-whisper-asr-webservice:latest-gpu
```

### GPU (intel)

Only the `openai_whisper` engine is available on Intel GPUs.

```shell
docker run -d --device=/dev/dri -p 9000:9000 \
-e ASR_MODEL=base \
-e ASR_ENGINE=openai_whisper \
onerahmet/openai-whisper-asr-webservice:latest-intel
```

#### Cache

To reduce container startup time by avoiding repeated downloads, you can persist the cache directory:
@@ -55,7 +66,7 @@ docker run -d -p 9000:9000 \
- Voice activity detection (VAD) filtering
- Speaker diarization (with WhisperX)
- FFmpeg integration for broad audio/video format support
- GPU acceleration support
- GPU acceleration support (NVIDIA CUDA or Intel XPU)
- Configurable model loading/unloading
- REST API with Swagger documentation

@@ -66,7 +77,7 @@ Key configuration options:
- `ASR_ENGINE`: Engine selection (openai_whisper, faster_whisper, whisperx)
- `ASR_MODEL`: Model selection (tiny, base, small, medium, large-v3, etc.)
- `ASR_MODEL_PATH`: Custom path to store/load models
- `ASR_DEVICE`: Device selection (cuda, cpu)
- `ASR_DEVICE`: Device selection (cuda, xpu, cpu)
- `MODEL_IDLE_TIMEOUT`: Timeout for model unloading

## Documentation
@@ -86,6 +97,9 @@ poetry install --extras cpu
# Install dependencies for cuda
poetry install --extras cuda

# Install dependencies for intel xpu
poetry install --extras xpu

# Run service
poetry run whisper-asr-webservice --host 0.0.0.0 --port 9000
```
2 changes: 2 additions & 0 deletions app/asr_models/asr_model.py
@@ -71,6 +71,8 @@ def release_model(self):
"""
del self.model
torch.cuda.empty_cache()
torch.xpu.memory.empty_cache()
#torch.accelerator.memory.empty_cache() available in torch 2.9.0 replacing the above lines
gc.collect()
self.model = None
print("Model unloaded due to timeout")
9 changes: 8 additions & 1 deletion app/asr_models/openai_whisper_engine.py
@@ -4,6 +4,10 @@
from typing import BinaryIO, Union

import torch

if torch.xpu.is_available():
    # Imported for its side effects: registers Intel XPU support with PyTorch.
    import intel_extension_for_pytorch as ipex

import whisper
from whisper.utils import ResultWriter, WriteJSON, WriteSRT, WriteTSV, WriteTXT, WriteVTT

@@ -18,7 +22,10 @@ def load_model(self):
        if torch.cuda.is_available():
            self.model = whisper.load_model(name=CONFIG.MODEL_NAME, download_root=CONFIG.MODEL_PATH).cuda()
        else:
            if torch.xpu.is_available():
                self.model = whisper.load_model(name=CONFIG.MODEL_NAME, device="xpu", download_root=CONFIG.MODEL_PATH)
            else:
                self.model = whisper.load_model(name=CONFIG.MODEL_NAME, download_root=CONFIG.MODEL_PATH)

        Thread(target=self.monitor_idleness, daemon=True).start()

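To confirm the container actually sees an Intel GPU before the engine loads a model, a short probe along these lines can help. This is a hedged sketch assuming the Intel image with `/dev/dri` mapped, and `device_count`/`get_device_name` as exposed by recent torch/IPEX builds:

```python
import torch

print("torch:", torch.__version__)
print("xpu available:", torch.xpu.is_available())  # True once /dev/dri is visible
if torch.xpu.is_available():
    print("devices:", torch.xpu.device_count())
    print("name:", torch.xpu.get_device_name(0))
```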
2 changes: 1 addition & 1 deletion app/config.py
@@ -17,7 +17,7 @@ class CONFIG:
print("You must set the HF_TOKEN environment variable to download the diarization model used by WhisperX.")

# Determine the computation device (GPU or CPU)
DEVICE = os.getenv("ASR_DEVICE", "cuda" if torch.cuda.is_available() else "cpu")
DEVICE = os.getenv("ASR_DEVICE", "cuda" if torch.cuda.is_available() else ("xpu" if torch.xpu.is_available() else "cpu"))

# Model name to use (e.g., "base", "small", etc.)
MODEL_NAME = os.getenv("ASR_MODEL", "base")
2 changes: 1 addition & 1 deletion docker-compose.gpu.yml → docker-compose.cuda.yml
@@ -4,7 +4,7 @@ services:
  whisper-asr-webservice-gpu:
    build:
      context: .
      dockerfile: Dockerfile.gpu
      dockerfile: Dockerfile.cuda
    deploy:
      resources:
        reservations:
17 changes: 17 additions & 0 deletions docker-compose.intel.yml
@@ -0,0 +1,17 @@
services:
  whisper-asr-webservice-intel:
    build:
      context: .
      dockerfile: Dockerfile.intel
    environment:
      - ASR_MODEL=base
      - ASR_DEVICE=xpu
    ports:
      - "9000:9000"
    volumes:
      - ./app:/app/app
      - cache-whisper:/root/.cache
    devices:
      - /dev/dri:/dev/dri

volumes:
  cache-whisper:
35 changes: 29 additions & 6 deletions docs/build.md
@@ -20,6 +20,12 @@ Install dependencies for cuda
poetry install --extras cuda
```

Install dependencies for intel xpu

```shell
poetry install --extras xpu
```

!!! Note
By default, this will install the CPU version of PyTorch. For GPU support, you'll need to install the appropriate CUDA version of PyTorch separately:
```shell
@@ -53,16 +59,28 @@ poetry run whisper-asr-webservice --host 0.0.0.0 --port 9000
docker run -d -p 9000:9000 -e ASR_MODEL=base whisper-asr-webservice
```

=== ":octicons-file-code-16: `GPU`"
=== ":octicons-file-code-16: `GPU (cuda)`"

```shell
# Build Image
docker build -f Dockerfile.gpu -t whisper-asr-webservice-gpu .
docker build -f Dockerfile.cuda -t whisper-asr-webservice-cuda .

# Run Container
docker run -d --gpus all -p 9000:9000 whisper-asr-webservice-gpu
docker run -d --gpus all -p 9000:9000 whisper-asr-webservice-cuda
# or with specific model
docker run -d --gpus all -p 9000:9000 -e ASR_MODEL=base whisper-asr-webservice-gpu
docker run -d --gpus all -p 9000:9000 -e ASR_MODEL=base whisper-asr-webservice-cuda
```

=== ":octicons-file-code-16: `GPU (intel)`"

```shell
# Build Image
docker build -f Dockerfile.intel -t whisper-asr-webservice-intel .

# Run Container
docker run -d --device=/dev/dri -p 9000:9000 whisper-asr-webservice-intel
# or with specific model
docker run -d --device=/dev/dri -p 9000:9000 -e ASR_MODEL=base whisper-asr-webservice-intel
```

With `docker-compose`:
@@ -73,10 +91,15 @@ poetry run whisper-asr-webservice --host 0.0.0.0 --port 9000
docker-compose up --build
```

=== ":octicons-file-code-16: `GPU`"
=== ":octicons-file-code-16: `GPU (cuda)`"

```shell
docker-compose -f docker-compose.cuda.yml up --build
```
=== ":octicons-file-code-16: `GPU (intel)`"

```shell
docker-compose -f docker-compose.gpu.yml up --build
docker-compose -f docker-compose.intel.yml up --build
```
=== ":octicons-file-code-16: `Poetry`"

4 changes: 2 additions & 2 deletions docs/environmental-variables.md
@@ -61,11 +61,11 @@ Defaults to `16000`. Default sample rate for audio input. `16 kHz` is commonly used
### Configuring Device and Quantization

```shell
export ASR_DEVICE=cuda # or 'cpu'
export ASR_DEVICE=cuda # or 'cpu' or 'xpu'
export ASR_QUANTIZATION=float32 # or 'float16', 'int8'
```

The `ASR_DEVICE` defaults to `cuda` if GPU is available, otherwise `cpu`.
The `ASR_DEVICE` defaults to `cuda` if an NVIDIA GPU is available, then `xpu` if an Intel GPU is available, otherwise `cpu`.
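
Concretely, the default mirrors the fallback chain in `app/config.py`; as a standalone snippet:

```python
import os

import torch

# CUDA first, then XPU, then CPU; an explicit ASR_DEVICE value overrides the probe.
# (torch.xpu is exposed on recent torch or with intel-extension-for-pytorch installed.)
device = os.getenv(
    "ASR_DEVICE",
    "cuda" if torch.cuda.is_available() else ("xpu" if torch.xpu.is_available() else "cpu"),
)
print(device)
```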

The `ASR_QUANTIZATION` defines the precision for model weights:

12 changes: 10 additions & 2 deletions docs/index.md
@@ -20,17 +20,25 @@ Current release (v1.9.1) supports following whisper models:
docker run -d -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest
```

=== ":octicons-file-code-16: `GPU`"
=== ":octicons-file-code-16: `GPU (cuda)`"

```shell
docker run -d --gpus all -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest-gpu
docker run -d --gpus all -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest-cuda
```

=== ":octicons-file-code-16: `GPU (intel)`"

```shell
docker run -d --device=/dev/dri -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest-intel
```

For more information:

- [Documentation/Run](https://ahmetoner.github.io/whisper-asr-webservice/run)
- [Docker Hub](https://hub.docker.com/r/onerahmet/openai-whisper-asr-webservice)

Disclaimer: Intel GPUs are currently supported only with the `openai_whisper` engine.

## Credits

- This software uses libraries from the [FFmpeg](http://ffmpeg.org) project under the [LGPLv2.1](http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html)
13 changes: 11 additions & 2 deletions docs/run.md
@@ -28,16 +28,25 @@ Docker Hub: <https://hub.docker.com/r/onerahmet/openai-whisper-asr-webservice>
onerahmet/openai-whisper-asr-webservice:latest
```

=== ":octicons-file-code-16: `GPU`"
=== ":octicons-file-code-16: `GPU (cuda)`"

```shell
docker pull onerahmet/openai-whisper-asr-webservice:latest-gpu
docker pull onerahmet/openai-whisper-asr-webservice:latest-cuda
docker run -d --gpus all -p 9000:9000 \
-e ASR_MODEL=base \
-e ASR_ENGINE=openai_whisper \
onerahmet/openai-whisper-asr-webservice:latest-cuda
```

=== ":octicons-file-code-16: `GPU (intel)`"

```shell
docker pull onerahmet/openai-whisper-asr-webservice:latest-intel
docker run -d --device=/dev/dri -p 9000:9000 \
-e ASR_MODEL=base \
-e ASR_ENGINE=openai_whisper \
onerahmet/openai-whisper-asr-webservice:latest-intel
```

### Environment Variables

The following environment variables can be used to configure the service:
Expand Down