Skip to content

Commit 022cb15

Browse files
committed
feat: improve GPU support when multiple GPUs
If there are multiple GPUs, use the first one that is of a known type instead of the first GPU. If no GPUs are of a known type, fall back to the first GPU as before. Add another vendor string that is accepted as an NVIDIA GPU when doing GPU detection, based on what was seen on Linux with an NVIDIA 4070 Ti Super. Signed-off-by: Michael Dawson <mdawson@devrus.com>
1 parent e9dd107 commit 022cb15

File tree

4 files changed

+132
-3
lines changed

4 files changed

+132
-3
lines changed

packages/backend/src/managers/GPUManager.spec.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,25 @@ test('NVIDIA controller should return intel vendor', async () => {
105105
},
106106
]);
107107
});
108+
109+
test('NVIDIA controller can have vendor "NVIDIA Corporation"', async () => {
110+
vi.mocked(graphics).mockResolvedValue({
111+
controllers: [
112+
{
113+
vendor: 'NVIDIA Corporation',
114+
model: 'NVIDIA GeForce GTX 1060 6GB',
115+
vram: 6144,
116+
} as unknown as Systeminformation.GraphicsControllerData,
117+
],
118+
displays: [],
119+
});
120+
121+
const manager = new GPUManager(webviewMock);
122+
expect(await manager.collectGPUs()).toStrictEqual([
123+
{
124+
vendor: GPUVendor.NVIDIA,
125+
model: 'NVIDIA GeForce GTX 1060 6GB',
126+
vram: 6144,
127+
},
128+
]);
129+
});

packages/backend/src/managers/GPUManager.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ export class GPUManager extends Publisher<IGPUInfo[]> implements Disposable {
5353
case 'Intel Corporation':
5454
return GPUVendor.INTEL;
5555
case 'NVIDIA':
56+
case 'NVIDIA Corporation':
5657
return GPUVendor.NVIDIA;
5758
case 'Apple':
5859
return GPUVendor.APPLE;

packages/backend/src/workers/provider/LlamaCppPython.spec.ts

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,100 @@ describe('perform', () => {
313313
expect(server.labels['gpu']).toBe('nvidia');
314314
});
315315

316+
test('gpu experimental should collect GPU data and find first supported gpu - entry 1 supported', async () => {
317+
vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
318+
experimentalGPU: true,
319+
modelsPath: '',
320+
apiPort: 10434,
321+
experimentalTuning: false,
322+
modelUploadDisabled: false,
323+
showGPUPromotion: false,
324+
});
325+
326+
vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
327+
{
328+
vram: 1024,
329+
model: 'dummy-model',
330+
vendor: GPUVendor.UNKNOWN,
331+
},
332+
{
333+
vram: 1024,
334+
model: 'nvidia',
335+
vendor: GPUVendor.NVIDIA,
336+
},
337+
]);
338+
339+
const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
340+
const server = await provider.perform({
341+
port: 8000,
342+
image: undefined,
343+
labels: {},
344+
modelsInfo: [DummyModel],
345+
connection: undefined,
346+
});
347+
348+
expect(containerEngine.createContainer).toHaveBeenCalledWith(
349+
DummyImageInfo.engineId,
350+
expect.objectContaining({
351+
Cmd: [
352+
'-c',
353+
'/usr/bin/ln -sfn /usr/lib/wsl/lib/* /usr/lib64/ && PATH="${PATH}:/usr/lib/wsl/lib/" && chmod 755 ./run.sh && ./run.sh',
354+
],
355+
}),
356+
);
357+
expect(gpuManager.collectGPUs).toHaveBeenCalled();
358+
expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.cuda, expect.any(Function));
359+
expect('gpu' in server.labels).toBeTruthy();
360+
expect(server.labels['gpu']).toBe('nvidia');
361+
});
362+
363+
test('gpu experimental should collect GPU data and find first supported gpu - entry 0 supported', async () => {
364+
vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
365+
experimentalGPU: true,
366+
modelsPath: '',
367+
apiPort: 10434,
368+
experimentalTuning: false,
369+
modelUploadDisabled: false,
370+
showGPUPromotion: false,
371+
});
372+
373+
vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
374+
{
375+
vram: 1024,
376+
model: 'nvidia',
377+
vendor: GPUVendor.NVIDIA,
378+
},
379+
{
380+
vram: 1024,
381+
model: 'dummy-model',
382+
vendor: GPUVendor.UNKNOWN,
383+
},
384+
]);
385+
386+
const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
387+
const server = await provider.perform({
388+
port: 8000,
389+
image: undefined,
390+
labels: {},
391+
modelsInfo: [DummyModel],
392+
connection: undefined,
393+
});
394+
395+
expect(containerEngine.createContainer).toHaveBeenCalledWith(
396+
DummyImageInfo.engineId,
397+
expect.objectContaining({
398+
Cmd: [
399+
'-c',
400+
'/usr/bin/ln -sfn /usr/lib/wsl/lib/* /usr/lib64/ && PATH="${PATH}:/usr/lib/wsl/lib/" && chmod 755 ./run.sh && ./run.sh',
401+
],
402+
}),
403+
);
404+
expect(gpuManager.collectGPUs).toHaveBeenCalled();
405+
expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.cuda, expect.any(Function));
406+
expect('gpu' in server.labels).toBeTruthy();
407+
expect(server.labels['gpu']).toBe('nvidia');
408+
});
409+
316410
test('unknown gpu on unsupported vmtype should not provide gpu labels', async () => {
317411
vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
318412
experimentalGPU: true,

packages/backend/src/workers/provider/LlamaCppPython.ts

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,9 +197,21 @@ export class LlamaCppPython extends InferenceProvider {
197197
if (this.configurationRegistry.getExtensionConfiguration().experimentalGPU) {
198198
const gpus: IGPUInfo[] = await this.gpuManager.collectGPUs();
199199
if (gpus.length === 0) throw new Error('no gpu was found.');
200-
if (gpus.length > 1)
201-
console.warn(`found ${gpus.length} gpus: using multiple GPUs is not supported. Using ${gpus[0].model}.`);
202-
gpu = gpus[0];
200+
let selectedGPU = 0;
201+
if (gpus.length > 1) {
202+
// Look for a GPU that is of a known type, use the first one found.
203+
// Fall back to the first one if no GPUs are of known type.
204+
for (let i = 0; i < gpus.length; i++) {
205+
if (gpus[i].vendor !== GPUVendor.UNKNOWN) {
206+
selectedGPU = i;
207+
break;
208+
}
209+
}
210+
console.warn(
211+
`found ${gpus.length} gpus: using multiple GPUs is not supported. Using ${gpus[selectedGPU].model}.`,
212+
);
213+
}
214+
gpu = gpus[selectedGPU];
203215
}
204216

205217
let connection: ContainerProviderConnection | undefined = undefined;

0 commit comments

Comments
 (0)