Skip to content

Commit 022cb15

Browse files
committed
feat: improve GPU support when multiple GPUs
If there are multiple GPUs, use the first one that is of a known type instead of the first GPU. If no GPUs are of a known type, fall back to the first GPU as before. Add another vendor string that is accepted as an NVIDIA GPU when doing GPU detection, based on what was seen on Linux with an NVIDIA 4070 Ti Super. Signed-off-by: Michael Dawson <mdawson@devrus.com>
1 parent e9dd107 commit 022cb15

File tree

4 files changed

+132
-3
lines changed

4 files changed

+132
-3
lines changed

packages/backend/src/managers/GPUManager.spec.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,25 @@ test('NVIDIA controller should return intel vendor', async () => {
105105
},
106106
]);
107107
});
108+
109+
test('NVIDIA controller can have vendor "NVIDIA Corporation"', async () => {
110+
vi.mocked(graphics).mockResolvedValue({
111+
controllers: [
112+
{
113+
vendor: 'NVIDIA Corporation',
114+
model: 'NVIDIA GeForce GTX 1060 6GB',
115+
vram: 6144,
116+
} as unknown as Systeminformation.GraphicsControllerData,
117+
],
118+
displays: [],
119+
});
120+
121+
const manager = new GPUManager(webviewMock);
122+
expect(await manager.collectGPUs()).toStrictEqual([
123+
{
124+
vendor: GPUVendor.NVIDIA,
125+
model: 'NVIDIA GeForce GTX 1060 6GB',
126+
vram: 6144,
127+
},
128+
]);
129+
});

packages/backend/src/managers/GPUManager.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ export class GPUManager extends Publisher<IGPUInfo[]> implements Disposable {
5353
case 'Intel Corporation':
5454
return GPUVendor.INTEL;
5555
case 'NVIDIA':
56+
case 'NVIDIA Corporation':
5657
return GPUVendor.NVIDIA;
5758
case 'Apple':
5859
return GPUVendor.APPLE;

packages/backend/src/workers/provider/LlamaCppPython.spec.ts

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,100 @@ describe('perform', () => {
313313
expect(server.labels['gpu']).toBe('nvidia');
314314
});
315315

316+
test('gpu experimental should collect GPU data and find first supported gpu - entry 1 supported', async () => {
317+
vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
318+
experimentalGPU: true,
319+
modelsPath: '',
320+
apiPort: 10434,
321+
experimentalTuning: false,
322+
modelUploadDisabled: false,
323+
showGPUPromotion: false,
324+
});
325+
326+
vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
327+
{
328+
vram: 1024,
329+
model: 'dummy-model',
330+
vendor: GPUVendor.UNKNOWN,
331+
},
332+
{
333+
vram: 1024,
334+
model: 'nvidia',
335+
vendor: GPUVendor.NVIDIA,
336+
},
337+
]);
338+
339+
const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
340+
const server = await provider.perform({
341+
port: 8000,
342+
image: undefined,
343+
labels: {},
344+
modelsInfo: [DummyModel],
345+
connection: undefined,
346+
});
347+
348+
expect(containerEngine.createContainer).toHaveBeenCalledWith(
349+
DummyImageInfo.engineId,
350+
expect.objectContaining({
351+
Cmd: [
352+
'-c',
353+
'/usr/bin/ln -sfn /usr/lib/wsl/lib/* /usr/lib64/ && PATH="${PATH}:/usr/lib/wsl/lib/" && chmod 755 ./run.sh && ./run.sh',
354+
],
355+
}),
356+
);
357+
expect(gpuManager.collectGPUs).toHaveBeenCalled();
358+
expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.cuda, expect.any(Function));
359+
expect('gpu' in server.labels).toBeTruthy();
360+
expect(server.labels['gpu']).toBe('nvidia');
361+
});
362+
363+
test('gpu experimental should collect GPU data and find first supported gpu - entry 0 supported', async () => {
364+
vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
365+
experimentalGPU: true,
366+
modelsPath: '',
367+
apiPort: 10434,
368+
experimentalTuning: false,
369+
modelUploadDisabled: false,
370+
showGPUPromotion: false,
371+
});
372+
373+
vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
374+
{
375+
vram: 1024,
376+
model: 'nvidia',
377+
vendor: GPUVendor.NVIDIA,
378+
},
379+
{
380+
vram: 1024,
381+
model: 'dummy-model',
382+
vendor: GPUVendor.UNKNOWN,
383+
},
384+
]);
385+
386+
const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
387+
const server = await provider.perform({
388+
port: 8000,
389+
image: undefined,
390+
labels: {},
391+
modelsInfo: [DummyModel],
392+
connection: undefined,
393+
});
394+
395+
expect(containerEngine.createContainer).toHaveBeenCalledWith(
396+
DummyImageInfo.engineId,
397+
expect.objectContaining({
398+
Cmd: [
399+
'-c',
400+
'/usr/bin/ln -sfn /usr/lib/wsl/lib/* /usr/lib64/ && PATH="${PATH}:/usr/lib/wsl/lib/" && chmod 755 ./run.sh && ./run.sh',
401+
],
402+
}),
403+
);
404+
expect(gpuManager.collectGPUs).toHaveBeenCalled();
405+
expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.cuda, expect.any(Function));
406+
expect('gpu' in server.labels).toBeTruthy();
407+
expect(server.labels['gpu']).toBe('nvidia');
408+
});
409+
316410
test('unknown gpu on unsupported vmtype should not provide gpu labels', async () => {
317411
vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
318412
experimentalGPU: true,

packages/backend/src/workers/provider/LlamaCppPython.ts

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,9 +197,21 @@ export class LlamaCppPython extends InferenceProvider {
197197
if (this.configurationRegistry.getExtensionConfiguration().experimentalGPU) {
198198
const gpus: IGPUInfo[] = await this.gpuManager.collectGPUs();
199199
if (gpus.length === 0) throw new Error('no gpu was found.');
200-
if (gpus.length > 1)
201-
console.warn(`found ${gpus.length} gpus: using multiple GPUs is not supported. Using ${gpus[0].model}.`);
202-
gpu = gpus[0];
200+
let selectedGPU = 0;
201+
if (gpus.length > 1) {
202+
// Look for a GPU that is of a known type, use the first one found.
203+
// Fall back to the first one if no GPUs are of known type.
204+
for (let i = 0; i < gpus.length; i++) {
205+
if (gpus[i].vendor !== GPUVendor.UNKNOWN) {
206+
selectedGPU = i;
207+
break;
208+
}
209+
}
210+
console.warn(
211+
`found ${gpus.length} gpus: using multiple GPUs is not supported. Using ${gpus[selectedGPU].model}.`,
212+
);
213+
}
214+
gpu = gpus[selectedGPU];
203215
}
204216

205217
let connection: ContainerProviderConnection | undefined = undefined;

0 commit comments

Comments
 (0)