From 7c85f4e705ba02a5900f0fcf3c99522a2e2dd515 Mon Sep 17 00:00:00 2001
From: Michael Dawson
Date: Wed, 27 Nov 2024 21:58:18 -0500
Subject: [PATCH] feat: initial support for GPU on linux

Signed-off-by: Michael Dawson
---
 packages/backend/src/managers/GPUManager.ts      |  1 +
 .../src/workers/provider/LlamaCppPython.ts       | 40 +++++++++++++++++--
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/packages/backend/src/managers/GPUManager.ts b/packages/backend/src/managers/GPUManager.ts
index 1ddd324c2..97d99beea 100644
--- a/packages/backend/src/managers/GPUManager.ts
+++ b/packages/backend/src/managers/GPUManager.ts
@@ -53,6 +53,7 @@ export class GPUManager extends Publisher<IGPUInfo[]> implements Disposable {
       case 'Intel Corporation':
         return GPUVendor.INTEL;
       case 'NVIDIA':
+      case 'NVIDIA Corporation':
         return GPUVendor.NVIDIA;
       case 'Apple':
         return GPUVendor.APPLE;
diff --git a/packages/backend/src/workers/provider/LlamaCppPython.ts b/packages/backend/src/workers/provider/LlamaCppPython.ts
index 9685c2891..c175d0605 100644
--- a/packages/backend/src/workers/provider/LlamaCppPython.ts
+++ b/packages/backend/src/workers/provider/LlamaCppPython.ts
@@ -134,6 +134,24 @@ export class LlamaCppPython extends InferenceProvider {
           PathInContainer: '/dev/dri',
           CgroupPermissions: '',
         });
+        break;
+      case VMType.UNKNOWN:
+        // Only supports NVIDIA
+        if (gpu.vendor !== GPUVendor.NVIDIA) break;
+
+        supported = true;
+        devices.push({
+          PathOnHost: 'nvidia.com/gpu=all',
+          PathInContainer: '',
+          CgroupPermissions: '',
+        });
+
+        user = '0';
+
+        entrypoint = '/usr/bin/sh';
+
+        cmd = ['-c', 'chmod 755 ./run.sh && ./run.sh'];
+
         break;
     }
 
@@ -197,9 +215,21 @@
     if (this.configurationRegistry.getExtensionConfiguration().experimentalGPU) {
       const gpus: IGPUInfo[] = await this.gpuManager.collectGPUs();
       if (gpus.length === 0) throw new Error('no gpu was found.');
-      if (gpus.length > 1)
-        console.warn(`found ${gpus.length} gpus: using multiple GPUs is not supported. Using ${gpus[0].model}.`);
-      gpu = gpus[0];
+      let selectedGPU = 0;
+      if (gpus.length > 1) {
+        // Look for a GPU that is of a known type, use the first one found.
+        // Fall back to the first one if no GPUs are of known type.
+        for (let i = 0; i < gpus.length; i++) {
+          if (gpus[i].vendor !== GPUVendor.UNKNOWN) {
+            selectedGPU = i;
+            break;
+          }
+        }
+        console.warn(
+          `found ${gpus.length} gpus: using multiple GPUs is not supported. Using ${gpus[selectedGPU].model}.`,
+        );
+      }
+      gpu = gpus[selectedGPU];
     }
 
     let connection: ContainerProviderConnection | undefined = undefined;
@@ -224,7 +254,7 @@
     const containerCreateOptions: ContainerCreateOptions = await this.getContainerCreateOptions(
       config,
       imageInfo,
-      connection.vmType as VMType,
+      vmType,
       gpu,
     );
 
@@ -254,6 +284,8 @@
       case VMType.LIBKRUN_LABEL:
         return gpu ? llamacpp.vulkan : llamacpp.default;
       // no GPU support
+      case VMType.UNKNOWN:
+        return gpu?.vendor === GPUVendor.NVIDIA ? llamacpp.cuda : llamacpp.default;
       default:
         return llamacpp.default;
     }
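
A note for reviewers (not part of the patch): the second LlamaCppPython.ts hunk implements the rule "prefer the first GPU whose vendor was recognized, otherwise fall back to gpus[0]". A minimal standalone sketch of that rule follows; GPUVendor and IGPUInfo here are simplified stand-ins for the extension's real types, with placeholder enum values:

    // Reduced stand-ins for the extension's GPUVendor enum and IGPUInfo type.
    enum GPUVendor {
      UNKNOWN = 'unknown',
      NVIDIA = 'nvidia',
    }

    interface IGPUInfo {
      vendor: GPUVendor;
      model: string;
    }

    // Same rule as the patch: scan for the first known-vendor GPU,
    // fall back to index 0, and warn when several GPUs are present.
    function selectGPU(gpus: IGPUInfo[]): IGPUInfo {
      if (gpus.length === 0) throw new Error('no gpu was found.');
      if (gpus.length > 1)
        console.warn(`found ${gpus.length} gpus: using multiple GPUs is not supported.`);
      return gpus.find(g => g.vendor !== GPUVendor.UNKNOWN) ?? gpus[0];
    }

    // An unknown device listed first (e.g. a software renderer) is skipped
    // in favour of the NVIDIA card.
    const gpu = selectGPU([
      { vendor: GPUVendor.UNKNOWN, model: 'llvmpipe' },
      { vendor: GPUVendor.NVIDIA, model: 'GeForce RTX 4070' },
    ]);
    console.log(gpu.model); // GeForce RTX 4070

One behavioural detail worth noting: PathOnHost: 'nvidia.com/gpu=all' in the first hunk is a CDI device name rather than a device path, so it resolves only on hosts where a CDI spec for the NVIDIA driver exists (typically generated with nvidia-ctk cdi generate).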