Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: basic vllm support for hf cached models #2262

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
},
"dependencies": {
"@huggingface/gguf": "^0.1.12",
"@huggingface/hub": "^0.21.0",
"express": "^4.21.2",
"express-openapi-validator": "^5.3.9",
"isomorphic-git": "^1.27.2",
Expand Down
3 changes: 3 additions & 0 deletions packages/backend/src/assets/inference-images.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,8 @@
"default": "ghcr.io/containers/podman-desktop-extension-ai-lab-playground-images/ai-lab-playground-chat@sha256:20734e9d60f047d27e4c9cf6a3b663e0627d48bd06d0a73b968f9d81c82de2f1",
"cuda": "ghcr.io/containers/podman-desktop-extension-ai-lab-playground-images/ai-lab-playground-chat-cuda@sha256:798acced911527254601d0e39a90c5a29ecad82755f28594bea9a587ea9e6043",
"vulkan": "ghcr.io/containers/podman-desktop-extension-ai-lab-playground-images/ai-lab-playground-chat-vulkan@sha256:22e11661fe66ace7c30b419703305b803eb937da10e19c23cb6767f03578256c"
},
"vllm": {
"default": "quay.io/rh-ee-astefani/vllm:cpu-1734105797"
}
}
73 changes: 61 additions & 12 deletions packages/backend/src/managers/modelsManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,14 @@
import type { PodmanConnection } from './podmanConnection';
import { VMType } from '@shared/src/models/IPodman';
import type { ConfigurationRegistry } from '../registries/ConfigurationRegistry';
import { InferenceType } from '@shared/src/models/IInference';
import { scanCacheDir } from '@huggingface/hub';
import { basename, join } from 'node:path';

export class ModelsManager implements Disposable {
#models: Map<string, ModelInfo>;
#hfCache: Map<string, ModelInfo>;

#watcher?: podmanDesktopApi.FileSystemWatcher;
#disposables: Disposable[];

Expand All @@ -58,6 +63,7 @@
private configurationRegistry: ConfigurationRegistry,
) {
this.#models = new Map();
this.#hfCache = new Map();
this.#disposables = [];
}

Expand All @@ -72,6 +78,44 @@
this.loadLocalModels().catch((err: unknown) => {
console.error('Something went wrong while trying to load local models', err);
});

scanCacheDir()
.then(results => {
this.#hfCache.clear();
results.repos.forEach(repo => {
if (repo.revisions.length === 0) {
console.warn(`found hugging face cache repository ${repo.id} without any revision`);
return;
}

// ensure at least one safetensor is available
if (!repo.revisions[0].files.some(file => file.path.endsWith('.safetensors'))) {
console.warn(
`hugging face cache repository ${repo.id.name} do not contain any .safetensors file: ignoring`,
);
return;
}

const id = basename(repo.path);
this.#hfCache.set(id, {
id: id,
backend: InferenceType.VLLM,
file: {
file: repo.revisions[0].commitOid,
path: join(repo.path, 'snapshots'),
creation: repo.lastModifiedAt,
size: repo.size,
},
name: repo.id.name,
description: repo.id.name,
properties: {
origin: 'HF_CACHE',
},
});
});
this.notify();
})
.catch(console.error);
}

dispose(): void {
Expand All @@ -85,7 +129,7 @@
this.catalogManager.getModels().forEach(m => this.#models.set(m.id, m));
const reloadLocalModels = async (): Promise<void> => {
this.getLocalModelsFromDisk();
await this.sendModelsInfo();
this.notify();
};
if (this.#watcher === undefined) {
this.#watcher = apiFs.createFileSystemWatcher(this.modelsDir);
Expand All @@ -99,15 +143,17 @@
}

getModelsInfo(): ModelInfo[] {
return [...this.#models.values()];
return [...this.#models.values(), ...this.#hfCache.values()];
}

async sendModelsInfo(): Promise<void> {
notify(): void {
const models = this.getModelsInfo();
await this.webview.postMessage({
id: Messages.MSG_NEW_MODELS_STATE,
body: models,
});
this.webview
.postMessage({

Check failure on line 152 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

Unhandled error

TypeError: this.webview.postMessage is not a function ❯ ModelsManager.notify src/managers/modelsManager.ts:152:8 ❯ src/managers/modelsManager.ts:380:14 ❯ ModelsManager.onDownloadUploadEvent src/managers/modelsManager.ts:352:11 ❯ src/managers/modelsManager.ts:462:38 ❯ Timeout._onTimeout src/managers/modelsManager.spec.ts:836:9 ❯ listOnTimeout ../../node:internal/timers:581:17 ❯ processTimers ../../node:internal/timers:519:7 This error originated in "src/managers/modelsManager.spec.ts" test file. It doesn't mean the error was thrown inside the file itself, but while it was running. The latest test that might've caused the error is "multiple download request same model - second call before first completed". It might mean one of the following: - The error was thrown, while Vitest was running this test. - If the error occurred after the test had been completed, this was the last documented test before it was thrown.

Check failure on line 152 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

Unhandled error

TypeError: this.webview.postMessage is not a function ❯ ModelsManager.notify src/managers/modelsManager.ts:152:8 ❯ src/managers/modelsManager.ts:380:14 ❯ ModelsManager.onDownloadUploadEvent src/managers/modelsManager.ts:352:11 ❯ src/managers/modelsManager.ts:462:38 ❯ Timeout._onTimeout src/managers/modelsManager.spec.ts:836:9 ❯ listOnTimeout ../../node:internal/timers:581:17 ❯ processTimers ../../node:internal/timers:519:7 This error originated in "src/managers/modelsManager.spec.ts" test file. It doesn't mean the error was thrown inside the file itself, but while it was running. The latest test that might've caused the error is "multiple download request same model - second call before first completed". It might mean one of the following: - The error was thrown, while Vitest was running this test. - If the error occurred after the test had been completed, this was the last documented test before it was thrown.

Check failure on line 152 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-14

Unhandled error

TypeError: this.webview.postMessage is not a function ❯ ModelsManager.notify src/managers/modelsManager.ts:152:8 ❯ src/managers/modelsManager.ts:380:14 ❯ ModelsManager.onDownloadUploadEvent src/managers/modelsManager.ts:352:11 ❯ src/managers/modelsManager.ts:462:38 ❯ Timeout._onTimeout src/managers/modelsManager.spec.ts:836:9 ❯ listOnTimeout ../../node:internal/timers:581:17 ❯ processTimers ../../node:internal/timers:519:7 This error originated in "src/managers/modelsManager.spec.ts" test file. It doesn't mean the error was thrown inside the file itself, but while it was running. The latest test that might've caused the error is "multiple download request same model - second call before first completed". It might mean one of the following: - The error was thrown, while Vitest was running this test. - If the error occurred after the test had been completed, this was the last documented test before it was thrown.
id: Messages.MSG_NEW_MODELS_STATE,
body: models,
})

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

src/managers/modelsManager.spec.ts > getModelsInfo should get models in local directory

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:215:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

src/managers/modelsManager.spec.ts > getLocalModelsFromDisk should return undefined Date and size when stat fail

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:308:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

src/managers/modelsManager.spec.ts > getLocalModelsFromDisk should skip folders containing tmp files

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:368:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

src/managers/modelsManager.spec.ts > loadLocalModels should post a message with the message on disk and on catalog

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:410:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

src/managers/modelsManager.spec.ts > deleteModel deletes the model folder

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:462:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

src/managers/modelsManager.spec.ts > deleting models > deleteModel fails to delete the model folder

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:528:19

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

src/managers/modelsManager.spec.ts > deleting models > delete local model should call catalogManager

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:595:19

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

src/managers/modelsManager.spec.ts > getModelsInfo should get models in local directory

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:215:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

src/managers/modelsManager.spec.ts > getLocalModelsFromDisk should return undefined Date and size when stat fail

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:308:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

src/managers/modelsManager.spec.ts > getLocalModelsFromDisk should skip folders containing tmp files

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:368:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

src/managers/modelsManager.spec.ts > loadLocalModels should post a message with the message on disk and on catalog

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:410:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

src/managers/modelsManager.spec.ts > deleteModel deletes the model folder

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:462:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

src/managers/modelsManager.spec.ts > deleting models > deleteModel fails to delete the model folder

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:528:19

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

src/managers/modelsManager.spec.ts > deleting models > delete local model should call catalogManager

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:595:19

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-14

src/managers/modelsManager.spec.ts > getModelsInfo should get models in local directory

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:215:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-14

src/managers/modelsManager.spec.ts > getLocalModelsFromDisk should return undefined Date and size when stat fail

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:308:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-14

src/managers/modelsManager.spec.ts > getLocalModelsFromDisk should skip folders containing tmp files

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:368:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-14

src/managers/modelsManager.spec.ts > loadLocalModels should post a message with the message on disk and on catalog

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:410:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-14

src/managers/modelsManager.spec.ts > deleteModel deletes the model folder

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:462:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-14

src/managers/modelsManager.spec.ts > deleting models > deleteModel fails to delete the model folder

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:528:19

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-14

src/managers/modelsManager.spec.ts > deleting models > delete local model should call catalogManager

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:595:19
.catch(console.error);
}

getModelsDirectory(): string {
Expand Down Expand Up @@ -186,7 +232,7 @@
}

model.state = 'deleting';
await this.sendModelsInfo();
this.notify();
try {
await this.deleteRemoteModel(model);
let modelPath;
Expand Down Expand Up @@ -214,7 +260,7 @@
model.state = undefined;
this.getLocalModelsFromDisk();
} finally {
await this.sendModelsInfo();
this.notify();
}
}

Expand Down Expand Up @@ -331,9 +377,7 @@

// refresh model lists on event completion
this.getLocalModelsFromDisk();
this.sendModelsInfo().catch((err: unknown) => {
console.error('Something went wrong while sending models info.', err);
});
this.notify();

// cleanup downloader
this.#downloaders.delete(event.id);
Expand Down Expand Up @@ -433,6 +477,11 @@
return getLocalModelFile(model);
}

if (model.backend === InferenceType.VLLM) {
console.warn('Model upload for vllm is disabled');
return getLocalModelFile(model);
}

this.taskRegistry.createTask(`Copying model ${model.name} to ${connection.name}`, 'loading', {
...labels,
'model-uploading': model.id,
Expand Down
5 changes: 5 additions & 0 deletions packages/backend/src/studio.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ import { InstructlabApiImpl } from './instructlab-api-impl';
import { NavigationRegistry } from './registries/NavigationRegistry';
import { StudioAPI } from '@shared/src/StudioAPI';
import { InstructlabAPI } from '@shared/src/InstructlabAPI';
import { VLLM } from './workers/provider/VLLM';

export class Studio {
readonly #extensionContext: ExtensionContext;
Expand Down Expand Up @@ -260,6 +261,10 @@ export class Studio {
this.#inferenceProviderRegistry.register(new WhisperCpp(this.#taskRegistry, this.#podmanConnection)),
);

this.#extensionContext.subscriptions.push(
this.#inferenceProviderRegistry.register(new VLLM(this.#taskRegistry, this.#podmanConnection)),
);

/**
* The inference manager create, stop, manage Inference servers
*/
Expand Down
148 changes: 148 additions & 0 deletions packages/backend/src/workers/provider/VLLM.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/

import { InferenceProvider } from './InferenceProvider';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import type { PodmanConnection } from '../../managers/podmanConnection';
import { type InferenceServer, InferenceType } from '@shared/src/models/IInference';
import type { InferenceServerConfig } from '@shared/src/models/InferenceServerConfig';
import type { ContainerProviderConnection, MountConfig } from '@podman-desktop/api';
import * as images from '../../assets/inference-images.json';
import { LABEL_INFERENCE_SERVER } from '../../utils/inferenceUtils';
import { DISABLE_SELINUX_LABEL_SECURITY_OPTION } from '../../utils/utils';
import { basename, dirname } from 'node:path';
import { join as joinposix } from 'node:path/posix';
import { getLocalModelFile } from '../../utils/modelsUtils';

/**
 * Inference provider that starts a vLLM container for a single model.
 *
 * The model file is expected to resolve to a Hugging Face cache snapshot directory
 * (`<cache>/<repo-folder>/snapshots/<commit-hash>`). The repository folder is bind-mounted
 * under `/cache/<model id>` in the container and vLLM is pointed at the snapshot inside it.
 */
export class VLLM extends InferenceProvider {
  constructor(
    taskRegistry: TaskRegistry,
    private podmanConnection: PodmanConnection,
  ) {
    super(taskRegistry, InferenceType.VLLM, 'vllm');
  }

  /** No-op: nothing is released here. */
  dispose(): void {}

  /** This provider always reports itself as enabled. */
  public enabled = (): boolean => true;

  /**
   * Pull the vLLM image, create the container and describe the resulting inference server.
   *
   * Equivalent command line, for a model cached under `<hub>/models--mistralai--Mistral-7B-v0.1`:
   *
   *   podman run -it --rm
   *     -v <hub>/models--mistralai--Mistral-7B-v0.1:/cache/models--mistralai--Mistral-7B-v0.1
   *     -e HF_HUB_CACHE=/cache
   *     <vllm image>
   *     --model=/cache/models--mistralai--Mistral-7B-v0.1/snapshots/<commit-hash>
   *
   * @param config the server configuration; must contain exactly one model whose backend is
   * {@link InferenceType.VLLM} and whose `file` is defined
   * @returns the description of the created inference server, with status `running`
   * @throws Error when the number of models is not one, the model backend is not vLLM, the model
   * has no file, the resolved path is not directly under a `snapshots` directory, or no
   * container provider connection can be found
   */
  override async perform(config: InferenceServerConfig): Promise<InferenceServer> {
    if (config.modelsInfo.length !== 1) {
      throw new Error(`only one model is supported, received ${config.modelsInfo.length}`);
    }

    const modelInfo = config.modelsInfo[0];
    if (modelInfo.backend !== InferenceType.VLLM) {
      throw new Error(`VLLM requires models with backend type ${InferenceType.VLLM} got ${modelInfo.backend}.`);
    }

    if (modelInfo.file === undefined) {
      throw new Error('The model info file provided is undefined');
    }

    console.log('[VLLM]', config);
    console.log('[VLLM] modelInfo.file', modelInfo.file);

    const fullPath = getLocalModelFile(modelInfo);

    // fullPath must be under the form $(HF_HUB_CACHE)/<repo-type>--<repo-id>/snapshots/<commit-hash>
    const snapshotsDir = dirname(fullPath);
    const commitHash = basename(fullPath);
    if (basename(snapshotsDir) !== 'snapshots') {
      throw new Error('you must provide snapshot path for vllm');
    }
    // the repository folder that contains `snapshots`: this is what gets mounted
    const modelCache = dirname(snapshotsDir);

    // use the requested connection when one is given, otherwise any running one
    const connection: ContainerProviderConnection | undefined = config.connection
      ? this.podmanConnection.getContainerProviderConnection(config.connection)
      : this.podmanConnection.findRunningContainerProviderConnection();

    if (!connection) {
      throw new Error('no running connection could be found');
    }

    const labels: Record<string, string> = {
      ...config.labels,
      [LABEL_INFERENCE_SERVER]: JSON.stringify(config.modelsInfo.map(model => model.id)),
    };

    const imageInfo = await this.pullImage(connection, config.image ?? images.vllm.default, labels);

    // https://huggingface.co/docs/transformers/main/en/installation#offline-mode
    // HF_HUB_OFFLINE in main
    // TRANSFORMERS_OFFLINE for legacy
    const envs: string[] = [`HF_HUB_CACHE=/cache`, 'TRANSFORMERS_OFFLINE=1', 'HF_HUB_OFFLINE=1'];

    // vLLM's OpenAI-compatible server exposes its routes under /v1 (e.g. /v1/chat/completions).
    // NOTE(review): assumes the image's entrypoint is that server (default port 8000, bound below) — confirm.
    labels['api'] = `http://localhost:${config.port}/v1`;

    const mounts: MountConfig = [
      {
        Target: `/cache/${modelInfo.id}`,
        Source: modelCache,
        Type: 'bind',
      },
    ];

    // posix join: this path is resolved inside the container, whatever the host platform is
    const containerModelPath = joinposix('/cache', modelInfo.id, 'snapshots', commitHash);

    const containerInfo = await this.createContainer(
      imageInfo.engineId,
      {
        Image: imageInfo.Id,
        Detach: true,
        Labels: labels,
        HostConfig: {
          AutoRemove: false,
          Mounts: mounts,
          PortBindings: {
            '8000/tcp': [
              {
                HostPort: `${config.port}`,
              },
            ],
          },
          SecurityOpt: [DISABLE_SELINUX_LABEL_SECURITY_OPTION],
        },
        Env: envs,
        Cmd: [`--model=${containerModelPath}`],
      },
      labels,
    );

    return {
      models: [modelInfo],
      status: 'running',
      connection: {
        port: config.port,
      },
      container: {
        containerId: containerInfo.id,
        engineId: containerInfo.engineId,
      },
      type: InferenceType.VLLM,
      labels: labels,
    };
  }
}
15 changes: 12 additions & 3 deletions packages/frontend/src/lib/table/model/ModelColumnName.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,20 @@
import type { ModelInfo } from '@shared/src/models/IModelInfo';
import { router } from 'tinro';

export let object: ModelInfo;
interface Props {
object: ModelInfo;
}

let { object }: Props = $props();

let hf: boolean = $state(object.properties?.['origin'] === 'HF_CACHE');

function openDetails(): void {
router.goto(`/model/${object.id}`);
}
</script>

<button class="flex flex-col w-full" title={object.name} on:click={openDetails} aria-label="Open Model Details">
<button class="flex flex-col w-full" title={object.name} onclick={openDetails} aria-label="Open Model Details">
<div
class="text-[var(--pd-table-body-text-highlight)] overflow-hidden text-ellipsis w-full text-left"
aria-label="Model Name">
Expand All @@ -19,7 +25,10 @@ function openDetails(): void {
<span class="text-sm text-[var(--pd-table-body-text)]" aria-label="Model Info"
>{object.registry} - {object.license}</span>
{/if}
{#if !object.registry && !object.license && !object.url}
{#if hf}
<span class="text-sm text-[var(--pd-table-body-text)]" aria-label="Imported Model Info"
>Loaded from hugging face cache</span>
{:else if !object.registry && !object.license && !object.url}
<span class="text-sm text-[var(--pd-table-body-text)]" aria-label="Imported Model Info">Imported by User</span>
{/if}
</button>
1 change: 1 addition & 0 deletions packages/shared/src/models/IInference.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export enum InferenceType {
LLAMA_CPP = 'llama-cpp',
WHISPER_CPP = 'whisper-cpp',
NONE = 'none',
VLLM = 'vllm',
}

export type InferenceServerStatus = 'stopped' | 'running' | 'deleting' | 'stopping' | 'error' | 'starting';
Expand Down
Loading
Loading