Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: refactor cortex API with new model.yaml structure #656

Merged
merged 4 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cortex-js/src/app.module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { DatabaseModule } from './infrastructure/database/database.module';
import { ChatModule } from './usecases/chat/chat.module';
import { AssistantsModule } from './usecases/assistants/assistants.module';
import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module';
import { ModelRepositoryModule } from './infrastructure/repositories/model/model.module';
import { CortexModule } from './usecases/cortex/cortex.module';
import { ConfigModule } from '@nestjs/config';
import { env } from 'node:process';
Expand All @@ -31,6 +32,7 @@ import { FileManagerModule } from './file-manager/file-manager.module';
CortexModule,
ExtensionModule,
FileManagerModule,
ModelRepositoryModule,
],
providers: [SeedService],
})
Expand Down
99 changes: 54 additions & 45 deletions cortex-js/src/domain/models/model.interface.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,6 @@
/**
* Represents the information about a model.
* @stored
*/
export interface ModelInfo {
id: string;
settings: ModelSettingParams;
parameters: ModelRuntimeParams;
engine?: string;
}

export interface ModelArtifact {
url: string;
}

export enum ModelFormat {
GGUF = 'gguf',
API = 'api',
mmproj?: string;
llama_model_path?: string;
}

/**
Expand All @@ -24,64 +9,91 @@ export enum ModelFormat {
*/
export interface Model {
/**
* The type of the object.
* Default: "model"
* Model identifier.
*/
object: string;
model: string;

/**
* The version of the model.
* GGUF metadata: general.name
*/
version: string;
name?: string;

/**
* The format of the model.
* GGUF metadata: version
*/
format: ModelFormat;
version?: string;

/**
* The model download source. It can be an external url or a local filepath.
*/
sources: ModelArtifact[];
files: string[] | ModelArtifact;

/**
* GGUF metadata: tokenizer.chat_template
*/
prompt_template?: string;

/**
* Defines specific tokens or phrases at which the model will stop generating further output.
*/
stop?: string[];

/// Inferencing
/**
* Set probability threshold for more relevant outputs.
*/
top_p?: number;

/**
* The model identifier, which can be referenced in the API endpoints.
* Controls the randomness of the model’s output.
*/
id: string;
temperature?: number;

/**
* Human-readable name that is used for UI.
* Adjusts the likelihood of the model repeating words or phrases in its output.
*/
name: string;
frequency_penalty?: number;

/**
* The Unix timestamp (in seconds) for when the model was created
* Influences the generation of new and varied concepts in the model’s output.
*/
created: number;
presence_penalty?: number;

/// Engines
/**
* Default: "A cool model from Huggingface"
* The context length for model operations varies; the maximum depends on the specific model used.
*/
description: string;
ctx_len?: number;

/**
* The model settings.
* Enable real-time data processing for faster predictions.
*/
settings: ModelSettingParams;
stream?: boolean;

/**
* The maximum number of tokens the model will generate in a single response.
*/
max_tokens?: number;

/**
* The model runtime parameters.
* The number of layers to load onto the GPU for acceleration.
*/
parameters: ModelRuntimeParams;
ngl?: number;

/**
* Metadata of the model.
* The number of parallel operations. Only set when continuous batching is enabled.
*/
metadata: ModelMetadata;
n_parallel?: number;

/**
* Determines CPU inference threads, limited by hardware and OS. (Maximum determined by system)
*/
cpu_threads?: number;

/**
* The model engine.
*/
engine: string;
engine?: string;
}

export interface ModelMetadata {
Expand Down Expand Up @@ -109,6 +121,8 @@ export interface ModelSettingParams {
cont_batching?: boolean;
vision_model?: boolean;
text_model?: boolean;
engine?: string;
stop?: string[];
}

/**
Expand All @@ -133,8 +147,3 @@ export interface ModelRuntimeParams {
export type ModelInitFailed = Model & {
// NOTE(review): presumably the error raised while the model was being initialized — confirm against callers.
error: Error;
};

export interface NitroModelSettings extends ModelSettingParams {
llama_model_path: string;
cpu_threads: number;
}
4 changes: 2 additions & 2 deletions cortex-js/src/domain/models/thread.interface.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { AssistantTool } from './assistant.interface';
import { ModelInfo } from './model.interface';
import { Model } from './model.interface';

export interface Thread {
/** Unique identifier for the thread, generated by default using the ULID method. **/
Expand Down Expand Up @@ -40,7 +40,7 @@ export interface ThreadMetadata {
export interface ThreadAssistantInfo {
assistant_id: string;
assistant_name: string;
model: ModelInfo;
model: Partial<Model>;
instructions?: string;
tools?: AssistantTool[];
}
4 changes: 4 additions & 0 deletions cortex-js/src/domain/repositories/model.interface.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import { Model } from '../models/model.interface';
import { Repository } from './repository.interface';

/**
 * Repository contract for `Model` entities.
 *
 * Declares no members of its own: it only binds the generic `Repository`
 * base class to the `Model` type.
 */
export abstract class ModelRepository extends Repository<Model> {}
30 changes: 30 additions & 0 deletions cortex-js/src/file-manager/file-manager.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,13 @@ export class FileManagerService {
private configFile = '.cortexrc';
private cortexDirectoryName = 'cortex';
private modelFolderName = 'models';
private extensionFoldername = 'extensions';
private cortexCppFolderName = 'cortex-cpp';

/**
* Get cortex configs
* @returns the config object
*/
async getConfig(): Promise<Config> {
const homeDir = os.homedir();
const configPath = join(homeDir, this.configFile);
Expand Down Expand Up @@ -71,8 +76,33 @@ export class FileManagerService {
};
}

/**
* Get the app data folder path
* Usually it is located at the home directory > cortex
* @returns the path to the data folder
*/
async getDataFolderPath(): Promise<string> {
  // The data folder location is read straight from the loaded config object.
  const { dataFolderPath } = await this.getConfig();
  return dataFolderPath;
}

/**
* Get the models data folder path
* Usually it is located at the home directory > cortex > models
* @returns the path to the models folder
*/
async getModelsPath(): Promise<string> {
  // The models folder is the `modelFolderName` child of the data folder.
  return join(await this.getDataFolderPath(), this.modelFolderName);
}

/**
* Get the extensions data folder path
* Usually it is located at the home directory > cortex > extensions
* @returns the path to the extensions folder
*/
async getExtensionsPath(): Promise<string> {
  // Resolve the data folder first, then append the extensions folder name.
  const root = await this.getDataFolderPath();
  const extensionsPath = join(root, this.extensionFoldername);
  return extensionsPath;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ export class ModelGetCommand extends CommandRunner {
exit(1);
}

const models = await this.modelsCliUsecases.getModel(input[0]);
console.log(models);
const model = await this.modelsCliUsecases.getModel(input[0]);
if (!model) console.error('Model not found');
else console.log(model);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@ export class ModelListCommand extends CommandRunner {
option.format === 'table'
? console.table(
models.map((e) => ({
id: e.id,
id: e.model,
engine: e.engine,
format: e.format,
created: e.created,
version: e.version,
})),
)
: console.log(models);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { CommandRunner, InquirerService, SubCommand } from 'nest-commander';
import { exit } from 'node:process';
import { ModelsCliUsecases } from '../usecases/models.cli.usecases';
import { RepoDesignation, listFiles } from '@huggingface/hub';
import { basename } from 'node:path';
import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception';

@SubCommand({
name: 'pull',
Expand All @@ -29,12 +29,16 @@ export class ModelPullCommand extends CommandRunner {
? undefined
: await this.tryToGetBranches(input[0]);

if (!branches) {
await this.modelsCliUsecases.pullModel(input[0]);
} else {
// if there's metadata.yaml file, we assumed it's a JanHQ model
await this.handleJanHqModel(input[0], branches);
}
await this.modelsCliUsecases
.pullModel(
!branches ? input[0] : await this.handleJanHqModel(input[0], branches),
)
.catch((e: Error) => {
if (e instanceof ModelNotFoundException)
console.error('Model does not exist.');
else console.error(e);
exit(1);
});

console.log('\nDownload complete!');
exit(0);
Expand Down Expand Up @@ -83,10 +87,6 @@ export class ModelPullCommand extends CommandRunner {
}

private async handleJanHqModel(repoName: string, branches: string[]) {
const sanitizedRepoName = repoName.trim().startsWith(this.janHqModelPrefix)
? repoName
: `${this.janHqModelPrefix}/${repoName}`;

let selectedTag = branches[0];

if (branches.length > 1) {
Expand All @@ -98,30 +98,7 @@ export class ModelPullCommand extends CommandRunner {
console.error("Can't find model revision.");
exit(1);
}

const repo: RepoDesignation = { type: 'model', name: sanitizedRepoName };
let ggufUrl: string | undefined = undefined;
let fileSize = 0;
for await (const fileInfo of listFiles({
repo: repo,
revision: revision,
})) {
if (fileInfo.path.endsWith('.gguf')) {
ggufUrl = `https://huggingface.co/${sanitizedRepoName}/resolve/${revision}/${fileInfo.path}`;
fileSize = fileInfo.size;
break;
}
}

if (!ggufUrl) {
console.error("Can't find model file.");
exit(1);
}
console.log('Downloading', basename(ggufUrl));
await this.modelsCliUsecases.pullModelWithExactUrl(
`${sanitizedRepoName}/${revision}`,
ggufUrl,
fileSize,
);
// Return parsed model Id
return `${repoName}:${revision}`;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,16 @@ export class ModelStartCommand extends CommandRunner {
}
}

const existingModel = await this.modelsCliUsecases.getModel(modelId);
if (
!existingModel ||
!Array.isArray(existingModel.files) ||
/^(http|https):\/\/[^/]+\/.*/.test(existingModel.files[0])
) {
console.error('Model is not available. Please pull the model first.');
process.exit(1);
}

await this.cortexUsecases
.startCortex(options.attach)
.then(() => this.modelsCliUsecases.startModel(modelId, options.preset))
Expand All @@ -41,15 +51,19 @@ export class ModelStartCommand extends CommandRunner {
}

modelInquiry = async () => {
const models = await this.modelsCliUsecases.listAllModels();
const models = (await this.modelsCliUsecases.listAllModels()).filter(
(model) =>
Array.isArray(model.files) &&
!/^(http|https):\/\/[^/]+\/.*/.test(model.files[0]),
);
if (!models.length) throw 'No models found';
const { model } = await this.inquirerService.inquirer.prompt({
type: 'list',
name: 'model',
message: 'Select a model to start:',
choices: models.map((e) => ({
name: e.name,
value: e.id,
value: e.model,
})),
});
return model;
Expand Down
Loading