Skip to content

Commit

Permalink
feat: refactor cortex API with new model.yaml structure
Browse files Browse the repository at this point in the history
  • Loading branch information
louis-jan committed Jun 4, 2024
1 parent b879f66 commit 494a155
Show file tree
Hide file tree
Showing 30 changed files with 545 additions and 603 deletions.
2 changes: 2 additions & 0 deletions cortex-js/src/app.module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { DatabaseModule } from './infrastructure/database/database.module';
import { ChatModule } from './usecases/chat/chat.module';
import { AssistantsModule } from './usecases/assistants/assistants.module';
import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module';
import { ModelRepositoryModule } from './infrastructure/repositories/model/model.module';
import { CortexModule } from './usecases/cortex/cortex.module';
import { ConfigModule } from '@nestjs/config';
import { env } from 'node:process';
Expand All @@ -31,6 +32,7 @@ import { FileManagerModule } from './file-manager/file-manager.module';
CortexModule,
ExtensionModule,
FileManagerModule,
ModelRepositoryModule,
],
providers: [SeedService],
})
Expand Down
92 changes: 52 additions & 40 deletions cortex-js/src/domain/models/model.interface.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,6 @@
/**
* Represents the information about a model.
* @stored
*/
export interface ModelInfo {
id: string;
settings: ModelSettingParams;
parameters: ModelRuntimeParams;
engine?: string;
}

export interface ModelArtifact {
url: string;
}

export enum ModelFormat {
GGUF = 'gguf',
API = 'api',
mmproj?: string;
llama_model_path?: string;
}

/**
Expand All @@ -24,64 +9,91 @@ export enum ModelFormat {
*/
export interface Model {
/**
* The type of the object.
* Default: "model"
* Model identifier.
*/
object: string;
model: string;

/**
* The version of the model.
* GGUF metadata: general.name
*/
version: string;
name?: string;

/**
* The format of the model.
* GGUF metadata: version
*/
format: ModelFormat;
version?: string;

/**
* The model download source. It can be an external url or a local filepath.
*/
sources: ModelArtifact[];
files: string[] | ModelArtifact;

/**
* GGUF metadata: tokenizer.chat_template
*/
prompt_template?: string;

/**
* Defines specific tokens or phrases at which the model will stop generating further output.
*/
end_token?: string[];

/// Inferencing
/**
* Set probability threshold for more relevant outputs.
*/
top_p?: number;

/**
* The model identifier, which can be referenced in the API endpoints.
* Controls the randomness of the model’s output.
*/
id: string;
temperature?: number;

/**
* Human-readable name that is used for UI.
* Adjusts the likelihood of the model repeating words or phrases in its output.
*/
name: string;
frequency_penalty?: number;

/**
* Influences the generation of new and varied concepts in the model’s output.
*/
presence_penalty?: number;

/// Engines
/**
* The Unix timestamp (in seconds) for when the model was created
* The context length for model operations varies; the maximum depends on the specific model used.
*/
created: number;
ctx_length?: number;

/**
* Default: "A cool model from Huggingface"
* Enable real-time data processing for faster predictions.
*/
description: string;
stream?: boolean;

/*
* The maximum number of tokens the model will generate in a single response.
*/
max_tokens?: number;

/**
* The model settings.
* The number of layers to load onto the GPU for acceleration.
*/
settings: ModelSettingParams;
ngl?: number;

/**
* The model runtime parameters.
* The number of parallel operations. Only set when enable continuous batching.
*/
parameters: ModelRuntimeParams;
n_parallel?: number;

/**
* Metadata of the model.
* Determines CPU inference threads, limited by hardware and OS. (Maximum determined by system)
*/
metadata: ModelMetadata;
cpu_threads?: number;

/**
* The model engine.
*/
engine: string;
engine?: string;
}

export interface ModelMetadata {
Expand Down
4 changes: 2 additions & 2 deletions cortex-js/src/domain/models/thread.interface.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { AssistantTool } from './assistant.interface';
import { ModelInfo } from './model.interface';
import { Model } from './model.interface';

export interface Thread {
/** Unique identifier for the thread, generated by default using the ULID method. **/
Expand Down Expand Up @@ -40,7 +40,7 @@ export interface ThreadMetadata {
export interface ThreadAssistantInfo {
assistant_id: string;
assistant_name: string;
model: ModelInfo;
model: Model;
instructions?: string;
tools?: AssistantTool[];
}
4 changes: 4 additions & 0 deletions cortex-js/src/domain/repositories/model.interface.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import { Model } from '../models/model.interface';
import { Repository } from './repository.interface';

/**
 * Persistence contract for {@link Model} entities.
 * Specializes the generic {@link Repository} to the `Model` domain type.
 * Declared as an abstract class (rather than an interface), presumably so it
 * can double as a runtime injection token for a concrete implementation —
 * TODO confirm against the module that provides it.
 */
export abstract class ModelRepository extends Repository<Model> {}
11 changes: 11 additions & 0 deletions cortex-js/src/file-manager/file-manager.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export class FileManagerService {
private configFile = '.cortexrc';
private cortexDirectoryName = 'cortex';
private modelFolderName = 'models';
private extensionFoldername = 'extensions';
private cortexCppFolderName = 'cortex-cpp';

async getConfig(): Promise<Config> {
Expand Down Expand Up @@ -75,4 +76,14 @@ export class FileManagerService {
const config = await this.getConfig();
return config.dataFolderPath;
}

/**
 * Resolves the directory used to store model files:
 * `<dataFolder>/models`, where the data folder comes from the
 * persisted configuration via `getDataFolderPath()`.
 */
async getModelsPath(): Promise<string> {
  return join(await this.getDataFolderPath(), this.modelFolderName);
}

/**
 * Resolves the directory used to store extensions:
 * `<dataFolder>/extensions`, where the data folder comes from the
 * persisted configuration via `getDataFolderPath()`.
 */
async getExtensionsPath(): Promise<string> {
  return join(await this.getDataFolderPath(), this.extensionFoldername);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -101,14 +101,12 @@ export class ModelPullCommand extends CommandRunner {

const repo: RepoDesignation = { type: 'model', name: sanitizedRepoName };
let ggufUrl: string | undefined = undefined;
let fileSize = 0;
for await (const fileInfo of listFiles({
repo: repo,
revision: revision,
})) {
if (fileInfo.path.endsWith('.gguf')) {
ggufUrl = `https://huggingface.co/${sanitizedRepoName}/resolve/${revision}/${fileInfo.path}`;
fileSize = fileInfo.size;
break;
}
}
Expand All @@ -121,7 +119,6 @@ export class ModelPullCommand extends CommandRunner {
await this.modelsCliUsecases.pullModelWithExactUrl(
`${sanitizedRepoName}/${revision}`,
ggufUrl,
fileSize,
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export class ModelStartCommand extends CommandRunner {
message: 'Select a model to start:',
choices: models.map((e) => ({
name: e.name,
value: e.id,
value: e.model,
})),
});
return model;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
import { CommandRunner, SubCommand, Option } from 'nest-commander';
import { ModelsCliUsecases } from '../usecases/models.cli.usecases';
import { exit } from 'node:process';
import { ModelParameterParser } from '../utils/model-parameter.parser';
import {
ModelRuntimeParams,
ModelSettingParams,
} from '@/domain/models/model.interface';
import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto';

type UpdateOptions = {
model?: string;
Expand All @@ -31,42 +27,13 @@ export class ModelUpdateCommand extends CommandRunner {
exit(0);
}

const parser = new ModelParameterParser();
const settingParams: ModelSettingParams = {};
const runtimeParams: ModelRuntimeParams = {};
const toUpdate: UpdateModelDto = {};

options.forEach((option) => {
const [key, stringValue] = option.split('=');
if (parser.isModelSettingParam(key)) {
const value = parser.parse(key, stringValue);
// @ts-expect-error did the check so it's safe
settingParams[key] = value;
} else if (parser.isModelRuntimeParam(key)) {
const value = parser.parse(key, stringValue);
// @ts-expect-error did the check so it's safe
runtimeParams[key] = value;
}
Object.assign(toUpdate, { key, stringValue });
});

if (Object.keys(settingParams).length > 0) {
const updatedSettingParams =
await this.modelsCliUsecases.updateModelSettingParams(
modelId,
settingParams,
);
console.log(
'Updated setting params! New setting params:',
updatedSettingParams,
);
}

if (Object.keys(runtimeParams).length > 0) {
await this.modelsCliUsecases.updateModelRuntimeParams(
modelId,
runtimeParams,
);
console.log('Updated runtime params! New runtime params:', runtimeParams);
}
this.modelsCliUsecases.updateModel(modelId, toUpdate);
}

@Option({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ export class RunCommand extends CommandRunner {
message: 'Select a model to start:',
choices: models.map((e) => ({
name: e.name,
value: e.id,
value: e.model,
})),
});
return model;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import { Thread } from '@/domain/models/thread.interface';
import { CreateThreadDto } from '@/infrastructure/dtos/threads/create-thread.dto';
import { AssistantsUsecases } from '@/usecases/assistants/assistants.usecases';
import { CreateThreadAssistantDto } from '@/infrastructure/dtos/threads/create-thread-assistant.dto';
import { CreateThreadModelInfoDto } from '@/infrastructure/dtos/threads/create-thread-model-info.dto';
import { ModelsUsecases } from '@/usecases/models/models.usecases';
import stream from 'stream';
import { CreateMessageDto } from '@/infrastructure/dtos/messages/create-message.dto';
Expand Down Expand Up @@ -194,16 +193,10 @@ export class ChatCliUsecases {
const assistant = await this.assistantUsecases.findOne('jan');
if (!assistant) throw new Error('No assistant available');

const createThreadModel: CreateThreadModelInfoDto = {
id: modelId,
settings: model.settings,
parameters: model.parameters,
};

const assistantDto: CreateThreadAssistantDto = {
assistant_id: assistant.id,
assistant_name: assistant.name,
model: createThreadModel,
model: model,
};

const createThreadDto: CreateThreadDto = {
Expand Down
Loading

0 comments on commit 494a155

Please sign in to comment.