Skip to content

Commit

Permalink
chore: update models settings (#673)
Browse files Browse the repository at this point in the history
  • Loading branch information
louis-jan authored Jun 11, 2024
1 parent f380642 commit 9b2c8bb
Show file tree
Hide file tree
Showing 10 changed files with 317 additions and 55 deletions.
78 changes: 71 additions & 7 deletions cortex-js/src/domain/models/model.interface.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
/**
 * Describes a model artifact as a pair of optional string fields.
 * NOTE(review): field semantics are not visible here — presumably `mmproj`
 * points at a multimodal projector file and `llama_model_path` at the main
 * model file; confirm against the code that consumes this type.
 */
export interface ModelArtifact {
mmproj?: string;
llama_model_path?: string;
}

/**
* Model type defines the shape of a model object.
* @stored
Expand Down Expand Up @@ -90,6 +85,56 @@ export interface Model {
*/
cpu_threads?: number;

/**
* The prompt to use for internal configuration
*/
pre_prompt?: string;

/**
* The batch size for prompt eval step
*/
n_batch?: number;

/**
* Whether to enable prompt caching
*/
caching_enabled?: boolean;

/**
* Group attention factor in self-extend
*/
grp_attn_n?: number;

/**
* Group attention width in self-extend
*/
grp_attn_w?: number;

/**
* Prevent system swapping of the model to disk in macOS
*/
mlock?: boolean;

/**
* Path to a GBNF grammar file used to constrain sampling
*/
grammar_file?: string;

/**
* Whether to enable Flash Attention. Defaults to true.
*/
flash_attn?: boolean;

/**
* KV cache type: f16, q8_0, q4_0, default is f16
*/
cache_type?: string;

/**
* Whether to enable mmap. Defaults to true.
*/
use_mmap?: boolean;

/**
* The model engine.
*/
Expand All @@ -112,10 +157,20 @@ export interface ModelSettingParams {
llama_model_path?: string;
mmproj?: string;
cont_batching?: boolean;
vision_model?: boolean;
text_model?: boolean;
engine?: string;
stop?: string[];
pre_prompt?: string;
n_batch?: number;
caching_enabled?: boolean;
grp_attn_n?: number;
grp_attn_w?: number;
mlock?: boolean;
grammar_file?: string;
model_type?: string;
model_alias?: string;
flash_attn?: boolean;
cache_type?: string;
use_mmap?: boolean;
}

/**
Expand All @@ -133,3 +188,12 @@ export interface ModelRuntimeParams {
presence_penalty?: number;
engine?: string;
}

/**
 * The model artifact object.
 * Used in case the model files are not a raw file list.
 *
 * Both fields are optional strings; the same two field names also appear on
 * `ModelSettingParams`.
 * NOTE(review): presumably `mmproj` is the multimodal projector file and
 * `llama_model_path` the main model file — confirm against the consumer.
 */
export interface ModelArtifact {
mmproj?: string;
llama_model_path?: string;
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,13 @@ import {
OPEN_CHAT_3_5_JINJA,
ZEPHYR,
ZEPHYR_JINJA,
} from '../../constants/prompt-constants';
} from './../../constants/prompt-constants';
import {
HUGGING_FACE_DOWNLOAD_FILE_MAIN_URL,
HUGGING_FACE_REPO_MODEL_API_URL,
HUGGING_FACE_REPO_URL,
HUGGING_FACE_TREE_REF_URL,
} from '../../constants/huggingface';
import { ModelTokenizer } from '../types/model-tokenizer.interface';
import { HttpService } from '@nestjs/axios';
import { firstValueFrom } from 'rxjs';
Expand All @@ -29,12 +35,6 @@ import { join, basename } from 'path';
import { load } from 'js-yaml';
import { existsSync, readdirSync, readFileSync } from 'fs';
import { isLocalModel, normalizeModelId } from '../utils/normalize-model-id';
import {
HUGGING_FACE_DOWNLOAD_FILE_MAIN_URL,
HUGGING_FACE_REPO_MODEL_API_URL,
HUGGING_FACE_REPO_URL,
HUGGING_FACE_TREE_REF_URL,
} from '../../constants/huggingface';

@Injectable()
export class ModelsCliUsecases {
Expand Down
60 changes: 33 additions & 27 deletions cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import { Injectable } from '@nestjs/common';
import { defaultCortexCppHost, defaultCortexCppPort } from '@/infrastructure/constants/cortex';
import { HttpStatus, Injectable } from '@nestjs/common';
import {
CORTEX_CPP_MODELS_URL,
defaultCortexCppHost,
defaultCortexCppPort,
} from '@/infrastructure/constants/cortex';
import { HttpService } from '@nestjs/axios';
import { firstValueFrom } from 'rxjs';

export interface ModelStat {
modelId: string;
Expand All @@ -15,6 +21,7 @@ interface ModelStatResponse {
}
@Injectable()
export class PSCliUsecases {
constructor(private readonly httpService: HttpService) {}
/**
* Get models running in the Cortex C++ server
* @param host Cortex host address
Expand All @@ -25,32 +32,31 @@ export class PSCliUsecases {
port: number = defaultCortexCppPort,
): Promise<ModelStat[]> {
return new Promise<ModelStat[]>((resolve, reject) =>
fetch(`http://${host}:${port}/inferences/server/models`)
firstValueFrom(this.httpService.get(CORTEX_CPP_MODELS_URL(host, port)))
.then((res) => {
if (res.ok) {
res
.json()
.then(({ data }: ModelStatResponse) => {
if (data && Array.isArray(data) && data.length > 0) {
resolve(
data.map((e) => {
const startTime = e.start_time ?? new Date();
const currentTime = new Date();
const duration =
currentTime.getTime() - new Date(startTime).getTime();
return {
modelId: e.id,
engine: e.engine ?? 'cortex.llamacpp',
status: 'running',
duration: this.formatDuration(duration),
ram: e.ram ?? '-',
vram: e.vram ?? '-',
};
}),
);
} else reject();
})
.catch(reject);
const data = res.data as ModelStatResponse;
if (
res.status === HttpStatus.OK &&
data &&
Array.isArray(data.data) &&
data.data.length > 0
) {
resolve(
data.data.map((e) => {
const startTime = e.start_time ?? new Date();
const currentTime = new Date();
const duration =
currentTime.getTime() - new Date(startTime).getTime();
return {
modelId: e.id,
engine: e.engine ?? 'cortex.llamacpp',
status: 'running',
duration: this.formatDuration(duration),
ram: e.ram ?? '-',
vram: e.vram ?? '-',
};
}),
);
} else reject();
})
.catch(reject),
Expand Down
5 changes: 5 additions & 0 deletions cortex-js/src/infrastructure/constants/cortex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ export const CORTEX_CPP_HEALTH_Z_URL = (
port: number = defaultCortexCppPort,
) => `http://${host}:${port}/healthz`;

/**
 * Builds the URL of the models endpoint on the Cortex C++ server.
 * @param host Server host; falls back to `defaultCortexCppHost` when omitted.
 * @param port Server port; falls back to `defaultCortexCppPort` when omitted.
 * @returns A string of the form `http://<host>:<port>/inferences/server/models`.
 */
export const CORTEX_CPP_MODELS_URL = (
  host: string = defaultCortexCppHost,
  port: number = defaultCortexCppPort,
) => {
  return `http://${host}:${port}/inferences/server/models`;
};

// INITIALIZATION
export const CORTEX_RELEASES_URL =
'https://api.github.com/repos/janhq/cortex/releases';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import {
IsArray,
IsBoolean,
IsNumber,
IsOptional,
IsString,
ValidateNested,
} from 'class-validator';
Expand Down Expand Up @@ -29,46 +30,53 @@ export class CreateChatCompletionDto {
description:
'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file.',
})
@IsOptional()
@IsBoolean()
stream: boolean;
stream?: boolean;

@ApiProperty({
description:
'Sets the upper limit on the number of tokens the model can generate in a single output.',
})
@IsOptional()
@IsNumber()
max_tokens: number;
max_tokens?: number;

@ApiProperty({
description:
'Defines specific tokens or phrases that signal the model to stop producing further output.',
})
@IsOptional()
@IsArray()
stop: string[];
stop?: string[];

@ApiProperty({
description:
'Modifies the likelihood of the model repeating the same words or phrases within a single output.',
})
@IsOptional()
@IsNumber()
frequency_penalty: number;
frequency_penalty?: number;

@ApiProperty({
description:
'Reduces the likelihood of repeating tokens, promoting novelty in the output.',
})
@IsOptional()
@IsNumber()
presence_penalty: number;
presence_penalty?: number;

@ApiProperty({
description: "Influences the randomness of the model's output.",
})
@IsOptional()
@IsNumber()
temperature: number;
temperature?: number;

@ApiProperty({
description: 'Sets probability threshold for more relevant outputs.',
})
@IsOptional()
@IsNumber()
top_p: number;
top_p?: number;
}
5 changes: 4 additions & 1 deletion cortex-js/src/infrastructure/dtos/cortex/start-cortex.dto.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import { ApiProperty } from '@nestjs/swagger';
import { IsIP, IsNumber, IsString, Max, Min } from 'class-validator';
import { defaultCortexCppHost, defaultCortexCppPort } from '@/infrastructure/constants/cortex';
import {
defaultCortexCppHost,
defaultCortexCppPort,
} from '@/infrastructure/constants/cortex';

export class StartCortexDto {
@ApiProperty({
Expand Down
Loading

0 comments on commit 9b2c8bb

Please sign in to comment.