From 8dc1def4b49b1e83d7c722208ef09b0030cbdd4c Mon Sep 17 00:00:00 2001
From: irfanpena
Date: Tue, 11 Jun 2024 14:31:59 +0700
Subject: [PATCH 1/4] api: Update the API description

---
 .../infrastructure/controllers/embeddings.controller.ts | 4 ++--
 .../infrastructure/dtos/chat/embeddings-response.dto.ts | 8 ++++----
 .../dtos/embeddings/embeddings-request.dto.ts           | 8 ++++----
 cortex-js/src/main.ts                                   | 4 ++++
 4 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/cortex-js/src/infrastructure/controllers/embeddings.controller.ts b/cortex-js/src/infrastructure/controllers/embeddings.controller.ts
index 286b791a9..ae3c6a39a 100644
--- a/cortex-js/src/infrastructure/controllers/embeddings.controller.ts
+++ b/cortex-js/src/infrastructure/controllers/embeddings.controller.ts
@@ -10,8 +10,8 @@ export class EmbeddingsController {
   constructor(private readonly chatService: ChatUsecases) {}
 
   @ApiOperation({
-    summary: 'Embedding vector for text',
-    description: 'Creates an embedding vector representing the input text..',
+    summary: 'Create embedding vector',
+    description: 'Creates an embedding vector representing the input text.',
   })
   @HttpCode(200)
   @ApiResponse({
diff --git a/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts b/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts
index f73a176a1..3d7cf65b3 100644
--- a/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts
+++ b/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts
@@ -3,25 +3,25 @@ import { UsageDto } from './usage.dto';
 
 export class EmbeddingsResponseDto {
   @ApiProperty({
-    description: 'Result object type.',
+    description: 'Type of the result object.',
     type: String,
   })
   object: string;
 
   @ApiProperty({
-    description: 'ID of the model used for embeddings',
+    description: 'Identifier of the model utilized for generating embeddings.',
     type: String,
   })
   model: string;
 
   @ApiProperty({
-    description: 'The embedding vector, which is a list of floats. ',
+    description: 'The embedding vector represented as an array of floating-point numbers. ',
     type: [Number],
   })
   embedding: [number];
 
   @ApiProperty({
-    description: 'Returns prompt_tokens and total_tokens usage ',
+    description: 'Details of token usage, including prompt_tokens and total_tokens.',
     type: UsageDto,
   })
   usage: UsageDto;
diff --git a/cortex-js/src/infrastructure/dtos/embeddings/embeddings-request.dto.ts b/cortex-js/src/infrastructure/dtos/embeddings/embeddings-request.dto.ts
index 0be76e23f..2fe75d961 100644
--- a/cortex-js/src/infrastructure/dtos/embeddings/embeddings-request.dto.ts
+++ b/cortex-js/src/infrastructure/dtos/embeddings/embeddings-request.dto.ts
@@ -3,21 +3,21 @@ import { ApiProperty } from '@nestjs/swagger';
 
 export class CreateEmbeddingsDto {
   @ApiProperty({
-    description: 'Embedding model',
+    description: 'The name of the embedding model to be used.',
     type: String,
   })
   model: string;
 
   @ApiProperty({
     description:
-      'Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays.',
+      'The text or token array(s) to be embedded. This can be a single string, an array of strings, or an array of token arrays to embed multiple inputs in one request.',
     type: [String],
   })
   input: string | string[];
 
   @ApiProperty({
     description:
-      'Encoding format for the embeddings. Supported formats are float and int.',
+      'Specifies the format for the embeddings. Supported formats include `float` and `int`. This field is optional.',
     type: String,
   })
   @Optional()
@@ -25,7 +25,7 @@
 
   @ApiProperty({
     description:
-      'The number of dimensions the resulting output embeddings should have. Only supported in some models.',
+      'Defines the number of dimensions for the output embeddings. This feature is supported by certain models only. This field is optional.',
     type: Number,
   })
   @Optional()
diff --git a/cortex-js/src/main.ts b/cortex-js/src/main.ts
index fa7564669..75261665b 100644
--- a/cortex-js/src/main.ts
+++ b/cortex-js/src/main.ts
@@ -53,6 +53,10 @@
       'Threads',
       'These endpoints handle the creation, retrieval, updating, and deletion of conversation threads.',
     )
+    .addTag(
+      'Embeddings',
+      'Endpoint for creating and retrieving embedding vectors from text inputs using specified models.',
+    )
     .addServer('http://localhost:1337')
     .addServer('http://localhost:1337/v1')
     .build();

From 86162f038e66431935da24b37b3c48064a43430a Mon Sep 17 00:00:00 2001
From: irfanpena
Date: Tue, 11 Jun 2024 14:39:22 +0700
Subject: [PATCH 2/4] docs: Update the dependencies

---
 README.md           | 2 +-
 cortex-js/README.md | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 66a0f072b..aa2e3804c 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ Cortex currently supports two inference engines:
 
 Before installation, ensure that you have installed the following:
 
-- **Node.js**: Required for running the installation.
+- **Node.js**: version 18 and above is required to run the installation.
 - **NPM**: Needed to manage packages.
 - **CPU Instruction Sets**: Available for download from the [Cortex GitHub Releases](https://github.com/janhq/cortex/releases) page.
 
diff --git a/cortex-js/README.md b/cortex-js/README.md
index 5730c98f7..6dac72720 100644
--- a/cortex-js/README.md
+++ b/cortex-js/README.md
@@ -32,8 +32,7 @@ Cortex
 ### **Dependencies**
 
 Before installation, ensure that you have installed the following:
-
-- **Node.js**: Required for running the installation.
+- **Node.js**: version 18 and above is required to run the installation.
 - **NPM**: Needed to manage packages.
 - **CPU Instruction Sets**: Available for download from the [Cortex GitHub Releases](https://github.com/janhq/cortex/releases) page.
 
From 0cbbf08f86529f9e505182011164df64e9fe0983 Mon Sep 17 00:00:00 2001
From: Louis
Date: Wed, 12 Jun 2024 15:03:31 +0700
Subject: [PATCH 3/4] feat: cortex benchmark command (#684)

---
 cortex-js/package.json                        |   2 +
 cortex-js/src/command.module.ts               |   2 +
 .../src/file-manager/file-manager.service.ts  |  11 +
 .../commanders/benchmark.command.ts           |  18 ++
 .../infrastructure/commanders/chat.command.ts |   3 +-
 .../commanders/cortex-command.commander.ts    |   2 +
 .../commanders/embeddings.command.ts          |   3 +-
 .../types/benchmark-config.interface.ts       |  28 ++
 .../commanders/types/model-stat.interface.ts  |   8 +
 .../usecases/benchmark.cli.usecases.ts        | 250 ++++++++++++++++++
 .../usecases/cli.usecases.module.ts           |  10 +-
 .../commanders/usecases/ps.cli.usecases.ts    |   9 +-
 .../src/infrastructure/constants/benchmark.ts |  37 +++
 13 files changed, 372 insertions(+), 11 deletions(-)
 create mode 100644 cortex-js/src/infrastructure/commanders/benchmark.command.ts
 create mode 100644 cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts
 create mode 100644 cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts
 create mode 100644 cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
 create mode 100644 cortex-js/src/infrastructure/constants/benchmark.ts

diff --git a/cortex-js/package.json b/cortex-js/package.json
index 1e4b08149..62231be75 100644
--- a/cortex-js/package.json
+++ b/cortex-js/package.json
@@ -48,11 +48,13 @@
     "decompress": "^4.2.1",
     "js-yaml": "^4.1.0",
     "nest-commander": "^3.13.0",
+    "openai": "^4.50.0",
    "readline": "^1.3.0",
     "reflect-metadata": "^0.2.0",
     "rxjs": "^7.8.1",
     "sqlite": "^5.1.1",
     "sqlite3": "^5.1.7",
+    "systeminformation": "^5.22.10",
     "typeorm": "^0.3.20",
     "ulid": "^2.3.0",
     "update-notifier": "^5.0.0",
diff --git a/cortex-js/src/command.module.ts b/cortex-js/src/command.module.ts
index f25d65cea..76f9a00a1 100644
--- a/cortex-js/src/command.module.ts
+++ b/cortex-js/src/command.module.ts
@@ -28,6 +28,7 @@ import { PSCommand } from './infrastructure/commanders/ps.command';
 import { KillCommand } from './infrastructure/commanders/kill.command';
 import { PresetCommand } from './infrastructure/commanders/presets.command';
 import { EmbeddingCommand } from './infrastructure/commanders/embeddings.command';
+import { BenchmarkCommand } from './infrastructure/commanders/benchmark.command';
 
 @Module({
   imports: [
@@ -56,6 +57,7 @@ import { EmbeddingCommand } from './infrastructure/commanders/embeddings.command';
     KillCommand,
     PresetCommand,
     EmbeddingCommand,
+    BenchmarkCommand,
 
     // Questions
     InitRunModeQuestions,
diff --git a/cortex-js/src/file-manager/file-manager.service.ts b/cortex-js/src/file-manager/file-manager.service.ts
index 78b29270c..498509bb6 100644
--- a/cortex-js/src/file-manager/file-manager.service.ts
+++ b/cortex-js/src/file-manager/file-manager.service.ts
@@ -12,6 +12,7 @@ export class FileManagerService {
   private modelFolderName = 'models';
   private presetFolderName = 'presets';
   private extensionFoldername = 'extensions';
+  private benchmarkFoldername = 'benchmark';
   private cortexCppFolderName = 'cortex-cpp';
 
   /**
@@ -116,4 +117,14 @@ export class FileManagerService {
     const dataFolderPath = await this.getDataFolderPath();
     return join(dataFolderPath, this.extensionFoldername);
   }
+
+  /**
+   * Get the benchmark folder path
+   * Usually it is located at the home directory > cortex > benchmark
+   * @returns the path to the benchmark folder
+   */
+  async getBenchmarkPath(): Promise<string> {
+    const dataFolderPath = await this.getDataFolderPath();
+    return join(dataFolderPath, this.benchmarkFoldername);
+  }
 }
diff --git a/cortex-js/src/infrastructure/commanders/benchmark.command.ts b/cortex-js/src/infrastructure/commanders/benchmark.command.ts
new file mode 100644
index 000000000..e3e6a69fd
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/benchmark.command.ts
@@ -0,0 +1,18 @@
+import { CommandRunner, SubCommand } from 'nest-commander';
+import { BenchmarkCliUsecases } from './usecases/benchmark.cli.usecases';
+
+@SubCommand({
+  name: 'benchmark',
+  subCommands: [],
+  description:
+    'Benchmark and analyze the performance of a specific AI model using a variety of system resources',
+})
+export class BenchmarkCommand extends CommandRunner {
+  constructor(private readonly benchmarkUsecases: BenchmarkCliUsecases) {
+    super();
+  }
+
+  async run(): Promise<void> {
+    return this.benchmarkUsecases.benchmark();
+  }
+}
diff --git a/cortex-js/src/infrastructure/commanders/chat.command.ts b/cortex-js/src/infrastructure/commanders/chat.command.ts
index d08d12037..718541d0d 100644
--- a/cortex-js/src/infrastructure/commanders/chat.command.ts
+++ b/cortex-js/src/infrastructure/commanders/chat.command.ts
@@ -6,8 +6,9 @@ import {
 } from 'nest-commander';
 import { ChatCliUsecases } from './usecases/chat.cli.usecases';
 import { exit } from 'node:process';
-import { ModelStat, PSCliUsecases } from './usecases/ps.cli.usecases';
+import { PSCliUsecases } from './usecases/ps.cli.usecases';
 import { ModelsUsecases } from '@/usecases/models/models.usecases';
+import { ModelStat } from './types/model-stat.interface';
 
 type ChatOptions = {
   threadId?: string;
diff --git a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts
index 88fbe7a70..9bb9c33fd 100644
--- a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts
+++ b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts
@@ -10,6 +10,7 @@ import { KillCommand } from './kill.command';
 import pkg from '@/../package.json';
 import { PresetCommand } from './presets.command';
 import { EmbeddingCommand } from './embeddings.command';
+import { BenchmarkCommand } from './benchmark.command';
 
 interface CortexCommandOptions {
   version: boolean;
@@ -26,6 +27,7 @@ interface CortexCommandOptions {
     KillCommand,
     PresetCommand,
     EmbeddingCommand,
+    BenchmarkCommand,
   ],
   description: 'Cortex CLI',
 })
diff --git a/cortex-js/src/infrastructure/commanders/embeddings.command.ts b/cortex-js/src/infrastructure/commanders/embeddings.command.ts
index 45b23cc93..872715762 100644
--- a/cortex-js/src/infrastructure/commanders/embeddings.command.ts
+++ b/cortex-js/src/infrastructure/commanders/embeddings.command.ts
@@ -5,9 +5,10 @@ import {
   SubCommand,
 } from 'nest-commander';
 import { ModelsUsecases } from '@/usecases/models/models.usecases';
-import { ModelStat, PSCliUsecases } from './usecases/ps.cli.usecases';
+import { PSCliUsecases } from './usecases/ps.cli.usecases';
 import { ChatCliUsecases } from './usecases/chat.cli.usecases';
 import { inspect } from 'util';
+import { ModelStat } from './types/model-stat.interface';
 
 interface EmbeddingCommandOptions {
   encoding_format?: string;
diff --git a/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts b/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts
new file mode 100644
index 000000000..aa49c313e
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts
@@ -0,0 +1,28 @@
+import { ChatCompletionMessageParam } from 'openai/resources';
+
+export interface BenchmarkConfig {
+  api: {
+    base_url: string;
+    api_key: string;
+    parameters: {
+      messages: ChatCompletionMessageParam[];
+      model: string;
+      stream?: boolean;
+      max_tokens?: number;
+      stop?: string[];
+      frequency_penalty?: number;
+      presence_penalty?: number;
+      temperature?: number;
+      top_p?: number;
+    };
+  };
+  prompts?: {
+    min: number;
+    max: number;
+    samples: number;
+  };
+  output: string;
+  concurrency: number;
+  num_rounds: number;
+  hardware: string[];
+}
diff --git a/cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts b/cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts
new file mode 100644
index 000000000..336917b4f
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts
@@ -0,0 +1,8 @@
+export interface ModelStat {
+  modelId: string;
+  engine?: string;
+  duration?: string;
+  status: string;
+  vram?: string;
+  ram?: string;
+}
diff --git a/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
new file mode 100644
index 000000000..7d31ca552
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
@@ -0,0 +1,250 @@
+import { Injectable } from '@nestjs/common';
+import si from 'systeminformation';
+import fs, { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
+import OpenAI from 'openai';
+import { Presets, SingleBar } from 'cli-progress';
+import yaml from 'js-yaml';
+import { FileManagerService } from '@/file-manager/file-manager.service';
+import { join } from 'path';
+import { ModelsCliUsecases } from './models.cli.usecases';
+import { spawn } from 'child_process';
+import { BenchmarkConfig } from '../types/benchmark-config.interface';
+import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
+import { inspect } from 'util';
+import { defaultBenchmarkConfiguration } from '@/infrastructure/constants/benchmark';
+
+@Injectable()
+export class BenchmarkCliUsecases {
+  constructor(
+    private readonly modelsCliUsecases: ModelsCliUsecases,
+    private readonly cortexUsecases: CortexUsecases,
+    private readonly fileService: FileManagerService,
+  ) {}
+
+  config: BenchmarkConfig;
+  openai?: OpenAI;
+  /**
+   * Benchmark and analyze the performance of a specific AI model using a variety of system resources
+   */
+  async benchmark() {
+    return this.getBenchmarkConfig().then((config) => {
+      this.config = config;
+
+      // TODO: Using OpenAI client or Cortex client to benchmark?
+      this.openai = new OpenAI({
+        apiKey: this.config.api.api_key,
+        baseURL: this.config.api.base_url,
+        timeout: 20 * 1000,
+      });
+
+      spawn('cortex', ['serve'], {
+        detached: false,
+      });
+
+      return this.cortexUsecases
+        .startCortex()
+        .then(() =>
+          this.modelsCliUsecases.startModel(this.config.api.parameters.model),
+        )
+        .then(() => this.runBenchmarks())
+        .then(() => process.exit(0));
+    });
+  }
+
+  /**
+   * Get the benchmark configuration
+   * @returns the benchmark configuration
+   */
+  private async getBenchmarkConfig() {
+    const benchmarkFolder = await this.fileService.getBenchmarkPath();
+    const configurationPath = join(benchmarkFolder, 'config.yaml');
+    if (existsSync(configurationPath)) {
+      return yaml.load(
+        readFileSync(configurationPath, 'utf8'),
+      ) as BenchmarkConfig;
+    } else {
+      const config = yaml.dump(defaultBenchmarkConfiguration);
+      if (!existsSync(benchmarkFolder)) {
+        mkdirSync(benchmarkFolder, {
+          recursive: true,
+        });
+      }
+      await writeFileSync(configurationPath, config, 'utf8');
+      return defaultBenchmarkConfiguration;
+    }
+  }
+
+  /**
+   * Get the system resources for benchmarking
+   * using the systeminformation library
+   * @returns the system resources
+   */
+  private async getSystemResources() {
+    return {
+      cpu: await si.currentLoad(),
+      mem: await si.mem(),
+      gpu: (await si.graphics()).controllers,
+    };
+  }
+
+  /**
+   * Get the resource change between two data points
+   * @param startData the start data point
+   * @param endData the end data point
+   * @returns the resource change
+   */
+  private async getResourceChange(startData: any, endData: any) {
+    return {
+      cpu:
+        startData.cpu && endData.cpu
+          ? ((endData.cpu.currentLoad - startData.cpu.currentLoad) /
+              startData.cpu.currentLoad) *
+            100
+          : null,
+      mem:
+        startData.mem && endData.mem
+          ? ((endData.mem.used - startData.mem.used) / startData.mem.total) *
+            100
+          : null,
+    };
+  }
+
+  /**
+   * Benchmark a user using the OpenAI API
+   * @returns
+   */
+  private async benchmarkUser() {
+    const startResources = await this.getSystemResources();
+    const start = Date.now();
+    let tokenCount = 0;
+    let firstTokenTime = null;
+
+    try {
+      const stream = await this.openai!.chat.completions.create({
+        model: this.config.api.parameters.model,
+        messages: this.config.api.parameters.messages,
+        max_tokens: this.config.api.parameters.max_tokens,
+        stream: true,
+      });
+
+      for await (const chunk of stream) {
+        if (!firstTokenTime && chunk.choices[0]?.delta?.content) {
+          firstTokenTime = Date.now();
+        }
+        tokenCount += (chunk.choices[0]?.delta?.content || '').split(
+          /\s+/,
+        ).length;
+      }
+    } catch (error) {
+      console.error('Error during API call:', error);
+      return null;
+    }
+
+    const latency = Date.now() - start;
+    const ttft = firstTokenTime ? firstTokenTime - start : null;
+    const endResources = await this.getSystemResources();
+    const resourceChange = await this.getResourceChange(
+      startResources,
+      endResources,
+    );
+
+    return {
+      tokens: this.config.api.parameters.max_tokens,
+      token_length: tokenCount, // Dynamically calculated token count
+      latency,
+      resourceChange,
+      tpot: tokenCount ? latency / tokenCount : 0,
+      throughput: tokenCount / (latency / 1000),
+      ttft,
+    };
+  }
+
+  /**
+   * Calculate the percentiles of the data
+   * @param data the data to calculate percentiles for
+   * @param percentile the percentile to calculate
+   * @returns the percentile value
+   */
+  private calculatePercentiles(data: number[], percentile: number) {
+    if (data.length === 0) return null;
+    const sorted = data
+      .filter((x: number) => x !== null)
+      .sort((a: number, b: number) => a - b);
+    const pos = (percentile / 100) * sorted.length;
+    if (pos < 1) return sorted[0];
+    if (pos >= sorted.length) return sorted[sorted.length - 1];
+    const lower = sorted[Math.floor(pos) - 1];
+    const upper = sorted[Math.ceil(pos) - 1];
+    return lower + (upper - lower) * (pos - Math.floor(pos));
+  }
+
+  /**
+   * Run the benchmarks
+   */
+  private async runBenchmarks() {
+    const allResults: any[] = [];
+    const rounds = this.config.num_rounds || 1;
+
+    const bar = new SingleBar({}, Presets.shades_classic);
+    bar.start(rounds, 0);
+
+    for (let i = 0; i < rounds; i++) {
+      const roundResults = [];
+      const hardwareBefore = await this.getSystemResources();
+
+      for (let j = 0; j < this.config.concurrency; j++) {
+        const result = await this.benchmarkUser();
+        if (result) {
+          roundResults.push(result);
+        }
+      }
+
+      const hardwareAfter = await this.getSystemResources();
+      const hardwareChanges = await this.getResourceChange(
+        hardwareBefore,
+        hardwareAfter,
+      );
+
+      allResults.push({
+        round: i + 1,
+        results: roundResults,
+        hardwareChanges,
+      });
+
+      bar.update(i + 1);
+    }
+
+    const metrics: any = {
+      p50: {},
+      p75: {},
+      p95: {},
+    };
+    const keys = ['latency', 'tpot', 'throughput', 'ttft'];
+    keys.forEach((key) => {
+      const data = allResults.flatMap((r) =>
+        r.results.map((res: object) => res[key as keyof typeof res]),
+      );
+      metrics.p50[key] = this.calculatePercentiles(data, 50);
+      metrics.p75[key] = this.calculatePercentiles(data, 75);
+      metrics.p95[key] = this.calculatePercentiles(data, 95);
+    });
+
+    const output = {
+      hardware: await this.getSystemResources(),
+      results: allResults,
+      metrics,
+    };
+    bar.stop();
+
+    const outputFilePath = join(
+      await this.fileService.getBenchmarkPath(),
+      'output.json',
+    );
+    fs.writeFileSync(outputFilePath, JSON.stringify(output, null, 2));
+    console.log(`Benchmark results and metrics saved to ${outputFilePath}`);
+
+    console.log(
+      inspect(output, { showHidden: false, depth: null, colors: true }),
+    );
+  }
+}
diff --git a/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts b/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts
index feead245a..de14a704d 100644
--- a/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts
+++ b/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts
@@ -11,6 +11,7 @@ import { AssistantsModule } from '@/usecases/assistants/assistants.module';
 import { MessagesModule } from '@/usecases/messages/messages.module';
 import { FileManagerModule } from '@/file-manager/file-manager.module';
 import { PSCliUsecases } from './ps.cli.usecases';
+import { BenchmarkCliUsecases } from './benchmark.cli.usecases';
 
 @Module({
   imports: [
@@ -28,7 +29,14 @@ import { PSCliUsecases } from './ps.cli.usecases';
     ModelsCliUsecases,
     ChatCliUsecases,
     PSCliUsecases,
+    BenchmarkCliUsecases,
+  ],
+  exports: [
+    InitCliUsecases,
+    ModelsCliUsecases,
+    ChatCliUsecases,
+    PSCliUsecases,
+    BenchmarkCliUsecases,
   ],
-  exports: [InitCliUsecases, ModelsCliUsecases, ChatCliUsecases, PSCliUsecases],
 })
 export class CliUsecasesModule {}
diff --git a/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts
index 5e2e8db38..885a10c8b 100644
--- a/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts
+++ b/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts
@@ -6,15 +6,8 @@ import {
 } from '@/infrastructure/constants/cortex';
 import { HttpService } from '@nestjs/axios';
 import { firstValueFrom } from 'rxjs';
+import { ModelStat } from '../types/model-stat.interface';
 
-export interface ModelStat {
-  modelId: string;
-  engine?: string;
-  duration?: string;
-  status: string;
-  vram?: string;
-  ram?: string;
-}
 interface ModelStatResponse {
   object: string;
   data: any;
diff --git a/cortex-js/src/infrastructure/constants/benchmark.ts b/cortex-js/src/infrastructure/constants/benchmark.ts
new file mode 100644
index 000000000..01dfc59bd
--- /dev/null
+++ b/cortex-js/src/infrastructure/constants/benchmark.ts
@@ -0,0 +1,37 @@
+import { BenchmarkConfig } from '../commanders/types/benchmark-config.interface';
+
+export const defaultBenchmarkConfiguration: BenchmarkConfig = {
+  api: {
+    base_url: 'http://127.0.0.1:1337/',
+    api_key: '',
+    parameters: {
+      messages: [
+        {
+          content: 'You are a helpful assistant.',
+          role: 'system',
+        },
+        {
+          content: 'Hello!',
+          role: 'user',
+        },
+      ],
+      model: 'tinyllama',
+      stream: true,
+      max_tokens: 2048,
+      stop: [],
+      frequency_penalty: 0,
+      presence_penalty: 0,
+      temperature: 0.7,
+      top_p: 0.95,
+    },
+  },
+  prompts: {
+    min: 102,
+    max: 2048,
+    samples: 10,
+  },
+  output: 'json',
+  hardware: ['cpu', 'gpu', 'psu', 'chassis', 'ram'],
+  concurrency: 1,
+  num_rounds: 10,
+};

From 4763c1c34496cb63d9b00416c4a7c8783993ed33 Mon Sep 17 00:00:00 2001
From: Van Pham <64197333+Van-QA@users.noreply.github.com>
Date: Wed, 12 Jun 2024 15:49:52 +0700
Subject: [PATCH 4/4] feat: Add testcase for Cortex CLI (#689)

Co-authored-by: Louis
Co-authored-by: Van-QA
---
 cortex-js/.eslintrc.js                        |   6 +
 .../commanders/serve.command.ts               |  24 ++--
 .../commanders/test/helpers.command.spec.ts   | 107 ++++++++++++++++++
 .../commanders/test/log.service.ts            |   8 ++
 .../test/model-list.command.spec.ts           |  67 -----------
 .../commanders/test/models.command.spec.ts    | 100 ++++++++++++++++
 6 files changed, 236 insertions(+), 76 deletions(-)
 create mode 100644 cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts
 create mode 100644 cortex-js/src/infrastructure/commanders/test/log.service.ts
 delete mode 100644 cortex-js/src/infrastructure/commanders/test/model-list.command.spec.ts
 create mode 100644 cortex-js/src/infrastructure/commanders/test/models.command.spec.ts

diff --git a/cortex-js/.eslintrc.js b/cortex-js/.eslintrc.js
index 259de13c7..448a2d910 100644
--- a/cortex-js/.eslintrc.js
+++ b/cortex-js/.eslintrc.js
@@ -21,5 +21,11 @@ module.exports = {
     '@typescript-eslint/explicit-function-return-type': 'off',
     '@typescript-eslint/explicit-module-boundary-types': 'off',
     '@typescript-eslint/no-explicit-any': 'off',
+    "prettier/prettier": [
+      "error",
+      {
+        "endOfLine": "auto"
+      },
+    ],
   },
 };
diff --git a/cortex-js/src/infrastructure/commanders/serve.command.ts b/cortex-js/src/infrastructure/commanders/serve.command.ts
index f839b4a0b..99575bcb2 100644
--- a/cortex-js/src/infrastructure/commanders/serve.command.ts
+++ b/cortex-js/src/infrastructure/commanders/serve.command.ts
@@ -20,16 +20,22 @@ export class ServeCommand extends CommandRunner {
     const host = options?.host || defaultCortexJsHost;
     const port = options?.port || defaultCortexJsPort;
 
-    spawn('node', [join(__dirname, '../../main.js')], {
-      env: {
-        ...process.env,
-        CORTEX_JS_HOST: host,
-        CORTEX_JS_PORT: port.toString(),
-        NODE_ENV: 'production',
+    spawn(
+      'node',
+      process.env.TEST
+        ? [join(__dirname, '../../../dist/src/main.js')]
+        : [join(__dirname, '../../main.js')],
+      {
+        env: {
+          ...process.env,
+          CORTEX_JS_HOST: host,
+          CORTEX_JS_PORT: port.toString(),
+          NODE_ENV: 'production',
+        },
+        stdio: 'inherit',
+        detached: false,
       },
-      stdio: 'inherit',
-      detached: false,
-    });
+    );
   }
 
   @Option({
diff --git a/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts b/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts
new file mode 100644
index 000000000..05d9562d6
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts
@@ -0,0 +1,107 @@
+import { TestingModule } from '@nestjs/testing';
+import { spy, Stub, stubMethod } from 'hanbi';
+import { CommandTestFactory } from 'nest-commander-testing';
+import { CommandModule } from '@/command.module';
+import { LogService } from '@/infrastructure/commanders/test/log.service';
+import axios from 'axios';
+
+let commandInstance: TestingModule,
+  exitSpy: Stub,
+  stdoutSpy: Stub,
+  stderrSpy: Stub;
+export const timeout = 500000;
+
+beforeEach(
+  () =>
+    new Promise<void>(async (res) => {
+      stubMethod(process.stderr, 'write');
+      exitSpy = stubMethod(process, 'exit');
+      stdoutSpy = stubMethod(process.stdout, 'write');
+      stderrSpy = stubMethod(process.stderr, 'write');
+      commandInstance = await CommandTestFactory.createTestingCommand({
+        imports: [CommandModule],
+      })
+        .overrideProvider(LogService)
+        .useValue({ log: spy().handler })
+        .compile();
+      res();
+      stdoutSpy.reset();
+      stderrSpy.reset();
+    }),
+);
+
+describe('Helper commands', () => {
+  test(
+    'Init with hardware auto detection',
+    async () => {
+      await CommandTestFactory.run(commandInstance, ['init', '-s']);
+
+      // Wait for a brief period to allow the command to execute
+      await new Promise((resolve) => setTimeout(resolve, 1000));
+
+      expect(stdoutSpy.firstCall?.args.length).toBeGreaterThan(0);
+    },
+    timeout,
+  );
+
+  test('Chat with option -m', async () => {
+    const logMock = stubMethod(console, 'log');
+
+    await CommandTestFactory.run(commandInstance, [
+      'chat',
+      // '-m',
+      // 'hello',
+      // '>output.txt',
+    ]);
+    expect(logMock.firstCall?.args[0]).toBe("Inorder to exit, type 'exit()'.");
+    // expect(exitSpy.callCount).toBe(1);
+    // expect(exitSpy.firstCall?.args[0]).toBe(1);
+  });
+
+  test('Show / kill running models', async () => {
+    const tableMock = stubMethod(console, 'table');
+
+    const logMock = stubMethod(console, 'log');
+    await CommandTestFactory.run(commandInstance, ['kill']);
+    await CommandTestFactory.run(commandInstance, ['ps']);
+
+    expect(logMock.firstCall?.args[0]).toEqual({
+      message: 'Cortex stopped successfully',
+      status: 'success',
+    });
+    expect(tableMock.firstCall?.args[0]).toBeInstanceOf(Array);
+    expect(tableMock.firstCall?.args[0].length).toEqual(0);
+  });
+
+  test('Help command return guideline to users', async () => {
+    await CommandTestFactory.run(commandInstance, ['-h']);
+    expect(stdoutSpy.firstCall?.args).toBeInstanceOf(Array);
+    expect(stdoutSpy.firstCall?.args.length).toBe(1);
+    expect(stdoutSpy.firstCall?.args[0]).toContain('display help for command');
+
+    expect(exitSpy.callCount).toBeGreaterThan(1);
+    expect(exitSpy.firstCall?.args[0]).toBe(0);
+  });
+
+  test('Should handle missing command', async () => {
+    await CommandTestFactory.run(commandInstance, ['--unknown']);
+    expect(stderrSpy.firstCall?.args[0]).toContain('error: unknown option');
+    expect(stderrSpy.firstCall?.args[0]).toContain('--unknown');
+    expect(exitSpy.callCount).toBe(1);
+    expect(exitSpy.firstCall?.args[0]).toBe(1);
+  });
+
+  test('Local API server via localhost:1337/api', async () => {
+    await CommandTestFactory.run(commandInstance, ['serve']);
+
+    // Add a delay of 1000 milliseconds (1 second)
+    return new Promise<void>(async (resolve) => {
+      setTimeout(async () => {
+        // Send a request to the API server to check if it's running
+        const response = await axios.get('http://localhost:1337/api');
+        expect(response.status).toBe(200);
+        resolve();
+      }, 1000);
+    });
+  });
+});
diff --git a/cortex-js/src/infrastructure/commanders/test/log.service.ts b/cortex-js/src/infrastructure/commanders/test/log.service.ts
new file mode 100644
index 000000000..1151f5fb5
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/test/log.service.ts
@@ -0,0 +1,8 @@
+import { Injectable } from '@nestjs/common';
+
+@Injectable()
+export class LogService {
+  log(...args: any[]): void {
+    console.log(...args);
+  }
+}
diff --git a/cortex-js/src/infrastructure/commanders/test/model-list.command.spec.ts b/cortex-js/src/infrastructure/commanders/test/model-list.command.spec.ts
deleted file mode 100644
index b90b26006..000000000
--- a/cortex-js/src/infrastructure/commanders/test/model-list.command.spec.ts
+++ /dev/null
@@ -1,67 +0,0 @@
-import { TestingModule } from '@nestjs/testing';
-import { stubMethod } from 'hanbi';
-import { CommandTestFactory } from 'nest-commander-testing';
-import { CommandModule } from '@/command.module';
-import { FileManagerService } from '@/file-manager/file-manager.service';
-import { join } from 'path';
-import { mkdirSync, rmSync, writeFileSync } from 'fs';
-
-let commandInstance: TestingModule;
-
-beforeEach(
-  () =>
-    new Promise<void>(async (res) => {
-      commandInstance = await CommandTestFactory.createTestingCommand({
-        imports: [CommandModule],
-      })
-        // .overrideProvider(LogService)
-        // .useValue({})
-        .compile();
-      const fileService =
-        await commandInstance.resolve(FileManagerService);
-
-      // Attempt to create test folder
-      await fileService.writeConfigFile({
-        dataFolderPath: join(__dirname, 'test_data'),
-      });
-      res();
-    }),
-);
-
-afterEach(
-  () =>
-    new Promise<void>(async (res) => {
-      // Attempt to clean test folder
-      rmSync(join(__dirname, 'test_data'), {
-        recursive: true,
-        force: true,
-      });
-      res();
-    }),
-);
-
-describe('models list returns array of models', () => {
-  test('empty model list', async () => {
-    const logMock = stubMethod(console, 'table');
-
-    await CommandTestFactory.run(commandInstance, ['models', 'list']);
-    expect(logMock.firstCall?.args[0]).toBeInstanceOf(Array);
-    expect(logMock.firstCall?.args[0].length).toBe(0);
-  });
-
-  test('many models in the list', async () => {
-    const logMock = stubMethod(console, 'table');
-
-    mkdirSync(join(__dirname, 'test_data', 'models'), { recursive: true });
-    writeFileSync(
-      join(__dirname, 'test_data', 'models', 'test.yaml'),
-      'model: test',
-      'utf8',
-    );
-
-    await CommandTestFactory.run(commandInstance, ['models', 'list']);
-    expect(logMock.firstCall?.args[0]).toBeInstanceOf(Array);
-    expect(logMock.firstCall?.args[0].length).toBe(1);
-    expect(logMock.firstCall?.args[0][0].id).toBe('test');
-  });
-});
diff --git a/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts b/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts
new file mode 100644
index 000000000..7d512d2be
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts
@@ -0,0 +1,100 @@
+import { TestingModule } from '@nestjs/testing';
+import { stubMethod } from 'hanbi';
+import { CommandTestFactory } from 'nest-commander-testing';
+import { CommandModule } from '@/command.module';
+import { join } from 'path';
+import { rmSync } from 'fs';
+import { timeout } from '@/infrastructure/commanders/test/helpers.command.spec';
+
+let commandInstance: TestingModule;
+
+beforeEach(
+  () =>
+    new Promise<void>(async (res) => {
+      commandInstance = await CommandTestFactory.createTestingCommand({
+        imports: [CommandModule],
+      })
+        // .overrideProvider(LogService)
+        // .useValue({})
+        .compile();
+      res();
+    }),
+);
+
+afterEach(
+  () =>
+    new Promise<void>(async (res) => {
+      // Attempt to clean test folder
+      rmSync(join(__dirname, 'test_data'), {
+        recursive: true,
+        force: true,
+      });
+      res();
+    }),
+);
+
+export const modelName = 'tinyllama';
+describe('Models list returns array of models', () => {
+  test('Init with CPU', async () => {
+    const logMock = stubMethod(console, 'log');
+
+    logMock.passThrough();
+    CommandTestFactory.setAnswers(['CPU', '', 'AVX2']);
+
+    await CommandTestFactory.run(commandInstance, ['init']);
+    expect(logMock.firstCall?.args[0]).toBe(
+      'Downloading engine file windows-amd64-avx2.tar.gz',
+    );
+  }, 50000);
+
+  test('Empty model list', async () => {
+    const logMock = stubMethod(console, 'table');
+
+    await CommandTestFactory.run(commandInstance, ['models', 'list']);
+    expect(logMock.firstCall?.args[0]).toBeInstanceOf(Array);
+    expect(logMock.firstCall?.args[0].length).toBe(0);
+  });
+
+  test(
+    'Run model and check with cortex ps',
+    async () => {
+      const logMock = stubMethod(console, 'log');
+
+      await CommandTestFactory.run(commandInstance, ['run', modelName]);
+      expect(logMock.lastCall?.args[0]).toBe("Inorder to exit, type 'exit()'.");
+
+      const tableMock = stubMethod(console, 'table');
+      await CommandTestFactory.run(commandInstance, ['ps']);
+      expect(tableMock.firstCall?.args[0].length).toBeGreaterThan(0);
+    },
+    timeout,
+  );
+
+  test('Get model', async () => {
+    const logMock = stubMethod(console, 'log');
+
+    await CommandTestFactory.run(commandInstance, ['models', 'get', modelName]);
+    expect(logMock.firstCall?.args[0]).toBeInstanceOf(Object);
+    expect(logMock.firstCall?.args[0].files.length).toBe(1);
+  });
+
+  test('Many models in the list', async () => {
+    const logMock = stubMethod(console, 'table');
+    await CommandTestFactory.run(commandInstance, ['models', 'list']);
+    expect(logMock.firstCall?.args[0]).toBeInstanceOf(Array);
+    expect(logMock.firstCall?.args[0].length).toBe(1);
+    expect(logMock.firstCall?.args[0][0].id).toBe(modelName);
+  });
+
+  test(
+    'Model already exists',
+    async () => {
+      const stdoutSpy = stubMethod(process.stdout, 'write');
+      const exitSpy = stubMethod(process, 'exit');
+      await CommandTestFactory.run(commandInstance, ['pull', modelName]);
+      expect(stdoutSpy.firstCall?.args[0]).toContain('Model already exists');
+      expect(exitSpy.firstCall?.args[0]).toBe(1);
+    },
+    timeout,
+  );
+});