diff --git a/cortex-js/package.json b/cortex-js/package.json
index 1e4b08149..62231be75 100644
--- a/cortex-js/package.json
+++ b/cortex-js/package.json
@@ -48,11 +48,13 @@
     "decompress": "^4.2.1",
     "js-yaml": "^4.1.0",
     "nest-commander": "^3.13.0",
+    "openai": "^4.50.0",
     "readline": "^1.3.0",
     "reflect-metadata": "^0.2.0",
     "rxjs": "^7.8.1",
     "sqlite": "^5.1.1",
     "sqlite3": "^5.1.7",
+    "systeminformation": "^5.22.10",
     "typeorm": "^0.3.20",
     "ulid": "^2.3.0",
     "update-notifier": "^5.0.0",
diff --git a/cortex-js/src/command.module.ts b/cortex-js/src/command.module.ts
index f25d65cea..76f9a00a1 100644
--- a/cortex-js/src/command.module.ts
+++ b/cortex-js/src/command.module.ts
@@ -28,6 +28,7 @@ import { PSCommand } from './infrastructure/commanders/ps.command';
 import { KillCommand } from './infrastructure/commanders/kill.command';
 import { PresetCommand } from './infrastructure/commanders/presets.command';
 import { EmbeddingCommand } from './infrastructure/commanders/embeddings.command';
+import { BenchmarkCommand } from './infrastructure/commanders/benchmark.command';
 
 @Module({
   imports: [
@@ -56,6 +57,7 @@ import { EmbeddingCommand } from './infrastructure/commanders/embeddings.command';
     KillCommand,
     PresetCommand,
     EmbeddingCommand,
+    BenchmarkCommand,
 
     // Questions
     InitRunModeQuestions,
diff --git a/cortex-js/src/file-manager/file-manager.service.ts b/cortex-js/src/file-manager/file-manager.service.ts
index 78b29270c..498509bb6 100644
--- a/cortex-js/src/file-manager/file-manager.service.ts
+++ b/cortex-js/src/file-manager/file-manager.service.ts
@@ -12,6 +12,7 @@ export class FileManagerService {
   private modelFolderName = 'models';
   private presetFolderName = 'presets';
   private extensionFoldername = 'extensions';
+  private benchmarkFoldername = 'benchmark';
   private cortexCppFolderName = 'cortex-cpp';
 
   /**
@@ -116,4 +117,14 @@ export class FileManagerService {
     const dataFolderPath = await this.getDataFolderPath();
     return join(dataFolderPath, this.extensionFoldername);
   }
+
+  /**
+   * Get the benchmark folder path
+   * Usually it is located at the home directory > cortex > benchmark
+   * @returns the path to the benchmark folder
+   */
+  async getBenchmarkPath(): Promise<string> {
+    const dataFolderPath = await this.getDataFolderPath();
+    return join(dataFolderPath, this.benchmarkFoldername);
+  }
 }
diff --git a/cortex-js/src/infrastructure/commanders/benchmark.command.ts b/cortex-js/src/infrastructure/commanders/benchmark.command.ts
new file mode 100644
index 000000000..e3e6a69fd
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/benchmark.command.ts
@@ -0,0 +1,18 @@
+import { CommandRunner, SubCommand } from 'nest-commander';
+import { BenchmarkCliUsecases } from './usecases/benchmark.cli.usecases';
+
+@SubCommand({
+  name: 'benchmark',
+  subCommands: [],
+  description:
+    'Benchmark and analyze the performance of a specific AI model using a variety of system resources',
+})
+export class BenchmarkCommand extends CommandRunner {
+  constructor(private readonly benchmarkUsecases: BenchmarkCliUsecases) {
+    super();
+  }
+
+  async run(): Promise<void> {
+    return this.benchmarkUsecases.benchmark();
+  }
+}
diff --git a/cortex-js/src/infrastructure/commanders/chat.command.ts b/cortex-js/src/infrastructure/commanders/chat.command.ts
index d08d12037..718541d0d 100644
--- a/cortex-js/src/infrastructure/commanders/chat.command.ts
+++ b/cortex-js/src/infrastructure/commanders/chat.command.ts
@@ -6,8 +6,9 @@ import {
 } from 'nest-commander';
 import { ChatCliUsecases } from './usecases/chat.cli.usecases';
 import { exit } from 'node:process';
-import { ModelStat, PSCliUsecases } from './usecases/ps.cli.usecases';
+import { PSCliUsecases } from './usecases/ps.cli.usecases';
 import { ModelsUsecases } from '@/usecases/models/models.usecases';
+import { ModelStat } from './types/model-stat.interface';
 
 type ChatOptions = {
   threadId?: string;
diff --git a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts
index 88fbe7a70..9bb9c33fd 100644
--- a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts
+++ b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts
@@ -10,6 +10,7 @@ import { KillCommand } from './kill.command';
 import pkg from '@/../package.json';
 import { PresetCommand } from './presets.command';
 import { EmbeddingCommand } from './embeddings.command';
+import { BenchmarkCommand } from './benchmark.command';
 
 interface CortexCommandOptions {
   version: boolean;
@@ -26,6 +27,7 @@
     KillCommand,
     PresetCommand,
     EmbeddingCommand,
+    BenchmarkCommand,
   ],
   description: 'Cortex CLI',
 })
diff --git a/cortex-js/src/infrastructure/commanders/embeddings.command.ts b/cortex-js/src/infrastructure/commanders/embeddings.command.ts
index 45b23cc93..872715762 100644
--- a/cortex-js/src/infrastructure/commanders/embeddings.command.ts
+++ b/cortex-js/src/infrastructure/commanders/embeddings.command.ts
@@ -5,9 +5,10 @@ import {
   SubCommand,
 } from 'nest-commander';
 import { ModelsUsecases } from '@/usecases/models/models.usecases';
-import { ModelStat, PSCliUsecases } from './usecases/ps.cli.usecases';
+import { PSCliUsecases } from './usecases/ps.cli.usecases';
 import { ChatCliUsecases } from './usecases/chat.cli.usecases';
 import { inspect } from 'util';
+import { ModelStat } from './types/model-stat.interface';
 
 interface EmbeddingCommandOptions {
   encoding_format?: string;
diff --git a/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts b/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts
new file mode 100644
index 000000000..aa49c313e
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts
@@ -0,0 +1,28 @@
+import { ChatCompletionMessageParam } from 'openai/resources';
+
+export interface BenchmarkConfig {
+  api: {
+    base_url: string;
+    api_key: string;
+    parameters: {
+      messages: ChatCompletionMessageParam[];
+      model: string;
+      stream?: boolean;
+      max_tokens?: number;
+      stop?: string[];
+      frequency_penalty?: number;
+      presence_penalty?: number;
+      temperature?: number;
+      top_p?: number;
+    };
+  };
+  prompts?: {
+    min: number;
+    max: number;
+    samples: number;
+  };
+  output: string;
+  concurrency: number;
+  num_rounds: number;
+  hardware: string[];
+}
diff --git a/cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts b/cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts
new file mode 100644
index 000000000..336917b4f
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts
@@ -0,0 +1,8 @@
+export interface ModelStat {
+  modelId: string;
+  engine?: string;
+  duration?: string;
+  status: string;
+  vram?: string;
+  ram?: string;
+}
diff --git a/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
new file mode 100644
index 000000000..0ac842877
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
@@ -0,0 +1,243 @@
+import { Injectable } from '@nestjs/common';
+import si from 'systeminformation';
+import fs, { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
+import OpenAI from 'openai';
+import { Presets, SingleBar } from 'cli-progress';
+import yaml from 'js-yaml';
+import { FileManagerService } from '@/file-manager/file-manager.service';
+import { join } from 'path';
+import { ModelsCliUsecases } from './models.cli.usecases';
+import { spawn } from 'child_process';
+import { BenchmarkConfig } from '../types/benchmark-config.interface';
+import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
+import { inspect } from 'util';
+import { defaultBenchmarkConfiguration } from '@/infrastructure/constants/benchmark';
+
+@Injectable()
+export class BenchmarkCliUsecases {
+  constructor(
+    private readonly modelsCliUsecases: ModelsCliUsecases,
+    private readonly cortexUsecases: CortexUsecases,
+    private readonly fileService: FileManagerService,
+  ) {}
+
+  config: BenchmarkConfig;
+  openai?: OpenAI;
+  /**
+   * Benchmark and analyze the performance of a specific AI model using a variety of system resources
+   */
+  async benchmark() {
+    return this.getBenchmarkConfig().then((config) => {
+      this.config = config;
+
+      // TODO: Using OpenAI client or Cortex client to benchmark?
+      this.openai = new OpenAI({
+        apiKey: this.config.api.api_key,
+        baseURL: this.config.api.base_url,
+        timeout: 20 * 1000,
+      });
+
+      spawn('cortex', ['serve'], {
+        detached: false,
+      });
+
+      return this.cortexUsecases
+        .startCortex()
+        .then(() =>
+          this.modelsCliUsecases.startModel(this.config.api.parameters.model),
+        )
+        .then(() => this.runBenchmarks())
+        .then(() => process.exit(0));
+    });
+  }
+
+  /**
+   * Load the benchmark configuration from config.yaml in the benchmark folder,
+   * writing the default configuration there on the first run
+   */
+  async getBenchmarkConfig() {
+    const benchmarkFolder = await this.fileService.getBenchmarkPath();
+    const configurationPath = join(benchmarkFolder, 'config.yaml');
+    if (existsSync(configurationPath)) {
+      return yaml.load(
+        readFileSync(configurationPath, 'utf8'),
+      ) as BenchmarkConfig;
+    } else {
+      const config = yaml.dump(defaultBenchmarkConfiguration);
+      if (!existsSync(benchmarkFolder)) {
+        mkdirSync(benchmarkFolder, {
+          recursive: true,
+        });
+      }
+      writeFileSync(configurationPath, config, 'utf8');
+      return defaultBenchmarkConfiguration;
+    }
+  }
+
+  /**
+   * Snapshot the current CPU load, memory usage, and GPU controllers
+   */
+  async getSystemResources() {
+    return {
+      cpu: await si.currentLoad(),
+      mem: await si.mem(),
+      gpu: (await si.graphics()).controllers,
+    };
+  }
+
+  /**
+   * Relative change (in %) of CPU load and memory usage between two snapshots
+   */
+  async getResourceChange(startData: any, endData: any) {
+    return {
+      cpu:
+        startData.cpu && endData.cpu
+          ? ((endData.cpu.currentLoad - startData.cpu.currentLoad) /
+              startData.cpu.currentLoad) *
+            100
+          : null,
+      mem:
+        startData.mem && endData.mem
          ? ((endData.mem.used - startData.mem.used) / startData.mem.total) *
+            100
+          : null,
+    };
+  }
+
+  /**
+   * Run a single simulated user: stream one chat completion and measure
+   * latency, time-to-first-token, and resource deltas
+   */
+  async benchmarkUser() {
+    const startResources = await this.getSystemResources();
+    const start = Date.now();
+    let tokenCount = 0;
+    let firstTokenTime = null;
+
+    try {
+      const stream = await this.openai!.chat.completions.create({
+        model: this.config.api.parameters.model,
+        messages: this.config.api.parameters.messages,
+        max_tokens: this.config.api.parameters.max_tokens,
+        stream: true,
+      });
+
+      for await (const chunk of stream) {
+        if (!firstTokenTime && chunk.choices[0]?.delta?.content) {
+          firstTokenTime = Date.now();
+        }
+        tokenCount += (chunk.choices[0]?.delta?.content || '').split(
+          /\s+/,
+        ).length;
+      }
+    } catch (error) {
+      console.error('Error during API call:', error);
+      return null;
+    }
+
+    const latency = Date.now() - start;
+    const ttft = firstTokenTime ? firstTokenTime - start : null;
+    const endResources = await this.getSystemResources();
+    const resourceChange = await this.getResourceChange(
+      startResources,
+      endResources,
+    );
+
+    return {
+      tokens: this.config.api.parameters.max_tokens,
+      token_length: tokenCount, // approximated by whitespace-splitting the streamed content
+      latency,
+      resourceChange,
+      tpot: tokenCount ? latency / tokenCount : 0,
+      throughput: tokenCount / (latency / 1000),
+      ttft,
+    };
+  }
+
+  /**
+   * Linear-interpolated percentile (e.g. p50/p75/p95) over a list of samples
+   */
+  calculatePercentiles(data: number[], percentile: number) {
+    if (data.length === 0) return null;
+    const sorted = data
+      .filter((x: number) => x !== null)
+      .sort((a: number, b: number) => a - b);
+    const pos = (percentile / 100) * sorted.length;
+    if (pos < 1) return sorted[0];
+    if (pos >= sorted.length) return sorted[sorted.length - 1];
+    const lower = sorted[Math.floor(pos) - 1];
+    const upper = sorted[Math.ceil(pos) - 1];
+    return lower + (upper - lower) * (pos - Math.floor(pos));
+  }
+
+  /**
+   * Run the configured number of rounds, each with `concurrency` sequential users,
+   * aggregate p50/p75/p95 metrics, and write everything to output.json
+   */
+  async runBenchmarks() {
+    const allResults: any[] = [];
+    const rounds = this.config.num_rounds || 1;
+
+    const bar = new SingleBar({}, Presets.shades_classic);
+    bar.start(rounds, 0);
+
+    for (let i = 0; i < rounds; i++) {
+      const roundResults = [];
+      const hardwareBefore = await this.getSystemResources();
+
+      for (let j = 0; j < this.config.concurrency; j++) {
+        const result = await this.benchmarkUser();
+        if (result) {
+          roundResults.push(result);
+        }
+      }
+
+      const hardwareAfter = await this.getSystemResources();
+      const hardwareChanges = await this.getResourceChange(
+        hardwareBefore,
+        hardwareAfter,
+      );
+
+      allResults.push({
+        round: i + 1,
+        results: roundResults,
+        hardwareChanges,
+      });
+
+      bar.update(i + 1);
+    }
+
+    const metrics: any = {
+      p50: {},
+      p75: {},
+      p95: {},
+    };
+    const keys = ['latency', 'tpot', 'throughput', 'ttft'];
+    keys.forEach((key) => {
+      const data = allResults.flatMap((r) =>
+        r.results.map((res: object) => res[key as keyof typeof res]),
+      );
+      metrics.p50[key] = this.calculatePercentiles(data, 50);
+      metrics.p75[key] = this.calculatePercentiles(data, 75);
+      metrics.p95[key] = this.calculatePercentiles(data, 95);
+    });
+
+    const output = {
+      hardware: await this.getSystemResources(),
+      results: allResults,
+      metrics,
+    };
+    bar.stop();
+
+    const outputFilePath = join(
+      await this.fileService.getBenchmarkPath(),
+      'output.json',
+    );
+    fs.writeFileSync(outputFilePath, JSON.stringify(output, null, 2));
+    console.log(`Benchmark results and metrics saved to ${outputFilePath}`);
+
+    console.log(
+      inspect(output, { showHidden: false, depth: null, colors: true }),
+    );
+  }
+}
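A quick sanity check of the percentile interpolation used above, with the `calculatePercentiles` body copied out of the diff (the sample latencies are invented):

```ts
// Standalone copy of calculatePercentiles from the diff above, for a quick check.
const calculatePercentiles = (data: number[], percentile: number) => {
  if (data.length === 0) return null;
  const sorted = data.filter((x) => x !== null).sort((a, b) => a - b);
  const pos = (percentile / 100) * sorted.length;
  if (pos < 1) return sorted[0];
  if (pos >= sorted.length) return sorted[sorted.length - 1];
  const lower = sorted[Math.floor(pos) - 1];
  const upper = sorted[Math.ceil(pos) - 1];
  return lower + (upper - lower) * (pos - Math.floor(pos));
};

// Invented sample: four request latencies in milliseconds.
const latencies = [100, 200, 300, 400];
console.log(calculatePercentiles(latencies, 50)); // 200 (pos = 2, lands on a sample)
console.log(calculatePercentiles(latencies, 75)); // 300 (pos = 3)
console.log(calculatePercentiles(latencies, 95)); // 380 (pos = 3.8, interpolates 300..400)
```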
diff --git a/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts b/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts
index feead245a..de14a704d 100644
--- a/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts
+++ b/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts
@@ -11,6 +11,7 @@ import { AssistantsModule } from '@/usecases/assistants/assistants.module';
 import { MessagesModule } from '@/usecases/messages/messages.module';
 import { FileManagerModule } from '@/file-manager/file-manager.module';
 import { PSCliUsecases } from './ps.cli.usecases';
+import { BenchmarkCliUsecases } from './benchmark.cli.usecases';
 
 @Module({
   imports: [
@@ -28,7 +29,14 @@ import { PSCliUsecases } from './ps.cli.usecases';
     ModelsCliUsecases,
     ChatCliUsecases,
     PSCliUsecases,
+    BenchmarkCliUsecases,
+  ],
+  exports: [
+    InitCliUsecases,
+    ModelsCliUsecases,
+    ChatCliUsecases,
+    PSCliUsecases,
+    BenchmarkCliUsecases,
   ],
-  exports: [InitCliUsecases, ModelsCliUsecases, ChatCliUsecases, PSCliUsecases],
 })
 export class CliUsecasesModule {}
diff --git a/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts
index 5e2e8db38..885a10c8b 100644
--- a/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts
+++ b/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts
@@ -6,15 +6,8 @@ import {
 } from '@/infrastructure/constants/cortex';
 import { HttpService } from '@nestjs/axios';
 import { firstValueFrom } from 'rxjs';
+import { ModelStat } from '../types/model-stat.interface';
 
-export interface ModelStat {
-  modelId: string;
-  engine?: string;
-  duration?: string;
-  status: string;
-  vram?: string;
-  ram?: string;
-}
 interface ModelStatResponse {
   object: string;
   data: any;
diff --git a/cortex-js/src/infrastructure/constants/benchmark.ts b/cortex-js/src/infrastructure/constants/benchmark.ts
new file mode 100644
index 000000000..01dfc59bd
--- /dev/null
+++ b/cortex-js/src/infrastructure/constants/benchmark.ts
@@ -0,0 +1,37 @@
+import { BenchmarkConfig } from '../commanders/types/benchmark-config.interface';
+
+export const defaultBenchmarkConfiguration: BenchmarkConfig = {
+  api: {
+    base_url: 'http://127.0.0.1:1337/',
+    api_key: '',
+    parameters: {
+      messages: [
+        {
+          content: 'You are a helpful assistant.',
+          role: 'system',
+        },
+        {
+          content: 'Hello!',
+          role: 'user',
+        },
+      ],
+      model: 'tinyllama',
+      stream: true,
+      max_tokens: 2048,
+      stop: [],
+      frequency_penalty: 0,
+      presence_penalty: 0,
+      temperature: 0.7,
+      top_p: 0.95,
+    },
+  },
+  prompts: {
+    min: 102,
+    max: 2048,
+    samples: 10,
+  },
+  output: 'json',
+  hardware: ['cpu', 'gpu', 'psu', 'chassis', 'ram'],
+  concurrency: 1,
+  num_rounds: 10,
+};
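For reference, on the first run of `cortex benchmark`, `getBenchmarkConfig` writes `defaultBenchmarkConfiguration` to `config.yaml` inside the benchmark folder via `yaml.dump`, so the generated file should look roughly like this (a sketch; exact quoting and formatting follow js-yaml defaults):

```yaml
api:
  base_url: http://127.0.0.1:1337/
  api_key: ''
  parameters:
    messages:
      - content: You are a helpful assistant.
        role: system
      - content: Hello!
        role: user
    model: tinyllama
    stream: true
    max_tokens: 2048
    stop: []
    frequency_penalty: 0
    presence_penalty: 0
    temperature: 0.7
    top_p: 0.95
prompts:
  min: 102
  max: 2048
  samples: 10
output: json
hardware:
  - cpu
  - gpu
  - psu
  - chassis
  - ram
concurrency: 1
num_rounds: 10
```

Editing `base_url` or `model` here points the benchmark at a different endpoint or model; deleting the file causes the defaults to be rewritten on the next run. Results and the aggregated p50/p75/p95 metrics are written next to it as `output.json`.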