feat: cortex benchmark command (#684)
louis-jan authored Jun 12, 2024
1 parent c717df2 commit 0cbbf08
Showing 13 changed files with 372 additions and 11 deletions.
2 changes: 2 additions & 0 deletions cortex-js/package.json
@@ -48,11 +48,13 @@
"decompress": "^4.2.1",
"js-yaml": "^4.1.0",
"nest-commander": "^3.13.0",
"openai": "^4.50.0",
"readline": "^1.3.0",
"reflect-metadata": "^0.2.0",
"rxjs": "^7.8.1",
"sqlite": "^5.1.1",
"sqlite3": "^5.1.7",
"systeminformation": "^5.22.10",
"typeorm": "^0.3.20",
"ulid": "^2.3.0",
"update-notifier": "^5.0.0",
2 changes: 2 additions & 0 deletions cortex-js/src/command.module.ts
@@ -28,6 +28,7 @@ import { PSCommand } from './infrastructure/commanders/ps.command';
import { KillCommand } from './infrastructure/commanders/kill.command';
import { PresetCommand } from './infrastructure/commanders/presets.command';
import { EmbeddingCommand } from './infrastructure/commanders/embeddings.command';
import { BenchmarkCommand } from './infrastructure/commanders/benchmark.command';

@Module({
imports: [
@@ -56,6 +57,7 @@ import { EmbeddingCommand } from './infrastructure/commanders/embeddings.command
KillCommand,
PresetCommand,
EmbeddingCommand,
BenchmarkCommand,

// Questions
InitRunModeQuestions,
11 changes: 11 additions & 0 deletions cortex-js/src/file-manager/file-manager.service.ts
@@ -12,6 +12,7 @@ export class FileManagerService {
private modelFolderName = 'models';
private presetFolderName = 'presets';
private extensionFoldername = 'extensions';
private benchmarkFoldername = 'benchmark';
private cortexCppFolderName = 'cortex-cpp';

/**
@@ -116,4 +117,14 @@
const dataFolderPath = await this.getDataFolderPath();
return join(dataFolderPath, this.extensionFoldername);
}

/**
* Get the benchmark folder path
* Usually it is located at the home directory > cortex > benchmark
* @returns the path to the benchmark folder
*/
async getBenchmarkPath(): Promise<string> {
const dataFolderPath = await this.getDataFolderPath();
return join(dataFolderPath, this.benchmarkFoldername);
}
}
18 changes: 18 additions & 0 deletions cortex-js/src/infrastructure/commanders/benchmark.command.ts
@@ -0,0 +1,18 @@
import { CommandRunner, SubCommand } from 'nest-commander';
import { BenchmarkCliUsecases } from './usecases/benchmark.cli.usecases';

@SubCommand({
name: 'benchmark',
subCommands: [],
description:
'Benchmark and analyze the performance of a specific AI model using a variety of system resources',
})
export class BenchmarkCommand extends CommandRunner {
constructor(private readonly benchmarkUsecases: BenchmarkCliUsecases) {
super();
}

async run(): Promise<void> {
return this.benchmarkUsecases.benchmark();
}
}
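
For context, a minimal sketch of how nest-commander would dispatch this subcommand. The entry-point file and bootstrap function below are assumptions for illustration, not part of this commit; only CommandModule and BenchmarkCommand come from the changes above.

import { CommandFactory } from 'nest-commander';
import { CommandModule } from './command.module';

// Hypothetical CLI entry point: CommandFactory parses process.argv, matches
// the 'benchmark' subcommand registered in CommandModule, and invokes
// BenchmarkCommand.run(), which delegates to BenchmarkCliUsecases.benchmark().
async function bootstrap() {
  await CommandFactory.run(CommandModule);
}
bootstrap();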
3 changes: 2 additions & 1 deletion cortex-js/src/infrastructure/commanders/chat.command.ts
@@ -6,8 +6,9 @@ import {
} from 'nest-commander';
import { ChatCliUsecases } from './usecases/chat.cli.usecases';
import { exit } from 'node:process';
import { ModelStat, PSCliUsecases } from './usecases/ps.cli.usecases';
import { PSCliUsecases } from './usecases/ps.cli.usecases';
import { ModelsUsecases } from '@/usecases/models/models.usecases';
import { ModelStat } from './types/model-stat.interface';

type ChatOptions = {
threadId?: string;
2 changes: 2 additions & 0 deletions cortex-js/src/infrastructure/commanders/cortex-command.commander.ts
@@ -10,6 +10,7 @@ import { KillCommand } from './kill.command';
import pkg from '@/../package.json';
import { PresetCommand } from './presets.command';
import { EmbeddingCommand } from './embeddings.command';
import { BenchmarkCommand } from './benchmark.command';

interface CortexCommandOptions {
version: boolean;
@@ -26,6 +27,7 @@ interface CortexCommandOptions {
KillCommand,
PresetCommand,
EmbeddingCommand,
BenchmarkCommand,
],
description: 'Cortex CLI',
})
3 changes: 2 additions & 1 deletion cortex-js/src/infrastructure/commanders/embeddings.command.ts
@@ -5,9 +5,10 @@ import {
SubCommand,
} from 'nest-commander';
import { ModelsUsecases } from '@/usecases/models/models.usecases';
import { ModelStat, PSCliUsecases } from './usecases/ps.cli.usecases';
import { PSCliUsecases } from './usecases/ps.cli.usecases';
import { ChatCliUsecases } from './usecases/chat.cli.usecases';
import { inspect } from 'util';
import { ModelStat } from './types/model-stat.interface';

interface EmbeddingCommandOptions {
encoding_format?: string;
28 changes: 28 additions & 0 deletions cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts
@@ -0,0 +1,28 @@
import { ChatCompletionMessageParam } from 'openai/resources';

export interface BenchmarkConfig {
api: {
base_url: string;
api_key: string;
parameters: {
messages: ChatCompletionMessageParam[];
model: string;
stream?: boolean;
max_tokens?: number;
stop?: string[];
frequency_penalty?: number;
presence_penalty?: number;
temperature?: number;
top_p?: number;
};
};
prompts?: {
min: number;
max: number;
samples: number;
};
output: string;
concurrency: number;
num_rounds: number;
hardware: string[];
}
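
To make the expected config.yaml contents concrete, here is a hypothetical object satisfying BenchmarkConfig. Every value is illustrative; the real defaults live in defaultBenchmarkConfiguration (in '@/infrastructure/constants/benchmark', not shown in this diff) and may differ.

import { BenchmarkConfig } from './benchmark-config.interface';

// Illustrative values only — not the shipped defaults.
const exampleConfig: BenchmarkConfig = {
  api: {
    base_url: 'http://127.0.0.1:1337/v1',
    api_key: '',
    parameters: {
      messages: [{ role: 'user', content: 'Write a short story.' }],
      model: 'tinyllama',
      stream: true,
      max_tokens: 512,
      temperature: 0.7,
    },
  },
  prompts: { min: 128, max: 1024, samples: 10 },
  output: 'json',
  concurrency: 1,
  num_rounds: 10,
  hardware: ['cpu', 'gpu', 'ram'],
};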
8 changes: 8 additions & 0 deletions cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts
@@ -0,0 +1,8 @@
export interface ModelStat {
modelId: string;
engine?: string;
duration?: string;
status: string;
vram?: string;
ram?: string;
}
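
A hypothetical ModelStat record, with invented values, of the kind the ps use case returns for a loaded model:

import { ModelStat } from './model-stat.interface';

// Hypothetical entry; all field values are invented for illustration.
const stat: ModelStat = {
  modelId: 'tinyllama',
  engine: 'cortex.llamacpp',
  status: 'running',
  duration: '2m 30s',
  ram: '-',
  vram: '640 MiB',
};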
250 changes: 250 additions & 0 deletions cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
@@ -0,0 +1,250 @@
import { Injectable } from '@nestjs/common';
import si from 'systeminformation';
import fs, { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
import OpenAI from 'openai';
import { Presets, SingleBar } from 'cli-progress';
import yaml from 'js-yaml';
import { FileManagerService } from '@/file-manager/file-manager.service';
import { join } from 'path';
import { ModelsCliUsecases } from './models.cli.usecases';
import { spawn } from 'child_process';
import { BenchmarkConfig } from '../types/benchmark-config.interface';
import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
import { inspect } from 'util';
import { defaultBenchmarkConfiguration } from '@/infrastructure/constants/benchmark';

@Injectable()
export class BenchmarkCliUsecases {
constructor(
private readonly modelsCliUsecases: ModelsCliUsecases,
private readonly cortexUsecases: CortexUsecases,
private readonly fileService: FileManagerService,
) {}

config: BenchmarkConfig;
openai?: OpenAI;
/**
* Benchmark and analyze the performance of a specific AI model using a variety of system resources
*/
async benchmark() {
return this.getBenchmarkConfig().then((config) => {
this.config = config;

// TODO: Using OpenAI client or Cortex client to benchmark?
this.openai = new OpenAI({
apiKey: this.config.api.api_key,
baseURL: this.config.api.base_url,
timeout: 20 * 1000,
});

spawn('cortex', ['serve'], {
detached: false,
});

return this.cortexUsecases
.startCortex()
.then(() =>
this.modelsCliUsecases.startModel(this.config.api.parameters.model),
)
.then(() => this.runBenchmarks())
.then(() => process.exit(0));
});
}

/**
* Get the benchmark configuration
* @returns the benchmark configuration
*/
private async getBenchmarkConfig() {
const benchmarkFolder = await this.fileService.getBenchmarkPath();
const configurationPath = join(benchmarkFolder, 'config.yaml');
if (existsSync(configurationPath)) {
return yaml.load(
readFileSync(configurationPath, 'utf8'),
) as BenchmarkConfig;
} else {
const config = yaml.dump(defaultBenchmarkConfiguration);
if (!existsSync(benchmarkFolder)) {
mkdirSync(benchmarkFolder, {
recursive: true,
});
}
writeFileSync(configurationPath, config, 'utf8');
return defaultBenchmarkConfiguration;
}
}

/**
* Get the system resources for benchmarking
* using the systeminformation library
* @returns the system resources
*/
private async getSystemResources() {
return {
cpu: await si.currentLoad(),
mem: await si.mem(),
gpu: (await si.graphics()).controllers,
};
}

/**
* Get the resource change between two data points
* @param startData the start data point
* @param endData the end data point
* @returns the resource change
*/
private async getResourceChange(startData: any, endData: any) {
return {
cpu:
startData.cpu && endData.cpu
? ((endData.cpu.currentLoad - startData.cpu.currentLoad) /
startData.cpu.currentLoad) *
100
: null,
mem:
startData.mem && endData.mem
? ((endData.mem.used - startData.mem.used) / startData.mem.total) *
100
: null,
};
}
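
// Worked example with invented numbers: if mem.used rises from 8 GiB to
// 10 GiB on a 16 GiB machine, mem change = ((10 - 8) / 16) * 100 = 12.5%.
// cpu is measured against its own starting load, so it reports relative
// growth in load rather than a change in absolute percentage points.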

/**
* Benchmark a single simulated user request via the OpenAI-compatible API
* @returns the request metrics (latency, ttft, tpot, throughput, resource change), or null on error
*/
private async benchmarkUser() {
const startResources = await this.getSystemResources();
const start = Date.now();
let tokenCount = 0;
let firstTokenTime = null;

try {
const stream = await this.openai!.chat.completions.create({
model: this.config.api.parameters.model,
messages: this.config.api.parameters.messages,
max_tokens: this.config.api.parameters.max_tokens,
stream: true,
});

for await (const chunk of stream) {
if (!firstTokenTime && chunk.choices[0]?.delta?.content) {
firstTokenTime = Date.now();
}
// Count whitespace-delimited tokens, ignoring empty chunks
tokenCount += (chunk.choices[0]?.delta?.content || '')
.split(/\s+/)
.filter(Boolean).length;
}
} catch (error) {
console.error('Error during API call:', error);
return null;
}

const latency = Date.now() - start;
const ttft = firstTokenTime ? firstTokenTime - start : null;
const endResources = await this.getSystemResources();
const resourceChange = await this.getResourceChange(
startResources,
endResources,
);

return {
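// tokens = configured max_tokens; token_length = whitespace-token count of
// the streamed text; tpot = latency / token_length (ms per output token);
// throughput = tokens per second; ttft = ms until the first streamed token.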
tokens: this.config.api.parameters.max_tokens,
token_length: tokenCount, // Dynamically calculated token count
latency,
resourceChange,
tpot: tokenCount ? latency / tokenCount : 0,
throughput: tokenCount / (latency / 1000),
ttft,
};
}

/**
* Calculate the percentiles of the data
* @param data the data to calculate percentiles for
* @param percentile the percentile to calculate
* @returns the percentile value
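* Example (invented data): for [10, 20, 30] and percentile 50, pos = 1.5,
* so the result interpolates halfway between 10 and 20 and returns 15.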
*/
private calculatePercentiles(data: number[], percentile: number) {
const sorted = data
.filter((x: number) => x !== null)
.sort((a: number, b: number) => a - b);
if (sorted.length === 0) return null;
const pos = (percentile / 100) * sorted.length;
if (pos < 1) return sorted[0];
if (pos >= sorted.length) return sorted[sorted.length - 1];
const lower = sorted[Math.floor(pos) - 1];
const upper = sorted[Math.ceil(pos) - 1];
return lower + (upper - lower) * (pos - Math.floor(pos));
}

/**
* Run the benchmarks
*/
private async runBenchmarks() {
const allResults: any[] = [];
const rounds = this.config.num_rounds || 1;

const bar = new SingleBar({}, Presets.shades_classic);
bar.start(rounds, 0);

for (let i = 0; i < rounds; i++) {
const roundResults = [];
const hardwareBefore = await this.getSystemResources();

for (let j = 0; j < this.config.concurrency; j++) {
const result = await this.benchmarkUser();
if (result) {
roundResults.push(result);
}
}

const hardwareAfter = await this.getSystemResources();
const hardwareChanges = await this.getResourceChange(
hardwareBefore,
hardwareAfter,
);

allResults.push({
round: i + 1,
results: roundResults,
hardwareChanges,
});

bar.update(i + 1);
}

const metrics: any = {
p50: {},
p75: {},
p95: {},
};
const keys = ['latency', 'tpot', 'throughput', 'ttft'];
keys.forEach((key) => {
const data = allResults.flatMap((r) =>
r.results.map((res: object) => res[key as keyof typeof res]),
);
metrics.p50[key] = this.calculatePercentiles(data, 50);
metrics.p75[key] = this.calculatePercentiles(data, 75);
metrics.p95[key] = this.calculatePercentiles(data, 95);
});

const output = {
hardware: await this.getSystemResources(),
results: allResults,
metrics,
};
bar.stop();

const outputFilePath = join(
await this.fileService.getBenchmarkPath(),
'output.json',
);
fs.writeFileSync(outputFilePath, JSON.stringify(output, null, 2));
console.log(`Benchmark results and metrics saved to ${outputFilePath}`);

console.log(
inspect(output, { showHidden: false, depth: null, colors: true }),
);
}
}
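
For reference, a rough sketch of the structure written to output.json, with field names taken from runBenchmarks() and benchmarkUser() above. The hardware payloads come from systeminformation and vary by machine; this interface is an editorial sketch, not part of the commit.

// Approximate shape of output.json (sketch only).
interface BenchmarkOutput {
  hardware: { cpu: unknown; mem: unknown; gpu: unknown[] };
  results: Array<{
    round: number;
    results: Array<{
      tokens?: number; // configured max_tokens
      token_length: number; // whitespace-based count of streamed text
      latency: number; // ms for the full request
      resourceChange: { cpu: number | null; mem: number | null };
      tpot: number; // ms per output token
      throughput: number; // tokens per second
      ttft: number | null; // ms to first token
    }>;
    hardwareChanges: { cpu: number | null; mem: number | null };
  }>;
  metrics: Record<'p50' | 'p75' | 'p95', Record<string, number | null>>;
}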