feat: cortex benchmark command #684

Merged: 1 commit, Jun 12, 2024
2 changes: 2 additions & 0 deletions cortex-js/package.json
@@ -48,11 +48,13 @@
"decompress": "^4.2.1",
"js-yaml": "^4.1.0",
"nest-commander": "^3.13.0",
"openai": "^4.50.0",
"readline": "^1.3.0",
"reflect-metadata": "^0.2.0",
"rxjs": "^7.8.1",
"sqlite": "^5.1.1",
"sqlite3": "^5.1.7",
"systeminformation": "^5.22.10",
"typeorm": "^0.3.20",
"ulid": "^2.3.0",
"update-notifier": "^5.0.0",
2 changes: 2 additions & 0 deletions cortex-js/src/command.module.ts
@@ -28,6 +28,7 @@ import { PSCommand } from './infrastructure/commanders/ps.command';
import { KillCommand } from './infrastructure/commanders/kill.command';
import { PresetCommand } from './infrastructure/commanders/presets.command';
import { EmbeddingCommand } from './infrastructure/commanders/embeddings.command';
import { BenchmarkCommand } from './infrastructure/commanders/benchmark.command';

@Module({
imports: [
@@ -56,6 +57,7 @@ import { EmbeddingCommand } from './infrastructure/commanders/embeddings.command
KillCommand,
PresetCommand,
EmbeddingCommand,
BenchmarkCommand,

// Questions
InitRunModeQuestions,
11 changes: 11 additions & 0 deletions cortex-js/src/file-manager/file-manager.service.ts
@@ -12,6 +12,7 @@ export class FileManagerService {
private modelFolderName = 'models';
private presetFolderName = 'presets';
private extensionFoldername = 'extensions';
private benchmarkFoldername = 'benchmark';
private cortexCppFolderName = 'cortex-cpp';

/**
@@ -116,4 +117,14 @@
const dataFolderPath = await this.getDataFolderPath();
return join(dataFolderPath, this.extensionFoldername);
}

/**
* Get the benchmark folder path
* Usually it is located at the home directory > cortex > benchmark
* @returns the path to the benchmark folder
*/
async getBenchmarkPath(): Promise<string> {
const dataFolderPath = await this.getDataFolderPath();
return join(dataFolderPath, this.benchmarkFoldername);
}
}
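A hedged usage sketch of the new helper, called from a service that already injects FileManagerService (the resolved path depends on getDataFolderPath(); the home-directory layout is an assumption taken from the comment above):

const benchmarkFolder = await this.fileService.getBenchmarkPath();
// e.g. /home/alice/cortex/benchmark under the assumed default data folder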
18 changes: 18 additions & 0 deletions cortex-js/src/infrastructure/commanders/benchmark.command.ts
@@ -0,0 +1,18 @@
import { CommandRunner, SubCommand } from 'nest-commander';
import { BenchmarkCliUsecases } from './usecases/benchmark.cli.usecases';

@SubCommand({
name: 'benchmark',
subCommands: [],
description:
'Benchmark and analyze the performance of a specific AI model using a variety of system resources',
})
export class BenchmarkCommand extends CommandRunner {
constructor(private readonly benchmarkUsecases: BenchmarkCliUsecases) {
super();
}

async run(): Promise<void> {
return this.benchmarkUsecases.benchmark();
}
}
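With the subcommand wired into the CLI (see command.module.ts above and the root command below), invoking the benchmark from a shell should look like:

cortex benchmark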
3 changes: 2 additions & 1 deletion cortex-js/src/infrastructure/commanders/chat.command.ts
@@ -6,8 +6,9 @@ import {
} from 'nest-commander';
import { ChatCliUsecases } from './usecases/chat.cli.usecases';
import { exit } from 'node:process';
-import { ModelStat, PSCliUsecases } from './usecases/ps.cli.usecases';
+import { PSCliUsecases } from './usecases/ps.cli.usecases';
import { ModelsUsecases } from '@/usecases/models/models.usecases';
+import { ModelStat } from './types/model-stat.interface';

type ChatOptions = {
threadId?: string;
2 changes: 2 additions & 0 deletions cortex-js/src/infrastructure/commanders/cortex-command.commander.ts
@@ -10,6 +10,7 @@ import { KillCommand } from './kill.command';
import pkg from '@/../package.json';
import { PresetCommand } from './presets.command';
import { EmbeddingCommand } from './embeddings.command';
import { BenchmarkCommand } from './benchmark.command';

interface CortexCommandOptions {
version: boolean;
@@ -26,6 +27,7 @@ interface CortexCommandOptions {
KillCommand,
PresetCommand,
EmbeddingCommand,
BenchmarkCommand,
],
description: 'Cortex CLI',
})
3 changes: 2 additions & 1 deletion cortex-js/src/infrastructure/commanders/embeddings.command.ts
@@ -5,9 +5,10 @@ import {
SubCommand,
} from 'nest-commander';
import { ModelsUsecases } from '@/usecases/models/models.usecases';
-import { ModelStat, PSCliUsecases } from './usecases/ps.cli.usecases';
+import { PSCliUsecases } from './usecases/ps.cli.usecases';
import { ChatCliUsecases } from './usecases/chat.cli.usecases';
import { inspect } from 'util';
+import { ModelStat } from './types/model-stat.interface';

interface EmbeddingCommandOptions {
encoding_format?: string;
28 changes: 28 additions & 0 deletions cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts
@@ -0,0 +1,28 @@
import { ChatCompletionMessageParam } from 'openai/resources';

export interface BenchmarkConfig {
api: {
base_url: string;
api_key: string;
parameters: {
messages: ChatCompletionMessageParam[];
model: string;
stream?: boolean;
max_tokens?: number;
stop?: string[];
frequency_penalty?: number;
presence_penalty?: number;
temperature?: number;
top_p?: number;
};
};
prompts?: {
min: number;
max: number;
samples: number;
};
output: string;
concurrency: number;
num_rounds: number;
hardware: string[];
}
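For reference, a hedged sketch of an object satisfying this interface; every value below is illustrative and not necessarily what ships in defaultBenchmarkConfiguration:

const exampleConfig: BenchmarkConfig = {
api: {
base_url: 'http://localhost:1337/v1', // assumed local Cortex endpoint
api_key: '', // often unused by local servers
parameters: {
messages: [{ role: 'user', content: 'Write a haiku about GPUs.' }],
model: 'tinyllama', // hypothetical model id
stream: true,
max_tokens: 512,
},
},
prompts: { min: 128, max: 1024, samples: 10 },
output: 'json', // assumed output format label
concurrency: 1,
num_rounds: 10,
hardware: ['cpu', 'gpu', 'ram'], // assumed resource labels
};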
8 changes: 8 additions & 0 deletions cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts
@@ -0,0 +1,8 @@
export interface ModelStat {
modelId: string;
engine?: string;
duration?: string;
status: string;
vram?: string;
ram?: string;
}
250 changes: 250 additions & 0 deletions cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
@@ -0,0 +1,250 @@
import { Injectable } from '@nestjs/common';
import si from 'systeminformation';
import fs, { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
import OpenAI from 'openai';
import { Presets, SingleBar } from 'cli-progress';
import yaml from 'js-yaml';
import { FileManagerService } from '@/file-manager/file-manager.service';
import { join } from 'path';
import { ModelsCliUsecases } from './models.cli.usecases';
import { spawn } from 'child_process';
import { BenchmarkConfig } from '../types/benchmark-config.interface';
import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
import { inspect } from 'util';
import { defaultBenchmarkConfiguration } from '@/infrastructure/constants/benchmark';

@Injectable()
export class BenchmarkCliUsecases {
constructor(
private readonly modelsCliUsecases: ModelsCliUsecases,
private readonly cortexUsecases: CortexUsecases,
private readonly fileService: FileManagerService,
) {}

config: BenchmarkConfig;
openai?: OpenAI;
/**
* Benchmark and analyze the performance of a specific AI model using a variety of system resources
*/
async benchmark() {
return this.getBenchmarkConfig().then((config) => {
this.config = config;

// TODO: decide whether to benchmark via the OpenAI client or the Cortex client
this.openai = new OpenAI({
apiKey: this.config.api.api_key,
baseURL: this.config.api.base_url,
timeout: 20 * 1000,
});

spawn('cortex', ['serve'], {
detached: false,
});

return this.cortexUsecases
.startCortex()
.then(() =>
this.modelsCliUsecases.startModel(this.config.api.parameters.model),
)
.then(() => this.runBenchmarks())
.then(() => process.exit(0));
});
}

/**
* Get the benchmark configuration
* @returns the benchmark configuration
*/
private async getBenchmarkConfig() {
const benchmarkFolder = await this.fileService.getBenchmarkPath();
const configurationPath = join(benchmarkFolder, 'config.yaml');
if (existsSync(configurationPath)) {
return yaml.load(
readFileSync(configurationPath, 'utf8'),
) as BenchmarkConfig;
} else {
const config = yaml.dump(defaultBenchmarkConfiguration);
if (!existsSync(benchmarkFolder)) {
mkdirSync(benchmarkFolder, {
recursive: true,
});
}
writeFileSync(configurationPath, config, 'utf8');
return defaultBenchmarkConfiguration;
}
}

/**
* Get the system resources for benchmarking
* using the systeminformation library
* @returns the system resources
*/
private async getSystemResources() {
return {
cpu: await si.currentLoad(),
mem: await si.mem(),
gpu: (await si.graphics()).controllers,
};
}

/**
* Get the resource change between two data points
* @param startData the start data point
* @param endData the end data point
* @returns the resource change
*/
private async getResourceChange(startData: any, endData: any) {
return {
cpu:
startData.cpu && endData.cpu
? ((endData.cpu.currentLoad - startData.cpu.currentLoad) /
startData.cpu.currentLoad) *
100
: null,
mem:
startData.mem && endData.mem
? ((endData.mem.used - startData.mem.used) / startData.mem.total) *
100
: null,
};
}
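// Worked example (illustrative numbers, not from a real run):
//   start cpu.currentLoad = 40, end cpu.currentLoad = 50
//     -> (50 - 40) / 40 * 100 = 25% CPU-load increase
//   start mem.used = 8 GiB, end mem.used = 9.6 GiB, mem.total = 16 GiB
//     -> (9.6 - 8) / 16 * 100 = 10% of total memory newly used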

/**
* Run a single simulated user request against the OpenAI-compatible API
* @returns the measured metrics for the request, or null if the call failed
*/
private async benchmarkUser() {
const startResources = await this.getSystemResources();
const start = Date.now();
let tokenCount = 0;
let firstTokenTime = null;

try {
const stream = await this.openai!.chat.completions.create({
model: this.config.api.parameters.model,
messages: this.config.api.parameters.messages,
max_tokens: this.config.api.parameters.max_tokens,
stream: true,
});

for await (const chunk of stream) {
if (!firstTokenTime && chunk.choices[0]?.delta?.content) {
firstTokenTime = Date.now();
}
// Approximate: counts whitespace-delimited words, not true model tokens
const content = (chunk.choices[0]?.delta?.content || '').trim();
if (content) tokenCount += content.split(/\s+/).length;
}
} catch (error) {
console.error('Error during API call:', error);
return null;
}

const latency = Date.now() - start;
const ttft = firstTokenTime ? firstTokenTime - start : null;
const endResources = await this.getSystemResources();
const resourceChange = await this.getResourceChange(
startResources,
endResources,
);

return {
tokens: this.config.api.parameters.max_tokens,
token_length: tokenCount, // approximate whitespace-delimited token count
latency,
resourceChange,
tpot: tokenCount ? latency / tokenCount : 0,
throughput: tokenCount / (latency / 1000),
ttft,
};
}
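// Example of the derived metrics for a hypothetical request: 150
// whitespace-delimited tokens streamed in 3000 ms, first token after 400 ms:
//   tpot       = 3000 / 150        = 20 ms per token
//   throughput = 150 / (3000/1000) = 50 tokens per second
//   ttft       = 400 ms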

/**
* Calculate a single percentile of the data
* @param data the data to calculate percentiles for
* @param percentile the percentile to calculate
* @returns the percentile value
*/
private calculatePercentiles(data: number[], percentile: number) {
const sorted = data
.filter((x: number) => x !== null)
.sort((a: number, b: number) => a - b);
if (sorted.length === 0) return null;
const pos = (percentile / 100) * sorted.length;
if (pos < 1) return sorted[0];
if (pos >= sorted.length) return sorted[sorted.length - 1];
const lower = sorted[Math.floor(pos) - 1];
const upper = sorted[Math.ceil(pos) - 1];
return lower + (upper - lower) * (pos - Math.floor(pos));
}
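// Interpolation check with hypothetical latencies [10, 20, 30, 40, 50]:
//   p50: pos = 0.50 * 5 = 2.5  -> between 20 and 30 -> 20 + (30 - 20) * 0.5  = 25
//   p95: pos = 0.95 * 5 = 4.75 -> between 40 and 50 -> 40 + (50 - 40) * 0.75 = 47.5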

/**
* Run the benchmarks
*/
private async runBenchmarks() {
const allResults: any[] = [];
const rounds = this.config.num_rounds || 1;

const bar = new SingleBar({}, Presets.shades_classic);
bar.start(rounds, 0);

for (let i = 0; i < rounds; i++) {
const roundResults = [];
const hardwareBefore = await this.getSystemResources();

for (let j = 0; j < this.config.concurrency; j++) {
const result = await this.benchmarkUser();
if (result) {
roundResults.push(result);
}
}

const hardwareAfter = await this.getSystemResources();
const hardwareChanges = await this.getResourceChange(
hardwareBefore,
hardwareAfter,
);

allResults.push({
round: i + 1,
results: roundResults,
hardwareChanges,
});

bar.update(i + 1);
}

const metrics: any = {
p50: {},
p75: {},
p95: {},
};
const keys = ['latency', 'tpot', 'throughput', 'ttft'];
keys.forEach((key) => {
const data = allResults.flatMap((r) =>
r.results.map((res: Record<string, number | null>) => res[key]),
);
metrics.p50[key] = this.calculatePercentiles(data, 50);
metrics.p75[key] = this.calculatePercentiles(data, 75);
metrics.p95[key] = this.calculatePercentiles(data, 95);
});

const output = {
hardware: await this.getSystemResources(),
results: allResults,
metrics,
};
bar.stop();

const outputFilePath = join(
await this.fileService.getBenchmarkPath(),
'output.json',
);
fs.writeFileSync(outputFilePath, JSON.stringify(output, null, 2));
console.log(`Benchmark results and metrics saved to ${outputFilePath}`);

console.log(
inspect(output, { showHidden: false, depth: null, colors: true }),
);
}
}
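Putting it together, the saved output.json should look roughly like this (a hedged sketch; the numbers are illustrative and the hardware objects are whatever systeminformation returns):

{
"hardware": { "cpu": { ... }, "mem": { ... }, "gpu": [ ... ] },
"results": [
{
"round": 1,
"results": [
{ "tokens": 512, "token_length": 150, "latency": 3000,
"resourceChange": { "cpu": 25, "mem": 10 },
"tpot": 20, "throughput": 50, "ttft": 400 }
],
"hardwareChanges": { "cpu": 25, "mem": 10 }
}
],
"metrics": {
"p50": { "latency": 3000, "tpot": 20, "throughput": 50, "ttft": 400 },
"p75": { ... },
"p95": { ... }
}
}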