-
Notifications
You must be signed in to change notification settings - Fork 139
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
13 changed files
with
345 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
18 changes: 18 additions & 0 deletions
18
cortex-js/src/infrastructure/commanders/benchmark.command.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import { CommandRunner, SubCommand } from 'nest-commander'; | ||
import { BenchmarkCliUsecases } from './usecases/benchmark.cli.usecases'; | ||
|
||
/**
 * `benchmark` sub-command.
 *
 * Thin CLI entry point: all work is delegated to
 * `BenchmarkCliUsecases.benchmark()`.
 */
@SubCommand({
  name: 'benchmark',
  subCommands: [],
  description:
    'Benchmark and analyze the performance of a specific AI model using a variety of system resources',
})
export class BenchmarkCommand extends CommandRunner {
  constructor(private readonly benchmarkUsecases: BenchmarkCliUsecases) {
    super();
  }

  /**
   * Runs the benchmark use case and resolves once it has finished.
   * Any rejection from the use case propagates to the caller.
   */
  async run(): Promise<void> {
    await this.benchmarkUsecases.benchmark();
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
28 changes: 28 additions & 0 deletions
28
cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import { ChatCompletionMessageParam } from 'openai/resources'; | ||
|
||
/**
 * Shape of the benchmark configuration (the benchmark `config.yaml`).
 */
export interface BenchmarkConfig {
  /** Connection settings and request parameters for the chat-completions API. */
  api: {
    /** Base URL handed to the API client. */
    base_url: string;
    /** API key handed to the API client. */
    api_key: string;
    /** Chat-completion request parameters. */
    parameters: {
      /** Conversation sent with every benchmark request. */
      messages: ChatCompletionMessageParam[];
      /** Identifier of the model to benchmark. */
      model: string;
      stream?: boolean;
      /** Upper bound on generated tokens per request. */
      max_tokens?: number;
      stop?: string[];
      frequency_penalty?: number;
      presence_penalty?: number;
      temperature?: number;
      top_p?: number;
    };
  };
  // NOTE(review): presumably bounds and sample count for generated prompts —
  // confirm against the consumer of this field.
  prompts?: {
    min: number;
    max: number;
    samples: number;
  };
  // NOTE(review): looks like an output format/target selector — confirm.
  output: string;
  /** Number of benchmark requests issued per round. */
  concurrency: number;
  /** Number of benchmark rounds to run. */
  num_rounds: number;
  // NOTE(review): presumably the hardware categories to report on — confirm.
  hardware: string[];
}
8 changes: 8 additions & 0 deletions
8
cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
/**
 * Status record for a single model.
 *
 * Only `modelId` and `status` are required; every other field may be absent.
 */
export interface ModelStat {
  /** Identifier of the model this record describes. */
  modelId: string;
  // NOTE(review): presumably the name of the engine serving the model — confirm.
  engine?: string;
  // NOTE(review): pre-formatted string; unit/format not visible here — confirm.
  duration?: string;
  /** Status of the model, as a free-form string. */
  status: string;
  // NOTE(review): presumably formatted GPU memory usage — confirm.
  vram?: string;
  // NOTE(review): presumably formatted system memory usage — confirm.
  ram?: string;
}
223 changes: 223 additions & 0 deletions
223
cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,223 @@ | ||
import { Injectable } from '@nestjs/common'; | ||
import si from 'systeminformation'; | ||
import fs, { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs'; | ||
import OpenAI from 'openai'; | ||
import { Presets, SingleBar } from 'cli-progress'; | ||
import yaml from 'js-yaml'; | ||
import { FileManagerService } from '@/file-manager/file-manager.service'; | ||
import { join } from 'path'; | ||
import { ModelsCliUsecases } from './models.cli.usecases'; | ||
import { spawn } from 'child_process'; | ||
import { BenchmarkConfig } from '../types/benchmark-config.interface'; | ||
import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; | ||
import { inspect } from 'util'; | ||
import { defaultBenchmarkConfiguration } from '@/infrastructure/constants/benchmark'; | ||
|
||
/**
 * CLI use cases for benchmarking a model through an OpenAI-compatible
 * chat-completions endpoint while sampling system resources.
 */
@Injectable()
export class BenchmarkCliUsecases {
  constructor(
    private readonly modelsCliUsecases: ModelsCliUsecases,
    private readonly cortexUsecases: CortexUsecases,
    private readonly fileService: FileManagerService,
  ) {}

  /** Active configuration; assigned by `benchmark()` before any request is made. */
  config: BenchmarkConfig;
  /** API client; created by `benchmark()`. */
  openai?: OpenAI;

  /**
   * Benchmark and analyze the performance of a specific AI model using a variety of system resources
   *
   * Loads the configuration, launches `cortex serve`, starts Cortex and the
   * configured model, runs the benchmark rounds and then exits the process
   * with code 0. A failure in any step rejects the returned promise instead.
   */
  async benchmark(): Promise<void> {
    this.config = await this.getBenchmarkConfig();

    // TODO: Using OpenAI client or Cortex client to benchmark?
    this.openai = new OpenAI({
      apiKey: this.config.api.api_key,
      baseURL: this.config.api.base_url,
      timeout: 20 * 1000,
    });

    const server = spawn('cortex', ['serve'], {
      detached: false,
    });
    // Without a listener, a failed spawn (e.g. `cortex` missing from PATH)
    // emits an unhandled 'error' event and takes the whole process down.
    server.on('error', (error) => {
      console.error('Failed to launch `cortex serve`:', error);
    });

    await this.cortexUsecases.startCortex();
    await this.modelsCliUsecases.startModel(this.config.api.parameters.model);
    await this.runBenchmarks();
    process.exit(0);
  }

  /**
   * Loads `config.yaml` from the benchmark folder.
   *
   * When the file does not exist, the folder is created if needed, the
   * default configuration is written to disk and returned. When the file
   * exists but does not contain a YAML mapping (for example, it is empty),
   * the default configuration is returned and the file is left untouched.
   *
   * @returns The configuration to benchmark with.
   */
  async getBenchmarkConfig(): Promise<BenchmarkConfig> {
    const benchmarkFolder = await this.fileService.getBenchmarkPath();
    const configurationPath = join(benchmarkFolder, 'config.yaml');

    if (existsSync(configurationPath)) {
      // NOTE(review): js-yaml 3.x `load` can construct arbitrary JS types;
      // confirm the project is on 4.x, where `load` is safe by default.
      const parsed = yaml.load(readFileSync(configurationPath, 'utf8'));
      if (parsed !== null && typeof parsed === 'object') {
        // The file content is trusted to match the interface; it is not validated.
        return parsed as BenchmarkConfig;
      }
      console.warn(
        `Benchmark configuration at ${configurationPath} is empty or not a YAML mapping; using defaults`,
      );
      return defaultBenchmarkConfiguration;
    }

    if (!existsSync(benchmarkFolder)) {
      mkdirSync(benchmarkFolder, {
        recursive: true,
      });
    }
    writeFileSync(
      configurationPath,
      yaml.dump(defaultBenchmarkConfiguration),
      'utf8',
    );
    return defaultBenchmarkConfiguration;
  }

  /**
   * Takes a snapshot of CPU load, memory and GPU controllers via
   * `systeminformation`.
   */
  async getSystemResources() {
    const cpu = await si.currentLoad();
    const mem = await si.mem();
    const graphics = await si.graphics();
    return { cpu, mem, gpu: graphics.controllers };
  }

  /**
   * Computes how CPU load and memory usage changed between two snapshots
   * produced by `getSystemResources()`.
   *
   * @param startData Snapshot taken before the work.
   * @param endData Snapshot taken after the work.
   * @returns `cpu`: change in load relative to the starting load, in percent;
   *   `mem`: change in used memory relative to total memory, in percent.
   *   Either value is `null` when its inputs are missing or its denominator
   *   is zero.
   */
  async getResourceChange(startData: any, endData: any) {
    // systeminformation 5.x exposes `currentLoad`; 4.x used `currentload`.
    const readLoad = (cpu: any): unknown =>
      cpu?.currentLoad ?? cpu?.currentload;

    const startLoad = readLoad(startData?.cpu);
    const endLoad = readLoad(endData?.cpu);
    const cpu =
      typeof startLoad === 'number' &&
      typeof endLoad === 'number' &&
      startLoad !== 0
        ? ((endLoad - startLoad) / startLoad) * 100
        : null;

    const startMem = startData?.mem;
    const endMem = endData?.mem;
    const mem =
      typeof startMem?.used === 'number' &&
      typeof endMem?.used === 'number' &&
      typeof startMem?.total === 'number' &&
      startMem.total > 0
        ? ((endMem.used - startMem.used) / startMem.total) * 100
        : null;

    return { cpu, mem };
  }

  /**
   * Issues one streamed chat-completion request and measures it.
   *
   * @returns Measurements for the request, or `null` when the request failed
   *   (the error is logged). `latency` and `ttft` are in milliseconds, `tpot`
   *   is milliseconds per counted token and `throughput` is counted tokens
   *   per second.
   */
  async benchmarkUser() {
    const startResources = await this.getSystemResources();
    const start = Date.now();
    let tokenCount = 0;
    let firstTokenTime: number | null = null;

    try {
      if (!this.openai) {
        throw new Error('OpenAI client is not initialised; run benchmark() first');
      }
      const stream = await this.openai.chat.completions.create({
        model: this.config.api.parameters.model,
        messages: this.config.api.parameters.messages,
        max_tokens: this.config.api.parameters.max_tokens,
        stream: true,
      });

      for await (const chunk of stream) {
        const content = chunk.choices[0]?.delta?.content;
        // Chunks without text carry no tokens and must not be counted.
        if (!content) continue;
        if (firstTokenTime === null) {
          firstTokenTime = Date.now();
        }
        // Approximate token count: whitespace-separated pieces, but at least
        // one per non-empty chunk so whitespace-only chunks still count.
        const pieces = content.split(/\s+/).filter(Boolean).length;
        tokenCount += Math.max(1, pieces);
      }
    } catch (error) {
      console.error('Error during API call:', error);
      return null;
    }

    const latency = Date.now() - start;
    const ttft = firstTokenTime === null ? null : firstTokenTime - start;
    const endResources = await this.getSystemResources();
    const resourceChange = await this.getResourceChange(
      startResources,
      endResources,
    );

    return {
      tokens: this.config.api.parameters.max_tokens,
      token_length: tokenCount, // Dynamically calculated token count
      latency,
      resourceChange,
      tpot: tokenCount ? latency / tokenCount : 0,
      // Guard against a zero-millisecond latency producing NaN/Infinity.
      throughput: latency > 0 ? tokenCount / (latency / 1000) : 0,
      ttft,
    };
  }

  /**
   * Returns the requested percentile of `data`.
   *
   * Entries that are not finite numbers (e.g. `null` for a missing `ttft`)
   * are ignored. The input array is not modified.
   *
   * @param data Samples to summarise.
   * @param percentile Percentile in the range 0–100.
   * @returns The percentile value, or `null` when no usable sample remains.
   */
  calculatePercentiles(
    data: Array<number | null | undefined>,
    percentile: number,
  ): number | null {
    const sorted = data
      .filter((x): x is number => typeof x === 'number' && Number.isFinite(x))
      .sort((a, b) => a - b);
    // Checked after filtering so an all-null series yields null, not undefined.
    if (sorted.length === 0) return null;

    const pos = (percentile / 100) * sorted.length;
    if (pos < 1) return sorted[0];
    if (pos >= sorted.length) return sorted[sorted.length - 1];
    const lower = sorted[Math.floor(pos) - 1];
    const upper = sorted[Math.ceil(pos) - 1];
    return lower + (upper - lower) * (pos - Math.floor(pos));
  }

  /**
   * Runs every configured round, aggregates p50/p75/p95 metrics, writes the
   * report to `output.json` in the benchmark folder and prints it.
   */
  async runBenchmarks() {
    const allResults: any[] = [];
    const rounds = this.config.num_rounds || 1;

    const bar = new SingleBar({}, Presets.shades_classic);
    bar.start(rounds, 0);

    try {
      for (let i = 0; i < rounds; i++) {
        const roundResults = [];
        const hardwareBefore = await this.getSystemResources();

        // NOTE(review): the `concurrency` requests of a round are awaited one
        // after another, not in parallel — confirm this is intended.
        for (let j = 0; j < this.config.concurrency; j++) {
          const result = await this.benchmarkUser();
          if (result) {
            roundResults.push(result);
          }
        }

        const hardwareAfter = await this.getSystemResources();
        const hardwareChanges = await this.getResourceChange(
          hardwareBefore,
          hardwareAfter,
        );

        allResults.push({
          round: i + 1,
          results: roundResults,
          hardwareChanges,
        });

        bar.update(i + 1);
      }
    } finally {
      // Always release the progress bar, even when a round throws.
      bar.stop();
    }

    const metrics: any = {
      p50: {},
      p75: {},
      p95: {},
    };
    const keys = ['latency', 'tpot', 'throughput', 'ttft'];
    for (const key of keys) {
      const samples = allResults.flatMap((round) =>
        round.results.map((res: Record<string, number | null>) => res[key]),
      );
      metrics.p50[key] = this.calculatePercentiles(samples, 50);
      metrics.p75[key] = this.calculatePercentiles(samples, 75);
      metrics.p95[key] = this.calculatePercentiles(samples, 95);
    }

    const output = {
      hardware: await this.getSystemResources(),
      results: allResults,
      metrics,
    };

    const outputFilePath = join(
      await this.fileService.getBenchmarkPath(),
      'output.json',
    );
    writeFileSync(outputFilePath, JSON.stringify(output, null, 2));
    console.log(`Benchmark results and metrics saved to ${outputFilePath}`);

    console.log(
      inspect(output, { showHidden: false, depth: null, colors: true }),
    );
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.