From 1a1c551711b859d6d9e7697c8ad908444ff99e64 Mon Sep 17 00:00:00 2001
From: marknguyen1302 <nguyenvu1302.work@gmail.com>
Date: Mon, 1 Jul 2024 14:01:19 +0700
Subject: [PATCH] fix model not found when running benchmark

---
 .../usecases/benchmark.cli.usecases.ts          | 17 ++++++++++++-----
 .../src/infrastructure/constants/benchmark.ts   |  4 ++--
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
index 9bdaeaad1..db6b7fe90 100644
--- a/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
+++ b/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
@@ -66,7 +66,7 @@ export class BenchmarkCliUsecases {
           if (!model)
             throw new Error('Model is not started, please try again!');
         })
-        .then(() => this.runBenchmarks())
+        .then(() => this.runBenchmarks(model))
         .then(() => {
           serveProcess.kill();
           process.exit(0);
@@ -136,19 +136,26 @@ export class BenchmarkCliUsecases {
    * Benchmark a user using the OpenAI API
    * @returns
    */
-  private async benchmarkUser() {
+  private async benchmarkUser(model: string) {
     const startResources = await this.getSystemResources();
     const start = Date.now();
     let tokenCount = 0;
     let firstTokenTime = null;
 
     try {
+      console.log('Benchmarking user...', {
+        model,
+        messages: this.config.api.parameters.messages,
+        max_tokens: this.config.api.parameters.max_tokens,
+        stream: true,
+      });
       const stream = await this.cortexClient!.chat.completions.create({
-        model: this.config.api.parameters.model,
+        model,
         messages: this.config.api.parameters.messages,
         max_tokens: this.config.api.parameters.max_tokens,
         stream: true,
       });
+      
 
       for await (const chunk of stream) {
         if (!firstTokenTime && chunk.choices[0]?.delta?.content) {
@@ -204,7 +211,7 @@ export class BenchmarkCliUsecases {
   /**
    * Run the benchmarks
    */
-  private async runBenchmarks() {
+  private async runBenchmarks(model: string) {
     const allResults: any[] = [];
     const rounds = this.config.num_rounds || 1;
 
@@ -216,7 +223,7 @@ export class BenchmarkCliUsecases {
       const hardwareBefore = await this.getSystemResources();
 
       for (let j = 0; j < this.config.concurrency; j++) {
-        const result = await this.benchmarkUser();
+        const result = await this.benchmarkUser(model);
         if (result) {
           roundResults.push(result);
         }
diff --git a/cortex-js/src/infrastructure/constants/benchmark.ts b/cortex-js/src/infrastructure/constants/benchmark.ts
index 083b46584..4c253d46f 100644
--- a/cortex-js/src/infrastructure/constants/benchmark.ts
+++ b/cortex-js/src/infrastructure/constants/benchmark.ts
@@ -2,7 +2,7 @@ import { BenchmarkConfig } from '@commanders/types/benchmark-config.interface';
 
 export const defaultBenchmarkConfiguration: BenchmarkConfig = {
   api: {
-    base_url: 'http://localhost:1337/',
+    base_url: 'http://localhost:1337/v1',
     api_key: '<api_key>',
     parameters: {
       messages: [
@@ -28,7 +28,7 @@ export const defaultBenchmarkConfiguration: BenchmarkConfig = {
     min: 1024,
     max: 2048,
     samples: 10,
-  },
+  },  
   output: 'table',
   hardware: ['cpu', 'gpu', 'psu', 'chassis', 'ram'],
   concurrency: 1,