From 8dc1def4b49b1e83d7c722208ef09b0030cbdd4c Mon Sep 17 00:00:00 2001
From: irfanpena
Date: Tue, 11 Jun 2024 14:31:59 +0700
Subject: [PATCH 1/4] api: Update the API description

---
 .../infrastructure/controllers/embeddings.controller.ts | 4 ++--
 .../infrastructure/dtos/chat/embeddings-response.dto.ts | 8 ++++----
 .../dtos/embeddings/embeddings-request.dto.ts           | 8 ++++----
 cortex-js/src/main.ts                                   | 4 ++++
 4 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/cortex-js/src/infrastructure/controllers/embeddings.controller.ts b/cortex-js/src/infrastructure/controllers/embeddings.controller.ts
index 286b791a9..ae3c6a39a 100644
--- a/cortex-js/src/infrastructure/controllers/embeddings.controller.ts
+++ b/cortex-js/src/infrastructure/controllers/embeddings.controller.ts
@@ -10,8 +10,8 @@ export class EmbeddingsController {
   constructor(private readonly chatService: ChatUsecases) {}
 
   @ApiOperation({
-    summary: 'Embedding vector for text',
-    description: 'Creates an embedding vector representing the input text..',
+    summary: 'Create embedding vector',
+    description: 'Creates an embedding vector representing the input text.',
   })
   @HttpCode(200)
   @ApiResponse({
diff --git a/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts b/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts
index f73a176a1..3d7cf65b3 100644
--- a/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts
+++ b/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts
@@ -3,25 +3,25 @@ import { UsageDto } from './usage.dto';
 
 export class EmbeddingsResponseDto {
   @ApiProperty({
-    description: 'Result object type.',
+    description: 'Type of the result object.',
     type: String,
   })
   object: string;
 
   @ApiProperty({
-    description: 'ID of the model used for embeddings',
+    description: 'Identifier of the model utilized for generating embeddings.',
     type: String,
   })
   model: string;
 
   @ApiProperty({
-    description: 'The embedding vector, which is a list of floats. ',
+    description: 'The embedding vector represented as an array of floating-point numbers. ',
     type: [Number],
   })
   embedding: [number];
 
   @ApiProperty({
-    description: 'Returns prompt_tokens and total_tokens usage ',
+    description: 'Details of token usage, including prompt_tokens and total_tokens.',
     type: UsageDto,
   })
   usage: UsageDto;
diff --git a/cortex-js/src/infrastructure/dtos/embeddings/embeddings-request.dto.ts b/cortex-js/src/infrastructure/dtos/embeddings/embeddings-request.dto.ts
index 0be76e23f..2fe75d961 100644
--- a/cortex-js/src/infrastructure/dtos/embeddings/embeddings-request.dto.ts
+++ b/cortex-js/src/infrastructure/dtos/embeddings/embeddings-request.dto.ts
@@ -3,21 +3,21 @@ import { ApiProperty } from '@nestjs/swagger';
 
 export class CreateEmbeddingsDto {
   @ApiProperty({
-    description: 'Embedding model',
+    description: 'The name of the embedding model to be used.',
     type: String,
   })
   model: string;
 
   @ApiProperty({
     description:
-      'Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays.',
+      'The text or token array(s) to be embedded. This can be a single string, an array of strings, or an array of token arrays to embed multiple inputs in one request.',
     type: [String],
   })
   input: string | string[];
 
   @ApiProperty({
     description:
-      'Encoding format for the embeddings. Supported formats are float and int.',
+      'Specifies the format for the embeddings. Supported formats include `float` and `int`. This field is optional.',
     type: String,
   })
   @Optional()
@@ -25,7 +25,7 @@
 
   @ApiProperty({
     description:
-      'The number of dimensions the resulting output embeddings should have. Only supported in some models.',
+      'Defines the number of dimensions for the output embeddings. This feature is supported by certain models only. This field is optional.',
     type: Number,
   })
   @Optional()
diff --git a/cortex-js/src/main.ts b/cortex-js/src/main.ts
index fa7564669..75261665b 100644
--- a/cortex-js/src/main.ts
+++ b/cortex-js/src/main.ts
@@ -53,6 +53,10 @@
       'Threads',
       'These endpoints handle the creation, retrieval, updating, and deletion of conversation threads.',
     )
+    .addTag(
+      'Embeddings',
+      'Endpoint for creating and retrieving embedding vectors from text inputs using specified models.',
+    )
     .addServer('http://localhost:1337')
     .addServer('http://localhost:1337/v1')
     .build();

From 86162f038e66431935da24b37b3c48064a43430a Mon Sep 17 00:00:00 2001
From: irfanpena
Date: Tue, 11 Jun 2024 14:39:22 +0700
Subject: [PATCH 2/4] docs: Update the dependencies

---
 README.md           | 2 +-
 cortex-js/README.md | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 66a0f072b..aa2e3804c 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ Cortex currently supports two inference engines:
 
 Before installation, ensure that you have installed the following:
 
-- **Node.js**: Required for running the installation.
+- **Node.js**: version 18 and above is required to run the installation.
 - **NPM**: Needed to manage packages.
 - **CPU Instruction Sets**: Available for download from the [Cortex GitHub Releases](https://github.com/janhq/cortex/releases) page.
 
diff --git a/cortex-js/README.md b/cortex-js/README.md
index 5730c98f7..6dac72720 100644
--- a/cortex-js/README.md
+++ b/cortex-js/README.md
@@ -32,8 +32,7 @@ Cortex
 ### **Dependencies**
 
 Before installation, ensure that you have installed the following:
-
-- **Node.js**: Required for running the installation.
+- **Node.js**: version 18 and above is required to run the installation.
 - **NPM**: Needed to manage packages.
 - **CPU Instruction Sets**: Available for download from the [Cortex GitHub Releases](https://github.com/janhq/cortex/releases) page.
 
From 0cbbf08f86529f9e505182011164df64e9fe0983 Mon Sep 17 00:00:00 2001
From: Louis
Date: Wed, 12 Jun 2024 15:03:31 +0700
Subject: [PATCH 3/4] feat: cortex benchmark command (#684)

---
 cortex-js/package.json                        |   2 +
 cortex-js/src/command.module.ts               |   2 +
 .../src/file-manager/file-manager.service.ts  |  11 +
 .../commanders/benchmark.command.ts           |  18 ++
 .../infrastructure/commanders/chat.command.ts |   3 +-
 .../commanders/cortex-command.commander.ts    |   2 +
 .../commanders/embeddings.command.ts          |   3 +-
 .../types/benchmark-config.interface.ts       |  28 ++
 .../commanders/types/model-stat.interface.ts  |   8 +
 .../usecases/benchmark.cli.usecases.ts        | 250 ++++++++++++++++++
 .../usecases/cli.usecases.module.ts           |  10 +-
 .../commanders/usecases/ps.cli.usecases.ts    |   9 +-
 .../src/infrastructure/constants/benchmark.ts |  37 +++
 13 files changed, 372 insertions(+), 11 deletions(-)
 create mode 100644 cortex-js/src/infrastructure/commanders/benchmark.command.ts
 create mode 100644 cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts
 create mode 100644 cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts
 create mode 100644 cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
 create mode 100644 cortex-js/src/infrastructure/constants/benchmark.ts

diff --git a/cortex-js/package.json b/cortex-js/package.json
index 1e4b08149..62231be75 100644
--- a/cortex-js/package.json
+++ b/cortex-js/package.json
@@ -48,11 +48,13 @@
     "decompress": "^4.2.1",
     "js-yaml": "^4.1.0",
     "nest-commander": "^3.13.0",
+    "openai": "^4.50.0",
    "readline": "^1.3.0",
     "reflect-metadata": "^0.2.0",
     "rxjs": "^7.8.1",
     "sqlite": "^5.1.1",
     "sqlite3": "^5.1.7",
+    "systeminformation": "^5.22.10",
     "typeorm": "^0.3.20",
     "ulid": "^2.3.0",
     "update-notifier": "^5.0.0",
diff --git a/cortex-js/src/command.module.ts b/cortex-js/src/command.module.ts
index f25d65cea..76f9a00a1 100644
--- a/cortex-js/src/command.module.ts
+++ b/cortex-js/src/command.module.ts
@@ -28,6 +28,7 @@ import { PSCommand } from './infrastructure/commanders/ps.command';
 import { KillCommand } from './infrastructure/commanders/kill.command';
 import { PresetCommand } from './infrastructure/commanders/presets.command';
 import { EmbeddingCommand } from './infrastructure/commanders/embeddings.command';
+import { BenchmarkCommand } from './infrastructure/commanders/benchmark.command';
 
 @Module({
   imports: [
@@ -56,6 +57,7 @@ import { EmbeddingCommand } from './infrastructure/commanders/embeddings.command';
     KillCommand,
     PresetCommand,
     EmbeddingCommand,
+    BenchmarkCommand,
 
     // Questions
     InitRunModeQuestions,
diff --git a/cortex-js/src/file-manager/file-manager.service.ts b/cortex-js/src/file-manager/file-manager.service.ts
index 78b29270c..498509bb6 100644
--- a/cortex-js/src/file-manager/file-manager.service.ts
+++ b/cortex-js/src/file-manager/file-manager.service.ts
@@ -12,6 +12,7 @@ export class FileManagerService {
   private modelFolderName = 'models';
   private presetFolderName = 'presets';
   private extensionFoldername = 'extensions';
+  private benchmarkFoldername = 'benchmark';
   private cortexCppFolderName = 'cortex-cpp';
 
   /**
@@ -116,4 +117,14 @@ export class FileManagerService {
     const dataFolderPath = await this.getDataFolderPath();
     return join(dataFolderPath, this.extensionFoldername);
   }
+
+  /**
+   * Get the benchmark folder path
+   * Usually it is located at the home directory > cortex > benchmark
+   * @returns the path to the benchmark folder
+   */
+  async getBenchmarkPath(): Promise<string> {
+    const dataFolderPath = await this.getDataFolderPath();
+    return join(dataFolderPath, this.benchmarkFoldername);
+  }
 }
diff --git a/cortex-js/src/infrastructure/commanders/benchmark.command.ts b/cortex-js/src/infrastructure/commanders/benchmark.command.ts
new file mode 100644
index 000000000..e3e6a69fd
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/benchmark.command.ts
@@ -0,0 +1,18 @@
+import { CommandRunner, SubCommand } from 'nest-commander';
+import { BenchmarkCliUsecases } from './usecases/benchmark.cli.usecases';
+
+@SubCommand({
+  name: 'benchmark',
+  subCommands: [],
+  description:
+    'Benchmark and analyze the performance of a specific AI model using a variety of system resources',
+})
+export class BenchmarkCommand extends CommandRunner {
+  constructor(private readonly benchmarkUsecases: BenchmarkCliUsecases) {
+    super();
+  }
+
+  async run(): Promise<void> {
+    return this.benchmarkUsecases.benchmark();
+  }
+}
diff --git a/cortex-js/src/infrastructure/commanders/chat.command.ts b/cortex-js/src/infrastructure/commanders/chat.command.ts
index d08d12037..718541d0d 100644
--- a/cortex-js/src/infrastructure/commanders/chat.command.ts
+++ b/cortex-js/src/infrastructure/commanders/chat.command.ts
@@ -6,8 +6,9 @@ import {
 } from 'nest-commander';
 import { ChatCliUsecases } from './usecases/chat.cli.usecases';
 import { exit } from 'node:process';
-import { ModelStat, PSCliUsecases } from './usecases/ps.cli.usecases';
+import { PSCliUsecases } from './usecases/ps.cli.usecases';
 import { ModelsUsecases } from '@/usecases/models/models.usecases';
+import { ModelStat } from './types/model-stat.interface';
 
 type ChatOptions = {
   threadId?: string;
diff --git a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts
index 88fbe7a70..9bb9c33fd 100644
--- a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts
+++ b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts
@@ -10,6 +10,7 @@ import { KillCommand } from './kill.command';
 import pkg from '@/../package.json';
 import { PresetCommand } from './presets.command';
 import { EmbeddingCommand } from './embeddings.command';
+import { BenchmarkCommand } from './benchmark.command';
 
 interface CortexCommandOptions {
   version: boolean;
@@ -26,6 +27,7 @@ interface CortexCommandOptions {
     KillCommand,
     PresetCommand,
     EmbeddingCommand,
+    BenchmarkCommand,
   ],
   description: 'Cortex CLI',
 })
diff --git a/cortex-js/src/infrastructure/commanders/embeddings.command.ts b/cortex-js/src/infrastructure/commanders/embeddings.command.ts
index 45b23cc93..872715762 100644
--- a/cortex-js/src/infrastructure/commanders/embeddings.command.ts
+++ b/cortex-js/src/infrastructure/commanders/embeddings.command.ts
@@ -5,9 +5,10 @@ import {
   SubCommand,
 } from 'nest-commander';
 import { ModelsUsecases } from '@/usecases/models/models.usecases';
-import { ModelStat, PSCliUsecases } from './usecases/ps.cli.usecases';
+import { PSCliUsecases } from './usecases/ps.cli.usecases';
 import { ChatCliUsecases } from './usecases/chat.cli.usecases';
 import { inspect } from 'util';
+import { ModelStat } from './types/model-stat.interface';
 
 interface EmbeddingCommandOptions {
   encoding_format?: string;
diff --git a/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts b/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts
new file mode 100644
index 000000000..aa49c313e
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts
@@ -0,0 +1,28 @@
+import { ChatCompletionMessageParam } from 'openai/resources';
+
+export interface BenchmarkConfig {
+  api: {
+    base_url: string;
+    api_key: string;
+    parameters: {
+      messages: ChatCompletionMessageParam[];
+      model: string;
+      stream?: boolean;
+      max_tokens?: number;
+      stop?: string[];
+      frequency_penalty?: number;
+      presence_penalty?: number;
+      temperature?: number;
+      top_p?: number;
+    };
+  };
+  prompts?: {
+    min: number;
+    max: number;
+    samples: number;
+  };
+  output: string;
+  concurrency: number;
+  num_rounds: number;
+  hardware: string[];
+}
diff --git a/cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts b/cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts
new file mode 100644
index 000000000..336917b4f
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts
@@ -0,0 +1,8 @@
+export interface ModelStat {
+  modelId: string;
+  engine?: string;
+  duration?: string;
+  status: string;
+  vram?: string;
+  ram?: string;
+}
diff --git a/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
new file mode 100644
index 000000000..7d31ca552
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
@@ -0,0 +1,250 @@
+import { Injectable } from '@nestjs/common';
+import si from 'systeminformation';
+import fs, { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
+import OpenAI from 'openai';
+import { Presets, SingleBar } from 'cli-progress';
+import yaml from 'js-yaml';
+import { FileManagerService } from '@/file-manager/file-manager.service';
+import { join } from 'path';
+import { ModelsCliUsecases } from './models.cli.usecases';
+import { spawn } from 'child_process';
+import { BenchmarkConfig } from '../types/benchmark-config.interface';
+import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
+import { inspect } from 'util';
+import { defaultBenchmarkConfiguration } from '@/infrastructure/constants/benchmark';
+
+@Injectable()
+export class BenchmarkCliUsecases {
+  constructor(
+    private readonly modelsCliUsecases: ModelsCliUsecases,
+    private readonly cortexUsecases: CortexUsecases,
+    private readonly fileService: FileManagerService,
+  ) {}
+
+  config: BenchmarkConfig;
+  openai?: OpenAI;
+  /**
+   * Benchmark and analyze the performance of a specific AI model using a variety of system resources
+   */
+  async benchmark() {
+    return this.getBenchmarkConfig().then((config) => {
+      this.config = config;
+
+      // TODO: Using OpenAI client or Cortex client to benchmark?
+      this.openai = new OpenAI({
+        apiKey: this.config.api.api_key,
+        baseURL: this.config.api.base_url,
+        timeout: 20 * 1000,
+      });
+
+      spawn('cortex', ['serve'], {
+        detached: false,
+      });
+
+      return this.cortexUsecases
+        .startCortex()
+        .then(() =>
+          this.modelsCliUsecases.startModel(this.config.api.parameters.model),
+        )
+        .then(() => this.runBenchmarks())
+        .then(() => process.exit(0));
+    });
+  }
+
+  /**
+   * Get the benchmark configuration
+   * @returns the benchmark configuration
+   */
+  private async getBenchmarkConfig() {
+    const benchmarkFolder = await this.fileService.getBenchmarkPath();
+    const configurationPath = join(benchmarkFolder, 'config.yaml');
+    if (existsSync(configurationPath)) {
+      return yaml.load(
+        readFileSync(configurationPath, 'utf8'),
+      ) as BenchmarkConfig;
+    } else {
+      const config = yaml.dump(defaultBenchmarkConfiguration);
+      if (!existsSync(benchmarkFolder)) {
+        mkdirSync(benchmarkFolder, {
+          recursive: true,
+        });
+      }
+      await writeFileSync(configurationPath, config, 'utf8');
+      return defaultBenchmarkConfiguration;
+    }
+  }
+
+  /**
+   * Get the system resources for benchmarking
+   * using the systeminformation library
+   * @returns the system resources
+   */
+  private async getSystemResources() {
+    return {
+      cpu: await si.currentLoad(),
+      mem: await si.mem(),
+      gpu: (await si.graphics()).controllers,
+    };
+  }
+
+  /**
+   * Get the resource change between two data points
+   * @param startData the start data point
+   * @param endData the end data point
+   * @returns the resource change
+   */
+  private async getResourceChange(startData: any, endData: any) {
+    return {
+      cpu:
+        startData.cpu && endData.cpu
+          ? ((endData.cpu.currentLoad - startData.cpu.currentLoad) /
+              startData.cpu.currentLoad) *
+            100
+          : null,
+      mem:
+        startData.mem && endData.mem
+          ? ((endData.mem.used - startData.mem.used) / startData.mem.total) *
+            100
+          : null,
+    };
+  }
+
+  /**
+   * Benchmark a user using the OpenAI API
+   * @returns
+   */
+  private async benchmarkUser() {
+    const startResources = await this.getSystemResources();
+    const start = Date.now();
+    let tokenCount = 0;
+    let firstTokenTime = null;
+
+    try {
+      const stream = await this.openai!.chat.completions.create({
+        model: this.config.api.parameters.model,
+        messages: this.config.api.parameters.messages,
+        max_tokens: this.config.api.parameters.max_tokens,
+        stream: true,
+      });
+
+      for await (const chunk of stream) {
+        if (!firstTokenTime && chunk.choices[0]?.delta?.content) {
+          firstTokenTime = Date.now();
+        }
+        tokenCount += (chunk.choices[0]?.delta?.content || '').split(
+          /\s+/,
+        ).length;
+      }
+    } catch (error) {
+      console.error('Error during API call:', error);
+      return null;
+    }
+
+    const latency = Date.now() - start;
+    const ttft = firstTokenTime ? firstTokenTime - start : null;
+    const endResources = await this.getSystemResources();
+    const resourceChange = await this.getResourceChange(
+      startResources,
+      endResources,
+    );
+
+    return {
+      tokens: this.config.api.parameters.max_tokens,
+      token_length: tokenCount, // Dynamically calculated token count
+      latency,
+      resourceChange,
+      tpot: tokenCount ? latency / tokenCount : 0,
+      throughput: tokenCount / (latency / 1000),
+      ttft,
+    };
+  }
+
+  /**
+   * Calculate the percentiles of the data
+   * @param data the data to calculate percentiles for
+   * @param percentile the percentile to calculate
+   * @returns the percentile value
+   */
+  private calculatePercentiles(data: number[], percentile: number) {
+    if (data.length === 0) return null;
+    const sorted = data
+      .filter((x: number) => x !== null)
+      .sort((a: number, b: number) => a - b);
+    const pos = (percentile / 100) * sorted.length;
+    if (pos < 1) return sorted[0];
+    if (pos >= sorted.length) return sorted[sorted.length - 1];
+    const lower = sorted[Math.floor(pos) - 1];
+    const upper = sorted[Math.ceil(pos) - 1];
+    return lower + (upper - lower) * (pos - Math.floor(pos));
+  }
+
+  /**
+   * Run the benchmarks
+   */
+  private async runBenchmarks() {
+    const allResults: any[] = [];
+    const rounds = this.config.num_rounds || 1;
+
+    const bar = new SingleBar({}, Presets.shades_classic);
+    bar.start(rounds, 0);
+
+    for (let i = 0; i < rounds; i++) {
+      const roundResults = [];
+      const hardwareBefore = await this.getSystemResources();
+
+      for (let j = 0; j < this.config.concurrency; j++) {
+        const result = await this.benchmarkUser();
+        if (result) {
+          roundResults.push(result);
+        }
+      }
+
+      const hardwareAfter = await this.getSystemResources();
+      const hardwareChanges = await this.getResourceChange(
+        hardwareBefore,
+        hardwareAfter,
+      );
+
+      allResults.push({
+        round: i + 1,
+        results: roundResults,
+        hardwareChanges,
+      });
+
+      bar.update(i + 1);
+    }
+
+    const metrics: any = {
+      p50: {},
+      p75: {},
+      p95: {},
+    };
+    const keys = ['latency', 'tpot', 'throughput', 'ttft'];
+    keys.forEach((key) => {
+      const data = allResults.flatMap((r) =>
+        r.results.map((res: object) => res[key as keyof typeof res]),
+      );
+      metrics.p50[key] = this.calculatePercentiles(data, 50);
+      metrics.p75[key] = this.calculatePercentiles(data, 75);
+      metrics.p95[key] = this.calculatePercentiles(data, 95);
+    });
+
+    const output = {
+      hardware: await this.getSystemResources(),
+      results: allResults,
+      metrics,
+    };
+    bar.stop();
+
+    const outputFilePath = join(
+      await this.fileService.getBenchmarkPath(),
+      'output.json',
+    );
+    fs.writeFileSync(outputFilePath, JSON.stringify(output, null, 2));
+    console.log(`Benchmark results and metrics saved to ${outputFilePath}`);
+
+    console.log(
+      inspect(output, { showHidden: false, depth: null, colors: true }),
+    );
+  }
+}
diff --git a/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts b/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts
index feead245a..de14a704d 100644
--- a/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts
+++ b/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts
@@ -11,6 +11,7 @@ import { AssistantsModule } from '@/usecases/assistants/assistants.module';
 import { MessagesModule } from '@/usecases/messages/messages.module';
 import { FileManagerModule } from '@/file-manager/file-manager.module';
 import { PSCliUsecases } from './ps.cli.usecases';
+import { BenchmarkCliUsecases } from './benchmark.cli.usecases';
 
 @Module({
   imports: [
@@ -28,7 +29,14 @@ import { PSCliUsecases } from './ps.cli.usecases';
     ModelsCliUsecases,
     ChatCliUsecases,
     PSCliUsecases,
+    BenchmarkCliUsecases,
+  ],
+  exports: [
+    InitCliUsecases,
+    ModelsCliUsecases,
+    ChatCliUsecases,
+    PSCliUsecases,
+    BenchmarkCliUsecases,
   ],
-  exports: [InitCliUsecases, ModelsCliUsecases, ChatCliUsecases, PSCliUsecases],
 })
 export class CliUsecasesModule {}
diff --git a/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts
index 5e2e8db38..885a10c8b 100644
--- a/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts
+++ b/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts
@@ -6,15 +6,8 @@ import {
 } from '@/infrastructure/constants/cortex';
 import { HttpService } from '@nestjs/axios';
 import { firstValueFrom } from 'rxjs';
+import { ModelStat } from '../types/model-stat.interface';
 
-export interface ModelStat {
-  modelId: string;
-  engine?: string;
-  duration?: string;
-  status: string;
-  vram?: string;
-  ram?: string;
-}
 interface ModelStatResponse {
   object: string;
   data: any;
diff --git a/cortex-js/src/infrastructure/constants/benchmark.ts b/cortex-js/src/infrastructure/constants/benchmark.ts
new file mode 100644
index 000000000..01dfc59bd
--- /dev/null
+++ b/cortex-js/src/infrastructure/constants/benchmark.ts
@@ -0,0 +1,37 @@
+import { BenchmarkConfig } from '../commanders/types/benchmark-config.interface';
+
+export const defaultBenchmarkConfiguration: BenchmarkConfig = {
+  api: {
+    base_url: 'http://127.0.0.1:1337/',
+    api_key: '',
+    parameters: {
+      messages: [
+        {
+          content: 'You are a helpful assistant.',
+          role: 'system',
+        },
+        {
+          content: 'Hello!',
+          role: 'user',
+        },
+      ],
+      model: 'tinyllama',
+      stream: true,
+      max_tokens: 2048,
+      stop: [],
+      frequency_penalty: 0,
+      presence_penalty: 0,
+      temperature: 0.7,
+      top_p: 0.95,
+    },
+  },
+  prompts: {
+    min: 102,
+    max: 2048,
+    samples: 10,
+  },
+  output: 'json',
+  hardware: ['cpu', 'gpu', 'psu', 'chassis', 'ram'],
+  concurrency: 1,
+  num_rounds: 10,
+};

From 4763c1c34496cb63d9b00416c4a7c8783993ed33 Mon Sep 17 00:00:00 2001
From: Van Pham <64197333+Van-QA@users.noreply.github.com>
Date: Wed, 12 Jun 2024 15:49:52 +0700
Subject: [PATCH 4/4] feat: Add testcase for Cortex CLI (#689)

Co-authored-by: Louis
Co-authored-by: Van-QA
---
 cortex-js/.eslintrc.js                        |   6 +
 .../commanders/serve.command.ts               |  24 ++--
 .../commanders/test/helpers.command.spec.ts   | 107 ++++++++++++++++++
 .../commanders/test/log.service.ts            |   8 ++
 .../test/model-list.command.spec.ts           |  67 -----------
 .../commanders/test/models.command.spec.ts    | 100 ++++++++++++++++
 6 files changed, 236 insertions(+), 76 deletions(-)
 create mode 100644 cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts
 create mode 100644 cortex-js/src/infrastructure/commanders/test/log.service.ts
 delete mode 100644 cortex-js/src/infrastructure/commanders/test/model-list.command.spec.ts
 create mode 100644 cortex-js/src/infrastructure/commanders/test/models.command.spec.ts

diff --git a/cortex-js/.eslintrc.js b/cortex-js/.eslintrc.js
index 259de13c7..448a2d910 100644
--- a/cortex-js/.eslintrc.js
+++ b/cortex-js/.eslintrc.js
@@ -21,5 +21,11 @@ module.exports = {
     '@typescript-eslint/explicit-function-return-type': 'off',
     '@typescript-eslint/explicit-module-boundary-types': 'off',
     '@typescript-eslint/no-explicit-any': 'off',
+    "prettier/prettier": [
+      "error",
+      {
+        "endOfLine": "auto"
+      },
+    ],
   },
 };
diff --git a/cortex-js/src/infrastructure/commanders/serve.command.ts b/cortex-js/src/infrastructure/commanders/serve.command.ts
index f839b4a0b..99575bcb2 100644
--- a/cortex-js/src/infrastructure/commanders/serve.command.ts
+++ b/cortex-js/src/infrastructure/commanders/serve.command.ts
@@ -20,16 +20,22 @@ export class ServeCommand extends CommandRunner {
     const host = options?.host || defaultCortexJsHost;
     const port = options?.port || defaultCortexJsPort;
 
-    spawn('node', [join(__dirname, '../../main.js')], {
-      env: {
-        ...process.env,
-        CORTEX_JS_HOST: host,
-        CORTEX_JS_PORT: port.toString(),
-        NODE_ENV: 'production',
+    spawn(
+      'node',
+      process.env.TEST
+        ? [join(__dirname, '../../../dist/src/main.js')]
+        : [join(__dirname, '../../main.js')],
+      {
+        env: {
+          ...process.env,
+          CORTEX_JS_HOST: host,
+          CORTEX_JS_PORT: port.toString(),
+          NODE_ENV: 'production',
+        },
+        stdio: 'inherit',
+        detached: false,
       },
-      stdio: 'inherit',
-      detached: false,
-    });
+    );
   }
 
   @Option({
diff --git a/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts b/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts
new file mode 100644
index 000000000..05d9562d6
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts
@@ -0,0 +1,107 @@
+import { TestingModule } from '@nestjs/testing';
+import { spy, Stub, stubMethod } from 'hanbi';
+import { CommandTestFactory } from 'nest-commander-testing';
+import { CommandModule } from '@/command.module';
+import { LogService } from '@/infrastructure/commanders/test/log.service';
+import axios from 'axios';
+
+let commandInstance: TestingModule,
+  exitSpy: Stub,
+  stdoutSpy: Stub,
+  stderrSpy: Stub;
+export const timeout = 500000;
+
+beforeEach(
+  () =>
+    new Promise<void>(async (res) => {
+      stubMethod(process.stderr, 'write');
+      exitSpy = stubMethod(process, 'exit');
+      stdoutSpy = stubMethod(process.stdout, 'write');
+      stderrSpy = stubMethod(process.stderr, 'write');
+      commandInstance = await CommandTestFactory.createTestingCommand({
+        imports: [CommandModule],
+      })
+        .overrideProvider(LogService)
+        .useValue({ log: spy().handler })
+        .compile();
+      res();
+      stdoutSpy.reset();
+      stderrSpy.reset();
+    }),
+);
+
+describe('Helper commands', () => {
+  test(
+    'Init with hardware auto detection',
+    async () => {
+      await CommandTestFactory.run(commandInstance, ['init', '-s']);
+
+      // Wait for a brief period to allow the command to execute
+      await new Promise((resolve) => setTimeout(resolve, 1000));
+
+      expect(stdoutSpy.firstCall?.args.length).toBeGreaterThan(0);
+    },
+    timeout,
+  );
+
+  test('Chat with option -m', async () => {
+    const logMock = stubMethod(console, 'log');
+
+    await CommandTestFactory.run(commandInstance, [
+      'chat',
+      // '-m',
+      // 'hello',
+      // '>output.txt',
+    ]);
+    expect(logMock.firstCall?.args[0]).toBe("Inorder to exit, type 'exit()'.");
+    // expect(exitSpy.callCount).toBe(1);
+    // expect(exitSpy.firstCall?.args[0]).toBe(1);
+  });
+
+  test('Show / kill running models', async () => {
+    const tableMock = stubMethod(console, 'table');
+
+    const logMock = stubMethod(console, 'log');
+    await CommandTestFactory.run(commandInstance, ['kill']);
+    await CommandTestFactory.run(commandInstance, ['ps']);
+
+    expect(logMock.firstCall?.args[0]).toEqual({
+      message: 'Cortex stopped successfully',
+      status: 'success',
+    });
+    expect(tableMock.firstCall?.args[0]).toBeInstanceOf(Array);
+    expect(tableMock.firstCall?.args[0].length).toEqual(0);
+  });
+
+  test('Help command return guideline to users', async () => {
+    await CommandTestFactory.run(commandInstance, ['-h']);
+    expect(stdoutSpy.firstCall?.args).toBeInstanceOf(Array);
+    expect(stdoutSpy.firstCall?.args.length).toBe(1);
+    expect(stdoutSpy.firstCall?.args[0]).toContain('display help for command');
+
+    expect(exitSpy.callCount).toBeGreaterThan(1);
+    expect(exitSpy.firstCall?.args[0]).toBe(0);
+  });
+
+  test('Should handle missing command', async () => {
+    await CommandTestFactory.run(commandInstance, ['--unknown']);
+    expect(stderrSpy.firstCall?.args[0]).toContain('error: unknown option');
+    expect(stderrSpy.firstCall?.args[0]).toContain('--unknown');
+    expect(exitSpy.callCount).toBe(1);
+    expect(exitSpy.firstCall?.args[0]).toBe(1);
+  });
+
+  test('Local API server via localhost:1337/api', async () => {
+    await CommandTestFactory.run(commandInstance, ['serve']);
+
+    // Add a delay of 1000 milliseconds (1 second)
+    return new Promise<void>(async (resolve) => {
+      setTimeout(async () => {
+        // Send a request to the API server to check if it's running
+        const response = await axios.get('http://localhost:1337/api');
+        expect(response.status).toBe(200);
+        resolve();
+      }, 1000);
+    });
+  });
+});
diff --git a/cortex-js/src/infrastructure/commanders/test/log.service.ts b/cortex-js/src/infrastructure/commanders/test/log.service.ts
new file mode 100644
index 000000000..1151f5fb5
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/test/log.service.ts
@@ -0,0 +1,8 @@
+import { Injectable } from '@nestjs/common';
+
+@Injectable()
+export class LogService {
+  log(...args: any[]): void {
+    console.log(...args);
+  }
+}
diff --git a/cortex-js/src/infrastructure/commanders/test/model-list.command.spec.ts b/cortex-js/src/infrastructure/commanders/test/model-list.command.spec.ts
deleted file mode 100644
index b90b26006..000000000
--- a/cortex-js/src/infrastructure/commanders/test/model-list.command.spec.ts
+++ /dev/null
@@ -1,67 +0,0 @@
-import { TestingModule } from '@nestjs/testing';
-import { stubMethod } from 'hanbi';
-import { CommandTestFactory } from 'nest-commander-testing';
-import { CommandModule } from '@/command.module';
-import { FileManagerService } from '@/file-manager/file-manager.service';
-import { join } from 'path';
-import { mkdirSync, rmSync, writeFileSync } from 'fs';
-
-let commandInstance: TestingModule;
-
-beforeEach(
-  () =>
-    new Promise<void>(async (res) => {
-      commandInstance = await CommandTestFactory.createTestingCommand({
-        imports: [CommandModule],
-      })
-        // .overrideProvider(LogService)
-        // .useValue({})
-        .compile();
-      const fileService =
-        await commandInstance.resolve(FileManagerService);
-
-      // Attempt to create test folder
-      await fileService.writeConfigFile({
-        dataFolderPath: join(__dirname, 'test_data'),
-      });
-      res();
-    }),
-);
-
-afterEach(
-  () =>
-    new Promise<void>(async (res) => {
-      // Attempt to clean test folder
-      rmSync(join(__dirname, 'test_data'), {
-        recursive: true,
-        force: true,
-      });
-      res();
-    }),
-);
-
-describe('models list returns array of models', () => {
-  test('empty model list', async () => {
-    const logMock = stubMethod(console, 'table');
-
-    await CommandTestFactory.run(commandInstance, ['models', 'list']);
-    expect(logMock.firstCall?.args[0]).toBeInstanceOf(Array);
-    expect(logMock.firstCall?.args[0].length).toBe(0);
-  });
-
-  test('many models in the list', async () => {
-    const logMock = stubMethod(console, 'table');
-
-    mkdirSync(join(__dirname, 'test_data', 'models'), { recursive: true });
-    writeFileSync(
-      join(__dirname, 'test_data', 'models', 'test.yaml'),
-      'model: test',
-      'utf8',
-    );
-
-    await CommandTestFactory.run(commandInstance, ['models', 'list']);
-    expect(logMock.firstCall?.args[0]).toBeInstanceOf(Array);
-    expect(logMock.firstCall?.args[0].length).toBe(1);
-    expect(logMock.firstCall?.args[0][0].id).toBe('test');
-  });
-});
diff --git a/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts b/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts
new file mode 100644
index 000000000..7d512d2be
--- /dev/null
+++ b/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts
@@ -0,0 +1,100 @@
+import { TestingModule } from '@nestjs/testing';
+import { stubMethod } from 'hanbi';
+import { CommandTestFactory } from 'nest-commander-testing';
+import { CommandModule } from '@/command.module';
+import { join } from 'path';
+import { rmSync } from 'fs';
+import { timeout } from '@/infrastructure/commanders/test/helpers.command.spec';
+
+let commandInstance: TestingModule;
+
+beforeEach(
+  () =>
+    new Promise<void>(async (res) => {
+      commandInstance = await CommandTestFactory.createTestingCommand({
+        imports: [CommandModule],
+      })
+        // .overrideProvider(LogService)
+        // .useValue({})
+        .compile();
+      res();
+    }),
+);
+
+afterEach(
+  () =>
+    new Promise<void>(async (res) => {
+      // Attempt to clean test folder
+      rmSync(join(__dirname, 'test_data'), {
+        recursive: true,
+        force: true,
+      });
+      res();
+    }),
+);
+
+export const modelName = 'tinyllama';
+describe('Models list returns array of models', () => {
+  test('Init with CPU', async () => {
+    const logMock = stubMethod(console, 'log');
+
+    logMock.passThrough();
+    CommandTestFactory.setAnswers(['CPU', '', 'AVX2']);
+
+    await CommandTestFactory.run(commandInstance, ['init']);
+    expect(logMock.firstCall?.args[0]).toBe(
+      'Downloading engine file windows-amd64-avx2.tar.gz',
+    );
+  }, 50000);
+
+  test('Empty model list', async () => {
+    const logMock = stubMethod(console, 'table');
+
+    await CommandTestFactory.run(commandInstance, ['models', 'list']);
+    expect(logMock.firstCall?.args[0]).toBeInstanceOf(Array);
+    expect(logMock.firstCall?.args[0].length).toBe(0);
+  });
+
+  test(
+    'Run model and check with cortex ps',
+    async () => {
+      const logMock = stubMethod(console, 'log');
+
+      await CommandTestFactory.run(commandInstance, ['run', modelName]);
+      expect(logMock.lastCall?.args[0]).toBe("Inorder to exit, type 'exit()'.");
+
+      const tableMock = stubMethod(console, 'table');
+      await CommandTestFactory.run(commandInstance, ['ps']);
+      expect(tableMock.firstCall?.args[0].length).toBeGreaterThan(0);
+    },
+    timeout,
+  );
+
+  test('Get model', async () => {
+    const logMock = stubMethod(console, 'log');
+
+    await CommandTestFactory.run(commandInstance, ['models', 'get', modelName]);
+    expect(logMock.firstCall?.args[0]).toBeInstanceOf(Object);
+    expect(logMock.firstCall?.args[0].files.length).toBe(1);
+  });
+
+  test('Many models in the list', async () => {
+    const logMock = stubMethod(console, 'table');
+    await CommandTestFactory.run(commandInstance, ['models', 'list']);
+    expect(logMock.firstCall?.args[0]).toBeInstanceOf(Array);
+    expect(logMock.firstCall?.args[0].length).toBe(1);
+    expect(logMock.firstCall?.args[0][0].id).toBe(modelName);
+  });
+
+  test(
+    'Model already exists',
+    async () => {
+      const stdoutSpy = stubMethod(process.stdout, 'write');
+      const exitSpy = stubMethod(process, 'exit');
+      await CommandTestFactory.run(commandInstance, ['pull', modelName]);
+      expect(stdoutSpy.firstCall?.args[0]).toContain('Model already exists');
+      expect(exitSpy.firstCall?.args[0]).toBe(1);
+    },
+    timeout,
+  );
+});