Skip to content

Commit

Permalink
Merge branch 'dev' of https://github.com/janhq/cortex into readme
Browse files Browse the repository at this point in the history
  • Loading branch information
irfanpena committed May 30, 2024
2 parents 14dddee + dd551e0 commit b704626
Show file tree
Hide file tree
Showing 14 changed files with 205 additions and 64 deletions.
4 changes: 4 additions & 0 deletions cortex-js/src/command.module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import { AssistantsModule } from './usecases/assistants/assistants.module';
import { CliUsecasesModule } from './infrastructure/commanders/usecases/cli.usecases.module';
import { MessagesModule } from './usecases/messages/messages.module';
import { FileManagerModule } from './file-manager/file-manager.module';
import { PSCommand } from './infrastructure/commanders/ps.command';
import { KillCommand } from './infrastructure/commanders/kill.command';

@Module({
imports: [
Expand All @@ -48,6 +50,8 @@ import { FileManagerModule } from './file-manager/file-manager.module';
ServeCommand,
ChatCommand,
InitCommand,
PSCommand,
KillCommand,

// Questions
InitRunModeQuestions,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import { ModelsCommand } from './models.command';
import { InitCommand } from './init.command';
import { RunCommand } from './shortcuts/run.command';
import { ModelPullCommand } from './models/model-pull.command';
import { PSCommand } from './ps.command';
import { KillCommand } from './kill.command';

@RootCommand({
subCommands: [
Expand All @@ -14,6 +16,8 @@ import { ModelPullCommand } from './models/model-pull.command';
InitCommand,
RunCommand,
ModelPullCommand,
PSCommand,
KillCommand,
],
description: 'Cortex CLI',
})
Expand Down
15 changes: 15 additions & 0 deletions cortex-js/src/infrastructure/commanders/kill.command.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import { CommandRunner, SubCommand } from 'nest-commander';
import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';

/**
 * `cortex kill` — stops the running Cortex C++ server process.
 */
@SubCommand({
  name: 'kill',
  description: 'Kill running cortex processes',
})
export class KillCommand extends CommandRunner {
  constructor(private readonly usecases: CortexUsecases) {
    super();
  }

  async run(): Promise<void> {
    // Await the shutdown so the command does not resolve (and the process
    // potentially exit) before stopCortex completes, and so a rejection is
    // surfaced to the CLI instead of becoming an unhandled promise rejection.
    const result = await this.usecases.stopCortex();
    console.log(result);
  }
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import { CommandRunner, SubCommand } from 'nest-commander';
import { CommandRunner, SubCommand, Option } from 'nest-commander';
import { exit } from 'node:process';
import { ModelsCliUsecases } from '../usecases/models.cli.usecases';
import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';

type ModelStartOptions = {
attach: boolean;
};
@SubCommand({ name: 'start', description: 'Start a model by ID.' })
export class ModelStartCommand extends CommandRunner {
constructor(
Expand All @@ -12,13 +15,26 @@ export class ModelStartCommand extends CommandRunner {
super();
}

async run(input: string[]): Promise<void> {
async run(input: string[], options: ModelStartOptions): Promise<void> {
if (input.length === 0) {
console.error('Model ID is required');
exit(1);
}

await this.cortexUsecases.startCortex();
await this.modelsCliUsecases.startModel(input[0]);
await this.cortexUsecases
.startCortex(options.attach)
.then(() => this.modelsCliUsecases.startModel(input[0]))
.then(console.log)
.then(() => !options.attach && process.exit(0));
}

@Option({
flags: '-a, --attach',
description: 'Attach to interactive chat session',
defaultValue: false,
name: 'attach',
})
parseAttach() {
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ export class ModelStopCommand extends CommandRunner {
exit(1);
}

await this.modelsCliUsecases.stopModel(input[0]);
await this.cortexUsecases.stopCortex();
await this.modelsCliUsecases
.stopModel(input[0])
.then(() => this.cortexUsecases.stopCortex())
.then(console.log);
}
}
15 changes: 15 additions & 0 deletions cortex-js/src/infrastructure/commanders/ps.command.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import { CommandRunner, SubCommand } from 'nest-commander';
import { PSCliUsecases } from './usecases/ps.cli.usecases';

/**
 * `cortex ps` — lists models currently running in the Cortex server,
 * rendered as a table on stdout.
 */
@SubCommand({
  name: 'ps',
  description: 'Show running models and their status',
})
export class PSCommand extends CommandRunner {
  constructor(private readonly usecases: PSCliUsecases) {
    super();
  }

  async run(): Promise<void> {
    const models = await this.usecases.getModels();
    console.table(models);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ export class RunCommand extends CommandRunner {
const modelId = input[0];

await this.cortexUsecases.startCortex(
false,
defaultCortexCppHost,
defaultCortexCppPort,
false,
);
await this.modelsUsecases.startModel(modelId);
await this.chatCliUsecases.chat(modelId, option?.threadId);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { ThreadsModule } from '@/usecases/threads/threads.module';
import { AssistantsModule } from '@/usecases/assistants/assistants.module';
import { MessagesModule } from '@/usecases/messages/messages.module';
import { FileManagerModule } from '@/file-manager/file-manager.module';
import { PSCliUsecases } from './ps.cli.usecases';

@Module({
imports: [
Expand All @@ -22,7 +23,12 @@ import { FileManagerModule } from '@/file-manager/file-manager.module';
MessagesModule,
FileManagerModule,
],
providers: [InitCliUsecases, ModelsCliUsecases, ChatCliUsecases],
exports: [InitCliUsecases, ModelsCliUsecases, ChatCliUsecases],
providers: [
InitCliUsecases,
ModelsCliUsecases,
ChatCliUsecases,
PSCliUsecases,
],
exports: [InitCliUsecases, ModelsCliUsecases, ChatCliUsecases, PSCliUsecases],
})
export class CliUsecasesModule {}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import {
import { ModelTokenizer } from '../types/model-tokenizer.interface';
import { HttpService } from '@nestjs/axios';
import { firstValueFrom } from 'rxjs';
import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto';

const AllQuantizations = [
'Q3_K_S',
Expand Down Expand Up @@ -61,18 +62,25 @@ export class ModelsCliUsecases {
* Start a model by ID
* @param modelId
*/
async startModel(modelId: string): Promise<void> {
await this.getModelOrStop(modelId);
await this.modelsUsecases.startModel(modelId);
async startModel(modelId: string): Promise<StartModelSuccessDto> {
return this.getModelOrStop(modelId)
.then(() => this.modelsUsecases.startModel(modelId))
.catch(() => {
return {
modelId: modelId,
message: 'Model not found',
};
});
}

/**
* Stop a model by ID
* @param modelId
*/
async stopModel(modelId: string): Promise<void> {
await this.getModelOrStop(modelId);
await this.modelsUsecases.stopModel(modelId);
return this.getModelOrStop(modelId)
.then(() => this.modelsUsecases.stopModel(modelId))
.then();
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import { Injectable } from '@nestjs/common';
import { defaultCortexCppHost, defaultCortexCppPort } from 'constant';

/** One row of `cortex ps` output: a model loaded in the Cortex C++ server. */
interface ModelStat {
  /** Identifier of the running model. */
  modelId: string;
  /** Inference engine backing the model (e.g. 'llama.cpp'). */
  engine?: string;
  /** Human-readable uptime, e.g. '1h 2m 3s'. */
  duration?: string;
  /** Lifecycle status shown to the user (e.g. 'running'). */
  status: string;
  /** VRAM usage as reported by the server, or '-' when unknown. */
  vram?: string;
  /** RAM usage as reported by the server, or '-' when unknown. */
  ram?: string;
}
/** Raw payload of GET /inferences/server/models. */
interface ModelStatResponse {
  object: string;
  // `unknown` instead of `any`: the payload is external input, so consumers
  // must narrow (all call sites already guard with Array.isArray) rather than
  // silently opt out of type checking.
  data: unknown;
}
@Injectable()
export class PSCliUsecases {
  /**
   * Get models running in the Cortex C++ server.
   *
   * Never rejects: any failure (server unreachable, non-2xx response,
   * malformed JSON, empty model list) yields an empty array, which `ps`
   * renders as "nothing running".
   *
   * @param host Cortex host address
   * @param port Cortex port address
   * @returns Stats for each running model, or [] when none are available.
   */
  async getModels(
    host: string = defaultCortexCppHost,
    port: number = defaultCortexCppPort,
  ): Promise<ModelStat[]> {
    try {
      // async/await instead of the `new Promise` constructor anti-pattern:
      // fetch already returns a promise, so wrapping it only obscured errors
      // (the original rejected with no reason and nested three `.then`s).
      const res = await fetch(`http://${host}:${port}/inferences/server/models`);
      if (!res.ok) return [];

      const { data } = (await res.json()) as ModelStatResponse;
      if (!data || !Array.isArray(data) || data.length === 0) return [];

      // Hoisted: one clock read for the whole list instead of one per model.
      const now = new Date().getTime();
      return data.map((e) => {
        // Missing start_time — presumably the server omits it for models that
        // just loaded; fall back to "now" so duration reads as ~0.
        const startTime = e.start_time ?? new Date();
        const duration = now - new Date(startTime).getTime();
        return {
          modelId: e.id,
          engine: e.engine ?? 'llama.cpp', // TODO: get engine from model when it's ready
          status: 'running',
          duration: this.formatDuration(duration),
          ram: e.ram ?? '-',
          vram: e.vram ?? '-',
        };
      });
    } catch {
      // Server not running / network or parse error — treat as no models.
      return [];
    }
  }

  /**
   * Render a millisecond duration as a compact 'Nd Nh Nm Ns' string,
   * omitting zero-valued units. Sub-second durations yield ''.
   */
  private formatDuration(milliseconds: number): string {
    const days = Math.floor(milliseconds / (1000 * 60 * 60 * 24));
    const hours = Math.floor(
      (milliseconds % (1000 * 60 * 60 * 24)) / (1000 * 60 * 60),
    );
    const minutes = Math.floor((milliseconds % (1000 * 60 * 60)) / (1000 * 60));
    const seconds = Math.floor((milliseconds % (1000 * 60)) / 1000);

    const parts: string[] = [];
    if (days > 0) parts.push(`${days}d`);
    if (hours > 0) parts.push(`${hours}h`);
    if (minutes > 0) parts.push(`${minutes}m`);
    if (seconds > 0) parts.push(`${seconds}s`);

    return parts.join(' ');
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export class CortexController {
@Post('start')
startCortex(@Body() startCortexDto: StartCortexDto) {
return this.cortexUsecases.startCortex(
false,
startCortexDto.host,
startCortexDto.port,
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,15 @@ export default class CortexProvider extends OAIEngineExtension {
modelSettings.ai_prompt = prompt.ai_prompt;
}

await firstValueFrom(
return firstValueFrom(
this.httpService.post(this.loadModelUrl, modelSettings),
);
).then();
}

override async unloadModel(modelId: string): Promise<void> {
await firstValueFrom(
return firstValueFrom(
this.httpService.post(this.unloadModelUrl, { model: modelId }),
);
).then(); // pipe error or void instead of throwing
}

private readonly promptTemplateConverter = (
Expand Down
Loading

0 comments on commit b704626

Please sign in to comment.