From 18c071fd134eaaa91b181a475594e39160dbe1df Mon Sep 17 00:00:00 2001 From: James Date: Wed, 22 May 2024 08:45:14 +0700 Subject: [PATCH] chore: remove inference stream --- cortex-js/constant.ts | 2 +- .../src/domain/abstracts/engine.abstract.ts | 7 +++--- .../src/domain/abstracts/oai.abstract.ts | 25 +++---------------- .../commanders/usecases/chat.cli.usecases.ts | 6 ++--- .../controllers/chat.controller.ts | 2 +- cortex-js/src/usecases/chat/chat.usecases.ts | 25 ------------------- 6 files changed, 12 insertions(+), 55 deletions(-) diff --git a/cortex-js/constant.ts b/cortex-js/constant.ts index b9a983657..14bda4837 100644 --- a/cortex-js/constant.ts +++ b/cortex-js/constant.ts @@ -3,7 +3,7 @@ export const databaseName = 'cortex'; export const databaseFile = `${databaseName}.db`; export const defaultCortexJsHost = 'localhost'; -export const defaultCortexJsPort = 7331; +export const defaultCortexJsPort = 1337; export const defaultCortexCppHost = '127.0.0.1'; export const defaultCortexCppPort = 3928; diff --git a/cortex-js/src/domain/abstracts/engine.abstract.ts b/cortex-js/src/domain/abstracts/engine.abstract.ts index 14f334140..6e7ba1591 100644 --- a/cortex-js/src/domain/abstracts/engine.abstract.ts +++ b/cortex-js/src/domain/abstracts/engine.abstract.ts @@ -6,9 +6,10 @@ import { Extension } from './extension.abstract'; export abstract class EngineExtension extends Extension { abstract provider: string; - abstract inference(dto: any, headers: Record): Promise; - - abstract inferenceStream(dto: any, headers: any): Promise; + abstract inference( + dto: any, + headers: Record, + ): Promise; async loadModel( model: Model, diff --git a/cortex-js/src/domain/abstracts/oai.abstract.ts b/cortex-js/src/domain/abstracts/oai.abstract.ts index d12360f67..5f145af64 100644 --- a/cortex-js/src/domain/abstracts/oai.abstract.ts +++ b/cortex-js/src/domain/abstracts/oai.abstract.ts @@ -9,37 +9,18 @@ export abstract class OAIEngineExtension extends EngineExtension { super(); } - override async inferenceStream( - createChatDto: any, - headers: Record, - ): Promise { - const response = await this.httpService - .post(this.apiUrl, createChatDto, { - headers: { - 'Content-Type': headers['content-type'] ?? 'application/json', - Authorization: headers['authorization'], - }, - responseType: 'stream', - }) - .toPromise(); - - if (!response) { - throw new Error('No response'); - } - - return response.data; - } - override async inference( createChatDto: any, headers: Record, - ): Promise { + ): Promise { + const { stream } = createChatDto; const response = await this.httpService .post(this.apiUrl, createChatDto, { headers: { 'Content-Type': headers['content-type'] ?? 'application/json', Authorization: headers['authorization'], }, + responseType: stream ? 'stream' : 'json', }) .toPromise(); if (!response) { diff --git a/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts index ac92ff7b9..df763446c 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts @@ -60,8 +60,8 @@ export class ChatCliUsecases { }; const decoder = new TextDecoder('utf-8'); - this.chatUsecases.inferenceStream(chatDto, {}).then((response) => { - response.on('error', (error) => { + this.chatUsecases.inference(chatDto, {}).then((response) => { + response.on('error', (error: any) => { console.error(error); rl.prompt(); }); @@ -71,7 +71,7 @@ export class ChatCliUsecases { rl.prompt(); }); - response.on('data', (chunk) => { + response.on('data', (chunk: any) => { let content = ''; const text = decoder.decode(chunk); const lines = text.trim().split('\n'); diff --git a/cortex-js/src/infrastructure/controllers/chat.controller.ts b/cortex-js/src/infrastructure/controllers/chat.controller.ts index d9c664915..ab0594531 100644 --- a/cortex-js/src/infrastructure/controllers/chat.controller.ts +++ b/cortex-js/src/infrastructure/controllers/chat.controller.ts @@ -26,7 +26,7 @@ export class ChatController { if (stream) { this.chatService - .inferenceStream(createChatDto, headers) + .inference(createChatDto, headers) .then((stream) => stream.pipe(res)); } else { res.json(await this.chatService.inference(createChatDto, headers)); diff --git a/cortex-js/src/usecases/chat/chat.usecases.ts b/cortex-js/src/usecases/chat/chat.usecases.ts index 72c44a1ed..61b0c0296 100644 --- a/cortex-js/src/usecases/chat/chat.usecases.ts +++ b/cortex-js/src/usecases/chat/chat.usecases.ts @@ -4,7 +4,6 @@ import { ExtensionRepository } from '@/domain/repositories/extension.interface'; import { Repository } from 'typeorm'; import { ModelEntity } from '@/infrastructure/entities/model.entity'; import { EngineExtension } from '@/domain/abstracts/engine.abstract'; -import stream from 'stream'; import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; @Injectable() @@ -37,28 +36,4 @@ export class ChatUsecases { } return engine.inference(createChatDto, headers); } - - async inferenceStream( - createChatDto: CreateChatCompletionDto, - headers: Record, - ): Promise { - const { model: modelId } = createChatDto; - const extensions = (await this.extensionRepository.findAll()) ?? []; - const model = await this.modelRepository.findOne({ - where: { id: modelId }, - }); - - if (!model) { - throw new ModelNotFoundException(modelId); - } - - const engine = extensions.find((e: any) => e.provider === model.engine) as - | EngineExtension - | undefined; - if (engine == null) { - throw new Error(`No engine found with name: ${model.engine}`); - } - - return engine?.inferenceStream(createChatDto, headers); - } }