From f19a5f65b2861fb79e4f23443ed3ce5297573db7 Mon Sep 17 00:00:00 2001
From: Louis Le
Date: Mon, 17 Jun 2024 23:54:17 +0700
Subject: [PATCH 1/3] feat: ship ONNX runtime on Windows

---
 .../commanders/usecases/init.cli.usecases.ts  | 119 +++++++++++++++-
 .../usecases/models.cli.usecases.ts           | 128 +++++++++++++++---
 .../src/infrastructure/constants/cortex.ts    |   3 +
 .../infrastructure/constants/huggingface.ts   |   2 +-
 .../providers/cortex/cortex.provider.ts       |   2 +-
 .../extensions/extension.repository.ts        |   5 +-
 cortex-js/src/usecases/chat/chat.usecases.ts  |   7 +-
 .../src/usecases/models/models.usecases.ts    |  21 +--
 cortex-js/src/utils/huggingface.ts            |  11 +-
 9 files changed, 254 insertions(+), 44 deletions(-)

diff --git a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts
index 9229db882..cc053df6e 100644
--- a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts
+++ b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts
@@ -1,4 +1,11 @@
-import { createWriteStream, existsSync, rmSync } from 'fs';
+import {
+  cpSync,
+  createWriteStream,
+  existsSync,
+  readdir,
+  readdirSync,
+  rmSync,
+} from 'fs';
 import { delimiter, join } from 'path';
 import { HttpService } from '@nestjs/axios';
 import { Presets, SingleBar } from 'cli-progress';
@@ -12,6 +19,7 @@ import { rm } from 'fs/promises';
 import { exec } from 'child_process';
 import { appPath } from '@/utils/app-path';
 import {
+  CORTEX_ONNX_ENGINE_RELEASES_URL,
   CORTEX_RELEASES_URL,
   CUDA_DOWNLOAD_URL,
 } from '@/infrastructure/constants/cortex';
@@ -59,7 +67,7 @@ export class InitCliUsecases {
       exit(1);
     }
 
-    console.log(`Downloading engine file ${engineFileName}`);
+    console.log(`Downloading Llama.cpp engine file ${engineFileName}`);
     const dataFolderPath = await this.fileManagerService.getDataFolderPath();
     const engineDir = join(dataFolderPath, 'cortex-cpp');
     if (existsSync(engineDir)) rmSync(engineDir, { recursive: true });
@@ -109,6 +117,9 @@
       exit(1);
     }
     await rm(destination, { force: true });
+
+    // Ship ONNX Runtime on Windows by default
+    if (process.platform === 'win32') await this.installONNXEngine();
   };
 
   parseEngineFileName = (options?: InitOptions) => {
@@ -187,6 +198,7 @@
     ).replace('<platform>', platform);
 
     const destination = join(dataFolderPath, 'cuda-toolkit.tar.gz');
+    console.log('Downloading CUDA Toolkit dependency...');
     const download = await firstValueFrom(
       this.httpService.get(url, {
         responseType: 'stream',
@@ -283,6 +295,109 @@
     });
   };
 
+  /**
+   * Download and install the ONNX engine
+   * @param version Engine version to install, e.g. 'latest'
+   * @param engineFileName Name of the release asset to download
+   */
+  async installONNXEngine(
+    version: string = 'v0.1.1',
+    engineFileName: string = 'windows-amd64',
+  ) {
+    const res = await firstValueFrom(
+      this.httpService.get(
+        CORTEX_ONNX_ENGINE_RELEASES_URL +
+          `${version === 'latest' ? '/latest' : ''}`,
+        {
+          headers: {
+            'X-GitHub-Api-Version': '2022-11-28',
+            Accept: 'application/vnd.github+json',
+          },
+        },
+      ),
+    );
+
+    if (!res?.data) {
+      console.log('Failed to fetch releases');
+      exit(1);
+    }
+
+    let release = res?.data;
+    if (Array.isArray(res?.data)) {
+      release = res.data.find(
+        (e: any) => e.name === version.replace('v', ''),
+      );
+    }
+    const toDownloadAsset = release.assets.find((s: any) =>
+      s.name.includes(engineFileName),
+    );
+
+    if (!toDownloadAsset) {
+      console.log(`Could not find engine file ${engineFileName}`);
+      exit(1);
+    }
+
+    console.log(`Downloading ONNX engine file ${engineFileName}`);
+    const dataFolderPath = await this.fileManagerService.getDataFolderPath();
+    const engineDir = join(dataFolderPath, 'cortex-cpp');
+
+    const download = await firstValueFrom(
+      this.httpService.get(toDownloadAsset.browser_download_url, {
+        responseType: 'stream',
+      }),
+    );
+    if (!download) {
+      console.log('Failed to download engine');
+      process.exit(1);
+    }
+
+    const destination = join(dataFolderPath, toDownloadAsset.name);
+
+    await new Promise((resolve, reject) => {
+      const writer = createWriteStream(destination);
+      let receivedBytes = 0;
+      const totalBytes = download.headers['content-length'];
+
+      writer.on('finish', () => {
+        bar.stop();
+        resolve(true);
+      });
+
+      writer.on('error', (error) => {
+        bar.stop();
+        reject(error);
+      });
+
+      const bar = new SingleBar({}, Presets.shades_classic);
+      bar.start(100, 0);
+
+      download.data.on('data', (chunk: any) => {
+        receivedBytes += chunk.length;
+        bar.update(Math.floor((receivedBytes / totalBytes) * 100));
+      });
+
+      download.data.pipe(writer);
+    });
+
+    try {
+      await decompress(destination, join(engineDir, 'engines'));
+    } catch (e) {
+      console.error('Error decompressing file', e);
+      exit(1);
+    }
+    await rm(destination, { force: true });
+
+    // Copy the additional files to the cortex-cpp directory
+    for (const file of readdirSync(join(engineDir, 'engines', 'cortex.onnx'))) {
+      if (file !== 'engine.dll') {
+        cpSync(
+          join(engineDir, 'engines', 'cortex.onnx', file),
+          join(engineDir, file),
+        );
+      }
+    }
+  }
+
   private checkFileExistenceInPaths = (
     file: string,
     paths: string[],
diff --git a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts
index b3ddb5470..55ce533bd 100644
--- a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts
+++ b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts
@@ -17,6 +17,8 @@ import { load } from 'js-yaml';
 import { existsSync, readdirSync, readFileSync } from 'fs';
 import { isLocalModel, normalizeModelId } from '@/utils/normalize-model-id';
 import { getHFModelMetadata } from '@/utils/huggingface';
+import { createWriteStream, mkdirSync, promises } from 'node:fs';
+import { firstValueFrom } from 'rxjs';
 
 @Injectable()
 export class ModelsCliUsecases {
@@ -118,40 +120,116 @@ export class ModelsCliUsecases {
       process.exit(1);
     }
 
-    await this.pullHuggingFaceModel(modelId);
-    const bar = new SingleBar({}, Presets.shades_classic);
-    bar.start(100, 0);
-    const callback = (progress: number) => {
-      bar.update(progress);
-    };
+    if (modelId.includes('onnx')) {
+      await this.pullOnnxModel(modelId);
+    } else {
+      await this.pullGGUFModel(modelId);
+      const bar = new SingleBar({}, Presets.shades_classic);
+      bar.start(100, 0);
+      const callback = (progress: number) => {
+        bar.update(progress);
+      };
+
+      try {
+        await this.modelsUsecases.downloadModel(modelId, callback);
+
+        const model = await this.modelsUsecases.findOne(modelId);
+        const fileUrl = join(
+          await this.fileService.getModelsPath(),
+          normalizeModelId(modelId),
+          basename((model?.files as string[])[0]),
+        );
+        await this.modelsUsecases.update(modelId, {
+          files: [fileUrl],
+          name: modelId.replace(':default', ''),
+        });
+      } catch (err) {
+        bar.stop();
+        throw err;
+      }
+    }
+  }
+
+  /**
+   * Pulls an ONNX model from the HuggingFace repository
+   * @param modelId The model id to pull
+   */
+  private async pullOnnxModel(modelId: string) {
+    const modelsContainerDir = await this.fileService.getModelsPath();
+
+    if (!existsSync(modelsContainerDir)) {
+      mkdirSync(modelsContainerDir, { recursive: true });
+    }
+
+    const modelFolder = join(modelsContainerDir, normalizeModelId(modelId));
+    await promises.mkdir(modelFolder, { recursive: true }).catch(() => {});
 
-    try {
-      await this.modelsUsecases.downloadModel(modelId, callback);
+    const files = [
+      'genai_config.json',
+      'model.onnx',
+      'model.onnx.data',
+      'model.yml',
+      'special_tokens_map.json',
+      'tokenizer.json',
+      'tokenizer_config.json',
+    ];
+    const repo = modelId.split(':')[0];
+    const branch = modelId.split(':')[1] || 'default';
+    for (const file of files) {
+      console.log(`Downloading ${file}`);
+      const bar = new SingleBar({}, Presets.shades_classic);
+      bar.start(100, 0);
 
-      const model = await this.modelsUsecases.findOne(modelId);
-      const fileUrl = join(
-        await this.fileService.getModelsPath(),
-        normalizeModelId(modelId),
-        basename((model?.files as string[])[0]),
+      const response = await firstValueFrom(
+        this.httpService.get(
+          `https://huggingface.co/cortexhub/${repo}/resolve/${branch}/${file}?download=true`,
+          {
+            responseType: 'stream',
+          },
+        ),
       );
-      await this.modelsUsecases.update(modelId, {
-        files: [fileUrl],
-        name: modelId.replace(':default', ''),
+      if (!response) {
+        throw new Error('Failed to download model');
+      }
+
+      await new Promise((resolve, reject) => {
+        const writer = createWriteStream(join(modelFolder, file));
+        let receivedBytes = 0;
+        const totalBytes = response.headers['content-length'];
+
+        writer.on('finish', () => {
+          resolve(true);
+        });
+
+        writer.on('error', (error) => {
+          reject(error);
+        });
+
+        response.data.on('data', (chunk: any) => {
+          receivedBytes += chunk.length;
+          bar.update(Math.floor((receivedBytes / totalBytes) * 100));
+        });
+
+        response.data.pipe(writer);
       });
-    } catch (err) {
       bar.stop();
-      throw err;
     }
-  }
 
-  //// PRIVATE METHODS ////
+    const model: CreateModelDto = load(
+      readFileSync(join(modelFolder, 'model.yml'), 'utf-8'),
+    ) as CreateModelDto;
+    model.files = [join(modelFolder)];
+    model.model = modelId;
 
+    if (!(await this.modelsUsecases.findOne(modelId)))
+      await this.modelsUsecases.create(model);
+  }
 
   /**
    * Pulls a model from the HuggingFace repository.
    * It could be a model from Jan's repo or another author's.
    * @param modelId HuggingFace model id, e.g. "janhq/llama-3" or "llama3:7b"
    */
-  private async pullHuggingFaceModel(modelId: string) {
+  private async pullGGUFModel(modelId: string) {
     const data: HuggingFaceRepoData =
       await this.modelsUsecases.fetchModelMetadata(modelId);
@@ -179,6 +257,7 @@
     } else {
       modelVersion = data.siblings.find((e) => e.rfilename.includes('.gguf'));
     }
+
     if (!modelVersion) throw 'No expected quantization found';
 
     const metadata = await getHFModelMetadata(modelVersion.downloadUrl!);
@@ -203,12 +282,17 @@
       // Default Model Settings
       ctx_len: 4096,
       ngl: 100,
-      engine: 'cortex.llamacpp',
+      engine: modelId.includes('onnx') ? 'cortex.onnx' : 'cortex.llamacpp',
     };
 
     if (!(await this.modelsUsecases.findOne(modelId)))
       await this.modelsUsecases.create(model);
   }
 
+  /**
+   * Parse a preset file into model settings
+   * @param preset Name of the preset file to parse
+   * @returns The parsed preset settings
+   */
   private async parsePreset(preset?: string): Promise<object> {
     const presetsFolder = await this.fileService.getPresetsPath();
diff --git a/cortex-js/src/infrastructure/constants/cortex.ts b/cortex-js/src/infrastructure/constants/cortex.ts
index dc21b027e..7d458da59 100644
--- a/cortex-js/src/infrastructure/constants/cortex.ts
+++ b/cortex-js/src/infrastructure/constants/cortex.ts
@@ -42,6 +42,9 @@ export const CORTEX_JS_STOP_API_SERVER_URL = (
 export const CORTEX_RELEASES_URL =
   'https://api.github.com/repos/janhq/cortex/releases';
 
+export const CORTEX_ONNX_ENGINE_RELEASES_URL =
+  'https://api.github.com/repos/janhq/cortex.onnx/releases';
+
 export const CUDA_DOWNLOAD_URL =
   'https://catalog.jan.ai/dist/cuda-dependencies/<version>/<platform>/cuda.tar.gz';
 
diff --git a/cortex-js/src/infrastructure/constants/huggingface.ts b/cortex-js/src/infrastructure/constants/huggingface.ts
index b2282e1f1..1e1c89a78 100644
--- a/cortex-js/src/infrastructure/constants/huggingface.ts
+++ b/cortex-js/src/infrastructure/constants/huggingface.ts
@@ -2,7 +2,7 @@ export const HUGGING_FACE_TREE_REF_URL = (
   repo: string,
   tree: string,
   path: string,
-) => `https://huggingface.co/janhq/${repo}/resolve/${tree}/${path}`;
+) => `https://huggingface.co/cortexhub/${repo}/resolve/${tree}/${path}`;
 
 export const HUGGING_FACE_DOWNLOAD_FILE_MAIN_URL = (
   author: string,
diff --git a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts
index d186a6278..7e2dc8618 100644
--- a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts
+++ b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts
@@ -15,7 +15,7 @@ import { FileManagerService } from '@/infrastructure/services/file-manager/file-
 
 @Injectable()
 export default class CortexProvider extends OAIEngineExtension {
-  provider: string = 'cortex.llamacpp';
+  provider: string = 'cortex';
   apiUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/chat_completion`;
 
   private loadModelUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/loadmodel`;
diff --git a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts
index 484cea812..389e5f371 100644
--- a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts
+++ b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts
@@ -11,7 +11,10 @@ import { existsSync } from 'fs';
 @Injectable()
 export class ExtensionRepositoryImpl implements ExtensionRepository {
   // Initialize the Extensions Map with the key-value pairs of the core providers.
-  extensions = new Map([['cortex', this.cortexProvider]]);
+  extensions = new Map([
+    ['cortex.llamacpp', this.cortexProvider],
+    ['cortex.onnx', this.cortexProvider],
+  ]);
 
   constructor(
     @Inject('CORTEX_PROVIDER')
diff --git a/cortex-js/src/usecases/chat/chat.usecases.ts b/cortex-js/src/usecases/chat/chat.usecases.ts
index dd3d41a0e..806ebb06a 100644
--- a/cortex-js/src/usecases/chat/chat.usecases.ts
+++ b/cortex-js/src/usecases/chat/chat.usecases.ts
@@ -25,15 +25,14 @@ export class ChatUsecases {
     headers: Record<string, string>,
   ): Promise<any> {
     const { model: modelId } = createChatDto;
-    const extensions = (await this.extensionRepository.findAll()) ?? [];
     const model = await this.modelRepository.findOne(modelId);
     if (!model) {
       throw new ModelNotFoundException(modelId);
     }
-    const engine = extensions.find((e: any) => e.provider === model?.engine) as
-      | EngineExtension
-      | undefined;
+    const engine = (await this.extensionRepository.findOne(
+      model!.engine ?? 'cortex.llamacpp',
+    )) as EngineExtension | undefined;
 
     if (engine == null) {
       throw new Error(`No engine found with name: ${model.engine}`);
diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts
index a14c3230a..c759cea6a 100644
--- a/cortex-js/src/usecases/models/models.usecases.ts
+++ b/cortex-js/src/usecases/models/models.usecases.ts
@@ -28,6 +28,7 @@ import {
   HuggingFaceModelVersion,
   HuggingFaceRepoData,
 } from '@/domain/models/huggingface.interface';
+
 import { LLAMA_2 } from '@/infrastructure/constants/prompt-constants';
 import { isValidUrl } from '@/utils/urls';
 import {
@@ -147,10 +148,9 @@
     settings?: ModelSettingParams,
   ): Promise<StartModelSuccessDto> {
     const model = await this.getModelOrThrow(modelId);
-    const extensions = (await this.extensionRepository.findAll()) ?? [];
-    const engine = extensions.find((e: any) => e.provider === model?.engine) as
-      | EngineExtension
-      | undefined;
+    const engine = (await this.extensionRepository.findOne(
+      model!.engine ?? 'cortex.llamacpp',
+    )) as EngineExtension | undefined;
 
     if (!engine) {
       return {
@@ -180,8 +180,9 @@
       Array.isArray(model.files) && !('llama_model_path' in model) && {
         llama_model_path: (model.files as string[])[0],
+        model_path: (model.files as string[])[0],
       }),
-      engine: 'cortex.llamacpp',
+      engine: model.engine ?? 'cortex.llamacpp',
       // User / Model settings
       ...parser.parseModelEngineSettings(model),
       ...parser.parseModelEngineSettings(settings ?? {}),
@@ -233,10 +234,9 @@
 
   async stopModel(modelId: string): Promise<StartModelSuccessDto> {
     const model = await this.getModelOrThrow(modelId);
-    const extensions = (await this.extensionRepository.findAll()) ?? [];
-    const engine = extensions.find((e: any) => e.provider === model?.engine) as
-      | EngineExtension
-      | undefined;
+    const engine = (await this.extensionRepository.findOne(
+      model!.engine ?? 'cortex.llamacpp',
+    )) as EngineExtension | undefined;
 
     if (!engine) {
       return {
@@ -419,6 +419,7 @@
     modelVersion: HuggingFaceModelVersion,
   ) {
     if (!modelVersion) throw 'No expected quantization found';
+
     const tokenizer = await getHFModelMetadata(modelVersion.downloadUrl!);
 
     const promptTemplate = tokenizer?.promptTemplate ?? LLAMA_2;
@@ -442,7 +443,7 @@
       // Default Model Settings
       ctx_len: 4096,
       ngl: 100,
-      engine: 'cortex.llamacpp',
+      engine: modelId.includes('onnx') ? 'cortex.onnx' : 'cortex.llamacpp',
     };
     if (!(await this.findOne(modelId))) await this.create(model);
   }
diff --git a/cortex-js/src/utils/huggingface.ts b/cortex-js/src/utils/huggingface.ts
index 68a6c38f9..975b8fe89 100644
--- a/cortex-js/src/utils/huggingface.ts
+++ b/cortex-js/src/utils/huggingface.ts
@@ -64,6 +64,7 @@ export function guessPromptTemplateFromHuggingFace(jinjaCode?: string): string {
 export async function fetchHuggingFaceRepoData(
   repoId: string,
 ): Promise<HuggingFaceRepoData> {
+
   const sanitizedUrl = getRepoModelsUrl(repoId);
 
   const { data: response } = await axios.get(sanitizedUrl);
@@ -113,7 +114,8 @@ export async function fetchJanRepoData(
 ): Promise<HuggingFaceRepoData> {
   const repo = modelId.split(':')[0];
   const tree = modelId.split(':')[1] ?? 'default';
-  const url = getRepoModelsUrl(`janhq/${repo}`, tree);
+  const url = getRepoModelsUrl(`cortexhub/${repo}`, tree);
+
   const res = await fetch(url);
   const response:
     | {
@@ -140,7 +142,7 @@
       tags: ['gguf'],
       id: modelId,
       modelId: modelId,
-      author: 'janhq',
+      author: 'cortexhub',
       sha: '',
       downloads: 0,
       lastModified: '',
@@ -161,6 +163,9 @@
   });
 
   data.modelUrl = url;
+
+
+
   return data;
 }
 
@@ -199,7 +204,7 @@ export async function getHFModelMetadata(
       version,
     };
   } catch (err) {
-    console.log('Failed to get model metadata:', err);
+    console.log('Failed to get model metadata:', err.message);
     return undefined;
   }
 }

From 13be3b12ee2675b49c5bbb36e2edf0beaaa39e52 Mon Sep 17 00:00:00 2001
From: Louis
Date: Tue, 18 Jun 2024 00:15:15 +0700
Subject: [PATCH 2/3] chore: update `models pull` default registry url so
 users can access and find available models

---
 .../src/infrastructure/commanders/models/model-pull.command.ts | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts
index 287fc2258..c6b81d9cf 100644
--- a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts
+++ b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts
@@ -10,7 +10,8 @@ import { ModelNotFoundException } from '@/infrastructure/exception/model-not-fou
   aliases: ['download'],
   arguments: '<model_id>',
   argsDescription: { model_id: 'Model repo to pull' },
-  description: 'Download a model. Working with HuggingFace model id.',
+  description:
+    'Download a model from a registry. Works with HuggingFace repositories. For available models, please visit https://huggingface.co/cortexhub',
 })
 @SetCommandContext()
 export class ModelPullCommand extends CommandRunner {

From 31d9d7184f1db4a1b6a85d2e817a15996bee0b35 Mon Sep 17 00:00:00 2001
From: Louis Le
Date: Tue, 18 Jun 2024 14:43:27 +0700
Subject: [PATCH 3/3] chore: install the latest ONNX engine release by default

---
 .../src/infrastructure/commanders/usecases/init.cli.usecases.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts
index cc053df6e..5c8fda51c 100644
--- a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts
+++ b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts
@@ -301,7 +301,7 @@ export class InitCliUsecases {
    * @param engineFileName Name of the release asset to download
    */
   async installONNXEngine(
-    version: string = 'v0.1.1',
+    version: string = 'latest',
     engineFileName: string = 'windows-amd64',
   ) {
     const res = await firstValueFrom(