diff --git a/cortex-js/src/app.module.ts b/cortex-js/src/app.module.ts index b27888ea2..c4e1d2eda 100644 --- a/cortex-js/src/app.module.ts +++ b/cortex-js/src/app.module.ts @@ -7,6 +7,7 @@ import { DatabaseModule } from './infrastructure/database/database.module'; import { ChatModule } from './usecases/chat/chat.module'; import { AssistantsModule } from './usecases/assistants/assistants.module'; import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module'; +import { ModelRepositoryModule } from './infrastructure/repositories/model/model.module'; import { CortexModule } from './usecases/cortex/cortex.module'; import { ConfigModule } from '@nestjs/config'; import { env } from 'node:process'; @@ -31,6 +32,7 @@ import { FileManagerModule } from './file-manager/file-manager.module'; CortexModule, ExtensionModule, FileManagerModule, + ModelRepositoryModule, ], providers: [SeedService], }) diff --git a/cortex-js/src/domain/models/model.interface.ts b/cortex-js/src/domain/models/model.interface.ts index 6f8a834c9..3a3905935 100644 --- a/cortex-js/src/domain/models/model.interface.ts +++ b/cortex-js/src/domain/models/model.interface.ts @@ -1,21 +1,6 @@ -/** - * Represents the information about a model. - * @stored - */ -export interface ModelInfo { - id: string; - settings: ModelSettingParams; - parameters: ModelRuntimeParams; - engine?: string; -} - export interface ModelArtifact { - url: string; -} - -export enum ModelFormat { - GGUF = 'gguf', - API = 'api', + mmproj?: string; + llama_model_path?: string; } /** @@ -24,64 +9,91 @@ export enum ModelFormat { */ export interface Model { /** - * The type of the object. - * Default: "model" + * Model identifier. */ - object: string; + model: string; /** - * The version of the model. + * GGUF metadata: general.name */ - version: string; + name?: string; /** - * The format of the model. + * GGUF metadata: version */ - format: ModelFormat; + version?: string; /** * The model download source. It can be an external url or a local filepath. */ - sources: ModelArtifact[]; + files: string[] | ModelArtifact; + + /** + * GGUF metadata: tokenizer.chat_template + */ + prompt_template?: string; + + /** + * Defines specific tokens or phrases at which the model will stop generating further output. + */ + stop?: string[]; + + /// Inferencing + /** + * Set probability threshold for more relevant outputs. + */ + top_p?: number; /** - * The model identifier, which can be referenced in the API endpoints. + * Controls the randomness of the model’s output. */ - id: string; + temperature?: number; /** - * Human-readable name that is used for UI. + * Adjusts the likelihood of the model repeating words or phrases in its output. */ - name: string; + frequency_penalty?: number; /** - * The Unix timestamp (in seconds) for when the model was created + * Influences the generation of new and varied concepts in the model’s output. */ - created: number; + presence_penalty?: number; + /// Engines /** - * Default: "A cool model from Huggingface" + * The context length for model operations varies; the maximum depends on the specific model used. */ - description: string; + ctx_len?: number; /** - * The model settings. + * Enable real-time data processing for faster predictions. */ - settings: ModelSettingParams; + stream?: boolean; + + /* + * The maximum number of tokens the model will generate in a single response. + */ + max_tokens?: number; /** - * The model runtime parameters. + * The number of layers to load onto the GPU for acceleration. 
*/ - parameters: ModelRuntimeParams; + ngl?: number; /** - * Metadata of the model. + * The number of parallel operations. Only set when continuous batching is enabled. */ - metadata: ModelMetadata; + n_parallel?: number; + + /** + * Determines CPU inference threads, limited by hardware and OS. (Maximum determined by system) + */ + cpu_threads?: number; + /** * The model engine. */ - engine: string; + engine?: string; } export interface ModelMetadata { @@ -109,6 +121,8 @@ export interface ModelSettingParams { cont_batching?: boolean; vision_model?: boolean; text_model?: boolean; + engine?: string; + stop?: string[]; } /** @@ -133,8 +147,3 @@ export interface ModelRuntimeParams { export type ModelInitFailed = Model & { error: Error; }; - -export interface NitroModelSettings extends ModelSettingParams { - llama_model_path: string; - cpu_threads: number; -} diff --git a/cortex-js/src/domain/models/thread.interface.ts b/cortex-js/src/domain/models/thread.interface.ts index 21a481aea..bdfb67b32 100644 --- a/cortex-js/src/domain/models/thread.interface.ts +++ b/cortex-js/src/domain/models/thread.interface.ts @@ -1,5 +1,5 @@ import { AssistantTool } from './assistant.interface'; -import { ModelInfo } from './model.interface'; +import { Model } from './model.interface'; export interface Thread { /** Unique identifier for the thread, generated by default using the ULID method. **/ @@ -40,7 +40,7 @@ export interface ThreadMetadata { export interface ThreadAssistantInfo { assistant_id: string; assistant_name: string; - model: ModelInfo; + model: Partial<Model>; instructions?: string; tools?: AssistantTool[]; } diff --git a/cortex-js/src/domain/repositories/model.interface.ts b/cortex-js/src/domain/repositories/model.interface.ts new file mode 100644 index 000000000..1d8f3ddac --- /dev/null +++ b/cortex-js/src/domain/repositories/model.interface.ts @@ -0,0 +1,4 @@ +import { Model } from '../models/model.interface'; +import { Repository } from './repository.interface'; + +export abstract class ModelRepository extends Repository<Model> {} diff --git a/cortex-js/src/file-manager/file-manager.service.ts b/cortex-js/src/file-manager/file-manager.service.ts index c47b876d0..62431b98b 100644 --- a/cortex-js/src/file-manager/file-manager.service.ts +++ b/cortex-js/src/file-manager/file-manager.service.ts @@ -10,8 +10,13 @@ export class FileManagerService { private configFile = '.cortexrc'; private cortexDirectoryName = 'cortex'; private modelFolderName = 'models'; + private extensionFoldername = 'extensions'; private cortexCppFolderName = 'cortex-cpp'; + /** + * Get cortex configs + * @returns the config object + */ async getConfig(): Promise<Config> { const homeDir = os.homedir(); const configPath = join(homeDir, this.configFile); @@ -71,8 +76,33 @@ export class FileManagerService { }; } + /** + * Get the app data folder path + * Usually it is located at the home directory > cortex + * @returns the path to the data folder + */ async getDataFolderPath(): Promise<string> { const config = await this.getConfig(); return config.dataFolderPath; } + + /** + * Get the models data folder path + * Usually it is located at the home directory > cortex > models + * @returns the path to the models folder + */ + async getModelsPath(): Promise<string> { + const dataFolderPath = await this.getDataFolderPath(); + return join(dataFolderPath, this.modelFolderName); + } + + /** + * Get the extensions data folder path + * Usually it is located at the home directory > cortex > extensions + * @returns the path to the extensions folder + */ + async getExtensionsPath(): Promise<string> { + const dataFolderPath = await this.getDataFolderPath(); + return join(dataFolderPath, this.extensionFoldername); + } } diff --git a/cortex-js/src/infrastructure/commanders/models/model-get.command.ts b/cortex-js/src/infrastructure/commanders/models/model-get.command.ts index 15136adc6..0ca0f7142 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-get.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-get.command.ts @@ -14,7 +14,8 @@ export class ModelGetCommand extends CommandRunner { exit(1); } - const models = await this.modelsCliUsecases.getModel(input[0]); - console.log(models); + const model = await this.modelsCliUsecases.getModel(input[0]); + if (!model) console.error('Model not found'); + else console.log(model); } } diff --git a/cortex-js/src/infrastructure/commanders/models/model-list.command.ts b/cortex-js/src/infrastructure/commanders/models/model-list.command.ts index a32c609cd..90a914963 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-list.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-list.command.ts @@ -15,10 +15,9 @@ export class ModelListCommand extends CommandRunner { option.format === 'table' ? console.table( models.map((e) => ({ - id: e.id, + id: e.model, engine: e.engine, - format: e.format, - created: e.created, + version: e.version, })), ) : console.log(models); diff --git a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts index f8b6891b9..7793cf13c 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts @@ -2,7 +2,7 @@ import { CommandRunner, InquirerService, SubCommand } from 'nest-commander'; import { exit } from 'node:process'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; import { RepoDesignation, listFiles } from '@huggingface/hub'; -import { basename } from 'node:path'; +import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; @SubCommand({ name: 'pull', @@ -29,12 +29,16 @@ export class ModelPullCommand extends CommandRunner { ? undefined : await this.tryToGetBranches(input[0]); - if (!branches) { - await this.modelsCliUsecases.pullModel(input[0]); - } else { - // if there's metadata.yaml file, we assumed it's a JanHQ model - await this.handleJanHqModel(input[0], branches); - } + await this.modelsCliUsecases + .pullModel( + !branches ? input[0] : await this.handleJanHqModel(input[0], branches), + ) + .catch((e: Error) => { + if (e instanceof ModelNotFoundException) + console.error('Model does not exist.'); + else console.error(e); + exit(1); + }); console.log('\nDownload complete!'); exit(0); @@ -83,10 +87,6 @@ export class ModelPullCommand extends CommandRunner { } private async handleJanHqModel(repoName: string, branches: string[]) { - const sanitizedRepoName = repoName.trim().startsWith(this.janHqModelPrefix) ?
repoName - : `${this.janHqModelPrefix}/${repoName}`; - let selectedTag = branches[0]; if (branches.length > 1) { @@ -98,30 +98,7 @@ export class ModelPullCommand extends CommandRunner { console.error("Can't find model revision."); exit(1); } - - const repo: RepoDesignation = { type: 'model', name: sanitizedRepoName }; - let ggufUrl: string | undefined = undefined; - let fileSize = 0; - for await (const fileInfo of listFiles({ - repo: repo, - revision: revision, - })) { - if (fileInfo.path.endsWith('.gguf')) { - ggufUrl = `https://huggingface.co/${sanitizedRepoName}/resolve/${revision}/${fileInfo.path}`; - fileSize = fileInfo.size; - break; - } - } - - if (!ggufUrl) { - console.error("Can't find model file."); - exit(1); - } - console.log('Downloading', basename(ggufUrl)); - await this.modelsCliUsecases.pullModelWithExactUrl( - `${sanitizedRepoName}/${revision}`, - ggufUrl, - fileSize, - ); + // Return parsed model Id + return `${repoName}:${revision}`; } } diff --git a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts index 3fcbd82e8..be91c3fe0 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts @@ -33,6 +33,16 @@ export class ModelStartCommand extends CommandRunner { } } + const existingModel = await this.modelsCliUsecases.getModel(modelId); + if ( + !existingModel || + !Array.isArray(existingModel.files) || + /^(http|https):\/\/[^/]+\/.*/.test(existingModel.files[0]) + ) { + console.error('Model is not available. Please pull the model first.'); + process.exit(1); + } + await this.cortexUsecases .startCortex(options.attach) .then(() => this.modelsCliUsecases.startModel(modelId, options.preset)) @@ -41,7 +51,11 @@ export class ModelStartCommand extends CommandRunner { } modelInquiry = async () => { - const models = await this.modelsCliUsecases.listAllModels(); + const models = (await this.modelsCliUsecases.listAllModels()).filter( + (model) => + Array.isArray(model.files) && + !/^(http|https):\/\/[^/]+\/.*/.test(model.files[0]), + ); if (!models.length) throw 'No models found'; const { model } = await this.inquirerService.inquirer.prompt({ type: 'list', @@ -49,7 +63,7 @@ export class ModelStartCommand extends CommandRunner { message: 'Select a model to start:', choices: models.map((e) => ({ name: e.name, - value: e.id, + value: e.model, })), }); return model; diff --git a/cortex-js/src/infrastructure/commanders/models/model-update.command.ts b/cortex-js/src/infrastructure/commanders/models/model-update.command.ts index 6f583e64c..7f9c6b0cd 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-update.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-update.command.ts @@ -1,11 +1,7 @@ import { CommandRunner, SubCommand, Option } from 'nest-commander'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; import { exit } from 'node:process'; -import { ModelParameterParser } from '../utils/model-parameter.parser'; -import { - ModelRuntimeParams, - ModelSettingParams, -} from '@/domain/models/model.interface'; +import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; type UpdateOptions = { model?: string; @@ -31,42 +27,13 @@ export class ModelUpdateCommand extends CommandRunner { exit(0); } - const parser = new ModelParameterParser(); - const settingParams: ModelSettingParams = {}; - const runtimeParams: ModelRuntimeParams = {}; + 
const toUpdate: UpdateModelDto = {}; options.forEach((option) => { const [key, stringValue] = option.split('='); - if (parser.isModelSettingParam(key)) { - const value = parser.parse(key, stringValue); - // @ts-expect-error did the check so it's safe - settingParams[key] = value; - } else if (parser.isModelRuntimeParam(key)) { - const value = parser.parse(key, stringValue); - // @ts-expect-error did the check so it's safe - runtimeParams[key] = value; - } + Object.assign(toUpdate, { [key]: stringValue }); }); - - if (Object.keys(settingParams).length > 0) { - const updatedSettingParams = - await this.modelsCliUsecases.updateModelSettingParams( - modelId, - settingParams, - ); - console.log( - 'Updated setting params! New setting params:', - updatedSettingParams, - ); - } - - if (Object.keys(runtimeParams).length > 0) { - await this.modelsCliUsecases.updateModelRuntimeParams( - modelId, - runtimeParams, - ); - console.log('Updated runtime params! New runtime params:', runtimeParams); - } + this.modelsCliUsecases.updateModel(modelId, toUpdate); } @Option({ diff --git a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts index 464818b66..8cbe6f983 100644 --- a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts +++ b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts @@ -63,7 +63,11 @@ export class RunCommand extends CommandRunner { } modelInquiry = async () => { - const models = await this.modelsCliUsecases.listAllModels(); + const models = (await this.modelsCliUsecases.listAllModels()).filter( + (model) => + Array.isArray(model.files) && + !/^(http|https):\/\/[^/]+\/.*/.test(model.files[0]), + ); if (!models.length) throw 'No models found'; const { model } = await this.inquirerService.inquirer.prompt({ type: 'list', @@ -71,7 +75,7 @@ message: 'Select a model to start:', choices: models.map((e) => ({ name: e.name, - value: e.id, + value: e.model, })), }); return model; diff --git a/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts index 2ef890d49..9347137de 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts @@ -15,11 +15,11 @@ import { Thread } from '@/domain/models/thread.interface'; import { CreateThreadDto } from '@/infrastructure/dtos/threads/create-thread.dto'; import { AssistantsUsecases } from '@/usecases/assistants/assistants.usecases'; import { CreateThreadAssistantDto } from '@/infrastructure/dtos/threads/create-thread-assistant.dto'; -import { CreateThreadModelInfoDto } from '@/infrastructure/dtos/threads/create-thread-model-info.dto'; import { ModelsUsecases } from '@/usecases/models/models.usecases'; import stream from 'stream'; import { CreateMessageDto } from '@/infrastructure/dtos/messages/create-message.dto'; import { MessagesUsecases } from '@/usecases/messages/messages.usecases'; +import { ModelParameterParser } from '../utils/model-parameter.parser'; @Injectable() export class ChatCliUsecases { @@ -68,12 +68,14 @@ export class ChatCliUsecases { rl.on('line', sendCompletionMessage.bind(this)); - function sendCompletionMessage(userInput: string) { + async function sendCompletionMessage(userInput: string) { if (userInput.trim() === this.exitClause) { rl.close(); return; } + const model = await
this.modelsUsecases.findOne(modelId); + messages.push({ content: userInput, role: ChatCompletionRole.User, @@ -95,23 +97,64 @@ export class ChatCliUsecases { }; this.messagesUsecases.create(createMessageDto); + const parser = new ModelParameterParser(); const chatDto: CreateChatCompletionDto = { + // Default results params messages, model: modelId, stream: true, - max_tokens: 2048, + max_tokens: 4098, stop: [], frequency_penalty: 0.7, presence_penalty: 0.7, temperature: 0.7, top_p: 0.7, + + // Override with model settings + ...parser.parseModelInferenceParams(model), }; const decoder = new TextDecoder('utf-8'); this.chatUsecases .inference(chatDto, {}) + .then((response: stream.Readable) => { + // None streaming - json object response + if (!chatDto.stream) { + const objectData = response as any; + const assistantResponse = + objectData.choices[0]?.message?.content ?? ''; + + stdout.write(assistantResponse); + messages.push({ + content: assistantResponse, + role: ChatCompletionRole.Assistant, + }); + + const createMessageDto: CreateMessageDto = { + thread_id: thread.id, + role: ChatCompletionRole.Assistant, + content: [ + { + type: ContentType.Text, + text: { + value: assistantResponse, + annotations: [], + }, + }, + ], + status: MessageStatus.Ready, + }; + + this.messagesUsecases.create(createMessageDto).then(() => { + console.log('\n'); + if (attach) rl.prompt(); + else rl.close(); + }); + return; + } + // Streaming let assistantResponse: string = ''; response.on('error', (error: any) => { @@ -194,16 +237,10 @@ export class ChatCliUsecases { const assistant = await this.assistantUsecases.findOne('jan'); if (!assistant) throw new Error('No assistant available'); - const createThreadModel: CreateThreadModelInfoDto = { - id: modelId, - settings: model.settings, - parameters: model.parameters, - }; - const assistantDto: CreateThreadAssistantDto = { assistant_id: assistant.id, assistant_name: assistant.name, - model: createThreadModel, + model: model, }; const createThreadDto: CreateThreadDto = { diff --git a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts index abbf95c21..9e03b9723 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts @@ -1,11 +1,6 @@ import { exit } from 'node:process'; import { ModelsUsecases } from '@/usecases/models/models.usecases'; -import { - Model, - ModelFormat, - ModelRuntimeParams, - ModelSettingParams, -} from '@/domain/models/model.interface'; +import { Model } from '@/domain/models/model.interface'; import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto'; import { HuggingFaceRepoData } from '@/domain/models/huggingface.interface'; import { gguf } from '@huggingface/gguf'; @@ -25,10 +20,12 @@ import { ModelTokenizer } from '../types/model-tokenizer.interface'; import { HttpService } from '@nestjs/axios'; import { firstValueFrom } from 'rxjs'; import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; +import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; import { FileManagerService } from '@/file-manager/file-manager.service'; -import { join } from 'path'; +import { join, basename } from 'path'; import { load } from 'js-yaml'; -import { existsSync, readFileSync } from 'node:fs'; +import { existsSync, readFileSync } from 'fs'; +import { normalizeModelId } from 
'../utils/normalize-model-id'; const AllQuantizations = [ 'Q3_K_S', @@ -60,7 +57,7 @@ export class ModelsCliUsecases { @Inject(InquirerService) private readonly inquirerService: InquirerService, private readonly httpService: HttpService, - private readonly fileManagerService: FileManagerService, + private readonly fileService: FileManagerService, ) {} /** @@ -74,7 +71,7 @@ const parsedPreset = await this.parsePreset(preset); return this.getModelOrStop(modelId) .then((model) => ({ - ...model.settings, + ...model, ...parsedPreset, })) .then((settings) => this.modelsUsecases.startModel(modelId, settings)) @@ -97,29 +94,10 @@ } /** - * Update model's settings. E.g. ngl, prompt_template, etc. - * @param modelId - * @param settingParams - * @returns + * Update a model by ID with new data */ - async updateModelSettingParams( - modelId: string, - settingParams: ModelSettingParams, - ): Promise<ModelSettingParams> { - return this.modelsUsecases.updateModelSettingParams(modelId, settingParams); - } - - /** - * Update model's runtime parameters. E.g. max_tokens, temperature, etc. - * @param modelId - * @param runtimeParams - * @returns - */ - async updateModelRuntimeParams( - modelId: string, - runtimeParams: ModelRuntimeParams, - ): Promise<ModelRuntimeParams> { - return this.modelsUsecases.updateModelRuntimeParams(modelId, runtimeParams); + async updateModel(modelId: string, toUpdate: UpdateModelDto) { + return this.modelsUsecases.update(modelId, toUpdate); } /** @@ -127,7 +105,7 @@ * @param modelId * @returns */ - private async getModelOrStop(modelId: string): Promise<Model> { + async getModelOrStop(modelId: string): Promise<Model> { const model = await this.modelsUsecases.findOne(modelId); if (!model) { console.debug('Model not found'); @@ -149,9 +127,8 @@ * @param modelId * @returns */ - async getModel(modelId: string): Promise<Model> { - const model = await this.getModelOrStop(modelId); - return model; + async getModel(modelId: string): Promise<Model | null> { + return this.modelsUsecases.findOne(modelId); } /** @@ -164,52 +141,21 @@ return this.modelsUsecases.remove(modelId); } - async pullModelWithExactUrl(modelId: string, url: string, fileSize: number) { - const tokenizer = await this.getHFModelTokenizer(url); - const promptTemplate = tokenizer?.promptTemplate ?? LLAMA_2; - const stopWords: string[] = [tokenizer?.stopWord ?? '']; - - const model: CreateModelDto = { - sources: [ - { - url: url, - }, - ], - id: modelId, - name: modelId, - version: '1.0.0', - format: ModelFormat.GGUF, - description: '', - settings: { - prompt_template: promptTemplate, - }, - parameters: { - stop: stopWords, - }, - metadata: { - author: 'janhq', - size: fileSize, - tags: [], - }, - engine: 'cortex', - }; - if (!(await this.modelsUsecases.findOne(modelId))) { - await this.modelsUsecases.create(model); - } - - const bar = new SingleBar({}, Presets.shades_classic); - bar.start(100, 0); - const callback = (progress: number) => { - bar.update(progress); - }; - await this.modelsUsecases.downloadModel(modelId, callback); - } - /** * Pull model from Model repository (HF, Jan...)
* @param modelId */ async pullModel(modelId: string) { + const existingModel = await this.modelsUsecases.findOne(modelId); + if ( + existingModel && + Array.isArray(existingModel.files) && + !/^(http|https):\/\/[^/]+\/.*/.test(existingModel.files[0]) + ) { + console.error('Model already exists'); + process.exit(1); + } + if (modelId.includes('/') || modelId.includes(':')) { await this.pullHuggingFaceModel(modelId); } @@ -218,7 +164,21 @@ export class ModelsCliUsecases { const callback = (progress: number) => { bar.update(progress); }; - await this.modelsUsecases.downloadModel(modelId, callback); + + try { + await this.modelsUsecases.downloadModel(modelId, callback); + + const model = await this.modelsUsecases.findOne(modelId); + const fileUrl = join( + await this.fileService.getModelsPath(), + normalizeModelId(modelId), + basename((model?.files as string[])[0]), + ); + await this.modelsUsecases.update(modelId, { files: [fileUrl] }); + } catch (err) { + bar.stop(); + throw err; + } } private async getHFModelTokenizer( @@ -289,29 +249,24 @@ export class ModelsCliUsecases { const stopWords: string[] = [tokenizer?.stopWord ?? '']; const model: CreateModelDto = { - sources: [ - { - url: sibling?.downloadUrl ?? '', - }, - ], - id: modelId, + files: [sibling.downloadUrl ?? ''], + model: modelId, name: modelId, - version: '', - format: ModelFormat.GGUF, - description: '', - settings: { - prompt_template: promptTemplate, - llama_model_path: sibling.rfilename, - }, - parameters: { - stop: stopWords, - }, - metadata: { - author: data.author, - size: sibling.fileSize ?? 0, - tags: [], - }, - engine: 'cortex', + prompt_template: promptTemplate, + stop: stopWords, + + // Default Inference Params + stream: true, + max_tokens: 4098, + frequency_penalty: 0.7, + presence_penalty: 0.7, + temperature: 0.7, + top_p: 0.7, + + // Default Model Settings + ctx_len: 4096, + ngl: 100, + engine: 'cortex.llamacpp', }; if (!(await this.modelsUsecases.findOne(modelId))) await this.modelsUsecases.create(model); @@ -456,7 +411,7 @@ export class ModelsCliUsecases { private async parsePreset(preset?: string): Promise { const presetPath = join( - await this.fileManagerService.getDataFolderPath(), + await this.fileService.getDataFolderPath(), 'presets', `${preset}.yaml`, ); diff --git a/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts index 81f2fe8b4..51e097e3e 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts @@ -40,7 +40,7 @@ export class PSCliUsecases { currentTime.getTime() - new Date(startTime).getTime(); return { modelId: e.id, - engine: e.engine ?? 'llama.cpp', // TODO: get engine from model when it's ready + engine: e.engine ?? 'cortex.llamacpp', status: 'running', duration: this.formatDuration(duration), ram: e.ram ?? 
'-', diff --git a/cortex-js/src/infrastructure/commanders/utils/model-parameter.parser.ts b/cortex-js/src/infrastructure/commanders/utils/model-parameter.parser.ts index c8ca62650..7ea02e847 100644 --- a/cortex-js/src/infrastructure/commanders/utils/model-parameter.parser.ts +++ b/cortex-js/src/infrastructure/commanders/utils/model-parameter.parser.ts @@ -1,25 +1,24 @@ +import { + Model, + ModelRuntimeParams, + ModelSettingParams, +} from '@/domain/models/model.interface'; + // Make this class injectable export class ModelParameterParser { private modelSettingParamTypes: { [key: string]: string } = { + prompt_template: 'string', ctx_len: 'number', ngl: 'number', - embedding: 'boolean', n_parallel: 'number', cpu_threads: 'number', - prompt_template: 'string', - system_prompt: 'string', - ai_prompt: 'string', - user_prompt: 'string', llama_model_path: 'string', mmproj: 'string', cont_batching: 'boolean', - vision_model: 'boolean', - text_model: 'boolean', }; private modelRuntimeParamTypes: { [key: string]: string } = { temperature: 'number', - token_limit: 'number', top_k: 'number', top_p: 'number', stream: 'boolean', @@ -29,105 +28,44 @@ export class ModelParameterParser { presence_penalty: 'number', }; - isModelSettingParam(key: string): boolean { - return key in this.modelSettingParamTypes; - } - - isModelRuntimeParam(key: string): boolean { - return key in this.modelRuntimeParamTypes; - } - - parse(key: string, value: string): boolean | number | string | string[] { - if (this.isModelSettingParam(key)) { - return this.parseModelSettingParams(key, value); - } - - if (this.isModelRuntimeParam(key)) { - return this.parseModelRuntimeParams(key, value); - } - - throw new Error(`Invalid setting key: ${key}`); - } - - private parseModelSettingParams( - key: string, - value: string, - ): boolean | number | string | string[] { - const settingType = this.modelSettingParamTypes[key]; - if (!settingType) { - throw new Error(`Invalid setting key: ${key}`); - } - - switch (settingType) { - case 'string': - return value; - - case 'number': - return this.toNumber(value); - - case 'string[]': - return this.toStringArray(value); - - case 'boolean': - return this.toBoolean(value); - - default: - throw new Error('Invalid setting type'); - } + /** + * Parse the model inference parameters from origin Model + * @param model + * @returns Partial + */ + parseModelInferenceParams(model: Partial): Partial { + const inferenceParams: Partial & ModelRuntimeParams = + structuredClone(model); + return Object.keys(inferenceParams).reduce((acc, key) => { + if (!this.isModelRuntimeParam(key)) { + delete acc[key as keyof typeof acc]; + } + + return acc; + }, inferenceParams); } - - private parseModelRuntimeParams( - key: string, - value: string, - ): boolean | number | string | string[] { - const settingType = this.modelRuntimeParamTypes[key]; - if (!settingType) { - throw new Error(`Invalid setting key: ${key}`); - } - - switch (settingType) { - case 'string': - return value; - - case 'number': - return this.toNumber(value); - - case 'string[]': - return this.toStringArray(value); - - case 'boolean': - return this.toBoolean(value); - - default: - throw new Error('Invalid setting type'); - } - } - - private toNumber(str: string): number { - const num = parseFloat(str.trim()); - if (isNaN(num)) { - throw new Error(`Invalid number value: ${str}`); - } - return num; + /** + * Parse the model engine settings from origin Model + * @param model + * @returns Partial + */ + parseModelEngineSettings(model: Partial): Partial { + 
const engineSettings: Partial & ModelSettingParams = + structuredClone(model); + return Object.keys(engineSettings).reduce((acc, key) => { + if (!this.isModelSettingParam(key)) { + delete acc[key as keyof typeof acc]; + } + + return acc; + }, engineSettings); } - private toStringArray(str: string, delimiter: string = ','): string[] { - return str.split(delimiter).map((s) => s.trim()); + private isModelSettingParam(key: string): boolean { + return key in this.modelSettingParamTypes; } - private toBoolean(str: string): boolean { - const normalizedStr = str.trim().toLowerCase(); - switch (normalizedStr) { - case '1': - case 'true': - return true; - - case '0': - case 'false': - return false; - - default: - throw new Error(`Invalid boolean value: ${str}`); - } + private isModelRuntimeParam(key: string): boolean { + return key in this.modelRuntimeParamTypes; } } diff --git a/cortex-js/src/infrastructure/commanders/utils/normalize-model-id.ts b/cortex-js/src/infrastructure/commanders/utils/normalize-model-id.ts index c36cb339e..bd54f1bd9 100644 --- a/cortex-js/src/infrastructure/commanders/utils/normalize-model-id.ts +++ b/cortex-js/src/infrastructure/commanders/utils/normalize-model-id.ts @@ -1,3 +1,3 @@ export const normalizeModelId = (modelId: string): string => { - return modelId.replace(':', '%3A'); + return modelId.replace(':', '-').replace('/', '-'); }; diff --git a/cortex-js/src/infrastructure/controllers/models.controller.ts b/cortex-js/src/infrastructure/controllers/models.controller.ts index bb1d415b1..5d6340e1a 100644 --- a/cortex-js/src/infrastructure/controllers/models.controller.ts +++ b/cortex-js/src/infrastructure/controllers/models.controller.ts @@ -18,7 +18,6 @@ import { DeleteModelResponseDto } from '@/infrastructure/dtos/models/delete-mode import { DownloadModelResponseDto } from '@/infrastructure/dtos/models/download-model.dto'; import { ApiOperation, ApiParam, ApiTags, ApiResponse } from '@nestjs/swagger'; import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; -import { ModelSettingParamsDto } from '../dtos/models/model-setting-params.dto'; import { TransformInterceptor } from '../interceptors/transform.interceptor'; import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; @@ -62,13 +61,10 @@ export class ModelsController { description: 'The unique identifier of the model.', }) @Post(':modelId(*)/start') - startModel( - @Param('modelId') modelId: string, - @Body() settings: ModelSettingParamsDto, - ) { + startModel(@Param('modelId') modelId: string, @Body() model: ModelDto) { return this.cortexUsecases .startCortex() - .then(() => this.modelsUsecases.startModel(modelId, settings)); + .then(() => this.modelsUsecases.startModel(modelId, model)); } @HttpCode(200) diff --git a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts index acc3fd9ab..09beb940b 100644 --- a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts +++ b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts @@ -1,55 +1,127 @@ -import { Type } from 'class-transformer'; -import { IsArray, IsEnum, IsString, ValidateNested } from 'class-validator'; -import { Model, ModelFormat } from '@/domain/models/model.interface'; +import { + IsArray, + IsBoolean, + IsNumber, + IsOptional, + IsString, +} from 'class-validator'; +import { Model } from '@/domain/models/model.interface'; import { ModelArtifactDto } from './model-artifact.dto'; -import { ModelSettingParamsDto } from 
'./model-setting-params.dto'; -import { ModelRuntimeParamsDto } from './model-runtime-params.dto'; -import { ModelMetadataDto } from './model-metadata.dto'; -import { ApiProperty } from '@nestjs/swagger'; +import { ApiProperty, getSchemaPath } from '@nestjs/swagger'; export class CreateModelDto implements Partial { - @ApiProperty({ description: 'The version of the model.' }) + // Cortex Meta + @ApiProperty({ description: 'The unique identifier of the model.' }) @IsString() - version: string; + model: string; - @ApiProperty({ description: 'The state format of the model.' }) - @IsEnum(ModelFormat) - format: ModelFormat; + @ApiProperty({ description: 'The name of the model.' }) + @IsString() + name?: string; @ApiProperty({ description: 'The URL sources from which the model downloaded or accessed.', + oneOf: [ + { type: 'array', items: { type: 'string' } }, + { $ref: getSchemaPath(ModelArtifactDto) }, + ], }) @IsArray() - @ValidateNested({ each: true }) - @Type(() => ModelArtifactDto) - sources: ModelArtifactDto[]; + files: string[] | ModelArtifactDto; - @ApiProperty({ description: 'The unique identifier of the model.' }) + // Model Input / Output Syntax + @ApiProperty({ + description: + "A predefined text or framework that guides the AI model's response generation.", + }) + @IsOptional() @IsString() - id: string; + prompt_template?: string; - @ApiProperty({ description: 'The name of the model.' }) - @IsString() - name: string; + @ApiProperty({ + description: + 'Defines specific tokens or phrases that signal the model to stop producing further output.', + }) + @IsOptional() + @IsArray() + stop?: string[]; - @ApiProperty({ description: 'A brief description of the model.' }) - @IsString() - description: string; + // Results Preferences + @ApiProperty({ + description: + 'Sets the upper limit on the number of tokens the model can generate in a single output.', + }) + @IsOptional() + @IsNumber() + max_tokens?: number; + + @ApiProperty({ + description: 'Sets probability threshold for more relevant outputs.', + }) + @IsOptional() + @IsNumber() + top_p?: number; - @ApiProperty({ description: 'The settings parameters of the model.' }) - @ValidateNested({ always: true, each: true }) - @Type(() => ModelSettingParamsDto) - settings: ModelSettingParamsDto; + @ApiProperty({ + description: "Influences the randomness of the model's output.", + }) + @IsOptional() + @IsNumber() + temperature?: number; - @ApiProperty({ description: 'The parameters configuration of the model.' }) - @ValidateNested() - parameters: ModelRuntimeParamsDto; + @ApiProperty({ + description: + 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', + }) + @IsOptional() + @IsNumber() + frequency_penalty?: number; - @ApiProperty({ description: 'The metadata of the model.' }) - @ValidateNested() - metadata: ModelMetadataDto; + @ApiProperty({ + description: + 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', + }) + @IsOptional() + @IsNumber() + presence_penalty?: number; + + @ApiProperty({ + description: + 'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. 
If set to `false`, the output is delivered in a single JSON file.', + }) + @IsOptional() + @IsBoolean() + stream?: boolean; + + // Engine Settings + @ApiProperty({ + description: + 'Sets the maximum input the model can use to generate a response, it varies with the model used.', + }) + @IsOptional() + @IsNumber() + ctx_len?: number; + + @ApiProperty({ description: 'Determines GPU layer usage.' }) + @IsOptional() + @IsNumber() + ngl?: number; + + @ApiProperty({ description: 'Number of parallel processing units to use.' }) + @IsOptional() + @IsNumber() + n_parallel?: number; + + @ApiProperty({ + description: + 'Determines CPU inference threads, limited by hardware and OS. ', + }) + @IsOptional() + @IsNumber() + cpu_threads?: number; @ApiProperty({ description: 'The engine used to run the model.' }) + @IsOptional() @IsString() - engine: string; + engine?: string; } diff --git a/cortex-js/src/infrastructure/dtos/models/model-artifact.dto.ts b/cortex-js/src/infrastructure/dtos/models/model-artifact.dto.ts index d1b2cf402..36b72b92e 100644 --- a/cortex-js/src/infrastructure/dtos/models/model-artifact.dto.ts +++ b/cortex-js/src/infrastructure/dtos/models/model-artifact.dto.ts @@ -3,7 +3,10 @@ import { ModelArtifact } from '@/domain/models/model.interface'; import { ApiProperty } from '@nestjs/swagger'; export class ModelArtifactDto implements ModelArtifact { - @ApiProperty({ description: 'The URL source of the model.' }) + @ApiProperty({ description: 'The mmproj bin file url.' }) @IsString() - url: string; + mmproj?: string; + @ApiProperty({ description: 'The llama model bin file url.' }) + @IsString() + llama_model_path?: string; } diff --git a/cortex-js/src/infrastructure/dtos/models/model-metadata.dto.ts b/cortex-js/src/infrastructure/dtos/models/model-metadata.dto.ts deleted file mode 100644 index 2e8ea6020..000000000 --- a/cortex-js/src/infrastructure/dtos/models/model-metadata.dto.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { IsArray, IsNumber, IsOptional, IsString } from 'class-validator'; -import { ModelMetadata } from '@/domain/models/model.interface'; -import { ApiProperty } from '@nestjs/swagger'; - -export class ModelMetadataDto implements ModelMetadata { - @ApiProperty({ description: 'The author of the model.' }) - @IsString() - author: string; - - @ApiProperty({ description: "The model's tags." }) - @IsArray() - tags: string[]; - - @ApiProperty({ description: "The model's size." }) - @IsNumber() - size: number; - - @ApiProperty({ description: "The model's cover." 
}) - @IsString() - @IsOptional() - cover?: string | undefined; -} diff --git a/cortex-js/src/infrastructure/dtos/models/model-runtime-params.dto.ts b/cortex-js/src/infrastructure/dtos/models/model-runtime-params.dto.ts deleted file mode 100644 index 3ee6db44c..000000000 --- a/cortex-js/src/infrastructure/dtos/models/model-runtime-params.dto.ts +++ /dev/null @@ -1,86 +0,0 @@ -import { - IsArray, - IsBoolean, - IsNumber, - IsOptional, - IsString, -} from 'class-validator'; -import { ModelRuntimeParams } from '@/domain/models/model.interface'; -import { ApiProperty } from '@nestjs/swagger'; - -export class ModelRuntimeParamsDto implements ModelRuntimeParams { - @ApiProperty({ - description: "Influences the randomness of the model's output.", - }) - @IsOptional() - @IsNumber() - temperature?: number; - - @ApiProperty({ - description: - 'Sets the maximum number of pieces (like words or characters) the model will produce at one time.', - }) - @IsOptional() - @IsNumber() - token_limit?: number; - - @ApiProperty({ - description: - "Limits the model's choices when it's deciding what to write next.", - }) - @IsOptional() - @IsNumber() - top_k?: number; - - @ApiProperty({ - description: 'Sets probability threshold for more relevant outputs.', - }) - @IsOptional() - @IsNumber() - top_p?: number; - - @ApiProperty({ - description: - 'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file.', - }) - @IsOptional() - @IsBoolean() - stream?: boolean; - - @ApiProperty({ - description: - 'Sets the upper limit on the number of tokens the model can generate in a single output.', - }) - @IsOptional() - @IsNumber() - max_tokens?: number; - - @ApiProperty({ - description: - 'Defines specific tokens or phrases that signal the model to stop producing further output.', - }) - @IsOptional() - @IsArray() - stop?: string[]; - - @ApiProperty({ - description: - 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', - }) - @IsOptional() - @IsNumber() - frequency_penalty?: number; - - @ApiProperty({ - description: - 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', - }) - @IsOptional() - @IsNumber() - presence_penalty?: number; - - @ApiProperty({ description: 'The engine used to run the model.' }) - @IsOptional() - @IsString() - engine?: string; -} diff --git a/cortex-js/src/infrastructure/dtos/models/model-setting-params.dto.ts b/cortex-js/src/infrastructure/dtos/models/model-setting-params.dto.ts deleted file mode 100644 index 146934d0f..000000000 --- a/cortex-js/src/infrastructure/dtos/models/model-setting-params.dto.ts +++ /dev/null @@ -1,108 +0,0 @@ -import { IsBoolean, IsNumber, IsOptional, IsString } from 'class-validator'; -import { ModelSettingParams } from '@/domain/models/model.interface'; -import { ApiProperty } from '@nestjs/swagger'; - -export class ModelSettingParamsDto implements ModelSettingParams { - @ApiProperty({ - description: - 'Sets the maximum input the model can use to generate a response, it varies with the model used.', - }) - @IsOptional() - @IsNumber() - ctx_len?: number; - - @ApiProperty({ description: 'Determines GPU layer usage.' 
}) - @IsOptional() - @IsNumber() - ngl?: number; - - @ApiProperty({ - description: - 'Enables embedding utilization for tasks like document-enhanced chat in RAG-based applications.', - }) - @IsOptional() - @IsBoolean() - embedding?: boolean; - - @ApiProperty({ description: 'Number of parallel processing units to use.' }) - @IsOptional() - @IsNumber() - n_parallel?: number; - - @ApiProperty({ - description: - 'Determines CPU inference threads, limited by hardware and OS. ', - }) - @IsOptional() - @IsNumber() - cpu_threads?: number; - - @ApiProperty({ - description: - "A predefined text or framework that guides the AI model's response generation.", - }) - @IsOptional() - @IsString() - prompt_template?: string; - - @ApiProperty({ - description: - 'Specific prompt used by the system for generating model outputs.', - }) - @IsOptional() - @IsString() - system_prompt?: string; - - @ApiProperty({ - description: - 'The prompt fed into the AI, typically to guide or specify the nature of the content it should generate.', - }) - @IsOptional() - @IsString() - ai_prompt?: string; - - @ApiProperty({ - description: - 'Customizable prompt input by the user to direct the model’s output generation.', - }) - @IsOptional() - @IsString() - user_prompt?: string; - - @ApiProperty({ description: 'File path to a specific llama model.' }) - @IsOptional() - @IsString() - llama_model_path?: string; - - @ApiProperty({ - description: - 'The mmproj is a projection matrix that is used to project the embeddings from CLIP into tokens usable by llama/mistral.', - }) - @IsOptional() - @IsString() - mmproj?: string; - - @ApiProperty({ - description: - 'Controls continuous batching, enhancing throughput for LLM inference.', - }) - @IsOptional() - @IsBoolean() - cont_batching?: boolean; - - @ApiProperty({ - description: - 'Specifies if a vision-based model (for image processing) should be used.', - }) - @IsOptional() - @IsBoolean() - vision_model?: boolean; - - @ApiProperty({ - description: - 'Specifies if a text-based model is to be utilized, for tasks like text generation or analysis.', - }) - @IsOptional() - @IsBoolean() - text_model?: boolean; -} diff --git a/cortex-js/src/infrastructure/dtos/models/model-successfully-created.dto.ts b/cortex-js/src/infrastructure/dtos/models/model-successfully-created.dto.ts index 64a9e7683..97ab21617 100644 --- a/cortex-js/src/infrastructure/dtos/models/model-successfully-created.dto.ts +++ b/cortex-js/src/infrastructure/dtos/models/model-successfully-created.dto.ts @@ -1,92 +1,108 @@ +import { Model } from '@/domain/models/model.interface'; import { ApiProperty } from '@nestjs/swagger'; +import { IsArray, IsBoolean, IsNumber, IsOptional } from 'class-validator'; -export class ModelDto { +export class ModelDto implements Partial { + // Prompt Settings @ApiProperty({ - example: - 'https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf', - description: 'URL to the source of the model.', + example: 'system\n{system_message}\nuser\n{prompt}\nassistant', + description: + "A predefined text or framework that guides the AI model's response generation.", }) - source_url: string; + @IsOptional() + prompt_template?: string; @ApiProperty({ - example: 'trinity-v1.2-7b', + type: [String], + example: [], description: - 'Unique identifier used in chat-completions model_name, matches folder name.', + 'Defines specific tokens or phrases that signal the model to stop producing further output.', }) - id: string; + @IsArray() + @IsOptional() + stop?: string[]; - @ApiProperty({ 
example: 'model' }) - object: string; + // Results Preferences @ApiProperty({ - example: 'Trinity-v1.2 7B Q4', - description: 'Name of the model.', + example: 4096, + description: + 'Sets the upper limit on the number of tokens the model can generate in a single output.', }) - name: string; + @IsOptional() + @IsNumber() + max_tokens?: number; @ApiProperty({ - default: '1.0', - description: 'The version number of the model.', + example: 0.7, + description: "Influences the randomness of the model's output.", }) - version: string; + @IsOptional() + @IsNumber() + temperature?: number; @ApiProperty({ - example: - 'Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.', - description: 'Description of the model.', + example: 0.95, + description: 'Sets probability threshold for more relevant outputs', }) - description: string; + @IsOptional() + @IsNumber() + top_p?: number; @ApiProperty({ - example: 'gguf', - description: 'State format of the model, distinct from the engine.', + example: true, + description: + 'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file.', }) - format: string; - - @ApiProperty({ description: 'Context length.', example: 4096 }) - ctx_len: number; + @IsOptional() + @IsBoolean() + stream?: boolean; @ApiProperty({ - example: 'system\n{system_message}\nuser\n{prompt}\nassistant', + example: 0, + description: + 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', }) - prompt_template: string; - - @ApiProperty({ example: 0.7 }) - temperature: number; - - @ApiProperty({ example: 0.95 }) - top_p: number; - - @ApiProperty({ example: true }) - stream: boolean; - - @ApiProperty({ example: 4096 }) - max_tokens: number; - - @ApiProperty({ type: [String], example: [] }) - stop: string[]; + @IsOptional() + @IsNumber() + frequency_penalty?: number; - @ApiProperty({ example: 0 }) - frequency_penalty: number; - - @ApiProperty({ example: 0 }) - presence_penalty: number; - - @ApiProperty({ example: 'Jan' }) - author: string; + @ApiProperty({ + example: 0, + description: + 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', + }) + @IsOptional() + @IsNumber() + presence_penalty?: number; - @ApiProperty({ type: [String], example: ['7B', 'Merged', 'Featured'] }) - tags: string[]; + // Engine Settings + @ApiProperty({ description: 'Determines GPU layer usage.', example: 4096 }) + @IsOptional() + @IsNumber() + ngl?: number; - @ApiProperty({ example: 4370000000 }) - size: number; + @ApiProperty({ + description: + 'The context length for model operations varies; the maximum depends on the specific model used.', + example: 4096, + }) + @IsOptional() + @IsNumber() + ctx_len?: number; @ApiProperty({ - example: - 'https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png', + description: + 'Determines CPU inference threads, limited by hardware and OS. 
', }) - cover: string; + @IsOptional() + @IsNumber() + cpu_threads?: number; - @ApiProperty({ example: 'cortex' }) - engine: string; + @ApiProperty({ + example: 'cortex.llamacpp', + description: 'The engine to use.', + }) + @IsOptional() + engine?: string; } diff --git a/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts b/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts index 659d0296e..2a6e8fbaf 100644 --- a/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts +++ b/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts @@ -1,9 +1,9 @@ import { IsArray, IsOptional, IsString, ValidateNested } from 'class-validator'; import { ThreadAssistantInfo } from '@/domain/models/thread.interface'; -import { CreateThreadModelInfoDto } from './create-thread-model-info.dto'; import { AssistantToolDto } from '@/infrastructure/dtos/assistants/assistant-tool.dto'; import { Type } from 'class-transformer'; import { ApiProperty } from '@nestjs/swagger'; +import { CreateModelDto } from '../models/create-model.dto'; export class CreateThreadAssistantDto implements ThreadAssistantInfo { @ApiProperty({ description: 'The unique identifier of the assistant.' }) @@ -16,7 +16,7 @@ export class CreateThreadAssistantDto implements ThreadAssistantInfo { @ApiProperty({ description: "The model's unique identifier and settings." }) @ValidateNested() - model: CreateThreadModelInfoDto; + model: CreateModelDto; @ApiProperty({ description: "The assistant's specific instructions." }) @IsOptional() diff --git a/cortex-js/src/infrastructure/dtos/threads/create-thread-model-info.dto.ts b/cortex-js/src/infrastructure/dtos/threads/create-thread-model-info.dto.ts deleted file mode 100644 index 9c90085cd..000000000 --- a/cortex-js/src/infrastructure/dtos/threads/create-thread-model-info.dto.ts +++ /dev/null @@ -1,26 +0,0 @@ -import { IsOptional, IsString, ValidateNested } from 'class-validator'; -import { ModelInfo } from '@/domain/models/model.interface'; -import { ModelRuntimeParamsDto } from '@/infrastructure/dtos/models/model-runtime-params.dto'; -import { ModelSettingParamsDto } from '@/infrastructure/dtos/models/model-setting-params.dto'; -import { ApiProperty } from '@nestjs/swagger'; - -export class CreateThreadModelInfoDto implements ModelInfo { - @ApiProperty({ description: 'The unique identifier of the thread.' }) - @IsString() - id: string; - - @ApiProperty({ description: 'The settings of the thread.' }) - @ValidateNested() - settings: ModelSettingParamsDto; - - @ApiProperty({ description: 'The parameters of the thread.' 
}) - @ValidateNested() - parameters: ModelRuntimeParamsDto; - - @ApiProperty({ - description: 'The engine used in the thread to operate the model.', - }) - @IsOptional() - @IsString() - engine?: string; -} diff --git a/cortex-js/src/infrastructure/entities/model.entity.ts b/cortex-js/src/infrastructure/entities/model.entity.ts index 878ad192f..052eb7d22 100644 --- a/cortex-js/src/infrastructure/entities/model.entity.ts +++ b/cortex-js/src/infrastructure/entities/model.entity.ts @@ -1,47 +1,59 @@ -import { - Model, - ModelArtifact, - ModelFormat, - ModelMetadata, - ModelRuntimeParams, - ModelSettingParams, -} from '@/domain/models/model.interface'; +import { Model, ModelArtifact } from '@/domain/models/model.interface'; import { Column, Entity, PrimaryColumn } from 'typeorm'; @Entity('models') export class ModelEntity implements Model { + // Cortex Meta @PrimaryColumn() - id: string; + model: string; @Column() - object: string; + name: string; @Column() version: string; + @Column({ type: 'simple-json' }) + files: string[] | ModelArtifact; + + // Model Input / Output Syntax @Column() - format: ModelFormat; + prompt_template: string; @Column({ type: 'simple-json' }) - sources: ModelArtifact[]; + stop: string[]; @Column() - name: string; + max_tokens: number; + // Results Preferences @Column() - created: number; + top_p: number; @Column() - description: string; + temperature: number; - @Column({ type: 'simple-json' }) - settings: ModelSettingParams; + @Column() + frequency_penalty: number; - @Column({ type: 'simple-json' }) - parameters: ModelRuntimeParams; + @Column() + presence_penalty: number; - @Column({ type: 'simple-json' }) - metadata: ModelMetadata; + @Column() + stream: boolean; + + // Engine Settings + @Column() + ctx_len: number; + + @Column() + ngl: number; + + @Column() + n_parallel: number; + + @Column() + cpu_threads: number; @Column() engine: string; diff --git a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts index d13eca9fd..33aee645e 100644 --- a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts +++ b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts @@ -12,7 +12,7 @@ import { FileManagerService } from '@/file-manager/file-manager.service'; @Injectable() export default class CortexProvider extends OAIEngineExtension { - provider: string = 'cortex'; + provider: string = 'cortex.llamacpp'; apiUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/chat_completion`; private loadModelUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/loadmodel`; @@ -25,47 +25,46 @@ export default class CortexProvider extends OAIEngineExtension { super(httpService); } - private async getModelDirectory(): Promise { - const dataFolderPath = await this.fileManagerService.getDataFolderPath(); - return join(dataFolderPath, 'models'); - } - override async loadModel( model: Model, settings?: ModelSettingParams, ): Promise { - const modelsContainerDir = await this.getModelDirectory(); - - const modelFolderFullPath = join( - modelsContainerDir, - normalizeModelId(model.id), - ); - const ggufFiles = readdirSync(modelFolderFullPath).filter((file) => { - return file.endsWith('.gguf'); - }); - - if (ggufFiles.length === 0) { - throw new Error('Model binary not found'); - } + const modelsContainerDir = await this.fileManagerService.getModelsPath(); + + let llama_model_path = settings?.llama_model_path; + if (!llama_model_path) { + const 
modelFolderFullPath = join( + modelsContainerDir, + normalizeModelId(model.model), + ); + const ggufFiles = readdirSync(modelFolderFullPath).filter((file) => { + return file.endsWith('.gguf'); + }); + + if (ggufFiles.length === 0) { + throw new Error('Model binary not found'); + } - const modelBinaryLocalPath = join(modelFolderFullPath, ggufFiles[0]); + const modelBinaryLocalPath = join(modelFolderFullPath, ggufFiles[0]); + llama_model_path = modelBinaryLocalPath; + } const cpuThreadCount = 1; // TODO: Math.max(1, nitroResourceProbe.numCpuPhysicalCore); const modelSettings = { // This is critical and requires real CPU physical core count (or performance core) - model: model.id, cpu_threads: cpuThreadCount, - ...model.settings, + ...model, ...settings, - llama_model_path: modelBinaryLocalPath, - ...(model.settings.mmproj && { - mmproj: join(modelFolderFullPath, model.settings.mmproj), - }), + llama_model_path, + ...('mmproj' in model.files && + model.files.mmproj && { + mmproj: settings?.mmproj, + }), }; // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt - if (model.settings.prompt_template) { - const promptTemplate = model.settings.prompt_template; + if (model.prompt_template) { + const promptTemplate = model.prompt_template; const prompt = this.promptTemplateConverter(promptTemplate); if (prompt?.error) { throw new Error(prompt.error); diff --git a/cortex-js/src/infrastructure/repositories/extensions/extension.module.ts b/cortex-js/src/infrastructure/repositories/extensions/extension.module.ts index e1cbfb289..89456cb4b 100644 --- a/cortex-js/src/infrastructure/repositories/extensions/extension.module.ts +++ b/cortex-js/src/infrastructure/repositories/extensions/extension.module.ts @@ -3,9 +3,10 @@ import { ExtensionRepositoryImpl } from './extension.repository'; import { ExtensionRepository } from '@/domain/repositories/extension.interface'; import { CortexProviderModule } from '@/infrastructure/providers/cortex/cortex.module'; import { HttpModule } from '@nestjs/axios'; +import { FileManagerModule } from '@/file-manager/file-manager.module'; @Module({ - imports: [CortexProviderModule, HttpModule], + imports: [CortexProviderModule, HttpModule, FileManagerModule], providers: [ { provide: ExtensionRepository, diff --git a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts index a1f2c69f0..3acbdf789 100644 --- a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts +++ b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts @@ -5,6 +5,7 @@ import { readdir, lstat, access } from 'fs/promises'; import { join } from 'path'; import { EngineExtension } from '@/domain/abstracts/engine.abstract'; import { appPath } from '@/infrastructure/commanders/utils/app-path'; +import { FileManagerService } from '@/file-manager/file-manager.service'; @Injectable() export class ExtensionRepositoryImpl implements ExtensionRepository { @@ -14,6 +15,7 @@ export class ExtensionRepositoryImpl implements ExtensionRepository { constructor( @Inject('CORTEX_PROVIDER') private readonly cortexProvider: EngineExtension, + private readonly fileService: FileManagerService, ) { this.loadCoreExtensions(); this.loadExternalExtensions(); @@ -36,14 +38,15 @@ export class ExtensionRepositoryImpl implements ExtensionRepository { return Promise.resolve(); } - loadCoreExtensions(): void { + private loadCoreExtensions(): void { const extensionsPath = 
diff --git a/cortex-js/src/infrastructure/repositories/model/model.module.ts b/cortex-js/src/infrastructure/repositories/model/model.module.ts
new file mode 100644
index 000000000..be257d804
--- /dev/null
+++ b/cortex-js/src/infrastructure/repositories/model/model.module.ts
@@ -0,0 +1,18 @@
+import { Module } from '@nestjs/common';
+import { CortexProviderModule } from '@/infrastructure/providers/cortex/cortex.module';
+import { HttpModule } from '@nestjs/axios';
+import { ModelRepository } from '@/domain/repositories/model.interface';
+import { ModelRepositoryImpl } from './model.repository';
+import { FileManagerModule } from '@/file-manager/file-manager.module';
+
+@Module({
+  imports: [CortexProviderModule, HttpModule, FileManagerModule],
+  providers: [
+    {
+      provide: ModelRepository,
+      useClass: ModelRepositoryImpl,
+    },
+  ],
+  exports: [ModelRepository],
+})
+export class ModelRepositoryModule {}
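Because `ModelRepository` is an abstract class, it can serve directly as the injection token in the `useClass` binding above, so consumers depend only on the domain abstraction. A short usage sketch (`SomeUsecases` is hypothetical):

import { Injectable } from '@nestjs/common';
import { ModelRepository } from '@/domain/repositories/model.interface';

@Injectable()
export class SomeUsecases {
  // Nest resolves this to ModelRepositoryImpl via the provider mapping above
  constructor(private readonly modelRepository: ModelRepository) {}

  findModel(id: string) {
    return this.modelRepository.findOne(id);
  }
}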
diff --git a/cortex-js/src/infrastructure/repositories/model/model.repository.ts b/cortex-js/src/infrastructure/repositories/model/model.repository.ts
new file mode 100644
index 000000000..6401c1ec7
--- /dev/null
+++ b/cortex-js/src/infrastructure/repositories/model/model.repository.ts
@@ -0,0 +1,150 @@
+import { Injectable } from '@nestjs/common';
+import { join, extname, basename } from 'path';
+import { ModelRepository } from '@/domain/repositories/model.interface';
+import { Model } from '@/domain/models/model.interface';
+import { FileManagerService } from '@/file-manager/file-manager.service';
+import {
+  existsSync,
+  mkdirSync,
+  readFileSync,
+  readdirSync,
+  rmSync,
+  writeFileSync,
+} from 'fs';
+import { load, dump } from 'js-yaml';
+import { normalizeModelId } from '@/infrastructure/commanders/utils/normalize-model-id';
+
+@Injectable()
+export class ModelRepositoryImpl implements ModelRepository {
+  // Map of model id to model object, populated from the models folder.
+  models = new Map<string, Model>([]);
+  // Map between model ids and their yaml file names. E.g. llama3:7b -> llama3-7b.yaml
+  fileModel = new Map<string, string>([]);
+  // Check whether the models have been loaded or not.
+  loaded = false;
+
+  constructor(private readonly fileService: FileManagerService) {
+    this.loadModels();
+  }
+
+  /**
+   * Create a new model
+   * This would persist the model yaml file to the models folder
+   * @param object
+   * @returns the created model
+   */
+  async create(object: Model): Promise<Model> {
+    const modelsFolderPath = join(
+      await this.fileService.getDataFolderPath(),
+      'models',
+    );
+    const modelYaml = dump(object);
+    if (!existsSync(modelsFolderPath)) mkdirSync(modelsFolderPath);
+    const modelsPath =
+      process.env.EXTENSIONS_PATH ?? (await this.fileService.getModelsPath());
+    writeFileSync(
+      join(modelsPath, `${normalizeModelId(object.model)}.yaml`),
+      modelYaml,
+    );
+
+    this.models.set(object.model ?? '', object);
+    return Promise.resolve(object);
+  }
+
+  /**
+   * Find all models
+   * This would load all the models from the models folder
+   * @returns all models
+   */
+  findAll(): Promise<Model[]> {
+    return this.loadModels();
+  }
+  /**
+   * Find one model by id
+   * @param id model id
+   * @returns the model
+   */
+  findOne(id: string): Promise<Model | null> {
+    return this.loadModels().then(() => this.models.get(id) ?? null);
+  }
+
+  /**
+   * Update a model
+   * This would update the model yaml file in the models folder
+   * @param id model id
+   * @param object model object
+   */
+  async update(id: string, object: Partial<Model>): Promise<void> {
+    const originalModel = await this.findOne(id);
+    if (!originalModel) throw new Error('Model not found');
+
+    const updatedModel = {
+      ...originalModel,
+      ...object,
+    } satisfies Model;
+
+    const modelYaml = dump(updatedModel);
+    const modelsPath =
+      process.env.EXTENSIONS_PATH ?? (await this.fileService.getModelsPath());
+
+    writeFileSync(
+      join(
+        modelsPath,
+        this.fileModel.get(id) ?? `${normalizeModelId(id)}.yaml`,
+      ),
+      modelYaml,
+    );
+
+    this.models.set(id ?? '', updatedModel);
+  }
+
+  /**
+   * Remove a model
+   * This would remove the model yaml file from the models folder
+   * @param id model id
+   */
+  async remove(id: string): Promise<void> {
+    this.models.delete(id);
+    const yamlFilePath = join(
+      await this.fileService.getModelsPath(),
+      this.fileModel.get(id) ?? id,
+    );
+    if (existsSync(yamlFilePath)) rmSync(yamlFilePath);
+    return Promise.resolve();
+  }
+
+  /**
+   * Load all models
+   * This would load all the models from the models folder
+   * @returns the list of models
+   */
+  private async loadModels(): Promise<Model[]> {
+    if (this.loaded) return Array.from(this.models.values());
+    const modelsPath =
+      process.env.EXTENSIONS_PATH ?? (await this.fileService.getModelsPath());
+
+    if (!existsSync(modelsPath)) return [];
+
+    const modelFiles = readdirSync(modelsPath)
+      .filter(
+        (file) =>
+          extname(file).toLowerCase() === '.yaml' ||
+          extname(file).toLowerCase() === '.yml',
+      )
+      .map((file) => join(modelsPath, file));
+
+    modelFiles.forEach(async (modelFile) => {
+      const model = readFileSync(modelFile, 'utf8');
+      const yamlObject = load(model) as Model;
+      const fileName = basename(modelFile);
+
+      if (yamlObject) {
+        this.fileModel.set(yamlObject.model, fileName);
+        this.models.set(yamlObject.model, yamlObject);
+      }
+    });
+    this.loaded = true;
+    return Array.from(this.models.values());
+  }
+}
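To make the yaml persistence concrete: `create` dumps the Model object to `<models>/<normalized-id>.yaml` and `loadModels` reads it back at startup. A sketch of that round trip, not part of the diff, using an invented model (the URL is a placeholder):

import { dump, load } from 'js-yaml';
import { Model } from '@/domain/models/model.interface';

const model: Partial<Model> = {
  model: 'llama3:8b',
  files: ['https://example.com/llama3-8b.gguf'], // placeholder URL
  ctx_len: 4096,
  ngl: 100,
  engine: 'cortex.llamacpp',
};

// What create() writes to llama3-8b.yaml; per the fileModel comment above,
// normalizeModelId maps llama3:8b to the llama3-8b file name.
const modelYaml = dump(model);
// What loadModels() reconstructs from that file at startup
const restored = load(modelYaml) as Model;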
diff --git a/cortex-js/src/usecases/chat/chat.module.ts b/cortex-js/src/usecases/chat/chat.module.ts
index e69b10b73..852a13511 100644
--- a/cortex-js/src/usecases/chat/chat.module.ts
+++ b/cortex-js/src/usecases/chat/chat.module.ts
@@ -3,9 +3,10 @@ import { ChatController } from '@/infrastructure/controllers/chat.controller';
 import { ChatUsecases } from './chat.usecases';
 import { DatabaseModule } from '@/infrastructure/database/database.module';
 import { ExtensionModule } from '@/infrastructure/repositories/extensions/extension.module';
+import { ModelRepositoryModule } from '@/infrastructure/repositories/model/model.module';

 @Module({
-  imports: [DatabaseModule, ExtensionModule],
+  imports: [DatabaseModule, ExtensionModule, ModelRepositoryModule],
   controllers: [ChatController],
   providers: [ChatUsecases],
   exports: [ChatUsecases],
diff --git a/cortex-js/src/usecases/chat/chat.usecases.ts b/cortex-js/src/usecases/chat/chat.usecases.ts
index 61b0c0296..78b9cca26 100644
--- a/cortex-js/src/usecases/chat/chat.usecases.ts
+++ b/cortex-js/src/usecases/chat/chat.usecases.ts
@@ -1,16 +1,14 @@
-import { Inject, Injectable } from '@nestjs/common';
+import { Injectable } from '@nestjs/common';
 import { CreateChatCompletionDto } from '@/infrastructure/dtos/chat/create-chat-completion.dto';
-import { ExtensionRepository } from '@/domain/repositories/extension.interface';
-import { Repository } from 'typeorm';
-import { ModelEntity } from '@/infrastructure/entities/model.entity';
 import { EngineExtension } from '@/domain/abstracts/engine.abstract';
 import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception';
+import { ModelRepository } from '@/domain/repositories/model.interface';
+import { ExtensionRepository } from '@/domain/repositories/extension.interface';

 @Injectable()
 export class ChatUsecases {
   constructor(
-    @Inject('MODEL_REPOSITORY')
-    private readonly modelRepository: Repository<ModelEntity>,
+    private readonly modelRepository: ModelRepository,
     private readonly extensionRepository: ExtensionRepository,
   ) {}

@@ -20,9 +18,7 @@ export class ChatUsecases {
   ): Promise {
     const { model: modelId } = createChatDto;
     const extensions = (await this.extensionRepository.findAll()) ?? [];
-    const model = await this.modelRepository.findOne({
-      where: { id: modelId },
-    });
+    const model = await this.modelRepository.findOne(modelId);

     if (!model) {
       throw new ModelNotFoundException(modelId);
diff --git a/cortex-js/src/usecases/models/models.module.ts b/cortex-js/src/usecases/models/models.module.ts
index 2094e98c1..3d10b9868 100644
--- a/cortex-js/src/usecases/models/models.module.ts
+++ b/cortex-js/src/usecases/models/models.module.ts
@@ -6,6 +6,7 @@ import { CortexModule } from '@/usecases/cortex/cortex.module';
 import { ExtensionModule } from '@/infrastructure/repositories/extensions/extension.module';
 import { HttpModule } from '@nestjs/axios';
 import { FileManagerModule } from '@/file-manager/file-manager.module';
+import { ModelRepositoryModule } from '@/infrastructure/repositories/model/model.module';

 @Module({
   imports: [
@@ -14,6 +15,7 @@ import { FileManagerModule } from '@/file-manager/file-manager.module';
     ExtensionModule,
     HttpModule,
     FileManagerModule,
+    ModelRepositoryModule,
   ],
   controllers: [ModelsController],
   providers: [ModelsUsecases],
diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts
index 980b0569e..925101745 100644
--- a/cortex-js/src/usecases/models/models.usecases.ts
+++ b/cortex-js/src/usecases/models/models.usecases.ts
@@ -1,14 +1,7 @@
 import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto';
 import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto';
-import { ModelEntity } from '@/infrastructure/entities/model.entity';
-import { BadRequestException, Inject, Injectable } from '@nestjs/common';
-import { Repository } from 'typeorm';
-import {
-  Model,
-  ModelFormat,
-  ModelRuntimeParams,
-  ModelSettingParams,
-} from '@/domain/models/model.interface';
+import { BadRequestException, Injectable } from '@nestjs/common';
+import { Model, ModelSettingParams } from '@/domain/models/model.interface';
 import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception';
 import { join, basename } from 'path';
 import {
@@ -22,17 +15,18 @@ import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-s
 import { ExtensionRepository } from '@/domain/repositories/extension.interface';
 import { EngineExtension } from '@/domain/abstracts/engine.abstract';
 import { HttpService } from '@nestjs/axios';
-import { ModelSettingParamsDto } from '@/infrastructure/dtos/models/model-setting-params.dto';
 import { normalizeModelId } from '@/infrastructure/commanders/utils/normalize-model-id';
 import { firstValueFrom } from 'rxjs';
 import { FileManagerService } from '@/file-manager/file-manager.service';
 import { AxiosError } from 'axios';
+import { ModelRepository } from '@/domain/repositories/model.interface';
+import { ModelDto } from '@/infrastructure/dtos/models/model-successfully-created.dto';
+import { ModelParameterParser } from '@/infrastructure/commanders/utils/model-parameter.parser';

 @Injectable()
 export class ModelsUsecases {
   constructor(
-    @Inject('MODEL_REPOSITORY')
-    private readonly modelRepository: Repository<ModelEntity>,
+    private readonly modelRepository: ModelRepository,
     private readonly extensionRepository: ExtensionRepository,
     private readonly fileManagerService: FileManagerService,
     private readonly httpService: HttpService,
@@ -41,23 +35,17 @@ export class ModelsUsecases {
   async create(createModelDto: CreateModelDto) {
     const model: Model = {
       ...createModelDto,
-      object: 'model',
-      created: Date.now(),
     };
-    await this.modelRepository.insert(model);
+    await this.modelRepository.create(model);
   }

   async findAll(): Promise<Model[]> {
-    return this.modelRepository.find();
+    return this.modelRepository.findAll();
   }

-  async findOne(id: string) {
-    return this.modelRepository.findOne({
-      where: {
-        id,
-      },
-    });
+  async findOne(model: string) {
+    return this.modelRepository.findOne(model);
   }

   async getModelOrThrow(id: string): Promise<Model> {
@@ -72,45 +60,8 @@
     return this.modelRepository.update(id, updateModelDto);
   }

-  async updateModelSettingParams(
-    id: string,
-    settingParams: ModelSettingParams,
-  ): Promise<ModelSettingParams> {
-    const model = await this.getModelOrThrow(id);
-    const currentSettingParams = model.settings;
-    const updateDto: UpdateModelDto = {
-      settings: {
-        ...currentSettingParams,
-        ...settingParams,
-      },
-    };
-    await this.update(id, updateDto);
-    return updateDto.settings ?? {};
-  }
-
-  async updateModelRuntimeParams(
-    id: string,
-    runtimeParams: ModelRuntimeParams,
-  ): Promise<ModelRuntimeParams> {
-    const model = await this.getModelOrThrow(id);
-    const currentRuntimeParams = model.parameters;
-    const updateDto: UpdateModelDto = {
-      parameters: {
-        ...currentRuntimeParams,
-        ...runtimeParams,
-      },
-    };
-    await this.update(id, updateDto);
-    return updateDto.parameters ?? {};
-  }
-
-  private async getModelDirectory(): Promise<string> {
-    const dataFolderPath = await this.fileManagerService.getDataFolderPath();
-    return join(dataFolderPath, 'models');
-  }
-
   async remove(id: string) {
-    const modelsContainerDir = await this.getModelDirectory();
+    const modelsContainerDir = await this.fileManagerService.getModelsPath();
     if (!existsSync(modelsContainerDir)) {
       return;
     }
@@ -118,7 +69,7 @@ export class ModelsUsecases {
     const modelFolder = join(modelsContainerDir, normalizeModelId(id));

     return this.modelRepository
-      .delete(id)
+      .remove(id)
       .then(() => rmdirSync(modelFolder, { recursive: true }))
      .then(() => {
         return {
@@ -130,7 +81,7 @@

   async startModel(
     modelId: string,
-    settings?: ModelSettingParamsDto,
+    settings?: ModelDto,
   ): Promise<StartModelSuccessDto> {
     const model = await this.getModelOrThrow(modelId);
     const extensions = (await this.extensionRepository.findAll()) ?? [];
@@ -145,23 +96,34 @@
       };
     }

+    const parser = new ModelParameterParser();
+    const loadModelSettings: ModelSettingParams = {
+      // Default settings
+      ctx_len: 4096,
+      ngl: 100,
+      ...(Array.isArray(model?.files) &&
+        !('llama_model_path' in model) && {
+          llama_model_path: (model.files as string[])[0],
+        }),
+      engine: 'cortex.llamacpp',
+      // User / Model settings
+      ...parser.parseModelEngineSettings(model),
+      ...parser.parseModelEngineSettings(settings ?? {}),
+    };
+
     return engine
-      .loadModel(model, settings)
+      .loadModel(model, loadModelSettings)
       .then(() => ({
         message: 'Model loaded successfully',
         modelId,
       }))
-      .catch((e) =>
-        e.code === AxiosError.ERR_BAD_REQUEST
-          ? {
-              message: 'Model already loaded',
-              modelId,
-            }
-          : {
-              message: 'Model failed to load',
-              modelId,
-            },
-      );
+      .catch((e) => ({
+        message:
+          e.code === AxiosError.ERR_BAD_REQUEST
+            ? 'Model already loaded'
+            : 'Model failed to load',
+        modelId,
+      }));
   }

   async stopModel(modelId: string): Promise<StartModelSuccessDto> {
@@ -193,23 +155,27 @@
   async downloadModel(modelId: string, callback?: (progress: number) => void) {
     const model = await this.getModelOrThrow(modelId);

-    if (model.format === ModelFormat.API) {
-      throw new BadRequestException('Cannot download remote model');
-    }
+    // TODO: We will support split gguf files in the future
+    // Leave it as is for now (first element of the array)
+    const downloadUrl = Array.isArray(model.files)
+      ? model.files[0]
+      : model.files.llama_model_path;

-    const downloadUrl = model.sources[0].url;
+    if (!downloadUrl) {
+      throw new BadRequestException('No model URL provided');
+    }

     if (!this.isValidUrl(downloadUrl)) {
       throw new BadRequestException(`Invalid download URL: ${downloadUrl}`);
     }

     const fileName = basename(downloadUrl);
-    const modelsContainerDir = await this.getModelDirectory();
+    const modelsContainerDir = await this.fileManagerService.getModelsPath();

     if (!existsSync(modelsContainerDir)) {
       mkdirSync(modelsContainerDir, { recursive: true });
     }

-    const modelFolder = join(modelsContainerDir, normalizeModelId(model.id));
+    const modelFolder = join(modelsContainerDir, normalizeModelId(model.model));
     await promises.mkdir(modelFolder, { recursive: true });
     const destination = join(modelFolder, fileName);
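A sketch of how `loadModelSettings` in `startModel` could resolve for a files-array model. `parseModelEngineSettings` is assumed here to keep only engine-level keys (ctx_len, ngl, cpu_threads, and the like) from the object it is given; its real implementation lives in model-parameter.parser.ts and is not shown in this diff, and the values below are invented:

// Hypothetical input model
const model = {
  model: 'tinyllama',
  files: ['/home/user/cortex/models/tinyllama/model.gguf'],
  ngl: 33, // model-level engine setting
};

const loadModelSettings = {
  ctx_len: 4096, // default
  ngl: 100, // default
  // files is a plain array and the model has no llama_model_path field,
  // so the first file becomes the binary path
  llama_model_path: model.files[0],
  engine: 'cortex.llamacpp',
  ...{ ngl: model.ngl }, // stand-in for parser.parseModelEngineSettings(model)
};
// loadModelSettings.ngl === 33: model settings override the defaults,
// and any caller-provided settings would override both.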