diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index 3c9eea724..85050581a 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -188,40 +188,40 @@ jobs:
           AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
           AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
 
-  build-docker-and-test:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Getting the repo
-        uses: actions/checkout@v3
-        with:
-          submodules: 'recursive'
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+  # build-docker-and-test:
+  #   runs-on: ubuntu-latest
+  #   steps:
+  #     - name: Getting the repo
+  #       uses: actions/checkout@v3
+  #       with:
+  #         submodules: 'recursive'
+
+  #     - name: Set up QEMU
+  #       uses: docker/setup-qemu-action@v3
+
+  #     - name: Set up Docker Buildx
+  #       uses: docker/setup-buildx-action@v3
       
-      - name: Run Docker
-        run: |
-          docker build -t menloltd/cortex:test -f docker/Dockerfile .
-          docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test
-
-      - name: use python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.10"
-
-      - name: Run e2e tests
-        run: |
-          cd engine
-          python -m pip install --upgrade pip
-          python -m pip install -r e2e-test/requirements.txt
-          pytest e2e-test/test_api_docker.py
-
-      - name: Run Docker
-        continue-on-error: true
-        if: always()  
-        run: |
-          docker stop cortex
-          docker rm cortex
+  #     - name: Run Docker
+  #       run: |
+  #         docker build -t menloltd/cortex:test -f docker/Dockerfile .
+  #         docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test
+
+  #     - name: use python
+  #       uses: actions/setup-python@v5
+  #       with:
+  #         python-version: "3.10"
+
+  #     - name: Run e2e tests
+  #       run: |
+  #         cd engine
+  #         python -m pip install --upgrade pip
+  #         python -m pip install -r e2e-test/requirements.txt
+  #         pytest e2e-test/test_api_docker.py
+
+  #     - name: Run Docker
+  #       continue-on-error: true
+  #       if: always()  
+  #       run: |
+  #         docker stop cortex
+  #         docker rm cortex
diff --git a/docs/docs/capabilities/hardware/index.md b/docs/docs/capabilities/hardware/index.mdx
similarity index 90%
rename from docs/docs/capabilities/hardware/index.md
rename to docs/docs/capabilities/hardware/index.mdx
index acf190ecc..707c54373 100644
--- a/docs/docs/capabilities/hardware/index.md
+++ b/docs/docs/capabilities/hardware/index.mdx
@@ -1,8 +1,13 @@
 ---
 title: Hardware Awareness
-draft: True
+description: The Hardware Awareness section overview
 ---
 
+:::warning
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+:::
+
+
 # Hardware Awareness
 
 Cortex is designed to be hardware aware, meaning it can detect your hardware configuration and automatically set parameters to optimize compatibility and performance, and avoid hardware-related errors.
diff --git a/docs/docs/cli/hardware/index.mdx b/docs/docs/cli/hardware/index.mdx
new file mode 100644
index 000000000..febc90c87
--- /dev/null
+++ b/docs/docs/cli/hardware/index.mdx
@@ -0,0 +1,116 @@
+---
+title: Cortex Hardware
+---
+
+import Tabs from "@theme/Tabs";
+import TabItem from "@theme/TabItem";
+
+# `cortex hardware`
+
+This command allows you manage and monitor hardware resources.
+
+
+**Usage**:
+:::info
+You can use the `--verbose` flag to display more detailed output of the internal processes. To apply this flag, use the following format: `cortex --verbose [subcommand]`.
+:::
+<Tabs>
+  <TabItem value="MacOs/Linux" label="MacOs/Linux">
+  ```sh
+  cortex hardware [options] [subcommand]
+  ```
+  </TabItem>
+  <TabItem value="Windows" label="Windows">
+  ```sh
+  cortex.exe hardware [options]
+
+  ```
+  </TabItem>
+</Tabs>
+
+**Options**:
+
+| Option            | Description                                           | Required | Default value | Example         |
+|-------------------|-------------------------------------------------------|----------|---------------|-----------------|
+| `-h`, `--help`    | Display help information for the command.             | No       | -             | `-h`        |
+
+---
+# Subcommands:
+
+## `cortex hardware list`
+:::info
+This CLI command calls the following API endpoint:
+- [List Model](/api-reference#tag/hardware/get/v1/hardware)
+:::
+This command lists all the hardware resources.
+
+**Usage**:
+<Tabs>
+  <TabItem value="MacOs/Linux" label="MacOs/Linux">
+  ```sh
+  cortex hardware list [options]
+  ```
+  </TabItem>
+  <TabItem value="Windows" label="Windows">
+  ```sh
+  cortex.exe hardware list [options]
+  ```
+  </TabItem>
+</Tabs>
+
+For example, it returns the following:
+```bash
+OS Information:
++---+---------------------------+--------------------+
+| # | Version                   | Name               |
++---+---------------------------+--------------------+
+| 1 | 24.04.1 LTS (Noble Numbat)| Ubuntu 24.04.1 LTS |
++---+---------------------------+--------------------+
+```
+
+**Options**:
+
+| Option                    | Description                                        | Required | Default value | Example              |
+|---------------------------|----------------------------------------------------|----------|---------------|----------------------|
+| `-h`, `--help`            | Display help for command.                          | No       | -             | `-h`                 |
+|`--cpu`                    | Display CPU information                            | No       | -             | `--cpu`              |
+|`--os`                     | Display OS information                             | No       | -             | `--os`               |
+|`--ram`                    | Display RAM information                            | No       | -             | `--ram`              |
+|`--storage`                | Display Storage information                        | No       | -             | `--storage`          |
+|`--gpu`                    | Display GPU information                            | No       | -             | `--gpu`              |
+|`--power`                  | Display Power information                          | No       | -             | `--power`            |
+|`--monitors`               | Display Monitors information                       | No       | -             | `--monitors`         |
+
+## `cortex hardware activate`
+
+::info
+This CLI command calls the following API endpoint:
+- [List Model](/api-reference#tag/hardware/post/v1/hardware/activate)
+:::
+This command activates the Cortex's hardware, currently support only GPUs.
+
+**Usage**:
+<Tabs>
+  <TabItem value="MacOs/Linux" label="MacOs/Linux">
+  ```sh
+  cortex hardware activate [options]
+  ```
+  </TabItem>
+  <TabItem value="Windows" label="Windows">
+  ```sh
+  cortex.exe hardware activate [options]
+  ```
+  </TabItem>
+</Tabs>
+
+For example, it returns the following:
+```bash
+Activated GPUs: 0
+```
+
+**Options**:
+
+| Option                    | Description                                        | Required | Default value | Example              |
+|---------------------------|----------------------------------------------------|----------|---------------|----------------------|
+| `-h`, `--help`            | Display help for command.                          | No       | -             | `-h`                 |
+|`--gpus`                   | List of GPUs to activate                           | Yes      | -             | `[0, 1]`             |
diff --git a/docs/docs/cli/models/index.mdx b/docs/docs/cli/models/index.mdx
index 0445a9ba5..5b29069a6 100644
--- a/docs/docs/cli/models/index.mdx
+++ b/docs/docs/cli/models/index.mdx
@@ -157,6 +157,7 @@ This command uses a `model_id` from the model that you have downloaded or availa
 | Option                    | Description                                                               | Required | Default value                                | Example                |
 |---------------------------|---------------------------------------------------------------------------|----------|----------------------------------------------|------------------------|
 | `model_id`                | The identifier of the model you want to start.                            | Yes       | `Prompt to select from the available models` | `mistral`       |
+| `--gpus`                  | List of GPUs to use.                                                      | No       | -                                            | `[0,1]`           |
 | `-h`, `--help`            | Display help information for the command.                                 | No       | -                                            | `-h`               |
 
 ## `cortex models stop`
diff --git a/docs/docs/cli/models/start.md b/docs/docs/cli/models/start.md
index 892ea01ed..77addd0b4 100644
--- a/docs/docs/cli/models/start.md
+++ b/docs/docs/cli/models/start.md
@@ -12,16 +12,12 @@ description: Cortex models subcommands.
 This command starts a model defined by a `model_id`.
 
 
-
 ## Usage
 
 ```bash
 # Start a model
 cortex models start [model_id]
 
-# Start a model with a preset
-cortex models start [model_id] [options]
-
 # Start with a specified engine
 cortex models start [model_id]:[engine] [options]
 ```
@@ -29,17 +25,15 @@ cortex models start [model_id]:[engine] [options]
 
 :::info
 - This command uses a `model_id` from the model that you have downloaded or available in your file system.
-- Model preset is applied only at the start of the model and does not change during the chat session.
 :::
 
 ## Options
 
-| Option                    | Description                                                               | Required | Default value                                | Example                |
-|---------------------------|---------------------------------------------------------------------------|----------|----------------------------------------------|------------------------|
-| `model_id`                | The identifier of the model you want to start.                            | No       | `Prompt to select from the available models` | `mistral`       |
-| `-a`, `--attach`          | Attach to an interactive chat session.                                    | No       | `false`                                      | `-a`             |
-| `-p`, `--preset <preset>` | Apply a chat preset to the chat session.                                  | No       | `false`                                      | `-p friendly`    |
-| `-h`, `--help`            | Display help information for the command.                                 | No       | -                                            | `-h`               |
+| Option                    | Description                                              | Required | Default value                                | Example           |
+|---------------------------|----------------------------------------------------------|----------|----------------------------------------------|-------------------|
+| `model_id`                | The identifier of the model you want to start.           | No       | `Prompt to select from the available models` | `mistral`         |
+| `--gpus`                  | List of GPUs to use.                                     | No       | -                                            | `[0,1]`           |
+| `-h`, `--help`            | Display help information for the command.                | No       | -                                            | `-h`              |
 
 
 
diff --git a/docs/docs/cli/run.mdx b/docs/docs/cli/run.mdx
index b0b9143ad..bbce017f1 100644
--- a/docs/docs/cli/run.mdx
+++ b/docs/docs/cli/run.mdx
@@ -37,5 +37,6 @@ You can use the `--verbose` flag to display more detailed output of the internal
 | Option                      | Description                                                                 | Required | Default value                                | Example                |
 |-----------------------------|-----------------------------------------------------------------------------|----------|----------------------------------------------|------------------------|
 | `model_id`                  | The identifier of the model you want to chat with.                          | Yes       | - | `mistral`       |
+| `--gpus`                   | List of GPUs to use.                                                         | No       | -                                            | `[0,1]`           |
 | `-h`, `--help`              | Display help information for the command.                                   | No       | -                                            | `-h`               |
 <!-- | `-t`, `--thread <thread_id>`  | Specify the Thread ID. Defaults to creating a new thread if none specified. | No       | -                                            | `-t jan_1717650808`       |                                      | `-c`               | -->
diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json
index 8577b9641..fdb5c4ed2 100644
--- a/docs/static/openapi/cortex.json
+++ b/docs/static/openapi/cortex.json
@@ -205,11 +205,11 @@
                     "oneOf": [
                       {
                         "type": "string",
-                        "description":"The string that will be turned into an embedding."
+                        "description": "The string that will be turned into an embedding."
                       },
                       {
                         "type": "array",
-                        "description" : "The array of strings that will be turned into an embedding.",
+                        "description": "The array of strings that will be turned into an embedding.",
                         "items": {
                           "type": "string"
                         }
@@ -219,12 +219,11 @@
                         "description": "The array of integers that will be turned into an embedding.",
                         "items": {
                           "type": "integer"
-                          
                         }
                       },
                       {
                         "type": "array",
-                        "description" : "The array of arrays containing integers that will be turned into an embedding.",
+                        "description": "The array of arrays containing integers that will be turned into an embedding.",
                         "items": {
                           "type": "array",
                           "items": {
@@ -1764,6 +1763,134 @@
         ]
       }
     },
+    "/v1/hardware": {
+      "get": {
+        "summary": "Get hardware information",
+        "description": "Retrieves detailed information about the system's hardware configuration, including CPU, GPU(s), operating system, power status, RAM, and storage.",
+        "responses": {
+          "200": {
+            "description": "Hardware information retrieved successfully",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "type": "object",
+                  "properties": {
+                    "cpu": {
+                      "$ref": "#/components/schemas/CPUDto"
+                    },
+                    "gpus": {
+                      "type": "array",
+                      "items": {
+                        "$ref": "#/components/schemas/GPUDto"
+                      }
+                    },
+                    "os": {
+                      "$ref": "#/components/schemas/OperatingSystemDto"
+                    },
+                    "power": {
+                      "$ref": "#/components/schemas/PowerDto"
+                    },
+                    "ram": {
+                      "$ref": "#/components/schemas/RAMDto"
+                    },
+                    "storage": {
+                      "$ref": "#/components/schemas/StorageDto"
+                    }
+                  }
+                }
+              }
+            }
+          }
+        },
+        "tags": [
+          "Hardware"
+        ]
+      }
+    },
+    "/v1/hardware/activate": {
+      "post": {
+        "summary": "Activate GPUs",
+        "description": "Activates the specified GPUs based on their indices provided in the request body.",
+        "requestBody": {
+          "required": true,
+          "content": {
+            "application/json": {
+              "schema": {
+                "type": "object",
+                "properties": {
+                  "gpus": {
+                    "type": "array",
+                    "items": {
+                      "type": "integer"
+                    },
+                    "example": [
+                      0,
+                      1,
+                      2
+                    ],
+                    "description": "An array of GPU indices to activate."
+                  }
+                },
+                "required": [
+                  "gpus"
+                ]
+              }
+            }
+          }
+        },
+        "responses": {
+          "200": {
+            "description": "The hardware configuration has been activated.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "type": "object",
+                  "properties": {
+                    "message": {
+                      "type": "string",
+                      "example": "The hardware configuration has been activated.",
+                      "description": "Confirmation message indicating successful activation."
+                    },
+                    "activated_gpus": {
+                      "type": "array",
+                      "items": {
+                        "type": "integer"
+                      },
+                      "example": [
+                        0,
+                        1,
+                        2
+                      ],
+                      "description": "List of GPU indices that were activated."
+                    }
+                  }
+                }
+              }
+            }
+          },
+          "400": {
+            "description": "Bad Request",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "type": "object",
+                  "properties": {
+                    "message": {
+                      "type": "string",
+                      "example": "Invalid GPU index provided",
+                      "description": "Error message indicating what went wrong."
+                    }
+                  }
+                }
+              }
+            }
+          }
+        },
+        "tags": [
+          "Hardware"
+        ]
+      }
+    },
     "/v1/configs": {
       "get": {
         "summary": "Get Configurations",
@@ -1927,6 +2054,10 @@
       "name": "Engines",
       "description": "Endpoints for managing the available engines within Cortex."
     },
+    {
+      "name": "Hardware",
+      "description": "Endpoints for managing the available hardware within Cortex."
+    },
     {
       "name": "System",
       "description": "Endpoints for stopping the Cortex API server, checking its status, and fetching system events."
@@ -1939,6 +2070,7 @@
         "Chat",
         "Embeddings",
         "Engines",
+        "Hardware",
         "Events",
         "Pulling Models",
         "Running Models",
@@ -4773,6 +4905,217 @@
           "object",
           "deleted"
         ]
+      },
+      "CPUDto": {
+        "type": "object",
+        "properties": {
+          "arch": {
+            "type": "string",
+            "example": "amd64",
+            "description": "The architecture of the CPU."
+          },
+          "cores": {
+            "type": "integer",
+            "example": 8,
+            "description": "The number of CPU cores available."
+          },
+          "instructions": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "example": [
+              "fpu",
+              "mmx",
+              "sse",
+              "sse2",
+              "sse3",
+              "ssse3",
+              "sse4_1",
+              "sse4_2",
+              "pclmulqdq",
+              "avx",
+              "avx2",
+              "aes",
+              "f16c"
+            ],
+            "description": "A list of supported CPU instruction sets."
+          },
+          "model": {
+            "type": "string",
+            "example": "AMD Ryzen Threadripper PRO 5955WX 16-Cores",
+            "description": "The model name of the CPU."
+          }
+        },
+        "required": [
+          "arch",
+          "cores",
+          "instructions",
+          "model"
+        ]
+      },
+      "GPUDto": {
+        "type": "object",
+        "properties": {
+          "activated": {
+            "type": "boolean",
+            "example": true,
+            "description": "Indicates if the GPU is currently activated."
+          },
+          "additional_information": {
+            "type": "object",
+            "properties": {
+              "compute_cap": {
+                "type": "string",
+                "example": "8.6",
+                "description": "The compute capability of the GPU."
+              },
+              "driver_version": {
+                "type": "string",
+                "example": "535.183",
+                "description": "The version of the installed driver."
+              }
+            },
+            "required": [
+              "compute_cap",
+              "driver_version"
+            ]
+          },
+          "free_vram": {
+            "type": "integer",
+            "example": 23983,
+            "description": "The amount of free VRAM in MB."
+          },
+          "id": {
+            "type": "string",
+            "example": "0",
+            "description": "Unique identifier for the GPU."
+          },
+          "name": {
+            "type": "string",
+            "example": "NVIDIA GeForce RTX 3090",
+            "description": "The name of the GPU model."
+          },
+          "total_vram": {
+            "type": "integer",
+            "example": 24576,
+            "description": "The total VRAM available in MB."
+          },
+          "uuid": {
+            "type": "string",
+            "example": "GPU-5206045b-2a1c-1e7d-6c60-d7c367d02376",
+            "description": "The universally unique identifier for the GPU."
+          },
+          "version": {
+            "type": "string",
+            "example": "12.2",
+            "description": "The version of the GPU."
+          }
+        },
+        "required": [
+          "activated",
+          "additional_information",
+          "free_vram",
+          "id",
+          "name",
+          "total_vram",
+          "uuid",
+          "version"
+        ]
+      },
+      "OperatingSystemDto": {
+        "type": "object",
+        "properties": {
+          "name": {
+            "type": "string",
+            "example": "Ubuntu 24.04.1 LTS",
+            "description": "The name of the operating system."
+          },
+          "version": {
+            "type": "string",
+            "example": "24.04.1 LTS (Noble Numbat)",
+            "description": "The version of the operating system."
+          }
+        },
+        "required": [
+          "name",
+          "version"
+        ]
+      },
+      "PowerDto": {
+        "type": "object",
+        "properties": {
+          "battery_life": {
+            "type": "integer",
+            "example": 0,
+            "description": "The percentage of battery life remaining."
+          },
+          "charging_status": {
+            "type": "string",
+            "example": "",
+            "description": "The charging status of the device."
+          },
+          "is_power_saving": {
+            "type": "boolean",
+            "example": false,
+            "description": "Indicates if the power-saving mode is enabled."
+          }
+        },
+        "required": [
+          "battery_life",
+          "charging_status",
+          "is_power_saving"
+        ]
+      },
+      "RAMDto": {
+        "type": "object",
+        "properties": {
+          "available": {
+            "type": "integer",
+            "example": 11100,
+            "description": "The amount of available RAM in MB."
+          },
+          "total": {
+            "type": "integer",
+            "example": 15991,
+            "description": "The total RAM in MB."
+          },
+          "type": {
+            "type": "string",
+            "example": "",
+            "description": "The type of RAM."
+          }
+        },
+        "required": [
+          "available",
+          "total",
+          "type"
+        ]
+      },
+      "Storage": {
+        "type": "object",
+        "properties": {
+          "available": {
+            "type": "integer",
+            "example": 0,
+            "description": "The amount of available storage in MB."
+          },
+          "total": {
+            "type": "integer",
+            "example": 0,
+            "description": "The total storage in MB."
+          },
+          "type": {
+            "type": "string",
+            "example": "",
+            "description": "The type of storage."
+          }
+        },
+        "required": [
+          "available",
+          "total",
+          "type"
+        ]
       }
     }
   }
diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt
index dc4ce8807..92e07ec91 100644
--- a/engine/CMakeLists.txt
+++ b/engine/CMakeLists.txt
@@ -73,6 +73,7 @@ find_package(LibArchive REQUIRED)
 find_package(CURL REQUIRED)
 find_package(SQLiteCpp REQUIRED)
 find_package(eventpp CONFIG REQUIRED)
+find_package(lfreist-hwinfo CONFIG REQUIRED)
 
 ## Generating openapi json
 file(READ "${CMAKE_CURRENT_SOURCE_DIR}/../docs/static/openapi/cortex.json" JSON_CONTENT)
@@ -150,7 +151,8 @@ target_link_libraries(${TARGET_NAME} PRIVATE JsonCpp::JsonCpp Drogon::Drogon Ope
   ${CMAKE_THREAD_LIBS_INIT})
 target_link_libraries(${TARGET_NAME} PRIVATE SQLiteCpp)
 target_link_libraries(${TARGET_NAME} PRIVATE eventpp::eventpp)
-
+target_link_libraries(${TARGET_NAME} PRIVATE lfreist-hwinfo::hwinfo)
+ 
 # ##############################################################################
 
 if(CMAKE_CXX_STANDARD LESS 17)
diff --git a/engine/cli/CMakeLists.txt b/engine/cli/CMakeLists.txt
index 0e25a4873..758a51dc8 100644
--- a/engine/cli/CMakeLists.txt
+++ b/engine/cli/CMakeLists.txt
@@ -66,6 +66,7 @@ find_package(CURL REQUIRED)
 find_package(SQLiteCpp REQUIRED)
 find_package(Trantor CONFIG REQUIRED)
 find_package(indicators CONFIG REQUIRED)
+find_package(lfreist-hwinfo CONFIG REQUIRED)
 
 
 add_executable(${TARGET_NAME} main.cc
@@ -76,6 +77,7 @@ add_executable(${TARGET_NAME} main.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/../services/engine_service.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/../services/model_service.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/../services/inference_service.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/../services/hardware_service.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/utils/easywsclient.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/utils/download_progress.cc
   )
@@ -91,6 +93,7 @@ target_link_libraries(${TARGET_NAME} PRIVATE JsonCpp::JsonCpp OpenSSL::SSL OpenS
 target_link_libraries(${TARGET_NAME} PRIVATE SQLiteCpp)
 target_link_libraries(${TARGET_NAME} PRIVATE Trantor::Trantor)
 target_link_libraries(${TARGET_NAME} PRIVATE indicators::indicators)
+target_link_libraries(${TARGET_NAME} PRIVATE lfreist-hwinfo::hwinfo)
 
 # ##############################################################################
 
diff --git a/engine/cli/command_line_parser.cc b/engine/cli/command_line_parser.cc
index 2f5f1c917..d4c1ef793 100644
--- a/engine/cli/command_line_parser.cc
+++ b/engine/cli/command_line_parser.cc
@@ -12,6 +12,7 @@
 #include "commands/engine_uninstall_cmd.h"
 #include "commands/engine_update_cmd.h"
 #include "commands/engine_use_cmd.h"
+#include "commands/hardware_activate_cmd.h"
 #include "commands/model_del_cmd.h"
 #include "commands/model_get_cmd.h"
 #include "commands/model_import_cmd.h"
@@ -33,6 +34,7 @@ constexpr const auto kCommonCommandsGroup = "Common Commands";
 constexpr const auto kInferenceGroup = "Inference";
 constexpr const auto kModelsGroup = "Models";
 constexpr const auto kEngineGroup = "Engines";
+constexpr const auto kHardwareGroup = "Hardware";
 constexpr const auto kSystemGroup = "Server";
 constexpr const auto kConfigGroup = "Configurations";
 constexpr const auto kSubcommands = "Subcommands";
@@ -59,6 +61,8 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
 
   SetupEngineCommands();
 
+  SetupHardwareCommands();
+
   SetupSystemCommands();
 
   SetupConfigsCommands();
@@ -157,6 +161,8 @@ void CommandLineParser::SetupCommonCommands() {
   run_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
                  " run [options] [model_id]");
   run_cmd->add_option("model_id", cml_data_.model_id, "");
+  run_cmd->add_option("--gpus", hw_activate_opts_["gpus"],
+                      "List of GPU to activate, for example [0, 1]");
   run_cmd->add_flag("-d,--detach", cml_data_.run_detach, "Detached mode");
   run_cmd->callback([this, run_cmd] {
     if (std::exchange(executed_, true))
@@ -164,7 +170,7 @@ void CommandLineParser::SetupCommonCommands() {
     commands::RunCmd rc(cml_data_.config.apiServerHost,
                         std::stoi(cml_data_.config.apiServerPort),
                         cml_data_.model_id, download_service_);
-    rc.Exec(cml_data_.run_detach);
+    rc.Exec(cml_data_.run_detach, hw_activate_opts_);
   });
 }
 
@@ -195,6 +201,8 @@ void CommandLineParser::SetupModelCommands() {
   model_start_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
                          " models start [model_id]");
   model_start_cmd->add_option("model_id", cml_data_.model_id, "");
+  model_start_cmd->add_option("--gpus", hw_activate_opts_["gpus"],
+                              "List of GPU to activate, for example [0, 1]");
   model_start_cmd->group(kSubcommands);
   model_start_cmd->callback([this, model_start_cmd]() {
     if (std::exchange(executed_, true))
@@ -206,7 +214,8 @@ void CommandLineParser::SetupModelCommands() {
     };
     commands::ModelStartCmd(model_service_)
         .Exec(cml_data_.config.apiServerHost,
-              std::stoi(cml_data_.config.apiServerPort), cml_data_.model_id);
+              std::stoi(cml_data_.config.apiServerPort), cml_data_.model_id,
+              hw_activate_opts_);
   });
 
   auto stop_model_cmd =
@@ -468,6 +477,77 @@ void CommandLineParser::SetupEngineCommands() {
   EngineGet(engines_cmd);
 }
 
+void CommandLineParser::SetupHardwareCommands() {
+  // Hardware group commands
+  auto hw_cmd =
+      app_.add_subcommand("hardware", "Subcommands for managing hardware");
+  hw_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
+                " hardware [options] [subcommand]");
+  hw_cmd->group(kHardwareGroup);
+
+  hw_cmd->callback([this, hw_cmd] {
+    if (std::exchange(executed_, true))
+      return;
+    if (hw_cmd->get_subcommands().empty()) {
+      CLI_LOG(hw_cmd->help());
+    }
+  });
+
+  auto hw_list_cmd =
+      hw_cmd->add_subcommand("list", "List all hardware information");
+
+  hw_list_cmd->add_flag("--cpu", hw_opts_.show_cpu, "Display CPU information");
+  hw_list_cmd->add_flag("--os", hw_opts_.show_os, "Display OS information");
+  hw_list_cmd->add_flag("--ram", hw_opts_.show_ram, "Display RAM information");
+  hw_list_cmd->add_flag("--storage", hw_opts_.show_storage,
+                        "Display Storage information");
+  hw_list_cmd->add_flag("--gpu", hw_opts_.show_gpu, "Display GPU information");
+  hw_list_cmd->add_flag("--power", hw_opts_.show_power,
+                        "Display Power information");
+  hw_list_cmd->add_flag("--monitors", hw_opts_.show_monitors,
+                        "Display Monitors information");
+
+  hw_list_cmd->group(kSubcommands);
+  hw_list_cmd->callback([this]() {
+    if (std::exchange(executed_, true))
+      return;
+    if (hw_opts_.has_flag()) {
+      commands::HardwareListCmd().Exec(
+          cml_data_.config.apiServerHost,
+          std::stoi(cml_data_.config.apiServerPort), hw_opts_);
+    } else {
+      commands::HardwareListCmd().Exec(
+          cml_data_.config.apiServerHost,
+          std::stoi(cml_data_.config.apiServerPort), std::nullopt);
+    }
+  });
+
+  auto hw_activate_cmd =
+      hw_cmd->add_subcommand("activate", "Activate hardware");
+  hw_activate_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
+                         " hardware activate --gpus [list_gpu]");
+  hw_activate_cmd->group(kSubcommands);
+  hw_activate_cmd->add_option("--gpus", hw_activate_opts_["gpus"],
+                              "List of GPU to activate, for example [0, 1]");
+  hw_activate_cmd->callback([this, hw_activate_cmd]() {
+    if (std::exchange(executed_, true))
+      return;
+    if (hw_activate_cmd->get_options().empty()) {
+      CLI_LOG(hw_activate_cmd->help());
+      return;
+    }
+
+    if (hw_activate_opts_["gpus"].empty()) {
+      CLI_LOG("[list_gpu] is required\n");
+      CLI_LOG(hw_activate_cmd->help());
+      return;
+    }
+    commands::HardwareActivateCmd().Exec(
+        cml_data_.config.apiServerHost,
+        std::stoi(cml_data_.config.apiServerPort), hw_activate_opts_);
+  });
+}
+
 void CommandLineParser::SetupSystemCommands() {
   auto start_cmd = app_.add_subcommand("start", "Start the API server");
   start_cmd->group(kSystemGroup);
diff --git a/engine/cli/command_line_parser.h b/engine/cli/command_line_parser.h
index e683039af..a6c8bcd62 100644
--- a/engine/cli/command_line_parser.h
+++ b/engine/cli/command_line_parser.h
@@ -6,6 +6,8 @@
 #include "services/engine_service.h"
 #include "services/model_service.h"
 #include "utils/config_yaml_utils.h"
+#include "commands/hardware_list_cmd.h"
+#include "common/hardware_config.h"
 
 class CommandLineParser {
  public:
@@ -21,6 +23,8 @@ class CommandLineParser {
 
   void SetupEngineCommands();
 
+  void SetupHardwareCommands();
+
   void SetupSystemCommands();
 
   void SetupConfigsCommands();
@@ -70,4 +74,6 @@ class CommandLineParser {
   CmlData cml_data_;
   std::unordered_map<std::string, std::string> config_update_opts_;
   bool executed_ = false;
+  commands::HarwareOptions hw_opts_;
+  std::unordered_map<std::string, std::string> hw_activate_opts_;
 };
diff --git a/engine/cli/commands/chat_cmd.cc b/engine/cli/commands/chat_cmd.cc
deleted file mode 100644
index d0f6cd8ee..000000000
--- a/engine/cli/commands/chat_cmd.cc
+++ /dev/null
@@ -1,11 +0,0 @@
-#include "chat_cmd.h"
-#include "run_cmd.h"
-
-namespace commands {
-void ChatCmd::Exec(const std::string& host, int port,
-                   const std::string& model_handle,
-                   std::shared_ptr<DownloadService> download_service) {
-  RunCmd rc(host, port, model_handle, download_service);
-  rc.Exec(false /*detach mode*/);
-}
-};  // namespace commands
diff --git a/engine/cli/commands/chat_cmd.h b/engine/cli/commands/chat_cmd.h
deleted file mode 100644
index 597a0d752..000000000
--- a/engine/cli/commands/chat_cmd.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#pragma once
-
-#include <string>
-#include "services/download_service.h"
-
-namespace commands {
-class ChatCmd {
- public:
-  void Exec(const std::string& host, int port, const std::string& model_handle,
-            std::shared_ptr<DownloadService> download_service);
-};
-}  // namespace commands
diff --git a/engine/cli/commands/cortex_upd_cmd.cc b/engine/cli/commands/cortex_upd_cmd.cc
index b76d48787..30d1ed3e2 100644
--- a/engine/cli/commands/cortex_upd_cmd.cc
+++ b/engine/cli/commands/cortex_upd_cmd.cc
@@ -355,7 +355,7 @@ bool CortexUpdCmd::GetStable(const std::string& v) {
 
   auto executable_path = file_manager_utils::GetExecutableFolderContainerPath();
   auto dst = executable_path / GetCortexBinary();
-  utils::ScopeExit se([]() {
+  cortex::utils::ScopeExit se([]() {
     auto cortex_tmp = std::filesystem::temp_directory_path() / "cortex";
     try {
       auto n = std::filesystem::remove_all(cortex_tmp);
@@ -423,7 +423,7 @@ bool CortexUpdCmd::GetBeta(const std::string& v) {
 
   auto executable_path = file_manager_utils::GetExecutableFolderContainerPath();
   auto dst = executable_path / GetCortexBinary();
-  utils::ScopeExit se([]() {
+  cortex::utils::ScopeExit se([]() {
     auto cortex_tmp = std::filesystem::temp_directory_path() / "cortex";
     try {
       auto n = std::filesystem::remove_all(cortex_tmp);
@@ -556,7 +556,7 @@ bool CortexUpdCmd::GetNightly(const std::string& v) {
 
   auto executable_path = file_manager_utils::GetExecutableFolderContainerPath();
   auto dst = executable_path / GetCortexBinary();
-  utils::ScopeExit se([]() {
+  cortex::utils::ScopeExit se([]() {
     auto cortex_tmp = std::filesystem::temp_directory_path() / "cortex";
     try {
       auto n = std::filesystem::remove_all(cortex_tmp);
diff --git a/engine/cli/commands/hardware_activate_cmd.cc b/engine/cli/commands/hardware_activate_cmd.cc
new file mode 100644
index 000000000..a0f34e4b7
--- /dev/null
+++ b/engine/cli/commands/hardware_activate_cmd.cc
@@ -0,0 +1,86 @@
+#include "hardware_activate_cmd.h"
+#include "server_start_cmd.h"
+#include "utils/json_helper.h"
+#include "utils/logging_utils.h"
+
+namespace commands {
+namespace {
+std::vector<int> ParseStringToVector(const std::string& str) {
+  // [0, 1, 2, 3]
+  std::string cleaned_str =
+      std::regex_replace(str, std::regex(R"([\[\]\s])"), "");
+
+  // Prepare to parse the cleaned string
+  std::vector<int> result;
+  std::stringstream ss(cleaned_str);
+  std::string number;
+
+  // Use getline to split by comma
+  while (std::getline(ss, number, ',')) {
+    result.push_back(std::stoi(number));
+  }
+
+  return result;
+}
+}  // namespace
+
+bool HardwareActivateCmd::Exec(
+    const std::string& host, int port,
+    const std::unordered_map<std::string, std::string>& options) {
+  // Start server if server is not started yet
+  if (!commands::IsServerAlive(host, port)) {
+    CLI_LOG("Starting server ...");
+    commands::ServerStartCmd ssc;
+    if (!ssc.Exec(host, port)) {
+      return false;
+    }
+  }
+
+  // TODO(sang) should use curl but it does not work (?)
+  Json::Value body;
+  Json::Value gpus_json = Json::arrayValue;
+  std::vector<int> gpus;
+  for (auto const& [key, value] : options) {
+    if (key == "gpus") {
+      gpus = ParseStringToVector(value);
+    }
+  }
+  for (auto g : gpus) {
+    gpus_json.append(g);
+  }
+  body["gpus"] = gpus_json;
+  auto data_str = body.toStyledString();
+
+  httplib::Client cli(host + ":" + std::to_string(port));
+
+  auto res = cli.Post("/v1/hardware/activate", httplib::Headers(),
+                      data_str.data(), data_str.size(), "application/json");
+  if (res) {
+    if (res->status == httplib::StatusCode::OK_200) {
+      auto root = json_helper::ParseJsonString(res->body);
+      if (!root["warning"].isNull()) {
+        CLI_LOG(root["warning"].asString());
+      }
+      if(body["gpus"].empty()) {
+        CLI_LOG("Deactivated all GPUs!");        
+      } else {
+        std::string gpus_str;
+        for(auto i: gpus) {
+            gpus_str += " " + std::to_string(i);
+        }
+        CLI_LOG("Activated GPUs:" << gpus_str);
+      }
+      return true;
+    } else {
+      auto root = json_helper::ParseJsonString(res->body);
+      CLI_LOG(root["message"].asString());
+      return false;
+    }
+  } else {
+    auto err = res.error();
+    CTL_ERR("HTTP error: " << httplib::to_string(err));
+    return false;
+  }
+  return true;
+}
+}  // namespace commands
\ No newline at end of file
diff --git a/engine/cli/commands/hardware_activate_cmd.h b/engine/cli/commands/hardware_activate_cmd.h
new file mode 100644
index 000000000..eb5b68cc3
--- /dev/null
+++ b/engine/cli/commands/hardware_activate_cmd.h
@@ -0,0 +1,12 @@
+#pragma once
+#include <string>
+#include <unordered_map>
+#include "common/hardware_config.h"
+
+namespace commands {
+class HardwareActivateCmd {
+ public:
+  bool Exec(const std::string& host, int port,
+            const std::unordered_map<std::string, std::string>& options);
+};
+}  // namespace commands
\ No newline at end of file
diff --git a/engine/cli/commands/hardware_list_cmd.cc b/engine/cli/commands/hardware_list_cmd.cc
new file mode 100644
index 000000000..0b65bba39
--- /dev/null
+++ b/engine/cli/commands/hardware_list_cmd.cc
@@ -0,0 +1,184 @@
+#include "hardware_list_cmd.h"
+
+#include <json/reader.h>
+#include <json/value.h>
+#include <iostream>
+
+#include <vector>
+#include "httplib.h"
+#include "server_start_cmd.h"
+#include "utils/curl_utils.h"
+#include "utils/hardware/cpu_info.h"
+#include "utils/hardware/gpu_info.h"
+#include "utils/hardware/os_info.h"
+#include "utils/hardware/power_info.h"
+#include "utils/hardware/ram_info.h"
+#include "utils/hardware/storage_info.h"
+#include "utils/logging_utils.h"
+#include "utils/string_utils.h"
+// clang-format off
+#include <tabulate/table.hpp>
+// clang-format on
+
+namespace commands {
+using namespace tabulate;
+using Row_t =
+    std::vector<variant<std::string, const char*, string_view, Table>>;
+
+bool HardwareListCmd::Exec(const std::string& host, int port,
+                           const std::optional<HarwareOptions>& ho) {
+  // Start server if server is not started yet
+  if (!commands::IsServerAlive(host, port)) {
+    CLI_LOG("Starting server ...");
+    commands::ServerStartCmd ssc;
+    if (!ssc.Exec(host, port)) {
+      return false;
+    }
+  }
+
+  auto url = url_parser::Url{
+      .protocol = "http",
+      .host = host + ":" + std::to_string(port),
+      .pathParams = {"v1", "hardware"},
+  };
+  auto result = curl_utils::SimpleGetJson(url.ToFullPath());
+  if (result.has_error()) {
+    CTL_ERR(result.error());
+    return false;
+  }
+
+  if (!ho.has_value() || ho.value().show_cpu) {
+    std::cout << "CPU Information:" << std::endl;
+    Table table;
+    std::vector<std::string> column_headers{"(Index)", "Arch", "Cores", "Model",
+                                            "Instructions"};
+
+    Row_t header{column_headers.begin(), column_headers.end()};
+    table.add_row(header);
+    table.format().font_color(Color::green);
+    std::vector<std::string> row = {"1"};
+    cortex::hw::CPU cpu = cortex::hw::cpu::FromJson(result.value()["cpu"]);
+    row.emplace_back(cpu.arch);
+    row.emplace_back(std::to_string(cpu.cores));
+    row.emplace_back(cpu.model);
+    std::string insts;
+    for (auto const& i : cpu.instructions) {
+      insts += i + " ";
+    };
+    row.emplace_back(insts);
+    table.add_row({row.begin(), row.end()});
+    std::cout << table << std::endl;
+    std::cout << std::endl;
+  }
+
+  if (!ho.has_value() || ho.value().show_os) {
+    std::cout << "OS Information:" << std::endl;
+    Table table;
+    std::vector<std::string> column_headers{"(Index)", "Version", "Name"};
+
+    Row_t header{column_headers.begin(), column_headers.end()};
+    table.add_row(header);
+    table.format().font_color(Color::green);
+    std::vector<std::string> row = {"1"};
+    cortex::hw::OS os = cortex::hw::os::FromJson(result.value()["os"]);
+    row.emplace_back(os.version);
+    row.emplace_back(os.name);
+    table.add_row({row.begin(), row.end()});
+    std::cout << table << std::endl;
+    std::cout << std::endl;
+  }
+
+  if (!ho.has_value() || ho.value().show_ram) {
+    std::cout << "RAM Information:" << std::endl;
+    Table table;
+    std::vector<std::string> column_headers{"(Index)", "Total (MiB)",
+                                            "Available (MiB)"};
+
+    Row_t header{column_headers.begin(), column_headers.end()};
+    table.add_row(header);
+    table.format().font_color(Color::green);
+    std::vector<std::string> row = {"1"};
+    cortex::hw::Memory m = cortex::hw::memory::FromJson(result.value()["ram"]);
+    row.emplace_back(std::to_string(m.total_MiB));
+    row.emplace_back(std::to_string(m.available_MiB));
+    table.add_row({row.begin(), row.end()});
+    std::cout << table << std::endl;
+    std::cout << std::endl;
+  }
+
+ if (!ho.has_value() || ho.value().show_gpu) {
+    std::cout << "GPU Information:" << std::endl;
+    Table table;
+    std::vector<std::string> column_headers{
+        "(Index)",        "ID",
+        "Name",           "Version",
+        "Total (MiB)",    "Available (MiB)",
+        "Driver Version", "Compute Capability", "Activated"};
+
+    Row_t header{column_headers.begin(), column_headers.end()};
+    table.add_row(header);
+    table.format().font_color(Color::green);
+    int count = 1;
+
+    std::vector<cortex::hw::GPU> gpus =
+        cortex::hw::gpu::FromJson(result.value()["gpus"]);
+    for (auto const& gpu : gpus) {
+      std::vector<std::string> row = {std::to_string(count)};
+      row.emplace_back(gpu.id);
+      row.emplace_back(gpu.name);
+      row.emplace_back(gpu.version);
+      row.emplace_back(std::to_string(gpu.total_vram));
+      row.emplace_back(std::to_string(gpu.free_vram));
+      row.emplace_back(
+          std::get<cortex::hw::NvidiaAddInfo>(gpu.add_info).driver_version);
+      row.emplace_back(
+          std::get<cortex::hw::NvidiaAddInfo>(gpu.add_info).compute_cap);
+      row.emplace_back(gpu.is_activated ? "Yes" : "No");
+      table.add_row({row.begin(), row.end()});
+    }
+
+    std::cout << table << std::endl;
+    std::cout << std::endl;
+  }
+
+  if (!ho.has_value() || ho.value().show_storage) {
+    std::cout << "Storage Information:" << std::endl;
+    Table table;
+    std::vector<std::string> column_headers{"(Index)", "Total (GiB)",
+                                            "Available (GiB)"};
+
+    Row_t header{column_headers.begin(), column_headers.end()};
+    table.add_row(header);
+    table.format().font_color(Color::green);
+    std::vector<std::string> row = {"1"};
+    cortex::hw::StorageInfo si =
+        cortex::hw::storage::FromJson(result.value()["storage"]);
+    row.emplace_back(std::to_string(si.total));
+    row.emplace_back(std::to_string(si.available));
+    table.add_row({row.begin(), row.end()});
+    std::cout << table << std::endl;
+    std::cout << std::endl;
+  }
+
+  if (!ho.has_value() || ho.value().show_power) {
+    std::cout << "Power Information:" << std::endl;
+    Table table;
+    std::vector<std::string> column_headers{"(Index)", "Battery Life",
+                                            "Charging Status", "Power Saving"};
+
+    Row_t header{column_headers.begin(), column_headers.end()};
+    table.add_row(header);
+    table.format().font_color(Color::green);
+    std::vector<std::string> row = {"1"};
+    cortex::hw::PowerInfo pi = cortex::hw::power::FromJson(result.value()["power"]);
+    row.emplace_back(std::to_string(pi.battery_life));
+    row.emplace_back(pi.charging_status);
+    row.emplace_back(pi.is_power_saving ? "Yes" : "No");
+    table.add_row({row.begin(), row.end()});
+    std::cout << table << std::endl;
+    std::cout << std::endl;
+  }
+
+  return true;
+}
+}  // namespace commands
\ No newline at end of file
diff --git a/engine/cli/commands/hardware_list_cmd.h b/engine/cli/commands/hardware_list_cmd.h
new file mode 100644
index 000000000..9344c729c
--- /dev/null
+++ b/engine/cli/commands/hardware_list_cmd.h
@@ -0,0 +1,26 @@
+#pragma once
+#include <optional>
+#include <string>
+
+namespace commands {
+struct HarwareOptions {
+  bool show_cpu = false;
+  bool show_os = false;
+  bool show_ram = false;
+  bool show_storage = false;
+  bool show_gpu = false;
+  bool show_power = false;
+  bool show_monitors = false;
+
+  bool has_flag() const {
+    return show_cpu || show_os || show_ram || show_storage || show_gpu ||
+           show_power || show_monitors;
+  }
+};
+
+class HardwareListCmd {
+ public:
+  bool Exec(const std::string& host, int port,
+            const std::optional<HarwareOptions>& ho);
+};
+}  // namespace commands
\ No newline at end of file
diff --git a/engine/cli/commands/model_pull_cmd.cc b/engine/cli/commands/model_pull_cmd.cc
index a4bf68bea..5793c2e09 100644
--- a/engine/cli/commands/model_pull_cmd.cc
+++ b/engine/cli/commands/model_pull_cmd.cc
@@ -127,7 +127,7 @@ std::optional<std::string> ModelPullCmd::Exec(const std::string& host, int port,
     dp.ForceStop();
   };
 
-  utils::ScopeExit se([]() { shutdown_handler = {}; });
+  cortex::utils::ScopeExit se([]() { shutdown_handler = {}; });
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
   struct sigaction sigint_action;
   sigint_action.sa_handler = signal_handler;
diff --git a/engine/cli/commands/model_start_cmd.cc b/engine/cli/commands/model_start_cmd.cc
index cc8f19edc..9b2f9d4b3 100644
--- a/engine/cli/commands/model_start_cmd.cc
+++ b/engine/cli/commands/model_start_cmd.cc
@@ -1,5 +1,6 @@
 #include "model_start_cmd.h"
 #include "cortex_upd_cmd.h"
+#include "hardware_activate_cmd.h"
 #include "httplib.h"
 #include "run_cmd.h"
 #include "server_start_cmd.h"
@@ -8,9 +9,10 @@
 #include "utils/logging_utils.h"
 
 namespace commands {
-bool ModelStartCmd::Exec(const std::string& host, int port,
-                         const std::string& model_handle,
-                         bool print_success_log) {
+bool ModelStartCmd::Exec(
+    const std::string& host, int port, const std::string& model_handle,
+    const std::unordered_map<std::string, std::string>& options,
+    bool print_success_log) {
   std::optional<std::string> model_id =
       SelectLocalModel(host, port, model_service_, model_handle);
 
@@ -26,6 +28,28 @@ bool ModelStartCmd::Exec(const std::string& host, int port,
       return false;
     }
   }
+
+  //
+  bool should_activate_hw = false;
+  for (auto const& [_, v] : options) {
+    if (!v.empty()) {
+      should_activate_hw = true;
+      break;
+    }
+  }
+  if (should_activate_hw) {
+    if (!HardwareActivateCmd().Exec(host, port, options)) {
+      return false;
+    }
+    // wait for server up, max for 3 seconds
+    int count = 6;
+    while (count--) {
+      std::this_thread::sleep_for(std::chrono::milliseconds(500));
+      if (commands::IsServerAlive(host, port))
+        break;      
+    }
+  }
+
   // Call API to start model
   httplib::Client cli(host + ":" + std::to_string(port));
   Json::Value json_data;
@@ -42,6 +66,10 @@ bool ModelStartCmd::Exec(const std::string& host, int port,
                 << commands::GetCortexBinary() << " run " << *model_id
                 << "` for interactive chat shell");
       }
+      auto root = json_helper::ParseJsonString(res->body);
+      if (!root["warning"].isNull()) {
+        CLI_LOG(root["warning"].asString());
+      }
       return true;
     } else {
       auto root = json_helper::ParseJsonString(res->body);
@@ -50,7 +78,7 @@ bool ModelStartCmd::Exec(const std::string& host, int port,
     }
   } else {
     auto err = res.error();
-    CTL_ERR("HTTP error: " << httplib::to_string(err));
+    CLI_LOG("HTTP error: " << httplib::to_string(err));
     return false;
   }
 }
diff --git a/engine/cli/commands/model_start_cmd.h b/engine/cli/commands/model_start_cmd.h
index ffd63d611..652d37994 100644
--- a/engine/cli/commands/model_start_cmd.h
+++ b/engine/cli/commands/model_start_cmd.h
@@ -1,5 +1,6 @@
 #pragma once
 #include <string>
+#include <unordered_map>
 #include "services/model_service.h"
 
 namespace commands {
@@ -10,6 +11,7 @@ class ModelStartCmd {
       : model_service_{model_service} {};
 
   bool Exec(const std::string& host, int port, const std::string& model_handle,
+            const std::unordered_map<std::string, std::string>& options,
             bool print_success_log = true);
 
  private:
diff --git a/engine/cli/commands/model_stop_cmd.cc b/engine/cli/commands/model_stop_cmd.cc
index 06a6acbaf..9a14b0876 100644
--- a/engine/cli/commands/model_stop_cmd.cc
+++ b/engine/cli/commands/model_stop_cmd.cc
@@ -17,11 +17,13 @@ void ModelStopCmd::Exec(const std::string& host, int port,
     if (res->status == httplib::StatusCode::OK_200) {
       CLI_LOG("Model unloaded!");
     } else {
-      CTL_ERR("Model failed to unload with status code: " << res->status);
+      auto root = json_helper::ParseJsonString(res->body);
+      CLI_LOG(root["message"].asString());
+      return;
     }
   } else {
     auto err = res.error();
-    CTL_ERR("HTTP error: " << httplib::to_string(err));
+    CLI_LOG("HTTP error: " << httplib::to_string(err));
   }
 }
 
diff --git a/engine/cli/commands/run_cmd.cc b/engine/cli/commands/run_cmd.cc
index 174255db3..279128552 100644
--- a/engine/cli/commands/run_cmd.cc
+++ b/engine/cli/commands/run_cmd.cc
@@ -67,7 +67,8 @@ std::optional<std::string> SelectLocalModel(std::string host, int port,
   return model_id;
 }
 
-void RunCmd::Exec(bool run_detach) {
+void RunCmd::Exec(bool run_detach,
+                  const std::unordered_map<std::string, std::string>& options) {
   std::optional<std::string> model_id =
       SelectLocalModel(host_, port_, model_service_, model_handle_);
   if (!model_id.has_value()) {
@@ -129,9 +130,9 @@ void RunCmd::Exec(bool run_detach) {
             !commands::ModelStatusCmd(model_service_)
                  .IsLoaded(host_, port_, *model_id)) {
 
-          auto res =
-              commands::ModelStartCmd(model_service_)
-                  .Exec(host_, port_, *model_id, false /*print_success_log*/);
+          auto res = commands::ModelStartCmd(model_service_)
+                         .Exec(host_, port_, *model_id, options,
+                               false /*print_success_log*/);
           if (!res) {
             CLI_LOG("Error: Failed to start model");
             return;
diff --git a/engine/cli/commands/run_cmd.h b/engine/cli/commands/run_cmd.h
index 46a687fce..6e524c6b1 100644
--- a/engine/cli/commands/run_cmd.h
+++ b/engine/cli/commands/run_cmd.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <string>
+#include <unordered_map>
 #include "services/engine_service.h"
 #include "services/model_service.h"
 
@@ -21,7 +22,8 @@ class RunCmd {
         engine_service_{EngineService(download_service)},
         model_service_{ModelService(download_service)} {};
 
-  void Exec(bool chat_flag);
+  void Exec(bool chat_flag,
+            const std::unordered_map<std::string, std::string>& options);
 
  private:
   std::string host_;
diff --git a/engine/common/engine_servicei.h b/engine/common/engine_servicei.h
new file mode 100644
index 000000000..fb81839fc
--- /dev/null
+++ b/engine/common/engine_servicei.h
@@ -0,0 +1,57 @@
+#pragma once
+#include <string>
+#include <vector>
+#include "json/json.h"
+#include "utils/result.hpp"
+
+// TODO: namh think of the other name
+struct DefaultEngineVariant {
+  std::string engine;
+  std::string version;
+  std::string variant;
+
+  Json::Value ToJson() const {
+    Json::Value root;
+    root["engine"] = engine;
+    root["version"] = version;
+    root["variant"] = variant;
+    return root;
+  }
+};
+
+// TODO: namh think of the other name
+struct EngineVariantResponse {
+  std::string name;
+  std::string version;
+  std::string engine;
+
+  Json::Value ToJson() const {
+    Json::Value root;
+    root["name"] = name;
+    root["version"] = version;
+    root["engine"] = engine;
+    return root;
+  }
+};
+
+class EngineServiceI {
+ public:
+  virtual ~EngineServiceI() {}
+  
+  virtual cpp::result<DefaultEngineVariant, std::string>
+  SetDefaultEngineVariant(const std::string& engine, const std::string& version,
+                          const std::string& variant) = 0;
+
+virtual cpp::result<DefaultEngineVariant, std::string>
+  GetDefaultEngineVariant(const std::string& engine) = 0;
+
+  virtual cpp::result<std::vector<EngineVariantResponse>, std::string>
+  GetInstalledEngineVariants(const std::string& engine) const = 0;
+
+  virtual cpp::result<void, std::string> LoadEngine(
+      const std::string& engine_name) = 0;
+
+  virtual cpp::result<void, std::string> UnloadEngine(
+      const std::string& engine_name) = 0;
+
+};
\ No newline at end of file
diff --git a/engine/common/hardware_common.h b/engine/common/hardware_common.h
new file mode 100644
index 000000000..444a5c02c
--- /dev/null
+++ b/engine/common/hardware_common.h
@@ -0,0 +1,217 @@
+#pragma once
+#include <json/json.h>
+#include <string>
+#include <variant>
+#include <vector>
+#include <assert.h>
+
+namespace cortex::hw {
+
+namespace {
+inline constexpr std::string_view GetArch() {
+#if defined(__i386__) || defined(__x86_64__) || defined(__amd64__) || \
+    defined(__amd64) || defined(__x86_64) || defined(_M_AMD64)
+  return "amd64";
+#elif defined(__arm__) || defined(__arm) || defined(__arm64__) || \
+    defined(__aarch64__) || defined(__thumb__) ||                 \
+    defined(__TARGET_ARCH_ARM) || defined(__TARGET_ARCH_THUMB) || \
+    defined(_ARM) || defined(_M_ARM) || defined(_M_ARMT)
+  return "arm64";
+#else
+  return "Unsupported";
+#endif
+}
+}  // namespace
+struct CPU {
+  int cores;
+  std::string arch;
+  std::string model;
+  std::vector<std::string> instructions;
+};
+
+inline Json::Value ToJson(const CPU& cpu) {
+  Json::Value res;
+  res["arch"] = cpu.arch;
+  res["cores"] = cpu.cores;
+  res["model"] = cpu.model;
+  Json::Value insts(Json::arrayValue);
+  for (auto const& i : cpu.instructions) {
+    insts.append(i);
+  }
+  res["instructions"] = insts;
+  return res;
+}
+
+namespace cpu {
+inline CPU FromJson(const Json::Value& root) {
+  int cores = root["cores"].asInt();
+  std::string arch = root["arch"].asString();
+  std::string model = root["model"].asString();
+  std::vector<std::string> insts;
+  for (auto const& i : root["instructions"]) {
+    insts.emplace_back(i.asString());
+  }
+  return {.cores = cores, .arch = arch, .model = model, .instructions = insts};
+}
+}  // namespace cpu
+
+// This can be different depends on gpu types
+struct NvidiaAddInfo {
+  std::string driver_version;
+  std::string compute_cap;
+};
+struct AmdAddInfo {};
+using GPUAddInfo = std::variant<NvidiaAddInfo, AmdAddInfo>;
+struct GPU {
+  std::string id;
+  std::string name;
+  std::string version;
+  GPUAddInfo add_info;
+  int64_t free_vram;
+  int64_t total_vram;
+  std::string uuid;
+  bool is_activated = true;
+};
+
+inline Json::Value ToJson(const std::vector<GPU>& gpus) {
+  Json::Value res(Json::arrayValue);
+  for (size_t i = 0; i < gpus.size(); i++) {
+    Json::Value gpu;
+    gpu["id"] = std::to_string(i);
+    gpu["name"] = gpus[i].name;
+    gpu["version"] = gpus[i].version;
+    Json::Value add_info;
+    if (std::holds_alternative<NvidiaAddInfo>(gpus[i].add_info)) {
+      auto& v = std::get<NvidiaAddInfo>(gpus[i].add_info);
+      add_info["driver_version"] = v.driver_version;
+      add_info["compute_cap"] = v.compute_cap;
+    }
+    gpu["additional_information"] = add_info;
+
+    gpu["free_vram"] = gpus[i].free_vram;
+    gpu["total_vram"] = gpus[i].total_vram;
+    gpu["uuid"] = gpus[i].uuid;
+    gpu["activated"] = gpus[i].is_activated;
+    res.append(gpu);
+  }
+  return res;
+}
+
+namespace gpu {
+inline std::vector<GPU> FromJson(const Json::Value& root) {
+  assert(root.isArray());
+  std::vector<GPU> res;
+  for (auto const& gpu_json : root) {
+    GPU gpu;
+    gpu.id = gpu_json["id"].asString();
+    gpu.name = gpu_json["name"].asString();
+    gpu.version = gpu_json["version"].asString();
+    NvidiaAddInfo add_inf;
+    add_inf.driver_version =
+        gpu_json["additional_information"]["driver_version"].asString();
+    add_inf.compute_cap =
+        gpu_json["additional_information"]["compute_cap"].asString();
+    gpu.add_info = add_inf;
+    gpu.free_vram = gpu_json["free_vram"].asInt64();
+    gpu.total_vram = gpu_json["total_vram"].asInt64();
+    gpu.uuid = gpu_json["uuid"].asString();
+    gpu.is_activated = gpu_json["activated"].asBool();
+    res.emplace_back(gpu);
+  }
+  return res;
+}
+}  // namespace gpu
+
+struct OS {
+  std::string name;
+  std::string version;
+  std::string arch;
+};
+
+inline Json::Value ToJson(const OS& os) {
+  Json::Value res;
+  res["version"] = os.version;
+  res["name"] = os.name;
+  return res;
+}
+
+namespace os {
+inline OS FromJson(const Json::Value& root) {
+  return {.name = root["name"].asString(),
+          .version = root["version"].asString()};
+}
+}  // namespace os
+
+
+struct PowerInfo {
+  std::string charging_status;
+  int battery_life;
+  bool is_power_saving;
+};
+
+inline Json::Value ToJson(const PowerInfo& pi) {
+  Json::Value res;
+  res["charging_status"] = pi.charging_status;
+  res["battery_life"] = pi.battery_life;
+  res["is_power_saving"] = pi.is_power_saving;
+  return res;
+}
+
+namespace power {
+inline PowerInfo FromJson(const Json::Value& root) {
+  return {.charging_status = root["charging_status"].asString(),
+          .battery_life = root["battery_life"].asInt(),
+          .is_power_saving = root["is_power_saving"].asBool()};
+}
+}  // namespace power
+
+
+namespace {
+int64_t ByteToMiB(int64_t b) {
+  return b / 1024 / 1024;
+}
+}  // namespace
+struct Memory {
+  int64_t total_MiB;
+  int64_t available_MiB;
+  std::string type;
+};
+
+inline Json::Value ToJson(const Memory& m) {
+  Json::Value res;
+  res["total"] = m.total_MiB;
+  res["available"] = m.available_MiB;
+  res["type"] = m.type;
+  return res;
+}
+
+namespace memory {
+inline Memory FromJson(const Json::Value& root) {
+  return {.total_MiB = root["total"].asInt64(),
+          .available_MiB = root["available"].asInt64(),
+          .type = root["type"].asString()};
+}
+}  // namespace memory
+
+struct StorageInfo {
+  std::string type;
+  int64_t total;
+  int64_t available;
+};
+
+inline Json::Value ToJson(const StorageInfo& si) {
+  Json::Value res;
+  res["total"] = si.total;
+  res["available"] = si.available;
+  res["type"] = si.type;
+  return res;
+}
+
+namespace storage {
+inline StorageInfo FromJson(const Json::Value& root) {
+  return {.type = root["type"].asString(),
+          .total = root["total"].asInt64(),
+          .available = root["available"].asInt64()};
+}
+}  // namespace storage
+}
\ No newline at end of file
diff --git a/engine/common/hardware_config.h b/engine/common/hardware_config.h
new file mode 100644
index 000000000..5e947130a
--- /dev/null
+++ b/engine/common/hardware_config.h
@@ -0,0 +1,9 @@
+#pragma once
+#include <vector>
+
+namespace cortex::hw {
+struct ActivateHardwareConfig {
+  std::vector<int> gpus;
+};
+
+}
\ No newline at end of file
diff --git a/engine/controllers/hardware.cc b/engine/controllers/hardware.cc
new file mode 100644
index 000000000..4f5cc2879
--- /dev/null
+++ b/engine/controllers/hardware.cc
@@ -0,0 +1,76 @@
+#include "hardware.h"
+#include "common/hardware_config.h"
+#include "utils/cortex_utils.h"
+#include "utils/file_manager_utils.h"
+#include "utils/scope_exit.h"
+
+void Hardware::GetHardwareInfo(
+    const HttpRequestPtr& req,
+    std::function<void(const HttpResponsePtr&)>&& callback) {
+  auto hw_inf = hw_svc_->GetHardwareInfo();
+  Json::Value ret;
+  ret["cpu"] = cortex::hw::ToJson(hw_inf.cpu);
+  ret["os"] = cortex::hw::ToJson(hw_inf.os);
+  ret["ram"] = cortex::hw::ToJson(hw_inf.ram);
+  ret["storage"] = cortex::hw::ToJson(hw_inf.storage);
+  ret["gpus"] = cortex::hw::ToJson(hw_inf.gpus);
+  ret["power"] = cortex::hw::ToJson(hw_inf.power);
+  auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+  resp->setStatusCode(k200OK);
+  callback(resp);
+}
+
+void Hardware::Activate(
+    const HttpRequestPtr& req,
+    std::function<void(const HttpResponsePtr&)>&& callback) {
+#if defined(__APPLE__) && defined(__MACH__)
+  Json::Value ret;
+  ret["message"] = "Item requested was not found";
+  auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+  resp->setStatusCode(k400BadRequest);
+  callback(resp);
+#else
+  // {
+  //   "gpus" : [0, 1]
+  // }
+  cortex::hw::ActivateHardwareConfig ahc;
+  if (auto o = req->getJsonObject(); o) {
+    CTL_INF("activate: " << o->toStyledString());
+    for (auto& g : (*o)["gpus"]) {
+      ahc.gpus.push_back(g.asInt());
+    }
+  }
+  std::sort(ahc.gpus.begin(), ahc.gpus.end());
+  if (!hw_svc_->IsValidConfig(ahc)) {
+    Json::Value ret;
+    ret["message"] = "Invalid GPU index provided.";
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k400BadRequest);
+    callback(resp);
+    return;
+  };
+
+  if (!hw_svc_->SetActivateHardwareConfig(ahc)) {
+    Json::Value ret;
+    ret["message"] = "The hardware configuration is already up to date.";
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k200OK);
+    callback(resp);
+    return;
+  }
+
+  if (auto r = engine_svc_->UnloadEngine(kLlamaEngine); r.has_error()) {
+    CTL_WRN(r.error());
+  }
+
+  Json::Value ret;
+  ret["message"] = "The hardware configuration has been activated.";
+  if (auto o = req->getJsonObject(); o) {
+    ret["activated_gpus"] = (*o)["gpus"];
+  }
+  auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+  resp->setStatusCode(k200OK);
+  callback(resp);
+  app().quit();
+#endif
+}
\ No newline at end of file
diff --git a/engine/controllers/hardware.h b/engine/controllers/hardware.h
new file mode 100644
index 000000000..6cca4fd2a
--- /dev/null
+++ b/engine/controllers/hardware.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <drogon/HttpController.h>
+#include "common/engine_servicei.h"
+#include "services/hardware_service.h"
+
+using namespace drogon;
+
+class Hardware : public drogon::HttpController<Hardware, false> {
+ public:
+  explicit Hardware(std::shared_ptr<EngineServiceI> engine_svc,
+                    std::shared_ptr<services::HardwareService> hw_svc)
+      : engine_svc_(engine_svc), hw_svc_(hw_svc) {}
+  METHOD_LIST_BEGIN
+  METHOD_ADD(Hardware::GetHardwareInfo, "/hardware", Get);
+  METHOD_ADD(Hardware::Activate, "/hardware/activate", Post);
+
+  ADD_METHOD_TO(Hardware::GetHardwareInfo, "/v1/hardware", Get);
+  ADD_METHOD_TO(Hardware::Activate, "/v1/hardware/activate", Post);
+  METHOD_LIST_END
+
+  void GetHardwareInfo(const HttpRequestPtr& req,
+                       std::function<void(const HttpResponsePtr&)>&& callback);
+
+  void Activate(const HttpRequestPtr& req,
+                std::function<void(const HttpResponsePtr&)>&& callback);
+
+ private:
+  std::shared_ptr<EngineServiceI> engine_svc_ = nullptr;
+  std::shared_ptr<services::HardwareService> hw_svc_= nullptr;
+};
\ No newline at end of file
diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc
index c205e85df..796f70d16 100644
--- a/engine/controllers/models.cc
+++ b/engine/controllers/models.cc
@@ -486,8 +486,12 @@ void Models::StartModel(
     resp->setStatusCode(drogon::k400BadRequest);
     callback(resp);
   } else {
+    auto& v = result.value();
     Json::Value ret;
     ret["message"] = "Started successfully!";
+    if(v.warning) {
+      ret["warning"] = *(v.warning);
+    }
     auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
     resp->setStatusCode(k200OK);
     callback(resp);
diff --git a/engine/database/hardwares.cc b/engine/database/hardwares.cc
new file mode 100644
index 000000000..c23aec0b7
--- /dev/null
+++ b/engine/database/hardwares.cc
@@ -0,0 +1,111 @@
+#include "hardwares.h"
+#include "database.h"
+#include "utils/scope_exit.h"
+
+namespace cortex::db {
+
+Hardwares::Hardwares() : db_(cortex::db::Database::GetInstance().db()) {
+  db_.exec(
+      "CREATE TABLE IF NOT EXISTS hardwares ("
+      "uuid TEXT PRIMARY KEY,"
+      "type TEXT,"
+      "hardware_id INTEGER,"
+      "software_id INTEGER,"
+      "activated INTEGER);");
+}
+
+Hardwares::Hardwares(SQLite::Database& db) : db_(db) {
+  db_.exec(
+      "CREATE TABLE IF NOT EXISTS hardwares ("
+      "uuid TEXT PRIMARY KEY,"
+      "type TEXT,"
+      "hardware_id INTEGER,"
+      "software_id INTEGER,"
+      "activated INTEGER);");
+}
+
+Hardwares::~Hardwares() {}
+
+cpp::result<std::vector<HardwareEntry>, std::string>
+Hardwares::LoadHardwareList() const {
+  try {
+    db_.exec("BEGIN TRANSACTION;");
+    cortex::utils::ScopeExit se([this] { db_.exec("COMMIT;"); });
+    std::vector<HardwareEntry> entries;
+    SQLite::Statement query(
+        db_,
+        "SELECT uuid, type, "
+        "hardware_id, software_id, activated FROM hardwares");
+
+    while (query.executeStep()) {
+      HardwareEntry entry;
+      entry.uuid = query.getColumn(0).getString();
+      entry.type = query.getColumn(1).getString();
+      entry.hardware_id = query.getColumn(2).getInt();
+      entry.software_id = query.getColumn(3).getInt();
+      entry.activated = query.getColumn(4).getInt();
+      entries.push_back(entry);
+    }
+    return entries;
+  } catch (const std::exception& e) {
+    CTL_WRN(e.what());
+    return cpp::fail(e.what());
+  }
+}
+cpp::result<bool, std::string> Hardwares::AddHardwareEntry(
+    const HardwareEntry& new_entry) {
+  try {
+    SQLite::Statement insert(
+        db_,
+        "INSERT INTO hardwares (uuid, type, "
+        "hardware_id, software_id, activated) VALUES (?, ?, "
+        "?, ?, ?)");
+    insert.bind(1, new_entry.uuid);
+    insert.bind(2, new_entry.type);
+    insert.bind(3, new_entry.hardware_id);
+    insert.bind(4, new_entry.software_id);
+    insert.bind(5, new_entry.activated);
+    insert.exec();
+    CTL_INF("Inserted: " << new_entry.ToJsonString());
+    return true;
+  } catch (const std::exception& e) {
+    CTL_WRN(e.what());
+    return cpp::fail(e.what());
+  }
+}
+cpp::result<bool, std::string> Hardwares::UpdateHardwareEntry(
+    const std::string& id, const HardwareEntry& updated_entry) {
+  try {
+    SQLite::Statement upd(db_,
+                          "UPDATE hardwares "
+                          "SET hardware_id = ?, software_id = ?, activated = ? "
+                          "WHERE uuid = ?");
+    upd.bind(1, updated_entry.hardware_id);
+    upd.bind(2, updated_entry.software_id);
+    upd.bind(3, updated_entry.activated);
+    upd.bind(4, id);
+    if (upd.exec() == 1) {
+      CTL_INF("Updated: " << updated_entry.ToJsonString());
+      return true;
+    }
+    return false;
+  } catch (const std::exception& e) {
+    return cpp::fail(e.what());
+  }
+}
+
+cpp::result<bool, std::string> Hardwares::DeleteHardwareEntry(
+    const std::string& id) {
+  try {
+    SQLite::Statement del(db_, "DELETE from hardwares WHERE uuid = ?");
+    del.bind(1, id);
+    if (del.exec() == 1) {
+      CTL_INF("Deleted: " << id);
+      return true;
+    }
+    return false;
+  } catch (const std::exception& e) {
+    return cpp::fail(e.what());
+  }
+}
+}  // namespace cortex::db
\ No newline at end of file
diff --git a/engine/database/hardwares.h b/engine/database/hardwares.h
new file mode 100644
index 000000000..0966d58a3
--- /dev/null
+++ b/engine/database/hardwares.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <SQLiteCpp/Database.h>
+#include <trantor/utils/Logger.h>
+#include <string>
+#include <vector>
+#include "utils/result.hpp"
+#include "utils/json_helper.h"
+
+namespace cortex::db {
+struct HardwareEntry {
+  std::string uuid;
+  std::string type;
+  int hardware_id;
+  int software_id;
+  bool activated;
+  std::string ToJsonString() const {
+    Json::Value root;
+    root["uuid"] = uuid;
+    root["type"] = type;
+    root["hardware_id"] = hardware_id;
+    root["software_id"] = software_id;
+    root["activated"] = activated;
+    return json_helper::DumpJsonString(root);
+  }
+};
+
+class Hardwares {
+
+ private:
+  SQLite::Database& db_;
+
+
+ public:
+  Hardwares();
+  Hardwares(SQLite::Database& db);
+  ~Hardwares();
+
+  cpp::result<std::vector<HardwareEntry>, std::string> LoadHardwareList() const;
+  cpp::result<bool, std::string> AddHardwareEntry(const HardwareEntry& new_entry);
+  cpp::result<bool, std::string> UpdateHardwareEntry(
+      const std::string& id, const HardwareEntry& updated_entry);
+  cpp::result<bool, std::string> DeleteHardwareEntry(
+      const std::string& id);
+};
+}  // namespace cortex::db
\ No newline at end of file
diff --git a/engine/database/models.cc b/engine/database/models.cc
index 67ecb9723..d0bee405c 100644
--- a/engine/database/models.cc
+++ b/engine/database/models.cc
@@ -34,7 +34,7 @@ cpp::result<std::vector<ModelEntry>, std::string> Models::LoadModelList()
     const {
   try {
     db_.exec("BEGIN TRANSACTION;");
-    utils::ScopeExit se([this] { db_.exec("COMMIT;"); });
+    cortex::utils::ScopeExit se([this] { db_.exec("COMMIT;"); });
     return LoadModelListNoLock();
   } catch (const std::exception& e) {
     CTL_WRN(e.what());
@@ -174,7 +174,7 @@ cpp::result<bool, std::string> Models::AddModelEntry(ModelEntry new_entry,
                                                      bool use_short_alias) {
   try {
     db_.exec("BEGIN TRANSACTION;");
-    utils::ScopeExit se([this] { db_.exec("COMMIT;"); });
+    cortex::utils::ScopeExit se([this] { db_.exec("COMMIT;"); });
     auto model_list = LoadModelListNoLock();
     if (model_list.has_error()) {
       CTL_WRN(model_list.error());
@@ -236,7 +236,7 @@ cpp::result<bool, std::string> Models::UpdateModelAlias(
   }
   try {
     db_.exec("BEGIN TRANSACTION;");
-    utils::ScopeExit se([this] { db_.exec("COMMIT;"); });
+    cortex::utils::ScopeExit se([this] { db_.exec("COMMIT;"); });
     auto model_list = LoadModelListNoLock();
     if (model_list.has_error()) {
       CTL_WRN(model_list.error());
diff --git a/engine/database/models.h b/engine/database/models.h
index ebb006b28..197996ab8 100644
--- a/engine/database/models.h
+++ b/engine/database/models.h
@@ -27,7 +27,6 @@ class Models {
   cpp::result<std::vector<ModelEntry>, std::string> LoadModelListNoLock() const;
 
  public:
-  static const std::string kModelListPath;
   cpp::result<std::vector<ModelEntry>, std::string> LoadModelList() const;
   Models();
   Models(SQLite::Database& db);
diff --git a/engine/e2e-test/test_api_docker.py b/engine/e2e-test/test_api_docker.py
index 432224f80..2f06e6edb 100644
--- a/engine/e2e-test/test_api_docker.py
+++ b/engine/e2e-test/test_api_docker.py
@@ -18,7 +18,7 @@ def setup_and_teardown(self, request):
     @pytest.mark.parametrize("model_url", repo_branches)
     @pytest.mark.asyncio
     async def test_models_on_cortexso_hub(self, model_url):
-
+        print("Pull model from cortexso hub")
         # Pull model from cortexso hub
         json_body = {
             "model": model_url
@@ -28,6 +28,7 @@ async def test_models_on_cortexso_hub(self, model_url):
         
         await wait_for_websocket_download_success_event(timeout=None)
         
+        print("Check if the model was pulled successfully")
         # Check if the model was pulled successfully
         get_model_response = requests.get(
             f"http://127.0.0.1:3928/v1/models/{model_url}"
@@ -37,16 +38,19 @@ async def test_models_on_cortexso_hub(self, model_url):
             get_model_response.json()["model"] == model_url
         ), f"Unexpected model name for: {model_url}"
 
+        print("Check if the model is available in the list of models")
         # Check if the model is available in the list of models
         response = requests.get("http://localhost:3928/v1/models")
         assert response.status_code == 200
         models = [i["id"] for i in response.json()["data"]]
         assert model_url in models, f"Model not found in list: {model_url}"
 
+        print("Start the model")
         # Start the model
         response = requests.post("http://localhost:3928/v1/models/start", json=json_body)
         assert response.status_code == 200, f"status_code: {response.status_code}"
 
+        print("Send an inference request")
         # Send an inference request
         inference_json_body = {
             "frequency_penalty": 0.2,
@@ -69,6 +73,7 @@ async def test_models_on_cortexso_hub(self, model_url):
         response = requests.post("http://localhost:3928/v1/chat/completions", json=inference_json_body, headers={"Content-Type": "application/json"})
         assert response.status_code == 200, f"status_code: {response.status_code} response: {response.json()}"
 
+        print("Stop the model")
         # Stop the model
         response = requests.post("http://localhost:3928/v1/models/stop", json=json_body)
         assert response.status_code == 200, f"status_code: {response.status_code}"
diff --git a/engine/e2e-test/test_api_model_pull_direct_url.py b/engine/e2e-test/test_api_model_pull_direct_url.py
index ec72de147..604f216f8 100644
--- a/engine/e2e-test/test_api_model_pull_direct_url.py
+++ b/engine/e2e-test/test_api_model_pull_direct_url.py
@@ -12,6 +12,7 @@ class TestApiModelPullDirectUrl:
     @pytest.fixture(autouse=True)
     def setup_and_teardown(self):
         # Setup
+        stop_server()
         success = start_server()
         if not success:
             raise Exception("Failed to start server")
diff --git a/engine/e2e-test/test_api_model_start.py b/engine/e2e-test/test_api_model_start.py
index fddb33518..830d32da8 100644
--- a/engine/e2e-test/test_api_model_start.py
+++ b/engine/e2e-test/test_api_model_start.py
@@ -8,6 +8,7 @@ class TestApiModelStart:
     @pytest.fixture(autouse=True)
     def setup_and_teardown(self):
         # Setup
+        stop_server()
         success = start_server()
         if not success:
             raise Exception("Failed to start server")
diff --git a/engine/e2e-test/test_api_model_stop.py b/engine/e2e-test/test_api_model_stop.py
index 315f51ef8..97bec671e 100644
--- a/engine/e2e-test/test_api_model_stop.py
+++ b/engine/e2e-test/test_api_model_stop.py
@@ -8,6 +8,7 @@ class TestApiModelStop:
     @pytest.fixture(autouse=True)
     def setup_and_teardown(self):
         # Setup
+        stop_server()
         success = start_server()
         if not success:
             raise Exception("Failed to start server")
diff --git a/engine/e2e-test/test_cli_engine_install.py b/engine/e2e-test/test_cli_engine_install.py
index 6c8c4932b..380334222 100644
--- a/engine/e2e-test/test_cli_engine_install.py
+++ b/engine/e2e-test/test_cli_engine_install.py
@@ -9,6 +9,7 @@
 class TestCliEngineInstall:
     def setup_and_teardown(self):
         # Setup
+        stop_server()
         success = start_server()
         if not success:
             raise Exception("Failed to start server")
diff --git a/engine/e2e-test/test_cli_engine_list.py b/engine/e2e-test/test_cli_engine_list.py
index 5cd9a92fe..6a79bb449 100644
--- a/engine/e2e-test/test_cli_engine_list.py
+++ b/engine/e2e-test/test_cli_engine_list.py
@@ -8,7 +8,8 @@ class TestCliEngineList:
 
     @pytest.fixture(autouse=True)
     def setup_and_teardown(self):
-        # Setup
+        # Setup TODO(sang) should make all the test isolate
+        stop_server()
         success = start_server()
         if not success:
             raise Exception("Failed to start server")
diff --git a/engine/e2e-test/test_cli_engine_uninstall.py b/engine/e2e-test/test_cli_engine_uninstall.py
index d95e21e7b..0ca151d48 100644
--- a/engine/e2e-test/test_cli_engine_uninstall.py
+++ b/engine/e2e-test/test_cli_engine_uninstall.py
@@ -13,6 +13,7 @@ class TestCliEngineUninstall:
     @pytest.fixture(autouse=True)
     def setup_and_teardown(self):
         # Setup
+        stop_server()
         success = start_server()
         if not success:
             raise Exception("Failed to start server")
diff --git a/engine/main.cc b/engine/main.cc
index b53227ceb..e723a8fc7 100644
--- a/engine/main.cc
+++ b/engine/main.cc
@@ -4,6 +4,7 @@
 #include "controllers/configs.h"
 #include "controllers/engines.h"
 #include "controllers/events.h"
+#include "controllers/hardware.h"
 #include "controllers/models.h"
 #include "controllers/process_manager.h"
 #include "controllers/server.h"
@@ -35,7 +36,7 @@
 #error "Unsupported platform!"
 #endif
 
-void RunServer(std::optional<int> port) {
+void RunServer(std::optional<int> port, bool ignore_cout) {
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
   signal(SIGINT, SIG_IGN);
 #elif defined(_WIN32)
@@ -55,8 +56,10 @@ void RunServer(std::optional<int> port) {
       CTL_ERR("Error update " << config_path.string() << result.error());
     }
   }
-  std::cout << "Host: " << config.apiServerHost
-            << " Port: " << config.apiServerPort << "\n";
+  if (!ignore_cout) {
+    std::cout << "Host: " << config.apiServerHost
+              << " Port: " << config.apiServerPort << "\n";
+  }
   // Create logs/ folder and setup log to file
   std::filesystem::create_directories(
       std::filesystem::path(config.logFolderPath) /
@@ -87,6 +90,14 @@ void RunServer(std::optional<int> port) {
   LOG_INFO << "cortex.cpp version: undefined";
 #endif
 
+  auto hw_service = std::make_shared<services::HardwareService>();
+  hw_service->UpdateHardwareInfos();
+  if (hw_service->ShouldRestart()) {
+    CTL_INF("Restart to update hardware configuration");
+    hw_service->Restart(config.apiServerHost, std::stoi(config.apiServerPort));
+    return;
+  }
+
   using Event = cortex::event::Event;
   using EventQueue =
       eventpp::EventQueue<EventType,
@@ -99,8 +110,8 @@ void RunServer(std::optional<int> port) {
   auto engine_service = std::make_shared<EngineService>(download_service);
   auto inference_svc =
       std::make_shared<services::InferenceService>(engine_service);
-  auto model_service =
-      std::make_shared<ModelService>(download_service, inference_svc);
+  auto model_service = std::make_shared<ModelService>(
+      download_service, inference_svc, engine_service);
   auto config_service = std::make_shared<ConfigService>();
 
   // initialize custom controllers
@@ -108,6 +119,7 @@ void RunServer(std::optional<int> port) {
   auto model_ctl = std::make_shared<Models>(model_service, engine_service);
   auto event_ctl = std::make_shared<Events>(event_queue_ptr);
   auto pm_ctl = std::make_shared<ProcessManager>();
+  auto hw_ctl = std::make_shared<Hardware>(engine_service, hw_service);
   auto server_ctl =
       std::make_shared<inferences::server>(inference_svc, engine_service);
   auto config_ctl = std::make_shared<Configs>(config_service);
@@ -117,6 +129,7 @@ void RunServer(std::optional<int> port) {
   drogon::app().registerController(event_ctl);
   drogon::app().registerController(pm_ctl);
   drogon::app().registerController(server_ctl);
+  drogon::app().registerController(hw_ctl);
   drogon::app().registerController(config_ctl);
 
   auto upload_path = std::filesystem::temp_directory_path() / "cortex-uploads";
@@ -125,6 +138,9 @@ void RunServer(std::optional<int> port) {
   LOG_INFO << "Server started, listening at: " << config.apiServerHost << ":"
            << config.apiServerPort;
   LOG_INFO << "Please load your model";
+#ifndef _WIN32
+  drogon::app().enableReusePort();
+#endif
   drogon::app().addListener(config.apiServerHost,
                             std::stoi(config.apiServerPort));
   drogon::app().setThreadNum(drogon_thread_num);
@@ -166,6 +182,10 @@ void RunServer(std::optional<int> port) {
       });
 
   drogon::app().run();
+  if (hw_service->ShouldRestart()) {
+    CTL_INF("Restart to update hardware configuration");
+    hw_service->Restart(config.apiServerHost, std::stoi(config.apiServerPort));
+  }
 }
 
 int main(int argc, char* argv[]) {
@@ -182,6 +202,7 @@ int main(int argc, char* argv[]) {
   is_server = true;
 
   std::optional<int> server_port;
+  bool ignore_cout_log = false;
   for (int i = 0; i < argc; i++) {
     if (strcmp(argv[i], "--config_file_path") == 0) {
       file_manager_utils::cortex_config_file_path = argv[i + 1];
@@ -189,9 +210,11 @@ int main(int argc, char* argv[]) {
       file_manager_utils::cortex_data_folder_path = argv[i + 1];
     } else if (strcmp(argv[i], "--port") == 0) {
       server_port = std::stoi(argv[i + 1]);
+    } else if (strcmp(argv[i], "--ignore_cout") == 0) {
+      ignore_cout_log = true;
     } else if (strcmp(argv[i], "--loglevel") == 0) {
       std::string log_level = argv[i + 1];
-      logging_utils_helper::SetLogLevel(log_level);
+      logging_utils_helper::SetLogLevel(log_level, ignore_cout_log);
     }
   }
 
@@ -234,6 +257,6 @@ int main(int argc, char* argv[]) {
     }
   }
 
-  RunServer(server_port);
+  RunServer(server_port, ignore_cout_log);
   return 0;
 }
diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h
index 4e58fccfd..b339fd7df 100644
--- a/engine/services/engine_service.h
+++ b/engine/services/engine_service.h
@@ -13,36 +13,7 @@
 #include "utils/github_release_utils.h"
 #include "utils/result.hpp"
 #include "utils/system_info_utils.h"
-
-// TODO: namh think of the other name
-struct DefaultEngineVariant {
-  std::string engine;
-  std::string version;
-  std::string variant;
-
-  Json::Value ToJson() const {
-    Json::Value root;
-    root["engine"] = engine;
-    root["version"] = version;
-    root["variant"] = variant;
-    return root;
-  }
-};
-
-// TODO: namh think of the other name
-struct EngineVariantResponse {
-  std::string name;
-  std::string version;
-  std::string engine;
-
-  Json::Value ToJson() const {
-    Json::Value root;
-    root["name"] = name;
-    root["version"] = version;
-    root["engine"] = engine;
-    return root;
-  }
-};
+#include "common/engine_servicei.h"
 
 struct EngineUpdateResult {
   std::string engine;
@@ -66,7 +37,7 @@ struct SystemInfo;
 
 using EngineV = std::variant<EngineI*, CortexPythonEngineI*>;
 
-class EngineService {
+class EngineService: public EngineServiceI {
  private:
   using EngineRelease = github_release_utils::GitHubRelease;
   using EngineVariant = github_release_utils::GitHubAsset;
diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc
new file mode 100644
index 000000000..c40133564
--- /dev/null
+++ b/engine/services/hardware_service.cc
@@ -0,0 +1,314 @@
+// clang-format off
+#include "cli/commands/server_start_cmd.h"
+// clang-format on
+#include "hardware_service.h"
+#if defined(_WIN32) || defined(_WIN64)
+#include <minwindef.h>
+#include <processenv.h>
+#endif
+#include "cli/commands/cortex_upd_cmd.h"
+#include "database/hardwares.h"
+#include "services/engine_service.h"
+#include "utils/cortex_utils.h"
+
+namespace services {
+
+namespace {
+bool TryConnectToServer(const std::string& host, int port) {
+  constexpr const auto kMaxRetry = 3u;
+  auto count = 0u;
+  // Check if server is started
+  while (true) {
+    if (commands::IsServerAlive(host, port))
+      break;
+    // Wait for server up
+    std::this_thread::sleep_for(std::chrono::seconds(1));
+    if (count++ == kMaxRetry) {
+      std::cerr << "Could not start server" << std::endl;
+      return false;
+    }
+  }
+  return true;
+}
+}  // namespace
+
+HardwareInfo HardwareService::GetHardwareInfo() {
+  // append active state
+  cortex::db::Hardwares hw_db;
+  auto gpus = cortex::hw::GetGPUInfo();
+  auto res = hw_db.LoadHardwareList();
+  if (res.has_value()) {
+    // Only a few elements, brute-force is enough
+    for (auto& entry : res.value()) {
+      for (auto& gpu : gpus) {
+        if (gpu.uuid == entry.uuid) {
+          gpu.is_activated = entry.activated;
+        }
+      }
+    };
+  }
+
+  return HardwareInfo{.cpu = cortex::hw::GetCPUInfo(),
+                      .os = cortex::hw::GetOSInfo(),
+                      .ram = cortex::hw::GetMemoryInfo(),
+                      .storage = cortex::hw::GetStorageInfo(),
+                      .gpus = gpus,
+                      .power = cortex::hw::GetPowerInfo()};
+}
+
+bool HardwareService::Restart(const std::string& host, int port) {
+  if (!ahc_)
+    return true;
+  auto exe = commands::GetCortexServerBinary();
+  auto get_config_file_path = []() -> std::string {
+    if (file_manager_utils::cortex_config_file_path.empty()) {
+      return file_manager_utils::GetConfigurationPath().string();
+    }
+    return file_manager_utils::cortex_config_file_path;
+  };
+
+  auto get_data_folder_path = []() -> std::string {
+    if (file_manager_utils::cortex_data_folder_path.empty()) {
+      return file_manager_utils::GetCortexDataPath().string();
+    }
+    return file_manager_utils::cortex_data_folder_path;
+  };
+
+  auto set_env = [](const std::string& name, const std::string& value,
+                    bool is_override = true) -> bool {
+#if defined(_WIN32) || defined(_WIN64)
+    return _putenv_s(name.c_str(), value.c_str()) == 0;
+#else
+    return setenv(name.c_str(), value.c_str(), is_override) == 0;
+#endif
+  };
+
+#if defined(_WIN32) || defined(_WIN64) || defined(__linux__)
+  std::string cuda_visible_devices = "";
+  for (auto i : (*ahc_).gpus) {
+    if (!cuda_visible_devices.empty())
+      cuda_visible_devices += ",";
+    cuda_visible_devices += std::to_string(i);
+  }
+  if (cuda_visible_devices.empty())
+    cuda_visible_devices += " ";
+  // Set the CUDA_VISIBLE_DEVICES environment variable
+  if (!set_env("CUDA_VISIBLE_DEVICES", cuda_visible_devices)) {
+    LOG_WARN << "Error setting CUDA_VISIBLE_DEVICES";
+    return false;
+  }
+
+  const char* value = std::getenv("CUDA_VISIBLE_DEVICES");
+  if (value) {
+    LOG_INFO << "CUDA_VISIBLE_DEVICES is set to: " << value;
+  } else {
+    LOG_WARN << "CUDA_VISIBLE_DEVICES is not set.";
+  }
+#endif
+
+#if defined(_WIN32) || defined(_WIN64)
+  // Windows-specific code to create a new process
+  STARTUPINFO si;
+  PROCESS_INFORMATION pi;
+
+  ZeroMemory(&si, sizeof(si));
+  si.cb = sizeof(si);
+  ZeroMemory(&pi, sizeof(pi));
+  std::string params = "--ignore_cout";
+  params += " --config_file_path " + get_config_file_path();
+  params += " --data_folder_path " + get_data_folder_path();
+  std::string cmds = cortex_utils::GetCurrentPath() + "/" + exe + " " + params;
+  // Create child process
+  if (!CreateProcess(
+          NULL,  // No module name (use command line)
+          const_cast<char*>(
+              cmds.c_str()),  // Command line (replace with your actual executable)
+          NULL,               // Process handle not inheritable
+          NULL,               // Thread handle not inheritable
+          TRUE,               // Handle inheritance
+          0,                  // No creation flags
+          NULL,               // Use parent's environment block
+          NULL,               // Use parent's starting directory
+          &si,                // Pointer to STARTUPINFO structure
+          &pi))               // Pointer to PROCESS_INFORMATION structure
+  {
+    std::cout << "Could not start server: " << GetLastError() << std::endl;
+    return false;
+  } else {
+    if (!TryConnectToServer(host, port)) {
+      return false;
+    }
+  }
+
+#else
+  // Unix-like system-specific code to fork a child process
+  pid_t pid = fork();
+
+  if (pid < 0) {
+    // Fork failed
+    std::cerr << "Could not start server: " << std::endl;
+    return false;
+  } else if (pid == 0) {
+    // No need to configure LD_LIBRARY_PATH for macOS
+#if !defined(__APPLE__) || !defined(__MACH__)
+    const char* name = "LD_LIBRARY_PATH";
+    auto data = getenv(name);
+    std::string v;
+    if (auto g = getenv(name); g) {
+      v += g;
+    }
+    CTL_INF("LD_LIBRARY_PATH: " << v);
+    auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo);
+    auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo);
+
+    auto new_v = trt_path.string() + ":" + llamacpp_path.string() + ":" + v;
+    setenv(name, new_v.c_str(), true);
+    CTL_INF("LD_LIBRARY_PATH: " << getenv(name));
+#endif
+    std::string p = cortex_utils::GetCurrentPath() + "/" + exe;
+    execl(p.c_str(), exe.c_str(), "--ignore_cout", "--config_file_path",
+          get_config_file_path().c_str(), "--data_folder_path",
+          get_data_folder_path().c_str(), "--loglevel", "INFO", (char*)0);
+  } else {
+    // Parent process
+    if (!TryConnectToServer(host, port)) {
+      return false;
+    }
+  }
+#endif
+  return true;
+}
+
+bool HardwareService::SetActivateHardwareConfig(
+    const cortex::hw::ActivateHardwareConfig& ahc) {
+  // Note: need to map software_id and hardware_id
+  // Update to db
+  cortex::db::Hardwares hw_db;
+  auto activate = [&ahc](int software_id) {
+    return std::count(ahc.gpus.begin(), ahc.gpus.end(), software_id) > 0;
+  };
+  auto res = hw_db.LoadHardwareList();
+  if (res.has_value()) {
+    bool need_update = false;
+    std::vector<int> activated_ids;
+    // Check if need to update
+    for (auto const& e : res.value()) {
+      if (e.activated) {
+        activated_ids.push_back(e.software_id);
+      }
+    }
+    std::sort(activated_ids.begin(), activated_ids.end());
+    if (ahc.gpus.size() != activated_ids.size()) {
+      need_update = true;
+    } else {
+      for (size_t i = 0; i < ahc.gpus.size(); i++) {
+        if (ahc.gpus[i] != activated_ids[i])
+          need_update = true;
+      }
+    }
+
+    if (!need_update) {
+      CTL_INF("No hardware activation changes -> No need to update");
+      return false;
+    }
+
+    // Need to update, proceed
+    for (auto& e : res.value()) {
+      e.activated = activate(e.software_id);
+      auto res = hw_db.UpdateHardwareEntry(e.uuid, e);
+      if (res.has_error()) {
+        CTL_WRN(res.error());
+      }
+    }
+  }
+  ahc_ = ahc;
+  return true;
+}
+
+void HardwareService::UpdateHardwareInfos() {
+  using HwEntry = cortex::db::HardwareEntry;
+  auto gpus = cortex::hw::GetGPUInfo();
+  cortex::db::Hardwares hw_db;
+  auto b = hw_db.LoadHardwareList();
+  std::vector<int> activated_gpu_bf;
+  std::string debug_b;
+  for (auto const& he : b.value()) {
+    if (he.type == "gpu" && he.activated) {
+      debug_b += std::to_string(he.software_id) + " ";
+      activated_gpu_bf.push_back(he.software_id);
+    }
+  }
+  CTL_INF("Activated GPUs before: " << debug_b);
+  for (auto const& gpu : gpus) {
+    // ignore error
+    // Note: only support NVIDIA for now, so hardware_id = software_id
+    auto res = hw_db.AddHardwareEntry(HwEntry{.uuid = gpu.uuid,
+                                              .type = "gpu",
+                                              .hardware_id = std::stoi(gpu.id),
+                                              .software_id = std::stoi(gpu.id),
+                                              .activated = true});
+    if (res.has_error()) {
+      CTL_WRN(res.error());
+    }
+  }
+
+  auto a = hw_db.LoadHardwareList();
+  std::vector<HwEntry> a_gpu;
+  std::vector<int> activated_gpu_af;
+  std::string debug_a;
+  for (auto const& he : a.value()) {
+    if (he.type == "gpu" && he.activated) {
+      debug_a += std::to_string(he.software_id) + " ";
+      activated_gpu_af.push_back(he.software_id);
+    }
+  }
+  CTL_INF("Activated GPUs after: " << debug_a);
+  // if hardware list changes, need to restart
+  std::sort(activated_gpu_bf.begin(), activated_gpu_bf.end());
+  std::sort(activated_gpu_af.begin(), activated_gpu_af.end());
+  bool need_restart = false;
+  if (activated_gpu_bf.size() != activated_gpu_af.size()) {
+    need_restart = true;
+  } else {
+    for (size_t i = 0; i < activated_gpu_bf.size(); i++) {
+      if (activated_gpu_bf[i] != activated_gpu_af[i]) {
+        need_restart = true;
+        break;
+      }
+    }
+  }
+
+#if defined(_WIN32) || defined(_WIN64) || defined(__linux__)
+  if (system_info_utils::IsNvidiaSmiAvailable()) {
+    const char* value = std::getenv("CUDA_VISIBLE_DEVICES");
+    if (value) {
+      LOG_INFO << "CUDA_VISIBLE_DEVICES: " << value;
+    } else {
+      need_restart = true;
+    }
+  }
+#endif
+
+  if (need_restart) {
+    CTL_INF("Need restart");
+    ahc_ = {.gpus = activated_gpu_af};
+  }
+}
+
+bool HardwareService::IsValidConfig(
+    const cortex::hw::ActivateHardwareConfig& ahc) {
+  cortex::db::Hardwares hw_db;
+  auto is_valid = [&ahc](int software_id) {
+    return std::count(ahc.gpus.begin(), ahc.gpus.end(), software_id) > 0;
+  };
+  auto res = hw_db.LoadHardwareList();
+  if (res.has_value()) {
+    for (auto const& e : res.value()) {
+      if (!is_valid(e.software_id)) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+}  // namespace services
\ No newline at end of file
diff --git a/engine/services/hardware_service.h b/engine/services/hardware_service.h
new file mode 100644
index 000000000..48ab7a4b1
--- /dev/null
+++ b/engine/services/hardware_service.h
@@ -0,0 +1,37 @@
+#pragma once
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+#include "common/hardware_config.h"
+#include "utils/hardware/cpu_info.h"
+#include "utils/hardware/gpu_info.h"
+#include "utils/hardware/os_info.h"
+#include "utils/hardware/power_info.h"
+#include "utils/hardware/ram_info.h"
+#include "utils/hardware/storage_info.h"
+
+namespace services {
+
+struct HardwareInfo {
+  cortex::hw::CPU cpu;
+  cortex::hw::OS os;
+  cortex::hw::Memory ram;
+  cortex::hw::StorageInfo storage;
+  std::vector<cortex::hw::GPU> gpus;
+  cortex::hw::PowerInfo power;
+};
+
+class HardwareService {
+ public:
+  HardwareInfo GetHardwareInfo();
+  bool Restart(const std::string& host, int port);
+  bool SetActivateHardwareConfig(const cortex::hw::ActivateHardwareConfig& ahc);
+  bool ShouldRestart() const { return !!ahc_; }
+  void UpdateHardwareInfos();
+  bool IsValidConfig(const cortex::hw::ActivateHardwareConfig& ahc);
+
+ private:
+  std::optional<cortex::hw::ActivateHardwareConfig> ahc_;
+};
+}  // namespace services
diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index 387346f6d..3a8507c22 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -6,7 +6,9 @@
 #include "config/gguf_parser.h"
 #include "config/yaml_config.h"
 #include "database/models.h"
+#include "hardware_service.h"
 #include "httplib.h"
+#include "services/engine_service.h"
 #include "utils/cli_selection_utils.h"
 #include "utils/engine_constants.h"
 #include "utils/file_manager_utils.h"
@@ -596,7 +598,7 @@ cpp::result<void, std::string> ModelService::DeleteModel(
   }
 }
 
-cpp::result<bool, std::string> ModelService::StartModel(
+cpp::result<StartModelResult, std::string> ModelService::StartModel(
     const std::string& model_handle,
     const StartParameterOverride& params_override) {
   namespace fs = std::filesystem;
@@ -626,7 +628,7 @@ cpp::result<bool, std::string> ModelService::StartModel(
             fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).string();
       } else {
         LOG_WARN << "model_path is empty";
-        return false;
+        return StartModelResult{.success = false};
       }
       json_data["system_prompt"] = mc.system_template;
       json_data["user_prompt"] = mc.user_template;
@@ -659,17 +661,113 @@ cpp::result<bool, std::string> ModelService::StartModel(
 #undef ASSIGN_IF_PRESENT
 
     CTL_INF(json_data.toStyledString());
+    // TODO(sang) move this into another function
+    // Calculate ram/vram needed to load model
+    services::HardwareService hw_svc;
+    auto hw_info = hw_svc.GetHardwareInfo();
+    assert(!!engine_svc_);
+    auto default_engine = engine_svc_->GetDefaultEngineVariant(kLlamaEngine);
+    bool is_cuda = false;
+    if (default_engine.has_error()) {
+      CTL_INF("Could not get default engine");
+    } else {
+      auto& de = default_engine.value();
+      is_cuda = de.variant.find("cuda") != std::string::npos;
+      CTL_INF("is_cuda: " << is_cuda);
+    }
+
+    std::optional<std::string> warning;
+    if (is_cuda && !system_info_utils::IsNvidiaSmiAvailable()) {
+      CTL_INF(
+          "Running cuda variant but nvidia-driver is not installed yet, "
+          "fallback to CPU mode");
+      auto res = engine_svc_->GetInstalledEngineVariants(kLlamaEngine);
+      if (res.has_error()) {
+        CTL_WRN("Could not get engine variants");
+        return cpp::fail("Nvidia-driver is not installed!");
+      } else {
+        auto& es = res.value();
+        std::sort(
+            es.begin(), es.end(),
+            [](const EngineVariantResponse& e1,
+               const EngineVariantResponse& e2) { return e1.name > e2.name; });
+        for (auto& e : es) {
+          CTL_INF(e.name << " " << e.version << " " << e.engine);
+          // Select the first CPU candidate
+          if (e.name.find("cuda") == std::string::npos) {
+            auto r = engine_svc_->SetDefaultEngineVariant(kLlamaEngine,
+                                                          e.version, e.name);
+            if (r.has_error()) {
+              CTL_WRN("Could not set default engine variant");
+              return cpp::fail("Nvidia-driver is not installed!");
+            } else {
+              CTL_INF("Change default engine to: " << e.name);
+              auto rl = engine_svc_->LoadEngine(kLlamaEngine);
+              if (rl.has_error()) {
+                return cpp::fail("Nvidia-driver is not installed!");
+              } else {
+                CTL_INF("Engine started");
+                is_cuda = false;
+                warning = "Nvidia-driver is not installed, use CPU variant: " +
+                          e.version + "-" + e.name;
+                break;
+              }
+            }
+          }
+        }
+        // If we reach here, means that no CPU variant to fallback
+        if (!warning) {
+          return cpp::fail(
+              "Nvidia-driver is not installed, no available CPU version to "
+              "fallback");
+        }
+      }
+    }
+    // If in GPU acceleration mode:
+    // We use all visible GPUs, so only need to sum all free vram
+    auto free_vram_MiB = 0u;
+    for (const auto& gpu : hw_info.gpus) {
+      free_vram_MiB += gpu.free_vram;
+    }
+
+    auto free_ram_MiB = hw_info.ram.available_MiB;
+
+    auto const& mp = json_data["model_path"].asString();
+    auto ngl = json_data["ngl"].asInt();
+    // Bypass for now
+    auto vram_needed_MiB = 0u;
+    auto ram_needed_MiB = 0u;
+
+    if (vram_needed_MiB > free_vram_MiB && is_cuda) {
+      CTL_WRN("Not enough VRAM - " << "required: " << vram_needed_MiB
+                                   << ", available: " << free_vram_MiB);
+
+      return cpp::fail(
+          "Not enough VRAM - required: " + std::to_string(vram_needed_MiB) +
+          " MiB, available: " + std::to_string(free_vram_MiB) +
+          " MiB - Should adjust ngl to " + std::to_string(free_vram_MiB / (vram_needed_MiB / ngl) - 1));
+    }
+
+    if (ram_needed_MiB > free_ram_MiB) {
+      CTL_WRN("Not enough RAM - " << "required: " << ram_needed_MiB
+                                  << ", available: " << free_ram_MiB);
+      return cpp::fail(
+          "Not enough RAM - required: " + std::to_string(ram_needed_MiB) +
+          " MiB,, available: " + std::to_string(free_ram_MiB) + " MiB");
+    }
+
     assert(!!inference_svc_);
     auto ir =
         inference_svc_->LoadModel(std::make_shared<Json::Value>(json_data));
     auto status = std::get<0>(ir)["status_code"].asInt();
     auto data = std::get<1>(ir);
     if (status == httplib::StatusCode::OK_200) {
-      return true;
+      return StartModelResult{.success = true, .warning = warning};
     } else if (status == httplib::StatusCode::Conflict_409) {
       CTL_INF("Model '" + model_handle + "' is already loaded");
-      return true;
+      return StartModelResult{.success = true, .warning = warning};
     } else {
+      // only report to user the error
       CTL_ERR("Model failed to start with status code: " << status);
       return cpp::fail("Model failed to start: " + data["message"].asString());
     }
diff --git a/engine/services/model_service.h b/engine/services/model_service.h
index 2800606ef..47d61c154 100644
--- a/engine/services/model_service.h
+++ b/engine/services/model_service.h
@@ -6,6 +6,7 @@
 #include "config/model_config.h"
 #include "services/download_service.h"
 #include "services/inference_service.h"
+#include "common/engine_servicei.h"
 
 struct ModelPullInfo {
   std::string id;
@@ -28,6 +29,11 @@ struct StartParameterOverride {
   bool bypass_model_check() const { return mmproj.has_value(); }
 };
 
+struct StartModelResult {
+ bool success;
+ std::optional<std::string> warning;
+};
+
 class ModelService {
  public:
   explicit ModelService(std::shared_ptr<DownloadService> download_service)
@@ -35,9 +41,11 @@ class ModelService {
 
   explicit ModelService(
       std::shared_ptr<DownloadService> download_service,
-      std::shared_ptr<services::InferenceService> inference_service)
+      std::shared_ptr<services::InferenceService> inference_service,
+      std::shared_ptr<EngineServiceI> engine_svc)
       : download_service_{download_service},
-        inference_svc_(inference_service) {};
+        inference_svc_(inference_service),
+        engine_svc_(engine_svc) {};
 
   /**
    * Return model id if download successfully
@@ -63,7 +71,7 @@ class ModelService {
    */
   cpp::result<void, std::string> DeleteModel(const std::string& model_handle);
 
-  cpp::result<bool, std::string> StartModel(
+  cpp::result<StartModelResult, std::string> StartModel(
       const std::string& model_handle,
       const StartParameterOverride& params_override);
 
@@ -99,4 +107,5 @@ class ModelService {
   std::shared_ptr<DownloadService> download_service_;
   std::shared_ptr<services::InferenceService> inference_svc_;
   std::unordered_set<std::string> bypass_stop_check_set_;
+  std::shared_ptr<EngineServiceI> engine_svc_ = nullptr;
 };
diff --git a/engine/test/components/main.cc b/engine/test/components/main.cc
index 0fe7f3f26..08080680e 100644
--- a/engine/test/components/main.cc
+++ b/engine/test/components/main.cc
@@ -1,9 +1,14 @@
-#include "gtest/gtest.h"
 #include <drogon/HttpAppFramework.h>
 #include <drogon/drogon.h>
+#include "gtest/gtest.h"
 
-int main(int argc, char **argv) {
-    ::testing::InitGoogleTest(&argc, argv);
-    int ret = RUN_ALL_TESTS();
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  ::testing::GTEST_FLAG(filter) = "-FileManagerConfigTest.*";
+  int ret = RUN_ALL_TESTS();
+  if (ret != 0)
     return ret;
+  ::testing::GTEST_FLAG(filter) = "FileManagerConfigTest.*";
+  ret = RUN_ALL_TESTS();
+  return ret;
 }
diff --git a/engine/test/components/test_hardware.cc b/engine/test/components/test_hardware.cc
new file mode 100644
index 000000000..d87beb744
--- /dev/null
+++ b/engine/test/components/test_hardware.cc
@@ -0,0 +1,198 @@
+#include "gtest/gtest.h"
+#include "utils/hardware/cpu_info.h"
+#include "utils/hardware/gpu_info.h"
+#include "utils/hardware/os_info.h"
+
+class CpuJsonTests : public ::testing::Test {
+ protected:
+  cortex::hw::CPU test_cpu;
+
+  void SetUp() override {
+    test_cpu.cores = 8;
+    test_cpu.arch = "x86_64";
+    test_cpu.model = "Intel Core i7";
+    test_cpu.instructions = {"MOV", "ADD", "SUB", "MUL"};
+  }
+};
+
+TEST_F(CpuJsonTests, ToJson_ValidCPU_Success) {
+  Json::Value json_result = cortex::hw::ToJson(test_cpu);
+
+  EXPECT_EQ(json_result["cores"].asInt(), test_cpu.cores);
+  EXPECT_EQ(json_result["arch"].asString(), test_cpu.arch);
+  EXPECT_EQ(json_result["model"].asString(), test_cpu.model);
+
+  Json::Value instructions_json = json_result["instructions"];
+  EXPECT_EQ(instructions_json.size(), test_cpu.instructions.size());
+  std::vector<std::string> insts;
+  for (auto const& v : instructions_json) {
+    insts.push_back(v.asString());
+  }
+
+  for (size_t i = 0; i < test_cpu.instructions.size(); ++i) {
+    EXPECT_EQ(insts[i], test_cpu.instructions[i]);
+  }
+}
+
+TEST_F(CpuJsonTests, FromJson_ValidJson_Success) {
+  Json::Value json_input;
+
+  json_input["cores"] = test_cpu.cores;
+  json_input["arch"] = test_cpu.arch;
+  json_input["model"] = test_cpu.model;
+
+  Json::Value instructions_json(Json::arrayValue);
+  for (const auto& instruction : test_cpu.instructions) {
+    instructions_json.append(instruction);
+  }
+
+  json_input["instructions"] = instructions_json;
+
+  cortex::hw::CPU cpu_result = cortex::hw::cpu::FromJson(json_input);
+
+  EXPECT_EQ(cpu_result.cores, test_cpu.cores);
+  EXPECT_EQ(cpu_result.arch, test_cpu.arch);
+  EXPECT_EQ(cpu_result.model, test_cpu.model);
+
+  EXPECT_EQ(cpu_result.instructions.size(), test_cpu.instructions.size());
+
+  for (size_t i = 0; i < test_cpu.instructions.size(); ++i) {
+    EXPECT_EQ(cpu_result.instructions[i], test_cpu.instructions[i]);
+  }
+}
+
+class GpuJsonTests : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    // Set up a vector of GPUs for testing
+    cortex::hw::NvidiaAddInfo nvidia_info{"460.32.03", "6.1"};
+
+    test_gpus.push_back({.id = "0",
+                         .name = "NVIDIA GeForce GTX 1080",
+                         .version = "1.0",
+                         .add_info = nvidia_info,
+                         .free_vram = 4096,
+                         .total_vram = 8192,
+                         .uuid = "GPU-12345678",
+                         .is_activated = true});
+
+    test_gpus.push_back({.id = "1",
+                         .name = "NVIDIA GeForce RTX 2080",
+                         .version = "1.1",
+                         .add_info = nvidia_info,
+                         .free_vram = 6144,
+                         .total_vram = 8192,
+                         .uuid = "GPU-87654321",
+                         .is_activated = false});
+  }
+
+  std::vector<cortex::hw::GPU> test_gpus;
+};
+
+TEST_F(GpuJsonTests, ToJson_ValidGPUs_Success) {
+  Json::Value json_result = cortex::hw::ToJson(test_gpus);
+
+  EXPECT_EQ(json_result.size(), test_gpus.size());
+
+  size_t i = 0;
+  for (auto const& jr : json_result) {
+    EXPECT_EQ(jr["id"].asString(), test_gpus[i].id);
+    EXPECT_EQ(jr["name"].asString(), test_gpus[i].name);
+    EXPECT_EQ(jr["version"].asString(), test_gpus[i].version);
+
+    auto& nvidia_info =
+        std::get<cortex::hw::NvidiaAddInfo>(test_gpus[i].add_info);
+
+    EXPECT_EQ(jr["additional_information"]["driver_version"].asString(),
+              nvidia_info.driver_version);
+    EXPECT_EQ(jr["additional_information"]["compute_cap"].asString(),
+              nvidia_info.compute_cap);
+
+    EXPECT_EQ(jr["free_vram"].asInt64(), test_gpus[i].free_vram);
+    EXPECT_EQ(jr["total_vram"].asInt64(), test_gpus[i].total_vram);
+    EXPECT_EQ(jr["uuid"].asString(), test_gpus[i].uuid);
+    EXPECT_EQ(jr["activated"].asBool(), test_gpus[i].is_activated);
+    i++;
+  }
+}
+
+TEST_F(GpuJsonTests, FromJson_ValidJson_Success) {
+  Json::Value json_input(Json::arrayValue);
+
+  for (const auto& gpu : test_gpus) {
+    Json::Value gpu_json;
+
+    gpu_json["id"] = gpu.id;
+    gpu_json["name"] = gpu.name;
+    gpu_json["version"] = gpu.version;
+
+    cortex::hw::NvidiaAddInfo nvidia_info =
+        std::get<cortex::hw::NvidiaAddInfo>(gpu.add_info);
+
+    Json::Value add_info_json;
+    add_info_json["driver_version"] = nvidia_info.driver_version;
+    add_info_json["compute_cap"] = nvidia_info.compute_cap;
+
+    gpu_json["additional_information"] = add_info_json;
+
+    gpu_json["free_vram"] = gpu.free_vram;
+    gpu_json["total_vram"] = gpu.total_vram;
+    gpu_json["uuid"] = gpu.uuid;
+    gpu_json["activated"] = gpu.is_activated;
+
+    json_input.append(gpu_json);
+  }
+
+  auto result_gpus = cortex::hw::gpu::FromJson(json_input);
+
+  EXPECT_EQ(result_gpus.size(), test_gpus.size());
+
+  for (size_t i = 0; i < test_gpus.size(); ++i) {
+    EXPECT_EQ(result_gpus[i].id, test_gpus[i].id);
+    EXPECT_EQ(result_gpus[i].name, test_gpus[i].name);
+    EXPECT_EQ(result_gpus[i].version, test_gpus[i].version);
+
+    auto& nvidia_info_result =
+        std::get<cortex::hw::NvidiaAddInfo>(result_gpus[i].add_info);
+    auto& nvidia_info_test =
+        std::get<cortex::hw::NvidiaAddInfo>(test_gpus[i].add_info);
+
+    EXPECT_EQ(nvidia_info_result.driver_version,
+              nvidia_info_test.driver_version);
+    EXPECT_EQ(nvidia_info_result.compute_cap, nvidia_info_test.compute_cap);
+
+    EXPECT_EQ(result_gpus[i].free_vram, test_gpus[i].free_vram);
+    EXPECT_EQ(result_gpus[i].total_vram, test_gpus[i].total_vram);
+    EXPECT_EQ(result_gpus[i].uuid, test_gpus[i].uuid);
+    EXPECT_EQ(result_gpus[i].is_activated, test_gpus[i].is_activated);
+  }
+}
+
+class OsJsonTests : public ::testing::Test {
+protected:
+    cortex::hw::OS test_os;
+
+    void SetUp() override {
+        test_os.name = "Ubuntu";
+        test_os.version = "20.04";
+        test_os.arch = "x86_64";
+    }
+};
+
+TEST_F(OsJsonTests, ToJson_ValidOS_Success) {
+    Json::Value json_result = cortex::hw::ToJson(test_os);
+
+    EXPECT_EQ(json_result["name"].asString(), test_os.name);
+    EXPECT_EQ(json_result["version"].asString(), test_os.version);
+}
+
+TEST_F(OsJsonTests, FromJson_ValidJson_Success) {
+    Json::Value json_input;
+    json_input["name"] = test_os.name;
+    json_input["version"] = test_os.version;
+
+    cortex::hw::OS os_result = cortex::hw::os::FromJson(json_input);
+
+    EXPECT_EQ(os_result.name, test_os.name);
+    EXPECT_EQ(os_result.version, test_os.version);
+}
\ No newline at end of file
diff --git a/engine/test/components/test_huggingface_utils.cc b/engine/test/components/test_huggingface_utils.cc
index 236c904af..8377200e5 100644
--- a/engine/test/components/test_huggingface_utils.cc
+++ b/engine/test/components/test_huggingface_utils.cc
@@ -16,7 +16,7 @@ TEST_F(HuggingFaceUtilTestSuite, TestGetModelRepositoryBranches) {
   EXPECT_EQ(branches.value()["gguf"].ref, "refs/heads/gguf");
 }
 
-
+// TODO(sang) re-enable when main branch is fixed
 TEST_F(HuggingFaceUtilTestSuite, DISABLED_TestGetHuggingFaceModelRepoInfoSuccessfully) {
   auto model_info =
       huggingface_utils::GetHuggingFaceModelRepoInfo("cortexso", "tinyllama");
diff --git a/engine/utils/cortex_utils.h b/engine/utils/cortex_utils.h
index 5e62661ba..2d250df72 100644
--- a/engine/utils/cortex_utils.h
+++ b/engine/utils/cortex_utils.h
@@ -10,15 +10,8 @@
 #include <regex>
 #include <string>
 #include <vector>
-
-// Include platform-specific headers
-#ifdef _WIN32
-#include <direct.h>
-#include <windows.h>
-#include <winsock2.h>
-#define mkdir _mkdir
-#else
-#include <dirent.h>
+#if defined(__linux__)
+#include <limits.h>
 #include <unistd.h>
 #endif
 
@@ -31,208 +24,6 @@ inline std::string logs_folder = "./logs";
 inline std::string logs_base_name = "./logs/cortex.log";
 inline std::string logs_cli_base_name = "./logs/cortex-cli.log";
 
-inline std::string extractBase64(const std::string& input) {
-  std::regex pattern("base64,(.*)");
-  std::smatch match;
-
-  if (std::regex_search(input, match, pattern)) {
-    std::string base64_data = match[1];
-    base64_data = base64_data.substr(0, base64_data.length() - 1);
-    return base64_data;
-  }
-
-  return "";
-}
-
-// Helper function to encode data to Base64
-inline std::string base64Encode(const std::vector<unsigned char>& data) {
-  static const char encodingTable[] =
-      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-  std::string encodedData;
-  int i = 0;
-  int j = 0;
-  unsigned char array3[3];
-  unsigned char array4[4];
-
-  for (unsigned char c : data) {
-    array3[i++] = c;
-    if (i == 3) {
-      array4[0] = (array3[0] & 0xfc) >> 2;
-      array4[1] = ((array3[0] & 0x03) << 4) + ((array3[1] & 0xf0) >> 4);
-      array4[2] = ((array3[1] & 0x0f) << 2) + ((array3[2] & 0xc0) >> 6);
-      array4[3] = array3[2] & 0x3f;
-
-      for (i = 0; i < 4; i++)
-        encodedData += encodingTable[array4[i]];
-      i = 0;
-    }
-  }
-
-  if (i) {
-    for (j = i; j < 3; j++)
-      array3[j] = '\0';
-
-    array4[0] = (array3[0] & 0xfc) >> 2;
-    array4[1] = ((array3[0] & 0x03) << 4) + ((array3[1] & 0xf0) >> 4);
-    array4[2] = ((array3[1] & 0x0f) << 2) + ((array3[2] & 0xc0) >> 6);
-
-    for (j = 0; j < i + 1; j++)
-      encodedData += encodingTable[array4[j]];
-
-    while (i++ < 3)
-      encodedData += '=';
-  }
-
-  return encodedData;
-}
-
-// Function to load an image and convert it to Base64
-inline std::string imageToBase64(const std::string& imagePath) {
-  std::ifstream imageFile(imagePath, std::ios::binary);
-  if (!imageFile.is_open()) {
-    throw std::runtime_error("Could not open the image file.");
-  }
-
-  std::vector<unsigned char> buffer(std::istreambuf_iterator<char>(imageFile),
-                                    {});
-  return base64Encode(buffer);
-}
-
-inline void processLocalImage(
-    const std::string& localPath,
-    std::function<void(const std::string&)> callback) {
-  try {
-    std::string base64Image = imageToBase64(localPath);
-    callback(base64Image);  // Invoke the callback with the Base64 string
-  } catch (const std::exception& e) {
-    std::cerr << "Error during processing: " << e.what() << std::endl;
-  }
-}
-
-inline std::vector<std::string> listFilesInDir(const std::string& path) {
-  std::vector<std::string> files;
-
-#ifdef _WIN32
-  // Windows-specific code
-  WIN32_FIND_DATA findFileData;
-  HANDLE hFind = FindFirstFile((path + "\\*").c_str(), &findFileData);
-
-  if (hFind != INVALID_HANDLE_VALUE) {
-    do {
-      if (!(findFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
-        files.push_back(findFileData.cFileName);
-      }
-    } while (FindNextFile(hFind, &findFileData) != 0);
-    FindClose(hFind);
-  }
-#else
-  // POSIX-specific code (Linux, Unix, MacOS)
-  DIR* dir;
-  struct dirent* ent;
-
-  if ((dir = opendir(path.c_str())) != NULL) {
-    while ((ent = readdir(dir)) != NULL) {
-      if (ent->d_type == DT_REG) {  // Check if it's a regular file
-        files.push_back(ent->d_name);
-      }
-    }
-    closedir(dir);
-  }
-#endif
-
-  return files;
-}
-
-inline std::string generate_random_string(std::size_t length) {
-  const std::string characters =
-      "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
-
-  std::random_device rd;
-  std::mt19937 generator(rd());
-
-  std::uniform_int_distribution<> distribution(
-      0, static_cast<int>(characters.size()) - 1);
-
-  std::string random_string(length, '\0');
-  std::generate_n(random_string.begin(), length,
-                  [&]() { return characters[distribution(generator)]; });
-
-  return random_string;
-}
-
-#if (defined(__GNUC__) || defined(__clang__)) && \
-    (defined(__x86_64__) || defined(__i386__))
-#include <cpuid.h>
-inline bool isAVX2Supported() {
-  unsigned eax, ebx, ecx, edx;
-  if (__get_cpuid_max(0, nullptr) < 7)
-    return false;
-
-  __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
-  return (ebx & (1 << 5)) != 0;
-}
-#elif defined(_MSC_VER) && defined(_M_X64) || defined(_M_IX86)
-#include <intrin.h>
-inline bool isAVX2Supported() {
-  int cpuInfo[4];
-  __cpuid(cpuInfo, 0);
-  int nIds = cpuInfo[0];
-  if (nIds >= 7) {
-    __cpuidex(cpuInfo, 7, 0);
-    return (cpuInfo[1] & (1 << 5)) != 0;
-  }
-  return false;
-}
-#else
-inline bool isAVX2Supported() {
-  return false;
-}
-#endif
-
-inline void nitro_logo() {
-  std::string rainbowColors[] = {
-      "\033[93m",  // Yellow
-      "\033[94m",  // Blue
-  };
-
-  std::string resetColor = "\033[0m";
-  std::string asciiArt =
-      "      ___                                   ___           ___     \n"
-      "     /__/        ___           ___        /  /\\         /  /\\    \n"
-      "     \\  \\:\\      /  /\\         /  /\\      /  /::\\       /  /::\\  "
-      " \n"
-      "      \\  \\:\\    /  /:/        /  /:/     /  /:/\\:\\     /  /:/\\:\\ "
-      " \n"
-      "  _____\\__\\:\\  /__/::\\       /  /:/     /  /:/  \\:\\   /  /:/  "
-      "\\:\\ \n"
-      " /__/::::::::\\ \\__\\/\\:\\__   /  /::\\    /__/:/ /:/___ /__/:/ "
-      "\\__\\:\\\n"
-      " \\  \\:\\~~\\~~\\/    \\  \\:\\/\\ /__/:/\\:\\   \\  \\:\\/:::::/ \\  "
-      "\\:\\ /  /:/\n"
-      "  \\  \\:\\  ~~~      \\__\\::/ \\__\\/  \\:\\   \\  \\::/~~~~   \\  "
-      "\\:\\  /:/ \n"
-      "   \\  \\:\\          /__/:/       \\  \\:\\   \\  \\:\\        \\  "
-      "\\:\\/:/  \n"
-      "    \\  \\:\\         \\__\\/         \\__\\/    \\  \\:\\        \\  "
-      "\\::/   \n"
-      "     \\__\\/                                 \\__\\/         \\__\\/    "
-      "\n";
-
-  int colorIndex = 0;
-
-  for (char c : asciiArt) {
-    if (c == '\n') {
-      std::cout << resetColor << c;
-      colorIndex = 0;
-    } else {
-      std::cout << rainbowColors[colorIndex % 2] << c;
-      colorIndex++;
-    }
-  }
-
-  std::cout << resetColor;  // Reset color at the endreturn;
-}
-
 inline drogon::HttpResponsePtr CreateCortexHttpResponse() {
   return drogon::HttpResponse::newHttpResponse();
 }
diff --git a/engine/utils/cpuid/cpu_info.cc b/engine/utils/cpuid/cpu_info.cc
index 538221536..3d4a56ffc 100644
--- a/engine/utils/cpuid/cpu_info.cc
+++ b/engine/utils/cpuid/cpu_info.cc
@@ -173,9 +173,9 @@ std::string CpuInfo::to_string() {
   s += "avx512_er = " + get(impl->has_avx512_er) + "| ";
   s += "avx512_cd = " + get(impl->has_avx512_cd) + "| ";
   s += "avx512_bw = " + get(impl->has_avx512_bw) + "| ";
-  s += "has_avx512_vl = " + get(impl->has_avx512_vl) + "| ";
-  s += "has_avx512_vbmi = " + get(impl->has_avx512_vbmi) + "| ";
-  s += "has_avx512_vbmi2 = " + get(impl->has_avx512_vbmi2) + "| ";
+  s += "avx512_vl = " + get(impl->has_avx512_vl) + "| ";
+  s += "avx512_vbmi = " + get(impl->has_avx512_vbmi) + "| ";
+  s += "avx512_vbmi2 = " + get(impl->has_avx512_vbmi2) + "| ";
   s += "avx512_vnni = " + get(impl->has_avx512_vnni) + "| ";
   s += "avx512_bitalg = " + get(impl->has_avx512_bitalg) + "| ";
   s += "avx512_vpopcntdq = " + get(impl->has_avx512_vpopcntdq) + "| ";
@@ -187,4 +187,43 @@ std::string CpuInfo::to_string() {
   return s;
 }
 
-}  // namespace cpuid
+std::vector<std::string> CpuInfo::instructions() {
+  std::vector<std::string> res;
+#define ADD_FEATURE_IF_PRESENT(feature_name) \
+  if (impl->has_##feature_name)              \
+    res.emplace_back(#feature_name);
+
+  ADD_FEATURE_IF_PRESENT(fpu);
+  ADD_FEATURE_IF_PRESENT(mmx);
+  ADD_FEATURE_IF_PRESENT(sse);
+  ADD_FEATURE_IF_PRESENT(sse2);
+  ADD_FEATURE_IF_PRESENT(sse3);
+  ADD_FEATURE_IF_PRESENT(ssse3);
+  ADD_FEATURE_IF_PRESENT(sse4_1);
+  ADD_FEATURE_IF_PRESENT(sse4_2);
+  ADD_FEATURE_IF_PRESENT(pclmulqdq);
+  ADD_FEATURE_IF_PRESENT(avx);
+  ADD_FEATURE_IF_PRESENT(avx2);
+  ADD_FEATURE_IF_PRESENT(avx512_f);
+  ADD_FEATURE_IF_PRESENT(avx512_dq);
+  ADD_FEATURE_IF_PRESENT(avx512_ifma);
+  ADD_FEATURE_IF_PRESENT(avx512_pf);
+  ADD_FEATURE_IF_PRESENT(avx512_er);
+  ADD_FEATURE_IF_PRESENT(avx512_cd);
+  ADD_FEATURE_IF_PRESENT(avx512_bw);
+  ADD_FEATURE_IF_PRESENT(avx512_vl);
+  ADD_FEATURE_IF_PRESENT(avx512_vbmi);
+  ADD_FEATURE_IF_PRESENT(avx512_vbmi2);
+  ADD_FEATURE_IF_PRESENT(avx512_vnni);
+  ADD_FEATURE_IF_PRESENT(avx512_bitalg);
+  ADD_FEATURE_IF_PRESENT(avx512_vpopcntdq);
+  ADD_FEATURE_IF_PRESENT(avx512_4vnniw);
+  ADD_FEATURE_IF_PRESENT(avx512_4fmaps);
+  ADD_FEATURE_IF_PRESENT(avx512_vp2intersect);
+  ADD_FEATURE_IF_PRESENT(aes);
+  ADD_FEATURE_IF_PRESENT(f16c);
+#undef ADD_FEATURE_IF_PRESENT
+  return res;
+}
+
+}  // namespace cortex::cpuid
diff --git a/engine/utils/cpuid/cpu_info.h b/engine/utils/cpuid/cpu_info.h
index 384d0d6f0..fcdf82bd0 100644
--- a/engine/utils/cpuid/cpu_info.h
+++ b/engine/utils/cpuid/cpu_info.h
@@ -5,6 +5,7 @@
 
 #include <memory>
 #include <string>
+#include <vector>
 
 namespace cortex::cpuid {
 /// The CpuInfo object extract information about which, if any, additional
@@ -120,6 +121,8 @@ class CpuInfo {
 
   std::string to_string();
 
+  std::vector<std::string> instructions();
+
  public:
   /// Private implementation
   struct Impl;
diff --git a/engine/utils/hardware/cpu_info.h b/engine/utils/hardware/cpu_info.h
new file mode 100644
index 000000000..4c2cb3027
--- /dev/null
+++ b/engine/utils/hardware/cpu_info.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <json/json.h>
+#include <string>
+#include <string_view>
+#include <vector>
+#include "common/hardware_common.h"
+#include "hwinfo/hwinfo.h"
+#include "utils/cpuid/cpu_info.h"
+
+namespace cortex::hw {
+inline CPU GetCPUInfo() {
+  auto cpu = hwinfo::getAllCPUs()[0];
+  cortex::cpuid::CpuInfo inst;
+  return CPU{.cores = cpu.numPhysicalCores(),
+             .arch = std::string(GetArch()),
+             .model = cpu.modelName(),
+             .instructions = inst.instructions()};
+}
+}  // namespace cortex::hw
\ No newline at end of file
diff --git a/engine/utils/hardware/gpu_info.h b/engine/utils/hardware/gpu_info.h
new file mode 100644
index 000000000..bbd4a49d6
--- /dev/null
+++ b/engine/utils/hardware/gpu_info.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include "common/hardware_common.h"
+#include "hwinfo/hwinfo.h"
+#include "utils/system_info_utils.h"
+
+namespace cortex::hw {
+
+inline std::vector<GPU> GetGPUInfo() {
+  std::vector<GPU> res;
+  // Only support for nvidia for now
+  // auto gpus = hwinfo::getAllGPUs();
+  auto nvidia_gpus = system_info_utils::GetGpuInfoList();
+  auto cuda_version = system_info_utils::GetCudaVersion();
+  for (auto& n : nvidia_gpus) {
+    res.emplace_back(
+        GPU{.id = n.id,
+            .name = n.name,
+            .version = cuda_version,
+            .add_info =
+                NvidiaAddInfo{
+                    .driver_version = n.driver_version.value_or("unknown"),
+                    .compute_cap = n.compute_cap.value_or("unknown")},
+            .free_vram = std::stoi(n.vram_free),
+            .total_vram = std::stoi(n.vram_total),
+            .uuid = n.uuid});
+  }
+  return res;
+}
+}  // namespace cortex::hw
\ No newline at end of file
diff --git a/engine/utils/hardware/os_info.h b/engine/utils/hardware/os_info.h
new file mode 100644
index 000000000..a87d448f5
--- /dev/null
+++ b/engine/utils/hardware/os_info.h
@@ -0,0 +1,15 @@
+#pragma once
+#include <json/json.h>
+#include <string>
+#include "common/hardware_common.h"
+#include "hwinfo/hwinfo.h"
+
+namespace cortex::hw {
+
+inline OS GetOSInfo() {
+  hwinfo::OS os;
+  return OS{.name = os.name(),
+            .version = os.version(),
+            .arch = os.is32bit() ? "32 bit" : "64 bit"};
+}
+}  // namespace cortex::hw
\ No newline at end of file
diff --git a/engine/utils/hardware/power_info.h b/engine/utils/hardware/power_info.h
new file mode 100644
index 000000000..d18cfd736
--- /dev/null
+++ b/engine/utils/hardware/power_info.h
@@ -0,0 +1,10 @@
+#pragma once
+#include <json/json.h>
+#include <string>
+#include "common/hardware_common.h"
+
+namespace cortex::hw {
+inline PowerInfo GetPowerInfo() {
+  return PowerInfo{};
+}
+}  // namespace hardware
\ No newline at end of file
diff --git a/engine/utils/hardware/ram_info.h b/engine/utils/hardware/ram_info.h
new file mode 100644
index 000000000..1ee4a55f7
--- /dev/null
+++ b/engine/utils/hardware/ram_info.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <json/json.h>
+#include <string>
+#include "common/hardware_common.h"
+#include "hwinfo/hwinfo.h"
+
+#if defined(__APPLE__) && defined(__MACH__)
+#include <mach/host_info.h>
+#include <mach/mach_host.h>
+#include <sys/sysctl.h>
+#endif
+
+namespace cortex::hw {
+
+inline Memory GetMemoryInfo() {
+  hwinfo::Memory m;
+#if defined(__APPLE__) && defined(__MACH__)
+  int64_t total_memory = 0;
+  int64_t used_memory = 0;
+
+  size_t length = sizeof(total_memory);
+  sysctlbyname("hw.memsize", &total_memory, &length, NULL, 0);
+
+  // Get used memory (this is a rough estimate)
+  vm_size_t page_size;
+  mach_msg_type_number_t count = HOST_VM_INFO_COUNT;
+
+  vm_statistics_data_t vm_stat;
+  host_page_size(mach_host_self(), &page_size);
+
+  if (host_statistics(mach_host_self(), HOST_VM_INFO, (host_info_t)&vm_stat,
+                      &count) == KERN_SUCCESS) {
+    used_memory =
+        (vm_stat.active_count + vm_stat.inactive_count + vm_stat.wire_count) *
+        page_size;
+  }
+  return Memory{.total_MiB = ByteToMiB(total_memory),
+                .available_MiB = ByteToMiB(total_memory - used_memory)};
+#elif defined(__linux__) || defined(_WIN32)
+  return Memory{.total_MiB = ByteToMiB(m.total_Bytes()),
+                .available_MiB = ByteToMiB(m.available_Bytes())};
+#else
+  return Memory{};
+#endif
+}
+}  // namespace cortex::hw
\ No newline at end of file
diff --git a/engine/utils/hardware/storage_info.h b/engine/utils/hardware/storage_info.h
new file mode 100644
index 000000000..743d2949a
--- /dev/null
+++ b/engine/utils/hardware/storage_info.h
@@ -0,0 +1,9 @@
+#pragma once
+#include <string>
+#include "common/hardware_common.h"
+
+namespace cortex::hw {
+inline StorageInfo GetStorageInfo() {
+  return StorageInfo{};
+}
+}  // namespace cortex::hw
\ No newline at end of file
diff --git a/engine/utils/logging_utils.h b/engine/utils/logging_utils.h
index c656fd607..2c5affcd4 100644
--- a/engine/utils/logging_utils.h
+++ b/engine/utils/logging_utils.h
@@ -32,22 +32,27 @@ inline bool is_server = false;
   }
 
 namespace logging_utils_helper {
-inline void SetLogLevel(const std::string& log_level) {
+inline void SetLogLevel(const std::string& log_level, bool ignore_cout) {
   if (log_level == "TRACE") {
     trantor::Logger::setLogLevel(trantor::Logger::kTrace);
-    std::cout << "Set log level to TRACE" << std::endl;
+    if (!ignore_cout)
+      std::cout << "Set log level to TRACE" << std::endl;
   } else if (log_level == "DEBUG") {
     trantor::Logger::setLogLevel(trantor::Logger::kDebug);
-    std::cout << "Set log level to DEBUG" << std::endl;
+    if (!ignore_cout)
+      std::cout << "Set log level to DEBUG" << std::endl;
   } else if (log_level == "INFO") {
     trantor::Logger::setLogLevel(trantor::Logger::kInfo);
-    std::cout << "Set log level to INFO" << std::endl;
+    if (!ignore_cout)
+      std::cout << "Set log level to INFO" << std::endl;
   } else if (log_level == "WARN") {
     trantor::Logger::setLogLevel(trantor::Logger::kWarn);
-    std::cout << "Set log level to WARN" << std::endl;
+    if (!ignore_cout)
+      std::cout << "Set log level to WARN" << std::endl;
   } else if (log_level == "ERROR") {
     trantor::Logger::setLogLevel(trantor::Logger::kError);
-    std::cout << "Set log level to ERROR" << std::endl;
+    if (!ignore_cout)
+      std::cout << "Set log level to ERROR" << std::endl;
   } else {
     std::cerr << "Invalid log level: " << log_level
               << ", loglevel must be (TRACE, DEBUG, INFO, WARN or ERROR)"
diff --git a/engine/utils/scope_exit.h b/engine/utils/scope_exit.h
index d79d0951f..9f7516596 100644
--- a/engine/utils/scope_exit.h
+++ b/engine/utils/scope_exit.h
@@ -1,6 +1,6 @@
 #pragma once
 
-namespace utils {
+namespace cortex::utils {
 template <typename F>
 struct ScopeExit {
   ScopeExit(F&& f) : f_(std::forward<F>(f)) {}
@@ -12,4 +12,4 @@ template <typename F>
 ScopeExit<F> makeScopeExit(F&& f) {
   return ScopeExit<F>(std::forward<F>(f));
 };
-}  // namespace utils
\ No newline at end of file
+}  // namespace cortex::utils
\ No newline at end of file
diff --git a/engine/utils/string_utils.h b/engine/utils/string_utils.h
index 264d04025..02d309169 100644
--- a/engine/utils/string_utils.h
+++ b/engine/utils/string_utils.h
@@ -15,6 +15,11 @@ struct ParsePromptResult {
   std::string ai_prompt;
 };
 
+inline std::string RTrim(const std::string& str) {
+  size_t end = str.find_last_not_of("\n\t ");
+  return (end == std::string::npos) ? "" : str.substr(0, end + 1);
+}
+
 inline void Trim(std::string& s) {
   s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) {
             return !std::isspace(ch);
diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h
index 01c105422..6183c3095 100644
--- a/engine/utils/system_info_utils.h
+++ b/engine/utils/system_info_utils.h
@@ -2,6 +2,7 @@
 
 #include <trantor/utils/Logger.h>
 #include <memory>
+#include <optional>
 #include <regex>
 #include <sstream>
 #include <vector>
@@ -18,10 +19,10 @@ constexpr static auto kUnsupported{"Unsupported"};
 constexpr static auto kCudaVersionRegex{R"(CUDA Version:\s*([\d\.]+))"};
 constexpr static auto kDriverVersionRegex{R"(Driver Version:\s*(\d+\.\d+))"};
 constexpr static auto kGpuQueryCommand{
-    "nvidia-smi --query-gpu=index,memory.total,name,compute_cap "
+    "nvidia-smi --query-gpu=index,memory.total,memory.free,name,compute_cap,uuid "
     "--format=csv,noheader,nounits"};
 constexpr static auto kGpuInfoRegex{
-    R"((\d+),\s*(\d+),\s*([^,]+),\s*([\d\.]+))"};
+    R"((\d+),\s*(\d+),\s*(\d+),\s*([^,]+),\s*([\d\.]+),\s*([^\n,]+))"};
 
 struct SystemInfo {
   explicit SystemInfo(std::string os, std::string arch)
@@ -151,13 +152,15 @@ inline std::string GetCudaVersion() {
 
 struct GpuInfo {
   std::string id;
-  std::string vram;
+  std::string vram_total;
+  std::string vram_free;
   std::string name;
   std::string arch;
   // nvidia driver version. Haven't checked for AMD GPU.
   std::optional<std::string> driver_version;
   std::optional<std::string> cuda_driver_version;
   std::optional<std::string> compute_cap;
+  std::string uuid;
 };
 
 inline std::vector<GpuInfo> GetGpuInfoListVulkan() {
@@ -203,7 +206,7 @@ inline std::vector<GpuInfo> GetGpuInfoListVulkan() {
         else if (key == "apiVersion")
           gpuInfo.compute_cap = value;
 
-        gpuInfo.vram = "";  // not available
+        gpuInfo.vram_total = "";  // not available
         gpuInfo.arch = GetGpuArch(gpuInfo.name);
 
         ++field_iter;
@@ -221,7 +224,8 @@ inline std::vector<GpuInfo> GetGpuInfoListVulkan() {
 
 inline std::vector<GpuInfo> GetGpuInfoList() {
   std::vector<GpuInfo> gpuInfoList;
-
+  if (!IsNvidiaSmiAvailable())
+    return gpuInfoList;
   try {
     // TODO: improve by parsing both in one command execution
     auto driver_version = GetDriverVersion();
@@ -238,12 +242,14 @@ inline std::vector<GpuInfo> GetGpuInfoList() {
         std::regex_search(search_start, output.cend(), match, gpu_info_reg)) {
       GpuInfo gpuInfo = {
           match[1].str(),              // id
-          match[2].str(),              // vram
-          match[3].str(),              // name
-          GetGpuArch(match[3].str()),  // arch
+          match[2].str(),              // vram_total
+          match[3].str(),              // vram_free
+          match[4].str(),              // name
+          GetGpuArch(match[4].str()),  // arch
           driver_version,              // driver_version
           cuda_version,                // cuda_driver_version
-          match[4].str()               // compute_cap
+          match[5].str(),              // compute_cap
+          match[6].str()               // uuid  
       };
       gpuInfoList.push_back(gpuInfo);
       search_start = match.suffix().first;
diff --git a/engine/vcpkg.json b/engine/vcpkg.json
index 64e6f6d26..09ddb3368 100644
--- a/engine/vcpkg.json
+++ b/engine/vcpkg.json
@@ -16,6 +16,7 @@
     "eventpp",
     "sqlitecpp",
     "trantor",
-    "indicators"
+    "indicators",
+    "lfreist-hwinfo"
   ]
 }