Skip to content

Commit

Permalink
Merge branch 'dev' of github.com:janhq/cortex.cpp into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
nguyenhoangthuan99 committed Nov 11, 2024
2 parents f648f63 + f354af6 commit 65547d5
Show file tree
Hide file tree
Showing 24 changed files with 220 additions and 92 deletions.
38 changes: 38 additions & 0 deletions .github/workflows/cortex-cpp-quality-gate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,41 @@ jobs:
AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"

build-docker-and-test:
runs-on: ubuntu-latest
steps:
- name: Getting the repo
uses: actions/checkout@v3
with:
submodules: 'recursive'

- name: Set up QEMU
uses: docker/setup-qemu-action@v3

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Run Docker
run: |
docker build -t menloltd/cortex:test -f docker/Dockerfile .
docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test
- name: use python
uses: actions/setup-python@v5
with:
python-version: "3.10"

- name: Run e2e tests
run: |
cd engine
python -m pip install --upgrade pip
python -m pip install -r e2e-test/requirements.txt
pytest e2e-test/test_api_docker.py
- name: Run Docker
continue-on-error: true
if: always()
run: |
docker stop cortex
docker rm cortex
6 changes: 3 additions & 3 deletions docker/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

# Install cortex.llamacpp engine

echo "apiServerHost: 0.0.0.0" > /root/.cortexrc
echo "enableCors: true" >> /root/.cortexrc

cortex engines install llama-cpp -s /opt/cortex.llamacpp
cortex -v

# Start the cortex server

sed -i 's/apiServerHost: 127.0.0.1/apiServerHost: 0.0.0.0/' /root/.cortexrc

cortex start

# Keep the container running by tailing the log files
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/capabilities/models/sources/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ curl --request POST \
Clients can abort an in-progress download using its task ID. Below is a sample `curl` command that aborts a download task:

```sh
curl --location --request DELETE 'http://127.0.0.1:3928/models/pull' \
curl --location --request DELETE 'http://127.0.0.1:39281/v1/models/pull' \
--header 'Content-Type: application/json' \
--data '{
"taskId": "tinyllama:1b-gguf-q2-k"
Expand Down
20 changes: 4 additions & 16 deletions docs/static/openapi/cortex.json
Original file line number Diff line number Diff line change
Expand Up @@ -3595,31 +3595,19 @@
"type": "string",
"description": "The identifier or URL of the model to use. It can be a model ID on Cortexso (https://huggingface.co/cortexso) or a HuggingFace URL pointing to the model file. For example: 'gpt2' or 'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/blob/main/mistral-7b-instruct-v0.1.Q2_K.gguf'",
"examples": [
{
"value": "tinyllama:gguf"
},
{
"value": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/blob/main/mistral-7b-instruct-v0.1.Q2_K.gguf"
}
"tinyllama:gguf",
"https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/blob/main/mistral-7b-instruct-v0.1.Q2_K.gguf"
]
},
"id": {
"type": "string",
"description": "The id which will be used to register the model.",
"examples": [
{
"value": "my-custom-model-id"
}
]
"examples": "my-custom-model-id"
},
"name": {
"type": "string",
"description": "The name which will be used to overwrite the model name.",
"examples": [
{
"value": "my-custom-model-name"
}
]
"examples": "my-custom-model-name"
}
}
},
Expand Down
26 changes: 11 additions & 15 deletions engine/cli/utils/download_progress.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,12 @@ bool DownloadProgress::Handle(const DownloadType& event_type) {
}
}
#endif
std::unordered_map<std::string, uint64_t> totals;
status_ = DownloadStatus::DownloadStarted;
std::unique_ptr<indicators::DynamicProgress<indicators::ProgressBar>> bars;

std::vector<std::unique_ptr<indicators::ProgressBar>> items;
indicators::show_console_cursor(false);
auto handle_message = [this, &bars, &items, &totals,
auto handle_message = [this, &bars, &items,
event_type](const std::string& message) {
CTL_INF(message);

Expand Down Expand Up @@ -98,27 +97,24 @@ bool DownloadProgress::Handle(const DownloadType& event_type) {
}
for (int i = 0; i < ev.download_task_.items.size(); i++) {
auto& it = ev.download_task_.items[i];
uint64_t downloaded = it.downloadedBytes.value_or(0);
if (totals.find(it.id) == totals.end()) {
totals[it.id] = it.bytes.value_or(std::numeric_limits<uint64_t>::max());
CTL_INF("Updated " << it.id << " - total: " << totals[it.id]);
}

if (ev.type_ == DownloadStatus::DownloadStarted ||
ev.type_ == DownloadStatus::DownloadUpdated) {
if (ev.type_ == DownloadStatus::DownloadUpdated) {
uint64_t downloaded = it.downloadedBytes.value_or(0u);
uint64_t total =
it.bytes.value_or(std::numeric_limits<uint64_t>::max());
(*bars)[i].set_option(indicators::option::PrefixText{
pad_string(Repo2Engine(it.id)) +
std::to_string(
int(static_cast<double>(downloaded) / totals[it.id] * 100)) +
std::to_string(int(static_cast<double>(downloaded) / total * 100)) +
'%'});
(*bars)[i].set_progress(
int(static_cast<double>(downloaded) / totals[it.id] * 100));
int(static_cast<double>(downloaded) / total * 100));
(*bars)[i].set_option(indicators::option::PostfixText{
format_utils::BytesToHumanReadable(downloaded) + "/" +
format_utils::BytesToHumanReadable(totals[it.id])});
format_utils::BytesToHumanReadable(total)});
} else if (ev.type_ == DownloadStatus::DownloadSuccess) {
uint64_t total =
it.bytes.value_or(std::numeric_limits<uint64_t>::max());
(*bars)[i].set_progress(100);
auto total_str = format_utils::BytesToHumanReadable(totals[it.id]);
auto total_str = format_utils::BytesToHumanReadable(total);
(*bars)[i].set_option(
indicators::option::PostfixText{total_str + "/" + total_str});
(*bars)[i].set_option(indicators::option::PrefixText{
Expand Down
4 changes: 2 additions & 2 deletions engine/controllers/configs.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ class Configs : public drogon::HttpController<Configs, false> {
METHOD_LIST_BEGIN

METHOD_ADD(Configs::GetConfigurations, "", Get);
METHOD_ADD(Configs::UpdateConfigurations, "", Patch);
METHOD_ADD(Configs::UpdateConfigurations, "", Options, Patch);

ADD_METHOD_TO(Configs::GetConfigurations, "/v1/configs", Get);
ADD_METHOD_TO(Configs::UpdateConfigurations, "/v1/configs", Patch);
ADD_METHOD_TO(Configs::UpdateConfigurations, "/v1/configs", Options, Patch);

METHOD_LIST_END

Expand Down
20 changes: 11 additions & 9 deletions engine/controllers/engines.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,17 @@ class Engines : public drogon::HttpController<Engines, false> {
METHOD_LIST_BEGIN

METHOD_ADD(Engines::GetInstalledEngineVariants, "/{1}", Get);
METHOD_ADD(Engines::InstallEngine, "/{1}?version={2}&variant={3}", Post);
METHOD_ADD(Engines::InstallEngine, "/{1}?version={2}&variant={3}", Options,
Post);
METHOD_ADD(Engines::UninstallEngine, "/{1}?version={2}&variant={3}", Options,
Delete);
METHOD_ADD(Engines::SetDefaultEngineVariant,
"/{1}/default?version={2}&variant={3}", Post);
"/{1}/default?version={2}&variant={3}", Options, Post);
METHOD_ADD(Engines::GetDefaultEngineVariant, "/{1}/default", Get);

METHOD_ADD(Engines::LoadEngine, "/{1}/load", Post);
METHOD_ADD(Engines::LoadEngine, "/{1}/load", Options, Post);
METHOD_ADD(Engines::UnloadEngine, "/{1}/load", Options, Delete);
METHOD_ADD(Engines::UpdateEngine, "/{1}/update", Post);
METHOD_ADD(Engines::UpdateEngine, "/{1}/update", Options, Post);
METHOD_ADD(Engines::ListEngine, "", Get);

METHOD_ADD(Engines::GetEngineVersions, "/{1}/versions", Get);
Expand All @@ -31,17 +32,18 @@ class Engines : public drogon::HttpController<Engines, false> {

ADD_METHOD_TO(Engines::GetInstalledEngineVariants, "/v1/engines/{1}", Get);
ADD_METHOD_TO(Engines::InstallEngine,
"/v1/engines/{1}?version={2}&variant={3}", Post);
"/v1/engines/{1}?version={2}&variant={3}", Options, Post);
ADD_METHOD_TO(Engines::UninstallEngine,
"/v1/engines/{1}?version={2}&variant={3}", Options, Delete);
ADD_METHOD_TO(Engines::SetDefaultEngineVariant,
"/v1/engines/{1}/default?version={2}&variant={3}", Post);
"/v1/engines/{1}/default?version={2}&variant={3}", Options,
Post);
ADD_METHOD_TO(Engines::GetDefaultEngineVariant, "/v1/engines/{1}/default",
Get);

ADD_METHOD_TO(Engines::LoadEngine, "/v1/engines/{1}/load", Post);
ADD_METHOD_TO(Engines::UnloadEngine, "/v1/engines/{1}/load", Post);
ADD_METHOD_TO(Engines::UpdateEngine, "/v1/engines/{1}/update", Post);
ADD_METHOD_TO(Engines::LoadEngine, "/v1/engines/{1}/load", Options, Post);
ADD_METHOD_TO(Engines::UnloadEngine, "/v1/engines/{1}/load", Options, Post);
ADD_METHOD_TO(Engines::UpdateEngine, "/v1/engines/{1}/update", Options, Post);
ADD_METHOD_TO(Engines::GetEngineVersions, "/v1/engines/{1}/versions", Get);
ADD_METHOD_TO(Engines::GetEngineVariants, "/v1/engines/{1}/versions/{2}",
Get);
Expand Down
22 changes: 11 additions & 11 deletions engine/controllers/models.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,27 @@ using namespace drogon;
class Models : public drogon::HttpController<Models, false> {
public:
METHOD_LIST_BEGIN
METHOD_ADD(Models::PullModel, "/pull", Post);
METHOD_ADD(Models::GetModelPullInfo, "/pull/info", Post);
METHOD_ADD(Models::PullModel, "/pull", Options, Post);
METHOD_ADD(Models::GetModelPullInfo, "/pull/info", Options, Post);
METHOD_ADD(Models::AbortPullModel, "/pull", Options, Delete);
METHOD_ADD(Models::ListModel, "", Get);
METHOD_ADD(Models::GetModel, "/{1}", Get);
METHOD_ADD(Models::UpdateModel, "/{1}", Patch);
METHOD_ADD(Models::ImportModel, "/import", Post);
METHOD_ADD(Models::UpdateModel, "/{1}", Options, Patch);
METHOD_ADD(Models::ImportModel, "/import", Options, Post);
METHOD_ADD(Models::DeleteModel, "/{1}", Options, Delete);
METHOD_ADD(Models::StartModel, "/start", Post);
METHOD_ADD(Models::StopModel, "/stop", Post);
METHOD_ADD(Models::StartModel, "/start", Options, Post);
METHOD_ADD(Models::StopModel, "/stop", Options, Post);
METHOD_ADD(Models::GetModelStatus, "/status/{1}", Get);

ADD_METHOD_TO(Models::PullModel, "/v1/models/pull", Post);
ADD_METHOD_TO(Models::PullModel, "/v1/models/pull", Options, Post);
ADD_METHOD_TO(Models::AbortPullModel, "/v1/models/pull", Options, Delete);
ADD_METHOD_TO(Models::ListModel, "/v1/models", Get);
ADD_METHOD_TO(Models::GetModel, "/v1/models/{1}", Get);
ADD_METHOD_TO(Models::UpdateModel, "/v1/models/{1}", Patch);
ADD_METHOD_TO(Models::ImportModel, "/v1/models/import", Post);
ADD_METHOD_TO(Models::UpdateModel, "/v1/models/{1}", Options, Patch);
ADD_METHOD_TO(Models::ImportModel, "/v1/models/import", Options, Post);
ADD_METHOD_TO(Models::DeleteModel, "/v1/models/{1}", Options, Delete);
ADD_METHOD_TO(Models::StartModel, "/v1/models/start", Post);
ADD_METHOD_TO(Models::StopModel, "/v1/models/stop", Post);
ADD_METHOD_TO(Models::StartModel, "/v1/models/start", Options, Post);
ADD_METHOD_TO(Models::StopModel, "/v1/models/stop", Options, Post);
ADD_METHOD_TO(Models::GetModelStatus, "/v1/models/status/{1}", Get);
METHOD_LIST_END

Expand Down
18 changes: 9 additions & 9 deletions engine/controllers/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,20 @@ class server : public drogon::HttpController<server, false>,
~server();
METHOD_LIST_BEGIN
// list path definitions here;
METHOD_ADD(server::ChatCompletion, "chat_completion", Post);
METHOD_ADD(server::Embedding, "embedding", Post);
METHOD_ADD(server::LoadModel, "loadmodel", Post);
METHOD_ADD(server::UnloadModel, "unloadmodel", Post);
METHOD_ADD(server::ModelStatus, "modelstatus", Post);
METHOD_ADD(server::ChatCompletion, "chat_completion", Options, Post);
METHOD_ADD(server::Embedding, "embedding", Options, Post);
METHOD_ADD(server::LoadModel, "loadmodel", Options, Post);
METHOD_ADD(server::UnloadModel, "unloadmodel", Options, Post);
METHOD_ADD(server::ModelStatus, "modelstatus", Options, Post);
METHOD_ADD(server::GetModels, "models", Get);

// cortex.python API
METHOD_ADD(server::FineTuning, "finetuning", Post);
METHOD_ADD(server::FineTuning, "finetuning", Options, Post);

// OpenAI-compatible paths
ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Post);
ADD_METHOD_TO(server::FineTuning, "/v1/fine_tuning/job", Post);
ADD_METHOD_TO(server::Embedding, "/v1/embeddings", Post);
ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Options, Post);
ADD_METHOD_TO(server::FineTuning, "/v1/fine_tuning/job", Options, Post);
ADD_METHOD_TO(server::Embedding, "/v1/embeddings", Options, Post);

METHOD_LIST_END
void ChatCompletion(
Expand Down
10 changes: 5 additions & 5 deletions engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,22 +100,22 @@ async def test_models_on_cortexso_hub(self, model_url):
json_body = {
"model": model_url
}
response = requests.post("http://localhost:3928/models/pull", json=json_body)
response = requests.post("http://localhost:3928/v1/models/pull", json=json_body)
assert response.status_code == 200, f"Failed to pull model: {model_url}"

await wait_for_websocket_download_success_event(timeout=None)

# Check if the model was pulled successfully
get_model_response = requests.get(
f"http://127.0.0.1:3928/models/{model_url}"
f"http://127.0.0.1:3928/v1/models/{model_url}"
)
assert get_model_response.status_code == 200, f"Failed to fetch model: {model_url}"
assert (
get_model_response.json()["model"] == model_url
), f"Unexpected model name for: {model_url}"

# Check if the model is available in the list of models
response = requests.get("http://localhost:3928/models")
response = requests.get("http://localhost:3928/v1/models")
assert response.status_code == 200
models = [i["id"] for i in response.json()["data"]]
assert model_url in models, f"Model not found in list: {model_url}"
Expand All @@ -129,7 +129,7 @@ async def test_models_on_cortexso_hub(self, model_url):
assert exit_code == 0, f"Install engine failed with error: {error}"

# Start the model
response = requests.post("http://localhost:3928/models/start", json=json_body)
response = requests.post("http://localhost:3928/v1/models/start", json=json_body)
assert response.status_code == 200, f"status_code: {response.status_code}"

# Send an inference request
Expand All @@ -155,7 +155,7 @@ async def test_models_on_cortexso_hub(self, model_url):
assert response.status_code == 200, f"status_code: {response.status_code} response: {response.json()}"

# Stop the model
response = requests.post("http://localhost:3928/models/stop", json=json_body)
response = requests.post("http://localhost:3928/v1/models/stop", json=json_body)
assert response.status_code == 200, f"status_code: {response.status_code}"

# Uninstall Engine
Expand Down
Loading

0 comments on commit 65547d5

Please sign in to comment.