fix: add hugging face token into configuration file #1441

Merged 16 commits on Oct 21, 2024
Changes from all commits
74 changes: 44 additions & 30 deletions .github/workflows/cortex-cpp-quality-gate.yml
@@ -106,36 +106,50 @@ jobs:
cd engine
make run-unit-tests

# - name: Run e2e tests
# if: runner.os != 'Windows' && github.event.pull_request.draft == false
# run: |
# cd engine
# cp build/cortex build/cortex-nightly
# cp build/cortex build/cortex-beta
# python -m pip install --upgrade pip
# python -m pip install pytest
# python -m pip install requests
# python e2e-test/main.py
# rm build/cortex-nightly
# rm build/cortex-beta
# env:
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}


# - name: Run e2e tests
# if: runner.os == 'Windows' && github.event.pull_request.draft == false
# run: |
# cd engine
# cp build/cortex.exe build/cortex-nightly.exe
# cp build/cortex.exe build/cortex-beta.exe
# python -m pip install --upgrade pip
# python -m pip install pytest
# python -m pip install requests
# python e2e-test/main.py
# rm build/cortex-nightly.exe
# rm build/cortex-beta.exe
# env:
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Run setup config for macos
if: runner.os == 'macOS'
run: |
cd engine
./build/cortex --version
sed -i '' 's/huggingFaceToken: ""/huggingFaceToken: "${{ secrets.HUGGINGFACE_TOKEN_READ }}"/' ~/.cortexrc

- name: Run setup config for linux
if: runner.os != 'macOS'
shell: bash
run: |
cd engine
./build/cortex --version
sed -i 's/huggingFaceToken: ""/huggingFaceToken: "${{ secrets.HUGGINGFACE_TOKEN_READ }}"/' ~/.cortexrc

- name: Run e2e tests
if: runner.os != 'Windows' && github.event.pull_request.draft == false
run: |
cd engine
cp build/cortex build/cortex-nightly
cp build/cortex build/cortex-beta
python -m pip install --upgrade pip
python -m pip install pytest
python -m pip install requests
python e2e-test/main.py
rm build/cortex-nightly
rm build/cortex-beta
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Run e2e tests
if: runner.os == 'Windows' && github.event.pull_request.draft == false
run: |
cd engine
cp build/cortex.exe build/cortex-nightly.exe
cp build/cortex.exe build/cortex-beta.exe
python -m pip install --upgrade pip
python -m pip install pytest
python -m pip install requests
python e2e-test/main.py
rm build/cortex-nightly.exe
rm build/cortex-beta.exe
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Pre-package
run: |
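
Note on the two new setup steps: they differ only in `sed` syntax, since BSD `sed` on macOS requires an explicit (here empty) backup suffix after `-i`, while GNU `sed` on Linux takes `-i` alone. Both steps rewrite the empty `huggingFaceToken: ""` entry in `~/.cortexrc` (which running `./build/cortex --version` appears to initialize) with the `HUGGINGFACE_TOKEN_READ` repository secret, so the re-enabled e2e tests run with an authenticated Hugging Face token.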
14 changes: 0 additions & 14 deletions .github/workflows/template-build-linux-x64.yml
@@ -135,20 +135,6 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: '3.10'

# - name: Run e2e tests
# run: |
# cd engine
# cp build/cortex build/cortex-nightly
# cp build/cortex build/cortex-beta
# python -m pip install --upgrade pip
# python -m pip install pytest
# python -m pip install requests
# python e2e-test/main.py
# rm build/cortex-nightly
# rm build/cortex-beta
# env:
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Pre-package
run: |
14 changes: 0 additions & 14 deletions .github/workflows/template-build-macos.yml
@@ -149,20 +149,6 @@ jobs:
run: |
cd engine
make build CMAKE_EXTRA_FLAGS="${{ inputs.cmake-flags }} ${{ matrix.extra-cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ inputs.build-deps-cmake-flags }}"

# - name: Run e2e tests
# run: |
# cd engine
# cp build/cortex build/cortex-nightly
# cp build/cortex build/cortex-beta
# python -m pip install --upgrade pip
# python -m pip install pytest
# python -m pip install requests
# python e2e-test/main.py
# rm build/cortex-nightly
# rm build/cortex-beta
# env:
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Pre-package
run: |
14 changes: 0 additions & 14 deletions .github/workflows/template-build-windows-x64.yml
@@ -172,20 +172,6 @@ jobs:
with:
python-version: '3.10'

# - name: Run e2e tests
# run: |
# cd engine
# cp build/cortex.exe build/cortex-nightly.exe
# cp build/cortex.exe build/cortex-beta.exe
# python -m pip install --upgrade pip
# python -m pip install pytest
# python -m pip install requests
# python e2e-test/main.py
# rm build/cortex-nightly.exe
# rm build/cortex-beta.exe
# env:
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Pre-package
run: |
cd engine
2 changes: 1 addition & 1 deletion engine/cli/commands/engine_uninstall_cmd.cc
@@ -21,7 +21,7 @@ void EngineUninstallCmd::Exec(const std::string& host, int port,
auto res = cli.Delete("/v1/engines/" + engine);
if (res) {
if (res->status == httplib::StatusCode::OK_200) {
CLI_LOG("Engine " + engine + " uninstalled successfully");
CLI_LOG("Engine " + engine + " uninstalled successfully!");
} else {
CTL_ERR("Engine failed to uninstall with status code: " << res->status);
}
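
As the `httplib::Client` DELETE call above shows, the CLI talks to a running server over HTTP, which is consistent with the e2e-test changes below: each CLI test now starts the server in its setup fixture and stops it during teardown.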
2 changes: 1 addition & 1 deletion engine/e2e-test/test_api_model_pull_direct_url.py
@@ -22,7 +22,6 @@ def setup_and_teardown(self):
yield

# Teardown
stop_server()
run(
"Delete model",
[
@@ -31,6 +30,7 @@ def setup_and_teardown(self):
"TheBloke:TinyLlama-1.1B-Chat-v0.3-GGUF:tinyllama-1.1b-chat-v0.3.Q2_K.gguf",
],
)
stop_server()

def test_model_pull_with_direct_url_should_be_success(self):
myobj = {
2 changes: 1 addition & 1 deletion engine/e2e-test/test_api_model_start.py
@@ -15,7 +15,7 @@ def setup_and_teardown(self):

# TODO: using pull with branch for easy testing tinyllama:gguf for example
run("Delete model", ["models", "delete", "tinyllama:gguf"])
popen(["pull", "tinyllama"], "1\n")
run("Pull model", ["pull", "tinyllama:gguf"], timeout=None,)

yield

2 changes: 1 addition & 1 deletion engine/e2e-test/test_api_model_update.py
@@ -19,5 +19,5 @@ def setup_and_teardown(self):

def test_models_update_should_be_successful(self):
body_json = {'model': 'tinyllama:gguf'}
response = requests.post("http://localhost:3928/models/tinyllama:gguf", json = body_json)
response = requests.patch("http://localhost:3928/models/tinyllama:gguf", json = body_json)
assert response.status_code == 200
14 changes: 13 additions & 1 deletion engine/e2e-test/test_cli_engine_get.py
@@ -2,9 +2,21 @@

import pytest
from test_runner import run

from test_runner import start_server, stop_server

class TestCliEngineGet:

@pytest.fixture(autouse=True)
def setup_and_teardown(self):
# Setup
success = start_server()
if not success:
raise Exception("Failed to start server")

yield

# Teardown
stop_server()

@pytest.mark.skipif(platform.system() != "Windows", reason="Windows-specific test")
def test_engines_get_tensorrt_llm_should_not_be_incompatible(self):
15 changes: 14 additions & 1 deletion engine/e2e-test/test_cli_engine_list.py
@@ -2,9 +2,22 @@

import pytest
from test_runner import run

from test_runner import start_server, stop_server

class TestCliEngineList:

@pytest.fixture(autouse=True)
def setup_and_teardown(self):
# Setup
success = start_server()
if not success:
raise Exception("Failed to start server")

yield

# Teardown
stop_server()

@pytest.mark.skipif(platform.system() != "Windows", reason="Windows-specific test")
def test_engines_list_run_successfully_on_windows(self):
exit_code, output, error = run("List engines", ["engines", "list"])
7 changes: 6 additions & 1 deletion engine/e2e-test/test_cli_engine_uninstall.py
@@ -1,12 +1,16 @@
import pytest
from test_runner import run

from test_runner import start_server, stop_server

class TestCliEngineUninstall:

@pytest.fixture(autouse=True)
def setup_and_teardown(self):
# Setup
success = start_server()
if not success:
raise Exception("Failed to start server")

# Preinstall llamacpp engine
run("Install Engine", ["engines", "install", "llama-cpp"],timeout = None)

@@ -15,6 +19,7 @@ def setup_and_teardown(self):
# Teardown
# Clean up, removing installed engine
run("Uninstall Engine", ["engines", "uninstall", "llama-cpp"])
stop_server()

def test_engines_uninstall_llamacpp_should_be_successfully(self):
exit_code, output, error = run(
10 changes: 7 additions & 3 deletions engine/e2e-test/test_cli_model_delete.py
@@ -1,22 +1,26 @@
import pytest
from test_runner import popen, run

from test_runner import start_server, stop_server

class TestCliModelDelete:

@pytest.fixture(autouse=True)
def setup_and_teardown(self):
# Setup
success = start_server()
if not success:
raise Exception("Failed to start server")

# Pull model

# TODO: using pull with branch for easy testing tinyllama:gguf for example
popen(["pull", "tinyllama"], "1\n")
run("Pull model", ["pull", "tinyllama:gguf"], timeout=None,)

yield

# Teardown
# Clean up
run("Delete model", ["models", "delete", "tinyllama:gguf"])
stop_server()

def test_models_delete_should_be_successful(self):
exit_code, output, error = run(
13 changes: 13 additions & 0 deletions engine/e2e-test/test_cli_model_import.py
@@ -1,7 +1,20 @@
import pytest
from test_runner import run
from test_runner import start_server, stop_server

class TestCliModelImport:

@pytest.fixture(autouse=True)
def setup_and_teardown(self):
# Setup
success = start_server()
if not success:
raise Exception("Failed to start server")

yield

# Teardown
stop_server()

@pytest.mark.skipif(True, reason="Expensive test. Only test when you have local gguf file.")
def test_model_import_should_be_success(self):
26 changes: 26 additions & 0 deletions engine/services/download_service.cc
@@ -9,7 +9,10 @@
#include <utility>
#include "download_service.h"
#include "utils/format_utils.h"
#include "utils/huggingface_utils.h"
#include "utils/logging_utils.h"
#include "utils/result.hpp"
#include "utils/url_parser.h"

#ifdef _WIN32
#define ftell64(f) _ftelli64(f)
@@ -24,6 +27,20 @@ size_t WriteCallback(char* ptr, size_t size, size_t nmemb, void* userdata) {
size_t written = fwrite(ptr, size, nmemb, (FILE*)userdata);
return written;
}

inline curl_slist* CreateHeaders(const std::string& url) {
try {
auto url_obj = url_parser::FromUrlString(url);
if (url_obj.host == huggingface_utils::kHuggingfaceHost) {
return huggingface_utils::CreateCurlHfHeaders();
} else {
return nullptr;
}
} catch (const std::exception& e) {
CTL_WRN(e.what());
return nullptr;
}
}
} // namespace

cpp::result<void, std::string> DownloadService::VerifyDownloadTask(
@@ -98,6 +115,9 @@ cpp::result<uint64_t, std::string> DownloadService::GetFileSize(
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
if (auto headers = CreateHeaders(url); headers) {
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
}
CURLcode res = curl_easy_perform(curl);

if (res != CURLE_OK) {
@@ -176,6 +196,9 @@ cpp::result<bool, std::string> DownloadService::Download(
}

curl_easy_setopt(curl, CURLOPT_URL, download_item.downloadUrl.c_str());
if (auto headers = CreateHeaders(download_item.downloadUrl); headers) {
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
}
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &WriteCallback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, file);
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
@@ -262,6 +285,9 @@ void DownloadService::ProcessTask(DownloadTask& task) {
return;
}
downloading_data_->item_id = item.id;
if (auto headers = CreateHeaders(item.downloadUrl); headers) {
curl_easy_setopt(handle, CURLOPT_HTTPHEADER, headers);
}
curl_easy_setopt(handle, CURLOPT_URL, item.downloadUrl.c_str());
curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, WriteCallback);
curl_easy_setopt(handle, CURLOPT_WRITEDATA, file);
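
The new `CreateHeaders` helper attaches Hugging Face auth headers only when the parsed URL host matches `kHuggingfaceHost`, so downloads from other hosts stay anonymous. `huggingface_utils::CreateCurlHfHeaders()` itself is not part of this diff; a minimal sketch of what it could look like, assuming a hypothetical `GetHuggingFaceToken()` accessor in place of whatever actually reads `huggingFaceToken` from `~/.cortexrc`:

```cpp
// Sketch only: huggingface_utils.h is not shown in this diff.
#include <curl/curl.h>
#include <cstdlib>
#include <string>

namespace huggingface_utils {

constexpr auto kHuggingfaceHost = "huggingface.co";

inline std::string GetHuggingFaceToken() {
  // Hypothetical stand-in: reads an environment variable instead of the
  // huggingFaceToken field in ~/.cortexrc.
  const char* token = std::getenv("HF_TOKEN");
  return token ? token : "";
}

inline curl_slist* CreateCurlHfHeaders() {
  auto token = GetHuggingFaceToken();
  if (token.empty())
    return nullptr;  // no token configured -> request stays anonymous
  // curl_slist_append copies the string, so passing a temporary is safe.
  return curl_slist_append(nullptr,
                           ("Authorization: Bearer " + token).c_str());
}

}  // namespace huggingface_utils
```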
3 changes: 2 additions & 1 deletion engine/services/model_service.cc
@@ -70,7 +70,8 @@ cpp::result<DownloadTask, std::string> GetDownloadTask(
.pathParams = {"api", "models", "cortexso", modelId, "tree", branch}};

httplib::Client cli(url.GetProtocolAndHost());
auto res = cli.Get(url.GetPathAndQuery());
auto res =
cli.Get(url.GetPathAndQuery(), huggingface_utils::CreateHttpHfHeaders());
if (res->status != httplib::StatusCode::OK_200) {
return cpp::fail("Model " + modelId + " not found");
}
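
For the httplib path, the analogous helper returns an `httplib::Headers` map (a `std::multimap<std::string, std::string>`) that `cli.Get` accepts as its second argument. Again a sketch only, under the same assumption of a hypothetical `GetHuggingFaceToken()` accessor:

```cpp
// Sketch only: the real CreateHttpHfHeaders lives in
// utils/huggingface_utils.h, which is not shown in this diff.
#include <httplib.h>
#include <cstdlib>
#include <string>

namespace huggingface_utils {

inline std::string GetHuggingFaceToken() {
  // Hypothetical stand-in for reading huggingFaceToken from ~/.cortexrc.
  const char* token = std::getenv("HF_TOKEN");
  return token ? token : "";
}

inline httplib::Headers CreateHttpHfHeaders() {
  httplib::Headers headers;
  if (auto token = GetHuggingFaceToken(); !token.empty()) {
    headers.emplace("Authorization", "Bearer " + token);
  }
  return headers;
}

}  // namespace huggingface_utils
```

An empty map is harmless here: httplib simply sends the request with no extra headers, matching the previous unauthenticated behavior.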