diff --git a/CMakeLists.txt b/CMakeLists.txt index 190ca54..e03ca36 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,6 +39,8 @@ target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE Llamacpp) target_sources(${CMAKE_PROJECT_NAME} PRIVATE src/plugin-main.c) +target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE vendor/nlohmann-json) + add_subdirectory(src/llm-dock) set_target_properties_plugin(${CMAKE_PROJECT_NAME} PROPERTIES OUTPUT_NAME ${_name}) diff --git a/cmake/BuildLlamacpp.cmake b/cmake/BuildLlamacpp.cmake index 0863643..8ebc7b8 100644 --- a/cmake/BuildLlamacpp.cmake +++ b/cmake/BuildLlamacpp.cmake @@ -11,12 +11,11 @@ endif() # On linux add the `-fPIC` flag to the compiler if(UNIX AND NOT APPLE) set(LLAMA_EXTRA_CXX_FLAGS "-fPIC") - set(LLAMA_ADDITIONAL_CMAKE_ARGS -DLLAMA_BLAS=OFF -DLLAMA_CUBLAS=OFF -DLLAMA_OPENBLAS=OFF -DLLAMA_NO_AVX=ON - -DLLAMA_NO_AVX2=ON) + set(LLAMA_ADDITIONAL_CMAKE_ARGS -DLLAMA_NATIVE=ON) endif() if(APPLE) - set(LLAMA_ADDITIONAL_CMAKE_ARGS -DLLAMA_NATIVE=OFF -DLLAMA_METAL=OFF -DLLAMA_AVX=ON -DLLAMA_AVX2=ON - -DLLAMA_FMA=ON -DLLAMA_F16C=ON) + set(LLAMA_ADDITIONAL_CMAKE_ARGS -DLLAMA_NATIVE=OFF -DLLAMA_METAL=OFF -DLLAMA_AVX=ON -DLLAMA_AVX2=ON -DLLAMA_FMA=ON + -DLLAMA_F16C=ON) endif() if(WIN32) @@ -51,22 +50,22 @@ if(WIN32) Llamacpp_Build DOWNLOAD_EXTRACT_TIMESTAMP true GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git - GIT_TAG 7b374c9ac9b9861bb737eec060e4dfa29d229259 + GIT_TAG 370359e5baf619f3a8d461023143d1494b1e8fde BUILD_COMMAND ${CMAKE_COMMAND} --build --config ${Llamacpp_BUILD_TYPE} BUILD_BYPRODUCTS /lib/static/${CMAKE_STATIC_LIBRARY_PREFIX}llama${CMAKE_STATIC_LIBRARY_SUFFIX} /bin/${CMAKE_SHARED_LIBRARY_PREFIX}llama${CMAKE_SHARED_LIBRARY_SUFFIX} /lib/${CMAKE_IMPORT_LIBRARY_PREFIX}llama${CMAKE_IMPORT_LIBRARY_SUFFIX} CMAKE_GENERATOR ${CMAKE_GENERATOR} - INSTALL_COMMAND ${CMAKE_COMMAND} --install --config ${Llamacpp_BUILD_TYPE} && ${CMAKE_COMMAND} -E - copy /${Llamacpp_BUILD_TYPE}/llama.lib /lib + INSTALL_COMMAND ${CMAKE_COMMAND} --install --config ${Llamacpp_BUILD_TYPE} && ${CMAKE_COMMAND} -E copy + /${Llamacpp_BUILD_TYPE}/llama.lib /lib CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${LLAMA_ADDITIONAL_ENV} ${CMAKE_COMMAND} -B -G ${CMAKE_GENERATOR} -DCMAKE_INSTALL_PREFIX= -DCMAKE_BUILD_TYPE=${Llamacpp_BUILD_TYPE} -DCMAKE_GENERATOR_PLATFORM=${CMAKE_GENERATOR_PLATFORM} -DCMAKE_OSX_DEPLOYMENT_TARGET=10.13 -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES_} -DCMAKE_CXX_FLAGS=${LLAMA_EXTRA_CXX_FLAGS} -DCMAKE_C_FLAGS=${LLAMA_EXTRA_CXX_FLAGS} -DBUILD_SHARED_LIBS=ON -DLLAMA_BUILD_TESTS=OFF - -DLLAMA_BUILD_EXAMPLES=OFF ${LLAMA_ADDITIONAL_CMAKE_ARGS} -DLLAMA_STATIC=ON) + -DLLAMA_BUILD_EXAMPLES=OFF ${LLAMA_ADDITIONAL_CMAKE_ARGS} -DLLAMA_STATIC=OFF) if(NOT BRAIN_WITH_CUDA) add_dependencies(Llamacpp_Build OpenBLAS) @@ -97,11 +96,11 @@ ExternalProject_Get_Property(Llamacpp_Build INSTALL_DIR) if(WIN32) add_library(Llamacpp::Llama SHARED IMPORTED) set_target_properties( - Llamacpp::Llama - PROPERTIES IMPORTED_LOCATION ${INSTALL_DIR}/bin/${CMAKE_SHARED_LIBRARY_PREFIX}llama${CMAKE_SHARED_LIBRARY_SUFFIX}) + Llamacpp::Llama PROPERTIES IMPORTED_LOCATION + ${INSTALL_DIR}/bin/${CMAKE_SHARED_LIBRARY_PREFIX}llama${CMAKE_SHARED_LIBRARY_SUFFIX}) set_target_properties( - Llamacpp::Llama - PROPERTIES IMPORTED_IMPLIB ${INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}llama${CMAKE_STATIC_LIBRARY_SUFFIX}) + Llamacpp::Llama PROPERTIES IMPORTED_IMPLIB + ${INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}llama${CMAKE_STATIC_LIBRARY_SUFFIX}) install(FILES 
${INSTALL_DIR}/bin/${CMAKE_SHARED_LIBRARY_PREFIX}llama${CMAKE_SHARED_LIBRARY_SUFFIX} DESTINATION "obs-plugins/64bit") @@ -133,9 +132,8 @@ else() # on Linux and MacOS add the static Llama library to the link line add_library(Llamacpp::Llama STATIC IMPORTED) set_target_properties( - Llamacpp::Llama - PROPERTIES IMPORTED_LOCATION - ${INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}llama${CMAKE_STATIC_LIBRARY_SUFFIX}) + Llamacpp::Llama PROPERTIES IMPORTED_LOCATION + ${INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}llama${CMAKE_STATIC_LIBRARY_SUFFIX}) endif(WIN32) add_library(Llamacpp INTERFACE) diff --git a/cmake/common/buildspec_common.cmake b/cmake/common/buildspec_common.cmake index b2c2414..d0f43c7 100644 --- a/cmake/common/buildspec_common.cmake +++ b/cmake/common/buildspec_common.cmake @@ -73,6 +73,13 @@ function(_setup_obs_studio) set(_cmake_version "3.0.0") endif() + message(STATUS "Patch libobs") + execute_process( + COMMAND patch --forward "libobs/CMakeLists.txt" "${CMAKE_CURRENT_SOURCE_DIR}/patch_libobs.diff" + RESULT_VARIABLE _process_result + WORKING_DIRECTORY "${dependencies_dir}/${_obs_destination}") + message(STATUS "Patch - done") + message(STATUS "Configure ${label} (${arch})") execute_process( COMMAND diff --git a/cmake/macos/helpers.cmake b/cmake/macos/helpers.cmake index 60428b7..37b92bf 100644 --- a/cmake/macos/helpers.cmake +++ b/cmake/macos/helpers.cmake @@ -79,9 +79,9 @@ function(set_target_properties_plugin target) CONFIGURATIONS Release DESTINATION . OPTIONAL) - configure_file(cmake/macos/resources/distribution.in "${CMAKE_CURRENT_BINARY_DIR}/distribution" @ONLY) - configure_file(cmake/macos/resources/create-package.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/create-package.cmake" @ONLY) - install(SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/create-package.cmake") + configure_file(cmake/macos/resources/distribution.in "${CMAKE_CURRENT_BINARY_DIR}/distribution" @ONLY) + configure_file(cmake/macos/resources/create-package.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/create-package.cmake" @ONLY) + install(SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/create-package.cmake") endfunction() # target_install_resources: Helper function to add resources into bundle diff --git a/src/llm-dock/CMakeLists.txt b/src/llm-dock/CMakeLists.txt index 51b91b0..c389979 100644 --- a/src/llm-dock/CMakeLists.txt +++ b/src/llm-dock/CMakeLists.txt @@ -1,4 +1,4 @@ -target_sources(${CMAKE_PROJECT_NAME} PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/llm-dock-ui.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/llama-inference.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/LLMSettingsDialog.cpp) +target_sources( + ${CMAKE_PROJECT_NAME} + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/llm-dock-ui.cpp ${CMAKE_CURRENT_SOURCE_DIR}/llama-inference.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/LLMSettingsDialog.cpp ${CMAKE_CURRENT_SOURCE_DIR}/llm-config-data.cpp) diff --git a/src/llm-dock/LLMSettingsDialog.cpp b/src/llm-dock/LLMSettingsDialog.cpp index 51fa43b..ac22622 100644 --- a/src/llm-dock/LLMSettingsDialog.cpp +++ b/src/llm-dock/LLMSettingsDialog.cpp @@ -1,65 +1,63 @@ #include "LLMSettingsDialog.hpp" #include "llama-inference.h" +#include "llm-config-data.h" +#include "plugin-support.h" + +#include LLMSettingsDialog::LLMSettingsDialog(QWidget *parent) : QDialog(parent) { - // Create a dialog with settings for LLMs usage in OBS - // The settings should allow to select local or cloud LLM (OpenAI API) - // The local LLM should allow selecting a .gguf model file for llama.cpp - // The cloud LLM should allow entering an API key for OpenAI API - // The settings should allow to edit the 
following: - // - system prompt for LLM - // - max number of tokens to generate - // - temperature - // - end of text token - - setWindowTitle("LLM Settings"); - setWindowFlags(Qt::Dialog | Qt::WindowCloseButtonHint); - setModal(true); - // set a minimum width for the dialog + // Create a dialog with settings for LLMs usage in OBS + // The settings should allow to select local or cloud LLM (OpenAI API) + // The local LLM should allow selecting a .gguf model file for llama.cpp + // The cloud LLM should allow entering an API key for OpenAI API + // The settings should allow to edit the following: + // - system prompt for LLM + // - max number of tokens to generate + // - temperature + + setWindowTitle("LLM Settings"); + setWindowFlags(Qt::Dialog | Qt::WindowCloseButtonHint); + setModal(true); + // set a minimum width for the dialog setMinimumWidth(500); + // Create a layout for the dialog + QGridLayout *layout = new QGridLayout(this); - // Create a layout for the dialog - QGridLayout *layout = new QGridLayout(this); - - // Create a tab widget for the dialog - QTabWidget *tab_widget = new QTabWidget(this); - - - // Create a tab for local LLM settings - QWidget *local_llm_tab = new QWidget(this); - QGridLayout *local_llm_tab_layout = new QGridLayout(local_llm_tab); - local_llm_tab->setLayout(local_llm_tab_layout); - - - // Create a tab for cloud LLM settings - QWidget *cloud_llm_tab = new QWidget(this); - QGridLayout *cloud_llm_tab_layout = new QGridLayout(cloud_llm_tab); - cloud_llm_tab->setLayout(cloud_llm_tab_layout); + // Create a tab widget for the dialog + QTabWidget *tab_widget = new QTabWidget(this); + // Create a tab for local LLM settings + QWidget *local_llm_tab = new QWidget(this); + QGridLayout *local_llm_tab_layout = new QGridLayout(local_llm_tab); + local_llm_tab->setLayout(local_llm_tab_layout); - // Create a tab for general LLM settings - QWidget *general_llm_tab = new QWidget(this); - QGridLayout *general_llm_tab_layout = new QGridLayout(general_llm_tab); - general_llm_tab->setLayout(general_llm_tab_layout); + // Create a tab for cloud LLM settings + QWidget *cloud_llm_tab = new QWidget(this); + QGridLayout *cloud_llm_tab_layout = new QGridLayout(cloud_llm_tab); + cloud_llm_tab->setLayout(cloud_llm_tab_layout); + // Create a tab for general LLM settings + QWidget *general_llm_tab = new QWidget(this); + QGridLayout *general_llm_tab_layout = new QGridLayout(general_llm_tab); + general_llm_tab->setLayout(general_llm_tab_layout); - // Add the tabs to the tab widget - tab_widget->addTab(general_llm_tab, "General"); - tab_widget->addTab(local_llm_tab, "Local LLM"); - tab_widget->addTab(cloud_llm_tab, "Cloud LLM"); + // Add the tabs to the tab widget + tab_widget->addTab(general_llm_tab, "General"); + tab_widget->addTab(local_llm_tab, "Local LLM"); + tab_widget->addTab(cloud_llm_tab, "Cloud LLM"); - // Add the tab widget to the layout - layout->addWidget(tab_widget); + // Add the tab widget to the layout + layout->addWidget(tab_widget); - this->setLayout(layout); - this->setWindowTitle("LLM Settings"); - this->resize(600, 400); + this->setLayout(layout); + this->setWindowTitle("LLM Settings"); + this->resize(600, 400); - // Use a form layout for the local LLM tab - QFormLayout *local_llm_form_layout = new QFormLayout(local_llm_tab); - local_llm_form_layout->setFieldGrowthPolicy(QFormLayout::ExpandingFieldsGrow); + // Use a form layout for the local LLM tab + QFormLayout *local_llm_form_layout = new QFormLayout(local_llm_tab); + 
local_llm_form_layout->setFieldGrowthPolicy(QFormLayout::ExpandingFieldsGrow); // Model file path QHBoxLayout *fileInputLayout = new QHBoxLayout; @@ -67,8 +65,8 @@ LLMSettingsDialog::LLMSettingsDialog(QWidget *parent) : QDialog(parent) QLineEdit *filePathLineEdit = new QLineEdit; filePathLineEdit->setPlaceholderText("Model File"); - // set value from request_data - // filePathLineEdit->setText(QString::fromStdString(request_data->url)); + // set value from config + filePathLineEdit->setText(QString::fromStdString(global_llm_config.local_model_path)); fileInputLayout->addWidget(filePathLineEdit); // add file selector button if file is selected QPushButton *fileButton = new QPushButton("..."); @@ -83,67 +81,93 @@ LLMSettingsDialog::LLMSettingsDialog(QWidget *parent) : QDialog(parent) } }); - // Add the form layout to the local LLM tab layout - local_llm_tab_layout->addLayout(local_llm_form_layout, 0, 0, 1, 1); - - /** CLOUD */ - // Use a form layout for the cloud LLM tab - QFormLayout *cloud_llm_form_layout = new QFormLayout(cloud_llm_tab); - // set growing policy for the form layout - cloud_llm_form_layout->setFieldGrowthPolicy(QFormLayout::ExpandingFieldsGrow); - - // Add the form layout to the cloud LLM tab layout - cloud_llm_tab_layout->addLayout(cloud_llm_form_layout, 0, 0, 1, 1); - - // add openai api key input - QLineEdit *openai_api_key_input = new QLineEdit(this); - openai_api_key_input->setPlaceholderText("sk-..."); - cloud_llm_form_layout->addRow("OpenAI API Key", openai_api_key_input); - - // add openai engine input - QLineEdit *openai_engine_input = new QLineEdit(this); - openai_engine_input->setPlaceholderText("OpenAI Engine"); - cloud_llm_form_layout->addRow("OpenAI Engine", openai_engine_input); - // default engine gpt-3.5-turbo - openai_engine_input->setText("gpt-3.5-turbo"); - - /** GENERAL */ - // Use a form layout for the general LLM tab - QFormLayout *general_llm_form_layout = new QFormLayout(general_llm_tab); - // auto stretch the form layout - general_llm_form_layout->setFieldGrowthPolicy(QFormLayout::ExpandingFieldsGrow); - - // Add the form layout to the general LLM tab layout - general_llm_tab_layout->addLayout(general_llm_form_layout, 0, 0, 1, 1); - - // add a selector between local and clod llm - QComboBox *llm_selector = new QComboBox(this); - llm_selector->addItem("Local LLM"); - llm_selector->addItem("Cloud LLM"); - general_llm_form_layout->addRow("LLM", llm_selector); - - // add system prompt input with a multi-line text edit - QTextEdit *system_prompt_input = new QTextEdit(this); - system_prompt_input->setPlaceholderText("System Prompt"); - general_llm_form_layout->addRow("System Prompt", system_prompt_input); - // set the system prompt default - system_prompt_input->setText(QString::fromStdString(LLAMA_DEFAULT_SYSTEM_PROMPT)); - - // add max number of tokens input - QLineEdit *max_tokens_input = new QLineEdit(this); - max_tokens_input->setPlaceholderText("Max Tokens"); - general_llm_form_layout->addRow("Max Tokens", max_tokens_input); - // set the max tokens default - max_tokens_input->setText("64"); - - // add temperature input - QLineEdit *temperature_input = new QLineEdit(this); - temperature_input->setPlaceholderText("Temperature"); - general_llm_form_layout->addRow("Temperature", temperature_input); - // set the temperature default - temperature_input->setText("0.9"); -} + // Add the form layout to the local LLM tab layout + local_llm_tab_layout->addLayout(local_llm_form_layout, 0, 0, 1, 1); + + /** CLOUD */ + // Use a form layout for the cloud LLM tab 
+ QFormLayout *cloud_llm_form_layout = new QFormLayout(cloud_llm_tab); + // set growing policy for the form layout + cloud_llm_form_layout->setFieldGrowthPolicy(QFormLayout::ExpandingFieldsGrow); + + // Add the form layout to the cloud LLM tab layout + cloud_llm_tab_layout->addLayout(cloud_llm_form_layout, 0, 0, 1, 1); + + // add openai api key input + QLineEdit *openai_api_key_input = new QLineEdit(this); + openai_api_key_input->setPlaceholderText("sk-..."); + openai_api_key_input->setText(QString::fromStdString(global_llm_config.cloud_api_key)); + cloud_llm_form_layout->addRow("OpenAI API Key", openai_api_key_input); + + // add openai engine input + QLineEdit *openai_engine_input = new QLineEdit(this); + openai_engine_input->setPlaceholderText("OpenAI Engine"); + openai_engine_input->setText(QString::fromStdString(global_llm_config.cloud_model_name)); + cloud_llm_form_layout->addRow("OpenAI Engine", openai_engine_input); + + /** GENERAL */ + // Use a form layout for the general LLM tab + QFormLayout *general_llm_form_layout = new QFormLayout(general_llm_tab); + // auto stretch the form layout + general_llm_form_layout->setFieldGrowthPolicy(QFormLayout::ExpandingFieldsGrow); + + // Add the form layout to the general LLM tab layout + general_llm_tab_layout->addLayout(general_llm_form_layout, 0, 0, 1, 1); + + // add a selector between local and clod llm + QComboBox *llm_selector = new QComboBox(this); + llm_selector->addItem("Local LLM"); + llm_selector->addItem("Cloud LLM"); + // set the default + llm_selector->setCurrentIndex(global_llm_config.local ? 0 : 1); + general_llm_form_layout->addRow("LLM", llm_selector); + + // add system prompt input with a multi-line text edit + QTextEdit *system_prompt_input = new QTextEdit(this); + system_prompt_input->setPlaceholderText("System Prompt"); + general_llm_form_layout->addRow("System Prompt", system_prompt_input); + // set the system prompt default + system_prompt_input->setText(QString::fromStdString(global_llm_config.system_prompt)); + + // add max number of tokens input + QLineEdit *max_tokens_input = new QLineEdit(this); + max_tokens_input->setPlaceholderText("Max Tokens"); + general_llm_form_layout->addRow("Max Tokens", max_tokens_input); + // set the max tokens default + max_tokens_input->setText(QString::number(global_llm_config.max_output_tokens)); + + // add temperature input + QLineEdit *temperature_input = new QLineEdit(this); + temperature_input->setPlaceholderText("Temperature"); + general_llm_form_layout->addRow("Temperature", temperature_input); + // set the temperature default + temperature_input->setText(QString::number(global_llm_config.temperature)); + + // add a save button to save all the settings, add to the grid layout of the dialog + QPushButton *save_button = new QPushButton("Save and Close", this); + layout->addWidget(save_button); + + // connect the save button to save the settings + this->connect(save_button, &QPushButton::clicked, this, [=]() { + // get settings from UI into config struct + global_llm_config.local = llm_selector->currentIndex() == 0; + global_llm_config.local_model_path = filePathLineEdit->text().toStdString(); + global_llm_config.cloud_api_key = openai_api_key_input->text().toStdString(); + global_llm_config.cloud_model_name = openai_engine_input->text().toStdString(); + global_llm_config.system_prompt = system_prompt_input->toPlainText().toStdString(); + global_llm_config.max_output_tokens = max_tokens_input->text().toUShort(); + global_llm_config.temperature = temperature_input->text().toFloat(); 
+ + // serialize to json and save to the OBS module settings + if (saveConfig() == OBS_BRAIN_CONFIG_SUCCESS) { + obs_log(LOG_INFO, "Saved LLM settings"); + } else { + obs_log(LOG_ERROR, "Failed to save LLM settings"); + } -LLMSettingsDialog::~LLMSettingsDialog() -{ + // close the dialog + this->close(); + }); } + +LLMSettingsDialog::~LLMSettingsDialog() {} diff --git a/src/llm-dock/llama-inference.cpp b/src/llm-dock/llama-inference.cpp index 78c8c03..008b742 100644 --- a/src/llm-dock/llama-inference.cpp +++ b/src/llm-dock/llama-inference.cpp @@ -1,6 +1,8 @@ #include "llama-inference.h" #include "plugin-support.h" +#include "llm-config-data.h" + #include #include @@ -9,11 +11,6 @@ #include #include -const std::string LLAMA_DEFAULT_SYSTEM_PROMPT = R"([INST] <> -You are a helpful, respectful, positive, safe and honest assistant. -Don't include harmful, unethical, racist, sexist, toxic, dangerous, socially biased, untruthful or illegal content. -<> Q: {0} [/INST] A:)"; - std::string replace(const std::string &s, const std::string &from, const std::string &to) { std::string result = s; @@ -25,14 +22,15 @@ std::string replace(const std::string &s, const std::string &from, const std::st return result; } -std::string get_system_info(const llama_context_params & params) { - std::ostringstream os; +std::string get_system_info(const llama_context_params ¶ms) +{ + std::ostringstream os; - os << "system_info: n_threads = " << params.n_threads; - os << " (n_threads_batch = " << params.n_threads_batch << ")"; - os << " / " << std::thread::hardware_concurrency() << " | " << llama_print_system_info(); + os << "system_info: n_threads = " << params.n_threads; + os << " (n_threads_batch = " << params.n_threads_batch << ")"; + os << " / " << std::thread::hardware_concurrency() << " | " << llama_print_system_info(); - return os.str(); + return os.str(); } std::vector llama_tokenize(const struct llama_model *model, const std::string &text, @@ -100,11 +98,6 @@ struct llama_context *llama_init_context(const std::string &model_file_path) // initialize the context struct llama_context_params lparams = llama_context_default_params(); - // tune these to your liking - // lparams.n_ctx = 2048; - // lparams.seed = 1; - // lparams.f16_kv = true; - struct llama_context *ctx_llama = llama_new_context_with_model(model_llama, lparams); if (ctx_llama == nullptr) { @@ -117,7 +110,7 @@ struct llama_context *llama_init_context(const std::string &model_file_path) return nullptr; } - obs_log(LOG_INFO, "%s", get_system_info(lparams).c_str()); + obs_log(LOG_INFO, "%s", get_system_info(lparams).c_str()); // Warm up in another thread std::thread t([ctx_llama, lparams]() { @@ -128,10 +121,10 @@ struct llama_context *llama_init_context(const std::string &model_file_path) llama_token_eos(ctx_llama), }; - llama_decode(ctx_llama, - llama_batch_get_one(tokens_list.data(), - (int)std::min(tokens_list.size(), (size_t)lparams.n_batch), - 0, 0)); + llama_decode(ctx_llama, llama_batch_get_one(tokens_list.data(), + (int)std::min(tokens_list.size(), + (size_t)lparams.n_batch), + 0, 0)); llama_kv_cache_tokens_rm(ctx_llama, -1, -1); llama_reset_timings(ctx_llama); @@ -149,7 +142,7 @@ std::string llama_inference(const std::string &promptIn, struct llama_context *c // tokenize the prompt // replace {0} in the system prompt with the prompt - std::string prompt = replace(LLAMA_DEFAULT_SYSTEM_PROMPT, "{0}", promptIn); + std::string prompt = replace(global_llm_config.system_prompt, "{0}", promptIn); std::vector tokens_list; tokens_list = 
::llama_tokenize(ctx, prompt, true); diff --git a/src/llm-dock/llama-inference.h b/src/llm-dock/llama-inference.h index ba870c3..0315b49 100644 --- a/src/llm-dock/llama-inference.h +++ b/src/llm-dock/llama-inference.h @@ -5,6 +5,4 @@ struct llama_context *llama_init_context(const std::string &model_file_path); std::string llama_inference(const std::string &prompt, struct llama_context *ctx, - std::function partial_generation_callback); - -extern const std::string LLAMA_DEFAULT_SYSTEM_PROMPT; + std::function partial_generation_callback); diff --git a/src/llm-dock/llm-config-data.cpp b/src/llm-dock/llm-config-data.cpp new file mode 100644 index 0000000..7cb471f --- /dev/null +++ b/src/llm-dock/llm-config-data.cpp @@ -0,0 +1,135 @@ +#include "llm-config-data.h" +#include "plugin-support.h" + +#include +#include +#include + +llm_config_data global_llm_config; +llm_global_context global_llm_context; + +void config_defaults() +{ + const std::string LLAMA_DEFAULT_SYSTEM_PROMPT = R"([INST] <> +You are a helpful, respectful, positive, safe and honest assistant. +Don't include harmful, unethical, racist, sexist, toxic, dangerous, socially biased, untruthful or illegal content. +<> Q: {0} [/INST] A:)"; + + global_llm_config.local = true; + global_llm_config.local_model_path = ""; + global_llm_config.cloud_model_name = ""; + global_llm_config.cloud_api_key = ""; + global_llm_config.temperature = 0.9; + global_llm_config.max_output_tokens = 64; + global_llm_config.system_prompt = LLAMA_DEFAULT_SYSTEM_PROMPT; +} + +void create_config_folder() +{ + char *config_folder_path = obs_module_config_path(""); + if (config_folder_path == nullptr) { + obs_log(LOG_ERROR, "Failed to get config folder path"); + return; + } + std::filesystem::path config_folder_std_path(config_folder_path); + bfree(config_folder_path); + + // create the folder if it doesn't exist + if (!std::filesystem::exists(config_folder_std_path)) { +#ifdef _WIN32 + obs_log(LOG_INFO, "Config folder does not exist, creating: %S", + config_folder_std_path.c_str()); +#else + obs_log(LOG_INFO, "Config folder does not exist, creating: %s", + config_folder_std_path.c_str()); +#endif + // Create the config folder + std::filesystem::create_directories(config_folder_std_path); + } +} + +int getConfig(config_t **config, bool create_if_not_exist = false) +{ + create_config_folder(); // ensure the config folder exists + + // Get the config file + char *config_file_path = obs_module_config_path("config.ini"); + + int ret = config_open(config, config_file_path, + create_if_not_exist ? 
CONFIG_OPEN_ALWAYS : CONFIG_OPEN_EXISTING); + if (ret != CONFIG_SUCCESS) { + obs_log(LOG_INFO, "Failed to open config file %s", config_file_path); + return OBS_BRAIN_CONFIG_FAIL; + } + + return OBS_BRAIN_CONFIG_SUCCESS; +} + +std::string llm_config_data_to_json(const llm_config_data &data); +llm_config_data llm_config_data_from_json(const std::string &json); + +int saveConfig(bool create_if_not_exist) +{ + config_t *config_file; + if (getConfig(&config_file, create_if_not_exist) == OBS_BRAIN_CONFIG_SUCCESS) { + std::string json = llm_config_data_to_json(global_llm_config); + config_set_string(config_file, "general", "llm_config", json.c_str()); + config_save(config_file); + config_close(config_file); + return OBS_BRAIN_CONFIG_SUCCESS; + } + return OBS_BRAIN_CONFIG_FAIL; +} + +int loadConfig() +{ + config_t *config_file; + if (getConfig(&config_file) == OBS_BRAIN_CONFIG_SUCCESS) { + const char *json = config_get_string(config_file, "general", "llm_config"); + if (json != nullptr) { + global_llm_config = llm_config_data_from_json(json); + config_close(config_file); + return OBS_BRAIN_CONFIG_SUCCESS; + } + config_close(config_file); + } else { + obs_log(LOG_WARNING, "Failed to load config file. Creating a new one."); + config_defaults(); + if (saveConfig(true) == OBS_BRAIN_CONFIG_SUCCESS) { + obs_log(LOG_INFO, "Saved default LLM settings"); + return OBS_BRAIN_CONFIG_SUCCESS; + } else { + obs_log(LOG_ERROR, "Failed to save LLM settings"); + } + } + return OBS_BRAIN_CONFIG_FAIL; +} + +// serialize llm_config_data to a json string +std::string llm_config_data_to_json(const llm_config_data &data) +{ + nlohmann::json j; + j["local"] = data.local; + j["local_model_path"] = data.local_model_path; + j["cloud_model_name"] = data.cloud_model_name; + j["cloud_api_key"] = data.cloud_api_key; + j["temperature"] = data.temperature; + j["max_output_tokens"] = data.max_output_tokens; + j["system_prompt"] = data.system_prompt; + return j.dump(); +} + +// deserialize llm_config_data from a json string +llm_config_data llm_config_data_from_json(const std::string &json) +{ + nlohmann::json j = nlohmann::json::parse(json); + llm_config_data data; + data.local = j["local"]; + data.local_model_path = j["local_model_path"]; + data.cloud_model_name = j["cloud_model_name"]; + data.cloud_api_key = j["cloud_api_key"]; + data.temperature = j["temperature"]; + data.max_output_tokens = j["max_output_tokens"]; + data.system_prompt = j["system_prompt"]; + return data; +} diff --git a/src/llm-dock/llm-config-data.h b/src/llm-dock/llm-config-data.h new file mode 100644 index 0000000..424f5e1 --- /dev/null +++ b/src/llm-dock/llm-config-data.h @@ -0,0 +1,50 @@ +#ifndef LLM_CONFIG_DATA_H +#define LLM_CONFIG_DATA_H + +#include + +#include + +struct llm_config_data { + // local or cloud + bool local; + + // local model path + std::string local_model_path; + + // cloud model name + std::string cloud_model_name; + + // cloud API key + std::string cloud_api_key; + + // temperature + float temperature; + + // max output tokens + uint16_t max_output_tokens; + + // system prompt + std::string system_prompt; +}; + +// forward declaration +struct llama_context; + +struct llm_global_context { + // error message + std::string error_message; + // llama context + struct llama_context *ctx_llama; +}; + +extern llm_config_data global_llm_config; +extern llm_global_context global_llm_context; + +#define OBS_BRAIN_CONFIG_FAIL -1 +#define OBS_BRAIN_CONFIG_SUCCESS 0 + +int saveConfig(bool create_if_not_exist = false); +int loadConfig(); + +#endif // 
LLM_CONFIG_DATA_H diff --git a/src/llm-dock/llm-dock-ui.cpp b/src/llm-dock/llm-dock-ui.cpp index c686638..6f60783 100644 --- a/src/llm-dock/llm-dock-ui.cpp +++ b/src/llm-dock/llm-dock-ui.cpp @@ -7,61 +7,63 @@ #include "plugin-support.h" #include "llm-dock-ui.hpp" #include "llm-dock.h" -// #include "../model-utils/model-downloader.h" #include "llama-inference.h" #include "LLMSettingsDialog.hpp" +#include "llm-config-data.h" -QDockWidget *createLLMDockWidget(QMainWindow *parent, void *llm_ctx); +QDockWidget *createLLMDockWidget(QMainWindow *parent); void register_llm_dock(void) { - // Find the model file - // std::string model_file_path = find_model_file("models/ggml-gpt2-117M.bin"); - // std::string model_file_path = "/Users/roy_shilkrot/Downloads/open-llama-3b-q4_0.gguf"; - std::string model_file_path = - "/Users/roy_shilkrot/Downloads/mistral-7b-instruct-v0.1.Q4_K_M.gguf"; - - if (model_file_path.empty()) { - // If the model file is not found, start the model downloader UI dialog - // download_model_with_ui_dialog("ggml-gpt2-117M.bin", [](bool success) { - // if (success) { - // // If the download is successful, register the GPT dock - // obs_frontend_add_dock(createGPTDockWidget(obs_frontend_get_main_window())); - // } - // }); - obs_log(LOG_ERROR, "LLM Model not found."); + // load plugin settings from config + if (loadConfig() == OBS_BRAIN_CONFIG_SUCCESS) { + obs_log(LOG_INFO, "Loaded LLM config from config file"); } else { - struct llama_context *ctx_llama = llama_init_context(model_file_path); + obs_log(LOG_INFO, "Failed to load LLM config from config file"); + } - // If the model is loaded successfully, register the GPT dock - if (ctx_llama == nullptr) { - obs_log(LOG_ERROR, "Failed to load LLM model from %s.", - model_file_path.c_str()); - return; + if (global_llm_config.local) { + obs_log(LOG_INFO, "Using local LLM model: %s", + global_llm_config.local_model_path.c_str()); + // initialize the local LLM model + if (global_llm_config.local_model_path.empty()) { + obs_log(LOG_ERROR, "LLM Model not found."); + } else { + global_llm_context.ctx_llama = + llama_init_context(global_llm_config.local_model_path); + + // If the model is loaded successfully, register the GPT dock + if (global_llm_context.ctx_llama == nullptr) { + obs_log(LOG_ERROR, "Failed to load LLM model from %s.", + global_llm_config.local_model_path.c_str()); + global_llm_context.error_message = + "Failed to load local LLM model."; + return; + } } - - // register the GPT dock - obs_frontend_add_dock(createLLMDockWidget( - (QMainWindow *)obs_frontend_get_main_window(), ctx_llama)); + } else { + obs_log(LOG_INFO, "Using cloud LLM model: %s", + global_llm_config.cloud_model_name.c_str()); } + + // register the GPT dock + obs_frontend_add_dock(createLLMDockWidget((QMainWindow *)obs_frontend_get_main_window())); } -QDockWidget *createLLMDockWidget(QMainWindow *parent, void *llm_ctx) +QDockWidget *createLLMDockWidget(QMainWindow *parent) { QDockWidget *dock = new QDockWidget(parent); dock->setObjectName("LLMDockWidget"); dock->setWindowTitle("LLM Dock"); // dock->setAllowedAreas(Qt::LeftDockWidgetArea | Qt::RightDockWidgetArea); dock->setFeatures(QDockWidget::DockWidgetMovable | QDockWidget::DockWidgetFloatable); - dock->setWidget(new LLMDockWidgetUI(dock, llm_ctx)); + dock->setWidget(new LLMDockWidgetUI(dock)); parent->addDockWidget(Qt::BottomDockWidgetArea, dock); return dock; } -LLMDockWidgetUI::LLMDockWidgetUI(QWidget *parent, void *llm_ctx) : QWidget(parent) +LLMDockWidgetUI::LLMDockWidgetUI(QWidget *parent) : 
QWidget(parent) { - this->llm_ctx = llm_ctx; - this->layout = new QVBoxLayout(this); this->layout->setContentsMargins(0, 0, 0, 0); @@ -76,6 +78,10 @@ LLMDockWidgetUI::LLMDockWidgetUI(QWidget *parent, void *llm_ctx) : QWidget(paren this->input_text_edit->setLineWrapMode(QTextEdit::WidgetWidth); this->input_text_edit->setStyleSheet( "QTextEdit { background-color: #000000; color: #ffffff; }"); + // dont allow rich text + this->input_text_edit->setAcceptRichText(false); + // make it 3 lines tall + this->input_text_edit->setFixedHeight(60); this->layout->addWidget(this->input_text_edit); this->button_layout = new QHBoxLayout(this); @@ -92,6 +98,18 @@ LLMDockWidgetUI::LLMDockWidgetUI(QWidget *parent, void *llm_ctx) : QWidget(paren this->settings_button = new QPushButton("Settings", this); this->button_layout->addWidget(this->settings_button); + // add an error message label, hidden + this->error_message_label = new QLabel(this); + this->error_message_label->setStyleSheet("QLabel { color: #ff0000; }"); + this->error_message_label->setVisible(false); + + // if there's an error message, show it + if (!global_llm_context.error_message.empty()) { + this->error_message_label->setText( + QString::fromStdString(global_llm_context.error_message)); + this->error_message_label->setVisible(true); + } + // connect the settings button to open the settings dialog this->connect(this->settings_button, &QPushButton::clicked, this, [=]() { // open the settings dialog @@ -115,16 +133,21 @@ void LLMDockWidgetUI::generate() return; } - this->text_edit->insertHtml(QString("
%1
").arg(input_text)); + this->text_edit->insertHtml( + QString("%1
").arg(input_text)); this->text_edit->moveCursor(QTextCursor::End); this->input_text_edit->clear(); + // also clear any styles + this->input_text_edit->setStyleSheet( + "QTextEdit { background-color: #000000; color: #ffffff; }"); // call LLM inference on a separate thread using a lambda function std::thread t([input_text, this]() { std::string generated_text = llama_inference( - input_text.toStdString(), (struct llama_context *)this->llm_ctx, + input_text.toStdString(), global_llm_context.ctx_llama, [this](const std::string &partial_generation) { - emit update_text_signal(QString::fromStdString(partial_generation), true); + emit update_text_signal(QString::fromStdString(partial_generation), + true); }); emit update_text_signal(QString("
"), true); // generated_text = std::regex_replace( @@ -158,10 +181,11 @@ void LLMDockWidgetUI::update_text(const QString &text, bool partial_generation) text_with_non_breaking_spaces.replace(" ", " "); // append text in a different color - this->text_edit->insertHtml( - QString("%1").arg(text_with_non_breaking_spaces)); + this->text_edit->insertHtml(QString("%1") + .arg(text_with_non_breaking_spaces)); } else { - this->text_edit->insertHtml(QString("
%1
").arg(text)); + this->text_edit->insertHtml( + QString("%1
").arg(text)); } // always scroll to the bottom this->text_edit->moveCursor(QTextCursor::End); diff --git a/src/llm-dock/llm-dock-ui.hpp b/src/llm-dock/llm-dock-ui.hpp index 4d2ace1..cf98245 100644 --- a/src/llm-dock/llm-dock-ui.hpp +++ b/src/llm-dock/llm-dock-ui.hpp @@ -3,7 +3,7 @@ class LLMDockWidgetUI : public QWidget { Q_OBJECT public: - explicit LLMDockWidgetUI(QWidget *parent, void *llm_ctx); + explicit LLMDockWidgetUI(QWidget *parent); ~LLMDockWidgetUI(); public slots: @@ -15,7 +15,6 @@ public slots: void update_text_signal(const QString &text, bool partial_generation); private: - void *llm_ctx; QVBoxLayout *layout; QTextEdit *text_edit; QTextEdit *input_text_edit; @@ -23,4 +22,5 @@ public slots: QPushButton *generate_button; QPushButton *clear_button; QPushButton *settings_button; + QLabel *error_message_label; }; diff --git a/src/plugin-main.c b/src/plugin-main.c index 142b557..1ae23d5 100644 --- a/src/plugin-main.c +++ b/src/plugin-main.c @@ -26,8 +26,7 @@ OBS_MODULE_USE_DEFAULT_LOCALE(PLUGIN_NAME, "en-US") bool obs_module_load(void) { - obs_log(LOG_INFO, "plugin loaded successfully (version %s)", - PLUGIN_VERSION); + obs_log(LOG_INFO, "plugin loaded successfully (version %s)", PLUGIN_VERSION); register_llm_dock(); return true; }