diff --git a/CMakeLists.txt b/CMakeLists.txt index 190ca54..e03ca36 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,6 +39,8 @@ target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE Llamacpp) target_sources(${CMAKE_PROJECT_NAME} PRIVATE src/plugin-main.c) +target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE vendor/nlohmann-json) + add_subdirectory(src/llm-dock) set_target_properties_plugin(${CMAKE_PROJECT_NAME} PROPERTIES OUTPUT_NAME ${_name}) diff --git a/cmake/BuildLlamacpp.cmake b/cmake/BuildLlamacpp.cmake index 0863643..8ebc7b8 100644 --- a/cmake/BuildLlamacpp.cmake +++ b/cmake/BuildLlamacpp.cmake @@ -11,12 +11,11 @@ endif() # On linux add the `-fPIC` flag to the compiler if(UNIX AND NOT APPLE) set(LLAMA_EXTRA_CXX_FLAGS "-fPIC") - set(LLAMA_ADDITIONAL_CMAKE_ARGS -DLLAMA_BLAS=OFF -DLLAMA_CUBLAS=OFF -DLLAMA_OPENBLAS=OFF -DLLAMA_NO_AVX=ON - -DLLAMA_NO_AVX2=ON) + set(LLAMA_ADDITIONAL_CMAKE_ARGS -DLLAMA_NATIVE=ON) endif() if(APPLE) - set(LLAMA_ADDITIONAL_CMAKE_ARGS -DLLAMA_NATIVE=OFF -DLLAMA_METAL=OFF -DLLAMA_AVX=ON -DLLAMA_AVX2=ON - -DLLAMA_FMA=ON -DLLAMA_F16C=ON) + set(LLAMA_ADDITIONAL_CMAKE_ARGS -DLLAMA_NATIVE=OFF -DLLAMA_METAL=OFF -DLLAMA_AVX=ON -DLLAMA_AVX2=ON -DLLAMA_FMA=ON + -DLLAMA_F16C=ON) endif() if(WIN32) @@ -51,22 +50,22 @@ if(WIN32) Llamacpp_Build DOWNLOAD_EXTRACT_TIMESTAMP true GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git - GIT_TAG 7b374c9ac9b9861bb737eec060e4dfa29d229259 + GIT_TAG 370359e5baf619f3a8d461023143d1494b1e8fde BUILD_COMMAND ${CMAKE_COMMAND} --build --config ${Llamacpp_BUILD_TYPE} BUILD_BYPRODUCTS /lib/static/${CMAKE_STATIC_LIBRARY_PREFIX}llama${CMAKE_STATIC_LIBRARY_SUFFIX} /bin/${CMAKE_SHARED_LIBRARY_PREFIX}llama${CMAKE_SHARED_LIBRARY_SUFFIX} /lib/${CMAKE_IMPORT_LIBRARY_PREFIX}llama${CMAKE_IMPORT_LIBRARY_SUFFIX} CMAKE_GENERATOR ${CMAKE_GENERATOR} - INSTALL_COMMAND ${CMAKE_COMMAND} --install --config ${Llamacpp_BUILD_TYPE} && ${CMAKE_COMMAND} -E - copy /${Llamacpp_BUILD_TYPE}/llama.lib /lib + INSTALL_COMMAND ${CMAKE_COMMAND} --install --config ${Llamacpp_BUILD_TYPE} && ${CMAKE_COMMAND} -E copy + /${Llamacpp_BUILD_TYPE}/llama.lib /lib CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${LLAMA_ADDITIONAL_ENV} ${CMAKE_COMMAND} -B -G ${CMAKE_GENERATOR} -DCMAKE_INSTALL_PREFIX= -DCMAKE_BUILD_TYPE=${Llamacpp_BUILD_TYPE} -DCMAKE_GENERATOR_PLATFORM=${CMAKE_GENERATOR_PLATFORM} -DCMAKE_OSX_DEPLOYMENT_TARGET=10.13 -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES_} -DCMAKE_CXX_FLAGS=${LLAMA_EXTRA_CXX_FLAGS} -DCMAKE_C_FLAGS=${LLAMA_EXTRA_CXX_FLAGS} -DBUILD_SHARED_LIBS=ON -DLLAMA_BUILD_TESTS=OFF - -DLLAMA_BUILD_EXAMPLES=OFF ${LLAMA_ADDITIONAL_CMAKE_ARGS} -DLLAMA_STATIC=ON) + -DLLAMA_BUILD_EXAMPLES=OFF ${LLAMA_ADDITIONAL_CMAKE_ARGS} -DLLAMA_STATIC=OFF) if(NOT BRAIN_WITH_CUDA) add_dependencies(Llamacpp_Build OpenBLAS) @@ -97,11 +96,11 @@ ExternalProject_Get_Property(Llamacpp_Build INSTALL_DIR) if(WIN32) add_library(Llamacpp::Llama SHARED IMPORTED) set_target_properties( - Llamacpp::Llama - PROPERTIES IMPORTED_LOCATION ${INSTALL_DIR}/bin/${CMAKE_SHARED_LIBRARY_PREFIX}llama${CMAKE_SHARED_LIBRARY_SUFFIX}) + Llamacpp::Llama PROPERTIES IMPORTED_LOCATION + ${INSTALL_DIR}/bin/${CMAKE_SHARED_LIBRARY_PREFIX}llama${CMAKE_SHARED_LIBRARY_SUFFIX}) set_target_properties( - Llamacpp::Llama - PROPERTIES IMPORTED_IMPLIB ${INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}llama${CMAKE_STATIC_LIBRARY_SUFFIX}) + Llamacpp::Llama PROPERTIES IMPORTED_IMPLIB + ${INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}llama${CMAKE_STATIC_LIBRARY_SUFFIX}) install(FILES 
${INSTALL_DIR}/bin/${CMAKE_SHARED_LIBRARY_PREFIX}llama${CMAKE_SHARED_LIBRARY_SUFFIX} DESTINATION "obs-plugins/64bit") @@ -133,9 +132,8 @@ else() # on Linux and MacOS add the static Llama library to the link line add_library(Llamacpp::Llama STATIC IMPORTED) set_target_properties( - Llamacpp::Llama - PROPERTIES IMPORTED_LOCATION - ${INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}llama${CMAKE_STATIC_LIBRARY_SUFFIX}) + Llamacpp::Llama PROPERTIES IMPORTED_LOCATION + ${INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}llama${CMAKE_STATIC_LIBRARY_SUFFIX}) endif(WIN32) add_library(Llamacpp INTERFACE) diff --git a/cmake/common/buildspec_common.cmake b/cmake/common/buildspec_common.cmake index b2c2414..d0f43c7 100644 --- a/cmake/common/buildspec_common.cmake +++ b/cmake/common/buildspec_common.cmake @@ -73,6 +73,13 @@ function(_setup_obs_studio) set(_cmake_version "3.0.0") endif() + message(STATUS "Patch libobs") + execute_process( + COMMAND patch --forward "libobs/CMakeLists.txt" "${CMAKE_CURRENT_SOURCE_DIR}/patch_libobs.diff" + RESULT_VARIABLE _process_result + WORKING_DIRECTORY "${dependencies_dir}/${_obs_destination}") + message(STATUS "Patch - done") + message(STATUS "Configure ${label} (${arch})") execute_process( COMMAND diff --git a/cmake/macos/helpers.cmake b/cmake/macos/helpers.cmake index 60428b7..37b92bf 100644 --- a/cmake/macos/helpers.cmake +++ b/cmake/macos/helpers.cmake @@ -79,9 +79,9 @@ function(set_target_properties_plugin target) CONFIGURATIONS Release DESTINATION . OPTIONAL) - configure_file(cmake/macos/resources/distribution.in "${CMAKE_CURRENT_BINARY_DIR}/distribution" @ONLY) - configure_file(cmake/macos/resources/create-package.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/create-package.cmake" @ONLY) - install(SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/create-package.cmake") + configure_file(cmake/macos/resources/distribution.in "${CMAKE_CURRENT_BINARY_DIR}/distribution" @ONLY) + configure_file(cmake/macos/resources/create-package.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/create-package.cmake" @ONLY) + install(SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/create-package.cmake") endfunction() # target_install_resources: Helper function to add resources into bundle diff --git a/src/llm-dock/CMakeLists.txt b/src/llm-dock/CMakeLists.txt index 51b91b0..c389979 100644 --- a/src/llm-dock/CMakeLists.txt +++ b/src/llm-dock/CMakeLists.txt @@ -1,4 +1,4 @@ -target_sources(${CMAKE_PROJECT_NAME} PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/llm-dock-ui.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/llama-inference.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/LLMSettingsDialog.cpp) +target_sources( + ${CMAKE_PROJECT_NAME} + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/llm-dock-ui.cpp ${CMAKE_CURRENT_SOURCE_DIR}/llama-inference.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/LLMSettingsDialog.cpp ${CMAKE_CURRENT_SOURCE_DIR}/llm-config-data.cpp) diff --git a/src/llm-dock/LLMSettingsDialog.cpp b/src/llm-dock/LLMSettingsDialog.cpp index 51fa43b..ac22622 100644 --- a/src/llm-dock/LLMSettingsDialog.cpp +++ b/src/llm-dock/LLMSettingsDialog.cpp @@ -1,65 +1,63 @@ #include "LLMSettingsDialog.hpp" #include "llama-inference.h" +#include "llm-config-data.h" +#include "plugin-support.h" + +#include LLMSettingsDialog::LLMSettingsDialog(QWidget *parent) : QDialog(parent) { - // Create a dialog with settings for LLMs usage in OBS - // The settings should allow to select local or cloud LLM (OpenAI API) - // The local LLM should allow selecting a .gguf model file for llama.cpp - // The cloud LLM should allow entering an API key for OpenAI API - // The settings should allow to edit the 
following: - // - system prompt for LLM - // - max number of tokens to generate - // - temperature - // - end of text token - - setWindowTitle("LLM Settings"); - setWindowFlags(Qt::Dialog | Qt::WindowCloseButtonHint); - setModal(true); - // set a minimum width for the dialog + // Create a dialog with settings for LLMs usage in OBS + // The settings should allow to select local or cloud LLM (OpenAI API) + // The local LLM should allow selecting a .gguf model file for llama.cpp + // The cloud LLM should allow entering an API key for OpenAI API + // The settings should allow to edit the following: + // - system prompt for LLM + // - max number of tokens to generate + // - temperature + + setWindowTitle("LLM Settings"); + setWindowFlags(Qt::Dialog | Qt::WindowCloseButtonHint); + setModal(true); + // set a minimum width for the dialog setMinimumWidth(500); + // Create a layout for the dialog + QGridLayout *layout = new QGridLayout(this); - // Create a layout for the dialog - QGridLayout *layout = new QGridLayout(this); - - // Create a tab widget for the dialog - QTabWidget *tab_widget = new QTabWidget(this); - - - // Create a tab for local LLM settings - QWidget *local_llm_tab = new QWidget(this); - QGridLayout *local_llm_tab_layout = new QGridLayout(local_llm_tab); - local_llm_tab->setLayout(local_llm_tab_layout); - - - // Create a tab for cloud LLM settings - QWidget *cloud_llm_tab = new QWidget(this); - QGridLayout *cloud_llm_tab_layout = new QGridLayout(cloud_llm_tab); - cloud_llm_tab->setLayout(cloud_llm_tab_layout); + // Create a tab widget for the dialog + QTabWidget *tab_widget = new QTabWidget(this); + // Create a tab for local LLM settings + QWidget *local_llm_tab = new QWidget(this); + QGridLayout *local_llm_tab_layout = new QGridLayout(local_llm_tab); + local_llm_tab->setLayout(local_llm_tab_layout); - // Create a tab for general LLM settings - QWidget *general_llm_tab = new QWidget(this); - QGridLayout *general_llm_tab_layout = new QGridLayout(general_llm_tab); - general_llm_tab->setLayout(general_llm_tab_layout); + // Create a tab for cloud LLM settings + QWidget *cloud_llm_tab = new QWidget(this); + QGridLayout *cloud_llm_tab_layout = new QGridLayout(cloud_llm_tab); + cloud_llm_tab->setLayout(cloud_llm_tab_layout); + // Create a tab for general LLM settings + QWidget *general_llm_tab = new QWidget(this); + QGridLayout *general_llm_tab_layout = new QGridLayout(general_llm_tab); + general_llm_tab->setLayout(general_llm_tab_layout); - // Add the tabs to the tab widget - tab_widget->addTab(general_llm_tab, "General"); - tab_widget->addTab(local_llm_tab, "Local LLM"); - tab_widget->addTab(cloud_llm_tab, "Cloud LLM"); + // Add the tabs to the tab widget + tab_widget->addTab(general_llm_tab, "General"); + tab_widget->addTab(local_llm_tab, "Local LLM"); + tab_widget->addTab(cloud_llm_tab, "Cloud LLM"); - // Add the tab widget to the layout - layout->addWidget(tab_widget); + // Add the tab widget to the layout + layout->addWidget(tab_widget); - this->setLayout(layout); - this->setWindowTitle("LLM Settings"); - this->resize(600, 400); + this->setLayout(layout); + this->setWindowTitle("LLM Settings"); + this->resize(600, 400); - // Use a form layout for the local LLM tab - QFormLayout *local_llm_form_layout = new QFormLayout(local_llm_tab); - local_llm_form_layout->setFieldGrowthPolicy(QFormLayout::ExpandingFieldsGrow); + // Use a form layout for the local LLM tab + QFormLayout *local_llm_form_layout = new QFormLayout(local_llm_tab); + 
local_llm_form_layout->setFieldGrowthPolicy(QFormLayout::ExpandingFieldsGrow); // Model file path QHBoxLayout *fileInputLayout = new QHBoxLayout; @@ -67,8 +65,8 @@ LLMSettingsDialog::LLMSettingsDialog(QWidget *parent) : QDialog(parent) QLineEdit *filePathLineEdit = new QLineEdit; filePathLineEdit->setPlaceholderText("Model File"); - // set value from request_data - // filePathLineEdit->setText(QString::fromStdString(request_data->url)); + // set value from config + filePathLineEdit->setText(QString::fromStdString(global_llm_config.local_model_path)); fileInputLayout->addWidget(filePathLineEdit); // add file selector button if file is selected QPushButton *fileButton = new QPushButton("..."); @@ -83,67 +81,93 @@ LLMSettingsDialog::LLMSettingsDialog(QWidget *parent) : QDialog(parent) } }); - // Add the form layout to the local LLM tab layout - local_llm_tab_layout->addLayout(local_llm_form_layout, 0, 0, 1, 1); - - /** CLOUD */ - // Use a form layout for the cloud LLM tab - QFormLayout *cloud_llm_form_layout = new QFormLayout(cloud_llm_tab); - // set growing policy for the form layout - cloud_llm_form_layout->setFieldGrowthPolicy(QFormLayout::ExpandingFieldsGrow); - - // Add the form layout to the cloud LLM tab layout - cloud_llm_tab_layout->addLayout(cloud_llm_form_layout, 0, 0, 1, 1); - - // add openai api key input - QLineEdit *openai_api_key_input = new QLineEdit(this); - openai_api_key_input->setPlaceholderText("sk-..."); - cloud_llm_form_layout->addRow("OpenAI API Key", openai_api_key_input); - - // add openai engine input - QLineEdit *openai_engine_input = new QLineEdit(this); - openai_engine_input->setPlaceholderText("OpenAI Engine"); - cloud_llm_form_layout->addRow("OpenAI Engine", openai_engine_input); - // default engine gpt-3.5-turbo - openai_engine_input->setText("gpt-3.5-turbo"); - - /** GENERAL */ - // Use a form layout for the general LLM tab - QFormLayout *general_llm_form_layout = new QFormLayout(general_llm_tab); - // auto stretch the form layout - general_llm_form_layout->setFieldGrowthPolicy(QFormLayout::ExpandingFieldsGrow); - - // Add the form layout to the general LLM tab layout - general_llm_tab_layout->addLayout(general_llm_form_layout, 0, 0, 1, 1); - - // add a selector between local and clod llm - QComboBox *llm_selector = new QComboBox(this); - llm_selector->addItem("Local LLM"); - llm_selector->addItem("Cloud LLM"); - general_llm_form_layout->addRow("LLM", llm_selector); - - // add system prompt input with a multi-line text edit - QTextEdit *system_prompt_input = new QTextEdit(this); - system_prompt_input->setPlaceholderText("System Prompt"); - general_llm_form_layout->addRow("System Prompt", system_prompt_input); - // set the system prompt default - system_prompt_input->setText(QString::fromStdString(LLAMA_DEFAULT_SYSTEM_PROMPT)); - - // add max number of tokens input - QLineEdit *max_tokens_input = new QLineEdit(this); - max_tokens_input->setPlaceholderText("Max Tokens"); - general_llm_form_layout->addRow("Max Tokens", max_tokens_input); - // set the max tokens default - max_tokens_input->setText("64"); - - // add temperature input - QLineEdit *temperature_input = new QLineEdit(this); - temperature_input->setPlaceholderText("Temperature"); - general_llm_form_layout->addRow("Temperature", temperature_input); - // set the temperature default - temperature_input->setText("0.9"); -} + // Add the form layout to the local LLM tab layout + local_llm_tab_layout->addLayout(local_llm_form_layout, 0, 0, 1, 1); + + /** CLOUD */ + // Use a form layout for the cloud LLM tab 
+ QFormLayout *cloud_llm_form_layout = new QFormLayout(cloud_llm_tab); + // set growing policy for the form layout + cloud_llm_form_layout->setFieldGrowthPolicy(QFormLayout::ExpandingFieldsGrow); + + // Add the form layout to the cloud LLM tab layout + cloud_llm_tab_layout->addLayout(cloud_llm_form_layout, 0, 0, 1, 1); + + // add openai api key input + QLineEdit *openai_api_key_input = new QLineEdit(this); + openai_api_key_input->setPlaceholderText("sk-..."); + openai_api_key_input->setText(QString::fromStdString(global_llm_config.cloud_api_key)); + cloud_llm_form_layout->addRow("OpenAI API Key", openai_api_key_input); + + // add openai engine input + QLineEdit *openai_engine_input = new QLineEdit(this); + openai_engine_input->setPlaceholderText("OpenAI Engine"); + openai_engine_input->setText(QString::fromStdString(global_llm_config.cloud_model_name)); + cloud_llm_form_layout->addRow("OpenAI Engine", openai_engine_input); + + /** GENERAL */ + // Use a form layout for the general LLM tab + QFormLayout *general_llm_form_layout = new QFormLayout(general_llm_tab); + // auto stretch the form layout + general_llm_form_layout->setFieldGrowthPolicy(QFormLayout::ExpandingFieldsGrow); + + // Add the form layout to the general LLM tab layout + general_llm_tab_layout->addLayout(general_llm_form_layout, 0, 0, 1, 1); + + // add a selector between local and clod llm + QComboBox *llm_selector = new QComboBox(this); + llm_selector->addItem("Local LLM"); + llm_selector->addItem("Cloud LLM"); + // set the default + llm_selector->setCurrentIndex(global_llm_config.local ? 0 : 1); + general_llm_form_layout->addRow("LLM", llm_selector); + + // add system prompt input with a multi-line text edit + QTextEdit *system_prompt_input = new QTextEdit(this); + system_prompt_input->setPlaceholderText("System Prompt"); + general_llm_form_layout->addRow("System Prompt", system_prompt_input); + // set the system prompt default + system_prompt_input->setText(QString::fromStdString(global_llm_config.system_prompt)); + + // add max number of tokens input + QLineEdit *max_tokens_input = new QLineEdit(this); + max_tokens_input->setPlaceholderText("Max Tokens"); + general_llm_form_layout->addRow("Max Tokens", max_tokens_input); + // set the max tokens default + max_tokens_input->setText(QString::number(global_llm_config.max_output_tokens)); + + // add temperature input + QLineEdit *temperature_input = new QLineEdit(this); + temperature_input->setPlaceholderText("Temperature"); + general_llm_form_layout->addRow("Temperature", temperature_input); + // set the temperature default + temperature_input->setText(QString::number(global_llm_config.temperature)); + + // add a save button to save all the settings, add to the grid layout of the dialog + QPushButton *save_button = new QPushButton("Save and Close", this); + layout->addWidget(save_button); + + // connect the save button to save the settings + this->connect(save_button, &QPushButton::clicked, this, [=]() { + // get settings from UI into config struct + global_llm_config.local = llm_selector->currentIndex() == 0; + global_llm_config.local_model_path = filePathLineEdit->text().toStdString(); + global_llm_config.cloud_api_key = openai_api_key_input->text().toStdString(); + global_llm_config.cloud_model_name = openai_engine_input->text().toStdString(); + global_llm_config.system_prompt = system_prompt_input->toPlainText().toStdString(); + global_llm_config.max_output_tokens = max_tokens_input->text().toUShort(); + global_llm_config.temperature = temperature_input->text().toFloat(); 
+ + // serialize to json and save to the OBS module settings + if (saveConfig() == OBS_BRAIN_CONFIG_SUCCESS) { + obs_log(LOG_INFO, "Saved LLM settings"); + } else { + obs_log(LOG_ERROR, "Failed to save LLM settings"); + } -LLMSettingsDialog::~LLMSettingsDialog() -{ + // close the dialog + this->close(); + }); } + +LLMSettingsDialog::~LLMSettingsDialog() {} diff --git a/src/llm-dock/llama-inference.cpp b/src/llm-dock/llama-inference.cpp index 78c8c03..008b742 100644 --- a/src/llm-dock/llama-inference.cpp +++ b/src/llm-dock/llama-inference.cpp @@ -1,6 +1,8 @@ #include "llama-inference.h" #include "plugin-support.h" +#include "llm-config-data.h" + #include #include @@ -9,11 +11,6 @@ #include #include -const std::string LLAMA_DEFAULT_SYSTEM_PROMPT = R"([INST] <> -You are a helpful, respectful, positive, safe and honest assistant. -Don't include harmful, unethical, racist, sexist, toxic, dangerous, socially biased, untruthful or illegal content. -<> Q: {0} [/INST] A:)"; - std::string replace(const std::string &s, const std::string &from, const std::string &to) { std::string result = s; @@ -25,14 +22,15 @@ std::string replace(const std::string &s, const std::string &from, const std::st return result; } -std::string get_system_info(const llama_context_params & params) { - std::ostringstream os; +std::string get_system_info(const llama_context_params ¶ms) +{ + std::ostringstream os; - os << "system_info: n_threads = " << params.n_threads; - os << " (n_threads_batch = " << params.n_threads_batch << ")"; - os << " / " << std::thread::hardware_concurrency() << " | " << llama_print_system_info(); + os << "system_info: n_threads = " << params.n_threads; + os << " (n_threads_batch = " << params.n_threads_batch << ")"; + os << " / " << std::thread::hardware_concurrency() << " | " << llama_print_system_info(); - return os.str(); + return os.str(); } std::vector llama_tokenize(const struct llama_model *model, const std::string &text, @@ -100,11 +98,6 @@ struct llama_context *llama_init_context(const std::string &model_file_path) // initialize the context struct llama_context_params lparams = llama_context_default_params(); - // tune these to your liking - // lparams.n_ctx = 2048; - // lparams.seed = 1; - // lparams.f16_kv = true; - struct llama_context *ctx_llama = llama_new_context_with_model(model_llama, lparams); if (ctx_llama == nullptr) { @@ -117,7 +110,7 @@ struct llama_context *llama_init_context(const std::string &model_file_path) return nullptr; } - obs_log(LOG_INFO, "%s", get_system_info(lparams).c_str()); + obs_log(LOG_INFO, "%s", get_system_info(lparams).c_str()); // Warm up in another thread std::thread t([ctx_llama, lparams]() { @@ -128,10 +121,10 @@ struct llama_context *llama_init_context(const std::string &model_file_path) llama_token_eos(ctx_llama), }; - llama_decode(ctx_llama, - llama_batch_get_one(tokens_list.data(), - (int)std::min(tokens_list.size(), (size_t)lparams.n_batch), - 0, 0)); + llama_decode(ctx_llama, llama_batch_get_one(tokens_list.data(), + (int)std::min(tokens_list.size(), + (size_t)lparams.n_batch), + 0, 0)); llama_kv_cache_tokens_rm(ctx_llama, -1, -1); llama_reset_timings(ctx_llama); @@ -149,7 +142,7 @@ std::string llama_inference(const std::string &promptIn, struct llama_context *c // tokenize the prompt // replace {0} in the system prompt with the prompt - std::string prompt = replace(LLAMA_DEFAULT_SYSTEM_PROMPT, "{0}", promptIn); + std::string prompt = replace(global_llm_config.system_prompt, "{0}", promptIn); std::vector tokens_list; tokens_list = 
::llama_tokenize(ctx, prompt, true); diff --git a/src/llm-dock/llama-inference.h b/src/llm-dock/llama-inference.h index ba870c3..0315b49 100644 --- a/src/llm-dock/llama-inference.h +++ b/src/llm-dock/llama-inference.h @@ -5,6 +5,4 @@ struct llama_context *llama_init_context(const std::string &model_file_path); std::string llama_inference(const std::string &prompt, struct llama_context *ctx, - std::function partial_generation_callback); - -extern const std::string LLAMA_DEFAULT_SYSTEM_PROMPT; + std::function partial_generation_callback); diff --git a/src/llm-dock/llm-config-data.cpp b/src/llm-dock/llm-config-data.cpp new file mode 100644 index 0000000..7cb471f --- /dev/null +++ b/src/llm-dock/llm-config-data.cpp @@ -0,0 +1,135 @@ +#include "llm-config-data.h" +#include "plugin-support.h" + +#include +#include +#include + +llm_config_data global_llm_config; +llm_global_context global_llm_context; + +void config_defaults() +{ + const std::string LLAMA_DEFAULT_SYSTEM_PROMPT = R"([INST] <> +You are a helpful, respectful, positive, safe and honest assistant. +Don't include harmful, unethical, racist, sexist, toxic, dangerous, socially biased, untruthful or illegal content. +<> Q: {0} [/INST] A:)"; + + global_llm_config.local = true; + global_llm_config.local_model_path = ""; + global_llm_config.cloud_model_name = ""; + global_llm_config.cloud_api_key = ""; + global_llm_config.temperature = 0.9; + global_llm_config.max_output_tokens = 64; + global_llm_config.system_prompt = LLAMA_DEFAULT_SYSTEM_PROMPT; +} + +void create_config_folder() +{ + char *config_folder_path = obs_module_config_path(""); + if (config_folder_path == nullptr) { + obs_log(LOG_ERROR, "Failed to get config folder path"); + return; + } + std::filesystem::path config_folder_std_path(config_folder_path); + bfree(config_folder_path); + + // create the folder if it doesn't exist + if (!std::filesystem::exists(config_folder_std_path)) { +#ifdef _WIN32 + obs_log(LOG_INFO, "Config folder does not exist, creating: %S", + config_folder_std_path.c_str()); +#else + obs_log(LOG_INFO, "Config folder does not exist, creating: %s", + config_folder_std_path.c_str()); +#endif + // Create the config folder + std::filesystem::create_directories(config_folder_std_path); + } +} + +int getConfig(config_t **config, bool create_if_not_exist = false) +{ + create_config_folder(); // ensure the config folder exists + + // Get the config file + char *config_file_path = obs_module_config_path("config.ini"); + + int ret = config_open(config, config_file_path, + create_if_not_exist ? 
CONFIG_OPEN_ALWAYS : CONFIG_OPEN_EXISTING); + if (ret != CONFIG_SUCCESS) { + obs_log(LOG_INFO, "Failed to open config file %s", config_file_path); + return OBS_BRAIN_CONFIG_FAIL; + } + + return OBS_BRAIN_CONFIG_SUCCESS; +} + +std::string llm_config_data_to_json(const llm_config_data &data); +llm_config_data llm_config_data_from_json(const std::string &json); + +int saveConfig(bool create_if_not_exist) +{ + config_t *config_file; + if (getConfig(&config_file, create_if_not_exist) == OBS_BRAIN_CONFIG_SUCCESS) { + std::string json = llm_config_data_to_json(global_llm_config); + config_set_string(config_file, "general", "llm_config", json.c_str()); + config_save(config_file); + config_close(config_file); + return OBS_BRAIN_CONFIG_SUCCESS; + } + return OBS_BRAIN_CONFIG_FAIL; +} + +int loadConfig() +{ + config_t *config_file; + if (getConfig(&config_file) == OBS_BRAIN_CONFIG_SUCCESS) { + const char *json = config_get_string(config_file, "general", "llm_config"); + if (json != nullptr) { + global_llm_config = llm_config_data_from_json(json); + config_close(config_file); + return OBS_BRAIN_CONFIG_SUCCESS; + } + config_close(config_file); + } else { + obs_log(LOG_WARNING, "Failed to load config file. Creating a new one."); + config_defaults(); + if (saveConfig(true) == OBS_BRAIN_CONFIG_SUCCESS) { + obs_log(LOG_INFO, "Saved default LLM settings"); + return OBS_BRAIN_CONFIG_SUCCESS; + } else { + obs_log(LOG_ERROR, "Failed to save LLM settings"); + } + } + return OBS_BRAIN_CONFIG_FAIL; +} + +// serialize llm_config_data to a json string +std::string llm_config_data_to_json(const llm_config_data &data) +{ + nlohmann::json j; + j["local"] = data.local; + j["local_model_path"] = data.local_model_path; + j["cloud_model_name"] = data.cloud_model_name; + j["cloud_api_key"] = data.cloud_api_key; + j["temperature"] = data.temperature; + j["max_output_tokens"] = data.max_output_tokens; + j["system_prompt"] = data.system_prompt; + return j.dump(); +} + +// deserialize llm_config_data from a json string +llm_config_data llm_config_data_from_json(const std::string &json) +{ + nlohmann::json j = nlohmann::json::parse(json); + llm_config_data data; + data.local = j["local"]; + data.local_model_path = j["local_model_path"]; + data.cloud_model_name = j["cloud_model_name"]; + data.cloud_api_key = j["cloud_api_key"]; + data.temperature = j["temperature"]; + data.max_output_tokens = j["max_output_tokens"]; + data.system_prompt = j["system_prompt"]; + return data; +} diff --git a/src/llm-dock/llm-config-data.h b/src/llm-dock/llm-config-data.h new file mode 100644 index 0000000..424f5e1 --- /dev/null +++ b/src/llm-dock/llm-config-data.h @@ -0,0 +1,50 @@ +#ifndef LLM_CONFIG_DATA_H +#define LLM_CONFIG_DATA_H + +#include + +#include + +struct llm_config_data { + // local or cloud + bool local; + + // local model path + std::string local_model_path; + + // cloud model name + std::string cloud_model_name; + + // cloud API key + std::string cloud_api_key; + + // temperature + float temperature; + + // max output tokens + uint16_t max_output_tokens; + + // system prompt + std::string system_prompt; +}; + +// forward declaration +struct llama_context; + +struct llm_global_context { + // error message + std::string error_message; + // llama context + struct llama_context *ctx_llama; +}; + +extern llm_config_data global_llm_config; +extern llm_global_context global_llm_context; + +#define OBS_BRAIN_CONFIG_FAIL -1 +#define OBS_BRAIN_CONFIG_SUCCESS 0 + +int saveConfig(bool create_if_not_exist = false); +int loadConfig(); + +#endif // 
LLM_CONFIG_DATA_H diff --git a/src/llm-dock/llm-dock-ui.cpp b/src/llm-dock/llm-dock-ui.cpp index c686638..6f60783 100644 --- a/src/llm-dock/llm-dock-ui.cpp +++ b/src/llm-dock/llm-dock-ui.cpp @@ -7,61 +7,63 @@ #include "plugin-support.h" #include "llm-dock-ui.hpp" #include "llm-dock.h" -// #include "../model-utils/model-downloader.h" #include "llama-inference.h" #include "LLMSettingsDialog.hpp" +#include "llm-config-data.h" -QDockWidget *createLLMDockWidget(QMainWindow *parent, void *llm_ctx); +QDockWidget *createLLMDockWidget(QMainWindow *parent); void register_llm_dock(void) { - // Find the model file - // std::string model_file_path = find_model_file("models/ggml-gpt2-117M.bin"); - // std::string model_file_path = "/Users/roy_shilkrot/Downloads/open-llama-3b-q4_0.gguf"; - std::string model_file_path = - "/Users/roy_shilkrot/Downloads/mistral-7b-instruct-v0.1.Q4_K_M.gguf"; - - if (model_file_path.empty()) { - // If the model file is not found, start the model downloader UI dialog - // download_model_with_ui_dialog("ggml-gpt2-117M.bin", [](bool success) { - // if (success) { - // // If the download is successful, register the GPT dock - // obs_frontend_add_dock(createGPTDockWidget(obs_frontend_get_main_window())); - // } - // }); - obs_log(LOG_ERROR, "LLM Model not found."); + // load plugin settings from config + if (loadConfig() == OBS_BRAIN_CONFIG_SUCCESS) { + obs_log(LOG_INFO, "Loaded LLM config from config file"); } else { - struct llama_context *ctx_llama = llama_init_context(model_file_path); + obs_log(LOG_INFO, "Failed to load LLM config from config file"); + } - // If the model is loaded successfully, register the GPT dock - if (ctx_llama == nullptr) { - obs_log(LOG_ERROR, "Failed to load LLM model from %s.", - model_file_path.c_str()); - return; + if (global_llm_config.local) { + obs_log(LOG_INFO, "Using local LLM model: %s", + global_llm_config.local_model_path.c_str()); + // initialize the local LLM model + if (global_llm_config.local_model_path.empty()) { + obs_log(LOG_ERROR, "LLM Model not found."); + } else { + global_llm_context.ctx_llama = + llama_init_context(global_llm_config.local_model_path); + + // If the model is loaded successfully, register the GPT dock + if (global_llm_context.ctx_llama == nullptr) { + obs_log(LOG_ERROR, "Failed to load LLM model from %s.", + global_llm_config.local_model_path.c_str()); + global_llm_context.error_message = + "Failed to load local LLM model."; + return; + } } - - // register the GPT dock - obs_frontend_add_dock(createLLMDockWidget( - (QMainWindow *)obs_frontend_get_main_window(), ctx_llama)); + } else { + obs_log(LOG_INFO, "Using cloud LLM model: %s", + global_llm_config.cloud_model_name.c_str()); } + + // register the GPT dock + obs_frontend_add_dock(createLLMDockWidget((QMainWindow *)obs_frontend_get_main_window())); } -QDockWidget *createLLMDockWidget(QMainWindow *parent, void *llm_ctx) +QDockWidget *createLLMDockWidget(QMainWindow *parent) { QDockWidget *dock = new QDockWidget(parent); dock->setObjectName("LLMDockWidget"); dock->setWindowTitle("LLM Dock"); // dock->setAllowedAreas(Qt::LeftDockWidgetArea | Qt::RightDockWidgetArea); dock->setFeatures(QDockWidget::DockWidgetMovable | QDockWidget::DockWidgetFloatable); - dock->setWidget(new LLMDockWidgetUI(dock, llm_ctx)); + dock->setWidget(new LLMDockWidgetUI(dock)); parent->addDockWidget(Qt::BottomDockWidgetArea, dock); return dock; } -LLMDockWidgetUI::LLMDockWidgetUI(QWidget *parent, void *llm_ctx) : QWidget(parent) +LLMDockWidgetUI::LLMDockWidgetUI(QWidget *parent) : 
QWidget(parent) { - this->llm_ctx = llm_ctx; - this->layout = new QVBoxLayout(this); this->layout->setContentsMargins(0, 0, 0, 0); @@ -76,6 +78,10 @@ LLMDockWidgetUI::LLMDockWidgetUI(QWidget *parent, void *llm_ctx) : QWidget(paren this->input_text_edit->setLineWrapMode(QTextEdit::WidgetWidth); this->input_text_edit->setStyleSheet( "QTextEdit { background-color: #000000; color: #ffffff; }"); + // dont allow rich text + this->input_text_edit->setAcceptRichText(false); + // make it 3 lines tall + this->input_text_edit->setFixedHeight(60); this->layout->addWidget(this->input_text_edit); this->button_layout = new QHBoxLayout(this); @@ -92,6 +98,18 @@ LLMDockWidgetUI::LLMDockWidgetUI(QWidget *parent, void *llm_ctx) : QWidget(paren this->settings_button = new QPushButton("Settings", this); this->button_layout->addWidget(this->settings_button); + // add an error message label, hidden + this->error_message_label = new QLabel(this); + this->error_message_label->setStyleSheet("QLabel { color: #ff0000; }"); + this->error_message_label->setVisible(false); + + // if there's an error message, show it + if (!global_llm_context.error_message.empty()) { + this->error_message_label->setText( + QString::fromStdString(global_llm_context.error_message)); + this->error_message_label->setVisible(true); + } + // connect the settings button to open the settings dialog this->connect(this->settings_button, &QPushButton::clicked, this, [=]() { // open the settings dialog @@ -115,16 +133,21 @@ void LLMDockWidgetUI::generate() return; } - this->text_edit->insertHtml(QString("
%1
").arg(input_text)); + this->text_edit->insertHtml( + QString("%1
").arg(input_text)); this->text_edit->moveCursor(QTextCursor::End); this->input_text_edit->clear(); + // also clear any styles + this->input_text_edit->setStyleSheet( + "QTextEdit { background-color: #000000; color: #ffffff; }"); // call LLM inference on a separate thread using a lambda function std::thread t([input_text, this]() { std::string generated_text = llama_inference( - input_text.toStdString(), (struct llama_context *)this->llm_ctx, + input_text.toStdString(), global_llm_context.ctx_llama, [this](const std::string &partial_generation) { - emit update_text_signal(QString::fromStdString(partial_generation), true); + emit update_text_signal(QString::fromStdString(partial_generation), + true); }); emit update_text_signal(QString("
"), true); // generated_text = std::regex_replace( @@ -158,10 +181,11 @@ void LLMDockWidgetUI::update_text(const QString &text, bool partial_generation) text_with_non_breaking_spaces.replace(" ", " "); // append text in a different color - this->text_edit->insertHtml( - QString("%1").arg(text_with_non_breaking_spaces)); + this->text_edit->insertHtml(QString("%1") + .arg(text_with_non_breaking_spaces)); } else { - this->text_edit->insertHtml(QString("
%1
").arg(text)); + this->text_edit->insertHtml( + QString("%1
").arg(text)); } // always scroll to the bottom this->text_edit->moveCursor(QTextCursor::End); diff --git a/src/llm-dock/llm-dock-ui.hpp b/src/llm-dock/llm-dock-ui.hpp index 4d2ace1..cf98245 100644 --- a/src/llm-dock/llm-dock-ui.hpp +++ b/src/llm-dock/llm-dock-ui.hpp @@ -3,7 +3,7 @@ class LLMDockWidgetUI : public QWidget { Q_OBJECT public: - explicit LLMDockWidgetUI(QWidget *parent, void *llm_ctx); + explicit LLMDockWidgetUI(QWidget *parent); ~LLMDockWidgetUI(); public slots: @@ -15,7 +15,6 @@ public slots: void update_text_signal(const QString &text, bool partial_generation); private: - void *llm_ctx; QVBoxLayout *layout; QTextEdit *text_edit; QTextEdit *input_text_edit; @@ -23,4 +22,5 @@ public slots: QPushButton *generate_button; QPushButton *clear_button; QPushButton *settings_button; + QLabel *error_message_label; }; diff --git a/src/plugin-main.c b/src/plugin-main.c index 142b557..1ae23d5 100644 --- a/src/plugin-main.c +++ b/src/plugin-main.c @@ -26,8 +26,7 @@ OBS_MODULE_USE_DEFAULT_LOCALE(PLUGIN_NAME, "en-US") bool obs_module_load(void) { - obs_log(LOG_INFO, "plugin loaded successfully (version %s)", - PLUGIN_VERSION); + obs_log(LOG_INFO, "plugin loaded successfully (version %s)", PLUGIN_VERSION); register_llm_dock(); return true; }