locaal-ai · royshil · Mar 25, 2024 · Mar 22, 2024
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -41,6 +41,7 @@ set(USE_SYSTEM_CURL
     CACHE STRING "Use system cURL")
 
 if(LOCALVOCAL_WITH_CUDA)
+  message(STATUS "Building with CUDA support")
   add_compile_definitions("LOCALVOCAL_WITH_CUDA")
 endif()
 

diff --git a/buildspec.json b/buildspec.json
@@ -45,7 +45,7 @@
         }
     },
     "name": "obs-localvocal",
-    "version": "0.1.1",
+    "version": "0.2.0",
     "author": "Roy Shilkrot",
     "website": "https://github.com/occ-ai/obs-localvocal",
     "email": "[email protected]",

diff --git a/cmake/BuildWhispercpp.cmake b/cmake/BuildWhispercpp.cmake
@@ -42,7 +42,7 @@ if(WIN32)
     endif(NOT DEFINED CUDA_TOOLKIT_ROOT_DIR)
 
     set(WHISPER_ADDITIONAL_ENV "CUDAToolkit_ROOT=${CUDA_TOOLKIT_ROOT_DIR}")
-    set(WHISPER_ADDITIONAL_CMAKE_ARGS -DWHISPER_CUBLAS=ON -DWHISPER_OPENBLAS=OFF
+    set(WHISPER_ADDITIONAL_CMAKE_ARGS -DWHISPER_BLAS=OFF -DWHISPER_CUBLAS=ON -DWHISPER_OPENBLAS=OFF
                                       -DCMAKE_GENERATOR_TOOLSET=cuda=${CUDA_TOOLKIT_ROOT_DIR})
   else()
     # Build with OpenBLAS

diff --git a/data/models/ggml-tiny.en.bin → data/models/ggml-model-whisper-tiny.en.bin b/data/models/ggml-tiny.en.bin → data/models/ggml-model-whisper-tiny.en.bin
diff --git a/src/model-utils/model-downloader-ui.cpp b/src/model-utils/model-downloader-ui.cpp
@@ -5,8 +5,7 @@
 
 #include <filesystem>
 
-const std::string MODEL_BASE_PATH = "https://huggingface.co/ggerganov/whisper.cpp";
-const std::string MODEL_PREFIX = "resolve/main/";
+const std::string MODEL_BASE_PATH = "https://ggml.ggerganov.com/";
 
 size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream)
 {
@@ -143,7 +142,7 @@ void ModelDownloadWorker::download_model()
 	const std::string model_filename =
 		this->model_name.substr(this->model_name.find_last_of("/\\") + 1);
 
-	std::string model_url = MODEL_BASE_PATH + "/" + MODEL_PREFIX + model_filename;
+	std::string model_url = MODEL_BASE_PATH + model_filename;
 	obs_log(LOG_INFO, "Model URL: %s", model_url.c_str());
 
 	CURL *curl = curl_easy_init();

diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp
@@ -654,7 +654,8 @@ void transcription_filter_defaults(obs_data_t *s)
 	obs_data_set_default_int(s, "log_level", LOG_DEBUG);
 	obs_data_set_default_bool(s, "log_words", true);
 	obs_data_set_default_bool(s, "caption_to_stream", false);
-	obs_data_set_default_string(s, "whisper_model_path", "models/ggml-tiny.en.bin");
+	obs_data_set_default_string(s, "whisper_model_path",
+				    "models/ggml-model-whisper-tiny.en.bin");
 	obs_data_set_default_string(s, "whisper_language_select", "en");
 	obs_data_set_default_string(s, "subtitle_sources", "none");
 	obs_data_set_default_bool(s, "step_by_step_processing", false);
@@ -754,15 +755,38 @@ obs_properties_t *transcription_filter_properties(void *data)
 		obs_properties_add_list(ppts, "whisper_model_path", MT_("whisper_model"),
 					OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
 
-	obs_property_list_add_string(whisper_models_list, "Tiny (Eng) 75Mb",
-				     "models/ggml-tiny.en.bin");
-	obs_property_list_add_string(whisper_models_list, "Tiny 75Mb", "models/ggml-tiny.bin");
-	obs_property_list_add_string(whisper_models_list, "Base (Eng) 142Mb",
-				     "models/ggml-base.en.bin");
-	obs_property_list_add_string(whisper_models_list, "Base 142Mb", "models/ggml-base.bin");
-	obs_property_list_add_string(whisper_models_list, "Small (Eng) 466Mb",
-				     "models/ggml-small.en.bin");
-	obs_property_list_add_string(whisper_models_list, "Small 466Mb", "models/ggml-small.bin");
+	obs_property_list_add_string(whisper_models_list, "Base q5 57M",
+				     "models/ggml-model-whisper-base-q5_1.bin");
+	obs_property_list_add_string(whisper_models_list, "Base 141M",
+				     "models/ggml-model-whisper-base.bin");
+	obs_property_list_add_string(whisper_models_list, "Base (Eng) q5 57M",
+				     "models/ggml-model-whisper-base.en-q5_1.bin");
+	obs_property_list_add_string(whisper_models_list, "Base (Eng) 141M",
+				     "models/ggml-model-whisper-base.en.bin");
+	obs_property_list_add_string(whisper_models_list, "Large q5 1G",
+				     "models/ggml-model-whisper-large-q5_0.bin");
+	obs_property_list_add_string(whisper_models_list, "Medium q5 514M",
+				     "models/ggml-model-whisper-medium-q5_0.bin");
+	obs_property_list_add_string(whisper_models_list, "Medium (Eng) q5 514M",
+				     "models/ggml-model-whisper-medium.en-q5_0.bin");
+	obs_property_list_add_string(whisper_models_list, "Small q5 181M",
+				     "models/ggml-model-whisper-small-q5_1.bin");
+	obs_property_list_add_string(whisper_models_list, "Small 465M",
+				     "models/ggml-model-whisper-small.bin");
+	obs_property_list_add_string(whisper_models_list, "Small (Eng) q5 181M",
+				     "models/ggml-model-whisper-small.en-q5_1.bin");
+	obs_property_list_add_string(whisper_models_list, "Small (Eng) 465M",
+				     "models/ggml-model-whisper-small.en.bin");
+	obs_property_list_add_string(whisper_models_list, "Tiny q5 31M",
+				     "models/ggml-model-whisper-tiny-q5_1.bin");
+	obs_property_list_add_string(whisper_models_list, "Tiny 74M",
+				     "models/ggml-model-whisper-tiny.bin");
+	obs_property_list_add_string(whisper_models_list, "Tiny (Eng) q5 31M",
+				     "models/ggml-model-whisper-tiny.en-q5_1.bin");
+	obs_property_list_add_string(whisper_models_list, "Tiny (Eng) q8 42M",
+				     "models/ggml-model-whisper-tiny.en-q8_0.bin");
+	obs_property_list_add_string(whisper_models_list, "Tiny (Eng) 74M",
+				     "models/ggml-model-whisper-tiny.en.bin");
 	obs_property_list_add_string(whisper_models_list, "Load external model file",
 				     "!!!external!!!");
 

diff --git a/src/whisper-utils/whisper-processing.cpp b/src/whisper-utils/whisper-processing.cpp
@@ -112,11 +112,13 @@ struct whisper_context *init_whisper_context(const std::string &model_path)
 {
 	obs_log(LOG_INFO, "Loading whisper model from %s", model_path.c_str());
 
-	struct whisper_context_params cparams;
+	struct whisper_context_params cparams = whisper_context_default_params();
 #ifdef LOCALVOCAL_WITH_CUDA
 	cparams.use_gpu = true;
+	obs_log(LOG_INFO, "Using GPU for inference, device %d", cparams.gpu_device);
 #else
 	cparams.use_gpu = false;
+	obs_log(LOG_INFO, "Using CPU for inference");
 #endif
 
 #ifdef _WIN32

diff --git a/vendor/curl b/vendor/curl