fix: use OpenMP flag to avoid macOS segfault (vana-com#34)
tnunamak authored Mar 18, 2024
1 parent 2c943db commit 716df52
Showing 8 changed files with 259 additions and 1,391 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -117,12 +117,12 @@ For most users, the easiest way to install Selfie is to follow the [Quick Start]
6. Run `poetry install` to install required Python dependencies.
7. Optional: Run `./scripts/llama-cpp-python-cublas.sh` to enable hardware acceleration (for details, see [Scripts](#llama-cpp-python-cublassh)).
8. Run `poetry run python -m selfie`, or `poetry run python -m selfie --gpu` if your device is GPU-enabled. The first time you run this, it will download ~4GB of model weights.
+   - On macOS, you may need to run `OMP_NUM_THREADS=1 KMP_DUPLICATE_LIB_OK=TRUE poetry run python -m selfie` to avoid OpenMP errors (with or without `--gpu`). [Read more about OMP_NUM_THREADS here](https://github.com/vana-com/selfie/issues/33#issuecomment-2004637058).

[//]: # (1. `git clone
[//]: # (Disable this note about installing with GPU support until supported via transformers, etc.)

[//]: # (3. `poetry install` or `poetry install -E gpu` (to enable GPU devices via transformers). Enable GPU or Metal acceleration via llama.cpp by installing GPU-enabled llama-cpp-python, see Scripts.)

[//]: # (This starts a local web server and should launch the UI in your browser at http://localhost:8181. API documentation is available at http://localhost:8181/docs. Now that the server is running, you can use the API to import your data and connect to your LLM.)
</details>

> **Note**: You can host selfie at a publicly-accessible URL with [ngrok](https://ngrok.com). Add your ngrok token (and optionally, ngrok domain) in `selfie/.env` and run `poetry run python -m selfie --share`.
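The README addition above can also be applied once per shell session instead of being prefixed to every command. A minimal equivalent, assuming a bash or zsh shell on macOS:

```sh
# Equivalent session-wide setup (assumes bash/zsh on macOS)
export OMP_NUM_THREADS=1 KMP_DUPLICATE_LIB_OK=TRUE
poetry run python -m selfie --gpu   # or without --gpu
```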
1,604 changes: 223 additions & 1,381 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -12,7 +12,7 @@ colorlog = "^6.8.2"
fastapi = "^0.109.0"
uvicorn = "^0.27.0"
humanize = "^4.9.0"
llama-cpp-python = "^0.2.26"
llama-cpp-python = ">=0.2.26, <0.2.56"
litellm = "^1.23.12"
txtai = {version = "^7.0.0", extras = ["pipeline-llm"]}
sse-starlette = "^2.0.0"
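The new upper bound stops Poetry from resolving a llama-cpp-python release at or above 0.2.56. To confirm which version was actually installed (assuming the package exposes `__version__`, as recent llama-cpp-python releases do):

```sh
poetry run python -c 'import llama_cpp; print(llama_cpp.__version__)'
```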
21 changes: 19 additions & 2 deletions scripts/llama-cpp-python-cublas.sh
@@ -48,9 +48,26 @@ get_index_url() {
echo "https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/${CPU_ARCH}/${ACCELERATION}"
}

extract_llama_cpp_python_version_range() {
VERSION_REQUIREMENT=$(grep "llama-cpp-python" pyproject.toml | head -n 1 | sed 's/ //g; s/"//g')

echo "Installing accelerated llama-cpp-python..."
poetry run python -m pip install llama-cpp-python --prefer-binary --force-reinstall --extra-index-url="$(get_index_url)"
if [[ "$VERSION_REQUIREMENT" =~ \^([0-9]+\.[0-9]+)\.([0-9]+) ]]; then
# For caret versions, construct a range that pip understands
VERSION_RANGE=">=$(echo ${BASH_REMATCH[1]}).${BASH_REMATCH[2]},<$((${BASH_REMATCH[1]%.*} + 1)).0.0"
else
# For explicit version ranges, ensure correct format for pip
VERSION_RANGE=$(echo $VERSION_REQUIREMENT | sed 's/.*llama-cpp-python=//')
fi

echo "$VERSION_RANGE"
}

# Use the extracted version range in the pip install command
VERSION_RANGE=$(extract_llama_cpp_python_version_range)

echo "Installing accelerated llama-cpp-python with version range $VERSION_RANGE..."
echo "poetry run python -m pip install \"llama-cpp-python$VERSION_RANGE\" --prefer-binary --force-reinstall --extra-index-url=\"$(get_index_url)\""
poetry run python -m pip install "llama-cpp-python $VERSION_RANGE" --prefer-binary --force-reinstall --extra-index-url="$(get_index_url)"

echo "Installation complete. Please check for any errors above."

2 changes: 1 addition & 1 deletion selfie/connectors/base_connector.py
@@ -36,7 +36,7 @@ def get_documentation_markdown(self):
    def _read_file(self, file_name: str) -> str | None:
        file_path = os.path.join(os.path.dirname(__file__), self.id, file_name)
        if os.path.exists(file_path):
-            with open(file_path, 'r') as file:
+            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read()
        else:
            return None
2 changes: 1 addition & 1 deletion selfie/parsers/chat/__init__.py
@@ -35,7 +35,7 @@
# current_dir = os.path.dirname(os.path.abspath(__file__))
# blacklist_file_path = os.path.join(current_dir, "blacklist_patterns.yaml")

-with open(blacklist_file_path, "r") as f:
+with open(blacklist_file_path, "r", encoding='utf-8') as f:
    default_blacklist_patterns = yaml.safe_load(f)
default_blacklist_patterns = [
    pattern.strip() for pattern in default_blacklist_patterns
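Both `encoding='utf-8'` additions guard against platform-dependent defaults: without the argument, `open()` falls back to the locale's preferred encoding (often cp1252 on Windows), which can raise `UnicodeDecodeError` on non-ASCII content. A quick way to inspect the default on a given machine:

```sh
python3 -c 'import locale; print(locale.getpreferredencoding(False))'
```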
4 changes: 2 additions & 2 deletions selfie/types/completion_requests.py
@@ -82,11 +82,11 @@ def openai_params(self):
        return {
            k: v
            for k, v in self.model_dump().items()
-            if k not in BaseCompletionRequest.custom_params and v is not None
+            if k not in self.custom_params and v is not None
        }

    def selfie_params(self):
-        return {k: v for k, v in self.model_dump().items() if k in BaseCompletionRequest.custom_params and v is not None}
+        return {k: v for k, v in self.model_dump().items() if k in self.custom_params and v is not None}

    def extra_params(self):
        """
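The switch to `self.custom_params` matters because attribute lookup through the instance also sees subclass overrides, whereas the old class-qualified lookup always read the base class list. A hypothetical sketch (class names and values invented for illustration):

```sh
python3 - <<'EOF'
# Hypothetical classes; only the lookup behavior mirrors the change above.
class Base:
    custom_params = ["method"]

class Child(Base):
    custom_params = Base.custom_params + ["persona"]

obj = Child()
print(obj.custom_params)   # ['method', 'persona'] -- what self.custom_params sees
print(Base.custom_params)  # ['method'] -- what the class-qualified lookup saw
EOF
```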
11 changes: 10 additions & 1 deletion start.sh
@@ -26,7 +26,7 @@ else
fi

echo "Installing Python dependencies with Poetry..."
-poetry check || poetry install
+poetry install

echo "Building UI with Yarn..."
./scripts/build-ui.sh
@@ -35,4 +35,13 @@ echo "Running llama-cpp-python-cublas.sh to enable hardware acceleration..."
./scripts/llama-cpp-python-cublas.sh

echo "Running selfie..."

if [ "$(uname -m)" = "arm64" ]; then
ENV_FLAG="OMP_NUM_THREADS=1 KMP_DUPLICATE_LIB_OK=TRUE"
fi

if [ ! -z "$ENV_FLAG" ]; then
export $ENV_FLAG
fi

poetry run python -m selfie $GPU_FLAG
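One detail in the new start.sh logic: because `$ENV_FLAG` is expanded unquoted, word splitting turns the single string into two assignments, so on Apple Silicon the export is equivalent to:

```sh
export OMP_NUM_THREADS=1 KMP_DUPLICATE_LIB_OK=TRUE
```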
