From 59762bb6aedd7ac4b592fe8edf3a4c367db2e2e8 Mon Sep 17 00:00:00 2001
From: Song Liu
Date: Thu, 15 Aug 2024 16:06:53 +0800
Subject: [PATCH] Add the huggingface token parameter and update the file
 paths for the llama.cpp repo

Signed-off-by: Song Liu
---
 convert_models/README.md               | 4 ++--
 convert_models/download_huggingface.py | 4 ++++
 convert_models/run.sh                  | 9 +++++----
 convert_models/ui.py                   | 8 ++++++--
 4 files changed, 17 insertions(+), 8 deletions(-)
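Note (not applied by this patch): the token is handed to huggingface_hub exactly as given, so a revoked or mistyped token only fails once the first download request is made. If you want the converter to fail fast instead, a check along the following lines could be added near the top of download_huggingface.py. `validate_token` is a hypothetical helper; `whoami` and `HfHubHTTPError` are existing huggingface_hub APIs.

```python
# Sketch of a fail-fast token check (hypothetical helper, not in this patch).
from typing import Optional

from huggingface_hub import whoami
from huggingface_hub.utils import HfHubHTTPError

def validate_token(token: Optional[str]) -> None:
    """Exit early if the Hub rejects the supplied token."""
    if token is None:
        return  # anonymous download of a public model is fine
    try:
        whoami(token=token)  # cheap authenticated round-trip
    except HfHubHTTPError as err:
        raise SystemExit(f"huggingface token was rejected: {err}")
```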
diff --git a/convert_models/README.md b/convert_models/README.md
index 1043dbaa..8610f5ca 100644
--- a/convert_models/README.md
+++ b/convert_models/README.md
@@ -19,10 +19,10 @@
 ## Quantize and Convert
 
-You can run the conversion image directly with podman in the terminal. You just need to provide it with the huggingface model name you want to download, the quantization level you want to use and whether or not you want to keep the raw files after conversion.
+You can run the conversion image directly with podman in the terminal. You just need to provide it with the huggingface model name you want to download, the quantization level you want to use, and whether or not you want to keep the raw files after conversion. `HF_TOKEN` is optional; it is only required for private models.
 
 ```bash
-podman run -it --rm -v models:/converter/converted_models -e HF_MODEL_URL=<org/model_name> -e QUANTIZATION=Q4_K_M -e KEEP_ORIGINAL_MODEL="False"
+podman run -it --rm -v models:/converter/converted_models -e HF_MODEL_URL=<org/model_name> -e HF_TOKEN=<your-token> -e QUANTIZATION=Q4_K_M -e KEEP_ORIGINAL_MODEL="False"
 converter
 ```
 
 You can also use the UI shown above to do the same.
diff --git a/convert_models/download_huggingface.py b/convert_models/download_huggingface.py
index 4ad55662..008c536a 100644
--- a/convert_models/download_huggingface.py
+++ b/convert_models/download_huggingface.py
@@ -3,9 +3,13 @@
 
 parser = argparse.ArgumentParser()
 parser.add_argument("-m", "--model")
+parser.add_argument("-t", "--token")
 args = parser.parse_args()
 
+# Treat the "None" sentinel passed by run.sh and ui.py as "no token".
+token = args.token if args.token not in (None, "None") else None
 snapshot_download(repo_id=args.model,
+                  token=token,
                   local_dir=f"converted_models/{args.model}",
                   local_dir_use_symlinks=True,
                   cache_dir=f"converted_models/cache")
\ No newline at end of file
diff --git a/convert_models/run.sh b/convert_models/run.sh
index 05f86505..a14214f8 100644
--- a/convert_models/run.sh
+++ b/convert_models/run.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 
 hf_model_url=${HF_MODEL_URL}
+hf_token=${HF_TOKEN:="None"}
 model_org=$(echo $hf_model_url | sed -n 's/\(.*\)\/\(.*\)/\1/p')
 model_name=$(echo $hf_model_url | sed -n 's/\(.*\)\/\(.*\)/\2/p')
 keep_orgi=${KEEP_ORIGINAL_MODEL}
@@ -15,11 +16,11 @@ if [ -e "/opt/app-root/src/converter/converted_models/cache/models--$model_org--
 fi
 
 echo "Downloading $hf_model_url"
-python download_huggingface.py --model $hf_model_url
-python llama.cpp/convert.py /opt/app-root/src/converter/converted_models/$hf_model_url
-python llama.cpp/convert-hf-to-gguf.py /opt/app-root/src/converter/converted_models/$hf_model_url
+python download_huggingface.py --model $hf_model_url --token $hf_token
+python llama.cpp/examples/convert_legacy_llama.py /opt/app-root/src/converter/converted_models/$hf_model_url
+python llama.cpp/convert_hf_to_gguf.py /opt/app-root/src/converter/converted_models/$hf_model_url
 mkdir -p /opt/app-root/src/converter/converted_models/gguf/
-llama.cpp/quantize /opt/app-root/src/converter/converted_models/$hf_model_url/ggml-model-f16.gguf /opt/app-root/src/converter/converted_models/gguf/$model_org-$model_name-${QUANTIZATION}.gguf ${QUANTIZATION}
+llama.cpp/llama-quantize /opt/app-root/src/converter/converted_models/$hf_model_url/ggml-model-f16.gguf /opt/app-root/src/converter/converted_models/gguf/$model_org-$model_name-${QUANTIZATION}.gguf ${QUANTIZATION}
 rm -rf /opt/app-root/src/converter/converted_models/$model_org
 
 if [ $keep_orgi = "False" ]; then
diff --git a/convert_models/ui.py b/convert_models/ui.py
index 8bf65de5..3f539692 100644
--- a/convert_models/ui.py
+++ b/convert_models/ui.py
@@ -22,6 +22,9 @@
 
 model_name = st.text_input(label="Enter a huggingface model url to convert",
                            placeholder="org/model_name")
+token_id = st.text_input(label="Enter your huggingface token (optional)",
+                         help="A huggingface token is only required for private models."
+                         ) or "None"
 keep_files = st.checkbox("Keep huggingface model files after conversion?")
 submit_button = st.button(label="submit")
 if submit_button:
@@ -30,8 +33,9 @@
         "run",
         "-it",
         "--rm",
-        "-v", f"{volume}:/opt/app-root/src/converter/converted_models",
-        "-e", f"HF_MODEL_URL={model_name}" ,
+        "-v", f"{volume}:/converter/converted_models",
+        "-e", f"HF_MODEL_URL={model_name}",
+        "-e", f"HF_TOKEN={token_id}",
         "-e", f"QUANTIZATION={quantization}",
         "-e", f"KEEP_ORIGINAL_MODEL={keep_files}",
         "converter"],stdout=subprocess.PIPE)
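Note (not applied by this patch): for scripted conversions that bypass the Streamlit UI, the container can be driven the same way ui.py now invokes it. The wrapper below is a hypothetical convenience function; the `converter` image, the `models` volume, and the environment variables all come from the files changed above.

```python
# Sketch of a scripted equivalent of ui.py's podman invocation
# (hypothetical wrapper, not part of this patch).
import subprocess

def convert_model(model: str, token: str = "None",
                  quantization: str = "Q4_K_M",
                  keep_original: bool = False) -> None:
    subprocess.run([
        "podman", "run", "-it", "--rm",
        "-v", "models:/converter/converted_models",
        "-e", f"HF_MODEL_URL={model}",
        "-e", f"HF_TOKEN={token}",  # "None" means download anonymously
        "-e", f"QUANTIZATION={quantization}",
        "-e", f"KEEP_ORIGINAL_MODEL={keep_original}",
        "converter",
    ], check=True)

# Example:
# convert_model("org/model_name", token="<your-token>", quantization="Q4_K_M")
```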