diff --git a/convert_models/README.md b/convert_models/README.md
index 1043dbaa..8610f5ca 100644
--- a/convert_models/README.md
+++ b/convert_models/README.md
@@ -19,10 +19,10 @@ podman build -t converter .
 ## Quantize and Convert
 
-You can run the conversion image directly with podman in the terminal. You just need to provide it with the huggingface model name you want to download, the quantization level you want to use and whether or not you want to keep the raw files after conversion.
+You can run the conversion image directly with podman in the terminal. You just need to provide it with the huggingface model name you want to download, the quantization level you want to use and whether or not you want to keep the raw files after conversion. "HF_TOKEN" is optional; it is only required for private models.
 
 ```bash
-podman run -it --rm -v models:/converter/converted_models -e HF_MODEL_URL= -e QUANTIZATION=Q4_K_M -e KEEP_ORIGINAL_MODEL="False"
+podman run -it --rm -v models:/converter/converted_models -e HF_MODEL_URL= -e HF_TOKEN= -e QUANTIZATION=Q4_K_M -e KEEP_ORIGINAL_MODEL="False"
 converter
 ```
 
 You can also use the UI shown above to do the same.
diff --git a/convert_models/download_huggingface.py b/convert_models/download_huggingface.py
index 4ad55662..008c536a 100644
--- a/convert_models/download_huggingface.py
+++ b/convert_models/download_huggingface.py
@@ -3,9 +3,11 @@
 
 parser = argparse.ArgumentParser()
 parser.add_argument("-m", "--model")
+parser.add_argument("-t", "--token")
 args = parser.parse_args()
 
 snapshot_download(repo_id=args.model,
+                  token=args.token,
                   local_dir=f"converted_models/{args.model}",
                   local_dir_use_symlinks=True,
                   cache_dir=f"converted_models/cache")
\ No newline at end of file
diff --git a/convert_models/run.sh b/convert_models/run.sh
index 05f86505..a14214f8 100644
--- a/convert_models/run.sh
+++ b/convert_models/run.sh
@@ -1,6 +1,7 @@
 #! /bin/bash
 
 hf_model_url=${HF_MODEL_URL}
+hf_token=${HF_TOKEN:="None"}
 model_org=$(echo $hf_model_url | sed -n 's/\(.*\)\/\(.*\)/\1/p')
 model_name=$(echo $hf_model_url | sed -n 's/\(.*\)\/\(.*\)/\2/p')
 keep_orgi=${KEEP_ORIGINAL_MODEL}
@@ -15,11 +16,11 @@ if [ -e "/opt/app-root/src/converter/converted_models/cache/models--$model_org--
 fi
 
 echo "Downloading $hf_model_url"
-python download_huggingface.py --model $hf_model_url
-python llama.cpp/convert.py /opt/app-root/src/converter/converted_models/$hf_model_url
-python llama.cpp/convert-hf-to-gguf.py /opt/app-root/src/converter/converted_models/$hf_model_url
+python download_huggingface.py --model $hf_model_url --token $hf_token
+python llama.cpp/examples/convert_legacy_llama.py /opt/app-root/src/converter/converted_models/$hf_model_url
+python llama.cpp/convert_hf_to_gguf.py /opt/app-root/src/converter/converted_models/$hf_model_url
 mkdir -p /opt/app-root/src/converter/converted_models/gguf/
-llama.cpp/quantize /opt/app-root/src/converter/converted_models/$hf_model_url/ggml-model-f16.gguf /opt/app-root/src/converter/converted_models/gguf/$model_org-$model_name-${QUANTIZATION}.gguf ${QUANTIZATION}
+llama.cpp/llama-quantize /opt/app-root/src/converter/converted_models/$hf_model_url/ggml-model-f16.gguf /opt/app-root/src/converter/converted_models/gguf/$model_org-$model_name-${QUANTIZATION}.gguf ${QUANTIZATION}
 rm -rf /opt/app-root/src/converter/converted_models/$model_org
 
 if [ $keep_orgi = "False" ]; then
diff --git a/convert_models/ui.py b/convert_models/ui.py
index 8bf65de5..3f539692 100644
--- a/convert_models/ui.py
+++ b/convert_models/ui.py
@@ -22,6 +22,9 @@
 
 model_name = st.text_input(label="Enter a huggingface model url to convert",
                            placeholder="org/model_name")
+token_id = st.text_input(label="Enter your huggingface token (optional)",
+                         help="A huggingface token is required for private models"
+                         ) or "None"
 keep_files = st.checkbox("Keep huggingface model files after conversion?")
 submit_button = st.button(label="submit")
 if submit_button:
@@ -30,8 +33,9 @@
         "run",
         "-it",
         "--rm",
-        "-v", f"{volume}:/opt/app-root/src/converter/converted_models",
-        "-e", f"HF_MODEL_URL={model_name}" ,
+        "-v", f"{volume}:/converter/converted_models",
+        "-e", f"HF_MODEL_URL={model_name}",
+        "-e", f"HF_TOKEN={token_id}",
         "-e", f"QUANTIZATION={quantization}",
         "-e", f"KEEP_ORIGINAL_MODEL={keep_files}",
         "converter"],stdout=subprocess.PIPE)
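
Review note: both run.sh (`hf_token=${HF_TOKEN:="None"}`) and ui.py (`... or "None"`) fall back to the literal string "None" when no token is supplied, and download_huggingface.py forwards that string straight to `snapshot_download`, which will treat any provided string as a credential rather than as an anonymous download. Below is a minimal sketch of how download_huggingface.py could normalize that sentinel; the `token = None if ...` guard is a suggestion, not part of this diff:

```python
import argparse

from huggingface_hub import snapshot_download

parser = argparse.ArgumentParser()
parser.add_argument("-m", "--model")
parser.add_argument("-t", "--token")
args = parser.parse_args()

# Treat the "None" sentinel passed by run.sh/ui.py as "no token" so public
# models are still fetched anonymously instead of with a bogus credential.
# (This guard is a suggested addition, not present in the diff above.)
token = None if args.token in (None, "None") else args.token

snapshot_download(repo_id=args.model,
                  token=token,
                  local_dir=f"converted_models/{args.model}",
                  local_dir_use_symlinks=True,
                  cache_dir="converted_models/cache")
```

Keeping the string sentinel in the shell/UI layer and collapsing it to a real `None` at the Python boundary avoids changing the container's environment-variable contract while still giving `snapshot_download` the value it expects.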