Skip to content
This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

Commit

Permalink
build(utils): download files directly to the provided directory (#103)
Browse files: browse the repository at this point in the history
* build(utils): download files directly to the provided directory

* fix(utils): fix positional argument count

* build(docker): trust remote code is still required for local copies
  • Loading branch information
peakji authored Mar 29, 2023
1 parent 50cb37a commit eb1574d
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 30 deletions.
5 changes: 3 additions & 2 deletions deployments/bundle/bloomz-560m.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ FROM hyperonym/basaran:0.13.4
WORKDIR /app

# Download the model to be bundled
RUN python utils/download.py bigscience/bloomz-560m
RUN python utils/download.py bigscience/bloomz-560m /model

# Provide default environment variables
ENV MODEL="bigscience/bloomz-560m"
ENV MODEL="/model"
ENV MODEL_LOCAL_FILES_ONLY="true"
ENV SERVER_MODEL_NAME="bigscience/bloomz-560m"
5 changes: 3 additions & 2 deletions deployments/bundle/bloomz-7b1-mt.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ FROM hyperonym/basaran:0.13.4
WORKDIR /app

# Download the model to be bundled
RUN python utils/download.py bigscience/bloomz-7b1-mt
RUN python utils/download.py bigscience/bloomz-7b1-mt /model

# Provide default environment variables
ENV MODEL="bigscience/bloomz-7b1-mt"
ENV MODEL="/model"
ENV MODEL_LOCAL_FILES_ONLY="true"
ENV MODEL_HALF_PRECISION="true"
ENV SERVER_MODEL_NAME="bigscience/bloomz-7b1-mt"
5 changes: 3 additions & 2 deletions deployments/bundle/chatglm-6b.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ WORKDIR /app
RUN pip install icetk cpm_kernels

# Download the model to be bundled
RUN MODEL_REVISION=refs/pr/7 python utils/download.py THUDM/chatglm-6b
RUN python utils/download.py THUDM/chatglm-6b /model refs/pr/7

# Provide default environment variables
ENV MODEL="THUDM/chatglm-6b"
ENV MODEL="/model"
ENV MODEL_REVISION="refs/pr/7"
ENV MODEL_LOCAL_FILES_ONLY="true"
ENV MODEL_TRUST_REMOTE_CODE="true"
ENV MODEL_HALF_PRECISION="true"
ENV SERVER_MODEL_NAME="THUDM/chatglm-6b"
36 changes: 12 additions & 24 deletions utils/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,20 @@
This allows memory-constrained CI/CD runners to build container images
with large bundled models. See ../deployments/bundle/ for examples.
"""
import os
import sys
import tempfile

import huggingface_hub

if len(sys.argv) <= 1:
sys.exit("error: must specify the model to be downloaded")
if len(sys.argv) < 3:
sys.exit("usage: python download.py REPO_ID LOCAL_DIR [REVISION]")

# Get cache directory from arguments or environment variables.
if len(sys.argv) >= 3:
MODEL_CACHE_DIR = sys.argv[2]
elif "MODEL_CACHE_DIR" in os.environ and os.environ["MODEL_CACHE_DIR"]:
MODEL_CACHE_DIR = os.environ["MODEL_CACHE_DIR"]
else:
MODEL_CACHE_DIR = None

# Get model revision from environment variables.
if "MODEL_REVISION" in os.environ and os.environ["MODEL_REVISION"]:
MODEL_REVISION = os.environ["MODEL_REVISION"]
else:
MODEL_REVISION = None

# Download a snapshot of the specified model from Hugging Face Hub.
huggingface_hub.snapshot_download(
sys.argv[1],
cache_dir=MODEL_CACHE_DIR,
revision=MODEL_REVISION,
resume_download=True,
)
with tempfile.TemporaryDirectory() as cache_dir:
huggingface_hub.snapshot_download(
repo_id=sys.argv[1],
local_dir=sys.argv[2],
revision=sys.argv[3] if len(sys.argv) > 3 else None,
cache_dir=cache_dir,
local_dir_use_symlinks=False,
resume_download=True,
)

0 comments on commit eb1574d

Please sign in to comment.