Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a model cache to avoid running out of storage #201

Merged
merged 7 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -151,4 +151,4 @@ jobs:

# Delete the SHA image(s) from containerd store
sudo ctr i rm $(sudo ctr i ls -q)


66 changes: 65 additions & 1 deletion sync.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ OBJECT_ID="${MODEL_ID//\//--}"
# Locations derived from the model id: the S3 prefix the weights are read from
# and the matching directory inside the local Hugging Face hub cache.
S3_BASE_DIRECTORY="models--$OBJECT_ID"
S3_PATH="s3://${HF_CACHE_BUCKET}/${S3_BASE_DIRECTORY}/"
LOCAL_MODEL_DIR="${HUGGINGFACE_HUB_CACHE}/${S3_BASE_DIRECTORY}"

# Bookkeeping files for the model cache: a lock file and the list of cached
# model directories that clean_up_cache maintains.
LOCKFILE="${HUGGINGFACE_HUB_CACHE}/cache.lock"
CACHE_FILE="${HUGGINGFACE_HUB_CACHE}/cache.txt"

# Number of entries kept in CACHE_FILE; can be overridden via the CACHE_SIZE
# environment variable.
DEFAULT_CACHE_SIZE=4
CACHE_SIZE=${CACHE_SIZE:-$DEFAULT_CACHE_SIZE}

# Quote the path (and end option parsing) so a cache location containing
# spaces or glob characters still creates exactly one directory.
sudo mkdir -p -- "$LOCAL_MODEL_DIR"

# Function to check if lorax-launcher is running
is_launcher_running() {
Expand All @@ -20,7 +27,63 @@ is_launcher_running() {
kill -0 "$launcher_pid" >/dev/null 2>&1
}

sudo mkdir -p $LOCAL_MODEL_DIR
#######################################
# Mark a model as most recently used in the cache list and evict the oldest
# entries once the list grows beyond CACHE_SIZE.
# Globals:
#   CACHE_SIZE            (read) number of entries to keep in the list
#   HUGGINGFACE_HUB_CACHE (read) directory holding the per-model directories
# Arguments:
#   $1 - key to record (one line in the cache file, also the directory name)
#   $2 - path of the cache list file (one key per line, oldest first)
# Outputs:
#   Progress messages on stdout for every evicted entry.
# Returns:
#   1 if the temporary file cannot be created; otherwise the status of the
#   last command executed.
#######################################
clean_up_cache() {
  local key=$1
  local file=$2
  local removed_lines=""
  local temp_file total_lines lines_to_remove line model_to_remove

  # Stop before touching the list: with an empty temp path the final
  # `tail ... > "$file"` below would truncate the cache file.
  temp_file=$(mktemp) || return 1

  # Drop any previous entry for the key. -F/-x compare whole lines literally,
  # so regex metacharacters in a model name (e.g. the "." in a version
  # number) cannot match and remove unrelated entries.
  if [ -f "$file" ]; then
    # grep exits 1 when no other entries remain; that is not an error here.
    grep -Fxv -- "$key" "$file" > "$temp_file" || true
  fi

  # Add the key to the bottom of the list (most recently used position).
  printf '%s\n' "$key" >> "$temp_file"

  # Work out how many of the oldest entries exceed the allowed size.
  total_lines=$(wc -l < "$temp_file")
  lines_to_remove=$((total_lines - CACHE_SIZE))

  if [ "$lines_to_remove" -gt 0 ]; then
    # Remember the evicted entries so their directories can be deleted below.
    removed_lines=$(head -n "$lines_to_remove" "$temp_file")
    echo "Deleting $removed_lines from cache"
  fi

  # Ensure only the last CACHE_SIZE items are retained.
  tail -n "$CACHE_SIZE" "$temp_file" > "$file"

  # Clean up the temporary file.
  rm -f -- "$temp_file"

  # Delete the directory of every evicted entry, reading line by line so an
  # entry is never word-split or glob-expanded.
  while IFS= read -r line; do
    # An empty entry would resolve to the cache root itself; never remove it.
    [ -n "$line" ] || continue
    model_to_remove="${HUGGINGFACE_HUB_CACHE}/${line}"
    echo "Removing $model_to_remove"
    rm -rf -- "$model_to_remove"
  done <<< "$removed_lines"
}

# Update the cache list while holding an exclusive lock, so concurrent runs of
# this script do not rewrite the list at the same time.
(
  # Wait for lock on $LOCKFILE (fd 200); it is released when the subshell
  # exits and the descriptor is closed.
  flock -x 200

  echo "Lock acquired."

  if [ -f "$CACHE_FILE" ]; then
    echo "Cache file exists."
    # Purely informational: log every entry and note whether this model is
    # already listed. The list itself is updated by clean_up_cache below.
    while read -r line; do
      echo "Line read: $line"
      if [ "$line" = "$S3_BASE_DIRECTORY" ]; then
        echo "Model found in cache."
      fi
    done < "$CACHE_FILE"
  else
    echo "Cache file does not exist."
  fi

  # Record this model as most recently used and evict entries beyond the limit.
  clean_up_cache "$S3_BASE_DIRECTORY" "$CACHE_FILE"
# Quote the redirection target so a cache path containing spaces does not
# fail with an "ambiguous redirect" error.
) 200>"$LOCKFILE"


if [ -n "$(ls -A $LOCAL_MODEL_DIR)" ]; then
echo "Files have already been downloaded to ${LOCAL_MODEL_DIR}"
Expand Down Expand Up @@ -56,6 +119,7 @@ else
echo "Downloading weights from ${S3_PATH}"
fi


# Announce the model, then recursively list everything under ${S3_PATH}.
# awk prints only the fourth whitespace-separated field of each listing line
# (presumably the object key in `aws s3 ls` output — confirm against the AWS
# CLI documentation; keys containing spaces would be cut at the first space).
echo "Files found for model ${MODEL_ID}"
aws s3 ls "${S3_PATH}" --recursive | awk '{print $4}'

Expand Down