Skip to content

Commit

Permalink
perf: batch delete in chunks of 50 (#4)
Browse files Browse the repository at this point in the history
  • Loading branch information
iBotPeaches authored Aug 9, 2023
1 parent 75a83ea commit d56bfca
Showing 1 changed file with 51 additions and 14 deletions.
65 changes: 51 additions & 14 deletions delete.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ function getBucketContents() {
aws s3api list-objects-v2 \
--bucket "$INPUT_S3_BUCKET_NAME" \
--prefix "$INPUT_S3_PREFIX" \
--output json \
--query "Contents[?LastModified<='$1'].Key"
--output text \
--query "Contents[?LastModified<='$1'].[Key]"
}

function deleteFileFromBucket() {
aws s3api delete-object \
function deleteFilesFromBucket() {
aws s3api delete-objects \
--bucket "$INPUT_S3_BUCKET_NAME" \
--key "$1"
--delete "Objects=[$1]"
}

# Validation of AWS Creds
Expand Down Expand Up @@ -46,22 +46,59 @@ fi
echo -e "${BLUE}Reviewing files older than ${RESET_TEXT}$DATE..."
RESPONSE=$(getBucketContents "$DATE")

# Iterate and delete files if needed.
echo "$RESPONSE" | jq -r '.[]?' | while read -r file; do
# Ignore blank lines and count total files
FILE_COUNT=$(echo "$RESPONSE" | sed '/^\s*$/d' | wc -l | xargs)

# Prepare to loop and count up to a chunk limit to batch-delete files
COUNT=0
FILES=""
for FILE in $RESPONSE; do
# Skip empty lines
if [ -n "$FILE" ]; then
continue;
fi

FILES="$FILES {Key=$FILE},"
(( COUNT++ ))

# Report file that is being staged for a bulk-delete
if [ "$INPUT_NO_DRY_RUN" = true ]; then
echo -e "${RED}Deleting ${RESET_TEXT}$file"
DELETE_RESPONSE=$(deleteFileFromBucket "$file")
echo "$DELETE_RESPONSE"
echo -e "${RED}Staging for deletion: ${RESET_TEXT}$FILE"
else
echo -e "${BLUE}Would have deleted ${RESET_TEXT}$file"
echo -e "${BLUE}Would have deleted: ${RESET_TEXT}$FILE"
fi

# Delete in chunks
if [ "$COUNT" -eq 50 ]; then
if [ "$INPUT_NO_DRY_RUN" = true ]; then
echo -e "${RED}Deleting ${RESET_TEXT}$COUNT files."
DELETE_RESPONSE=$(deleteFilesFromBucket "$FILES")
echo "$DELETE_RESPONSE"
else
echo -e "${BLUE}Would have deleted: ${RESET_TEXT}$COUNT files."
fi

# Reset chunking for next run
FILES=""
COUNT=0
fi
done

# Make sure we get any stragglers
if [ "$COUNT" -gt 0 ]; then
if [ "$INPUT_NO_DRY_RUN" = true ]; then
echo -e "${RED}Deleting ${RESET_TEXT}$COUNT files."
DELETE_RESPONSE=$(deleteFilesFromBucket "$FILES")
echo "$DELETE_RESPONSE"
else
echo -e "${BLUE}Would have deleted: ${RESET_TEXT}$COUNT files."
fi
fi

# Give a little summary of total.
FILE_COUNT=$(echo "$RESPONSE" | jq -r '.[]?' | wc -l | xargs)
if [ "$INPUT_NO_DRY_RUN" = true ]; then
echo -e "Deleted ${FILE_COUNT} files"
echo -e "Deleted in total: ${FILE_COUNT} files"
else
echo -e "${BLUE}Would have deleted ${RESET_TEXT}${FILE_COUNT} files"
echo -e "${BLUE}Would have deleted in total: ${RESET_TEXT}${FILE_COUNT} files"
echo -e "Run with ${BLUE}no_dry_run: true${RESET_TEXT} to delete files."
fi

0 comments on commit d56bfca

Please sign in to comment.