Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: batch delete in chunks of 50 #4

Merged
merged 1 commit into from
Aug 9, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 51 additions & 14 deletions delete.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ function getBucketContents() {
aws s3api list-objects-v2 \
--bucket "$INPUT_S3_BUCKET_NAME" \
--prefix "$INPUT_S3_PREFIX" \
--output json \
--query "Contents[?LastModified<='$1'].Key"
--output text \
--query "Contents[?LastModified<='$1'].[Key]"
}

function deleteFileFromBucket() {
aws s3api delete-object \
function deleteFilesFromBucket() {
aws s3api delete-objects \
--bucket "$INPUT_S3_BUCKET_NAME" \
--key "$1"
--delete "Objects=[$1]"
}

# Validation of AWS Creds
Expand Down Expand Up @@ -46,22 +46,59 @@ fi
echo -e "${BLUE}Reviewing files older than ${RESET_TEXT}$DATE..."
RESPONSE=$(getBucketContents "$DATE")

# Iterate and delete files if needed.
echo "$RESPONSE" | jq -r '.[]?' | while read -r file; do
# Ignore blank lines and count total files
FILE_COUNT=$(echo "$RESPONSE" | sed '/^\s*$/d' | wc -l | xargs)

# Prepare to loop and count up to a chunk limit to batch-delete files
COUNT=0
FILES=""
for FILE in $RESPONSE; do
# Skip empty lines
if [ -n "$FILE" ]; then
continue;
fi

FILES="$FILES {Key=$FILE},"
(( COUNT++ ))

# Report file that is being staged for a bulk-delete
if [ "$INPUT_NO_DRY_RUN" = true ]; then
echo -e "${RED}Deleting ${RESET_TEXT}$file"
DELETE_RESPONSE=$(deleteFileFromBucket "$file")
echo "$DELETE_RESPONSE"
echo -e "${RED}Staging for deletion: ${RESET_TEXT}$FILE"
else
echo -e "${BLUE}Would have deleted ${RESET_TEXT}$file"
echo -e "${BLUE}Would have deleted: ${RESET_TEXT}$FILE"
fi

# Delete in chunks
if [ "$COUNT" -eq 50 ]; then
if [ "$INPUT_NO_DRY_RUN" = true ]; then
echo -e "${RED}Deleting ${RESET_TEXT}$COUNT files."
DELETE_RESPONSE=$(deleteFilesFromBucket "$FILES")
echo "$DELETE_RESPONSE"
else
echo -e "${BLUE}Would have deleted: ${RESET_TEXT}$COUNT files."
fi

# Reset chunking for next run
FILES=""
COUNT=0
fi
done

# Make sure we get any stragglers
if [ "$COUNT" -gt 0 ]; then
if [ "$INPUT_NO_DRY_RUN" = true ]; then
echo -e "${RED}Deleting ${RESET_TEXT}$COUNT files."
DELETE_RESPONSE=$(deleteFilesFromBucket "$FILES")
echo "$DELETE_RESPONSE"
else
echo -e "${BLUE}Would have deleted: ${RESET_TEXT}$COUNT files."
fi
fi

# Give a little summary of total.
FILE_COUNT=$(echo "$RESPONSE" | jq -r '.[]?' | wc -l | xargs)
if [ "$INPUT_NO_DRY_RUN" = true ]; then
echo -e "Deleted ${FILE_COUNT} files"
echo -e "Deleted in total: ${FILE_COUNT} files"
else
echo -e "${BLUE}Would have deleted ${RESET_TEXT}${FILE_COUNT} files"
echo -e "${BLUE}Would have deleted in total: ${RESET_TEXT}${FILE_COUNT} files"
echo -e "Run with ${BLUE}no_dry_run: true${RESET_TEXT} to delete files."
fi