From 9a6b64d4b034b4079e469df0541e567299db4140 Mon Sep 17 00:00:00 2001 From: Toby Crawley Date: Tue, 5 Mar 2024 21:49:48 -0500 Subject: [PATCH] Use tmp dir in cwd for stats generation The /tmp partition on the new al2023 instances isn't big enough to support stats file processing, so this moves it to the root partition which has more space. We should do something better longer term, but this should allow the stat generation to at least complete. --- scripts/build-search-index | 13 +++++++++---- scripts/combine-cdn-logs | 4 +++- scripts/update-pom-list | 4 +++- scripts/update-stats | 9 +++++---- src/clojars/tools/generate_feeds.clj | 4 ++-- 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/scripts/build-search-index b/scripts/build-search-index index 88945748..32e215c6 100755 --- a/scripts/build-search-index +++ b/scripts/build-search-index @@ -11,11 +11,16 @@ cd "$dir/.." clojars_jar=$1 -java -cp "$clojars_jar" clojure.main -m clojars.tools.build-search-index production | tee /tmp/index-out.txt +mkdir -p tmp -index_path=$(grep "index-path:" /tmp/index-out.txt | sed -n 's/index-path: \(.*\)/\1/p') +index_out=tmp/index-out.txt +java -cp "$clojars_jar" clojure.main -m clojars.tools.build-search-index production | tee "$index_out" + +index_path=$(grep "index-path:" "$index_out" | sed -n 's/index-path: \(.*\)/\1/p') + +index_archive=tmp/artifact-index.tgz # upload the new index the s3 bucket -tar czf /tmp/artifact-index.tgz "$index_path" +tar czf "$index_archive" "$index_path" -aws s3 cp --no-progress --content-type 'application/gzip' /tmp/artifact-index.tgz "s3://clojars-artifact-index/" +aws s3 cp --no-progress --content-type 'application/gzip' "$index_archive" "s3://clojars-artifact-index/" diff --git a/scripts/combine-cdn-logs b/scripts/combine-cdn-logs index d96490a7..3e2767ae 100755 --- a/scripts/combine-cdn-logs +++ b/scripts/combine-cdn-logs @@ -11,13 +11,15 @@ if [ -z "$DATE" ]; then DATE=$(date --date='1 day ago' +%Y%m%d) fi +mkdir -p tmp + S3_LOG_BUCKET=clojars-fastly-logs java -cp "$clojars_jar" clojure.main -m clojars.tools.combine-cdn-logs \ "$S3_LOG_BUCKET" "$DATE" "$output_file" if [ -s "$output_file" ]; then - gzipped_file="/tmp/combined-${DATE}.log.gz" + gzipped_file="tmp/combined-${DATE}.log.gz" gzip --best --stdout "$output_file" > "$gzipped_file" aws s3 cp --no-progress --content-type 'application/gzip' "$gzipped_file" "s3://${S3_LOG_BUCKET}/" fi diff --git a/scripts/update-pom-list b/scripts/update-pom-list index 2cb782ed..e4489892 100755 --- a/scripts/update-pom-list +++ b/scripts/update-pom-list @@ -6,4 +6,6 @@ set -e clojars_jar=$1 -java -cp $clojars_jar clojure.main -m clojars.tools.generate-feeds production 2> /dev/null +mkdir -p tmp + +java -cp $clojars_jar clojure.main -m clojars.tools.generate-feeds tmp production 2> /dev/null diff --git a/scripts/update-stats b/scripts/update-stats index 2231905e..db3b1354 100755 --- a/scripts/update-stats +++ b/scripts/update-stats @@ -13,16 +13,17 @@ if [ -z "$date" ]; then date=$(date --date='1 day ago' +%Y%m%d) fi -cdn_logfile=/tmp/cdn-access-yesterday.log +mkdir -p tmp +cdn_logfile=tmp/cdn-access-yesterday.log # generate the combined cdn stats first "$dir"/combine-cdn-logs "$clojars_jar" "$cdn_logfile" "$date" S3_STATS_BUCKET=clojars-stats-production -downloads_date=/tmp/downloads-$date.edn -downloads_all_old=/tmp/all-old.edn -downloads_all_new=/tmp/all-new.edn +downloads_date=tmp/downloads-$date.edn +downloads_all_old=tmp/all-old.edn +downloads_all_new=tmp/all-new.edn # grab the latest all.edn from s3 aws s3 cp "s3://${S3_STATS_BUCKET}/all.edn" "$downloads_all_old" diff --git a/src/clojars/tools/generate_feeds.clj b/src/clojars/tools/generate_feeds.clj index 44a66e44..e12b3668 100644 --- a/src/clojars/tools/generate_feeds.clj +++ b/src/clojars/tools/generate_feeds.clj @@ -115,8 +115,8 @@ (write-sums jar-file) (write-sums gz-file))))) -(defn -main [env] +(defn -main [feed-dir env] (let [{:keys [db s3]} (config (keyword env))] - (generate-feeds "/tmp" + (generate-feeds feed-dir db (s3/s3-client (:repo-bucket s3)))))