From 97a636caf04784dea33e35b019c9de78731e7721 Mon Sep 17 00:00:00 2001 From: covesturtevant Date: Mon, 27 Nov 2023 18:48:53 -0700 Subject: [PATCH] don't copy errored datums to pub bucket --- ...dwaterPhysical_pub_egress_and_publish.yaml | 24 ++++++++++------- ...parQuantumLine_pub_egress_and_publish.yaml | 24 ++++++++++------- ...arWaterSurface_pub_egress_and_publish.yaml | 27 ++++++++++++------- .../relHumidity_pub_egress_and_publish.yaml | 24 ++++++++++------- ...ewaterPhysical_pub_egress_and_publish.yaml | 24 ++++++++++------- .../tempSoil_pub_egress_and_publish.yaml | 24 ++++++++++------- .../testprod_pub_egress_and_publish.yaml | 27 ++++++++++++------- 7 files changed, 111 insertions(+), 63 deletions(-) diff --git a/pipe/groundwaterPhysical/groundwaterPhysical_pub_egress_and_publish.yaml b/pipe/groundwaterPhysical/groundwaterPhysical_pub_egress_and_publish.yaml index ea2378657..1a2d42e57 100644 --- a/pipe/groundwaterPhysical/groundwaterPhysical_pub_egress_and_publish.yaml +++ b/pipe/groundwaterPhysical/groundwaterPhysical_pub_egress_and_publish.yaml @@ -17,17 +17,23 @@ transform: python3 -m pub_egress.pub_egress_main fi # If there is output, egress it - dirs=$(find $OUT_PATH/* -type d) + dirs=$(find $OUT_PATH/NEON.DOM.SITE* -maxdepth 0 -type d) if [[ ${dirs} ]]; then echo "Syncing files to bucket $BUCKET_NAME" - rclone \ - --no-check-dest \ - --copy-links \ - --gcs-bucket-policy-only \ - --gcs-no-check-bucket \ - copy \ - "${OUT_PATH}" \ - ":gcs://${BUCKET_NAME}" + for DIR in $dirs; do + echo "Syncing $DIR to bucket $BUCKET_NAME" + # Parse the product + [[ "$DIR" =~ ^$OUT_PATH/(.*)$ ]] + PRODUCT="${BASH_REMATCH[1]}" + rclone \ + --no-check-dest \ + --copy-links \ + --gcs-bucket-policy-only \ + --gcs-no-check-bucket \ + copy \ + "${OUT_PATH}/${PRODUCT}" \ + ":gcs://${BUCKET_NAME}/${PRODUCT}" + done echo "Done" fi # Set some environment variables for the second module diff --git a/pipe/parQuantumLine/parQuantumLine_pub_egress_and_publish.yaml 
b/pipe/parQuantumLine/parQuantumLine_pub_egress_and_publish.yaml index f97e31a47..62d69f05b 100644 --- a/pipe/parQuantumLine/parQuantumLine_pub_egress_and_publish.yaml +++ b/pipe/parQuantumLine/parQuantumLine_pub_egress_and_publish.yaml @@ -17,17 +17,23 @@ transform: python3 -m pub_egress.pub_egress_main fi # If there is output, egress it - dirs=$(find $OUT_PATH/* -type d) + dirs=$(find $OUT_PATH/NEON.DOM.SITE* -maxdepth 0 -type d) if [[ ${dirs} ]]; then echo "Syncing files to bucket $BUCKET_NAME" - rclone \ - --no-check-dest \ - --copy-links \ - --gcs-bucket-policy-only \ - --gcs-no-check-bucket \ - copy \ - "${OUT_PATH}" \ - ":gcs://${BUCKET_NAME}" + for DIR in $dirs; do + echo "Syncing $DIR to bucket $BUCKET_NAME" + # Parse the product + [[ "$DIR" =~ ^$OUT_PATH/(.*)$ ]] + PRODUCT="${BASH_REMATCH[1]}" + rclone \ + --no-check-dest \ + --copy-links \ + --gcs-bucket-policy-only \ + --gcs-no-check-bucket \ + copy \ + "${OUT_PATH}/${PRODUCT}" \ + ":gcs://${BUCKET_NAME}/${PRODUCT}" + done echo "Done" fi # Set some environment variables for the second module diff --git a/pipe/parWaterSurface/parWaterSurface_pub_egress_and_publish.yaml b/pipe/parWaterSurface/parWaterSurface_pub_egress_and_publish.yaml index 6873ea00e..39fecc569 100644 --- a/pipe/parWaterSurface/parWaterSurface_pub_egress_and_publish.yaml +++ b/pipe/parWaterSurface/parWaterSurface_pub_egress_and_publish.yaml @@ -9,22 +9,31 @@ transform: - "-c" - |- /bin/bash <<'EOF' + # Use bash-strict mode. 
See http://redsymbol.net/articles/unofficial-bash-strict-mode/ + set -euo pipefail + IFS=$'\n\t' # Run first module - pub_egress (using environment variables below as input parameters) if [[ $(echo $DATA_PATH) ]]; then python3 -m pub_egress.pub_egress_main fi # If there is output, egress it - dirs=$(find $OUT_PATH/* -type d) + dirs=$(find $OUT_PATH/NEON.DOM.SITE* -maxdepth 0 -type d) if [[ ${dirs} ]]; then echo "Syncing files to bucket $BUCKET_NAME" - rclone \ - --no-check-dest \ - --copy-links \ - --gcs-bucket-policy-only \ - --gcs-no-check-bucket \ - copy \ - "${OUT_PATH}" \ - ":gcs://${BUCKET_NAME}" + for DIR in $dirs; do + echo "Syncing $DIR to bucket $BUCKET_NAME" + # Parse the product + [[ "$DIR" =~ ^$OUT_PATH/(.*)$ ]] + PRODUCT="${BASH_REMATCH[1]}" + rclone \ + --no-check-dest \ + --copy-links \ + --gcs-bucket-policy-only \ + --gcs-no-check-bucket \ + copy \ + "${OUT_PATH}/${PRODUCT}" \ + ":gcs://${BUCKET_NAME}/${PRODUCT}" + done echo "Done" fi # Set some environment variables for the second module diff --git a/pipe/relHumidity/relHumidity_pub_egress_and_publish.yaml b/pipe/relHumidity/relHumidity_pub_egress_and_publish.yaml index 326d0c0da..85247177c 100644 --- a/pipe/relHumidity/relHumidity_pub_egress_and_publish.yaml +++ b/pipe/relHumidity/relHumidity_pub_egress_and_publish.yaml @@ -17,17 +17,23 @@ transform: python3 -m pub_egress.pub_egress_main fi # If there is output, egress it - dirs=$(find $OUT_PATH/* -type d) + dirs=$(find $OUT_PATH/NEON.DOM.SITE* -maxdepth 0 -type d) if [[ ${dirs} ]]; then echo "Syncing files to bucket $BUCKET_NAME" - rclone \ - --no-check-dest \ - --copy-links \ - --gcs-bucket-policy-only \ - --gcs-no-check-bucket \ - copy \ - "${OUT_PATH}" \ - ":gcs://${BUCKET_NAME}" + for DIR in $dirs; do + echo "Syncing $DIR to bucket $BUCKET_NAME" + # Parse the product + [[ "$DIR" =~ ^$OUT_PATH/(.*)$ ]] + PRODUCT="${BASH_REMATCH[1]}" + rclone \ + --no-check-dest \ + --copy-links \ + --gcs-bucket-policy-only \ + --gcs-no-check-bucket \ + copy \ 
+ "${OUT_PATH}/${PRODUCT}" \ + ":gcs://${BUCKET_NAME}/${PRODUCT}" + done echo "Done" fi # Set some environment variables for the second module diff --git a/pipe/surfacewaterPhysical/surfacewaterPhysical_pub_egress_and_publish.yaml b/pipe/surfacewaterPhysical/surfacewaterPhysical_pub_egress_and_publish.yaml index a24ab75a4..35adbf93b 100644 --- a/pipe/surfacewaterPhysical/surfacewaterPhysical_pub_egress_and_publish.yaml +++ b/pipe/surfacewaterPhysical/surfacewaterPhysical_pub_egress_and_publish.yaml @@ -17,17 +17,23 @@ transform: python3 -m pub_egress.pub_egress_main fi # If there is output, egress it - dirs=$(find $OUT_PATH/* -type d) + dirs=$(find $OUT_PATH/NEON.DOM.SITE* -maxdepth 0 -type d) if [[ ${dirs} ]]; then echo "Syncing files to bucket $BUCKET_NAME" - rclone \ - --no-check-dest \ - --copy-links \ - --gcs-bucket-policy-only \ - --gcs-no-check-bucket \ - copy \ - "${OUT_PATH}" \ - ":gcs://${BUCKET_NAME}" + for DIR in $dirs; do + echo "Syncing $DIR to bucket $BUCKET_NAME" + # Parse the product + [[ "$DIR" =~ ^$OUT_PATH/(.*)$ ]] + PRODUCT="${BASH_REMATCH[1]}" + rclone \ + --no-check-dest \ + --copy-links \ + --gcs-bucket-policy-only \ + --gcs-no-check-bucket \ + copy \ + "${OUT_PATH}/${PRODUCT}" \ + ":gcs://${BUCKET_NAME}/${PRODUCT}" + done echo "Done" fi # Set some environment variables for the second module diff --git a/pipe/tempSoil/tempSoil_pub_egress_and_publish.yaml b/pipe/tempSoil/tempSoil_pub_egress_and_publish.yaml index aebc78231..07b562824 100644 --- a/pipe/tempSoil/tempSoil_pub_egress_and_publish.yaml +++ b/pipe/tempSoil/tempSoil_pub_egress_and_publish.yaml @@ -17,17 +17,23 @@ transform: python3 -m pub_egress.pub_egress_main fi # If there is output, egress it - dirs=$(find $OUT_PATH/* -type d) + dirs=$(find $OUT_PATH/NEON.DOM.SITE* -maxdepth 0 -type d) if [[ ${dirs} ]]; then echo "Syncing files to bucket $BUCKET_NAME" - rclone \ - --no-check-dest \ - --copy-links \ - --gcs-bucket-policy-only \ - --gcs-no-check-bucket \ - copy \ - "${OUT_PATH}" \ - 
":gcs://${BUCKET_NAME}" + for DIR in $dirs; do + echo "Syncing $DIR to bucket $BUCKET_NAME" + # Parse the product + [[ "$DIR" =~ ^$OUT_PATH/(.*)$ ]] + PRODUCT="${BASH_REMATCH[1]}" + rclone \ + --no-check-dest \ + --copy-links \ + --gcs-bucket-policy-only \ + --gcs-no-check-bucket \ + copy \ + "${OUT_PATH}/${PRODUCT}" \ + ":gcs://${BUCKET_NAME}/${PRODUCT}" + done echo "Done" fi # Set some environment variables for the second module diff --git a/pipe/testprod/testprod_pub_egress_and_publish.yaml b/pipe/testprod/testprod_pub_egress_and_publish.yaml index 964a1e6af..ce76dc003 100644 --- a/pipe/testprod/testprod_pub_egress_and_publish.yaml +++ b/pipe/testprod/testprod_pub_egress_and_publish.yaml @@ -9,22 +9,31 @@ transform: - "-c" - |- /bin/bash <<'EOF' + # Use bash-strict mode. See http://redsymbol.net/articles/unofficial-bash-strict-mode/ + set -euo pipefail + IFS=$'\n\t' # Run first module - pub_egress (using environment variables below as input parameters) if [[ $(echo $DATA_PATH) ]]; then python3 -m pub_egress.pub_egress_main fi # If there is output, egress it - dirs=$(find $OUT_PATH/* -type d) + dirs=$(find $OUT_PATH/NEON.DOM.SITE* -maxdepth 0 -type d) if [[ ${dirs} ]]; then echo "Syncing files to bucket $BUCKET_NAME" - rclone \ - --no-check-dest \ - --copy-links \ - --gcs-bucket-policy-only \ - --gcs-no-check-bucket \ - copy \ - "${OUT_PATH}" \ - ":gcs://${BUCKET_NAME}" + for DIR in $dirs; do + echo "Syncing $DIR to bucket $BUCKET_NAME" + # Parse the product + [[ "$DIR" =~ ^$OUT_PATH/(.*)$ ]] + PRODUCT="${BASH_REMATCH[1]}" + rclone \ + --no-check-dest \ + --copy-links \ + --gcs-bucket-policy-only \ + --gcs-no-check-bucket \ + copy \ + "${OUT_PATH}/${PRODUCT}" \ + ":gcs://${BUCKET_NAME}/${PRODUCT}" + done echo "Done" fi # Set some environment variables for the second module