From 4dac76053105a0642a9b2ab7fcc0d88121647003 Mon Sep 17 00:00:00 2001
From: Mike Wendt <1915404+mike-wendt@users.noreply.github.com>
Date: Thu, 18 Feb 2021 17:29:59 -0500
Subject: [PATCH 1/4] ENH Add gpuci_mamba_retry

---
 tools/gpuci_mamba_retry | 140 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 140 insertions(+)
 create mode 100644 tools/gpuci_mamba_retry

diff --git a/tools/gpuci_mamba_retry b/tools/gpuci_mamba_retry
new file mode 100644
index 0000000..3ecb8a0
--- /dev/null
+++ b/tools/gpuci_mamba_retry
@@ -0,0 +1,140 @@
+#!/bin/bash
+#
+# gpuci_mamba_retry
+#
+# Wrapper for conda that retries the command after a CondaHTTPError,
+# ChecksumMismatchError, or JSONDecodeError (ideally, any conda error that
+# is normally resolved by retrying)
+#
+# Example usage:
+# $ gpuci_mamba_retry install cudatoolkit=11.0 rapids=0.16
+#
+# Configurable options are set using the following env vars:
+#
+# GPUCI_MAMBA_RETRY_MAX - set to a positive integer to set the max number of retry
+#                         attempts (attempts after the initial try).
+#                         Default is 3 retries
+#
+# GPUCI_MAMBA_RETRY_SLEEP - set to a positive integer to set the duration, in
+#                           seconds, to wait between retries.
+#                           Default is a 10 second sleep
+#
+
+# pipefail must be set in order for the script to recognize failing exit codes
+# when output is piped to tee
+set -o pipefail
+
+mambaretry_help="
+gpuci_mamba_retry options:
+
+   --mambaretry_max_retries=n      Retry the conda command at most n times (default is 3)
+   --mambaretry_sleep_interval=n   Sleep n seconds between retries (default is 10)
+
+ALSO gpuci_mamba_retry options can be set using the following env vars:
+
+    GPUCI_MAMBA_RETRY_MAX - set to a positive integer to set the max number of retry
+                            attempts (attempts after the initial try).
+                            Default is 3 retries
+
+    GPUCI_MAMBA_RETRY_SLEEP - set to a positive integer to set the duration, in
+                              seconds, to wait between retries.
+                              Default is a 10 second sleep
+==========
+"
+max_retries=${GPUCI_MAMBA_RETRY_MAX:=3}
+sleep_interval=${GPUCI_MAMBA_RETRY_SLEEP:=10}
+exitcode=0
+needToRetry=0
+retries=0
+args=()
+
+# Temporarily set this to something else (eg. a script called "testConda" that
+# prints "CondaHTTPError:" and exits with 1) for testing this script.
+#mambaCmd=./testConda
+mambaCmd=${CONDA_EXE:=mamba}
+
+# Function to output messages to stderr
+# FIXME - extend `gpuci_logger` or make another script for this
+function echo_stderr {
+  echo " [gpuci_mamba_retry] $*" >&2
+}
+
+# Function to run conda and check output for specific retryable errors
+# input variables:
+#    mambaCmd: the command used for running conda, which is given the args
+#              array (the args passed to this script, minus its own options)
+#    outfile: file to tee output to for checking, likely a temp file
+# output variables:
+#    exitcode: the exit code from running ${mambaCmd} with the saved args
+#    needToRetry: 1 if the command should be retried, 0 if it should not be
+function runConda {
+  "${mambaCmd}" "${args[@]}" 2>&1 | tee "${outfile}"
+  exitcode=$?
+  needToRetry=0
+  retryingMsg=""
+
+  if (( ${exitcode} != 0 )); then
+    # Show exit code
+    echo_stderr "Failed, mamba returned exit code: ${exitcode}"
+
+    if grep -q CondaHTTPError: "${outfile}"; then
+      retryingMsg="Retrying, found 'CondaHTTPError:' in output..."
+      needToRetry=1
+    elif grep -q ChecksumMismatchError: "${outfile}"; then
+      retryingMsg="Retrying, found 'ChecksumMismatchError:' in output..."
+      needToRetry=1
+    elif grep -q JSONDecodeError: "${outfile}"; then
+      retryingMsg="Retrying, found 'JSONDecodeError:' in output..."
+      needToRetry=1
+    else
+      echo_stderr "Exiting, no retryable mamba errors detected: 'ChecksumMismatchError:' or 'CondaHTTPError:' or 'JSONDecodeError:'"
+    fi
+
+    if (( ${needToRetry} == 1 )) && \
+       (( ${retries} >= ${max_retries} )); then
+      # Catch instance where we run out of retries
+      echo_stderr "Exiting, reached max retries..."
+    elif (( ${needToRetry} == 1 )); then
+      # Give reason for retry (skipped when nothing retryable was found)
+      echo_stderr "${retryingMsg}"
+    fi
+  fi
+}
+
+
+# Process and remove args recognized only by this script, save others for conda
+# Process help separately
+for arg in "$@"; do
+  opt=${arg%%=*}
+  val=${arg#*=}
+  if [[ ${opt} == "--help" ]] || [[ ${opt} == "-h" ]]; then
+    echo "${mambaretry_help}"
+    "${mambaCmd}" --help
+    exit $?
+  elif [[ ${opt} == "--mambaretry_max_retries" ]]; then
+    max_retries=${val}
+  elif [[ ${opt} == "--mambaretry_sleep_interval" ]]; then
+    sleep_interval=${val}
+  else
+    args+=("${arg}")
+  fi
+done
+
+# Run command
+outfile=$(mktemp)
+runConda "${args[@]}"
+
+# Retry loop, only if needed
+while (( ${needToRetry} == 1 )) && \
+      (( ${retries} < ${max_retries} )); do
+
+  retries=$(( retries + 1 ))
+  echo_stderr "Waiting, retry ${retries} of ${max_retries} -> sleeping for ${sleep_interval} seconds..."
+  sleep "${sleep_interval}"
+  echo_stderr "Starting, retry ${retries} of ${max_retries} -> sleep done..."
+
+  runConda "${args[@]}"
+done
+
+rm -f "${outfile}"
+exit "${exitcode}"

From 54812743c31933631e8b5e0ce6d324816bac1bec Mon Sep 17 00:00:00 2001
From: Mike Wendt <1915404+mike-wendt@users.noreply.github.com>
Date: Thu, 18 Feb 2021 17:41:13 -0500
Subject: [PATCH 2/4] FIX Update mamba bin location

---
 tools/gpuci_mamba_retry | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/gpuci_mamba_retry b/tools/gpuci_mamba_retry
index 3ecb8a0..454144e 100644
--- a/tools/gpuci_mamba_retry
+++ b/tools/gpuci_mamba_retry
@@ -51,7 +51,8 @@ args=()
 # Temporarily set this to something else (eg. a script called "testConda" that
 # prints "CondaHTTPError:" and exits with 1) for testing this script.
 #mambaCmd=./testConda
-mambaCmd=${CONDA_EXE:=mamba}
+MAMBA_BIN=${CONDA_PREFIX:+${CONDA_PREFIX}/bin/mamba}  # empty if no active env
+mambaCmd=${MAMBA_BIN:=mamba}                          # then fall back to PATH
 
 # Function to output messages to stderr
 # FIXME - extend `gpuci_logger` or make another script for this

From e9483180534344c5cefd7154d7dd9d453b3680f4 Mon Sep 17 00:00:00 2001
From: Mike Wendt <1915404+mike-wendt@users.noreply.github.com>
Date: Thu, 18 Feb 2021 17:50:37 -0500
Subject: [PATCH 3/4] ENH Require mamba for gpuci-tools conda install

---
 conda/recipe/meta.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/conda/recipe/meta.yaml b/conda/recipe/meta.yaml
index d7b4e79..4c4ec85 100644
--- a/conda/recipe/meta.yaml
+++ b/conda/recipe/meta.yaml
@@ -13,6 +13,10 @@ source:
 build:
   number: {{ git_revision_count }}
 
+requirements:
+  run:
+    - conda-forge::mamba
+
 test:
   commands:
     - gpuci_logger "test"

From 7c409621fb93fce5be9d0adaf59a5e6017c08c79 Mon Sep 17 00:00:00 2001
From: Mike Wendt <1915404+mike-wendt@users.noreply.github.com>
Date: Tue, 23 Feb 2021 12:06:46 -0500
Subject: [PATCH 4/4] FIX Update function name to runMamba

---
 tools/gpuci_mamba_retry | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/gpuci_mamba_retry b/tools/gpuci_mamba_retry
index 454144e..bae6fe3 100644
--- a/tools/gpuci_mamba_retry
+++ b/tools/gpuci_mamba_retry
@@ -68,7 +68,7 @@ function echo_stderr {
 # output variables:
 #    exitcode: the exit code from running ${mambaCmd} with the saved args
 #    needToRetry: 1 if the command should be retried, 0 if it should not be
-function runConda {
+function runMamba {
   "${mambaCmd}" "${args[@]}" 2>&1 | tee "${outfile}"
   exitcode=$?
   needToRetry=0
@@ -123,7 +123,7 @@ done
 
 # Run command
 outfile=$(mktemp)
-runConda "${args[@]}"
+runMamba "${args[@]}"
 
 # Retry loop, only if needed
 while (( ${needToRetry} == 1 )) && \
@@ -134,7 +134,7 @@ while (( ${needToRetry} == 1 )) && \
   sleep "${sleep_interval}"
   echo_stderr "Starting, retry ${retries} of ${max_retries} -> sleep done..."
 
-  runConda "${args[@]}"
+  runMamba "${args[@]}"
 done
 
 rm -f "${outfile}"