Skip to content
This repository has been archived by the owner on Nov 1, 2024. It is now read-only.

Commit

Permalink
Merge pull request #16 from mike-wendt/enh-mamba
Browse files Browse the repository at this point in the history
ENH Add `gpuci_mamba_retry` for mamba support
  • Loading branch information
mike-wendt authored Feb 23, 2021
2 parents 7c537d8 + 7c40962 commit 3dbd993
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 0 deletions.
4 changes: 4 additions & 0 deletions conda/recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ source:
build:
number: {{ git_revision_count }}

requirements:
run:
- mamba::conda-forge

test:
commands:
- gpuci_logger "test"
Expand Down
141 changes: 141 additions & 0 deletions tools/gpuci_mamba_retry
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#!/bin/bash
#
# gpuci_mamba_retry
#
# Wrapper for mamba that retries the command after a CondaHTTPError,
# ChecksumMismatchError, or JSONDecodeError (ideally, any conda/mamba error
# that is normally resolved by retrying)
#
# `set -o pipefail` (below) must be set in order for the script to recognize
# failing exit codes when output is piped to tee
#
# Example usage:
# $ gpuci_mamba_retry install cudatoolkit=11.0 rapids=0.16
#
# Configurable options are set using the following env vars:
#
# GPUCI_MAMBA_RETRY_MAX - set to a positive integer to set the max number of retry
# attempts (attempts after the initial try).
# Default is 3 retries
#
# GPUCI_MAMBA_RETRY_SLEEP - set to a positive integer to set the duration, in
# seconds, to wait between retries.
# Default is a 10 second sleep
#
# Make a pipeline report failure when the command feeding `tee` fails.
set -o pipefail

# Help text printed for --help / -h (mamba's own help follows it).
mambaretry_help="
gpuci_mamba_retry options:
--mambaretry_max_retries=n Retry the conda command at most n times (default is 3)
--mambaretry_sleep_interval=n Sleep n seconds between retries (default is 10)
ALSO gpuci_mamba_retry options can be set using the following env vars:
GPUCI_MAMBA_RETRY_MAX - set to a positive integer to set the max number of retry
attempts (attempts after the initial try).
Default is 3 retries
GPUCI_MAMBA_RETRY_SLEEP - set to a positive integer to set the duration, in
seconds, to wait between retries.
Default is a 10 second sleep
==========
"

# Defaults come from the environment; `:=` also stores the default back into
# the GPUCI_* variable when it is unset or empty.
max_retries=${GPUCI_MAMBA_RETRY_MAX:=3}
sleep_interval=${GPUCI_MAMBA_RETRY_SLEEP:=10}

# Shared state updated while running mamba.
exitcode=0
needToRetry=0
retries=0
args=""

# Temporarily set this to something else (eg. a script called "testConda" that
# prints "CondaHTTPError:" and exits with 1) for testing this script.
#mambaCmd=./testConda

# Prefer the mamba executable inside the active conda prefix. Fall back to
# whatever `mamba` resolves to on PATH when CONDA_PREFIX is unset or holds no
# executable mamba (a plain "$CONDA_PREFIX/bin/mamba" assignment is never
# empty, so a `:=mamba` default on it would never apply).
if [[ -n "${CONDA_PREFIX:-}" && -x "${CONDA_PREFIX}/bin/mamba" ]]; then
    MAMBA_BIN="${CONDA_PREFIX}/bin/mamba"
else
    MAMBA_BIN=mamba
fi
mambaCmd=${MAMBA_BIN}

# Write a message to stderr, prefixed with this script's name.
# All arguments are joined with single spaces into one output line.
# FIXME - extend `gpuci_logger` or make another script for this
echo_stderr() {
    # Pin IFS so "$*" always joins the arguments with a single space.
    local IFS=' '
    printf '%s\n' " [gpuci_mamba_retry] $*" 1>&2
}

# Function to run mamba once and check its output for retryable errors.
# arguments:
#   "$@": arguments to hand to mamba, one per word. When none are given the
#         global ${args} string is word-split and used instead.
# input variables:
#   mambaCmd: the command used for running mamba
#   args: flat string of mamba arguments (fallback, see above)
#   outfile: file to tee output to for checking, likely a temp file
#   retries / max_retries: used only to choose which message is printed
# output variables:
#   exitcode: the exit code of the mamba command itself
#   needToRetry: 1 if the command should be retried, 0 if it should not be
function runMamba {
    local retryingMsg=""
    local errName
    local -a cmd

    if (( $# > 0 )); then
        cmd=("${mambaCmd}" "$@")
    else
        # Word splitting of the flat ${args} string is intentional here.
        # shellcheck disable=SC2206
        cmd=("${mambaCmd}" ${args})
    fi

    "${cmd[@]}" 2>&1 | tee "${outfile}"
    # Take mamba's own status rather than the status of the whole pipeline.
    exitcode=${PIPESTATUS[0]}
    needToRetry=0

    if (( exitcode == 0 )); then
        return
    fi

    # Show exit code
    echo_stderr "Failed, mamba returned exit code: ${exitcode}"

    # The first matching error name, in this order, is the one reported.
    for errName in CondaHTTPError: ChecksumMismatchError: JSONDecodeError:; do
        if grep -qF -- "${errName}" "${outfile}"; then
            retryingMsg="Retrying, found '${errName}' in output..."
            needToRetry=1
            break
        fi
    done

    if (( needToRetry == 0 )); then
        echo_stderr "Exiting, no retryable mamba errors detected: 'ChecksumMismatchError:' or 'CondaHTTPError:' or 'JSONDecodeError:'"
    elif (( retries >= max_retries )); then
        # Catch instance where we run out of retries
        echo_stderr "Exiting, reached max retries..."
    else
        # Give reason for retry
        echo_stderr "${retryingMsg}"
    fi
}


# Process and remove args recognized only by this script, save others for mamba.
# --help / -h is handled separately: it prints this script's help followed by
# mamba's own help, then exits with mamba's status.
#
# Results of the loop:
#   max_retries / sleep_interval: overridden when given on the command line
#   args: the remaining arguments appended to a flat, space-joined string
#   mamba_args: the same remaining arguments as an array, one element each
mamba_args=()
for arg in "$@"; do
    opt=${arg%%=*}
    # Everything after the FIRST '=' is the value.
    val=${arg#*=}
    case "${opt}" in
        --help | -h)
            echo "${mambaretry_help}"
            "${mambaCmd}" --help
            exit $?
            ;;
        --mambaretry_max_retries)
            max_retries=${val}
            ;;
        --mambaretry_sleep_interval)
            sleep_interval=${val}
            ;;
        *)
            args="${args} ${arg}"
            mamba_args+=("${arg}")
            ;;
    esac
done

# max_retries is used in arithmetic below, so reject anything that is not a
# plain non-negative integer instead of letting it evaluate to garbage.
if ! [[ "${max_retries}" =~ ^[0-9]+$ ]]; then
    echo_stderr "Exiting, max retries must be a non-negative integer, got: '${max_retries}'"
    exit 2
fi
# Force base 10 so a value such as "08" is not parsed as invalid octal.
max_retries=$(( 10#${max_retries} ))

# Temp file that receives a copy of mamba's output for error detection.
if ! outfile=$(mktemp); then
    echo_stderr "Exiting, could not create a temporary file"
    exit 1
fi
# Remove the temp file whenever the script exits.
trap 'rm -f -- "${outfile}"' EXIT

# Run command
runMamba "${mamba_args[@]}"

# Retry loop, only if needed
while (( needToRetry == 1 && retries < max_retries )); do
    retries=$(( retries + 1 ))
    echo_stderr "Waiting, retry ${retries} of ${max_retries} -> sleeping for ${sleep_interval} seconds..."
    sleep "${sleep_interval}"
    echo_stderr "Starting, retry ${retries} of ${max_retries} -> sleep done..."

    runMamba "${mamba_args[@]}"
done

exit "${exitcode}"

0 comments on commit 3dbd993

Please sign in to comment.