This repository has been archived by the owner on Nov 1, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #16 from mike-wendt/enh-mamba
ENH Add `gpuci_mamba_retry` for mamba support
- Loading branch information
Showing
2 changed files
with
145 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
#!/bin/bash | ||
# | ||
# gpuci_mamba_retry | ||
# | ||
# Wrapper for conda that retries the command after a CondaHTTPError, | ||
# ChecksumMismatchError, or JSONDecodeError (ideally, any conda error that | ||
# is normally resolved by retrying) | ||
# | ||
# This must be set in order for the script to recognize failing exit codes when | ||
# output is piped to tee | ||
# | ||
# Example usage: | ||
# $ gpuci_mamba_retry install cudatoolkit=11.0 rapids=0.16 | ||
# | ||
# Configurable options are set using the following env vars: | ||
# | ||
# GPUCI_MAMBA_RETRY_MAX - set to a positive integer to set the max number of retry | ||
# attempts (attempts after the initial try). | ||
# Default is 3 retries | ||
# | ||
# GPUCI_MAMBA_RETRY_SLEEP - set to a positive integer to set the duration, in | ||
# seconds, to wait between retries. | ||
# Default is a 10 second sleep | ||
# | ||
set -o pipefail | ||
|
||
mambaretry_help=" | ||
gpuci_mamba_retry options: | ||
--mambaretry_max_retries=n Retry the conda command at most n times (default is 3) | ||
--mambaretry_sleep_interval=n Sleep n seconds between retries (default is 5) | ||
ALSO gpuci_mamba_retry options can be set using the following env vars: | ||
GPUCI_MAMBA_RETRY_MAX - set to a positive integer to set the max number of retry | ||
attempts (attempts after the initial try). | ||
Default is 3 retries | ||
GPUCI_MAMBA_RETRY_SLEEP - set to a positive integer to set the duration, in | ||
seconds, to wait between retries. | ||
Default is a 10 second sleep | ||
========== | ||
" | ||
max_retries=${GPUCI_MAMBA_RETRY_MAX:=3} | ||
sleep_interval=${GPUCI_MAMBA_RETRY_SLEEP:=10} | ||
exitcode=0 | ||
needToRetry=0 | ||
retries=0 | ||
args="" | ||
|
||
# Temporarily set this to something else (eg. a script called "testConda" that | ||
# prints "CondaHTTPError:" and exits with 1) for testing this script. | ||
#mambaCmd=./testConda | ||
MAMBA_BIN=$CONDA_PREFIX/bin/mamba | ||
mambaCmd=${MAMBA_BIN:=mamba} | ||
|
||
# Function to output messages to stderr | ||
# FIXME - extend `gpuci_logger` or make another script for this | ||
function echo_stderr { | ||
echo " [gpuci_mamba_retry] $@" >&2 | ||
} | ||
|
||
# Function to run conda and check output for specific retryable errors | ||
# input variables: | ||
# mambaCmd: the command used for running conda, which accepts the args | ||
# passed to this script | ||
# outfile: file to tee output to for checking, likely a temp file | ||
# output variables: | ||
# exitcode: the exit code from running ${mambaCmd} ${args} | ||
# needToRetry: 1 if the command should be retried, 0 if it should not be | ||
function runMamba { | ||
${mambaCmd} ${args} 2>&1| tee ${outfile} | ||
exitcode=$? | ||
needToRetry=0 | ||
retryingMsg="" | ||
|
||
if (( ${exitcode} != 0 )); then | ||
# Show exit code | ||
echo_stderr "Failed, mamba returned exit code: ${exitcode}" | ||
|
||
if grep -q CondaHTTPError: ${outfile}; then | ||
retryingMsg="Retrying, found 'CondaHTTPError:' in output..." | ||
needToRetry=1 | ||
elif grep -q ChecksumMismatchError: ${outfile}; then | ||
retryingMsg="Retrying, found 'ChecksumMismatchError:' in output..." | ||
needToRetry=1 | ||
elif grep -q JSONDecodeError: ${outfile}; then | ||
retryingMsg="Retrying, found 'JSONDecodeError:' in output..." | ||
needToRetry=1 | ||
else | ||
echo_stderr "Exiting, no retryable mamba errors detected: 'ChecksumMismatchError:' or 'CondaHTTPError:' or 'JSONDecodeError:'" | ||
fi | ||
|
||
if (( ${needToRetry} == 1 )) && \ | ||
(( ${retries} >= ${max_retries} )); then | ||
# Catch instance where we run out of retries | ||
echo_stderr "Exiting, reached max retries..." | ||
else | ||
# Give reason for retry | ||
echo_stderr $retryingMsg | ||
fi | ||
fi | ||
} | ||
|
||
|
||
# Process and remove args recognized only by this script, save others for conda | ||
# Process help separately | ||
for arg in $*; do | ||
opt=${arg%%=*} | ||
val=${arg##*=} | ||
if [[ ${opt} == "--help" ]] || [[ ${opt} == "-h" ]]; then | ||
echo "${mambaretry_help}" | ||
${mambaCmd} --help | ||
exit $? | ||
elif [[ ${opt} == "--mambaretry_max_retries" ]]; then | ||
max_retries=${val} | ||
elif [[ ${opt} == "--mambaretry_sleep_interval" ]]; then | ||
sleep_interval=${val} | ||
else | ||
args="${args} ${arg}" | ||
fi | ||
done | ||
|
||
# Run command | ||
outfile=$(mktemp) | ||
rumMamba ${args} | ||
|
||
# Retry loop, only if needed | ||
while (( ${needToRetry} == 1 )) && \ | ||
(( ${retries} < ${max_retries} )); do | ||
|
||
retries=$(expr ${retries} + 1) | ||
echo_stderr "Waiting, retry ${retries} of ${max_retries} -> sleeping for ${sleep_interval} seconds..." | ||
sleep ${sleep_interval} | ||
echo_stderr "Starting, retry ${retries} of ${max_retries} -> sleep done..." | ||
|
||
rumMamba ${args} | ||
done | ||
|
||
rm -f ${outfile} | ||
exit ${exitcode} |