diff --git a/tools/rapids-conda-retry b/tools/rapids-conda-retry
index 9ce59ca..78e25b2 100755
--- a/tools/rapids-conda-retry
+++ b/tools/rapids-conda-retry
@@ -67,7 +67,7 @@ condaCmd=${RAPIDS_CONDA_EXE:=conda}
 # needToRetry: 1 if the command should be retried, 0 if it should not be
 function runConda {
     # shellcheck disable=SC2086
-    ${condaCmd} ${args} 2>&1| tee "${outfile}"
+    rapids-otel-wrap ${condaCmd} ${args} 2>&1| tee "${outfile}"
     exitcode=$?
     needToRetry=0
     needToClean=0
diff --git a/tools/rapids-get-pr-conda-artifact b/tools/rapids-get-pr-conda-artifact
index 3c3605d..2e9885b 100755
--- a/tools/rapids-get-pr-conda-artifact
+++ b/tools/rapids-get-pr-conda-artifact
@@ -33,4 +33,4 @@ if [[ -z "${commit}" ]]; then
   commit=$(git ls-remote https://github.com/rapidsai/"${repo}".git refs/heads/pull-request/"${pr}" | cut -c1-7)
 fi
 
-rapids-get-artifact "ci/${repo}/pull-request/${pr}/${commit}/${artifact_name}"
+rapids-otel-wrap rapids-get-artifact "ci/${repo}/pull-request/${pr}/${commit}/${artifact_name}"
diff --git a/tools/rapids-mamba-retry b/tools/rapids-mamba-retry
index 5db5d00..e9f2644 100755
--- a/tools/rapids-mamba-retry
+++ b/tools/rapids-mamba-retry
@@ -46,4 +46,4 @@ for arg in "$@"; do
     fi
 done
 
-rapids-conda-retry "$@"
+rapids-otel-wrap rapids-conda-retry "$@"
diff --git a/tools/rapids-otel-wrap b/tools/rapids-otel-wrap
new file mode 100755
index 0000000..565adb4
--- /dev/null
+++ b/tools/rapids-otel-wrap
@@ -0,0 +1,125 @@
+#!/bin/bash
+# Wraps an arbitrary command (with arbitrary args) and emits an OpenTelemetry span tracing it.
+#
+# Usage: rapids-otel-wrap <command> [args...]
+#
+# To add metadata tags (attrs), set (or append to) the RAPIDS_OTEL_ATTRS string. A single
+# leading comma (left by callers appending to an unset value) is stripped before use.
+# See https://github.com/equinix-labs/otel-cli?tab=readme-ov-file#header-and-attribute-formatting for details
+#
+# The command's stdout and stderr are merged and replayed on this script's stdout; the
+# wrapper's own messages go to stderr. The exit status is that of the wrapped command.
+
+if [ "$#" -eq 0 ]; then
+    echo "Usage: ${0##*/} <command> [args...]" >&2
+    exit 2
+fi
+
+TIME_FORMAT="--rfc-3339=ns"
+
+# This cleanly sets up a tempdir for the fifo that we use to pass the command's output
+# from the background subshell into our current shell.
+# https://unix.stackexchange.com/a/29918/34459
+tmpdir=
+cleanup () {
+    trap - EXIT
+    if [ -n "$tmpdir" ]; then rm -rf -- "$tmpdir"; fi
+    # When called from a signal trap, restore the default handler and re-raise the signal
+    # so the script terminates instead of carrying on without its tempdir.
+    if [ -n "${1:-}" ]; then
+        trap - "$1"
+        kill -s "$1" "$$"
+    fi
+}
+tmpdir=$(mktemp -d) || exit 1
+trap 'cleanup' EXIT
+trap 'cleanup HUP' HUP
+trap 'cleanup TERM' TERM
+trap 'cleanup INT' INT
+mkfifo "$tmpdir/pipe" || exit 1
+
+start=$(date "${TIME_FORMAT}")
+
+RAPIDS_OTEL_SERVICE_NAME="${RAPIDS_OTEL_SERVICE_NAME:-${GITHUB_JOB:-"default-telemetry-service-name"}}"
+RAPIDS_OTEL_TRACES_EXPORTER="${RAPIDS_OTEL_TRACES_EXPORTER:-${RAPIDS_OTEL_EXPORTER:-"console"}}"
+RAPIDS_OTEL_METRICS_EXPORTER="${RAPIDS_OTEL_METRICS_EXPORTER:-${RAPIDS_OTEL_EXPORTER:-"console"}}"
+RAPIDS_OTEL_LOGS_EXPORTER="${RAPIDS_OTEL_LOGS_EXPORTER:-${RAPIDS_OTEL_EXPORTER:-"console"}}"
+OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-${OTEL_EXPORTER_OTLP_ENDPOINT}/v1/traces}"
+OTEL_EXPORTER_OTLP_METRICS_ENDPOINT="${OTEL_EXPORTER_OTLP_METRICS_ENDPOINT:-${OTEL_EXPORTER_OTLP_ENDPOINT}/v1/metrics}"
+OTEL_EXPORTER_OTLP_LOGS_ENDPOINT="${OTEL_EXPORTER_OTLP_LOGS_ENDPOINT:-${OTEL_EXPORTER_OTLP_ENDPOINT}/v1/logs}"
+
+# Instrument only when the tool this branch runs is installed. Test the exit status of the
+# lookup directly: its output is silenced, so comparing that output with 0 is always true.
+if command -v opentelemetry-instrument >/dev/null 2>&1; then
+    echo "Running command with OpenTelemetry instrumentation" >&2
+    (
+        set -x
+        opentelemetry-instrument \
+            --service_name "${RAPIDS_OTEL_SERVICE_NAME}" \
+            --traces_exporter "${RAPIDS_OTEL_TRACES_EXPORTER}" \
+            --metrics_exporter "${RAPIDS_OTEL_METRICS_EXPORTER}" \
+            --logs_exporter "${RAPIDS_OTEL_LOGS_EXPORTER}" \
+            --exporter_otlp_traces_endpoint "${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT}" \
+            --exporter_otlp_metrics_endpoint "${OTEL_EXPORTER_OTLP_METRICS_ENDPOINT}" \
+            --exporter_otlp_logs_endpoint "${OTEL_EXPORTER_OTLP_LOGS_ENDPOINT}" \
+            --exporter_otlp_protocol http/protobuf \
+            "$@" > "$tmpdir/pipe" 2>&1
+    ) &
+else
+    echo "Skipping instrumentation, running \"${*}\"" >&2
+    (
+        set -x
+        # "$@" (not "$*") keeps each argument a separate word, so commands with args work.
+        "$@" > "$tmpdir/pipe" 2>&1
+    ) &
+fi
+cmd_pid=$!
+
+echo "stdout/stderr from command:" >&2
+# The extra test keeps a final line that has no terminating newline.
+while IFS= read -r line || [ -n "$line" ]; do
+    printf '%s\n' "$line"
+done < "$tmpdir/pipe"
+# Collect the wrapped command's exit status from the background subshell.
+wait "$cmd_pid"
+RETURN_STATUS=$?
+
+if command -v otel-cli >/dev/null 2>&1; then
+    # otel-cli takes a span status of unset|ok|error, not a numeric exit status.
+    if [ "${RETURN_STATUS}" -eq 0 ]; then span_status=ok; else span_status=error; fi
+    span_args=(
+        -n "$1" -s "${RAPIDS_OTEL_SERVICE_NAME}"
+        --endpoint "${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT}"
+        --start "$start" --end "$(date "${TIME_FORMAT}")"
+        --protocol http/protobuf
+        --status-code "${span_status}"
+        --tp-print
+    )
+    # Add optional flags only when they have a value; an empty unquoted expansion would
+    # make the flag swallow the following option as its argument.
+    if [ -n "${OTEL_EXPORTER_OTLP_CERTIFICATE:-}" ]; then
+        span_args+=(--tls-ca-cert "${OTEL_EXPORTER_OTLP_CERTIFICATE}")
+    fi
+    if [ -n "${OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE:-}" ]; then
+        span_args+=(--tls-client-cert "${OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE}")
+    fi
+    if [ -n "${OTEL_EXPORTER_OTLP_CLIENT_KEY:-}" ]; then
+        span_args+=(--tls-client-key "${OTEL_EXPORTER_OTLP_CLIENT_KEY}")
+    fi
+    attrs="${RAPIDS_OTEL_ATTRS:-}"
+    attrs="${attrs#,}"
+    if [ -n "${attrs}" ]; then
+        span_args+=(--attrs "${attrs}")
+    fi
+    if [ -n "${OTEL_EXPORTER_OTLP_HEADERS:-}" ]; then
+        span_args+=(--otlp-headers "${OTEL_EXPORTER_OTLP_HEADERS}")
+    fi
+    # TRACEPARENT gets picked up as an env var automatically
+    # https://github.com/equinix-labs/otel-cli?tab=readme-ov-file#examples
+    # --tp-print output is sent to stderr so stdout carries only the command's output.
+    set -x
+    otel-cli span "${span_args[@]}" >&2
+    set +x
+fi
+cleanup
+exit "${RETURN_STATUS}"
diff --git a/tools/rapids-upload-conda-to-s3 b/tools/rapids-upload-conda-to-s3
index 91a049f..7db378d 100755
--- a/tools/rapids-upload-conda-to-s3
+++ b/tools/rapids-upload-conda-to-s3
@@ -30,4 +30,5 @@ pkg_name="$(rapids-package-name "$pkg_type")"
 # Where conda build artifacts are output
 path_to_tar_up="${RAPIDS_CONDA_BLD_OUTPUT_DIR}"
 
-rapids-upload-to-s3 "${pkg_name}" "${path_to_tar_up}"
+export RAPIDS_OTEL_ATTRS="${RAPIDS_OTEL_ATTRS:-},rapids_task=upload,rapids_package_name=${pkg_name}"
+rapids-otel-wrap rapids-upload-to-s3 "${pkg_name}" "${path_to_tar_up}"
diff --git a/tools/rapids-upload-wheels-to-s3 b/tools/rapids-upload-wheels-to-s3
index 9b35b39..1b096ac 100755
--- a/tools/rapids-upload-wheels-to-s3
+++ b/tools/rapids-upload-wheels-to-s3
@@ -20,4 +20,5 @@ if [ "${CI:-false}" = "false" ]; then
   exit 0
 fi
 
-rapids-upload-to-s3 "${pkg_name}" "$@"
+RAPIDS_OTEL_ATTRS="${RAPIDS_OTEL_ATTRS:-},rapids_task=upload,rapids_package_name=${pkg_name}" \
+  rapids-otel-wrap rapids-upload-to-s3 "${pkg_name}" "$@"