Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add otel wrapper #118

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
8796a22
add opentelemery span reporter wrapper
msarahan Sep 11, 2024
b0086f4
set program name to be RAPIDS_JOB_ID variable (to be set in gha job)
msarahan Sep 11, 2024
53b8f69
add debug echo for whether instrumentation is run
msarahan Sep 17, 2024
291d850
chmod +x otel wrap
msarahan Sep 17, 2024
d86e9d7
pass output from subshells
msarahan Sep 18, 2024
003bf1d
add output with while read; fix cmd running
msarahan Sep 18, 2024
57a1c76
don't add attrs unless they are non-null
msarahan Sep 18, 2024
2d01c7d
debug instrumentation command
msarahan Sep 18, 2024
5469dcd
wrong flag format
msarahan Sep 18, 2024
57e0c38
move start time higher so that it isn't out of scope
msarahan Sep 18, 2024
d4eddf8
quote start and end span times
msarahan Sep 18, 2024
7e23539
specify endpoints in instrumentation CLI
msarahan Sep 18, 2024
2ded7f9
unquote attrs to avoid empty quotes as positional arg
msarahan Sep 19, 2024
eb0071d
add otlp endpoint and headers for otel-cli calls
msarahan Sep 19, 2024
214d3a3
add protocol to otel-cli call
msarahan Sep 19, 2024
6ae4c0a
Merge branch 'rapidsai:main' into main
msarahan Sep 25, 2024
696906d
consistently use RAPIDS_SERVICE_NAME for service_name across tools
msarahan Sep 25, 2024
da34239
shellcheck
msarahan Sep 25, 2024
01de8e2
fix service name
msarahan Oct 3, 2024
46916e9
fix handling of empty headers in otel-cli call
msarahan Oct 3, 2024
8978303
handle RAPIDS_OTEL_ATTRS in upload conda script
msarahan Oct 3, 2024
6b51567
fix incorrect replacement expression for leading comma
msarahan Oct 4, 2024
16d6729
print traceparent
msarahan Oct 4, 2024
ae304e3
debugging traceparent
msarahan Oct 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tools/rapids-conda-retry
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ condaCmd=${RAPIDS_CONDA_EXE:=conda}
# needToRetry: 1 if the command should be retried, 0 if it should not be
function runConda {
# shellcheck disable=SC2086
${condaCmd} ${args} 2>&1| tee "${outfile}"
rapids-otel-wrap ${condaCmd} ${args} 2>&1| tee "${outfile}"
exitcode=$?
needToRetry=0
needToClean=0
Expand Down
2 changes: 1 addition & 1 deletion tools/rapids-get-pr-conda-artifact
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ if [[ -z "${commit}" ]]; then
commit=$(git ls-remote https://github.com/rapidsai/"${repo}".git refs/heads/pull-request/"${pr}" | cut -c1-7)
fi

rapids-get-artifact "ci/${repo}/pull-request/${pr}/${commit}/${artifact_name}"
rapids-otel-wrap rapids-get-artifact "ci/${repo}/pull-request/${pr}/${commit}/${artifact_name}"
2 changes: 1 addition & 1 deletion tools/rapids-mamba-retry
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,4 @@ for arg in "$@"; do
fi
done

rapids-conda-retry "$@"
rapids-otel-wrap rapids-conda-retry "$@"
94 changes: 94 additions & 0 deletions tools/rapids-otel-wrap
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/bin/bash
# Wraps arbitrary commands with arbitrary args. Emits an OpenTelemetry span for tracing the command
#
# To add metadata tags (attrs), set (or append to) the RAPIDS_OTEL_ATTRS str.
# See https://github.com/equinix-labs/otel-cli?tab=readme-ov-file#header-and-attribute-formatting for details
#
#

# GNU date option used for the span start/end timestamps.
TIME_FORMAT="--rfc-3339=ns"

# Set up a private temp directory holding two FIFOs that pass data out of the
# background subshell into the current shell:
#   pipe        - combined stdout/stderr of the wrapped command
#   status_pipe - exit status of the wrapped command
# https://unix.stackexchange.com/a/29918/34459
tmpdir=

#######################################
# Remove the temp directory (if one was created) and disarm the EXIT trap so
# the cleanup only runs once.
# Globals:   tmpdir (read)
# Arguments: $1 - optional signal name (HUP, INT or TERM). When given, the
#                 default handler for that signal is restored and the signal
#                 is re-raised, so the script really terminates instead of
#                 resuming after the trap handler returns. Any other value
#                 (or no value) is ignored.
#######################################
cleanup () {
    trap - EXIT
    if [ -n "$tmpdir" ] ; then rm -rf "$tmpdir"; fi
    case "${1:-}" in
        HUP|INT|TERM)
            trap - "$1"
            kill -s "$1" "$$"
            ;;
    esac
}
tmpdir=$(mktemp -d) || {
    echo "rapids-otel-wrap: failed to create temporary directory" >&2
    exit 1
}
trap 'cleanup' EXIT
trap 'cleanup HUP' HUP
trap 'cleanup TERM' TERM
trap 'cleanup INT' INT
# Without the FIFOs the wrapped command's output and status cannot be relayed,
# so bail out early (the EXIT trap removes the temp directory).
mkfifo "$tmpdir/pipe" "$tmpdir/status_pipe" || {
    echo "rapids-otel-wrap: failed to create FIFOs in $tmpdir" >&2
    exit 1
}

# Record the span start time before any other work happens.
# shellcheck disable=SC2086
start="$(date ${TIME_FORMAT})"

# Service name: explicit setting first, then the GitHub job name, then a
# fixed placeholder.
: "${RAPIDS_OTEL_SERVICE_NAME:=${GITHUB_JOB:-default-telemetry-service-name}}"

# Per-signal exporters: a signal-specific setting wins, then the shared
# RAPIDS_OTEL_EXPORTER, then "console".
: "${RAPIDS_OTEL_TRACES_EXPORTER:=${RAPIDS_OTEL_EXPORTER:-console}}"
: "${RAPIDS_OTEL_METRICS_EXPORTER:=${RAPIDS_OTEL_EXPORTER:-console}}"
: "${RAPIDS_OTEL_LOGS_EXPORTER:=${RAPIDS_OTEL_EXPORTER:-console}}"

# Per-signal OTLP endpoints: a signal-specific setting wins, otherwise the
# signal path is appended to the shared OTEL_EXPORTER_OTLP_ENDPOINT.
: "${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:=${OTEL_EXPORTER_OTLP_ENDPOINT}/v1/traces}"
: "${OTEL_EXPORTER_OTLP_METRICS_ENDPOINT:=${OTEL_EXPORTER_OTLP_ENDPOINT}/v1/metrics}"
: "${OTEL_EXPORTER_OTLP_LOGS_ENDPOINT:=${OTEL_EXPORTER_OTLP_ENDPOINT}/v1/logs}"

# Launch the wrapped command in a background subshell. Its combined
# stdout/stderr is written to the "pipe" FIFO and its exit status to the
# "status_pipe" FIFO, both of which are read back by the foreground shell.
#
# The tools are tested directly with `type`: the previous
# `[[ $(type ...) -eq 0 ]]` compared an empty string with 0, which is always
# true, so the fallback branch could never run. opentelemetry-instrument is
# checked as well because it is the program actually invoked here.
if type otel-cli >/dev/null 2>&1 && type opentelemetry-instrument >/dev/null 2>&1; then
    echo "Running command with OpenTelemetry instrumentation";
    (
        (
            set -x
            opentelemetry-instrument \
                --service_name "${RAPIDS_OTEL_SERVICE_NAME}" \
                --traces_exporter "${RAPIDS_OTEL_TRACES_EXPORTER}" \
                --metrics_exporter "${RAPIDS_OTEL_METRICS_EXPORTER}" \
                --logs_exporter "${RAPIDS_OTEL_LOGS_EXPORTER}" \
                --exporter_otlp_traces_endpoint "${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT}" \
                --exporter_otlp_metrics_endpoint "${OTEL_EXPORTER_OTLP_METRICS_ENDPOINT}" \
                --exporter_otlp_logs_endpoint "${OTEL_EXPORTER_OTLP_LOGS_ENDPOINT}" \
                --exporter_otlp_protocol http/protobuf \
                "$@" > "$tmpdir/pipe" 2>&1;
        )
        # $? is the exit status of the inner subshell, i.e. of the command.
        echo $? > "$tmpdir/status_pipe";
    ) &
else
    echo "Skipping instrumentation, running \"${*}\"";
    (
        (
            set -x
            # "$@" keeps every argument as its own word; "$*" would collapse
            # the command and its arguments into a single (non-existent)
            # command name.
            "$@" > "$tmpdir/pipe" 2>&1 ;
        )
        echo $? > "$tmpdir/status_pipe";
    ) &
fi

# Relay the wrapped command's output from the FIFO, then collect its exit
# status. Reading "pipe" returns EOF once the background writer closes it;
# only then does the writer open "status_pipe".
echo "stdout/stderr from command:"
# `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline, and
# printf (unlike echo) prints lines such as "-n" or "-e" verbatim.
while IFS= read -r line || [ -n "$line" ]; do
    printf '%s\n' "$line"
done < "$tmpdir/pipe"
RETURN_STATUS=$(<"$tmpdir/status_pipe")
# If no status could be read, report failure rather than carrying an empty
# value into later `exit`/status handling.
RETURN_STATUS="${RETURN_STATUS:-1}"

#######################################
# Strip a single leading comma from an attribute list.
# Callers append to RAPIDS_OTEL_ATTRS as "${RAPIDS_OTEL_ATTRS},k=v", which
# leaves a leading comma when the variable started out empty. Only that
# leading comma is removed; separators between attributes are kept.
# Arguments: $1 - attribute string, e.g. ",a=1,b=2"
# Outputs:   the string without its leading comma, no trailing newline
#######################################
otel_strip_leading_comma() {
    printf '%s' "${1#,}"
}

#######################################
# Translate a process exit status into an otel-cli span status code.
# Arguments: $1 - exit status of the wrapped command
# Outputs:   "ok" for 0, "error" for anything else (including empty)
#######################################
otel_span_status() {
    if [ "${1:-1}" = "0" ]; then
        printf 'ok'
    else
        printf 'error'
    fi
}

# Report one span covering the wrapped command, only when otel-cli exists.
if type otel-cli >/dev/null 2>&1; then
    # Build the argument list as an array so empty optional values are left
    # out entirely (an empty unquoted value would make its flag swallow the
    # next flag) and values containing spaces stay intact.
    # shellcheck disable=SC2086
    span_args=(
        -n "$1"
        # RAPIDS_SERVICE_NAME is honoured for backward compatibility; the
        # script itself only ever sets RAPIDS_OTEL_SERVICE_NAME.
        -s "${RAPIDS_SERVICE_NAME:-${RAPIDS_OTEL_SERVICE_NAME}}"
        --endpoint "${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT}"
        --start "$start" --end "$(date ${TIME_FORMAT})"
        --protocol http/protobuf
        --status-code "$(otel_span_status "${RETURN_STATUS}")"
        --tp-print
    )
    if [ -n "${OTEL_EXPORTER_OTLP_CERTIFICATE:-}" ]; then
        span_args+=(--tls-ca-cert "${OTEL_EXPORTER_OTLP_CERTIFICATE}")
    fi
    if [ -n "${OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE:-}" ]; then
        span_args+=(--tls-client-cert "${OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE}")
    fi
    if [ -n "${OTEL_EXPORTER_OTLP_CLIENT_KEY:-}" ]; then
        span_args+=(--tls-client-key "${OTEL_EXPORTER_OTLP_CLIENT_KEY}")
    fi
    attrs="$(otel_strip_leading_comma "${RAPIDS_OTEL_ATTRS:-}")"
    if [ -n "${attrs}" ]; then
        span_args+=(--attrs "${attrs}")
    fi
    headers="${OTEL_EXPORTER_OTLP_HEADERS:-}"
    if [ -n "${headers}" ]; then
        span_args+=(--otlp-headers "${headers}")
    fi
    # TRACEPARENT gets picked up as an env var automatically
    # https://github.com/equinix-labs/otel-cli?tab=readme-ov-file#examples
    # NOTE(review): the trace below echoes the full command line, including
    # the OTLP headers; confirm they never carry credentials or drop the trace.
    set -x
    otel-cli span "${span_args[@]}"
    set +x
fi
# Remove the temp directory and FIFOs, then exit with the wrapped command's
# status so callers see the same result as if they had run it directly.
# cleanup takes an optional signal name, not the wrapped command's argv, so
# it is called without arguments here.
cleanup
# Fall back to a failure status if none was captured; `exit ""` is an error.
exit "${RETURN_STATUS:-1}"
3 changes: 2 additions & 1 deletion tools/rapids-upload-conda-to-s3
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,5 @@ pkg_name="$(rapids-package-name "$pkg_type")"
# Where conda build artifacts are output
path_to_tar_up="${RAPIDS_CONDA_BLD_OUTPUT_DIR}"

rapids-upload-to-s3 "${pkg_name}" "${path_to_tar_up}"
export RAPIDS_OTEL_ATTRS="${RAPIDS_OTEL_ATTRS:-},rapids_task=upload,rapids_package_name=${pkg_name}"
rapids-otel-wrap rapids-upload-to-s3 "${pkg_name}" "${path_to_tar_up}"
3 changes: 2 additions & 1 deletion tools/rapids-upload-wheels-to-s3
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@ if [ "${CI:-false}" = "false" ]; then
exit 0
fi

rapids-upload-to-s3 "${pkg_name}" "$@"
RAPIDS_OTEL_ATTRS="${RAPIDS_OTEL_ATTRS},rapids_task=upload,rapids_package_name=${pkg_name}" \
rapids-otel-wrap rapids-upload-to-s3 "${pkg_name}" "$@"
Loading