From f44af2056138cbf64c1e70a99d62336ca3d949a3 Mon Sep 17 00:00:00 2001 From: Chris Evich Date: Thu, 22 Oct 2020 08:19:31 -0400 Subject: [PATCH] Cirrus: Detailed CPU/Memory/Time runner.sh stats On several occasions, there have been questions about CPU/Memory/IO trends in testing over time. Start collecting this data for all jobs, using a common/stable format so that trending analysis can be performed within/across multiple Cirrus-CI builds. This PR doesn't add any related tooling, it simply arranges for the collection of the data. Stats generation is done at the orchestration level to guarantee they reflect everything happening inside `runner.sh`. For example, the container-based tests re-exec `runner.sh` inside a container, but we're only interested in the top-level stats. Update all tasks to include collection of the stats file. Unfortunately, due to the way the Cirrus-CI YAML parser works, it is *not* possible to alias the artifacts collection more clearly, for example: ```yaml always: <<: *runner_stats <<: *logs_artifacts ``` Signed-off-by: Chris Evich --- .cirrus.yml | 20 +++++++++++++++++++- .gitignore | 2 ++ contrib/cirrus/logcollector.sh | 5 +++++ contrib/cirrus/runner.sh | 10 ++++++++++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 1a109f5ba2..a97a22efdd 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -17,6 +17,9 @@ env: CIRRUS_SHELL: "/bin/bash" # Save a little typing (path relative to $CIRRUS_WORKING_DIR) SCRIPT_BASE: "./contrib/cirrus" + # Runner statistics log file path/name + STATS_LOGFILE_SFX: 'runner_stats.log' + STATS_LOGFILE: '$GOSRC/${CIRRUS_TASK_NAME}-${STATS_LOGFILE_SFX}' #### #### Cache-image names to test with (double-quotes around names are critical) @@ -75,7 +78,11 @@ ext_svc_check_task: TEST_FLAVOR: ext_svc CTR_FQIN: ${FEDORA_CONTAINER_FQIN} setup_script: &setup '$GOSRC/$SCRIPT_BASE/setup_environment.sh' - main_script: &main '$GOSRC/$SCRIPT_BASE/runner.sh' + main_script: &main '/usr/bin/time --verbose --output="$STATS_LOGFILE" $GOSRC/$SCRIPT_BASE/runner.sh' + always: &runner_stats + runner_stats_artifacts: + path: ./*-${STATS_LOGFILE_SFX} + type: text/plain # Execute some quick checks to confirm this YAML file and all @@ -91,6 +98,7 @@ automation_task: TEST_ENVIRON: container setup_script: *setup main_script: *main + always: *runner_stats # This task use to be called 'gating', however that name is being @@ -126,6 +134,7 @@ smoke_task: make install.tools setup_script: *setup main_script: *main + always: *runner_stats # N/B: This task is critical. It builds all binaries and release archives @@ -186,6 +195,7 @@ build_task: setup_script: *setup main_script: *main always: &binary_artifacts + <<: *runner_stats gosrc_artifacts: path: ./* # Grab everything in top-level $GOSRC type: application/octet-stream @@ -220,6 +230,7 @@ validate_task: clone_script: *noop setup_script: *setup main_script: *main + always: *runner_stats # Exercise the "libpod" API with a small set of common @@ -239,6 +250,7 @@ bindings_task: clone_script: *noop # Comes from cache setup_script: *setup main_script: *main + always: *runner_stats # Build the "libpod" API documentation `swagger.yaml` for eventual @@ -275,6 +287,7 @@ endpoint_task: clone_script: *full_clone # build-cache not available to container tasks setup_script: *setup main_script: *main + always: *runner_stats # Check that all included go modules from other sources match @@ -293,6 +306,7 @@ vendor_task: clone_script: *full_clone # build-cache not available to container tasks setup_script: *setup main_script: *main + always: *runner_stats # There are several other important variations of podman which @@ -394,6 +408,7 @@ docker-py_test_task: clone_script: *noop # Comes from cache setup_script: *setup main_script: *main + always: *runner_stats # Does exactly what it says, execute the podman unit-tests on all primary @@ -412,6 +427,7 @@ unit_test_task: gopath_cache: *ro_gopath_cache setup_script: *setup main_script: *main + always: *runner_stats apiv2_test_task: @@ -428,6 +444,7 @@ apiv2_test_task: setup_script: *setup main_script: *main always: &logs_artifacts + <<: *runner_stats # Required for `contrib/cirrus/logformatter` to work properly html_artifacts: path: ./*.html @@ -438,6 +455,7 @@ apiv2_test_task: audit_log_script: '$SCRIPT_BASE/logcollector.sh audit' journal_script: '$SCRIPT_BASE/logcollector.sh journal' podman_system_info_script: '$SCRIPT_BASE/logcollector.sh podman' + time_script: '$SCRIPT_BASE/logcollector.sh time' # Execute the podman integration tests on all primary platforms and release diff --git a/.gitignore b/.gitignore index 434a12759e..9e444442ad 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,5 @@ release.txt /test/goecho/goecho .vscode* result +# Necessary to prevent hack/tree-status.sh false-positive +/*runner_stats.log diff --git a/contrib/cirrus/logcollector.sh b/contrib/cirrus/logcollector.sh index 746bccec92..7bf651b365 100755 --- a/contrib/cirrus/logcollector.sh +++ b/contrib/cirrus/logcollector.sh @@ -76,5 +76,10 @@ case $1 in # Any not-present packages will be listed as such $PKG_LST_CMD "${PKG_NAMES[@]}" | sort -u ;; + time) + # Assumed to be empty/undefined outside of Cirrus-CI (.cirrus.yml) + # shellcheck disable=SC2154 + if [[ -r "$STATS_LOGFILE" ]]; then cat "$STATS_LOGFILE"; fi + ;; *) die "Warning, $(basename $0) doesn't know how to handle the parameter '$1'" esac diff --git a/contrib/cirrus/runner.sh b/contrib/cirrus/runner.sh index 084b196a98..1d05dfa71c 100755 --- a/contrib/cirrus/runner.sh +++ b/contrib/cirrus/runner.sh @@ -253,6 +253,16 @@ dotest() { |& logformatter } +# Nearly every task in .cirrus.yml makes use of this shell script +# wrapped by /usr/bin/time to collect runtime statistics. Because the +# --output option is used to log stats to a file, every child-process +# inherits an open FD3 pointing at the log. However, some testing +# operations depend on making use of FD3, and so it must be explicitly +# closed here (and for all further child-processes). +# STATS_LOGFILE assumed empty/undefined outside of Cirrus-CI (.cirrus.yml) +# shellcheck disable=SC2154 +exec 3<&- + msg "************************************************************" # Required to be defined by caller # shellcheck disable=SC2154