diff --git a/CHANGELOG.md b/CHANGELOG.md index b601f42..8ff3ca1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ - Added: - filter histogram: replaces bit.ly's data_hacks with a built-in AWK program to calculate a histogram. May not be entirely portable @hexylena. - mutate scale-table-autovacuum: Dynamically update autovacuum and autoanalyze scale for large tables. @natefoo + - query tools-usage-per-month from @lldelisle - Fixed: - Replaced hardcoded metric_name with the variable in query_tool-metrics function @sanjaysrikakulam - improved man pages a tad diff --git a/Makefile b/Makefile index 4525a78..874aaf7 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ test: @cat $(PARTS) > .tmpgxadmin @chmod +x .tmpgxadmin ./test.sh - shellcheck --severity error gxadmin + shellcheck --severity error .tmpgxadmin @rm -f .tmpgxadmin shellcheck: gxadmin diff --git a/parts/22-query.sh b/parts/22-query.sh index 6938815..444d024 100644 --- a/parts/22-query.sh +++ b/parts/22-query.sh @@ -721,7 +721,7 @@ query_job-state-stats() { ## : Shows all jobs states for the last 30 days in a t Example: $ gxadmin query job-state-stats - date | new | running | queued | upload | ok | error | paused | stopped | deleted + date | new | running | queued | upload | ok | error | paused | stopped | deleted ------------+-------+---------+--------+--------+-------+-------+--------+---------+--------- 2022-04-26 | 921 | 564 | 799 | 0 | 581 | 21 | 1 | 0 | 2 2022-04-25 | 1412 | 1230 | 1642 | 0 | 1132 | 122 | 14 | 0 | 15 @@ -770,7 +770,7 @@ query_monthly-job-runtimes() { ##? [--year=] [--month=] [--sub_dest=] [--month=] [--sub_dest= [--group=]: Lists count of t EOF handle_help "$@" <<-EOFhelp Lists tools use count by users in group. 
- Requires (2022-03) and + Requires (2022-03) and Example: $ gxadmin query tool-use-by-group 2022-02 NameOfGroup - tool_id | username | count + tool_id | username | count ----------------------------------------------------+----------------------------------+------- CONVERTER_gz_to_uncompressed | user_1 | 1 Convert characters1 | user_2 | 1 @@ -2046,7 +2046,7 @@ query_disk-usage-library() { ##? [--library_name=] [--by_folder] [--human] A --by_folder flag is also available for displaying disk usage for each folder. a$ gxadmin local query-disk-usage-library --library_name 'My Library' --by_folder - folder_name | folder size + folder_name | folder size -------------------------+------------- Contamination Filtering | 10798630750 Metagenomes | 12026310232 @@ -4448,7 +4448,7 @@ query_queue-details-drm() { ##? [--all] [--seconds] [--since-update]: Detailed o WITH job_data AS ( - SELECT + SELECT job.state as jobstate, job.id as jobid, job.job_runner_external_id as extid, @@ -4471,7 +4471,7 @@ query_queue-details-drm() { ##? [--all] [--seconds] [--since-update]: Detailed o state desc, $time_column_name desc ) - SELECT + SELECT jobstate, jobid, extid, @@ -4500,7 +4500,7 @@ query_jobs() { ##? [--tool=] [--destination=] [--limit=50] [--states=] [--endyear=] [--formula EOF }
+query_tools-usage-per-month() { ##? [--startmonth=-] [--endmonth=-] [--tools=] [--short_tool_id] [--super_short_tool_id] [--no_version]: By default, startmonth is 1 year ago and end month is current month. tool1, tool2 etc. should correspond to the tool_id with the same format as requested: toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.5.0+galaxy0,Cut1 for default, devteam/bowtie2/bowtie2/2.5.0+galaxy0,Cut1 for --short_tool_id, bowtie2/2.5.0+galaxy0,Cut1 for --super_short_tool_id etc...
+	# Builds a SQL statement (stored in QUERY) that reports, per calendar month
+	# (UTC) and per tool id, the summed runtime_seconds * galaxy_slots in hours
+	# and the number of distinct users.
+	#
+	# Reads: arg_startmonth, arg_endmonth, arg_tools, arg_short_tool_id,
+	#        arg_super_short_tool_id, arg_no_version
+	#        (presumably filled in from the ##? signature by the argument
+	#        parser, which is not visible in this chunk -- TODO confirm).
+	# Sets:  QUERY. Whatever executes it lives outside this function.
+	meta <<-EOF
+		AUTHORS: lldelisle
+		ADDED: 22
+	EOF
+	# NOTE(review): the sample output below was captured while the query still
+	# grouped by the raw job.tool_id, which is why the same (month, tool_id)
+	# pair shows up more than once. With the grouping used further down each
+	# pair yields a single row; re-capture the sample when convenient.
+	handle_help "$@" <<-EOF
+		Tools Usage Tracking: cpu-hours and nb_users by Month-Year.
+
+		    $ gxadmin query tools-usage-per-month --super_short_tool_id --no_version --tools bowtie2,Cut1 --startmonth=2023-03 --endmonth 2023-08
+		       month    | cpu_hours | tool_id | nb_users
+		    ------------+-----------+---------+----------
+		     2023-08-01 |    326.88 | bowtie2 |        1
+		     2023-08-01 |    469.27 | bowtie2 |        1
+		     2023-07-01 |      0.01 | Cut1    |        2
+		     2023-07-01 |     20.04 | bowtie2 |        1
+		     2023-06-01 |      0.04 | Cut1    |        2
+		     2023-06-01 |    271.16 | bowtie2 |        3
+		     2023-05-01 |    732.74 | bowtie2 |        3
+		     2023-04-01 |      1.55 | Cut1    |        2
+		     2023-04-01 |    426.32 | bowtie2 |        2
+		     2023-03-01 |      0.00 | Cut1    |        1
+		     2023-03-01 |    437.31 | bowtie2 |        1
+		     2023-03-01 |    506.71 | bowtie2 |        2
+		    (12 rows)
+	EOF
+
+	# Time window: an explicit start month is compared on month boundaries;
+	# without one, fall back to a rolling "last 12 months" from now.
+	filter_by_time_period=""
+	if [[ -n $arg_startmonth ]]; then
+		filter_by_time_period="date_trunc('month', job.create_time AT TIME ZONE 'UTC') >= '$arg_startmonth-01'::date"
+	else
+		filter_by_time_period="job.create_time > NOW() - interval '1 year'"
+	fi
+	# The end month is inclusive: every job whose month is <= endmonth is kept.
+	if [[ -n $arg_endmonth ]]; then
+		filter_by_time_period="$filter_by_time_period AND date_trunc('month', job.create_time AT TIME ZONE 'UTC') <= '$arg_endmonth-01'::date"
+	fi
+	# SQL expression used for the reported tool id. Each flag below wraps or
+	# replaces it; --super_short_tool_id wins over --short_tool_id when both
+	# are given because it is assigned last.
+	tool_id="job.tool_id"
+	if [[ -n $arg_short_tool_id ]]; then
+		tool_id="regexp_replace(job.tool_id, '.*toolshed.*/repos/', '')"
+	fi
+	if [[ -n $arg_super_short_tool_id ]]; then
+		tool_id="regexp_replace(job.tool_id, '.*toolshed.*/repos/[^/]*/[^/]*/', '')"
+	fi
+
+	# --no_version strips a trailing "/<version>" from whichever expression
+	# was selected above.
+	if [[ -n $arg_no_version ]]; then
+		tool_id="regexp_replace(${tool_id}::TEXT, '/[0-9.a-z+-]+$', '')"
+	fi
+	# Turn "a,b,c" into the SQL list ('a', 'b', 'c') and filter on the same
+	# (possibly shortened) tool id expression that is reported.
+	filter_tool=""
+	if [[ -n $arg_tools ]]; then
+		tool_list=$(echo "$arg_tools" | sed -e "s/^/('/" -e "s/,/', '/g" -e "s/$/')/")
+		filter_tool="AND $tool_id in $tool_list"
+	fi
+
+	# GROUP BY repeats the $tool_id expression instead of the bare name
+	# "tool_id": PostgreSQL resolves an ambiguous GROUP BY name to the input
+	# column (job.tool_id) before the output alias, which would keep one row
+	# per full tool id/version even when a shortened id is requested.
+	read -r -d '' QUERY <<-EOF
+		SELECT
+			date_trunc('month', job.create_time AT TIME ZONE 'UTC')::date as month,
+			round(sum((a.metric_value * b.metric_value) / 3600 ), 2) as cpu_hours,
+			$tool_id as tool_id,
+			COUNT (DISTINCT job.user_id) as nb_users
+		FROM
+			job_metric_numeric a,
+			job_metric_numeric b,
+			job
+		WHERE
+			b.job_id = a.job_id
+			AND a.job_id = job.id
+			AND a.metric_name = 'runtime_seconds'
+			AND b.metric_name = 'galaxy_slots'
+			AND $filter_by_time_period $filter_tool
+		GROUP BY
+			month, $tool_id
+		ORDER BY
+			month DESC
+	EOF
+}