From 8f8146f81604ddcd401677a5b867e67e6a5fb4e5 Mon Sep 17 00:00:00 2001 From: pauldg Date: Tue, 21 May 2024 12:16:35 +0200 Subject: [PATCH 1/8] add older-than param --- parts/22-query.sh | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/parts/22-query.sh b/parts/22-query.sh index 43781b8..8b77b5a 100644 --- a/parts/22-query.sh +++ b/parts/22-query.sh @@ -818,12 +818,12 @@ query_recent-jobs() { ##? : Jobs run in the past (in any state) EOF } -query_job-state-stats() { ## : Shows all jobs states for the last 30 days in a table counted by state +query_job-state-stats() { ## [--older-than=]: Shows all jobs states within a time interval in a table counted by state meta <<-EOF ADDED: 19 EOF handle_help "$@" <<-EOFhelp - Shows all job states for the last 30 days in a table counted by state + Shows all job states within a time interval in a table counted by state Example: $ gxadmin query job-state-stats @@ -838,6 +838,24 @@ query_job-state-stats() { ## : Shows all jobs states for the last 30 days in a t EOFhelp + fields="new=1;running=2;queued=3;upload=4;ok=5;error=6;paused=7;stopped=8;deleted=9" + tags="date=0" + + interval= + time_column='create_time' + + if (( $# > 0 )); then + for args in "$@"; do + if [[ "${args:0:13}" = '--older-than=' ]]; then + interval="${args:13}" + fi + done + fi + + if [[ -n "$interval" ]]; then + interval="AND job.$time_column < NOW() - interval '$interval'" + fi + read -r -d '' QUERY <<-EOF SELECT date_trunc ('day', job.create_time)::date as date, @@ -855,7 +873,7 @@ EOFhelp job_state_history WHERE job_state_history.job_id = job.id - and job.create_time >= now() - INTERVAL '30 DAYS' + $interval GROUP BY date ORDER BY From 69b70387484e8190572a080468fc3df39c7382c3 Mon Sep 17 00:00:00 2001 From: pauldg Date: Tue, 21 May 2024 14:56:35 +0200 Subject: [PATCH 2/8] small change for job-state-stats --- CHANGELOG.md | 1 + parts/22-query.sh | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git 
a/CHANGELOG.md b/CHANGELOG.md index f0158d1..ecb5454 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ - query monthly-users-registered to add YYYY-MM parameter @afgane - query monthly-jobs to add YYYY-MM and --by_state parameters @afgane - query total-jobs to add date and --total parameters @afgane + - query job-state-stats: added a --older-than param from @pauldg - Fixed: - Replaced hardcoded metric_name with the variable in query_tool-metrics function @sanjaysrikakulam - improved man pages a tad diff --git a/parts/22-query.sh b/parts/22-query.sh index 8b77b5a..968af5a 100644 --- a/parts/22-query.sh +++ b/parts/22-query.sh @@ -821,6 +821,7 @@ query_recent-jobs() { ##? : Jobs run in the past (in any state) query_job-state-stats() { ## [--older-than=]: Shows all jobs states within a time interval in a table counted by state meta <<-EOF ADDED: 19 + UPDATED: 22 EOF handle_help "$@" <<-EOFhelp Shows all job states within a time interval in a table counted by state @@ -842,7 +843,6 @@ EOFhelp tags="date=0" interval= - time_column='create_time' if (( $# > 0 )); then for args in "$@"; do @@ -853,7 +853,7 @@ EOFhelp fi if [[ -n "$interval" ]]; then - interval="AND job.$time_column < NOW() - interval '$interval'" + interval="AND job.create_time > (timezone('UTC', now()) - '$interval'::INTERVAL)" fi read -r -d '' QUERY <<-EOF From 5077e7ab34cdb9b9dbf0a63ccca086a58431282f Mon Sep 17 00:00:00 2001 From: pauldg Date: Tue, 21 May 2024 14:58:35 +0200 Subject: [PATCH 3/8] another --- parts/22-query.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parts/22-query.sh b/parts/22-query.sh index 968af5a..f33bffe 100644 --- a/parts/22-query.sh +++ b/parts/22-query.sh @@ -818,7 +818,7 @@ query_recent-jobs() { ##? : Jobs run in the past (in any state) EOF } -query_job-state-stats() { ## [--older-than=]: Shows all jobs states within a time interval in a table counted by state +query_job-state-stats() { ##? 
[--older-than=]: Shows all jobs states within a time interval in a table counted by state meta <<-EOF ADDED: 19 UPDATED: 22 From a9c0fb03d011b1146f599eb9c51616b9c2601eac Mon Sep 17 00:00:00 2001 From: pauldg Date: Tue, 21 May 2024 15:02:44 +0200 Subject: [PATCH 4/8] readme update --- docs/README.query.md | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/docs/README.query.md b/docs/README.query.md index 91151d6..5edb3ce 100644 --- a/docs/README.query.md +++ b/docs/README.query.md @@ -9,7 +9,7 @@ Command | Description [`query data-origin-distribution-summary`](#query-data-origin-distribution-summary) | breakdown of data sources (uploaded vs derived) [`query datasets-created-daily`](#query-datasets-created-daily) | The min/max/average/p95/p99 of total size of datasets created in a single day. [`query dataset-usage-and-imports`](#query-dataset-usage-and-imports) | Fetch limited information about which users and histories are using a specific dataset from disk. -[`query destination-queue-run-time The average/median/95%/99% tool spends in queue/run state grouped by tool_id and destination_id.`](#query-destination-queue-run-time-The-average/median/95%/99%-tool-spends-in-queue/run-state-grouped-by-tool_id-and-destination_id.) | query destination-queue-run-time The average/median/95%/99% tool spends in queue/run state grouped by tool_id and destination_id. +[`query destination-queue-run-time`](#query-destination-queue-run-time) | The average/median/95%/99% tool spends in queue/run state grouped by tool and destination. [`query disk-usage`](#query-disk-usage) | Disk usage per object store. 
[`query disk-usage-library`](#query-disk-usage-library) | Retrieve an approximation of the disk usage for a data library [`query dump-users`](#query-dump-users) | Dump the list of users and their emails @@ -38,7 +38,7 @@ Command | Description [`query jobs-queued-internal-by-handler`](#query-jobs-queued-internal-by-handler) | How many queued jobs do not have external IDs, by handler [`query jobs-ready-to-run`](#query-jobs-ready-to-run) | Find jobs ready to run (Mostly a performance test) [`query job-state`](#query-job-state) | Get current job state given a job ID -[`query job-state-stats`](#query-job-state-stats) | Shows all jobs states for the last 30 days in a table counted by state +[`query job-state-stats`](#query-job-state-stats) | Shows all jobs states within a time interval in a table counted by state [`query jobs`](#query-jobs) | List jobs ordered by most recently updated. = is required. [`query large-old-histories`](#query-large-old-histories) | Find large, old histories that probably should be deleted. [`query largest-collection`](#query-largest-collection) | Returns the size of the single largest collection @@ -273,18 +273,21 @@ This has built in support for "cleaning up" paths like /data/galaxy/.../dataset_ (1 row) -## query destination-queue-run-time The average/median/95%/99% tool spends in queue/run state grouped by tool_id and destination_id. +## query destination-queue-run-time -([*source*](https://github.com/galaxyproject/gxadmin/search?q=query_destination-queue-run-time_The_average/median/95%/99%_tool_spends_in_queue/run_state_grouped_by_tool_id_and_destination_id.&type=Code)) -query destination-queue-run-time - gxadmin query destination-queue-run-time The average/median/95%/99% tool spends in queue/run state grouped by tool_id and destination_id. 
+([*source*](https://github.com/galaxyproject/gxadmin/search?q=query_destination-queue-run-time&type=Code)) +query destination-queue-run-time - The average/median/95%/99% tool spends in queue/run state grouped by tool and destination. **SYNOPSIS** - gxadmin query destination-queue-run-time The average/median/95%/99% tool spends in queue/run state grouped by tool_id and destination_id. + gxadmin query destination-queue-run-time [--older-than=30] [--seconds] **NOTES** - $ gxadmin query destination-queue-run-time +Lists queue and run time statistics grouped by tool and destination within a time window (# of days). +Requires a given number of days + + $ gxadmin query destination-queue-run-time --older-than='90' destination_id | tool_id | count | avg | min | median_queue | perc_95_queue | perc_99_queue | max | avg | min | median_run | perc_95_run | perc_99_run | max ----------------+-----------------+-------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-------------- @@ -874,15 +877,15 @@ query job-state - Get current job state given a job ID ## query job-state-stats ([*source*](https://github.com/galaxyproject/gxadmin/search?q=query_job-state-stats&type=Code)) -query job-state-stats - Shows all jobs states for the last 30 days in a table counted by state +query job-state-stats - Shows all jobs states within a time interval in a table counted by state **SYNOPSIS** - gxadmin query job-state-stats + gxadmin query job-state-stats [--older-than=] **NOTES** -Shows all job states for the last 30 days in a table counted by state +Shows all job states within a time interval in a table counted by state Example: $ gxadmin query job-state-stats From 0a9e743a37cffabf1f40adb2002138e7d2e51a97 Mon Sep 17 00:00:00 2001 From: pauldg Date: Tue, 21 May 2024 17:03:36 +0200 Subject: [PATCH 5/8] set default to 30 days --- parts/22-query.sh | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 
deletions(-) diff --git a/parts/22-query.sh b/parts/22-query.sh index f33bffe..a7e697a 100644 --- a/parts/22-query.sh +++ b/parts/22-query.sh @@ -818,13 +818,13 @@ query_recent-jobs() { ##? : Jobs run in the past (in any state) EOF } -query_job-state-stats() { ##? [--older-than=]: Shows all jobs states within a time interval in a table counted by state +query_job-state-stats() { ##? [--older-than='30 days']: Shows all jobs states within a time interval in a table counted by state meta <<-EOF ADDED: 19 UPDATED: 22 EOF handle_help "$@" <<-EOFhelp - Shows all job states within a time interval in a table counted by state + Shows all job states within a time interval (default: 30 days) in a table counted by state Example: $ gxadmin query job-state-stats @@ -842,19 +842,12 @@ EOFhelp fields="new=1;running=2;queued=3;upload=4;ok=5;error=6;paused=7;stopped=8;deleted=9" tags="date=0" - interval= - - if (( $# > 0 )); then - for args in "$@"; do - if [[ "${args:0:13}" = '--older-than=' ]]; then - interval="${args:13}" - fi - done + interval="AND job.create_time > (timezone('UTC', now()) - '30 days'::INTERVAL)" + if [[ -n "$arg_older_than" ]]; then + interval="AND job.create_time > (timezone('UTC', now()) - '$arg_older_than'::INTERVAL)" + echo '$interval' fi - if [[ -n "$interval" ]]; then - interval="AND job.create_time > (timezone('UTC', now()) - '$interval'::INTERVAL)" - fi read -r -d '' QUERY <<-EOF SELECT From 167916094b55d61b4211846cd66b1dbbbe7e3c50 Mon Sep 17 00:00:00 2001 From: pauldg Date: Tue, 21 May 2024 17:14:03 +0200 Subject: [PATCH 6/8] changed ##? back to what it was --- parts/22-query.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/parts/22-query.sh b/parts/22-query.sh index a7e697a..b8cdc85 100644 --- a/parts/22-query.sh +++ b/parts/22-query.sh @@ -818,7 +818,7 @@ query_recent-jobs() { ##? : Jobs run in the past (in any state) EOF } -query_job-state-stats() { ##? 
[--older-than='30 days']: Shows all jobs states within a time interval in a table counted by state +query_job-state-stats() { ##? [--older-than=]: Shows all jobs states within a time interval in a table counted by state meta <<-EOF ADDED: 19 UPDATED: 22 @@ -845,7 +845,6 @@ EOFhelp interval="AND job.create_time > (timezone('UTC', now()) - '30 days'::INTERVAL)" if [[ -n "$arg_older_than" ]]; then interval="AND job.create_time > (timezone('UTC', now()) - '$arg_older_than'::INTERVAL)" - echo '$interval' fi From 0cc8db3990ff0288a810f1210cfb352ae7bb2994 Mon Sep 17 00:00:00 2001 From: pauldg Date: Tue, 21 May 2024 17:15:07 +0200 Subject: [PATCH 7/8] readme update --- docs/README.query.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/README.query.md b/docs/README.query.md index 5edb3ce..2bbc054 100644 --- a/docs/README.query.md +++ b/docs/README.query.md @@ -885,7 +885,7 @@ query job-state-stats - Shows all jobs states within a time interval in a table **NOTES** -Shows all job states within a time interval in a table counted by state +Shows all job states within a time interval (default: 30 days) in a table counted by state Example: $ gxadmin query job-state-stats From 148c29c50f5df3f623f73d0faccd2f8428d979f4 Mon Sep 17 00:00:00 2001 From: pauldg Date: Tue, 21 May 2024 17:21:43 +0200 Subject: [PATCH 8/8] added example with --older-than param --- docs/README.query.md | 14 ++++++++++++-- parts/22-query.sh | 12 +++++++++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/docs/README.query.md b/docs/README.query.md index 2bbc054..ff97f1e 100644 --- a/docs/README.query.md +++ b/docs/README.query.md @@ -38,7 +38,7 @@ Command | Description [`query jobs-queued-internal-by-handler`](#query-jobs-queued-internal-by-handler) | How many queued jobs do not have external IDs, by handler [`query jobs-ready-to-run`](#query-jobs-ready-to-run) | Find jobs ready to run (Mostly a performance test) [`query job-state`](#query-job-state) | Get current job 
state given a job ID -[`query job-state-stats`](#query-job-state-stats) | Shows all jobs states within a time interval in a table counted by state +[`query job-state-stats`](#query-job-state-stats) | Shows all jobs states within a time interval (default: 30 days) in a table counted by state [`query jobs`](#query-jobs) | List jobs ordered by most recently updated. = is required. [`query large-old-histories`](#query-large-old-histories) | Find large, old histories that probably should be deleted. [`query largest-collection`](#query-largest-collection) | Returns the size of the single largest collection @@ -877,7 +877,7 @@ query job-state - Get current job state given a job ID ## query job-state-stats ([*source*](https://github.com/galaxyproject/gxadmin/search?q=query_job-state-stats&type=Code)) -query job-state-stats - Shows all jobs states within a time interval in a table counted by state +query job-state-stats - Shows all jobs states within a time interval (default: 30 days) in a table counted by state **SYNOPSIS** @@ -898,6 +898,16 @@ $ gxadmin query job-state-stats ... -26 days +The '--older-than=' option takes a value in the PostgreSQL date/time interval +format, see documentation: https://www.postgresql.org/docs/current/functions-datetime.html +Be sure to quote intervals containing spaces: + +$ gxadmin query job-state-stats --older-than='2 days' + date | new | running | queued | upload | ok | error | paused | stopped | deleted +------------+-------+---------+--------+--------+-------+-------+--------+---------+--------- +2022-04-26 | 921 | 564 | 799 | 0 | 581 | 21 | 1 | 0 | 2 +2022-04-25 | 1412 | 1230 | 1642 | 0 | 1132 | 122 | 14 | 0 | 15 + ## query jobs diff --git a/parts/22-query.sh b/parts/22-query.sh index b8cdc85..e00a284 100644 --- a/parts/22-query.sh +++ b/parts/22-query.sh @@ -818,7 +818,7 @@ query_recent-jobs() { ##? : Jobs run in the past (in any state) EOF } -query_job-state-stats() { ##? 
[--older-than=]: Shows all jobs states within a time interval in a table counted by state +query_job-state-stats() { ##? [--older-than=]: Shows all jobs states within a time interval (default: 30 days) in a table counted by state meta <<-EOF ADDED: 19 UPDATED: 22 @@ -837,6 +837,16 @@ query_job-state-stats() { ##? [--older-than=]: Shows all jobs states w ... -26 days + The '--older-than=' option takes a value in the PostgreSQL date/time interval + format, see documentation: https://www.postgresql.org/docs/current/functions-datetime.html + Be sure to quote intervals containing spaces: + + $ gxadmin query job-state-stats --older-than='2 days' + date | new | running | queued | upload | ok | error | paused | stopped | deleted + ------------+-------+---------+--------+--------+-------+-------+--------+---------+--------- + 2022-04-26 | 921 | 564 | 799 | 0 | 581 | 21 | 1 | 0 | 2 + 2022-04-25 | 1412 | 1230 | 1642 | 0 | 1132 | 122 | 14 | 0 | 15 + EOFhelp fields="new=1;running=2;queued=3;upload=4;ok=5;error=6;paused=7;stopped=8;deleted=9"