From 253cfa637c67ca31655bac3c716c83042627e6d8 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Sat, 20 Jan 2024 10:16:36 +0100 Subject: [PATCH 1/4] Add mutate queries to fix missing usernames Either by setting the username to the lowercase first portion of the email address (which excludes any conflicts), or by setting the username to a uuid (you can use this for conflicts). --- parts/21-mutate.sh | 58 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/parts/21-mutate.sh b/parts/21-mutate.sh index 310e5d0..2296cd9 100644 --- a/parts/21-mutate.sh +++ b/parts/21-mutate.sh @@ -1606,6 +1606,64 @@ mutate_purge-old-job-metrics() { ##? [--commit]: Purge job metrics older than 1 QUERY="$txn_pre $QUERY; $txn_pos" } +mutate_derive_missing_username_from_email() { ##? [--commit]: Set empty username to email address for users created before 2011 + meta <<-EOF + ADDED: 22 + EOF + handle_help "$@" <<-EOF + Galaxy did not require setting a username for users registered prior to 2011. + This will set the username to the lowercased substring of the email addres before the first @. + The username for a user with the email address "Jane.DoE@example.com" + will be set to "jane.doe" if the the user did not have a username and no other user + has been registered with that username. 
+ It is recommended that usernames that could not be changed due to conflicts are fixed + using mutate_set_missing_username_to_random_uuid() + EOF + + read -r -d '' QUERY <<-EOF + WITH extracted_emails AS ( + SELECT LOWER(SPLIT_PART(email, '@', 1)) AS extracted_email + FROM galaxy_user gu + WHERE username IS NULL + AND NOT EXISTS ( + SELECT 1 + FROM galaxy_user + WHERE LOWER(SPLIT_PART(email, '@', 1)) = LOWER(SPLIT_PART(gu.email, '@', 1)) + AND username IS NOT NULL + ) + ) + UPDATE galaxy_user gu + SET username = e.extracted_email + FROM extracted_emails e + WHERE gu.username IS NULL + AND LOWER(SPLIT_PART(gu.email, '@', 1)) = e.extracted_email + EOF + + txn_pre=$(txn_prefix "$arg_commit") + txn_pos=$(txn_postfix "$arg_commit") + QUERY="$txn_pre $QUERY; $txn_pos" +} + +mutate_set_missing_username_to_random_uuid() { ##? [--commit]: Set empty username to random uuid + meta <<-EOF + ADDED: 22 + EOF + handle_help "$@" <<-EOF + Galaxy did not require setting a username for users registered prior to 2011. + This will set the username column to a random uuid. + EOF + + read -r -d '' QUERY <<-EOF + UPDATE galaxy_user gu + SET username = gen_random_uuid() + WHERE gu.username IS NULL + EOF + + txn_pre=$(txn_prefix "$arg_commit") + txn_pos=$(txn_postfix "$arg_commit") + QUERY="$txn_pre $QUERY; $txn_pos" +} + mutate_scale-table-autovacuum() { ##? [--shift=16] [--commit]: Update autovacuum and autoanalyze scale for large tables. meta <<-EOF ADDED: 22 From 16a6c52bf4e7b9230116eb62f9a4255192159700 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Mon, 22 Jan 2024 11:11:20 +0100 Subject: [PATCH 2/4] standard query names --- parts/21-mutate.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/parts/21-mutate.sh b/parts/21-mutate.sh index 2296cd9..ec60c07 100644 --- a/parts/21-mutate.sh +++ b/parts/21-mutate.sh @@ -1606,9 +1606,10 @@ mutate_purge-old-job-metrics() { ##? 
[--commit]: Purge job metrics older than 1 QUERY="$txn_pre $QUERY; $txn_pos" } -mutate_derive_missing_username_from_email() { ##? [--commit]: Set empty username to email address for users created before 2011 +mutate_derive-missing-username-from-email() { ##? [--commit]: Set empty username to email address for users created before 2011 meta <<-EOF ADDED: 22 + AUTHORS: mvdbeek EOF handle_help "$@" <<-EOF Galaxy did not require setting a username for users registered prior to 2011. @@ -1617,7 +1618,7 @@ mutate_derive_missing_username_from_email() { ##? [--commit]: Set empty username will be set to "jane.doe" if the the user did not have a username and no other user has been registered with that username. It is recommended that usernames that could not be changed due to conflicts are fixed - using mutate_set_missing_username_to_random_uuid() + using mutate set-missing-username-to-random-uuid() EOF read -r -d '' QUERY <<-EOF @@ -1644,9 +1645,10 @@ mutate_derive_missing_username_from_email() { ##? [--commit]: Set empty username QUERY="$txn_pre $QUERY; $txn_pos" } -mutate_set_missing_username_to_random_uuid() { ##? [--commit]: Set empty username to random uuid +mutate_set-missing-username-to-random-uuid() { ##? [--commit]: Set empty username to random uuid meta <<-EOF ADDED: 22 + AUTHORS: mvdbeek EOF handle_help "$@" <<-EOF Galaxy did not require setting a username for users registered prior to 2011. 
From f147f70bce4c1d3c153fd4af51fb39b6cfed038f Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Mon, 22 Jan 2024 11:16:49 +0100 Subject: [PATCH 3/4] add test case --- test.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test.sh b/test.sh index 5d7e7b2..8cd4c12 100755 --- a/test.sh +++ b/test.sh @@ -82,3 +82,11 @@ GXADMIN=./.tmpgxadmin fi [ "$result" -eq 0 ] } + +@test "Ensure query names are standardised and match [type]_q-u-e-r-y" { + result=$(grep -P '^[a-z]+_[a-z-]*_[a-z-_]*\(\)' ${GXADMIN} -c) + if (( result > 20 )); then + grep -P '^[a-z]+_[a-z-]*_[a-z-_]*\(\)' parts/2* + fi + [ "$result" -eq 20 ] +} From 3bd161b13abdaa1d7a237e1e85323323d8344c22 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Mon, 22 Jan 2024 11:21:15 +0100 Subject: [PATCH 4/4] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 35687b9..7997460 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - filter histogram: replaces bit.ly's data_hacks with a built-in AWK program to calculate a histogram. May not be entirely portable @hexylena. - mutate scale-table-autovacuum: Dynamically update autovacuum and autoanalyze scale for large tables. @natefoo - query tools-usage-per-month from @lldelisle + - mutate derive-missing-username-from-email and mutate set-missing-username-to-random-uuid from @mvdbeek - Updated: - query monthly-cpu-stats to add --nb_users --filter_email by @lldelisle - Fixed: