From df3aca65fb9e657f6a1d5727b688cfefa0cb98a8 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Fri, 14 Feb 2025 11:35:20 -0800 Subject: [PATCH 1/2] ingest: Remove use of ncov-ingest geolocation rules Remove the use of the ncov-ingest geolocation rules since Augur now uses the built-in geolocation rules by default. Depends on the release of an Augur version that applies the built-in geolocation rules by default. --- ingest/defaults/config.yaml | 3 --- ingest/rules/curate.smk | 27 ++------------------------- 2 files changed, 2 insertions(+), 28 deletions(-) diff --git a/ingest/defaults/config.yaml b/ingest/defaults/config.yaml index 953d0fe..1663086 100644 --- a/ingest/defaults/config.yaml +++ b/ingest/defaults/config.yaml @@ -45,10 +45,7 @@ curate: authors_default_value: '?' # Field name for the generated abbreviated authors abbr_authors_field: 'authors' - # General geolocation rules to apply to geolocation fields - geolocation_rules_url: 'https://raw.githubusercontent.com/nextstrain/ncov-ingest/master/source-data/gisaid_geoLocationRules.tsv' # Local geolocation rules that are only applicable to mpox data - # Local rules can overwrite the general geolocation rules provided above local_geolocation_rules: 'defaults/geolocation-rules.tsv' # User annotations file annotations: 'defaults/annotations.tsv' diff --git a/ingest/rules/curate.smk b/ingest/rules/curate.smk index 15342d1..88dfea5 100644 --- a/ingest/rules/curate.smk +++ b/ingest/rules/curate.smk @@ -13,29 +13,6 @@ Parameters are expected to be defined in `config.curate`. 
""" -rule fetch_general_geolocation_rules: - output: - general_geolocation_rules="data/general-geolocation-rules.tsv", - params: - geolocation_rules_url=config["curate"]["geolocation_rules_url"], - shell: - """ - curl {params.geolocation_rules_url} > {output.general_geolocation_rules} - """ - - -rule concat_geolocation_rules: - input: - general_geolocation_rules="data/general-geolocation-rules.tsv", - local_geolocation_rules=config["curate"]["local_geolocation_rules"], - output: - all_geolocation_rules="data/all-geolocation-rules.tsv", - shell: - """ - cat {input.general_geolocation_rules} {input.local_geolocation_rules} >> {output.all_geolocation_rules} - """ - - def format_field_map(field_map: dict[str, str]) -> str: """ Format dict to `"key1"="value1" "key2"="value2"...` for use in shell commands. @@ -46,7 +23,7 @@ def format_field_map(field_map: dict[str, str]) -> str: rule curate: input: sequences_ndjson="data/sequences.ndjson", - all_geolocation_rules="data/all-geolocation-rules.tsv", + geolocation_rules=config["curate"]["local_geolocation_rules"], annotations=config["curate"]["annotations"], output: metadata="data/all_metadata.tsv", @@ -92,7 +69,7 @@ rule curate: --default-value {params.authors_default_value} \ --abbr-authors-field {params.abbr_authors_field} \ | augur curate apply-geolocation-rules \ - --geolocation-rules {input.all_geolocation_rules} \ + --geolocation-rules {input.geolocation_rules} \ | augur curate apply-record-annotations \ --annotations {input.annotations} \ --id-field {params.annotations_id} \ From fd9dd06103c25d371945b037c0cf8afe0f7408c7 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Fri, 14 Feb 2025 12:36:08 -0800 Subject: [PATCH 2/2] color_ordering.tsv: Add "Puerto Rico" as country Following changes in geolocation rules in previous commit, Puerto Rico is now included at the country level. 
--- phylogenetic/defaults/color_ordering.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phylogenetic/defaults/color_ordering.tsv b/phylogenetic/defaults/color_ordering.tsv index 53b601c..f07a783 100644 --- a/phylogenetic/defaults/color_ordering.tsv +++ b/phylogenetic/defaults/color_ordering.tsv @@ -16857,6 +16857,7 @@ country Bahamas country North America country Bermuda country USA +country Puerto Rico country Canada ################ @@ -16943,4 +16944,3 @@ lineage A.2.3 lineage A.3 ################ -