From e99eded107eb3825c5f4ed8d0d14e383e23a3130 Mon Sep 17 00:00:00 2001 From: Jessica Britton Date: Fri, 18 Aug 2023 12:01:28 -0700 Subject: [PATCH] AG-1196: Sort gene_info.biodomains to generate consistent list ordering --- src/agoradatatools/etl/transform/gene_info.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/agoradatatools/etl/transform/gene_info.py b/src/agoradatatools/etl/transform/gene_info.py index e7cc92d8..b20b6e6e 100644 --- a/src/agoradatatools/etl/transform/gene_info.py +++ b/src/agoradatatools/etl/transform/gene_info.py @@ -80,6 +80,9 @@ def transform_gene_info( .rename(columns={"biodomain": "biodomains"}) ) + # Sort each row's biodomains list alphabetically so the generated list ordering is consistent + biodomains['biodomains'] = biodomains['biodomains'].apply(sorted) + # For genes with either is_adi or is_tep set to True, create a resource URL that opens # the portal page to the specific gene. This must be done using the hgnc_symbol from the # tep_info file and not the symbol in gene_info, because there are some mismatches @@ -95,7 +98,6 @@ def transform_gene_info( ) # Merge all the datasets - gene_info = gene_metadata for dataset in [