CDCgov · dthoward96 · Nov 21, 2024 · Nov 14, 2024 · Nov 15, 2024
diff --git a/README.Rmd b/README.Rmd
@@ -26,7 +26,7 @@ github_pages_url <- description$GITHUB_PAGES
 
 <p style="font-size: 16px;"><em>Public Database Submission Pipeline</em></p>
 
-**Beta Version**: v1.2.3. This pipeline is currently in Beta testing, and issues could appear during submission. Please use it at your own risk. Feedback and suggestions are welcome! 
+**Beta Version**: 1.2.4. This pipeline is currently in Beta testing, and issues could appear during submission. Please use it at your own risk. Feedback and suggestions are welcome! 
 
 **General Disclaimer**: This repository was created for use by CDC programs to collaborate on public health related projects in support of the [CDC mission](https://www.cdc.gov/about/organization/mission.htm).  GitHub is not hosted by the CDC, but is a third party website used by CDC and its partners to share information and collaborate on software. CDC use of GitHub does not imply an endorsement of any one particular service, product, or enterprise.
 

diff --git a/README.md b/README.md
@@ -11,7 +11,7 @@
 
 </p>
 
-**Beta Version**: 1.2.3. This pipeline is currently in Beta testing, and
+**Beta Version**: 1.2.4. This pipeline is currently in Beta testing, and
 issues could appear during submission. Please use it at your own risk.
 Feedback and suggestions are welcome\!
 

diff --git a/docs/app.json b/docs/app.json
diff --git a/seqsender.py b/seqsender.py
@@ -64,7 +64,7 @@ def prep(database: List[str], organism: str, submission_dir: str, submission_nam
 			file_handler.validate_file(file_type=file_type, file_path=updated_path)
 			file_dict[file_type] = updated_path # type: ignore
 	# load config file
-	config_dict = tools.get_config(config_file=file_dict["config_file"], database=database)
+	config_dict = tools.get_config(config_file=file_dict["config_file"], databases=database)
 	# Warn user if submitting biosample & sra together with 'Link_Sample_Between_NCBI_Databases' set to False
 	if not config_dict["NCBI"]["Link_Sample_Between_NCBI_Databases"] and "SRA" in database and "BIOSAMPLE" in database:
 		print("Warning: You are submitting to BioSample and SRA together, and your config has the field 'Link_Sample_Between_NCBI_Databases', turned off. Your BioSample and SRA submission will still be linked together as this is required for submitting to SRA.")

diff --git a/shiny/app.py b/shiny/app.py
@@ -20,7 +20,7 @@
 header = (
     ui.card_header(
         ui.HTML(
-            """<p><strong>Beta Version</strong>: 1.2.3. This pipeline is currently in Beta testing, and issues could appear during submission. Please use it at your own risk. Feedback and suggestions are welcome!</p>"""
+            """<p><strong>Beta Version</strong>: 1.2.4. This pipeline is currently in Beta testing, and issues could appear during submission. Please use it at your own risk. Feedback and suggestions are welcome!</p>"""
         )
     ),
 )

diff --git a/tools.py b/tools.py
@@ -19,13 +19,14 @@
 from settings import SCHEMA_EXCLUSIONS, BIOSAMPLE_REGEX, SRA_REGEX, GISAID_REGEX, GENBANK_REGEX, GENBANK_REGEX_CMT, GENBANK_REGEX_SRC
 
 # Check the config file
-def get_config(config_file: str, database: List[str]) -> Dict[str, Any]:
+def get_config(config_file: str, databases: List[str]) -> Dict[str, Any]:
 	# Determine required database
-	submission_portals = []
-	if "BIOSAMPLE" in database or "SRA" in database or "GENBANK" in database:
-		submission_portals.append("ncbi")
-	if "GISAID" in database:
-		submission_portals.append("gisaid")
+	submission_portals = set()
+	for database in databases:
+		if "BIOSAMPLE" in database or "SRA" in database or "GENBANK" in database:
+			submission_portals.add("ncbi")
+		if "GISAID" in database:
+			submission_portals.add("gisaid")
 	# Check if list empty
 	if not submission_portals:
 		print("Error: Submission portals list cannot be empty.", file=sys.stderr)
@@ -36,15 +37,15 @@ def get_config(config_file: str, database: List[str]) -> Dict[str, Any]:
 	# Check if yaml forms dictionary
 	if type(config_dict) is dict:
 		schema = eval(open(os.path.join(PROG_DIR, "config", "seqsender", "config_file", (submission_schema + "_schema.py")), 'r').read())
-		database_specific_config_schema_updates(schema, database)
+		database_specific_config_schema_updates(schema, databases)
 		validator = Validator(schema)
 		# Validate based on schema
 		if validator.validate(config_dict, schema) is False:
 			print("Error: Config file is not properly setup. Please correct config file based on issue below:", file=sys.stderr)
 			print(json.dumps(validator.errors, indent = 4), file=sys.stderr)
 			sys.exit(1)
 		else:
-			if "GENBANK" in database and "GISAID" in database:
+			if "GENBANK" in databases and "GISAID" in databases:
 				validate_submission_position(config_dict=config_dict)
 			config_dict = parse_hold_date(config_dict=config_dict)
 			return config_dict["Submission"]

diff --git a/upload_log.py b/upload_log.py
@@ -298,7 +298,7 @@ def update_grouped_submission(group_df: pd.DataFrame, submission_log_dir: str):
 	submission_organism = group_df.at[0, "Organism"]
 	submission_dir = group_df.at[0, "Submission_Directory"]
 	databases = group_df["Database"].tolist()
-	config_dict = tools.get_config(config_file=group_df.at[0, "Config_File"], database=databases)
+	config_dict = tools.get_config(config_file=group_df.at[0, "Config_File"], databases=databases)
 	if "BIOSAMPLE" in databases:
 		biosample_status = group_df.loc[group_df["Database"] == "BIOSAMPLE", "Submission_Status"].iloc[0]
 		submission_dir = group_df.loc[group_df["Database"] == "BIOSAMPLE", "Submission_Directory"].iloc[0]