diff --git a/sequence_processing_pipeline/Pipeline.py b/sequence_processing_pipeline/Pipeline.py index bbe07ff5..092e3b2b 100644 --- a/sequence_processing_pipeline/Pipeline.py +++ b/sequence_processing_pipeline/Pipeline.py @@ -271,8 +271,7 @@ def _configure_profile(self): profile_paths.append(some_path) # There must be at least one valid profile for the Pipeline to - # continue operation. There must also be a default profile, described - # below. + # continue operation. if not profile_paths: raise ValueError(f"'{profile_dir}' doesn't contain profile files") @@ -293,72 +292,31 @@ def _configure_profile(self): # the 'profile' attribute must have a dictionary as its value. # all profiles must contain 'instrument_type' and 'assay_type', - # unless instrument_type == 'default', in which case the - # profile defines the defaults across all instrument-types and - # assay-types. if 'instrument_type' not in contents['profile']: raise ValueError("'instrument_type' is not an attribute " f"in '{profile_path}'.profile") if 'assay_type' not in contents['profile']: - if contents['profile']['instrument_type'] != 'default': - raise ValueError("'assay_type' is not an attribute " - f"in '{profile_path}'.profile") + raise ValueError("'assay_type' is not an attribute " + f"in '{profile_path}'.profile") profiles.append(contents) - # The default profile provides 'fall-through' configuration settings - # for all items. This allows the user to not have to redefine settings - # for all items for all instrument and assay combinations. - - # the final profile is created by taking the default profile and using - # it as a base. If a profile matching the run-directory's instrument - # and assay types is found, settings from that profile will overwrite - # the base-profile settings as appropriate. - base_profile = None selected_profile = None - # iterate through all the profiles, searching for a default - # profile and the first profile w/matching instrument and assay types. - # if a matching profile isn't found, that's okay, but if a default - # profile isn't found, then raise an Error. - for profile in profiles: - p_i_type = profile['profile']['instrument_type'] - if p_i_type == 'default': - base_profile = profile - else: - p_a_type = profile['profile']['assay_type'] + i_type = profile['profile']['instrument_type'] + a_type = profile['profile']['assay_type'] - # if both items have been found, it's safe to break early. - if base_profile is not None and selected_profile is not None: - break + if i_type == instr_type and a_type == assay_type: + selected_profile = profile + break + + if selected_profile is None: + raise ValueError(f"a matching profile ({instr_type}, {assay_type}" + ") was not found. Please notify an administrator") - if p_i_type == instr_type and p_a_type == assay_type: - selected_profile = profile - - if base_profile is None: - raise ValueError("a 'default' profile was not found") - - if selected_profile: - # overwrite the configuration values in the base-profile with those - # in the matching profile as appropriate. - for attribute in selected_profile['profile']['configuration']: - value = selected_profile['profile']['configuration'][attribute] - base_profile['profile']['configuration'][attribute] = value - - # overwrite default info w/selected profile (if one was found) - # so that complete profile can be written to working directory - # as a log. - base_profile['profile']['instrument_type'] = instr_type - base_profile['profile']['assay_type'] = assay_type - - # load the default first to create a default entry for everything. - # then overwrite the defaults as they appear once you've identified - # the correct (instrument-type, assay-type) pair. - # set this to a new self.config_profile variable and modify the tests - # and code accordingly. - self.config_profile = base_profile + self.config_profile = selected_profile def _search_for_run_dir(self): # this method will catch a run directory as well as its products diff --git a/sequence_processing_pipeline/tests/data/configuration_profiles/default.json b/sequence_processing_pipeline/tests/data/configuration_profiles/miseq_amplicon.json similarity index 80% rename from sequence_processing_pipeline/tests/data/configuration_profiles/default.json rename to sequence_processing_pipeline/tests/data/configuration_profiles/miseq_amplicon.json index 30dda2f7..5b46f417 100644 --- a/sequence_processing_pipeline/tests/data/configuration_profiles/default.json +++ b/sequence_processing_pipeline/tests/data/configuration_profiles/miseq_amplicon.json @@ -1,17 +1,18 @@ { "profile": { - "instrument_type": "default", + "instrument_type": "MiSeq", + "assay_type": "TruSeq HT", "configuration": { "bcl2fastq": { - "nodes": 1, - "nprocs": 16, + "nodes": 2, + "nprocs": 62, "queue": "qiita", - "wallclock_time_in_minutes": 216, + "wallclock_time_in_minutes": 1022, "modules_to_load": [ - "bcl2fastq_2.20.0.422" + "bcl2fastq_2.20.0.222" ], "executable_path": "bcl2fastq", - "per_process_memory_limit": "10gb" + "per_process_memory_limit": "100gb" }, "bcl-convert": { "nodes": 1, @@ -46,10 +47,10 @@ "job_max_array_length": 1000 }, "nu-qc": { - "nodes": 1, - "cpus_per_task": 8, + "nodes": 2, + "cpus_per_task": 32, "queue": "qiita", - "wallclock_time_in_minutes": 240, + "wallclock_time_in_minutes": 2028, "minimap2_databases": "/scratch/databases/minimap2", "modules_to_load": [ "fastp_0.20.1", @@ -64,18 +65,18 @@ "known_adapters_path": "fastp_known_adapters_formatted.fna", "bucket_size": 8, "length_limit": 100, - "cores_per_task": 4 + "cores_per_task": 2 }, "seqpro": { "seqpro_path": "seqpro", "modules_to_load": [] }, "fastqc": { - "nodes": 1, - "nprocs": 16, + "nodes": 2, + "nprocs": 62, "queue": "qiita", - "nthreads": 16, - "wallclock_time_in_minutes": 60, + "nthreads": 62, + "wallclock_time_in_minutes": 220, "modules_to_load": [ "fastqc_0.11.5" ], @@ -83,8 +84,8 @@ "multiqc_executable_path": "multiqc", "multiqc_config_file_path": "sequence_processing_pipeline/multiqc-bclconvert-config.yaml", "job_total_memory_limit": "20gb", - "job_pool_size": 30, - "job_max_array_length": 1000 + "job_pool_size": 120, + "job_max_array_length": 2000 } } } diff --git a/sequence_processing_pipeline/tests/data/configuration_profiles/miseq_metagenomic.json b/sequence_processing_pipeline/tests/data/configuration_profiles/miseq_metagenomic.json index 76073508..d5bc7d97 100644 --- a/sequence_processing_pipeline/tests/data/configuration_profiles/miseq_metagenomic.json +++ b/sequence_processing_pipeline/tests/data/configuration_profiles/miseq_metagenomic.json @@ -14,6 +14,38 @@ "executable_path": "bcl2fastq", "per_process_memory_limit": "100gb" }, + "bcl-convert": { + "nodes": 1, + "nprocs": 16, + "queue": "qiita", + "wallclock_time_in_minutes": 216, + "modules_to_load": [ + "bclconvert_3.7.5" + ], + "executable_path": "bcl-convert", + "per_process_memory_limit": "10gb" + }, + "qc": { + "nodes": 1, + "nprocs": 16, + "queue": "qiita", + "wallclock_time_in_minutes": 60, + "minimap2_databases": [ + "/databases/minimap2/human-phix-db.mmi" + ], + "kraken2_database": "/databases/minimap2/hp_kraken-db.mmi", + "modules_to_load": [ + "fastp_0.20.1", + "samtools_1.12", + "minimap2_2.18" + ], + "fastp_executable_path": "fastp", + "minimap2_executable_path": "minimap2", + "samtools_executable_path": "samtools", + "job_total_memory_limit": "20gb", + "job_pool_size": 30, + "job_max_array_length": 1000 + }, "nu-qc": { "nodes": 2, "cpus_per_task": 32, diff --git a/sequence_processing_pipeline/tests/data/configuration_profiles/novaseq_amplicon.json b/sequence_processing_pipeline/tests/data/configuration_profiles/novaseq_amplicon.json index f88c9dc1..ad244870 100644 --- a/sequence_processing_pipeline/tests/data/configuration_profiles/novaseq_amplicon.json +++ b/sequence_processing_pipeline/tests/data/configuration_profiles/novaseq_amplicon.json @@ -14,6 +14,38 @@ "executable_path": "bcl2fastq", "per_process_memory_limit": "100gb" }, + "bcl-convert": { + "nodes": 1, + "nprocs": 16, + "queue": "qiita", + "wallclock_time_in_minutes": 216, + "modules_to_load": [ + "bclconvert_3.7.5" + ], + "executable_path": "bcl-convert", + "per_process_memory_limit": "10gb" + }, + "qc": { + "nodes": 1, + "nprocs": 16, + "queue": "qiita", + "wallclock_time_in_minutes": 60, + "minimap2_databases": [ + "/databases/minimap2/human-phix-db.mmi" + ], + "kraken2_database": "/databases/minimap2/hp_kraken-db.mmi", + "modules_to_load": [ + "fastp_0.20.1", + "samtools_1.12", + "minimap2_2.18" + ], + "fastp_executable_path": "fastp", + "minimap2_executable_path": "minimap2", + "samtools_executable_path": "samtools", + "job_total_memory_limit": "20gb", + "job_pool_size": 30, + "job_max_array_length": 1000 + }, "nu-qc": { "nodes": 4, "cpus_per_task": 32, diff --git a/sequence_processing_pipeline/tests/data/configuration_profiles/novaseq_metagenomic.json b/sequence_processing_pipeline/tests/data/configuration_profiles/novaseq_metagenomic.json index 136d228e..e0db6798 100644 --- a/sequence_processing_pipeline/tests/data/configuration_profiles/novaseq_metagenomic.json +++ b/sequence_processing_pipeline/tests/data/configuration_profiles/novaseq_metagenomic.json @@ -14,6 +14,38 @@ "executable_path": "bcl2fastq", "per_process_memory_limit": "100gb" }, + "bcl-convert": { + "nodes": 1, + "nprocs": 16, + "queue": "qiita", + "wallclock_time_in_minutes": 216, + "modules_to_load": [ + "bclconvert_3.7.5" + ], + "executable_path": "bcl-convert", + "per_process_memory_limit": "10gb" + }, + "qc": { + "nodes": 1, + "nprocs": 16, + "queue": "qiita", + "wallclock_time_in_minutes": 60, + "minimap2_databases": [ + "/databases/minimap2/human-phix-db.mmi" + ], + "kraken2_database": "/databases/minimap2/hp_kraken-db.mmi", + "modules_to_load": [ + "fastp_0.20.1", + "samtools_1.12", + "minimap2_2.18" + ], + "fastp_executable_path": "fastp", + "minimap2_executable_path": "minimap2", + "samtools_executable_path": "samtools", + "job_total_memory_limit": "20gb", + "job_pool_size": 30, + "job_max_array_length": 1000 + }, "nu-qc": { "nodes": 4, "cpus_per_task": 32, diff --git a/sequence_processing_pipeline/tests/test_Pipeline.py b/sequence_processing_pipeline/tests/test_Pipeline.py index 6f0d37ab..f8134200 100644 --- a/sequence_processing_pipeline/tests/test_Pipeline.py +++ b/sequence_processing_pipeline/tests/test_Pipeline.py @@ -4,7 +4,7 @@ from sequence_processing_pipeline.Pipeline import Pipeline, InstrumentUtils import unittest from os import makedirs, walk -from os.path import abspath, basename, join +from os.path import abspath, basename, join, exists from functools import partial import re from shutil import copy @@ -90,7 +90,9 @@ def delete_rtacomplete_file(self): def delete_more_files(self): for file_path in self.delete_these: - os.remove(file_path) + if exists(file_path): + # if file no longer exists, that's okay. + os.remove(file_path) def _make_mapping_file(self, output_file_path): cols = ('sample_name', 'barcode', 'library_construction_protocol', @@ -309,31 +311,11 @@ def test_creation(self): self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) - # test Error returned when 'assay_type' does not exist in default - # profile. Error should not be returned in this case as default - # shouldn't have an assay_type. - with open(bad_json_file, 'w') as f: - f.write('{ "profile": { "instrument_type": "default", ' - '"configuration": { "bcl2fastq": { "nodes": 1, "nprocs": ' - '16, "queue": "qiita", "wallclock_time_in_minutes": 216, ' - '"modules_to_load": [ "bcl2fastq_2.20.0.422" ], ' - '"executable_path": "bcl2fastq", ' - '"per_process_memory_limit": "10gb" } } } }') - - pipeline = Pipeline(self.good_config_file, self.good_run_id, - self.good_sample_sheet_path, None, - self.output_file_path, self.qiita_id, - Pipeline.METAGENOMIC_PTYPE) - - self.assertIsNotNone(pipeline) - # test Error returned when a non-default profile is missing assay_type + bad_json_file = self.path('configuration_profiles', 'bad.json') + self.delete_these.append(bad_json_file) - another_bad_json_file = self.path('configuration_profiles', - 'more_bad.json') - self.delete_these.append(another_bad_json_file) - - with open(another_bad_json_file, 'w') as f: + with open(bad_json_file, 'w') as f: f.write('{ "profile": { "instrument_type": "MiSeq", ' '"configuration": { "bcl2fastq": { "nodes": 1, "nprocs": ' '16, "queue": "qiita", "wallclock_time_in_minutes": 216, ' @@ -345,7 +327,7 @@ def test_creation(self): "attribute in 'sequence_" "processing_pipeline/tests/" "data/configuration_profiles/" - "more_bad.json'"): + "bad.json'"): Pipeline(self.good_config_file, self.good_run_id, self.good_sample_sheet_path, None,