From ccc3704b4b70042789c7998487994fc68d741a7f Mon Sep 17 00:00:00 2001 From: phoenixAja Date: Wed, 6 Dec 2023 09:39:24 -0800 Subject: [PATCH] change arg name --- workflows/index-generation/index-generation.wdl | 2 +- .../index-generation/ncbi-compress/src/commands.rs | 14 ++++++-------- .../index-generation/ncbi-compress/src/logging.rs | 4 +--- .../ncbi-compress/src/ncbi_compress.rs | 4 +--- 4 files changed, 9 insertions(+), 15 deletions(-) diff --git a/workflows/index-generation/index-generation.wdl b/workflows/index-generation/index-generation.wdl index 5ab18c2b2..f45ab769b 100644 --- a/workflows/index-generation/index-generation.wdl +++ b/workflows/index-generation/index-generation.wdl @@ -676,7 +676,7 @@ task SplitFastaBySeqLengthAndSort { total_seqs=$(grep ">" ~{fasta} | wc -l) ncbi-compress break-up-fasta-by-sequence-length \ --input-fasta ~{fasta} \ - --temp-file-output-dir outputs \ + --output-dir outputs \ --total-sequence-count ${total_seqs} \ --bin-size 25 \ diff --git a/workflows/index-generation/ncbi-compress/src/commands.rs b/workflows/index-generation/ncbi-compress/src/commands.rs index 67e3f38ce..3cfb44b1b 100644 --- a/workflows/index-generation/ncbi-compress/src/commands.rs +++ b/workflows/index-generation/ncbi-compress/src/commands.rs @@ -1,8 +1,6 @@ pub mod commands { use std::fs; - use std::fs::File; - use std::io::{self, BufRead}; - use std::path::Path; + //use std::io::{self, BufRead}; use tempdir::TempDir; use bio::io::fasta; @@ -78,7 +76,7 @@ pub mod commands { let mut seq_count = 0; let mut split_count = 1; let mut writer = fasta::Writer::to_file(format!("{}/{}_{}.fa", output_dir, taxid, split_count)).unwrap(); - for (i, record) in records_iter.enumerate() { + for (_i, record) in records_iter.enumerate() { let record = record.unwrap(); writer.write_record(&record).unwrap(); seq_count += 1; @@ -113,13 +111,13 @@ pub mod commands { let mut accession_count = 0; let mut unique_accession_count = 0; let mut writer = 
fasta::Writer::to_file(output_fasta_path).unwrap(); - for (i, entry) in fs::read_dir(input_taxid_dir).unwrap().enumerate() { + for (_i, entry) in fs::read_dir(input_taxid_dir).unwrap().enumerate() { let entry = entry.unwrap(); let path = entry.path(); let input_fasta_path = path.to_str().unwrap(); let taxid = path.file_name().unwrap().to_str().unwrap().split(".").collect::>()[0]; let reads_count = count_fasta_reads(input_fasta_path); - if reads_count >= 5 { //9500000 { // back of the envelope calculation for how many 50,000 character reads that we can store in 488GB of RAM + if reads_count >= 9500000 { // back of the envelope calculation for how many 50,000 character reads we can store in 488GB of RAM log::info!("Breaking apart taxid {} into smaller chunks", taxid); let input_taxid_dir = format!("{}/{}_split", input_taxid_dir, taxid); split_fasta_into_chunks(&input_fasta_path, &input_taxid_dir, &reads_count, &3, taxid).expect("error splitting fasta into chunks"); @@ -130,7 +128,7 @@ pub mod commands { Err(e) => println!("Error deleting input fasta : {:?}", e), } // recursively call fasta_compress_from_taxid_dir on the new directory containing the smaller chunks - for (i, entry) in fs::read_dir(input_taxid_dir).unwrap().enumerate() { + for (_i, entry) in fs::read_dir(input_taxid_dir).unwrap().enumerate() { let entry = entry.unwrap(); let path = entry.path(); let split_taxid_fasta_path = path.to_str().unwrap(); @@ -241,7 +239,7 @@ pub mod commands { let mut accession_count = 0; let mut unique_accession_count = 0; let mut writer = fasta::Writer::to_file(output_fasta_path).unwrap(); - for (i, entry) in fs::read_dir(taxid_dir).unwrap().enumerate() { + for (_i, entry) in fs::read_dir(taxid_dir).unwrap().enumerate() { let entry = entry.unwrap(); let path = entry.path(); let input_fasta_path = path.to_str().unwrap(); diff --git a/workflows/index-generation/ncbi-compress/src/logging.rs b/workflows/index-generation/ncbi-compress/src/logging.rs index f0044681d..856e4d325 
100644 --- a/workflows/index-generation/ncbi-compress/src/logging.rs +++ b/workflows/index-generation/ncbi-compress/src/logging.rs @@ -48,9 +48,7 @@ pub mod logging { Ok(()) => { println!("File removed successfully."); } - Err(e) => { - println!("Error while removing file: {}", e); - } + Err(e) => {} // best-effort cleanup: ignore removal errors (most commonly the logging file does not exist yet) }; let file = OpenOptions::new() diff --git a/workflows/index-generation/ncbi-compress/src/ncbi_compress.rs b/workflows/index-generation/ncbi-compress/src/ncbi_compress.rs index 75fdb6e5a..8cec9874d 100644 --- a/workflows/index-generation/ncbi-compress/src/ncbi_compress.rs +++ b/workflows/index-generation/ncbi-compress/src/ncbi_compress.rs @@ -68,7 +68,7 @@ pub mod ncbi_compress { .from_path(mapping_file_path) .unwrap(); let mut added = 0; - reader.into_records().enumerate().for_each(|(i, result)| { + reader.into_records().enumerate().for_each(|(_i, result)| { // if i % 10_000 == 0 { // log::info!(" Processed {} mappings, added {}", i, added); // } @@ -246,8 +246,6 @@ pub mod ncbi_compress { } } for (hash, accession_id) in tmp { - // only insert accession_id into tree if logging is enabled - tree.insert(hash, accession_id).unwrap(); } chunk = records_iter