Skip to content

Commit

Permalink
change arg name
Browse files — browse the repository at this point in the history
  • Loading branch information
phoenixAja committed Dec 6, 2023
1 parent d81b455 commit ccc3704
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 15 deletions.
2 changes: 1 addition & 1 deletion workflows/index-generation/index-generation.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ task SplitFastaBySeqLengthAndSort {
total_seqs=$(grep ">" ~{fasta} | wc -l)
ncbi-compress break-up-fasta-by-sequence-length \
--input-fasta ~{fasta} \
--temp-file-output-dir outputs \
--output-dir outputs \
--total-sequence-count ${total_seqs} \
--bin-size 25 \

Expand Down
14 changes: 6 additions & 8 deletions workflows/index-generation/ncbi-compress/src/commands.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
pub mod commands {
use std::fs;
use std::fs::File;
use std::io::{self, BufRead};
use std::path::Path;
//use std::io::{self, BufRead};

use tempdir::TempDir;
use bio::io::fasta;
Expand Down Expand Up @@ -78,7 +76,7 @@ pub mod commands {
let mut seq_count = 0;
let mut split_count = 1;
let mut writer = fasta::Writer::to_file(format!("{}/{}_{}.fa", output_dir, taxid, split_count)).unwrap();
for (i, record) in records_iter.enumerate() {
for (_i, record) in records_iter.enumerate() {
let record = record.unwrap();
writer.write_record(&record).unwrap();
seq_count += 1;
Expand Down Expand Up @@ -113,13 +111,13 @@ pub mod commands {
let mut accession_count = 0;
let mut unique_accession_count = 0;
let mut writer = fasta::Writer::to_file(output_fasta_path).unwrap();
for (i, entry) in fs::read_dir(input_taxid_dir).unwrap().enumerate() {
for (_i, entry) in fs::read_dir(input_taxid_dir).unwrap().enumerate() {
let entry = entry.unwrap();
let path = entry.path();
let input_fasta_path = path.to_str().unwrap();
let taxid = path.file_name().unwrap().to_str().unwrap().split(".").collect::<Vec<&str>>()[0];
let reads_count = count_fasta_reads(input_fasta_path);
if reads_count >= 5 { //9500000 { // back of the envelope calculation for how many 50,000 character reads that we can store in 488GB of RAM
if reads_count >= 9500000 { // back of the envelope calculation for how many 50,000 character reads we can store in 488GB of RAM
log::info!("Breaking apart taxid {} into smaller chunks", taxid);
let input_taxid_dir = format!("{}/{}_split", input_taxid_dir, taxid);
split_fasta_into_chunks(&input_fasta_path, &input_taxid_dir, &reads_count, &3, taxid).expect("error splitting fasta into chunks");
Expand All @@ -130,7 +128,7 @@ pub mod commands {
Err(e) => println!("Error deleting input fasta : {:?}", e),
}
// recursively call fasta_compress_from_taxid_dir on the new directory containing the smaller chunks
for (i, entry) in fs::read_dir(input_taxid_dir).unwrap().enumerate() {
for (_i, entry) in fs::read_dir(input_taxid_dir).unwrap().enumerate() {
let entry = entry.unwrap();
let path = entry.path();
let split_taxid_fasta_path = path.to_str().unwrap();
Expand Down Expand Up @@ -241,7 +239,7 @@ pub mod commands {
let mut accession_count = 0;
let mut unique_accession_count = 0;
let mut writer = fasta::Writer::to_file(output_fasta_path).unwrap();
for (i, entry) in fs::read_dir(taxid_dir).unwrap().enumerate() {
for (_i, entry) in fs::read_dir(taxid_dir).unwrap().enumerate() {
let entry = entry.unwrap();
let path = entry.path();
let input_fasta_path = path.to_str().unwrap();
Expand Down
4 changes: 1 addition & 3 deletions workflows/index-generation/ncbi-compress/src/logging.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,7 @@ pub mod logging {
Ok(()) => {
println!("File removed successfully.");
}
Err(e) => {
println!("Error while removing file: {}", e);
}
Err(e) => {} // logging file doesn't exist so we don't need to remove it
};

let file = OpenOptions::new()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ pub mod ncbi_compress {
.from_path(mapping_file_path)
.unwrap();
let mut added = 0;
reader.into_records().enumerate().for_each(|(i, result)| {
reader.into_records().enumerate().for_each(|(_i, result)| {
// if i % 10_000 == 0 {
// log::info!(" Processed {} mappings, added {}", i, added);
// }
Expand Down Expand Up @@ -246,8 +246,6 @@ pub mod ncbi_compress {
}
}
for (hash, accession_id) in tmp {
// only insert accession_id into tree if logging is enabled

tree.insert(hash, accession_id).unwrap();
}
chunk = records_iter
Expand Down

0 comments on commit ccc3704

Please sign in to comment.