Skip to content

Commit

Permalink
Bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
olliecheng committed Dec 11, 2024
1 parent 6b3ba3f commit e01b2e7
Show file tree
Hide file tree
Showing 5 changed files with 12,065 additions and 12,347 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,21 @@ env:
CARGO_TERM_COLOR: always

jobs:
test:
name: Test Suite
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Test
uses: houseabsolute/actions-rust-cross@v0
with:
command: "test"
target: "x86_64-unknown-linux-gnu"

build-linux:
runs-on: ubuntu-latest
needs: test

steps:
- uses: actions/checkout@v3

Expand All @@ -23,6 +36,7 @@ jobs:
command: "build"
target: "x86_64-unknown-linux-gnu"
args: "--release"

- name: Upload Linux Binary
uses: actions/upload-artifact@v3
with:
Expand Down
22 changes: 18 additions & 4 deletions src/call.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use crate::io::{iter_duplicates, ReadType, Record, UMIGroup};

use spoa::{AlignmentEngine, AlignmentType};

use itertools::Itertools;
use rayon::prelude::*;

use std::io::prelude::*;
Expand Down Expand Up @@ -48,15 +47,20 @@ pub fn consensus(
let duplicate_iterator = iter_duplicates(input, duplicates, duplicates_only)?;

const CHUNK_SIZE: usize = 500;

let mut chunk_buffer = Vec::with_capacity(CHUNK_SIZE);
let mut duplicate_buffer = Vec::new();

for elem in duplicate_iterator {
for (idx, elem) in duplicate_iterator.enumerate() {
if (idx > 0) && (idx % 100000 == 0) {
eprintln!("Called {} reads...", idx);
}

// ensure that there was no issue in reading
let group = elem?;

let single = group.records.len() == 1;
if single && !duplicates_only {
if (single && !duplicates_only) || group.ignore {
chunk_buffer.push(GroupType::Simplex(group));
} else {
chunk_buffer.push(GroupType::Duplex(duplicate_buffer.len()));
Expand All @@ -78,6 +82,7 @@ pub fn consensus(
GroupType::Simplex(group) => {
&call_record(group, output_originals)
}
// if this is a duplex read, then use the buffer
GroupType::Duplex(idx) => {
&duplicate_output[*idx]
}
Expand Down Expand Up @@ -110,6 +115,15 @@ fn call_record(group: &UMIGroup, output_originals: bool) -> Vec<u8> {
let length = group.records.len();
let mut output = Cursor::new(Vec::new());

// process ignored reads first
if group.ignore {
for record in group.records.iter() {
io::write_read(&mut output, record, &group, ReadType::Ignored, false).unwrap();
}
return output.into_inner();
}


// for singletons, the read is its own consensus
if length == 1 {
let record = &group.records[0];
Expand Down Expand Up @@ -141,7 +155,7 @@ fn call_record(group: &UMIGroup, output_originals: bool) -> Vec<u8> {
.expect("spoa module did not produce valid utf-8");

let id_string = format!(
"consensus_{} avg_input_quality={:.2}",
"umi_group_id={} avg_input_quality={:.2}",
group.index,
group.avg_qual
);
Expand Down
17 changes: 8 additions & 9 deletions src/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use std::io::{BufReader, Read, Seek, SeekFrom, Write};
/// Tag describing what kind of read is being handed to `write_read`.
///
/// `write_read` matches on this value to pick a short label string for the read.
pub enum ReadType {
// NOTE(review): presumably a consensus sequence called from a UMI group; the
// site that constructs this variant is not visible in this view — confirm.
Consensus,
// NOTE(review): presumably an unmodified input read; the site that constructs
// this variant is not visible in this view — confirm.
Original,
/// A read belonging to a UMI group whose `ignore` flag is set. `call_record`
/// writes every record of such a group with this tag and returns before any
/// further processing of the group.
Ignored,
}

pub struct Record {
Expand Down Expand Up @@ -37,6 +38,8 @@ pub struct UMIGroup {
pub records: Vec<Record>,
/// The average PHRED quality of the UMI group
pub avg_qual: f64,
/// Whether we should NOT consensus call this UMI group, because of quality/other issues
pub ignore: bool,
}

/// Retrieves a FASTQ record from a file at a specified position.
Expand Down Expand Up @@ -130,17 +133,12 @@ pub fn iter_duplicates(
return None;
}

let mut rec = UMIGroup { id, index, records: Vec::new(), avg_qual: 0.0 };
let mut rec = UMIGroup { id, index, records: Vec::new(), avg_qual: 0.0, ignore: false };
let mut total_qual = 0u32;

for pos in positions.iter() {
if pos.length > 30000 {
eprintln!(
"Skipping at position {} due to length {}",
pos.pos,
pos.length
);
continue;
rec.ignore = true;
}

let read = if single {
Expand Down Expand Up @@ -220,8 +218,9 @@ pub fn write_read(
fastq: bool,
) -> std::io::Result<()> {
let read_type_label = match read_type {
ReadType::Consensus => { "CONSENSUS" }
ReadType::Original => { "ORIGINAL" }
ReadType::Consensus => { "CON" }
ReadType::Original => { "ORIG" }
ReadType::Ignored => { "IGN" }
};

if fastq {
Expand Down
Loading

0 comments on commit e01b2e7

Please sign in to comment.