Skip to content

Commit

Permalink
sam/io/reader/record: Disallow newlines to appear in fields
Browse files Browse the repository at this point in the history
  • Loading branch information
zaeleus committed Jan 19, 2024
1 parent 5388ad9 commit 92c5119
Showing 1 changed file with 55 additions and 21 deletions.
76 changes: 55 additions & 21 deletions noodles-sam/src/io/reader/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,68 +11,94 @@ where

let mut len = 0;

len += read_field(reader, &mut record.buf)?;
len += read_required_field(reader, &mut record.buf)?;
record.bounds.name_end = record.buf.len();

len += read_field(reader, &mut record.buf)?;
len += read_required_field(reader, &mut record.buf)?;
record.bounds.flags_end = record.buf.len();

len += read_field(reader, &mut record.buf)?;
len += read_required_field(reader, &mut record.buf)?;
record.bounds.reference_sequence_name_end = record.buf.len();

len += read_field(reader, &mut record.buf)?;
len += read_required_field(reader, &mut record.buf)?;
record.bounds.alignment_start_end = record.buf.len();

len += read_field(reader, &mut record.buf)?;
len += read_required_field(reader, &mut record.buf)?;
record.bounds.mapping_quality_end = record.buf.len();

len += read_field(reader, &mut record.buf)?;
len += read_required_field(reader, &mut record.buf)?;
record.bounds.cigar_end = record.buf.len();

len += read_field(reader, &mut record.buf)?;
len += read_required_field(reader, &mut record.buf)?;
record.bounds.mate_reference_sequence_name_end = record.buf.len();

len += read_field(reader, &mut record.buf)?;
len += read_required_field(reader, &mut record.buf)?;
record.bounds.mate_alignment_start_end = record.buf.len();

len += read_field(reader, &mut record.buf)?;
len += read_required_field(reader, &mut record.buf)?;
record.bounds.template_length_end = record.buf.len();

len += read_field(reader, &mut record.buf)?;
len += read_required_field(reader, &mut record.buf)?;
record.bounds.sequence_end = record.buf.len();

len += read_field(reader, &mut record.buf)?;
let (n, is_eol) = read_last_required_field(reader, &mut record.buf)?;
len += n;
record.bounds.quality_scores_end = record.buf.len();

len += read_line(reader, &mut record.buf)?;
if !is_eol {
len += read_line(reader, &mut record.buf)?;
}

Ok(len)
}

fn read_field<R>(reader: &mut R, dst: &mut Vec<u8>) -> io::Result<usize>
/// Reads a single field that must be followed by more fields on the same line.
///
/// Delegates to `read_field` and inspects the end-of-line flag it reports.
///
/// # Errors
///
/// Returns an [`io::ErrorKind::InvalidData`] error ("unexpected EOL") when
/// `read_field` reports that the field was terminated by a line feed. Any I/O
/// error raised by `read_field` is propagated unchanged.
///
/// On success, returns the length reported by `read_field`.
fn read_required_field<R>(reader: &mut R, dst: &mut Vec<u8>) -> io::Result<usize>
where
    R: BufRead,
{
    match read_field(reader, dst)? {
        // A line feed here means the line ended before all fields were seen.
        (_, true) => Err(io::Error::new(io::ErrorKind::InvalidData, "unexpected EOL")),
        (n, false) => Ok(n),
    }
}

/// Reads the final mandatory field of a record line.
///
/// This is a direct pass-through to `read_field`: the reported length and the
/// end-of-line flag are both handed back to the caller. Unlike
/// `read_required_field`, a field terminated by a line feed is not treated as
/// an error here; the caller decides what to do with the flag.
///
/// # Errors
///
/// Propagates any I/O error raised by `read_field`.
fn read_last_required_field<R>(reader: &mut R, dst: &mut Vec<u8>) -> io::Result<(usize, bool)>
where
    R: BufRead,
{
    let (len, is_eol) = read_field(reader, dst)?;
    Ok((len, is_eol))
}

fn read_field<R>(reader: &mut R, dst: &mut Vec<u8>) -> io::Result<(usize, bool)>
where
R: BufRead,
{
use memchr::memchr2;

const DELIMITER: u8 = b'\t';
const LINE_FEED: u8 = b'\n';

let mut is_delimiter = false;
let mut r#match = None;
let mut len = 0;

loop {
let src = reader.fill_buf()?;

if is_delimiter || src.is_empty() {
if r#match.is_some() || src.is_empty() {
break;
}

let n = match src.iter().position(|&b| b == DELIMITER) {
let n = match memchr2(DELIMITER, LINE_FEED, src) {
Some(i) => {
dst.extend_from_slice(&src[..i]);
is_delimiter = true;
dst.extend(&src[..i]);
r#match = Some(src[i]);
i + 1
}
None => {
dst.extend_from_slice(src);
dst.extend(src);
src.len()
}
};
Expand All @@ -82,7 +108,9 @@ where
reader.consume(n);
}

Ok(len)
let is_eol = matches!(r#match, Some(LINE_FEED));

Ok((len, is_eol))
}

#[cfg(test)]
Expand All @@ -91,7 +119,7 @@ mod tests {

#[test]
fn test_read_record() -> io::Result<()> {
let mut src = &b"*\t4\t*\t0\t255\t*\t*\t0\t0\t*\t*"[..];
let mut src = &b"*\t4\t*\t0\t255\t*\t*\t0\t0\t*\t*\n"[..];

let mut record = Record::default();
read_record(&mut src, &mut record)?;
Expand All @@ -110,6 +138,12 @@ mod tests {
assert_eq!(record.bounds.sequence_end, 12);
assert_eq!(record.bounds.quality_scores_end, 13);

let mut src = &b"\n"[..];
assert!(matches!(
read_record(&mut src, &mut record),
Err(e) if e.kind() == io::ErrorKind::InvalidData,
));

Ok(())
}
}

0 comments on commit 92c5119

Please sign in to comment.