Skip to content

Commit

Permalink
vcf/header/parser: Parse line from byte string
Browse files Browse the repository at this point in the history
  • Loading branch information
zaeleus committed Oct 26, 2023
1 parent 45d91ca commit d846c76
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 19 deletions.
29 changes: 17 additions & 12 deletions noodles-vcf/src/header/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ mod builder;
mod file_format_option;
pub(crate) mod record;

use std::error;
use std::{error, str};

use indexmap::IndexMap;

Expand Down Expand Up @@ -49,20 +49,20 @@ impl Parser {
let mut parser = Self::default();

for line in s.lines() {
parser.parse_partial(line)?;
parser.parse_partial(line.as_bytes())?;
}

parser.finish()
}

/// Parses and adds a raw record to the header.
pub fn parse_partial(&mut self, s: &str) -> Result<(), ParseError> {
pub fn parse_partial(&mut self, src: &[u8]) -> Result<(), ParseError> {
if self.state == State::Done {
return Err(ParseError::ExpectedEof);
}

if self.state == State::Empty {
let file_format = match parse_file_format(s) {
let file_format = match parse_file_format(src) {
Ok(f) => match self.file_format_option {
FileFormatOption::Auto => f,
FileFormatOption::FileFormat(g) => g,
Expand All @@ -76,14 +76,14 @@ impl Parser {
return Ok(());
}

if s.starts_with("#CHROM") {
parse_header(s, &mut self.sample_names)?;
if src.starts_with(b"#CHROM") {
parse_header(src, &mut self.sample_names)?;
self.state = State::Done;
return Ok(());
}

let record = record::parse_record(s.as_bytes(), self.file_format)
.map_err(ParseError::InvalidRecord)?;
let record =
record::parse_record(src, self.file_format).map_err(ParseError::InvalidRecord)?;

match record {
Record::FileFormat(_) => return Err(ParseError::UnexpectedFileFormat),
Expand Down Expand Up @@ -134,6 +134,8 @@ impl Parser {
pub enum ParseError {
/// The input is empty.
Empty,
/// The input contains invalid UTF-8.
InvalidUtf8(str::Utf8Error),
/// The file format (`fileformat`) is missing.
MissingFileFormat,
/// The file format (`fileformat`) appears somewhere other than the first line.
Expand Down Expand Up @@ -162,6 +164,7 @@ pub enum ParseError {
impl error::Error for ParseError {
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
match self {
Self::InvalidUtf8(e) => Some(e),
Self::InvalidFileFormat(e) => Some(e),
Self::InvalidRecord(e) => Some(e),
Self::InvalidRecordValue(e) => Some(e),
Expand All @@ -174,6 +177,7 @@ impl std::fmt::Display for ParseError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Empty => f.write_str("empty input"),
Self::InvalidUtf8(_) => f.write_str("invalid UTF-8"),
Self::MissingFileFormat => f.write_str("missing fileformat"),
Self::UnexpectedFileFormat => f.write_str("unexpected file format"),
Self::InvalidFileFormat(_) => f.write_str("invalid file format"),
Expand All @@ -196,9 +200,9 @@ impl std::fmt::Display for ParseError {
}
}

fn parse_file_format(s: &str) -> Result<FileFormat, ParseError> {
let record = record::parse_record(s.as_bytes(), FileFormat::default())
.map_err(ParseError::InvalidRecord)?;
fn parse_file_format(src: &[u8]) -> Result<FileFormat, ParseError> {
let record =
record::parse_record(src, FileFormat::default()).map_err(ParseError::InvalidRecord)?;

match record {
Record::FileFormat(file_format) => Ok(file_format),
Expand Down Expand Up @@ -227,12 +231,13 @@ fn insert_other_record(
Ok(())
}

fn parse_header(line: &str, sample_names: &mut SampleNames) -> Result<(), ParseError> {
fn parse_header(src: &[u8], sample_names: &mut SampleNames) -> Result<(), ParseError> {
static HEADERS: &[&str] = &[
"#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO",
];
static FORMAT_HEADER: &str = "FORMAT";

let line = str::from_utf8(src).map_err(ParseError::InvalidUtf8)?;
let mut fields = line.split(crate::record::FIELD_DELIMITER);

for &expected in HEADERS.iter() {
Expand Down
9 changes: 2 additions & 7 deletions noodles-vcf/src/reader/header.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
use std::{
io::{self, BufRead},
str,
};
use std::io::{self, BufRead};

use crate::{header, Header};

Expand All @@ -13,10 +10,8 @@ where
let mut buf = Vec::new();

while read_header_line(reader, &mut buf)? != 0 {
let s = str::from_utf8(&buf).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

parser
.parse_partial(s)
.parse_partial(&buf)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
}

Expand Down

0 comments on commit d846c76

Please sign in to comment.