Skip to content

Commit

Permalink
temp
Browse files Browse the repository at this point in the history
  • Loading branch information
dralley committed Jul 31, 2022
1 parent ad57bc2 commit e6c6716
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 16 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ license = "MIT"
[dependencies]
document-features = { version = "0.2", optional = true }
encoding_rs = { version = "0.8", optional = true }
encoding_rs_io = { version = "0.1", optional = true }
serde = { version = "1.0", optional = true }
memchr = "2.5"

Expand Down Expand Up @@ -47,7 +48,7 @@ default = []
## crate, that satisfied the restriction above.
##
## [standard compliant]: https://www.w3.org/TR/xml11/#charencoding
encoding = ["encoding_rs"]
encoding = ["encoding_rs", "encoding_rs_io"]

## This feature enables support for deserializing lists where tags are overlapped
## with tags that do not correspond to the list.
Expand Down
53 changes: 50 additions & 3 deletions src/reader/buffered_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,17 @@ use std::io::{self, BufRead, BufReader};
use std::path::Path;

use memchr;
#[cfg(feature = "encoding")]
use encoding_rs::UTF_8;
#[cfg(feature = "encoding")]
use encoding_rs_io::{DecodeReaderBytes, DecodeReaderBytesBuilder};

use crate::errors::{Error, Result};
use crate::events::Event;
use crate::name::QName;
use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, XmlSource};
#[cfg(feature = "encoding")]
use crate::reader::EncodingRef;
use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, TagState, XmlSource};

/// This is an implementation of [`Reader`] for reading from a [`BufRead`] as
/// underlying byte stream.
Expand Down Expand Up @@ -217,12 +223,53 @@ impl<R: BufRead> Reader<R> {
}
}

#[cfg(feature = "encoding")]
impl Reader<BufReader<DecodeReaderBytes<File, Vec<u8>>>> {
/// Creates an XML reader from a file path.
///
/// The opened file is wrapped in a [`DecodeReaderBytes`] decoder, which is in
/// turn wrapped in a [`BufReader`]. The decoder is configured with UTF-8 as
/// its explicit encoding and with BOM override enabled.
///
/// # Errors
///
/// Returns [`Error::Io`] if the file at `path` cannot be opened.
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
let file = File::open(path).map_err(Error::Io)?;
// NOTE(review): with `bom_override(true)` a BOM at the start of the file is
// presumably allowed to take precedence over the explicit UTF-8 setting —
// confirm against the `encoding_rs_io` documentation.
let decoder = DecodeReaderBytesBuilder::new()
.encoding(Some(UTF_8))
.bom_override(true)
.build(file);

// The reader is assembled field by field here instead of going through
// `from_reader`.
let reader = Self {
reader: BufReader::new(decoder),
opened_buffer: Vec::new(),
opened_starts: Vec::new(),
tag_state: TagState::Init,
expand_empty_elements: false,
trim_text_start: false,
trim_text_end: false,
trim_markup_names_in_closing_tags: true,
check_end_names: true,
buf_position: 0,
check_comments: false,
// Recorded as implicit (not explicit) UTF-8.
// NOTE(review): presumably this leaves room for a later override by the
// XML declaration — verify against `EncodingRef`.
encoding: EncodingRef::Implicit(UTF_8),
};
Ok(reader)
}
}

#[cfg(not(feature = "encoding"))]
impl Reader<BufReader<File>> {
/// Creates an XML reader from a file path.
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
let file = File::open(path).map_err(Error::Io)?;
let reader = BufReader::new(file);
Ok(Self::from_reader(reader))
let reader = Self {
reader: BufReader::new(file),
opened_buffer: Vec::new(),
opened_starts: Vec::new(),
tag_state: TagState::Init,
expand_empty_elements: false,
trim_text_start: false,
trim_text_end: false,
trim_markup_names_in_closing_tags: true,
check_end_names: true,
buf_position: 0,
check_comments: false,
};
Ok(reader)
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/reader/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! A module to handle `Reader`
use std::io::Read;
use std::str::from_utf8;

#[cfg(feature = "encoding")]
Expand Down Expand Up @@ -330,7 +331,7 @@ pub struct Reader<R> {
}

/// Builder methods
impl<R> Reader<R> {
impl<R: Read> Reader<R> {
/// Creates a `Reader` that reads from a given reader.
pub fn from_reader(reader: R) -> Self {
Self {
Expand Down
4 changes: 2 additions & 2 deletions src/reader/ns_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
//! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::io::{BufRead, BufReader, Read};
use std::ops::Deref;
use std::path::Path;

Expand All @@ -32,7 +32,7 @@ pub struct NsReader<R> {
}

/// Builder methods
impl<R> NsReader<R> {
impl<R: Read> NsReader<R> {
/// Creates a `NsReader` that reads from a reader.
#[inline]
pub fn from_reader(reader: R) -> Self {
Expand Down
25 changes: 16 additions & 9 deletions src/reader/slice_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use encoding_rs::UTF_8;
use crate::errors::{Error, Result};
use crate::events::Event;
use crate::name::QName;
use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, XmlSource};
use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, TagState, XmlSource};

use memchr;

Expand All @@ -21,15 +21,22 @@ impl<'a> Reader<&'a [u8]> {
/// Creates an XML reader from a string slice.
pub fn from_str(s: &'a str) -> Self {
// Rust strings are guaranteed to be UTF-8, so lock the encoding
#[cfg(feature = "encoding")]
{
let mut reader = Self::from_reader(s.as_bytes());
reader.encoding = EncodingRef::Explicit(UTF_8);
reader
}
Self {
reader: s.as_bytes(),
opened_buffer: Vec::new(),
opened_starts: Vec::new(),
tag_state: TagState::Init,
expand_empty_elements: false,
trim_text_start: false,
trim_text_end: false,
trim_markup_names_in_closing_tags: true,
check_end_names: true,
buf_position: 0,
check_comments: false,

#[cfg(not(feature = "encoding"))]
Self::from_reader(s.as_bytes())
#[cfg(feature = "encoding")]
encoding: EncodingRef::Explicit(UTF_8),
}
}

/// Read an event that borrows from the input rather than a buffer.
Expand Down

0 comments on commit e6c6716

Please sign in to comment.