diff --git a/vaporetto/Cargo.toml b/vaporetto/Cargo.toml index fcbaffc4..82bb33df 100644 --- a/vaporetto/Cargo.toml +++ b/vaporetto/Cargo.toml @@ -13,7 +13,6 @@ categories = ["text-processing"] [dependencies] daachorse = "0.4.0" # MIT or Apache-2.0 -byteorder = "1.4" # Unlicense or MIT liblinear = { version = "1", optional = true } # MIT diff --git a/vaporetto/src/dict_model.rs b/vaporetto/src/dict_model.rs index 54a90d4b..f1a678c0 100644 --- a/vaporetto/src/dict_model.rs +++ b/vaporetto/src/dict_model.rs @@ -1,9 +1,8 @@ use std::io::{Read, Write}; use std::mem; -use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; - use crate::errors::Result; +use crate::utils; #[derive(Clone, Copy, Default)] pub struct DictWeight { @@ -13,24 +12,24 @@ pub struct DictWeight { } impl DictWeight { - pub fn serialize(&self, mut buf: W) -> Result + pub fn serialize(&self, mut wtr: W) -> Result where W: Write, { - buf.write_i32::(self.right)?; - buf.write_i32::(self.inside)?; - buf.write_i32::(self.left)?; + utils::write_i32(&mut wtr, self.right)?; + utils::write_i32(&mut wtr, self.inside)?; + utils::write_i32(&mut wtr, self.left)?; Ok(mem::size_of::() * 3) } - pub fn deserialize(mut buf: R) -> Result + pub fn deserialize(mut rdr: R) -> Result where R: Read, { Ok(Self { - right: buf.read_i32::()?, - inside: buf.read_i32::()?, - left: buf.read_i32::()?, + right: utils::read_i32(&mut rdr)?, + inside: utils::read_i32(&mut rdr)?, + left: utils::read_i32(&mut rdr)?, }) } } @@ -44,33 +43,33 @@ pub struct WordWeightRecord { } impl WordWeightRecord { - pub fn serialize(&self, mut buf: W) -> Result + pub fn serialize(&self, mut wtr: W) -> Result where W: Write, { let word_size = self.word.len(); let comment_size = self.comment.len(); - buf.write_u32::(word_size.try_into().unwrap())?; - buf.write_u32::(comment_size.try_into().unwrap())?; - buf.write_all(self.word.as_bytes())?; - buf.write_all(self.comment.as_bytes())?; - let weights_size = self.weights.serialize(&mut buf)?; + utils::write_u32(&mut wtr, u32::try_from(word_size).unwrap())?; + utils::write_u32(&mut wtr, u32::try_from(comment_size).unwrap())?; + wtr.write_all(self.word.as_bytes())?; + wtr.write_all(self.comment.as_bytes())?; + let weights_size = self.weights.serialize(&mut wtr)?; Ok(mem::size_of::() * 2 + word_size + weights_size + comment_size) } - pub fn deserialize(mut buf: R) -> Result + pub fn deserialize(mut rdr: R) -> Result where R: Read, { - let word_size = buf.read_u32::()?; - let comment_size = buf.read_u32::()?; + let word_size = utils::read_u32(&mut rdr)?; + let comment_size = utils::read_u32(&mut rdr)?; let mut word_bytes = vec![0; word_size.try_into().unwrap()]; - buf.read_exact(&mut word_bytes)?; + rdr.read_exact(&mut word_bytes)?; let mut comment_bytes = vec![0; comment_size.try_into().unwrap()]; - buf.read_exact(&mut comment_bytes)?; + rdr.read_exact(&mut comment_bytes)?; Ok(Self { word: String::from_utf8(word_bytes)?, - weights: DictWeight::deserialize(&mut buf)?, + weights: DictWeight::deserialize(&mut rdr)?, comment: String::from_utf8(comment_bytes)?, }) } @@ -141,27 +140,27 @@ impl DictModel { &self.dict } - pub fn serialize(&self, mut buf: W) -> Result + pub fn serialize(&self, mut wtr: W) -> Result where W: Write, { let dict_size = self.dict.len(); - buf.write_u32::(dict_size.try_into().unwrap())?; + utils::write_u32(&mut wtr, dict_size.try_into().unwrap())?; let mut total_size = mem::size_of::(); for entry in &self.dict { - total_size += entry.serialize(&mut buf)?; + total_size += entry.serialize(&mut wtr)?; } Ok(total_size) } - pub fn deserialize(mut buf: R) -> Result + pub fn deserialize(mut rdr: R) -> Result where R: Read, { - let dict_size = buf.read_u32::()?; + let dict_size = utils::read_u32(&mut rdr)?; let mut dict = Vec::with_capacity(dict_size.try_into().unwrap()); for _ in 0..dict_size { - dict.push(WordWeightRecord::deserialize(&mut buf)?); + dict.push(WordWeightRecord::deserialize(&mut rdr)?); } Ok(Self { dict }) } diff --git a/vaporetto/src/kytea_model.rs b/vaporetto/src/kytea_model.rs index 2f774c48..67ee42d7 100644 --- a/vaporetto/src/kytea_model.rs +++ b/vaporetto/src/kytea_model.rs @@ -1,13 +1,12 @@ use std::convert::TryFrom; use std::io::BufRead; -use byteorder::{LittleEndian, ReadBytesExt}; - use crate::dict_model::{DictModel, DictWeight, WordWeightRecord}; use crate::errors::{Result, VaporettoError}; use crate::model::Model; use crate::ngram_model::{NgramData, NgramModel}; use crate::tag_model::TagModel; +use crate::utils; struct KyteaConfig { _model_tag: String, @@ -26,20 +25,23 @@ struct KyteaConfig { } impl KyteaConfig { - fn read(rdr: &mut R) -> Result { + fn read(mut rdr: R) -> Result + where + R: BufRead, + { let mut model_tag = String::new(); rdr.read_line(&mut model_tag)?; - let do_ws = rdr.read_u8()? != 0; - let do_tags = rdr.read_u8()? != 0; - let n_tags = rdr.read_u32::()?; - let char_w = rdr.read_u8()?; - let char_n = rdr.read_u8()?; - let type_w = rdr.read_u8()?; - let type_n = rdr.read_u8()?; - let dict_n = rdr.read_u8()?; - let bias = rdr.read_u8()? != 0; - let epsilon = rdr.read_f64::()?; - let solver_type = rdr.read_u8()?; + let do_ws = utils::read_u8(&mut rdr)? != 0; + let do_tags = utils::read_u8(&mut rdr)? != 0; + let n_tags = utils::read_u32(&mut rdr)?; + let char_w = utils::read_u8(&mut rdr)?; + let char_n = utils::read_u8(&mut rdr)?; + let type_w = utils::read_u8(&mut rdr)?; + let type_n = utils::read_u8(&mut rdr)?; + let dict_n = utils::read_u8(&mut rdr)?; + let bias = utils::read_u8(&mut rdr)? != 0; + let epsilon = utils::read_f64(&mut rdr)?; + let solver_type = utils::read_u8(&mut rdr)?; let mut char_map = vec![]; rdr.read_until(0, &mut char_map)?; let char_map: Vec = String::from_utf8(char_map)?.chars().collect(); @@ -62,24 +64,35 @@ impl KyteaConfig { } trait Readable: Sized { - fn read(config: &KyteaConfig, rdr: &mut R) -> Result; + fn read(config: &KyteaConfig, rdr: R) -> Result + where + R: BufRead; } impl Readable for i16 { - fn read(_config: &KyteaConfig, rdr: &mut R) -> Result { - Ok(rdr.read_i16::()?) + fn read(_config: &KyteaConfig, mut rdr: R) -> Result + where + R: BufRead, + { + Ok(utils::read_i16(&mut rdr)?) } } impl Readable for f64 { - fn read(_config: &KyteaConfig, rdr: &mut R) -> Result { - Ok(rdr.read_f64::()?) + fn read(_config: &KyteaConfig, mut rdr: R) -> Result + where + R: BufRead, + { + Ok(utils::read_f64(&mut rdr)?) } } impl Readable for char { - fn read(config: &KyteaConfig, rdr: &mut R) -> Result { - let cidx = rdr.read_u16::()? as usize; + fn read(config: &KyteaConfig, mut rdr: R) -> Result + where + R: BufRead, + { + let cidx = utils::read_u16(&mut rdr)? as usize; Ok(config.char_map[cidx - 1]) } } @@ -88,22 +101,28 @@ impl Readable for Vec where T: Readable, { - fn read(config: &KyteaConfig, rdr: &mut R) -> Result { - let size = rdr.read_u32::()?; + fn read(config: &KyteaConfig, mut rdr: R) -> Result + where + R: BufRead, + { + let size = utils::read_u32(&mut rdr)?; let mut result = Self::with_capacity(size as usize); for _ in 0..size { - result.push(T::read(config, rdr)?); + result.push(T::read(config, &mut rdr)?); } Ok(result) } } impl Readable for String { - fn read(config: &KyteaConfig, rdr: &mut R) -> Result { - let size = rdr.read_u32::()?; + fn read(config: &KyteaConfig, mut rdr: R) -> Result + where + R: BufRead, + { + let size = utils::read_u32(&mut rdr)?; let mut result = Self::new(); for _ in 0..size { - let cidx = rdr.read_u16::()? as usize; + let cidx = utils::read_u16(&mut rdr)? as usize; result.push(config.char_map[cidx - 1]); } Ok(result) @@ -152,29 +171,32 @@ impl Dictionary where T: Readable, { - fn read(config: &KyteaConfig, rdr: &mut R) -> Result> { - let n_dicts = rdr.read_u8()?; - let n_states = rdr.read_u32::()? as usize; + fn read(config: &KyteaConfig, mut rdr: R) -> Result> + where + R: BufRead, + { + let n_dicts = utils::read_u8(&mut rdr)?; + let n_states = utils::read_u32(&mut rdr)? as usize; if n_states == 0 { return Ok(None); } let mut states = Vec::with_capacity(n_states); for _ in 0..n_states { - let failure = rdr.read_u32::()?; - let n_gotos = rdr.read_u32::()?; + let failure = utils::read_u32(&mut rdr)?; + let n_gotos = utils::read_u32(&mut rdr)?; let mut gotos = vec![]; for _ in 0..n_gotos { - let k = char::read(config, rdr)?; - let v = rdr.read_u32::()?; + let k = char::read(config, &mut rdr)?; + let v = utils::read_u32(&mut rdr)?; gotos.push((k, v)); } gotos.sort_unstable(); - let n_outputs = rdr.read_u32::()? as usize; + let n_outputs = utils::read_u32(&mut rdr)? as usize; let mut outputs = Vec::with_capacity(n_outputs); for _ in 0..n_outputs { - outputs.push(rdr.read_u32::()?); + outputs.push(utils::read_u32(&mut rdr)?); } - let is_branch = rdr.read_u8()? != 0; + let is_branch = utils::read_u8(&mut rdr)? != 0; states.push(State { _failure: failure, gotos, @@ -182,10 +204,10 @@ where is_branch, }); } - let n_entries = rdr.read_u32::()? as usize; + let n_entries = utils::read_u32(&mut rdr)? as usize; let mut entries = Vec::with_capacity(n_entries); for _ in 0..n_entries { - entries.push(T::read(config, rdr)?); + entries.push(T::read(config, &mut rdr)?); } Ok(Some(Self { n_dicts, @@ -212,18 +234,21 @@ impl FeatureLookup where T: Readable, { - fn read(config: &KyteaConfig, rdr: &mut R) -> Result> { - let active = rdr.read_u8()?; + fn read(config: &KyteaConfig, mut rdr: R) -> Result> + where + R: BufRead, + { + let active = utils::read_u8(&mut rdr)?; if active == 0 { return Ok(None); } - let char_dict = Dictionary::read(config, rdr)?; - let type_dict = Dictionary::read(config, rdr)?; - let self_dict = Dictionary::read(config, rdr)?; - let dict_vec = Vec::::read(config, rdr)?; - let biases = Vec::::read(config, rdr)?; - let tag_dict_vec = Vec::::read(config, rdr)?; - let tag_unk_vec = Vec::::read(config, rdr)?; + let char_dict = Dictionary::read(config, &mut rdr)?; + let type_dict = Dictionary::read(config, &mut rdr)?; + let self_dict = Dictionary::read(config, &mut rdr)?; + let dict_vec = Vec::::read(config, &mut rdr)?; + let biases = Vec::::read(config, &mut rdr)?; + let tag_dict_vec = Vec::::read(config, &mut rdr)?; + let tag_unk_vec = Vec::::read(config, &mut rdr)?; Ok(Some(Self { char_dict, type_dict, @@ -246,20 +271,23 @@ struct LinearModel { } impl Readable for Option { - fn read(config: &KyteaConfig, rdr: &mut R) -> Result { - let n_classes = rdr.read_u32::()?; + fn read(config: &KyteaConfig, mut rdr: R) -> Result + where + R: BufRead, + { + let n_classes = utils::read_u32(&mut rdr)?; if n_classes == 0 { return Ok(None); } let add_features = false; - let solver_type = rdr.read_u8()?; + let solver_type = utils::read_u8(&mut rdr)?; let mut labels = vec![]; for _ in 0..n_classes { - labels.push(rdr.read_i32::()?); + labels.push(utils::read_i32(&mut rdr)?); } - let bias = rdr.read_u8()? != 0; - let multiplier = rdr.read_f64::()?; - let feature_lookup = FeatureLookup::read(config, rdr)?; + let bias = utils::read_u8(&mut rdr)? != 0; + let multiplier = utils::read_f64(&mut rdr)?; + let feature_lookup = FeatureLookup::read(config, &mut rdr)?; Ok(Some(LinearModel { _add_features: add_features, _solver_type: solver_type, @@ -280,25 +308,28 @@ struct ModelTagEntry { } impl Readable for ModelTagEntry { - fn read(config: &KyteaConfig, rdr: &mut R) -> Result { - let word = String::read(config, rdr)?; + fn read(config: &KyteaConfig, mut rdr: R) -> Result + where + R: BufRead, + { + let word = String::read(config, &mut rdr)?; let mut tags = Vec::with_capacity(config.n_tags as usize); let mut tags_in_dicts = Vec::with_capacity(config.n_tags as usize); for _ in 0..config.n_tags { - let size = rdr.read_u32::()? as usize; + let size = utils::read_u32(&mut rdr)? as usize; let mut t = Vec::with_capacity(size); let mut td = Vec::with_capacity(size); for _ in 0..size { - t.push(String::read(config, rdr)?); - td.push(rdr.read_u8()?); + t.push(String::read(config, &mut rdr)?); + td.push(utils::read_u8(&mut rdr)?); } tags.push(t); tags_in_dicts.push(td); } - let in_dict = rdr.read_u8()?; + let in_dict = utils::read_u8(&mut rdr)?; let mut tag_models = Vec::with_capacity(config.n_tags as usize); for _ in 0..config.n_tags { - tag_models.push(Option::::read(config, rdr)?); + tag_models.push(Option::::read(config, &mut rdr)?); } Ok(Self { _word: word, @@ -317,17 +348,20 @@ struct ProbTagEntry { } impl Readable for ProbTagEntry { - fn read(config: &KyteaConfig, rdr: &mut R) -> Result { - let word = String::read(config, rdr)?; + fn read(config: &KyteaConfig, mut rdr: R) -> Result + where + R: BufRead, + { + let word = String::read(config, &mut rdr)?; let mut tags = Vec::with_capacity(config.n_tags as usize); let mut probs = Vec::with_capacity(config.n_tags as usize); for _ in 0..config.n_tags { - let size = rdr.read_u32::()? as usize; + let size = utils::read_u32(&mut rdr)? as usize; let mut t = Vec::with_capacity(size); let mut p = Vec::with_capacity(size); for _ in 0..size { - t.push(String::read(config, rdr)?); - p.push(rdr.read_f64::()?); + t.push(String::read(config, &mut rdr)?); + p.push(utils::read_f64(&mut rdr)?); } tags.push(t); probs.push(p); @@ -365,21 +399,24 @@ impl KyteaModel { /// # Errors /// /// When `rdr` generates an error, it will be returned as is. - pub fn read(rdr: &mut R) -> Result { - let config = KyteaConfig::read(rdr)?; + pub fn read(mut rdr: R) -> Result + where + R: BufRead, + { + let config = KyteaConfig::read(&mut rdr)?; - let wordseg_model = Option::::read(&config, rdr)?; + let wordseg_model = Option::::read(&config, &mut rdr)?; let mut global_tags = Vec::with_capacity(config.n_tags as usize); let mut global_models = Vec::with_capacity(config.n_tags as usize); for _ in 0..config.n_tags { - global_tags.push(Vec::::read(&config, rdr)?); - global_models.push(Option::::read(&config, rdr)?); + global_tags.push(Vec::::read(&config, &mut rdr)?); + global_models.push(Option::::read(&config, &mut rdr)?); } - let dict = Dictionary::::read(&config, rdr)?; - let subword_dict = Dictionary::::read(&config, rdr)?; + let dict = Dictionary::::read(&config, &mut rdr)?; + let subword_dict = Dictionary::::read(&config, &mut rdr)?; Ok(Self { config, diff --git a/vaporetto/src/model.rs b/vaporetto/src/model.rs index e7d37d8a..f31505cf 100644 --- a/vaporetto/src/model.rs +++ b/vaporetto/src/model.rs @@ -1,11 +1,10 @@ use std::io::{Read, Write}; -use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; - use crate::dict_model::{DictModel, WordWeightRecord}; use crate::errors::Result; use crate::ngram_model::NgramModel; use crate::tag_model::TagModel; +use crate::utils; /// Model data. pub struct Model { @@ -35,9 +34,9 @@ impl Model { self.char_ngram_model.serialize(&mut wtr)?; self.type_ngram_model.serialize(&mut wtr)?; self.dict_model.serialize(&mut wtr)?; - wtr.write_i32::(self.bias)?; - wtr.write_u32::(self.char_window_size.try_into().unwrap())?; - wtr.write_u32::(self.type_window_size.try_into().unwrap())?; + utils::write_i32(&mut wtr, self.bias)?; + utils::write_u32(&mut wtr, self.char_window_size.try_into().unwrap())?; + utils::write_u32(&mut wtr, self.type_window_size.try_into().unwrap())?; self.tag_model.serialize(&mut wtr)?; Ok(()) } @@ -63,9 +62,9 @@ impl Model { char_ngram_model: NgramModel::::deserialize(&mut rdr)?, type_ngram_model: NgramModel::>::deserialize(&mut rdr)?, dict_model: DictModel::deserialize(&mut rdr)?, - bias: rdr.read_i32::()?, - char_window_size: rdr.read_u32::()?.try_into().unwrap(), - type_window_size: rdr.read_u32::()?.try_into().unwrap(), + bias: utils::read_i32(&mut rdr)?, + char_window_size: utils::read_u32(&mut rdr)?.try_into().unwrap(), + type_window_size: utils::read_u32(&mut rdr)?.try_into().unwrap(), tag_model: TagModel::deserialize(&mut rdr)?, }) } diff --git a/vaporetto/src/ngram_model.rs b/vaporetto/src/ngram_model.rs index 1427e575..d79b287d 100644 --- a/vaporetto/src/ngram_model.rs +++ b/vaporetto/src/ngram_model.rs @@ -1,9 +1,8 @@ use std::io::{Read, Write}; use std::mem; -use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; - use crate::errors::Result; +use crate::utils; #[derive(Clone)] pub struct NgramData @@ -18,53 +17,53 @@ impl NgramData where T: AsRef<[u8]> + Clone, { - pub fn serialize(&self, mut buf: W) -> Result + pub fn serialize(&self, mut wtr: W) -> Result where W: Write, { let ngram = self.ngram.as_ref(); let ngram_size = ngram.len(); let weights_size = self.weights.len(); - buf.write_u32::(ngram_size.try_into().unwrap())?; - buf.write_u32::(weights_size.try_into().unwrap())?; - buf.write_all(ngram)?; + utils::write_u32(&mut wtr, ngram_size.try_into().unwrap())?; + utils::write_u32(&mut wtr, weights_size.try_into().unwrap())?; + wtr.write_all(ngram)?; for &w in &self.weights { - buf.write_i32::(w)?; + utils::write_i32(&mut wtr, w)?; } Ok(mem::size_of::() * 2 + ngram_size + mem::size_of::() * weights_size) } } impl NgramData { - pub fn deserialize(mut buf: R) -> Result + pub fn deserialize(mut rdr: R) -> Result where R: Read, { - let ngram_size = buf.read_u32::()?; - let weights_size = buf.read_u32::()?; + let ngram_size = utils::read_u32(&mut rdr)?; + let weights_size = utils::read_u32(&mut rdr)?; let mut ngram_bytes = vec![0; ngram_size.try_into().unwrap()]; - buf.read_exact(&mut ngram_bytes)?; + rdr.read_exact(&mut ngram_bytes)?; let ngram = String::from_utf8(ngram_bytes)?; let mut weights = vec![]; for _ in 0..weights_size { - weights.push(buf.read_i32::()?); + weights.push(utils::read_i32(&mut rdr)?); } Ok(Self { ngram, weights }) } } impl NgramData> { - pub fn deserialize(mut buf: R) -> Result + pub fn deserialize(mut rdr: R) -> Result where R: Read, { - let ngram_size = buf.read_u32::()?; - let weights_size = buf.read_u32::()?; + let ngram_size = utils::read_u32(&mut rdr)?; + let weights_size = utils::read_u32(&mut rdr)?; let mut ngram = vec![0; ngram_size.try_into().unwrap()]; - buf.read_exact(&mut ngram)?; + rdr.read_exact(&mut ngram)?; let mut weights = Vec::with_capacity(weights_size.try_into().unwrap()); for _ in 0..weights_size { - weights.push(buf.read_i32::()?); + weights.push(utils::read_i32(&mut rdr)?); } Ok(Self { ngram, weights }) } @@ -87,43 +86,43 @@ where Self { data } } - pub fn serialize(&self, mut buf: W) -> Result + pub fn serialize(&self, mut wtr: W) -> Result where W: Write, { let data_size = self.data.len(); - buf.write_u32::(data_size.try_into().unwrap())?; + utils::write_u32(&mut wtr, data_size.try_into().unwrap())?; let mut total_size = mem::size_of::(); for d in &self.data { - total_size += d.serialize(&mut buf)?; + total_size += d.serialize(&mut wtr)?; } Ok(total_size + mem::size_of::()) } } impl NgramModel { - pub fn deserialize(mut buf: R) -> Result + pub fn deserialize(mut rdr: R) -> Result where R: Read, { - let data_size = buf.read_u32::()?; + let data_size = utils::read_u32(&mut rdr)?; let mut data = Vec::with_capacity(data_size.try_into().unwrap()); for _ in 0..data_size { - data.push(NgramData::::deserialize(&mut buf)?); + data.push(NgramData::::deserialize(&mut rdr)?); } Ok(Self { data }) } } impl NgramModel> { - pub fn deserialize(mut buf: R) -> Result + pub fn deserialize(mut rdr: R) -> Result where R: Read, { - let data_size = buf.read_u32::()?; + let data_size = utils::read_u32(&mut rdr)?; let mut data = Vec::with_capacity(data_size.try_into().unwrap()); for _ in 0..data_size { - data.push(NgramData::>::deserialize(&mut buf)?); + data.push(NgramData::>::deserialize(&mut rdr)?); } Ok(Self { data }) } diff --git a/vaporetto/src/tag_model.rs b/vaporetto/src/tag_model.rs index e4cfd697..205ccfb7 100644 --- a/vaporetto/src/tag_model.rs +++ b/vaporetto/src/tag_model.rs @@ -1,9 +1,8 @@ use std::io::{Read, Write}; -use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; - use crate::errors::Result; use crate::ngram_model::NgramModel; +use crate::utils; pub struct TagClassInfo { pub(crate) name: String, @@ -15,9 +14,9 @@ impl TagClassInfo { where W: Write, { - wtr.write_u32::(self.name.len().try_into().unwrap())?; + utils::write_u32(&mut wtr, self.name.len().try_into().unwrap())?; wtr.write_all(self.name.as_bytes())?; - wtr.write_i32::(self.bias)?; + utils::write_i32(&mut wtr, self.bias)?; Ok(()) } @@ -25,13 +24,13 @@ impl TagClassInfo { where R: Read, { - let name_size = rdr.read_u32::()?; + let name_size = utils::read_u32(&mut rdr)?; let mut name_bytes = vec![0; name_size.try_into().unwrap()]; rdr.read_exact(&mut name_bytes)?; let name = String::from_utf8(name_bytes)?; Ok(Self { name, - bias: rdr.read_i32::()?, + bias: utils::read_i32(&mut rdr)?, }) } } @@ -57,7 +56,7 @@ impl TagModel { where W: Write, { - wtr.write_u32::(self.class_info.len().try_into().unwrap())?; + utils::write_u32(&mut wtr, self.class_info.len().try_into().unwrap())?; for cls in &self.class_info { cls.serialize(&mut wtr)?; } @@ -71,7 +70,7 @@ impl TagModel { where R: Read, { - let n_class = rdr.read_u32::()?; + let n_class = utils::read_u32(&mut rdr)?; let mut class_info = vec![]; for _ in 0..n_class { class_info.push(TagClassInfo::deserialize(&mut rdr)?); diff --git a/vaporetto/src/utils.rs b/vaporetto/src/utils.rs index e68a9889..8d926549 100644 --- a/vaporetto/src/utils.rs +++ b/vaporetto/src/utils.rs @@ -1,5 +1,6 @@ use std::cell::RefCell; use std::collections::BTreeMap; +use std::io::{self, Read, Write}; pub trait AddWeight { fn add_weight(&self, target: &mut [i32], offset: isize); @@ -94,3 +95,77 @@ where |x1| Some(rhs.as_ref().map_or_else(|| x1.clone(), |x2| f(x1, x2))), ) } + +#[cfg(feature = "kytea")] +pub fn read_u8(mut rdr: R) -> io::Result +where + R: Read, +{ + let mut buf = [0]; + rdr.read_exact(&mut buf)?; + Ok(buf[0]) +} + +#[cfg(feature = "kytea")] +pub fn read_u16(mut rdr: R) -> io::Result +where + R: Read, +{ + let mut buf = [0; 2]; + rdr.read_exact(&mut buf)?; + Ok(u16::from_le_bytes(buf)) +} + +#[cfg(feature = "kytea")] +pub fn read_i16(mut rdr: R) -> io::Result +where + R: Read, +{ + let mut buf = [0; 2]; + rdr.read_exact(&mut buf)?; + Ok(i16::from_le_bytes(buf)) +} + +pub fn read_u32(mut rdr: R) -> io::Result +where + R: Read, +{ + let mut buf = [0; 4]; + rdr.read_exact(&mut buf)?; + Ok(u32::from_le_bytes(buf)) +} + +pub fn write_u32(mut wtr: W, data: u32) -> io::Result<()> +where + W: Write, +{ + wtr.write_all(&data.to_le_bytes())?; + Ok(()) +} + +pub fn read_i32(mut rdr: R) -> io::Result +where + R: Read, +{ + let mut buf = [0; 4]; + rdr.read_exact(&mut buf)?; + Ok(i32::from_le_bytes(buf)) +} + +pub fn write_i32(mut wtr: W, data: i32) -> io::Result<()> +where + W: Write, +{ + wtr.write_all(&data.to_le_bytes())?; + Ok(()) +} + +#[cfg(feature = "kytea")] +pub fn read_f64(mut rdr: R) -> io::Result +where + R: Read, +{ + let mut buf = [0; 8]; + rdr.read_exact(&mut buf)?; + Ok(f64::from_le_bytes(buf)) +}