Update bincode to 2.0.0-rc.3 (#91)
* Update bincode to 2.0.0-rc.3

* fix

* Remove unnecessary type hint

* fix
vbkaisetsu authored Mar 31, 2023
1 parent c7853f5 commit 6ee10f9
Showing 6 changed files with 52 additions and 57 deletions.
vaporetto/Cargo.toml (2 changes: 1 addition & 1 deletion)
@@ -13,7 +13,7 @@ keywords = ["japanese", "analyzer", "tokenizer", "morphological"]
 categories = ["text-processing", "no-std"]
 
 [dependencies]
-bincode = { version = "2.0.0-rc.2", default-features = false, features = ["alloc", "derive"] } # MIT
+bincode = { version = "2.0.0-rc.3", default-features = false, features = ["alloc", "derive"] } # MIT
 daachorse = "1.0.0" # MIT or Apache-2.0
 hashbrown = "0.13.2" # MIT or Apache-2.0
 
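For context on the bumped dependency: bincode 2.x drops the serde-based 1.x API in favor of its own Encode/Decode traits (brought in here through the "derive" feature) plus alloc-friendly helpers. A minimal round-trip sketch against 2.0.0-rc.3, using a hypothetical Example type rather than anything from this crate:

    use bincode::{config, decode_from_slice, encode_to_vec, Decode, Encode};

    // Hypothetical example type; any struct deriving Encode/Decode works the same way.
    #[derive(Debug, PartialEq, Encode, Decode)]
    struct Example {
        word: String,
        weights: Vec<i32>,
    }

    fn main() {
        let original = Example { word: "火星".to_string(), weights: vec![1, -2, 3] };
        // encode_to_vec only needs the "alloc" feature, matching the Cargo.toml flags above.
        let bytes = encode_to_vec(&original, config::standard()).unwrap();
        // decode_from_slice returns the decoded value plus the number of bytes consumed.
        let (decoded, _len): (Example, usize) = decode_from_slice(&bytes, config::standard()).unwrap();
        assert_eq!(original, decoded);
    }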
vaporetto/src/char_scorer/boundary_tag_scorer.rs (27 changes: 8 additions & 19 deletions)
@@ -11,23 +11,22 @@ use bincode::{
 use daachorse::charwise::CharwiseDoubleArrayAhoCorasick;
 #[cfg(not(feature = "charwise-pma"))]
 use daachorse::DoubleArrayAhoCorasick;
-use hashbrown::HashMap;
 
 use crate::char_scorer::CharWeightMerger;
 use crate::dict_model::DictModel;
 use crate::errors::{Result, VaporettoError};
 use crate::ngram_model::{NgramModel, TagNgramModel};
 use crate::predictor::{PositionalWeight, PositionalWeightWithTag, WeightVector};
 use crate::sentence::Sentence;
-use crate::utils::SplitMix64Builder;
+use crate::utils::{SerializableHashMap, SplitMix64Builder};
 
 pub struct CharScorerBoundaryTag {
     #[cfg(not(feature = "charwise-pma"))]
     pma: DoubleArrayAhoCorasick<u32>,
     #[cfg(feature = "charwise-pma")]
     pma: CharwiseDoubleArrayAhoCorasick<u32>,
     weights: Vec<Option<PositionalWeight<WeightVector>>>,
-    tag_weight: Vec<Vec<HashMap<u32, WeightVector, SplitMix64Builder>>>,
+    tag_weight: Vec<Vec<SerializableHashMap<u32, WeightVector, SplitMix64Builder>>>,
 }
 
 impl<'de> BorrowDecode<'de> for CharScorerBoundaryTag {
@@ -40,11 +39,7 @@ impl<'de> BorrowDecode<'de> for CharScorerBoundaryTag {
         #[cfg(feature = "charwise-pma")]
         let (pma, _) = unsafe { CharwiseDoubleArrayAhoCorasick::deserialize_unchecked(pma_data) };
         let weights = Decode::decode(decoder)?;
-        let tag_weight: Vec<Vec<Vec<(u32, WeightVector)>>> = Decode::decode(decoder)?;
-        let tag_weight = tag_weight
-            .into_iter()
-            .map(|x| x.into_iter().map(|x| x.into_iter().collect()).collect())
-            .collect();
+        let tag_weight = Decode::decode(decoder)?;
         Ok(Self {
             pma,
             weights,
@@ -58,12 +53,7 @@ impl Encode for CharScorerBoundaryTag {
         let pma_data = self.pma.serialize();
         Encode::encode(&pma_data, encoder)?;
         Encode::encode(&self.weights, encoder)?;
-        let tag_weight: Vec<Vec<Vec<_>>> = self
-            .tag_weight
-            .iter()
-            .map(|x| x.iter().map(|x| x.iter().collect()).collect())
-            .collect();
-        Encode::encode(&tag_weight, encoder)?;
+        Encode::encode(&self.tag_weight, encoder)?;
         Ok(())
     }
 }
@@ -90,11 +80,10 @@ impl CharScorerBoundaryTag {
             let weight = PositionalWeightWithTag::with_boundary(-word_len, d.weights);
             merger.add(d.word, weight);
         }
-        let mut tag_weight =
-            vec![
-                vec![HashMap::with_hasher(SplitMix64Builder); usize::from(window_size) + 1];
-                tag_ngram_model.len()
-            ];
+        let mut tag_weight = vec![
+            vec![SerializableHashMap::default(); usize::from(window_size) + 1];
+            tag_ngram_model.len()
+        ];
         for (i, tag_model) in tag_ngram_model.into_iter().enumerate() {
             for d in tag_model.0 {
                 for w in d.weights {
vaporetto/src/ngram_model.rs (4 changes: 2 additions & 2 deletions)
@@ -9,7 +9,7 @@ pub struct NgramData<T> {
 }
 
 #[derive(Default, Debug, Decode, Encode)]
-pub struct NgramModel<T>(pub Vec<NgramData<T>>);
+pub struct NgramModel<T: 'static>(pub Vec<NgramData<T>>);
 
 #[derive(Clone, Debug, Decode, Encode)]
 pub struct TagWeight {
@@ -24,4 +24,4 @@ pub struct TagNgramData<T> {
 }
 
 #[derive(Default, Debug, Decode, Encode)]
-pub struct TagNgramModel<T>(pub Vec<TagNgramData<T>>);
+pub struct TagNgramModel<T: 'static>(pub Vec<TagNgramData<T>>);
vaporetto/src/predictor.rs (6 changes: 6 additions & 0 deletions)
@@ -44,6 +44,12 @@ pub enum WeightVector {
     Fixed(I32Simd),
 }
 
+impl Default for WeightVector {
+    fn default() -> Self {
+        Self::Variable(vec![])
+    }
+}
+
 impl Decode for WeightVector {
     fn decode<D: Decoder>(decoder: &mut D) -> Result<Self, DecodeError> {
         let weight: Vec<i32> = Decode::decode(decoder)?;
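The new impl gives WeightVector an explicit default; the diff does not show which call site needs it, so the motivation is left unstated here. A trivial hedged illustration of what it provides, assuming WeightVector is in scope:

    // WeightVector::default() yields the Variable variant with no elements.
    let w = WeightVector::default();
    assert!(matches!(w, WeightVector::Variable(ref v) if v.is_empty()));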
vaporetto/src/type_scorer/boundary_tag_scorer.rs (27 changes: 8 additions & 19 deletions)
@@ -7,19 +7,18 @@ use bincode::{
     BorrowDecode, Decode, Encode,
 };
 use daachorse::DoubleArrayAhoCorasick;
-use hashbrown::HashMap;
 
 use crate::errors::{Result, VaporettoError};
 use crate::ngram_model::{NgramModel, TagNgramModel};
 use crate::predictor::{PositionalWeight, PositionalWeightWithTag, WeightVector};
 use crate::sentence::Sentence;
 use crate::type_scorer::TypeWeightMerger;
-use crate::utils::SplitMix64Builder;
+use crate::utils::{SerializableHashMap, SplitMix64Builder};
 
 pub struct TypeScorerBoundaryTag {
     pma: DoubleArrayAhoCorasick<u32>,
     weights: Vec<Option<PositionalWeight<WeightVector>>>,
-    tag_weight: Vec<Vec<HashMap<u32, WeightVector, SplitMix64Builder>>>,
+    tag_weight: Vec<Vec<SerializableHashMap<u32, WeightVector, SplitMix64Builder>>>,
 }
 
 impl<'de> BorrowDecode<'de> for TypeScorerBoundaryTag {
@@ -29,11 +28,7 @@ impl<'de> BorrowDecode<'de> for TypeScorerBoundaryTag {
         let pma_data: &[u8] = BorrowDecode::borrow_decode(decoder)?;
         let (pma, _) = unsafe { DoubleArrayAhoCorasick::deserialize_unchecked(pma_data) };
         let weights = Decode::decode(decoder)?;
-        let tag_weight: Vec<Vec<Vec<(u32, WeightVector)>>> = Decode::decode(decoder)?;
-        let tag_weight = tag_weight
-            .into_iter()
-            .map(|x| x.into_iter().map(|x| x.into_iter().collect()).collect())
-            .collect();
+        let tag_weight = Decode::decode(decoder)?;
         Ok(Self {
             pma,
             weights,
@@ -47,12 +42,7 @@ impl Encode for TypeScorerBoundaryTag {
         let pma_data = self.pma.serialize();
         Encode::encode(&pma_data, encoder)?;
         Encode::encode(&self.weights, encoder)?;
-        let tag_weight: Vec<Vec<Vec<_>>> = self
-            .tag_weight
-            .iter()
-            .map(|x| x.iter().map(|x| x.iter().collect()).collect())
-            .collect();
-        Encode::encode(&tag_weight, encoder)?;
+        Encode::encode(&self.tag_weight, encoder)?;
         Ok(())
     }
 }
@@ -68,11 +58,10 @@ impl TypeScorerBoundaryTag {
            let weight = PositionalWeightWithTag::with_boundary(-i16::from(window_size), d.weights);
             merger.add(d.ngram, weight);
         }
-        let mut tag_weight =
-            vec![
-                vec![HashMap::with_hasher(SplitMix64Builder); usize::from(window_size) + 1];
-                tag_ngram_model.len()
-            ];
+        let mut tag_weight = vec![
+            vec![SerializableHashMap::default(); usize::from(window_size) + 1];
+            tag_ngram_model.len()
+        ];
         for (i, tag_model) in tag_ngram_model.into_iter().enumerate() {
             for d in tag_model.0 {
                 for w in d.weights {
vaporetto/src/utils.rs (43 changes: 27 additions & 16 deletions)
@@ -12,7 +12,7 @@ use bincode::{
     error::{DecodeError, EncodeError},
     Decode, Encode,
 };
-use hashbrown::HashMap;
+use hashbrown::{hash_map::DefaultHashBuilder, HashMap};
 
 #[cfg(feature = "fix-weight-length")]
 #[inline(always)]
@@ -35,42 +35,53 @@ impl Writer for VecWriter {
     }
 }
 
-#[derive(Debug)]
-pub struct SerializableHashMap<K, V>(pub HashMap<K, V>);
+#[derive(Clone, Debug, Default)]
+pub struct SerializableHashMap<K, V, S = DefaultHashBuilder>(pub HashMap<K, V, S>);
 
-impl<K, V> Deref for SerializableHashMap<K, V> {
-    type Target = HashMap<K, V>;
+impl<K, V, S> Deref for SerializableHashMap<K, V, S> {
+    type Target = HashMap<K, V, S>;
 
     fn deref(&self) -> &Self::Target {
         &self.0
     }
 }
 
-impl<K, V> DerefMut for SerializableHashMap<K, V> {
+impl<K, V, S> DerefMut for SerializableHashMap<K, V, S> {
     fn deref_mut(&mut self) -> &mut Self::Target {
         &mut self.0
    }
 }
 
-impl<K, V> Decode for SerializableHashMap<K, V>
+impl<K, V, S> Decode for SerializableHashMap<K, V, S>
 where
-    K: Encode + Decode + Eq + Hash,
-    V: Encode + Decode,
+    K: Decode + Eq + Hash,
+    V: Decode,
+    S: BuildHasher + Default,
 {
     fn decode<D: Decoder>(decoder: &mut D) -> Result<Self, DecodeError> {
-        let raw: Vec<(K, V)> = Decode::decode(decoder)?;
-        Ok(Self(raw.into_iter().collect()))
+        let mut result = HashMap::with_hasher(S::default());
+        let size: u64 = Decode::decode(decoder)?;
+        for _ in 0..size {
+            let k = Decode::decode(decoder)?;
+            let v = Decode::decode(decoder)?;
+            result.insert(k, v);
+        }
+        Ok(Self(result))
     }
 }
 
-impl<K, V> Encode for SerializableHashMap<K, V>
+impl<K, V, S> Encode for SerializableHashMap<K, V, S>
 where
-    K: Encode + Decode,
-    V: Encode + Decode,
+    K: Encode,
+    V: Encode,
 {
     fn encode<E: Encoder>(&self, encoder: &mut E) -> Result<(), EncodeError> {
-        let raw: Vec<(&K, &V)> = self.0.iter().collect();
-        Encode::encode(&raw, encoder)?;
+        let size = u64::try_from(self.0.len()).unwrap();
+        Encode::encode(&size, encoder)?;
+        for (k, v) in &self.0 {
+            Encode::encode(k, encoder)?;
+            Encode::encode(v, encoder)?;
+        }
         Ok(())
     }
 }
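These manual impls write the map as a u64 length followed by key/value pairs, which looks layout-compatible with the Vec<(K, V)> encoding the old code round-tripped through; that equivalence is an inference from this diff, not something the commit states. Because SerializableHashMap now implements Decode/Encode directly, the scorers above can drop their intermediate Vec conversions. A small hedged usage sketch, written as it might appear in a hypothetical #[cfg(test)] module inside utils.rs (not part of the diff):

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn serializable_hash_map_round_trip() {
            let mut map: SerializableHashMap<String, u32> = SerializableHashMap::default();
            // Deref/DerefMut expose the inner hashbrown::HashMap API.
            map.insert("京都".to_string(), 42);
            let bytes = bincode::encode_to_vec(&map, bincode::config::standard()).unwrap();
            let (decoded, _bytes_read): (SerializableHashMap<String, u32>, usize) =
                bincode::decode_from_slice(&bytes, bincode::config::standard()).unwrap();
            assert_eq!(decoded.get("京都"), Some(&42));
        }
    }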
