Merge pull request #39 from PyThaiNLP/feature/change-hash-lib
I just keep merging lol
bact authored Aug 12, 2021
2 parents be94afb + 26afbab commit 761a399
Showing 8 changed files with 174 additions and 113 deletions.
10 changes: 6 additions & 4 deletions Cargo.toml
@@ -11,9 +11,11 @@ homepage = "https://github.com/PyThaiNLP/nlpo3/"
documentation = "https://github.com/PyThaiNLP/nlpo3/"
repository = "https://github.com/PyThaiNLP/nlpo3/"
readme = "README.md"
exclude = [".gitignore", ".github/*", "build_tools/*", "tests/*"]

exclude = [".gitignore", ".github/*", "build_tools/*", "tests/*", "nlpo3-cli/*", "nlpo3-nodejs/*", "nlpo3-python/*"]

[profile.release]
lto = true
codegen-units = 1

[lib]
path = "src/lib.rs"
@@ -23,13 +25,13 @@ path = "src/lib.rs"
crate-type = ["cdylib", "rlib"]

[dependencies]
ahash = "0.7.2"
anyhow = "1.0.42"
binary-heap-plus = "0.4.1"
bytecount = "0.6.2"
lazy_static = "1.4.0"
rayon = "1.5"
regex = "1.4.6"
smol_str = "0.1.17"
rustc-hash = "1.1.0"


[[test]]
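The Cargo.toml diff above swaps the hashing dependency from ahash to rustc-hash (the point of this pull request) and introduces a [profile.release] section enabling link-time optimization. As a rough illustration of what the dependency swap means for calling code, the sketch below uses rustc_hash::FxHashMap as a drop-in replacement for the standard HashMap; the map name and sample words are made up and not taken from nlpo3's source.

```rust
// Minimal sketch, not nlpo3 code: FxHashMap from the rustc-hash crate is a
// type alias for std HashMap using the fast, non-cryptographic FxHasher,
// so it is constructed with `default()` rather than `new()`.
use rustc_hash::FxHashMap;

fn main() {
    // Hypothetical word-frequency map; the words are just sample data.
    let mut word_counts: FxHashMap<&str, usize> = FxHashMap::default();
    for word in ["กิน", "ข้าว", "กิน"] {
        *word_counts.entry(word).or_insert(0) += 1;
    }
    assert_eq!(word_counts["กิน"], 2);
}
```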
8 changes: 4 additions & 4 deletions src/fixed_bytes_str/four_bytes.rs
@@ -262,7 +262,7 @@ impl CustomString {
let mut output_content: Vec<u8> = Vec::with_capacity(input.len() / 100);
for index in 0..input.chars_len() {
let extracted_bytes =
trim_to_std_utf8(&input.slice_by_char_indice(index, index + 1)).unwrap();
trim_to_std_utf8(input.slice_by_char_indice(index, index + 1)).unwrap();
match extracted_bytes {
(None, None, None, Some(first_byte)) => {
output_content.push(first_byte);
@@ -293,7 +293,7 @@ impl CustomString {
let mut output_content: Vec<u8> = Vec::with_capacity(input.len() / 100);
for index in 0..input.chars_len() {
let extracted_bytes =
trim_to_std_utf8(&input.slice_by_char_indice(index, index + 1)).unwrap();
trim_to_std_utf8(input.slice_by_char_indice(index, index + 1)).unwrap();
match extracted_bytes {
(None, None, None, Some(first_byte)) => {
output_content.push(first_byte);
@@ -329,7 +329,7 @@ pub trait FixedCharsLengthByteSlice {

impl FixedCharsLengthByteSlice for &CustomStringBytesSlice {
fn slice_by_char_indice(&self, start: usize, end: usize) -> Self {
unsafe { &self.get_unchecked((start * BYTES_PER_CHAR)..(end * BYTES_PER_CHAR)) }
unsafe { self.get_unchecked((start * BYTES_PER_CHAR)..(end * BYTES_PER_CHAR)) }
}
fn chars_len(&self) -> usize {
self.len() / BYTES_PER_CHAR
@@ -368,7 +368,7 @@ fn check_slice() {
let ex: &[u8] = &[255, 255, 255, 255, 0, 255, 111, 0];
assert_eq!(ex.slice_by_char_indice(0, 1), &[255, 255, 255, 255]);
assert_eq!(ex.slice_by_char_indice(1, 2), &[0, 255, 111, 0]);
assert_eq!("".is_empty(), true);
assert!("".is_empty());
}

#[test]
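The four_bytes.rs edits above are lint-level cleanups: redundant `&` borrows are dropped where the expression already yields a reference (clippy's needless_borrow), and `assert_eq!(x, true)` becomes `assert!(x)`. A small standalone sketch of the borrow pattern, using made-up names, might look like this:

```rust
// Illustrative only; `take_slice` and `data` are hypothetical names.
fn take_slice(s: &[u8]) -> usize {
    s.len()
}

fn main() {
    let data: &[u8] = &[255, 255, 255, 255];
    // Before: borrowing a value that is already a reference (&&[u8]),
    // which clippy flags as needless_borrow.
    let a = take_slice(&data);
    // After: pass the existing reference directly.
    let b = take_slice(data);
    assert_eq!(a, b);
    assert!(!data.is_empty());
}
```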
6 changes: 3 additions & 3 deletions src/tokenizer.rs
@@ -1,5 +1,5 @@
pub mod dict_reader_custom;
mod dict_reader_custom;
pub mod newmm_custom;
pub mod tcc_custom;
mod tcc_custom;
pub mod tokenizer_trait;
pub mod trie_custom;
mod trie_custom;
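
The tokenizer.rs change narrows visibility: dict_reader_custom, tcc_custom, and trie_custom go from `pub mod` to plain `mod`, so they stay usable inside the crate but are no longer part of the public API. A hedged sketch of the difference, with hypothetical module names:

```rust
// Illustrative sketch; `internal` and `api` are made-up module names.
mod internal {
    // Private module: reachable from anywhere inside this crate,
    // but invisible to external crates.
    pub fn helper() -> u32 {
        42
    }
}

pub mod api {
    // Public module: the only part an external user would see.
    pub fn answer() -> u32 {
        crate::internal::helper()
    }
}

fn main() {
    assert_eq!(api::answer(), 42);
}
```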
