Skip to content

Commit

Permalink
Disable japanese-transliteration by default
Browse files Browse the repository at this point in the history
  • Loading branch information
choznerol and ManyTheFish committed Oct 17, 2022
1 parent 49bc172 commit dfcaa62
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 6 deletions.
7 changes: 5 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ slice-group-by = "0.3.0"
unicode-segmentation = "1.6.0"
whatlang = "0.16.1"
lindera = { version = "=0.16.0", features = ["ipadic"], optional = true }
pinyin = { version = "0.9", default-features = false, features = ["with_tone"], optional = true }
wana_kana = "2.1.0"
pinyin = { version = "0.9", default-features = false, features = [
"with_tone",
], optional = true }
wana_kana = { version = "2.1.0", optional = true }

[features]
default = ["chinese", "hebrew", "japanese", "thai"]
Expand All @@ -35,6 +37,7 @@ hebrew = []

# allow japanese specialized tokenization
japanese = ["dep:lindera"]
japanese-transliteration = ["dep:wana_kana"]

# allow thai specialized tokenization
thai = []
Expand Down
6 changes: 3 additions & 3 deletions src/normalizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use once_cell::sync::Lazy;
#[cfg(feature = "chinese")]
pub use self::chinese::ChineseNormalizer;
pub use self::control_char::ControlCharNormalizer;
#[cfg(feature = "japanese")]
#[cfg(feature = "japanese-transliteration")]
pub use self::japanese::JapaneseNormalizer;
pub use self::latin::LatinNormalizer;
pub use self::lowercase::LowercaseNormalizer;
Expand All @@ -14,7 +14,7 @@ use crate::Token;
#[cfg(feature = "chinese")]
mod chinese;
mod control_char;
#[cfg(feature = "japanese")]
#[cfg(feature = "japanese-transliteration")]
mod japanese;
mod latin;
mod lowercase;
Expand All @@ -26,7 +26,7 @@ pub static NORMALIZERS: Lazy<Vec<Box<dyn Normalizer>>> = Lazy::new(|| {
Box::new(LowercaseNormalizer),
#[cfg(feature = "chinese")]
Box::new(ChineseNormalizer),
#[cfg(feature = "japanese")]
#[cfg(feature = "japanese-transliteration")]
Box::new(JapaneseNormalizer),
Box::new(LatinNormalizer),
Box::new(ControlCharNormalizer),
Expand Down
3 changes: 2 additions & 1 deletion src/segmenter/japanese.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ mod test {
"国際",
"空港",
"限定",
"とうとばっぐ",
// Use "とうとばっぐ" instead when the "japanese-transliteration" feature is enabled or becomes the default.
"トートバッグ",
" ",
"すもも",
"も",
Expand Down

0 comments on commit dfcaa62

Please sign in to comment.