Skip to content

Commit

Permalink
refactor: Use unicode_titlecase's to_titlecase_lower_rest, remove…
Browse files Browse the repository at this point in the history
… own implementation

Another nice win. See also #13
  • Loading branch information
alexpovel committed May 29, 2023
1 parent 5664118 commit 2c5c8e5
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 40 deletions.
8 changes: 8 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ log = "0.4.17"
itertools = "0.10.5"
paste = "1.0.12"
serde = { version = "1.0.163", features = ["derive"] }
unicode_titlecase = "2.0.0"

[dev-dependencies]
rstest = "0.17.0"
Expand Down
40 changes: 4 additions & 36 deletions common/src/strings.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,5 @@
use log::trace;

/// Returns `word` with its first character uppercased and every following
/// character lowercased.
///
/// The conversion relies on the standard library's `char::to_uppercase` and
/// `char::to_lowercase`, so a single input character may expand into several
/// output characters (for example, a leading `ß` becomes `SS`). Characters
/// without case, such as punctuation or emoji, pass through unchanged. An
/// empty `word` yields an empty `String`.
pub fn titlecase(word: &str) -> String {
    let mut letters = word.chars();
    let mut titled = String::with_capacity(word.len());

    // The leading character (if any) takes the uppercase mapping...
    titled.extend(letters.next().into_iter().flat_map(char::to_uppercase));
    // ...and everything after it takes the lowercase mapping.
    titled.extend(letters.flat_map(char::to_lowercase));

    titled
}
use unicode_titlecase::StrTitleCase;

pub fn is_compound_word(word: &str, predicate: &impl Fn(&str) -> bool) -> bool {
trace!("Checking if word is valid compound word: '{}'", word);
Expand Down Expand Up @@ -47,9 +29,10 @@ pub fn is_compound_word(word: &str, predicate: &impl Fn(&str) -> bool) -> bool {
suffix
);

predicate(&titlecase(suffix))
let tc = suffix.to_titlecase_lower_rest();
predicate(&tc)
|| predicate(suffix)
|| is_compound_word(&titlecase(suffix), predicate)
|| is_compound_word(&tc, predicate)
|| is_compound_word(suffix, predicate)
}
None => false,
Expand All @@ -61,21 +44,6 @@ mod tests {
use super::*;
use rstest::rstest;

// Table-driven test for `titlecase`: each `#[case]` pairs an input word with
// the exact string `titlecase` is expected to return for it.
#[rstest]
#[case("hello", "Hello")]
// Mixed and all-caps input: everything after the first character is lowercased.
#[case("bItTe", "Bitte")]
#[case("dANKE", "Danke")]
// Non-ASCII first letter is uppercased as well.
#[case("übel", "Übel")]
#[case("uebel", "Uebel")]
// Input without any cased characters is returned unchanged.
#[case("😀", "😀")]
// A leading lowercase `ß` expands to two uppercase letters.
#[case("ßuper", "SSuper")]
// A leading capital `ẞ` is kept as is.
#[case("ẞuperduper", "ẞuperduper")]
// Trailing punctuation is left untouched.
#[case("WOW!!", "Wow!!")]
#[case("ẞß", "ẞß")]
fn test_titlecase(#[case] word: &str, #[case] expected: &str) {
assert_eq!(titlecase(word), expected);
}

const WORDS: &[&str] = &["Süßwasser", "schwimm", "Bäder", "Mauer", "Dübel", "Kübel"];

#[rstest]
Expand Down
1 change: 1 addition & 0 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ env_logger = "0.10.0"
itertools = "0.10.5"
log = "0.4.17"
common = { path = "../common" }
unicode_titlecase = "2.0.0"

[features]
default = ["all"]
Expand Down
7 changes: 4 additions & 3 deletions core/src/stages/german/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ use crate::stages::{
use cached::proc_macro::cached;
use cached::SizedCache;
use common::lookup::binary_search_uneven;
use common::strings::{is_compound_word, titlecase};
use common::strings::is_compound_word;
use itertools::Itertools;
use log::{debug, trace};
use unicode_titlecase::StrTitleCase;

static VALID_GERMAN_WORDS: &str = include_str!(concat!(env!("OUT_DIR"), "/de.txt")); // Generated in `build.rs`.

Expand Down Expand Up @@ -410,7 +411,7 @@ fn is_valid(word: &str, predicate: &impl Fn(&str) -> bool) -> bool {
}
Ok(WordCasing::AllUppercase) => {
// Convert to something sensible before proceeding.
let tc = titlecase(word);
let tc = word.to_titlecase_lower_rest();
debug_assert!(
WordCasing::try_from(tc.as_str()) == Ok(WordCasing::Titlecase),
"Titlecased word, but isn't categorized correctly."
Expand All @@ -423,7 +424,7 @@ fn is_valid(word: &str, predicate: &impl Fn(&str) -> bool) -> bool {
// treatment.
match word.chars().next() {
Some(c) if c.is_uppercase() => {
let tc = titlecase(word);
let tc = word.to_titlecase_lower_rest();
debug_assert!(
WordCasing::try_from(tc.as_str()) == Ok(WordCasing::Titlecase),
"Titlecased word, but isn't categorized correctly."
Expand Down
2 changes: 1 addition & 1 deletion core/tests/snapshots/cli__german-001_german.snap
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Zwölf Boxkämpfer jagen Viktor quer über den großen Sylter Deich.
Vogel Quax zwickt Johnys Pferd Bim.
Sylvia wagt quick den Jux bei Pforzheim.
Polyfon zwitschernd aßen Maexchens Vögel Rüben, Joghurt und Quark.
"Fix, Schwyz!" quäkt Jürgen blöd vom Paß.
"Fix, Schwyz!" quäkt Jürgen blöd vom Pass.
Victor jagt zwölf Boxkämpfer quer über den großen Sylter Deich.
Falsches Üben von Xylophonmusik quält jeden größeren Zwerg.
Heizölrückstoßabdämpfung.
Expand Down

0 comments on commit 2c5c8e5

Please sign in to comment.