From 2dd2ec8828da2e4c9071d7fa2141aa07b1175101 Mon Sep 17 00:00:00 2001 From: Niels Saurer Date: Wed, 9 Aug 2023 19:01:15 +0200 Subject: [PATCH] fmt --- .../transliterator_parser/src/compile.rs | 42 +++++++++++-- .../src/compile/pass2.rs | 61 ++++++++----------- experimental/transliterator_parser/src/lib.rs | 18 +++++- 3 files changed, 77 insertions(+), 44 deletions(-) diff --git a/experimental/transliterator_parser/src/compile.rs b/experimental/transliterator_parser/src/compile.rs index e394c3d0c6c..f4644121e49 100644 --- a/experimental/transliterator_parser/src/compile.rs +++ b/experimental/transliterator_parser/src/compile.rs @@ -127,12 +127,14 @@ as described in the zero-copy format, and the maps here are just arrays) use crate::parse; use crate::parse::{ElementLocation as EL, HalfRule, QuantifierKind, UnicodeSet}; +use icu_transliteration::provider::RuleBasedTransliterator; use parse::Result; use parse::PEK; use std::collections::{HashMap, HashSet}; mod pass2; mod rule_group_agg; +use crate::compile::pass2::Pass2; use rule_group_agg::RuleGroups; enum SingleDirection { @@ -872,19 +874,47 @@ impl Pass1ResultGenerator { } } +// TODO: define type FilterSet that is just a CPIL (without strings) and use that everywhere + +fn compile_one_direction<'p>( + result: DirectedPass1Result, + variable_definitions: &HashMap, +) -> Result> { + let mut p2 = Pass2::try_new(&result.data, variable_definitions)?; + let t = p2.run(result.groups, result.filter)?; + Ok(t) +} + +// returns (forward, backward) transliterators if they were requested pub(crate) fn compile( rules: Vec, direction: parse::Direction, -) -> Result> { +) -> Result<( + Option>, + Option>, +)> { // TODO(#3736): decide if validation should be metadata-direction dependent // example: transliterator with metadata-direction "forward", and a rule `[a-z] < b ;` (invalid) // - if validation is dependent, this rule is valid because it's not used in the forward direction // - if validation is independent, this rule is invalid because the reverse direction is also checked - let mut pass1 = Pass1::new(direction); - pass1.run(&rules)?; - let _result = pass1.generate_result(); - - todo!() + let mut p1 = Pass1::new(direction); + p1.run(&rules)?; + let p1_result = p1.generate_result()?; + + let forward_t = if direction.permits(parse::Direction::Forward) { + let t = compile_one_direction(p1_result.forward_result, &p1_result.variable_definitions)?; + Some(t) + } else { + None + }; + let reverse_t = if direction.permits(parse::Direction::Reverse) { + let t = compile_one_direction(p1_result.reverse_result, &p1_result.variable_definitions)?; + Some(t) + } else { + None + }; + + Ok((forward_t, reverse_t)) } #[cfg(test)] diff --git a/experimental/transliterator_parser/src/compile/pass2.rs b/experimental/transliterator_parser/src/compile/pass2.rs index 95c3fb8cc11..993ed16a165 100644 --- a/experimental/transliterator_parser/src/compile/pass2.rs +++ b/experimental/transliterator_parser/src/compile/pass2.rs @@ -2,11 +2,10 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use std::borrow::Cow; -use icu_collections::codepointinvlist::CodePointInversionList; -use zerovec::VarZeroVec; use super::*; use crate::parse::UnicodeSet; +use icu_collections::codepointinvlist::CodePointInversionList; +use zerovec::VarZeroVec; use icu_transliteration::provider as ds; @@ -27,7 +26,7 @@ macro_rules! impl_insert { struct MutVarTableField { vec: Vec, base: u32, - current: u32 + current: u32, } struct MutVarTable { @@ -104,12 +103,7 @@ impl MutVarTable { }) } - impl_insert!( - insert_compound, - compounds, - String, - quantifiers_opt - ); + impl_insert!(insert_compound, compounds, String, quantifiers_opt); impl_insert!( insert_quantifier_opt, quantifiers_opt, @@ -128,18 +122,8 @@ impl MutVarTable { String, segments ); - impl_insert!( - insert_segment, - segments, - String, - unicode_sets - ); - impl_insert!( - insert_unicode_set, - unicode_sets, - UnicodeSet, - function_calls - ); + impl_insert!(insert_segment, segments, String, unicode_sets); + impl_insert!(insert_unicode_set, unicode_sets, UnicodeSet, function_calls); fn insert_function_call(&mut self, elt: ds::FunctionCall<'static>) -> char { // pass 1 is responsible for this debug_assert!(self.function_calls.current < self.backref_base - 1); @@ -184,20 +168,12 @@ impl MutVarTable { } } -struct MutRule { - ante: String, - key: String, - post: String, - replacer: String, - cursor_offset: i32, -} - enum LiteralOrStandin<'a> { Literal(&'a str), Standin(char), } -impl ToString for LiteralOrStandin<'_> { +impl<'a> LiteralOrStandin<'a> { fn to_string(&self) -> String { match *self { LiteralOrStandin::Literal(s) => s.to_owned(), @@ -217,7 +193,12 @@ pub(super) struct Pass2<'a, 'p> { } impl<'a, 'p> Pass2<'a, 'p> { - pub(super) fn try_new(data: &'a Pass1Data, var_definitions: &'a HashMap) -> Result { + // TODO: the API for Pass2 could be better, maybe a non-self Pass2::run() + + pub(super) fn try_new( + data: &'a Pass1Data, + var_definitions: &'a HashMap, + ) -> Result { Ok(Pass2 { var_table: MutVarTable::try_new_from_counts(data.counts)?, var_definitions, @@ -237,7 +218,8 @@ impl<'a, 'p> Pass2<'a, 'p> { for id in transform_group { compiled_transform_group.push(self.compile_single_id(&id)); } - self.id_group_list.push(VarZeroVec::from(&compiled_transform_group)); + self.id_group_list + .push(VarZeroVec::from(&compiled_transform_group)); let mut compiled_conversion_group = Vec::new(); for rule in conversion_group { @@ -255,12 +237,15 @@ impl<'a, 'p> Pass2<'a, 'p> { cursor_offset, }); } - self.conversion_group_list.push(VarZeroVec::from(&compiled_conversion_group)); + self.conversion_group_list + .push(VarZeroVec::from(&compiled_conversion_group)); } let res = ds::RuleBasedTransliterator { visibility: true, - filter: global_filter.map(|f| f.code_points().clone()).unwrap_or(CodePointInversionList::all()), + filter: global_filter + .map(|f| f.code_points().clone()) + .unwrap_or(CodePointInversionList::all()), id_group_list: VarZeroVec::from(&self.id_group_list), rule_group_list: VarZeroVec::from(&self.conversion_group_list), variable_table: self.var_table.finalize(), @@ -274,7 +259,11 @@ impl<'a, 'p> Pass2<'a, 'p> { ds::SimpleId { id: id_string.into(), - filter: id.filter.as_ref().map(|f| f.code_points().clone()).unwrap_or(CodePointInversionList::all()), + filter: id + .filter + .as_ref() + .map(|f| f.code_points().clone()) + .unwrap_or(CodePointInversionList::all()), } } diff --git a/experimental/transliterator_parser/src/lib.rs b/experimental/transliterator_parser/src/lib.rs index 993a95285d1..e3ac130f70c 100644 --- a/experimental/transliterator_parser/src/lib.rs +++ b/experimental/transliterator_parser/src/lib.rs @@ -41,7 +41,15 @@ pub use parse::ParseErrorKind; /// /// See [UTS #35 - Transliterators](https://unicode.org/reports/tr35/tr35-general.html#Transforms) for more information. #[cfg(feature = "compiled_data")] -pub fn parse(source: &str) -> Result, parse::ParseError> { +pub fn parse( + source: &str, +) -> Result< + ( + Option>, + Option>, + ), + parse::ParseError, +> { parse_unstable(source, &icu_properties::provider::Baked) } @@ -49,7 +57,13 @@ pub fn parse(source: &str) -> Result, parse::Pa pub fn parse_unstable

( source: &str, provider: &P, -) -> Result, parse::ParseError> +) -> Result< + ( + Option>, + Option>, + ), + parse::ParseError, +> where P: ?Sized + DataProvider