Skip to content

Commit

Permalink
fmt
Browse files Browse the repository at this point in the history
  • Loading branch information
skius committed Aug 9, 2023
1 parent 56774fe commit 2dd2ec8
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 44 deletions.
42 changes: 36 additions & 6 deletions experimental/transliterator_parser/src/compile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,14 @@ as described in the zero-copy format, and the maps here are just arrays)

use crate::parse;
use crate::parse::{ElementLocation as EL, HalfRule, QuantifierKind, UnicodeSet};
use icu_transliteration::provider::RuleBasedTransliterator;
use parse::Result;
use parse::PEK;
use std::collections::{HashMap, HashSet};

mod pass2;
mod rule_group_agg;
use crate::compile::pass2::Pass2;
use rule_group_agg::RuleGroups;

enum SingleDirection {
Expand Down Expand Up @@ -872,19 +874,47 @@ impl Pass1ResultGenerator {
}
}

// TODO: define a `FilterSet` type that is just a `CodePointInversionList` (CPIL, i.e. without strings) and use it everywhere

fn compile_one_direction<'p>(
result: DirectedPass1Result,
variable_definitions: &HashMap<String, &'p [parse::Element]>,
) -> Result<icu_transliteration::provider::RuleBasedTransliterator<'static>> {
let mut p2 = Pass2::try_new(&result.data, variable_definitions)?;
let t = p2.run(result.groups, result.filter)?;
Ok(t)
}

// Returns the `(forward, reverse)` transliterators; each one is `Some` only if `direction` permits that direction.
pub(crate) fn compile(
rules: Vec<parse::Rule>,
direction: parse::Direction,
) -> Result<icu_transliteration::provider::RuleBasedTransliterator<'static>> {
) -> Result<(
Option<RuleBasedTransliterator<'static>>,
Option<RuleBasedTransliterator<'static>>,
)> {
// TODO(#3736): decide if validation should be metadata-direction dependent
// example: transliterator with metadata-direction "forward", and a rule `[a-z] < b ;` (invalid)
// - if validation is dependent, this rule is valid because it's not used in the forward direction
// - if validation is independent, this rule is invalid because the reverse direction is also checked
let mut pass1 = Pass1::new(direction);
pass1.run(&rules)?;
let _result = pass1.generate_result();

todo!()
let mut p1 = Pass1::new(direction);
p1.run(&rules)?;
let p1_result = p1.generate_result()?;

let forward_t = if direction.permits(parse::Direction::Forward) {
let t = compile_one_direction(p1_result.forward_result, &p1_result.variable_definitions)?;
Some(t)
} else {
None
};
let reverse_t = if direction.permits(parse::Direction::Reverse) {
let t = compile_one_direction(p1_result.reverse_result, &p1_result.variable_definitions)?;
Some(t)
} else {
None
};

Ok((forward_t, reverse_t))
}

#[cfg(test)]
Expand Down
61 changes: 25 additions & 36 deletions experimental/transliterator_parser/src/compile/pass2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use std::borrow::Cow;
use icu_collections::codepointinvlist::CodePointInversionList;
use zerovec::VarZeroVec;
use super::*;
use crate::parse::UnicodeSet;
use icu_collections::codepointinvlist::CodePointInversionList;
use zerovec::VarZeroVec;

use icu_transliteration::provider as ds;

Expand All @@ -27,7 +26,7 @@ macro_rules! impl_insert {
struct MutVarTableField<T> {
vec: Vec<T>,
base: u32,
current: u32
current: u32,
}

struct MutVarTable {
Expand Down Expand Up @@ -104,12 +103,7 @@ impl MutVarTable {
})
}

impl_insert!(
insert_compound,
compounds,
String,
quantifiers_opt
);
impl_insert!(insert_compound, compounds, String, quantifiers_opt);
impl_insert!(
insert_quantifier_opt,
quantifiers_opt,
Expand All @@ -128,18 +122,8 @@ impl MutVarTable {
String,
segments
);
impl_insert!(
insert_segment,
segments,
String,
unicode_sets
);
impl_insert!(
insert_unicode_set,
unicode_sets,
UnicodeSet,
function_calls
);
impl_insert!(insert_segment, segments, String, unicode_sets);
impl_insert!(insert_unicode_set, unicode_sets, UnicodeSet, function_calls);
fn insert_function_call(&mut self, elt: ds::FunctionCall<'static>) -> char {
// pass 1 is responsible for this
debug_assert!(self.function_calls.current < self.backref_base - 1);
Expand Down Expand Up @@ -184,20 +168,12 @@ impl MutVarTable {
}
}

struct MutRule {
ante: String,
key: String,
post: String,
replacer: String,
cursor_offset: i32,
}

/// A piece of text that is either a borrowed string literal or a single stand-in `char`.
// NOTE(review): the stand-in is presumably a char that refers to an entry of the
// variable table built during pass 2 — confirm against the users of this enum.
enum LiteralOrStandin<'a> {
/// A string slice borrowed for the lifetime `'a`.
Literal(&'a str),
/// A single character used in place of a literal.
Standin(char),
}

impl ToString for LiteralOrStandin<'_> {
impl<'a> LiteralOrStandin<'a> {
fn to_string(&self) -> String {
match *self {
LiteralOrStandin::Literal(s) => s.to_owned(),
Expand All @@ -217,7 +193,12 @@ pub(super) struct Pass2<'a, 'p> {
}

impl<'a, 'p> Pass2<'a, 'p> {
pub(super) fn try_new(data: &'a Pass1Data, var_definitions: &'a HashMap<String, &'p [parse::Element]>) -> Result<Self> {
// TODO: the API for Pass2 could be better, maybe a non-self Pass2::run()

pub(super) fn try_new(
data: &'a Pass1Data,
var_definitions: &'a HashMap<String, &'p [parse::Element]>,
) -> Result<Self> {
Ok(Pass2 {
var_table: MutVarTable::try_new_from_counts(data.counts)?,
var_definitions,
Expand All @@ -237,7 +218,8 @@ impl<'a, 'p> Pass2<'a, 'p> {
for id in transform_group {
compiled_transform_group.push(self.compile_single_id(&id));
}
self.id_group_list.push(VarZeroVec::from(&compiled_transform_group));
self.id_group_list
.push(VarZeroVec::from(&compiled_transform_group));

let mut compiled_conversion_group = Vec::new();
for rule in conversion_group {
Expand All @@ -255,12 +237,15 @@ impl<'a, 'p> Pass2<'a, 'p> {
cursor_offset,
});
}
self.conversion_group_list.push(VarZeroVec::from(&compiled_conversion_group));
self.conversion_group_list
.push(VarZeroVec::from(&compiled_conversion_group));
}

let res = ds::RuleBasedTransliterator {
visibility: true,
filter: global_filter.map(|f| f.code_points().clone()).unwrap_or(CodePointInversionList::all()),
filter: global_filter
.map(|f| f.code_points().clone())
.unwrap_or(CodePointInversionList::all()),
id_group_list: VarZeroVec::from(&self.id_group_list),
rule_group_list: VarZeroVec::from(&self.conversion_group_list),
variable_table: self.var_table.finalize(),
Expand All @@ -274,7 +259,11 @@ impl<'a, 'p> Pass2<'a, 'p> {

ds::SimpleId {
id: id_string.into(),
filter: id.filter.as_ref().map(|f| f.code_points().clone()).unwrap_or(CodePointInversionList::all()),
filter: id
.filter
.as_ref()
.map(|f| f.code_points().clone())
.unwrap_or(CodePointInversionList::all()),
}
}

Expand Down
18 changes: 16 additions & 2 deletions experimental/transliterator_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,29 @@ pub use parse::ParseErrorKind;
///
/// See [UTS #35 - Transliterators](https://unicode.org/reports/tr35/tr35-general.html#Transforms) for more information.
#[cfg(feature = "compiled_data")]
pub fn parse(source: &str) -> Result<RuleBasedTransliterator<'static>, parse::ParseError> {
pub fn parse(
source: &str,
) -> Result<
(
Option<RuleBasedTransliterator<'static>>,
Option<RuleBasedTransliterator<'static>>,
),
parse::ParseError,
> {
parse_unstable(source, &icu_properties::provider::Baked)
}

#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, parse())]
pub fn parse_unstable<P>(
source: &str,
provider: &P,
) -> Result<RuleBasedTransliterator<'static>, parse::ParseError>
) -> Result<
(
Option<RuleBasedTransliterator<'static>>,
Option<RuleBasedTransliterator<'static>>,
),
parse::ParseError,
>
where
P: ?Sized
+ DataProvider<AsciiHexDigitV1Marker>
Expand Down

0 comments on commit 2dd2ec8

Please sign in to comment.