Squash of transliterator-datastruct-generation

commit 1145a17 Author: Niels Saurer <[email protected]> Date: Thu Aug 10 02:06:46 2023 +0200 Squash merge transliterator-ir commit 9d55038 Author: Niels Saurer <[email protected]> Date: Thu Aug 10 02:03:34 2023 +0200 fix push_front/push_back mixup commit dc8dda7 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 23:02:10 2023 +0200 remove empty line commit bfe5827 Merge: c85e861 f549131 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 20:57:11 2023 +0200 Merge branch 'main' into transliterator-ir commit c85e861 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 17:40:53 2023 +0200 borrow SingleID commit 06425a1 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 17:22:03 2023 +0200 fix comment indentation commit 2f70922 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 17:09:13 2023 +0200 update comments commit 47444ee Author: Niels Saurer <[email protected]> Date: Wed Aug 9 17:06:43 2023 +0200 fmt commit c0de3a0 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 17:03:19 2023 +0200 fix clippy, allow testing of intermediate pass1 values commit 227f738 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 16:55:53 2023 +0200 fix compile errors by introducing 2 small clones per transliterator commit 512b158 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 16:49:01 2023 +0200 doesn't compile - missing self deconstruction commit 7848f09 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 16:40:51 2023 +0200 use rule group aggregation in pass1 commit 93663e4 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 16:09:29 2023 +0200 add rule group aggregation commit 57666eb Author: Niels Saurer <[email protected]> Date: Wed Aug 9 14:12:19 2023 +0200 Squash of transliterator-compiler commit d1812b4 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 13:31:53 2023 +0200 fix merge mistake commit f15f6eb Merge: abb91cc a39cfed Author: Niels Saurer <[email protected]> Date: Wed Aug 9 13:27:08 2023 +0200 Merge branch 
'main' into transliterator-compiler commit abb91cc Author: Niels Saurer <[email protected]> Date: Wed Aug 9 01:12:13 2023 +0200 reformat tests commit f6a10f5 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 00:30:09 2023 +0200 sizes => counts commit 9ffc2f0 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 00:26:27 2023 +0200 add more docs commit eae5748 Author: Niels Saurer <[email protected]> Date: Tue Aug 8 23:46:20 2023 +0200 remove TODO commit 6b09689 Author: Niels Saurer <[email protected]> Date: Tue Aug 8 23:28:42 2023 +0200 improve docs commit c9b16d5 Author: Niels Saurer <[email protected]> Date: Tue Aug 8 23:15:23 2023 +0200 clippy commit 020a677 Author: Niels Saurer <[email protected]> Date: Tue Aug 8 22:53:14 2023 +0200 add result aggregation to first pass commit 2d1bfd7 Author: Niels Saurer <[email protected]> Date: Tue Aug 8 16:28:23 2023 +0200 add tests commit 6f35ea5 Author: Niels Saurer <[email protected]> Date: Mon Aug 7 22:25:56 2023 +0200 CI fixes commit c6c4844 Author: Niels Saurer <[email protected]> Date: Sun Aug 6 20:06:31 2023 +0200 first steps commit fb68218 Author: Niels Saurer <[email protected]> Date: Wed Jul 19 16:21:33 2023 +0000 Squash transliterator-parser structure for transliterator parser start parsing ':: ... 
;' rules complete ::-rule parsing add more global filter tests add negative tests for '::'-rules, be more restrictive update error docs add comment about static UnicodeSet type alias add variable defs escaping and fix unicodeset handling fix unicodeset tests function calls add variable-inside-unicodesets update tests rewrite parse_section using parse_element fix unquoted literal handling add cursor/placeholder tests add cursor support add allow(unused) for this PR remove unused dependencies add todo about inefficient unicodeset variablemap handling allow usage of UnicodeSet's VariableMap directly in TransliteratorParser avoid one allocation per parsed unicodeset remove done todo about allocation-free unicodeset parser hook avoid allocations for number parsing invalid num err with offset update comment switch to allocation free hex parsing (and support for multi escapes) fix main merge conflict support \p unicodesets remove todo for \p unicodeset parsing turn low-prio todo about avoiding clones into note turn non-memory-safety safety comments into regular comments add issue number to TODOs add transliteration component crate commit 208abd7 Author: Niels Saurer <[email protected]> Date: Thu Aug 10 02:02:23 2023 +0200 add data struct generation tests commit d1f7e7c Author: Niels Saurer <[email protected]> Date: Thu Aug 10 00:58:50 2023 +0200 fix debug_assert bug commit 1f5c8dd Author: Niels Saurer <[email protected]> Date: Wed Aug 9 23:25:17 2023 +0200 refactor pass2 slightly commit ae14cdc Author: Niels Saurer <[email protected]> Date: Wed Aug 9 21:04:38 2023 +0200 clippy commit 8a14e3e Author: Niels Saurer <[email protected]> Date: Wed Aug 9 21:02:28 2023 +0200 tutorials cargo lock commit 4256873 Merge: 72cff57 f549131 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 20:56:20 2023 +0200 Merge branch 'main' into transliterator-datastruct-generation commit 72cff57 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 20:42:03 2023 +0200 refactor pass2 
interface commit 8fa4dfd Author: Niels Saurer <[email protected]> Date: Wed Aug 9 20:31:29 2023 +0200 skip compilation of cursors on source side, anchors on target side commit 54b0542 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 19:09:50 2023 +0200 add comment commit cba53a7 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 19:04:27 2023 +0200 fix clippy warnings commit 2dd2ec8 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 19:01:15 2023 +0200 fmt commit 56774fe Author: Niels Saurer <[email protected]> Date: Wed Aug 9 18:45:22 2023 +0200 refactor MutVarTable commit 6176769 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 18:31:18 2023 +0200 revamp pass2 API commit f8459c9 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 18:22:47 2023 +0200 initial final data struct generation commit d6873b0 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 17:48:41 2023 +0200 Squash of transliterator-ir commit c85e861 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 17:40:53 2023 +0200 borrow SingleID commit 06425a1 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 17:22:03 2023 +0200 fix comment indentation commit 2f70922 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 17:09:13 2023 +0200 update comments commit 47444ee Author: Niels Saurer <[email protected]> Date: Wed Aug 9 17:06:43 2023 +0200 fmt commit c0de3a0 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 17:03:19 2023 +0200 fix clippy, allow testing of intermediate pass1 values commit 227f738 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 16:55:53 2023 +0200 fix compile errors by introducing 2 small clones per transliterator commit 512b158 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 16:49:01 2023 +0200 doesn't compile - missing self deconstruction commit 7848f09 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 16:40:51 2023 +0200 use rule group aggregation in pass1 commit 93663e4 Author: Niels Saurer <[email 
protected]> Date: Wed Aug 9 16:09:29 2023 +0200 add rule group aggregation commit 57666eb Author: Niels Saurer <[email protected]> Date: Wed Aug 9 14:12:19 2023 +0200 Squash of transliterator-compiler commit d1812b4 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 13:31:53 2023 +0200 fix merge mistake commit f15f6eb Merge: abb91cc a39cfed Author: Niels Saurer <[email protected]> Date: Wed Aug 9 13:27:08 2023 +0200 Merge branch 'main' into transliterator-compiler commit abb91cc Author: Niels Saurer <[email protected]> Date: Wed Aug 9 01:12:13 2023 +0200 reformat tests commit f6a10f5 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 00:30:09 2023 +0200 sizes => counts commit 9ffc2f0 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 00:26:27 2023 +0200 add more docs commit eae5748 Author: Niels Saurer <[email protected]> Date: Tue Aug 8 23:46:20 2023 +0200 remove TODO commit 6b09689 Author: Niels Saurer <[email protected]> Date: Tue Aug 8 23:28:42 2023 +0200 improve docs commit c9b16d5 Author: Niels Saurer <[email protected]> Date: Tue Aug 8 23:15:23 2023 +0200 clippy commit 020a677 Author: Niels Saurer <[email protected]> Date: Tue Aug 8 22:53:14 2023 +0200 add result aggregation to first pass commit 2d1bfd7 Author: Niels Saurer <[email protected]> Date: Tue Aug 8 16:28:23 2023 +0200 add tests commit 6f35ea5 Author: Niels Saurer <[email protected]> Date: Mon Aug 7 22:25:56 2023 +0200 CI fixes commit c6c4844 Author: Niels Saurer <[email protected]> Date: Sun Aug 6 20:06:31 2023 +0200 first steps commit fb68218 Author: Niels Saurer <[email protected]> Date: Wed Jul 19 16:21:33 2023 +0000 Squash transliterator-parser structure for transliterator parser start parsing ':: ... 
;' rules complete ::-rule parsing add more global filter tests add negative tests for '::'-rules, be more restrictive update error docs add comment about static UnicodeSet type alias add variable defs escaping and fix unicodeset handling fix unicodeset tests function calls add variable-inside-unicodesets update tests rewrite parse_section using parse_element fix unquoted literal handling add cursor/placeholder tests add cursor support add allow(unused) for this PR remove unused dependencies add todo about inefficient unicodeset variablemap handling allow usage of UnicodeSet's VariableMap directly in TransliteratorParser avoid one allocation per parsed unicodeset remove done todo about allocation-free unicodeset parser hook avoid allocations for number parsing invalid num err with offset update comment switch to allocation free hex parsing (and support for multi escapes) fix main merge conflict support \p unicodesets remove todo for \p unicodeset parsing turn low-prio todo about avoiding clones into note turn non-memory-safety safety comments into regular comments add issue number to TODOs add transliteration component crate commit a39cfed Author: Niels Saurer <[email protected]> Date: Wed Aug 9 13:19:28 2023 +0200 Add Parsing for Rule-Based Transliterators (unicode-org#3730) commit 57e9d59 Author: Andrew Cupps <[email protected]> Date: Tue Aug 8 18:53:26 2023 -0700 Resolve follow-up comments to unicode-org#3760 (unicode-org#3818) * Docs for `U` and `r` * Delete empty test and add todo * Remove old code and empty era check * Add todo commit c55c641 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 02:36:53 2023 +0200 wip commit c6cbb0a Author: Niels Saurer <[email protected]> Date: Wed Aug 9 01:20:08 2023 +0200 Squash of transliterator-compiler commit abb91cc Author: Niels Saurer <[email protected]> Date: Wed Aug 9 01:12:13 2023 +0200 reformat tests commit f6a10f5 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 00:30:09 2023 +0200 sizes => counts 
commit 9ffc2f0 Author: Niels Saurer <[email protected]> Date: Wed Aug 9 00:26:27 2023 +0200 add more docs commit eae5748 Author: Niels Saurer <[email protected]> Date: Tue Aug 8 23:46:20 2023 +0200 remove TODO commit 6b09689 Author: Niels Saurer <[email protected]> Date: Tue Aug 8 23:28:42 2023 +0200 improve docs commit c9b16d5 Author: Niels Saurer <[email protected]> Date: Tue Aug 8 23:15:23 2023 +0200 clippy commit 020a677 Author: Niels Saurer <[email protected]> Date: Tue Aug 8 22:53:14 2023 +0200 add result aggregation to first pass commit 2d1bfd7 Author: Niels Saurer <[email protected]> Date: Tue Aug 8 16:28:23 2023 +0200 add tests commit 6f35ea5 Author: Niels Saurer <[email protected]> Date: Mon Aug 7 22:25:56 2023 +0200 CI fixes commit c6c4844 Author: Niels Saurer <[email protected]> Date: Sun Aug 6 20:06:31 2023 +0200 first steps commit fb68218 Author: Niels Saurer <[email protected]> Date: Wed Jul 19 16:21:33 2023 +0000 Squash transliterator-parser structure for transliterator parser start parsing ':: ... 
;' rules complete ::-rule parsing add more global filter tests add negative tests for '::'-rules, be more restrictive update error docs add comment about static UnicodeSet type alias add variable defs escaping and fix unicodeset handling fix unicodeset tests function calls add variable-inside-unicodesets update tests rewrite parse_section using parse_element fix unquoted literal handling add cursor/placeholder tests add cursor support add allow(unused) for this PR remove unused dependencies add todo about inefficient unicodeset variablemap handling allow usage of UnicodeSet's VariableMap directly in TransliteratorParser avoid one allocation per parsed unicodeset remove done todo about allocation-free unicodeset parser hook avoid allocations for number parsing invalid num err with offset update comment switch to allocation free hex parsing (and support for multi escapes) fix main merge conflict support \p unicodesets remove todo for \p unicodeset parsing turn low-prio todo about avoiding clones into note turn non-memory-safety safety comments into regular comments add issue number to TODOs add transliteration component crate
skius · Aug 10, 2023 · c6505b6 · c6505b6
1 parent 28f11dd
commit c6505b6
Show file tree

Hide file tree

Showing 5 changed files with 223 additions and 40 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/experimental/transliterator_parser/Cargo.toml b/experimental/transliterator_parser/Cargo.toml
@@ -24,6 +24,9 @@ include = [
 [package.metadata.docs.rs]
 all-features = true
 
+[dev-dependencies]
+zerofrom = { version = "0.1.1", path = "../../utils/zerofrom" }
+
 [dependencies]
 icu_collections = { path = "../../components/collections" }
 icu_properties = { path = "../../components/properties", default-features = false }

diff --git a/experimental/transliterator_parser/src/compile/pass2.rs b/experimental/transliterator_parser/src/compile/pass2.rs
@@ -9,6 +9,7 @@ use icu_collections::codepointinvlist::CodePointInversionList;
 use std::fmt::{Display, Formatter};
 use zerovec::VarZeroVec;
 
+use crate::compile::rule_group_agg::UniConversionRule;
 use icu_transliteration::provider as ds;
 
 macro_rules! impl_insert {
@@ -194,9 +195,6 @@ pub(super) struct Pass2<'a, 'p> {
     var_definitions: &'a HashMap<String, &'p [parse::Element]>,
     // the inverse of VarTable.compounds
     var_to_char: HashMap<String, char>,
-
-    id_group_list: Vec<VarZeroVec<'static, ds::SimpleIdULE>>,
-    conversion_group_list: Vec<VarZeroVec<'static, ds::RuleULE>>,
 }
 
 impl<'a, 'p> Pass2<'a, 'p> {
@@ -216,8 +214,6 @@ impl<'a, 'p> Pass2<'a, 'p> {
             var_table: MutVarTable::try_new_from_counts(counts)?,
             var_definitions,
             var_to_char: HashMap::new(),
-            id_group_list: Vec::new(),
-            conversion_group_list: Vec::new(),
         })
     }
 
@@ -226,47 +222,61 @@ impl<'a, 'p> Pass2<'a, 'p> {
         rule_groups: super::RuleGroups<'p>,
         global_filter: Option<FilterSet>,
     ) -> Result<ds::RuleBasedTransliterator<'static>> {
+        let mut compiled_transform_groups: Vec<VarZeroVec<'static, ds::SimpleIdULE>> = Vec::new();
+        let mut compiled_conversion_groups: Vec<VarZeroVec<'static, ds::RuleULE>> = Vec::new();
+
         for (transform_group, conversion_group) in rule_groups {
-            let mut compiled_transform_group = Vec::new();
-            for id in transform_group {
-                compiled_transform_group.push(self.compile_single_id(id.into_owned()));
-            }
-            self.id_group_list
-                .push(VarZeroVec::from(&compiled_transform_group));
+            let compiled_transform_group: Vec<_> = transform_group
+                .into_iter()
+                .map(|id| self.compile_single_id(id.into_owned()))
+                .collect();
+            compiled_transform_groups.push(VarZeroVec::from(&compiled_transform_group));
 
-            let mut compiled_conversion_group = Vec::new();
-            for rule in conversion_group {
-                let ante = self.compile_section(rule.ante, parse::ElementLocation::Source);
-                let key = self.compile_section(rule.key, parse::ElementLocation::Source);
-                let post = self.compile_section(rule.post, parse::ElementLocation::Source);
-                let replacer =
-                    self.compile_section(rule.replacement, parse::ElementLocation::Target);
-                let cursor_offset = rule.cursor_offset;
-                compiled_conversion_group.push(ds::Rule {
-                    ante: ante.into(),
-                    key: key.into(),
-                    post: post.into(),
-                    replacer: replacer.into(),
-                    cursor_offset,
-                });
-            }
-            self.conversion_group_list
-                .push(VarZeroVec::from(&compiled_conversion_group));
+            let compiled_conversion_group: Vec<_> = conversion_group
+                .into_iter()
+                .map(|rule| self.compile_conversion_rule(rule))
+                .collect();
+            compiled_conversion_groups.push(VarZeroVec::from(&compiled_conversion_group));
         }
 
         let res = ds::RuleBasedTransliterator {
             visibility: true, // TODO(#3736): use metadata
             filter: global_filter.unwrap_or(CodePointInversionList::all()),
-            id_group_list: VarZeroVec::from(&self.id_group_list),
-            rule_group_list: VarZeroVec::from(&self.conversion_group_list),
+            id_group_list: VarZeroVec::from(&compiled_transform_groups),
+            rule_group_list: VarZeroVec::from(&compiled_conversion_groups),
             variable_table: self.var_table.finalize(),
         };
 
         Ok(res)
     }
 
+    fn compile_conversion_rule(&mut self, rule: UniConversionRule<'p>) -> ds::Rule<'static> {
+        let ante = self.compile_section(rule.ante, parse::ElementLocation::Source);
+        let key = self.compile_section(rule.key, parse::ElementLocation::Source);
+        let post = self.compile_section(rule.post, parse::ElementLocation::Source);
+        let replacer = self.compile_section(rule.replacement, parse::ElementLocation::Target);
+        let cursor_offset = rule.cursor_offset;
+        ds::Rule {
+            ante: ante.into(),
+            key: key.into(),
+            post: post.into(),
+            replacer: replacer.into(),
+            cursor_offset,
+        }
+    }
+
     fn compile_single_id(&mut self, id: parse::SingleId) -> ds::SimpleId<'static> {
-        let id_string = id.basic_id.source.clone(); // TODO(#3736): map legacy ID to internal ID and use here
+        // TODO(#3736): map legacy ID to internal ID and use here
+        let id_string = format!(
+            "{}-{}{}",
+            id.basic_id.source,
+            id.basic_id.target,
+            if let Some(v) = id.basic_id.variant {
+                format!("/{}", v)
+            } else {
+                "".to_owned()
+            }
+        );
 
         ds::SimpleId {
             id: id_string.into(),

diff --git a/experimental/transliterator_parser/src/compile/rule_group_agg.rs b/experimental/transliterator_parser/src/compile/rule_group_agg.rs
@@ -136,7 +136,6 @@ impl<'p> ForwardRuleGroupAggregator<'p> {
 
 // Represents a non-empty rule group for the forward direction.
 #[derive(Debug, Clone)]
-
 enum ForwardRuleGroup<'p> {
     Conversion(Vec<UniConversionRule<'p>>),
     Transform(Vec<Cow<'p, parse::SingleId>>),
@@ -182,7 +181,7 @@ impl<'p> ForwardRuleGroup<'p> {
 // contiguous C's keep the source order, but contiguous T's are reversed. Also the overall order
 // is reversed, of course.
 //
-// We do this by using VecDeque, push_back, and make_contiguous in the end.
+// We do this by using VecDeque, push_front, and make_contiguous in the end.
 #[derive(Debug, Clone)]
 pub(crate) struct ReverseRuleGroupAggregator<'p> {
     current: ReverseRuleGroup<'p>,
@@ -264,7 +263,7 @@ impl<'p> ReverseRuleGroupAggregator<'p> {
                 };
                 let vec_transform_group = transform_group.into(); // non-allocating conversion
                 self.groups
-                    .push_back((vec_transform_group, associated_conv_group));
+                    .push_front((vec_transform_group, associated_conv_group));
             }
         }
     }
@@ -277,7 +276,7 @@ impl<'p> ReverseRuleGroupAggregator<'p> {
         if let Some(conv_group) = self.preceding_conversion_group.take() {
             // a trailing conversion group in source order is the same as having a conversion
             // group as the first in-order group. we can just prepend an empty transform group.
-            self.groups.push_back((Vec::new(), conv_group));
+            self.groups.push_front((Vec::new(), conv_group));
         }
 
         self.groups.into() // non-allocating conversion
@@ -289,7 +288,7 @@ impl<'p> ReverseRuleGroupAggregator<'p> {
 enum ReverseRuleGroup<'p> {
     // because contiguous C's are aggregated in source-order, we can just use a Vec
     Conversion(Vec<UniConversionRule<'p>>),
-    // but contiguous T's are aggregated in reverse-order, so we need to use a VecDeque and push_back
+    // but contiguous T's are aggregated in reverse-order, so we need to use a VecDeque and push_front
     Transform(VecDeque<Cow<'p, parse::SingleId>>),
 }
 
@@ -306,7 +305,7 @@ impl<'p> ReverseRuleGroup<'p> {
 
     fn new_transform(rule: Cow<'p, parse::SingleId>) -> Self {
         let mut group = VecDeque::new();
-        group.push_back(rule);
+        group.push_front(rule);
         Self::Transform(group)
     }
 
@@ -319,8 +318,8 @@ impl<'p> ReverseRuleGroup<'p> {
             }
             (Self::Transform(group), UniRule::Transform(rule)) => {
                 // we receive rules via `push` in source-order, which is the opposite order we want,
-                // so we push_back.
-                group.push_back(rule);
+                // so we push_front.
+                group.push_front(rule);
                 None
             }
             (Self::Conversion(_), UniRule::Transform(new_rule)) => {

diff --git a/experimental/transliterator_parser/src/lib.rs b/experimental/transliterator_parser/src/lib.rs
@@ -128,3 +128,173 @@ where
     // TODO(#3736): pass direction from metadata
     compile::compile(parsed, parse::Direction::Both)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::parse::UnicodeSet;
+    use icu_collections::codepointinvlist::CodePointInversionList;
+    use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
+    use icu_transliteration::provider as ds;
+    use zerofrom::ZeroFrom;
+
+    fn parse_set(source: &str) -> UnicodeSet {
+        icu_unicodeset_parser::parse_unstable(source, &icu_properties::provider::Baked)
+            .expect("Parsing failed")
+            .0
+    }
+
+    #[test]
+    fn test_source_to_struct() {
+        let source = r#"
+        :: [1] ;
+        :: Latin-InterIndic ;
+        $a = [a] [b]+ ;
+        $unused = [c{string}]+? ;
+        $b = $a? 'literal chars' ;
+        x } [a-z] > y ;
+        $a > ab ;
+        'reverse output:' &RevFnCall($1 'padding') < ($b) ;
+        ^ left } $ <> ^ { right } [0-9] $ ;
+        :: [\ ] Remove (AnyRev-AddRandomSpaces/FiftyPercent) ;
+        # splits up the forward rules
+        forward rule that > splits up rule groups ;
+        :: InterIndic-Devanagari ;
+        "#;
+
+        let (forward, reverse) = parse(source).expect("parsing failed");
+        let forward = forward.expect("forward transliterator expected");
+        let reverse = reverse.expect("reverse transliterator expected");
+
+        {
+            assert_eq!(&forward.filter, parse_set("[1]").code_points());
+
+            let vt = &forward.variable_table;
+            assert_eq!(vt.compounds.len(), 1);
+            assert_eq!(vt.quantifiers_opt.len(), 0);
+            assert_eq!(vt.quantifiers_kleene.len(), 0);
+            assert_eq!(vt.quantifiers_kleene_plus.len(), 1);
+            assert_eq!(vt.segments.len(), 0);
+            assert_eq!(vt.unicode_sets.len(), 3);
+            assert_eq!(vt.function_calls.len(), 0);
+
+            assert_eq!(&vt.compounds[0], "\u{F0003}\u{F0001}"); // [a] and [b]+ (the quantifier contains [b])
+            assert_eq!(&vt.quantifiers_kleene_plus[0], "\u{F0004}"); // [b] from [b]+
+            let uset1 = CodePointInversionListAndStringList::zero_from(&vt.unicode_sets[0]);
+            assert_eq!(uset1, parse_set("[a-z]"));
+            let uset2 = CodePointInversionListAndStringList::zero_from(&vt.unicode_sets[1]);
+            assert_eq!(uset2, parse_set("[a]"));
+            let uset3 = CodePointInversionListAndStringList::zero_from(&vt.unicode_sets[2]);
+            assert_eq!(uset3, parse_set("[b]"));
+
+            assert_eq!(forward.id_group_list.len(), 3);
+            assert_eq!(forward.rule_group_list.len(), 3);
+
+            assert_eq!(forward.id_group_list[0].len(), 1);
+            assert_eq!(forward.id_group_list[1].len(), 1);
+            assert_eq!(forward.id_group_list[2].len(), 1);
+
+            assert_eq!(forward.rule_group_list[0].len(), 3);
+            assert_eq!(forward.rule_group_list[1].len(), 1);
+            assert_eq!(forward.rule_group_list[2].len(), 0);
+
+            let rule1_1 = ds::Rule::zero_from(&forward.rule_group_list[0][0]);
+            assert_eq!(rule1_1.ante, "");
+            assert_eq!(rule1_1.key, "x");
+            assert_eq!(rule1_1.post, "\u{F0002}"); // [a-z]
+            assert_eq!(rule1_1.replacer, "y");
+
+            let rule1_2 = ds::Rule::zero_from(&forward.rule_group_list[0][1]);
+            assert_eq!(rule1_2.ante, "");
+            assert_eq!(rule1_2.key, "\u{F0000}"); // $a
+            assert_eq!(rule1_2.post, "");
+            assert_eq!(rule1_2.replacer, "ab");
+
+            let rule1_3 = ds::Rule::zero_from(&forward.rule_group_list[0][2]);
+            assert_eq!(rule1_3.ante, "");
+            assert_eq!(rule1_3.key, "\u{FFFFC}left"); // start anchor
+            assert_eq!(rule1_3.post, "\u{FFFFD}"); // end anchor
+            assert_eq!(rule1_3.replacer, "right");
+
+            let rule2_1 = ds::Rule::zero_from(&forward.rule_group_list[1][0]);
+            assert_eq!(rule2_1.ante, "");
+            assert_eq!(rule2_1.key, "forwardrulethat");
+            assert_eq!(rule2_1.post, "");
+            assert_eq!(rule2_1.replacer, "splitsuprulegroups");
+
+            let id1 = ds::SimpleId::zero_from(&forward.id_group_list[0][0]);
+            assert_eq!(id1.id, "Latin-InterIndic");
+            assert_eq!(id1.filter, CodePointInversionList::all());
+
+            let id2 = ds::SimpleId::zero_from(&forward.id_group_list[1][0]);
+            assert_eq!(id2.id, "Any-Remove");
+            assert_eq!(&id2.filter, parse_set(r"[\ ]").code_points());
+
+            let id3 = ds::SimpleId::zero_from(&forward.id_group_list[2][0]);
+            assert_eq!(id3.id, "InterIndic-Devanagari");
+            assert_eq!(id3.filter, CodePointInversionList::all());
+        }
+        {
+            assert_eq!(&reverse.filter, &CodePointInversionList::all());
+
+            let vt = &reverse.variable_table;
+            assert_eq!(vt.compounds.len(), 2); // base: \u{F0000}
+            assert_eq!(vt.quantifiers_opt.len(), 1); // base: \u{F0002}
+            assert_eq!(vt.quantifiers_kleene.len(), 0); // base: \u{F0003}
+            assert_eq!(vt.quantifiers_kleene_plus.len(), 1); // base: \u{F0003}
+            assert_eq!(vt.segments.len(), 1); // base: \u{F0004}
+            assert_eq!(vt.unicode_sets.len(), 3); // base: \u{F0005}
+            assert_eq!(vt.function_calls.len(), 1); // base: \u{F0008}
+                                                    // backref base: \u{F0009}
+
+            assert_eq!(&vt.compounds[0], "\u{F0005}\u{F0003}"); // $a = [a] [b]+ (quantifier contains [b])
+            assert_eq!(&vt.compounds[1], "\u{F0002}literal chars"); // $b = $a? (quantifier contains $a)
+            assert_eq!(&vt.quantifiers_opt[0], "\u{F0000}"); // $a from $a?
+            assert_eq!(&vt.quantifiers_kleene_plus[0], "\u{F0006}"); // [b] from [b]+
+            assert_eq!(&vt.segments[0], "\u{F0001}"); // $b from ($b)
+            let uset1 = CodePointInversionListAndStringList::zero_from(&vt.unicode_sets[0]);
+            assert_eq!(uset1, parse_set("[a]"));
+            let uset2 = CodePointInversionListAndStringList::zero_from(&vt.unicode_sets[1]);
+            assert_eq!(uset2, parse_set("[b]"));
+            let uset3 = CodePointInversionListAndStringList::zero_from(&vt.unicode_sets[2]);
+            assert_eq!(uset3, parse_set("[0-9]"));
+            let fcall = ds::FunctionCall::zero_from(&vt.function_calls[0]);
+            assert_eq!(fcall.translit.id, "Any-RevFnCall");
+            assert_eq!(fcall.translit.filter, CodePointInversionList::all());
+            assert_eq!(fcall.arg, "\u{F0009}padding"); // $1 and 'padding'
+
+            assert_eq!(reverse.id_group_list.len(), 2);
+            assert_eq!(reverse.rule_group_list.len(), 2);
+
+            assert_eq!(reverse.id_group_list[0].len(), 2);
+            assert_eq!(reverse.id_group_list[1].len(), 1);
+
+            assert_eq!(reverse.rule_group_list[0].len(), 2);
+            assert_eq!(reverse.rule_group_list[1].len(), 0);
+
+            let rule1_1 = ds::Rule::zero_from(&reverse.rule_group_list[0][0]);
+            assert_eq!(rule1_1.ante, "");
+            assert_eq!(rule1_1.key, "\u{F0004}");
+            assert_eq!(rule1_1.post, ""); // the reverse rule has no post-context
+            assert_eq!(rule1_1.replacer, "reverse output:\u{F0008}"); // function call
+
+            let rule1_2 = ds::Rule::zero_from(&reverse.rule_group_list[0][1]);
+            assert_eq!(rule1_2.ante, "\u{FFFFC}"); // start anchor
+            assert_eq!(rule1_2.key, "right");
+            assert_eq!(rule1_2.post, "\u{F0007}\u{FFFFD}"); // [0-9] and end anchor
+            assert_eq!(rule1_2.replacer, "left");
+
+            let id1_1 = ds::SimpleId::zero_from(&reverse.id_group_list[0][0]);
+            assert_eq!(id1_1.id, "Devanagari-InterIndic");
+            assert_eq!(id1_1.filter, CodePointInversionList::all());
+
+            let id1_2 = ds::SimpleId::zero_from(&reverse.id_group_list[0][1]);
+            assert_eq!(id1_2.id, "AnyRev-AddRandomSpaces/FiftyPercent");
+            assert_eq!(id1_2.filter, CodePointInversionList::all());
+
+            let id2_1 = ds::SimpleId::zero_from(&reverse.id_group_list[1][0]);
+            assert_eq!(id2_1.id, "InterIndic-Latin");
+            assert_eq!(id2_1.filter, CodePointInversionList::all());
+        }
+    }
+}