diff --git a/Cargo.lock b/Cargo.lock index c051c4c7655..75e71ecdfa2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1942,6 +1942,18 @@ dependencies = [ "zerovec", ] +[[package]] +name = "icu_transliterator_parser" +version = "0.0.0" +dependencies = [ + "icu_collections", + "icu_properties", + "icu_provider", + "icu_transliteration", + "icu_unicodeset_parser", + "log", +] + [[package]] name = "icu_unicodeset_parser" version = "0.0.0" diff --git a/Cargo.toml b/Cargo.toml index ae376a0c1c0..aab60c5e231 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,6 +43,7 @@ members = [ "experimental/relativetime", "experimental/relativetime/data", "experimental/transliteration", + "experimental/transliterator_parser", "experimental/unicodeset_parser", "ffi/capi_cdylib", "ffi/capi_staticlib", diff --git a/experimental/transliteration/Cargo.toml b/experimental/transliteration/Cargo.toml index f6ba38b3023..eaee3ca49ef 100644 --- a/experimental/transliteration/Cargo.toml +++ b/experimental/transliteration/Cargo.toml @@ -31,4 +31,4 @@ icu_collections = { version = "1.2.0", path = "../../components/collections", fe serde = { version = "1.0", features = ["derive"] } zerovec = { version = "0.9.4", path = "../../utils/zerovec", features = ["derive"] } -# TODO: Add serde, datagen, compiled_data features \ No newline at end of file +# TODO: Add serde, datagen, compiled_data features diff --git a/experimental/transliterator_parser/Cargo.toml b/experimental/transliterator_parser/Cargo.toml new file mode 100644 index 00000000000..e9243879ca0 --- /dev/null +++ b/experimental/transliterator_parser/Cargo.toml @@ -0,0 +1,37 @@ +# This file is part of ICU4X. For terms of use, please see the file +# called LICENSE at the top level of the ICU4X source tree +# (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +[package] +name = "icu_transliterator_parser" +description = "API to parse transform rules into transliterators as defined in UTS35" +version = "0.0.0" +authors = ["The ICU4X Project Developers"] +edition = "2021" +readme = "README.md" +repository = "https://github.com/unicode-org/icu4x" +license = "Unicode-DFS-2016" +categories = ["internationalization"] +# Keep this in sync with other crates unless there are exceptions +include = [ + "src/**/*", + "tests/**/*", + "Cargo.toml", + "LICENSE", + "README.md" +] + +[package.metadata.docs.rs] +all-features = true + +[dependencies] +icu_collections = { path = "../../components/collections" } +icu_properties = { path = "../../components/properties", default-features = false } +icu_provider = { path = "../../provider/core" } +icu_unicodeset_parser = { path = "../unicodeset_parser" } +icu_transliteration = { path = "../transliteration" } + +log = "0.4" + +[features] +compiled_data = ["icu_properties/compiled_data"] diff --git a/experimental/transliterator_parser/LICENSE b/experimental/transliterator_parser/LICENSE new file mode 100644 index 00000000000..9858d01abf5 --- /dev/null +++ b/experimental/transliterator_parser/LICENSE @@ -0,0 +1,51 @@ +UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE + +See Terms of Use +for definitions of Unicode Inc.’s Data Files and Software. + +NOTICE TO USER: Carefully read the following legal agreement. +BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +TERMS AND CONDITIONS OF THIS AGREEMENT. 
+IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +THE DATA FILES OR SOFTWARE. + +COPYRIGHT AND PERMISSION NOTICE + +Copyright © 1991-2022 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in https://www.unicode.org/copyright.html. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that either +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, or +(b) this copyright and permission notice appear in associated +Documentation. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. + +— + +Portions of ICU4X may have been adapted from ICU4C and/or ICU4J. +ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others. diff --git a/experimental/transliterator_parser/README.md b/experimental/transliterator_parser/README.md new file mode 100644 index 00000000000..cef9a94d048 --- /dev/null +++ b/experimental/transliterator_parser/README.md @@ -0,0 +1,13 @@ +# icu_transliterator_parser [![crates.io](https://img.shields.io/crates/v/icu_transliterator_parser)](https://crates.io/crates/icu_transliterator_parser) + +`icu_transliterator_parser` is a utility crate of the [`ICU4X`] project. + +This crate provides parsing functionality for [UTS #35 - Transliterators](https://unicode.org/reports/tr35/tr35-general.html#Transforms). + +See [`parse`](crate::parse()) for more information. + +[`ICU4X`]: ../icu/index.html + +## More Information + +For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x). diff --git a/experimental/transliterator_parser/src/compile.rs b/experimental/transliterator_parser/src/compile.rs new file mode 100644 index 00000000000..025ff2e8d8b --- /dev/null +++ b/experimental/transliterator_parser/src/compile.rs @@ -0,0 +1,1244 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! This module has three main functions. First, it validates many aspects of transliterators. +//! 
+//! Second, it compiles them into the zero-copy data struct defined in `icu_transliteration`. Third,
+//! it computes the dependencies of the transliterator.
+//! It is responsible for both directions of a source file, but the rest of this documentation
+//! assumes a single direction. The process is simply repeated for the other direction.
+//!
+//! # Terminology
+//! * The "direction" of a rule: Whether a rule is _forward_ (left-to-right in the source) or
+//! _reverse_ (right-to-left in the source). At runtime, clients will apply a transliterator
+//! in one direction. The transliterator `a <> b` replaces `a` with `b` in the forward direction,
+//! and `b` with `a` in the reverse direction.
+//! * The "side" of a rule: A rule has a _source_ and a _target_ side. The source is replaced
+//! with the target. If we're looking at a definition of a transliterator in the _forward_
+//! direction, the source is on the left and the target is on the right, and vice versa for
+//! the _reverse_ direction.
+//! * "Special matchers" are non-literal items that can appear on the source side of a rule.
+//! This includes, e.g., UnicodeSets and quantifiers.
+//! * "Special replacers" are non-literal items that can appear on the target side of a rule.
+//! This includes, e.g., function calls and back references.
+//! * "Special constructs" are just any non-literal rule item.
+//!
+//! # Conversion rule encoding
+//!
+//! Conversion rules are encoded using `str`s, and private use code points are used to represent
+//! the special constructs that can appear in a conversion rule (UnicodeSets, quantifiers, ...).
+//! This works as follows:
+//! * We use the Plane 15 Private Use Area, i.e., code points U+F0000 to U+FFFFD (inclusive)
+//! * A private use code point simply encodes an integer, obtained by subtracting U+F0000 from it
+//! * The integer is used as an index into `VarTable`
+//! * As a `VarTable` has multiple `VarZeroVec`s (one for each special construct), an index
+//! overflows into the following `VZV`s:
+//! * An index of `vzv1.len() + vzv2.len() + 4` indexes the third `VZV` at index 4
+//! * Thus, if the length of an earlier `VZV` changes, the index of an element in a later `VZV`
+//! will change, and its private use encoding will change
+//! * Therefore we must know the number of elements of each `VZV` before we can start encoding
+//! conversion rules into `str`s.
+//!
+//! # Passes
+//!
+//! This module works by performing multiple passes over the rules.
+//!
+//! ## Pass 1
+//! General validation of the rules and computation of the lengths of the `VZV`s in the `VarTable`.
+//!
+//! Only special constructs for the current direction contribute to the `VZV` lengths,
+//! i.e., the rule `a <> [a-z] { b` will not increment the size of the
+//! `VZV` for UnicodeSets if the current direction is `forward`, but it will if the current
+//! direction is `reverse` (this is because contexts on the target side of a rule are ignored).
+//!
+//! Similarly, only recursive transliterators and variables actually used for this direction are
+//! accounted for.
+//!
+//! ## Pass 2
+//! Encoding of the zero-copy data struct.
+//!
+//! To encode conversion rules into `str`s, we use the previously described encoded `VarTable`
+//! indices. Because we know the lengths of each special construct list (in the form of a `VZV`)
+//! from the first pass, we can store the offsets for each special construct list (i.e., the sum of
+//! the lengths of the previous lists) while encoding the conversion rules, incrementing the
+//! offset of a given special construct whenever we encode an element. The precomputed lengths
+//! mean we never overflow into the indices of the following `VZV`.
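+//!
+//! As an illustration only (these helpers are not part of this module's API, and the names are
+//! made up), the private use encoding described above is a plain offset mapping:
+//!
+//! ```text
+//! fn encode_index(idx: u32) -> char {
+//!     // valid as long as idx <= 0xFFFD, i.e., the `VarTable` stays within plane 15
+//!     char::from_u32(0xF0000 + idx).unwrap()
+//! }
+//! fn decode_index(c: char) -> u32 {
+//!     c as u32 - 0xF0000
+//! }
+//! ```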
+
+// more (data struct compatible) runtime optimization opportunities:
+// - deduplicate special constructs ($a = hello; $b = hello; should only generate one hello element)
+//   - especially important for equivalent unicodesets
+// - inline single-use variables ($a = x; $a > b; => x > b;)
+// - replace uses of single-element variables with the element itself ($a = [a-z]; $a > a; => [a-z] > a;)
+// - flatten single-element sets into literals ([a] > b; => a > b;)
+
+/*
+Encoding example:
+
+    $b = bc+ ;
+    $a = [a-z] $b ;
+    $a > ;
+
+b-data.counts: 1 compound (the definition itself), 1 quantifier plus (c+)
+b-data.used_vars: -
+
+a-data.counts: 1 compound (the definition itself), 1 unicodeset ([a-z])
+a-data.used_vars: b
+
+forward-data.counts: 0 (rules are inlined)
+forward-data.used_vars: a
+
+when collecting the counts (for forward) at the end, we sum over all counts of the transitive
+dependencies of forward (using used_vars), and add the counts of forward itself.
+we also compute the transitive closure of used variables.
+this gives us the `Pass1Result`:
+forward-data.counts: 2 compound, 1 quantifier plus, 1 unicodeset
+forward-data.used_vars: a, b
+
+this `Pass1Result` we give to Pass2, which will produce something like this:
+(note that the integer-indexed maps shown here are only semantic, in actuality the indices are implicit,
+as described in the zero-copy format, and the maps here are just arrays)
+
+    VarTable {
+        compounds: {
+            0: "b<2>", // b's definition, bc+
+            1: "<3><0>", // a's definition, [a-z] $b
+        },
+        quantifier_kleene_plus: {
+            2: "c", // c+
+        },
+        unicode_sets: {
+            3: <set>, // [a-z]
+        }
+    }
+    Rules: [
+        {
+            source: "<1>", // $a
+            target: "",
+        }
+    ]
+*/
+
+use crate::parse;
+use crate::parse::{ElementLocation as EL, HalfRule, QuantifierKind};
+use parse::Result;
+use parse::PEK;
+use std::collections::{HashMap, HashSet};
+
+enum SingleDirection {
+    Forward,
+    Reverse,
+}
+
+/// The number of elements for each `VZV` in the `VarTable`.
+#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
+struct SpecialConstructCounts {
+    num_compounds: usize,
+    num_quantifiers_opt: usize,
+    num_quantifiers_kleene: usize,
+    num_quantifiers_kleene_plus: usize,
+    num_segments: usize,
+    num_unicode_sets: usize,
+    num_function_calls: usize,
+}
+
+// Data for a given direction or variable definition (the "key")
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+struct Pass1Data {
+    counts: SpecialConstructCounts,
+    // the variables used by the associated key
+    used_variables: HashSet<String>,
+    // the recursive transliterators used by the associated key
+    used_transliterators: HashSet<parse::BasicId>,
+}
+
+#[allow(unused)] // TODO: remove annotation
+#[derive(Debug, Clone)]
+struct Pass1Result<'p> {
+    // data with dependencies resolved and counts summed
+    forward_data: Pass1Data,
+    reverse_data: Pass1Data,
+    variable_definitions: HashMap<String, &'p [parse::Element]>,
+}
+
+/// Responsible for the first pass as described in the module-level documentation.
+#[derive(Debug, Clone)]
+struct Pass1<'p> {
+    direction: parse::Direction,
+    // data for *direct* dependencies
+    forward_data: Pass1Data,
+    reverse_data: Pass1Data,
+    variable_data: HashMap<String, Pass1Data>,
+    variable_definitions: HashMap<String, &'p [parse::Element]>,
+    // variables which contain constructs that are only allowed to appear on the source side
+    // e.g., $a = c+; $set = [a-z]; ...
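+    // populated by `validate_variable_definition`; consulted by `TargetValidator` when such a
+    // variable is referenced on a target side, and when it appears in another definition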
+    target_disallowed_variables: HashSet<String>,
+}
+
+impl<'p> Pass1<'p> {
+    fn new(direction: parse::Direction) -> Self {
+        Self {
+            direction,
+            forward_data: Pass1Data::default(),
+            reverse_data: Pass1Data::default(),
+            variable_data: HashMap::new(),
+            variable_definitions: HashMap::new(),
+            target_disallowed_variables: HashSet::new(),
+        }
+    }
+
+    fn run(&mut self, rules: &'p [parse::Rule]) -> Result<Pass1Result<'p>> {
+        // first check global filter/global inverse filter.
+        // after this check, they may not appear anywhere.
+        let rules = self.validate_global_filters(rules)?;
+
+        // iterate through remaining rules and perform checks according to interim specification
+
+        for rule in rules {
+            match rule {
+                parse::Rule::GlobalFilter(_) | parse::Rule::GlobalInverseFilter(_) => {
+                    // the previous step ensures `rules` has no more global filters
+                    return Err(PEK::UnexpectedGlobalFilter.into());
+                }
+                parse::Rule::Transform(forward_id, reverse_id) => {
+                    self.validate_transform(forward_id, reverse_id.as_ref())?;
+                }
+                parse::Rule::VariableDefinition(name, definition) => {
+                    self.validate_variable_definition(name, definition)?;
+                }
+                parse::Rule::Conversion(hr1, dir, hr2) => {
+                    self.validate_conversion(hr1, *dir, hr2)?;
+                }
+            }
+        }
+
+        Pass1ResultGenerator::generate(self)
+    }
+
+    fn validate_global_filters<'a>(&self, rules: &'a [parse::Rule]) -> Result<&'a [parse::Rule]> {
+        let rules = match rules {
+            [parse::Rule::GlobalFilter(filter), rest @ ..] => {
+                if filter.has_strings() {
+                    return Err(PEK::GlobalFilterWithStrings.into());
+                }
+
+                rest
+            }
+            _ => rules,
+        };
+        let rules = match rules {
+            [rest @ .., parse::Rule::GlobalInverseFilter(filter)] => {
+                if filter.has_strings() {
+                    return Err(PEK::GlobalFilterWithStrings.into());
+                }
+
+                rest
+            }
+            _ => rules,
+        };
+
+        Ok(rules)
+    }
+
+    fn validate_transform(
+        &mut self,
+        forward_id: &parse::SingleId,
+        reverse_id: Option<&parse::SingleId>,
+    ) -> Result<()> {
+        let fwd_dep = forward_id.basic_id.clone();
+        if !fwd_dep.is_null() {
+            self.forward_data.used_transliterators.insert(fwd_dep);
+        }
+        let rev_dep = reverse_id
+            .map(|single_id| single_id.basic_id.clone())
+            .unwrap_or_else(|| forward_id.basic_id.clone().reverse());
+        if !rev_dep.is_null() {
+            self.reverse_data.used_transliterators.insert(rev_dep);
+        }
+        Ok(())
+    }
+
+    fn validate_variable_definition(
+        &mut self,
+        name: &String,
+        definition: &'p [parse::Element],
+    ) -> Result<()> {
+        if self.variable_definitions.contains_key(name) {
+            return Err(PEK::DuplicateVariable.into());
+        }
+        self.variable_definitions.insert(name.clone(), definition);
+
+        let mut data = Pass1Data::default();
+        // the variable definition itself is counted here
+        data.counts.num_compounds = 1;
+
+        let mut validator = VariableDefinitionValidator::new(
+            |s| self.variable_definitions.contains_key(s),
+            &mut data,
+            &self.target_disallowed_variables,
+            definition,
+        );
+        validator.validate()?;
+        if validator.used_target_disallowed_construct {
+            self.target_disallowed_variables.insert(name.clone());
+        }
+
+        self.variable_data.insert(name.clone(), data);
+
+        Ok(())
+    }
+
+    fn validate_conversion(
+        &mut self,
+        source: &HalfRule,
+        dir: parse::Direction,
+        target: &HalfRule,
+    ) -> Result<()> {
+        // TODO(#3736): include source location/actual source text in these logs
+        if !self.direction.permits(dir) {
+            // example: metadata defines this transliterator as forward, but a `<>` or `<` rule is found.
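+            // note that this is only logged: the `permits` checks below simply skip the rule
+            // for any direction the metadata does not allow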
+ log::warn!( + "metadata for transliterator specifies direction {:?} but conversion rule specifies {:?}", + self.direction, + dir, + ); + } + // logging for useless contexts + if dir == parse::Direction::Forward && (!target.ante.is_empty() || !target.post.is_empty()) + { + log::warn!("forward conversion rule has ignored context on target side"); + } + if dir == parse::Direction::Reverse && (!source.ante.is_empty() || !source.post.is_empty()) + { + log::warn!("reverse conversion rule has ignored context on target side"); + } + + if self.direction.permits(parse::Direction::Forward) + && dir.permits(parse::Direction::Forward) + { + self.validate_conversion_one_direction(source, target, SingleDirection::Forward)?; + } + if self.direction.permits(parse::Direction::Reverse) + && dir.permits(parse::Direction::Reverse) + { + self.validate_conversion_one_direction(target, source, SingleDirection::Reverse)?; + } + + Ok(()) + } + + fn validate_conversion_one_direction( + &mut self, + source: &HalfRule, + target: &HalfRule, + dir: SingleDirection, + ) -> Result<()> { + let data = match dir { + SingleDirection::Forward => &mut self.forward_data, + SingleDirection::Reverse => &mut self.reverse_data, + }; + let mut source_validator = SourceValidator::new( + |s| self.variable_definitions.contains_key(s), + data, + &source.ante, + &source.key, + &source.post, + ); + source_validator.validate()?; + let num_source_segments = source_validator.num_segments; + + let mut target_validator = TargetValidator::new( + |s| self.variable_definitions.contains_key(s), + &mut self.target_disallowed_variables, + data, + &target.key, + num_source_segments, + ); + target_validator.validate()?; + + Ok(()) + } +} + +struct SourceValidator<'a, 'p, F: Fn(&str) -> bool> { + is_variable_defined: F, + data: &'a mut Pass1Data, + ante: &'p [parse::Element], + key: &'p [parse::Element], + post: &'p [parse::Element], + // the number of segments this rule defines. consumed by TargetValidator. + num_segments: u32, +} + +/// Validates the source side of a rule. +/// +/// Ensures that only special constructs that may appear on the source side of a rule are used. +/// Also validates certain other source-side-only constraints, such as anchors needing to be at the +/// beginning or end of the rule. 
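+///
+/// For example (mirroring the cases exercised in the tests below), `^ a > b ;` is accepted,
+/// while `a ^ > b ;` is rejected with `AnchorStartNotAtStart`.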
+impl<'a, 'p, F: Fn(&str) -> bool> SourceValidator<'a, 'p, F> {
+    fn new(
+        is_variable_defined: F,
+        data: &'a mut Pass1Data,
+        ante: &'p [parse::Element],
+        key: &'p [parse::Element],
+        post: &'p [parse::Element],
+    ) -> Self {
+        Self {
+            is_variable_defined,
+            data,
+            ante,
+            key,
+            post,
+            num_segments: 0,
+        }
+    }
+
+    fn validate(&mut self) -> Result<()> {
+        // first validate position of ^ and $ anchors, if they exist
+        // ^: if ante is non-empty, must be its first element, otherwise must be first element of key
+        // $: if post is non-empty, must be its last element, otherwise must be last element of key
+
+        let sections = [self.ante, self.key, self.post];
+        // split off first element if it is a start anchor
+        let sections = match sections {
+            [[parse::Element::AnchorStart, ante @ ..], key, post] => [ante, key, post],
+            [[], [parse::Element::AnchorStart, key @ ..], post] => [&[], key, post],
+            _ => sections,
+        };
+        // split off last element if it is an end anchor
+        let sections = match sections {
+            [ante, key, [post @ .., parse::Element::AnchorEnd]] => [ante, key, post],
+            [ante, [key @ .., parse::Element::AnchorEnd], []] => [ante, key, &[]],
+            _ => sections,
+        };
+
+        // now neither start nor end anchors may appear anywhere in `sections`
+
+        sections
+            .iter()
+            .try_for_each(|s| self.validate_section(s, true))
+    }
+
+    fn validate_section(&mut self, section: &[parse::Element], top_level: bool) -> Result<()> {
+        section
+            .iter()
+            .try_for_each(|element| self.validate_element(element, top_level))
+    }
+
+    fn validate_element(&mut self, element: &parse::Element, top_level: bool) -> Result<()> {
+        match element {
+            parse::Element::Literal(_) => {}
+            parse::Element::VariableRef(name) => {
+                if !(self.is_variable_defined)(name) {
+                    return Err(PEK::UnknownVariable.into());
+                }
+                self.data.used_variables.insert(name.clone());
+            }
+            parse::Element::Quantifier(kind, inner) => {
+                self.validate_element(inner, false)?;
+                match *kind {
+                    QuantifierKind::ZeroOrOne => self.data.counts.num_quantifiers_opt += 1,
+                    QuantifierKind::ZeroOrMore => self.data.counts.num_quantifiers_kleene += 1,
+                    QuantifierKind::OneOrMore => self.data.counts.num_quantifiers_kleene_plus += 1,
+                }
+            }
+            parse::Element::Segment(inner) => {
+                self.validate_section(inner, false)?;
+                // increment the count for this specific rule
+                self.num_segments += 1;
+                // increment the count for this direction of the entire transliterator
+                self.data.counts.num_segments += 1;
+            }
+            parse::Element::UnicodeSet(_) => {
+                self.data.counts.num_unicode_sets += 1;
+            }
+            parse::Element::Cursor(_, _) => {
+                // while cursors have no effect on the source side, they may appear nonetheless
+                // TargetValidator validates these
+
+                // however, cursors are only allowed at the top level
+                if !top_level {
+                    return Err(PEK::InvalidCursor.into());
+                }
+            }
+            parse::Element::AnchorStart => {
+                // we check for these in `validate`
+                return Err(PEK::AnchorStartNotAtStart.into());
+            }
+            parse::Element::AnchorEnd => {
+                // we check for these in `validate`
+                return Err(PEK::AnchorEndNotAtEnd.into());
+            }
+            elt => {
+                return Err(PEK::UnexpectedElement(elt.kind(), EL::Source).into());
+            }
+        }
+        Ok(())
+    }
+}
+
+/// Validates the target side of a rule.
+///
+/// Ensures that only special constructs (including variables) that may appear on the target side
+/// of a rule are used. Also validates other target-side-only constraints, such as
+/// back references not being allowed to overflow and only one cursor being allowed.
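+///
+/// For example, `a > |@@ b ;` places the cursor (with placeholders) at the edge of the
+/// replacement and is accepted, while `a > b |@ c ;` is rejected with `InvalidCursor`.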
+struct TargetValidator<'a, 'p, F: Fn(&str) -> bool> {
+    is_variable_defined: F,
+    target_disallowed_variables: &'a mut HashSet<String>,
+    data: &'a mut Pass1Data,
+    replacer: &'p [parse::Element],
+    // the number of segments defined on the corresponding source side. produced by SourceValidator
+    num_segments: u32,
+    // true if a cursor has already been encountered, i.e., any further cursors are disallowed
+    encountered_cursor: bool,
+}
+
+impl<'a, 'p, F: Fn(&str) -> bool> TargetValidator<'a, 'p, F> {
+    fn new(
+        is_variable_defined: F,
+        target_disallowed_variables: &'a mut HashSet<String>,
+        data: &'a mut Pass1Data,
+        replacer: &'p [parse::Element],
+        num_segments: u32,
+    ) -> Self {
+        Self {
+            is_variable_defined,
+            target_disallowed_variables,
+            data,
+            replacer,
+            num_segments,
+            encountered_cursor: false,
+        }
+    }
+
+    fn validate(&mut self) -> Result<()> {
+        let section = self.replacer;
+        // special case for a single cursor
+        let section = match section {
+            [parse::Element::Cursor(pre, post)] => {
+                self.encounter_cursor()?;
+                if *pre != 0 && *post != 0 {
+                    // corresponds to `@@@|@@@`, i.e., placeholders on both sides of the cursor
+                    return Err(PEK::InvalidCursor.into());
+                }
+                return Ok(());
+            }
+            _ => section,
+        };
+        // strip |@@@ from beginning
+        let section = match section {
+            [parse::Element::Cursor(pre, _), rest @ ..] => {
+                self.encounter_cursor()?;
+                if *pre != 0 {
+                    // corresponds to `@@@|...`, i.e., placeholders in front of the cursor
+                    return Err(PEK::InvalidCursor.into());
+                }
+                rest
+            }
+            _ => section,
+        };
+        // strip @@@| from end
+        let section = match section {
+            [rest @ .., parse::Element::Cursor(_, post)] => {
+                self.encounter_cursor()?;
+                if *post != 0 {
+                    // corresponds to `...|@@@`, i.e., placeholders after the cursor
+                    return Err(PEK::InvalidCursor.into());
+                }
+                rest
+            }
+            _ => section,
+        };
+
+        self.validate_section(section, true)
+    }
+
+    fn validate_section(&mut self, section: &[parse::Element], top_level: bool) -> Result<()> {
+        section
+            .iter()
+            .try_for_each(|element| self.validate_element(element, top_level))
+    }
+
+    fn validate_element(&mut self, element: &parse::Element, top_level: bool) -> Result<()> {
+        match element {
+            parse::Element::Literal(_) => {}
+            parse::Element::VariableRef(name) => {
+                if !(self.is_variable_defined)(name) {
+                    return Err(PEK::UnknownVariable.into());
+                }
+                if self.target_disallowed_variables.contains(name) {
+                    return Err(PEK::SourceOnlyVariable.into());
+                }
+                self.data.used_variables.insert(name.clone());
+            }
+            parse::Element::BackRef(num) => {
+                if *num > self.num_segments {
+                    return Err(PEK::BackReferenceOutOfRange.into());
+                }
+            }
+            parse::Element::FunctionCall(id, inner) => {
+                self.validate_section(inner, false)?;
+                self.data.used_transliterators.insert(id.basic_id.clone());
+                self.data.counts.num_function_calls += 1;
+            }
+            parse::Element::Cursor(pre, post) => {
+                self.encounter_cursor()?;
+                if !top_level || *pre != 0 || *post != 0 {
+                    // pre and post must be 0 if the cursor does not appear at the very beginning or the very end
+                    // we account for the beginning or the end in `validate`.
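+                    // (so a plain `|` between two elements, as in `a > b | c ;`, is the only
+                    // legal cursor in the middle of the replacement)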
+                    return Err(PEK::InvalidCursor.into());
+                }
+            }
+            parse::Element::AnchorStart => {
+                // while anchors have no effect on the target side, they may still appear
+            }
+            parse::Element::AnchorEnd => {
+                // while anchors have no effect on the target side, they may still appear
+            }
+            elt => {
+                return Err(PEK::UnexpectedElement(elt.kind(), EL::Target).into());
+            }
+        }
+        Ok(())
+    }
+
+    fn encounter_cursor(&mut self) -> Result<()> {
+        if self.encountered_cursor {
+            return Err(PEK::DuplicateCursor.into());
+        }
+        self.encountered_cursor = true;
+        Ok(())
+    }
+}
+
+/// Validates variable definitions.
+///
+/// This checks that only a limited subset of special constructs appear in a variable's definition.
+/// For example, segments, back references, cursors, anchors, and function calls are not allowed.
+///
+/// It also propagates information about whether a variable may appear on the target side of a rule,
+/// as variables are in general allowed on the target side, but only if they only contain
+/// special constructs that are allowed to appear on the target side.
+struct VariableDefinitionValidator<'a, 'p, F: Fn(&str) -> bool> {
+    is_variable_defined: F,
+    target_disallowed_variables: &'a HashSet<String>,
+    data: &'a mut Pass1Data,
+    definition: &'p [parse::Element],
+    used_target_disallowed_construct: bool,
+}
+
+impl<'a, 'p, F: Fn(&str) -> bool> VariableDefinitionValidator<'a, 'p, F> {
+    fn new(
+        is_variable_defined: F,
+        data: &'a mut Pass1Data,
+        target_disallowed_variables: &'a HashSet<String>,
+        definition: &'p [parse::Element],
+    ) -> Self {
+        Self {
+            is_variable_defined,
+            data,
+            target_disallowed_variables,
+            definition,
+            used_target_disallowed_construct: false,
+        }
+    }
+
+    fn validate(&mut self) -> Result<()> {
+        self.validate_section(self.definition)
+    }
+
+    fn validate_section(&mut self, section: &[parse::Element]) -> Result<()> {
+        section
+            .iter()
+            .try_for_each(|element| self.validate_element(element))
+    }
+
+    fn validate_element(&mut self, element: &parse::Element) -> Result<()> {
+        match element {
+            parse::Element::Literal(_) => {}
+            parse::Element::VariableRef(name) => {
+                if !(self.is_variable_defined)(name) {
+                    return Err(PEK::UnknownVariable.into());
+                }
+                if self.target_disallowed_variables.contains(name) {
+                    self.used_target_disallowed_construct = true;
+                }
+                self.data.used_variables.insert(name.clone());
+            }
+            parse::Element::Quantifier(kind, inner) => {
+                self.used_target_disallowed_construct = true;
+                match *kind {
+                    QuantifierKind::ZeroOrOne => self.data.counts.num_quantifiers_opt += 1,
+                    QuantifierKind::ZeroOrMore => self.data.counts.num_quantifiers_kleene += 1,
+                    QuantifierKind::OneOrMore => self.data.counts.num_quantifiers_kleene_plus += 1,
+                }
+                self.validate_element(inner)?;
+            }
+            parse::Element::UnicodeSet(_) => {
+                self.used_target_disallowed_construct = true;
+                self.data.counts.num_unicode_sets += 1;
+            }
+            elt => {
+                return Err(PEK::UnexpectedElement(elt.kind(), EL::VariableDefinition).into());
+            }
+        }
+        Ok(())
+    }
+}
+
+// TODO(#3736): Think about adding a fourth Validator here that is run for
+// all conversion rules in full (i.e., all contexts and the direction of the rule is part of the API)
+// that checks for edge cases that are difficult to validate otherwise:
+// - cursors (could move functionality from TargetValidator here too, but this is mostly intended for:
+//   - any cursors on the source side for unidirectional rules
+//   - any cursors in contexts)
+// - anchors (could move functionality from SourceValidator here too, but this is mostly intended for:
+//   - anchors on the target side for unidirectional rules
+//   - contexts on the target side for unidirectional rules (still need to discuss what exactly, could be disallowed
+//     completely or just disallow target-only matchers (backrefs, function calls))
+// as part of this, it should also be decided whether these edge cases are full-blown errors or
+// merely logged warnings.
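+
+// Resolves the dependency data collected by `Pass1`: computes the transitive closure of used
+// variables per direction and sums the construct counts over it, yielding the `Pass1Result`.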
+struct Pass1ResultGenerator<'a, 'p> {
+    pass: &'a Pass1<'p>,
+    // for cycle-detection
+    current_vars: HashSet<String>,
+    transitive_var_dependencies: HashMap<String, HashSet<String>>,
+}
+
+impl<'a, 'p> Pass1ResultGenerator<'a, 'p> {
+    fn generate(pass: &'a Pass1<'p>) -> Result<Pass1Result<'p>> {
+        let mut generator = Self {
+            pass,
+            current_vars: HashSet::new(),
+            transitive_var_dependencies: HashMap::new(),
+        };
+        generator.generate_result()
+    }
+
+    fn generate_result(&mut self) -> Result<Pass1Result<'p>> {
+        // the result for a given direction is computed by first computing the transitive
+        // used variables for each direction, then using that data to sum over the
+        // special construct counts, and at last filtering the variable definitions based on
+        // the used variables in either direction.
+
+        let forward_data = self.generate_result_one_direction(&self.pass.forward_data)?;
+        let reverse_data = self.generate_result_one_direction(&self.pass.reverse_data)?;
+
+        let variable_definitions = self
+            .pass
+            .variable_definitions
+            .iter()
+            .filter(|&(var, _)| {
+                forward_data.used_variables.contains(var)
+                    || reverse_data.used_variables.contains(var)
+            })
+            .map(|(var, def)| (var.clone(), *def))
+            .collect();
+
+        Ok(Pass1Result {
+            forward_data,
+            reverse_data,
+            variable_definitions,
+        })
+    }
+
+    fn generate_result_one_direction(&mut self, seed_data: &Pass1Data) -> Result<Pass1Data> {
+        let seed_vars = &seed_data.used_variables;
+        let seed_transliterators = &seed_data.used_transliterators;
+
+        let mut used_variables = seed_vars.clone();
+        for var in seed_vars {
+            self.visit_var(var)?;
+            #[allow(clippy::indexing_slicing)] // a non-error `visit_var` ensures this exists
+            let deps = self.transitive_var_dependencies[var].clone();
+            used_variables.extend(deps);
+        }
+
+        // if in the future variables are ever allowed to contain, e.g., function calls, this
+        // will need to take into account recursive dependencies from `used_vars` as well
+        let used_transliterators = seed_transliterators.clone();
+
+        let counts = used_variables
+            .iter()
+            .try_fold(seed_data.counts, |mut counts, var| {
+                // we check for unknown variables during the first pass, so these should exist
+                let var_data = self.pass.variable_data.get(var).ok_or(PEK::Internal)?;
+                counts.num_compounds += var_data.counts.num_compounds;
+                counts.num_segments += var_data.counts.num_segments;
+                counts.num_quantifiers_opt += var_data.counts.num_quantifiers_opt;
+                counts.num_quantifiers_kleene += var_data.counts.num_quantifiers_kleene;
+                counts.num_quantifiers_kleene_plus += var_data.counts.num_quantifiers_kleene_plus;
+                counts.num_unicode_sets += var_data.counts.num_unicode_sets;
+                counts.num_function_calls += var_data.counts.num_function_calls;
+
+                Ok::<_, crate::ParseError>(counts)
+            })?;
+
+        Ok(Pass1Data {
+            used_transliterators,
+            used_variables,
+            counts,
+        })
+    }
+
+    fn visit_var(&mut self, name: &str) -> Result<()> {
+        if self.transitive_var_dependencies.contains_key(name) {
+            return Ok(());
+        }
+        if self.current_vars.contains(name) {
+            // cyclic dependency - should not occur
+            return Err(PEK::Internal.into());
+        }
+        self.current_vars.insert(name.to_owned());
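+        // `name` is now on the recursion stack: re-entering it through the recursive
+        // `visit_var` calls below would trigger the cycle check above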
+        // we check for unknown variables during the first pass, so these should exist
+        let var_data = self.pass.variable_data.get(name).ok_or(PEK::Internal)?;
+        let mut transitive_dependencies = var_data.used_variables.clone();
+        var_data.used_variables.iter().try_for_each(|var| {
+            self.visit_var(var)?;
+            #[allow(clippy::indexing_slicing)] // a non-error `visit_var` ensures this exists
+            let deps = self.transitive_var_dependencies[var].clone();
+            transitive_dependencies.extend(deps);
+
+            Ok::<_, crate::ParseError>(())
+        })?;
+        self.current_vars.remove(name);
+        self.transitive_var_dependencies
+            .insert(name.to_owned(), transitive_dependencies);
+        Ok(())
+    }
+}
+
+pub(crate) fn compile(
+    rules: Vec<parse::Rule>,
+    direction: parse::Direction,
+) -> Result<icu_transliteration::provider::RuleBasedTransliterator<'static>> {
+    // TODO(#3736): decide if validation should be metadata-direction dependent
+    // example: transliterator with metadata-direction "forward", and a rule `[a-z] < b ;` (invalid)
+    //  - if validation is dependent, this rule is valid because it's not used in the forward direction
+    //  - if validation is independent, this rule is invalid because the reverse direction is also checked
+    let mut pass1 = Pass1::new(direction);
+    let _result = pass1.run(&rules)?;
+
+    todo!()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::ops::Deref;
+
+    enum ExpectedOutcome {
+        Pass,
+        Fail,
+    }
+    use ExpectedOutcome::*;
+
+    const BOTH: parse::Direction = parse::Direction::Both;
+
+    fn parse(s: &str) -> Vec<parse::Rule> {
+        match parse::parse(s) {
+            Ok(rules) => rules,
+            Err(e) => panic!("unexpected error parsing rules {s:?}: {:?}", e),
+        }
+    }
+
+    fn pass1data_from_parts(
+        translit_deps: &[(&'static str, &'static str, &'static str)],
+        var_deps: &[&'static str],
+        counts: SpecialConstructCounts,
+    ) -> Pass1Data {
+        let mut data = Pass1Data {
+            counts,
+            ..Default::default()
+        };
+        for &(source, target, variant) in translit_deps {
+            data.used_transliterators.insert(parse::BasicId {
+                source: source.into(),
+                target: target.into(),
+                variant: variant.into(),
+            });
+        }
+        for &var in var_deps {
+            data.used_variables.insert(var.into());
+        }
+        data
+    }
+
+    #[test]
+    fn test_pass1_computed_data() {
+        let source = r"
+        :: [a-z] ;
+        $used_both = [a-z] ; # only transitively used by reverse direction
+        $used_rev = $used_both $used_both+ ;
+        $unused = a+ b+ .? $used_both $used_rev ; # unused
+        $unused2 = $unused ; # unused
+        :: [:L:] Bidi-Dependency/One ;
+        $used_fwd = [just a set] ;
+        ($used_both [a-z]) > &[a-z] Forward-Dependency($1) ;
+        $used_fwd > ;
+        < $used_rev+? ;
+
+        $literal1 = a ;
+        $literal2 = b ;
+        $literal1 <> $literal2 ;
+        :: AnotherForwardDependency () ;
+        :: ([set] Backward-Dependency) ;
+        :: YetAnother-ForwardDependency (AnotherBackwardDependency) ;
+        &Many(&Backwardz(&Deps($2))) < (a(bc)d)+ ;
+
+        :: ([a-z]) ;
+        ";
+
+        let rules = parse(source);
+        let mut pass1 = Pass1::new(BOTH);
+        let result = pass1.run(&rules).expect("pass1 failed");
+
+        {
+            // forward
+            let counts = SpecialConstructCounts {
+                num_segments: 1,
+                num_function_calls: 1,
+                num_unicode_sets: 1,
+                ..Default::default()
+            };
+            let expected_fwd_data = pass1data_from_parts(
+                &[
+                    ("Bidi", "Dependency", "One"),
+                    ("Forward", "Dependency", ""),
+                    ("Any", "AnotherForwardDependency", ""),
+                    ("YetAnother", "ForwardDependency", ""),
+                ],
+                &["used_both", "used_fwd", "literal1", "literal2"],
+                counts,
+            );
+            assert_eq!(expected_fwd_data, pass1.forward_data);
+        }
+        {
+            // reverse
+            let counts = SpecialConstructCounts {
+                num_quantifiers_opt: 1,
+                num_quantifiers_kleene_plus: 2,
+                num_segments: 2,
+                num_function_calls: 3,
+                ..Default::default()
+            };
+            let expected_rev_data = pass1data_from_parts(
+                &[
+                    ("Dependency", "Bidi", "One"),
+                    ("Backward", "Dependency", ""),
+                    ("Any", "AnotherBackwardDependency", ""),
+                    ("Any", "Many", ""),
+                    ("Any", "Backwardz", ""),
+                    ("Any", "Deps", ""),
+                ],
+                &["used_rev", "literal1", "literal2"],
+                counts,
+            );
+            assert_eq!(expected_rev_data, pass1.reverse_data);
+        }
+        {
+            // $used_both
+            let counts = SpecialConstructCounts {
+                num_compounds: 1,
+                num_unicode_sets: 1,
+                ..Default::default()
+            };
+            let expected_data = pass1data_from_parts(&[], &[], counts);
+            assert_eq!(expected_data, pass1.variable_data["used_both"]);
+        }
+        {
+            // $used_rev
+            let counts = SpecialConstructCounts {
+                num_compounds: 1,
+                num_quantifiers_kleene_plus: 1,
+                ..Default::default()
+            };
+            let expected_data = pass1data_from_parts(&[], &["used_both"], counts);
+            assert_eq!(expected_data, pass1.variable_data["used_rev"]);
+        }
+        {
+            // $unused
+            let counts = SpecialConstructCounts {
+                num_compounds: 1,
+                num_unicode_sets: 1,
+                num_quantifiers_opt: 1,
+                num_quantifiers_kleene_plus: 2,
+                ..Default::default()
+            };
+            let expected_data = pass1data_from_parts(&[], &["used_both", "used_rev"], counts);
+            assert_eq!(expected_data, pass1.variable_data["unused"]);
+        }
+        {
+            // $unused2
+            let counts = SpecialConstructCounts {
+                num_compounds: 1,
+                ..Default::default()
+            };
+            let expected_data = pass1data_from_parts(&[], &["unused"], counts);
+            assert_eq!(expected_data, pass1.variable_data["unused2"]);
+        }
+        {
+            // $used_fwd
+            let counts = SpecialConstructCounts {
+                num_compounds: 1,
+                num_unicode_sets: 1,
+                ..Default::default()
+            };
+            let expected_data = pass1data_from_parts(&[], &[], counts);
+            assert_eq!(expected_data, pass1.variable_data["used_fwd"]);
+        }
+        {
+            // $literal1
+            let counts = SpecialConstructCounts {
+                num_compounds: 1,
+                ..Default::default()
+            };
+            let expected_data = pass1data_from_parts(&[], &[], counts);
+            assert_eq!(expected_data, pass1.variable_data["literal1"]);
+        }
+        {
+            // $literal2
+            let counts = SpecialConstructCounts {
+                num_compounds: 1,
+                ..Default::default()
+            };
+            let expected_data = pass1data_from_parts(&[], &[], counts);
+            assert_eq!(expected_data, pass1.variable_data["literal2"]);
+        }
+        {
+            let vars_with_data: HashSet<_> = pass1.variable_data.keys().map(Deref::deref).collect();
+            let expected_vars_with_data = HashSet::from([
+                "used_both",
+                "used_rev",
+                "unused",
+                "unused2",
+                "used_fwd",
+                "literal1",
+                "literal2",
+            ]);
+            assert_eq!(expected_vars_with_data, vars_with_data);
+        }
+        {
+            // check aggregated Pass1Result
+            let fwd_counts = SpecialConstructCounts {
+                num_compounds: 4,
+                num_unicode_sets: 3,
+                num_function_calls: 1,
+                num_segments: 1,
+                ..Default::default()
+            };
+            let fwd_data = pass1data_from_parts(
+                &[
+                    ("Bidi", "Dependency", "One"),
+                    ("Forward", "Dependency", ""),
+                    ("Any", "AnotherForwardDependency", ""),
+                    ("YetAnother", "ForwardDependency", ""),
+                ],
+                &["used_both", "used_fwd", "literal1", "literal2"],
+                fwd_counts,
+            );
+
+            let rev_counts = SpecialConstructCounts {
+                num_compounds: 4,
+                num_unicode_sets: 1,
+                num_quantifiers_kleene_plus: 3,
+                num_quantifiers_opt: 1,
+                num_segments: 2,
+                num_function_calls: 3,
+                ..Default::default()
+            };
+            let rev_data = pass1data_from_parts(
+                &[
+                    ("Dependency", "Bidi", "One"),
+                    ("Backward", "Dependency", ""),
+                    ("Any", "AnotherBackwardDependency", ""),
+                    ("Any", "Many", ""),
+                    ("Any", "Backwardz", ""),
+                    ("Any", "Deps", ""),
+                ],
+                &["used_both", "used_rev", "literal1", "literal2"],
+                rev_counts,
+            );
+
+            assert_eq!(fwd_data, result.forward_data);
+            assert_eq!(rev_data, result.reverse_data);
+
+            let actual_definition_keys: HashSet<_> = result
+                .variable_definitions
+                .keys()
+                .map(Deref::deref)
+                .collect();
+            let expected_definition_keys =
+                HashSet::from(["used_both", "used_fwd", "used_rev", "literal1", "literal2"]);
+            assert_eq!(expected_definition_keys, actual_definition_keys);
+        }
+    }
+
+    #[test]
+    fn test_pass1_validate_conversion() {
+        let sources = [
+            // anchor start must be at the beginning
+            (Pass, r"^ a > ;"),
+            (Pass, r"^ a > ^ b;"),
+            (Pass, r"^ a < ^ b;"),
+            (Pass, r"^ a <> ^ b;"),
+            (Pass, r"^ { a > ;"),
+            (Pass, r"{ ^ a > ;"),
+            (Fail, r"a { ^ a > ;"),
+            // TODO(#3736): do we enforce this?
+            // (Fail, r"{ ^ a > a ^ ;"),
+            (Fail, r"a ^ a > ;"),
+            (Fail, r"a ^ > ;"),
+            (Fail, r"< a ^ ;"),
+            (Fail, r"a } ^ > ;"),
+            (Fail, r"a } ^ a > ;"),
+            (Fail, r"(^) a > ;"),
+            (Fail, r"^+ a > ;"),
+            // anchor end must be at the end
+            (Pass, r"a $ > ;"),
+            (Pass, r"a $ > $;"),
+            (Pass, r"a $ <> a$;"),
+            (Pass, r"a } $ > ;"),
+            (Pass, r"a $ } > ;"),
+            (Fail, r"a $ } a > ;"),
+            (Fail, r"< $ a ;"),
+            (Fail, r"a $ a > ;"),
+            (Fail, r"$ a > ;"),
+            (Fail, r"$ { a > ;"),
+            (Fail, r"a $ { a > ;"),
+            (Fail, r"a ($) > ;"),
+            (Fail, r"a $+ > ;"),
+            // cursor checks
+            (Pass, r"a | b <> c | d ;"),
+            (Fail, r"a | b | <> | c | d ;"),
+            (Fail, r"a > | c | d ;"),
+            (Pass, r"a > | c d ;"),
+            (Pass, r"a > | ;"),
+            (Fail, r"a > || ;"),
+            (Fail, r"a|? > ;"),
+            (Fail, r"a(|) > ;"),
+            (Fail, r"a > &Remove(|) ;"),
+            (Pass, r"a > |@ ;"),
+            (Pass, r"a > @| ;"),
+            (Fail, r"a > @|@ ;"),
+            (Fail, r"a > @|@| ;"),
+            (Pass, r"a > xa @@@| ;"),
+            (Pass, r"a > |@@ xa ;"),
+            (Fail, r"a > x @| a ;"),
+            (Fail, r"a > x |@ a ;"),
+            (Fail, r"a > x @|@ a ;"),
+            // UnicodeSets
+            (Pass, r"[a-z] > a ;"),
+            (Fail, r"[a-z] < a ;"),
+            (Pass, r". > a ;"),
+            (Fail, r". < a ;"),
+            // segments
+            (Fail, r"(a) <> $1 ;"),
+            (Pass, r"(a) > $1 ;"),
+            (Pass, r"(a()) > $1 $2;"),
+            (Pass, r"(a()) > $2;"),
+            (Fail, r"(a) > $2;"),
+            (Pass, r"(a) } (abc) > $2;"),
+            // variables
+            (Fail, r"a > $a;"),
+            // quantifiers
> a;"), + (Fail, r"a > a+;"), + (Fail, r"a > a*;"), + (Fail, r"a > a?;"), + // function calls + (Pass, r"a > &Remove();"), + (Fail, r"a < &Remove();"), + (Pass, r"a (.*)> &[a-z] Latin-Greek/BGN(abc &[a]Remove($1));"), + ]; + + for (expected_outcome, source) in sources { + let rules = parse(source); + let mut pass = Pass1::new(BOTH); + let result = pass.run(&rules); + match (expected_outcome, result) { + (Fail, Ok(_)) => { + panic!("unexpected successful pass1 validation for rules {source:?}") + } + (Pass, Err(e)) => { + panic!("unexpected error in pass1 validation for rules {source:?}: {e:?}") + } + _ => {} + } + } + } + + #[test] + fn test_pass1_validate_variable_definition() { + let sources = [ + (Fail, r"$a = &Remove() ;"), + (Fail, r"$a = (abc) ;"), + (Fail, r"$a = | ;"), + (Fail, r"$a = ^ ;"), + (Fail, r"$a = $ ;"), + (Fail, r"$a = $1 ;"), + (Fail, r"$var = [a-z] ; a > $var ;"), + (Fail, r"$var = a+ ; a > $var ;"), + (Pass, r"$var = [a-z] ; $var > a ;"), + (Pass, r"$var = a+ ; $var > a ;"), + (Pass, r"$b = 'hello'; $var = a+*? [a-z] $b ;"), + ]; + + for (expected_outcome, source) in sources { + let rules = parse(source); + let mut pass = Pass1::new(BOTH); + let result = pass.run(&rules); + match (expected_outcome, result) { + (Fail, Ok(_)) => { + panic!("unexpected successful pass1 validation for rules {source:?}") + } + (Pass, Err(e)) => { + panic!("unexpected error in pass1 validation for rules {source:?}: {e:?}") + } + _ => {} + } + } + } + + #[test] + fn test_pass1_validate_global_filters() { + let sources = [ + (Pass, r":: [a-z];"), + (Pass, r":: ([a-z]);"), + (Pass, r":: [a-z] ; :: ([a-z]);"), + (Fail, r":: [{string}] ;"), + (Fail, r":: ([{string}]);"), + (Fail, r":: [a-z] ; :: [a-z] ;"), + (Fail, r":: ([a-z]) ; :: ([a-z]) ;"), + (Fail, r":: ([a-z]) ; :: [a-z] ;"), + (Pass, r":: [a-z] ; :: Remove ; :: ([a-z]) ;"), + (Fail, r":: Remove ; :: [a-z] ;"), + (Fail, r":: ([a-z]) ; :: Remove ;"), + ]; + + for (expected_outcome, source) in sources { + let rules = parse(source); + let mut pass = Pass1::new(BOTH); + let result = pass.run(&rules); + match (expected_outcome, result) { + (Fail, Ok(_)) => { + panic!("unexpected successful pass1 validation for rules {source:?}") + } + (Pass, Err(e)) => { + panic!("unexpected error in pass1 validation for rules {source:?}: {e:?}") + } + _ => {} + } + } + } +} diff --git a/experimental/transliterator_parser/src/lib.rs b/experimental/transliterator_parser/src/lib.rs new file mode 100644 index 00000000000..993a95285d1 --- /dev/null +++ b/experimental/transliterator_parser/src/lib.rs @@ -0,0 +1,114 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! `icu_transliterator_parser` is a utility crate of the [`ICU4X`] project. +//! +//! This crate provides parsing functionality for [UTS #35 - Transliterators](https://unicode.org/reports/tr35/tr35-general.html#Transforms). +//! +//! See [`parse`](crate::parse()) for more information. +//! +//! 
+//! [`ICU4X`]: ../icu/index.html

// https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations
+    UnexpectedChar(char),
+    /// A reference to an unknown variable.
+    UnknownVariable,
+    /// The source is incomplete.
+    Eof,
+    /// Something unexpected went wrong with our code. Please file a bug report on GitHub.
+    Internal,
+    /// The provided syntax is not supported by us. Please file an issue on GitHub if you need
+    /// this feature.
+    Unimplemented,
+    /// The provided escape sequence is not a valid Unicode code point.
+    InvalidEscape,
+    /// The provided transform ID is invalid.
+    InvalidId,
+    /// The provided number is invalid, which likely means it's too big.
+    InvalidNumber,
+    /// Duplicate variable definition.
+    DuplicateVariable,
+    /// Invalid UnicodeSet syntax. See `icu_unicodeset_parser`'s [`ParseError`](icu_unicodeset_parser::ParseError).
+    UnicodeSetError(icu_unicodeset_parser::ParseError),
+
+    // errors originating from compilation step
+    /// A global filter (forward or backward) in an unexpected position.
+    UnexpectedGlobalFilter,
+    /// A global filter (forward or backward) may not contain strings.
+    GlobalFilterWithStrings,
+    /// An element of [`ElementKind`] appeared in the given [`ElementLocation`], but that is prohibited.
+    UnexpectedElement(ElementKind, ElementLocation),
+    /// The start anchor `^` was not placed at the beginning of a source.
+    AnchorStartNotAtStart,
+    /// The end anchor `$` was not placed at the end of a source.
+    AnchorEndNotAtEnd,
+    /// A variable that contains source-only matchers (e.g., UnicodeSets) was used on the target side.
+    SourceOnlyVariable,
+    /// No matching segment for this backreference was found.
+    BackReferenceOutOfRange,
+    /// The cursor is in an invalid position.
+    InvalidCursor,
+    /// Multiple cursors were defined.
+    DuplicateCursor,
+}
+pub(crate) use ParseErrorKind as PEK;
+
+impl ParseErrorKind {
+    fn with_offset(self, offset: usize) -> ParseError {
+        ParseError {
+            offset: Some(offset),
+            kind: self,
+        }
+    }
+}
+
+/// The error type returned by the `parse` functions in this crate.
+#[allow(unused)] // TODO(#3736): remove when doing compilation
+#[derive(Debug, Clone, Copy)]
+pub struct ParseError {
+    // offset is the index to an arbitrary byte in the last character in the source that makes sense
+    // to display as location for the error, e.g., the unexpected character itself or
+    // for an unknown property name the last character of the name.
+    offset: Option<usize>,
+    kind: ParseErrorKind,
+}
+
+impl From<ParseErrorKind> for ParseError {
+    fn from(kind: ParseErrorKind) -> Self {
+        ParseError { offset: None, kind }
+    }
+}
+
+impl From<icu_unicodeset_parser::ParseError> for ParseError {
+    fn from(e: icu_unicodeset_parser::ParseError) -> Self {
+        ParseError {
+            offset: None,
+            kind: PEK::UnicodeSetError(e),
+        }
+    }
+}
+
+pub(crate) type Result<T, E = ParseError> = core::result::Result<T, E>;
+
+// the only UnicodeSets used in this crate are parsed, and thus 'static.
+pub(crate) type UnicodeSet = CodePointInversionListAndStringList<'static>;
+
+#[derive(Debug, Clone, Copy)]
+pub(crate) enum QuantifierKind {
+    // ?
+    ZeroOrOne,
+    // *
+    ZeroOrMore,
+    // +
+    OneOrMore,
+}
+
+// source-target/variant
+#[allow(unused)] // TODO(#3736): remove when doing compilation
+#[derive(Debug, Clone, Hash, PartialEq, Eq)]
+pub(crate) struct BasicId {
+    pub(crate) source: String,
+    pub(crate) target: String,
+    pub(crate) variant: String,
+}
+
+impl BasicId {
+    pub(crate) fn is_null(&self) -> bool {
+        self.source == "Any" && self.target == "Null" && self.variant.is_empty()
+    }
+
+    pub(crate) fn reverse(self) -> Self {
+        if self.is_null() {
+            return self;
+        }
+        // TODO(#3736): add hardcoded reverses here
+
+        Self {
+            source: self.target,
+            target: self.source,
+            variant: self.variant,
+        }
+    }
+}
+
+impl Default for BasicId {
+    fn default() -> Self {
+        Self {
+            source: "Any".to_string(),
+            target: "Null".to_string(),
+            variant: "".to_string(),
+        }
+    }
+}
+
+// [set] source-target/variant
+#[allow(unused)] // TODO(#3736): remove when doing compilation
+#[derive(Debug, Clone)]
+pub(crate) struct SingleId {
+    pub(crate) filter: Option<UnicodeSet>,
+    pub(crate) basic_id: BasicId,
+}
+
+#[derive(Debug, Clone)]
+pub(crate) enum Element {
+    // Examples:
+    //  - hello\ world
+    //  - 'hello world'
+    Literal(String),
+    // Example: $my_var
+    VariableRef(String),
+    // Example: $12
+    BackRef(u32),
+    // Examples:
+    //  - ?
+    //  - *
+    //  - +
+    // note: Box instead of Section, because a quantifier only ever refers to the immediately preceding element.
+    // segments or variable refs are used to group multiple elements together.
+    Quantifier(QuantifierKind, Box<Element>),
+    // Example: ( ...)
+    Segment(Section),
+    // Example: [:^L:]
+    UnicodeSet(UnicodeSet),
+    // Example: &[a-z] Any-Remove( ...)
+    // single id, function arguments
+    FunctionCall(SingleId, Section),
+    // Example: @@@@ |, |@@@@
+    Cursor(u32, u32),
+    // '^'
+    AnchorStart,
+    // '$'
+    AnchorEnd,
+}
+
+impl Element {
+    pub(crate) fn kind(&self) -> ElementKind {
+        match self {
+            Element::Literal(..) => ElementKind::Literal,
+            Element::VariableRef(..) => ElementKind::VariableReference,
+            Element::BackRef(..) => ElementKind::BackReference,
+            Element::Quantifier(..) => ElementKind::Quantifier,
+            Element::Segment(..) => ElementKind::Segment,
+            Element::UnicodeSet(..) => ElementKind::UnicodeSet,
+            Element::FunctionCall(..) => ElementKind::FunctionCall,
+            Element::Cursor(..) => ElementKind::Cursor,
+            Element::AnchorStart => ElementKind::AnchorStart,
+            Element::AnchorEnd => ElementKind::AnchorEnd,
+        }
+    }
+}
+
+pub(crate) type Section = Vec<Element>;
+
+#[allow(unused)] // TODO(#3736): remove when doing compilation
+#[derive(Debug, Clone)]
+pub(crate) struct HalfRule {
+    pub(crate) ante: Section,
+    pub(crate) key: Section,
+    pub(crate) post: Section,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(crate) enum Direction {
+    Forward,
+    Reverse,
+    Both,
+}
+
+impl Direction {
+    // whether `self` is a superset of `other` or not
+    pub(crate) fn permits(&self, other: Direction) -> bool {
+        match self {
+            Direction::Forward => other == Direction::Forward,
+            Direction::Reverse => other == Direction::Reverse,
+            Direction::Both => true,
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+#[allow(clippy::large_enum_variant)]
+pub(crate) enum Rule {
+    GlobalFilter(UnicodeSet),
+    GlobalInverseFilter(UnicodeSet),
+    // forward and backward IDs.
+ // "A (B)" is Transform(A, Some(B)), + // "(B)" is Transform(Null, Some(B)), + // "A" is Transform(A, None), which indicates an auto-computed reverse ID, + // "A ()" is Transform(A, Some(Null)) + Transform(SingleId, Option), + Conversion(HalfRule, Direction, HalfRule), + VariableDefinition(String, Section), +} + +struct TransliteratorParser<'a, P: ?Sized> { + iter: Peekable>, + source: &'a str, + // flattened variable map specifically for unicodesets, i.e., only contains variables that + // are chars, strings, or UnicodeSets when all variables are inlined. + variable_map: VariableMap<'static>, + // cached set for the special set . + dot_set: Option, + // for variable identifiers (XID Start, XID Continue) + xid_start: &'a CodePointInversionList<'a>, + xid_continue: &'a CodePointInversionList<'a>, + // for skipped whitespace (Pattern White Space) + pat_ws: &'a CodePointInversionList<'a>, + property_provider: &'a P, +} + +impl<'a, P> TransliteratorParser<'a, P> +where + P: ?Sized + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider + + DataProvider, +{ + // initiates a line comment + const COMMENT: char = '#'; + // terminates a line comment + const COMMENT_END: char = '\n'; + // terminates a rule + const RULE_END: char = ';'; + // initiates a filter or transform rule, as part of '::' + const SPECIAL_START: char = ':'; + // initiates a UnicodeSet + const SET_START: char = '['; + // equivalent to the UnicodeSet [^[:Zp:][:Zl:]\r\n$] + const DOT: char = '.'; + const DOT_SET: &'static str = r"[^[:Zp:][:Zl:]\r\n$]"; + // matches the beginning of the input + const ANCHOR_START: char = '^'; + // initiates a segment or the reverse portion of an ID + const OPEN_PAREN: char = '('; + // terminates a segment or the reverse portion of an ID + const CLOSE_PAREN: char = ')'; + // separates source and target of an ID + const ID_SEP: char = '-'; + // separates variant from ID + const VARIANT_SEP: char = '/'; + // variable reference prefix, and anchor end character + const VAR_PREFIX: char = '$'; + // variable definition operator + const VAR_DEF_OP: char = '='; + // left context + const LEFT_CONTEXT: char = '{'; + // right context + const RIGHT_CONTEXT: char = '}'; + // optional quantifier + const OPTIONAL: char = '?'; + // zero or more quantifier + const ZERO_OR_MORE: char = '*'; + // one or more quantifier + const ONE_OR_MORE: char = '+'; + // function prefix + const FUNCTION_PREFIX: char = '&'; + // quoted literals + const QUOTE: char = '\''; + // escape character + const ESCAPE: char = '\\'; + // cursor + const CURSOR: char = '|'; + // before or after a cursor + const CURSOR_PLACEHOLDER: char = '@'; + + fn new( + source: &'a str, + xid_start: &'a 
+
+    fn new(
+        source: &'a str,
+        xid_start: &'a CodePointInversionList<'a>,
+        xid_continue: &'a CodePointInversionList<'a>,
+        pat_ws: &'a CodePointInversionList<'a>,
+        provider: &'a P,
+    ) -> Self {
+        Self {
+            iter: source.char_indices().peekable(),
+            source,
+            variable_map: Default::default(),
+            dot_set: None,
+            xid_start,
+            xid_continue,
+            pat_ws,
+            property_provider: provider,
+        }
+    }
+
+    fn parse_rules(&mut self) -> Result<Vec<Rule>> {
+        let mut rules = Vec::new();
+
+        loop {
+            self.skip_whitespace();
+            if self.iter.peek().is_none() {
+                break;
+            }
+            // we skipped whitespace and comments, so any other chars must be part of a rule
+            rules.push(self.parse_rule()?);
+        }
+
+        Ok(rules)
+    }
+
+    // expects a rule
+    fn parse_rule(&mut self) -> Result<Rule> {
+        match self.must_peek_char()? {
+            Self::SPECIAL_START => self.parse_filter_or_transform_rule(),
+            // must be a conversion or variable rule
+            _ => self.parse_conversion_or_variable_rule(),
+        }
+    }
+
+    // any rules starting with '::'
+    fn parse_filter_or_transform_rule(&mut self) -> Result<Rule> {
+        // Syntax:
+        // '::' <unicode-set> ';'             # global filter
+        // '::' '(' <unicode-set> ')' ';'     # global inverse filter
+        // '::' <single-id> (<single-id>)? ';' # transform rule
+
+        self.consume(Self::SPECIAL_START)?;
+        self.consume(Self::SPECIAL_START)?;
+
+        // because all three options can start with a UnicodeSet, we just try to parse everything
+        // into options and assemble the rule at the end
+
+        let (forward_filter, forward_basic_id, reverse_filter, reverse_basic_id, has_reverse) =
+            self.parse_filter_or_transform_rule_parts()?;
+
+        self.skip_whitespace();
+
+        // the offset of ';'
+        let meta_err_offset = self.must_peek_index()?;
+        self.consume(Self::RULE_END)?;
+
+        // try to assemble the rule
+        // first try global filters
+        match (
+            forward_filter.is_some(),
+            forward_basic_id.is_some(),
+            reverse_filter.is_some(),
+            reverse_basic_id.is_some(),
+        ) {
+            (true, false, false, false) => {
+                // by the match, forward_filter.is_some() is true
+                #[allow(clippy::unwrap_used)]
+                return Ok(Rule::GlobalFilter(forward_filter.unwrap()));
+            }
+            (false, false, true, false) => {
+                // by the match, reverse_filter.is_some() is true
+                #[allow(clippy::unwrap_used)]
+                return Ok(Rule::GlobalInverseFilter(reverse_filter.unwrap()));
+            }
+            _ => {}
+        }
+
+        // if this is not a global (inverse) filter rule, it must be a transform rule
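+        //
+        // examples (sketch): ":: [a-z] ;" yields Rule::GlobalFilter for [a-z],
+        // ":: ([a-z]) ;" yields Rule::GlobalInverseFilter for [a-z], and
+        // ":: NFC (NFD) ;" yields Rule::Transform(Any-NFC, Some(Any-NFD))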
+
+        // either forward_basic_id or reverse_basic_id must be nonempty
+        if forward_basic_id.is_none() && reverse_basic_id.is_none() {
+            return Err(PEK::InvalidId.with_offset(meta_err_offset));
+        }
+
+        if !has_reverse {
+            // we must have a forward id due to:
+            // 1. !has_reverse implying reverse_basic_id.is_none()
+            // 2. the above none-checks implying forward_basic_id.is_some()
+            // because this is difficult to verify, we return a PEK::Internal
+            // instead of unwrapping, despite it technically being unnecessary
+            let forward_basic_id = forward_basic_id.ok_or(PEK::Internal)?;
+            return Ok(Rule::Transform(
+                SingleId {
+                    basic_id: forward_basic_id,
+                    filter: forward_filter,
+                },
+                None,
+            ));
+        }
+
+        if forward_filter.is_some() && forward_basic_id.is_none()
+            || reverse_filter.is_some() && reverse_basic_id.is_none()
+        {
+            // cannot have a filter without a basic id
+            return Err(PEK::InvalidId.with_offset(meta_err_offset));
+        }
+
+        // an empty forward rule, such as ":: (R) ;", is equivalent to ":: Any-Null (R) ;"
+        let forward_basic_id = forward_basic_id.unwrap_or_default();
+        // an empty reverse rule, such as ":: F () ;", is equivalent to ":: F (Any-Null) ;"
+        let reverse_basic_id = reverse_basic_id.unwrap_or_default();
+
+        let forward_single_id = SingleId {
+            basic_id: forward_basic_id,
+            filter: forward_filter,
+        };
+        let reverse_single_id = SingleId {
+            basic_id: reverse_basic_id,
+            filter: reverse_filter,
+        };
+
+        Ok(Rule::Transform(forward_single_id, Some(reverse_single_id)))
+    }
+
+    // consumes everything between '::' and ';', exclusive
+    #[allow(clippy::type_complexity)] // only used internally in one place
+    fn parse_filter_or_transform_rule_parts(
+        &mut self,
+    ) -> Result<(
+        Option<UnicodeSet>,
+        Option<BasicId>,
+        Option<UnicodeSet>,
+        Option<BasicId>,
+        bool,
+    )> {
+        // parse forward things, i.e., everything until Self::OPEN_PAREN
+        self.skip_whitespace();
+        let forward_filter = self.try_parse_unicode_set()?;
+        self.skip_whitespace();
+        let forward_basic_id = self.try_parse_basic_id()?;
+        self.skip_whitespace();
+
+        let has_reverse = match self.must_peek_char()? {
+            // initiates a reverse id
+            Self::OPEN_PAREN => true,
+            // we're done parsing completely, no reverse id
+            Self::RULE_END => false,
+            _ => return self.unexpected_char_here(),
+        };
+
+        let reverse_filter;
+        let reverse_basic_id;
+
+        if has_reverse {
+            // if we have a reverse, parse it
+            self.consume(Self::OPEN_PAREN)?;
+            self.skip_whitespace();
+            reverse_filter = self.try_parse_unicode_set()?;
+            self.skip_whitespace();
+            reverse_basic_id = self.try_parse_basic_id()?;
+            self.skip_whitespace();
+            self.consume(Self::CLOSE_PAREN)?;
+        } else {
+            reverse_filter = None;
+            reverse_basic_id = None;
+        }
+
+        Ok((
+            forward_filter,
+            forward_basic_id,
+            reverse_filter,
+            reverse_basic_id,
+            has_reverse,
+        ))
+    }
+
+    fn parse_conversion_or_variable_rule(&mut self) -> Result<Rule> {
+        // Syntax:
+        // <var-ref> '=' <section> ';'               # variable rule
+        // <half-rule> <direction> <half-rule> ';'   # conversion rule
+
+        // try parsing into a variable rule
+        let first_elt = if Self::VAR_PREFIX == self.must_peek_char()? {
+            let elt = self.parse_variable_or_backref_or_anchor_end()?;
+            self.skip_whitespace();
+            if Self::VAR_DEF_OP == self.must_peek_char()? {
+                // must be a variable ref
+                let var_name = match elt {
+                    Element::VariableRef(var_name) => var_name,
+                    _ => return self.unexpected_char_here(),
+                };
+                self.iter.next();
+                let section = self.parse_section(None)?;
+                let err_offset = self.must_peek_index()?;
+                self.consume(Self::RULE_END)?;
+                self.add_variable(var_name.clone(), section.clone(), err_offset)?;
+                return Ok(Rule::VariableDefinition(var_name, section));
+            }
+            Some(elt)
+        } else {
+            None
+        };
+
+        // must be a conversion rule
+        // passing down first_elt, which was already parsed for the variable-rule check
+        let first_half = self.parse_half_rule(first_elt)?;
+
+        let dir = self.parse_direction()?;
+
+        let second_half = self.parse_half_rule(None)?;
+        self.consume(Self::RULE_END)?;
+        Ok(Rule::Conversion(first_half, dir, second_half))
+    }
+
+    fn parse_single_id(&mut self) -> Result<SingleId> {
+        // Syntax:
+        // <unicode-set>? <basic-id>
+
+        self.skip_whitespace();
+        let filter = self.try_parse_unicode_set()?;
+        self.skip_whitespace();
+        let basic_id = self.parse_basic_id()?;
+        Ok(SingleId { filter, basic_id })
+    }
+
+    fn try_parse_basic_id(&mut self) -> Result<Option<BasicId>> {
+        if let Some(c) = self.peek_char() {
+            if self.xid_start.contains(c) {
+                return Ok(Some(self.parse_basic_id()?));
+            }
+        }
+        Ok(None)
+    }
+
+    // TODO(#3736): factor this out for runtime ID parsing?
+    fn parse_basic_id(&mut self) -> Result<BasicId> {
+        // Syntax:
+        // <unicode-identifier> ('-' <unicode-identifier>)? ('/' <unicode-identifier>)?
+
+        // we must have at least one identifier. the implicit "Null" id is only allowed
+        // in a '::'-rule, which is handled explicitly.
+        let first_id = self.parse_unicode_identifier()?;
+
+        self.skip_whitespace();
+        let second_id = self.try_parse_sep_and_unicode_identifier(Self::ID_SEP)?;
+        self.skip_whitespace();
+        let variant_id = self.try_parse_sep_and_unicode_identifier(Self::VARIANT_SEP)?;
+
+        let (source, target) = match second_id {
+            None => ("Any".to_string(), first_id),
+            Some(second_id) => (first_id, second_id),
+        };
+
+        Ok(BasicId {
+            source,
+            target,
+            variant: variant_id.unwrap_or_default(),
+        })
+    }
+
+    fn try_parse_sep_and_unicode_identifier(&mut self, sep: char) -> Result<Option<String>> {
+        if Some(sep) == self.peek_char() {
+            self.iter.next();
+            self.skip_whitespace();
+            // at this point we must be parsing an identifier
+            return Ok(Some(self.parse_unicode_identifier()?));
+        }
+        Ok(None)
+    }
+
+    // parses an XID-based identifier
+    fn parse_unicode_identifier(&mut self) -> Result<String> {
+        // Syntax:
+        // <xid-start> (<xid-continue>)*
+
+        let mut id = String::new();
+
+        let (first_offset, first_c) = self.must_peek()?;
+        if !self.xid_start.contains(first_c) {
+            return Err(PEK::UnexpectedChar(first_c).with_offset(first_offset));
+        }
+        self.iter.next();
+        id.push(first_c);
+
+        loop {
+            let c = self.must_peek_char()?;
+            if !self.xid_continue.contains(c) {
+                break;
+            }
+            id.push(c);
+            self.iter.next();
+        }
+
+        Ok(id)
+    }
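+
+    // Illustrative sketch (assumed example): the half-rule `pre { key } post`
+    // produces roughly
+    //
+    //     HalfRule {
+    //         ante: vec![Element::Literal("pre".into())],
+    //         key: vec![Element::Literal("key".into())],
+    //         post: vec![Element::Literal("post".into())],
+    //     }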
+
+    fn parse_half_rule(&mut self, prev_elt: Option<Element>) -> Result<HalfRule> {
+        // Syntax:
+        // (<section> '{')? <section> ('}' <section>)?
+
+        let ante;
+        let key;
+        let post;
+        let first = self.parse_section(prev_elt)?;
+        if Self::LEFT_CONTEXT == self.must_peek_char()? {
+            self.iter.next();
+            ante = first;
+            key = self.parse_section(None)?;
+        } else {
+            ante = vec![];
+            key = first;
+        }
+        if Self::RIGHT_CONTEXT == self.must_peek_char()? {
+            self.iter.next();
+            post = self.parse_section(None)?;
+        } else {
+            post = vec![];
+        }
+
+        Ok(HalfRule { ante, key, post })
+    }
+
+    fn parse_direction(&mut self) -> Result<Direction> {
+        // Syntax:
+        // '<' | '>' | '<>' | '→' | '←' | '↔'
+
+        match self.must_peek_char()? {
+            '>' | '→' => {
+                self.iter.next();
+                Ok(Direction::Forward)
+            }
+            '↔' => {
+                self.iter.next();
+                Ok(Direction::Both)
+            }
+            '←' => {
+                self.iter.next();
+                Ok(Direction::Reverse)
+            }
+            '<' => {
+                self.iter.next();
+                match self.must_peek_char()? {
+                    '>' => {
+                        self.iter.next();
+                        Ok(Direction::Both)
+                    }
+                    _ => Ok(Direction::Reverse),
+                }
+            }
+            _ => self.unexpected_char_here(),
+        }
+    }
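+
+    // note: the previously parsed element is threaded through `prev_elt` so that a
+    // postfix quantifier can take ownership of it (instead of the element having
+    // already been pushed to the section), e.g., the `+` in `(ab)+` binds to the
+    // preceding segment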
+
+    // whitespace before and after is consumed
+    fn parse_section(&mut self, prev_elt: Option<Element>) -> Result<Section> {
+        let mut section = Section::new();
+        let mut prev_elt = prev_elt;
+
+        loop {
+            self.skip_whitespace();
+            let c = self.must_peek_char()?;
+            if self.is_section_end(c) {
+                if let Some(elt) = prev_elt.take() {
+                    section.push(elt);
+                }
+                break;
+            }
+
+            let next_elt = self.parse_element(&mut prev_elt)?;
+
+            if let Some(elt) = prev_elt {
+                section.push(elt);
+            }
+            prev_elt = Some(next_elt);
+        }
+
+        Ok(section)
+    }
+
+    fn parse_quantifier_kind(&mut self) -> Result<QuantifierKind> {
+        match self.must_peek_char()? {
+            Self::OPTIONAL => {
+                self.iter.next();
+                Ok(QuantifierKind::ZeroOrOne)
+            }
+            Self::ZERO_OR_MORE => {
+                self.iter.next();
+                Ok(QuantifierKind::ZeroOrMore)
+            }
+            Self::ONE_OR_MORE => {
+                self.iter.next();
+                Ok(QuantifierKind::OneOrMore)
+            }
+            _ => self.unexpected_char_here(),
+        }
+    }
+
+    fn parse_element(&mut self, prev_elt: &mut Option<Element>) -> Result<Element> {
+        match self.must_peek_char()? {
+            Self::VAR_PREFIX => self.parse_variable_or_backref_or_anchor_end(),
+            Self::ANCHOR_START => {
+                self.iter.next();
+                Ok(Element::AnchorStart)
+            }
+            Self::OPEN_PAREN => self.parse_segment(),
+            Self::DOT => {
+                self.iter.next();
+                Ok(Element::UnicodeSet(self.get_dot_set()?))
+            }
+            Self::OPTIONAL | Self::ZERO_OR_MORE | Self::ONE_OR_MORE => {
+                let quantifier = self.parse_quantifier_kind()?;
+                if let Some(elt) = prev_elt.take() {
+                    Ok(Element::Quantifier(quantifier, Box::new(elt)))
+                } else {
+                    self.unexpected_char_here()
+                }
+            }
+            Self::FUNCTION_PREFIX => self.parse_function_call(),
+            Self::CURSOR_PLACEHOLDER | Self::CURSOR => self.parse_cursor(),
+            Self::QUOTE => Ok(Element::Literal(self.parse_quoted_literal()?)),
+            _ if self.peek_is_unicode_set_start() => {
+                Ok(Element::UnicodeSet(self.parse_unicode_set()?))
+            }
+            c if self.is_valid_unquoted_literal(c) => Ok(Element::Literal(self.parse_literal()?)),
+            _ => self.unexpected_char_here(),
+        }
+    }
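+
+    // Illustrative examples of the `$`-disambiguation below (sketch):
+    //     "$1 "   => Element::BackRef(1)
+    //     "$var " => Element::VariableRef("var")
+    //     "$ ;"   => Element::AnchorEnd (the `$` is followed by neither an ASCII
+    //                digit nor an XID-start character)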
+
+    fn parse_variable_or_backref_or_anchor_end(&mut self) -> Result<Element> {
+        self.consume(Self::VAR_PREFIX)?;
+
+        match self.must_peek_char()? {
+            c if c.is_ascii_digit() => {
+                // we have a backref
+                let num = self.parse_number()?;
+                Ok(Element::BackRef(num))
+            }
+            c if self.xid_start.contains(c) => {
+                // we have a variable
+                let variable_id = self.parse_unicode_identifier()?;
+                Ok(Element::VariableRef(variable_id))
+            }
+            _ => {
+                // this was an anchor end
+                Ok(Element::AnchorEnd)
+            }
+        }
+    }
+
+    fn parse_number(&mut self) -> Result<u32> {
+        let (first_offset, first_c) = self.must_next()?;
+        if !matches!(first_c, '1'..='9') {
+            return Err(PEK::UnexpectedChar(first_c).with_offset(first_offset));
+        }
+        // inclusive end offset
+        let mut end_offset = first_offset;
+
+        loop {
+            let (offset, c) = self.must_peek()?;
+            if !c.is_ascii_digit() {
+                break;
+            }
+            self.iter.next();
+            end_offset = offset;
+        }
+
+        // first_offset is valid by `Chars`, and the inclusive end_offset
+        // is valid because we only set it to the indices of ASCII chars,
+        // which are all exactly 1 UTF-8 byte
+        #[allow(clippy::indexing_slicing)]
+        self.source[first_offset..=end_offset]
+            .parse()
+            .map_err(|_| PEK::InvalidNumber.with_offset(end_offset))
+    }
+
+    fn parse_literal(&mut self) -> Result<String> {
+        let mut buf = String::new();
+        loop {
+            self.skip_whitespace();
+            let c = self.must_peek_char()?;
+            if c == Self::ESCAPE {
+                self.parse_escaped_char_into_buf(&mut buf)?;
+                continue;
+            }
+            if !self.is_valid_unquoted_literal(c) {
+                break;
+            }
+            self.iter.next();
+            buf.push(c);
+        }
+        Ok(buf)
+    }
+
+    fn parse_quoted_literal(&mut self) -> Result<String> {
+        // Syntax:
+        // \' [^']* \'
+
+        let mut buf = String::new();
+        self.consume(Self::QUOTE)?;
+        loop {
+            let c = self.must_next_char()?;
+            if c == Self::QUOTE {
+                break;
+            }
+            buf.push(c);
+        }
+        if buf.is_empty() {
+            // '' is the escaped version of a quote
+            buf.push(Self::QUOTE);
+        }
+        Ok(buf)
+    }
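+
+    // Illustrative examples of the escape handling below (sketch):
+    //     \u0041       pushes 'A'
+    //     \x41         pushes 'A'
+    //     \u{48 65 6C} pushes "Hel" (multiple whitespace-separated code points)
+    //     \q           pushes 'q' (unrecognized escapes push the char itself)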
+
+    // parses all supported escapes. the code is somewhat duplicated from icu_unicodeset_parser;
+    // we might want to deduplicate this with unicodeset_parser somehow
+    fn parse_escaped_char_into_buf(&mut self, buf: &mut String) -> Result<()> {
+        self.consume(Self::ESCAPE)?;
+
+        let (offset, next_char) = self.must_next()?;
+
+        match next_char {
+            'u' | 'x' if self.peek_char() == Some('{') => {
+                // bracketedHex
+                self.iter.next();
+
+                // the first code point is mandatory
+                self.skip_whitespace();
+                let c = self.parse_hex_digits_into_char(1, 6)?;
+                buf.push(c);
+
+                loop {
+                    let skipped = self.skip_whitespace();
+                    let next_char = self.must_peek_char()?;
+                    if next_char == '}' {
+                        self.iter.next();
+                        break;
+                    }
+                    if skipped == 0 {
+                        // multiple code points must be separated in multi escapes
+                        return self.unexpected_char_here();
+                    }
+
+                    let c = self.parse_hex_digits_into_char(1, 6)?;
+                    buf.push(c);
+                }
+            }
+            'u' => {
+                // 'u' hex{4}
+                let c = self.parse_hex_digits_into_char(4, 4)?;
+                buf.push(c);
+            }
+            'x' => {
+                // 'x' hex{2}
+                let c = self.parse_hex_digits_into_char(2, 2)?;
+                buf.push(c);
+            }
+            'U' => {
+                // 'U00' ('0' hex{5} | '10' hex{4})
+                let c = self.parse_hex_digits_into_char(6, 6)?;
+                buf.push(c);
+            }
+            'N' => {
+                // parse code point with name in {}
+                // tracking issue: https://github.com/unicode-org/icu4x/issues/1397
+                return Err(PEK::Unimplemented.with_offset(offset));
+            }
+            'a' => buf.push('\u{0007}'),
+            'b' => buf.push('\u{0008}'),
+            't' => buf.push('\u{0009}'),
+            'n' => buf.push('\u{000A}'),
+            'v' => buf.push('\u{000B}'),
+            'f' => buf.push('\u{000C}'),
+            'r' => buf.push('\u{000D}'),
+            _ => buf.push(next_char),
+        }
+        Ok(())
+    }
+
+    fn parse_hex_digits_into_char(&mut self, min: usize, max: usize) -> Result<char> {
+        let first_offset = self.must_peek_index()?;
+        let end_offset = self.validate_hex_digits(min, max)?;
+
+        // validate_hex_digits ensures that all chars (including the last one) are ASCII hex
+        // digits, which are exactly one UTF-8 byte, so slicing on these offsets always
+        // respects char boundaries
+        #[allow(clippy::indexing_slicing)]
+        let hex_source = &self.source[first_offset..=end_offset];
+        let num = u32::from_str_radix(hex_source, 16).map_err(|_| PEK::Internal)?;
+        char::try_from(num).map_err(|_| PEK::InvalidEscape.with_offset(end_offset))
+    }
+
+    // validates [0-9a-fA-F]{min,max}, consuming the digits in the process;
+    // returns the offset of the last digit
+    fn validate_hex_digits(&mut self, min: usize, max: usize) -> Result<usize> {
+        let mut last_offset = 0;
+        for count in 0..max {
+            let (offset, c) = self.must_peek()?;
+            if !c.is_ascii_hexdigit() {
+                if count < min {
+                    return self.unexpected_char_here();
+                } else {
+                    break;
+                }
+            }
+            self.iter.next();
+            last_offset = offset;
+        }
+        Ok(last_offset)
+    }
+
+    fn parse_segment(&mut self) -> Result<Element> {
+        self.consume(Self::OPEN_PAREN)?;
+        let elt = Element::Segment(self.parse_section(None)?);
+        self.consume(Self::CLOSE_PAREN)?;
+        Ok(elt)
+    }
+
+    fn try_parse_unicode_set(&mut self) -> Result<Option<UnicodeSet>> {
+        if self.peek_is_unicode_set_start() {
+            return Ok(Some(self.parse_unicode_set()?));
+        }
+        Ok(None)
+    }
+
+    fn parse_unicode_set(&mut self) -> Result<UnicodeSet> {
+        let pre_offset = self.must_peek_index()?;
+        // pre_offset is a valid index because self.iter (used in must_peek_index)
+        // was created from self.source
+        #[allow(clippy::indexing_slicing)]
+        let set_source = &self.source[pre_offset..];
+        let (set, consumed_bytes) = self.unicode_set_from_str(set_source).map_err(|mut e| {
+            e.offset.get_or_insert(pre_offset);
+            e
+        })?;
+
+        // advance self.iter by consumed_bytes bytes
+        while let Some(offset) = self.peek_index() {
+            // we can use equality because unicodeset_parser also lexes on char boundaries
+            // note: we must not consume this final token, because it is the first non-consumed char
+            if offset == pre_offset + consumed_bytes {
+                break;
+            }
+            self.iter.next();
+        }
+
+        Ok(set)
+    }
+
+    fn get_dot_set(&mut self) -> Result<UnicodeSet> {
+        match &self.dot_set {
+            Some(set) => Ok(set.clone()),
+            None => {
+                let (set, _) = self
+                    .unicode_set_from_str(Self::DOT_SET)
+                    .map_err(|_| PEK::Internal)?;
+                self.dot_set = Some(set.clone());
+                Ok(set)
+            }
+        }
+    }
+
+    fn unicode_set_from_str(&self, set: &str) -> Result<(UnicodeSet, usize)> {
+        let (set, consumed_bytes) = icu_unicodeset_parser::parse_unstable_with_variables(
+            set,
+            &self.variable_map,
+            self.property_provider,
+        )?;
+        Ok((set, consumed_bytes))
+    }
+
+    fn parse_function_call(&mut self) -> Result<Element> {
+        self.consume(Self::FUNCTION_PREFIX)?;
+
+        // parse the single-id
+        let single_id = self.parse_single_id()?;
+        self.skip_whitespace();
+        self.consume(Self::OPEN_PAREN)?;
+        let section = self.parse_section(None)?;
+        self.consume(Self::CLOSE_PAREN)?;
+
+        Ok(Element::FunctionCall(single_id, section))
+    }
+
+    fn parse_cursor(&mut self) -> Result<Element> {
+        // Syntax:
+        // '@'* '|' '@'*
+
+        let mut num_pre = 0;
+        let mut num_post = 0;
+        // parse pre
+        loop {
+            self.skip_whitespace();
+            match self.must_peek_char()? {
+                Self::CURSOR_PLACEHOLDER => {
+                    self.iter.next();
+                    num_pre += 1;
+                }
+                Self::CURSOR => {
+                    self.iter.next();
+                    break;
+                }
+                _ => return self.unexpected_char_here(),
+            }
+        }
+        // parse post
+        loop {
+            self.skip_whitespace();
+            match self.must_peek_char()? {
+                Self::CURSOR_PLACEHOLDER => {
+                    self.iter.next();
+                    num_post += 1;
+                }
+                _ => break,
+            }
+        }
+
+        Ok(Element::Cursor(num_pre, num_post))
+    }
+
+    fn add_variable(&mut self, name: String, value: Section, offset: usize) -> Result<()> {
+        if let Some(uset_value) = self.try_uset_flatten_section(&value) {
+            self.variable_map
+                .insert(name, uset_value)
+                .map_err(|_| PEK::DuplicateVariable.with_offset(offset))?;
+        }
+        Ok(())
+    }
+
+    fn try_uset_flatten_section(&self, section: &Section) -> Option<VariableValue<'static>> {
+        // note: we could avoid some clones here if the VariableMap stored &T's (or both), but
+        // that is quite the edge case in transliterator source files
+
+        // is this just a unicode set?
+        if let [Element::UnicodeSet(set)] = &section[..] {
+            return Some(VariableValue::UnicodeSet(set.clone()));
+        }
+        // if it's just a variable that is already a valid uset variable, we return that
+        if let [Element::VariableRef(name)] = &section[..] {
+            if let Some(value) = self.variable_map.get(name) {
+                return Some(value.clone());
+            }
+            return None;
+        }
+
+        // if not, this must be a string literal
+        let mut combined_literal = String::new();
+        for elt in section {
+            match elt {
+                Element::Literal(s) => combined_literal.push_str(s),
+                Element::VariableRef(name) => match self.variable_map.get(name) {
+                    Some(VariableValue::String(s)) => combined_literal.push_str(s),
+                    Some(VariableValue::Char(c)) => combined_literal.push(*c),
+                    _ => return None,
+                },
+                _ => return None,
+            }
+        }
+        Some(VariableValue::String(Cow::Owned(combined_literal)))
+    }
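+
+    // Illustrative sketch of the flattening above (assumed example): given
+    //     $x = 'ab' ; $y = $x c ;
+    // `$y` flattens to the string "abc" and is therefore usable inside a
+    // UnicodeSet such as [$y]; a definition containing, e.g., a quantifier or a
+    // segment cannot be flattened and is not available inside UnicodeSets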
+
+    fn consume(&mut self, expected: char) -> Result<()> {
+        match self.must_next()? {
+            (offset, c) if c != expected => Err(PEK::UnexpectedChar(c).with_offset(offset)),
+            _ => Ok(()),
+        }
+    }
+
+    // skips whitespace and comments, returns the number of skipped chars
+    fn skip_whitespace(&mut self) -> usize {
+        let mut count = 0;
+        while let Some(c) = self.peek_char() {
+            if c == Self::COMMENT {
+                count += self.skip_until(Self::COMMENT_END);
+                continue;
+            }
+            if !self.pat_ws.contains(c) {
+                break;
+            }
+            self.iter.next();
+            count += 1;
+        }
+        count
+    }
+
+    // skips until the next occurrence of `end`, which is also consumed;
+    // returns the number of skipped chars
+    fn skip_until(&mut self, end: char) -> usize {
+        let mut count = 0;
+        for (_, c) in self.iter.by_ref() {
+            count += 1;
+            if c == end {
+                break;
+            }
+        }
+        count
+    }
+
+    fn peek_is_unicode_set_start(&mut self) -> bool {
+        match self.peek_char() {
+            Some(Self::SET_START) => true,
+            Some(Self::ESCAPE) => {
+                let mut it = self.iter.clone();
+                // skip past the ESCAPE
+                it.next();
+                matches!(it.next(), Some((_, 'p' | 'P')))
+            }
+            _ => false,
+        }
+    }
+
+    fn peek_char(&mut self) -> Option<char> {
+        self.iter.peek().map(|(_, c)| *c)
+    }
+
+    fn peek_index(&mut self) -> Option<usize> {
+        self.iter.peek().map(|(idx, _)| *idx)
+    }
+
+    // use this whenever an empty iterator would imply an Eof error
+    fn must_next(&mut self) -> Result<(usize, char)> {
+        self.iter.next().ok_or(PEK::Eof.into())
+    }
+
+    // see must_next
+    fn must_next_char(&mut self) -> Result<char> {
+        self.must_next().map(|(_, c)| c)
+    }
+
+    // use this whenever an empty iterator would imply an Eof error
+    fn must_peek(&mut self) -> Result<(usize, char)> {
+        self.iter.peek().copied().ok_or(PEK::Eof.into())
+    }
+
+    // see must_peek
+    fn must_peek_char(&mut self) -> Result<char> {
+        self.must_peek().map(|(_, c)| c)
+    }
+
+    // see must_peek
+    fn must_peek_index(&mut self) -> Result<usize> {
+        self.must_peek().map(|(idx, _)| idx)
+    }
+
+    fn unexpected_char_here<T>(&mut self) -> Result<T> {
+        let (offset, char) = self.must_peek()?;
+        Err(PEK::UnexpectedChar(char).with_offset(offset))
+    }
+
+    fn is_section_end(&self, c: char) -> bool {
+        matches!(
+            c,
+            Self::RULE_END
+                | Self::CLOSE_PAREN
+                | Self::RIGHT_CONTEXT
+                | Self::LEFT_CONTEXT
+                | Self::VAR_DEF_OP
+                | '<'
+                | '>'
+                | '→'
+                | '←'
+                | '↔'
+        )
+    }
+
+    fn is_valid_unquoted_literal(&self, c: char) -> bool {
+        // allowing \ since it's used for escapes, which are allowed in an unquoted context
+        c.is_ascii() && (c.is_ascii_alphanumeric() || c == '\\')
+            || (!c.is_ascii() && c != '→' && c != '←' && c != '↔')
+    }
+}
+
+// used in tests
+#[allow(unused)]
+#[cfg(feature = "compiled_data")]
+pub(crate) fn parse(source: &str) -> Result<Vec<Rule>> {
+    parse_unstable(source, &icu_properties::provider::Baked)
+}
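+
+// Usage sketch (assumes the `compiled_data` feature; both functions are
+// crate-internal):
+//
+//     let rules = parse(":: NFC ; a <> b ;")?;
+//     assert_eq!(rules.len(), 2);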

+pub(crate) fn parse_unstable<P>(source: &str, provider: &P) -> Result<Vec<Rule>>
+where
+    P: ?Sized
+        + DataProvider<XidStartV1Marker>
+        + DataProvider<XidContinueV1Marker>
+        // ... one `DataProvider<...V1Marker>` bound per Unicode property supported by
+        // `icu_unicodeset_parser`; the full list is elided here
+        + DataProvider<PatternWhiteSpaceV1Marker>,
+{
+    let xid_start = load_xid_start(provider).map_err(|_| PEK::Internal)?;
+    let xid_start_list = xid_start.to_code_point_inversion_list();
+    let xid_continue = load_xid_continue(provider).map_err(|_| PEK::Internal)?;
+    let xid_continue_list = xid_continue.to_code_point_inversion_list();
+
+    let pat_ws = load_pattern_white_space(provider).map_err(|_| PEK::Internal)?;
+    let pat_ws_list = pat_ws.to_code_point_inversion_list();
+
+    let mut parser = TransliteratorParser::new(
+        source,
+        &xid_start_list,
+        &xid_continue_list,
+        &pat_ws_list,
+        provider,
+    );
+    parser.parse_rules()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_full() {
+        let source = r##"
+        :: [a-z\]] ; :: [b-z] Latin/BGN ;
+        :: Source-Target/Variant () ;::([b-z]Target-Source/Variant) ;
+        :: [a-z] Any ([b-z] Target-Source/Variant);
+
+        $my_var = an arbitrary section ',' some quantifiers *+? 'and other variables: $var' $var ;
+        $innerMinus = '-' ;
+        $minus = $innerMinus ;
+        $good_set = [a $minus z] ;
+
+        ^ (start) { key ' key '+ $good_set } > $102 } post\-context$;
+        # contexts are optional
+        target < source ;
+        # contexts can be empty
+        { 'source-or-target' } <> { 'target-or-source' } ;
+
+        (nested (sections)+ are () so fun) > ;
+
+        . > ;
+
+        :: ([{Inverse]-filter}]) ;
+        "##;
+
+        if let Err(e) = parse(source) {
+            panic!("Failed to parse {:?}: {:?}", source, e);
+        }
+    }
+
+    #[test]
+    fn test_conversion_rules_ok() {
+        let sources = [
+            r"a > b ;",
+            r"a < b ;",
+            r"a <> b ;",
+            r"a → b ;",
+            r"a ← b ;",
+            r"a ↔ b ;",
+            r"a \> > b ;",
+            r"a \→ > b ;",
+            r"{ a > b ;",
+            r"a { > b ;",
+            r"{ a } > b ;",
+            r"{ a } > { b ;",
+            r"{ a } > { b } ;",
+            r"^ pre [a-z] { a } post [$] $ > ^ [$] pre { b [b-z] } post $ ;",
+            r"[äöü] > ;",
+            r"([äöü]) > &Remove($1) ;",
+            r"[äöü] { ([äöü]+) > &Remove($1) ;",
+            r"|@@@ a <> b @@@@ @ | ;",
+            r"|a <> b ;",
+        ];
+
+        for source in sources {
+            if let Err(e) = parse(source) {
+                panic!("Failed to parse {:?}: {:?}", source, e);
+            }
+        }
+    }
> b ;", + ]; + + for source in sources { + if let Ok(rules) = parse(source) { + panic!("Parsed invalid source {:?}: {:?}", source, rules); + } + } + } + + #[test] + fn test_variable_rules_ok() { + let sources = [ + r" $my_var = [a-z] ;", + r"$my_var = äüöÜ ;", + r"$my_var = [a-z] literal ; $other_var = [A-Z] [b-z];", + r"$my_var = [a-z] ; $other_var = [A-Z] [b-z];", + r"$my_var = [a-z] ; $other_var = $my_var + $2222;", + r"$my_var = [a-z] ; $other_var = $my_var \+\ \$2222 \\ 'hello\';", + r" + $innerMinus = '-' ; + $minus = $innerMinus ; + $good_set = [a $minus z] ; + ", + ]; + + for source in sources { + if let Err(e) = parse(source) { + panic!("Failed to parse {:?}: {:?}", source, e); + } + } + } + + #[test] + fn test_variable_rules_err() { + let sources = [ + r" $ my_var = a ;", + r" $my_var = a_2 ;", + r"$my_var 2 = [a-z] literal ;", + r"$my_var = [$doesnt_exist] ;", + ]; + + for source in sources { + if let Ok(rules) = parse(source) { + panic!("Parsed invalid source {:?}: {:?}", source, rules); + } + } + } + + #[test] + fn test_global_filters_ok() { + let sources = [ + r":: [^\[$] ;", + r":: \p{L} ;", + r":: [^\[{[}$] ;", + r":: [^\[{]}$] ;", + r":: [^\[{]\}]}$] ;", + r":: ([^\[$]) ;", + r":: ( [^\[$] ) ;", + r":: [^[a-z[]][]] ;", + r":: [^[a-z\[\]]\]] ;", + r":: [^\]] ;", + ]; + + for source in sources { + if let Err(e) = parse(source) { + panic!("Failed to parse {:?}: {:?}", source, e); + } + } + } + + #[test] + fn test_global_filters_err() { + let sources = [ + r":: [^\[$ ;", + r":: \p{L ;", + r":: [^[$] ;", + r":: [^\[$]) ;", + r":: ( [^\[$] ;", + r":: [^[a-z[]][]] [] ;", + r":: [^[a-z\[\]]\]] ([a-z]);", + r":: [a$-^\]] ;", + r":: ( [] [] ) ;", + r":: () [] ;", + ]; + + for source in sources { + if let Ok(rules) = parse(source) { + panic!("Parsed invalid source {:?}: {:?}", source, rules); + } + } + } + + #[test] + fn test_function_calls_ok() { + let sources = [ + r"$fn = & Any-Any/Variant ($var literal 'quoted literal' $1) ;", + r"$fn = &[a-z] Any-Any/Variant ($var literal 'quoted literal' $1) ;", + r"$fn = &[a-z]Any-Any/Variant ($var literal 'quoted literal' $1) ;", + r"$fn = &[a-z]Any/Variant ($var literal 'quoted literal' $1) ;", + r"$fn = &Any/Variant ($var literal 'quoted literal' $1) ;", + r"$fn = &[a-z]Any ($var literal 'quoted literal' $1) ;", + r"$fn = &Any($var literal 'quoted literal' $1) ;", + ]; + + for source in sources { + if let Err(e) = parse(source) { + panic!("Failed to parse {:?}: {:?}", source, e); + } + } + } + + #[test] + fn test_function_calls_err() { + let sources = [ + r"$fn = &[a-z]($var literal 'quoted literal' $1) ;", + r"$fn = &[a-z] ($var literal 'quoted literal' $1) ;", + r"$fn = &($var literal 'quoted literal' $1) ;", + ]; + + for source in sources { + if let Ok(rules) = parse(source) { + panic!("Parsed invalid source {:?}: {:?}", source, rules); + } + } + } + + #[test] + fn test_transform_rules_ok() { + let sources = [ + ":: NFD; :: NFKC;", + ":: Latin ;", + ":: any - Latin;", + ":: any - Latin/bgn;", + ":: any - Latin/bgn ();", + ":: any - Latin/bgn ([a-z] a-z);", + ":: ([a-z] a-z);", + ":: (a-z);", + ":: (a-z / variant);", + ":: [a-z] latin/variant (a-z / variant);", + ":: [a-z] latin/variant (a-z / variant) ;", + ":: [a-z] latin ( );", + ":: [a-z] latin ;", + "::[];", + ]; + + for source in sources { + if let Err(e) = parse(source) { + panic!("Failed to parse {:?}: {:?}", source, e); + } + } + } + + #[test] + fn test_transform_rules_err() { + let sources = [ + r":: a a ;", + r":: (a a) ;", + r":: a - z - b ;", + r":: ( a - z - b) ;", + r":: [] 
( a - z) ;", + r":: a-z ( [] ) ;", + r":: a-z / ( [] a-z ) ;", + r":: Latin-ASCII/BGN Arab-Greek/UNGEGN ;", + r":: (Latin-ASCII/BGN Arab-Greek/UNGEGN) ;", + ]; + + for source in sources { + if let Ok(rules) = parse(source) { + panic!("Parsed invalid source {:?}: {:?}", source, rules); + } + } + } +}