Skip to content

Commit

Permalink
CLI Changes Sub-PR: TOML merge Collation (#596)
Browse files Browse the repository at this point in the history
* Add tests for TOML collation

* Refactor: Update and use (non-differentiating) collation API

* Got `merge` collation working

Includes slight refactoring of Helix's TOML merging function, largely
for my sake while I was trying to understand it!

* Some sugar for your tea?
  • Loading branch information
Xophmeister authored Aug 15, 2023
1 parent b7303c2 commit 3db6d95
Show file tree
Hide file tree
Showing 2 changed files with 179 additions and 66 deletions.
220 changes: 155 additions & 65 deletions topiary-cli/src/configuration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@ pub enum CollationMode {
Override,
}

/// Translate a collation mode into the recursion depth used by `collate_toml`.
///
/// `CollationMode::Merge` maps to a depth of 4 and `CollationMode::Revise` to a depth of 2.
/// `collate_toml` decrements the depth at every table or array it descends into and falls back
/// to the graft value once the depth reaches zero, so the larger depth lets the collation reach
/// further into nested values.
///
/// # Panics
///
/// Panics for any other collation mode; callers are expected to handle those modes without
/// going through `collate_toml`.
impl From<&CollationMode> for usize {
    fn from(collation: &CollationMode) -> Self {
        if matches!(collation, CollationMode::Merge) {
            4
        } else if matches!(collation, CollationMode::Revise) {
            2
        } else {
            // Only the merging modes have a meaningful depth
            unreachable!()
        }
    }
}

/// Consume the configuration from the usual sources, collated as specified
pub fn fetch(
file: &Option<PathBuf>,
Expand Down Expand Up @@ -175,24 +186,23 @@ fn configuration_toml(
collation: &CollationMode,
) -> CLIResult<toml::Value> {
match collation {
CollationMode::Merge => todo!(),

CollationMode::Revise => {
CollationMode::Override => {
// It's safe to unwrap here, as `sources` is guaranteed to contain at least one element
sources
.iter()
.map(|source| source.try_into())
.reduce(|config, toml| Ok(merge_toml_values(config?, toml?, 3)))
.last()
.unwrap()
.try_into()
.map_err(TopiaryError::from)
}

CollationMode::Override => {
// CollationMode::Merge and CollationMode::Revise
_ => {
// It's safe to unwrap here, as `sources` is guaranteed to contain at least one element
sources
.last()
.iter()
.map(|source| source.try_into())
.reduce(|config, toml| Ok(collate_toml(config?, toml?, collation)))
.unwrap()
.try_into()
.map_err(TopiaryError::from)
}
}
}
Expand All @@ -215,75 +225,155 @@ fn find_workspace_configuration_dir() -> Option<PathBuf> {
.find(|path| path.exists())
}

/// Merge two TOML documents, merging values from `right` onto `left`
/// Collate two TOML documents, merging values from `graft` onto `base`.
///
/// When an array exists in both `left` and `right`, `right`'s array is
/// used. When a table exists in both `left` and `right`, the merged table
/// consists of all keys in `left`'s table unioned with all keys in `right`
/// with the values of `right` being merged recursively onto values of
/// `left`.
/// Arrays of tables with a `name` key (e.g., our `[[language]]` tables) are always merged; that
/// is, the union of the `base` and `graft` is taken. Otherwise, the `merge_depth` controls the
/// collation of arrays, resulting in concatenation. This can leave duplicates in the collated
/// TOML but, for Topiary, this only matters for our `Languages::extensions`, which is implemented
/// as a `HashSet`; thus deserialisation will deduplicate for us.
///
/// `merge_toplevel_arrays` controls whether a top-level array in the TOML
/// document is merged instead of overridden. This is useful for TOML
/// documents that use a top-level array of values like the `languages.toml`,
/// where one usually wants to override or add to the array instead of
/// replacing it altogether.
/// When a table exists in both `base` and `graft`, the merged table consists of all keys in
/// `base`'s table unioned with all keys in `graft` with the values of `graft` being merged
/// recursively onto values of `base`.
///
/// NOTE: This merge function is taken from Helix:
/// https://github.com/helix-editor/helix licensed under MPL-2.0. There
/// it is defined under: helix-loader/src/lib.rs. Taken from commit df09490
pub fn merge_toml_values(left: toml::Value, right: toml::Value, merge_depth: usize) -> toml::Value {
/// NOTE This collation function is forked from Helix, licensed under MPL-2.0
/// * Repo: https://github.com/helix-editor/helix
/// * Rev: df09490
/// * Path: helix-loader/src/lib.rs
fn collate_toml<T>(base: toml::Value, graft: toml::Value, merge_depth: T) -> toml::Value
where
T: Into<usize>,
{
use toml::Value;

fn get_name(v: &Value) -> Option<&str> {
v.get("name").and_then(Value::as_str)
}

match (left, right) {
(Value::Array(mut left_items), Value::Array(right_items)) => {
// The top-level arrays should be merged but nested arrays should
// act as overrides. For the `languages.toml` config, this means
// that you can specify a sub-set of languages in an overriding
// `languages.toml` but that nested arrays like file extensions
// arguments are replaced instead of merged.
if merge_depth > 0 {
left_items.reserve(right_items.len());
for rvalue in right_items {
let lvalue = get_name(&rvalue)
.and_then(|rname| {
left_items.iter().position(|v| get_name(v) == Some(rname))
})
.map(|lpos| left_items.remove(lpos));
let mvalue = match lvalue {
Some(lvalue) => merge_toml_values(lvalue, rvalue, merge_depth - 1),
None => rvalue,
};
left_items.push(mvalue);
}
Value::Array(left_items)
} else {
Value::Array(right_items)
let merge_depth: usize = merge_depth.into();

match (base, graft, merge_depth) {
// Fallback to the graft value if the recursion depth bottoms out
(_, graft, 0) => graft,

(Value::Array(mut base_items), Value::Array(graft_items), _) => {
for rvalue in graft_items {
// If our graft value has a `name` key, then we're dealing with a `[[language]]`
// table. In which case, pop it -- if it exists -- from the base array.
let language = get_name(&rvalue)
.and_then(|rname| base_items.iter().position(|v| get_name(v) == Some(rname)))
.map(|lpos| base_items.remove(lpos));

let mvalue = match language {
// Merge matching language tables
Some(lvalue) => collate_toml(lvalue, rvalue, merge_depth - 1),

// Collate everything else
None => rvalue,
};

base_items.push(mvalue);
}

Value::Array(base_items)
}
(Value::Table(mut left_map), Value::Table(right_map)) => {
if merge_depth > 0 {
for (rname, rvalue) in right_map {
match left_map.remove(&rname) {
Some(lvalue) => {
let merged_value = merge_toml_values(lvalue, rvalue, merge_depth - 1);
left_map.insert(rname, merged_value);
}
None => {
left_map.insert(rname, rvalue);
}

(Value::Table(mut base_map), Value::Table(graft_map), _) => {
for (rname, rvalue) in graft_map {
match base_map.remove(&rname) {
Some(lvalue) => {
let merged_value = collate_toml(lvalue, rvalue, merge_depth - 1);
base_map.insert(rname, merged_value);
}
None => {
base_map.insert(rname, rvalue);
}
}
Value::Table(left_map)
} else {
Value::Table(right_map)
}

Value::Table(base_map)
}
// Catch everything else we didn't handle, and use the right value
(_, value) => value,

// Fallback to the graft value for everything else
(_, graft, _) => graft,
}
}

#[cfg(test)]
mod test_config_collation {
    use super::{collate_toml, CollationMode, Configuration};

    // NOTE PartialEq for toml::Value is (understandably) order sensitive over array elements, so
    // we deserialise to `topiary::Configuration` for equality testing. This also has the effect
    // of side-stepping potential duplication, from concatenation, when using
    // `CollationMode::Merge`.

    /// Base document: two languages, each with a single extension
    static BASE: &str = r#"
[[language]]
name = "example"
extensions = ["eg"]
[[language]]
name = "demo"
extensions = ["demo"]
"#;

    /// Graft document: overlaps with `BASE` on the "example" language only
    static GRAFT: &str = r#"
[[language]]
name = "example"
extensions = ["example"]
indent = "\t"
"#;

    /// Collate `GRAFT` onto `BASE` using the given mode and deserialise the outcome
    fn collate_fixtures(mode: &CollationMode) -> Configuration {
        let base: toml::Value = toml::from_str(BASE).unwrap();
        let graft: toml::Value = toml::from_str(GRAFT).unwrap();

        collate_toml(base, graft, mode).try_into().unwrap()
    }

    /// Deserialise a TOML document describing the expected configuration
    fn parse_expected(document: &str) -> Configuration {
        toml::from_str(document).unwrap()
    }

    #[test]
    fn merge() {
        let merged = collate_fixtures(&CollationMode::Merge);

        // Merging keeps the extensions from both documents for the shared language
        let expected = parse_expected(
            r#"
[[language]]
name = "example"
extensions = ["eg", "example"]
indent = "\t"
[[language]]
name = "demo"
extensions = ["demo"]
"#,
        );

        assert_eq!(merged, expected);
    }

    #[test]
    fn revise() {
        let revised = collate_fixtures(&CollationMode::Revise);

        // Revising takes the graft's definition of the shared language
        let expected = parse_expected(
            r#"
[[language]]
name = "example"
extensions = ["example"]
indent = "\t"
[[language]]
name = "demo"
extensions = ["demo"]
"#,
        );

        assert_eq!(revised, expected);
    }
}
25 changes: 24 additions & 1 deletion topiary/src/configuration.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/// Topiary can be configured using the `Configuration` struct.
/// A basic configuration, written in TOML, is included at build time and parsed at runtime.
/// Additional configuration has to be provided by the user of the library.
use std::collections::HashSet;
use std::collections::{HashMap, HashSet};
use std::fmt;

use crate::{language::Language, FormatterError, FormatterResult};
Expand Down Expand Up @@ -67,6 +67,29 @@ impl Default for Configuration {
}
}

/// Build a `HashMap` of languages from a `Configuration`, keyed on `Language::name`.
///
/// Every language in `config.language` is cloned into the map alongside a clone of its name. If
/// two languages share a name, only one of them survives in the resulting map.
// NOTE Cloning could be avoided with some optimisation, but it is not worth the effort here.
impl From<&Configuration> for HashMap<String, Language> {
    fn from(config: &Configuration) -> Self {
        config
            .language
            .iter()
            .map(|entry| (entry.name.clone(), entry.clone()))
            .collect()
    }
}

// Order-invariant equality; required for unit testing.
//
// Both sides are converted into maps keyed on language name, so the order in which languages
// appear in each configuration does not influence the comparison.
impl PartialEq for Configuration {
    fn eq(&self, other: &Self) -> bool {
        let ours = HashMap::<String, Language>::from(self);
        let theirs = HashMap::<String, Language>::from(other);

        ours == theirs
    }
}

impl fmt::Display for Configuration {
/// Pretty-print configuration as TOML
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
Expand Down

0 comments on commit 3db6d95

Please sign in to comment.