diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fac72612..64f7bf57 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: run: export GC_DONT_GC=1; nix -L flake check - name: Build and test executable - run: 'echo \{ \"foo\": \"bar\" \} | nix run . -- -l json' + run: 'echo \{ \"foo\": \"bar\" \} | nix run . -- fmt -l json' - name: Verify that usage in README.md matches CLI output run: ./verify-documented-usage.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a78f14c..59c78c33 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,6 +53,7 @@ This name should be decided amongst the team before the release. * [#535](https://github.com/tweag/topiary/pull/535) Improved error message when idempotency fails due to invalid output in the first pass. * [#533](https://github.com/tweag/topiary/pull/533) Update tree-sitter-ocaml to 0.20.3 * [#576](https://github.com/tweag/topiary/pull/576) Allows prepending/appending `@begin_scope` and `@end_scope` +* [#583](https://github.com/tweag/topiary/pull/583) Modernisation of the command line interface (see [the CLI Migration Guide](/docs/migration-0.2-0.3.md), for details) ## v0.2.3 - Cyclic Cypress - 2023-06-20 diff --git a/Cargo.lock b/Cargo.lock index 52408e58..e106b5c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -92,9 +92,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86d6b683edf8d1119fe420a94f8a7e389239666aa72e65495d91c00462510151" +checksum = "88903cb14723e4d4003335bb7f8a14f27691649105346a0f0957466c096adfe6" dependencies = [ "anstyle", "bstr", @@ -105,6 +105,18 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "async-scoped" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7a6a57c8aeb40da1ec037f5d455836852f7a57e69e1b1ad3d8f38ac1d6cadf" +dependencies = [ + "futures", + "pin-project", + 
"slab", + "tokio", +] + [[package]] name = "async-stream" version = "0.3.5" @@ -124,7 +136,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.25", + "syn 2.0.26", ] [[package]] @@ -230,9 +242,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.3.11" +version = "4.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1640e5cc7fb47dbb8338fd471b105e7ed6c3cb2aeb00c2e067127ffd3764a05d" +checksum = "c27cdf28c0f604ba3f512b0c9a409f8de8513e4816705deb0498b627e7c3a3fd" dependencies = [ "clap_builder", "clap_derive", @@ -241,26 +253,27 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.3.11" +version = "4.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c59138d527eeaf9b53f35a77fcc1fad9d883116070c63d5de1c7dc7b00c72b" +checksum = "08a9f1ab5e9f01a9b81f202e8562eb9a10de70abf9eaeac1be465c28b75aa4aa" dependencies = [ "anstream", "anstyle", "clap_lex", "strsim", + "terminal_size", ] [[package]] name = "clap_derive" -version = "4.3.2" +version = "4.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8cd2b2a819ad6eec39e8f1d6b53001af1e5469f8c177579cdaeb313115b825f" +checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.25", + "syn 2.0.26", ] [[package]] @@ -449,6 +462,15 @@ dependencies = [ "instant", ] +[[package]] +name = "float-cmp" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +dependencies = [ + "num-traits", +] + [[package]] name = "futures" version = "0.3.28" @@ -505,7 +527,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.25", + "syn 2.0.26", ] [[package]] @@ -595,6 +617,12 @@ dependencies = 
[ "hashbrown", ] +[[package]] +name = "indoc" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c785eefb63ebd0e33416dfcb8d6da0bf27ce752843a45632a67bf10d4d4b5c4" + [[package]] name = "instant" version = "0.1.12" @@ -646,9 +674,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" [[package]] name = "js-sys" @@ -707,6 +735,12 @@ dependencies = [ "adler", ] +[[package]] +name = "normalize-line-endings" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" + [[package]] name = "num-traits" version = "0.2.15" @@ -762,6 +796,26 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "pin-project" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.26", +] + [[package]] name = "pin-project-lite" version = "0.2.10" @@ -810,8 +864,11 @@ checksum = "09963355b9f467184c04017ced4a2ba2d75cbcb4e7462690d388233253d4b1a9" dependencies = [ "anstyle", "difflib", + "float-cmp", "itertools 0.10.5", + "normalize-line-endings", "predicates-core", + "regex", ] [[package]] @@ -852,18 +909,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.64" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"78803b62cbf1f46fde80d7c0e803111524b9877184cfe7c3033659490ac7a7da" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.29" +version = "1.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" +checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0" dependencies = [ "proc-macro2", ] @@ -983,9 +1040,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" [[package]] name = "same-file" @@ -998,9 +1055,9 @@ dependencies = [ [[package]] name = "scopeguard" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" @@ -1028,14 +1085,14 @@ checksum = "389894603bd18c46fa56231694f8d827779c0951a667087194cf9de94ed24682" dependencies = [ "proc-macro2", "quote", - "syn 2.0.25", + "syn 2.0.26", ] [[package]] name = "serde_json" -version = "1.0.102" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5062a995d481b2308b6064e9af76011f2921c35f97b0468811ed9f6cd91dfed" +checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" dependencies = [ "itoa", "ryu", @@ -1079,9 +1136,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.25" +version = "2.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e3fc8c0c74267e2df136e5e5fb656a464158aa57624053375eb9c8c6e25ae2" +checksum = 
"45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970" dependencies = [ "proc-macro2", "quote", @@ -1111,6 +1168,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "terminal_size" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e6bf6f19e9f8ed8d4048dc22981458ebcf406d67e94cd422e5ecd73d63b3237" +dependencies = [ + "rustix 0.37.23", + "windows-sys", +] + [[package]] name = "termtree" version = "0.4.1" @@ -1145,7 +1212,7 @@ checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.25", + "syn 2.0.26", ] [[package]] @@ -1179,7 +1246,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.25", + "syn 2.0.26", ] [[package]] @@ -1229,9 +1296,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.19.12" +version = "0.19.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c500344a19072298cd05a7224b3c0c629348b78692bf48466c5238656e315a78" +checksum = "f8123f27e969974a3dfba720fdb560be359f57b44302d280ba72e76a74480e8a" dependencies = [ "indexmap", "serde", @@ -1277,16 +1344,21 @@ name = "topiary-cli" version = "0.2.3" dependencies = [ "assert_cmd", + "async-scoped", "clap", "directories", "env_logger", "futures", + "indoc", + "itertools 0.11.0", "log", + "predicates", "serde-toml-merge", "tempfile", "tokio", "toml", "topiary", + "tree-sitter-facade", ] [[package]] @@ -1353,9 +1425,9 @@ dependencies = [ [[package]] name = "tree-sitter-ocaml" -version = "0.20.3" +version = "0.20.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0615dd8d80e7f5fe49875aeae795726ca5dc37267f80f82a60dfa375657906b" +checksum = "fd1163abc658cf8ae0ecffbd8f4bd3ee00a2b98729de74f3b08f0e24f3ac208a" dependencies = [ "cc", "tree-sitter", @@ -1408,9 +1480,9 @@ checksum = 
"ccb97dac3243214f8d8507998906ca3e2e0b900bf9bf4870477f125b82e68f6e" [[package]] name = "unicode-ident" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] name = "unicode-width" @@ -1635,9 +1707,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "winnow" -version = "0.4.9" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81a2094c43cc94775293eaa0e499fbc30048a6d824ac82c0351a8c0bf9112529" +checksum = "81fac9742fd1ad1bd9643b991319f72dd031016d44b77039a26977eb667141e7" dependencies = [ "memchr", ] diff --git a/Cargo.toml b/Cargo.toml index 6dab294f..8661b192 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,14 +36,17 @@ targets = ["x86_64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-pc-windows [workspace.dependencies] assert_cmd = "2.0" +async-scoped = { version = "0.7.1", features = ["use-tokio"] } cfg-if = "1.0.0" clap = { version = "4.3", features = [ "env" ] } criterion = "0.5" directories = "5.0" env_logger = "0.10" futures = "0.3.28" +indoc = "2.0" itertools = "0.11" log = "0.4" +predicates = "3.0.3" pretty_assertions = "1.3" prettydiff = { version = "0.6.4", default-features = false } regex = "1.8.2" diff --git a/README.md b/README.md index 01811200..d510febc 100644 --- a/README.md +++ b/README.md @@ -125,9 +125,9 @@ environment variable `TOPIARY_LANGUAGE_DIR` to point to the directory where Topiary's language query files (`.scm`) are located**. By default, you should set it to `/languages`, for example: -```console +```sh export TOPIARY_LANGUAGE_DIR=/home/me/tools/topiary/languages -topiary -i -f ./projects/helloworld/hello.ml +topiary fmt ./projects/helloworld/hello.ml ``` `TOPIARY_LANGUAGE_DIR` can alternatively be set at build time. 
Topiary will pick @@ -159,44 +159,243 @@ pre-commit-check = nix-pre-commit-hooks.run { ### Usage +The Topiary CLI uses a number of subcommands to delineate functionality. +These can be listed with `topiary --help`; each subcommand then has its +own, dedicated help text. + + + ``` CLI app for Topiary, the universal code formatter. -Usage: topiary [OPTIONS] <--language |--input-files [...]> +Usage: topiary [OPTIONS] + +Commands: + fmt Format inputs + vis Visualise the input's Tree-sitter parse tree + cfg Print the current configuration + help Print this message or the help of the given subcommand(s) Options: - -l, --language - Which language to parse and format [possible values: json, nickel, ocaml, ocaml-interface, ocamllex, toml] - -f, --input-files [...] - Path to an input file or multiple input files. If omitted, or equal to "-", read from standard input. If multiple files are provided, `in_place` is assumed [default: -] - -q, --query - Which query file to use - -o, --output-file - Path to an output file. 
If omitted, or equal to "-", write to standard output - -i, --in-place - Format the input files in place - -v, --visualise[=] - Visualise the syntax tree, rather than format [possible values: json, dot] - -s, --skip-idempotence - Do not check that formatting twice gives the same output - --output-configuration - Output the full configuration to stderr before continuing - -t, --tolerate-parsing-errors - Format as much as possible even if some of the input causes parsing errors - --configuration-override - Override all configuration with the provided file [env: TOPIARY_CONFIGURATION_OVERRIDE=] - -c, --configuration-file - Add the specified configuration file with the highest prority [env: TOPIARY_CONFIGURATION_FILE=] + -C, --configuration + Configuration file + + [env: TOPIARY_CONFIG_FILE] + + --configuration-collation + Configuration collation mode + + [env: TOPIARY_CONFIG_COLLATION] + [default: merge] + + Possible values: + - merge: When multiple sources of configuration are available, matching items are + updated from the higher priority source, with collections merged as the union of sets + - revise: When multiple sources of configuration are available, matching items + (including collections) are superseded from the higher priority source + - override: When multiple sources of configuration are available, the highest priority + source is taken. All values from lower priority sources are discarded + + -v, --verbose... + Logging verbosity (increased per occurrence) + -h, --help - Print help + Print help (see a summary with '-h') + -V, --version Print version ``` + + +#### Format + + + +``` +Format inputs + +Usage: topiary fmt [OPTIONS] <--language |FILES> + +Arguments: + [FILES]... + Input files and directories (omit to read from stdin) + + Language detection and query selection is automatic, mapped from file extensions defined + in the Topiary configuration. 
+ +Options: + -t, --tolerate-parsing-errors + Consume as much as possible in the presence of parsing errors + + -s, --skip-idempotence + Do not check that formatting twice gives the same output + + -l, --language + Topiary supported language (for formatting stdin) + + [possible values: json, nickel, ocaml, ocaml-interface, ocamllex, toml] + + -q, --query + Topiary query file override (when formatting stdin) + + -C, --configuration + Configuration file + + [env: TOPIARY_CONFIG_FILE] + + --configuration-collation + Configuration collation mode + + [env: TOPIARY_CONFIG_COLLATION] + [default: merge] + + Possible values: + - merge: When multiple sources of configuration are available, matching items are + updated from the higher priority source, with collections merged as the union of sets + - revise: When multiple sources of configuration are available, matching items + (including collections) are superseded from the higher priority source + - override: When multiple sources of configuration are available, the highest priority + source is taken. All values from lower priority sources are discarded + + -v, --verbose... + Logging verbosity (increased per occurrence) + + -h, --help + Print help (see a summary with '-h') +``` + + +When formatting inputs from disk, language selection is detected from +the input files' extensions. To format standard input, you must specify +the `--language` and, optionally, `--query` arguments, omitting any +input files. + +Note: `format` is a recognised alias of the `fmt` subcommand. + +#### Visualise + + + +``` +Visualise the input's Tree-sitter parse tree + +Usage: topiary vis [OPTIONS] <--language |FILE> + +Arguments: + [FILE] + Input file (omit to read from stdin) + + Language detection and query selection is automatic, mapped from file extensions defined + in the Topiary configuration. 
+ +Options: + -f, --format + Visualisation format + + [default: dot] + + Possible values: + - dot: GraphViz DOT serialisation + - json: JSON serialisation + + -l, --language + Topiary supported language (for formatting stdin) + + [possible values: json, nickel, ocaml, ocaml-interface, ocamllex, toml] + + -q, --query + Topiary query file override (when formatting stdin) + + -C, --configuration + Configuration file + + [env: TOPIARY_CONFIG_FILE] + + --configuration-collation + Configuration collation mode + + [env: TOPIARY_CONFIG_COLLATION] + [default: merge] + + Possible values: + - merge: When multiple sources of configuration are available, matching items are + updated from the higher priority source, with collections merged as the union of sets + - revise: When multiple sources of configuration are available, matching items + (including collections) are superseded from the higher priority source + - override: When multiple sources of configuration are available, the highest priority + source is taken. All values from lower priority sources are discarded + + -v, --verbose... + Logging verbosity (increased per occurrence) + + -h, --help + Print help (see a summary with '-h') +``` + + +When visualising inputs from disk, language selection is detected from +the input file's extension. To visualise standard input, you must +specify the `--language` and, optionally, `--query` arguments, omitting +the input file. The visualisation output is written to standard out. + +Note: `visualise`, `visualize` and `view` are recognised aliases of the +`vis` subcommand. 
+ +#### Configuration + + + +``` +Print the current configuration + +Usage: topiary cfg [OPTIONS] + +Options: + -C, --configuration + Configuration file + + [env: TOPIARY_CONFIG_FILE] + + --configuration-collation + Configuration collation mode + + [env: TOPIARY_CONFIG_COLLATION] + [default: merge] + + Possible values: + - merge: When multiple sources of configuration are available, matching items are + updated from the higher priority source, with collections merged as the union of sets + - revise: When multiple sources of configuration are available, matching items + (including collections) are superseded from the higher priority source + - override: When multiple sources of configuration are available, the highest priority + source is taken. All values from lower priority sources are discarded + + -v, --verbose... + Logging verbosity (increased per occurrence) + + -h, --help + Print help (see a summary with '-h') +``` + + +Please refer to the [Configuration](#configuration-1) section below to +understand the different sources of configuration and collation modes. -Language selection is based on precedence, in the following order: -* A specified language -* Detected from the input file's extension -* A specified query file +Note: `config` is a recognised alias of the `cfg` subcommand. + +#### Logging + +By default, the Topiary CLI will only output error messages. You can +increase the logging verbosity with a respective number of +`-v`/`--verbose` flags: + +| Verbosity Flag | Logging Level | +| :------------- | :---------------------- | +| None | Errors | +| `-v` | ...and warnings | +| `-vv` | ...and information | +| `-vvv` | ...and debugging output | +| `-vvvv` | ...and tracing output | #### Exit Codes @@ -204,7 +403,7 @@ The Topiary process will exit with a zero exit code upon successful formatting. 
Otherwise, the following exit codes are defined: | Reason | Code | -| :--------------------------- | ---- | +| :--------------------------- | ---: | | Unspecified error | 1 | | CLI argument parsing error | 2 | | I/O error | 3 | @@ -213,21 +412,27 @@ formatting. Otherwise, the following exit codes are defined: | Language detection error | 6 | | Idempotency error | 7 | | Unspecified formatting error | 8 | +| Multiple errors | 9 | + +When given multiple inputs, Topiary will do its best to process them +all, even in the presence of errors. Should _any_ errors occur, Topiary +will return a non-zero exit code. For more details on the nature of +these errors, run Topiary at the `warn` logging level (with `-v`). #### Example Once built, the program can be run like this: ```bash -echo '{"foo":"bar"}' | topiary --language json +echo '{"foo":"bar"}' | topiary fmt --language json ``` `topiary` can also be built and run from source via either Cargo or Nix, if you have those installed: ```bash -echo '{"foo":"bar"}' | cargo run -- --language json -echo '{"foo":"bar"}' | nix run . -- --language json +echo '{"foo":"bar"}' | cargo run -- fmt --language json +echo '{"foo":"bar"}' | nix run . -- fmt --language json ``` It will output the following formatted code: @@ -236,44 +441,54 @@ It will output the following formatted code: { "foo": "bar" } ``` -Set the `RUST_LOG=debug` environment variable if you want to enable -debug logging. - ## Configuration -Topiary is configured using `languages.toml` files. There are three -locations where Topiary checks for such a file. -### Locations -At buildtime the [languages.toml](./languages.toml) in the root of -this repository is included into Topiary. This file is parsed at +Topiary is configured using `languages.toml` files. There are up to four +sources where Topiary checks for such a file. + +### Configuration Sources + +At build time the [languages.toml](./languages.toml) in the root of +this repository is embedded into Topiary. 
This file is parsed at runtime. The purpose of this `languages.toml` file is to provide sane defaults for users of Topiary (both the library and the binary). -The other two are read by the Topiary binary at runtime and allow the user to +The next two are read by the Topiary binary at runtime and allow the user to configure Topiary to their needs. The first is intended to be user specific, and can thus be found in the configuration directory of the OS: -``` -Unix: /home/alice/.config/topiary/languages.toml -Windows: C:\Users\Alice\AppData\Roaming\Topiary\config\languages.toml -MacOS: /Users/Alice/Library/Application Support/Topiary/languages.toml -``` + +| OS | Typical Configuration Path | +| :------ | :---------------------------------------------------------------- | +| Unix | `/home/alice/.config/topiary/languages.toml` | +| Windows | `C:\Users\Alice\AppData\Roaming\Topiary\config\languages.toml` | +| macOS | `/Users/Alice/Library/Application Support/Topiary/languages.toml` | + This file is not automatically created by Topiary. -The last location is intended to be a project-specific settings file for -Topiary. When running Topiary in some directory, it will look up in the file -tree until it finds a .topiary directory. It will then read the `languages.toml` +The next source is intended to be a project-specific settings file for +Topiary. When running Topiary in some directory, it will ascend the file +tree until it finds a `.topiary` directory. It will then read any `languages.toml` file present in that directory. -The Topiary binary parses these file in the following order, any configuration -options defined earlier are overwritten by those defined later. +Finally, an explicit configuration file may be specified using the +`-C`/`--configuration` command line argument (or the +`TOPIARY_CONFIG_FILE` environment variable). This is intended for +driving Topiary under very specific use-cases. -1. The builtin configuration file -2. 
The user configuration file in the OS's configuration directory -3. The project specific topiary configuration +The Topiary binary parses these sources in the following order. The +action taken to coalesce matching items is dependent on the [collation +mode](#configuration-collation). + +1. The builtin configuration file. +2. The user configuration file in the OS's configuration directory. +3. The project-specific Topiary configuration. +4. The explicit configuration file specified as a CLI argument. ### Configuration Options + The configuration file contains a list of languages, each language configuration headed by ``[[language]]``. For instance, the one for Nickel is defined as such: + ```toml [[language]] name = "nickel" @@ -281,7 +496,7 @@ extensions = ["ncl"] ``` The `name` field is used by Topiary to associate the language entry with the -query file and tree-sitter grammar. This field should be written lowercase. +query file and Tree-sitter grammar. This value should be written in lowercase. The `name` field is mandatory for every ``[[language]]`` block in every configuration file. @@ -290,10 +505,95 @@ need to exist in every configuration file. It is sufficient if, for every language, there is a single configuration file that defines the list of extensions for that language. -A final optional field called `indent` exists to define the indentation method +A final optional field, called `indent`, exists to define the indentation method for that language. Topiary defaults to two spaces `" "` if it cannot find the indent field in any configuration file for a specific language. +### Configuration Collation + +When parsing configuration from multiple sources, Topiary can collate +matching configuration items (matched on language name) in various ways. +The collation mode is set by the `--configuration-collation` command +line argument (or the `TOPIARY_CONFIG_COLLATION` environment variable). + +The different modes are best explained by example. 
Consider the +following two configurations, in priority order from lowest to highest +(comments have been added for illustrative purposes): + +```toml +# Lowest priority configuration + +[[language]] +name = "example" +extensions = ["eg"] + +[[language]] +name = "demo" +extensions = ["demo"] +``` + +```toml +# Highest priority configuration + +[[language]] +name = "example" +extensions = ["example"] +indent = " " +``` + +#### Merge Mode (Default) + +Matching items are updated from the higher priority source, with +collections merged as the union of sets. + +```toml +# For the "example" language: +# * The collated extensions are the union of the source extensions +# * The indentation is taken from the highest priority source +[[language]] +name = "example" +extensions = ["eg", "example"] +indent = " " + +# The "demo" language is unchanged +[[language]] +name = "demo" +extensions = ["demo"] +``` + +#### Revise Mode + +Matching items (including collections) are superseded from the higher +priority source. + +```toml +# The "example" language's values are taken from the highest priority source +[[language]] +name = "example" +extensions = ["example"] +indent = " " + +# The "demo" language is unchanged +[[language]] +name = "demo" +extensions = ["demo"] +``` + +#### Override Mode + +The highest priority source is taken. All values from lower priority +sources are discarded. + +```toml +# The "example" language's values are taken from the highest priority source +[[language]] +name = "example" +extensions = ["example"] +indent = " " + +# The "demo" language does not exist in the highest priority source, so is omitted +``` + ## Design As long as there is a [Tree-sitter grammar][tree-sitter-parsers] defined @@ -982,16 +1282,23 @@ suggested way to work: `crate::language::Language` and process it everywhere, then make a mostly empty query file with just the `(#language!)` configuration. -4. Run `RUST_LOG=debug cargo test`. +4. 
Run: + + ``` + RUST_LOG=debug \ + cargo test -p topiary \ + input_output_tester \ + -- --nocapture + ``` - Provided it works, it should output a lot of log messages. Copy that - output to a text editor. You are particularly interested in the CST - output that starts with a line like this: `CST node: {Node + Provided it works, it should output a _lot_ of log messages. Copy + that output to a text editor. You are particularly interested in the + CST output that starts with a line like this: `CST node: {Node compilation_unit (0, 0) - (5942, 0)} - Named: true`. :bulb: As an alternative to using the debugging output, the - `--visualise` command line option exists to output the Tree-sitter - syntax tree in a variety of formats. + `vis` visualisation subcommand exists to output the + Tree-sitter syntax tree in a variety of formats. 5. The test run will output all the differences between the actual output and the expected output, e.g. missing spaces between tokens. diff --git a/docs/migration-0.2-0.3.md b/docs/migration-0.2-0.3.md new file mode 100644 index 00000000..54d29bea --- /dev/null +++ b/docs/migration-0.2-0.3.md @@ -0,0 +1,161 @@ +# Topiary CLI Migration Guide: v0.2 to v0.3 + +Full documentation for the CLI can be found in the project's +[`README`](/README.md). Herein we summarise how the v0.2.3 functionality +maps to the new interface introduced in v0.3.0, to aid migration. + +## Formatting + +### From Files, In Place + +Before: +``` +topiary [--skip-idempotence] \ + [--tolerate-parsing-errors] \ + --in-place \ + --input-files INPUT_FILES... +``` + +After: +``` +topiary fmt [--skip-idempotence] \ + [--tolerate-parsing-errors] \ + INPUT_FILES... 
+``` + +### From File, To New File + +Before: +``` +topiary [--skip-idempotence] \ + [--tolerate-parsing-errors] \ + (--language LANGUAGE | --query QUERY) \ + --input-files INPUT_FILE \ + --output-file OUTPUT_FILE +``` + +After (use IO redirection): +``` +topiary fmt [--skip-idempotence] \ + [--tolerate-parsing-errors] \ + (--language LANGUAGE [--query QUERY]) \ + < INPUT_FILE \ + > OUTPUT_FILE +``` + +### Involving Standard Input and Output + +Before: +``` +topiary [--skip-idempotence] \ + [--tolerate-parsing-errors] \ + (--language LANGUAGE [--query QUERY]) \ + (--input-files - | < INPUT_FILE) \ + [--output-file -] +``` + +After (use IO redirection): +``` +topiary fmt [--skip-idempotence] \ + [--tolerate-parsing-errors] \ + (--language LANGUAGE [--query QUERY]) \ + < INPUT_FILE +``` + +## Visualisation + +### From File + +Before: +``` +topiary --visualise[=FORMAT] \ + --input-files INPUT_FILE \ + [--output-file OUTPUT_FILE | > OUTPUT_FILE] +``` + +After: +``` +topiary vis [--format FORMAT] \ + INPUT_FILE \ + [> OUTPUT_FILE] +``` + +### Involving Standard Input and Output + +Before: +``` +topiary --visualise[=FORMAT] \ + (--language LANGUAGE [--query QUERY]) \ + < INPUT_FILE \ + [--output-file OUTPUT_FILE | > OUTPUT_FILE] +``` + +After (use IO redirection): +``` +topiary vis [--format FORMAT] \ + (--language LANGUAGE [--query QUERY]) \ + < INPUT_FILE \ + [> OUTPUT_FILE] +``` + +## Configuration + +### Custom Configuration + +To replicate the behaviour of v0.2.3, set the configuration collation +mode to `revise`. This can be done with the `TOPIARY_CONFIG_COLLATION` +environment variable, or the `--configuration-collation` argument. + +The new default collation method is `merge`, which is subtly different +when it comes to collating collections. + +### Overriding Configuration + +Before (or using the `TOPIARY_CONFIGURATION_OVERRIDE` environment +variable): +``` +topiary --configuration-override CONFIG_FILE ... 
+``` + +After (or using a combination of `TOPIARY_CONFIG_FILE` and +`TOPIARY_CONFIG_COLLATION` environment variables): +``` +topiary --configuration CONFIG_FILE \ + --configuration-collation override \ + ... +``` + +### Examining Computed Configuration + +Before (to standard error, as debug output, then proceeding with other +functions): +``` +topiary --output-configuration ... +``` + +After (to standard output, in TOML format, as a dedicated function): +``` +topiary cfg +``` + +## Logging + +Before (via the `RUST_LOG` environment variable): +``` +RUST_LOG=warn topiary ... +``` + +After (using `-v` command line flags): +``` +topiary -v ... +``` + +The number of command line flags increases the verbosity: + +| Verbosity Flag | `RUST_LOG` Equivalent | +| :------------- | :-------------------- | +| None | `error` (default) | +| `-v` | `warn` | +| `-vv` | `info` | +| `-vvv` | `debug` | +| `-vvvv` | `trace` | diff --git a/flake.nix b/flake.nix index ffa05811..e1f929d8 100644 --- a/flake.nix +++ b/flake.nix @@ -31,46 +31,41 @@ outputs = inputs: with inputs; - flake-utils.lib.eachDefaultSystem ( - system: let - pkgs = import nixpkgs { - inherit system; - overlays = [rust-overlay.overlays.default]; - }; - code = pkgs.callPackage ./default.nix {inherit advisory-db crane rust-overlay nix-filter;}; - in { - packages = with code; { - inherit topiary-playground; - default = topiary-cli; - }; + flake-utils.lib.eachDefaultSystem ( + system: + let + pkgs = import nixpkgs { + inherit system; + overlays = [ rust-overlay.overlays.default ]; + }; + code = pkgs.callPackage ./default.nix { inherit advisory-db crane rust-overlay nix-filter; }; + in + { + packages = with code; { + inherit topiary-playground; + default = topiary-cli; + }; - checks = { - inherit (code) clippy clippy-wasm fmt topiary-lib topiary-cli topiary-playground audit benchmark; + checks = { + inherit (code) clippy clippy-wasm fmt topiary-lib topiary-cli topiary-playground audit benchmark; - ## Check that the 
`lib.pre-commit-hook` output builds/evaluates - ## correctly. `deepSeq e1 e2` evaluates `e1` strictly in depth before - ## returning `e2`. We use this trick because checks need to be - ## derivations, which `lib.pre-commit-hook` is not. - pre-commit-hook = builtins.deepSeq self.lib.${system}.pre-commit-hook pkgs.hello; - }; + ## Check that the `lib.pre-commit-hook` output builds/evaluates + ## correctly. `deepSeq e1 e2` evaluates `e1` strictly in depth before + ## returning `e2`. We use this trick because checks need to be + ## derivations, which `lib.pre-commit-hook` is not. + pre-commit-hook = builtins.deepSeq self.lib.${system}.pre-commit-hook pkgs.hello; + }; - devShells.default = pkgs.callPackage ./shell.nix {checks = self.checks.${system};}; + devShells.default = pkgs.callPackage ./shell.nix { checks = self.checks.${system}; }; - ## For easy use in https://github.com/cachix/pre-commit-hooks.nix - lib.pre-commit-hook = { - enable = true; - name = "topiary"; - description = "A general code formatter based on tree-sitter."; - entry = let - topiary-inplace = pkgs.writeShellApplication { - name = "topiary-inplace"; - text = '' - ${code.topiary-cli}/bin/topiary --in-place --input-files "$@"; - ''; - }; - in "${topiary-inplace}/bin/topiary-inplace"; - types = ["text"]; - }; - } - ); + ## For easy use in https://github.com/cachix/pre-commit-hooks.nix + lib.pre-commit-hook = { + enable = true; + name = "topiary"; + description = "A general code formatter based on tree-sitter."; + entry = "${code.topiary-cli}/bin/topiary fmt"; + types = [ "text" ]; + }; + } + ); } diff --git a/playground.sh b/playground.sh index a027b98b..40ed72bd 100755 --- a/playground.sh +++ b/playground.sh @@ -40,16 +40,10 @@ format() { local input="$2" local skip_idempotence="${3-1}" - local -a topiary_args=( - --query "${query}" - --input-files "${input}" - ) + local -a topiary_args=(--query "${query}") + (( skip_idempotence )) && topiary_args+=(--skip-idempotence) - if (( skip_idempotence )); 
then - topiary_args+=(--skip-idempotence) - fi - - cargo run --quiet -- "${topiary_args[@]}" + cargo run --quiet -- fmt "${topiary_args[@]}" < "${input}" } idempotency() { diff --git a/topiary-cli/Cargo.toml b/topiary-cli/Cargo.toml index aab330e1..dd144209 100644 --- a/topiary-cli/Cargo.toml +++ b/topiary-cli/Cargo.toml @@ -26,16 +26,21 @@ path = "src/main.rs" [dependencies] # For now we just load the tree-sitter language parsers statically. # Eventually we will want to dynamically load them, like Helix does. -clap = { workspace = true, features = ["derive"] } +async-scoped = { workspace = true } +clap = { workspace = true, features = ["derive", "env", "wrap_help"] } +directories = { workspace = true } env_logger = { workspace = true } futures = { workspace = true } -directories = { workspace = true } +indoc = { workspace = true } +itertools = { workspace = true } log = { workspace = true } serde-toml-merge = { workspace = true } tempfile = { workspace = true } -tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } +tokio = { workspace = true, features = ["fs", "rt-multi-thread", "sync", "macros"] } toml = { workspace = true } topiary = { path = "../topiary" } +tree-sitter-facade = { workspace = true } [dev-dependencies] assert_cmd = { workspace = true } +predicates = { workspace = true } diff --git a/topiary-cli/src/cli.rs b/topiary-cli/src/cli.rs new file mode 100644 index 00000000..7f15e650 --- /dev/null +++ b/topiary-cli/src/cli.rs @@ -0,0 +1,228 @@ +//! Command line interface argument parsing. + +use clap::{ArgAction, ArgGroup, Args, Parser, Subcommand}; +use std::path::PathBuf; + +use log::LevelFilter; +use topiary::SupportedLanguage; + +use crate::{ + configuration, + error::{CLIResult, TopiaryError}, + visualisation, +}; + +#[derive(Debug, Parser)] +// NOTE Don't use infer_subcommands, as that could fossilise the interface. We define explicit +// aliases instead. 
(See https://clig.dev/#future-proofing) +#[command(about, author, long_about = None, version)] +pub struct Cli { + // Global options + #[command(flatten)] + pub global: GlobalArgs, + + // Subcommands + #[command(subcommand)] + pub command: Commands, +} + +// These are "true" global arguments that are relevant to all subcommands +// NOTE Global arguments must be optional, even when defaults are specified +#[derive(Args, Debug)] +pub struct GlobalArgs { + /// Configuration file + #[arg( + short = 'C', + long, + display_order = 100, + env = "TOPIARY_CONFIG_FILE", + global = true, + hide_env_values = true + )] + pub configuration: Option, + + /// Configuration collation mode + #[arg( + long, + default_value = "merge", + display_order = 101, + env = "TOPIARY_CONFIG_COLLATION", + global = true, + hide_env_values = true + )] + pub configuration_collation: Option, + + /// Logging verbosity (increased per occurrence) + #[arg( + short, + long, + action = ArgAction::Count, + global = true, + display_order = 102 + )] + pub verbose: u8, +} + +// NOTE This abstraction is largely to workaround clap-rs/clap#4707 +#[derive(Args, Debug)] +pub struct FromStdin { + /// Topiary supported language (for formatting stdin) + #[arg(short, long)] + pub language: SupportedLanguage, + + /// Topiary query file override (when formatting stdin) + #[arg(short, long, requires = "language")] + pub query: Option, +} + +// Subtype for exactly one input: +// * FILE => Read input from disk, visualisation output to stdout +// * --language => Read input from stdin, visualisation output to stdout +#[derive(Args, Debug)] +#[command( + // Require exactly one of --language, or FILES... 
+ group = ArgGroup::new("source") + .multiple(false) + .required(true) + .args(&["language", "file"]) +)] +pub struct ExactlyOneInput { + #[command(flatten)] + pub stdin: Option, + + /// Input file (omit to read from stdin) + /// + /// Language detection and query selection is automatic, mapped from file extensions defined in + /// the Topiary configuration. + pub file: Option, +} + +// Subtype for at least one input +// * FILES... => Read input(s) from disk, format in place +// * --language => Read input from stdin, output to stdout +#[derive(Args, Debug)] +#[command( + // Require exactly one of --language, --query, or FILES... + group = ArgGroup::new("source") + .multiple(false) + .required(true) + .args(&["language", "files"]) +)] +pub struct AtLeastOneInput { + #[command(flatten)] + pub stdin: Option, + + /// Input files and directories (omit to read from stdin) + /// + /// Language detection and query selection is automatic, mapped from file extensions defined in + /// the Topiary configuration. 
+ pub files: Vec, +} + +#[derive(Debug, Subcommand)] +pub enum Commands { + /// Format inputs + #[command(alias = "format", display_order = 1)] + Fmt { + /// Consume as much as possible in the presence of parsing errors + #[arg(short, long)] + tolerate_parsing_errors: bool, + + /// Do not check that formatting twice gives the same output + #[arg(short, long)] + skip_idempotence: bool, + + #[command(flatten)] + inputs: AtLeastOneInput, + }, + + /// Visualise the input's Tree-sitter parse tree + #[command(aliases = &["visualise", "visualize", "view"], display_order = 2)] + Vis { + /// Visualisation format + #[arg(short, long, default_value = "dot")] + format: visualisation::Format, + + #[command(flatten)] + input: ExactlyOneInput, + }, + + /// Print the current configuration + #[command(alias = "config", display_order = 3)] + Cfg, +} + +/// Given a vector of paths, recursively expand those that identify as directories, in place +fn traverse_fs(files: &mut Vec) -> CLIResult<()> { + let mut expanded = vec![]; + + for file in &mut *files { + if file.is_dir() { + let mut subfiles = file.read_dir()?.flatten().map(|f| f.path()).collect(); + traverse_fs(&mut subfiles)?; + expanded.append(&mut subfiles); + } else { + expanded.push(file.to_path_buf()); + } + } + + *files = expanded; + Ok(()) +} + +/// Parse CLI arguments and normalise them for the caller +pub fn get_args() -> CLIResult { + let mut args = Cli::parse(); + + // This is the earliest point that we can initialise the logger, from the --verbose flags, + // before any fallible operations have started + env_logger::Builder::new() + .filter_level(match args.global.verbose { + 0 => LevelFilter::Error, + 1 => LevelFilter::Warn, + 2 => LevelFilter::Info, + 3 => LevelFilter::Debug, + _ => LevelFilter::Trace, + }) + .init(); + + // NOTE We do not check that input files are actual files (with Path::is_file), because that + // would break in the case of, for example, named pipes; thus also adding a platform dimension + // to 
the check, which is simply not worth the complexity. We _could_ check by opening each + // file, but that's going to be done sooner-or-later by Topiary, so there's no need. + + match &mut args.command { + Commands::Fmt { + inputs: AtLeastOneInput { files, .. }, + .. + } => { + // If we're given a list of FILES... then we assume them to all be on disk, even if "-" + // is passed as an argument (i.e., interpret this as a valid filename, rather than as + // stdin). We deduplicate this list to avoid formatting the same file multiple times + // and recursively expand directories until we're left with a list of unique + // (potential) files as input sources. + files.sort_unstable(); + files.dedup(); + traverse_fs(files)?; + } + + Commands::Vis { + input: ExactlyOneInput { + file: Some(file), .. + }, + .. + } => { + // Make sure our FILE is not a directory + if file.is_dir() { + return Err(TopiaryError::Bin( + format!("Cannot visualise directory \"{}\"; please provide a single file from disk or stdin.", file.to_string_lossy()), + None, + )); + } + } + + _ => {} + } + + Ok(args) +} diff --git a/topiary-cli/src/configuration.rs b/topiary-cli/src/configuration.rs index 0dcc8781..3eb3e9d8 100644 --- a/topiary-cli/src/configuration.rs +++ b/topiary-cli/src/configuration.rs @@ -1,148 +1,379 @@ +use clap::ValueEnum; use directories::ProjectDirs; -use std::{env::current_dir, path::PathBuf}; +use indoc::formatdoc; +use itertools::Itertools; +use std::{env::current_dir, fmt, path::PathBuf}; use topiary::{default_configuration_toml, Configuration}; use crate::error::{CLIResult, TopiaryError}; -pub fn parse_configuration( - config_override: Option, - config_file: Option, -) -> CLIResult { - user_configuration_toml(config_override, config_file)? 
- .try_into() - .map_err(TopiaryError::from) +type Annotations = String; + +/// Collation mode for configuration values +// NOTE The enum variants are in "natural" order, rather than +// sorted lexicographically, for the sake of the help text +#[derive(Clone, Debug, ValueEnum)] +pub enum CollationMode { + /// When multiple sources of configuration are available, matching items are updated from the + /// higher priority source, with collections merged as the union of sets. + Merge, + + /// When multiple sources of configuration are available, matching items (including + /// collections) are superseded from the higher priority source. + Revise, + + /// When multiple sources of configuration are available, the highest priority source is taken. + /// All values from lower priority sources are discarded. + Override, } -/// User configured languages.toml file, merged with the default config. -/// If a configuration_override was provided, all other configuration files are ignored. -fn user_configuration_toml( - config_override: Option, - config_file: Option, -) -> CLIResult { - // If an override was requested, disregard all other configuration - if let Some(path) = config_override { - let content = std::fs::read_to_string(path)?; - let toml = toml::from_str(&content)?; - return Ok(toml); +/// Map collation modes to merge depths for the TOML collation (see `collate_toml`) +impl From<&CollationMode> for usize { + fn from(collation: &CollationMode) -> Self { + match collation { + CollationMode::Merge => 4, + CollationMode::Revise => 2, + _ => unreachable!(), + } + } +} + +/// Consume the configuration from the usual sources, collated as specified +pub fn fetch( + file: &Option, + collation: &CollationMode, +) -> CLIResult<(Annotations, Configuration)> { + // If we have an explicit file, fail if it doesn't exist + if let Some(path) = file { + if !path.exists() { + return Err(TopiaryError::Bin( + format!("Configuration file not found: {}", path.to_string_lossy()), + None, + 
)); + } + } + + let sources = configuration_sources(file); + + Ok(( + annotate(&sources, collation), + configuration_toml(&sources, collation)? + .try_into() + .map_err(TopiaryError::from)?, + )) +} + +/// Return annotations for the configuration in the form of TOML comments +/// (useful for human-readable output) +fn annotate(sources: &[ConfigSource], collation: &CollationMode) -> String { + formatdoc!( + " + # Configuration collated from the following sources, + # in priority order (lowest to highest): + # + {} + # + # Collation mode: {collation:?} + ", + sources + .iter() + .enumerate() + .map(|(i, source)| format!("# {}. {source}", i + 1)) + .join("\n") + ) +} + +/// Sources of TOML configuration +#[derive(Debug)] +enum ConfigSource { + Builtin, + File(PathBuf), + + // This is a sentinel element for files that don't exist + Missing, +} + +impl ConfigSource { + fn is_valid(&self) -> bool { + !matches!(self, Self::Missing) + } +} + +impl fmt::Display for ConfigSource { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + match self { + Self::Builtin => write!(f, "Built-in configuration"), + + Self::File(path) => { + // We only stringify the path when we know it exists, so the call to `canonicalize` + // is safe to unwrap. (All bets are off, if called from elsewhere.) 
+ write!(f, "{}", path.canonicalize().unwrap().to_string_lossy()) + } + + Self::Missing => write!(f, "Missing configuration"), + } + } +} + +impl From> for ConfigSource { + fn from(path: Option) -> Self { + match path { + None => ConfigSource::Missing, + + Some(path) => { + let candidate = if path.is_dir() { + path.join("languages.toml") + } else { + path + }; + + if candidate.exists() { + ConfigSource::File(candidate) + } else { + log::warn!( + "Could not find configuration file: {}", + candidate.to_string_lossy() + ); + + ConfigSource::Missing + } + } + } } +} + +impl TryFrom<&ConfigSource> for toml::Value { + type Error = TopiaryError; + + fn try_from(source: &ConfigSource) -> Result { + match source { + ConfigSource::Builtin => Ok(default_configuration_toml()), + + ConfigSource::File(file) => { + let config = std::fs::read_to_string(file)?; + toml::from_str(&config).map_err(TopiaryError::from) + } + + ConfigSource::Missing => Err(TopiaryError::Bin( + "Could not parse missing configuration".into(), + None, + )), + } + } +} - // Otherwise consider the configuration files in order. Lowest priority first: - // - The built-in configuration `default_configuration_toml` - // - `~/.config/topiary/languages.toml` (or equivalent) - // - `.topiary/languages.toml` - // - `config_file` as passed by `--configuration_file/-c/TOPIARY_CONFIGURATION_FILE` +/// Return the valid sources of configuration, in priority order (lowest to highest): +/// +/// 1. Built-in configuration (`topiary::default_configuration_toml`) +/// 2. `~/.config/topiary/languages.toml` (or equivalent) +/// 3. `.topiary/languages.toml` (or equivalent) +/// 4. 
`file`, passed as a CLI argument/environment variable +fn configuration_sources(file: &Option) -> Vec { [ - Some(find_configuration_dir()), - find_workspace_configuration_dir(), - config_file, + ConfigSource::Builtin, + Some(find_os_configuration_dir()).into(), + find_workspace_configuration_dir().into(), + file.clone().into(), ] .into_iter() - .filter_map(|path| { - path.map(|p| match p.is_file() { - // The path already points to a file, assume the file is the configuration file - true => p, - // The path points to a directory, assume it is a topiary configuration directory and append "languages.toml" - false => p.join("languages.toml"), - }) - }) - .filter_map(|file| -> Option> { - std::fs::read_to_string(file) - .map(|config| toml::from_str(&config)) - .ok() - }) - .try_fold(default_configuration_toml(), |a, b| { - let b = b?; - Ok(merge_toml_values(a, b, 3)) - }) + .filter(ConfigSource::is_valid) + .collect() +} + +/// Consume configuration and collate as specified +fn configuration_toml( + sources: &[ConfigSource], + collation: &CollationMode, +) -> CLIResult { + match collation { + CollationMode::Override => { + // It's safe to unwrap here, as `sources` is guaranteed to contain at least one element + sources + .last() + .unwrap() + .try_into() + .map_err(TopiaryError::from) + } + + // CollationMode::Merge and CollationMode::Revise + _ => { + // It's safe to unwrap here, as `sources` is guaranteed to contain at least one element + sources + .iter() + .map(|source| source.try_into()) + .reduce(|config, toml| Ok(collate_toml(config?, toml?, collation))) + .unwrap() + } + } +} + +/// Find the OS-specific configuration directory +fn find_os_configuration_dir() -> PathBuf { + ProjectDirs::from("", "", "topiary") + .expect("Could not access the OS's Home directory") + .config_dir() + .to_path_buf() +} + +/// Ascend the directory hierarchy, starting from the current working directory, in search of the +/// nearest `.topiary` configuration directory +fn 
find_workspace_configuration_dir() -> Option { + current_dir() + .expect("Could not get current working directory") + .ancestors() + .map(|path| path.join(".topiary")) + .find(|path| path.exists()) } -/// Merge two TOML documents, merging values from `right` onto `left` +/// Collate two TOML documents, merging values from `graft` onto `base`. /// -/// When an array exists in both `left` and `right`, `right`'s array is -/// used. When a table exists in both `left` and `right`, the merged table -/// consists of all keys in `left`'s table unioned with all keys in `right` -/// with the values of `right` being merged recursively onto values of -/// `left`. +/// Arrays of tables with a `name` key (e.g., our `[[language]]` tables) are always merged; that +/// is, the union of the `base` and `graft` is taken. Otherwise, the `merge_depth` controls the +/// collation of arrays, resulting in concatenation. This can leave duplicates, in the collated +/// TOML, but for Topiary, this only matters for our `Languages::extensions`, which is implemented +/// as a `HashSet`; thus deserialisation will deduplicate for us. /// -/// `merge_toplevel_arrays` controls whether a top-level array in the TOML -/// document is merged instead of overridden. This is useful for TOML -/// documents that use a top-level array of values like the `languages.toml`, -/// where one usually wants to override or add to the array instead of -/// replacing it altogether. +/// When a table exists in both `base` and `graft`, the merged table consists of all keys in +/// `base`'s table unioned with all keys in `graft` with the values of `graft` being merged +/// recursively onto values of `base`. /// -/// NOTE: This merge function is taken from Helix: -/// https://github.com/helix-editor/helix licensed under MPL-2.0. There -/// it is defined under: helix-loader/src/lib.rs. 
Taken from commit df09490 -pub fn merge_toml_values(left: toml::Value, right: toml::Value, merge_depth: usize) -> toml::Value { +/// NOTE This collation function is forked from Helix, licensed under MPL-2.0 +/// * Repo: https://github.com/helix-editor/helix +/// * Rev: df09490 +/// * Path: helix-loader/src/lib.rs +fn collate_toml(base: toml::Value, graft: toml::Value, merge_depth: T) -> toml::Value +where + T: Into, +{ use toml::Value; fn get_name(v: &Value) -> Option<&str> { v.get("name").and_then(Value::as_str) } - match (left, right) { - (Value::Array(mut left_items), Value::Array(right_items)) => { - // The top-level arrays should be merged but nested arrays should - // act as overrides. For the `languages.toml` config, this means - // that you can specify a sub-set of languages in an overriding - // `languages.toml` but that nested arrays like file extensions - // arguments are replaced instead of merged. - if merge_depth > 0 { - left_items.reserve(right_items.len()); - for rvalue in right_items { - let lvalue = get_name(&rvalue) - .and_then(|rname| { - left_items.iter().position(|v| get_name(v) == Some(rname)) - }) - .map(|lpos| left_items.remove(lpos)); - let mvalue = match lvalue { - Some(lvalue) => merge_toml_values(lvalue, rvalue, merge_depth - 1), - None => rvalue, - }; - left_items.push(mvalue); - } - Value::Array(left_items) - } else { - Value::Array(right_items) + let merge_depth: usize = merge_depth.into(); + + match (base, graft, merge_depth) { + // Fallback to the graft value if the recursion depth bottoms out + (_, graft, 0) => graft, + + (Value::Array(mut base_items), Value::Array(graft_items), _) => { + for rvalue in graft_items { + // If our graft value has a `name` key, then we're dealing with a `[[language]]` + // table. In which case, pop it -- if it exists -- from the base array. 
+ let language = get_name(&rvalue) + .and_then(|rname| base_items.iter().position(|v| get_name(v) == Some(rname))) + .map(|lpos| base_items.remove(lpos)); + + let mvalue = match language { + // Merge matching language tables + Some(lvalue) => collate_toml(lvalue, rvalue, merge_depth - 1), + + // Collate everything else + None => rvalue, + }; + + base_items.push(mvalue); } + + Value::Array(base_items) } - (Value::Table(mut left_map), Value::Table(right_map)) => { - if merge_depth > 0 { - for (rname, rvalue) in right_map { - match left_map.remove(&rname) { - Some(lvalue) => { - let merged_value = merge_toml_values(lvalue, rvalue, merge_depth - 1); - left_map.insert(rname, merged_value); - } - None => { - left_map.insert(rname, rvalue); - } + + (Value::Table(mut base_map), Value::Table(graft_map), _) => { + for (rname, rvalue) in graft_map { + match base_map.remove(&rname) { + Some(lvalue) => { + let merged_value = collate_toml(lvalue, rvalue, merge_depth - 1); + base_map.insert(rname, merged_value); + } + None => { + base_map.insert(rname, rvalue); } } - Value::Table(left_map) - } else { - Value::Table(right_map) } + + Value::Table(base_map) } - // Catch everything else we didn't handle, and use the right value - (_, value) => value, + + // Fallback to the graft value for everything else + (_, graft, _) => graft, } } -fn find_configuration_dir() -> PathBuf { - ProjectDirs::from("", "", "topiary") - .expect("Could not access the OS's Home directory") - .config_dir() - .to_owned() -} +#[cfg(test)] +mod test_config_collation { + use super::{collate_toml, CollationMode, Configuration}; -pub fn find_workspace_configuration_dir() -> Option { - let current_dir = current_dir().expect("Could not get current working directory"); - for ancestor in current_dir.ancestors() { - if ancestor.join(".topiary").exists() { - return Some(ancestor.to_owned().join(".topiary")); - } + // NOTE PartialEq for toml::Value is (understandably) order sensitive over array elements, so + // we 
deserialse to `topiary::Configuration` for equality testing. This also has the effect of + // side-stepping potential duplication, from concatenation, when using `CollationMode::Merge`. + + static BASE: &str = r#" + [[language]] + name = "example" + extensions = ["eg"] + + [[language]] + name = "demo" + extensions = ["demo"] + "#; + + static GRAFT: &str = r#" + [[language]] + name = "example" + extensions = ["example"] + indent = "\t" + "#; + + #[test] + fn merge() { + let base = toml::from_str(BASE).unwrap(); + let graft = toml::from_str(GRAFT).unwrap(); + + let merged: Configuration = collate_toml(base, graft, &CollationMode::Merge) + .try_into() + .unwrap(); + + let expected: Configuration = toml::from_str( + r#" + [[language]] + name = "example" + extensions = ["eg", "example"] + indent = "\t" + + [[language]] + name = "demo" + extensions = ["demo"] + "#, + ) + .unwrap(); + + assert_eq!(merged, expected); } - None + #[test] + fn revise() { + let base = toml::from_str(BASE).unwrap(); + let graft = toml::from_str(GRAFT).unwrap(); + + let revised: Configuration = collate_toml(base, graft, &CollationMode::Revise) + .try_into() + .unwrap(); + + let expected: Configuration = toml::from_str( + r#" + [[language]] + name = "example" + extensions = ["example"] + indent = "\t" + + [[language]] + name = "demo" + extensions = ["demo"] + "#, + ) + .unwrap(); + + assert_eq!(revised, expected); + } } diff --git a/topiary-cli/src/error.rs b/topiary-cli/src/error.rs index a9ae8bc9..c2386c32 100644 --- a/topiary-cli/src/error.rs +++ b/topiary-cli/src/error.rs @@ -18,6 +18,7 @@ pub enum TopiaryError { pub enum CLIError { IOError(io::Error), Generic(Box), + Multiple, } /// # Safety @@ -48,6 +49,7 @@ impl error::Error for TopiaryError { Self::Lib(error) => error.source(), Self::Bin(_, Some(CLIError::IOError(error))) => Some(error), Self::Bin(_, Some(CLIError::Generic(error))) => error.source(), + Self::Bin(_, Some(CLIError::Multiple)) => None, Self::Bin(_, None) => None, } } @@ 
-56,6 +58,9 @@ impl error::Error for TopiaryError { impl From for ExitCode { fn from(e: TopiaryError) -> Self { let exit_code = match e { + // Multiple errors: Exit 9 + TopiaryError::Bin(_, Some(CLIError::Multiple)) => 9, + // Idempotency parsing errors: Exit 8 TopiaryError::Lib(FormatterError::IdempotenceParsing(_)) => 8, @@ -133,7 +138,7 @@ where impl From for TopiaryError { fn from(e: toml::de::Error) -> Self { TopiaryError::Bin( - "Could not parse user configuration".to_owned(), + "Could not parse configuration".into(), Some(CLIError::Generic(Box::new(e))), ) } @@ -142,7 +147,7 @@ impl From for TopiaryError { impl From for TopiaryError { fn from(e: serde_toml_merge::Error) -> Self { TopiaryError::Bin( - format!("Could not merge the default configuration and user configurations. Error occured while merging: {}", e.path), + format!("Could not collate configuration from {}", e.path), None, ) } @@ -151,7 +156,7 @@ impl From for TopiaryError { impl From for TopiaryError { fn from(e: tokio::task::JoinError) -> Self { TopiaryError::Bin( - "Could not join parallel formatting tasks".to_owned(), + "Could not join parallel formatting tasks".into(), Some(CLIError::Generic(Box::new(e))), ) } diff --git a/topiary-cli/src/io.rs b/topiary-cli/src/io.rs new file mode 100644 index 00000000..b17dd486 --- /dev/null +++ b/topiary-cli/src/io.rs @@ -0,0 +1,271 @@ +use std::{ + ffi::OsString, + fmt, + fs::File, + io::{stdin, stdout, ErrorKind, Read, Result, Write}, + path::{Path, PathBuf}, +}; + +use tempfile::NamedTempFile; +use topiary::{Configuration, Language, SupportedLanguage, TopiaryQuery}; + +use crate::{ + cli::{AtLeastOneInput, ExactlyOneInput, FromStdin}, + error::{CLIResult, TopiaryError}, + language::LanguageDefinition, +}; + +type QuerySource = PathBuf; + +/// Unified interface for input sources. 
We either have input from: +/// * Standard input, in which case we need to specify the language and, optionally, query override +/// * A sequence of files +/// +/// These are captured by the CLI parser, with `cli::AtLeastOneInput` and `cli::ExactlyOneInput`. +/// We use this struct to normalise the interface for downstream (using `From` implementations). +pub enum InputFrom { + Stdin(SupportedLanguage, Option), + Files(Vec), +} + +impl From<&ExactlyOneInput> for InputFrom { + fn from(input: &ExactlyOneInput) -> Self { + match input { + ExactlyOneInput { + stdin: Some(FromStdin { language, query }), + .. + } => InputFrom::Stdin(language.to_owned(), query.to_owned()), + + ExactlyOneInput { + file: Some(path), .. + } => InputFrom::Files(vec![path.to_owned()]), + + _ => unreachable!("Clap guarantees input is always one of the above"), + } + } +} + +impl From<&AtLeastOneInput> for InputFrom { + fn from(input: &AtLeastOneInput) -> Self { + match input { + AtLeastOneInput { + stdin: Some(FromStdin { language, query }), + .. + } => InputFrom::Stdin(language.to_owned(), query.to_owned()), + + AtLeastOneInput { files, .. } => InputFrom::Files(files.to_owned()), + } + } +} + +/// Each `InputFile` needs to locate its source (standard input or disk), such that its `io::Read` +/// implementation can do the right thing. +#[derive(Debug)] +pub enum InputSource { + Stdin, + Disk(PathBuf, Option), +} + +impl fmt::Display for InputSource { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Stdin => write!(f, "standard input"), + Self::Disk(path, _) => write!(f, "{}", path.to_string_lossy()), + } + } +} + +/// An `InputFile` is the unit of input for Topiary, encapsulating everything needed for downstream +/// processing. It implements `io::Read`, so it can be passed directly to the Topiary API. 
+#[derive(Debug)] +pub struct InputFile<'cfg> { + source: InputSource, + language: &'cfg Language, + query: QuerySource, +} + +impl<'cfg> InputFile<'cfg> { + /// Convert our `InputFile` into language definition values that Topiary can consume + pub async fn to_language_definition(&self) -> CLIResult { + let grammar = self.language.grammar().await?; + let query = { + let contents = tokio::fs::read_to_string(&self.query).await?; + TopiaryQuery::new(&grammar, &contents)? + }; + + Ok(LanguageDefinition { + query, + language: self.language.clone(), + grammar, + }) + } + + /// Expose input source + pub fn source(&self) -> &InputSource { + &self.source + } + + /// Expose language for input + pub fn language(&self) -> &Language { + self.language + } + + /// Expose query path for input + pub fn query(&self) -> &PathBuf { + &self.query + } +} + +impl<'cfg> Read for InputFile<'cfg> { + fn read(&mut self, buf: &mut [u8]) -> Result { + match &mut self.source { + InputSource::Stdin => stdin().lock().read(buf), + + InputSource::Disk(path, fd) => { + if fd.is_none() { + *fd = Some(File::open(path)?); + } + + fd.as_mut().unwrap().read(buf) + } + } + } +} + +/// `Inputs` is an iterator of fully qualified `InputFile`s, each wrapped in `CLIResult`, which is +/// populated by its constructor from any type that implements `Into` +pub struct Inputs<'cfg>(Vec>>); + +impl<'cfg, 'i> Inputs<'cfg> { + pub fn new(config: &'cfg Configuration, inputs: &'i T) -> Self + where + &'i T: Into, + { + let inputs = match inputs.into() { + InputFrom::Stdin(language, query) => { + vec![(|| { + let language = language.to_language(config); + let query = query.unwrap_or(language.query_file()?); + + Ok(InputFile { + source: InputSource::Stdin, + language, + query, + }) + })()] + } + + InputFrom::Files(files) => files + .into_iter() + .map(|path| { + let language = Language::detect(&path, config)?; + let query = language.query_file()?; + + Ok(InputFile { + source: InputSource::Disk(path, None), + language, + 
query, + }) + }) + .collect(), + }; + + Self(inputs) + } +} + +impl<'cfg> Iterator for Inputs<'cfg> { + type Item = CLIResult>; + + fn next(&mut self) -> Option { + self.0.pop() + } +} + +/// An `OutputFile` is the unit of output for Topiary, differentiating between standard output and +/// disk (which uses temporary files to perform atomic updates in place). It implements +/// `io::Write`, so it can be passed directly to the Topiary API. +/// +/// NOTE When writing to disk, the `persist` function must be called to perform the in place write. +#[derive(Debug)] +pub enum OutputFile { + Stdout, + Disk { + // NOTE We stage to a file, rather than writing + // to memory (e.g., Vec), to ensure atomicity + staged: NamedTempFile, + output: OsString, + }, +} + +impl OutputFile { + pub fn new(path: &str) -> CLIResult { + match path { + "-" => Ok(Self::Stdout), + file => { + // `canonicalize` if the given path exists, otherwise fallback to what was given + let path = Path::new(file).canonicalize().or_else(|e| match e.kind() { + ErrorKind::NotFound => Ok(file.into()), + _ => Err(e), + })?; + + // The call to `parent` will only return `None` if `path` is the root directory, + // but that doesn't make sense as an output file, so unwrapping is safe + let parent = path.parent().unwrap(); + + Ok(Self::Disk { + staged: NamedTempFile::new_in(parent)?, + output: file.into(), + }) + } + } + } + + // This function must be called to persist the output to disk + pub fn persist(self) -> CLIResult<()> { + if let Self::Disk { staged, output } = self { + staged.persist(output)?; + } + + Ok(()) + } +} + +impl fmt::Display for OutputFile { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Stdout => write!(f, "standard ouput"), + Self::Disk { output, .. 
} => write!(f, "{}", output.to_string_lossy()), + } + } +} + +impl Write for OutputFile { + fn write(&mut self, buf: &[u8]) -> Result { + match self { + Self::Stdout => stdout().lock().write(buf), + Self::Disk { staged, .. } => staged.write(buf), + } + } + + fn flush(&mut self) -> Result<()> { + match self { + Self::Stdout => stdout().lock().flush(), + Self::Disk { staged, .. } => staged.flush(), + } + } +} + +// Convenience conversion: +// * stdin maps to stdout +// * Files map to themselves (i.e., for in-place updates) +impl<'cfg> TryFrom<&InputFile<'cfg>> for OutputFile { + type Error = TopiaryError; + + fn try_from(input: &InputFile) -> CLIResult { + match &input.source { + InputSource::Stdin => Ok(Self::Stdout), + InputSource::Disk(path, _) => Self::new(path.to_string_lossy().as_ref()), + } + } +} diff --git a/topiary-cli/src/language.rs b/topiary-cli/src/language.rs new file mode 100644 index 00000000..79a65ff1 --- /dev/null +++ b/topiary-cli/src/language.rs @@ -0,0 +1,76 @@ +use std::{ + collections::{ + hash_map::{DefaultHasher, Entry}, + HashMap, + }, + hash::{Hash, Hasher}, + sync::Arc, +}; + +use tokio::sync::Mutex; +use topiary::{Language, TopiaryQuery}; + +use crate::{error::CLIResult, io::InputFile}; + +/// `LanguageDefinition` contains the necessary language-related values that the Topiary API +/// expects to do its job +pub struct LanguageDefinition { + pub query: TopiaryQuery, + pub language: Language, + pub grammar: tree_sitter_facade::Language, +} + +/// Thread-safe language definition cache +pub struct LanguageDefinitionCache(Mutex>>); + +impl LanguageDefinitionCache { + pub fn new() -> Self { + LanguageDefinitionCache(Mutex::new(HashMap::new())) + } + + /// Fetch the language definition from the cache, populating if necessary, with thread-safety + pub async fn fetch<'i>(&self, input: &'i InputFile<'i>) -> CLIResult> { + // There's no need to store the input's identifying information (language name and query) + // in the key, so we use its hash 
directly. This side-steps any awkward lifetime issues. + let key = { + let mut hash = DefaultHasher::new(); + input.language().name.hash(&mut hash); + input.query().hash(&mut hash); + + hash.finish() + }; + + // Lock the entire `HashMap` on access. (This may seem blunt, but is necessary for the + // correct behaviour when we have near-simultaneous cache access; see issue #605.) + let mut cache = self.0.lock().await; + + Ok(match cache.entry(key) { + // Return the language definition from the cache, if it exists... + Entry::Occupied(lang_def) => { + log::debug!( + "Cache {:p}: Hit at {:#016x} ({}, {})", + self, + key, + input.language(), + input.query().file_name().unwrap().to_string_lossy() + ); + + lang_def.get().to_owned() + } + + // ...otherwise, fetch the language definition, to populate the cache + Entry::Vacant(slot) => { + log::debug!( + "Cache {:p}: Insert at {:#016x} ({}, {})", + self, + key, + input.language(), + input.query().file_name().unwrap().to_string_lossy() + ); + + let lang_def = Arc::new(input.to_language_definition().await?); + slot.insert(lang_def).to_owned() + } + }) + } +} diff --git a/topiary-cli/src/main.rs b/topiary-cli/src/main.rs index 9b329d44..32ef72e0 100644 --- a/topiary-cli/src/main.rs +++ b/topiary-cli/src/main.rs @@ -1,102 +1,24 @@ +mod cli; mod configuration; mod error; -mod output; -mod visualise; +mod io; +mod language; +mod visualisation; use std::{ - eprintln, error::Error, - fs::File, - io::{stdin, BufReader, BufWriter, Read}, - path::PathBuf, + io::{BufReader, BufWriter}, process::ExitCode, }; -use clap::{ArgGroup, Parser}; -use configuration::parse_configuration; +use topiary::{formatter, Operation}; use crate::{ + cli::Commands, error::{CLIError, CLIResult, TopiaryError}, - output::OutputFile, - visualise::Visualisation, + io::{Inputs, OutputFile}, + language::LanguageDefinitionCache, }; -use topiary::{formatter, Language, Operation, SupportedLanguage, TopiaryQuery}; - -#[derive(Parser, Debug)] -#[command(author, 
version, about, long_about = None)] -// Require at least one of --language or --input-files (n.b., language > input) -#[command(group(ArgGroup::new("rule").multiple(true).required(true).args(&["language", "input_files"]),))] -struct Args { - /// Which language to parse and format - #[arg(short, long, value_enum, display_order = 1)] - language: Option, - - /// Path to an input file or multiple input files. If omitted, or equal - /// to "-", read from standard input. If multiple files are provided, - /// `in_place` is assumed. - #[arg(short = 'f', long, num_args = 0.., display_order = 2, default_values_t = ["-".to_string()])] - input_files: Vec, - - /// Which query file to use - #[arg(short, long, display_order = 3)] - query: Option, - - /// Path to an output file. If omitted, or equal to "-", write to standard - /// output. - #[arg(short, long, display_order = 4)] - output_file: Option, - - /// Format the input files in place. - #[arg(short, long, requires = "input_files", display_order = 5)] - in_place: bool, - - /// Visualise the syntax tree, rather than format. 
- #[arg( - short, - long, - value_enum, - aliases = &["view", "visualize"], - value_name = "OUTPUT_FORMAT", - conflicts_with_all = &["in_place", "skip_idempotence"], - require_equals = true, - num_args = 0..=1, - default_missing_value = "json", - display_order = 6 - )] - visualise: Option, - - /// Do not check that formatting twice gives the same output - #[arg(short, long, display_order = 7)] - skip_idempotence: bool, - - /// Output the full configuration to stderr before continuing - #[arg(long, display_order = 8)] - output_configuration: bool, - - /// Format as much as possible even if some of the input causes parsing errors - #[arg(short, long, display_order = 9)] - tolerate_parsing_errors: bool, - - /// Override all configuration with the provided file - #[arg(long, env = "TOPIARY_CONFIGURATION_OVERRIDE", display_order = 10)] - configuration_override: Option, - - /// Add the specified configuration file with the highest prority - #[arg(short, long, env = "TOPIARY_CONFIGURATION_FILE", display_order = 11)] - configuration_file: Option, -} - -// /// Collects all the values needed for the eventual formatting. This helper -// /// struct just makes it easy to collect them all in a Vec. -// /// If `--in-place` was specified or if `--input-files` was -// /// given more than one file, the input and output will be the same file and -// /// the entire FormatStruct will be placed in a Vector. In all other cases the vector -// /// will still be created, but it will be a singleton vector. 
-// struct FormatStruct<'a> { -// input: &'a dyn Read, -// output: BufWriter, -// language: &'a Language, -// } #[tokio::main] async fn main() -> ExitCode { @@ -109,157 +31,126 @@ async fn main() -> ExitCode { } async fn run() -> CLIResult<()> { - env_logger::init(); - - // Restructure Args to match our expected behaviour - let args = { - let mut args = Args::parse(); - - // Remove duplicates from the input_files, (among other things, avoids being able to pass "-" twice) - args.input_files.sort_unstable(); - args.input_files.dedup(); - - args - }; - - let configuration = parse_configuration(args.configuration_override, args.configuration_file)?; - - if args.output_configuration { - eprintln!("{:#?}", configuration); - } - - let io_files: Vec<(String, String)> = if args.in_place || args.input_files.len() > 1 { - args.input_files - .iter() - .map(|f| (f.clone(), f.clone())) - .collect() - } else { - // Clap guarantees our input_files is non-empty - vec![( - args.input_files.first().unwrap().clone(), - match args.output_file.as_deref() { - Some("-") | None => String::from("-"), - Some(f) => String::from(f), - }, - )] - }; - - type IoFile = ( - String, - String, - Language, - Option, - CLIResult, - ); - - // Add the language and query Path to the io_files - let io_files: Vec = io_files - .into_iter() - // Add the appropriate language to all of the tuples - .map(|(i, o)| { - let language = if let Some(language) = args.language { - language.to_language(&configuration).clone() - } else { - Language::detect(&i, &configuration)?.clone() - }; - - let query_path = if let Some(query) = &args.query { - Ok(query.clone()) - } else { - language.query_file() - } - .map_err(TopiaryError::Lib); - - Ok((i, o, language, args.query.clone(), query_path)) - }) - .collect::>>()?; - - // Converts the simple types into arguments we can pass to the `formatter` function - // _ holds the tree_sitter_facade::Language - let fmt_args: Vec<(String, String, Language, _, TopiaryQuery)> = - 
futures::future::try_join_all(io_files.into_iter().map( - |(i, o, language, query_arg, query_path)| async move { - let grammar = language.grammar().await?; - - let query = query_path - .and_then(|query_path| { - { - let mut reader = BufReader::new(File::open(query_path)?); - let mut contents = String::new(); - reader.read_to_string(&mut contents)?; - Ok(contents) + let args = cli::get_args()?; + + let (annotations, config) = configuration::fetch( + &args.global.configuration, + // The collation value is always set, so we can safely unwrap + args.global.configuration_collation.as_ref().unwrap(), + )?; + + // Delegate by subcommand + match args.command { + Commands::Fmt { + tolerate_parsing_errors, + skip_idempotence, + inputs, + } => { + let inputs = Inputs::new(&config, &inputs); + let cache = LanguageDefinitionCache::new(); + + let (_, mut results) = async_scoped::TokioScope::scope_and_block(|scope| { + for input in inputs { + scope.spawn(async { + let result: CLIResult<()> = match input { + Ok(input) => { + let lang_def = cache.fetch(&input).await?; + let output = OutputFile::try_from(&input)?; + + log::info!( + "Formatting {}, as {} using {}, to {}", + input.source(), + input.language(), + input.query().to_string_lossy(), + output + ); + + let mut buf_input = BufReader::new(input); + let mut buf_output = BufWriter::new(output); + + formatter( + &mut buf_input, + &mut buf_output, + &lang_def.query, + &lang_def.language, + &lang_def.grammar, + Operation::Format { + skip_idempotence, + tolerate_parsing_errors, + }, + )?; + + buf_output.into_inner()?.persist()?; + + Ok(()) + } + + // This happens when the input resolver cannot establish an input + // source, language or query file. + Err(error) => Err(error), + }; + + if let Err(error) = &result { + // By this point, we've lost any reference to the original + // input; we trust that it is embedded into `error`. 
+ log::warn!("Skipping: {error}"); } - .map_err(|e| { - TopiaryError::Bin( - "Could not open query file".into(), - Some(CLIError::IOError(e)), - ) - }) - }) - .and_then(|query_content: String| { - Ok(TopiaryQuery::new(&grammar, &query_content)?) - }) - .or_else(|e| { - // If we weren't able to read the query file, and the user didn't - // request a specific query file, we should fall back to the built-in - // queries. - if query_arg.is_none() { - log::info!( - "No language file found for {language:?}. Will use built-in query." - ); - Ok((&language).try_into()?) - } else { - Err(e) - } - })?; - - Ok::<_, TopiaryError>((i, o, language, grammar, query)) - }, - )) - .await?; - // The operation needs not be part of the Vector of Structs because it is the same for every formatting instance - let operation = if let Some(visualisation) = args.visualise { - Operation::Visualise { - output_format: visualisation.into(), - } - } else { - Operation::Format { - skip_idempotence: args.skip_idempotence, - tolerate_parsing_errors: args.tolerate_parsing_errors, + result + }); + } + }); + + if results.len() == 1 { + // If we just had one input, then handle errors as normal + results.remove(0)?? 
+ } else if results + .iter() + .any(|result| matches!(result, Err(_) | Ok(Err(_)))) + { + // For multiple inputs, bail out if any failed with a "multiple errors" failure + return Err(TopiaryError::Bin( + "Processing of some inputs failed; see warning logs for details".into(), + Some(CLIError::Multiple), + )); + } } - }; - - let tasks: Vec<_> = fmt_args - .into_iter() - .map(|(input, output, language, grammar, query)| -> tokio::task::JoinHandle> { - tokio::spawn(async move { - let mut input: Box = match input.as_str() { - "-" => Box::new(stdin()), - file => Box::new(BufReader::new(File::open(file)?)), - }; - let mut output: BufWriter = BufWriter::new(OutputFile::new(&output)?); - formatter( - &mut input, - &mut output, - &query, - &language, - &grammar, - operation, - )?; - - output.into_inner()?.persist()?; - - Ok(()) - }) - }) - .collect(); + Commands::Vis { format, input } => { + // We are guaranteed (by clap) to have exactly one input, so it's safe to unwrap + let input = Inputs::new(&config, &input).next().unwrap()?; + let output = OutputFile::Stdout; + + // We don't need a `LanguageDefinitionCache` when there's only one input, + // which saves us the thread-safety overhead + let lang_def = input.to_language_definition().await?; + + log::info!( + "Visualising {}, as {}, to {}", + input.source(), + input.language(), + output + ); + + let mut buf_input = BufReader::new(input); + let mut buf_output = BufWriter::new(output); + + formatter( + &mut buf_input, + &mut buf_output, + &lang_def.query, + &lang_def.language, + &lang_def.grammar, + Operation::Visualise { + output_format: format.into(), + }, + )?; + } - for task in tasks { - // The await results in a `Result, JoinError>`. - // The first ? concerns the `JoinError`, the second one the `TopiaryError`. 
- task.await??; + Commands::Cfg => { + // Output collated configuration as TOML, with annotations about how we got there + print!("{annotations}\n{config}"); + } } Ok(()) diff --git a/topiary-cli/src/output.rs b/topiary-cli/src/output.rs deleted file mode 100644 index b02e02d0..00000000 --- a/topiary-cli/src/output.rs +++ /dev/null @@ -1,59 +0,0 @@ -use crate::error::CLIResult; -use std::{ - ffi::OsString, - io::{stdout, Write}, - path::Path, -}; -use tempfile::NamedTempFile; - -#[derive(Debug)] -pub enum OutputFile { - Stdout, - Disk { - // NOTE We stage to a file, rather than writing - // to memory (e.g., Vec), to ensure atomicity - staged: NamedTempFile, - output: OsString, - }, -} - -impl OutputFile { - pub fn new(path: &str) -> CLIResult { - match path { - "-" => Ok(Self::Stdout), - file => { - let path = Path::new(file).canonicalize()?; - let parent = path.parent().unwrap(); - Ok(Self::Disk { - staged: NamedTempFile::new_in(parent)?, - output: file.into(), - }) - } - } - } - - // This function must be called to persist the output to disk - pub fn persist(self) -> CLIResult<()> { - if let Self::Disk { staged, output } = self { - staged.persist(output)?; - } - - Ok(()) - } -} - -impl Write for OutputFile { - fn write(&mut self, buf: &[u8]) -> std::io::Result { - match self { - Self::Stdout => stdout().write(buf), - Self::Disk { staged, .. } => staged.write(buf), - } - } - - fn flush(&mut self) -> std::io::Result<()> { - match self { - Self::Stdout => stdout().flush(), - Self::Disk { staged, .. } => staged.flush(), - } - } -} diff --git a/topiary-cli/src/visualisation.rs b/topiary-cli/src/visualisation.rs new file mode 100644 index 00000000..5defe40f --- /dev/null +++ b/topiary-cli/src/visualisation.rs @@ -0,0 +1,23 @@ +use clap::ValueEnum; +use topiary::Visualisation; + +/// Visualisation output formats for Tree-sitter parse trees +// NOTE While redundant, we cannot implement clap::ValueEnum for topiary::Visualisation without +// breaking the orphan rules. 
So we have to maintain a local copy for the sake of the CLI. +#[derive(Clone, Debug, ValueEnum)] +pub enum Format { + /// GraphViz DOT serialisation + Dot, + + /// JSON serialisation + Json, +} + +impl From for Visualisation { + fn from(visualisation: Format) -> Self { + match visualisation { + Format::Dot => Self::GraphViz, + Format::Json => Self::Json, + } + } +} diff --git a/topiary-cli/src/visualise.rs b/topiary-cli/src/visualise.rs deleted file mode 100644 index f3686e68..00000000 --- a/topiary-cli/src/visualise.rs +++ /dev/null @@ -1,24 +0,0 @@ -// This is somewhat redundant, but we cannot implement clap::ValueEnum for topiary::Visualisation -// without breaking the orphan rules. So we have to maintain a local copy for the sake of the CLI. - -use clap::ValueEnum; - -#[derive(ValueEnum, Clone, Copy, Debug)] -pub enum Visualisation { - // JSON is first as it's the default and - // we want it displayed first in the help - Json, - - // All other output formats should be listed - // in alphabetical order - Dot, -} - -impl From for topiary::Visualisation { - fn from(visualisation: Visualisation) -> Self { - match visualisation { - Visualisation::Dot => Self::GraphViz, - Visualisation::Json => Self::Json, - } - } -} diff --git a/topiary-cli/tests/cli-tester.rs b/topiary-cli/tests/cli-tester.rs index edcf83b2..23833b47 100644 --- a/topiary-cli/tests/cli-tester.rs +++ b/topiary-cli/tests/cli-tester.rs @@ -1,110 +1,210 @@ +use std::{fmt, fs, fs::File, io::Write, path::PathBuf}; + use assert_cmd::Command; -use std::{ - fs::File, - io::{Read, Write}, - path::Path, +use predicates::{ + prelude::PredicateBooleanExt, + str::{ends_with, starts_with}, }; -use tempfile::NamedTempFile; +use tempfile::TempDir; -// Simple exemplar JSON state, to verify the formatter +// Simple exemplar JSON and TOML state, to verify the formatter // is doing something... 
and hopefully the right thing const JSON_INPUT: &str = r#"{ "test" :123}"#; -const JSON_EXPECTED: &str = r#"{ "test": 123 }"#; +const JSON_EXPECTED: &str = r#"{ "test": 123 } +"#; + +const TOML_INPUT: &str = r#" test= 123"#; +const TOML_EXPECTED: &str = r#"test = 123 +"#; -struct State(NamedTempFile); +struct State(TempDir, PathBuf); impl State { - fn new(payload: &str) -> Self { - let mut state = NamedTempFile::new().unwrap(); + fn new(payload: &str, extension: &str) -> Self { + let tmp_dir = TempDir::new().unwrap(); + let tmp_file = tmp_dir.path().join(format!("state.{extension}")); + + let mut state = File::create(&tmp_file).unwrap(); write!(state, "{payload}").unwrap(); - Self(state) + Self(tmp_dir, tmp_file) } - fn path(&self) -> &Path { - self.0.path() + fn path(&self) -> &PathBuf { + &self.1 } fn read(&self) -> String { - // For an in place edit, Topiary will remove the original file. As such, we can't use - // NamedTempFile::reopen, as the original no longer exists; we have to "reopen" it by path. 
- let mut file = File::open(self.path()).unwrap(); - let mut contents = String::new(); - file.read_to_string(&mut contents).unwrap(); - - contents + fs::read_to_string(self.path()).unwrap() } } #[test] -fn test_file_output() { - let output = State::new(""); +fn test_fmt_stdin() { + let mut topiary = Command::cargo_bin("topiary").unwrap(); + topiary + .env("TOPIARY_LANGUAGE_DIR", "../languages") + .arg("fmt") + .arg("--language") + .arg("json") + .write_stdin(JSON_INPUT) + .assert() + .success() + .stdout(JSON_EXPECTED); +} + +#[test] +fn test_fmt_stdin_query() { let mut topiary = Command::cargo_bin("topiary").unwrap(); + topiary .env("TOPIARY_LANGUAGE_DIR", "../languages") + .arg("fmt") .arg("--language") .arg("json") - .arg("--output-file") - .arg(output.path()) + .arg("--query") + .arg("../languages/json.scm") .write_stdin(JSON_INPUT) .assert() + .success() + .stdout(JSON_EXPECTED); +} + +#[test] +fn test_fmt_files() { + let json = State::new(JSON_INPUT, "json"); + let toml = State::new(TOML_INPUT, "toml"); + + let mut topiary = Command::cargo_bin("topiary").unwrap(); + + topiary + .env("TOPIARY_LANGUAGE_DIR", "../languages") + .arg("fmt") + .arg(json.path()) + .arg(toml.path()) + .assert() .success(); - assert_eq!(output.read().trim(), JSON_EXPECTED); + assert_eq!(json.read(), JSON_EXPECTED); + assert_eq!(toml.read(), TOML_EXPECTED); } #[test] -fn test_no_clobber() { - let json = State::new(JSON_INPUT); - let input_path = json.path(); +fn test_fmt_dir() { + let json = State::new(JSON_INPUT, "json"); let mut topiary = Command::cargo_bin("topiary").unwrap(); + topiary .env("TOPIARY_LANGUAGE_DIR", "../languages") - .arg("--language") - .arg("json") - .arg("--input-files") - .arg(input_path) - .arg("--output-file") - .arg(input_path) + .arg("fmt") + .arg(json.path().parent().unwrap()) .assert() .success(); - // NOTE We only assume, here, that the state has been modified by the call to Topiary. It may - // be worthwhile asserting (e.g., change in mtime, etc.). 
- assert_eq!(json.read().trim(), JSON_EXPECTED); + assert_eq!(json.read(), JSON_EXPECTED); } #[test] -fn test_in_place() { - let json = State::new(JSON_INPUT); - let input_path = json.path(); - +fn test_fmt_invalid() { let mut topiary = Command::cargo_bin("topiary").unwrap(); + + // Can't specify --language with input files topiary .env("TOPIARY_LANGUAGE_DIR", "../languages") + .arg("fmt") .arg("--language") .arg("json") - .arg("--input-files") - .arg(input_path) - .arg("--in-place") + .arg("/path/to/some/input") .assert() - .success(); + .failure(); + + // Can't specify --query without --language + topiary + .env("TOPIARY_LANGUAGE_DIR", "../languages") + .arg("fmt") + .arg("--query") + .arg("/path/to/query") + .assert() + .failure(); +} - // NOTE We only assume, here, that the state has been modified by the call to Topiary. It may - // be worthwhile asserting (e.g., change in mtime, etc.). - assert_eq!(json.read().trim(), JSON_EXPECTED); +#[test] +fn test_vis() { + let mut topiary = Command::cargo_bin("topiary").unwrap(); + + // Sanity check output is a valid DOT graph + let is_graph = starts_with("graph {").and(ends_with("}\n")); + + topiary + .env("TOPIARY_LANGUAGE_DIR", "../languages") + .arg("vis") + .arg("--language") + .arg("json") + .write_stdin(JSON_INPUT) + .assert() + .success() + .stdout(is_graph); } #[test] -fn test_in_place_no_input() { +fn test_vis_invalid() { let mut topiary = Command::cargo_bin("topiary").unwrap(); + + // Can't specify --language with input file topiary .env("TOPIARY_LANGUAGE_DIR", "../languages") + .arg("vis") .arg("--language") .arg("json") - .arg("--in-place") + .arg("/path/to/some/input") .assert() .failure(); + + // Can't specify --query without --language + topiary + .env("TOPIARY_LANGUAGE_DIR", "../languages") + .arg("vis") + .arg("--query") + .arg("/path/to/query") + .assert() + .failure(); + + // Can't specify multiple input files + topiary + .env("TOPIARY_LANGUAGE_DIR", "../languages") + .arg("vis") + 
.arg("/path/to/some/input") + .arg("/path/to/another/input") + .assert() + .failure(); +} + +#[test] +fn test_cfg() { + let mut topiary = Command::cargo_bin("topiary").unwrap(); + + topiary + .env("TOPIARY_LANGUAGE_DIR", "../languages") + .arg("cfg") + .assert() + .success() + .stdout(IsToml); +} + +struct IsToml; + +impl predicates::Predicate for IsToml { + fn eval(&self, variable: &str) -> bool { + toml::Value::try_from(variable).is_ok() + } +} + +impl predicates::reflection::PredicateReflection for IsToml {} + +impl fmt::Display for IsToml { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "is_toml") + } } diff --git a/topiary/src/configuration.rs b/topiary/src/configuration.rs index cce652a9..273fcede 100644 --- a/topiary/src/configuration.rs +++ b/topiary/src/configuration.rs @@ -1,7 +1,8 @@ /// Topiary can be configured using the `Configuration` struct. /// A basic configuration, written in toml, it is included buildtime and parsed runtime. /// Additional configuration has to be provided by the user of the library. -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; +use std::fmt; use crate::{language::Language, FormatterError, FormatterResult}; use serde::{Deserialize, Serialize}; @@ -66,6 +67,37 @@ impl Default for Configuration { } } +/// Convert `Configuration` values into `HashMap`s, keyed on `Language::name` +// NOTE There are optimisations to be had here, to avoid cloning, but life's too short! 
+impl From<&Configuration> for HashMap { + fn from(config: &Configuration) -> Self { + HashMap::from_iter(config.language.iter().map(|language| { + let name = language.name.clone(); + let language = language.clone(); + + (name, language) + })) + } +} + +// Order-invariant equality; required for unit testing +impl PartialEq for Configuration { + fn eq(&self, other: &Self) -> bool { + let lhs: HashMap = self.into(); + let rhs: HashMap = other.into(); + + lhs == rhs + } +} + +impl fmt::Display for Configuration { + /// Pretty-print configuration as TOML + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let toml = toml::to_string_pretty(self).map_err(|_| fmt::Error)?; + write!(f, "{toml}") + } +} + /// Default built-in languages.toml parsed to a toml file. /// We parse the configuration file in two phases, the first is to a `toml::Value` /// This function is exported to allow users of the library to merge their own diff --git a/topiary/src/error.rs b/topiary/src/error.rs index 6c885f8d..f3d98347 100644 --- a/topiary/src/error.rs +++ b/topiary/src/error.rs @@ -96,7 +96,7 @@ impl fmt::Display for FormatterError { match extension { Some(extension) => write!(f, - "Cannot detect language {file} due to unknown extension '.{extension}'. Try specifying language explicitly.", + "Cannot detect language {file} due to unknown extension '.{extension}'. Try specifying language explicitly, or updating your configuration.", ), None => write!(f, "Cannot detect language {file}. Try specifying language explicitly." 
diff --git a/topiary/src/language.rs b/topiary/src/language.rs index 662ede30..b6522c05 100644 --- a/topiary/src/language.rs +++ b/topiary/src/language.rs @@ -1,6 +1,8 @@ -use std::collections::HashSet; -use std::io; -use std::path::{Path, PathBuf}; +use std::{ + collections::HashSet, + fmt, io, + path::{Path, PathBuf}, +}; use clap::ValueEnum; use serde::{Deserialize, Serialize}; @@ -15,7 +17,7 @@ pub struct Language { /// the Configuration, and to convert from a language to the respective tree-sitter /// grammar. pub name: String, - /// A Set of the filetype extentions associated with this particular language. + /// A Set of the filetype extensions associated with this particular language. /// Enables Topiary to pick the right language given an input file pub extensions: HashSet, /// The indentation string used for that particular language. Defaults to " " @@ -103,6 +105,12 @@ impl Language { } } +impl fmt::Display for Language { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.name) + } +} + /// Convert a Language into the canonical basename of its query file, under the most appropriate /// search path. We test 3 different locations for query files, in the following priority order, /// returning the first that exists: @@ -181,9 +189,7 @@ impl SupportedLanguage { } } - // Every supported language MUST have an entry in the builtin - // configuration, and so there should always be a match. - unreachable!() + unreachable!("A match should always be returned because every supported language must have an entry in the builtin configuration file") } pub fn name(&self) -> &str { diff --git a/verify-documented-usage.sh b/verify-documented-usage.sh index de1f17a2..a7945225 100755 --- a/verify-documented-usage.sh +++ b/verify-documented-usage.sh @@ -1,17 +1,58 @@ -#!/usr/bin/env bash -usage="$(nix run . -- --help)" - -echo "$usage" | -{ - while IFS= read -r line - do - if ! 
grep -Fxq "$line" README.md - then - echo "Usage is not correctly documented in README.md. Update the file with the following:" - echo "$usage" - exit 1 - fi - done - - echo "Usage is correctly documented in README.md." +#!/usr/bin/env nix-shell +#!nix-shell -i bash --packages diffutils gnused +#shellcheck shell=bash + +set -euo pipefail + +readonly FENCE='```' + +get-cli-usage() { + # Get the help text from the CLI + local subcommand="${1-ROOT}" + + case "${subcommand}" in + "ROOT") nix run . -- --help;; + *) nix run . -- "${subcommand}" --help;; + esac } + +get-readme-usage() { + # Get the help text from the README + local subcommand="${1-ROOT}" + + sed --quiet " + /usage:start:${subcommand}/, /usage:end:${subcommand}/ { + //d # Delete the markers (last pattern) + /${FENCE}/d # Delete the code fences + p # Print anything else + } + " README.md +} + +diff-usage() { + # Generate a diff between the README and CLI help text + local subcommand="${1-ROOT}" + + diff --text \ + --ignore-all-space \ + <(get-readme-usage "${subcommand}") \ + <(get-cli-usage "${subcommand}") +} + +main() { + local -a subcommands=(ROOT fmt vis cfg) + + local _diff + local _subcommand + for _subcommand in "${subcommands[@]}"; do + if ! _diff=$(diff-usage "${_subcommand}"); then + >&2 echo "Usage is not correctly documented in README.md for the ${_subcommand} subcommand!" + echo "${_diff}" + exit 1 + fi + done + + >&2 echo "Usage is correctly documented in README.md" +} + +main