From c6574a96d9209dea95641f2ad35ec4c0697a9335 Mon Sep 17 00:00:00 2001 From: Erin van der Veen Date: Thu, 12 Sep 2024 12:41:32 +0200 Subject: [PATCH 1/3] feat: add prefetch subcommand --- Cargo.lock | 17 +++++++-- Cargo.toml | 3 +- topiary-cli/src/cli.rs | 9 +++++ topiary-cli/src/error.rs | 8 +++- topiary-cli/src/main.rs | 4 ++ topiary-config/Cargo.toml | 16 ++++---- topiary-config/src/error.rs | 64 +++++++++++++++++++++++-------- topiary-config/src/language.rs | 69 ++++++++++++++++++++++------------ topiary-config/src/lib.rs | 38 +++++++++++++++++++ topiary-config/src/source.rs | 2 +- 10 files changed, 176 insertions(+), 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 116d1552..291bd498 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1828,14 +1828,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.10.1" +version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" dependencies = [ "cfg-if", "fastrand", + "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2101,6 +2102,7 @@ dependencies = [ "libloading", "log", "nickel-lang-core", + "rayon", "serde", "tempfile", "toml 0.8.14", @@ -2539,6 +2541,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-targets" version = "0.48.5" diff --git a/Cargo.toml b/Cargo.toml index 51f6467f..eb42a52a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,9 +66,10 @@ nickel-lang-core = { version = "0.8.0", default-features = false } predicates = "3.0" pretty_assertions = "1.3" prettydiff = { version = "0.6.4", default-features = false } +rayon = "1.10.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" -tempfile = "3.5" +tempfile = "3.12" test-log = "0.2" tokio = "1.32" tokio-test = "0.4" diff --git a/topiary-cli/src/cli.rs b/topiary-cli/src/cli.rs index 40347ed2..a23b1304 100644 --- a/topiary-cli/src/cli.rs +++ b/topiary-cli/src/cli.rs @@ -140,6 +140,10 @@ pub enum Commands { #[command(alias = "cfg", display_order = 3)] Config, + /// Prefetch all languages in the configuration + #[command(display_order = 4)] + Prefetch, + /// Generate shell completion script #[command(display_order = 100)] Completion { @@ -170,6 +174,11 @@ fn traverse_fs(files: &mut Vec) -> CLIResult<()> { pub fn get_args() -> CLIResult { let mut args = Cli::parse(); + // When doing prefetching, we should always output at at least verbosity level two + if matches!(args.command, Commands::Prefetch) && args.global.verbose < 2 { + args.global.verbose = 2; + } + // This is the earliest point that we can initialise the logger, from the --verbose flags, // before any fallible operations have started env_logger::Builder::new() diff --git a/topiary-cli/src/error.rs b/topiary-cli/src/error.rs index d6928747..fa7210f4 100644 --- a/topiary-cli/src/error.rs +++ b/topiary-cli/src/error.rs @@ -1,5 +1,5 @@ use std::{error, fmt, io, path::PathBuf, process::ExitCode, result}; -use topiary_config::error::TopiaryConfigError; +use topiary_config::error::{TopiaryConfigError, TopiaryConfigFetchingError}; use topiary_core::FormatterError; /// A convenience wrapper around `std::result::Result`. @@ -112,6 +112,12 @@ impl From for TopiaryError { } } +impl From for TopiaryError { + fn from(e: TopiaryConfigFetchingError) -> Self { + Self::Config(TopiaryConfigError::Fetching(e)) + } +} + impl From for TopiaryError { fn from(e: io::Error) -> Self { match e.kind() { diff --git a/topiary-cli/src/main.rs b/topiary-cli/src/main.rs index e5a88e10..3636df53 100644 --- a/topiary-cli/src/main.rs +++ b/topiary-cli/src/main.rs @@ -143,6 +143,10 @@ async fn run() -> CLIResult<()> { print!("{:#?}", config); } + Commands::Prefetch => { + config.prefetch_languages()?; + } + Commands::Completion { shell } => { // The CLI parser fails if no shell is provided/detected, so it's safe to unwrap here cli::completion(shell.unwrap()); diff --git a/topiary-config/Cargo.toml b/topiary-config/Cargo.toml index ab6f13bd..36103a3d 100644 --- a/topiary-config/Cargo.toml +++ b/topiary-config/Cargo.toml @@ -17,6 +17,7 @@ directories.workspace = true itertools.workspace = true log.workspace = true nickel-lang-core.workspace = true +rayon = { workspace = true, optional = true } serde = { workspace = true, features = ["derive"] } tempfile.workspace = true toml.workspace = true @@ -24,15 +25,6 @@ tree-sitter.workspace = true topiary-tree-sitter-facade.workspace = true topiary-web-tree-sitter-sys.workspace = true -# tree-sitter-json = { workspace = true, optional = true } -# tree-sitter-rust = { workspace = true, optional = true } -# tree-sitter-toml = { workspace = true, optional = true } -# tree-sitter-bash = { workspace = true, optional = true } -# tree-sitter-css = { workspace = true, optional = true } -# tree-sitter-nickel = { workspace = true, optional = true } -# tree-sitter-query = { workspace = true, optional = true } -# tree-sitter-ocaml = { workspace = true, optional = true } -# tree-sitter-ocamllex = { workspace = true, optional = true } [target.'cfg(not(target_family = "wasm"))'.dependencies] clap = { workspace = true, features = ["derive"] } @@ -41,6 +33,12 @@ git2.workspace = true libloading.workspace = true [features] +default = [ "parallel" ] + +# Enabling the `parallel` feature enables parallel computation where possible. +# At the moment, this is only in grammar prefetching +parallel = [ "dep:rayon" ] + bash = [] css = [] json = [] diff --git a/topiary-config/src/error.rs b/topiary-config/src/error.rs index a35625e0..d87d4210 100644 --- a/topiary-config/src/error.rs +++ b/topiary-config/src/error.rs @@ -10,17 +10,25 @@ pub enum TopiaryConfigError { NoExtension(path::PathBuf), #[cfg(not(target_arch = "wasm32"))] QueryFileNotFound(path::PathBuf), - IoError(io::Error), + Io(io::Error), Missing, TreeSitterFacade(topiary_tree_sitter_facade::LanguageError), Nickel(nickel_lang_core::error::Error), NickelDeserialization(nickel_lang_core::deserialize::RustDeserializationError), #[cfg(not(target_arch = "wasm32"))] - LibLoading(libloading::Error), - #[cfg(not(target_arch = "wasm32"))] + Fetching(TopiaryConfigFetchingError), +} + +#[derive(Debug)] +/// Topiary can fetch an compile grammars, doing so may create errors. +/// Usually, this error would be part of the `TopiaryConfigError`, however, that enum also includes `nickel_lang_core::error::Error`, which does not implement Sync/Send. +/// Since fetching an compilation is something that can easily be parallelized, we create a special error that DOES implement Sync/Send. +#[cfg(not(target_arch = "wasm32"))] +pub enum TopiaryConfigFetchingError { Git(git2::Error), - #[cfg(not(target_arch = "wasm32"))] - Compilation(String), + Subprocess(String), + Io(io::Error), + LibLoading(libloading::Error), } impl fmt::Display for TopiaryConfigError { @@ -32,17 +40,29 @@ impl fmt::Display for TopiaryConfigError { TopiaryConfigError::NoExtension(path) => write!(f, "You tried to format {} without specifying a language, but we cannot automatically detect the language because we can't find the filetype extension.", path.to_string_lossy()), #[cfg(not(target_arch = "wasm32"))] TopiaryConfigError::QueryFileNotFound(path) => write!(f, "We could not find the query file: \"{}\" anywhere. If you use the TOPIARY_LANGUAGE_DIR environment variable, make sure it set set correctly.", path.to_string_lossy()), - TopiaryConfigError::IoError(error) => write!(f, "We encountered an io error: {error}"), + TopiaryConfigError::Io(error) => write!(f, "We encountered an io error: {error}"), TopiaryConfigError::Missing => write!(f, "A configuration file is missing. If you passed a configuration file, make sure it exists."), TopiaryConfigError::TreeSitterFacade(_) => write!(f, "We could not load the grammar for the given language"), TopiaryConfigError::Nickel(e) => write!(f, "Nickel error: {:?}", e), TopiaryConfigError::NickelDeserialization(e) => write!(f, "Nickel error: {:?}", e), #[cfg(not(target_arch = "wasm32"))] - TopiaryConfigError::LibLoading(e) => write!(f, "Libloading error: {:?}", e), - #[cfg(not(target_arch = "wasm32"))] - TopiaryConfigError::Git(e) => write!(f, "Git error: {:?}", e), - #[cfg(not(target_arch = "wasm32"))] - TopiaryConfigError::Compilation(e) => write!(f, "Compilation error: {:?},", e), + TopiaryConfigError::Fetching(e) => write!(f, "Error Fetching Language: {:?}", e), + } + } +} + +#[cfg(not(target_arch = "wasm32"))] +impl fmt::Display for TopiaryConfigFetchingError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TopiaryConfigFetchingError::Git(e) => write!(f, "Git error: {:?}", e), + TopiaryConfigFetchingError::Subprocess(e) => { + write!(f, "Compilation error: {e},") + } + TopiaryConfigFetchingError::Io(error) => { + write!(f, "We encountered an io error: {error}") + } + TopiaryConfigFetchingError::LibLoading(e) => write!(f, "Libloading error: {:?}", e), } } } @@ -59,9 +79,23 @@ impl From for TopiaryConfigError { } } +#[cfg(not(target_arch = "wasm32"))] +impl From for TopiaryConfigError { + fn from(e: TopiaryConfigFetchingError) -> Self { + Self::Fetching(e) + } +} + impl From for TopiaryConfigError { fn from(e: io::Error) -> Self { - Self::IoError(e) + Self::Io(e) + } +} + +#[cfg(not(target_arch = "wasm32"))] +impl From for TopiaryConfigFetchingError { + fn from(e: io::Error) -> Self { + Self::Io(e) } } @@ -72,14 +106,14 @@ impl From for TopiaryConfigError { } #[cfg(not(target_arch = "wasm32"))] -impl From for TopiaryConfigError { +impl From for TopiaryConfigFetchingError { fn from(e: libloading::Error) -> Self { Self::LibLoading(e) } } #[cfg(not(target_arch = "wasm32"))] -impl From for TopiaryConfigError { +impl From for TopiaryConfigFetchingError { fn from(e: git2::Error) -> Self { Self::Git(e) } @@ -89,7 +123,7 @@ impl error::Error for TopiaryConfigError { fn source(&self) -> Option<&(dyn error::Error + 'static)> { match self { #[cfg(not(target_arch = "wasm32"))] - TopiaryConfigError::IoError(e) => e.source(), + TopiaryConfigError::Io(e) => e.source(), _ => None, } } diff --git a/topiary-config/src/language.rs b/topiary-config/src/language.rs index 4d13a31b..1d00fa20 100644 --- a/topiary-config/src/language.rs +++ b/topiary-config/src/language.rs @@ -1,9 +1,9 @@ //! This module contains the `Language` struct, which represents a language configuration, and //! associated methods. -#[cfg(not(target_arch = "wasm32"))] -use crate::error::TopiaryConfigError; use crate::error::TopiaryConfigResult; +#[cfg(not(target_arch = "wasm32"))] +use crate::error::{TopiaryConfigError, TopiaryConfigFetchingError}; use std::collections::HashSet; #[cfg(not(target_arch = "wasm32"))] @@ -14,8 +14,6 @@ use git2::Repository; use std::path::PathBuf; #[cfg(not(target_arch = "wasm32"))] use std::process::Command; -#[cfg(not(target_arch = "wasm32"))] -use tempfile::tempdir; #[cfg(not(target_arch = "wasm32"))] const BUILD_TARGET: &str = env!("BUILD_TARGET"); @@ -89,21 +87,11 @@ impl Language { } #[cfg(not(target_arch = "wasm32"))] - // NOTE: Much of the following code is heavily inspired by the `helix-loader` crate with license MPL-2.0. - // To be safe, assume any and all of the following code is MLP-2.0 and copyrighted to the Helix project. - pub fn grammar(&self) -> TopiaryConfigResult { - // Locate cache dir, e.g. `~/.cache/topiary/ + // Returns the library path, and ensures the parent directories exist. + pub fn library_path(&self) -> std::io::Result { let mut library_path = crate::project_dirs().cache_dir().to_path_buf(); - - // Create the language specific directory. This directory is not - // necessary (the rev should be identifying enough), but allows - // convenient removing of entire languages. library_path.push(self.name.clone()); - - // Ensure path exists - if !library_path.exists() { - std::fs::create_dir_all(&library_path)?; - } + std::fs::create_dir_all(&library_path)?; // Set the output path as the revision of the grammar library_path.push(self.config.grammar.rev.clone()); @@ -112,6 +100,17 @@ impl Language { // On both MacOS and Linux, .so is a valid file extension for shared objects. library_path.set_extension("so"); + Ok(library_path) + } + + #[cfg(not(target_arch = "wasm32"))] + // NOTE: Much of the following code is heavily inspired by the `helix-loader` crate with license MPL-2.0. + // To be safe, assume any and all of the following code is MLP-2.0 and copyrighted to the Helix project. + pub fn grammar( + &self, + ) -> Result { + let library_path = self.library_path()?; + // Ensure the comile exists if !library_path.is_file() { self.fetch_and_compile(library_path.clone())?; @@ -159,21 +158,41 @@ impl Language { } #[cfg(not(target_arch = "wasm32"))] - fn fetch_and_compile(&self, library_path: PathBuf) -> TopiaryConfigResult<()> { + fn fetch_and_compile(&self, library_path: PathBuf) -> Result<(), TopiaryConfigFetchingError> { + log::info!( + "{}: Language Grammar not found, attempting to fetch and compile it", + self.name + ); // Create a temporary directory to clone the repository to. We could // cached the repositories, but the additional disk space is probably // not worth the benefits gained by caching. The tempdir is deleted // when dropped - let tmp_dir = tempdir()?; + let tmp_dir = tempfile::tempdir()?; + + self.fetch_and_compile_with_dir(library_path, tmp_dir.into_path()) + } + + #[cfg(not(target_arch = "wasm32"))] + pub fn fetch_and_compile_with_dir( + &self, + library_path: PathBuf, + tmp_dir: PathBuf, + ) -> Result<(), TopiaryConfigFetchingError> { + if library_path.is_file() { + return Ok(()); + } + let tmp_dir = tmp_dir.join(self.name.clone()); // Clone the repository and checkout the configured revision + log::info!("{}: cloning from {}", self.name, self.config.grammar.git); let repo = Repository::clone(&self.config.grammar.git, &tmp_dir)?; + log::info!("{}: checking out {}", self.name, self.config.grammar.rev); repo.set_head_detached(Oid::from_str(&self.config.grammar.rev)?)?; let path = match self.config.grammar.subdir.clone() { // Some grammars are in a subdirectory, go there - Some(subdir) => tmp_dir.path().join(subdir), - None => tmp_dir.path().to_owned(), + Some(subdir) => tmp_dir.join(subdir), + None => tmp_dir, } // parser.c and potenial scanners are always in src/ .join("src"); @@ -188,7 +207,8 @@ impl Language { &self, src_path: &PathBuf, target_path: PathBuf, - ) -> Result<(), TopiaryConfigError> { + ) -> Result<(), TopiaryConfigFetchingError> { + log::info!("{}: compiling grammar", self.name); let header_path = src_path; let parser_path = src_path.join("parser.c"); let mut scanner_path = src_path.join("scanner.c"); @@ -251,7 +271,7 @@ impl Language { .arg(scanner_path); let output = cpp_command.output()?; if !output.status.success() { - return Err(TopiaryConfigError::Compilation(format!( + return Err(TopiaryConfigFetchingError::Subprocess(format!( "{:#?}, {:#?}", output.stdout, output.stderr ))); @@ -273,13 +293,14 @@ impl Language { let output = command.output()?; if !output.status.success() { - return Err(TopiaryConfigError::Compilation(format!( + return Err(TopiaryConfigFetchingError::Subprocess(format!( "{:#?}, {:#?}", String::from_utf8_lossy(&output.stdout), String::from_utf8_lossy(&output.stderr), ))); } + log::info!("{}: succesfully compiled", self.name); Ok(()) } } diff --git a/topiary-config/src/lib.rs b/topiary-config/src/lib.rs index aaae9b13..1fe77738 100644 --- a/topiary-config/src/lib.rs +++ b/topiary-config/src/lib.rs @@ -15,6 +15,11 @@ use language::{Language, LanguageConfiguration}; use nickel_lang_core::{eval::cache::CacheImpl, program::Program}; use serde::Deserialize; +#[cfg(not(target_arch = "wasm32"))] +use crate::error::TopiaryConfigFetchingError; +#[cfg(not(target_arch = "wasm32"))] +use tempfile::tempdir; + use crate::{ error::{TopiaryConfigError, TopiaryConfigResult}, source::Source, @@ -77,6 +82,39 @@ impl Configuration { .ok_or(TopiaryConfigError::UnknownLanguage(name.to_string())) } + /// Prefetches and builds all known languages. + /// This can be beneficial to speed up future startup time. + /// + /// # Errors + /// + /// If any Grammar could not be build, a `TopiaryConfigError` is returned. + #[cfg(not(target_arch = "wasm32"))] + pub fn prefetch_languages(&self) -> TopiaryConfigResult<()> { + let tmp_dir = tempdir()?; + let tmp_dir_path = tmp_dir.path().to_owned(); + + // When "parallel" is enabled, we use rayon to fetch and compile all found grammars in parallel. + #[cfg(feature = "parallel")] + { + use rayon::prelude::*; + self.languages + .par_iter() + .map(|l| l.fetch_and_compile_with_dir(l.library_path()?, tmp_dir_path.clone())) + .collect::, TopiaryConfigFetchingError>>()?; + } + + #[cfg(not(feature = "parallel"))] + { + self.languages + .iter() + .map(|l| l.fetch_and_compile_with_dir(l.library_path()?, tmp_dir_path.clone())) + .collect::, TopiaryConfigFetchingError>>()?; + } + + tmp_dir.close()?; + Ok(()) + } + /// Convenience alias to detect the Language from a Path-like value's extension. /// /// # Errors diff --git a/topiary-config/src/source.rs b/topiary-config/src/source.rs index 4f9b23a5..7cce1fad 100644 --- a/topiary-config/src/source.rs +++ b/topiary-config/src/source.rs @@ -79,7 +79,7 @@ impl Source { match self { Self::Builtin => Ok(self.builtin_nickel().into_bytes()), Self::File(path) => std::fs::read_to_string(path) - .map_err(TopiaryConfigError::IoError) + .map_err(TopiaryConfigError::Io) .map(|s| s.into_bytes()), } } From cd7a6f17c95378e725fe77dda08594644b44d7c6 Mon Sep 17 00:00:00 2001 From: Erin van der Veen Date: Fri, 13 Sep 2024 15:20:26 +0200 Subject: [PATCH 2/3] chore: update readme to include new subcommand --- README.md | 21 +++++++++++++++++++++ bin/verify-documented-usage.sh | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 743b908c..f2d80342 100644 --- a/README.md +++ b/README.md @@ -184,6 +184,7 @@ Commands: format Format inputs visualise Visualise the input's Tree-sitter parse tree config Print the current configuration + prefetch Prefetch all languages in the configuration completion Generate shell completion script help Print this message or the help of the given subcommand(s) @@ -345,6 +346,26 @@ For example, in Bash: source <(topiary completion) ``` +#### Prefetching + +Topiary dynamically downloads, builds, and loads the tree-sitter grammars. In +order to ensure offline availability or speed up startup time, the grammars can +be prefetched and compiled. + + + +``` +Prefetch all languages in the configuration + +Usage: topiary prefetch [OPTIONS] + +Options: + -C, --configuration Configuration file [env: TOPIARY_CONFIG_FILE] + -v, --verbose... Logging verbosity (increased per occurrence) + -h, --help Print help +``` + + #### Logging By default, the Topiary CLI will only output error messages. You can diff --git a/bin/verify-documented-usage.sh b/bin/verify-documented-usage.sh index fd0e922d..708f7399 100755 --- a/bin/verify-documented-usage.sh +++ b/bin/verify-documented-usage.sh @@ -40,7 +40,7 @@ diff-usage() { } main() { - local -a subcommands=(ROOT format visualise config completion) + local -a subcommands=(ROOT format visualise config completion prefetch) local _diff local _subcommand From 946957e732b26de7cc5a23fd3a5294222a985181 Mon Sep 17 00:00:00 2001 From: Erin van der Veen Date: Tue, 17 Sep 2024 12:16:31 +0200 Subject: [PATCH 3/3] chore: update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index db9470cb..3e34ccc3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ This name should be decided amongst the team before the release. ### Added - [#705](https://github.com/tweag/topiary/pull/705) Added support for Nickel 1.7 extended pattern formatting +- [#737](https://github.com/tweag/topiary/pull/737) Added the `prefetch` command, that prefetches and caches all grammars in the current configuration ### Fixed - [#720](https://github.com/tweag/topiary/pull/720) [#722](https://github.com/tweag/topiary/pull/722) [#723](https://github.com/tweag/topiary/pull/723) [#724](https://github.com/tweag/topiary/pull/724) [#735](https://github.com/tweag/topiary/pull/735)