From fdb4d20f7390412c1f68a2b6040f6790e752cff6 Mon Sep 17 00:00:00 2001 From: Nicolas BACQUEY Date: Mon, 21 Oct 2024 12:11:48 +0200 Subject: [PATCH 1/2] Add query coverage subcommand --- topiary-cli/src/cli.rs | 7 ++++ topiary-cli/src/error.rs | 22 +++++++++-- topiary-cli/src/main.rs | 28 +++++++++++++- topiary-core/src/error.rs | 9 ++--- topiary-core/src/lib.rs | 40 ++++++++++++++++++- topiary-core/src/tree_sitter.rs | 68 ++++++++++++++++++++++----------- 6 files changed, 139 insertions(+), 35 deletions(-) diff --git a/topiary-cli/src/cli.rs b/topiary-cli/src/cli.rs index a23b1304..54078995 100644 --- a/topiary-cli/src/cli.rs +++ b/topiary-cli/src/cli.rs @@ -144,6 +144,13 @@ pub enum Commands { #[command(display_order = 4)] Prefetch, + /// Checks how much of the tree-sitter query is used + #[command(display_order = 5)] + Coverage { + #[command(flatten)] + input: ExactlyOneInput, + }, + /// Generate shell completion script #[command(display_order = 100)] Completion { diff --git a/topiary-cli/src/error.rs b/topiary-cli/src/error.rs index a65e2963..872d1104 100644 --- a/topiary-cli/src/error.rs +++ b/topiary-cli/src/error.rs @@ -68,6 +68,9 @@ impl error::Error for TopiaryError { impl From for ExitCode { fn from(e: TopiaryError) -> Self { let exit_code = match e { + // Things went well but Topiary needs to answer 'false' in a clean way: Exit 1 + _ if e.benign() => 1, + // Multiple errors: Exit 9 TopiaryError::Bin(_, Some(CLIError::Multiple)) => 9, @@ -92,9 +95,6 @@ impl From for ExitCode { // Bad arguments: Exit 2 // (Handled by clap: https://github.com/clap-rs/clap/issues/3426) - // Things went well but Topiary needs to answer 'false' in a clean way: Exit 1 - // (Not used at the moment) - // Anything else: Exit 10 _ => 10, }; @@ -167,3 +167,19 @@ impl From for TopiaryError { ) } } + +// Tells whether an error should raise a message on stderr, +// or if it's an "expected" error. +pub trait Benign { + fn benign(&self) -> bool; +} + +impl Benign for TopiaryError { + #[allow(clippy::match_like_matches_macro)] + fn benign(&self) -> bool { + match self { + TopiaryError::Lib(FormatterError::PatternDoesNotMatch) => true, + _ => false, + } + } +} diff --git a/topiary-cli/src/main.rs b/topiary-cli/src/main.rs index 3636df53..2ebb62cc 100644 --- a/topiary-cli/src/main.rs +++ b/topiary-cli/src/main.rs @@ -10,7 +10,8 @@ use std::{ process::ExitCode, }; -use topiary_core::{formatter, Operation}; +use error::Benign; +use topiary_core::{coverage, formatter, Operation}; use crate::{ cli::Commands, @@ -22,7 +23,9 @@ use crate::{ #[tokio::main] async fn main() -> ExitCode { if let Err(e) = run().await { - print_error(&e); + if !e.benign() { + print_error(&e) + } return e.into(); } @@ -147,6 +150,27 @@ async fn run() -> CLIResult<()> { config.prefetch_languages()?; } + Commands::Coverage { input } => { + // We are guaranteed (by clap) to have exactly one input, so it's safe to unwrap + let input = Inputs::new(&config, &input).next().unwrap()?; + let output = OutputFile::Stdout; + + // We don't need a `LanguageDefinitionCache` when there's only one input, + // which saves us the thread-safety overhead + let language = input.to_language().await?; + + log::info!( + "Checking query coverage of {}, as {}", + input.source(), + input.language().name, + ); + + let mut buf_input = BufReader::new(input); + let mut buf_output = BufWriter::new(output); + + coverage(&mut buf_input, &mut buf_output, &language)? + } + Commands::Completion { shell } => { // The CLI parser fails if no shell is provided/detected, so it's safe to unwrap here cli::completion(shell.unwrap()); diff --git a/topiary-core/src/error.rs b/topiary-core/src/error.rs index 6bbfa7cf..4c93c8fb 100644 --- a/topiary-core/src/error.rs +++ b/topiary-core/src/error.rs @@ -28,8 +28,7 @@ pub enum FormatterError { }, /// The query contains a pattern that had no match in the input file. - /// Should only be raised in the test suite. - PatternDoesNotMatch(String), + PatternDoesNotMatch, /// There was an error in the query file. If this happened using our /// provided query files, it is a bug. Please log an issue. @@ -81,10 +80,10 @@ impl fmt::Display for FormatterError { write!(f, "Parsing error between line {start_line}, column {start_column} and line {end_line}, column {end_column}") } - Self::PatternDoesNotMatch(pattern_content) => { + Self::PatternDoesNotMatch => { write!( f, - "The following pattern matches nothing in the input:\n{pattern_content}" + "The query contains a pattern that does not match the input" ) } @@ -102,7 +101,7 @@ impl Error for FormatterError { match self { Self::Idempotence | Self::Parsing { .. } - | Self::PatternDoesNotMatch(_) + | Self::PatternDoesNotMatch | Self::Io(IoError::Generic(_, None)) => None, Self::Internal(_, source) => source.as_ref().map(Deref::deref), Self::Query(_, source) => source.as_ref().map(|e| e as &dyn Error), diff --git a/topiary-core/src/lib.rs b/topiary-core/src/lib.rs index cea567fe..af318548 100644 --- a/topiary-core/src/lib.rs +++ b/topiary-core/src/lib.rs @@ -19,7 +19,7 @@ use tree_sitter::Position; pub use crate::{ error::{FormatterError, IoError}, language::Language, - tree_sitter::{apply_query, SyntaxNode, TopiaryQuery, Visualisation}, + tree_sitter::{apply_query, CoverageData, SyntaxNode, TopiaryQuery, Visualisation}, }; mod atom_collection; @@ -240,7 +240,6 @@ pub fn formatter( &language.query, &language.grammar, tolerate_parsing_errors, - false, )?; // Various post-processing of whitespace @@ -276,6 +275,43 @@ pub fn formatter( Ok(()) } +pub fn coverage( + input: &mut impl io::Read, + output: &mut impl io::Write, + language: &Language, +) -> FormatterResult<()> { + let content = read_input(input).map_err(|e| { + FormatterError::Io(IoError::Filesystem( + "Failed to read input contents".into(), + e, + )) + })?; + + let res = tree_sitter::check_query_coverage(&content, &language.query, &language.grammar)?; + + let queries_string = if res.missing_patterns.is_empty() { + "All queries are matched".into() + } else { + format!( + "Unmatched queries:\n{}", + &res.missing_patterns[..].join("\n"), + ) + }; + + write!( + output, + "Query coverage: {:.2}%\n{}\n", + res.cover_percentage * 100.0, + queries_string, + )?; + + if res.cover_percentage == 1.0 { + Ok(()) + } else { + Err(FormatterError::PatternDoesNotMatch) + } +} + /// Simple helper function to read the full content of an io Read stream fn read_input(input: &mut dyn io::Read) -> Result { let mut content = String::new(); diff --git a/topiary-core/src/tree_sitter.rs b/topiary-core/src/tree_sitter.rs index 59ddcb47..cdba7ffa 100644 --- a/topiary-core/src/tree_sitter.rs +++ b/topiary-core/src/tree_sitter.rs @@ -187,6 +187,13 @@ impl<'a> Display for LocalQueryMatch<'a> { } } +#[derive(Clone, Debug, PartialEq)] +// A struct to store the result of a query coverage check +pub struct CoverageData { + pub cover_percentage: f32, + pub missing_patterns: Vec, +} + /// Applies a query to an input content and returns a collection of atoms. /// /// # Errors @@ -202,9 +209,8 @@ pub fn apply_query( query: &TopiaryQuery, grammar: &topiary_tree_sitter_facade::Language, tolerate_parsing_errors: bool, - should_check_input_exhaustivity: bool, ) -> FormatterResult { - let (tree, grammar) = parse(input_content, grammar, tolerate_parsing_errors)?; + let (tree, _grammar) = parse(input_content, grammar, tolerate_parsing_errors)?; let root = tree.root_node(); let source = input_content.as_bytes(); @@ -222,11 +228,6 @@ pub fn apply_query( }); } - if should_check_input_exhaustivity { - let ref_match_count = matches.len(); - check_input_exhaustivity(ref_match_count, query, grammar, &root, source)?; - } - // Find the ids of all tree-sitter nodes that were identified as a leaf // We want to avoid recursing into them in the collect_leafs function. let specified_leaf_nodes: HashSet = collect_leaf_ids(&matches, capture_names.clone()); @@ -500,24 +501,40 @@ fn check_predicates(predicates: &QueryPredicates) -> FormatterResult<()> { /// Check if the input tests all patterns in the query, by successively disabling /// all patterns. If disabling a pattern does not decrease the number of matches, /// then that pattern originally matched nothing in the input. -fn check_input_exhaustivity( - ref_match_count: usize, +pub fn check_query_coverage( + input_content: &str, original_query: &TopiaryQuery, grammar: &topiary_tree_sitter_facade::Language, - root: &Node, - source: &[u8], -) -> FormatterResult<()> { +) -> FormatterResult { + let (tree, grammar) = parse(input_content, grammar, false)?; + let root = tree.root_node(); + let source = input_content.as_bytes(); + let mut missing_patterns = Vec::new(); + + // Match queries + let mut cursor = QueryCursor::new(); + let ref_match_count = original_query + .query + .matches(&root, source, &mut cursor) + .count(); let pattern_count = original_query.query.pattern_count(); let query_content = &original_query.query_content; + // This particular test avoids a SIGSEGV error that occurs when trying // to count the matches of an empty query (see #481) if pattern_count == 1 { + let mut cover_percentage = 1.0; if ref_match_count == 0 { - return Err(FormatterError::PatternDoesNotMatch(query_content.into())); - } else { - return Ok(()); + missing_patterns.push(query_content.into()); + cover_percentage = 0.0 } + return Ok(CoverageData { + cover_percentage, + missing_patterns, + }); } + + let mut ok_patterns = 0.0; for i in 0..pattern_count { // We don't need to use TopiaryQuery in this test since we have no need // for duplicate versions of the query_content string, instead we create the query @@ -526,7 +543,7 @@ fn check_input_exhaustivity( .map_err(|e| FormatterError::Query("Error parsing query file".into(), Some(e)))?; query.disable_pattern(i); let mut cursor = QueryCursor::new(); - let match_count = query.matches(root, source, &mut cursor).count(); + let match_count = query.matches(&root, source, &mut cursor).count(); if match_count == ref_match_count { let index_start = query.start_byte_for_pattern(i); let index_end = if i == pattern_count - 1 { @@ -535,19 +552,24 @@ fn check_input_exhaustivity( query.start_byte_for_pattern(i + 1) }; let pattern_content = &query_content[index_start..index_end]; - return Err(FormatterError::PatternDoesNotMatch(pattern_content.into())); + missing_patterns.push(pattern_content.into()); + } else { + ok_patterns += 1.0; } } - Ok(()) + + let cover_percentage = ok_patterns / pattern_count as f32; + Ok(CoverageData { + cover_percentage, + missing_patterns, + }) } #[cfg(target_arch = "wasm32")] -fn check_input_exhaustivity( - _ref_match_count: usize, +pub fn check_query_coverage( + _input_content: &str, _original_query: &TopiaryQuery, _grammar: &topiary_tree_sitter_facade::Language, - _root: &Node, - _source: &[u8], -) -> FormatterResult<()> { +) -> FormatterResult { unimplemented!(); } From e218cc366ceef3c168b56042e1fd831935d73f6c Mon Sep 17 00:00:00 2001 From: Nicolas BACQUEY Date: Tue, 29 Oct 2024 12:18:40 +0100 Subject: [PATCH 2/2] Update documentation and changelog --- CHANGELOG.md | 3 +++ README.md | 47 ++++++++++++++++++++++++++++++++++ bin/verify-documented-usage.sh | 2 +- 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c48fec4..7eb7098c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,9 @@ This name should be decided amongst the team before the release. [Full list of changes](https://github.com/tweag/topiary/compare/v0.5.1...HEAD) +### Added +- [#785](https://github.com/tweag/topiary/pull/785) Added the `coverage` command, that checks how much of the query file is used by the input. + ### Changed - [#780](https://github.com/tweag/topiary/pull/780) Measuring scopes are now independent from captures order diff --git a/README.md b/README.md index b2e65e22..f5a6edb0 100644 --- a/README.md +++ b/README.md @@ -185,6 +185,7 @@ Commands: visualise Visualise the input's Tree-sitter parse tree config Print the current configuration prefetch Prefetch all languages in the configuration + coverage Checks how much of the tree-sitter query is used completion Generate shell completion script help Print this message or the help of the given subcommand(s) @@ -366,6 +367,48 @@ Options: ``` +#### Coverage + +This subcommand checks how much of the language query file is used to process the input. +Specifically, it checks the percentage of queries in the query file that match the given input, +And prints the queries that don't matched anything. + + + +``` +Checks how much of the tree-sitter query is used + +Usage: topiary coverage [OPTIONS] <--language |FILE> + +Arguments: + [FILE] + Input file (omit to read from stdin) + + Language detection and query selection is automatic, mapped from file extensions defined + in the Topiary configuration. + +Options: + -l, --language + Topiary language identifier (for formatting stdin) + + -q, --query + Topiary query file override (when formatting stdin) + + -C, --configuration + Configuration file + + [env: TOPIARY_CONFIG_FILE] + + -v, --verbose... + Logging verbosity (increased per occurrence) + + -h, --help + Print help (see a summary with '-h') +``` + + +The `coverage` subcommand will exit with error code `1` if the coverage is less than 100%. + #### Logging By default, the Topiary CLI will only output error messages. You can @@ -387,6 +430,7 @@ formatting. Otherwise, the following exit codes are defined: | Reason | Code | | :--------------------------- | ---: | +| Negative result | 1 | | CLI argument parsing error | 2 | | I/O error | 3 | | Topiary query error | 4 | @@ -397,6 +441,9 @@ formatting. Otherwise, the following exit codes are defined: | Multiple errors | 9 | | Unspecified error | 10 | +Negative results with error code `1` only happen when Topiary is called +with the `coverage` sub-command, if the input does not cover 100% of the query. + When given multiple inputs, Topiary will do its best to process them all, even in the presence of errors. Should _any_ errors occur, Topiary will return a non-zero exit code. For more details on the nature of diff --git a/bin/verify-documented-usage.sh b/bin/verify-documented-usage.sh index 708f7399..9a37e9ef 100755 --- a/bin/verify-documented-usage.sh +++ b/bin/verify-documented-usage.sh @@ -40,7 +40,7 @@ diff-usage() { } main() { - local -a subcommands=(ROOT format visualise config completion prefetch) + local -a subcommands=(ROOT format visualise config completion coverage prefetch) local _diff local _subcommand