diff --git a/Cargo.lock b/Cargo.lock index 4db1be6b..89122315 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -154,6 +154,12 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" +[[package]] +name = "arraydeque" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236" + [[package]] name = "async-recursion" version = "1.1.1" @@ -265,15 +271,30 @@ dependencies = [ "serde", ] +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec 0.6.3", +] + [[package]] name = "bit-set" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" dependencies = [ - "bit-vec", + "bit-vec 0.8.0", ] +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bit-vec" version = "0.8.0" @@ -352,17 +373,21 @@ dependencies = [ "open", "os_info", "rayon", + "regex", "requestty", "reqwest 0.12.8", + "rstest", "rusty-hook", "serde", "serde_json", "serde_yml", "shadow-rs", + "syntect", "tempdir", "tokio", "urlencoding", "users", + "yaml-rust2", ] [[package]] @@ -1180,13 +1205,23 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "fancy-regex" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2" +dependencies = [ + "bit-set 0.5.3", + "regex", +] + [[package]] name = "fancy-regex" version = "0.14.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" dependencies = [ - "bit-set", + "bit-set 0.8.0", "regex-automata", "regex-syntax", ] @@ -1513,6 +1548,15 @@ version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +[[package]] +name = "hashlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +dependencies = [ + "hashbrown 0.14.5", +] + [[package]] name = "heck" version = "0.5.0" @@ -2127,7 +2171,7 @@ dependencies = [ "base64 0.22.1", "bytecount", "email_address", - "fancy-regex", + "fancy-regex 0.14.0", "fraction", "idna 1.0.2", "itoa", @@ -2409,6 +2453,7 @@ dependencies = [ "supports-color", "supports-hyperlinks", "supports-unicode", + "syntect", "terminal_size", "textwrap 0.16.1", "thiserror", @@ -4485,6 +4530,7 @@ checksum = "874dcfa363995604333cf947ae9f751ca3af4522c60886774c4963943b4746b1" dependencies = [ "bincode", "bitflags 1.3.2", + "fancy-regex 0.11.0", "flate2", "fnv", "once_cell", @@ -5562,6 +5608,17 @@ dependencies = [ "linked-hash-map", ] +[[package]] +name = "yaml-rust2" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a1a1c0bc9823338a3bdf8c61f994f23ac004c6fa32c08cd152984499b445e8d" +dependencies = [ + "arraydeque", + "encoding_rs", + "hashlink", +] + [[package]] name = "yoke" version = "0.7.4" diff --git a/Cargo.toml b/Cargo.toml index 2ccb0a2f..358c17a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ rstest = "0.18" serde = { version = "1", features = ["derive"] } serde_json = "1" serde_yaml = { version = "0.0.12", package = "serde_yml" } +syntect = { version = "5", default-features = false, features = ["default-fancy"] } tempdir = "0.3" tokio = { version = "1", features = ["rt", 
"rt-multi-thread"] } users = "0.11" @@ -71,9 +72,11 @@ jsonschema = { version = "0.26", optional = true } open = "5" os_info = "3" rayon = { version = "1.10.0", optional = true } +regex = { version = "1", optional = true } requestty = { version = "0.5", features = ["macros", "termion"] } shadow-rs = { version = "0.26", default-features = false } urlencoding = "2" +yaml-rust2 = { version = "0.9.0", optional = true } cached.workspace = true clap = { workspace = true, features = ["derive", "cargo", "unicode", "env"] } @@ -81,12 +84,13 @@ colored.workspace = true indexmap.workspace = true indicatif.workspace = true log.workspace = true -miette = { workspace = true, features = ["fancy"] } +miette = { workspace = true, features = ["fancy", "syntect-highlighter"] } oci-distribution.workspace = true reqwest.workspace = true serde.workspace = true serde_json.workspace = true serde_yaml.workspace = true +syntect = { workspace = true, optional = true } tempdir.workspace = true tokio = { workspace = true, optional = true } bon.workspace = true @@ -105,6 +109,9 @@ validate = [ "dep:jsonschema", "dep:rayon", "dep:tokio", + "dep:yaml-rust2", + "dep:syntect", + "dep:regex", "cached/async", "blue-build-process-management/validate" ] @@ -112,6 +119,8 @@ validate = [ [dev-dependencies] rusty-hook = "0.11" +rstest.workspace = true + [build-dependencies] shadow-rs = { version = "0.26", default-features = false } diff --git a/bacon.toml b/bacon.toml index b5c835ac..bf6551db 100644 --- a/bacon.toml +++ b/bacon.toml @@ -45,7 +45,7 @@ command = [ ] need_stdout = true default_watch = false -watch = ["src", "process", "recipe", "template", "utils", "Cargo.toml", "build.rs", "test-files"] +watch = ["src", "process", "recipe", "template", "utils", "Cargo.toml", "build.rs", "test-files", "integration-tests"] [jobs.test-all] command = [ @@ -54,7 +54,7 @@ command = [ ] need_stdout = true default_watch = false -watch = ["src", "process", "recipe", "template", "utils", "Cargo.toml", "build.rs", 
"test-files"] +watch = ["src", "process", "recipe", "template", "utils", "Cargo.toml", "build.rs", "test-files", "integration-tests"] [jobs.doc] command = ["cargo", "doc", "--color", "always", "--no-deps"] diff --git a/integration-tests/Earthfile b/integration-tests/Earthfile index 1e4b0974..39ca60a9 100644 --- a/integration-tests/Earthfile +++ b/integration-tests/Earthfile @@ -89,6 +89,7 @@ validate: RUN --no-cache bluebuild -v validate recipes/recipe-invalid.yml && exit 1 || exit 0 RUN --no-cache bluebuild -v validate recipes/recipe-invalid-module.yml && exit 1 || exit 0 RUN --no-cache bluebuild -v validate recipes/recipe-invalid-stage.yml && exit 1 || exit 0 + RUN --no-cache bluebuild -v validate recipes/recipe-invalid-from-file.yml && exit 1 || exit 0 legacy-base: FROM ../+blue-build-cli-alpine --RELEASE=false diff --git a/integration-tests/test-repo/recipes/invalid-module.yml b/integration-tests/test-repo/recipes/invalid-module.yml new file mode 100644 index 00000000..2c6ae549 --- /dev/null +++ b/integration-tests/test-repo/recipes/invalid-module.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://schema.blue-build.org/module-list-v1.json +modules: + # Tests installing rpms from a combo image stage + - type: akmods + install: openrazer + diff --git a/integration-tests/test-repo/recipes/invalid-stages.yml b/integration-tests/test-repo/recipes/invalid-stages.yml new file mode 100644 index 00000000..605cc253 --- /dev/null +++ b/integration-tests/test-repo/recipes/invalid-stages.yml @@ -0,0 +1,37 @@ +--- +# yaml-language-server: $schema=https://schema.blue-build.org/module-stage-list-v1.json +stages: + - name: ubuntu-test + from: + - ubuntu + modules: {} + - name: debian-test + from: debian + modules: + - from-file: stages.yml + - name: fedora-test + from: fedora + modules: + - from-file: stages.yml + - name: alpine-test + from: alpine + modules: + - from-file: stages.yml +modules: + - type: files + files: + - usr: /usr + - type: script + scripts: + 
- example.sh + snippets: + - echo "test" > /test.txt + - type: test-module + source: local + - type: containerfile + containerfiles: + - labels + snippets: + - RUN echo "This is a snippet" + - type: rpm-ostree + install: micro diff --git a/integration-tests/test-repo/recipes/recipe-invalid-from-file.yml b/integration-tests/test-repo/recipes/recipe-invalid-from-file.yml new file mode 100644 index 00000000..0c9ebba5 --- /dev/null +++ b/integration-tests/test-repo/recipes/recipe-invalid-from-file.yml @@ -0,0 +1,58 @@ +--- +# yaml-language-server: $schema=https://schema.blue-build.org/recipe-v1.json +name: cli/test +description: This is my personal OS image. +base-image: ghcr.io/ublue-os/silverblue-main +image-version: 40 +stages: + - from-file: invalid-stages.yml +modules: + - from-file: invalid-module.yml + - from-file: flatpaks.yml + + - type: files + files: + - source: usr + destination: /usr + + - type: script + scripts: + - example.sh + + - type: rpm-ostree + repos: + - https://copr.fedorainfracloud.org/coprs/atim/starship/repo/fedora-%OS_VERSION%/atim-starship-fedora-%OS_VERSION%.repo + install: + - micro + - starship + remove: + - firefox + - firefox-langpacks + + - type: signing + + - type: test-module + source: local + + - type: containerfile + containerfiles: + - labels + snippets: + - RUN echo "This is a snippet" && ostree container commit + + - type: copy + from: alpine-test + src: /test.txt + dest: / + - type: copy + from: ubuntu-test + src: /test.txt + dest: / + - type: copy + from: debian-test + src: /test.txt + dest: / + - type: copy + from: fedora-test + src: /test.txt + dest: / diff --git a/recipe/src/lib.rs b/recipe/src/lib.rs index 4d685cbc..b19b6359 100644 --- a/recipe/src/lib.rs +++ b/recipe/src/lib.rs @@ -21,6 +21,10 @@ pub trait FromFileList { const LIST_KEY: &str; fn get_from_file_paths(&self) -> Vec; + + fn get_module_from_file_paths(&self) -> Vec { + Vec::new() + } } pub(crate) fn base_recipe_path() -> &'static Path { diff --git 
a/recipe/src/stages_ext.rs b/recipe/src/stages_ext.rs index a9a5333c..2b903e0e 100644 --- a/recipe/src/stages_ext.rs +++ b/recipe/src/stages_ext.rs @@ -25,6 +25,18 @@ impl FromFileList for StagesExt<'_> { .filter_map(Stage::get_from_file_path) .collect() } + + fn get_module_from_file_paths(&self) -> Vec { + self.stages + .iter() + .flat_map(|stage| { + stage + .required_fields + .as_ref() + .map_or_else(Vec::new, |rf| rf.modules_ext.get_from_file_paths()) + }) + .collect() + } } impl TryFrom<&PathBuf> for StagesExt<'_> { diff --git a/src/commands/validate.rs b/src/commands/validate.rs index c1c84c8a..79c1e753 100644 --- a/src/commands/validate.rs +++ b/src/commands/validate.rs @@ -2,32 +2,29 @@ use std::{ fs::OpenOptions, io::{BufReader, Read}, path::{Path, PathBuf}, + sync::Arc, }; use blue_build_process_management::ASYNC_RUNTIME; use blue_build_recipe::{FromFileList, ModuleExt, Recipe, StagesExt}; -use blue_build_utils::{ - string, - syntax_highlighting::{self}, -}; use bon::Builder; use clap::Args; use colored::Colorize; -use indexmap::IndexMap; -use jsonschema::{BasicOutput, ValidationError}; use log::{debug, info, trace}; use miette::{bail, miette, Context, IntoDiagnostic, Report}; use rayon::prelude::*; use schema_validator::{ - build_validator, SchemaValidator, MODULE_LIST_V1_SCHEMA_URL, MODULE_V1_SCHEMA_URL, - RECIPE_V1_SCHEMA_URL, STAGE_LIST_V1_SCHEMA_URL, STAGE_V1_SCHEMA_URL, + SchemaValidator, MODULE_STAGE_LIST_V1_SCHEMA_URL, MODULE_V1_SCHEMA_URL, RECIPE_V1_SCHEMA_URL, + STAGE_V1_SCHEMA_URL, }; use serde::de::DeserializeOwned; use serde_json::Value; use super::BlueBuildCommand; +mod location; mod schema_validator; +mod yaml_span; #[derive(Debug, Args, Builder)] pub struct ValidateCommand { @@ -50,14 +47,11 @@ pub struct ValidateCommand { #[clap(skip)] stage_validator: Option, - #[clap(skip)] - stage_list_validator: Option, - #[clap(skip)] module_validator: Option, #[clap(skip)] - module_list_validator: Option, + module_stage_list_validator: Option, } 
impl BlueBuildCommand for ValidateCommand { @@ -98,18 +92,18 @@ impl BlueBuildCommand for ValidateCommand { impl ValidateCommand { async fn setup_validators(&mut self) -> Result<(), Report> { - let (rv, sv, slv, mv, mlv) = tokio::try_join!( - build_validator(RECIPE_V1_SCHEMA_URL), - build_validator(STAGE_V1_SCHEMA_URL), - build_validator(STAGE_LIST_V1_SCHEMA_URL), - build_validator(MODULE_V1_SCHEMA_URL), - build_validator(MODULE_LIST_V1_SCHEMA_URL), + let (rv, sv, mv, mslv) = tokio::try_join!( + SchemaValidator::builder().url(RECIPE_V1_SCHEMA_URL).build(), + SchemaValidator::builder().url(STAGE_V1_SCHEMA_URL).build(), + SchemaValidator::builder().url(MODULE_V1_SCHEMA_URL).build(), + SchemaValidator::builder() + .url(MODULE_STAGE_LIST_V1_SCHEMA_URL) + .build(), )?; self.recipe_validator = Some(rv); self.stage_validator = Some(sv); - self.stage_list_validator = Some(slv); self.module_validator = Some(mv); - self.module_list_validator = Some(mlv); + self.module_stage_list_validator = Some(mslv); Ok(()) } @@ -118,7 +112,6 @@ impl ValidateCommand { path: &Path, traversed_files: &[&Path], single_validator: &SchemaValidator, - list_validator: &SchemaValidator, ) -> Vec where DF: DeserializeOwned + FromFileList, @@ -140,7 +133,7 @@ impl ValidateCommand { let file_str = match read_file(path) { Err(e) => return vec![e], - Ok(f) => f, + Ok(f) => Arc::new(f), }; match serde_yaml::from_str::(&file_str) @@ -151,56 +144,60 @@ impl ValidateCommand { trace!("{path_display}:\n{instance}"); if instance.get(DF::LIST_KEY).is_some() { - debug!("{path_display} is a multi file file"); - let errors = if self.all_errors { - process_basic_output( - list_validator.validator().apply(&instance).basic(), - &instance, - path, - ) - } else { - list_validator - .validator() - .iter_errors(&instance) - .map(process_err(&self.recipe)) - .collect() + debug!("{path_display} is a list file"); + let err = match self + .module_stage_list_validator + .as_ref() + .unwrap() + .process_validation(path, 
file_str.clone(), self.all_errors) + { + Err(e) => return vec![e], + Ok(e) => e, }; - if errors.is_empty() { - match serde_yaml::from_str::(&file_str).into_diagnostic() { - Err(e) => vec![e], - Ok(file) => file - .get_from_file_paths() - .par_iter() - .map(|file_path| { - self.validate_file::( - file_path, - &traversed_files, - single_validator, - list_validator, - ) - }) - .flatten() - .collect(), - } - } else { - errors - } + err.map_or_else( + || { + serde_yaml::from_str::(&file_str) + .into_diagnostic() + .map_or_else( + |e| vec![e], + |file| { + let mut errs = file + .get_from_file_paths() + .par_iter() + .map(|file_path| { + self.validate_file::( + file_path, + &traversed_files, + single_validator, + ) + }) + .flatten() + .collect::>(); + errs.extend( + file.get_module_from_file_paths() + .par_iter() + .map(|file_path| { + self.validate_file::( + file_path, + &[], + self.module_validator.as_ref().unwrap(), + ) + }) + .flatten() + .collect::>(), + ); + errs + }, + ) + }, + |err| vec![err], + ) } else { debug!("{path_display} is a single file file"); - if self.all_errors { - process_basic_output( - single_validator.validator().apply(&instance).basic(), - &instance, - path, - ) - } else { - single_validator - .validator() - .iter_errors(&instance) - .map(|err| miette!("{err}")) - .collect() - } + single_validator + .process_validation(path, file_str, self.all_errors) + .map_or_else(|e| vec![e], |e| e.map_or_else(Vec::new, |e| vec![e])) } } Err(e) => vec![e], @@ -211,7 +208,7 @@ impl ValidateCommand { let recipe_path_display = self.recipe.display().to_string().bold().italic(); debug!("Validating recipe {recipe_path_display}"); - let recipe_str = read_file(&self.recipe).map_err(err_vec)?; + let recipe_str = Arc::new(read_file(&self.recipe).map_err(err_vec)?); let recipe: Value = serde_yaml::from_str(&recipe_str) .into_diagnostic() .with_context(|| format!("Failed to deserialize recipe {recipe_path_display}")) @@ -219,21 +216,13 @@ impl ValidateCommand { 
trace!("{recipe_path_display}:\n{recipe}"); let schema_validator = self.recipe_validator.as_ref().unwrap(); - let errors = if self.all_errors { - process_basic_output( - schema_validator.validator().apply(&recipe).basic(), - &recipe, - &self.recipe, - ) - } else { - schema_validator - .validator() - .iter_errors(&recipe) - .map(process_err(&self.recipe)) - .collect() - }; + let err = schema_validator + .process_validation(&self.recipe, recipe_str.clone(), self.all_errors) + .map_err(err_vec)?; - if errors.is_empty() { + if let Some(err) = err { + Err(vec![err]) + } else { let recipe: Recipe = serde_yaml::from_str(&recipe_str) .into_diagnostic() .with_context(|| { @@ -258,7 +247,6 @@ impl ValidateCommand { stage_path, &[], self.stage_validator.as_ref().unwrap(), - self.stage_list_validator.as_ref().unwrap(), ) }) .flatten() @@ -281,7 +269,6 @@ impl ValidateCommand { module_path, &[], self.module_validator.as_ref().unwrap(), - self.module_list_validator.as_ref().unwrap(), ) }) .flatten() @@ -292,8 +279,6 @@ impl ValidateCommand { } else { Err(errors) } - } else { - Err(errors) } } } @@ -320,93 +305,3 @@ fn read_file(path: &Path) -> Result { .into_diagnostic()?; Ok(recipe) } - -fn process_basic_output(out: BasicOutput<'_>, instance: &Value, path: &Path) -> Vec { - match out { - BasicOutput::Valid(_) => vec![], - BasicOutput::Invalid(errors) => { - let mut collection: IndexMap> = IndexMap::new(); - let errors = { - let mut e = errors.into_iter().collect::>(); - e.sort_by(|e1, e2| { - e1.instance_location() - .as_str() - .cmp(e2.instance_location().as_str()) - }); - e - }; - - for err in errors { - let schema_path = err.keyword_location(); - let instance_path = err.instance_location().to_string(); - let build_err = || { - miette!( - "schema_path: '{}'", - schema_path.to_string().italic().dimmed(), - ) - .context(err.error_description().to_string().bold().bright_red()) - }; - - collection - .entry(instance_path) - .and_modify(|errs| { - errs.push(build_err()); - }) - 
.or_insert_with(|| vec![build_err()]); - } - - collection - .into_iter() - .map(|(key, value)| { - let instance = instance.pointer(&key).unwrap(); - - miette!( - "{}\n{}", - serde_yaml::to_string(instance) - .into_diagnostic() - .and_then(|file| syntax_highlighting::highlight(&file, "yml", None)) - .unwrap_or_else(|_| instance.to_string()), - value.into_iter().fold(String::new(), |mut acc, err| { - acc.push_str(&format!("{err:?}")); - acc - }) - ) - .context(format!( - "In file {} at '{}'", - path.display().to_string().bold().italic(), - key.bold().bright_yellow(), - )) - }) - .collect() - } - } -} - -fn process_err<'a, 'b>(path: &'b Path) -> impl Fn(ValidationError<'a>) -> Report + use<'a, 'b> { - move |ValidationError { - instance, - instance_path, - kind: _, - schema_path: _, - }| { - miette!( - "{}", - &serde_yaml::to_string(&*instance) - .into_diagnostic() - .and_then(|file| syntax_highlighting::highlight(&file, "yml", None)) - .unwrap_or_else(|_| instance.to_string()) - ) - .context(format!( - "Invalid value {} file '{}'", - if instance_path.as_str().is_empty() { - string!("in root of") - } else { - format!( - "at path '{}' in", - instance_path.as_str().bold().bright_yellow() - ) - }, - path.display().to_string().italic().bold(), - )) - } -} diff --git a/src/commands/validate/location.rs b/src/commands/validate/location.rs new file mode 100644 index 00000000..22974e71 --- /dev/null +++ b/src/commands/validate/location.rs @@ -0,0 +1,120 @@ +use std::sync::Arc; + +use jsonschema::paths::{LazyLocation, Location as JsonLocation, LocationSegment}; + +#[derive(Debug, Default, Clone, Hash, PartialEq, Eq)] +pub struct Location(Arc); + +impl Location { + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +impl From<&JsonLocation> for Location { + fn from(value: &JsonLocation) -> Self { + Self(Arc::new(value.as_str().into())) + } +} + +impl From for Location { + fn from(value: JsonLocation) -> Self { + Self(Arc::new(value.as_str().into())) + } +} + +impl 
std::fmt::Display for Location { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", &self.0) + } +} + +impl TryFrom<&str> for Location { + type Error = miette::Report; + + fn try_from(value: &str) -> Result { + fn child<'a, 'b, 'c, I>(path_iter: &mut I, location: &'b LazyLocation<'b, 'a>) -> Location + where + I: Iterator, + { + let Some(path) = path_iter.next() else { + return JsonLocation::from(location).into(); + }; + let location = build(path, location); + child(path_iter, &location) + } + + fn build<'a, 'b>( + path: &'a str, + location: &'b LazyLocation<'b, 'a>, + ) -> LazyLocation<'a, 'b> { + path.parse::() + .map_or_else(|_| location.push(path), |p| location.push(p)) + } + let path_count = value.split('/').count(); + let mut path_iter = value.split('/'); + + let root = path_iter.next().unwrap(); + + if root.is_empty() && path_count == 1 { + return Ok(Self::default()); + } + + let Some(path) = path_iter.next() else { + return Ok(Self::from(JsonLocation::from(&LazyLocation::new()))); + }; + + let location = LazyLocation::new(); + let location = build(path, &location); + + Ok(child(&mut path_iter, &location)) + } +} + +impl TryFrom<&String> for Location { + type Error = miette::Report; + + fn try_from(value: &String) -> Result { + Self::try_from(value.as_str()) + } +} + +impl TryFrom for Location { + type Error = miette::Report; + + fn try_from(value: String) -> Result { + Self::try_from(value.as_str()) + } +} + +pub struct LocationSegmentIterator<'a> { + iter: std::vec::IntoIter>, +} + +impl<'a> Iterator for LocationSegmentIterator<'a> { + type Item = LocationSegment<'a>; + + fn next(&mut self) -> Option { + self.iter.next() + } +} + +impl<'a> IntoIterator for &'a Location { + type Item = LocationSegment<'a>; + type IntoIter = LocationSegmentIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { + Self::IntoIter { + iter: self + .as_str() + .split('/') + .filter(|p| !p.is_empty()) + .map(|p| { + p.parse::() + 
.map_or_else(|_| LocationSegment::Property(p), LocationSegment::Index) + }) + .collect::>() + .into_iter(), + } + } +} diff --git a/src/commands/validate/schema_validator.rs b/src/commands/validate/schema_validator.rs index 695385ca..655db5b1 100644 --- a/src/commands/validate/schema_validator.rs +++ b/src/commands/validate/schema_validator.rs @@ -1,67 +1,246 @@ -use std::sync::Arc; +use std::{ + borrow::Cow, + collections::HashSet, + path::Path, + sync::{Arc, LazyLock}, +}; use blue_build_process_management::ASYNC_RUNTIME; +use bon::bon; use cached::proc_macro::cached; use colored::Colorize; -use jsonschema::{Retrieve, Uri, Validator}; +use indexmap::IndexMap; +use jsonschema::{ + output::Output, BasicOutput, ErrorIterator, Retrieve, Uri, ValidationError, Validator, +}; use log::{debug, trace}; -use miette::{bail, Context, IntoDiagnostic, Report}; +use miette::{bail, miette, Context, IntoDiagnostic, LabeledSpan, NamedSource, Report, Result}; +use regex::Regex; use serde_json::Value; +use super::{location::Location, yaml_span::YamlSpan}; + pub const BASE_SCHEMA_URL: &str = "https://schema.blue-build.org"; pub const RECIPE_V1_SCHEMA_URL: &str = "https://schema.blue-build.org/recipe-v1.json"; pub const STAGE_V1_SCHEMA_URL: &str = "https://schema.blue-build.org/stage-v1.json"; -pub const STAGE_LIST_V1_SCHEMA_URL: &str = "https://schema.blue-build.org/stage-list-v1.json"; pub const MODULE_V1_SCHEMA_URL: &str = "https://schema.blue-build.org/module-v1.json"; -pub const MODULE_LIST_V1_SCHEMA_URL: &str = "https://schema.blue-build.org/module-list-v1.json"; +pub const MODULE_STAGE_LIST_V1_SCHEMA_URL: &str = + "https://schema.blue-build.org/module-stage-list-v1.json"; #[derive(Debug, Clone)] pub struct SchemaValidator { schema: Arc, validator: Arc, + url: &'static str, } +#[bon] impl SchemaValidator { - pub fn validator(&self) -> Arc { - self.validator.clone() + #[builder] + pub async fn new(url: &'static str) -> Result { + tokio::spawn(async move { + let schema: Arc = 
Arc::new( + reqwest::get(url) + .await + .into_diagnostic() + .with_context(|| format!("Failed to get schema at {url}"))? + .json() + .await + .into_diagnostic() + .with_context(|| format!("Failed to get json for schema {url}"))?, + ); + let validator = Arc::new( + tokio::task::spawn_blocking({ + let schema = schema.clone(); + move || { + jsonschema::options() + .with_retriever(ModuleSchemaRetriever) + .build(&schema) + .into_diagnostic() + .with_context(|| format!("Failed to build validator for schema {url}")) + } + }) + .await + .expect("Should join blocking thread")?, + ); + + Ok(Self { + schema, + validator, + url, + }) + }) + .await + .expect("Should join task") + } + + pub fn apply<'a, 'b>(&'a self, value: &'b Value) -> Output<'a, 'b> { + self.validator.apply(value) + } + + pub fn iter_errors<'a>(&'a self, value: &'a Value) -> ErrorIterator<'a> { + self.validator.iter_errors(value) } pub fn schema(&self) -> Arc { self.schema.clone() } -} -pub async fn build_validator(url: &'static str) -> Result { - tokio::spawn(async move { - let schema: Arc = Arc::new( - reqwest::get(url) - .await - .into_diagnostic() - .with_context(|| format!("Failed to get schema at {url}"))? 
- .json() - .await - .into_diagnostic() - .with_context(|| format!("Failed to get json for schema {url}"))?, - ); - let validator = Arc::new( - tokio::task::spawn_blocking({ - let schema = schema.clone(); - move || { - jsonschema::options() - .with_retriever(ModuleSchemaRetriever) - .build(&schema) - .into_diagnostic() - .with_context(|| format!("Failed to build validator for schema {url}")) + pub const fn url(&self) -> &'static str { + self.url + } + + pub fn process_validation( + &self, + path: &Path, + file: Arc, + all_errors: bool, + ) -> Result> { + let recipe_path_display = path.display().to_string().bold().italic(); + + let spanner = YamlSpan::builder().file(file.clone()).build()?; + let instance: Value = serde_yaml::from_str(&file) + .into_diagnostic() + .with_context(|| format!("Failed to deserialize recipe {recipe_path_display}"))?; + trace!("{recipe_path_display}:\n{file}"); + + Ok(if all_errors { + self.process_basic_output(self.apply(&instance).basic(), file, &spanner, path) + } else { + self.process_err(self.iter_errors(&instance), path, file, &spanner) + }) + } + + fn process_basic_output( + &self, + out: BasicOutput<'_>, + file: Arc, + spanner: &YamlSpan, + path: &Path, + ) -> Option { + match out { + BasicOutput::Valid(_) => None, + BasicOutput::Invalid(errors) => { + let mut collection: IndexMap> = IndexMap::new(); + let errors = { + let mut e = errors.into_iter().collect::>(); + e.sort_by(|e1, e2| { + e1.instance_location() + .as_str() + .cmp(e2.instance_location().as_str()) + }); + e + }; + let errors: Vec<(Location, String)> = { + let e = errors + .into_iter() + .map(|e| { + ( + Location::from(e.instance_location()), + remove_json(&e.error_description().to_string()).to_string(), + ) + }) + .collect::>(); + let mut e = e.into_iter().collect::>(); + e.sort_by(|e1, e2| e1.0.as_str().cmp(e2.0.as_str())); + e + }; + + for (instance_path, err) in errors { + collection + .entry(instance_path) + .and_modify(|errs| { + errs.push(format!("- {}", 
err.bold().red())); + }) + .or_insert_with(|| vec![format!("- {}", err.bold().red())]); } + + let spans = collection + .into_iter() + .map(|(key, value)| { + LabeledSpan::new_with_span( + Some(value.join("\n")), + spanner.get_span(&key).unwrap(), + ) + }) + .collect::>(); + Some( + miette!( + labels = spans, + help = format!( + "Try adding these lines to the top of your file:\n{}\n{}", + "---".bright_green(), + format!("# yaml-language-server: $schema={}", self.url).bright_green(), + ), + "{} error{} encountered", + spans.len().to_string().red(), + if spans.len() == 1 { "" } else { "s" } + ) + .with_source_code( + NamedSource::new(path.display().to_string(), file).with_language("yaml"), + ), + ) + } + } + } + + fn process_err<'a, I>( + &self, + errors: I, + path: &Path, + file: Arc, + spanner: &YamlSpan, + ) -> Option + where + I: Iterator>, + { + let spans = errors + .map(|err| { + LabeledSpan::new_primary_with_span( + Some(remove_json(&err.to_string()).bold().red().to_string()), + spanner + .get_span(&Location::from(err.instance_path)) + .unwrap(), + ) }) - .await - .expect("Should join blocking thread")?, - ); + .collect::>(); - Ok(SchemaValidator { schema, validator }) - }) - .await - .expect("Should join task") + if spans.is_empty() { + None + } else { + Some( + miette!( + labels = spans, + help = format!( + "Try adding these lines to the top of your file:\n{}\n{}", + "---".bright_green(), + format!("# yaml-language-server: $schema={}", self.url).bright_green(), + ), + "{} error{} encountered", + spans.len().to_string().red(), + if spans.len() == 1 { "" } else { "s" } + ) + .with_source_code( + NamedSource::new(path.display().to_string(), file).with_language("yaml"), + ), + ) + } + } +} + +fn remove_json(string: &str) -> Cow<'_, str> { + static REGEX_OBJECT: LazyLock = LazyLock::new(|| Regex::new(r"^\{.*\}\s(.*)$").unwrap()); + static REGEX_ARRAY: LazyLock = LazyLock::new(|| Regex::new(r"^\[.*\]\s(.*)$").unwrap()); + + let string = string.trim(); + + if 
REGEX_OBJECT.is_match(string) { + REGEX_OBJECT.replace_all(string, "$1") + } else if REGEX_ARRAY.is_match(string) { + REGEX_ARRAY.replace_all(string, "$1") + } else { + Cow::Borrowed(string) + } } struct ModuleSchemaRetriever;
diff --git a/src/commands/validate/yaml_span.rs b/src/commands/validate/yaml_span.rs
new file mode 100644
index 00000000..b6f1a3f7
--- /dev/null
+++ b/src/commands/validate/yaml_span.rs
@@ -0,0 +1,346 @@
+//! Resolves locations inside a YAML document to source spans.
+//!
+//! The document is parsed once into a flat list of `yaml_rust2` events; a
+//! location is then resolved by walking those events in order.
+
+use std::sync::Arc;
+
+use bon::bon;
+use jsonschema::paths::LocationSegment;
+use miette::{bail, Context, IntoDiagnostic, Result, SourceSpan};
+use yaml_rust2::{
+    parser::{MarkedEventReceiver, Parser},
+    scanner::Marker,
+    Event,
+};
+
+#[cfg(not(test))]
+use log::trace;
+#[cfg(test)]
+use std::eprintln as trace;
+
+use super::location::Location;
+
+/// A YAML document together with the parser events recorded for it.
+#[derive(Debug)]
+pub struct YamlSpan {
+    file: Arc<String>,
+    event_markers: Vec<(Event, Marker)>,
+}
+
+#[bon]
+impl YamlSpan {
+    /// Parses `file` and records every parser event with its marker.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the parser fails to load `file`.
+    #[builder]
+    pub fn new(file: Arc<String>) -> Result<Self> {
+        let mut ys = Self {
+            file,
+            event_markers: Vec::default(),
+        };
+
+        // Hold a second handle so the parser input does not borrow `ys`
+        // while `ys` is mutably borrowed as the event receiver.
+        let file = Arc::clone(&ys.file);
+        let mut parser = Parser::new_from_str(&file);
+
+        parser
+            .load(&mut ys, false)
+            .into_diagnostic()
+            .context("Failed to parse file")?;
+        Ok(ys)
+    }
+
+    /// Resolves `path` to the span of the node it points at.
+    ///
+    /// An empty `path` yields a span of length one at the stream start.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if `path` cannot be followed through the document.
+    pub fn get_span(&self, path: &Location) -> Result<SourceSpan> {
+        let mut event_iter = self.event_markers.iter();
+        let mut path_iter = path.into_iter();
+
+        YamlCrawler::builder()
+            .events(&mut event_iter)
+            .path(&mut path_iter)
+            .build()
+            .get_span()
+    }
+}
+
+impl MarkedEventReceiver for YamlSpan {
+    fn on_event(&mut self, ev: Event, mark: Marker) {
+        self.event_markers.push((ev, mark));
+    }
+}
+
+/// Walks recorded parser events while consuming the segments of a location.
+///
+/// Spans are `(start, length)` pairs built from `Marker::index` and the
+/// length of the scalar text reported by the parser.
+// NOTE(review): confirm `Marker::index` uses the same unit as `SourceSpan`.
+struct YamlCrawler<'a, 'b, I, P>
+where
+    I: Iterator<Item = &'a (Event, Marker)>,
+    P: Iterator<Item = LocationSegment<'b>>,
+{
+    events: &'a mut I,
+    path: &'b mut P,
+}
+
+#[bon]
+impl<'a, 'b, I, P> YamlCrawler<'a, 'b, I, P>
+where
+    I: Iterator<Item = &'a (Event, Marker)>,
+    P: Iterator<Item = LocationSegment<'b>>,
+{
+    #[builder]
+    pub fn new(events: &'a mut I, path: &'b mut P) -> Self {
+        Self { events, path }
+    }
+
+    /// Resolves the whole path, starting at the document root.
+    pub fn get_span(&mut self) -> Result<SourceSpan> {
+        let Some(first_segment) = self.path.next() else {
+            // An empty path points at the document itself.
+            let Some((_, marker)) = self
+                .events
+                .find(|(e, _)| matches!(e, Event::StreamStart))
+            else {
+                bail!("Failed to find the start of the YAML stream");
+            };
+            return Ok((marker.index(), 1).into());
+        };
+
+        loop {
+            let (event, marker) = self.next_event()?;
+            match event {
+                Event::StreamStart | Event::DocumentStart => continue,
+                _ => return Ok(self.node(event, marker, Some(first_segment))?.into()),
+            }
+        }
+    }
+
+    /// Pulls the next recorded event, failing instead of panicking when the
+    /// events run out.
+    fn next_event(&mut self) -> Result<&'a (Event, Marker)> {
+        let Some(entry) = self.events.next() else {
+            bail!("Reached end of YAML events before finding the location");
+        };
+        Ok(entry)
+    }
+
+    /// Resolves the rest of the path against the node that starts with `event`.
+    ///
+    /// With no `segment` left the node itself is the target; otherwise the
+    /// search descends into the mapping or sequence the node opens.
+    fn node(
+        &mut self,
+        event: &Event,
+        marker: &Marker,
+        segment: Option<LocationSegment<'_>>,
+    ) -> Result<(usize, usize)> {
+        let start = marker.index();
+
+        match (event, segment) {
+            (Event::Scalar(value, ..), None) => Ok((start, value.len())),
+            (Event::Scalar(value, ..), Some(segment)) => {
+                bail!("Encountered scalar value {value} when looking for {segment}")
+            }
+            (Event::MappingStart(..) | Event::SequenceStart(..), None) => {
+                Ok((start, self.finish_node(event, marker)? - start))
+            }
+            (Event::MappingStart(..), Some(segment)) => self.key(segment),
+            (Event::SequenceStart(..), Some(LocationSegment::Index(index))) => {
+                self.sequence(index)
+            }
+            (Event::SequenceStart(..), Some(LocationSegment::Property(key))) => {
+                bail!("Encountered sequence when looking for key {key}")
+            }
+            (Event::Alias(_), _) => {
+                bail!("Encountered an alias, which cannot be resolved to a span")
+            }
+            (event, _) => bail!("Failed to read event: {event:?}"),
+        }
+    }
+
+    /// Searches the mapping whose start event was just consumed for
+    /// `expected_key` and resolves the rest of the path against its value.
+    fn key(&mut self, expected_key: LocationSegment<'_>) -> Result<(usize, usize)> {
+        trace!("Looking for location {expected_key:?}");
+
+        loop {
+            let (event, marker) = self.next_event()?;
+            trace!("{event:?} {marker:?}");
+
+            match (event, expected_key) {
+                (Event::MappingEnd, _) => {
+                    bail!("Reached end of map and haven't found key {expected_key}")
+                }
+                (Event::Scalar(key, ..), LocationSegment::Index(index)) => {
+                    bail!("Encountered key {key} when looking for index {index}")
+                }
+                (Event::Scalar(key, ..), LocationSegment::Property(expected))
+                    if key == expected =>
+                {
+                    trace!("Found matching key '{key}'");
+                    return self.value();
+                }
+                (Event::Scalar(key, ..), LocationSegment::Property(_)) => {
+                    trace!("Non-matching key '{key}'");
+                    // Consume the value too, so a scalar value that happens
+                    // to equal the wanted key is never mistaken for a key.
+                    self.skip_next_node()?;
+                }
+                (Event::MappingStart(..) | Event::SequenceStart(..) | Event::Alias(_), _) => {
+                    // Non-scalar key: skip the key node and then its value.
+                    self.finish_node(event, marker)?;
+                    self.skip_next_node()?;
+                }
+                (event, _) => bail!("Failed to read event: {event:?}"),
+            }
+        }
+    }
+
+    /// Resolves the rest of the path against the value that follows a
+    /// matching key.
+    fn value(&mut self) -> Result<(usize, usize)> {
+        let (event, marker) = self.next_event()?;
+        trace!("{event:?} {marker:?}");
+        let segment = self.path.next();
+        self.node(event, marker, segment)
+    }
+
+    /// Resolves the rest of the path against element `index` of the sequence
+    /// whose start event was just consumed.
+    fn sequence(&mut self, index: usize) -> Result<(usize, usize)> {
+        let mut curr_index = 0;
+
+        loop {
+            let (event, marker) = self.next_event()?;
+            trace!("{event:?} {marker:?}");
+            trace!("index: {index}, curr_index: {curr_index}");
+
+            if matches!(event, Event::SequenceEnd) {
+                bail!("Reached end of sequence before reaching index {index}");
+            }
+            if curr_index == index {
+                trace!("Found element at index {index}");
+                let segment = self.path.next();
+                return self.node(event, marker, segment);
+            }
+
+            // Not there yet: drop this element, however deeply it nests.
+            self.finish_node(event, marker)?;
+            curr_index += 1;
+        }
+    }
+
+    /// Consumes the next complete node without inspecting it.
+    fn skip_next_node(&mut self) -> Result<()> {
+        let (event, marker) = self.next_event()?;
+        self.finish_node(event, marker)?;
+        Ok(())
+    }
+
+    /// Consumes the remainder of the node that starts with `event`.
+    ///
+    /// Returns the index just past the last scalar seen inside the node, or
+    /// the node's own start index when it contains no scalar.
+    fn finish_node(&mut self, event: &Event, marker: &Marker) -> Result<usize> {
+        let mut last_index = marker.index();
+        // Mappings and sequences nest, so count how many are still open.
+        let mut depth: usize = match event {
+            Event::Scalar(value, ..) => return Ok(last_index + value.len()),
+            Event::Alias(_) => return Ok(last_index),
+            Event::MappingStart(..) | Event::SequenceStart(..) => 1,
+            event => bail!("Failed to read event: {event:?}"),
+        };
+
+        while depth > 0 {
+            let (event, marker) = self.next_event()?;
+            trace!("SKIPPING: {event:?} {marker:?}");
+            match event {
+                Event::MappingStart(..) | Event::SequenceStart(..) => depth += 1,
+                Event::MappingEnd | Event::SequenceEnd => depth -= 1,
+                Event::Scalar(value, ..) => last_index = marker.index() + value.len(),
+                _ => {}
+            }
+        }
+        Ok(last_index)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::sync::Arc;
+
+    use miette::{miette, LabeledSpan};
+    use rstest::rstest;
+
+    use crate::commands::validate::location::Location;
+
+    use super::YamlSpan;
+
+    const RECIPE: &str = include_str!("../../../integration-tests/test-repo/recipes/recipe.yml");
+    const RECIPE_INVALID: &str =
+        include_str!("../../../integration-tests/test-repo/recipes/recipe-invalid.yml");
+    const RECIPE_INVALID_MODULE: &str =
+        include_str!("../../../integration-tests/test-repo/recipes/recipe-invalid-module.yml");
+    const RECIPE_INVALID_STAGE: &str =
+        include_str!("../../../integration-tests/test-repo/recipes/recipe-invalid-stage.yml");
+
+    #[rstest]
+    #[case("test: value", "", (0, 1))]
+    #[case("test: value", "/test", (6, 5))]
+    #[case("a: test\ntest: value", "/test", (14, 5))]
+    #[case("a: [[x, y]]", "/a/0/1", (8, 1))]
+    #[case(RECIPE, "/description", (109, 29))]
+    #[case(RECIPE, "/image-version", (199, 2))]
+    #[case(RECIPE, "/modules/4/install", (601, 24))]
+    #[case(RECIPE, "/modules/7/snippets", (820, 57))]
+    #[case(RECIPE_INVALID, "/image-version", (182, 11))]
+    #[case(RECIPE_INVALID_STAGE, "/stages/0/from", (262, 8))]
+    #[case(RECIPE_INVALID_MODULE, "/modules/7/containerfiles", (807, 8))]
+    fn test_getspan(#[case] file: &str, #[case] path: &str, #[case] expected: (usize, usize)) {
+        let file = Arc::new(file.to_owned());
+        let location = Location::try_from(path).unwrap();
+        dbg!(&location);
+
+        let collector = YamlSpan::builder().file(file.clone()).build().unwrap();
+        let source_span = collector.get_span(&location).unwrap();
+        println!(
+            "{:?}",
+            miette!(
+                labels = [LabeledSpan::underline(source_span)],
+                "Found value at {path}"
+            )
+            .with_source_code(file)
+        );
+        assert_eq!(source_span, expected.into());
+    }
+
+    #[rstest]
+    #[case("test: value", "/2")]
+    #[case("test: value", "/mapping")]
+    #[case("list: [a, b]", "/list/key")]
+    #[case("map: {a: b}", "/map/0")]
+    #[case("a: &x 1\nb: *x", "/b")]
+    #[case(RECIPE, "/test")]
+    #[case(RECIPE, "/image-version/2")]
+    #[case(RECIPE, "/modules/12")]
+    fn test_getspan_err(#[case] file: &str, #[case] path: &str) {
+        let file = Arc::new(file.to_owned());
+        let location = Location::try_from(path).unwrap();
+        dbg!(&location);
+
+        let collector = YamlSpan::builder().file(file).build().unwrap();
+        let source_span = collector.get_span(&location).unwrap_err();
+        eprintln!("{source_span:?}");
+    }
+}
diff --git a/utils/Cargo.toml b/utils/Cargo.toml index 7a638b2d..6842807a 100644 --- a/utils/Cargo.toml +++ b/utils/Cargo.toml @@ -16,7 +16,6 @@ directories = "5" docker_credential = "1" format_serde_error = "0.3" process_control = { version = "4", features = ["crossbeam-channel"] } -syntect = "5" which = "6" chrono.workspace = true @@ -26,6 +25,7 @@ miette.workspace = true serde.workspace = true serde_json.workspace = true serde_yaml.workspace = true +syntect.workspace = true bon.workspace = true [build-dependencies]