Skip to content

Commit

Permalink
fix(core): optimize daemon output glob matching (#27775)
Browse files Browse the repository at this point in the history
<!-- Please make sure you have read the submission guidelines before
posting an PR -->
<!--
https://github.com/nrwl/nx/blob/master/CONTRIBUTING.md#-submitting-a-pr
-->

<!-- Please make sure that your commit message follows our format -->
<!-- Example: `fix(nx): must begin with lowercase` -->

<!-- If this is a particularly complex change or feature addition, you
can request a dedicated Nx release for this pull request branch. Mention
someone from the Nx team or the `@nrwl/nx-pipelines-reviewers` and they
will confirm if the PR warrants its own release for testing purposes,
and generate it for you if appropriate. -->

## Current Behavior
- Nonexistant outputs are considered globs
- Glob outputs are walked from workspace root, which is slow on large
repos

[NX Daemon Server] - 2024-09-06T19:44:39.674Z - Done responding to the
client outputsHashesMatch
[NX Daemon Server] - 2024-09-06T19:44:39.674Z - Handled
OUTPUTS_HASHES_MATCH. Handling time: 94. Response time: 0.
[NX Daemon Server] - 2024-09-06T19:44:39.775Z - [REQUEST]: Responding to
the client. recordOutputsHash
[NX Daemon Server] - 2024-09-06T19:44:39.775Z - Done responding to the
client recordOutputsHash
[NX Daemon Server] - 2024-09-06T19:44:39.775Z - Handled
RECORD_OUTPUTS_HASH. Handling time: 100. Response time: 0.
[NX Daemon Server] - 2024-09-06T19:44:39.818Z - [REQUEST]: Responding to
the client. PROCESS_IN_BACKGROUND
[NX Daemon Server] - 2024-09-06T19:44:39.818Z - Done responding to the
client PROCESS_IN_BACKGROUND
[NX Daemon Server] - 2024-09-06T19:44:39.818Z - Handled
PROCESS_IN_BACKGROUND. Handling time: 14. Response time: 0.

## Expected Behavior
- Nonexistant outputs are only globs if they should be
- Globs are a bit faster

[NX Daemon Server] - 2024-09-06T19:43:36.899Z - Handled
OUTPUTS_HASHES_MATCH. Handling time: 0. Response time: 0.
[NX Daemon Server] - 2024-09-06T19:43:36.900Z - [REQUEST]: Responding to
the client. recordOutputsHash
[NX Daemon Server] - 2024-09-06T19:43:36.900Z - Done responding to the
client recordOutputsHash
[NX Daemon Server] - 2024-09-06T19:43:36.900Z - Handled
RECORD_OUTPUTS_HASH. Handling time: 0. Response time: 0.
[NX Daemon Server] - 2024-09-06T19:43:36.944Z - [REQUEST]: Responding to
the client. PROCESS_IN_BACKGROUND
[NX Daemon Server] - 2024-09-06T19:43:36.944Z - Done responding to the
client PROCESS_IN_BACKGROUND
[NX Daemon Server] - 2024-09-06T19:43:36.944Z - Handled
PROCESS_IN_BACKGROUND. Handling time: 13. Response time: 0.
[NX Daemon Server] - 2024-09-06T19:43:36.949Z - Uploading file artifacts
[NX Daemon Server] - 2024-09-06T19:43:36.949Z - Done uploading file
artifacts

> Note timings are from Nx repo, close enough to be comparable. No real
improvement was expected here, mainly checking that things didn't get
worse.

## Related Issue(s)
<!-- Please link the issue being fixed so it gets closed when this is
merged. -->

Fixes #

---------

Co-authored-by: FrozenPandaz <[email protected]>

(cherry picked from commit ac9da41)
  • Loading branch information
AgentEnder authored and FrozenPandaz committed Sep 6, 2024
1 parent bb7d9c9 commit 0d9ff23
Show file tree
Hide file tree
Showing 17 changed files with 313 additions and 120 deletions.
21 changes: 6 additions & 15 deletions packages/nx/src/migrations/update-15-0-0/prefix-outputs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,7 @@ export default async function (tree: Tree) {
continue;
}

try {
validateOutputs(target.outputs);
} catch (e) {
target.outputs = transformLegacyOutputs(project.root, e);
}
target.outputs = transformLegacyOutputs(project.root, target.outputs);
}
try {
updateProjectConfiguration(tree, projectName, project);
Expand All @@ -44,11 +40,10 @@ export default async function (tree: Tree) {
(json) => {
for (const target of Object.values(json.nx?.targets ?? {})) {
if (target.outputs) {
try {
validateOutputs(target.outputs);
} catch (e) {
target.outputs = transformLegacyOutputs(project.root, e);
}
target.outputs = transformLegacyOutputs(
project.root,
target.outputs
);
}
}

Expand All @@ -64,11 +59,7 @@ export default async function (tree: Tree) {
if (!target.outputs) {
continue;
}
try {
validateOutputs(target.outputs);
} catch (e: any) {
target.outputs = transformLegacyOutputs('{projectRoot}', e);
}
target.outputs = transformLegacyOutputs('{projectRoot}', target.outputs);
}

updateNxJson(tree, nxJson);
Expand Down
89 changes: 73 additions & 16 deletions packages/nx/src/native/cache/expand_outputs.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
use hashbrown::HashMap;
use std::path::PathBuf;
use tracing::trace;

use crate::native::glob::{build_glob_set, contains_glob_pattern};
use crate::native::glob::{build_glob_set, contains_glob_pattern, glob_transform::partition_glob};
use crate::native::logger::enable_logger;
use crate::native::utils::Normalize;
use crate::native::walker::nx_walker_sync;
use crate::native::walker::{nx_walker, nx_walker_sync};

#[napi]
/// Expands the given entries into a list of existing directories and files.
Expand Down Expand Up @@ -63,13 +65,33 @@ pub fn expand_outputs(directory: String, entries: Vec<String>) -> anyhow::Result
Ok(found_paths)
}

fn partition_globs_into_map(globs: Vec<String>) -> anyhow::Result<HashMap<String, Vec<String>>> {
globs
.iter()
.map(|glob| partition_glob(glob))
// Right now we have an iterator where each item is (root: String, patterns: String[]).
// We want a singular root, with the patterns mapped to it.
.fold(
Ok(HashMap::<String, Vec<String>>::new()),
|map_result, parsed_glob| {
let mut map = map_result?;
let (root, patterns) = parsed_glob?;
let entry = map.entry(root).or_insert(vec![]);
entry.extend(patterns);
Ok(map)
},
)
}

#[napi]
/// Expands the given outputs into a list of existing files.
/// This is used when hashing outputs
pub fn get_files_for_outputs(
directory: String,
entries: Vec<String>,
) -> anyhow::Result<Vec<String>> {
enable_logger();

let directory: PathBuf = directory.into();

let mut globs: Vec<String> = vec![];
Expand All @@ -79,7 +101,9 @@ pub fn get_files_for_outputs(
let path = directory.join(&entry);

if !path.exists() {
globs.push(entry);
if contains_glob_pattern(&entry) {
globs.push(entry);
}
} else if path.is_dir() {
directories.push(entry);
} else {
Expand All @@ -88,28 +112,41 @@ pub fn get_files_for_outputs(
}

if !globs.is_empty() {
// todo(jcammisuli): optimize this as nx_walker_sync is very slow on the root directory. We need to change this to only search smaller directories
let glob_set = build_glob_set(&globs)?;
let found_paths = nx_walker_sync(&directory, None).filter_map(|path| {
if glob_set.is_match(&path) {
Some(path.to_normalized_string())
} else {
None
}
});
let partitioned_globs = partition_globs_into_map(globs)?;
for (root, patterns) in partitioned_globs {
let root_path = directory.join(&root);
let glob_set = build_glob_set(&patterns)?;
trace!("walking directory: {:?}", root_path);

let found_paths: Vec<String> = nx_walker(&root_path, false)
.filter_map(|file| {
if glob_set.is_match(&file.normalized_path) {
Some(
// root_path contains full directory,
// root is only the leading dirs from glob
PathBuf::from(&root)
.join(&file.normalized_path)
.to_normalized_string(),
)
} else {
None
}
})
.collect();

files.extend(found_paths);
files.extend(found_paths);
}
}

if !directories.is_empty() {
for dir in directories {
let dir = PathBuf::from(dir);
let dir_path = directory.join(&dir);
let files_in_dir = nx_walker_sync(&dir_path, None).filter_map(|e| {
let path = dir_path.join(&e);
let files_in_dir = nx_walker(&dir_path, false).filter_map(|e| {
let path = dir_path.join(&e.normalized_path);

if path.is_file() {
Some(dir.join(e).to_normalized_string())
Some(dir.join(e.normalized_path).to_normalized_string())
} else {
None
}
Expand Down Expand Up @@ -214,4 +251,24 @@ mod test {
]
);
}

#[test]
fn should_get_files_for_outputs_with_glob() {
let temp = setup_fs();
let entries = vec![
"packages/nx/src/native/*.node".to_string(),
"folder/nested-folder".to_string(),
"test.txt".to_string(),
];
let mut result = get_files_for_outputs(temp.display().to_string(), entries).unwrap();
result.sort();
assert_eq!(
result,
vec![
"folder/nested-folder",
"packages/nx/src/native/nx.darwin-arm64.node",
"test.txt"
]
);
}
}
1 change: 1 addition & 0 deletions packages/nx/src/native/cache/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub mod expand_outputs;
pub mod file_ops;
pub mod validate_outputs;
77 changes: 77 additions & 0 deletions packages/nx/src/native/cache/validate_outputs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
use itertools::Itertools;
use regex::Regex;

use crate::native::glob::{contains_glob_pattern, glob_transform::partition_glob};

const ALLOWED_WORKSPACE_ROOT_OUTPUT_PREFIXES: [&str; 2] = ["!{workspaceRoot}", "{workspaceRoot}"];

fn is_missing_prefix(output: &str) -> bool {
let re = Regex::new(r"^!?\{[\s\S]+\}").expect("Output pattern regex should compile");

!re.is_match(output)
}

#[napi]
pub fn validate_outputs(outputs: Vec<String>) -> anyhow::Result<()> {
let outputs_len = outputs.len();
let mut missing_prefix = Vec::with_capacity(outputs_len);
let mut workspace_globs = Vec::with_capacity(outputs_len);

for output in outputs.iter() {
if is_missing_prefix(output) {
missing_prefix.push(output);
} else {
for prefix in ALLOWED_WORKSPACE_ROOT_OUTPUT_PREFIXES.iter() {
if let Some(trimmed) = output.strip_prefix(prefix) {
if contains_glob_pattern(&trimmed) {
let (root, _) = partition_glob(&trimmed)?;
if root.is_empty() {
workspace_globs.push(output);
}
}
}
}
}
}

if workspace_globs.is_empty() && missing_prefix.is_empty() {
return Ok(());
}

let mut error_message = String::new();
if !missing_prefix.is_empty() {
error_message.push_str(&format!(
"The following outputs are invalid: \n - {}\n\nRun `nx repair` to fix this.",
missing_prefix.iter().join("\n - ")
));
}
if !workspace_globs.is_empty() {
error_message.push_str(&format!(
"The following outputs are defined by a glob pattern from the workspace root: \n - {}\n\nThese can be slow, replace them with a more specific pattern.",
workspace_globs.iter().join("\n - ")
));
}

Err(anyhow::anyhow!(error_message))
}

#[napi]
pub fn get_transformable_outputs(outputs: Vec<String>) -> Vec<String> {
outputs
.into_iter()
.filter(|output| is_missing_prefix(output))
.collect()
}

#[cfg(test)]
mod test {
use super::is_missing_prefix;

#[test]
fn test_is_missing_prefix() {
assert!(is_missing_prefix("dist"));
assert!(is_missing_prefix("!dist"));
assert!(!is_missing_prefix("{workspaceRoot}/dist"));
assert!(!is_missing_prefix("!{workspaceRoot}/dist"));
}
}
2 changes: 1 addition & 1 deletion packages/nx/src/native/glob.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
mod glob_group;
mod glob_parser;
mod glob_transform;
pub mod glob_transform;

use crate::native::glob::glob_transform::convert_glob;
use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
Expand Down
65 changes: 62 additions & 3 deletions packages/nx/src/native/glob/glob_transform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@ use crate::native::glob::glob_group::GlobGroup;
use crate::native::glob::glob_parser::parse_glob;
use itertools::Itertools;
use std::collections::HashSet;
use itertools::Either::{Left, Right};
use super::contains_glob_pattern;

#[derive(Debug)]
enum GlobType {
Negative(String),
Positive(String),
}

pub fn convert_glob(glob: &str) -> anyhow::Result<Vec<String>> {
let (negated, parsed) = parse_glob(glob)?;
fn convert_glob_segments(negated: bool, parsed: Vec<Vec<GlobGroup>>) -> Vec<String> {
let mut built_segments: Vec<Vec<GlobType>> = Vec::new();
for (index, glob_segment) in parsed.iter().enumerate() {
let is_last = index == parsed.len() - 1;
Expand Down Expand Up @@ -58,7 +59,12 @@ pub fn convert_glob(glob: &str) -> anyhow::Result<Vec<String>> {
.into_iter()
.collect::<Vec<_>>();
globs.sort();
Ok(globs)
globs
}

pub fn convert_glob(glob: &str) -> anyhow::Result<Vec<String>> {
let (negated, parsed) = parse_glob(glob)?;
Ok(convert_glob_segments(negated, parsed))
}

fn build_segment(
Expand Down Expand Up @@ -107,6 +113,31 @@ fn build_segment(
}
}

pub fn partition_glob(glob: &str) -> anyhow::Result<(String, Vec<String>)> {
let (negated, groups) = parse_glob(glob)?;
// Partition glob into leading directories and patterns that should be matched
let mut has_patterns = false;
let (leading_dir_segments, pattern_segments): (Vec<String>, _) = groups
.into_iter()
.filter(|group| !group.is_empty())
.partition_map(|group| {
match &group[0] {
GlobGroup::NonSpecial(value) if !contains_glob_pattern(&value) && !has_patterns => {
Left(value.to_string())
}
_ => {
has_patterns = true;
Right(group)
}
}
});

Ok((
leading_dir_segments.join("/"),
convert_glob_segments(negated, pattern_segments),
))
}

#[cfg(test)]
mod test {
use super::convert_glob;
Expand Down Expand Up @@ -233,4 +264,32 @@ mod test {
]
);
}

#[test]
fn should_partition_glob_with_leading_dirs() {
let (leading_dirs, globs) = super::partition_glob("dist/app/**/!(README|LICENSE).(js|ts)").unwrap();
assert_eq!(leading_dirs, "dist/app");
assert_eq!(globs, ["!**/{README,LICENSE}.{js,ts}", "**/*.{js,ts}",]);
}

#[test]
fn should_partition_glob_with_leading_dirs_and_simple_patterns() {
let (leading_dirs, globs) = super::partition_glob("dist/app/**/*.css").unwrap();
assert_eq!(leading_dirs, "dist/app");
assert_eq!(globs, ["**/*.css"]);
}

#[test]
fn should_partition_glob_with_leading_dirs_dirs_and_patterns() {
let (leading_dirs, globs) = super::partition_glob("dist/app/**/js/*.js").unwrap();
assert_eq!(leading_dirs, "dist/app");
assert_eq!(globs, ["**/js/*.js"]);
}

#[test]
fn should_partition_glob_with_leading_dirs_and_no_patterns() {
let (leading_dirs, globs) = super::partition_glob("dist/app/").unwrap();
assert_eq!(leading_dirs, "dist/app");
assert_eq!(globs, [] as [String; 0]);
}
}
4 changes: 4 additions & 0 deletions packages/nx/src/native/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ export declare export function getBinaryTarget(): string
*/
export declare export function getFilesForOutputs(directory: string, entries: Array<string>): Array<string>

export declare export function getTransformableOutputs(outputs: Array<string>): Array<string>

export declare export function hashArray(input: Array<string>): string

export interface HashDetails {
Expand Down Expand Up @@ -222,6 +224,8 @@ export interface UpdatedWorkspaceFiles {
externalReferences: NxWorkspaceFilesExternals
}

export declare export function validateOutputs(outputs: Array<string>): void

export interface WatchEvent {
path: string
type: EventType
Expand Down
2 changes: 2 additions & 0 deletions packages/nx/src/native/native-bindings.js
Original file line number Diff line number Diff line change
Expand Up @@ -374,10 +374,12 @@ module.exports.expandOutputs = nativeBinding.expandOutputs
module.exports.findImports = nativeBinding.findImports
module.exports.getBinaryTarget = nativeBinding.getBinaryTarget
module.exports.getFilesForOutputs = nativeBinding.getFilesForOutputs
module.exports.getTransformableOutputs = nativeBinding.getTransformableOutputs
module.exports.hashArray = nativeBinding.hashArray
module.exports.hashFile = nativeBinding.hashFile
module.exports.IS_WASM = nativeBinding.IS_WASM
module.exports.remove = nativeBinding.remove
module.exports.testOnlyTransferFileMap = nativeBinding.testOnlyTransferFileMap
module.exports.transferProjectGraph = nativeBinding.transferProjectGraph
module.exports.validateOutputs = nativeBinding.validateOutputs
module.exports.WorkspaceErrors = nativeBinding.WorkspaceErrors
Loading

0 comments on commit 0d9ff23

Please sign in to comment.