From 133ce0987c10c8bfa60d86c6fee87fac65d8b5b4 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Fri, 10 May 2024 21:30:15 +0200 Subject: [PATCH] perf: Do not build glob matchers repeatedly when include-exclude feature is enabled I've noticed in my profiles that if include-exclude feature is enabled, the call to ::iter() function is dominated by path filtering. This commit fixes that by explicitly passing in a premade Globset for a given object. Note that we're using std::sync::OnceLock to lazily store away premade Globsets, which bumps MSRV to 1.70. An alternative would be to use once_cell as an explicit dependency. For my use case, in debug builds this takes down the matching time from 650ms to 6ms. I know you shouldn't benchmark debug builds, but rust-embed::iter is on my app's initialization path and that's how I noticed it in the first place. I'd expect release timings to be somewhat similar --- impl/src/lib.rs | 27 ++++++++------ utils/src/lib.rs | 91 ++++++++++++++++++++++++++---------------------- 2 files changed, 65 insertions(+), 53 deletions(-) diff --git a/impl/src/lib.rs b/impl/src/lib.rs index 0bfbf25..43349a0 100644 --- a/impl/src/lib.rs +++ b/impl/src/lib.rs @@ -6,6 +6,7 @@ extern crate proc_macro; use proc_macro::TokenStream; use proc_macro2::TokenStream as TokenStream2; +use rust_embed_utils::PathMatcher; use std::{ collections::BTreeMap, env, @@ -25,7 +26,8 @@ fn embedded( let includes: Vec<&str> = includes.iter().map(AsRef::as_ref).collect(); let excludes: Vec<&str> = excludes.iter().map(AsRef::as_ref).collect(); - for rust_embed_utils::FileEntry { rel_path, full_canonical_path } in rust_embed_utils::get_files(absolute_folder_path.clone(), &includes, &excludes) { + let matcher = PathMatcher::new(&includes, &excludes); + for rust_embed_utils::FileEntry { rel_path, full_canonical_path } in rust_embed_utils::get_files(absolute_folder_path.clone(), matcher) { match_values.insert( rel_path.clone(), embed_file(relative_folder_path, ident, &rel_path, &full_canonical_path, metadata_only)?, @@ -125,8 +127,8 @@ fn dynamic(ident: &syn::Ident, folder_path: String, prefix: Option<&str>, includ const EXCLUDES: &[&str] = &[#(#excludes),*]; }; - // In metadata_only mode, we still need to read file contents to generate the file hash, but - // then we drop the file data. + // In metadata_only mode, we still need to read file contents to generate the + // file hash, but then we drop the file data. let strip_contents = metadata_only.then_some(quote! { .map(|mut file| { file.data = ::std::default::Default::default(); file }) }); @@ -137,13 +139,18 @@ fn dynamic(ident: &syn::Ident, folder_path: String, prefix: Option<&str>, includ quote! { #[cfg(debug_assertions)] impl #ident { + + + fn matcher() -> ::rust_embed::utils::PathMatcher { + #declare_includes + #declare_excludes + static PATH_MATCHER: ::std::sync::OnceLock<::rust_embed::utils::PathMatcher> = ::std::sync::OnceLock::new(); + PATH_MATCHER.get_or_init(|| rust_embed::utils::PathMatcher::new(INCLUDES, EXCLUDES)).clone() + } /// Get an embedded file and its metadata. pub fn get(file_path: &str) -> ::std::option::Option { #handle_prefix - #declare_includes - #declare_excludes - let rel_file_path = file_path.replace("\\", "/"); let file_path = ::std::path::Path::new(#folder_path).join(&rel_file_path); @@ -162,8 +169,8 @@ fn dynamic(ident: &syn::Ident, folder_path: String, prefix: Option<&str>, includ return ::std::option::Option::None; } } - - if rust_embed::utils::is_path_included(&rel_file_path, INCLUDES, EXCLUDES) { + let path_matcher = Self::matcher(); + if path_matcher.is_path_included(&rel_file_path) { rust_embed::utils::read_file_from_fs(&canonical_file_path).ok() #strip_contents } else { ::std::option::Option::None @@ -174,10 +181,8 @@ fn dynamic(ident: &syn::Ident, folder_path: String, prefix: Option<&str>, includ pub fn iter() -> impl ::std::iter::Iterator> { use ::std::path::Path; - #declare_includes - #declare_excludes - rust_embed::utils::get_files(::std::string::String::from(#folder_path), INCLUDES, EXCLUDES) + rust_embed::utils::get_files(::std::string::String::from(#folder_path), Self::matcher()) .map(|e| #map_iter) } } diff --git a/utils/src/lib.rs b/utils/src/lib.rs index 861ed18..85ee206 100644 --- a/utils/src/lib.rs +++ b/utils/src/lib.rs @@ -12,47 +12,8 @@ pub struct FileEntry { pub full_canonical_path: String, } -#[cfg(not(feature = "include-exclude"))] -pub fn is_path_included(_path: &str, _includes: &[&str], _excludes: &[&str]) -> bool { - true -} - -#[cfg(feature = "include-exclude")] -pub fn is_path_included(rel_path: &str, includes: &[&str], excludes: &[&str]) -> bool { - use globset::Glob; - - // ignore path matched by exclusion pattern - for exclude in excludes { - let pattern = Glob::new(exclude) - .unwrap_or_else(|_| panic!("invalid exclude pattern '{}'", exclude)) - .compile_matcher(); - - if pattern.is_match(rel_path) { - return false; - } - } - - // accept path if no includes provided - if includes.is_empty() { - return true; - } - - // accept path if matched by inclusion pattern - for include in includes { - let pattern = Glob::new(include) - .unwrap_or_else(|_| panic!("invalid include pattern '{}'", include)) - .compile_matcher(); - - if pattern.is_match(rel_path) { - return true; - } - } - - false -} - #[cfg_attr(all(debug_assertions, not(feature = "debug-embed")), allow(unused))] -pub fn get_files<'patterns>(folder_path: String, includes: &'patterns [&str], excludes: &'patterns [&str]) -> impl Iterator + 'patterns { +pub fn get_files(folder_path: String, matcher: PathMatcher) -> impl Iterator { walkdir::WalkDir::new(&folder_path) .follow_links(true) .sort_by_file_name() @@ -68,8 +29,7 @@ pub fn get_files<'patterns>(folder_path: String, includes: &'patterns [&str], ex } else { rel_path }; - - if is_path_included(&rel_path, includes, excludes) { + if matcher.is_path_included(&rel_path) { Some(FileEntry { rel_path, full_canonical_path }) } else { None @@ -176,3 +136,50 @@ pub fn read_file_from_fs(file_path: &Path) -> io::Result { fn path_to_str>(p: P) -> String { p.as_ref().to_str().expect("Path does not have a string representation").to_owned() } + +#[derive(Clone)] +pub struct PathMatcher { + #[cfg(feature = "include-exclude")] + include_matcher: globset::GlobSet, + #[cfg(feature = "include-exclude")] + exclude_matcher: globset::GlobSet, +} + +#[cfg(feature = "include-exclude")] +impl PathMatcher { + pub fn new(includes: &[&str], excludes: &[&str]) -> Self { + let mut include_matcher = globset::GlobSetBuilder::new(); + for include in includes { + include_matcher.add(globset::Glob::new(include).unwrap_or_else(|_| panic!("invalid include pattern '{}'", include))); + } + let include_matcher = include_matcher + .build() + .unwrap_or_else(|_| panic!("Could not compile included patterns matcher")); + + let mut exclude_matcher = globset::GlobSetBuilder::new(); + for exclude in excludes { + exclude_matcher.add(globset::Glob::new(exclude).unwrap_or_else(|_| panic!("invalid exclude pattern '{}'", exclude))); + } + let exclude_matcher = exclude_matcher + .build() + .unwrap_or_else(|_| panic!("Could not compile excluded patterns matcher")); + + Self { + include_matcher, + exclude_matcher, + } + } + pub fn is_path_included(&self, path: &str) -> bool { + !self.exclude_matcher.is_match(path) && (self.include_matcher.is_empty() || self.include_matcher.is_match(path)) + } +} + +#[cfg(not(feature = "include-exclude"))] +impl PathMatcher { + pub fn new(_includes: &[&str], _excludes: &[&str]) -> Self { + Self {} + } + pub fn is_path_included(&self, _path: &str) -> bool { + true + } +}