Skip to content

Commit

Permalink
feat: implement lazy file loading (#306)
Browse files Browse the repository at this point in the history
morgante authored May 6, 2024

Verified

This commit was signed with the committer’s verified signature. The key has expired.
charlyx Charles-Henri GUERIN
1 parent 452abd1 commit b4cfadc
Showing 24 changed files with 848 additions and 298 deletions.
2 changes: 1 addition & 1 deletion crates/cli/src/analytics.rs
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@ use anyhow::Result;
use clap::Args;
use lazy_static::lazy_static;
use marzano_gritmodule::fetcher::LocalRepo;
use marzano_gritmodule::{fetcher::ModuleRepo};
use marzano_gritmodule::fetcher::ModuleRepo;
use marzano_messenger::emit::ApplyDetails;
use serde::{Deserialize, Serialize};
use std::{env, fmt, time::Duration};
2 changes: 1 addition & 1 deletion crates/cli/src/analyze.rs
Original file line number Diff line number Diff line change
@@ -323,7 +323,7 @@ where
#[cfg(feature = "grit_tracing")]
task_span.set_parent(grouped_ctx);
task_span.in_scope(|| {
compiled.execute_paths_streaming(&found_paths, context, tx, cache_ref);
compiled.execute_paths_streaming(found_paths, context, tx, cache_ref);
loop {
if processed.load(Ordering::SeqCst) >= found_count.try_into().unwrap()
|| !should_continue.load(Ordering::SeqCst)
2 changes: 1 addition & 1 deletion crates/cli/src/commands/check.rs
Original file line number Diff line number Diff line change
@@ -194,7 +194,7 @@ pub(crate) async fn run_check(
!cache.has_no_matches(hash, pattern.hash)
})
.collect();
let (result, no_match) = pattern.execute_paths(&un_cached_input_files, &context);
let (result, no_match) = pattern.execute_paths(un_cached_input_files, &context);
if !no_match.is_empty() {
for path in no_match.into_iter() {
let hash = path.hash.unwrap();
1 change: 0 additions & 1 deletion crates/cli/src/community.rs
Original file line number Diff line number Diff line change
@@ -2,7 +2,6 @@ use anyhow::Result;
use grit_util::{FileRange, Position, RangeWithoutByte};
use serde::Deserialize;


use std::path::PathBuf;

#[derive(Debug, Deserialize)]
2 changes: 2 additions & 0 deletions crates/cli_bin/tests/apply.rs
Original file line number Diff line number Diff line change
@@ -783,6 +783,8 @@ fn test_absolute_path() -> Result<()> {
let file = dir.join("dir2/unique.js");
let content = std::fs::read_to_string(file)?;

println!("content: {:?}", content);

// Verify it contains dir2/unique.js
assert!(content.contains("dir2/unique.js"));

23 changes: 23 additions & 0 deletions crates/core/src/api.rs
Original file line number Diff line number Diff line change
@@ -35,6 +35,16 @@ pub enum MatchResult {
AnalysisLog(AnalysisLog),
}

impl MatchResult {
    /// Reports whether this result counts as a match.
    ///
    /// The decision is delegated to the `is_match` function that is in scope
    /// in this module.
    pub fn is_match(&self) -> bool {
        is_match(self)
    }

    /// Reports whether this result is an analysis log whose level is below 400.
    ///
    /// Every other variant yields `false`.
    pub fn is_error(&self) -> bool {
        match self {
            MatchResult::AnalysisLog(entry) => entry.level < 400,
            _ => false,
        }
    }
}

/// Make a path look the way provolone expects it to
/// Removes leading "./", or the root path if it's provided
fn normalize_path_in_project<'a>(path: &'a str, root_path: Option<&'a PathBuf>) -> &'a str {
@@ -591,6 +601,19 @@ impl AnalysisLog {
source: None,
}
}

/// Builds a log entry for an error that is not tied to a specific file.
///
/// The entry carries the given `message`, level 280, an empty file name,
/// the engine id `"marzano"`, and `Position::first()` as its position.
/// `range`, `syntax_tree` and `source` are left unset.
pub(crate) fn floating_error(message: String) -> Self {
    // No file is associated with a floating error, so the name stays empty.
    let file = String::new();
    let engine_id = String::from("marzano");

    Self {
        level: 280,
        message,
        position: Position::first(),
        file,
        engine_id,
        range: None,
        syntax_tree: None,
        source: None,
    }
}
}

impl From<GritAnalysisLog> for AnalysisLog {
1 change: 1 addition & 0 deletions crates/core/src/built_in_functions.rs
Original file line number Diff line number Diff line change
@@ -173,6 +173,7 @@ fn resolve_path_fn<'a>(
let args = MarzanoResolvedPattern::from_patterns(args, state, context, logs)?;

let current_file = get_absolute_file_name(state, context.language())?;

let target_path = match &args[0] {
Some(resolved_pattern) => resolved_pattern.text(&state.files, context.language())?,
None => return Err(anyhow!("No path argument provided for resolve function")),
3 changes: 3 additions & 0 deletions crates/core/src/lib.rs
Original file line number Diff line number Diff line change
@@ -11,6 +11,7 @@ mod equivalence;
mod foreign_function_definition;
pub mod fs;
mod inline_snippets;
mod limits;
pub mod marzano_binding;
pub mod marzano_code_snippet;
pub mod marzano_context;
@@ -35,3 +36,5 @@ use getrandom as _;

#[cfg(test)]
mod test;
#[cfg(test)]
mod test_files;
22 changes: 22 additions & 0 deletions crates/core/src/limits.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
use grit_util::{AnalysisLog, Position};
use marzano_util::rich_path::RichFile;

use crate::constants::MAX_FILE_SIZE;

/// Checks a file against `MAX_FILE_SIZE` and produces a skip log if it exceeds it.
///
/// Both the length of the path and the length of the content are compared
/// with `MAX_FILE_SIZE`; if either is larger, a level-310 log naming the file
/// is returned. Returns `None` when both are within the limit.
pub(crate) fn is_file_too_big(file: &RichFile) -> Option<AnalysisLog> {
    let path_too_long = file.path.len() > MAX_FILE_SIZE;
    let content_too_long = file.content.len() > MAX_FILE_SIZE;

    // The log is only built when one of the limits is actually exceeded.
    (path_too_long || content_too_long).then(|| AnalysisLog {
        // TODO: standardize levels
        level: Some(310),
        message: format!("Skipped {}, it is too big.", file.path),
        file: Some(file.path.to_owned().into()),
        engine_id: Some(String::from("marzano")),
        position: Some(Position::first()),
        syntax_tree: None,
        range: None,
        source: None,
    })
}
64 changes: 59 additions & 5 deletions crates/core/src/marzano_context.rs
Original file line number Diff line number Diff line change
@@ -2,7 +2,8 @@ use crate::{
built_in_functions::BuiltIns,
clean::{get_replacement_ranges, replace_cleaned_ranges},
foreign_function_definition::ForeignFunctionDefinition,
marzano_resolved_pattern::MarzanoResolvedPattern,
limits::is_file_too_big,
marzano_resolved_pattern::{MarzanoFile, MarzanoResolvedPattern},
pattern_compiler::file_owner_compiler::FileOwnerCompiler,
problem::MarzanoQueryContext,
text_unparser::apply_effects,
@@ -24,14 +25,18 @@ use marzano_language::{
language::{MarzanoLanguage, Tree},
target_language::TargetLanguage,
};
use marzano_util::runtime::ExecutionContext;
use std::path::PathBuf;
use marzano_util::{
rich_path::{LoadableFile, RichFile},
runtime::ExecutionContext,
};
use std::{borrow::Cow, path::PathBuf};

pub struct MarzanoContext<'a> {
pub pattern_definitions: &'a Vec<PatternDefinition<MarzanoQueryContext>>,
pub predicate_definitions: &'a Vec<PredicateDefinition<MarzanoQueryContext>>,
pub function_definitions: &'a Vec<GritFunctionDefinition<MarzanoQueryContext>>,
pub foreign_function_definitions: &'a Vec<ForeignFunctionDefinition>,
lazy_files: Vec<Box<dyn LoadableFile + 'a>>,
pub files: &'a FileOwners<Tree>,
pub built_ins: &'a BuiltIns,
pub language: &'a TargetLanguage,
@@ -46,6 +51,7 @@ impl<'a> MarzanoContext<'a> {
predicate_definitions: &'a Vec<PredicateDefinition<MarzanoQueryContext>>,
function_definitions: &'a Vec<GritFunctionDefinition<MarzanoQueryContext>>,
foreign_function_definitions: &'a Vec<ForeignFunctionDefinition>,
lazy_files: Vec<Box<dyn LoadableFile + 'a>>,
files: &'a FileOwners<Tree>,
built_ins: &'a BuiltIns,
language: &'a TargetLanguage,
@@ -57,6 +63,7 @@ impl<'a> MarzanoContext<'a> {
predicate_definitions,
function_definitions,
foreign_function_definitions,
lazy_files,
files,
built_ins,
language,
@@ -112,6 +119,48 @@ impl<'a> ExecContext<'a, MarzanoQueryContext> for MarzanoContext<'a> {
self.built_ins.call(call, context, state, logs)
}

/// Makes sure the file referred to by `file` is loaded into `state`.
///
/// Returns `Ok(false)` when the file is skipped because `is_file_too_big`
/// reported it (the log is appended to `logs`), and `Ok(true)` otherwise.
///
/// # Errors
///
/// Propagates errors from `try_into_cow` on the lazy file and from
/// `FileOwnerCompiler::from_matches`.
///
/// # Panics
///
/// Indexing `self.lazy_files` panics if `ptr.file` is out of range, and
/// `self.files.last().unwrap()` panics if `self.files` is empty after the push.
fn load_file(
&self,
file: &MarzanoFile<'a>,
state: &mut State<'a, MarzanoQueryContext>,
logs: &mut AnalysisLogs,
) -> anyhow::Result<bool> {
match file {
MarzanoFile::Resolved(_) => {
// Assume the file is already loaded
}
MarzanoFile::Ptr(ptr) => {
// Nothing to do if the state already has this pointer loaded.
if state.files.is_loaded(ptr) {
return Ok(true);
}
// The pointer's `file` field is used as the position in `lazy_files`.
let index = ptr.file;

let cow: Cow<RichFile> = self.lazy_files[index as usize].try_into_cow()?;

// Oversized files are logged and reported as not loaded.
if let Some(log) = is_file_too_big(&cow) {
logs.push(log);
return Ok(false);
}

// Take ownership so path and content can be moved into the file owner.
let owned = cow.into_owned();

let file = FileOwnerCompiler::from_matches(
owned.path,
owned.content,
None,
false,
self.language,
logs,
)?;
// NOTE(review): when `from_matches` yields `None` nothing is registered in
// `state`, yet the function still falls through to `Ok(true)` — confirm
// this is intended.
if let Some(file) = file {
// Push first, then hand `state` a reference to the entry just stored.
self.files.push(file);
state.files.load_file(ptr, self.files.last().unwrap());
}
}
}
Ok(true)
}

// FIXME: Don't depend on Grit's file handling in context.
fn files(&self) -> &FileOwners<Tree> {
self.files
@@ -130,7 +179,7 @@ impl<'a> ExecContext<'a, MarzanoQueryContext> for MarzanoContext<'a> {
) -> Result<bool> {
let mut parser = self.language().get_parser();

let files = if let Some(files) = binding.get_file_pointers() {
let mut files = if let Some(files) = binding.get_file_pointers() {
files
.iter()
.map(|f| state.files.latest_revision(f))
@@ -142,6 +191,11 @@ impl<'a> ExecContext<'a, MarzanoQueryContext> for MarzanoContext<'a> {
let binding = if files.len() == 1 {
ResolvedPattern::from_file_pointer(*files.last().unwrap())
} else {
// Load all files into memory and collect successful file pointers
files.retain(|file_ptr| {
self.load_file(&MarzanoFile::Ptr(*file_ptr), state, logs)
.unwrap_or(false)
});
ResolvedPattern::from_files(ResolvedPattern::from_list_parts(
files.iter().map(|f| ResolvedPattern::from_file_pointer(*f)),
))
@@ -160,7 +214,7 @@ impl<'a> ExecContext<'a, MarzanoQueryContext> for MarzanoContext<'a> {
suppressed,
};
for file_ptr in files {
let file = state.files.get_file(file_ptr);
let file = state.files.get_file_owner(file_ptr);
let mut match_log = file.matches.borrow_mut();

let filename_path = &file.name;
8 changes: 4 additions & 4 deletions crates/core/src/marzano_resolved_pattern.rs
Original file line number Diff line number Diff line change
@@ -848,7 +848,7 @@ impl<'a> File<'a, MarzanoQueryContext> for MarzanoFile<'a> {
fn name(&self, files: &FileRegistry<'a, MarzanoQueryContext>) -> MarzanoResolvedPattern<'a> {
match self {
Self::Resolved(resolved) => resolved.name.clone(),
Self::Ptr(ptr) => MarzanoResolvedPattern::from_path_binding(&files.get_file(*ptr).name),
Self::Ptr(ptr) => MarzanoResolvedPattern::from_path_binding(files.get_file_name(*ptr)),
}
}

@@ -866,7 +866,7 @@ impl<'a> File<'a, MarzanoQueryContext> for MarzanoFile<'a> {
)))
}
Self::Ptr(ptr) => Ok(ResolvedPattern::from_path_binding(
&files.get_file(*ptr).absolute_path,
files.get_absolute_path(*ptr)?,
)),
}
}
@@ -875,7 +875,7 @@ impl<'a> File<'a, MarzanoQueryContext> for MarzanoFile<'a> {
match self {
Self::Resolved(resolved) => resolved.body.clone(),
Self::Ptr(ptr) => {
let file = &files.get_file(*ptr);
let file = &files.get_file_owner(*ptr);
let root = file.tree.root_node();
let range = root.byte_range();
ResolvedPattern::from_range_binding(range, &file.tree.source)
@@ -887,7 +887,7 @@ impl<'a> File<'a, MarzanoQueryContext> for MarzanoFile<'a> {
match self {
Self::Resolved(resolved) => resolved.body.clone(),
Self::Ptr(ptr) => {
let file = &files.get_file(*ptr);
let file = &files.get_file_owner(*ptr);
ResolvedPattern::from_node_binding(file.tree.root_node())
}
}
Loading

0 comments on commit b4cfadc

Please sign in to comment.