Skip to content

Commit

Permalink
Remove memoization of the Paths intrinsic to reduce memory usage (C…
Browse files Browse the repository at this point in the history
…herry-pick of #19689) (#19716)

Remove memoization of the `Paths` intrinsic, since it is very memory
intensive, and relatively cheap to recompute from its memoized inputs.

Improves performance for `pants dependencies ::` in the Pants repo by
6%, and eliminates the `paths` Node line item mentioned on #19053, since
it will no longer be held in memory by memoization.

Fixes #19053.

Co-authored-by: Stu Hood <[email protected]>
  • Loading branch information
WorkerPants and stuhood authored Aug 30, 2023
1 parent 3221395 commit 68b5b42
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 105 deletions.
48 changes: 41 additions & 7 deletions src/rust/engine/src/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ use std::time::Duration;
use crate::context::Context;
use crate::externs::fs::{PyAddPrefix, PyFileDigest, PyMergeDigests, PyRemovePrefix};
use crate::nodes::{
lift_directory_digest, task_side_effected, DownloadedFile, ExecuteProcess, NodeResult, Paths,
RunId, SessionValues, Snapshot,
lift_directory_digest, task_side_effected, unmatched_globs_additional_context, DownloadedFile,
ExecuteProcess, NodeResult, RunId, SessionValues, Snapshot,
};
use crate::python::{throw, Key, Value};
use crate::tasks::Intrinsic;
Expand All @@ -36,7 +36,10 @@ use pyo3::{IntoPy, PyAny, PyRef, Python, ToPyObject};
use tokio::process;

use docker::docker::{ImagePullPolicy, ImagePullScope, DOCKER, IMAGE_PULL_CACHE};
use fs::{DigestTrie, DirectoryDigest, Entry, RelativePath, SymlinkBehavior, TypedPath};
use fs::{
DigestTrie, DirectoryDigest, Entry, GlobMatching, PathStat, RelativePath, SymlinkBehavior,
TypedPath,
};
use hashing::{Digest, EMPTY_DIGEST};
use process_execution::local::{
apply_chroot, create_sandbox, prepare_workdir, setup_run_sh_script, KeepSandboxes,
Expand Down Expand Up @@ -411,10 +414,41 @@ fn path_globs_to_paths(
Snapshot::lift_path_globs(py_path_globs)
})
.map_err(|e| throw(format!("Failed to parse PathGlobs: {e}")))?;
let paths = context.get(Paths::from_path_globs(path_globs)).await?;
Ok(Python::with_gil(|py| {
Paths::store_paths(py, &core, &paths)
})?)

let path_globs = path_globs.parse().map_err(throw)?;
let path_stats = context
.expand_globs(
path_globs,
SymlinkBehavior::Oblivious,
unmatched_globs_additional_context(),
)
.await?;

Python::with_gil(|py| {
let mut files = Vec::new();
let mut dirs = Vec::new();
for ps in path_stats.iter() {
match ps {
PathStat::File { path, .. } => {
files.push(Snapshot::store_path(py, path)?);
}
PathStat::Link { path, .. } => {
panic!("Paths shouldn't be symlink-aware {path:?}");
}
PathStat::Dir { path, .. } => {
dirs.push(Snapshot::store_path(py, path)?);
}
}
}
Ok(externs::unsafe_call(
py,
core.types.paths,
&[
externs::store_tuple(py, files),
externs::store_tuple(py, dirs),
],
))
})
}
.boxed()
}
Expand Down
103 changes: 5 additions & 98 deletions src/rust/engine/src/nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ use crate::python::{display_sorted_in_parens, throw, Failure, Key, Params, TypeI
use crate::tasks::{self, Rule};
use fs::{
self, DigestEntry, Dir, DirectoryDigest, DirectoryListing, File, FileContent, FileEntry,
GlobExpansionConjunction, GlobMatching, Link, PathGlobs, PathStat, PreparedPathGlobs,
RelativePath, StrictGlobMatching, SymlinkBehavior, SymlinkEntry, Vfs,
GlobExpansionConjunction, GlobMatching, Link, PathGlobs, PreparedPathGlobs, RelativePath,
StrictGlobMatching, SymlinkBehavior, SymlinkEntry, Vfs,
};
use process_execution::{
self, CacheName, InputDigests, Process, ProcessCacheScope, ProcessExecutionStrategy,
Expand Down Expand Up @@ -639,7 +639,7 @@ impl From<Scandir> for NodeKey {
}
}

fn unmatched_globs_additional_context() -> Option<String> {
pub fn unmatched_globs_additional_context() -> Option<String> {
let url = Python::with_gil(|py| {
externs::doc_url(
py,
Expand All @@ -653,75 +653,6 @@ fn unmatched_globs_additional_context() -> Option<String> {
))
}

///
/// A node that captures Vec<PathStat> for resolved files/dirs from PathGlobs.
///
/// This is similar to the Snapshot node, but avoids digesting the files and writing to LMDB store
/// as a performance optimization.
///
#[derive(Clone, Debug, DeepSizeOf, Eq, Hash, PartialEq)]
pub struct Paths {
path_globs: PathGlobs,
}

impl Paths {
pub fn from_path_globs(path_globs: PathGlobs) -> Paths {
Paths { path_globs }
}

async fn create(context: Context, path_globs: PreparedPathGlobs) -> NodeResult<Vec<PathStat>> {
context
.expand_globs(
path_globs,
SymlinkBehavior::Oblivious,
unmatched_globs_additional_context(),
)
.await
}

pub fn store_paths(py: Python, core: &Arc<Core>, item: &[PathStat]) -> Result<Value, String> {
let mut files = Vec::new();
let mut dirs = Vec::new();
for ps in item.iter() {
match ps {
PathStat::File { path, .. } => {
files.push(Snapshot::store_path(py, path)?);
}
PathStat::Link { path, .. } => {
panic!("Paths shouldn't be symlink-aware {path:?}");
}
PathStat::Dir { path, .. } => {
dirs.push(Snapshot::store_path(py, path)?);
}
}
}
Ok(externs::unsafe_call(
py,
core.types.paths,
&[
externs::store_tuple(py, files),
externs::store_tuple(py, dirs),
],
))
}

async fn run_node(self, context: Context) -> NodeResult<Arc<Vec<PathStat>>> {
let path_globs = self.path_globs.parse().map_err(throw)?;
let path_stats = Self::create(context, path_globs).await?;
Ok(Arc::new(path_stats))
}
}

impl CompoundNode<NodeKey> for Paths {
type Item = Arc<Vec<PathStat>>;
}

impl From<Paths> for NodeKey {
fn from(n: Paths) -> Self {
NodeKey::Paths(n)
}
}

#[derive(Clone, Debug, DeepSizeOf, Eq, Hash, PartialEq)]
pub struct SessionValues;

Expand Down Expand Up @@ -828,7 +759,7 @@ impl Snapshot {
Ok(Value::new(py_snapshot.into_py(py)))
}

fn store_path(py: Python, item: &Path) -> Result<Value, String> {
pub fn store_path(py: Python, item: &Path) -> Result<Value, String> {
if let Some(p) = item.as_os_str().to_str() {
Ok(externs::store_utf8(py, p))
} else {
Expand Down Expand Up @@ -1301,7 +1232,6 @@ pub enum NodeKey {
Scandir(Scandir),
Select(Box<Select>),
Snapshot(Snapshot),
Paths(Paths),
SessionValues(SessionValues),
RunId(RunId),
Task(Box<Task>),
Expand All @@ -1323,7 +1253,6 @@ impl NodeKey {
| &NodeKey::SessionValues { .. }
| &NodeKey::RunId { .. }
| &NodeKey::Snapshot { .. }
| &NodeKey::Paths { .. }
| &NodeKey::Task { .. }
| &NodeKey::DownloadedFile { .. } => None,
}
Expand Down Expand Up @@ -1352,7 +1281,6 @@ impl NodeKey {
NodeKey::Task(ref task) => &task.task.as_ref().display_info.name,
NodeKey::ExecuteProcess(..) => "process",
NodeKey::Snapshot(..) => "snapshot",
NodeKey::Paths(..) => "paths",
NodeKey::DigestFile(..) => "digest_file",
NodeKey::DownloadedFile(..) => "downloaded_file",
NodeKey::ReadLink(..) => "read_link",
Expand Down Expand Up @@ -1394,7 +1322,6 @@ impl NodeKey {
Some(desc)
}
NodeKey::Snapshot(ref s) => Some(format!("Snapshotting: {}", s.path_globs)),
NodeKey::Paths(ref s) => Some(format!("Finding files: {}", s.path_globs)),
NodeKey::ExecuteProcess(epr) => {
// NB: See Self::workunit_level for more information on why this is prefixed.
Some(format!("Scheduling: {}", epr.process.description))
Expand Down Expand Up @@ -1495,7 +1422,6 @@ impl Node for NodeKey {
NodeKey::Scandir(n) => n.run_node(context).await.map(NodeOutput::DirectoryListing),
NodeKey::Select(n) => n.run_node(context).await.map(NodeOutput::Value),
NodeKey::Snapshot(n) => n.run_node(context).await.map(NodeOutput::Snapshot),
NodeKey::Paths(n) => n.run_node(context).await.map(NodeOutput::Paths),
NodeKey::SessionValues(n) => n.run_node(context).await.map(NodeOutput::Value),
NodeKey::RunId(n) => n.run_node(context).await.map(NodeOutput::Value),
NodeKey::Task(n) => n.run_node(context, workunit).await.map(NodeOutput::Value),
Expand Down Expand Up @@ -1615,7 +1541,6 @@ impl Display for NodeKey {
NodeKey::Snapshot(s) => write!(f, "Snapshot({})", s.path_globs),
&NodeKey::SessionValues(_) => write!(f, "SessionValues"),
&NodeKey::RunId(_) => write!(f, "RunId"),
NodeKey::Paths(s) => write!(f, "Paths({})", s.path_globs),
}
}
}
Expand All @@ -1637,10 +1562,6 @@ pub enum NodeOutput {
DirectoryListing(Arc<DirectoryListing>),
LinkDest(LinkDest),
ProcessResult(Box<ProcessResult>),
// Allow clippy::rc_buffer due to non-trivial issues that would arise in using the
// suggested Arc<[PathStat]> type. See https://github.com/rust-lang/rust-clippy/issues/6170
#[allow(clippy::rc_buffer)]
Paths(Arc<Vec<PathStat>>),
Value(Value),
}

Expand All @@ -1661,10 +1582,7 @@ impl NodeOutput {
digests.push(p.result.stderr_digest);
digests
}
NodeOutput::DirectoryListing(_)
| NodeOutput::LinkDest(_)
| NodeOutput::Paths(_)
| NodeOutput::Value(_) => vec![],
NodeOutput::DirectoryListing(_) | NodeOutput::LinkDest(_) | NodeOutput::Value(_) => vec![],
}
}
}
Expand Down Expand Up @@ -1702,17 +1620,6 @@ impl TryFrom<NodeOutput> for store::Snapshot {
}
}

impl TryFrom<NodeOutput> for Arc<Vec<PathStat>> {
type Error = ();

fn try_from(nr: NodeOutput) -> Result<Self, ()> {
match nr {
NodeOutput::Paths(v) => Ok(v),
_ => Err(()),
}
}
}

impl TryFrom<NodeOutput> for ProcessResult {
type Error = ();

Expand Down

0 comments on commit 68b5b42

Please sign in to comment.