From d2cd52188ab0350a194dd4ae8e74f030ebbe3d31 Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Tue, 8 Dec 2020 20:33:58 -0800 Subject: [PATCH] Revert "Move graph cycle detection to rust. (#11202)" (cherrypick of #11272) (#11277) #11202 introduced a lot of contention on the GIL that was only obvious when larger numbers of targets were being built. #11270 is a holistic fix for that issue, but it is currently blocked on #11269. In the meantime, we will land #11271 to dodge the original issue in #11201 to get us back to a medium-slow-but-working state, and then follow up on #11269 and #11270 to get us to the best place. This reverts commit fabcb458d50364e24923ad28cc552f4dbfd8c9f6. [ci skip-build-wheels] --- src/python/pants/engine/internals/graph.py | 45 ++++++++++++---- .../pants/engine/internals/native_engine.pyi | 6 +-- src/rust/engine/src/core.rs | 54 ------------------- src/rust/engine/src/externs/interface.rs | 39 +------------- 4 files changed, 37 insertions(+), 107 deletions(-) diff --git a/src/python/pants/engine/internals/graph.py b/src/python/pants/engine/internals/graph.py index c549347bc27..1724e19d920 100644 --- a/src/python/pants/engine/internals/graph.py +++ b/src/python/pants/engine/internals/graph.py @@ -7,7 +7,7 @@ import os.path from dataclasses import dataclass from pathlib import PurePath -from typing import Dict, Iterable, List, NamedTuple, Optional, Sequence, Tuple, Type +from typing import Dict, Iterable, List, NamedTuple, Optional, Sequence, Set, Tuple, Type from pants.base.exceptions import ResolveError from pants.base.specs import ( @@ -37,7 +37,6 @@ Snapshot, SourcesSnapshot, ) -from pants.engine.internals.native_engine import cyclic_paths from pants.engine.internals.target_adaptor import TargetAdaptor from pants.engine.rules import Get, MultiGet, collect_rules, rule from pants.engine.target import ( @@ -248,18 +247,21 @@ def __init__(self, subject: Address, path: Tuple[Address, ...]) -> None: self.path = path -def _detect_cycles(dependency_mapping: Dict[Address, Tuple[Address, ...]]) -> None: - for path in cyclic_paths(dependency_mapping): - address = path[-1] +def _detect_cycles( + roots: Tuple[Address, ...], dependency_mapping: Dict[Address, Tuple[Address, ...]] +) -> None: + path_stack: OrderedSet[Address] = OrderedSet() + visited: Set[Address] = set() + def maybe_report_cycle(address: Address) -> None: # NB: File-level dependencies are cycle tolerant. - if not address.is_base_target: + if not address.is_base_target or address not in path_stack: return # The path of the cycle is shorter than the entire path to the cycle: if the suffix of # the path representing the cycle contains a file dep, it is ignored. in_cycle = False - for path_address in path: + for path_address in path_stack: if in_cycle and not path_address.is_base_target: # There is a file address inside the cycle: do not report it. return @@ -271,7 +273,27 @@ def _detect_cycles(dependency_mapping: Dict[Address, Tuple[Address, ...]]) -> No # the address in question. in_cycle = path_address == address # If we did not break out early, it's because there were no file addresses in the cycle. - raise CycleException(address, path) + raise CycleException(address, (*path_stack, address)) + + def visit(address: Address): + if address in visited: + maybe_report_cycle(address) + return + path_stack.add(address) + visited.add(address) + + for dep_address in dependency_mapping[address]: + visit(dep_address) + + path_stack.remove(address) + + for root in roots: + visit(root) + if path_stack: + raise AssertionError( + f"The stack of visited nodes should have been empty at the end of recursion, " + f"but it still contained: {path_stack}" + ) @rule(desc="Resolve transitive targets") @@ -310,7 +332,10 @@ async def transitive_targets(request: TransitiveTargetsRequest) -> TransitiveTar ) visited.update(queued) - _detect_cycles(dependency_mapping) + # NB: We use `roots_as_targets` to get the root addresses, rather than `request.roots`. This + # is because expanding from the `Addresses` -> `Targets` may have resulted in generated + # subtargets being used, so we need to use `roots_as_targets` to have this expansion. + _detect_cycles(tuple(t.address for t in roots_as_targets), dependency_mapping) # Apply any transitive excludes (`!!` ignores). transitive_excludes: FrozenOrderedSet[Target] = FrozenOrderedSet() @@ -367,7 +392,7 @@ async def transitive_targets_lite(request: TransitiveTargetsRequestLite) -> Tran # NB: We use `roots_as_targets` to get the root addresses, rather than `request.roots`. This # is because expanding from the `Addresses` -> `Targets` may have resulted in generated # subtargets being used, so we need to use `roots_as_targets` to have this expansion. - _detect_cycles(dependency_mapping) + _detect_cycles(tuple(t.address for t in roots_as_targets), dependency_mapping) # Apply any transitive excludes (`!!` ignores). wrapped_transitive_excludes = await MultiGet( diff --git a/src/python/pants/engine/internals/native_engine.pyi b/src/python/pants/engine/internals/native_engine.pyi index 8a98601c77d..e3e4ec0d75a 100644 --- a/src/python/pants/engine/internals/native_engine.pyi +++ b/src/python/pants/engine/internals/native_engine.pyi @@ -1,13 +1,9 @@ -from typing import Any, Dict, Tuple, TypeVar +from typing import Any # TODO: black and flake8 disagree about the content of this file: # see https://github.com/psf/black/issues/1548 # flake8: noqa: E302 -T = TypeVar("T") - -def cyclic_paths(adjacencies: Dict[T, Tuple[T, ...]]) -> Tuple[Tuple[T]]: ... - class PyDigest: def __init__(self, fingerprint: str, serialized_bytes_length: int) -> None: ... @property diff --git a/src/rust/engine/src/core.rs b/src/rust/engine/src/core.rs index 20d22f765f7..b443fc9314d 100644 --- a/src/rust/engine/src/core.rs +++ b/src/rust/engine/src/core.rs @@ -3,7 +3,6 @@ use fnv::FnvHasher; -use std::collections::HashSet; use std::convert::AsRef; use std::ops::Deref; use std::sync::Arc; @@ -12,7 +11,6 @@ use std::{fmt, hash}; use crate::externs; use cpython::{FromPyObject, PyClone, PyDict, PyErr, PyObject, PyResult, Python, ToPyObject}; -use indexmap::{IndexMap, IndexSet}; use smallvec::SmallVec; pub type FNV = hash::BuildHasherDefault; @@ -412,55 +410,3 @@ pub fn throw(msg: &str) -> Failure { engine_traceback: Vec::new(), } } - -/// -/// Given a graph represented as an adjacency list, return a collection of cyclic paths. -/// -pub fn cyclic_paths(adjacencies: IndexMap>) -> Vec> { - let mut cyclic_paths = Vec::new(); - let mut path_stack = IndexSet::new(); - let mut visited = HashSet::new(); - - for node in adjacencies.keys() { - cyclic_paths_visit( - *node, - &adjacencies, - &mut cyclic_paths, - &mut path_stack, - &mut visited, - ); - } - - cyclic_paths -} - -fn cyclic_paths_visit( - node: N, - adjacencies: &IndexMap>, - cyclic_paths: &mut Vec>, - path_stack: &mut IndexSet, - visited: &mut HashSet, -) { - if visited.contains(&node) { - if path_stack.contains(&node) { - cyclic_paths.push( - path_stack - .iter() - .cloned() - .chain(std::iter::once(node)) - .collect::>(), - ); - } - return; - } - path_stack.insert(node); - visited.insert(node); - - if let Some(adjacent) = adjacencies.get(&node) { - for dep_node in adjacent { - cyclic_paths_visit(*dep_node, adjacencies, cyclic_paths, path_stack, visited); - } - } - - path_stack.remove(&node); -} diff --git a/src/rust/engine/src/externs/interface.rs b/src/rust/engine/src/externs/interface.rs index b443200fa62..071d091443a 100644 --- a/src/rust/engine/src/externs/interface.rs +++ b/src/rust/engine/src/externs/interface.rs @@ -61,7 +61,6 @@ use futures::future::FutureExt; use futures::future::{self as future03, TryFutureExt}; use futures01::Future; use hashing::{Digest, EMPTY_DIGEST}; -use indexmap::IndexMap; use log::{self, debug, error, warn, Log}; use logging::logger::PANTS_LOGGER; use logging::{Destination, Logger, PythonLogLevel}; @@ -73,8 +72,7 @@ use workunit_store::{Workunit, WorkunitState}; use crate::{ externs, nodes, Core, ExecutionRequest, ExecutionStrategyOptions, ExecutionTermination, Failure, - Function, Intrinsics, Key, Params, RemotingOptions, Rule, Scheduler, Session, Tasks, Types, - Value, + Function, Intrinsics, Params, RemotingOptions, Rule, Scheduler, Session, Tasks, Types, Value, }; py_exception!(native_engine, PollTimeout); @@ -87,8 +85,6 @@ py_module_initializer!(native_engine, |py, m| { m.add(py, "default_config_path", py_fn!(py, default_config_path()))?; - m.add(py, "cyclic_paths", py_fn!(py, cyclic_paths(a: PyDict)))?; - m.add( py, "init_logging", @@ -1649,39 +1645,6 @@ fn default_config_path(py: Python) -> CPyResult { }) } -fn cyclic_paths(py: Python, adjacencies: PyDict) -> CPyResult> { - let adjacencies = adjacencies - .items(py) - .into_iter() - .map(|(k, v)| { - let node = externs::key_for(k.into())?; - let adjacent = v - .extract::>(py)? - .into_iter() - .map(|v| externs::key_for(v.into())) - .collect::, _>>()?; - let res: Result<_, PyErr> = Ok((node, adjacent)); - res - }) - .collect::>, _>>()?; - let paths = py.allow_threads(move || crate::core::cyclic_paths(adjacencies)); - - Ok( - paths - .into_iter() - .map(|path| { - let gil = Python::acquire_gil(); - let path_vec = path - .iter() - .map(externs::val_for) - .map(|node| node.consume_into_py_object(gil.python())) - .collect::>(); - PyTuple::new(gil.python(), &path_vec) - }) - .collect(), - ) -} - fn init_logging( py: Python, level: u64,