Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option for max wait before terminating interactive process (Cherry-pick of #15767) #15908

Merged
merged 1 commit into from
Jun 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/python/pants/engine/internals/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ def __init__(
remote_parallelism=execution_options.process_execution_remote_parallelism,
child_max_memory=execution_options.process_total_child_memory_usage or 0,
child_default_memory=execution_options.process_per_child_memory_usage,
graceful_shutdown_timeout=execution_options.process_execution_graceful_shutdown_timeout,
)

self._py_scheduler = native_engine.scheduler_create(
Expand Down
14 changes: 14 additions & 0 deletions src/python/pants/option/global_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ class ExecutionOptions:
process_execution_local_enable_nailgun: bool
process_execution_remote_parallelism: int
process_execution_cache_namespace: str | None
process_execution_graceful_shutdown_timeout: int

process_total_child_memory_usage: int | None
process_per_child_memory_usage: int
Expand Down Expand Up @@ -383,6 +384,7 @@ def from_options(
process_execution_local_parallelism=bootstrap_options.process_execution_local_parallelism,
process_execution_remote_parallelism=dynamic_remote_options.parallelism,
process_execution_cache_namespace=bootstrap_options.process_execution_cache_namespace,
process_execution_graceful_shutdown_timeout=bootstrap_options.process_execution_graceful_shutdown_timeout,
process_execution_local_enable_nailgun=bootstrap_options.process_execution_local_enable_nailgun,
process_total_child_memory_usage=bootstrap_options.process_total_child_memory_usage,
process_per_child_memory_usage=bootstrap_options.process_per_child_memory_usage,
Expand Down Expand Up @@ -469,6 +471,7 @@ def from_options(cls, options: OptionValueContainer) -> LocalStoreOptions:
process_cleanup=True,
local_cache=True,
process_execution_local_enable_nailgun=True,
process_execution_graceful_shutdown_timeout=3,
# Remote store setup.
remote_store_address=None,
remote_store_headers={
Expand Down Expand Up @@ -1144,6 +1147,17 @@ class BootstrapOptions:
help="Whether or not to use nailgun to run JVM requests that are marked as supporting nailgun.",
advanced=True,
)
process_execution_graceful_shutdown_timeout = IntOption(
"--process-execution-graceful-shutdown-timeout",
default=DEFAULT_EXECUTION_OPTIONS.process_execution_graceful_shutdown_timeout,
help=softwrap(
f"""
The time in seconds to wait when gracefully shutting down an interactive process (such
as one opened using `{bin_name()} run`) before killing it.
"""
),
advanced=True,
)
remote_execution = BoolOption(
"--remote-execution",
default=DEFAULT_EXECUTION_OPTIONS.remote_execution,
Expand Down
12 changes: 7 additions & 5 deletions src/rust/engine/process_execution/src/children.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@ use nix::unistd::getpgid;
use nix::unistd::Pid;
use tokio::process::{Child, Command};

// We keep this relatively low to start to encourage interactivity. If users end up needing longer graceful
// shutdown periods, we can expose it as a per-process setting.
const GRACEFUL_SHUTDOWN_MAX_WAIT_TIME: time::Duration = time::Duration::from_secs(3);
const GRACEFUL_SHUTDOWN_POLL_TIME: time::Duration = time::Duration::from_millis(50);

/// A child process running in its own PGID, with a drop implementation that will kill that
Expand All @@ -20,11 +17,15 @@ const GRACEFUL_SHUTDOWN_POLL_TIME: time::Duration = time::Duration::from_millis(
/// signals in sequence for https://github.com/pantsbuild/pants/issues/13230.
pub struct ManagedChild {
child: Child,
graceful_shutdown_timeout: time::Duration,
killed: AtomicBool,
}

impl ManagedChild {
pub fn spawn(mut command: Command) -> Result<Self, String> {
pub fn spawn(
mut command: Command,
graceful_shutdown_timeout: time::Duration,
) -> Result<Self, String> {
// Set `kill_on_drop` to encourage `tokio` to `wait` the process via its own "reaping"
// mechanism:
// see https://docs.rs/tokio/1.14.0/tokio/process/struct.Command.html#method.kill_on_drop
Expand All @@ -49,6 +50,7 @@ impl ManagedChild {
.map_err(|e| format!("Error executing interactive process: {}", e))?;
Ok(Self {
child,
graceful_shutdown_timeout,
killed: AtomicBool::new(false),
})
}
Expand Down Expand Up @@ -115,7 +117,7 @@ impl ManagedChild {
/// This method *will* block the current thread but will do so for a bounded amount of time.
pub fn graceful_shutdown_sync(&mut self) -> Result<(), String> {
self.signal_pg(signal::Signal::SIGINT)?;
match self.wait_for_child_exit_sync(GRACEFUL_SHUTDOWN_MAX_WAIT_TIME) {
match self.wait_for_child_exit_sync(self.graceful_shutdown_timeout) {
Ok(true) => {
// process was gracefully shutdown
self.killed.store(true, Ordering::SeqCst);
Expand Down
3 changes: 3 additions & 0 deletions src/rust/engine/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ pub struct Core {
pub watcher: Option<Arc<InvalidationWatcher>>,
pub build_root: PathBuf,
pub local_parallelism: usize,
pub graceful_shutdown_timeout: Duration,
pub sessions: Sessions,
pub named_caches_dir: PathBuf,
}
Expand Down Expand Up @@ -106,6 +107,7 @@ pub struct ExecutionStrategyOptions {
pub remote_cache_write: bool,
pub child_max_memory: usize,
pub child_default_memory: usize,
pub graceful_shutdown_timeout: Duration,
}

#[derive(Clone, Debug)]
Expand Down Expand Up @@ -497,6 +499,7 @@ impl Core {
build_root,
watcher,
local_parallelism: exec_strategy_opts.local_parallelism,
graceful_shutdown_timeout: exec_strategy_opts.graceful_shutdown_timeout,
sessions,
named_caches_dir,
})
Expand Down
2 changes: 2 additions & 0 deletions src/rust/engine/src/externs/interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ impl PyExecutionStrategyOptions {
remote_cache_write: bool,
child_default_memory: usize,
child_max_memory: usize,
graceful_shutdown_timeout: usize,
) -> Self {
Self(ExecutionStrategyOptions {
local_parallelism,
Expand All @@ -259,6 +260,7 @@ impl PyExecutionStrategyOptions {
remote_cache_write,
child_default_memory,
child_max_memory,
graceful_shutdown_timeout: Duration::from_secs(graceful_shutdown_timeout.try_into().unwrap()),
})
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/rust/engine/src/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,7 @@ fn interactive_process(
.try_clone_as_file()
.map_err(|e| format!("Couldn't clone stderr: {}", e))?,
));
let mut subprocess = ManagedChild::spawn(command)?;
let mut subprocess = ManagedChild::spawn(command, context.core.graceful_shutdown_timeout)?;
tokio::select! {
_ = session.cancelled() => {
// The Session was cancelled: attempt to kill the process group / process, and
Expand Down