From c98aa558c7a924129e83e2246f53a93bf6805b52 Mon Sep 17 00:00:00 2001 From: Dominic Burkart Date: Tue, 13 Jun 2023 14:39:54 +0200 Subject: [PATCH 1/5] feat(error code when shutdown fails): set exit flag to failure on shutdown --- lib/vector-vrl/cli/src/main.rs | 5 +++-- src/app.rs | 17 +++++++++++------ src/main.rs | 10 ++++++---- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/lib/vector-vrl/cli/src/main.rs b/lib/vector-vrl/cli/src/main.rs index 5f08c6f09b26f..c02ae28b1ade3 100644 --- a/lib/vector-vrl/cli/src/main.rs +++ b/lib/vector-vrl/cli/src/main.rs @@ -1,9 +1,10 @@ use clap::Parser; use vrl::cli::{cmd::cmd, Opts}; +use std::process::ExitCode; -fn main() { +fn main() -> ExitCode { let mut functions = vrl::stdlib::all(); functions.extend(vector_vrl_functions::all()); - std::process::exit(cmd(&Opts::parse(), functions)); + cmd(&Opts::parse(), functions) } diff --git a/src/app.rs b/src/app.rs index e599499afe3c9..eeb7821b88198 100644 --- a/src/app.rs +++ b/src/app.rs @@ -1,5 +1,8 @@ #![allow(missing_docs)] -use std::{collections::HashMap, num::NonZeroUsize, path::PathBuf, time::Duration}; +use std::{ + collections::HashMap, num::NonZeroUsize, path::PathBuf, process::ExitCode as Exit, + time::Duration, +}; use exitcode::ExitCode; use futures::StreamExt; @@ -145,10 +148,10 @@ impl ApplicationConfig { } impl Application { - pub fn run() { + pub fn run() -> Exit { let (runtime, app) = Self::prepare_start().unwrap_or_else(|code| std::process::exit(code)); - runtime.block_on(app.run()); + runtime.block_on(app.run()) } pub fn prepare_start() -> Result<(Runtime, StartedApplication), ExitCode> { @@ -242,7 +245,7 @@ pub struct StartedApplication { } impl StartedApplication { - pub async fn run(self) { + pub async fn run(self) -> Exit { self.main().await.shutdown().await } @@ -317,7 +320,7 @@ pub struct FinishedApplication { } impl FinishedApplication { - pub async fn shutdown(self) { + pub async fn shutdown(self) -> Exit { let FinishedApplication { signal, mut signal_rx, @@ -335,11 +338,12 @@ impl FinishedApplication { SignalTo::Shutdown => { emit!(VectorStopped); tokio::select! { - _ = topology_controller.stop() => (), // Graceful shutdown finished + _ = topology_controller.stop() => Exit::SUCCESS, // Graceful shutdown finished _ = signal_rx.recv() => { // It is highly unlikely that this event will exit from topology. emit!(VectorQuit); // Dropping the shutdown future will immediately shut the server down + Exit::FAILURE } } } @@ -347,6 +351,7 @@ impl FinishedApplication { // It is highly unlikely that this event will exit from topology. emit!(VectorQuit); drop(topology_controller); + Exit::FAILURE } _ => unreachable!(), } diff --git a/src/main.rs b/src/main.rs index 1859eff381c06..7efb8017dac32 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,8 +3,10 @@ extern crate vector; use vector::app::Application; +use std::process::ExitCode; + #[cfg(unix)] -fn main() { +fn main() -> ExitCode { #[cfg(feature = "allocation-tracing")] { use crate::vector::internal_telemetry::allocations::{ @@ -35,14 +37,14 @@ fn main() { } } - Application::run(); + Application::run() } #[cfg(windows)] -pub fn main() { +pub fn main() -> ExitCode { // We need to be able to run vector in User Interactive mode. We first try // to run vector as a service. If we fail, we consider that we are in // interactive mode and then fallback to console mode. See // https://docs.microsoft.com/en-us/dotnet/api/system.environment.userinteractive?redirectedfrom=MSDN&view=netcore-3.1#System_Environment_UserInteractive - vector::vector_windows::run().unwrap_or_else(|_| Application::run()); + vector::vector_windows::run().unwrap_or_else(|_| Application::run()) } From 2c354edf8e5cc9e5da8c4dbfc3721425d24d4e9d Mon Sep 17 00:00:00 2001 From: Dominic Burkart Date: Tue, 13 Jun 2023 15:33:21 +0200 Subject: [PATCH 2/5] revert cli changes as the exit code is already propagated --- lib/vector-vrl/cli/src/main.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/vector-vrl/cli/src/main.rs b/lib/vector-vrl/cli/src/main.rs index c02ae28b1ade3..5f08c6f09b26f 100644 --- a/lib/vector-vrl/cli/src/main.rs +++ b/lib/vector-vrl/cli/src/main.rs @@ -1,10 +1,9 @@ use clap::Parser; use vrl::cli::{cmd::cmd, Opts}; -use std::process::ExitCode; -fn main() -> ExitCode { +fn main() { let mut functions = vrl::stdlib::all(); functions.extend(vector_vrl_functions::all()); - cmd(&Opts::parse(), functions) + std::process::exit(cmd(&Opts::parse(), functions)); } From 088861e216c51686d8360a8cf6578fbfc4da3367 Mon Sep 17 00:00:00 2001 From: Dominic Burkart Date: Mon, 19 Jun 2023 15:30:54 +0200 Subject: [PATCH 3/5] return error on windows if graceful shutdown fails --- src/vector_windows.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/vector_windows.rs b/src/vector_windows.rs index 0473dfc71c364..4410f63600076 100644 --- a/src/vector_windows.rs +++ b/src/vector_windows.rs @@ -17,6 +17,7 @@ const SERVICE_NAME: &str = "vector"; const SERVICE_TYPE: ServiceType = ServiceType::OWN_PROCESS; const NO_ERROR: u32 = 0; +const ERROR: u32 = 121; pub mod service_control { use std::{ffi::OsString, fmt, fmt::Formatter, time::Duration}; @@ -398,14 +399,21 @@ fn run_service(_arguments: Vec) -> Result<()> { process_id: None, })?; - runtime.block_on(app.run()); + let program_completion_status = runtime.block_on(app.run()); // Tell the system that service has stopped. status_handle.set_service_status(ServiceStatus { service_type: SERVICE_TYPE, current_state: ServiceState::Stopped, controls_accepted: ServiceControlAccept::empty(), - exit_code: ServiceExitCode::Win32(NO_ERROR), + exit_code: { + if program_completion_status.id() == 0 { + ServiceExitCode::Win32(NO_ERROR) + } else { + // we could not gracefully shut down in time, likely due to timeout. + ServiceExitCode::Win32(ERROR) + } + }, checkpoint: 0, wait_hint: Duration::default(), process_id: None, From 3fc8daba8186e6e59b6f08f4c008d2aeeb44e255 Mon Sep 17 00:00:00 2001 From: Dominic Burkart Date: Tue, 20 Jun 2023 10:09:13 +0200 Subject: [PATCH 4/5] refactor internal of ExitCode to ExitStatus to support the windows service api --- src/app.rs | 20 ++++++++++++-------- src/main.rs | 7 +++++-- src/vector_windows.rs | 9 +++++---- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/app.rs b/src/app.rs index eeb7821b88198..6dca25555f3c7 100644 --- a/src/app.rs +++ b/src/app.rs @@ -1,7 +1,6 @@ #![allow(missing_docs)] use std::{ - collections::HashMap, num::NonZeroUsize, path::PathBuf, process::ExitCode as Exit, - time::Duration, + collections::HashMap, num::NonZeroUsize, path::PathBuf, process::ExitStatus, time::Duration, }; use exitcode::ExitCode; @@ -35,6 +34,11 @@ use crate::{ trace, }; +#[cfg(unix)] +use std::os::unix::process::ExitStatusExt; +#[cfg(windows)] +use std::os::windows::process::ExitStatusExt; + pub static WORKER_THREADS: OnceNonZeroUsize = OnceNonZeroUsize::new(); use crate::internal_events::{VectorQuit, VectorStarted, VectorStopped}; @@ -148,7 +152,7 @@ impl ApplicationConfig { } impl Application { - pub fn run() -> Exit { + pub fn run() -> ExitStatus { let (runtime, app) = Self::prepare_start().unwrap_or_else(|code| std::process::exit(code)); runtime.block_on(app.run()) @@ -245,7 +249,7 @@ pub struct StartedApplication { } impl StartedApplication { - pub async fn run(self) -> Exit { + pub async fn run(self) -> ExitStatus { self.main().await.shutdown().await } @@ -320,7 +324,7 @@ pub struct FinishedApplication { } impl FinishedApplication { - pub async fn shutdown(self) -> Exit { + pub async fn shutdown(self) -> ExitStatus { let FinishedApplication { signal, mut signal_rx, @@ -338,12 +342,12 @@ impl FinishedApplication { SignalTo::Shutdown => { emit!(VectorStopped); tokio::select! { - _ = topology_controller.stop() => Exit::SUCCESS, // Graceful shutdown finished + _ = topology_controller.stop() => ExitStatus::from_raw(exitcode::OK as u32), // Graceful shutdown finished _ = signal_rx.recv() => { // It is highly unlikely that this event will exit from topology. emit!(VectorQuit); // Dropping the shutdown future will immediately shut the server down - Exit::FAILURE + ExitStatus::from_raw(exitcode::UNAVAILABLE as u32) } } } @@ -351,7 +355,7 @@ impl FinishedApplication { // It is highly unlikely that this event will exit from topology. emit!(VectorQuit); drop(topology_controller); - Exit::FAILURE + ExitStatus::from_raw(exitcode::UNAVAILABLE as u32) } _ => unreachable!(), } diff --git a/src/main.rs b/src/main.rs index 7efb8017dac32..66818155ab9ea 100644 --- a/src/main.rs +++ b/src/main.rs @@ -37,7 +37,8 @@ fn main() -> ExitCode { } } - Application::run() + let exit_code = Application::run().code().unwrap_or(exitcode::UNAVAILABLE) as u8; + ExitCode::from(exit_code) } #[cfg(windows)] @@ -46,5 +47,7 @@ pub fn main() -> ExitCode { // to run vector as a service. If we fail, we consider that we are in // interactive mode and then fallback to console mode. See // https://docs.microsoft.com/en-us/dotnet/api/system.environment.userinteractive?redirectedfrom=MSDN&view=netcore-3.1#System_Environment_UserInteractive - vector::vector_windows::run().unwrap_or_else(|_| Application::run()) + let exit_code = vector::vector_windows::run() + .unwrap_or_else(|_| Application::run().code().unwrap_or(exitcode::UNAVAILABLE)); + ExitCode::from(exit_code as u8) } diff --git a/src/vector_windows.rs b/src/vector_windows.rs index 4410f63600076..5ba3548ce5a25 100644 --- a/src/vector_windows.rs +++ b/src/vector_windows.rs @@ -362,8 +362,9 @@ fn win_main(arguments: Vec) { if let Err(_e) = run_service(arguments) {} } -pub fn run() -> Result<()> { - service_dispatcher::start(SERVICE_NAME, ffi_service_main) +pub fn run() -> Result { + service_dispatcher::start(SERVICE_NAME, ffi_service_main).map(|()| 0_i32) + // Always returns 0 exit code as errors are handled by the service dispatcher. } fn run_service(_arguments: Vec) -> Result<()> { @@ -407,10 +408,10 @@ fn run_service(_arguments: Vec) -> Result<()> { current_state: ServiceState::Stopped, controls_accepted: ServiceControlAccept::empty(), exit_code: { - if program_completion_status.id() == 0 { + if program_completion_status.success() { ServiceExitCode::Win32(NO_ERROR) } else { - // we could not gracefully shut down in time, likely due to timeout. + // we didn't gracefully shutdown within grace period. ServiceExitCode::Win32(ERROR) } }, From 4f9a0a12e6b6311e7480f87d5bc45011d9a192ac Mon Sep 17 00:00:00 2001 From: Dominic Burkart Date: Tue, 20 Jun 2023 14:53:25 +0200 Subject: [PATCH 5/5] fix incorrect casts --- src/app.rs | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/app.rs b/src/app.rs index 6dca25555f3c7..248fbe78b905d 100644 --- a/src/app.rs +++ b/src/app.rs @@ -342,20 +342,42 @@ impl FinishedApplication { SignalTo::Shutdown => { emit!(VectorStopped); tokio::select! { - _ = topology_controller.stop() => ExitStatus::from_raw(exitcode::OK as u32), // Graceful shutdown finished + _ = topology_controller.stop() => ExitStatus::from_raw({ + #[cfg(windows)] + { + exitcode::OK as u32 + } + #[cfg(unix)] + exitcode::OK + }), // Graceful shutdown finished _ = signal_rx.recv() => { // It is highly unlikely that this event will exit from topology. emit!(VectorQuit); // Dropping the shutdown future will immediately shut the server down - ExitStatus::from_raw(exitcode::UNAVAILABLE as u32) + ExitStatus::from_raw({ + #[cfg(windows)] + { + exitcode::UNAVAILABLE as u32 + } + #[cfg(unix)] + exitcode::OK + }) } + } } SignalTo::Quit => { // It is highly unlikely that this event will exit from topology. emit!(VectorQuit); drop(topology_controller); - ExitStatus::from_raw(exitcode::UNAVAILABLE as u32) + ExitStatus::from_raw({ + #[cfg(windows)] + { + exitcode::UNAVAILABLE as u32 + } + #[cfg(unix)] + exitcode::OK + }) } _ => unreachable!(), }