Skip to content

Commit

Permalink
Merge pull request #2079 from jacderida/fix-node_man-local_status
Browse files Browse the repository at this point in the history
fix: [#2032] `local status` cmd reports status correctly
  • Loading branch information
joshuef authored Sep 1, 2024
2 parents 4340ddd + a9c6d88 commit e5e554a
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 37 deletions.
1 change: 1 addition & 0 deletions node-launchpad/src/components/status.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ impl Status {
&ServiceController {},
false,
true,
false,
)
.await?;
node_registry.save()?;
Expand Down
1 change: 1 addition & 0 deletions sn_node_manager/src/cmd/local.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ pub async fn status(details: bool, fail: bool, json: bool) -> Result<()> {
details,
json,
fail,
true,
)
.await?;
local_node_registry.save()?;
Expand Down
6 changes: 6 additions & 0 deletions sn_node_manager/src/cmd/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ pub async fn balance(
&ServiceController {},
verbosity != VerbosityLevel::Minimal,
false,
false,
)
.await?;

Expand Down Expand Up @@ -227,6 +228,7 @@ pub async fn remove(
&ServiceController {},
verbosity != VerbosityLevel::Minimal,
false,
false,
)
.await?;

Expand Down Expand Up @@ -313,6 +315,7 @@ pub async fn start(
&ServiceController {},
verbosity != VerbosityLevel::Minimal,
false,
false,
)
.await?;

Expand Down Expand Up @@ -383,6 +386,7 @@ pub async fn status(details: bool, fail: bool, json: bool) -> Result<()> {
details,
json,
fail,
false,
)
.await?;
node_registry.save()?;
Expand All @@ -406,6 +410,7 @@ pub async fn stop(
&ServiceController {},
verbosity != VerbosityLevel::Minimal,
false,
false,
)
.await?;

Expand Down Expand Up @@ -481,6 +486,7 @@ pub async fn upgrade(
&ServiceController {},
verbosity != VerbosityLevel::Minimal,
false,
false,
)
.await?;

Expand Down
113 changes: 76 additions & 37 deletions sn_node_manager/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ impl From<u8> for VerbosityLevel {
use crate::error::{Error, Result};
use colored::Colorize;
use semver::Version;
use sn_service_management::rpc::RpcActions;
use sn_service_management::{
control::ServiceControl, error::Error as ServiceError, rpc::RpcClient, NodeRegistry,
NodeService, NodeServiceData, ServiceStateActions, ServiceStatus, UpgradeOptions,
Expand Down Expand Up @@ -376,8 +377,16 @@ pub async fn status_report(
detailed_view: bool,
output_json: bool,
fail: bool,
is_local_network: bool,
) -> Result<()> {
refresh_node_registry(node_registry, service_control, !output_json, true).await?;
refresh_node_registry(
node_registry,
service_control,
!output_json,
true,
is_local_network,
)
.await?;

if output_json {
let json = serde_json::to_string_pretty(&node_registry.to_status_summary())?;
Expand Down Expand Up @@ -516,18 +525,23 @@ pub async fn status_report(

/// Refreshes the status of the node registry's services.
///
/// At a minimum, the refresh determines if each service is running. It does that by trying to find
/// a process whose binary path matches the path of the binary for the service. Since each service
/// uses its own binary, the path is a unique identifer. So you can know if any *particular*
/// service is running or not.
/// The mechanism is different, depending on whether it's a service-based network or a local
/// network.
///
/// A full refresh uses the RPC client to connect to the node's RPC service to determine things
/// like the number of connected peers.
/// For a service-based network, at a minimum, the refresh determines if each service is running.
/// It does that by trying to find a process whose binary path matches the path of the binary for
/// the service. Since each service uses its own binary, the path is a unique identifer. So you can
/// know if any *particular* service is running or not. A full refresh uses the RPC client to
/// connect to the node's RPC service to determine things like the number of connected peers.
///
/// For a local network, the node paths are not unique, so we can't use that. We consider the node
/// running if we can connect to its RPC service; otherwise it is considered stopped.
pub async fn refresh_node_registry(
node_registry: &mut NodeRegistry,
service_control: &dyn ServiceControl,
print_refresh_message: bool,
full_refresh: bool,
is_local_network: bool,
) -> Result<()> {
// This message is useful for users, but needs to be suppressed when a JSON output is
// requested.
Expand All @@ -553,37 +567,62 @@ pub async fn refresh_node_registry(

let mut rpc_client = RpcClient::from_socket_addr(node.rpc_socket_addr);
rpc_client.set_max_attempts(1);
let mut service = NodeService::new(node, Box::new(rpc_client));
match service_control.get_process_pid(&service.bin_path()) {
Ok(pid) => {
debug!(
"{} is running with PID {pid}",
service.service_data.service_name
);
service.on_start(Some(pid), full_refresh).await?;
let mut service = NodeService::new(node, Box::new(rpc_client.clone()));

if is_local_network {
// For a local network, retrieving the process by its path does not work, because the
// paths are not unique: they are all launched from the same binary. Instead we will
// just determine whether the node is running by connecting to its RPC service. We
// only need to distinguish between `RUNNING` and `STOPPED` for a local network.
match rpc_client.node_info().await {
Ok(info) => {
let pid = info.pid;
debug!(
"local node {} is running with PID {pid}",
service.service_data.service_name
);
service.on_start(Some(pid), full_refresh).await?;
}
Err(_) => {
debug!(
"Failed to retrieve PID for local node {}",
service.service_data.service_name
);
service.on_stop().await?;
}
}
Err(_) => {
match service.status() {
ServiceStatus::Added => {
// If the service is still at `Added` status, there hasn't been an attempt
// to start it since it was installed. It's useful to keep this status
// rather than setting it to `STOPPED`, so that the user can differentiate.
debug!(
"{} has not been started since it was installed",
service.service_data.service_name
);
}
ServiceStatus::Removed => {
// In the case of the service being removed, we want to retain that state
// and not have it marked `STOPPED`.
debug!("{} has been removed", service.service_data.service_name);
}
_ => {
debug!(
"Failed to retrieve PID for {}",
service.service_data.service_name
);
service.on_stop().await?;
} else {
match service_control.get_process_pid(&service.bin_path()) {
Ok(pid) => {
debug!(
"{} is running with PID {pid}",
service.service_data.service_name
);
service.on_start(Some(pid), full_refresh).await?;
}
Err(_) => {
match service.status() {
ServiceStatus::Added => {
// If the service is still at `Added` status, there hasn't been an attempt
// to start it since it was installed. It's useful to keep this status
// rather than setting it to `STOPPED`, so that the user can differentiate.
debug!(
"{} has not been started since it was installed",
service.service_data.service_name
);
}
ServiceStatus::Removed => {
// In the case of the service being removed, we want to retain that state
// and not have it marked `STOPPED`.
debug!("{} has been removed", service.service_data.service_name);
}
_ => {
debug!(
"Failed to retrieve PID for {}",
service.service_data.service_name
);
service.on_stop().await?;
}
}
}
}
Expand Down
1 change: 1 addition & 0 deletions sn_service_management/src/rpc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ pub trait RpcActions: Sync {
async fn update_log_level(&self, log_levels: String) -> Result<()>;
}

#[derive(Debug, Clone)]
pub struct RpcClient {
endpoint: String,
max_attempts: u8,
Expand Down

0 comments on commit e5e554a

Please sign in to comment.