From e76a3efbc8eea1cdf4044fee924ad5116722991e Mon Sep 17 00:00:00 2001 From: Tiago Castro Date: Tue, 27 Jun 2023 10:05:31 +0100 Subject: [PATCH] feat(supportability/loki): add loki dump subcommand Signed-off-by: Tiago Castro --- k8s/supportability/src/collect/system_dump.rs | 87 +++++++++++-------- k8s/supportability/src/lib.rs | 17 ++++ k8s/supportability/src/operations.rs | 4 + 3 files changed, 71 insertions(+), 37 deletions(-) diff --git a/k8s/supportability/src/collect/system_dump.rs b/k8s/supportability/src/collect/system_dump.rs index 10bd32f3f..fba9e666b 100644 --- a/k8s/supportability/src/collect/system_dump.rs +++ b/k8s/supportability/src/collect/system_dump.rs @@ -5,11 +5,11 @@ use crate::{ constants::MAYASTOR_SERVICE, error::Error, k8s_resources::k8s_resource_dump::K8sResourceDumperClient, - logs::{LogCollection, LogResource, Logger}, + logs::{LogCollection, LogError, LogResource, Logger}, persistent_store::etcd::EtcdStore, resources::{ - node::NodeClientWrapper, pool::PoolClientWrapper, volume::VolumeClientWrapper, - Resourcer, + node::NodeClientWrapper, pool::PoolClientWrapper, traits::Topologer, + volume::VolumeClientWrapper, Resourcer, }, rest_wrapper::RestClient, utils::{flush_tool_log_file, init_tool_log_file, write_to_log_file}, @@ -117,6 +117,45 @@ impl SystemDumper { } } + /// Collect and dump loki logs. + pub(crate) async fn collect_and_dump_loki_logs( + &mut self, + node_topologer: Option<Box<dyn Topologer>>, + ) -> Result<(), LogError> { + // Fetch required logging resources + let mut resources = self.logger.get_control_plane_logging_services().await?; + resources.extend(self.logger.get_data_plane_logging_services().await?); + resources.extend(self.logger.get_upgrade_logging_services().await?); + resources.extend(self.logger.get_callhome_logging_services().await?); + + // NOTE: MAYASTOR-IO services will not be available when MAYASTOR-IO pod is down. + // Lets add information from mayastor node resources. 
+ if let Some(topologer) = node_topologer { + topologer + .get_all_resource_info() + .iter() + .for_each(|node_topo| { + resources.insert(LogResource { + container_name: node_topo.get_container_name(), + host_name: Some(node_topo.get_host_name()), + label_selector: node_topo.get_label_selector().as_string(','), + service_type: MAYASTOR_SERVICE.to_string(), + }); + }); + } + + let _ = write_to_log_file(format!( + "Collecting logs of following services: \n {resources:#?}" + )); + + log("Collecting logs...".to_string()); + self.logger + .fetch_and_dump_logs(resources, self.dir_path.clone()) + .await?; + log("Completed collection of logs".to_string()); + Ok(()) + } + /// Dumps the state of the system pub(crate) async fn dump_system(&mut self) -> Result<(), Error> { let mut errors: Vec<Error> = Vec::new(); @@ -177,42 +216,11 @@ impl SystemDumper { }; log("Completed collection of topology information".to_string()); - // Fetch required logging resources - let mut resources = self.logger.get_control_plane_logging_services().await?; - resources.extend(self.logger.get_data_plane_logging_services().await?); - resources.extend(self.logger.get_upgrade_logging_services().await?); - resources.extend(self.logger.get_callhome_logging_services().await?); - // NOTE: MAYASTOR-IO services will not be available when MAYASTOR-IO pod is down. - // Lets add information from mayastor node resources. 
- if let Some(topologer) = node_topologer { - topologer - .get_all_resource_info() - .iter() - .for_each(|node_topo| { - resources.insert(LogResource { - container_name: node_topo.get_container_name(), - host_name: Some(node_topo.get_host_name()), - label_selector: node_topo.get_label_selector().as_string(','), - service_type: MAYASTOR_SERVICE.to_string(), - }); - }); + if let Err(error) = self.collect_and_dump_loki_logs(node_topologer).await { + log("Error occurred while collecting logs".to_string()); + errors.push(Error::LogCollectionError(error)); } - let _ = write_to_log_file(format!( - "Collecting logs of following services: \n {resources:#?}" - )); - - log("Collecting logs...".to_string()); - let _ = self - .logger - .fetch_and_dump_logs(resources, self.dir_path.clone()) - .await - .map_err(|e| { - log("Error occurred while collecting logs".to_string()); - errors.push(Error::LogCollectionError(e)); - }); - log("Completed collection of logs".to_string()); - log("Collecting Kubernetes resources specific to mayastor service".to_string()); let _ = self .k8s_resource_dumper @@ -268,6 +276,11 @@ impl SystemDumper { Ok(()) } + /// Get the rest client clone. 
+ pub(crate) fn rest_client(&self) -> RestClient { + self.rest_client.clone() + } + fn delete_temporary_directory(&self) -> Result<(), Error> { std::fs::remove_dir_all(self.dir_path.clone())?; Ok(()) diff --git a/k8s/supportability/src/lib.rs b/k8s/supportability/src/lib.rs index f18cff25f..8472dcd1f 100644 --- a/k8s/supportability/src/lib.rs +++ b/k8s/supportability/src/lib.rs @@ -118,6 +118,23 @@ impl SupportArgs { }; let mut errors = Vec::new(); match resource { + Resource::Loki => { + let mut system_dumper = + collect::system_dump::SystemDumper::get_or_panic_system_dumper(config).await; + let node_topologer = NodeClientWrapper::new(system_dumper.rest_client()) + .get_topologer(None) + .await + .ok(); + log("Completed collection of topology information".to_string()); + + system_dumper + .collect_and_dump_loki_logs(node_topologer) + .await?; + if let Err(e) = system_dumper.fill_archive_and_delete_tmp() { + log(format!("Failed to copy content to archive, error: {e:?}")); + errors.push(e); + } + } Resource::System => { let mut system_dumper = collect::system_dump::SystemDumper::get_or_panic_system_dumper(config).await; diff --git a/k8s/supportability/src/operations.rs b/k8s/supportability/src/operations.rs index 8f2eb1a6b..9a460d1c9 100644 --- a/k8s/supportability/src/operations.rs +++ b/k8s/supportability/src/operations.rs @@ -53,4 +53,8 @@ pub(crate) enum Resource { #[clap(long)] stdout: bool, }, + + /// Collects the Loki logs from the product's components + #[clap(hide = true)] + Loki, }