Skip to content

Commit

Permalink
feat(supportability/loki): add loki dump subcommand
Browse files Browse the repository at this point in the history
Signed-off-by: Tiago Castro <[email protected]>
  • Loading branch information
tiagolobocastro committed Jun 27, 2023
1 parent 51555a3 commit e76a3ef
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 37 deletions.
87 changes: 50 additions & 37 deletions k8s/supportability/src/collect/system_dump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ use crate::{
constants::MAYASTOR_SERVICE,
error::Error,
k8s_resources::k8s_resource_dump::K8sResourceDumperClient,
logs::{LogCollection, LogResource, Logger},
logs::{LogCollection, LogError, LogResource, Logger},
persistent_store::etcd::EtcdStore,
resources::{
node::NodeClientWrapper, pool::PoolClientWrapper, volume::VolumeClientWrapper,
Resourcer,
node::NodeClientWrapper, pool::PoolClientWrapper, traits::Topologer,
volume::VolumeClientWrapper, Resourcer,
},
rest_wrapper::RestClient,
utils::{flush_tool_log_file, init_tool_log_file, write_to_log_file},
Expand Down Expand Up @@ -117,6 +117,45 @@ impl SystemDumper {
}
}

/// Collects the logs of the product's services and dumps them.
///
/// The set of log resources is assembled from the control-plane, data-plane,
/// upgrade and callhome logging services reported by the logger. When a
/// `node_topologer` is supplied, one additional [`LogResource`] is added per
/// entry returned by its `get_all_resource_info()`.
///
/// # Errors
///
/// Returns a [`LogError`] as soon as any of the service lookups or the final
/// `fetch_and_dump_logs` call fails.
pub(crate) async fn collect_and_dump_loki_logs(
    &mut self,
    node_topologer: Option<Box<dyn Topologer>>,
) -> Result<(), LogError> {
    // Seed the set with the control-plane services, then fold in the other groups.
    let mut resources = self.logger.get_control_plane_logging_services().await?;
    resources.extend(self.logger.get_data_plane_logging_services().await?);
    resources.extend(self.logger.get_upgrade_logging_services().await?);
    resources.extend(self.logger.get_callhome_logging_services().await?);

    // NOTE: MAYASTOR-IO services will not be available when the MAYASTOR-IO pod is down,
    // so the node resources are used as an additional source of log targets.
    if let Some(topologer) = node_topologer {
        for node_topo in topologer.get_all_resource_info().iter() {
            resources.insert(LogResource {
                container_name: node_topo.get_container_name(),
                host_name: Some(node_topo.get_host_name()),
                label_selector: node_topo.get_label_selector().as_string(','),
                service_type: MAYASTOR_SERVICE.to_string(),
            });
        }
    }

    // Best effort: a failure to record the service list must not stop the collection.
    let _ = write_to_log_file(format!(
        "Collecting logs of following services: \n {resources:#?}"
    ));

    log("Collecting logs...".to_string());
    let dump_dir = self.dir_path.clone();
    self.logger.fetch_and_dump_logs(resources, dump_dir).await?;
    log("Completed collection of logs".to_string());

    Ok(())
}

/// Dumps the state of the system
pub(crate) async fn dump_system(&mut self) -> Result<(), Error> {
let mut errors: Vec<Error> = Vec::new();
Expand Down Expand Up @@ -177,42 +216,11 @@ impl SystemDumper {
};
log("Completed collection of topology information".to_string());

// Fetch required logging resources
let mut resources = self.logger.get_control_plane_logging_services().await?;
resources.extend(self.logger.get_data_plane_logging_services().await?);
resources.extend(self.logger.get_upgrade_logging_services().await?);
resources.extend(self.logger.get_callhome_logging_services().await?);
// NOTE: MAYASTOR-IO services will not be available when the MAYASTOR-IO pod is down.
// Let's add information from mayastor node resources.
if let Some(topologer) = node_topologer {
topologer
.get_all_resource_info()
.iter()
.for_each(|node_topo| {
resources.insert(LogResource {
container_name: node_topo.get_container_name(),
host_name: Some(node_topo.get_host_name()),
label_selector: node_topo.get_label_selector().as_string(','),
service_type: MAYASTOR_SERVICE.to_string(),
});
});
if let Err(error) = self.collect_and_dump_loki_logs(node_topologer).await {
log("Error occurred while collecting logs".to_string());
errors.push(Error::LogCollectionError(error));
}

let _ = write_to_log_file(format!(
"Collecting logs of following services: \n {resources:#?}"
));

log("Collecting logs...".to_string());
let _ = self
.logger
.fetch_and_dump_logs(resources, self.dir_path.clone())
.await
.map_err(|e| {
log("Error occurred while collecting logs".to_string());
errors.push(Error::LogCollectionError(e));
});
log("Completed collection of logs".to_string());

log("Collecting Kubernetes resources specific to mayastor service".to_string());
let _ = self
.k8s_resource_dumper
Expand Down Expand Up @@ -268,6 +276,11 @@ impl SystemDumper {
Ok(())
}

/// Get the rest client clone.
pub(crate) fn rest_client(&self) -> RestClient {
self.rest_client.clone()
}

fn delete_temporary_directory(&self) -> Result<(), Error> {
std::fs::remove_dir_all(self.dir_path.clone())?;
Ok(())
Expand Down
17 changes: 17 additions & 0 deletions k8s/supportability/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,23 @@ impl SupportArgs {
};
let mut errors = Vec::new();
match resource {
Resource::Loki => {
let mut system_dumper =
collect::system_dump::SystemDumper::get_or_panic_system_dumper(config).await;
let node_topologer = NodeClientWrapper::new(system_dumper.rest_client())
.get_topologer(None)
.await
.ok();
log("Completed collection of topology information".to_string());

system_dumper
.collect_and_dump_loki_logs(node_topologer)
.await?;
if let Err(e) = system_dumper.fill_archive_and_delete_tmp() {
log(format!("Failed to copy content to archive, error: {e:?}"));
errors.push(e);
}
}
Resource::System => {
let mut system_dumper =
collect::system_dump::SystemDumper::get_or_panic_system_dumper(config).await;
Expand Down
4 changes: 4 additions & 0 deletions k8s/supportability/src/operations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,8 @@ pub(crate) enum Resource {
#[clap(long)]
stdout: bool,
},

/// Collects the Loki logs from the product's components
#[clap(hide = true)]
Loki,
}

0 comments on commit e76a3ef

Please sign in to comment.