Skip to content

Commit

Permalink
Pluggable metadata library for generating sample labels (#83)
Browse files Browse the repository at this point in the history
* Add metadata provider scaffold

* Dedup labels, and cache labels for tids

* rm dotfile

* Move system metadata to separate module

* Introduce a TaskMetadata struct

* Rename UniqueLabel -> Label

* Cleanup + comments

* System metadata test

* cargo fmt

* Add tests for task labels

* Cleanup

* More tests

* Cleanup

* Update cargo lock

* Clippy

* .

* .

* Cargo fmt

* Remove dependendency on proto related things from the metadata provider

* .

* .

* Fix various container related things

* Use entrypoint, to allow additional arguments to be passed to the main
  binary.
* Update README.md on how to run the container

Also print the path were the profile was written

* Apply suggestions from code review

* Clippy

* Impl from trait

* rm redundant print added on merge

* fix readme

* Remove process_metadata file for now -- dead code.

---------

Signed-off-by: Okwudili Pat-Nebe <[email protected]>
Co-authored-by: Francisco Javier Honduvilla Coto <[email protected]>
  • Loading branch information
patnebe and javierhonduco authored Oct 25, 2024
1 parent e0cb892 commit 0f063de
Show file tree
Hide file tree
Showing 14 changed files with 370 additions and 34 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ profile.pb
**/bpf/*_skel.rs
.vmtest.log
/result
.vscode/
32 changes: 32 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ edition = "2021"
[workspace]
members = [
"lightswitch-proto",
"lightswitch-capabilities",
"lightswitch-metadata-provider"
]

[dependencies]
Expand Down Expand Up @@ -33,6 +35,7 @@ primal = "0.3.3"
nix = { version = "0.29.0", features = ["user"] }
prost = "0.13" # Needed to encode protocol buffers to bytes.
reqwest = { version = "0.12", features = ["blocking"] }
lightswitch-metadata-provider = {path = "./lightswitch-metadata-provider"}
lightswitch-proto = { path = "./lightswitch-proto"}
lightswitch-capabilities = {path = "./lightswitch-capabilities"}
ctrlc = "3.4.5"
Expand Down
12 changes: 12 additions & 0 deletions lightswitch-metadata-provider/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "lightswitch-metadata-provider"
version = "0.1.0"
edition = "2021"

[dependencies]
anyhow = "1.0.86"
lru = "0.12.4"
nix = { version = "0.29.0", features = ["user", "process"] }
procfs = "0.16.0"
tracing = "0.1.40"
thiserror = "1.0.63"
4 changes: 4 additions & 0 deletions lightswitch-metadata-provider/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pub mod metadata_label;
pub mod metadata_provider;
pub mod system_metadata;
pub mod taskname;
28 changes: 28 additions & 0 deletions lightswitch-metadata-provider/src/metadata_label.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MetadataLabelValue {
String(String),
/// Value and unit.
Number(i64, String),
}

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MetadataLabel {
pub key: String,
pub value: MetadataLabelValue,
}

impl MetadataLabel {
pub fn from_string_value(key: String, value: String) -> Self {
MetadataLabel {
key,
value: MetadataLabelValue::String(value),
}
}

pub fn from_number_value(key: String, value: i64, unit: String) -> Self {
MetadataLabel {
key,
value: MetadataLabelValue::Number(value, unit),
}
}
}
138 changes: 138 additions & 0 deletions lightswitch-metadata-provider/src/metadata_provider.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
use crate::metadata_label::MetadataLabel;
use crate::system_metadata::SystemMetadata;
use crate::taskname::TaskName;

use anyhow::Result;
use lru::LruCache;
use std::num::NonZeroUsize;
use std::sync::{Arc, Mutex};
use thiserror::Error;
use tracing::{error, warn};

#[derive(Debug, Error)]
pub enum MetadataProviderError {
#[error("Failed to retrieve metadata for task_id={0}, error={1}")]
ErrorRetrievingMetadata(i32, String),
}

pub trait MetadataProvider {
/// Return a vector of labels for the provided task id.
/// Labels returned by this function will be assumed to apply
/// to all task_ids in the same process/tgid as the provided task_id.
fn get_metadata(&self, task_id: i32) -> Result<Vec<MetadataLabel>, MetadataProviderError>;
}
pub type ThreadSafeMetadataProvider = Arc<Mutex<Box<dyn MetadataProvider + Send>>>;

pub struct GlobalMetadataProvider {
pid_label_cache: LruCache</*pid*/ i32, Vec<MetadataLabel>>,
system_metadata: SystemMetadata,
custom_metadata_providers: Vec<ThreadSafeMetadataProvider>,
}

pub struct TaskKey {
pub pid: i32,
pub tid: i32,
}

pub type ThreadSafeGlobalMetadataProvider = Arc<Mutex<GlobalMetadataProvider>>;

impl Default for GlobalMetadataProvider {
fn default() -> Self {
Self::new(NonZeroUsize::new(1000).unwrap())
}
}

impl GlobalMetadataProvider {
pub fn new(metadata_cache_size: NonZeroUsize) -> Self {
Self {
pid_label_cache: LruCache::new(metadata_cache_size),
system_metadata: SystemMetadata {},
custom_metadata_providers: Vec::new(),
}
}

pub fn register_custom_providers(&mut self, providers: Vec<ThreadSafeMetadataProvider>) {
self.custom_metadata_providers.extend(providers);
}

fn get_labels(&mut self, pid: i32) -> Vec<MetadataLabel> {
let mut labels = self
.system_metadata
.get_metadata()
.map_err(|err| warn!("{}", err))
.unwrap_or_default();

for provider in &self.custom_metadata_providers {
match provider.lock().unwrap().get_metadata(pid) {
Ok(custom_labels) => {
labels.extend(custom_labels.into_iter());
}
Err(err) => {
warn!("Failed to retrieve custom metadata, error = {}", err);
}
}
}
labels
}

pub fn get_metadata(&mut self, task_key: TaskKey) -> Vec<MetadataLabel> {
let task_name = TaskName::for_task(task_key.tid).unwrap_or(TaskName::errored());
let pid = task_key.pid;
let mut task_metadata = vec![
MetadataLabel::from_number_value("pid".into(), task_key.tid.into(), "task-id".into()),
MetadataLabel::from_string_value("thread-name".into(), task_name.current_thread),
MetadataLabel::from_string_value("process-name".into(), task_name.main_thread),
MetadataLabel::from_number_value("pid".into(), pid.into(), "task-tgid".into()),
];

if let Some(cached_labels) = self.pid_label_cache.get(&pid) {
task_metadata.extend(cached_labels.iter().cloned());
} else {
let labels = self.get_labels(pid);
self.pid_label_cache.push(pid, labels.clone());
task_metadata.extend(labels);
}
task_metadata
}
}

#[cfg(test)]
mod tests {
use super::*;
use crate::metadata_label::MetadataLabelValue;
use nix::unistd;

#[test]
fn test_get_metadata_returns_minimal_labels() {
// Given
let tid = unistd::gettid().as_raw();
let pid = unistd::getpgrp().as_raw();
let mut metadata_provider = GlobalMetadataProvider::default();
let expected = TaskName::for_task(tid).unwrap();

// When
let labels = metadata_provider.get_metadata(TaskKey { tid, pid });

// Then
assert_eq!(labels[0].key, "pid");
assert_eq!(
labels[0].value,
MetadataLabelValue::Number(tid.into(), "task-id".into())
);
assert_eq!(labels[1].key, "thread-name");
assert_eq!(
labels[1].value,
MetadataLabelValue::String(expected.current_thread)
);
assert_eq!(labels[2].key, "process-name");
assert_eq!(
labels[2].value,
MetadataLabelValue::String(expected.main_thread)
);
assert_eq!(labels[3].key, "pid");
assert_eq!(
labels[3].value,
MetadataLabelValue::Number(pid.into(), "task-tgid".into())
);
}
}
75 changes: 75 additions & 0 deletions lightswitch-metadata-provider/src/system_metadata.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
use crate::metadata_label::MetadataLabel;

use anyhow::Result;
use nix::sys::utsname;
use thiserror::Error;

pub struct SystemMetadata {}

#[derive(Debug, Error)]
pub enum SystemMetadataError {
#[error("Failed to read system information, error = {0}")]
ErrorRetrievingSystemInfo(String),
}

fn get_kernel_release(uname: &utsname::UtsName) -> String {
format!(
"{}:{}",
uname.sysname().to_string_lossy(),
uname.release().to_string_lossy()
)
}

impl SystemMetadata {
pub fn get_metadata(&self) -> Result<Vec<MetadataLabel>, SystemMetadataError> {
let uname = utsname::uname()
.map_err(|e| SystemMetadataError::ErrorRetrievingSystemInfo(e.desc().to_string()))?;
let kernel_release_label =
MetadataLabel::from_string_value("kernel_release".into(), get_kernel_release(&uname));
let machine_label = MetadataLabel::from_string_value(
"machine".into(),
uname.machine().to_string_lossy().to_string(),
);
Ok(vec![kernel_release_label, machine_label])
}
}

#[cfg(test)]
mod tests {
use crate::metadata_label::MetadataLabelValue;
use crate::system_metadata::*;

#[test]
fn test_get_system_metadata() {
// Given
let system_metadata = SystemMetadata {};
let expected = utsname::uname().unwrap();

// When
let result = system_metadata.get_metadata();

// Then
assert!(result.is_ok());
let labels = result.unwrap();

assert_eq!(labels.len(), 2);
let kernel_release = &labels[0];
let machine = &labels[1];

assert_eq!(kernel_release.key, "kernel_release");
assert_eq!(
kernel_release.value,
MetadataLabelValue::String(format!(
"{}:{}",
expected.sysname().to_string_lossy(),
expected.release().to_string_lossy()
))
);

assert_eq!(machine.key, "machine");
assert_eq!(
machine.value,
MetadataLabelValue::String(expected.machine().to_string_lossy().to_string())
);
}
}
16 changes: 16 additions & 0 deletions src/metadata.rs → ...tswitch-metadata-provider/src/taskname.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,20 @@ mod tests {
.join()
.unwrap();
}

#[test]
fn test_errored() {
// Given
let task_name = TaskName::errored();

// When / Then
assert_eq!(
task_name.current_thread,
String::from("<could not fetch thread name>")
);
assert_eq!(
task_name.main_thread,
String::from("<could not fetch process name>")
);
}
}
Loading

0 comments on commit 0f063de

Please sign in to comment.