diff --git a/analyzeme/src/event.rs b/analyzeme/src/event.rs index 1a0567f..2c548c0 100644 --- a/analyzeme/src/event.rs +++ b/analyzeme/src/event.rs @@ -1,4 +1,5 @@ use crate::timestamp::Timestamp; +use memchr::memchr; use std::borrow::Cow; use std::time::Duration; @@ -6,7 +7,7 @@ use std::time::Duration; pub struct Event<'a> { pub event_kind: Cow<'a, str>, pub label: Cow<'a, str>, - pub additional_data: &'a [Cow<'a, str>], + pub additional_data: Vec>, pub timestamp: Timestamp, pub thread_id: u32, } @@ -36,4 +37,141 @@ impl<'a> Event<'a> { Timestamp::Instant(_) => None, } } + + pub(crate) fn parse_event_id(event_id: Cow<'a, str>) -> (Cow<'a, str>, Vec>) { + let event_id = match event_id { + Cow::Owned(s) => Cow::Owned(s.into_bytes()), + Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()), + }; + + let mut parser = Parser::new(event_id); + + let label = match parser.parse_label() { + Ok(label) => label, + Err(message) => { + eprintln!("{}", message); + return (Cow::from(""), Vec::new()); + } + }; + + let mut args = Vec::new(); + + while parser.pos != parser.full_text.len() { + match parser.parse_arg() { + Ok(arg) => args.push(arg), + Err(message) => { + eprintln!("{}", message); + break; + } + } + } + + (label, args) + } +} + +struct Parser<'a> { + full_text: Cow<'a, [u8]>, + pos: usize, +} + +const SEPARATOR_BYTE: u8 = measureme::event_id::SEPARATOR_BYTE.as_bytes()[0]; + +impl<'a> Parser<'a> { + fn new(full_text: Cow<'a, [u8]>) -> Parser<'a> { + Parser { full_text, pos: 0 } + } + + fn peek(&self) -> u8 { + self.full_text[self.pos] + } + + fn parse_label(&mut self) -> Result, String> { + assert!(self.pos == 0); + self.parse_separator_terminated_text() + } + + fn parse_separator_terminated_text(&mut self) -> Result, String> { + let start = self.pos; + + let end = memchr(SEPARATOR_BYTE, &self.full_text[start..]) + .map(|pos| pos + start) + .unwrap_or(self.full_text.len()); + + if start == end { + return self.err("Zero-length "); + } + + self.pos = end; + + if self.full_text[start..end].iter().any(u8::is_ascii_control) { + return self.err("Found ASCII control character in "); + } + + Ok(self.substring(start, end)) + } + + fn parse_arg(&mut self) -> Result, String> { + if self.peek() != SEPARATOR_BYTE { + return self.err(&format!( + "Expected '\\x{:x}' char at start of ", + SEPARATOR_BYTE + )); + } + + self.pos += 1; + self.parse_separator_terminated_text() + } + + fn err(&self, message: &str) -> Result { + Err(format!( + r#"Could not parse `event_id`. {} at {} in "{}""#, + message, + self.pos, + std::str::from_utf8(&self.full_text[..]).unwrap() + )) + } + + fn substring(&self, start: usize, end: usize) -> Cow<'a, str> { + match self.full_text { + Cow::Owned(ref s) => { + let bytes = s[start..end].to_owned(); + Cow::Owned(String::from_utf8(bytes).unwrap()) + } + Cow::Borrowed(s) => Cow::Borrowed(std::str::from_utf8(&s[start..end]).unwrap()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::borrow::Cow; + + #[test] + fn parse_event_id_no_args() { + let (label, args) = Event::parse_event_id(Cow::from("foo")); + + assert_eq!(label, "foo"); + assert!(args.is_empty()); + } + + #[test] + fn parse_event_id_one_arg() { + let (label, args) = Event::parse_event_id(Cow::from("foo\x1emy_arg")); + + assert_eq!(label, "foo"); + assert_eq!(args, vec![Cow::from("my_arg")]); + } + + #[test] + fn parse_event_id_n_args() { + let (label, args) = Event::parse_event_id(Cow::from("foo\x1earg1\x1earg2\x1earg3")); + + assert_eq!(label, "foo"); + assert_eq!( + args, + vec![Cow::from("arg1"), Cow::from("arg2"), Cow::from("arg3")] + ); + } } diff --git a/analyzeme/src/profiling_data.rs b/analyzeme/src/profiling_data.rs index 9325fc1..6d08f05 100644 --- a/analyzeme/src/profiling_data.rs +++ b/analyzeme/src/profiling_data.rs @@ -1,13 +1,13 @@ use crate::event::Event; use crate::lightweight_event::LightweightEvent; -use crate::StringTable; use crate::timestamp::Timestamp; +use crate::StringTable; use measureme::file_header::{ read_file_header, write_file_header, CURRENT_FILE_FORMAT_VERSION, FILE_HEADER_SIZE, FILE_MAGIC_EVENT_STREAM, }; use measureme::ByteVecSink; -use measureme::{ProfilerFiles, RawEvent, SerializationSink, StringTableBuilder}; +use measureme::{EventId, ProfilerFiles, RawEvent, SerializationSink, StringTableBuilder}; use serde::{Deserialize, Deserializer}; use std::error::Error; use std::fs; @@ -94,10 +94,16 @@ impl ProfilingData { let timestamp = Timestamp::from_raw_event(&raw_event, self.metadata.start_time); + let event_id = string_table + .get(raw_event.event_id.to_string_id()) + .to_string(); + // Parse out the label and arguments from the `event_id`. + let (label, additional_data) = Event::parse_event_id(event_id); + Event { event_kind: string_table.get(raw_event.event_kind).to_string(), - label: string_table.get(raw_event.event_id).to_string(), - additional_data: &[], + label, + additional_data, timestamp, thread_id: raw_event.thread_id, } @@ -226,7 +232,7 @@ impl ProfilingDataBuilder { F: FnOnce(&mut Self), { let event_kind = self.string_table.alloc(event_kind); - let event_id = self.string_table.alloc(event_id); + let event_id = EventId::from_label(self.string_table.alloc(event_id)); inner(self); @@ -247,7 +253,7 @@ impl ProfilingDataBuilder { timestamp_nanos: u64, ) -> &mut Self { let event_kind = self.string_table.alloc(event_kind); - let event_id = self.string_table.alloc(event_id); + let event_id = EventId::from_label(self.string_table.alloc(event_id)); let raw_event = RawEvent::new_instant(event_kind, event_id, thread_id, timestamp_nanos); @@ -319,7 +325,7 @@ mod tests { Event { event_kind: Cow::from(event_kind), label: Cow::from(label), - additional_data: &[], + additional_data: Vec::new(), timestamp: Timestamp::Interval { start: SystemTime::UNIX_EPOCH + Duration::from_nanos(start_nanos), end: SystemTime::UNIX_EPOCH + Duration::from_nanos(end_nanos), @@ -337,7 +343,7 @@ mod tests { Event { event_kind: Cow::from(event_kind), label: Cow::from(label), - additional_data: &[], + additional_data: Vec::new(), timestamp: Timestamp::Instant( SystemTime::UNIX_EPOCH + Duration::from_nanos(timestamp_nanos), ), @@ -399,7 +405,6 @@ mod tests { assert_eq!(events[0].to_event(), full_interval("k1", "id1", 0, 10, 100)); assert_eq!(events[1].to_event(), full_interval("k2", "id2", 1, 100, 110)); assert_eq!(events[2].to_event(), full_interval("k3", "id3", 0, 120, 140)); - } #[test] diff --git a/analyzeme/src/stringtable.rs b/analyzeme/src/stringtable.rs index 86fc99d..2922444 100644 --- a/analyzeme/src/stringtable.rs +++ b/analyzeme/src/stringtable.rs @@ -7,10 +7,10 @@ use measureme::file_header::{ }; use measureme::stringtable::{METADATA_STRING_ID, STRING_ID_MASK, TERMINATOR}; use measureme::{Addr, StringId}; +use memchr::memchr; use rustc_hash::FxHashMap; use std::borrow::Cow; use std::error::Error; -use memchr::memchr; fn deserialize_index_entry(bytes: &[u8]) -> (StringId, Addr) { ( diff --git a/analyzeme/src/testing_common.rs b/analyzeme/src/testing_common.rs index 976f7a3..9e9874a 100644 --- a/analyzeme/src/testing_common.rs +++ b/analyzeme/src/testing_common.rs @@ -1,6 +1,6 @@ use crate::timestamp::Timestamp; use crate::{Event, ProfilingData}; -use measureme::{Profiler, SerializationSink, StringId}; +use measureme::{EventId, EventIdBuilder, Profiler, SerializationSink, StringId}; use rustc_hash::FxHashMap; use std::borrow::Cow; use std::default::Default; @@ -18,6 +18,23 @@ fn mk_filestem(file_name_stem: &str) -> PathBuf { path } +#[derive(Clone)] +struct ExpectedEvent { + kind: Cow<'static, str>, + label: Cow<'static, str>, + args: Vec>, +} + +impl ExpectedEvent { + fn new(kind: &'static str, label: &'static str, args: &[&'static str]) -> ExpectedEvent { + ExpectedEvent { + kind: Cow::from(kind), + label: Cow::from(label), + args: args.iter().map(|&x| Cow::from(x)).collect(), + } + } +} + // Generate some profiling data. This is the part that would run in rustc. fn generate_profiling_data( filestem: &Path, @@ -26,27 +43,35 @@ fn generate_profiling_data( ) -> Vec> { let profiler = Arc::new(Profiler::::new(Path::new(filestem)).unwrap()); - let event_id_virtual = StringId::new_virtual(42); + let event_id_virtual = EventId::from_label(StringId::new_virtual(42)); + let event_id_builder = EventIdBuilder::new(&profiler); - let event_ids = vec![ + let event_ids: Vec<(StringId, EventId)> = vec![ ( profiler.alloc_string("Generic"), - profiler.alloc_string("SomeGenericActivity"), + EventId::from_label(profiler.alloc_string("SomeGenericActivity")), ), (profiler.alloc_string("Query"), event_id_virtual), + ( + profiler.alloc_string("QueryWithArg"), + event_id_builder.from_label_and_arg( + profiler.alloc_string("AQueryWithArg"), + profiler.alloc_string("some_arg"), + ), + ), ]; // This and event_ids have to match! - let mut event_ids_as_str: FxHashMap<_, _> = Default::default(); - event_ids_as_str.insert(event_ids[0].0, "Generic"); - event_ids_as_str.insert(event_ids[0].1, "SomeGenericActivity"); - event_ids_as_str.insert(event_ids[1].0, "Query"); - event_ids_as_str.insert(event_ids[1].1, "SomeQuery"); + let expected_events_templates = vec![ + ExpectedEvent::new("Generic", "SomeGenericActivity", &[]), + ExpectedEvent::new("Query", "SomeQuery", &[]), + ExpectedEvent::new("QueryWithArg", "AQueryWithArg", &["some_arg"]), + ]; let threads: Vec<_> = (0.. num_threads).map(|thread_id| { let event_ids = event_ids.clone(); let profiler = profiler.clone(); - let event_ids_as_str = event_ids_as_str.clone(); + let expected_events_templates = expected_events_templates.clone(); std::thread::spawn(move || { let mut expected_events = Vec::new(); @@ -60,7 +85,7 @@ fn generate_profiling_data( thread_id as u32, 4, &event_ids[..], - &event_ids_as_str, + &expected_events_templates, &mut expected_events, ); } @@ -74,7 +99,7 @@ fn generate_profiling_data( // An example of allocating the string contents of an event id that has // already been used profiler.map_virtual_to_concrete_string( - event_id_virtual, + event_id_virtual.to_string_id(), profiler.alloc_string("SomeQuery") ); @@ -165,15 +190,17 @@ fn pseudo_invocation( random: usize, thread_id: u32, recursions_left: usize, - event_ids: &[(StringId, StringId)], - event_ids_as_str: &FxHashMap, + event_ids: &[(StringId, EventId)], + expected_events_templates: &[ExpectedEvent], expected_events: &mut Vec>, ) { if recursions_left == 0 { return; } - let (event_kind, event_id) = event_ids[random % event_ids.len()]; + let random_event_index = random % event_ids.len(); + + let (event_kind, event_id) = event_ids[random_event_index]; let _prof_guard = profiler.start_recording_interval_event(event_kind, event_id, thread_id); @@ -183,19 +210,19 @@ fn pseudo_invocation( thread_id, recursions_left - 1, event_ids, - event_ids_as_str, + expected_events_templates, expected_events, ); expected_events.push(Event { - event_kind: Cow::from(event_ids_as_str[&event_kind]), - label: Cow::from(event_ids_as_str[&event_id]), - additional_data: &[], + event_kind: expected_events_templates[random_event_index].kind.clone(), + label: expected_events_templates[random_event_index].label.clone(), + additional_data: expected_events_templates[random_event_index].args.clone(), + thread_id, // We can't test this anyway: timestamp: Timestamp::Interval { start: SystemTime::UNIX_EPOCH, end: SystemTime::UNIX_EPOCH, }, - thread_id, }); } diff --git a/measureme/src/event_id.rs b/measureme/src/event_id.rs new file mode 100644 index 0000000..64af40e --- /dev/null +++ b/measureme/src/event_id.rs @@ -0,0 +1,74 @@ +use crate::{Profiler, SerializationSink, StringComponent, StringId}; + +/// Event IDs are strings conforming to the following grammar: +/// +/// ```ignore +/// =