From 37ae7fd5241c986b561a47a73dc93721bf8b6e7b Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Wed, 16 Aug 2023 13:33:10 +0200 Subject: [PATCH] track ff dictionary indexing memory consumption --- columnar/src/columnar/writer/mod.rs | 6 +++++- columnar/src/dictionary.rs | 7 +++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/columnar/src/columnar/writer/mod.rs b/columnar/src/columnar/writer/mod.rs index b2f8259e8c..56e557b157 100644 --- a/columnar/src/columnar/writer/mod.rs +++ b/columnar/src/columnar/writer/mod.rs @@ -79,7 +79,6 @@ fn mutate_or_create_column( impl ColumnarWriter { pub fn mem_usage(&self) -> usize { - // TODO add dictionary builders. self.arena.mem_usage() + self.numerical_field_hash_map.mem_usage() + self.bool_field_hash_map.mem_usage() @@ -87,6 +86,11 @@ impl ColumnarWriter { + self.str_field_hash_map.mem_usage() + self.ip_addr_field_hash_map.mem_usage() + self.datetime_field_hash_map.mem_usage() + + self + .dictionaries + .iter() + .map(|dict| dict.mem_usage()) + .sum::<usize>() } /// Returns the list of doc ids from 0..num_docs sorted by the `sort_field` diff --git a/columnar/src/dictionary.rs b/columnar/src/dictionary.rs index 82ccb91df8..cdf10f357b 100644 --- a/columnar/src/dictionary.rs +++ b/columnar/src/dictionary.rs @@ -32,6 +32,7 @@ pub struct OrderedId(pub u32); #[derive(Default)] pub(crate) struct DictionaryBuilder { dict: FnvHashMap<Vec<u8>, UnorderedId>, + memory_consumption: usize, } impl DictionaryBuilder { @@ -43,6 +44,8 @@ impl DictionaryBuilder { } let new_id = UnorderedId(self.dict.len() as u32); self.dict.insert(term.to_vec(), new_id); + self.memory_consumption += term.len(); + self.memory_consumption += 40; // Term Metadata + HashMap overhead new_id } @@ -63,6 +66,10 @@ impl DictionaryBuilder { sstable_builder.finish()?; Ok(TermIdMapping { unordered_to_ord }) } + + pub(crate) fn mem_usage(&self) -> usize { + self.memory_consumption + } } #[cfg(test)]