From 8b2d8f92a9ffdb5676efce9a12a65075e1494002 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Tue, 22 Jan 2019 18:49:39 +0100 Subject: [PATCH 1/2] fix(general): Drop long tags correctly --- Cargo.lock | 7 +++++++ cabi/Cargo.lock | 7 +++++++ general/Cargo.toml | 4 ++-- general/src/processor/chunks.rs | 8 ++++---- general/src/store/normalize.rs | 4 ++-- general/src/store/trimming.rs | 4 ++-- 6 files changed, 24 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4ecc1a5010..3d0b6c6717 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -299,6 +299,11 @@ name = "byte-tools" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "bytecount" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "byteorder" version = "1.3.0" @@ -1877,6 +1882,7 @@ dependencies = [ name = "semaphore-general" version = "0.4.3" dependencies = [ + "bytecount 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "cookie 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "debugid 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2826,6 +2832,7 @@ dependencies = [ "checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39" "checksum byte-tools 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "560c32574a12a89ecd91f5e742165893f86e3ab98d21f8ea548658eb9eef5f40" "checksum byte-tools 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "980479e6fde23246dfb54d47580d66b4e99202e7579c5eaa9fe10ecb5ebd2182" +"checksum bytecount 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c219c0335c21a8bd79587ce5aee9f64aff1d0bd7a2cca7a58a815f9780cd3b0d" "checksum byteorder 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "60f0b0d4c0a382d2734228fd12b5a6b5dac185c60e938026fd31b265b94f9bd2" "checksum bytes 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)" = "40ade3d27603c2cb345eb0912aec461a6dec7e06a4ae48589904e808335c7afa" "checksum cadence 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3970b43fb5b4049ac086516d1ca06f929924aa0ed38566ba52d6cbb291892a40" diff --git a/cabi/Cargo.lock b/cabi/Cargo.lock index 502d6b5505..7de98aba1c 100644 --- a/cabi/Cargo.lock +++ b/cabi/Cargo.lock @@ -107,6 +107,11 @@ name = "byte-tools" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "bytecount" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "byteorder" version = "1.3.0" @@ -814,6 +819,7 @@ dependencies = [ name = "semaphore-general" version = "0.4.3" dependencies = [ + "bytecount 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "cookie 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "debugid 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1170,6 +1176,7 @@ dependencies = [ "checksum block-padding 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4fc4358306e344bf9775d0197fd00d2603e5afb0771bb353538630f022068ea3" "checksum byte-tools 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "560c32574a12a89ecd91f5e742165893f86e3ab98d21f8ea548658eb9eef5f40" "checksum byte-tools 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "980479e6fde23246dfb54d47580d66b4e99202e7579c5eaa9fe10ecb5ebd2182" +"checksum bytecount 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c219c0335c21a8bd79587ce5aee9f64aff1d0bd7a2cca7a58a815f9780cd3b0d" "checksum byteorder 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "60f0b0d4c0a382d2734228fd12b5a6b5dac185c60e938026fd31b265b94f9bd2" "checksum cadence 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3970b43fb5b4049ac086516d1ca06f929924aa0ed38566ba52d6cbb291892a40" "checksum cc 1.0.28 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4a8b715cb4597106ea87c7c84b2f1d452c7492033765df7f32651e66fcf749" diff --git a/general/Cargo.toml b/general/Cargo.toml index 6b848d72a1..39a6676eb5 100644 --- a/general/Cargo.toml +++ b/general/Cargo.toml @@ -5,12 +5,14 @@ authors = ["Armin Ronacher "] edition = "2018" [dependencies] +bytecount = "0.5.0" chrono = "0.4.6" cookie = { version = "0.11.0", features = ["percent-encode"] } debugid = { version = "0.3.1", features = ["with_serde"] } dynfmt = { version = "0.1.1", features = ["python", "curly"] } failure = "0.1.5" hmac = "0.7.0" +insta = "0.4.1" itertools = "0.8.0" lazy_static = "1.2.0" maxminddb = "0.12.0" @@ -25,11 +27,9 @@ serde_urlencoded = "0.5.4" smallvec = { version = "0.6.8", features = ["serde"] } url = "1.7.2" uuid = { version = "0.7.1", features = ["v4", "serde"] } -insta = "0.4.1" [dev-dependencies] difference = "2.0.0" [features] bench = [] - diff --git a/general/src/processor/chunks.rs b/general/src/processor/chunks.rs index 39628420ee..b22c78ebaa 100644 --- a/general/src/processor/chunks.rs +++ b/general/src/processor/chunks.rs @@ -64,9 +64,9 @@ impl<'a> Chunk<'a> { self.as_str().len() } - /// The number of chars in this chunk. - pub fn chars(&self) -> usize { - self.as_str().chars().count() + /// The number of UTF-8 encoded Unicode codepoints in this chunk. + pub fn count(&self) -> usize { + bytecount::num_chars(self.as_str().as_bytes()) } /// Determines whether this chunk is empty. @@ -169,7 +169,7 @@ where for remark in remarks.into_iter() { meta.add_remark(remark); } - meta.set_original_length(Some(value.chars().count())); + meta.set_original_length(Some(bytecount::num_chars(value.as_bytes()))); *value = new_value; } } diff --git a/general/src/store/normalize.rs b/general/src/store/normalize.rs index d233f6375d..9e679d766b 100644 --- a/general/src/store/normalize.rs +++ b/general/src/store/normalize.rs @@ -148,14 +148,14 @@ impl<'a> NormalizeProcessor<'a> { for tag in tags.iter_mut() { tag.apply(|tag, meta| { if let Some(key) = tag.key() { - if key.len() > MaxChars::TagKey.limit() { + if bytecount::num_chars(key.as_bytes()) > MaxChars::TagKey.limit() { meta.add_error(Error::new(ErrorKind::ValueTooLong)); return ValueAction::DeleteHard; } } if let Some(value) = tag.value() { - if value.len() > MaxChars::TagValue.limit() { + if bytecount::num_chars(value.as_bytes()) > MaxChars::TagValue.limit() { meta.add_error(Error::new(ErrorKind::ValueTooLong)); return ValueAction::DeleteHard; } diff --git a/general/src/store/trimming.rs b/general/src/store/trimming.rs index 8d7a715530..1319d7aba4 100644 --- a/general/src/store/trimming.rs +++ b/general/src/store/trimming.rs @@ -185,7 +185,7 @@ fn trim_string(value: &mut String, meta: &mut Meta, max_chars: MaxChars) { let soft_limit = max_chars.limit(); let hard_limit = soft_limit + max_chars.allowance(); - if value.chars().count() <= hard_limit { + if bytecount::num_chars(value.as_bytes()) <= hard_limit { return; } @@ -194,7 +194,7 @@ fn trim_string(value: &mut String, meta: &mut Meta, max_chars: MaxChars) { let mut new_chunks = vec![]; for chunk in chunks { - let chunk_chars = chunk.chars(); + let chunk_chars = chunk.count(); // if the entire chunk fits, just put it in if length + chunk_chars < soft_limit { From 6c9d02934fce02b381b19944b140eb5b4f943dc3 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Tue, 22 Jan 2019 19:02:08 +0100 Subject: [PATCH 2/2] fix(general): Remove empty tags --- general/src/store/normalize.rs | 37 ++++++++++++++++++++++++++++++++++ general/src/store/schema.rs | 2 +- general/src/types/meta.rs | 6 ++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/general/src/store/normalize.rs b/general/src/store/normalize.rs index 9e679d766b..0ed9753c41 100644 --- a/general/src/store/normalize.rs +++ b/general/src/store/normalize.rs @@ -155,6 +155,11 @@ impl<'a> NormalizeProcessor<'a> { } if let Some(value) = tag.value() { + if value.is_empty() { + meta.add_error(Error::nonempty()); + return ValueAction::DeleteHard; + } + if bytecount::num_chars(value.as_bytes()) > MaxChars::TagValue.limit() { meta.add_error(Error::new(ErrorKind::ValueTooLong)); return ValueAction::DeleteHard; @@ -743,6 +748,38 @@ fn test_internal_tags_removed() { assert_eq!(event.value().unwrap().tags.value().unwrap().len(), 1); } +#[test] +fn test_empty_tags_removed() { + let mut event = Annotated::new(Event { + tags: Annotated::new(Tags(PairList(vec![ + Annotated::new(TagEntry( + Annotated::new("".to_string()), + Annotated::new("foo".to_string()), + )), + Annotated::new(TagEntry( + Annotated::new("foo".to_string()), + Annotated::new("".to_string()), + )), + Annotated::new(TagEntry( + Annotated::new("something".to_string()), + Annotated::new("else".to_string()), + )), + ]))), + ..Default::default() + }); + + let mut processor = NormalizeProcessor::new(Arc::new(StoreConfig::default()), None); + process_value(&mut event, &mut processor, ProcessingState::root()); + + let tags = event.value().unwrap().tags.value().unwrap(); + assert_eq!(tags.len(), 2); + + assert_eq!( + tags.get(0).unwrap(), + &Annotated::from_error(Error::nonempty(), None) + ) +} + #[test] fn test_tags_deduplicated() { let mut event = Annotated::new(Event { diff --git a/general/src/store/schema.rs b/general/src/store/schema.rs index c3dd62f2a8..f1a85abea0 100644 --- a/general/src/store/schema.rs +++ b/general/src/store/schema.rs @@ -63,7 +63,7 @@ where T: Empty, { if state.attrs().nonempty && value.is_empty() { - meta.add_error(Error::expected("a non-empty value")); + meta.add_error(Error::nonempty()); ValueAction::DeleteHard } else { ValueAction::Keep diff --git a/general/src/types/meta.rs b/general/src/types/meta.rs index 5683cfbd27..d7abeecdd6 100644 --- a/general/src/types/meta.rs +++ b/general/src/types/meta.rs @@ -315,6 +315,12 @@ impl Error { }) } + /// Creates an error that describes an expected non-empty value. + pub fn nonempty() -> Self { + // TODO: Replace `invalid_data` this with an explicity error constant for empty values + Error::invalid("expected a non-empty value") + } + /// Returns the kind of this error. pub fn kind(&self) -> &ErrorKind { &self.kind