Skip to content

Commit

Permalink
fix(general): Drop long tags correctly (#165)
Browse files Browse the repository at this point in the history
  • Loading branch information
jan-auer authored Jan 23, 2019
1 parent 40d10bd commit 4950704
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 11 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions cabi/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions general/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@ authors = ["Armin Ronacher <[email protected]>"]
edition = "2018"

[dependencies]
bytecount = "0.5.0"
chrono = "0.4.6"
cookie = { version = "0.11.0", features = ["percent-encode"] }
debugid = { version = "0.3.1", features = ["with_serde"] }
dynfmt = { version = "0.1.1", features = ["python", "curly"] }
failure = "0.1.5"
hmac = "0.7.0"
insta = "0.5.1"
itertools = "0.8.0"
lazy_static = "1.2.0"
maxminddb = "0.12.0"
Expand All @@ -25,11 +27,9 @@ serde_urlencoded = "0.5.4"
smallvec = { version = "0.6.8", features = ["serde"] }
url = "1.7.2"
uuid = { version = "0.7.1", features = ["v4", "serde"] }
insta = "0.5.1"

[dev-dependencies]
difference = "2.0.0"

[features]
bench = []

8 changes: 4 additions & 4 deletions general/src/processor/chunks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ impl<'a> Chunk<'a> {
self.as_str().len()
}

/// The number of chars in this chunk.
pub fn chars(&self) -> usize {
self.as_str().chars().count()
/// The number of UTF-8 encoded Unicode codepoints in this chunk.
pub fn count(&self) -> usize {
bytecount::num_chars(self.as_str().as_bytes())
}

/// Determines whether this chunk is empty.
Expand Down Expand Up @@ -169,7 +169,7 @@ where
for remark in remarks.into_iter() {
meta.add_remark(remark);
}
meta.set_original_length(Some(value.chars().count()));
meta.set_original_length(Some(bytecount::num_chars(value.as_bytes())));
*value = new_value;
}
}
Expand Down
41 changes: 39 additions & 2 deletions general/src/store/normalize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,19 @@ impl<'a> NormalizeProcessor<'a> {
for tag in tags.iter_mut() {
tag.apply(|tag, meta| {
if let Some(key) = tag.key() {
if key.len() > MaxChars::TagKey.limit() {
if bytecount::num_chars(key.as_bytes()) > MaxChars::TagKey.limit() {
meta.add_error(Error::new(ErrorKind::ValueTooLong));
return ValueAction::DeleteHard;
}
}

if let Some(value) = tag.value() {
if value.len() > MaxChars::TagValue.limit() {
if value.is_empty() {
meta.add_error(Error::nonempty());
return ValueAction::DeleteHard;
}

if bytecount::num_chars(value.as_bytes()) > MaxChars::TagValue.limit() {
meta.add_error(Error::new(ErrorKind::ValueTooLong));
return ValueAction::DeleteHard;
}
Expand Down Expand Up @@ -743,6 +748,38 @@ fn test_internal_tags_removed() {
assert_eq!(event.value().unwrap().tags.value().unwrap().len(), 1);
}

#[test]
fn test_empty_tags_removed() {
let mut event = Annotated::new(Event {
tags: Annotated::new(Tags(PairList(vec![
Annotated::new(TagEntry(
Annotated::new("".to_string()),
Annotated::new("foo".to_string()),
)),
Annotated::new(TagEntry(
Annotated::new("foo".to_string()),
Annotated::new("".to_string()),
)),
Annotated::new(TagEntry(
Annotated::new("something".to_string()),
Annotated::new("else".to_string()),
)),
]))),
..Default::default()
});

let mut processor = NormalizeProcessor::new(Arc::new(StoreConfig::default()), None);
process_value(&mut event, &mut processor, ProcessingState::root());

let tags = event.value().unwrap().tags.value().unwrap();
assert_eq!(tags.len(), 2);

assert_eq!(
tags.get(0).unwrap(),
&Annotated::from_error(Error::nonempty(), None)
)
}

#[test]
fn test_tags_deduplicated() {
let mut event = Annotated::new(Event {
Expand Down
2 changes: 1 addition & 1 deletion general/src/store/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ where
T: Empty,
{
if state.attrs().nonempty && value.is_empty() {
meta.add_error(Error::expected("a non-empty value"));
meta.add_error(Error::nonempty());
ValueAction::DeleteHard
} else {
ValueAction::Keep
Expand Down
4 changes: 2 additions & 2 deletions general/src/store/trimming.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ fn trim_string(value: &mut String, meta: &mut Meta, max_chars: MaxChars) {
let soft_limit = max_chars.limit();
let hard_limit = soft_limit + max_chars.allowance();

if value.chars().count() <= hard_limit {
if bytecount::num_chars(value.as_bytes()) <= hard_limit {
return;
}

Expand All @@ -194,7 +194,7 @@ fn trim_string(value: &mut String, meta: &mut Meta, max_chars: MaxChars) {
let mut new_chunks = vec![];

for chunk in chunks {
let chunk_chars = chunk.chars();
let chunk_chars = chunk.count();

// if the entire chunk fits, just put it in
if length + chunk_chars < soft_limit {
Expand Down
6 changes: 6 additions & 0 deletions general/src/types/meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,12 @@ impl Error {
})
}

/// Creates an error that describes an expected non-empty value.
pub fn nonempty() -> Self {
// TODO: Replace `invalid_data` this with an explicity error constant for empty values
Error::invalid("expected a non-empty value")
}

/// Returns the kind of this error.
pub fn kind(&self) -> &ErrorKind {
&self.kind
Expand Down

0 comments on commit 4950704

Please sign in to comment.