Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(general): Drop long tags correctly #165

Merged
merged 3 commits into from
Jan 23, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions cabi/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions general/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@ authors = ["Armin Ronacher <[email protected]>"]
edition = "2018"

[dependencies]
bytecount = "0.5.0"
chrono = "0.4.6"
cookie = { version = "0.11.0", features = ["percent-encode"] }
debugid = { version = "0.3.1", features = ["with_serde"] }
dynfmt = { version = "0.1.1", features = ["python", "curly"] }
failure = "0.1.5"
hmac = "0.7.0"
insta = "0.5.1"
itertools = "0.8.0"
lazy_static = "1.2.0"
maxminddb = "0.12.0"
Expand All @@ -25,11 +27,9 @@ serde_urlencoded = "0.5.4"
smallvec = { version = "0.6.8", features = ["serde"] }
url = "1.7.2"
uuid = { version = "0.7.1", features = ["v4", "serde"] }
insta = "0.5.1"

[dev-dependencies]
difference = "2.0.0"

[features]
bench = []

8 changes: 4 additions & 4 deletions general/src/processor/chunks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ impl<'a> Chunk<'a> {
self.as_str().len()
}

/// The number of chars in this chunk.
pub fn chars(&self) -> usize {
self.as_str().chars().count()
/// The number of UTF-8 encoded Unicode codepoints in this chunk.
pub fn count(&self) -> usize {
bytecount::num_chars(self.as_str().as_bytes())
}

/// Determines whether this chunk is empty.
Expand Down Expand Up @@ -169,7 +169,7 @@ where
for remark in remarks.into_iter() {
meta.add_remark(remark);
}
meta.set_original_length(Some(value.chars().count()));
meta.set_original_length(Some(bytecount::num_chars(value.as_bytes())));
*value = new_value;
}
}
Expand Down
41 changes: 39 additions & 2 deletions general/src/store/normalize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,19 @@ impl<'a> NormalizeProcessor<'a> {
for tag in tags.iter_mut() {
tag.apply(|tag, meta| {
if let Some(key) = tag.key() {
if key.len() > MaxChars::TagKey.limit() {
if bytecount::num_chars(key.as_bytes()) > MaxChars::TagKey.limit() {
meta.add_error(Error::new(ErrorKind::ValueTooLong));
return ValueAction::DeleteHard;
}
}

if let Some(value) = tag.value() {
if value.len() > MaxChars::TagValue.limit() {
if value.is_empty() {
meta.add_error(Error::nonempty());
return ValueAction::DeleteHard;
}

if bytecount::num_chars(value.as_bytes()) > MaxChars::TagValue.limit() {
meta.add_error(Error::new(ErrorKind::ValueTooLong));
return ValueAction::DeleteHard;
}
Expand Down Expand Up @@ -743,6 +748,38 @@ fn test_internal_tags_removed() {
assert_eq!(event.value().unwrap().tags.value().unwrap().len(), 1);
}

#[test]
fn test_empty_tags_removed() {
let mut event = Annotated::new(Event {
tags: Annotated::new(Tags(PairList(vec![
Annotated::new(TagEntry(
Annotated::new("".to_string()),
Annotated::new("foo".to_string()),
)),
Annotated::new(TagEntry(
Annotated::new("foo".to_string()),
Annotated::new("".to_string()),
)),
Annotated::new(TagEntry(
Annotated::new("something".to_string()),
Annotated::new("else".to_string()),
)),
]))),
..Default::default()
});

let mut processor = NormalizeProcessor::new(Arc::new(StoreConfig::default()), None);
process_value(&mut event, &mut processor, ProcessingState::root());

let tags = event.value().unwrap().tags.value().unwrap();
assert_eq!(tags.len(), 2);

assert_eq!(
tags.get(0).unwrap(),
&Annotated::from_error(Error::nonempty(), None)
)
}

#[test]
fn test_tags_deduplicated() {
let mut event = Annotated::new(Event {
Expand Down
2 changes: 1 addition & 1 deletion general/src/store/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ where
T: Empty,
{
if state.attrs().nonempty && value.is_empty() {
meta.add_error(Error::expected("a non-empty value"));
meta.add_error(Error::nonempty());
ValueAction::DeleteHard
} else {
ValueAction::Keep
Expand Down
4 changes: 2 additions & 2 deletions general/src/store/trimming.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ fn trim_string(value: &mut String, meta: &mut Meta, max_chars: MaxChars) {
let soft_limit = max_chars.limit();
let hard_limit = soft_limit + max_chars.allowance();

if value.chars().count() <= hard_limit {
if bytecount::num_chars(value.as_bytes()) <= hard_limit {
return;
}

Expand All @@ -194,7 +194,7 @@ fn trim_string(value: &mut String, meta: &mut Meta, max_chars: MaxChars) {
let mut new_chunks = vec![];

for chunk in chunks {
let chunk_chars = chunk.chars();
let chunk_chars = chunk.count();

// if the entire chunk fits, just put it in
if length + chunk_chars < soft_limit {
Expand Down
6 changes: 6 additions & 0 deletions general/src/types/meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,12 @@ impl Error {
})
}

/// Creates an error that describes an expected non-empty value.
pub fn nonempty() -> Self {
// TODO: Replace `invalid_data` this with an explicity error constant for empty values
Error::invalid("expected a non-empty value")
}

/// Returns the kind of this error.
pub fn kind(&self) -> &ErrorKind {
&self.kind
Expand Down