From 7bf5764c13e5557e8b743092c8b121c6f3ecf4fe Mon Sep 17 00:00:00 2001 From: Iker Barriocanal <32816711+iker-barriocanal@users.noreply.github.com> Date: Mon, 20 Mar 2023 11:20:23 +0100 Subject: [PATCH 1/5] Revert span.data typing This reverts commit b710ffdaa1eb0f95e5a0cd5fa80ee10fc4d73e5a. --- relay-general/src/pii/processor.rs | 94 +------------------ ...r__tests__scrub_span_data_is_scrubbed.snap | 30 ------ ...__tests__scrub_span_data_not_scrubbed.snap | 11 --- ...s__scrub_span_data_object_is_scrubbed.snap | 35 ------- relay-general/src/protocol/span.rs | 52 +--------- relay-general/src/protocol/types.rs | 20 ---- 6 files changed, 5 insertions(+), 237 deletions(-) delete mode 100644 relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_is_scrubbed.snap delete mode 100644 relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_not_scrubbed.snap delete mode 100644 relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_object_is_scrubbed.snap diff --git a/relay-general/src/pii/processor.rs b/relay-general/src/pii/processor.rs index 0df4a663c6..937c583f5f 100644 --- a/relay-general/src/pii/processor.rs +++ b/relay-general/src/pii/processor.rs @@ -385,16 +385,14 @@ fn insert_replacement_chunks(rule: &RuleRef, text: &str, output: &mut Vec, + pub data: Annotated>, // TODO remove retain when the api stabilizes /// Additional arbitrary fields for forwards compatibility. @@ -57,8 +55,6 @@ pub struct Span { #[cfg(test)] mod tests { - use std::collections::BTreeMap; - use chrono::{TimeZone, Utc}; use similar_asserts::assert_eq; @@ -75,17 +71,7 @@ mod tests { "op": "operation", "span_id": "fa90fdead5f74052", "trace_id": "4c79f60c11214eb38604f4ae0781bfb2", - "status": "ok", - "data": { - "http": { - "query": "is_sample_query=true", - "not_query": "this is not the query" - }, - "more_fields": { - "how_many": "yes", - "many": "a lot!" - } - } + "status": "ok" }"#; let span = Annotated::new(Span { @@ -99,38 +85,6 @@ mod tests { trace_id: Annotated::new(TraceId("4c79f60c11214eb38604f4ae0781bfb2".into())), span_id: Annotated::new(SpanId("fa90fdead5f74052".into())), status: Annotated::new(SpanStatus::Ok), - data: Annotated::new(DataElement { - http: Annotated::new(HttpElement { - query: Annotated::new(Value::String("is_sample_query=true".to_owned())), - other: { - let mut map = BTreeMap::new(); - map.insert( - "not_query".to_owned(), - Annotated::new(Value::String("this is not the query".to_owned())), - ); - map - }, - }), - other: { - let mut inner_map = BTreeMap::new(); - inner_map.insert( - "how_many".to_owned(), - Annotated::new(Value::String("yes".to_owned())), - ); - inner_map.insert( - "many".to_owned(), - Annotated::new(Value::String("a lot!".to_owned())), - ); - - let mut outter_map = BTreeMap::new(); - outter_map.insert( - "more_fields".to_owned(), - Annotated::new(Value::Object(inner_map)), - ); - - outter_map - }, - }), ..Default::default() }); assert_eq!(json, span.to_json_pretty().unwrap()); diff --git a/relay-general/src/protocol/types.rs b/relay-general/src/protocol/types.rs index a999b47956..593f5cc03d 100644 --- a/relay-general/src/protocol/types.rs +++ b/relay-general/src/protocol/types.rs @@ -1115,26 +1115,6 @@ impl schemars::JsonSchema for Timestamp { } } -#[derive(Clone, Debug, Default, PartialEq, Empty, FromValue, IntoValue, ProcessValue)] -#[cfg_attr(feature = "jsonschema", derive(JsonSchema))] -pub struct DataElement { - #[metastructure(pii = "maybe")] - pub http: Annotated, - - #[metastructure(additional_properties, retain = "true", pii = "maybe")] - pub other: Object, -} - -#[derive(Clone, Debug, Default, PartialEq, Empty, FromValue, IntoValue, ProcessValue)] -#[cfg_attr(feature = "jsonschema", derive(JsonSchema))] -pub struct HttpElement { - #[metastructure(pii = "true")] - pub query: Annotated, - - #[metastructure(additional_properties, retain = "true", pii = "maybe")] - pub other: Object, -} - #[cfg(test)] mod tests { use similar_asserts::assert_eq; From 9edec28206419cc5414f846525152544f2a81e40 Mon Sep 17 00:00:00 2001 From: Iker Barriocanal <32816711+iker-barriocanal@users.noreply.github.com> Date: Mon, 20 Mar 2023 15:15:38 +0100 Subject: [PATCH 2/5] PII scrub span.data by default --- relay-general/src/pii/processor.rs | 113 ++++++++++++++++++++++++++++- relay-general/src/protocol/span.rs | 3 +- 2 files changed, 113 insertions(+), 3 deletions(-) diff --git a/relay-general/src/pii/processor.rs b/relay-general/src/pii/processor.rs index 937c583f5f..9b5bbe9830 100644 --- a/relay-general/src/pii/processor.rs +++ b/relay-general/src/pii/processor.rs @@ -392,7 +392,7 @@ mod tests { use crate::processor::process_value; use crate::protocol::{ Addr, DebugImage, DebugMeta, Event, ExtraValue, Headers, LogEntry, NativeDebugImage, - Request, TagEntry, Tags, + Request, Span, TagEntry, Tags, }; use crate::testutils::assert_annotated_snapshot; use crate::types::{Annotated, FromValue, Object, Value}; @@ -1029,4 +1029,115 @@ mod tests { ); assert_eq!(chunks, res); } + + #[test] + fn test_scrub_span_data_http_not_scrubbed() { + let mut span: Annotated = Annotated::from_json( + r#"{ + "data": { + "http": { + "query": "dance=true" + } + } + }"#, + ) + .unwrap(); + + let ds_config = DataScrubbingConfig { + scrub_data: true, + scrub_defaults: true, + ..Default::default() + }; + let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap(); + let mut pii_processor = PiiProcessor::new(pii_config.compiled()); + + process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap(); + assert_annotated_snapshot!(span); + } + + #[test] + fn test_scrub_span_data_http_strings_are_scrubbed() { + let mut span: Annotated = Annotated::from_json( + r#"{ + "data": { + "http": { + "query": "ccnumber=5105105105105100&process_id=123", + "fragment": "ccnumber=5105105105105100,process_id=123" + } + } + }"#, + ) + .unwrap(); + + let ds_config = DataScrubbingConfig { + scrub_data: true, + scrub_defaults: true, + ..Default::default() + }; + let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap(); + let mut pii_processor = PiiProcessor::new(pii_config.compiled()); + + process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap(); + assert_annotated_snapshot!(span); + } + + #[test] + fn test_scrub_span_data_http_objects_are_scrubbed() { + let mut span: Annotated = Annotated::from_json( + r#"{ + "data": { + "http": { + "query": { + "ccnumber": "5105105105105100", + "process_id": "123" + }, + "fragment": { + "ccnumber": "5105105105105100", + "process_id": "123" + } + } + } + }"#, + ) + .unwrap(); + + let ds_config = DataScrubbingConfig { + scrub_data: true, + scrub_defaults: true, + ..Default::default() + }; + let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap(); + let mut pii_processor = PiiProcessor::new(pii_config.compiled()); + + process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap(); + assert_annotated_snapshot!(span); + } + + #[test] + fn test_scrub_span_data_untyped_props_are_scrubbed() { + let mut span: Annotated = Annotated::from_json( + r#"{ + "data": { + "untyped": "ccnumber=5105105105105100", + "more_untyped": { + "typed": "no", + "scrubbed": "yes", + "ccnumber": "5105105105105100" + } + } + }"#, + ) + .unwrap(); + + let ds_config = DataScrubbingConfig { + scrub_data: true, + scrub_defaults: true, + ..Default::default() + }; + let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap(); + let mut pii_processor = PiiProcessor::new(pii_config.compiled()); + + process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap(); + assert_annotated_snapshot!(span); + } } diff --git a/relay-general/src/protocol/span.rs b/relay-general/src/protocol/span.rs index 4ba96b0bdf..7afa7b21bb 100644 --- a/relay-general/src/protocol/span.rs +++ b/relay-general/src/protocol/span.rs @@ -44,7 +44,7 @@ pub struct Span { pub tags: Annotated>, /// Arbitrary additional data on a span, like `extra` on the top-level event. - #[metastructure(pii = "maybe")] + #[metastructure(pii = "true")] pub data: Annotated>, // TODO remove retain when the api stabilizes @@ -59,7 +59,6 @@ mod tests { use similar_asserts::assert_eq; use super::*; - use crate::protocol::HttpElement; #[test] fn test_span_serialization() { From a88bbc19f7bc8104cd2352590c3ae4ef10c7afb3 Mon Sep 17 00:00:00 2001 From: Iker Barriocanal <32816711+iker-barriocanal@users.noreply.github.com> Date: Mon, 20 Mar 2023 20:49:12 +0100 Subject: [PATCH 3/5] Add test snapshots --- ...__tests__scrub_span_data_not_scrubbed.snap | 11 ++++ ...s__scrub_span_data_object_is_scrubbed.snap | 54 +++++++++++++++++++ ...s__scrub_span_data_string_is_scrubbed.snap | 44 +++++++++++++++ ..._span_data_untyped_props_are_scrubbed.snap | 46 ++++++++++++++++ 4 files changed, 155 insertions(+) create mode 100644 relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_not_scrubbed.snap create mode 100644 relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_object_is_scrubbed.snap create mode 100644 relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_string_is_scrubbed.snap create mode 100644 relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_untyped_props_are_scrubbed.snap diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_not_scrubbed.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_not_scrubbed.snap new file mode 100644 index 0000000000..9f35cbda46 --- /dev/null +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_not_scrubbed.snap @@ -0,0 +1,11 @@ +--- +source: relay-general/src/pii/processor.rs +expression: span +--- +{ + "data": { + "http": { + "query": "dance=true" + } + } +} diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_object_is_scrubbed.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_object_is_scrubbed.snap new file mode 100644 index 0000000000..676a9401da --- /dev/null +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_object_is_scrubbed.snap @@ -0,0 +1,54 @@ +--- +source: relay-general/src/pii/processor.rs +expression: span +--- +{ + "data": { + "http": { + "fragment": { + "ccnumber": "[Filtered]", + "process_id": "123" + }, + "query": { + "ccnumber": "[Filtered]", + "process_id": "123" + } + } + }, + "_meta": { + "data": { + "http": { + "fragment": { + "ccnumber": { + "": { + "rem": [ + [ + "@creditcard:filter", + "s", + 0, + 10 + ] + ], + "len": 16 + } + } + }, + "query": { + "ccnumber": { + "": { + "rem": [ + [ + "@creditcard:filter", + "s", + 0, + 10 + ] + ], + "len": 16 + } + } + } + } + } + } +} diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_string_is_scrubbed.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_string_is_scrubbed.snap new file mode 100644 index 0000000000..6d8e7685ae --- /dev/null +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_string_is_scrubbed.snap @@ -0,0 +1,44 @@ +--- +source: relay-general/src/pii/processor.rs +expression: span +--- +{ + "data": { + "http": { + "fragment": "ccnumber=[Filtered],process_id=123", + "query": "ccnumber=[Filtered]&process_id=123" + } + }, + "_meta": { + "data": { + "http": { + "fragment": { + "": { + "rem": [ + [ + "@creditcard:filter", + "s", + 9, + 19 + ] + ], + "len": 40 + } + }, + "query": { + "": { + "rem": [ + [ + "@creditcard:filter", + "s", + 9, + 19 + ] + ], + "len": 40 + } + } + } + } + } +} diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_untyped_props_are_scrubbed.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_untyped_props_are_scrubbed.snap new file mode 100644 index 0000000000..c0f188f6b6 --- /dev/null +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_untyped_props_are_scrubbed.snap @@ -0,0 +1,46 @@ +--- +source: relay-general/src/pii/processor.rs +expression: span +--- +{ + "data": { + "more_untyped": { + "ccnumber": "[Filtered]", + "scrubbed": "yes", + "typed": "no" + }, + "untyped": "ccnumber=[Filtered]" + }, + "_meta": { + "data": { + "more_untyped": { + "ccnumber": { + "": { + "rem": [ + [ + "@creditcard:filter", + "s", + 0, + 10 + ] + ], + "len": 16 + } + } + }, + "untyped": { + "": { + "rem": [ + [ + "@creditcard:filter", + "s", + 9, + 19 + ] + ], + "len": 25 + } + } + } + } +} From ce923400105be2c81ed0d303f32ef470e0adaef1 Mon Sep 17 00:00:00 2001 From: Iker Barriocanal <32816711+iker-barriocanal@users.noreply.github.com> Date: Mon, 20 Mar 2023 20:53:30 +0100 Subject: [PATCH 4/5] Update changelog --- CHANGELOG.md | 2 +- py/CHANGELOG.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd8e194ace..855ae0a1bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ - Indicate if OS-version may be frozen with '>=' prefix. ([#1945](https://github.com/getsentry/relay/pull/1945)) - Normalize monitor slug parameters into slugs. ([#1913](https://github.com/getsentry/relay/pull/1913)) - Smart trim loggers for Java platforms. ([#1941](https://github.com/getsentry/relay/pull/1941)) - +- PII scrub `span.data` by default. ([#1953](https://github.com/getsentry/relay/pull/1953)) ## 23.3.0 diff --git a/py/CHANGELOG.md b/py/CHANGELOG.md index a7072cf053..5e2929a6a3 100644 --- a/py/CHANGELOG.md +++ b/py/CHANGELOG.md @@ -3,6 +3,7 @@ ## Unreleased - Add `thread.state` field to protocol. ([#1896](https://github.com/getsentry/relay/pull/1896)) +- PII scrub `span.data` by default. ([#1953](https://github.com/getsentry/relay/pull/1953)) ## 0.8.19 From e600f7dd9b77a10aaf6f912e633f4e52ec01756d Mon Sep 17 00:00:00 2001 From: Iker Barriocanal <32816711+iker-barriocanal@users.noreply.github.com> Date: Mon, 20 Mar 2023 21:10:20 +0100 Subject: [PATCH 5/5] fix snapshots --- ...ts__scrub_span_data_http_not_scrubbed.snap | 11 ++++ ...b_span_data_http_objects_are_scrubbed.snap | 54 +++++++++++++++++++ ...b_span_data_http_strings_are_scrubbed.snap | 44 +++++++++++++++ 3 files changed, 109 insertions(+) create mode 100644 relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_http_not_scrubbed.snap create mode 100644 relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_http_objects_are_scrubbed.snap create mode 100644 relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_http_strings_are_scrubbed.snap diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_http_not_scrubbed.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_http_not_scrubbed.snap new file mode 100644 index 0000000000..9f35cbda46 --- /dev/null +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_http_not_scrubbed.snap @@ -0,0 +1,11 @@ +--- +source: relay-general/src/pii/processor.rs +expression: span +--- +{ + "data": { + "http": { + "query": "dance=true" + } + } +} diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_http_objects_are_scrubbed.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_http_objects_are_scrubbed.snap new file mode 100644 index 0000000000..676a9401da --- /dev/null +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_http_objects_are_scrubbed.snap @@ -0,0 +1,54 @@ +--- +source: relay-general/src/pii/processor.rs +expression: span +--- +{ + "data": { + "http": { + "fragment": { + "ccnumber": "[Filtered]", + "process_id": "123" + }, + "query": { + "ccnumber": "[Filtered]", + "process_id": "123" + } + } + }, + "_meta": { + "data": { + "http": { + "fragment": { + "ccnumber": { + "": { + "rem": [ + [ + "@creditcard:filter", + "s", + 0, + 10 + ] + ], + "len": 16 + } + } + }, + "query": { + "ccnumber": { + "": { + "rem": [ + [ + "@creditcard:filter", + "s", + 0, + 10 + ] + ], + "len": 16 + } + } + } + } + } + } +} diff --git a/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_http_strings_are_scrubbed.snap b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_http_strings_are_scrubbed.snap new file mode 100644 index 0000000000..6d8e7685ae --- /dev/null +++ b/relay-general/src/pii/snapshots/relay_general__pii__processor__tests__scrub_span_data_http_strings_are_scrubbed.snap @@ -0,0 +1,44 @@ +--- +source: relay-general/src/pii/processor.rs +expression: span +--- +{ + "data": { + "http": { + "fragment": "ccnumber=[Filtered],process_id=123", + "query": "ccnumber=[Filtered]&process_id=123" + } + }, + "_meta": { + "data": { + "http": { + "fragment": { + "": { + "rem": [ + [ + "@creditcard:filter", + "s", + 9, + 19 + ] + ], + "len": 40 + } + }, + "query": { + "": { + "rem": [ + [ + "@creditcard:filter", + "s", + 9, + 19 + ] + ], + "len": 40 + } + } + } + } + } +}