Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(general): Scrub all fields with IP address #1725

Merged
merged 11 commits into from
Jan 12, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
- Add max replay size configuration parameter. ([#1694](https://github.com/getsentry/relay/pull/1694))
- Add nonchunked replay recording message type. ([#1653](https://github.com/getsentry/relay/pull/1653))
- Add `abnormal_mechanism` field to SessionUpdate protocol. ([#1665](https://github.com/getsentry/relay/pull/1665))
- Scrub all fields with IP addresses rather than only known IP address fields. ([#1725](https://github.com/getsentry/relay/pull/1725))

**Bug Fixes**:

Expand Down
72 changes: 41 additions & 31 deletions relay-general/src/pii/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,6 @@ use crate::pii::{
};
use crate::processor::{SelectorPathItem, SelectorSpec, ValueType};

// XXX: Move to @ip rule for better IP address scrubbing. Right now we just try to keep
// compatibility with Python.
static KNOWN_IP_FIELDS: Lazy<SelectorSpec> = Lazy::new(|| {
"($request.env.REMOTE_ADDR | $user.ip_address | $sdk.client_ip)"
.parse()
.unwrap()
});

/// Fields that the legacy data scrubber cannot strip.
///
/// We define this list independently of `metastructure(pii = true/false)` because the new PII
Expand All @@ -27,6 +19,13 @@ static DATASCRUBBER_IGNORE: Lazy<SelectorSpec> = Lazy::new(|| {
.unwrap()
});

/// Fields that are known to contain IPs. Used for legacy IP scrubbing.
static KNOWN_IP_FIELDS: Lazy<SelectorSpec> = Lazy::new(|| {
"($request.env.REMOTE_ADDR | $user.ip_address | $sdk.client_ip)"
.parse()
.unwrap()
});

pub fn to_pii_config(
datascrubbing_config: &DataScrubbingConfig,
) -> Result<Option<PiiConfig>, PiiConfigError> {
Expand All @@ -39,7 +38,11 @@ pub fn to_pii_config(
}

if datascrubbing_config.scrub_ip_addresses {
// legacy(?) scrubs all fields that are known to have IPs regardless of actual content
applications.insert(KNOWN_IP_FIELDS.clone(), vec!["@anything:remove".to_owned()]);

// checks actual contents of all fields and scrubs where there is an IP address
applied_rules.push("@ip:replace".to_owned());
}

if datascrubbing_config.scrub_data {
Expand Down Expand Up @@ -217,7 +220,8 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv
},
"applications": {
"($string || $number || $array) && !(debug_meta.** || $frame.filename || $frame.abs_path || $logentry.formatted || $error.value)": [
"@common:filter"
"@common:filter",
"@ip:replace"
],
"$http.env.REMOTE_ADDR || $user.ip_address || $sdk.client_ip": [
"@anything:remove"
Expand All @@ -242,7 +246,8 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv
},
"applications": {
"($string || $number || $array) && !(debug_meta.** || $frame.filename || $frame.abs_path || $logentry.formatted || $error.value)": [
"@common:filter"
"@common:filter",
"@ip:replace"
],
"$http.env.REMOTE_ADDR || $user.ip_address || $sdk.client_ip": [
"@anything:remove"
Expand Down Expand Up @@ -277,6 +282,7 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv
"applications": {
"($string || $number || $array) && !(debug_meta.** || $frame.filename || $frame.abs_path || $logentry.formatted || $error.value)": [
"@common:filter",
"@ip:replace",
"strip-fields"
],
"$http.env.REMOTE_ADDR || $user.ip_address || $sdk.client_ip": [
Expand Down Expand Up @@ -315,7 +321,8 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv
},
"applications": {
"($string || $number || $array) && !(debug_meta.** || $frame.filename || $frame.abs_path || $logentry.formatted || $error.value) && !foobar": [
"@common:filter"
"@common:filter",
"@ip:replace"
],
"$http.env.REMOTE_ADDR || $user.ip_address || $sdk.client_ip": [
"@anything:remove"
Expand All @@ -341,6 +348,9 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv
"hashKey": null
},
"applications": {
"($string || $number || $array) && !(debug_meta.** || $frame.filename || $frame.abs_path || $logentry.formatted || $error.value)": [
"@ip:replace"
],
"$http.env.REMOTE_ADDR || $user.ip_address || $sdk.client_ip": [
"@anything:remove"
]
Expand Down Expand Up @@ -409,23 +419,6 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv
assert_annotated_snapshot!(data);
}

#[test]
fn test_sdk_client_ip_stripped() {
let mut data = Event::from_value(
serde_json::json!({
"sdk": {
"client_ip": "127.0.0.1"
}
})
.into(),
);

let pii_config = simple_enabled_pii_config();
let mut pii_processor = PiiProcessor::new(pii_config.compiled());
process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
assert_annotated_snapshot!(data);
}

#[test]
fn test_user() {
let mut data = Event::from_value(
Expand All @@ -445,14 +438,30 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv
assert_annotated_snapshot!(data);
}

/// Checks that any fields containing an IP-address is scrubbed.
/// Even if it doesn't make sense for there to be an IP-address
/// such as in the username field.
#[test]
fn test_user_ip_stripped() {
fn test_ip_stripped() {
let mut data = Event::from_value(
serde_json::json!({
"user": {
"username": "secret",
"ip_address": "73.133.27.120",
"username": "73.133.27.120", // should be stripped despite not being "known ip field"
"ip_address": "should be stripped despite lacking ip address",
"data": sensitive_vars()
},
"breadcrumbs": {
"values": [
{
"message": "73.133.27.120",
"data": {
"test_data": "73.133.27.120" // test deep wildcard stripping
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As we talked in the meeting, can we also make sure to test that the IPs are scrubbed also from inside the strings, e.g.

"test_data": "contains ip's like 73.133.27.120 but also the text"

which should result in something like

"test_data": "contains ip's like [ip] but also the text"

or make sure this actually can work?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would create a separate issue for this, as it would also the touch IP scrubbing for users who configured it manually through advanced data scrubbing.

}
},
],
},
"sdk": {
"client_ip": "should also be stripped"
}
})
.into(),
Expand Down Expand Up @@ -1231,6 +1240,7 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv
"applications": {
"($string || $number || $array) && !(debug_meta.** || $frame.filename || $frame.abs_path || $logentry.formatted || $error.value)": [
"@common:filter",
"@ip:replace",
"strip-fields"
],
"$http.env.REMOTE_ADDR || $user.ip_address || $sdk.client_ip": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,18 @@ expression: data
"REMOTE_ADDR": {
"": {
"rem": [
[
"@ip:replace",
"s",
0,
4
],
[
"@anything:remove",
"x"
]
]
],
"len": 9
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
---
source: relay-general/src/pii/convert.rs
expression: data
---
{
"user": {
"ip_address": null,
"username": "[ip]",
"data": {
"a_password_here": "hello",
"apiKey": "secret_key",
"api_key": "secret_key",
"foo": "bar",
"password": "hello",
"the_secret": "hello"
}
},
"breadcrumbs": {
"values": [
{
"message": "[ip]",
"data": {
"test_data": "[ip]"
}
}
]
},
"sdk": {
"client_ip": null
},
"_meta": {
"breadcrumbs": {
"values": {
"0": {
"data": {
"test_data": {
"": {
"rem": [
[
"@ip:replace",
"s",
0,
4
]
],
"len": 13
}
}
},
"message": {
"": {
"rem": [
[
"@ip:replace",
"s",
0,
4
]
],
"len": 13
}
}
}
}
},
"sdk": {
"client_ip": {
"": {
"err": [
[
"invalid_data",
{
"reason": "expected an ip address"
}
]
],
"val": "should also be stripped"
}
}
},
"user": {
"ip_address": {
"": {
"err": [
[
"invalid_data",
{
"reason": "expected an ip address"
}
]
],
"val": "should be stripped despite lacking ip address"
}
},
"username": {
"": {
"rem": [
[
"@ip:replace",
"s",
0,
4
]
],
"len": 13
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ expression: pii_config
},
"applications": {
"($string || $number || $array) && !(debug_meta.** || $frame.filename || $frame.abs_path || $logentry.formatted || $error.value) && !url && !message && !'http.request.url' && !'*url*' && !'*message*' && !'*http.request.url*'": [
"@common:filter"
"@common:filter",
"@ip:replace"
],
"$http.env.REMOTE_ADDR || $user.ip_address || $sdk.client_ip": [
"@anything:remove"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,21 @@ expression: data
---
{
"sdk": {
"client_ip": null
"client_ip": "[ip]"
},
"_meta": {
"sdk": {
"client_ip": {
"": {
"rem": [
[
"@anything:remove",
"x"
"@ip:replace",
"s",
0,
4
]
]
],
"len": 9
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,18 @@ expression: data
"ip_address": {
"": {
"rem": [
[
"@ip:replace",
"s",
0,
4
],
[
"@anything:remove",
"x"
]
]
],
"len": 13
}
},
"username": {
Expand Down
Loading