Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Scrub filenames in the minidump modules list #784

Merged
merged 14 commits into from
Sep 28, 2020
155 changes: 125 additions & 30 deletions relay-general/src/pii/attachments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,13 @@ pub struct PiiAttachmentsProcessor<'a> {
root_state: ProcessingState<'static>,
}

/// Which encodings to scrub for `scrub_bytes`.
///
/// The value is passed by value and only ever inspected, so it is cheap to copy and compare.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ScrubEncodings {
    /// Apply the rules to the buffer interpreted as UTF-8 only.
    Utf8,
    /// Apply the rules to the buffer interpreted as UTF-16 little-endian only.
    Utf16Le,
    /// Apply the UTF-8 scrubber first, then the UTF-16LE scrubber on the regions the UTF-8
    /// pass did not touch.
    All,
}

impl<'a> PiiAttachmentsProcessor<'a> {
/// Creates a new `PiiAttachmentsProcessor` from the given PII config.
pub fn new(compiled_config: &'a CompiledPiiConfig) -> Self {
Expand Down Expand Up @@ -396,7 +403,12 @@ impl<'a> PiiAttachmentsProcessor<'a> {
/// Applies PII rules to a plain buffer.
///
/// Returns `true`, if the buffer was modified.
pub(crate) fn scrub_bytes(&self, data: &mut [u8], state: &ProcessingState<'_>) -> bool {
pub(crate) fn scrub_bytes(
&self,
data: &mut [u8],
state: &ProcessingState<'_>,
encodings: ScrubEncodings,
) -> bool {
let mut changed = false;

for (selector, rules) in &self.compiled_config.applications {
Expand All @@ -414,33 +426,53 @@ impl<'a> PiiAttachmentsProcessor<'a> {
for (_pattern_type, regex, replace_behavior) in
get_regex_for_rule_type(&rule.ty)
{
let matches =
apply_regex_to_utf8_bytes(data, rule, regex, &replace_behavior);
changed |= !(matches.is_empty());

// Only scrub regions with the UTF-16 scrubber if they haven't been
// scrubbed yet.
let windowed_matches = matches
.into_iter()
.chain(std::iter::once((data.len(), 0)))
.scan((0usize, 0usize), |previous, current| {
let start = if previous.1 % 2 == 0 {
previous.1
} else {
previous.1 + 1
};
let item = (start, current.0);
*previous = current;
Some(item)
})
.filter(|(start, end)| end > start);
for (start, end) in windowed_matches {
changed |= apply_regex_to_utf16le_bytes(
&mut data[start..end],
rule,
regex,
&replace_behavior,
);
match encodings {
    ScrubEncodings::Utf8 => {
        let matches =
            apply_regex_to_utf8_bytes(data, rule, regex, &replace_behavior);
        changed |= !matches.is_empty();
    }
    ScrubEncodings::Utf16Le => {
        // No debug output here: the buffer still holds unscrubbed data at this point and
        // must never be written to stderr or logs.
        changed |= apply_regex_to_utf16le_bytes(
            data,
            rule,
            regex,
            &replace_behavior,
        );
    }
    ScrubEncodings::All => {
        let matches =
            apply_regex_to_utf8_bytes(data, rule, regex, &replace_behavior);
        changed |= !matches.is_empty();

        // Only scrub regions with the UTF-16 scrubber if they haven't been
        // scrubbed yet.
        //
        // Each UTF-8 match is treated as a `(start, end)` pair; the scan below turns the
        // list of matches into the gaps between them. A sentinel `(data.len(), 0)` is
        // appended so the gap after the last match is produced as well.
        let unscrubbed_ranges = matches
            .into_iter()
            .chain(std::iter::once((data.len(), 0)))
            .scan((0usize, 0usize), |previous, current| {
                // Round the gap start up to an even offset so that the UTF-16 pass
                // begins on a two-byte boundary.
                let start = if previous.1 % 2 == 0 {
                    previous.1
                } else {
                    previous.1 + 1
                };
                let item = (start, current.0);
                *previous = current;
                Some(item)
            })
            // Drop empty (or inverted, after rounding) gaps.
            .filter(|(start, end)| end > start);
        for (start, end) in unscrubbed_ranges {
            changed |= apply_regex_to_utf16le_bytes(
                &mut data[start..end],
                rule,
                regex,
                &replace_behavior,
            );
        }
    }
}
}
}
Expand All @@ -455,7 +487,49 @@ impl<'a> PiiAttachmentsProcessor<'a> {
/// Returns `true`, if the attachment was modified.
pub fn scrub_attachment(&self, filename: &str, data: &mut [u8]) -> bool {
// Build the processing state for this attachment; its value type is `ValueType::Binary`.
let state = self.state(filename, ValueType::Binary);
// NOTE(review): the two-argument call below is the pre-change line of this diff. It lacks
// the `encodings` argument that `scrub_bytes` takes after this change and is superseded by
// the call that follows it.
self.scrub_bytes(data, &state)
// Attachments are scrubbed with `ScrubEncodings::All`, i.e. both the UTF-8 and the
// UTF-16LE scrubbers are applied to the buffer.
self.scrub_bytes(data, &state, ScrubEncodings::All)
}

/// Scrubs the directory part of a UTF-8 file path in place, preserving the basename.
///
/// Only the bytes in front of the last `/` or `\` separator are handed to `scrub_bytes`
/// (with [`ScrubEncodings::Utf8`]); the separator and everything after it stay untouched.
/// A path that contains no separator is left as it is.
///
/// Returns `true`, if the path was modified.
pub fn scrub_utf8_filepath(&self, path: &mut str, state: &ProcessingState<'_>) -> bool {
    let index = match path.rfind(|c| c == '/' || c == '\\') {
        Some(index) => index,
        None => return false,
    };

    // SAFETY: `index` is the byte offset of an ASCII separator, so `..index` ends on a
    // character boundary. `str::as_bytes_mut` additionally requires the bytes to be valid
    // UTF-8 again once the borrow ends.
    // NOTE(review): this relies on the UTF-8 scrubber only writing valid UTF-8
    // replacements; confirm against `apply_regex_to_utf8_bytes`.
    let data = unsafe { &mut path.as_bytes_mut()[..index] };

    // The path is deliberately not printed or logged: it is unscrubbed PII at this point.
    self.scrub_bytes(data, state, ScrubEncodings::Utf8)
}

/// Scrubs the directory part of a UTF-16 file path in place, preserving the basename.
///
/// Only the bytes in front of the last `/` or `\` separator are handed to `scrub_bytes`
/// (with [`ScrubEncodings::Utf16Le`]); the separator and everything after it stay
/// untouched. A path that contains no separator is left as it is.
///
/// Returns `true`, if the path was modified.
pub fn scrub_utf16_filepath(&self, path: &mut WStr, state: &ProcessingState<'_>) -> bool {
    // Offset of the last path separator, if there is one. The offset is used below to
    // slice the underlying byte buffer.
    let last_separator = path
        .char_indices()
        .filter(|&(_, c)| c == '/' || c == '\\')
        .map(|(i, _)| i)
        .last();

    match last_separator {
        Some(index) => {
            // SAFETY: NOTE(review): this assumes that the offsets yielded by
            // `char_indices` are byte offsets on code-unit boundaries and that the
            // UTF-16LE scrubber leaves the buffer as valid UTF-16; confirm against `WStr`
            // and `apply_regex_to_utf16le_bytes`.
            let data = unsafe { &mut path.as_bytes_mut()[..index] };

            // The path is deliberately not printed or logged: it is unscrubbed PII at
            // this point.
            self.scrub_bytes(data, state, ScrubEncodings::Utf16Le)
        }
        None => false,
    }
}
}

Expand Down Expand Up @@ -545,7 +619,7 @@ mod tests {
let mut data = input.to_owned();
let processor = PiiAttachmentsProcessor::new(&compiled);
let state = processor.state(filename, value_type);
let has_changed = processor.scrub_bytes(&mut data, &state);
let has_changed = processor.scrub_bytes(&mut data, &state, ScrubEncodings::All);

assert_eq_bytes_str!(data, output);
assert_eq!(changed, has_changed);
Expand Down Expand Up @@ -911,4 +985,25 @@ mod tests {
b"h\x00e\x00l\x00l\x00o\x00 \x00t\x00h\x00e\x00r\x00e\x00"
);
}

#[test]
#[ignore = "not implemented yet: the scrubbing assertions are still missing"]
fn test_module_list_scrubbing() {
    // A single custom `userpath` rule with the `mask` redaction, applied to everything
    // selected by `$filepath`.
    let _config = serde_json::from_value::<PiiConfig>(serde_json::json!(
        {
            "rules": {
                "custom": {
                    "type": "userpath",
                    "redaction": {
                        "method": "mask"
                    }
                }
            },
            "applications": {
                "$filepath": ["custom"]
            }
        }
    ))
    .unwrap();

    // TODO: run the config against a module list and assert on the scrubbed file paths.
    // Until then the test is ignored so that the placeholder does not fail the suite.
    todo!();
}
}
Loading