From 3ca3dbf8c751e892f9fb77811064719d2dbe4bb7 Mon Sep 17 00:00:00 2001
From: Scott Beddall <45376673+scbedd@users.noreply.github.com>
Date: Thu, 4 Aug 2022 13:43:10 -0700
Subject: [PATCH] Add `RegexEntrySanitizer` (#3807)

* add RegexEntrySanitizer and accompanying tests
* Update tools/test-proxy/Azure.Sdk.Tools.TestProxy/Sanitizers/RegexEntrySanitizer.cs
---
 .../SanitizerTests.cs                         | 106 ++++++++++++++++++
 .../Common/StringSanitizer.cs                 |  17 +++
 .../Sanitizers/RegexEntrySanitizer.cs         |  90 +++++++++++++++
 3 files changed, 213 insertions(+)
 create mode 100644 tools/test-proxy/Azure.Sdk.Tools.TestProxy/Sanitizers/RegexEntrySanitizer.cs

diff --git a/tools/test-proxy/Azure.Sdk.Tools.TestProxy.Tests/SanitizerTests.cs b/tools/test-proxy/Azure.Sdk.Tools.TestProxy.Tests/SanitizerTests.cs
index 483152b2957..ca9d1688f42 100644
--- a/tools/test-proxy/Azure.Sdk.Tools.TestProxy.Tests/SanitizerTests.cs
+++ b/tools/test-proxy/Azure.Sdk.Tools.TestProxy.Tests/SanitizerTests.cs
@@ -1,8 +1,16 @@
 using Azure.Sdk.Tools.TestProxy.Common;
+using Azure.Sdk.Tools.TestProxy.Common.Exceptions;
 using Azure.Sdk.Tools.TestProxy.Sanitizers;
+using Microsoft.AspNetCore.Http;
+using Microsoft.AspNetCore.Mvc;
+using Microsoft.Extensions.Logging.Abstractions;
 using System.Collections.Generic;
+using System.IO;
 using System.Linq;
+using System.Reflection;
 using System.Text;
+using System.Text.RegularExpressions;
+using System.Threading.Tasks;
 using Xunit;
 
 namespace Azure.Sdk.Tools.TestProxy.Tests
@@ -10,7 +18,10 @@ namespace Azure.Sdk.Tools.TestProxy.Tests
     public class SanitizerTests
     {
         public OAuthResponseSanitizer OAuthResponseSanitizer = new OAuthResponseSanitizer();
+        private NullLoggerFactory _nullLogger = new NullLoggerFactory();
+
+
         public string oauthRegex = "\"/oauth2(?:/v2.0)?/token\"";
         public string lookaheadReplaceRegex = @"[a-z]+(?=\.(?:table|blob|queue)\.core\.windows\.net)";
         public string capturingGroupReplaceRegex = @"https\:\/\/(?<account>[a-z]+)\.(?:table|blob|queue)\.core\.windows\.net";
         public string scopeClean = @"scope\=(?<scope>[^&]*)";
@@ -48,6 +59,101 @@ public void OauthResponseSanitizerNotAggressive()
             Assert.Equal(expectedCount, session.Session.Entries.Count);
         }
 
+        [Theory]
+        [InlineData("uri", "\"/oauth2(?:/v2.0)?/token\"")]
+        [InlineData("body", "\"/oauth2(?:/v2.0)?/token\"")]
+        [InlineData("header", "\"/oauth2(?:/v2.0)?/token\"")]
+        public void RegexEntrySanitizerNoOpsOnNonMatch(string target, string regex)
+        {
+            var session = TestHelpers.LoadRecordSession("Test.RecordEntries/post_delete_get_content.json");
+            var sanitizer = new RegexEntrySanitizer(target, regex);
+            var expectedCount = session.Session.Entries.Count;
+
+            session.Session.Sanitize(sanitizer);
+
+            Assert.Equal(expectedCount, session.Session.Entries.Count);
+        }
+
+        [Theory]
+        [InlineData("body", "(listtable09bf2a3d|listtable19bf2a3d)", 9)]
+        [InlineData("uri", "fakeazsdktestaccount", 0)]
+        [InlineData("body", "listtable09bf2a3d", 10)]
+        [InlineData("header", "a50f2f9c-b830-11eb-b8c8-10e7c6392c5a", 10)]
+        public void RegexEntrySanitizerCorrectlySanitizes(string target, string regex, int endCount)
+        {
+            var session = TestHelpers.LoadRecordSession("Test.RecordEntries/post_delete_get_content.json");
+            var sanitizer = new RegexEntrySanitizer(target, regex);
+
+            session.Session.Sanitize(sanitizer);
+
+            Assert.Equal(endCount, session.Session.Entries.Count);
+        }
+
+        [Fact]
+        public void RegexEntrySanitizerCorrectlySanitizesSpecific()
+        {
+            var session = TestHelpers.LoadRecordSession("Test.RecordEntries/response_with_xml_body.json");
+            var sanitizer = new RegexEntrySanitizer("header", "b24f75a9-b830-11eb-b949-10e7c6392c5a");
+
+            session.Session.Sanitize(sanitizer);
+
+            Assert.Equal(2, session.Session.Entries.Count);
+            Assert.Equal("b25bf92a-b830-11eb-947a-10e7c6392c5a", session.Session.Entries[0].Request.Headers["x-ms-client-request-id"][0].ToString());
+        }
+
+        [Theory]
+        [InlineData("wrong_name", "", "When defining which section of a request the regex should target, only values")]
+        [InlineData("", ".+", "When defining which section of a request the regex should target, only values")]
+        [InlineData("uri", "\"[\"", "Expression of value")]
+        public void RegexEntrySanitizerThrowsProperExceptions(string target, string regex, string exceptionMessage)
+        {
+            var assertion = Assert.Throws<HttpException>(
+                () => new RegexEntrySanitizer(target, regex)
+            );
+
+            Assert.Contains(exceptionMessage, assertion.Message);
+        }
+
+        [Theory]
+        [InlineData("{ \"target\": \"URI\", \"regex\": \"/oauth2(?:/v2.0)?/token\" }")]
+        [InlineData("{ \"target\": \"uRi\", \"regex\": \"/login\\\\.microsoftonline.com\" }")]
+        [InlineData("{ \"target\": \"bodY\", \"regex\": \"/oauth2(?:/v2.0)?/token\" }")]
+        [InlineData("{ \"target\": \"HEADER\", \"regex\": \"/login\\\\.microsoftonline.com\" }")]
+        public async Task RegexEntrySanitizerCreatesOverAPI(string body)
+        {
+            RecordingHandler testRecordingHandler = new RecordingHandler(Directory.GetCurrentDirectory());
+            testRecordingHandler.Sanitizers.Clear();
+            var httpContext = new DefaultHttpContext();
+            httpContext.Request.Headers["x-abstraction-identifier"] = "RegexEntrySanitizer";
+            httpContext.Request.Body = TestHelpers.GenerateStreamRequestBody(body);
+
+            // content length must be set for the body to be parsed in SetMatcher
+            httpContext.Request.ContentLength = httpContext.Request.Body.Length;
+
+            var controller = new Admin(testRecordingHandler, _nullLogger)
+            {
+                ControllerContext = new ControllerContext()
+                {
+                    HttpContext = httpContext
+                }
+            };
+
+            await controller.AddSanitizer();
+            var sanitizer = testRecordingHandler.Sanitizers[0];
+            Assert.True(sanitizer is RegexEntrySanitizer);
+
+            var sanitizerTarget = (string)typeof(RegexEntrySanitizer).GetField("section", BindingFlags.NonPublic | BindingFlags.Instance).GetValue(sanitizer);
+            var regex = (Regex)typeof(RegexEntrySanitizer).GetField("rx", BindingFlags.NonPublic | BindingFlags.Instance).GetValue(sanitizer);
+
+            // Verify the values that reached the sanitizer: the target is stored lower-cased and the
+            // regex pattern is the one supplied in the JSON request body.
+            using (var expected = System.Text.Json.JsonDocument.Parse(body))
+            {
+                Assert.Equal(expected.RootElement.GetProperty("target").GetString().ToLowerInvariant(), sanitizerTarget);
+                Assert.Equal(expected.RootElement.GetProperty("regex").GetString(), regex.ToString());
+            }
+        }
+
         [Fact]
         public void UriRegexSanitizerReplacesTableName()
         {
diff --git a/tools/test-proxy/Azure.Sdk.Tools.TestProxy/Common/StringSanitizer.cs b/tools/test-proxy/Azure.Sdk.Tools.TestProxy/Common/StringSanitizer.cs
index a0d487f5f05..855ce62f70d 100644
--- a/tools/test-proxy/Azure.Sdk.Tools.TestProxy/Common/StringSanitizer.cs
+++ b/tools/test-proxy/Azure.Sdk.Tools.TestProxy/Common/StringSanitizer.cs
@@ -24,6 +24,23 @@ public static void ConfirmValidRegex(string regex)
             }
         }
 
+        /// <summary>
+        /// Compiles the given expression into a <see cref="Regex"/>, converting any compilation failure (including a null expression) into a 400 Bad Request.
+        /// </summary>
+        /// <param name="regex">A regular expression.</param>
+        /// <returns>The compiled <see cref="Regex"/>.</returns>
+        /// <exception cref="HttpException">Thrown with <see cref="HttpStatusCode.BadRequest"/> when the expression cannot be compiled.</exception>
+        public static Regex GetRegex(string regex)
+        {
+            try
+            {
+                return new Regex(regex);
+            }
+            catch (Exception e)
+            {
+                throw new HttpException(HttpStatusCode.BadRequest, $"Expression of value {regex} does not successfully compile. Failure Details: {e.Message}");
+            }
+        }
 
         /// <summary>
         /// General purpose string replacement. Simple abstraction of string.Replace().
diff --git a/tools/test-proxy/Azure.Sdk.Tools.TestProxy/Sanitizers/RegexEntrySanitizer.cs b/tools/test-proxy/Azure.Sdk.Tools.TestProxy/Sanitizers/RegexEntrySanitizer.cs
new file mode 100644
index 00000000000..885866a31e4
--- /dev/null
+++ b/tools/test-proxy/Azure.Sdk.Tools.TestProxy/Sanitizers/RegexEntrySanitizer.cs
@@ -0,0 +1,90 @@
+using Azure.Sdk.Tools.TestProxy.Common;
+using Azure.Sdk.Tools.TestProxy.Common.Exceptions;
+using System.Linq;
+using System.Text.RegularExpressions;
+
+namespace Azure.Sdk.Tools.TestProxy.Sanitizers
+{
+    /// <summary>
+    /// This sanitizer applies at the session level, just before saving a recording to disk.
+    ///
+    /// It cleans out all request/response pairs that match the defined settings. A match against URI, Header, or Body regex will result in the entire RecordEntry being omitted from the recording.
+    /// </summary>
+    public class RegexEntrySanitizer : RecordedTestSanitizer
+    {
+        // SanitizerTests reads "rx" and "section" through reflection, so keep these two field names stable.
+        private readonly Regex rx;
+        private readonly string section;
+        private readonly string[] validValues = new string[] { "uri", "header", "body" };
+
+        /// <summary>
+        /// The accepted target names, each wrapped in double quotes and joined with ", ". Used in error messages.
+        /// </summary>
+        public string ValidValues
+        {
+            get { return string.Join(", ", validValues.Select(x => "\"" + x + "\"")); }
+        }
+
+        /// <summary>
+        /// During sanitization, each RecordEntry within a session is checked against a target (URI, Header, Body) and a regex. If there is any match within the request, the whole request/response pair is omitted from the recording.
+        /// </summary>
+        /// <param name="target">Possible values are [ "URI", "Header", "Body"]. Only requests with text-like body values will be checked when targeting "Body". The value is NOT case-sensitive.</param>
+        /// <param name="regex">During sanitization, any entry where the 'target' is matched by the regex will be fully omitted. Request/Response both.</param>
+        /// <exception cref="HttpException">Thrown with status 400 when the target is not one of the valid values (null included) or when the regex does not compile.</exception>
+        public RegexEntrySanitizer(string target, string regex)
+        {
+            // Null-conditional so that a missing target produces the 400 below instead of a NullReferenceException.
+            section = target?.ToLowerInvariant();
+
+            if (!validValues.Contains(section))
+            {
+                throw new HttpException(System.Net.HttpStatusCode.BadRequest, $"When defining which section of a request the regex should target, only values [ {ValidValues} ] are valid.");
+            }
+
+            rx = StringSanitizer.GetRegex(regex);
+        }
+
+        /// <summary>
+        /// Tests a single entry against the configured target and regex.
+        /// </summary>
+        /// <param name="x">The entry whose request URI, request headers, or request body is examined.</param>
+        /// <returns>True when the targeted section of the request matches the regex; otherwise false.</returns>
+        public bool CheckMatch(RecordEntry x)
+        {
+            switch (section)
+            {
+                case "uri":
+                    return x.RequestUri != null && rx.IsMatch(x.RequestUri);
+                case "header":
+                    // Header values are normally stored unsplit as a single element: letting .NET split and then reassemble
+                    // them introduces a space ("application/json;odata=minimalmetadata" becomes
+                    // "application/json; odata=minimalmetadata"), which breaks signature verification. Every value is still
+                    // checked so a multi-valued header cannot hide a match and an empty value list cannot throw.
+                    foreach (var headerKey in x.Request.Headers.Keys)
+                    {
+                        var headerValues = x.Request.Headers[headerKey];
+
+                        if (headerValues != null && headerValues.Any(value => value != null && rx.IsMatch(value)))
+                        {
+                            return true;
+                        }
+                    }
+
+                    return false;
+                case "body":
+                    return x.Request.TryGetBodyAsText(out string text) && rx.IsMatch(text);
+                default:
+                    throw new HttpException(System.Net.HttpStatusCode.BadRequest, $"The RegexEntrySanitizer can only match against a target of [ {ValidValues} ].");
+            }
+        }
+
+        /// <summary>
+        /// Removes from the session every entry for which <see cref="CheckMatch"/> returns true.
+        /// </summary>
+        /// <param name="session">The recording session whose entries are filtered in place.</param>
+        public override void Sanitize(RecordSession session)
+        {
+            session.Entries.RemoveAll(CheckMatch);
+        }
+    }
+}