From a7d5aeb52349f5f7c0504898a748c6edcdfb3afb Mon Sep 17 00:00:00 2001 From: Alan West <3676547+alanwest@users.noreply.github.com> Date: Mon, 18 Nov 2024 12:38:41 -0800 Subject: [PATCH] [shared] Implement SQL sanitization for MSSQL (#2330) --- opentelemetry-dotnet-contrib.sln | 1 + src/Shared/SqlProcessor.cs | 234 ++++++++++++++++++ .../OpenTelemetry.Contrib.Shared.Tests.csproj | 8 + .../SqlProcessorTestCases.cs | 46 ++++ .../SqlProcessorTestCases.json | 128 ++++++++++ .../SqlProcessorTests.cs | 19 ++ 6 files changed, 436 insertions(+) create mode 100644 src/Shared/SqlProcessor.cs create mode 100644 test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTestCases.cs create mode 100644 test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTestCases.json create mode 100644 test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTests.cs diff --git a/opentelemetry-dotnet-contrib.sln b/opentelemetry-dotnet-contrib.sln index 6f378a2ab0..936e6672f8 100644 --- a/opentelemetry-dotnet-contrib.sln +++ b/opentelemetry-dotnet-contrib.sln @@ -261,6 +261,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Shared", "Shared", "{1FCC8E src\Shared\ServerCertificateValidationProvider.cs = src\Shared\ServerCertificateValidationProvider.cs src\Shared\SpanAttributeConstants.cs = src\Shared\SpanAttributeConstants.cs src\Shared\SpanHelper.cs = src\Shared\SpanHelper.cs + src\Shared\SqlProcessor.cs = src\Shared\SqlProcessor.cs src\Shared\UriHelper.cs = src\Shared\UriHelper.cs EndProjectSection EndProject diff --git a/src/Shared/SqlProcessor.cs b/src/Shared/SqlProcessor.cs new file mode 100644 index 0000000000..2625a062b7 --- /dev/null +++ b/src/Shared/SqlProcessor.cs @@ -0,0 +1,234 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +using System.Text; + +namespace OpenTelemetry.Instrumentation; + +public static class SqlProcessor +{ + public static string GetSanitizedSql(string sql) + { + if (sql == null) + { + return string.Empty; + } + + var sb = new StringBuilder(capacity: sql.Length); + for (var i = 0; i < sql.Length; ++i) + { + if (SkipComment(sql, ref i)) + { + continue; + } + + if (SanitizeStringLiteral(sql, ref i) || + SanitizeHexLiteral(sql, ref i) || + SanitizeNumericLiteral(sql, ref i)) + { + sb.Append('?'); + continue; + } + + WriteToken(sql, ref i, sb); + } + + return sb.ToString(); + } + + private static bool SkipComment(string sql, ref int index) + { + var i = index; + var ch = sql[i]; + var length = sql.Length; + + // Scan past multi-line comment + if (ch == '/' && i + 1 < length && sql[i + 1] == '*') + { + for (i += 2; i < length; ++i) + { + ch = sql[i]; + if (ch == '*' && i + 1 < length && sql[i + 1] == '/') + { + i += 1; + break; + } + } + + index = i; + return true; + } + + // Scan past single-line comment + if (ch == '-' && i + 1 < length && sql[i + 1] == '-') + { + for (i += 2; i < length; ++i) + { + ch = sql[i]; + if (ch == '\r' || ch == '\n') + { + i -= 1; + break; + } + } + + index = i; + return true; + } + + return false; + } + + private static bool SanitizeStringLiteral(string sql, ref int index) + { + var ch = sql[index]; + if (ch == '\'') + { + var i = index + 1; + var length = sql.Length; + for (; i < length; ++i) + { + ch = sql[i]; + if (ch == '\'' && i + 1 < length && sql[i + 1] == '\'') + { + ++i; + continue; + } + + if (ch == '\'') + { + break; + } + } + + index = i; + return true; + } + + return false; + } + + private static bool SanitizeHexLiteral(string sql, ref int index) + { + var i = index; + var ch = sql[i]; + var length = sql.Length; + + if (ch == '0' && i + 1 < length && (sql[i + 1] == 'x' || sql[i + 1] == 'X')) + { + for (i += 2; i < length; ++i) + { + ch = sql[i]; + if (char.IsDigit(ch) || + ch == 'A' || ch == 'a' || + ch == 'B' || ch == 'b' || + ch == 'C' || ch == 'c' || + ch == 'D' || ch == 'd' || + ch == 'E' || ch == 'e' || + ch == 'F' || ch == 'f') + { + continue; + } + + i -= 1; + break; + } + + index = i; + return true; + } + + return false; + } + + private static bool SanitizeNumericLiteral(string sql, ref int index) + { + var i = index; + var ch = sql[i]; + var length = sql.Length; + + // Scan past leading sign + if ((ch == '-' || ch == '+') && i + 1 < length && (char.IsDigit(sql[i + 1]) || sql[i + 1] == '.')) + { + i += 1; + ch = sql[i]; + } + + // Scan past leading decimal point + var periodMatched = false; + if (ch == '.' && i + 1 < length && char.IsDigit(sql[i + 1])) + { + periodMatched = true; + i += 1; + ch = sql[i]; + } + + if (char.IsDigit(ch)) + { + var exponentMatched = false; + for (i += 1; i < length; ++i) + { + ch = sql[i]; + if (char.IsDigit(ch)) + { + continue; + } + + if (!periodMatched && ch == '.') + { + periodMatched = true; + continue; + } + + if (!exponentMatched && (ch == 'e' || ch == 'E')) + { + // Scan past sign in exponent + if (i + 1 < length && (sql[i + 1] == '-' || sql[i + 1] == '+')) + { + i += 1; + } + + exponentMatched = true; + continue; + } + + i -= 1; + break; + } + + index = i; + return true; + } + + return false; + } + + private static void WriteToken(string sql, ref int index, StringBuilder sb) + { + var i = index; + var ch = sql[i]; + + if (char.IsLetter(ch) || ch == '_') + { + for (; i < sql.Length; i++) + { + ch = sql[i]; + if (char.IsLetter(ch) || ch == '_' || char.IsDigit(ch)) + { + sb.Append(ch); + continue; + } + + break; + } + + i -= 1; + } + else + { + sb.Append(ch); + } + + index = i; + } +} diff --git a/test/OpenTelemetry.Contrib.Shared.Tests/OpenTelemetry.Contrib.Shared.Tests.csproj b/test/OpenTelemetry.Contrib.Shared.Tests/OpenTelemetry.Contrib.Shared.Tests.csproj index 4762d556b8..e53431c07b 100644 --- a/test/OpenTelemetry.Contrib.Shared.Tests/OpenTelemetry.Contrib.Shared.Tests.csproj +++ b/test/OpenTelemetry.Contrib.Shared.Tests/OpenTelemetry.Contrib.Shared.Tests.csproj @@ -9,6 +9,7 @@ + @@ -23,6 +24,13 @@ + + + + + + SqlProcessorTestCases.json + diff --git a/test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTestCases.cs b/test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTestCases.cs new file mode 100644 index 0000000000..f739683fdc --- /dev/null +++ b/test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTestCases.cs @@ -0,0 +1,46 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +using System.Reflection; +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace OpenTelemetry.Instrumentation.Tests; + +public static class SqlProcessorTestCases +{ + private static readonly JsonSerializerOptions JsonSerializerOptions = new() + { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + Converters = { new JsonStringEnumConverter() }, + }; + + public static IEnumerable GetTestCases() + { + var assembly = Assembly.GetExecutingAssembly(); + var input = JsonSerializer.Deserialize( + assembly.GetManifestResourceStream("SqlProcessorTestCases.json")!, + JsonSerializerOptions)!; + + foreach (var testCase in input) + { + yield return new object[] { testCase }; + } + } + + public class TestCase + { + public string Name { get; set; } = string.Empty; + + public string Sql { get; set; } = string.Empty; + + public string Sanitized { get; set; } = string.Empty; + + public IEnumerable Dialects { get; set; } = []; + + public override string ToString() + { + return this.Name; + } + } +} diff --git a/test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTestCases.json b/test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTestCases.json new file mode 100644 index 0000000000..4971427b94 --- /dev/null +++ b/test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTestCases.json @@ -0,0 +1,128 @@ +[ + { + "name": "numeric_literal_integers", + "sql": "SELECT 12, -12, +12", + "sanitized": "SELECT ?, ?, ?", + "dialects": [ + "mssql" + ] + }, + { + "name": "numeric_literal_with_decimal_point", + "sql": "SELECT 12.34, -12.34, +12.34, .01, -.01", + "sanitized": "SELECT ?, ?, ?, ?, ?", + "dialects": [ + "mssql" + ] + }, + { + "name": "numeric_literal_exponential", + "sql": "SELECT 12.34e56, -12.34e56, +12.34e56", + "sanitized": "SELECT ?, ?, ?", + "dialects": [ + "mssql" + ] + }, + { + "name": "numeric_literal_negative_exponential", + "sql": "SELECT 12.34e-56, -12.34e-56, +12.34e-56", + "sanitized": "SELECT ?, ?, ?", + "dialects": [ + "mssql" + ] + }, + { + "name": "hex_literal", + "sql": "SELECT 0xDEADBEEF, 0XdeadBEEF", + "sanitized": "SELECT ?, ?", + "dialects": [ + "mssql" + ] + }, + { + "name": "string_literal", + "sql": "SELECT 'hello'", + "sanitized": "SELECT ?", + "dialects": [ + "mssql" + ] + }, + { + "name": "string_literal_escaped_single_quote", + "sql": "SELECT 'My name''s not important'", + "sanitized": "SELECT ?", + "dialects": [ + "mssql" + ] + }, + { + "name": "string_with_embedded_newline", + "sql": "SELECT 'My name is \n not important'", + "sanitized": "SELECT ?", + "dialects": [ + "mssql" + ] + }, + { + "name": "numbers_in_identifiers", + "sql": "SELECT c3po, r2d2 FROM covid19 WHERE n1h1=1234", + "sanitized": "SELECT c3po, r2d2 FROM covid19 WHERE n1h1=?", + "dialects": [ + "mssql" + ] + }, + { + "name": "periods_in_identifiers", + "sql": "SELECT a FROM dbo.Table JOIN dbo.AnotherTable", + "sanitized": "SELECT a FROM dbo.Table JOIN dbo.AnotherTable", + "dialects": [ + "mssql" + ] + }, + { + "name": "insert_into", + "sql": "INSERT INTO X VALUES(1, 23456, 123.456, 99+100)", + "sanitized": "INSERT INTO X VALUES(?, ?, ?, ??)", + "dialects": [ + "mssql" + ], + "comments": [ + "The following may also be acceptable but would require", + "recognizing expressions", + "INSERT INTO X VALUES(?, ?, ?, ?+?)" + ] + }, + { + "name": "uuid", + "sql": "SELECT { guid '01234567-89ab-cdef-0123-456789abcdef' }", + "sanitized": "SELECT { guid ? }", + "dialects": [ + "mssql" + ], + "comments": [ + "The following may be preferable", + "SELECT ?" + ] + }, + { + "name": "in_clause", + "sql": "SELECT * FROM table WHERE value IN (123, 456, 'abc')", + "sanitized": "SELECT * FROM table WHERE value IN (?, ?, ?)", + "dialects": [ + "mssql" + ], + "comments": [ + "The following is allowed by the spec", + "but not required", + "SELECT * FROM table WHERE value IN (?)" + ] + }, + { + "name": "comments", + "sql": "SELECT column -- end of line comment\nFROM /* block \n comment */ table", + "sanitized": "SELECT column \nFROM table", + "dialects": [ + "mssql" + ] + } +] diff --git a/test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTests.cs b/test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTests.cs new file mode 100644 index 0000000000..e6d1a4640d --- /dev/null +++ b/test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTests.cs @@ -0,0 +1,19 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +using Xunit; + +namespace OpenTelemetry.Instrumentation.Tests; + +public class SqlProcessorTests +{ + public static IEnumerable TestData => SqlProcessorTestCases.GetTestCases(); + + [Theory] + [MemberData(nameof(TestData))] + public void TestGetSanitizedSql(SqlProcessorTestCases.TestCase test) + { + var sanitized = SqlProcessor.GetSanitizedSql(test.Sql); + Assert.Equal(test.Sanitized, sanitized); + } +}