From a7d5aeb52349f5f7c0504898a748c6edcdfb3afb Mon Sep 17 00:00:00 2001
From: Alan West <3676547+alanwest@users.noreply.github.com>
Date: Mon, 18 Nov 2024 12:38:41 -0800
Subject: [PATCH] [shared] Implement SQL sanitization for MSSQL (#2330)
---
opentelemetry-dotnet-contrib.sln | 1 +
src/Shared/SqlProcessor.cs | 234 ++++++++++++++++++
.../OpenTelemetry.Contrib.Shared.Tests.csproj | 8 +
.../SqlProcessorTestCases.cs | 46 ++++
.../SqlProcessorTestCases.json | 128 ++++++++++
.../SqlProcessorTests.cs | 19 ++
6 files changed, 436 insertions(+)
create mode 100644 src/Shared/SqlProcessor.cs
create mode 100644 test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTestCases.cs
create mode 100644 test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTestCases.json
create mode 100644 test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTests.cs
diff --git a/opentelemetry-dotnet-contrib.sln b/opentelemetry-dotnet-contrib.sln
index 6f378a2ab0..936e6672f8 100644
--- a/opentelemetry-dotnet-contrib.sln
+++ b/opentelemetry-dotnet-contrib.sln
@@ -261,6 +261,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Shared", "Shared", "{1FCC8E
src\Shared\ServerCertificateValidationProvider.cs = src\Shared\ServerCertificateValidationProvider.cs
src\Shared\SpanAttributeConstants.cs = src\Shared\SpanAttributeConstants.cs
src\Shared\SpanHelper.cs = src\Shared\SpanHelper.cs
+ src\Shared\SqlProcessor.cs = src\Shared\SqlProcessor.cs
src\Shared\UriHelper.cs = src\Shared\UriHelper.cs
EndProjectSection
EndProject
diff --git a/src/Shared/SqlProcessor.cs b/src/Shared/SqlProcessor.cs
new file mode 100644
index 0000000000..2625a062b7
--- /dev/null
+++ b/src/Shared/SqlProcessor.cs
@@ -0,0 +1,234 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+using System.Text;
+
+namespace OpenTelemetry.Instrumentation;
+
+public static class SqlProcessor
+{
+ public static string GetSanitizedSql(string sql)
+ {
+ if (sql == null)
+ {
+ return string.Empty;
+ }
+
+ var sb = new StringBuilder(capacity: sql.Length);
+ for (var i = 0; i < sql.Length; ++i)
+ {
+ if (SkipComment(sql, ref i))
+ {
+ continue;
+ }
+
+ if (SanitizeStringLiteral(sql, ref i) ||
+ SanitizeHexLiteral(sql, ref i) ||
+ SanitizeNumericLiteral(sql, ref i))
+ {
+ sb.Append('?');
+ continue;
+ }
+
+ WriteToken(sql, ref i, sb);
+ }
+
+ return sb.ToString();
+ }
+
+ private static bool SkipComment(string sql, ref int index)
+ {
+ var i = index;
+ var ch = sql[i];
+ var length = sql.Length;
+
+ // Scan past multi-line comment
+ if (ch == '/' && i + 1 < length && sql[i + 1] == '*')
+ {
+ for (i += 2; i < length; ++i)
+ {
+ ch = sql[i];
+ if (ch == '*' && i + 1 < length && sql[i + 1] == '/')
+ {
+ i += 1;
+ break;
+ }
+ }
+
+ index = i;
+ return true;
+ }
+
+ // Scan past single-line comment
+ if (ch == '-' && i + 1 < length && sql[i + 1] == '-')
+ {
+ for (i += 2; i < length; ++i)
+ {
+ ch = sql[i];
+ if (ch == '\r' || ch == '\n')
+ {
+ i -= 1;
+ break;
+ }
+ }
+
+ index = i;
+ return true;
+ }
+
+ return false;
+ }
+
+ private static bool SanitizeStringLiteral(string sql, ref int index)
+ {
+ var ch = sql[index];
+ if (ch == '\'')
+ {
+ var i = index + 1;
+ var length = sql.Length;
+ for (; i < length; ++i)
+ {
+ ch = sql[i];
+ if (ch == '\'' && i + 1 < length && sql[i + 1] == '\'')
+ {
+ ++i;
+ continue;
+ }
+
+ if (ch == '\'')
+ {
+ break;
+ }
+ }
+
+ index = i;
+ return true;
+ }
+
+ return false;
+ }
+
+ private static bool SanitizeHexLiteral(string sql, ref int index)
+ {
+ var i = index;
+ var ch = sql[i];
+ var length = sql.Length;
+
+ if (ch == '0' && i + 1 < length && (sql[i + 1] == 'x' || sql[i + 1] == 'X'))
+ {
+ for (i += 2; i < length; ++i)
+ {
+ ch = sql[i];
+ if (char.IsDigit(ch) ||
+ ch == 'A' || ch == 'a' ||
+ ch == 'B' || ch == 'b' ||
+ ch == 'C' || ch == 'c' ||
+ ch == 'D' || ch == 'd' ||
+ ch == 'E' || ch == 'e' ||
+ ch == 'F' || ch == 'f')
+ {
+ continue;
+ }
+
+ i -= 1;
+ break;
+ }
+
+ index = i;
+ return true;
+ }
+
+ return false;
+ }
+
+ private static bool SanitizeNumericLiteral(string sql, ref int index)
+ {
+ var i = index;
+ var ch = sql[i];
+ var length = sql.Length;
+
+ // Scan past leading sign
+ if ((ch == '-' || ch == '+') && i + 1 < length && (char.IsDigit(sql[i + 1]) || sql[i + 1] == '.'))
+ {
+ i += 1;
+ ch = sql[i];
+ }
+
+ // Scan past leading decimal point
+ var periodMatched = false;
+ if (ch == '.' && i + 1 < length && char.IsDigit(sql[i + 1]))
+ {
+ periodMatched = true;
+ i += 1;
+ ch = sql[i];
+ }
+
+ if (char.IsDigit(ch))
+ {
+ var exponentMatched = false;
+ for (i += 1; i < length; ++i)
+ {
+ ch = sql[i];
+ if (char.IsDigit(ch))
+ {
+ continue;
+ }
+
+ if (!periodMatched && ch == '.')
+ {
+ periodMatched = true;
+ continue;
+ }
+
+ if (!exponentMatched && (ch == 'e' || ch == 'E'))
+ {
+ // Scan past sign in exponent
+ if (i + 1 < length && (sql[i + 1] == '-' || sql[i + 1] == '+'))
+ {
+ i += 1;
+ }
+
+ exponentMatched = true;
+ continue;
+ }
+
+ i -= 1;
+ break;
+ }
+
+ index = i;
+ return true;
+ }
+
+ return false;
+ }
+
+ private static void WriteToken(string sql, ref int index, StringBuilder sb)
+ {
+ var i = index;
+ var ch = sql[i];
+
+ if (char.IsLetter(ch) || ch == '_')
+ {
+ for (; i < sql.Length; i++)
+ {
+ ch = sql[i];
+ if (char.IsLetter(ch) || ch == '_' || char.IsDigit(ch))
+ {
+ sb.Append(ch);
+ continue;
+ }
+
+ break;
+ }
+
+ i -= 1;
+ }
+ else
+ {
+ sb.Append(ch);
+ }
+
+ index = i;
+ }
+}
diff --git a/test/OpenTelemetry.Contrib.Shared.Tests/OpenTelemetry.Contrib.Shared.Tests.csproj b/test/OpenTelemetry.Contrib.Shared.Tests/OpenTelemetry.Contrib.Shared.Tests.csproj
index 4762d556b8..e53431c07b 100644
--- a/test/OpenTelemetry.Contrib.Shared.Tests/OpenTelemetry.Contrib.Shared.Tests.csproj
+++ b/test/OpenTelemetry.Contrib.Shared.Tests/OpenTelemetry.Contrib.Shared.Tests.csproj
@@ -9,6 +9,7 @@
+
@@ -23,6 +24,13 @@
+
+
+
+
+
+ SqlProcessorTestCases.json
+
diff --git a/test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTestCases.cs b/test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTestCases.cs
new file mode 100644
index 0000000000..f739683fdc
--- /dev/null
+++ b/test/OpenTelemetry.Contrib.Shared.Tests/SqlProcessorTestCases.cs
@@ -0,0 +1,46 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+using System.Reflection;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+
+namespace OpenTelemetry.Instrumentation.Tests;
+
+public static class SqlProcessorTestCases
+{
+ private static readonly JsonSerializerOptions JsonSerializerOptions = new()
+ {
+ PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
+ Converters = { new JsonStringEnumConverter() },
+ };
+
+ public static IEnumerable