From 9d893c26dc7ddb422630fb7cb103ae4f0e73fdad Mon Sep 17 00:00:00 2001
From: Gabe Stocco <98900+gfs@users.noreply.github.com>
Date: Wed, 8 Nov 2023 11:28:09 -0800
Subject: [PATCH 1/3] Fixes an issue with setting correct index values when
matching a property of an xml tag with an xpath
---
AppInspector.RulesEngine/TextContainer.cs | 53 ++++++---
.../RuleProcessor/XmlAndJsonTests.cs | 101 +++++++++++++++++-
2 files changed, 135 insertions(+), 19 deletions(-)
diff --git a/AppInspector.RulesEngine/TextContainer.cs b/AppInspector.RulesEngine/TextContainer.cs
index 1b631e68..8918d6ae 100644
--- a/AppInspector.RulesEngine/TextContainer.cs
+++ b/AppInspector.RulesEngine/TextContainer.cs
@@ -221,26 +221,47 @@ public TextContainer(string content, string language, Languages languages, ILogg
continue;
}
- // First we find the name
+ // First we find the name, absolute position index
var nameIndex = FullContent[minIndex..].IndexOf(nodeIter.Current.Name, StringComparison.Ordinal) + minIndex;
// Then we grab the index of the end of this tag.
// We can't use OuterXML because the parser will inject the namespace if present into the OuterXML so it doesn't match the original text.
- var endTagIndex = FullContent[nameIndex..].IndexOf('>');
- // We also look for self-closing tag
- var selfClosedTag = FullContent[endTagIndex-1] == '/';
- // If the tag is self closing innerxml will be empty string, so the finding is located at the end of the tag and is empty string
- // Otherwise the finding is the content of the xml tag
- var offset = selfClosedTag ? endTagIndex : FullContent[nameIndex..].IndexOf(nodeIter.Current.InnerXml, StringComparison.Ordinal) + nameIndex;
- // Move the minimum index up in case there are multiple instances of identical OuterXML
- // This ensures we won't re-find the same one
- var totalOffset = minIndex + nameIndex + endTagIndex;
- minIndex = totalOffset;
- var location = new Boundary
+ // Position relative to nameIndex
+ var endTagIndex = FullContent[nameIndex..].IndexOf('>') + nameIndex;
+ // If we are matching a tag itself, the previous char should be the open tag
+ // If its a property it won't be
+ var isProp = FullContent[(nameIndex - 1)] != '<';
+ // Check for self-closing tag
+ var selfClosedTag = FullContent[endTagIndex - 1] == '/';
+
+ // This is for when we're capturing the value of a property of the tag rather than the tag itself
+ if (isProp)
{
- Index = offset,
- Length = nodeIter.Current.InnerXml.Length
- };
- yield return (nodeIter.Current.Value, location);
+ // Move the offset to the end of the opening tag
+ var nextClosingIndexAfterName = FullContent[nameIndex..].IndexOf('>', StringComparison.Ordinal)+ nameIndex+1;
+ var offset = selfClosedTag ? endTagIndex : FullContent[nextClosingIndexAfterName..].IndexOf('>') + nextClosingIndexAfterName + 1;
+ // Move the minimum index up to the end of the closing tag
+ minIndex = selfClosedTag ? offset : FullContent[offset..].IndexOf('>') + offset + 1;
+ var location = new Boundary
+ {
+ // +2 for the \" before the value for the property
+ Index = nameIndex + nodeIter.Current.Name.Length + 2,
+ Length = nodeIter.Current.InnerXml.Length
+ };
+ yield return (nodeIter.Current.Value, location);
+ }
+ else
+ {
+ // Move the offset to the end of the opening tag
+ var offset = selfClosedTag ? endTagIndex : FullContent[nameIndex..].IndexOf(nodeIter.Current.InnerXml, StringComparison.Ordinal) + nameIndex;
+ // Move the minimum index up to the end of the closing tag
+ minIndex = selfClosedTag ? offset : FullContent[offset..].IndexOf('>') + offset + 1;
+ var location = new Boundary
+ {
+ Index = offset,
+ Length = nodeIter.Current.InnerXml.Length
+ };
+ yield return (nodeIter.Current.Value, location);
+ }
}
}
diff --git a/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs b/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
index d942cf04..045e1431 100644
--- a/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
+++ b/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
@@ -83,6 +83,82 @@ public class XmlAndJsonTests
}
]";
+ private const string xmlStringRuleForPropWithData = @"[
+ {
+ ""id"": ""SA000005"",
+ ""name"": ""Testing.Rules.XML"",
+ ""tags"": [
+ ""Testing.Rules.XML""
+ ],
+ ""severity"": ""Critical"",
+ ""description"": ""This rule checks the value of the property property to be true"",
+ ""patterns"": [
+ {
+ ""pattern"": ""true"",
+ ""type"": ""string"",
+ ""confidence"": ""High"",
+ ""scopes"": [
+ ""code""
+ ],
+ ""xpaths"" : [""/bookstore/book/title/@*[name()='property']""]
+ }
+ ],
+ ""_comment"": """"
+ }
+]";
+
+ private const string xmlStringRuleForPropWithDataForData = @"[
+ {
+ ""id"": ""SA000005"",
+ ""name"": ""Testing.Rules.XML"",
+ ""tags"": [
+ ""Testing.Rules.XML""
+ ],
+ ""severity"": ""Critical"",
+ ""description"": ""This rule checks the value of the title tag when it has a property"",
+ ""patterns"": [
+ {
+ ""pattern"": ""Franklin"",
+ ""type"": ""regex"",
+ ""confidence"": ""High"",
+ ""scopes"": [
+ ""code""
+ ],
+ ""xpaths"" : [""/bookstore/book/title""]
+ }
+ ],
+ ""_comment"": """"
+ }
+]";
+
+ private const string xmlDataPropsWithTagValue =
+ @"
+
+
+ The Autobiography of Benjamin Franklin
+
+ Benjamin
+ Franklin
+
+ 8.99
+
+
+ The Confidence Man
+
+ Herman
+ Melville
+
+ 11.99
+
+
+ The Gorgias
+
+ Plato
+
+ 9.99
+
+ ";
+
private const string jsonData =
@"{
""books"":
@@ -228,14 +304,14 @@ public void XmlAttributeTest()
{
""xpaths"": [""system.web/trace/@enabled""],
""pattern"": ""true"",
- ""type"": ""regex""
+ ""type"": ""string""
}
],
""must-match"": [
- ""\n\n""
+ ""\n\n""
],
""must-not-match"": [
- ""\n\n""
+ ""\n\n""
]
}]";
RuleSet rules = new();
@@ -268,6 +344,25 @@ public void JsonStringRule(string rule)
Assert.Fail();
}
}
+ [DataRow(xmlStringRuleForPropWithDataForData)]
+ [DataRow(xmlStringRuleForPropWithData)]
+ [DataTestMethod]
+ public void XmlTagWithPropsAndValue(string rule)
+ {
+ RuleSet rules = new();
+ rules.AddString(rule, "XmlTestRules");
+ Microsoft.ApplicationInspector.RulesEngine.RuleProcessor processor = new(rules,
+ new RuleProcessorOptions { AllowAllTagsInBuildFiles = true });
+ if (_languages.FromFileNameOut("test.xml", out var info))
+ {
+ var matches = processor.AnalyzeFile(xmlDataPropsWithTagValue, new FileEntry("test.xml", new MemoryStream()), info);
+ Assert.AreEqual(1, matches.Count);
+ }
+ else
+ {
+ Assert.Fail();
+ }
+ }
[DataRow(xmlStringRule)]
[DataRow(jsonAndXmlStringRule)]
From 972296d16be61260d3c3f59dbbe9c45a94fd1447 Mon Sep 17 00:00:00 2001
From: Gabe Stocco <98900+gfs@users.noreply.github.com>
Date: Thu, 9 Nov 2023 09:43:21 -0800
Subject: [PATCH 2/3] Update comments
---
AppInspector.RulesEngine/TextContainer.cs | 31 +++++++++++++++--------
1 file changed, 21 insertions(+), 10 deletions(-)
diff --git a/AppInspector.RulesEngine/TextContainer.cs b/AppInspector.RulesEngine/TextContainer.cs
index 8918d6ae..a93a3612 100644
--- a/AppInspector.RulesEngine/TextContainer.cs
+++ b/AppInspector.RulesEngine/TextContainer.cs
@@ -172,11 +172,13 @@ public TextContainer(string content, string language, Languages languages, ILogg
}
///
- /// If this file is a JSON, XML or YML file, returns the string contents of the specified path.
+ /// If this file is XML, attempts to return the the string contents of the specified XPath applied to the file.
/// If the path does not exist, or the file is not JSON, XML or YML returns null.
+ /// Method contains some heuristic behavior and may not cover all cases.
+ /// Please report any issues with a sample XML and XPATH to reproduce.
///
- ///
- ///
+ /// XPath to query document with
+ /// Enumeration of string and Boundary tuples for the XPath matches. Boundary locations refer to the locations in the original document on disk.
internal IEnumerable<(string, Boundary)> GetStringFromXPath(string Path, Dictionary xpathNameSpaces)
{
lock (_xpathLock)
@@ -221,14 +223,22 @@ public TextContainer(string content, string language, Languages languages, ILogg
continue;
}
+ // We have to heuristically calculate the original indexes of the locations in the original document because the internal representation differs
+ // For example it will convert to
+
// First we find the name, absolute position index
var nameIndex = FullContent[minIndex..].IndexOf(nodeIter.Current.Name, StringComparison.Ordinal) + minIndex;
- // Then we grab the index of the end of this tag.
- // We can't use OuterXML because the parser will inject the namespace if present into the OuterXML so it doesn't match the original text.
- // Position relative to nameIndex
- var endTagIndex = FullContent[nameIndex..].IndexOf('>') + nameIndex;
+ // Then we calculate the absolute index of the end of the tag.
+ // We can't use OuterXML property because the parser will inject the namespace if present into the OuterXML so it doesn't match the original text.
+ var endTagIndex = FullContent[nameIndex..].IndexOf('>', StringComparison.Ordinal) + nameIndex;
// If we are matching a tag itself, the previous char should be the open tag
+ // |
+ // v
+ //
// If its a property it won't be
+ // |
+ // v
+ //
var isProp = FullContent[(nameIndex - 1)] != '<';
// Check for self-closing tag
var selfClosedTag = FullContent[endTagIndex - 1] == '/';
@@ -236,10 +246,11 @@ public TextContainer(string content, string language, Languages languages, ILogg
// This is for when we're capturing the value of a property of the tag rather than the tag itself
if (isProp)
{
- // Move the offset to the end of the opening tag
- var nextClosingIndexAfterName = FullContent[nameIndex..].IndexOf('>', StringComparison.Ordinal)+ nameIndex+1;
+ // Find the index of character after the next end tag index after the name
+ var nextClosingIndexAfterName = endTagIndex+1;
+ // If we have a self closing tag, we can use that index, otherwise we need the closure of this tag
var offset = selfClosedTag ? endTagIndex : FullContent[nextClosingIndexAfterName..].IndexOf('>') + nextClosingIndexAfterName + 1;
- // Move the minimum index up to the end of the closing tag
+ // Move the minimum index up to the end of the closing tag to avoid additioanl matches of the same values
minIndex = selfClosedTag ? offset : FullContent[offset..].IndexOf('>') + offset + 1;
var location = new Boundary
{
From 40ce3bcd40ce22b031f71c25529a6c416b9f757b Mon Sep 17 00:00:00 2001
From: Gabe Stocco <98900+gfs@users.noreply.github.com>
Date: Thu, 9 Nov 2023 09:48:03 -0800
Subject: [PATCH 3/3] Improve robustness of new test cases.
Also check value and index location of matches.
---
AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs b/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
index 045e1431..29725d98 100644
--- a/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
+++ b/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
@@ -344,10 +344,10 @@ public void JsonStringRule(string rule)
Assert.Fail();
}
}
- [DataRow(xmlStringRuleForPropWithDataForData)]
- [DataRow(xmlStringRuleForPropWithData)]
+ [DataRow(xmlStringRuleForPropWithDataForData, "Franklin", 212)]
+ [DataRow(xmlStringRuleForPropWithData, "true", 176)]
[DataTestMethod]
- public void XmlTagWithPropsAndValue(string rule)
+ public void XmlTagWithPropsAndValue(string rule, string expectedValue, int expectedIndex)
{
RuleSet rules = new();
rules.AddString(rule, "XmlTestRules");
@@ -357,6 +357,9 @@ public void XmlTagWithPropsAndValue(string rule)
{
var matches = processor.AnalyzeFile(xmlDataPropsWithTagValue, new FileEntry("test.xml", new MemoryStream()), info);
Assert.AreEqual(1, matches.Count);
+ var match = matches[0];
+ Assert.AreEqual(expectedValue, match.Sample);
+ Assert.AreEqual(expectedIndex, match.Boundary.Index);
}
else
{