From 9d893c26dc7ddb422630fb7cb103ae4f0e73fdad Mon Sep 17 00:00:00 2001
From: Gabe Stocco <98900+gfs@users.noreply.github.com>
Date: Wed, 8 Nov 2023 11:28:09 -0800
Subject: [PATCH 1/3] Fixes an issue with setting correct index values when
 matching a property of an xml tag with an xpath

---
 AppInspector.RulesEngine/TextContainer.cs     |  53 ++++++---
 .../RuleProcessor/XmlAndJsonTests.cs          | 101 +++++++++++++++++-
 2 files changed, 135 insertions(+), 19 deletions(-)

diff --git a/AppInspector.RulesEngine/TextContainer.cs b/AppInspector.RulesEngine/TextContainer.cs
index 1b631e68..8918d6ae 100644
--- a/AppInspector.RulesEngine/TextContainer.cs
+++ b/AppInspector.RulesEngine/TextContainer.cs
@@ -221,26 +221,47 @@ public TextContainer(string content, string language, Languages languages, ILogg
                 continue;
             }
 
-            // First we find the name
+            // First we find the name, absolute position index
             var nameIndex = FullContent[minIndex..].IndexOf(nodeIter.Current.Name, StringComparison.Ordinal) + minIndex;
             // Then we grab the index of the end of this tag.
             // We can't use OuterXML because the parser will inject the namespace if present into the OuterXML so it doesn't match the original text.
-            var endTagIndex = FullContent[nameIndex..].IndexOf('>');
-            // We also look for self-closing tag
-            var selfClosedTag = FullContent[endTagIndex-1] == '/';
-            // If the tag is self closing innerxml will be empty string, so the finding is located at the end of the tag and is empty string
-            // Otherwise the finding is the content of the xml tag
-            var offset = selfClosedTag ? endTagIndex : FullContent[nameIndex..].IndexOf(nodeIter.Current.InnerXml, StringComparison.Ordinal) + nameIndex;
-            // Move the minimum index up in case there are multiple instances of identical OuterXML
-            // This ensures we won't re-find the same one
-            var totalOffset = minIndex + nameIndex + endTagIndex;
-            minIndex = totalOffset;
-            var location = new Boundary
+            // Position relative to nameIndex
+            var endTagIndex = FullContent[nameIndex..].IndexOf('>') + nameIndex;
+            // If we are matching a tag itself, the previous char should be the open tag
+            // If its a property it won't be
+            var isProp = FullContent[(nameIndex - 1)] != '<';
+            // Check for self-closing tag
+            var selfClosedTag = FullContent[endTagIndex - 1] == '/';
+
+            // This is for when we're capturing the value of a property of the tag rather than the tag itself
+            if (isProp)
             {
-                Index = offset,
-                Length = nodeIter.Current.InnerXml.Length
-            };
-            yield return (nodeIter.Current.Value, location);
+                // Move the offset to the end of the opening tag
+                var nextClosingIndexAfterName = FullContent[nameIndex..].IndexOf('>', StringComparison.Ordinal)+ nameIndex+1;
+                var offset = selfClosedTag ? endTagIndex : FullContent[nextClosingIndexAfterName..].IndexOf('>') + nextClosingIndexAfterName + 1;
+                // Move the minimum index up to the end of the closing tag
+                minIndex = selfClosedTag ? offset : FullContent[offset..].IndexOf('>') + offset + 1;
+                var location = new Boundary
+                {
+                    // +2 for the \" before the value for the property
+                    Index = nameIndex + nodeIter.Current.Name.Length + 2,
+                    Length = nodeIter.Current.InnerXml.Length
+                };
+                yield return (nodeIter.Current.Value, location);
+            }
+            else
+            {
+                // Move the offset to the end of the opening tag
+                var offset = selfClosedTag ? endTagIndex : FullContent[nameIndex..].IndexOf(nodeIter.Current.InnerXml, StringComparison.Ordinal) + nameIndex;
+                // Move the minimum index up to the end of the closing tag
+                minIndex = selfClosedTag ? offset : FullContent[offset..].IndexOf('>') + offset + 1;
+                var location = new Boundary
+                {
+                    Index = offset,
+                    Length = nodeIter.Current.InnerXml.Length
+                };
+                yield return (nodeIter.Current.Value, location);
+            }
         }
     }
 
diff --git a/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs b/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
index d942cf04..045e1431 100644
--- a/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
+++ b/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
@@ -83,6 +83,82 @@ public class XmlAndJsonTests
     }
 ]";
 
+    private const string xmlStringRuleForPropWithData = @"[
+    {
+        ""id"": ""SA000005"",
+        ""name"": ""Testing.Rules.XML"",
+        ""tags"": [
+            ""Testing.Rules.XML""
+        ],
+        ""severity"": ""Critical"",
+        ""description"": ""This rule checks the value of the property property to be true"",
+        ""patterns"": [
+            {
+                ""pattern"": ""true"",
+                ""type"": ""string"",
+                ""confidence"": ""High"",
+                ""scopes"": [
+                    ""code""
+                ],
+                ""xpaths"" : [""/bookstore/book/title/@*[name()='property']""]
+            }
+        ],
+        ""_comment"": """"
+    }
+]";
+
+    private const string xmlStringRuleForPropWithDataForData = @"[
+    {
+        ""id"": ""SA000005"",
+        ""name"": ""Testing.Rules.XML"",
+        ""tags"": [
+            ""Testing.Rules.XML""
+        ],
+        ""severity"": ""Critical"",
+        ""description"": ""This rule checks the value of the title tag when it has a property"",
+        ""patterns"": [
+            {
+                ""pattern"": ""Franklin"",
+                ""type"": ""regex"",
+                ""confidence"": ""High"",
+                ""scopes"": [
+                    ""code""
+                ],
+                ""xpaths"" : [""/bookstore/book/title""]
+            }
+        ],
+        ""_comment"": """"
+    }
+]";
+
+    private const string xmlDataPropsWithTagValue =
+        @"<?xml version=""1.0"" encoding=""utf-8"" ?>   
+  <bookstore>  
+      <book genre=""autobiography"" publicationdate=""1981-03-22"" ISBN=""1-861003-11-0"">  
+          <title property=""true"">The Autobiography of Benjamin Franklin</title>  
+          <author>  
+              <first-name>Benjamin</first-name>  
+              <last-name>Franklin</last-name>  
+          </author>  
+          <price>8.99</price>  
+      </book>  
+      <book genre=""novel"" publicationdate=""1967-11-17"" ISBN=""0-201-63361-2"">  
+          <title property=""false"">The Confidence Man</title>  
+          <author>  
+              <first-name>Herman</first-name>  
+              <last-name>Melville</last-name>  
+          </author>  
+          <price>11.99</price>  
+      </book>  
+      <book genre=""philosophy"" publicationdate=""1991-02-15"" ISBN=""1-861001-57-6"">  
+          <title property=""false"">The Gorgias</title>  
+          <author>  
+              <name>Plato</name>  
+          </author>  
+          <price>9.99</price>  
+      </book>  
+  </bookstore>";
+
     private const string jsonData =
         @"{
     ""books"":
@@ -228,14 +304,14 @@ public void XmlAttributeTest()
             {
                 ""xpaths"": [""system.web/trace/@enabled""],
                 ""pattern"": ""true"",
-                ""type"": ""regex""
+                ""type"": ""string""
             }
         ],
         ""must-match"": [
-            ""<system.web>\n<trace enabled='true' pageOutput='false' requestLimit='40' localOnly='false' />\n</system.web>""
+            ""<system.web>\n<trace enabled='true' pageOutput='false' requestLimit='40' localOnly='true' />\n</system.web>""
         ],
         ""must-not-match"": [
-            ""<system.web>\n<trace enabled='true' pageOutput='false' requestLimit='40' localOnly='true' />\n</system.web>""
+            ""<system.web>\n<trace enabled='false' pageOutput='false' requestLimit='40' localOnly='true' />\n</system.web>""
         ]
     }]";
         RuleSet rules = new();
@@ -268,6 +344,25 @@ public void JsonStringRule(string rule)
             Assert.Fail();
         }
     }
+    [DataRow(xmlStringRuleForPropWithDataForData)]
+    [DataRow(xmlStringRuleForPropWithData)]
+    [DataTestMethod]
+    public void XmlTagWithPropsAndValue(string rule)
+    {
+        RuleSet rules = new();
+        rules.AddString(rule, "XmlTestRules");
+        Microsoft.ApplicationInspector.RulesEngine.RuleProcessor processor = new(rules,
+            new RuleProcessorOptions { AllowAllTagsInBuildFiles = true });
+        if (_languages.FromFileNameOut("test.xml", out var info))
+        {
+            var matches = processor.AnalyzeFile(xmlDataPropsWithTagValue, new FileEntry("test.xml", new MemoryStream()), info);
+            Assert.AreEqual(1, matches.Count);
+        }
+        else
+        {
+            Assert.Fail();
+        }
+    }
 
     [DataRow(xmlStringRule)]
     [DataRow(jsonAndXmlStringRule)]

From 972296d16be61260d3c3f59dbbe9c45a94fd1447 Mon Sep 17 00:00:00 2001
From: Gabe Stocco <98900+gfs@users.noreply.github.com>
Date: Thu, 9 Nov 2023 09:43:21 -0800
Subject: [PATCH 2/3] Update comments

---
 AppInspector.RulesEngine/TextContainer.cs | 31 +++++++++++++++--------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/AppInspector.RulesEngine/TextContainer.cs b/AppInspector.RulesEngine/TextContainer.cs
index 8918d6ae..a93a3612 100644
--- a/AppInspector.RulesEngine/TextContainer.cs
+++ b/AppInspector.RulesEngine/TextContainer.cs
@@ -172,11 +172,13 @@ public TextContainer(string content, string language, Languages languages, ILogg
     }
 
     /// <summary>
-    ///     If this file is a JSON, XML or YML file, returns the string contents of the specified path.
+    ///     If this file is XML, attempts to return the string contents of the specified XPath applied to the file.
     ///     If the path does not exist, or the file is not JSON, XML or YML returns null.
+    ///     Method contains some heuristic behavior and may not cover all cases. 
+    ///     Please report any issues with a sample XML and XPATH to reproduce.
     /// </summary>
-    /// <param name="Path"></param>
-    /// <returns></returns>
+    /// <param name="Path">XPath to query document with</param>
+    /// <returns>Enumeration of string and Boundary tuples for the XPath matches. Boundary locations refer to the locations in the original document on disk.</returns>
     internal IEnumerable<(string, Boundary)> GetStringFromXPath(string Path, Dictionary<string, string> xpathNameSpaces)
     {
         lock (_xpathLock)
@@ -221,14 +223,22 @@ public TextContainer(string content, string language, Languages languages, ILogg
                 continue;
             }
 
+            // We have to heuristically calculate the original indexes of the locations in the original document because the internal representation differs
+            // For example it will convert <Tag Prop="Val"/> to <Tag Prop=\"Val\"/>
+
             // First we find the name, absolute position index
             var nameIndex = FullContent[minIndex..].IndexOf(nodeIter.Current.Name, StringComparison.Ordinal) + minIndex;
-            // Then we grab the index of the end of this tag.
-            // We can't use OuterXML because the parser will inject the namespace if present into the OuterXML so it doesn't match the original text.
-            // Position relative to nameIndex
-            var endTagIndex = FullContent[nameIndex..].IndexOf('>') + nameIndex;
+            // Then we calculate the absolute index of the end of the tag.
+            // We can't use OuterXML property because the parser will inject the namespace if present into the OuterXML so it doesn't match the original text.
+            var endTagIndex = FullContent[nameIndex..].IndexOf('>', StringComparison.Ordinal) + nameIndex;
             // If we are matching a tag itself, the previous char should be the open tag
+            //  |
+            //  v
+            // <Tag>
             // If its a property it won't be
+            //      |
+            //      v
+            // <Tag Prop="Value">
             var isProp = FullContent[(nameIndex - 1)] != '<';
             // Check for self-closing tag
             var selfClosedTag = FullContent[endTagIndex - 1] == '/';
@@ -236,10 +246,11 @@ public TextContainer(string content, string language, Languages languages, ILogg
             // This is for when we're capturing the value of a property of the tag rather than the tag itself
             if (isProp)
             {
-                // Move the offset to the end of the opening tag
-                var nextClosingIndexAfterName = FullContent[nameIndex..].IndexOf('>', StringComparison.Ordinal)+ nameIndex+1;
+                // Find the index of character after the next end tag index after the name
+                var nextClosingIndexAfterName = endTagIndex+1;
+                // If we have a self closing tag, we can use that index, otherwise we need the closure of this tag
                 var offset = selfClosedTag ? endTagIndex : FullContent[nextClosingIndexAfterName..].IndexOf('>') + nextClosingIndexAfterName + 1;
-                // Move the minimum index up to the end of the closing tag
+                // Move the minimum index up to the end of the closing tag to avoid additional matches of the same values
                 minIndex = selfClosedTag ? offset : FullContent[offset..].IndexOf('>') + offset + 1;
                 var location = new Boundary
                 {

From 40ce3bcd40ce22b031f71c25529a6c416b9f757b Mon Sep 17 00:00:00 2001
From: Gabe Stocco <98900+gfs@users.noreply.github.com>
Date: Thu, 9 Nov 2023 09:48:03 -0800
Subject: [PATCH 3/3] Improve robustness of new test cases.

Also check value and index location of matches.
---
 AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs b/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
index 045e1431..29725d98 100644
--- a/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
+++ b/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
@@ -344,10 +344,10 @@ public void JsonStringRule(string rule)
             Assert.Fail();
         }
     }
-    [DataRow(xmlStringRuleForPropWithDataForData)]
-    [DataRow(xmlStringRuleForPropWithData)]
+    [DataRow(xmlStringRuleForPropWithDataForData, "Franklin", 212)]
+    [DataRow(xmlStringRuleForPropWithData, "true", 176)]
     [DataTestMethod]
-    public void XmlTagWithPropsAndValue(string rule)
+    public void XmlTagWithPropsAndValue(string rule, string expectedValue, int expectedIndex)
     {
         RuleSet rules = new();
         rules.AddString(rule, "XmlTestRules");
@@ -357,6 +357,9 @@ public void XmlTagWithPropsAndValue(string rule)
         {
             var matches = processor.AnalyzeFile(xmlDataPropsWithTagValue, new FileEntry("test.xml", new MemoryStream()), info);
             Assert.AreEqual(1, matches.Count);
+            var match = matches[0];
+            Assert.AreEqual(expectedValue, match.Sample);
+            Assert.AreEqual(expectedIndex, match.Boundary.Index);
         }
         else
         {