Skip to content

Commit

Permalink
Fixes an issue with setting correct index values (#567)
Browse files Browse the repository at this point in the history
* Fixes an issue with setting correct index values when matching a property of an xml tag with an xpath

* Update comments

* Improve robustness of new test cases.

Also check value and index location of matches.
  • Loading branch information
gfs authored Nov 9, 2023
1 parent c2a0912 commit 310ac15
Show file tree
Hide file tree
Showing 2 changed files with 154 additions and 24 deletions.
74 changes: 53 additions & 21 deletions AppInspector.RulesEngine/TextContainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,13 @@ public TextContainer(string content, string language, Languages languages, ILogg
}

/// <summary>
/// If this file is a JSON, XML or YML file, returns the string contents of the specified path.
/// If this file is XML, attempts to return the the string contents of the specified XPath applied to the file.
/// If the path does not exist, or the file is not JSON, XML or YML returns null.
/// Method contains some heuristic behavior and may not cover all cases.
/// Please report any issues with a sample XML and XPATH to reproduce.
/// </summary>
/// <param name="Path"></param>
/// <returns></returns>
/// <param name="Path">XPath to query document with</param>
/// <returns>Enumeration of string and Boundary tuples for the XPath matches. Boundary locations refer to the locations in the original document on disk.</returns>
internal IEnumerable<(string, Boundary)> GetStringFromXPath(string Path, Dictionary<string, string> xpathNameSpaces)
{
lock (_xpathLock)
Expand Down Expand Up @@ -221,26 +223,56 @@ public TextContainer(string content, string language, Languages languages, ILogg
continue;
}

// First we find the name
// We have to heuristically calculate the original indexes of the locations in the original document because the internal representation differs
// For example it will convert <Tag Prop="Val"/> to <Tag Prop=\"Val\"/>

// First we find the name, absolute position index
var nameIndex = FullContent[minIndex..].IndexOf(nodeIter.Current.Name, StringComparison.Ordinal) + minIndex;
// Then we grab the index of the end of this tag.
// We can't use OuterXML because the parser will inject the namespace if present into the OuterXML so it doesn't match the original text.
var endTagIndex = FullContent[nameIndex..].IndexOf('>');
// We also look for self-closing tag
var selfClosedTag = FullContent[endTagIndex-1] == '/';
// If the tag is self closing innerxml will be empty string, so the finding is located at the end of the tag and is empty string
// Otherwise the finding is the content of the xml tag
var offset = selfClosedTag ? endTagIndex : FullContent[nameIndex..].IndexOf(nodeIter.Current.InnerXml, StringComparison.Ordinal) + nameIndex;
// Move the minimum index up in case there are multiple instances of identical OuterXML
// This ensures we won't re-find the same one
var totalOffset = minIndex + nameIndex + endTagIndex;
minIndex = totalOffset;
var location = new Boundary
// Then we calculate the absolute index of the end of the tag.
// We can't use OuterXML property because the parser will inject the namespace if present into the OuterXML so it doesn't match the original text.
var endTagIndex = FullContent[nameIndex..].IndexOf('>', StringComparison.Ordinal) + nameIndex;
// If we are matching a tag itself, the previous char should be the open tag
// |
// v
// <Tag>
// If its a property it won't be
// |
// v
// <Tag Prop="Value">
var isProp = FullContent[(nameIndex - 1)] != '<';
// Check for self-closing tag
var selfClosedTag = FullContent[endTagIndex - 1] == '/';

// This is for when we're capturing the value of a property of the tag rather than the tag itself
if (isProp)
{
Index = offset,
Length = nodeIter.Current.InnerXml.Length
};
yield return (nodeIter.Current.Value, location);
// Find the index of character after the next end tag index after the name
var nextClosingIndexAfterName = endTagIndex+1;
// If we have a self closing tag, we can use that index, otherwise we need the closure of this tag
var offset = selfClosedTag ? endTagIndex : FullContent[nextClosingIndexAfterName..].IndexOf('>') + nextClosingIndexAfterName + 1;
// Move the minimum index up to the end of the closing tag to avoid additioanl matches of the same values
minIndex = selfClosedTag ? offset : FullContent[offset..].IndexOf('>') + offset + 1;
var location = new Boundary
{
// +2 for the \" before the value for the property
Index = nameIndex + nodeIter.Current.Name.Length + 2,
Length = nodeIter.Current.InnerXml.Length
};
yield return (nodeIter.Current.Value, location);
}
else
{
// Move the offset to the end of the opening tag
var offset = selfClosedTag ? endTagIndex : FullContent[nameIndex..].IndexOf(nodeIter.Current.InnerXml, StringComparison.Ordinal) + nameIndex;
// Move the minimum index up to the end of the closing tag
minIndex = selfClosedTag ? offset : FullContent[offset..].IndexOf('>') + offset + 1;
var location = new Boundary
{
Index = offset,
Length = nodeIter.Current.InnerXml.Length
};
yield return (nodeIter.Current.Value, location);
}
}
}

Expand Down
104 changes: 101 additions & 3 deletions AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,82 @@ public class XmlAndJsonTests
}
]";

private const string xmlStringRuleForPropWithData = @"[
{
""id"": ""SA000005"",
""name"": ""Testing.Rules.XML"",
""tags"": [
""Testing.Rules.XML""
],
""severity"": ""Critical"",
""description"": ""This rule checks the value of the property property to be true"",
""patterns"": [
{
""pattern"": ""true"",
""type"": ""string"",
""confidence"": ""High"",
""scopes"": [
""code""
],
""xpaths"" : [""/bookstore/book/title/@*[name()='property']""]
}
],
""_comment"": """"
}
]";

private const string xmlStringRuleForPropWithDataForData = @"[
{
""id"": ""SA000005"",
""name"": ""Testing.Rules.XML"",
""tags"": [
""Testing.Rules.XML""
],
""severity"": ""Critical"",
""description"": ""This rule checks the value of the title tag when it has a property"",
""patterns"": [
{
""pattern"": ""Franklin"",
""type"": ""regex"",
""confidence"": ""High"",
""scopes"": [
""code""
],
""xpaths"" : [""/bookstore/book/title""]
}
],
""_comment"": """"
}
]";

private const string xmlDataPropsWithTagValue =
@"<?xml version=""1.0"" encoding=""utf-8"" ?>
<bookstore>
<book genre=""autobiography"" publicationdate=""1981-03-22"" ISBN=""1-861003-11-0"">
<title property=""true"">The Autobiography of Benjamin Franklin</title>
<author>
<first-name>Benjamin</first-name>
<last-name>Franklin</last-name>
</author>
<price>8.99</price>
</book>
<book genre=""novel"" publicationdate=""1967-11-17"" ISBN=""0-201-63361-2"">
<title property=""false"">The Confidence Man</title>
<author>
<first-name>Herman</first-name>
<last-name>Melville</last-name>
</author>
<price>11.99</price>
</book>
<book genre=""philosophy"" publicationdate=""1991-02-15"" ISBN=""1-861001-57-6"">
<title property=""false"">The Gorgias</title>
<author>
<name>Plato</name>
</author>
<price>9.99</price>
</book>
</bookstore>";

private const string jsonData =
@"{
""books"":
Expand Down Expand Up @@ -228,14 +304,14 @@ public void XmlAttributeTest()
{
""xpaths"": [""system.web/trace/@enabled""],
""pattern"": ""true"",
""type"": ""regex""
""type"": ""string""
}
],
""must-match"": [
""<system.web>\n<trace enabled='true' pageOutput='false' requestLimit='40' localOnly='false' />\n</system.web>""
""<system.web>\n<trace enabled='true' pageOutput='false' requestLimit='40' localOnly='true' />\n</system.web>""
],
""must-not-match"": [
""<system.web>\n<trace enabled='true' pageOutput='false' requestLimit='40' localOnly='true' />\n</system.web>""
""<system.web>\n<trace enabled='false' pageOutput='false' requestLimit='40' localOnly='true' />\n</system.web>""
]
}]";
RuleSet rules = new();
Expand Down Expand Up @@ -268,6 +344,28 @@ public void JsonStringRule(string rule)
Assert.Fail();
}
}
[DataRow(xmlStringRuleForPropWithDataForData, "Franklin", 212)]
[DataRow(xmlStringRuleForPropWithData, "true", 176)]
[DataTestMethod]
public void XmlTagWithPropsAndValue(string rule, string expectedValue, int expectedIndex)
{
RuleSet rules = new();
rules.AddString(rule, "XmlTestRules");
Microsoft.ApplicationInspector.RulesEngine.RuleProcessor processor = new(rules,
new RuleProcessorOptions { AllowAllTagsInBuildFiles = true });
if (_languages.FromFileNameOut("test.xml", out var info))
{
var matches = processor.AnalyzeFile(xmlDataPropsWithTagValue, new FileEntry("test.xml", new MemoryStream()), info);
Assert.AreEqual(1, matches.Count);
var match = matches[0];
Assert.AreEqual(expectedValue, match.Sample);
Assert.AreEqual(expectedIndex, match.Boundary.Index);
}
else
{
Assert.Fail();
}
}

[DataRow(xmlStringRule)]
[DataRow(jsonAndXmlStringRule)]
Expand Down

0 comments on commit 310ac15

Please sign in to comment.