Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes an issue with setting correct index values #567

Merged
merged 3 commits into from
Nov 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 53 additions & 21 deletions AppInspector.RulesEngine/TextContainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,13 @@ public TextContainer(string content, string language, Languages languages, ILogg
}

/// <summary>
/// If this file is a JSON, XML or YML file, returns the string contents of the specified path.
/// If this file is XML, attempts to return the the string contents of the specified XPath applied to the file.
/// If the path does not exist, or the file is not JSON, XML or YML returns null.
/// Method contains some heuristic behavior and may not cover all cases.
/// Please report any issues with a sample XML and XPATH to reproduce.
/// </summary>
/// <param name="Path"></param>
/// <returns></returns>
/// <param name="Path">XPath to query document with</param>
/// <returns>Enumeration of string and Boundary tuples for the XPath matches. Boundary locations refer to the locations in the original document on disk.</returns>
internal IEnumerable<(string, Boundary)> GetStringFromXPath(string Path, Dictionary<string, string> xpathNameSpaces)
{
lock (_xpathLock)
Expand Down Expand Up @@ -221,26 +223,56 @@ public TextContainer(string content, string language, Languages languages, ILogg
continue;
}

// First we find the name
// We have to heuristically calculate the original indexes of the locations in the original document because the internal representation differs
// For example it will convert <Tag Prop="Val"/> to <Tag Prop=\"Val\"/>

// First we find the name, absolute position index
var nameIndex = FullContent[minIndex..].IndexOf(nodeIter.Current.Name, StringComparison.Ordinal) + minIndex;
// Then we grab the index of the end of this tag.
// We can't use OuterXML because the parser will inject the namespace if present into the OuterXML so it doesn't match the original text.
var endTagIndex = FullContent[nameIndex..].IndexOf('>');
// We also look for self-closing tag
var selfClosedTag = FullContent[endTagIndex-1] == '/';
// If the tag is self closing innerxml will be empty string, so the finding is located at the end of the tag and is empty string
// Otherwise the finding is the content of the xml tag
var offset = selfClosedTag ? endTagIndex : FullContent[nameIndex..].IndexOf(nodeIter.Current.InnerXml, StringComparison.Ordinal) + nameIndex;
// Move the minimum index up in case there are multiple instances of identical OuterXML
// This ensures we won't re-find the same one
var totalOffset = minIndex + nameIndex + endTagIndex;
minIndex = totalOffset;
var location = new Boundary
// Then we calculate the absolute index of the end of the tag.
// We can't use OuterXML property because the parser will inject the namespace if present into the OuterXML so it doesn't match the original text.
var endTagIndex = FullContent[nameIndex..].IndexOf('>', StringComparison.Ordinal) + nameIndex;
// If we are matching a tag itself, the previous char should be the open tag
// |
// v
// <Tag>
// If its a property it won't be
// |
// v
// <Tag Prop="Value">
var isProp = FullContent[(nameIndex - 1)] != '<';
// Check for self-closing tag
var selfClosedTag = FullContent[endTagIndex - 1] == '/';

// This is for when we're capturing the value of a property of the tag rather than the tag itself
if (isProp)
{
Index = offset,
Length = nodeIter.Current.InnerXml.Length
};
yield return (nodeIter.Current.Value, location);
// Find the index of character after the next end tag index after the name
var nextClosingIndexAfterName = endTagIndex+1;
// If we have a self closing tag, we can use that index, otherwise we need the closure of this tag
var offset = selfClosedTag ? endTagIndex : FullContent[nextClosingIndexAfterName..].IndexOf('>') + nextClosingIndexAfterName + 1;
// Move the minimum index up to the end of the closing tag to avoid additioanl matches of the same values
minIndex = selfClosedTag ? offset : FullContent[offset..].IndexOf('>') + offset + 1;
var location = new Boundary
{
// +2 for the \" before the value for the property
Index = nameIndex + nodeIter.Current.Name.Length + 2,
Length = nodeIter.Current.InnerXml.Length
};
yield return (nodeIter.Current.Value, location);
}
else
{
// Move the offset to the end of the opening tag
var offset = selfClosedTag ? endTagIndex : FullContent[nameIndex..].IndexOf(nodeIter.Current.InnerXml, StringComparison.Ordinal) + nameIndex;
// Move the minimum index up to the end of the closing tag
minIndex = selfClosedTag ? offset : FullContent[offset..].IndexOf('>') + offset + 1;
var location = new Boundary
{
Index = offset,
Length = nodeIter.Current.InnerXml.Length
};
yield return (nodeIter.Current.Value, location);
}
}
}

Expand Down
104 changes: 101 additions & 3 deletions AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,82 @@ public class XmlAndJsonTests
}
]";

private const string xmlStringRuleForPropWithData = @"[
{
""id"": ""SA000005"",
""name"": ""Testing.Rules.XML"",
""tags"": [
""Testing.Rules.XML""
],
""severity"": ""Critical"",
""description"": ""This rule checks the value of the property property to be true"",
""patterns"": [
{
""pattern"": ""true"",
""type"": ""string"",
""confidence"": ""High"",
""scopes"": [
""code""
],
""xpaths"" : [""/bookstore/book/title/@*[name()='property']""]
}
],
""_comment"": """"
}
]";

private const string xmlStringRuleForPropWithDataForData = @"[
{
""id"": ""SA000005"",
""name"": ""Testing.Rules.XML"",
""tags"": [
""Testing.Rules.XML""
],
""severity"": ""Critical"",
""description"": ""This rule checks the value of the title tag when it has a property"",
""patterns"": [
{
""pattern"": ""Franklin"",
""type"": ""regex"",
""confidence"": ""High"",
""scopes"": [
""code""
],
""xpaths"" : [""/bookstore/book/title""]
}
],
""_comment"": """"
}
]";

private const string xmlDataPropsWithTagValue =
@"<?xml version=""1.0"" encoding=""utf-8"" ?>
<bookstore>
<book genre=""autobiography"" publicationdate=""1981-03-22"" ISBN=""1-861003-11-0"">
<title property=""true"">The Autobiography of Benjamin Franklin</title>
<author>
<first-name>Benjamin</first-name>
<last-name>Franklin</last-name>
</author>
<price>8.99</price>
</book>
<book genre=""novel"" publicationdate=""1967-11-17"" ISBN=""0-201-63361-2"">
<title property=""false"">The Confidence Man</title>
<author>
<first-name>Herman</first-name>
<last-name>Melville</last-name>
</author>
<price>11.99</price>
</book>
<book genre=""philosophy"" publicationdate=""1991-02-15"" ISBN=""1-861001-57-6"">
<title property=""false"">The Gorgias</title>
<author>
<name>Plato</name>
</author>
<price>9.99</price>
</book>
</bookstore>";

private const string jsonData =
@"{
""books"":
Expand Down Expand Up @@ -228,14 +304,14 @@ public void XmlAttributeTest()
{
""xpaths"": [""system.web/trace/@enabled""],
""pattern"": ""true"",
""type"": ""regex""
""type"": ""string""
}
],
""must-match"": [
""<system.web>\n<trace enabled='true' pageOutput='false' requestLimit='40' localOnly='false' />\n</system.web>""
""<system.web>\n<trace enabled='true' pageOutput='false' requestLimit='40' localOnly='true' />\n</system.web>""
],
""must-not-match"": [
""<system.web>\n<trace enabled='true' pageOutput='false' requestLimit='40' localOnly='true' />\n</system.web>""
""<system.web>\n<trace enabled='false' pageOutput='false' requestLimit='40' localOnly='true' />\n</system.web>""
]
}]";
RuleSet rules = new();
Expand Down Expand Up @@ -268,6 +344,28 @@ public void JsonStringRule(string rule)
Assert.Fail();
}
}
[DataRow(xmlStringRuleForPropWithDataForData, "Franklin", 212)]
[DataRow(xmlStringRuleForPropWithData, "true", 176)]
[DataTestMethod]
public void XmlTagWithPropsAndValue(string rule, string expectedValue, int expectedIndex)
{
RuleSet rules = new();
rules.AddString(rule, "XmlTestRules");
Microsoft.ApplicationInspector.RulesEngine.RuleProcessor processor = new(rules,
new RuleProcessorOptions { AllowAllTagsInBuildFiles = true });
if (_languages.FromFileNameOut("test.xml", out var info))
{
var matches = processor.AnalyzeFile(xmlDataPropsWithTagValue, new FileEntry("test.xml", new MemoryStream()), info);
Assert.AreEqual(1, matches.Count);
var match = matches[0];
Assert.AreEqual(expectedValue, match.Sample);
Assert.AreEqual(expectedIndex, match.Boundary.Index);
}
else
{
Assert.Fail();
}
}

[DataRow(xmlStringRule)]
[DataRow(jsonAndXmlStringRule)]
Expand Down