Skip to content

Commit

Permalink
Fix suffix * handling in our CODEOWNERS path matcher (#5308)
Browse files Browse the repository at this point in the history
  • Loading branch information
Konrad Jamrozik authored Feb 2, 2023
1 parent a3a4b68 commit 0a6d021
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 115 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,23 @@ public class CodeownersFileTests
// @formatter:off
// Path: Expected match:
// Codeowners , Target , Legacy , New
new( "/" , "a" , true , true ),
new( "/" , "A" , true , true ),
new( "/" , "/a" , true , true ),
new( "/" , "a/" , true , true ),
new( "/" , "/a/" , true , true ),
new( "/" , "/a/b" , true , true ),
new( "/a" , "a" , true , true ),
new( "/**" , "a" , false , true ),
new( "/**" , "A" , false , true ),
new( "/**" , "/a" , false , true ),
new( "/**" , "a/" , false , true ),
new( "/**" , "/a/" , false , true ),
new( "/**" , "/a/b" , false , true ),
new( "/**" , "/a/b/" , false , true ),
new( "/**" , "/a/b/c" , false , true ),
new( "/**" , "[" , false , true ),
new( "/**" , "]" , false , true ),
new( "/" , "a" , true , false ),
new( "/" , "A" , true , false ),
new( "/" , "/a" , true , false ),
new( "/" , "a/" , true , false ),
new( "/" , "/a/" , true , false ),
new( "/" , "/a/b" , true , false ),
new( "/a" , "a" , true , true ),
new( "/a" , "A" , true , false ),
new( "/a" , "/a" , true , true ),
new( "/a" , "a/" , true , false ),
Expand Down Expand Up @@ -84,16 +94,20 @@ public class CodeownersFileTests
new( "/*" , "*" , true , false ),
new( "/*" , "a" , false , true ),
new( "/*" , "a/" , false , false ),
new( "/*" , "/a" , false , true ),
new( "/*" , "/a/" , false , false ),
new( "/*" , "a/b" , false , false ),
new( "/*" , "/a/b" , false , false ),
new( "/*" , "[" , false , true ),
new( "/*" , "]" , false , true ),
new( "/*" , "!" , false , true ),
new( "/**" , "a" , false , false ),
new( "/**" , "a/" , false , false ),
new( "/**" , "a/b" , false , false ),
new( "/**" , "[" , false , false ),
new( "/**" , "]" , false , false ),
new( "/**" , "!" , false , false ),
new( "/**" , "!" , false , true ),
new( "/a*" , "a" , false , true ),
new( "/a*" , "a/x" , false , true ),
new( "/a*" , "a/x/d" , false , true ),
new( "/a*" , "ab" , false , true ),
new( "/a*" , "ab/x" , false , true ),
new( "/a*" , "ab/x/d" , false , true ),
new( "/a/**" , "a" , false , false ),
new( "/*/**" , "a" , false , false ),
new( "/*/**" , "a/" , false , false ),
Expand All @@ -102,8 +116,6 @@ public class CodeownersFileTests
new( "/*/" , "a/" , false , true ),
new( "/*/b" , "a/b" , false , true ),
new( "/**/a" , "a" , false , true ),
new( "/**/a" , "a" , false , true ),
new( "/**/a" , "x/ba" , false , false ),
new( "/**/a" , "x/ba" , false , false ),
new( "/a/*" , "a" , false , false ),
new( "/a/*" , "a/" , false , true ),
Expand Down Expand Up @@ -162,7 +174,7 @@ public class CodeownersFileTests
new( "**/*.md" , "a/b.md/x.md" , false , false ),
new( "/*.md" , "a/md" , false , false ),
new( "/a.*" , "a.b" , false , true ),
new( "/a.*" , "a.b/" , false , false ),
new( "/a.*" , "a.b/" , false , true ),
new( "/a.*" , "x/a.b/" , false , false ),
new( "/a.*/" , "a.b" , false , false ),
new( "/a.*/" , "a.b/" , false , true ),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,8 @@
</ItemGroup>

<ItemGroup>
<None Update="TestData\simple_path_CODEOWNERS">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData/**">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>

<ItemGroup>
<Folder Include="TestData\InputDir\foo\bar\" />
</ItemGroup>
</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -11,60 +11,33 @@ namespace Azure.Sdk.Tools.RetrieveCodeOwners.Tests;
/// Test class for Azure.Sdk.Tools.RetrieveCodeOwners.Program.Main(),
/// for scenario in which targetPath is a glob path, i.e.
/// targetPath.IsGlobPath() returns true.
///
/// The tests assertion expectations are set to match GitHub CODEOWNERS interpreter behavior,
/// as explained here:
/// https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners
/// and as observed based on manual tests and verification.
/// </summary>
[TestFixture]
public class ProgramGlobPathTests
{
/// <summary>
/// Given:
/// <br/><br/>
/// codeownersFilePathOrUrl contents of:
/// <code>
///
/// / @slash
/// /* @star
/// /foo/**/a.txt @foo_2star_a
/// /foo/*/a.txt @foo_star_a_1 @foo_star_a_2
///
/// </code>
///
/// targetDir contents of:
///
/// <code>
///
/// /a.txt
/// /b.txt
/// /foo/a.txt
/// /foo/b.txt
/// /foo/bar/a.txt
/// /foo/bar/b.txt
///
/// </code>
///
/// targetPath of:
///
/// <code>
///
/// /**
///
/// file system contents as seen in TestData/InputDir
///
/// </code>
/// codeownersFilePathOrUrl contents as seen in TestData/glob_path_CODEOWNERS
///
/// excludeNonUserAliases set to false and useRegexMatcher set to true.
/// <br/><br/>
/// targetPath of /**
///
/// excludeNonUserAliases set to false and useRegexMatcher set to true.
///
/// When:
/// The retrieve-codeowners tool is executed on these inputs.
/// <br/><br/>
///
/// Then:
/// The tool should return on STDOUT owners matched in following way:
/// The tool should return on STDOUT owners matched as seen in the
/// "expectedEntries" dictionary.
///
/// <code>
/// /a.txt @star
/// /b.txt @star
/// /foo/a.txt @foo_2star_a
/// /foo/b.txt @slash
/// /foo/bar/a.txt @foo_star_a_1 @foo_star_a_2
/// /foo/bar/b.txt @slash
/// </code>
/// </summary>
[Test]
public void OutputsCodeownersForGlobPath()
Expand All @@ -81,9 +54,12 @@ public void OutputsCodeownersForGlobPath()
["a.txt"] = new CodeownersEntry("/*", new List<string> { "star" }),
["b.txt"] = new CodeownersEntry("/*", new List<string> { "star" }),
["foo/a.txt"] = new CodeownersEntry("/foo/**/a.txt", new List<string> { "foo_2star_a" }),
["foo/b.txt"] = new CodeownersEntry("/", new List<string> { "slash" }),
["foo/b.txt"] = new CodeownersEntry("/**", new List<string> { "2star" }),
["foo/bar/a.txt"] = new CodeownersEntry("/foo/*/a.txt", new List<string> { "foo_star_a_1", "foo_star_a_2" }),
["foo/bar/b.txt"] = new CodeownersEntry("/", new List<string> { "slash" }),
["foo/bar/b.txt"] = new CodeownersEntry("/**", new List<string> { "2star" }),
["baz/cor/c.txt"] = new CodeownersEntry("/baz*", new List<string> { "baz_star" }),
["baz_.txt"] = new CodeownersEntry("/baz*", new List<string> { "baz_star" }),
["qux/abc/d.txt"] = new CodeownersEntry("/qux/", new List<string> { "qux" }),
// @formatter:on
};

Expand Down Expand Up @@ -142,7 +118,7 @@ private static void AssertEntries(
{
string path = kvp.Key;
CodeownersEntry actualEntry = kvp.Value;
Assert.That(expectedEntries[path], Is.EqualTo(actualEntry), $"path: {path}");
Assert.That(actualEntry, Is.EqualTo(expectedEntries[path]), $"path: {path}");
}

Assert.That(actualEntries, Has.Count.EqualTo(expectedEntries.Count));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
# Azure.Sdk.Tools.RetrieveCodeOwners.Tests.WildcardTests.TestWildcard
#

/ @slash
/** @2star
/* @star

/foo/**/a.txt @foo_2star_a
/foo/*/a.txt @foo_star_a_1 @foo_star_a_2
/foo/*/a.txt @foo_star_a_1 @foo_star_a_2

/baz* @baz_star

/qux/ @qux
121 changes: 76 additions & 45 deletions tools/code-owners-parser/CodeOwnersParser/MatchedCodeownersEntry.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ namespace Azure.Sdk.Tools.CodeOwnersParser
/// The validation spec is given in this comment:
/// https://github.com/Azure/azure-sdk-tools/issues/4859#issuecomment-1370360622
///
/// Besides that, the matcher aim to exactly reflect the matching behavior of the
/// built-in GitHub CODEOWNERS interpreter. See ProgramGlobPathTests for examples
/// of its behavior.
///
/// To use this class, construct it, passing as input relevant paths.
/// Then, to obtain the value of the matched entry, reference "Value" member.
///
Expand Down Expand Up @@ -136,21 +140,41 @@ private static bool ContainsUnsupportedCharacters(string codeownersPath)

private static bool ContainsUnsupportedSequences(string codeownersPath)
{
if (codeownersPath == "/")
{
// This behavior matches GitHub CODEOWNERS interpreter behavior.
// I.e. a path of just "/" is unsupported.
Console.Error.WriteLine(
$"CODEOWNERS path \"{codeownersPath}\" will never match. " +
"Use \"/**\" instead.");
return true;
}

// See comment below why we support this path.
if (codeownersPath == "/**")
return false;

// We do not support suffix of "/**" because it is equivalent to "/".
// We do not support suffix of "/**/" because it is equivalent to
// "/**/**" which is equivalent to "/".
string[] unsupportedSuffixSentences = { "/**", "/**/" };
// For example, "/foo/**" is equivalent to "/foo/"
// One exception to this rule is if the entire path is "/**":
// GitHub doesn't match "/" to anything if it is the entire path,
// and instead expects "/**".
if (codeownersPath != "/**" && codeownersPath.EndsWith("/**"))
{
Console.Error.WriteLine(
$"CODEOWNERS path \"{codeownersPath}\" ends with " +
"unsupported sequence of \"/**\". Replace it with \"/\".");
return true;
}

foreach (var sequence in unsupportedSuffixSentences)
// We do not support suffix of "/**/" because it is equivalent to
// suffix "/**" which is equivalent to suffix "/".
if (codeownersPath.EndsWith("/**/"))
{
if (codeownersPath.EndsWith(sequence))
{
Console.Error.WriteLine(
$"CODEOWNERS path \"{codeownersPath}\" ends with " +
$"unsupported sequence of \"{sequence}\". Replace it with \"/\". " +
"Until then this path will never match.");
return true;
}
Console.Error.WriteLine(
$"CODEOWNERS path \"{codeownersPath}\" ends with " +
"unsupported sequence of \"/**/\". Replace it with \"/**\".");
return true;
}

// We do not support inline "**", i.e. if it is not within slashes, i.e. "/**/".
Expand Down Expand Up @@ -187,6 +211,11 @@ private Regex ConvertToRegex(string codeownersPath)
codeownersPath = NormalizePath(codeownersPath);
Trace.Assert(IsCodeownersPathValid(codeownersPath));

// Special case: path "/**" matches everything.
// We do not allow "/**" in any other context except when it is the entire path.
if (codeownersPath == "/**")
return new Regex(".*");

string pattern = codeownersPath;

if (codeownersPath.Contains(SingleStar) || pattern.Contains(DoubleStar))
Expand Down Expand Up @@ -218,42 +247,44 @@ private Regex ConvertToRegex(string codeownersPath)
return new Regex(pattern);
}

/// <summary>
/// Sets the regex pattern suffix, which can be either "$" (end of string)
/// or nothing.
///
/// GitHub's CODEOWNERS matching logic is a bit inconsistent when it comes to handling suffixes.
///
/// In a nutshell:
/// - For top level dir, `/` doesn't work. One has to use `/**`.
/// - But for nested dirs, `/` works. I.e. one can write `/foo/` and it is equivalent to `/foo/**`.
/// - `*` has different interpretations. If used with preceding slash, like `/*` or `/foo/*`
/// it means "things only in this dir". But when used as a suffix, it means "anything".
/// So `/foo*` is effectively `/foo*/** OR /foo*`. Where `*` in the OR clause
/// should be interpreted as "any character except `/`".
/// </summary>
private static string SetPatternSuffix(string pattern)
{
// Lack of slash at the end denotes the path is a path to a file,
// per our validation logic.
// If a pattern ends with "/*" this means it should match only files
// in the child directory, but not all descendant directories.
// Hence we must append "$", to avoid treating the regex pattern
// as a prefix match.
if (pattern.EndsWith($"/{SingleStar}"))
return pattern + "$";

// If the pattern ends with "/" it means it is a path to a directory,
// like "/foo". This means "match everything in this directory,
// at arbitrary directory nesting depth."
//
// Note we assume this is path to a file even if the path is invalid,
// even though in such case the path might be a path to a directory.
if (!pattern.EndsWith("/"))
{
// Append "end of string" symbol, denoting the match has to be exact,
// not a substring, as we are dealing with a file.
pattern += "$";
}
else // The pattern ends with "/", denoting it is a directory.
{
// We do not append the end-of-string symbol, "$",
// because we want to allow matching to any string suffix,
// which semantically means matching to anything (including nothing)
// in the directory represented by the path.

// Note that if the targetPath is exactly the same as pattern,
// except that it is missing the trailing "/", then we assume
// no match. This is because The pattern is matching
// against a directory, which, due to our validation,
// should exist. The targetPath, because it doesn't have trailing "/",
// matches to a file with the same name, hence it cannot exist.
//
// For example:
// pattern: /a/
// targetPath: /a
// result: no match
//
// There is no match because "/a" is not a substring of "/a/".
}

return pattern;
// If the pattern ends with "*" but not "/*" (as this case was handled above)
// then it is a suffix *, e.g. "/foo*". This means "match everything
// with a prefix string of "/foo". Notably, it matches not only
// everything in "/foo/" dir, but also files like "/foobar.txt"
if (pattern.EndsWith("/") || pattern.EndsWith(SingleStar))
return pattern;

// If the pattern doesn't end with "/" nor "*", it is a path to a file,
// hence we must append "$", to avoid treating the regex pattern as a
// prefix match.
return pattern + "$";
}

/// <summary>
Expand Down

0 comments on commit 0a6d021

Please sign in to comment.