From 0a6d0212e194304f01a73763a77b600d5d95e3cb Mon Sep 17 00:00:00 2001 From: Konrad Jamrozik Date: Thu, 2 Feb 2023 11:54:23 -0800 Subject: [PATCH] Fix suffix `*` handling in our `CODEOWNERS` path matcher (#5308) --- .../CodeownersFileTests.cs | 44 ++++--- ....Sdk.Tools.RetrieveCodeOwners.Tests.csproj | 7 - .../ProgramGlobPathTests.cs | 66 +++------- .../TestData/InputDir/baz/cor/c.txt | 0 .../TestData/InputDir/baz_.txt | 0 .../TestData/InputDir/qux/abc/d.txt | 0 .../TestData/glob_path_CODEOWNERS | 9 +- .../MatchedCodeownersEntry.cs | 121 +++++++++++------- 8 files changed, 132 insertions(+), 115 deletions(-) create mode 100644 tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/TestData/InputDir/baz/cor/c.txt create mode 100644 tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/TestData/InputDir/baz_.txt create mode 100644 tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/TestData/InputDir/qux/abc/d.txt diff --git a/tools/code-owners-parser/Azure.Sdk.Tools.CodeOwnersParser.Tests/CodeownersFileTests.cs b/tools/code-owners-parser/Azure.Sdk.Tools.CodeOwnersParser.Tests/CodeownersFileTests.cs index ab96bca6284..b4ae9701f9b 100644 --- a/tools/code-owners-parser/Azure.Sdk.Tools.CodeOwnersParser.Tests/CodeownersFileTests.cs +++ b/tools/code-owners-parser/Azure.Sdk.Tools.CodeOwnersParser.Tests/CodeownersFileTests.cs @@ -26,13 +26,23 @@ public class CodeownersFileTests // @formatter:off // Path: Expected match: // Codeowners , Target , Legacy , New - new( "/" , "a" , true , true ), - new( "/" , "A" , true , true ), - new( "/" , "/a" , true , true ), - new( "/" , "a/" , true , true ), - new( "/" , "/a/" , true , true ), - new( "/" , "/a/b" , true , true ), - new( "/a" , "a" , true , true ), + new( "/**" , "a" , false , true ), + new( "/**" , "A" , false , true ), + new( "/**" , "/a" , false , true ), + new( "/**" , "a/" , false , true ), + new( "/**" , "/a/" , false , true ), + new( "/**" , "/a/b" , false , true ), + new( "/**" , "/a/b/" , false , true ), + new( "/**" , "/a/b/c" , false , true ), + new( "/**" , "[" , false , true ), + new( "/**" , "]" , false , true ), + new( "/" , "a" , true , false ), + new( "/" , "A" , true , false ), + new( "/" , "/a" , true , false ), + new( "/" , "a/" , true , false ), + new( "/" , "/a/" , true , false ), + new( "/" , "/a/b" , true , false ), + new( "/a" , "a" , true , true ), new( "/a" , "A" , true , false ), new( "/a" , "/a" , true , true ), new( "/a" , "a/" , true , false ), @@ -84,16 +94,20 @@ public class CodeownersFileTests new( "/*" , "*" , true , false ), new( "/*" , "a" , false , true ), new( "/*" , "a/" , false , false ), + new( "/*" , "/a" , false , true ), + new( "/*" , "/a/" , false , false ), new( "/*" , "a/b" , false , false ), + new( "/*" , "/a/b" , false , false ), new( "/*" , "[" , false , true ), new( "/*" , "]" , false , true ), new( "/*" , "!" , false , true ), - new( "/**" , "a" , false , false ), - new( "/**" , "a/" , false , false ), - new( "/**" , "a/b" , false , false ), - new( "/**" , "[" , false , false ), - new( "/**" , "]" , false , false ), - new( "/**" , "!" , false , false ), + new( "/**" , "!" , false , true ), + new( "/a*" , "a" , false , true ), + new( "/a*" , "a/x" , false , true ), + new( "/a*" , "a/x/d" , false , true ), + new( "/a*" , "ab" , false , true ), + new( "/a*" , "ab/x" , false , true ), + new( "/a*" , "ab/x/d" , false , true ), new( "/a/**" , "a" , false , false ), new( "/*/**" , "a" , false , false ), new( "/*/**" , "a/" , false , false ), @@ -102,8 +116,6 @@ public class CodeownersFileTests new( "/*/" , "a/" , false , true ), new( "/*/b" , "a/b" , false , true ), new( "/**/a" , "a" , false , true ), - new( "/**/a" , "a" , false , true ), - new( "/**/a" , "x/ba" , false , false ), new( "/**/a" , "x/ba" , false , false ), new( "/a/*" , "a" , false , false ), new( "/a/*" , "a/" , false , true ), @@ -162,7 +174,7 @@ public class CodeownersFileTests new( "**/*.md" , "a/b.md/x.md" , false , false ), new( "/*.md" , "a/md" , false , false ), new( "/a.*" , "a.b" , false , true ), - new( "/a.*" , "a.b/" , false , false ), + new( "/a.*" , "a.b/" , false , true ), new( "/a.*" , "x/a.b/" , false , false ), new( "/a.*/" , "a.b" , false , false ), new( "/a.*/" , "a.b/" , false , true ), diff --git a/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/Azure.Sdk.Tools.RetrieveCodeOwners.Tests.csproj b/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/Azure.Sdk.Tools.RetrieveCodeOwners.Tests.csproj index ff237a82014..8dbda797776 100644 --- a/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/Azure.Sdk.Tools.RetrieveCodeOwners.Tests.csproj +++ b/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/Azure.Sdk.Tools.RetrieveCodeOwners.Tests.csproj @@ -20,15 +20,8 @@ - - PreserveNewest - PreserveNewest - - - - diff --git a/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/ProgramGlobPathTests.cs b/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/ProgramGlobPathTests.cs index 3d64224fac3..73b0d3e684b 100644 --- a/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/ProgramGlobPathTests.cs +++ b/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/ProgramGlobPathTests.cs @@ -11,60 +11,33 @@ namespace Azure.Sdk.Tools.RetrieveCodeOwners.Tests; /// Test class for Azure.Sdk.Tools.RetrieveCodeOwners.Program.Main(), /// for scenario in which targetPath is a glob path, i.e. /// targetPath.IsGlobPath() returns true. +/// +/// The tests assertion expectations are set to match GitHub CODEOWNERS interpreter behavior, +/// as explained here: +/// https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners +/// and as observed based on manual tests and verification. /// [TestFixture] public class ProgramGlobPathTests { /// /// Given: - ///

- /// codeownersFilePathOrUrl contents of: - /// - /// - /// / @slash - /// /* @star - /// /foo/**/a.txt @foo_2star_a - /// /foo/*/a.txt @foo_star_a_1 @foo_star_a_2 - /// - /// - /// - /// targetDir contents of: - /// - /// - /// - /// /a.txt - /// /b.txt - /// /foo/a.txt - /// /foo/b.txt - /// /foo/bar/a.txt - /// /foo/bar/b.txt - /// - /// - /// - /// targetPath of: - /// - /// - /// - /// /** + /// + /// file system contents as seen in TestData/InputDir /// - /// + /// codeownersFilePathOrUrl contents as seen in TestData/glob_path_CODEOWNERS /// - /// excludeNonUserAliases set to false and useRegexMatcher set to true. - ///

+ /// targetPath of /** + /// + /// excludeNonUserAliases set to false and useRegexMatcher set to true. + /// /// When: /// The retrieve-codeowners tool is executed on these inputs. - ///

+ /// /// Then: - /// The tool should return on STDOUT owners matched in following way: + /// The tool should return on STDOUT owners matched as seen in the + /// "expectedEntries" dictionary. /// - /// - /// /a.txt @star - /// /b.txt @star - /// /foo/a.txt @foo_2star_a - /// /foo/b.txt @slash - /// /foo/bar/a.txt @foo_star_a_1 @foo_star_a_2 - /// /foo/bar/b.txt @slash - /// ///
[Test] public void OutputsCodeownersForGlobPath() @@ -81,9 +54,12 @@ public void OutputsCodeownersForGlobPath() ["a.txt"] = new CodeownersEntry("/*", new List { "star" }), ["b.txt"] = new CodeownersEntry("/*", new List { "star" }), ["foo/a.txt"] = new CodeownersEntry("/foo/**/a.txt", new List { "foo_2star_a" }), - ["foo/b.txt"] = new CodeownersEntry("/", new List { "slash" }), + ["foo/b.txt"] = new CodeownersEntry("/**", new List { "2star" }), ["foo/bar/a.txt"] = new CodeownersEntry("/foo/*/a.txt", new List { "foo_star_a_1", "foo_star_a_2" }), - ["foo/bar/b.txt"] = new CodeownersEntry("/", new List { "slash" }), + ["foo/bar/b.txt"] = new CodeownersEntry("/**", new List { "2star" }), + ["baz/cor/c.txt"] = new CodeownersEntry("/baz*", new List { "baz_star" }), + ["baz_.txt"] = new CodeownersEntry("/baz*", new List { "baz_star" }), + ["qux/abc/d.txt"] = new CodeownersEntry("/qux/", new List { "qux" }), // @formatter:on }; @@ -142,7 +118,7 @@ private static void AssertEntries( { string path = kvp.Key; CodeownersEntry actualEntry = kvp.Value; - Assert.That(expectedEntries[path], Is.EqualTo(actualEntry), $"path: {path}"); + Assert.That(actualEntry, Is.EqualTo(expectedEntries[path]), $"path: {path}"); } Assert.That(actualEntries, Has.Count.EqualTo(expectedEntries.Count)); diff --git a/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/TestData/InputDir/baz/cor/c.txt b/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/TestData/InputDir/baz/cor/c.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/TestData/InputDir/baz_.txt b/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/TestData/InputDir/baz_.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/TestData/InputDir/qux/abc/d.txt b/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/TestData/InputDir/qux/abc/d.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/TestData/glob_path_CODEOWNERS b/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/TestData/glob_path_CODEOWNERS index a74409c924d..848cfd2c919 100644 --- a/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/TestData/glob_path_CODEOWNERS +++ b/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners.Tests/TestData/glob_path_CODEOWNERS @@ -3,7 +3,12 @@ # Azure.Sdk.Tools.RetrieveCodeOwners.Tests.WildcardTests.TestWildcard # -/ @slash +/** @2star /* @star + /foo/**/a.txt @foo_2star_a -/foo/*/a.txt @foo_star_a_1 @foo_star_a_2 \ No newline at end of file +/foo/*/a.txt @foo_star_a_1 @foo_star_a_2 + +/baz* @baz_star + +/qux/ @qux \ No newline at end of file diff --git a/tools/code-owners-parser/CodeOwnersParser/MatchedCodeownersEntry.cs b/tools/code-owners-parser/CodeOwnersParser/MatchedCodeownersEntry.cs index 56db516e998..5bdc43e69de 100644 --- a/tools/code-owners-parser/CodeOwnersParser/MatchedCodeownersEntry.cs +++ b/tools/code-owners-parser/CodeOwnersParser/MatchedCodeownersEntry.cs @@ -21,6 +21,10 @@ namespace Azure.Sdk.Tools.CodeOwnersParser /// The validation spec is given in this comment: /// https://github.com/Azure/azure-sdk-tools/issues/4859#issuecomment-1370360622 /// + /// Besides that, the matcher aim to exactly reflect the matching behavior of the + /// built-in GitHub CODEOWNERS interpreter. See ProgramGlobPathTests for examples + /// of its behavior. + /// /// To use this class, construct it, passing as input relevant paths. /// Then, to obtain the value of the matched entry, reference "Value" member. /// @@ -136,21 +140,41 @@ private static bool ContainsUnsupportedCharacters(string codeownersPath) private static bool ContainsUnsupportedSequences(string codeownersPath) { + if (codeownersPath == "/") + { + // This behavior matches GitHub CODEOWNERS interpreter behavior. + // I.e. a path of just "/" is unsupported. + Console.Error.WriteLine( + $"CODEOWNERS path \"{codeownersPath}\" will never match. " + + "Use \"/**\" instead."); + return true; + } + + // See comment below why we support this path. + if (codeownersPath == "/**") + return false; + // We do not support suffix of "/**" because it is equivalent to "/". - // We do not support suffix of "/**/" because it is equivalent to - // "/**/**" which is equivalent to "/". - string[] unsupportedSuffixSentences = { "/**", "/**/" }; + // For example, "/foo/**" is equivalent to "/foo/" + // One exception to this rule is if the entire path is "/**": + // GitHub doesn't match "/" to anything if it is the entire path, + // and instead expects "/**". + if (codeownersPath != "/**" && codeownersPath.EndsWith("/**")) + { + Console.Error.WriteLine( + $"CODEOWNERS path \"{codeownersPath}\" ends with " + + "unsupported sequence of \"/**\". Replace it with \"/\"."); + return true; + } - foreach (var sequence in unsupportedSuffixSentences) + // We do not support suffix of "/**/" because it is equivalent to + // suffix "/**" which is equivalent to suffix "/". + if (codeownersPath.EndsWith("/**/")) { - if (codeownersPath.EndsWith(sequence)) - { - Console.Error.WriteLine( - $"CODEOWNERS path \"{codeownersPath}\" ends with " + - $"unsupported sequence of \"{sequence}\". Replace it with \"/\". " + - "Until then this path will never match."); - return true; - } + Console.Error.WriteLine( + $"CODEOWNERS path \"{codeownersPath}\" ends with " + + "unsupported sequence of \"/**/\". Replace it with \"/**\"."); + return true; } // We do not support inline "**", i.e. if it is not within slashes, i.e. "/**/". @@ -187,6 +211,11 @@ private Regex ConvertToRegex(string codeownersPath) codeownersPath = NormalizePath(codeownersPath); Trace.Assert(IsCodeownersPathValid(codeownersPath)); + // Special case: path "/**" matches everything. + // We do not allow "/**" in any other context except when it is the entire path. + if (codeownersPath == "/**") + return new Regex(".*"); + string pattern = codeownersPath; if (codeownersPath.Contains(SingleStar) || pattern.Contains(DoubleStar)) @@ -218,42 +247,44 @@ private Regex ConvertToRegex(string codeownersPath) return new Regex(pattern); } + /// + /// Sets the regex pattern suffix, which can be either "$" (end of string) + /// or nothing. + /// + /// GitHub's CODEOWNERS matching logic is a bit inconsistent when it comes to handling suffixes. + /// + /// In a nutshell: + /// - For top level dir, `/` doesn't work. One has to use `/**`. + /// - But for nested dirs, `/` works. I.e. one can write `/foo/` and it is equivalent to `/foo/**`. + /// - `*` has different interpretations. If used with preceding slash, like `/*` or `/foo/*` + /// it means "things only in this dir". But when used as a suffix, it means "anything". + /// So `/foo*` is effectively `/foo*/** OR /foo*`. Where `*` in the OR clause + /// should be interpreted as "any character except `/`". + /// private static string SetPatternSuffix(string pattern) { - // Lack of slash at the end denotes the path is a path to a file, - // per our validation logic. + // If a pattern ends with "/*" this means it should match only files + // in the child directory, but not all descendant directories. + // Hence we must append "$", to avoid treating the regex pattern + // as a prefix match. + if (pattern.EndsWith($"/{SingleStar}")) + return pattern + "$"; + + // If the pattern ends with "/" it means it is a path to a directory, + // like "/foo". This means "match everything in this directory, + // at arbitrary directory nesting depth." // - // Note we assume this is path to a file even if the path is invalid, - // even though in such case the path might be a path to a directory. - if (!pattern.EndsWith("/")) - { - // Append "end of string" symbol, denoting the match has to be exact, - // not a substring, as we are dealing with a file. - pattern += "$"; - } - else // The pattern ends with "/", denoting it is a directory. - { - // We do not append the end-of-string symbol, "$", - // because we want to allow matching to any string suffix, - // which semantically means matching to anything (including nothing) - // in the directory represented by the path. - - // Note that if the targetPath is exactly the same as pattern, - // except that it is missing the trailing "/", then we assume - // no match. This is because The pattern is matching - // against a directory, which, due to our validation, - // should exist. The targetPath, because it doesn't have trailing "/", - // matches to a file with the same name, hence it cannot exist. - // - // For example: - // pattern: /a/ - // targetPath: /a - // result: no match - // - // There is no match because "/a" is not a substring of "/a/". - } - - return pattern; + // If the pattern ends with "*" but not "/*" (as this case was handled above) + // then it is a suffix *, e.g. "/foo*". This means "match everything + // with a prefix string of "/foo". Notably, it matches not only + // everything in "/foo/" dir, but also files like "/foobar.txt" + if (pattern.EndsWith("/") || pattern.EndsWith(SingleStar)) + return pattern; + + // If the pattern doesn't end with "/" nor "*", it is a path to a file, + // hence we must append "$", to avoid treating the regex pattern as a + // prefix match. + return pattern + "$"; } ///