From 5102e0eb3311216a77b36e94fca8b556002c4718 Mon Sep 17 00:00:00 2001 From: Konrad Jamrozik Date: Thu, 5 Jan 2023 04:39:40 -0800 Subject: [PATCH 1/9] ongoing --- .../CodeOwnersFileTests.cs | 217 ++++++++-------- .../Program.cs | 2 +- .../CodeOwnersParser/MatchedCodeOwnerEntry.cs | 239 ++++++++---------- 3 files changed, 219 insertions(+), 239 deletions(-) diff --git a/tools/code-owners-parser/Azure.Sdk.Tools.CodeOwnersParser.Tests/CodeOwnersFileTests.cs b/tools/code-owners-parser/Azure.Sdk.Tools.CodeOwnersParser.Tests/CodeOwnersFileTests.cs index ac7cb54677d..ce22e4a2897 100644 --- a/tools/code-owners-parser/Azure.Sdk.Tools.CodeOwnersParser.Tests/CodeOwnersFileTests.cs +++ b/tools/code-owners-parser/Azure.Sdk.Tools.CodeOwnersParser.Tests/CodeOwnersFileTests.cs @@ -1,6 +1,4 @@ -using System; using System.Collections.Generic; -using Microsoft.Extensions.FileSystemGlobbing; using NUnit.Framework; namespace Azure.Sdk.Tools.CodeOwnersParser.Tests; @@ -26,113 +24,109 @@ public class CodeOwnersFileTests private static readonly TestCase[] testCases = { // @formatter:off - // TestCase: Path: Expected match: - // Name, Target , Codeown. , Legacy , New - new( "1" , "a" , "a" , true , true ), - new( "2" , "a" , "a/" , true , false ), // New parser doesn't match as codeowners path expects directory, but it is unclear if target is directory, or not. - new( "3" , "a/b" , "a/b" , true , true ), - new( "4" , "a/b" , "/a/b" , true , true ), - new( "5" , "a/b" , "a/b/" , true , false ), // New parser doesn't match as codeowners path expects directory, but it is unclear if target is directory, or not. - new( "6" , "/a/b" , "a/b" , true , true ), - new( "7" , "/a/b" , "/a/b" , true , true ), - new( "8" , "/a/b" , "a/b/" , true , false ), // New parser doesn't match as codeowners path expects directory, but it is unclear if target is directory, or not. - new( "9" , "a/b/" , "a/b" , true , true ), - new( "10" , "a/b/" , "/a/b" , true , true ), - new( "11" , "a/b/" , "a/b/" , true , true ), - new( "12" , "/a/b/" , "a/b" , true , true ), - new( "13" , "/a/b/" , "/a/b" , true , true ), - new( "14" , "/a/b/" , "a/b/" , true , true ), - new( "15" , "/a/b/" , "/a/b/" , true , true ), - new( "16" , "/a/b/c" , "a/b" , true , true ), - new( "17" , "/a/b/c" , "/a/b" , true , true ), - new( "18" , "/a/b/c" , "a/b/" , true , true ), - new( "19" , "/a/b/c/d" , "/a/b/" , true , true ), - new( "casing" , "ABC" , "abc" , true , false ), // New parser doesn't match as it is case-sensitive, per codeowners spec - new( "chained1" , "a/b/c" , "a" , true , true ), - new( "chained2" , "a/b/c" , "b" , false , true ), // New parser matches per codeowners and .gitignore spec - new( "chained3" , "a/b/c" , "b/" , false , true ), // New parser matches per codeowners and .gitignore spec - new( "chained4" , "a/b/c" , "c" , false , true ), // New parser matches per codeowners and .gitignore spec - new( "chained5" , "a/b/c" , "c/" , false , false ), - new( "chained6" , "a/b/c/d" , "c/" , false , true ), // New parser matches per codeowners and .gitignore spec - new( "chained7" , "a/b/c/d/e" , "c/" , false , true ), // New parser matches per codeowners and .gitignore spec - new( "chained8" , "a/b/c" , "b/c" , false , false ), // TODO need to verify if CODEOWNERS actually follows this rule of "middle slashes prevent path relativity" from .gitignore, or not. - new( "chained9" , "a" , "a/b/c" , false , false ), - new( "chained10" , "c" , "a/b/c" , false , false ), - // Cases not supported by the new parser. - new( "unsupp1" , "!a" , "!a" , true , false ), - new( "unsupp2" , "b" , "!a" , false , false ), - new( "unsupp3" , "a[b" , "a[b" , true , false ), - new( "unsupp4" , "a]b" , "a]b" , true , false ), - new( "unsupp5" , "a?b" , "a?b" , true , false ), - new( "unsupp6" , "axb" , "a?b" , false , false ), - // The cases below test for wildcard support by the new parser. Legacy parser skips over wildcards. - new( "**1" , "a" , "**/a" , false , true ), - new( "**2" , "a" , "**/b/a" , false , false ), - new( "**3" , "a" , "**/a/b" , false , false ), - new( "**4" , "a" , "/**/a" , false , true ), - new( "**5" , "a/b" , "a/**/b" , false , true ), - new( "**6" , "a/x/b" , "a/**/b" , false , true ), - new( "**7" , "a/y/b" , "a/**/b" , false , true ), - new( "**8" , "a/x/y/b" , "a/**/b" , false , true ), - new( "**9" , "c/a/x/y/b" , "a/**/b" , false , false ), - new( "*10" , "a/b/cxy/d" , "/**/*x*/" , false , true ), - new( "1*" , "a" , "*" , false , true ), - new( "2*" , "a/b" , "a/*" , false , true ), - // There is discrepancy between GitHub CODEOWNERS behavior [1] and .gitignore behavior here - // CODEOWNERS will not match this path, while .gitignore will - // [1] The "docs/*" example in https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners#example-of-a-codeowners-file - // [2] Confirmed empirically. The .gitignore will match "a/*" to "a/b" and thus ignore everything. - new( "3*" , "a/b/c" , "a/*" , false , true ), - new( "4*" , "x/a/b" , "a/*" , false , false ), - new( "1*/" , "a/b" , "a/*/" , false , false ), - new( "2*/" , "a/b/" , "a/*/" , false , true ), - new( "3*/" , "a/b/c" , "a/*/" , false , true ), - new( "1*/*" , "a/b" , "a/*/*" , false , false ), - new( "2*/*" , "a/b/c/d" , "a/*/*/d" , false , true ), - new( "3*/*" , "a/b/x/c/d" , "a/*/*/d" , false , false ), - new( "1**/*" , "a/b/x/c/d" , "a/**/*/d" , false , true ), - new( "*1" , "a/b" , "*/b" , false , true ), - new( "*/*1" , "a/b" , "*/*/b" , false , false ), - new( "1**" , "a" , "a/**" , false , false ), - new( "2**" , "a/" , "a/**" , false , true ), - new( "3**" , "a/b" , "a/**" , false , true ), - new( "4**" , "a/b/" , "a/**" , false , true ), - new( "*.ext1" , "a/x.md" , "*.md" , false , true ), - new( "*.ext2" , "a/b/x.md" , "*.md" , false , true ), - new( "*.ext3" , "a/b.md/x.md" , "*.md" , false , true ), - new( "*.ext4" , "a/md" , "*.md" , false , false ), - new( "*.ext5" , "a.b" , "a.*" , false , true ), - new( "*.ext6" , "a.b/" , "a.*" , false , true ), - new( "*.ext5" , "a.b" , "a.*/" , false , false ), - new( "*.ext7" , "a.b/" , "a.*/" , false , true ), - new( "*.ext8" , "a.b" , "/a.*" , false , true ), - new( "*.ext9" , "a.b/" , "/a.*" , false , true ), - new( "*.ext10" , "x/a.b/" , "/a.*" , false , false ), - // New parser should return false, but returns true due to https://github.com/dotnet/runtime/issues/80076 - // TODO globbug1 actually covers-up problem with the parser, where it converts "*" to "**/*". - new( "globbug1" , "a/b" , "*" , false , true ), - new( "globbug2" , "a/b/c/" , "a/*/" , false , true ) + // Path: Expected match: + // Codeowners , Target , Legacy , New + new( "/a" , "a" , true , true ), + new( "/a" , "A" , true , false ), + new( "/a" , "/a" , true , true ), + new( "/a" , "a/" , true , false ), + new( "/a" , "/a/" , true , false ), + new( "/a" , "/a/b" , true , false ), + new( "/a" , "/a/b/" , true , false ), + new( "/a" , "/x/a/b" , false , false ), + new( "a" , "a" , true , true ), + new( "a" , "A" , true , false ), + new( "a" , "/a" , true , true ), + new( "a" , "a/" , true , false ), + new( "a" , "/a/" , true , false ), + new( "a" , "/a/b" , true , false ), + new( "a" , "/a/b/" , true , false ), + new( "a" , "/x/a/b" , false , false ), + new( "/a/" , "a" , true , true ), + new( "/a/" , "/a" , true , true ), + new( "/a/" , "a/" , true , true ), + new( "/a/" , "/a/" , true , true ), + new( "/a/" , "/a/b" , true , true ), + new( "/a/" , "/a/b/" , true , true ), + new( "/a/" , "/A/b/" , true , false ), + new( "/a/" , "/x/a/b" , false , false ), + new( "/a/b/" , "/a" , false , false ), + new( "/a/b/" , "/a/" , false , false ), + new( "/a/b/" , "/a/b" , true , true ), + new( "/a/b/" , "/a/b/" , true , true ), + new( "/a/b/" , "/a/b/c" , true , true ), + new( "/a/b/" , "/a/b/c/" , true , true ), + new( "/a/b/" , "/a/b/c/d" , true , true ), + new( "/a/b" , "/a" , false , false ), + new( "/a/b" , "/a/" , false , false ), + new( "/a/b" , "/a/b" , true , true ), + new( "/a/b" , "/a/b/" , true , false ), + new( "/a/b" , "/a/b/c" , true , false ), + new( "/a/b" , "/a/b/c/" , true , false ), + new( "/a/b" , "/a/b/c/d" , true , false ), + new( "/!a" , "!a" , true , false ), + new( "/!a" , "b" , false , false ), + new( "/a[b" , "a[b" , true , false ), + new( "/a]b" , "a]b" , true , false ), + new( "/a?b" , "a?b" , true , false ), + new( "/a?b" , "axb" , false , false ), + new( "/*" , "a" , false , true ), + new( "/*" , "a/" , false , false ), + new( "/*" , "a/b" , false , false ), + new( "/**" , "a" , false , true ), + new( "/**" , "a/" , false , true ), + new( "/**" , "a/b" , false , true ), + new( "/a/*" , "a" , false , false ), // Not sure if this should not match. + new( "/a/*" , "a/" , false , true ), + new( "/a/*" , "a/b" , false , true ), + new( "/a/*" , "a/b/" , false , false ), + new( "/a/*" , "a/b/c" , false , false ), + new( "/a/**" , "a" , false , true ), // Not sure if this should match. + new( "/a/**" , "a/" , false , true ), + new( "/a/**" , "a/b" , false , true ), + new( "/a/**" , "a/b/" , false , true ), + new( "/a/**" , "a/b/c" , false , true ), + new( "/**/a/" , "a" , false , true ), + new( "/**/a/" , "a/" , false , true ), + new( "/**/a/" , "a/b" , false , true ), + new( "/**/b/" , "a/b" , false , true ), + new( "/**/b/" , "a/c" , false , false ), + new( "/a/*/b/" , "a/b" , false , false ), + new( "/a/*/b/" , "a/x/b" , false , true ), + new( "/a/*/b/" , "a/x/b/c" , false , true ), + new( "/a/*/b/" , "a/x/c" , false , false ), + new( "/a/*/b/" , "a/x/y/b" , false , false ), + new( "/a/**/b/" , "a/b" , false , true ), + new( "/a/**/b/" , "a/x/b" , false , true ), + new( "/a/**/b/" , "a/x/y/b" , false , true ), + new( "/a/**/b/" , "a/x/y/c" , false , false ), + new( "a/*/*" , "a/b" , false , false ), + new( "/a/*/*/d" , "a/b/c/d" , false , true ), + new( "/a/*/*/d" , "a/b/x/c/d" , false , false ), + new( "/a/**/*/d" , "a/b/x/c/d" , false , true ), + new( "*/*/b" , "a/b" , false , false ), + new( "/a*/" , "abc" , false , true ), + new( "/*b*/" , "abc" , false , true ), + new( "/*c/" , "abc" , false , true ), + new( "/a*c/" , "abc" , false , true ), + new( "/**/*x*/" , "a/b/cxy/d" , false , true ), + new( "/a/*.md" , "a/x.md" , false , true ), + new( "/*/*/*.md" , "a/b/x.md" , false , true ), + new( "**/*.md" , "a/b.md/x.md" , false , true ), + new( "/*.md" , "a/md" , false , false ), + new( "/a.*" , "a.b" , false , true ), + new( "/a.*" , "a.b/" , false , false ), + new( "/a.*" , "x/a.b/" , false , false ), + new( "/a.*/" , "a.b" , false , true ), + new( "/a.*/" , "a.b/" , false , true ), + new( "/**/*x*/AB/*/CD" , "a/b/cxy/AB/fff/CD" , false, true ), + new( "/**/*x*/AB/*/CD" , "a/b/cxy/AB/ff/ff/CD" , false, false ), + new( "/**/*x*/AB/**/CD/*" , "a/b/cxy/AB/ff/ff/CD" , false, false ), + new( "/**/*x*/AB/**/CD/*" , "a/b/cxy/AB/ff/ff/CD/h" , false, true ), + // @formatter:on }; - /// - /// A repro for https://github.com/dotnet/runtime/issues/80076 - /// - [Test] - public void TestGlobBugRepro() - { - var globMatcher = new Matcher(StringComparison.Ordinal); - globMatcher.AddInclude("/*/"); - - var dir = new InMemoryDirectoryInfo( - rootDir: "/", - files: new List { "/a/b" }); - - var patternMatchingResult = globMatcher.Execute(dir); - // The expected behavior is "Is.False", but actual behavior is "Is.True". - Assert.That(patternMatchingResult.HasMatches, Is.True); - } - /// /// Exercises Azure.Sdk.Tools.CodeOwnersParser.Tests.CodeOwnersFileTests.testCases. /// See comment on that member for details. @@ -147,7 +141,8 @@ public void TestParseAndFindOwnersForClosestMatch(TestCase testCase) VerifyFindOwnersForClosestMatch(testCase, codeownersEntries, useNewImpl: true, testCase.ExpectedNewMatch); } - private static void VerifyFindOwnersForClosestMatch(TestCase testCase, + private static void VerifyFindOwnersForClosestMatch( + TestCase testCase, List codeownersEntries, bool useNewImpl, bool expectedMatch) @@ -162,7 +157,9 @@ private static void VerifyFindOwnersForClosestMatch(TestCase testCase, Assert.That(entryLegacy.Owners.Count, Is.EqualTo(expectedMatch ? 1 : 0)); } - // ReSharper disable once NotAccessedPositionalProperty.Global - // Reason: Name is present to make it easier to refer to and distinguish test cases in VS test runner. - public record TestCase(string Name, string TargetPath, string CodeownersPath, bool ExpectedLegacyMatch, bool ExpectedNewMatch); + public record TestCase( + string CodeownersPath, + string TargetPath, + bool ExpectedLegacyMatch, + bool ExpectedNewMatch); } diff --git a/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners/Program.cs b/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners/Program.cs index cb16e53bd3b..efc105654b7 100644 --- a/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners/Program.cs +++ b/tools/code-owners-parser/Azure.Sdk.Tools.RetrieveCodeOwners/Program.cs @@ -23,7 +23,7 @@ public static int Main( bool filterOutNonUserAliases = false ) { - var target = targetDirectory.ToLower().Trim(); + var target = targetDirectory.Trim(); try { var codeOwnerEntry = CodeOwnersFile.ParseAndFindOwnersForClosestMatch(codeOwnerFilePath, target); if (filterOutNonUserAliases) diff --git a/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs b/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs index b13df0f6c7c..6afbd446ec5 100644 --- a/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs +++ b/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs @@ -1,7 +1,6 @@ -using System; using System.Collections.Generic; using System.Linq; -using Microsoft.Extensions.FileSystemGlobbing; +using System.Text.RegularExpressions; namespace Azure.Sdk.Tools.CodeOwnersParser { @@ -9,12 +8,39 @@ namespace Azure.Sdk.Tools.CodeOwnersParser /// Represents a CODEOWNERS file entry that matched to targetPath from /// the list of entries, assumed to have been parsed from CODEOWNERS file. /// + /// To use this class, construct it. + /// /// To obtain the value of the matched entry, reference "Value" member. + /// + /// Reference: + /// https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners#codeowners-syntax + /// https://git-scm.com/docs/gitignore#_pattern_format /// internal class MatchedCodeOwnerEntry { public readonly CodeOwnerEntry Value; + /// + /// The entry is valid if it obeys following conditions: + /// - The Value was obtained with a call to Azure.Sdk.Tools.CodeOwnersParser.CodeOwnersFile.ParseContent(). + /// - As a consequence, in the case of no match, the entry is not valid. + /// - the Value.PathExpression starts with "/". + /// + /// Once the validation described in the following issue is implemented: + /// https://github.com/Azure/azure-sdk-tools/issues/4859 + /// To be valid, the entry will also have to obey following conditions: + /// - if the Value.PathExpression ends with "/", at least one corresponding + /// directory exists in the repository + /// - if the Value.PathExpression does not end with "/", at least one corresponding + /// file exists in the repository. + /// + public bool IsValid => this.Value.PathExpression.StartsWith("/"); + + /// + /// Any CODEOWNERS path with these characters will be skipped. + /// Note these are valid parts of file paths, but we are not supporting + /// them to simplify the parser logic. + /// private static readonly char[] unsupportedChars = { '[', ']', '!', '?' }; public MatchedCodeOwnerEntry(List entries, string targetPath) @@ -24,10 +50,8 @@ public MatchedCodeOwnerEntry(List entries, string targetPath) /// /// Returns a CodeOwnerEntry from codeOwnerEntries that matches targetPath - /// per algorithm described in: - /// https://git-scm.com/docs/gitignore#_pattern_format - /// and - /// https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners#codeowners-syntax + /// per algorithm described in the GitHub CODEOWNERS reference, + /// as linked to in this class comment. /// /// If there is no match, returns "new CodeOwnerEntry()". /// @@ -40,145 +64,104 @@ private static CodeOwnerEntry FindOwnersForClosestMatch( if (!targetPath.StartsWith("/")) targetPath = "/" + targetPath; - // We do not trim or add the slash ("/") at the end of the targetPath because its - // presence influences the matching algorithm: - // Slash at the end denotes the target path is a directory, not a file, so it might - // match against a CODEOWNERS entry that matches only directories and not files. - - // Entries below take precedence, hence we read the file from the bottom up. - // By convention, entries in CODEOWNERS should be sorted top-down in the order of: - // - 'RepoPath', - // - 'ServicePath' - // - and then 'PackagePath'. - // However, due to lack of validation, as of 12/29/2022 this is not always the case. - for (int i = codeownersEntries.Count - 1; i >= 0; i--) - { - string codeownersPath = codeownersEntries[i].PathExpression; - if (ContainsUnsupportedCharacters(codeownersPath)) - { - continue; - } - - List globPatterns = ConvertToGlobPatterns(codeownersPath); - PatternMatchingResult patternMatchingResult = MatchGlobPatterns(targetPath, globPatterns); - if (patternMatchingResult.HasMatches) - { - return codeownersEntries[i]; - } - } - // assert: none of the codeownersEntries matched targetPath - return new CodeOwnerEntry(); + // Note we cannot add or trim the slash at the end of targetPath. + // Slash at the end of target path denotes it is a directory, not a file, + // so it can not match against a CODEOWNERS entry that is guaranteed to be a file, + // by the virtue of not ending with "/". + + + CodeOwnerEntry matchedEntry = codeownersEntries + .Where(entry => !ContainsUnsupportedCharacters(entry.PathExpression)) + // Entries listed in CODEOWNERS file below take precedence, hence we read the file from the bottom up. + // By convention, entries in CODEOWNERS should be sorted top-down in the order of: + // - 'RepoPath', + // - 'ServicePath' + // - and then 'PackagePath'. + // However, due to lack of validation, as of 12/29/2022 this is not always the case. + .Reverse() + .FirstOrDefault( + entry => Matches(targetPath, entry), + // assert: none of the codeownersEntries matched targetPath + new CodeOwnerEntry()); + + return matchedEntry; } private static bool ContainsUnsupportedCharacters(string codeownersPath) => unsupportedChars.Any(codeownersPath.Contains); - /// - /// Converts codeownersPath to a set of glob patterns to include in - /// glob matching. The conversion is a translation from codeowners and .gitignore - /// spec into glob. That is, it reduces the spec to glob rules, - /// which then can be checked against using glob matcher. - /// - /// - /// Usually 1 glob pattern to include in matching. In one special case - /// returns 2 patterns, which happens when the path needs to be interpreted - /// both as-is file, or as a directory prefix. - /// - private static List ConvertToGlobPatterns(string codeownersPath) + private static bool Matches(string targetPath, CodeOwnerEntry entry) { - codeownersPath = ConvertPrefix(codeownersPath); - var patternsToInclude = PatternsToInclude(codeownersPath); - return patternsToInclude; - } + string codeownersPath = entry.PathExpression; - private static string ConvertPrefix(string codeownersPath) - { - // Codeowners entry path starting with "/*" is equivalent to it starting with "*". - // Note this also covers cases when it starts with "/**". - if (codeownersPath.StartsWith("/*")) - codeownersPath = codeownersPath.Substring("/".Length); - - // If the codeownersPath doesn't have any slash at the beginning or in the middle, - // then it means its start is relative to any directory in the repository, - // hence we prepend "**/" to reflect this as a glob pattern. - if (!codeownersPath.TrimEnd('/').Contains("/")) - { - codeownersPath = "**/" + codeownersPath; - } - // If, on the other hand, codeownersPath has to start at the root, we ensure - // it starts with slash to reflect that. - else - { - if (!codeownersPath.StartsWith("/")) - { - codeownersPath = "/" + codeownersPath; - } - else - { - // codeownersPath already starts with "/", so nothing to prepend. - } - } + targetPath = NormalizeTargetPath(targetPath, codeownersPath); - return codeownersPath; + Regex regex = ConvertToRegex(codeownersPath); + return regex.IsMatch(targetPath); } - private static List PatternsToInclude(string codeownersPath) + private static string NormalizeTargetPath(string targetPath, string codeownersPath) { - List patternsToInclude = new List(); - - if (codeownersPath.EndsWith("/")) - { - patternsToInclude.Add(ConvertDirectorySuffix(codeownersPath)); - } - else - { - patternsToInclude.Add(ConvertDirectorySuffix(codeownersPath + "/")); - patternsToInclude.Add(codeownersPath); - } - - return patternsToInclude; + // If the considered CODEOWNERS path ends with "/", it means we can + // assume targetPath also is a path to directory. + // + // This works in all 3 cases, which are: + // + // 1. The targetPath is the same as the CODEOWNERS path, except + // the targetPath doesn't have "/" at the end. If so, + // it might be a path to a file or directory, + // but the exact path match with CODEOWNERS path and our validation + // guarantees it is an existing directory, hence we can append "/". + // + // 2. The targetPath is a prefix path of CODEOWNERS path. In such case + // there won't be a match, and appending "/" won't change that. + // + // 3. The CODEOWNERS path is a prefix path of targetPath. In such case + // there will be a match, and appending "/" won't change that. + if (codeownersPath.EndsWith("/") && !targetPath.EndsWith("/")) + targetPath += "/"; + + return targetPath; } - private static string ConvertDirectorySuffix(string codeownersPath) + private static Regex ConvertToRegex(string codeownersPath) { - // If the codeownersPath doesn't already end with "*", - // we need to append "**", to denote that codeownersPath has to match - // a prefix of the targetPath, not the entire path. - if (!codeownersPath.TrimEnd('/').EndsWith("*")) - { - codeownersPath += "**"; - } - else + // CODEOWNERS paths that do not start with "/" are relative and considered invalid. + // However, here we handle such cases to accomodate for parsing CODEOWNERS file + // paths that somehow slipped through validation. We do so by instead treating + // such paths as if they were absolute to repository root, i.e. starting with "/". + if (!codeownersPath.StartsWith("/")) + codeownersPath = "/" + codeownersPath; + + string pattern = codeownersPath; + + // Kind of hoping here that the CODEOWNERS path will never have + // "_DOUBLE_STAR_" or "_SINGLE_STAR_" strings in it. + pattern = pattern.Replace("**", "_DOUBLE_STAR_"); + pattern = pattern.Replace("*", "_SINGLE_STAR_"); + + pattern = Regex.Escape(pattern); + + // Denote that all paths are absolute by prepending "beginning of string" symbol. + pattern = "^" + pattern; + + // Lack of slash at the end denotes the path is a path to a file, + // per our validation logic. + // Note we assume this is the case even if the path is invalid, + // even though in such case it might not necessarily be true. + if (!(pattern.EndsWith("/") + || pattern.EndsWith("_DOUBLE_STAR_"))) { - // codeownersPath directory already has stars in the suffix, so nothing to do. - // Example paths: - // apps/*/ - // apps/**/ + // Append "end of string", symbol, denoting the match has to be exact, + // not a substring, as we are dealing with a file. + pattern += "$"; } - return codeownersPath; - } - - private static PatternMatchingResult MatchGlobPatterns( - string targetPath, - List patterns) - { - // Note we use StringComparison.Ordinal, not StringComparison.OrdinalIgnoreCase, - // as CODEOWNERS paths are case-sensitive. - var globMatcher = new Matcher(StringComparison.Ordinal); - - foreach (var pattern in patterns) - { - globMatcher.AddInclude(pattern); - } - - var dir = new InMemoryDirectoryInfo( - // This 'rootDir: "/"' is used here only because the globMatcher API requires it. - rootDir: "/", - files: new List { targetPath }); - - var patternMatchingResult = globMatcher.Execute(dir); - return patternMatchingResult; + pattern = pattern.Replace("_DOUBLE_STAR_/", "(.*)"); + pattern = pattern.Replace("/_DOUBLE_STAR_", "(.*)"); + pattern = pattern.Replace("_DOUBLE_STAR_", "(.*)"); + pattern = pattern.Replace("_SINGLE_STAR_", "([^/]*)"); + return new Regex(pattern); } } } From f50e7915ddf9b7d57666f1c66f18ff50d77df28e Mon Sep 17 00:00:00 2001 From: Konrad Jamrozik Date: Sat, 7 Jan 2023 06:16:16 -0800 Subject: [PATCH 2/9] remove obsolete reference to Microsoft.Extensions.FileSystemGlobbing --- .../CodeOwnersParser/Azure.Sdk.Tools.CodeOwnersParser.csproj | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/code-owners-parser/CodeOwnersParser/Azure.Sdk.Tools.CodeOwnersParser.csproj b/tools/code-owners-parser/CodeOwnersParser/Azure.Sdk.Tools.CodeOwnersParser.csproj index 8a4e48256af..eb6d93f360b 100644 --- a/tools/code-owners-parser/CodeOwnersParser/Azure.Sdk.Tools.CodeOwnersParser.csproj +++ b/tools/code-owners-parser/CodeOwnersParser/Azure.Sdk.Tools.CodeOwnersParser.csproj @@ -7,7 +7,6 @@ - From 5d55bb5df932d7818b1fad3da658d21f4f34501b Mon Sep 17 00:00:00 2001 From: Konrad Jamrozik Date: Sat, 7 Jan 2023 06:26:24 -0800 Subject: [PATCH 3/9] clarify cases handling **, plus improve comments --- .../CodeOwnersFileTests.cs | 17 +++++++++-------- .../CodeOwnersParser/MatchedCodeOwnerEntry.cs | 11 ++++++++++- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/tools/code-owners-parser/Azure.Sdk.Tools.CodeOwnersParser.Tests/CodeOwnersFileTests.cs b/tools/code-owners-parser/Azure.Sdk.Tools.CodeOwnersParser.Tests/CodeOwnersFileTests.cs index ce22e4a2897..6013207bf5d 100644 --- a/tools/code-owners-parser/Azure.Sdk.Tools.CodeOwnersParser.Tests/CodeOwnersFileTests.cs +++ b/tools/code-owners-parser/Azure.Sdk.Tools.CodeOwnersParser.Tests/CodeOwnersFileTests.cs @@ -8,18 +8,18 @@ public class CodeOwnersFileTests { /// /// A battery of test cases specifying behavior of new logic matching target - /// path to CODEOWNERS entries , and comparing it to existing, legacy logic. + /// path to CODEOWNERS entries, and comparing it to existing, legacy logic. /// - /// The logic that has changed lives in CodeOwnersFile.FindOwnersForClosestMatch. - /// - /// The new logic supports matching against wildcards, while the old one doesn't. + /// The logic that has changed is located in CodeOwnersFile.FindOwnersForClosestMatch. /// /// In the test case table below, any discrepancy between legacy and new - /// parser expected matches that doesn't pertain to wildcard matching denotes - /// a potential backward compatibility and/or existing defect in the legacy parser. - /// + /// matcher expected matches that doesn't pertain to wildcard matching denotes + /// a potential backward compatibility and/or existing defect in the legacy matcher. + /// /// For further details, please see: - /// https://github.com/Azure/azure-sdk-tools/issues/2770 + /// - Class comment for Azure.Sdk.Tools.CodeOwnersParser.MatchedCodeOwnerEntry + /// - https://github.com/Azure/azure-sdk-tools/issues/2770 + /// - https://github.com/Azure/azure-sdk-tools/issues/4859 /// private static readonly TestCase[] testCases = { @@ -96,6 +96,7 @@ public class CodeOwnersFileTests new( "/a/*/b/" , "a/x/b/c" , false , true ), new( "/a/*/b/" , "a/x/c" , false , false ), new( "/a/*/b/" , "a/x/y/b" , false , false ), + new( "/a**b/" , "a/x/y/b" , false , true ), new( "/a/**/b/" , "a/b" , false , true ), new( "/a/**/b/" , "a/x/b" , false , true ), new( "/a/**/b/" , "a/x/y/b" , false , true ), diff --git a/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs b/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs index 6afbd446ec5..268b5868ff8 100644 --- a/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs +++ b/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs @@ -8,6 +8,12 @@ namespace Azure.Sdk.Tools.CodeOwnersParser /// Represents a CODEOWNERS file entry that matched to targetPath from /// the list of entries, assumed to have been parsed from CODEOWNERS file. /// + /// This is a new matcher, compared to the old one, located in: + /// CodeOwnersFile.FindOwnersForClosestMatchLegacyImpl() + /// This new matcher supports matching against wildcards, while the old one doesn't. + /// This new matcher is designed to work with CODEOWNERS file validation: + /// https://github.com/Azure/azure-sdk-tools/issues/4859 + /// /// To use this class, construct it. /// /// To obtain the value of the matched entry, reference "Value" member. @@ -39,7 +45,7 @@ internal class MatchedCodeOwnerEntry /// /// Any CODEOWNERS path with these characters will be skipped. /// Note these are valid parts of file paths, but we are not supporting - /// them to simplify the parser logic. + /// them to simplify the matcher logic. /// private static readonly char[] unsupportedChars = { '[', ']', '!', '?' }; @@ -157,8 +163,11 @@ private static Regex ConvertToRegex(string codeownersPath) pattern += "$"; } + // Note that the "/**/" case is implicitly covered by "**/". pattern = pattern.Replace("_DOUBLE_STAR_/", "(.*)"); + // This case is necessary to cover suffix case, e.g. "/foo/bar/**". pattern = pattern.Replace("/_DOUBLE_STAR_", "(.*)"); + // This case is necessary to cover inline **, e.g. "/a**b/". pattern = pattern.Replace("_DOUBLE_STAR_", "(.*)"); pattern = pattern.Replace("_SINGLE_STAR_", "([^/]*)"); return new Regex(pattern); From 86ed4fe9d2a478f45c707404ca491a6d4c618caa Mon Sep 17 00:00:00 2001 From: Konrad Jamrozik Date: Sat, 7 Jan 2023 06:42:39 -0800 Subject: [PATCH 4/9] refactor --- .../CodeOwnersParser/MatchedCodeOwnerEntry.cs | 83 ++++++++++++------- 1 file changed, 51 insertions(+), 32 deletions(-) diff --git a/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs b/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs index 268b5868ff8..9a870666fb2 100644 --- a/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs +++ b/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs @@ -27,20 +27,9 @@ internal class MatchedCodeOwnerEntry public readonly CodeOwnerEntry Value; /// - /// The entry is valid if it obeys following conditions: - /// - The Value was obtained with a call to Azure.Sdk.Tools.CodeOwnersParser.CodeOwnersFile.ParseContent(). - /// - As a consequence, in the case of no match, the entry is not valid. - /// - the Value.PathExpression starts with "/". - /// - /// Once the validation described in the following issue is implemented: - /// https://github.com/Azure/azure-sdk-tools/issues/4859 - /// To be valid, the entry will also have to obey following conditions: - /// - if the Value.PathExpression ends with "/", at least one corresponding - /// directory exists in the repository - /// - if the Value.PathExpression does not end with "/", at least one corresponding - /// file exists in the repository. + /// See comment on IsCodeOwnersPathValid /// - public bool IsValid => this.Value.PathExpression.StartsWith("/"); + public bool IsValid => IsCodeOwnersPathValid(this.Value.PathExpression); /// /// Any CODEOWNERS path with these characters will be skipped. @@ -132,12 +121,7 @@ private static string NormalizeTargetPath(string targetPath, string codeownersPa private static Regex ConvertToRegex(string codeownersPath) { - // CODEOWNERS paths that do not start with "/" are relative and considered invalid. - // However, here we handle such cases to accomodate for parsing CODEOWNERS file - // paths that somehow slipped through validation. We do so by instead treating - // such paths as if they were absolute to repository root, i.e. starting with "/". - if (!codeownersPath.StartsWith("/")) - codeownersPath = "/" + codeownersPath; + codeownersPath = ConvertPathIfInvalid(codeownersPath); string pattern = codeownersPath; @@ -148,29 +132,64 @@ private static Regex ConvertToRegex(string codeownersPath) pattern = Regex.Escape(pattern); - // Denote that all paths are absolute by prepending "beginning of string" symbol. + pattern = AddStringAnchors(pattern); + + // Note that the "/**/" case is implicitly covered by "**/". + pattern = pattern.Replace("_DOUBLE_STAR_/", "(.*)"); + // This case is necessary to cover suffix case, e.g. "/foo/bar/**". + pattern = pattern.Replace("/_DOUBLE_STAR_", "(.*)"); + // This case is necessary to cover inline **, e.g. "/a**b/". + pattern = pattern.Replace("_DOUBLE_STAR_", "(.*)"); + pattern = pattern.Replace("_SINGLE_STAR_", "([^/]*)"); + return new Regex(pattern); + } + + private static string ConvertPathIfInvalid(string codeownersPath) + { + // CODEOWNERS paths that do not start with "/" are relative and considered invalid. + // However, here we handle such cases to accomodate for parsing CODEOWNERS file + // paths that somehow slipped through validation. We do so by instead treating + // such paths as if they were absolute to repository root, i.e. starting with "/". + if (!IsCodeOwnersPathValid(codeownersPath)) + codeownersPath = "/" + codeownersPath; + return codeownersPath; + } + + private static string AddStringAnchors(string pattern) + { + // Denote that all paths are absolute by pre-pending "beginning of string" symbol. pattern = "^" + pattern; // Lack of slash at the end denotes the path is a path to a file, // per our validation logic. - // Note we assume this is the case even if the path is invalid, - // even though in such case it might not necessarily be true. - if (!(pattern.EndsWith("/") - || pattern.EndsWith("_DOUBLE_STAR_"))) + // Note we assume this is path to a file even if the path is invalid, + // even though in such case the path might be a path to a directory. + if (!pattern.EndsWith("/")) { // Append "end of string", symbol, denoting the match has to be exact, // not a substring, as we are dealing with a file. pattern += "$"; } - // Note that the "/**/" case is implicitly covered by "**/". - pattern = pattern.Replace("_DOUBLE_STAR_/", "(.*)"); - // This case is necessary to cover suffix case, e.g. "/foo/bar/**". - pattern = pattern.Replace("/_DOUBLE_STAR_", "(.*)"); - // This case is necessary to cover inline **, e.g. "/a**b/". - pattern = pattern.Replace("_DOUBLE_STAR_", "(.*)"); - pattern = pattern.Replace("_SINGLE_STAR_", "([^/]*)"); - return new Regex(pattern); + return pattern; } + + /// + /// The entry is valid if it obeys following conditions: + /// - The Value was obtained with a call to + /// Azure.Sdk.Tools.CodeOwnersParser.CodeOwnersFile.ParseContent(). + /// - As a consequence, in the case of no match, the entry is not valid. + /// - the Value.PathExpression starts with "/". + /// + /// Once the validation described in the following issue is implemented: + /// https://github.com/Azure/azure-sdk-tools/issues/4859 + /// To be valid, the entry will also have to obey following conditions: + /// - if the Value.PathExpression ends with "/", at least one corresponding + /// directory exists in the repository + /// - if the Value.PathExpression does not end with "/", at least one corresponding + /// file exists in the repository. + /// + private static bool IsCodeOwnersPathValid(string codeownersPath) + => codeownersPath.StartsWith("/"); } } From ee2a4e19d0627b07f1a15a8405c3c6bc0bd3df93 Mon Sep 17 00:00:00 2001 From: Konrad Jamrozik Date: Sat, 7 Jan 2023 07:08:59 -0800 Subject: [PATCH 5/9] replace NormalizeTargetPath with regex manipulation --- .../CodeOwnersParser/MatchedCodeOwnerEntry.cs | 90 +++++++++---------- 1 file changed, 44 insertions(+), 46 deletions(-) diff --git a/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs b/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs index 9a870666fb2..587e92bfa24 100644 --- a/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs +++ b/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs @@ -89,37 +89,11 @@ private static bool Matches(string targetPath, CodeOwnerEntry entry) { string codeownersPath = entry.PathExpression; - targetPath = NormalizeTargetPath(targetPath, codeownersPath); - - Regex regex = ConvertToRegex(codeownersPath); + Regex regex = ConvertToRegex(targetPath, codeownersPath); return regex.IsMatch(targetPath); } - private static string NormalizeTargetPath(string targetPath, string codeownersPath) - { - // If the considered CODEOWNERS path ends with "/", it means we can - // assume targetPath also is a path to directory. - // - // This works in all 3 cases, which are: - // - // 1. The targetPath is the same as the CODEOWNERS path, except - // the targetPath doesn't have "/" at the end. If so, - // it might be a path to a file or directory, - // but the exact path match with CODEOWNERS path and our validation - // guarantees it is an existing directory, hence we can append "/". - // - // 2. The targetPath is a prefix path of CODEOWNERS path. In such case - // there won't be a match, and appending "/" won't change that. - // - // 3. The CODEOWNERS path is a prefix path of targetPath. In such case - // there will be a match, and appending "/" won't change that. - if (codeownersPath.EndsWith("/") && !targetPath.EndsWith("/")) - targetPath += "/"; - - return targetPath; - } - - private static Regex ConvertToRegex(string codeownersPath) + private static Regex ConvertToRegex(string targetPath, string codeownersPath) { codeownersPath = ConvertPathIfInvalid(codeownersPath); @@ -132,7 +106,10 @@ private static Regex ConvertToRegex(string codeownersPath) pattern = Regex.Escape(pattern); - pattern = AddStringAnchors(pattern); + // Denote that all paths are absolute by pre-pending "beginning of string" symbol. + pattern = "^" + pattern; + + pattern = SetPatternSuffix(targetPath, pattern); // Note that the "/**/" case is implicitly covered by "**/". pattern = pattern.Replace("_DOUBLE_STAR_/", "(.*)"); @@ -141,39 +118,60 @@ private static Regex ConvertToRegex(string codeownersPath) // This case is necessary to cover inline **, e.g. "/a**b/". pattern = pattern.Replace("_DOUBLE_STAR_", "(.*)"); pattern = pattern.Replace("_SINGLE_STAR_", "([^/]*)"); - return new Regex(pattern); - } - private static string ConvertPathIfInvalid(string codeownersPath) - { - // CODEOWNERS paths that do not start with "/" are relative and considered invalid. - // However, here we handle such cases to accomodate for parsing CODEOWNERS file - // paths that somehow slipped through validation. We do so by instead treating - // such paths as if they were absolute to repository root, i.e. starting with "/". - if (!IsCodeOwnersPathValid(codeownersPath)) - codeownersPath = "/" + codeownersPath; - return codeownersPath; + return new Regex(pattern); } - private static string AddStringAnchors(string pattern) + private static string SetPatternSuffix(string targetPath, string pattern) { - // Denote that all paths are absolute by pre-pending "beginning of string" symbol. - pattern = "^" + pattern; - // Lack of slash at the end denotes the path is a path to a file, // per our validation logic. // Note we assume this is path to a file even if the path is invalid, // even though in such case the path might be a path to a directory. if (!pattern.EndsWith("/")) { - // Append "end of string", symbol, denoting the match has to be exact, + // Append "end of string" symbol, denoting the match has to be exact, // not a substring, as we are dealing with a file. pattern += "$"; } + // If the CODEOWNERS pattern is matching only against directories, + // but the targetPath may not be a directory, we need to trim + // the "/" from the pattern to ensure match is present. + // + // To illustrate this, consider following cases: + // + // targetPath: /a , /a*/ + // pattern: /a/ , /abc + // + // Without trimming pattern to be "/a" and "/a*" respectively, + // these wouldn't match, but they should. + // + // On the other hand, trimming the suffix "/" when it is not + // necessary won't lead to issues. E.g.: + // + // targetPath: /a/b + // pattern: /a/ + // + // Here we still have a prefix match even if we trim pattern to "/a". + else if (pattern.EndsWith("/") && !targetPath.EndsWith("/")) + { + pattern = pattern.TrimEnd('/'); + } return pattern; } + private static string ConvertPathIfInvalid(string codeownersPath) + { + // CODEOWNERS paths that do not start with "/" are relative and considered invalid. + // However, here we handle such cases to accomodate for parsing CODEOWNERS file + // paths that somehow slipped through validation. We do so by instead treating + // such paths as if they were absolute to repository root, i.e. starting with "/". + if (!IsCodeOwnersPathValid(codeownersPath)) + codeownersPath = "/" + codeownersPath; + return codeownersPath; + } + /// /// The entry is valid if it obeys following conditions: /// - The Value was obtained with a call to @@ -183,7 +181,7 @@ private static string AddStringAnchors(string pattern) /// /// Once the validation described in the following issue is implemented: /// https://github.com/Azure/azure-sdk-tools/issues/4859 - /// To be valid, the entry will also have to obey following conditions: + /// to be valid, the entry will also have to obey following conditions: /// - if the Value.PathExpression ends with "/", at least one corresponding /// directory exists in the repository /// - if the Value.PathExpression does not end with "/", at least one corresponding From 7c6b4ed91333773dd6bb5b54b02641e15b455a84 Mon Sep 17 00:00:00 2001 From: Konrad Jamrozik Date: Sun, 8 Jan 2023 19:02:12 -0800 Subject: [PATCH 6/9] improve comments --- .../CodeOwnersParser/MatchedCodeOwnerEntry.cs | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs b/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs index 587e92bfa24..b1d61a548b9 100644 --- a/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs +++ b/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs @@ -9,7 +9,9 @@ namespace Azure.Sdk.Tools.CodeOwnersParser /// the list of entries, assumed to have been parsed from CODEOWNERS file. /// /// This is a new matcher, compared to the old one, located in: - /// CodeOwnersFile.FindOwnersForClosestMatchLegacyImpl() + /// + /// CodeOwnersFile.FindOwnersForClosestMatchLegacyImpl() + /// /// This new matcher supports matching against wildcards, while the old one doesn't. /// This new matcher is designed to work with CODEOWNERS file validation: /// https://github.com/Azure/azure-sdk-tools/issues/4859 @@ -82,14 +84,23 @@ private static CodeOwnerEntry FindOwnersForClosestMatch( return matchedEntry; } + /// + /// See the comment on unsupportedChars. + /// private static bool ContainsUnsupportedCharacters(string codeownersPath) => unsupportedChars.Any(codeownersPath.Contains); + /// + /// Returns true if the regex expression representing the PathExpression + /// of CODEOWNERS entry matches a prefix of targetPath. + /// private static bool Matches(string targetPath, CodeOwnerEntry entry) { string codeownersPath = entry.PathExpression; Regex regex = ConvertToRegex(targetPath, codeownersPath); + // Is prefix match. I.e. it will return true if the regex matches + // a prefix of targetPath. return regex.IsMatch(targetPath); } @@ -99,7 +110,7 @@ private static Regex ConvertToRegex(string targetPath, string codeownersPath) string pattern = codeownersPath; - // Kind of hoping here that the CODEOWNERS path will never have + // Kind of hoping here that the CODEOWNERS paths will never have // "_DOUBLE_STAR_" or "_SINGLE_STAR_" strings in it. pattern = pattern.Replace("**", "_DOUBLE_STAR_"); pattern = pattern.Replace("*", "_SINGLE_STAR_"); @@ -135,11 +146,13 @@ private static string SetPatternSuffix(string targetPath, string pattern) pattern += "$"; } // If the CODEOWNERS pattern is matching only against directories, - // but the targetPath may not be a directory, we need to trim + // but the targetPath may not be a directory + // (as it doesn't have "/" at the end), we need to trim // the "/" from the pattern to ensure match is present. // // To illustrate this, consider following cases: // + // 1. 2. // targetPath: /a , /a*/ // pattern: /a/ , /abc // @@ -161,12 +174,15 @@ private static string SetPatternSuffix(string targetPath, string pattern) return pattern; } + /// + /// CODEOWNERS paths that do not start with "/" are relative and considered invalid, + /// See comment on "IsCodeOwnersPathValid" for definition of "valid". + /// However, here we handle such cases to accomodate for parsing CODEOWNERS file + /// paths that somehow slipped through that validation. We do so by instead treating + /// such paths as if they were absolute to repository root, i.e. starting with "/". + /// private static string ConvertPathIfInvalid(string codeownersPath) { - // CODEOWNERS paths that do not start with "/" are relative and considered invalid. - // However, here we handle such cases to accomodate for parsing CODEOWNERS file - // paths that somehow slipped through validation. We do so by instead treating - // such paths as if they were absolute to repository root, i.e. starting with "/". if (!IsCodeOwnersPathValid(codeownersPath)) codeownersPath = "/" + codeownersPath; return codeownersPath; From 98dbf7da2c237a5f5d4ee84b56bb582a6dfaa3ae Mon Sep 17 00:00:00 2001 From: Konrad Jamrozik Date: Sun, 8 Jan 2023 19:38:48 -0800 Subject: [PATCH 7/9] add diagnostic logging and test cases showing paths with [, ], and ! can match. --- .../CodeOwnersFileTests.cs | 8 +- .../Azure.Sdk.Tools.CodeOwnersParser.csproj | 1 + .../CodeOwnersParser/MatchedCodeOwnerEntry.cs | 76 ++++++++++++++----- 3 files changed, 64 insertions(+), 21 deletions(-) diff --git a/tools/code-owners-parser/Azure.Sdk.Tools.CodeOwnersParser.Tests/CodeOwnersFileTests.cs b/tools/code-owners-parser/Azure.Sdk.Tools.CodeOwnersParser.Tests/CodeOwnersFileTests.cs index 6013207bf5d..46eac380324 100644 --- a/tools/code-owners-parser/Azure.Sdk.Tools.CodeOwnersParser.Tests/CodeOwnersFileTests.cs +++ b/tools/code-owners-parser/Azure.Sdk.Tools.CodeOwnersParser.Tests/CodeOwnersFileTests.cs @@ -73,9 +73,15 @@ public class CodeOwnersFileTests new( "/*" , "a" , false , true ), new( "/*" , "a/" , false , false ), new( "/*" , "a/b" , false , false ), + new( "/*" , "[" , false , true ), + new( "/*" , "]" , false , true ), + new( "/*" , "!" , false , true ), new( "/**" , "a" , false , true ), new( "/**" , "a/" , false , true ), new( "/**" , "a/b" , false , true ), + new( "/**" , "[" , false , true ), + new( "/**" , "]" , false , true ), + new( "/**" , "!" , false , true ), new( "/a/*" , "a" , false , false ), // Not sure if this should not match. new( "/a/*" , "a/" , false , true ), new( "/a/*" , "a/b" , false , true ), @@ -123,7 +129,7 @@ public class CodeOwnersFileTests new( "/**/*x*/AB/*/CD" , "a/b/cxy/AB/fff/CD" , false, true ), new( "/**/*x*/AB/*/CD" , "a/b/cxy/AB/ff/ff/CD" , false, false ), new( "/**/*x*/AB/**/CD/*" , "a/b/cxy/AB/ff/ff/CD" , false, false ), - new( "/**/*x*/AB/**/CD/*" , "a/b/cxy/AB/ff/ff/CD/h" , false, true ), + new( "/**/*x*/AB/**/CD/*" , "a/b/cxy/AB/[]/!!/CD/h" , false, true ), // @formatter:on }; diff --git a/tools/code-owners-parser/CodeOwnersParser/Azure.Sdk.Tools.CodeOwnersParser.csproj b/tools/code-owners-parser/CodeOwnersParser/Azure.Sdk.Tools.CodeOwnersParser.csproj index eb6d93f360b..03e676e6387 100644 --- a/tools/code-owners-parser/CodeOwnersParser/Azure.Sdk.Tools.CodeOwnersParser.csproj +++ b/tools/code-owners-parser/CodeOwnersParser/Azure.Sdk.Tools.CodeOwnersParser.csproj @@ -7,6 +7,7 @@ + diff --git a/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs b/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs index b1d61a548b9..76f3cd297ee 100644 --- a/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs +++ b/tools/code-owners-parser/CodeOwnersParser/MatchedCodeOwnerEntry.cs @@ -1,6 +1,7 @@ using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; +using Microsoft.Extensions.Logging; namespace Azure.Sdk.Tools.CodeOwnersParser { @@ -26,6 +27,17 @@ namespace Azure.Sdk.Tools.CodeOwnersParser /// internal class MatchedCodeOwnerEntry { + /// + /// Token for temporarily substituting "**" in regex, to avoid it being escaped when + /// Regex.Escape is called. + /// + private const string DoubleStar = "_DOUBLE_STAR_"; + /// + /// Token for temporarily substituting "*" in regex, to avoid it being escaped when + /// Regex.Escape is called. + /// + private const string SingleStar = "_SINGLE_STAR_"; + public readonly CodeOwnerEntry Value; /// @@ -40,11 +52,20 @@ internal class MatchedCodeOwnerEntry /// private static readonly char[] unsupportedChars = { '[', ']', '!', '?' }; + private readonly ILogger log; + public MatchedCodeOwnerEntry(List entries, string targetPath) { + this.log = CreateLog(); this.Value = FindOwnersForClosestMatch(entries, targetPath); } + private ILogger CreateLog() + { + var loggerFactory = LoggerFactory.Create(builder => { builder.AddSimpleConsole(); }); + return loggerFactory.CreateLogger(); + } + /// /// Returns a CodeOwnerEntry from codeOwnerEntries that matches targetPath /// per algorithm described in the GitHub CODEOWNERS reference, @@ -52,7 +73,7 @@ public MatchedCodeOwnerEntry(List entries, string targetPath) /// /// If there is no match, returns "new CodeOwnerEntry()". /// - private static CodeOwnerEntry FindOwnersForClosestMatch( + private CodeOwnerEntry FindOwnersForClosestMatch( List codeownersEntries, string targetPath) { @@ -65,7 +86,6 @@ private static CodeOwnerEntry FindOwnersForClosestMatch( // Slash at the end of target path denotes it is a directory, not a file, // so it can not match against a CODEOWNERS entry that is guaranteed to be a file, // by the virtue of not ending with "/". - CodeOwnerEntry matchedEntry = codeownersEntries .Where(entry => !ContainsUnsupportedCharacters(entry.PathExpression)) @@ -87,14 +107,24 @@ private static CodeOwnerEntry FindOwnersForClosestMatch( /// /// See the comment on unsupportedChars. /// - private static bool ContainsUnsupportedCharacters(string codeownersPath) - => unsupportedChars.Any(codeownersPath.Contains); + private bool ContainsUnsupportedCharacters(string codeownersPath) + { + var contains = unsupportedChars.Any(codeownersPath.Contains); + if (contains) + { + log.LogWarning( + $"CODEOWNERS path \"{codeownersPath}\" contains unsupported characters: " + + string.Join(' ', unsupportedChars) + + " Because of that this path will never match."); + } + return contains; + } /// /// Returns true if the regex expression representing the PathExpression /// of CODEOWNERS entry matches a prefix of targetPath. /// - private static bool Matches(string targetPath, CodeOwnerEntry entry) + private bool Matches(string targetPath, CodeOwnerEntry entry) { string codeownersPath = entry.PathExpression; @@ -104,16 +134,21 @@ private static bool Matches(string targetPath, CodeOwnerEntry entry) return regex.IsMatch(targetPath); } - private static Regex ConvertToRegex(string targetPath, string codeownersPath) + private Regex ConvertToRegex(string targetPath, string codeownersPath) { - codeownersPath = ConvertPathIfInvalid(codeownersPath); + codeownersPath = NormalizePath(codeownersPath); string pattern = codeownersPath; - // Kind of hoping here that the CODEOWNERS paths will never have - // "_DOUBLE_STAR_" or "_SINGLE_STAR_" strings in it. - pattern = pattern.Replace("**", "_DOUBLE_STAR_"); - pattern = pattern.Replace("*", "_SINGLE_STAR_"); + if (codeownersPath.Contains(DoubleStar) || pattern.Contains(SingleStar)) + { + log.LogWarning( + $"CODEOWNERS path \"{codeownersPath}\" contains reserved phrases: " + + $"\"{DoubleStar}\" or \"{SingleStar}\""); + } + + pattern = pattern.Replace("**", DoubleStar); + pattern = pattern.Replace("*", SingleStar); pattern = Regex.Escape(pattern); @@ -123,12 +158,12 @@ private static Regex ConvertToRegex(string targetPath, string codeownersPath) pattern = SetPatternSuffix(targetPath, pattern); // Note that the "/**/" case is implicitly covered by "**/". - pattern = pattern.Replace("_DOUBLE_STAR_/", "(.*)"); + pattern = pattern.Replace($"{DoubleStar}/", "(.*)"); // This case is necessary to cover suffix case, e.g. "/foo/bar/**". - pattern = pattern.Replace("/_DOUBLE_STAR_", "(.*)"); + pattern = pattern.Replace($"/{DoubleStar}", "(.*)"); // This case is necessary to cover inline **, e.g. "/a**b/". - pattern = pattern.Replace("_DOUBLE_STAR_", "(.*)"); - pattern = pattern.Replace("_SINGLE_STAR_", "([^/]*)"); + pattern = pattern.Replace(DoubleStar, "(.*)"); + pattern = pattern.Replace(SingleStar, "([^/]*)"); return new Regex(pattern); } @@ -181,9 +216,9 @@ private static string SetPatternSuffix(string targetPath, string pattern) /// paths that somehow slipped through that validation. We do so by instead treating /// such paths as if they were absolute to repository root, i.e. starting with "/". /// - private static string ConvertPathIfInvalid(string codeownersPath) + private string NormalizePath(string codeownersPath) { - if (!IsCodeOwnersPathValid(codeownersPath)) + if (!codeownersPath.StartsWith("/")) codeownersPath = "/" + codeownersPath; return codeownersPath; } @@ -193,17 +228,18 @@ private static string ConvertPathIfInvalid(string codeownersPath) /// - The Value was obtained with a call to /// Azure.Sdk.Tools.CodeOwnersParser.CodeOwnersFile.ParseContent(). /// - As a consequence, in the case of no match, the entry is not valid. + /// - It does not contain unsupported characters (see "unsupportedChars"). /// - the Value.PathExpression starts with "/". /// /// Once the validation described in the following issue is implemented: /// https://github.com/Azure/azure-sdk-tools/issues/4859 /// to be valid, the entry will also have to obey following conditions: /// - if the Value.PathExpression ends with "/", at least one corresponding - /// directory exists in the repository + /// directory exists in the repository. /// - if the Value.PathExpression does not end with "/", at least one corresponding /// file exists in the repository. /// - private static bool IsCodeOwnersPathValid(string codeownersPath) - => codeownersPath.StartsWith("/"); + private bool IsCodeOwnersPathValid(string codeownersPath) + => codeownersPath.StartsWith("/") && !ContainsUnsupportedCharacters(codeownersPath); } } From 38a669730c9d2f025798e8a7f3f3b6a64f603751 Mon Sep 17 00:00:00 2001 From: Konrad Jamrozik Date: Sun, 8 Jan 2023 20:14:45 -0800 Subject: [PATCH 8/9] Fix Microsoft.Extensions.Logging version downgrade --- tools/identity-resolution/identity-resolution.csproj | 2 +- .../Azure.Sdk.Tools.NotificationConfiguration.csproj | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/identity-resolution/identity-resolution.csproj b/tools/identity-resolution/identity-resolution.csproj index 42deeddf898..f8666f676aa 100644 --- a/tools/identity-resolution/identity-resolution.csproj +++ b/tools/identity-resolution/identity-resolution.csproj @@ -6,7 +6,7 @@ - + diff --git a/tools/notification-configuration/notification-creator/Azure.Sdk.Tools.NotificationConfiguration.csproj b/tools/notification-configuration/notification-creator/Azure.Sdk.Tools.NotificationConfiguration.csproj index 7acff793635..5b76741d34a 100644 --- a/tools/notification-configuration/notification-creator/Azure.Sdk.Tools.NotificationConfiguration.csproj +++ b/tools/notification-configuration/notification-creator/Azure.Sdk.Tools.NotificationConfiguration.csproj @@ -9,7 +9,7 @@ - + From 121207b09156a63f2274b7cd0d0a10ba238688c2 Mon Sep 17 00:00:00 2001 From: Konrad Jamrozik Date: Sun, 8 Jan 2023 20:17:08 -0800 Subject: [PATCH 9/9] update LoggerFactory in NotificationConfiguration.Main --- .../notification-creator/Program.cs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/notification-configuration/notification-creator/Program.cs b/tools/notification-configuration/notification-creator/Program.cs index 1212581ed2f..bee812834f4 100644 --- a/tools/notification-configuration/notification-creator/Program.cs +++ b/tools/notification-configuration/notification-creator/Program.cs @@ -1,4 +1,4 @@ -using System; +using System; using System.Threading.Tasks; using Microsoft.Extensions.Logging; using Microsoft.VisualStudio.Services.Common; @@ -39,12 +39,10 @@ static async Task Main( var devOpsCreds = new VssBasicCredential("nobody", devOpsToken); var devOpsConnection = new VssConnection(new Uri($"https://dev.azure.com/{organization}/"), devOpsCreds); -#pragma warning disable CS0618 // Type or member is obsolete var loggerFactory = LoggerFactory.Create(builder => { - builder.AddConsole(config => { config.IncludeScopes = true; }); + builder.AddSimpleConsole(config => { config.IncludeScopes = true; }); }); -#pragma warning restore CS0618 // Type or member is obsolete var devOpsServiceLogger = loggerFactory.CreateLogger(); var notificationConfiguratorLogger = loggerFactory.CreateLogger();