From a51b918b11a76a4ec60da5acc1041934c086a3b7 Mon Sep 17 00:00:00 2001 From: "e.solutions" <17569373+Pfoerd@users.noreply.github.com> Date: Thu, 9 Jun 2022 11:13:05 +0200 Subject: [PATCH] add alternatives to package matcher This change will allow defining alternations within `()` and `[]`. I.e. it is now possible to match both `some.a.pkg` and `some.b.pkg` with a single pattern `some.[a|b].pkg` or capture it via `some.(a|b).pkg`. The demand for this comes from users dealing with legacy package structures, or where the package structure is dictated from the outside (e.g. via enterprise architecture). They might sometimes need to create the slices from different parts of the package tree, e.g. `..[application|domain].(*)`. While this is already possible using the `SliceAssignment` API this is not very straight forward and also demands quite some boilerplate. We apply the following constraints to the new API to keep it simple to implement and easy and consistent for the user: 1) No nested groups via `[]` or `()` 1.1) `()` within `()` would just be confusing and there does not seem to be any valid use case for it 1.2) `()` within `[]` would be ambiguous and hard to implement (e.g. within `[first.(*)|second.(*)]` the second `(*)` would be capturing group 2 within the regex, even though group 1 and 2 are mutually exclusive) 1.3) `[]` within `[]` would make the implementation a lot more complex, since we would now need to match the correct brackets of nested groups when creating the regex 1.4) `[]` within `()` would be easy to implement, but make the API inconsistent ("why can I nest it like this, but not the other way around?") 2) No toplevel alternations. This goes together with 1), since it would make the implementation more complex to prevent nested capturing groups Issue: #662 Signed-off-by: e.solutions <17569373+Pfoerd@users.noreply.github.com> on-behalf-of: @e-solutions-GmbH Signed-off-by: Peter Gafert --- .../archunit/core/domain/PackageMatcher.java | 60 ++++++- .../core/domain/PackageMatcherTest.java | 149 ++++++++++++------ 2 files changed, 154 insertions(+), 55 deletions(-) diff --git a/archunit/src/main/java/com/tngtech/archunit/core/domain/PackageMatcher.java b/archunit/src/main/java/com/tngtech/archunit/core/domain/PackageMatcher.java index 059e7b1d0c..80a7013102 100644 --- a/archunit/src/main/java/com/tngtech/archunit/core/domain/PackageMatcher.java +++ b/archunit/src/main/java/com/tngtech/archunit/core/domain/PackageMatcher.java @@ -22,7 +22,9 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.IntStream; +import java.util.stream.Stream; +import com.google.common.base.CharMatcher; import com.google.common.collect.ImmutableSet; import com.tngtech.archunit.PublicAPI; @@ -41,6 +43,12 @@ *
  • {@code '*.*.pack*..'} matches {@code 'a.b.packfix.c.d'}, * but neither {@code 'a.packfix.b'} nor {@code 'a.b.prepack.d'}
  • * + * You can also use alternations with the '|' operator within brackets. For example + * + *

    * Furthermore, the use of capturing groups is supported. In this case '(*)' matches any sequence of characters, * but not the dot '.', while '(**)' matches any sequence including the dot.
    * For example @@ -49,6 +57,7 @@ *

  • {@code '..service.(**)'} matches {@code 'a.service.hello.more'} and group 1 would be {@code 'hello.more'}
  • *
  • {@code 'my.(*)..service.(**)'} matches {@code 'my.company.some.service.hello.more'} * and group 1 would be {@code 'company'}, while group 2 would be {@code 'hello.more'}
  • + *
  • {@code '..service.(a|b*)..'} matches {@code 'a.service.bar.more'} and group 1 would be {@code 'bar'}
  • * * Create via {@link PackageMatcher#of(String) PackageMatcher.of(packageIdentifier)} */ @@ -62,7 +71,14 @@ public final class PackageMatcher { private static final String TWO_STAR_CAPTURE_REGEX = "(\\w+(?:\\.\\w+)*)"; static final String TWO_STAR_REGEX_MARKER = "#%#%#"; - private static final Set PACKAGE_CONTROL_SYMBOLS = ImmutableSet.of('*', '(', ')', '.'); + private static final Pattern ILLEGAL_ALTERNATION_PATTERN = Pattern.compile("\\[[^|]*]"); + private static final Pattern ILLEGAL_NESTED_GROUP_PATTERN = Pattern.compile( + nestedGroupRegex('(', ')', '(') + + "|" + nestedGroupRegex('(', ')', '[') + + "|" + nestedGroupRegex('[', ']', '(') + + "|" + nestedGroupRegex('[', ']', '[')); + + private static final Set PACKAGE_CONTROL_SYMBOLS = ImmutableSet.of('*', '(', ')', '.', '|', '[', ']'); private final String packageIdentifier; private final Pattern packagePattern; @@ -84,9 +100,34 @@ private void validate(String packageIdentifier) { if (packageIdentifier.contains("(..)")) { throw new IllegalArgumentException("Package Identifier does not support capturing via (..), use (**) instead"); } + if (ILLEGAL_ALTERNATION_PATTERN.matcher(packageIdentifier).find()) { + throw new IllegalArgumentException( + "Package Identifier does not allow alternation brackets '[]' without specifying any alternative via '|' inside"); + } + if (containsToplevelAlternation(packageIdentifier)) { + throw new IllegalArgumentException( + "Package Identifier only supports '|' inside of '[]' or '()'"); + } + if (ILLEGAL_NESTED_GROUP_PATTERN.matcher(packageIdentifier).find()) { + throw new IllegalArgumentException("Package Identifier does not support nesting '()' or '[]' within other '()' or '[]'"); + } validateCharacters(packageIdentifier); } + private boolean containsToplevelAlternation(String packageIdentifier) { + Stream prefixesBeforeAlternation = IntStream.range(0, packageIdentifier.length()) + .filter(i -> packageIdentifier.charAt(i) == '|') + .mapToObj(alternationIndex -> packageIdentifier.substring(0, alternationIndex)); + + return prefixesBeforeAlternation.anyMatch(beforeAlternation -> + sameNumberOfOccurrences(beforeAlternation, '(', ')') + && sameNumberOfOccurrences(beforeAlternation, '[', ']')); + } + + private boolean sameNumberOfOccurrences(String string, char first, char second) { + return CharMatcher.is(first).countIn(string) == CharMatcher.is(second).countIn(string); + } + private void validateCharacters(String packageIdentifier) { for (int i = 0; i < packageIdentifier.length(); i++) { char c = packageIdentifier.charAt(i); @@ -99,12 +140,13 @@ private void validateCharacters(String packageIdentifier) { } private String convertToRegex(String packageIdentifier) { - return packageIdentifier. - replace(TWO_STAR_CAPTURE_LITERAL, TWO_STAR_REGEX_MARKER). - replace("*", "\\w+"). - replace(".", "\\."). - replace(TWO_STAR_REGEX_MARKER, TWO_STAR_CAPTURE_REGEX). - replace("\\.\\.", TWO_DOTS_REGEX); + return packageIdentifier + .replaceAll("\\[(.*?)]", "(?:$1)") // replacing all '[..|..]' with '(?:..|..)' + .replace(TWO_STAR_CAPTURE_LITERAL, TWO_STAR_REGEX_MARKER) + .replace("*", "\\w+") + .replace(".", "\\.") + .replace(TWO_STAR_REGEX_MARKER, TWO_STAR_CAPTURE_REGEX) + .replace("\\.\\.", TWO_DOTS_REGEX); } /** @@ -144,6 +186,10 @@ public String toString() { return "PackageMatcher{" + packageIdentifier + '}'; } + private static String nestedGroupRegex(char outerOpeningChar, char outerClosingChar, char nestedOpeningChar) { + return "\\" + outerOpeningChar + "[^" + outerClosingChar + "]*\\" + nestedOpeningChar; + } + public static final class Result { private final Matcher matcher; diff --git a/archunit/src/test/java/com/tngtech/archunit/core/domain/PackageMatcherTest.java b/archunit/src/test/java/com/tngtech/archunit/core/domain/PackageMatcherTest.java index d64d2cc86a..9728599c61 100644 --- a/archunit/src/test/java/com/tngtech/archunit/core/domain/PackageMatcherTest.java +++ b/archunit/src/test/java/com/tngtech/archunit/core/domain/PackageMatcherTest.java @@ -5,6 +5,7 @@ import com.tngtech.archunit.core.domain.PackageMatcher.Result; import com.tngtech.java.junit.dataprovider.DataProvider; import com.tngtech.java.junit.dataprovider.DataProviderRunner; +import com.tngtech.java.junit.dataprovider.UseDataProvider; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -12,6 +13,7 @@ import static com.tngtech.archunit.core.domain.PackageMatcher.TO_GROUPS; import static com.tngtech.archunit.testutil.Assertions.assertThat; +import static com.tngtech.java.junit.dataprovider.DataProviders.testForEach; @RunWith(DataProviderRunner.class) public class PackageMatcherTest { @@ -20,34 +22,41 @@ public class PackageMatcherTest { @Test @DataProvider(value = { - "some.arbitrary.pkg | some.arbitrary.pkg | true", - "some.arbitrary.pkg | some.thing.different | false", - "some..pkg | some.arbitrary.pkg | true", - "some..middle..pkg | some.arbitrary.middle.more.pkg | true", - "*..pkg | some.arbitrary.pkg | true", - "some..* | some.arbitrary.pkg | true", - "*..pkg | some.arbitrary.pkg.toomuch | false", - "toomuch.some..* | some.arbitrary.pkg | false", - "*..wrong | some.arbitrary.pkg | false", - "some..* | wrong.arbitrary.pkg | false", - "..some | some | true", - "some.. | some | true", - "*..some | some | false", - "some..* | some | false", - "..some | asome | false", - "some.. | somea | false", - "*.*.* | wrong.arbitrary.pkg | true", - "*.*.* | wrong.arbitrary.pkg.toomuch | false", - "some.arbi*.pk*.. | some.arbitrary.pkg.whatever | true", - "some.arbi*.. | some.brbitrary.pkg | false", - "some.*rary.*kg.. | some.arbitrary.pkg.whatever | true", - "some.*rary.. | some.arbitrarz.pkg | false", - "some.pkg | someepkg | false", - "..pkg.. | some.random.pkg.maybe.anywhere | true", - "..p.. | s.r.p.m.a | true", - "*..pkg..* | some.random.pkg.maybe.anywhere | true", - "*..p..* | s.r.p.m.a | true" - }, splitBy = "\\|") + "some.arbitrary.pkg , some.arbitrary.pkg , true", + "some.arbitrary.pkg , some.thing.different , false", + "some..pkg , some.arbitrary.pkg , true", + "some..middle..pkg , some.arbitrary.middle.more.pkg , true", + "*..pkg , some.arbitrary.pkg , true", + "some..* , some.arbitrary.pkg , true", + "*..pkg , some.arbitrary.pkg.toomuch , false", + "toomuch.some..* , some.arbitrary.pkg , false", + "*..wrong , some.arbitrary.pkg , false", + "some..* , wrong.arbitrary.pkg , false", + "..some , some , true", + "some.. , some , true", + "*..some , some , false", + "some..* , some , false", + "..some , asome , false", + "some.. , somea , false", + "*.*.* , wrong.arbitrary.pkg , true", + "*.*.* , wrong.arbitrary.pkg.toomuch , false", + "some.arbi*.pk*.. , some.arbitrary.pkg.whatever , true", + "some.arbi*.. , some.brbitrary.pkg , false", + "some.*rary.*kg.. , some.arbitrary.pkg.whatever , true", + "some.*rary.. , some.arbitrarz.pkg , false", + "some.pkg , someepkg , false", + "..pkg.. , some.random.pkg.maybe.anywhere , true", + "..p.. , s.r.p.m.a , true", + "*..pkg..* , some.random.pkg.maybe.anywhere , true", + "*..p..* , s.r.p.m.a , true", + "..[a|b|c].pk*.. , some.a.pkg.whatever , true", + "..[b|c].pk*.. , some.a.pkg.whatever , false", + "..[a|b*].pk*.. , some.bitrary.pkg.whatever , true", + "..[a|b*].pk*.. , some.a.pkg.whatever , true", + "..[a|b*].pk*.. , some.arbitrary.pkg.whatever , false", + "..[*c*|*d*].pk*.. , some.anydinside.pkg.whatever , true", + "..[*c*|*d*].pk*.. , some.nofit.pkg.whatever , false", + }) public void match(String matcher, String target, boolean matches) { assertThat(PackageMatcher.of(matcher).matches(target)) .as("package matches") @@ -56,26 +65,35 @@ public void match(String matcher, String target, boolean matches) { @Test @DataProvider(value = { - "some.(*).pkg | some.arbitrary.pkg | arbitrary", - "some.arb(*)ry.pkg | some.arbitrary.pkg | itra", - "some.arb(*)ry.pkg | some.arbit.rary.pkg | null", - "some.(*).matches.(*).pkg | some.first.matches.second.pkg | first:second", - "(*).matches.(*) | start.matches.end | start:end", - "(*).(*).(*).(*) | a.b.c.d | a:b:c:d", - "(*) | some | some", - "some.(*).pkg | some.in.between.pkg | null", - "some.(**).pkg | some.in.between.pkg | in.between", - "some.(**).pkg.(*) | some.in.between.pkg.addon | in.between:addon", - "some(**)pkg | somerandom.in.between.longpkg | random.in.between.long", - "some.(**).pkg | somer.in.between.pkg | null", - "some.(**).pkg | some.in.between.gpkg | null", - "so(*)me.(**)pkg.an(*).more | soinfme.in.between.gpkg.and.more | inf:in.between.g:d", - "so(*)me.(**)pkg.an(*).more | soinfme.in.between.gpkg.an.more | null", - "so(**)me | some | null", - "so(*)me | some | null", - "(**)so | awe.some.aso | awe.some.a", - "so(**) | soan.some.we | an.some.we", - }, splitBy = "\\|") + "some.(*).pkg , some.arbitrary.pkg , arbitrary", + "some.arb(*)ry.pkg , some.arbitrary.pkg , itra", + "some.arb(*)ry.pkg , some.arbit.rary.pkg , null", + "some.(*).matches.(*).pkg , some.first.matches.second.pkg , first:second", + "(*).matches.(*) , start.matches.end , start:end", + "(*).(*).(*).(*) , a.b.c.d , a:b:c:d", + "(*) , some , some", + "some.(*).pkg , some.in.between.pkg , null", + "some.(**).pkg , some.in.between.pkg , in.between", + "some.(**).pkg.(*) , some.in.between.pkg.addon , in.between:addon", + "some(**)pkg , somerandom.in.between.longpkg , random.in.between.long", + "some.(**).pkg , somer.in.between.pkg , null", + "some.(**).pkg , some.in.between.gpkg , null", + "so(*)me.(**)pkg.an(*).more , soinfme.in.between.gpkg.and.more , inf:in.between.g:d", + "so(*)me.(**)pkg.an(*).more , soinfme.in.between.gpkg.an.more , null", + "so(**)me , some , null", + "so(*)me , some , null", + "(**)so , awe.some.aso , awe.some.a", + "so(**) , soan.some.we , an.some.we", + "..(a|b).pk*.(c|d).. , some.a.pkg.d.whatever , a:d", + "..(a|b).[p|*g].(c|d).. , some.a.pkg.d.whatever , a:d", + "..[a|b|c].pk*.(c|d).. , some.c.pkg.d.whatever , d", + "..(a|b*|cd).pk*.(**).end , some.bitrary.pkg.in.between.end, bitrary:in.between", + "..(a.b|c.d).pk* , some.a.b.pkg , a.b", + "..[application|domain.*|infrastructure].(*).. , com.example.application.a.http , a", + "..[application|domain.*|infrastructure].(*).. , com.example.domain.api.a , a", + "..[application|domain.*|infrastructure].(*).. , com.example.domain.logic.a , a", + "..[application|domain.*|infrastructure].(*).. , com.example.infrastructure.a.file , a" + }) public void capture_groups(String matcher, String target, String groupString) { assertThat(PackageMatcher.of(matcher).match(target).isPresent()) .as("'%s' matching '%s'", matcher, target) @@ -113,6 +131,41 @@ public void should_reject_capturing_with_two_dots() { PackageMatcher.of("some.(..).package"); } + @Test + public void should_reject_non_alternating_alternatives() { + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("Package Identifier does not allow alternation brackets '[]' without specifying any alternative via '|' inside"); + + PackageMatcher.of("some.[nonalternating].package"); + } + + @Test + public void should_reject_toplevel_alternations() { + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("Package Identifier only supports '|' inside of '[]' or '()'"); + + PackageMatcher.of("some.pkg|other.pkg"); + } + + @DataProvider + public static Object[][] data_reject_nesting_of_groups() { + return testForEach( + "some.(pkg.(other).pkg)..", + "some.(pkg.[a|b].pkg)..", + "some.[inside.(pkg).it|other.(pkg).it].pkg", + "some.[inside.[a|b].it|other].pkg" + ); + } + + @Test + @UseDataProvider + public void test_reject_nesting_of_groups(String packageIdentifier) { + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("Package Identifier does not support nesting '()' or '[]' within other '()' or '[]'"); + + PackageMatcher.of(packageIdentifier); + } + @Test public void should_reject_illegal_characters() { String illegalPackageIdentifier = "some" + PackageMatcher.TWO_STAR_REGEX_MARKER + "package";