Skip to content

Commit

Permalink
add alternatives to package matcher
Browse files Browse the repository at this point in the history
This change will allow defining alternations within `()` and `[]`. I.e. it is now possible to match both `some.a.pkg` and `some.b.pkg` with a single pattern `some.[a|b].pkg` or capture it via `some.(a|b).pkg`. The demand for this comes from users dealing with legacy package structures, or where the package structure is dictated from the outside (e.g. via enterprise architecture). They might sometimes need to create the slices from different parts of the package tree, e.g. `..[application|domain].(*)`. While this is already possible using the `SliceAssignment` API this is not very straight forward and also demands quite some boilerplate.

We apply the following constraints to the new API to keep it simple to implement and easy and consistent for the user:
1) No nested groups via `[]` or `()`
  1.1) `()` within `()` would just be confusing and there does not seem to be any valid use case for it
  1.2) `()` within `[]` would be ambiguous and hard to implement (e.g. within `[first.(*)|second.(*)]` the second `(*)` would be capturing group 2 within the regex, even though group 1 and 2 are mutually exclusive)
  1.3) `[]` within `[]` would make the implementation a lot more complex, since we would now need to match the correct brackets of nested groups when creating the regex
  1.4) `[]` within `()` would be easy to implement, but make the API inconsistent ("why can I nest it like this, but not the other way around?")
2) No toplevel alternations. This goes together with 1), since it would make the implementation more complex to prevent nested capturing groups

Issue: TNG#662
Signed-off-by: e.solutions <17569373+Pfoerd@users.noreply.github.com>
on-behalf-of: @e-solutions-GmbH <info@esolutions.de>
Signed-off-by: Peter Gafert <peter.gafert@tngtech.com>
  • Loading branch information
Pfoerd authored and codecholeric committed Jul 11, 2022
1 parent cd783af commit a51b918
Show file tree
Hide file tree
Showing 2 changed files with 154 additions and 55 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import java.util.stream.Stream;

import com.google.common.base.CharMatcher;
import com.google.common.collect.ImmutableSet;
import com.tngtech.archunit.PublicAPI;

Expand All @@ -41,6 +43,12 @@
* <li><b>{@code '*.*.pack*..'}</b> matches <b>{@code 'a.b.packfix.c.d'}</b>,
* but neither <b>{@code 'a.packfix.b'}</b> nor <b>{@code 'a.b.prepack.d'}</b></li>
* </ul>
* You can also use alternations with the '|' operator within brackets. For example
* <ul>
* <li><b>{@code 'pack.[a.c|b*].d'}</b> matches <b>{@code 'pack.a.c.d'} or <b>{@code 'pack.bar.d'}</b>, but neither
* <b>{@code 'pack.a.d'}</b> nor <b>{@code 'pack.b.c.d'}</b></li>
* </ul>
* <p>
* Furthermore, the use of capturing groups is supported. In this case '(*)' matches any sequence of characters,
* but not the dot '.', while '(**)' matches any sequence including the dot. <br>
* For example
Expand All @@ -49,6 +57,7 @@
* <li><b>{@code '..service.(**)'}</b> matches <b>{@code 'a.service.hello.more'}</b> and group 1 would be <b>{@code 'hello.more'}</b></li>
* <li><b>{@code 'my.(*)..service.(**)'}</b> matches <b>{@code 'my.company.some.service.hello.more'}</b>
* and group 1 would be <b>{@code 'company'}</b>, while group 2 would be <b>{@code 'hello.more'}</b></li>
* <li><b>{@code '..service.(a|b*)..'}</b> matches <b>{@code 'a.service.bar.more'}</b> and group 1 would be <b>{@code 'bar'}</b></li>
* </ul>
* Create via {@link PackageMatcher#of(String) PackageMatcher.of(packageIdentifier)}
*/
Expand All @@ -62,7 +71,14 @@ public final class PackageMatcher {
private static final String TWO_STAR_CAPTURE_REGEX = "(\\w+(?:\\.\\w+)*)";
static final String TWO_STAR_REGEX_MARKER = "#%#%#";

private static final Set<Character> PACKAGE_CONTROL_SYMBOLS = ImmutableSet.of('*', '(', ')', '.');
private static final Pattern ILLEGAL_ALTERNATION_PATTERN = Pattern.compile("\\[[^|]*]");
private static final Pattern ILLEGAL_NESTED_GROUP_PATTERN = Pattern.compile(
nestedGroupRegex('(', ')', '(')
+ "|" + nestedGroupRegex('(', ')', '[')
+ "|" + nestedGroupRegex('[', ']', '(')
+ "|" + nestedGroupRegex('[', ']', '['));

private static final Set<Character> PACKAGE_CONTROL_SYMBOLS = ImmutableSet.of('*', '(', ')', '.', '|', '[', ']');

private final String packageIdentifier;
private final Pattern packagePattern;
Expand All @@ -84,9 +100,34 @@ private void validate(String packageIdentifier) {
if (packageIdentifier.contains("(..)")) {
throw new IllegalArgumentException("Package Identifier does not support capturing via (..), use (**) instead");
}
if (ILLEGAL_ALTERNATION_PATTERN.matcher(packageIdentifier).find()) {
throw new IllegalArgumentException(
"Package Identifier does not allow alternation brackets '[]' without specifying any alternative via '|' inside");
}
if (containsToplevelAlternation(packageIdentifier)) {
throw new IllegalArgumentException(
"Package Identifier only supports '|' inside of '[]' or '()'");
}
if (ILLEGAL_NESTED_GROUP_PATTERN.matcher(packageIdentifier).find()) {
throw new IllegalArgumentException("Package Identifier does not support nesting '()' or '[]' within other '()' or '[]'");
}
validateCharacters(packageIdentifier);
}

private boolean containsToplevelAlternation(String packageIdentifier) {
Stream<String> prefixesBeforeAlternation = IntStream.range(0, packageIdentifier.length())
.filter(i -> packageIdentifier.charAt(i) == '|')
.mapToObj(alternationIndex -> packageIdentifier.substring(0, alternationIndex));

return prefixesBeforeAlternation.anyMatch(beforeAlternation ->
sameNumberOfOccurrences(beforeAlternation, '(', ')')
&& sameNumberOfOccurrences(beforeAlternation, '[', ']'));
}

private boolean sameNumberOfOccurrences(String string, char first, char second) {
return CharMatcher.is(first).countIn(string) == CharMatcher.is(second).countIn(string);
}

private void validateCharacters(String packageIdentifier) {
for (int i = 0; i < packageIdentifier.length(); i++) {
char c = packageIdentifier.charAt(i);
Expand All @@ -99,12 +140,13 @@ private void validateCharacters(String packageIdentifier) {
}

private String convertToRegex(String packageIdentifier) {
return packageIdentifier.
replace(TWO_STAR_CAPTURE_LITERAL, TWO_STAR_REGEX_MARKER).
replace("*", "\\w+").
replace(".", "\\.").
replace(TWO_STAR_REGEX_MARKER, TWO_STAR_CAPTURE_REGEX).
replace("\\.\\.", TWO_DOTS_REGEX);
return packageIdentifier
.replaceAll("\\[(.*?)]", "(?:$1)") // replacing all '[..|..]' with '(?:..|..)'
.replace(TWO_STAR_CAPTURE_LITERAL, TWO_STAR_REGEX_MARKER)
.replace("*", "\\w+")
.replace(".", "\\.")
.replace(TWO_STAR_REGEX_MARKER, TWO_STAR_CAPTURE_REGEX)
.replace("\\.\\.", TWO_DOTS_REGEX);
}

/**
Expand Down Expand Up @@ -144,6 +186,10 @@ public String toString() {
return "PackageMatcher{" + packageIdentifier + '}';
}

private static String nestedGroupRegex(char outerOpeningChar, char outerClosingChar, char nestedOpeningChar) {
return "\\" + outerOpeningChar + "[^" + outerClosingChar + "]*\\" + nestedOpeningChar;
}

public static final class Result {
private final Matcher matcher;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
import com.tngtech.archunit.core.domain.PackageMatcher.Result;
import com.tngtech.java.junit.dataprovider.DataProvider;
import com.tngtech.java.junit.dataprovider.DataProviderRunner;
import com.tngtech.java.junit.dataprovider.UseDataProvider;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.runner.RunWith;

import static com.tngtech.archunit.core.domain.PackageMatcher.TO_GROUPS;
import static com.tngtech.archunit.testutil.Assertions.assertThat;
import static com.tngtech.java.junit.dataprovider.DataProviders.testForEach;

@RunWith(DataProviderRunner.class)
public class PackageMatcherTest {
Expand All @@ -20,34 +22,41 @@ public class PackageMatcherTest {

@Test
@DataProvider(value = {
"some.arbitrary.pkg | some.arbitrary.pkg | true",
"some.arbitrary.pkg | some.thing.different | false",
"some..pkg | some.arbitrary.pkg | true",
"some..middle..pkg | some.arbitrary.middle.more.pkg | true",
"*..pkg | some.arbitrary.pkg | true",
"some..* | some.arbitrary.pkg | true",
"*..pkg | some.arbitrary.pkg.toomuch | false",
"toomuch.some..* | some.arbitrary.pkg | false",
"*..wrong | some.arbitrary.pkg | false",
"some..* | wrong.arbitrary.pkg | false",
"..some | some | true",
"some.. | some | true",
"*..some | some | false",
"some..* | some | false",
"..some | asome | false",
"some.. | somea | false",
"*.*.* | wrong.arbitrary.pkg | true",
"*.*.* | wrong.arbitrary.pkg.toomuch | false",
"some.arbi*.pk*.. | some.arbitrary.pkg.whatever | true",
"some.arbi*.. | some.brbitrary.pkg | false",
"some.*rary.*kg.. | some.arbitrary.pkg.whatever | true",
"some.*rary.. | some.arbitrarz.pkg | false",
"some.pkg | someepkg | false",
"..pkg.. | some.random.pkg.maybe.anywhere | true",
"..p.. | s.r.p.m.a | true",
"*..pkg..* | some.random.pkg.maybe.anywhere | true",
"*..p..* | s.r.p.m.a | true"
}, splitBy = "\\|")
"some.arbitrary.pkg , some.arbitrary.pkg , true",
"some.arbitrary.pkg , some.thing.different , false",
"some..pkg , some.arbitrary.pkg , true",
"some..middle..pkg , some.arbitrary.middle.more.pkg , true",
"*..pkg , some.arbitrary.pkg , true",
"some..* , some.arbitrary.pkg , true",
"*..pkg , some.arbitrary.pkg.toomuch , false",
"toomuch.some..* , some.arbitrary.pkg , false",
"*..wrong , some.arbitrary.pkg , false",
"some..* , wrong.arbitrary.pkg , false",
"..some , some , true",
"some.. , some , true",
"*..some , some , false",
"some..* , some , false",
"..some , asome , false",
"some.. , somea , false",
"*.*.* , wrong.arbitrary.pkg , true",
"*.*.* , wrong.arbitrary.pkg.toomuch , false",
"some.arbi*.pk*.. , some.arbitrary.pkg.whatever , true",
"some.arbi*.. , some.brbitrary.pkg , false",
"some.*rary.*kg.. , some.arbitrary.pkg.whatever , true",
"some.*rary.. , some.arbitrarz.pkg , false",
"some.pkg , someepkg , false",
"..pkg.. , some.random.pkg.maybe.anywhere , true",
"..p.. , s.r.p.m.a , true",
"*..pkg..* , some.random.pkg.maybe.anywhere , true",
"*..p..* , s.r.p.m.a , true",
"..[a|b|c].pk*.. , some.a.pkg.whatever , true",
"..[b|c].pk*.. , some.a.pkg.whatever , false",
"..[a|b*].pk*.. , some.bitrary.pkg.whatever , true",
"..[a|b*].pk*.. , some.a.pkg.whatever , true",
"..[a|b*].pk*.. , some.arbitrary.pkg.whatever , false",
"..[*c*|*d*].pk*.. , some.anydinside.pkg.whatever , true",
"..[*c*|*d*].pk*.. , some.nofit.pkg.whatever , false",
})
public void match(String matcher, String target, boolean matches) {
assertThat(PackageMatcher.of(matcher).matches(target))
.as("package matches")
Expand All @@ -56,26 +65,35 @@ public void match(String matcher, String target, boolean matches) {

@Test
@DataProvider(value = {
"some.(*).pkg | some.arbitrary.pkg | arbitrary",
"some.arb(*)ry.pkg | some.arbitrary.pkg | itra",
"some.arb(*)ry.pkg | some.arbit.rary.pkg | null",
"some.(*).matches.(*).pkg | some.first.matches.second.pkg | first:second",
"(*).matches.(*) | start.matches.end | start:end",
"(*).(*).(*).(*) | a.b.c.d | a:b:c:d",
"(*) | some | some",
"some.(*).pkg | some.in.between.pkg | null",
"some.(**).pkg | some.in.between.pkg | in.between",
"some.(**).pkg.(*) | some.in.between.pkg.addon | in.between:addon",
"some(**)pkg | somerandom.in.between.longpkg | random.in.between.long",
"some.(**).pkg | somer.in.between.pkg | null",
"some.(**).pkg | some.in.between.gpkg | null",
"so(*)me.(**)pkg.an(*).more | soinfme.in.between.gpkg.and.more | inf:in.between.g:d",
"so(*)me.(**)pkg.an(*).more | soinfme.in.between.gpkg.an.more | null",
"so(**)me | some | null",
"so(*)me | some | null",
"(**)so | awe.some.aso | awe.some.a",
"so(**) | soan.some.we | an.some.we",
}, splitBy = "\\|")
"some.(*).pkg , some.arbitrary.pkg , arbitrary",
"some.arb(*)ry.pkg , some.arbitrary.pkg , itra",
"some.arb(*)ry.pkg , some.arbit.rary.pkg , null",
"some.(*).matches.(*).pkg , some.first.matches.second.pkg , first:second",
"(*).matches.(*) , start.matches.end , start:end",
"(*).(*).(*).(*) , a.b.c.d , a:b:c:d",
"(*) , some , some",
"some.(*).pkg , some.in.between.pkg , null",
"some.(**).pkg , some.in.between.pkg , in.between",
"some.(**).pkg.(*) , some.in.between.pkg.addon , in.between:addon",
"some(**)pkg , somerandom.in.between.longpkg , random.in.between.long",
"some.(**).pkg , somer.in.between.pkg , null",
"some.(**).pkg , some.in.between.gpkg , null",
"so(*)me.(**)pkg.an(*).more , soinfme.in.between.gpkg.and.more , inf:in.between.g:d",
"so(*)me.(**)pkg.an(*).more , soinfme.in.between.gpkg.an.more , null",
"so(**)me , some , null",
"so(*)me , some , null",
"(**)so , awe.some.aso , awe.some.a",
"so(**) , soan.some.we , an.some.we",
"..(a|b).pk*.(c|d).. , some.a.pkg.d.whatever , a:d",
"..(a|b).[p|*g].(c|d).. , some.a.pkg.d.whatever , a:d",
"..[a|b|c].pk*.(c|d).. , some.c.pkg.d.whatever , d",
"..(a|b*|cd).pk*.(**).end , some.bitrary.pkg.in.between.end, bitrary:in.between",
"..(a.b|c.d).pk* , some.a.b.pkg , a.b",
"..[application|domain.*|infrastructure].(*).. , com.example.application.a.http , a",
"..[application|domain.*|infrastructure].(*).. , com.example.domain.api.a , a",
"..[application|domain.*|infrastructure].(*).. , com.example.domain.logic.a , a",
"..[application|domain.*|infrastructure].(*).. , com.example.infrastructure.a.file , a"
})
public void capture_groups(String matcher, String target, String groupString) {
assertThat(PackageMatcher.of(matcher).match(target).isPresent())
.as("'%s' matching '%s'", matcher, target)
Expand Down Expand Up @@ -113,6 +131,41 @@ public void should_reject_capturing_with_two_dots() {
PackageMatcher.of("some.(..).package");
}

@Test
public void should_reject_non_alternating_alternatives() {
thrown.expect(IllegalArgumentException.class);
thrown.expectMessage("Package Identifier does not allow alternation brackets '[]' without specifying any alternative via '|' inside");

PackageMatcher.of("some.[nonalternating].package");
}

@Test
public void should_reject_toplevel_alternations() {
thrown.expect(IllegalArgumentException.class);
thrown.expectMessage("Package Identifier only supports '|' inside of '[]' or '()'");

PackageMatcher.of("some.pkg|other.pkg");
}

@DataProvider
public static Object[][] data_reject_nesting_of_groups() {
return testForEach(
"some.(pkg.(other).pkg)..",
"some.(pkg.[a|b].pkg)..",
"some.[inside.(pkg).it|other.(pkg).it].pkg",
"some.[inside.[a|b].it|other].pkg"
);
}

@Test
@UseDataProvider
public void test_reject_nesting_of_groups(String packageIdentifier) {
thrown.expect(IllegalArgumentException.class);
thrown.expectMessage("Package Identifier does not support nesting '()' or '[]' within other '()' or '[]'");

PackageMatcher.of(packageIdentifier);
}

@Test
public void should_reject_illegal_characters() {
String illegalPackageIdentifier = "some" + PackageMatcher.TWO_STAR_REGEX_MARKER + "package";
Expand Down

0 comments on commit a51b918

Please sign in to comment.