Skip to content

Commit

Permalink
Manually backport #11060 (Simple regex normalization refactor) (#11147)…
Browse files Browse the repository at this point in the history
… (#11185)

* Manually backport 11060

Signed-off-by: Stephen Crawford <steecraw@amazon.com>

* spotless

Signed-off-by: Stephen Crawford <steecraw@amazon.com>

---------

Signed-off-by: Stephen Crawford <steecraw@amazon.com>
(cherry picked from commit 725d505)

Co-authored-by: Stephen Crawford <65832608+scrawfor99@users.noreply.github.com>
  • Loading branch information
cwperks and stephen-crawford authored Nov 13, 2023
1 parent ac4de44 commit 2eb43d3
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 27 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Dependencies

### Changed
- Use iterative approach to evaluate Regex.simpleMatch ([#11060](https://github.com/opensearch-project/OpenSearch/pull/11060))

### Deprecated

Expand Down
54 changes: 27 additions & 27 deletions server/src/main/java/org/opensearch/common/regex/Regex.java
Original file line number Diff line number Diff line change
Expand Up @@ -129,35 +129,35 @@ public static boolean simpleMatch(String pattern, String str, boolean caseInsens
}

private static boolean simpleMatchWithNormalizedStrings(String pattern, String str) {
final int firstIndex = pattern.indexOf('*');
if (firstIndex == -1) {
return pattern.equals(str);
}
if (firstIndex == 0) {
if (pattern.length() == 1) {
return true;
}
final int nextIndex = pattern.indexOf('*', firstIndex + 1);
if (nextIndex == -1) {
// str.endsWith(pattern.substring(1)), but avoiding the construction of pattern.substring(1):
return str.regionMatches(str.length() - pattern.length() + 1, pattern, 1, pattern.length() - 1);
} else if (nextIndex == 1) {
// Double wildcard "**" - skipping the first "*"
return simpleMatchWithNormalizedStrings(pattern.substring(1), str);
}
final String part = pattern.substring(1, nextIndex);
int partIndex = str.indexOf(part);
while (partIndex != -1) {
if (simpleMatchWithNormalizedStrings(pattern.substring(nextIndex), str.substring(partIndex + part.length()))) {
return true;
}
partIndex = str.indexOf(part, partIndex + 1);
int sIdx = 0, pIdx = 0, match = 0, wildcardIdx = -1;
while (sIdx < str.length()) {
// both chars matching, incrementing both pointers
if (pIdx < pattern.length() && str.charAt(sIdx) == pattern.charAt(pIdx)) {
sIdx++;
pIdx++;
} else if (pIdx < pattern.length() && pattern.charAt(pIdx) == '*') {
// wildcard found, only incrementing pattern pointer
wildcardIdx = pIdx;
match = sIdx;
pIdx++;
} else if (wildcardIdx != -1) {
// last pattern pointer was a wildcard, incrementing string pointer
pIdx = wildcardIdx + 1;
match++;
sIdx = match;
} else {
// current pattern pointer is not a wildcard, last pattern pointer was also not a wildcard
// characters do not match
return false;
}
return false;
}
return str.regionMatches(0, pattern, 0, firstIndex)
&& (firstIndex == pattern.length() - 1 // only wildcard in pattern is at the end, so no need to look at the rest of the string
|| simpleMatchWithNormalizedStrings(pattern.substring(firstIndex), str.substring(firstIndex)));

// check for remaining characters in pattern
while (pIdx < pattern.length() && pattern.charAt(pIdx) == '*') {
pIdx++;
}

return pIdx == pattern.length();
}

/**
Expand Down
15 changes: 15 additions & 0 deletions server/src/test/java/org/opensearch/common/regex/RegexTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,21 @@ public void testDoubleWildcardMatch() {
assertTrue(Regex.simpleMatch("fff*******ddd", "fffabcddd"));
assertTrue(Regex.simpleMatch("fff*******ddd", "FffAbcdDd", true));
assertFalse(Regex.simpleMatch("fff******ddd", "fffabcdd"));
assertFalse(Regex.simpleMatch("fff*******ddd", "FffAbcdDd", false));
assertTrue(Regex.simpleMatch("abCDefGH******ddd", "abCDefGHddd", false));
assertTrue(Regex.simpleMatch("******", "a"));
assertTrue(Regex.simpleMatch("***WILDcard***", "aaaaaaaaWILDcardZZZZZZ", false));
assertFalse(Regex.simpleMatch("***xxxxx123456789xxxxxx***", "xxxxxabcdxxxxx", false));
assertFalse(Regex.simpleMatch("***xxxxxabcdxxxxx***", "xxxxxABCDxxxxx", false));
assertTrue(Regex.simpleMatch("***xxxxxabcdxxxxx***", "xxxxxABCDxxxxx", true));
assertTrue(Regex.simpleMatch("**stephenIsSuperCool**", "ItIsTrueThatStephenIsSuperCoolSoYouShouldLetThisIn", true));
assertTrue(
Regex.simpleMatch(
"**w**X**y**Z**",
"abcdeFGHIJKLMNOPqrstuvwabcdeFGHIJKLMNOPqrstuvwXabcdeFGHIJKLMNOPqrstuvwXyabcdeFGHIJKLMNOPqrstuvwXyZ",
false
)
);
}

public void testSimpleMatch() {
Expand Down

0 comments on commit 2eb43d3

Please sign in to comment.