Skip to content

Commit

Permalink
feat: Improve handling of complex regexes like email, password or uri
Browse files Browse the repository at this point in the history
Patterns used to validate email, password and uri can be quite complex. The current generators
are struggling to generate valid values, especially when a fixed, large length is required. In these cases
specific code is introduced in this commit to handle these 3 particular cases
  • Loading branch information
en-milie committed Sep 19, 2024
1 parent f937248 commit 910e422
Show file tree
Hide file tree
Showing 9 changed files with 304 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ private String generateWithAdjustedLength(Schema schema, int adjustedLength) {
int fromSchemaLengthAdjusted = (fromSchemaLength.intValue() > Integer.MAX_VALUE / 100 - adjustedLength) ? Integer.MAX_VALUE / 100 : fromSchemaLength.intValue();
int generatedStringLength = fromSchemaLengthAdjusted + adjustedLength;

String generated = StringGenerator.generateExactLength(pattern, generatedStringLength);
String generated = StringGenerator.generateExactLength(schema, pattern, generatedStringLength);

if (CatsModelUtils.isByteArraySchema(schema)) {
return Base64.getEncoder().encodeToString(generated.getBytes(StandardCharsets.UTF_8));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import jakarta.inject.Singleton;

import java.util.List;
import java.util.Locale;

/**
* A generator class implementing interfaces for generating valid and invalid password data formats.
Expand All @@ -21,7 +22,7 @@ public Object generate(Schema<?> schema) {

@Override
public boolean appliesTo(String format, String propertyName) {
return "password".equalsIgnoreCase(format);
return "password".equalsIgnoreCase(format) || propertyName.toLowerCase(Locale.ROOT).endsWith("password");
}

@Override
Expand Down
45 changes: 45 additions & 0 deletions src/main/java/com/endava/cats/generator/simple/RegexFlattener.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package com.endava.cats.generator.simple;

/**
 * Rewrites verbose regex fragments into their shorthand equivalents so that the
 * resulting pattern is shorter while accepting the same input.
 * <p>
 * All rewrites are purely textual: a fixed piece of pattern text is swapped for a
 * fixed replacement. No regex parsing is performed, so fragments that appear in an
 * escaped or otherwise unusual context are rewritten as well.
 */
public abstract class RegexFlattener {

    /**
     * Flattens a regex by replacing spelled-out character classes and quantifiers
     * with their shorthand form.
     * <p>
     * {@link #useNonCapturingGroups(String)} is currently not applied by this method;
     * callers that want it must invoke it explicitly.
     *
     * @param regex the regex to flatten, must not be {@code null}
     * @return the flattened regex
     */
    public static String flattenRegex(String regex) {
        return simplifyQuantifiers(simplifyCharacterClasses(regex));
    }

    /**
     * Turns plain capturing groups whose first nesting level contains an alternation
     * into non-capturing groups, e.g. {@code (a|b)} becomes {@code (?:a|b)}.
     * <p>
     * Groups that already start with {@code (?} (non-capturing groups, lookarounds,
     * named groups, inline flags) are left untouched, because prefixing them with
     * {@code ?:} would produce an invalid pattern.
     *
     * @param regex the regex to process, must not be {@code null}
     * @return the regex with qualifying capturing groups made non-capturing
     */
    public static String useNonCapturingGroups(String regex) {
        // "(" not followed by "?", and followed by a "|" before any other parenthesis.
        return regex.replaceAll("\\((?!\\?)(?=[^()]*\\|)", "(?:");
    }

    /**
     * Replaces spelled-out character classes with the predefined ones.
     * Literal replacement is used on purpose: the searched tokens contain regex
     * metacharacters ({@code [}, {@code ^}, {@code \}) that must not be interpreted.
     */
    private static String simplifyCharacterClasses(String regex) {
        return regex
                .replace("[a-zA-Z0-9_]", "\\w")
                .replace("[0-9]", "\\d")
                .replace("[\\s\\t\\r\\n\\f]", "\\s")
                // the negated forms are matched after the positive rewrites above
                .replace("[^\\d]", "\\D")
                .replace("[^\\w]", "\\W")
                .replace("[^\\s]", "\\S");
    }

    /**
     * Replaces the range quantifiers {@code {0,1}}, {@code {1,}} and {@code {0,}}
     * with {@code ?}, {@code +} and {@code *} respectively.
     */
    private static String simplifyQuantifiers(String regex) {
        return regex
                .replace("{0,1}", "?")
                .replace("{1,}", "+")
                .replace("{0,}", "*");
    }
}
156 changes: 128 additions & 28 deletions src/main/java/com/endava/cats/generator/simple/StringGenerator.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.endava.cats.generator.simple;

import com.endava.cats.util.CatsModelUtils;
import com.endava.cats.util.CatsUtil;
import com.github.curiousoddman.rgxgen.RgxGen;
import io.github.ludovicianul.prettylogger.PrettyLogger;
Expand All @@ -14,12 +15,19 @@
import org.springframework.util.CollectionUtils;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import static com.endava.cats.util.CatsModelUtils.isEmail;
import static com.endava.cats.util.CatsModelUtils.isPassword;
import static com.endava.cats.util.CatsModelUtils.isUri;

/**
* Generates strings based on different criteria.
*/
Expand Down Expand Up @@ -49,6 +57,11 @@ public class StringGenerator {

private static final Pattern LENGTH_INLINE_PATTERN = Pattern.compile("(\\^)?(\\[[^]]*]\\{\\d+}|\\(\\[[^]]*]\\{\\d+}\\)\\?)*(\\$)?");

private static final String ALPHANUMERIC = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
private static final String[] DOMAINS = {"example", "cats", "google", "yahoo"};
private static final String[] TLDS = {".com", ".net", ".org", ".io"};
private static final String[] URI_SCHEMES = {"http", "https", "ftp", "file"};

/**
* Represents an empty string.
*/
Expand Down Expand Up @@ -127,12 +140,17 @@ public static String generateLargeString(int times) {
* @param length the desired length
* @return a generated value of exact length provided
*/
public static String generateExactLength(String regex, int length) {
public static String generateExactLength(Schema<?> schema, String regex, int length) {
if (length <= 0) {
return EMPTY;
}

StringBuilder initialValue = new StringBuilder(StringGenerator.sanitize(generate(cleanPattern(regex), length, length)));
String stringFromComplexRegex = generateComplexRegex(schema, length);
if (stringFromComplexRegex != null) {
return stringFromComplexRegex;
}

StringBuilder initialValue = new StringBuilder(StringGenerator.sanitize(generate(regex, length, length)));

if (initialValue.isEmpty()) {
return EMPTY;
Expand Down Expand Up @@ -162,9 +180,10 @@ public static String generateExactLength(String regex, int length) {
*/
public static String generate(String pattern, int min, int max) {
LOGGER.debug("Generate for pattern {} min {} max {}", pattern, min, max);
pattern = cleanPattern(pattern);
String cleanedPattern = cleanPattern(pattern);
String flattenedPattern = RegexFlattener.flattenRegex(cleanedPattern);

GeneratorParams generatorParams = new GeneratorParams(pattern, min, max);
GeneratorParams generatorParams = new GeneratorParams(flattenedPattern, min, max, cleanedPattern);

String generatedWithRgxGenerator = callGenerateTwice(StringGenerator::generateUsingRgxGenerator, generatorParams);
if (generatedWithRgxGenerator != null) {
Expand All @@ -187,18 +206,18 @@ public static String generate(String pattern, int min, int max) {
public static String callGenerateTwice(Function<GeneratorParams, String> generator, GeneratorParams generatorParams) {
try {
String initialVersion = generator.apply(generatorParams);
if (initialVersion.matches(generatorParams.pattern)) {
LOGGER.debug("Generated value " + initialVersion + " matched " + generatorParams.pattern);
if (initialVersion.matches(generatorParams.originalPattern())) {
LOGGER.debug("Generated value " + initialVersion + " matched " + generatorParams.originalPattern());
return initialVersion;
}
} catch (Exception e) {
LOGGER.debug("Generator {} failed #atempt 1", generator.getClass().getSimpleName());
}

try {
String secondVersion = generator.apply(new GeneratorParams(removeLookaheadAssertions(generatorParams.pattern), generatorParams.min, generatorParams.max));
if (secondVersion.matches(generatorParams.pattern)) {
LOGGER.debug("Generated value with lookaheads removed " + secondVersion + " matched " + generatorParams.pattern);
String secondVersion = generator.apply(new GeneratorParams(removeLookaheadAssertions(generatorParams.cleanedPattern()), generatorParams.min, generatorParams.max, generatorParams.originalPattern()));
if (secondVersion.matches(generatorParams.originalPattern())) {
LOGGER.debug("Generated value with lookaheads removed " + secondVersion + " matched " + generatorParams.originalPattern());
return secondVersion;
}
} catch (Exception e) {
Expand Down Expand Up @@ -236,7 +255,8 @@ public static String cleanPattern(String pattern) {
}

private static String generateUsingRegexpGen(GeneratorParams generatorParams) {
String pattern = generatorParams.pattern;
String pattern = generatorParams.cleanedPattern();
String originalPattern = generatorParams.originalPattern();
int min = generatorParams.min;
int max = generatorParams.max;

Expand All @@ -248,7 +268,7 @@ private static String generateUsingRegexpGen(GeneratorParams generatorParams) {
}
String generated = generator.generate(REGEXP_RANDOM_GEN, min, max);

if (generated.matches(pattern)) {
if (generated.matches(originalPattern)) {
LOGGER.debug("Generated using REGEXP {} matches {}", generated, pattern);
return generated;
}
Expand All @@ -259,21 +279,22 @@ private static String generateUsingRegexpGen(GeneratorParams generatorParams) {
}

private static String generateUsingCatsRegexGenerator(GeneratorParams generatorParams) {
String pattern = generatorParams.pattern;
String pattern = generatorParams.cleanedPattern();
String originalPattern = generatorParams.originalPattern();
int min = generatorParams.min;
int max = generatorParams.max;

for (int i = 0; i < MAX_ATTEMPTS_GENERATE; i++) {
Pattern compiledPattern = Pattern.compile(pattern);
String secondVersionBase = RegexGenerator.generate(compiledPattern, EMPTY, min, max);

if (secondVersionBase.matches(pattern)) {
if (secondVersionBase.matches(originalPattern)) {
LOGGER.debug("Generated using CATS generator {} and matches {}", secondVersionBase, pattern);
return secondVersionBase;
}
String generatedString = composeString(secondVersionBase, min, max);

if (generatedString.matches(pattern)) {
if (generatedString.matches(originalPattern)) {
LOGGER.debug("Generated using CATS generator {} and matches {}", generatedString, pattern);
return generatedString;
}
Expand All @@ -284,19 +305,20 @@ private static String generateUsingCatsRegexGenerator(GeneratorParams generatorP
private static String generateUsingRgxGenerator(GeneratorParams generatorParams) {
int attempts = 0;
String generatedValue;
String pattern = generatorParams.pattern;
String pattern = generatorParams.cleanedPattern();
String originalPattern = generatorParams.originalPattern();
int min = generatorParams.min;
int max = generatorParams.max;

try {
do {
generatedValue = new RgxGen(pattern).generate();
if ((hasLengthInline(pattern) || isSetOfAlternatives(pattern) || (min <= 0 && max <= 0)) && generatedValue.matches(pattern)) {
if ((hasLengthInline(pattern) || isSetOfAlternatives(pattern) || (min <= 0 && max <= 0)) && generatedValue.matches(originalPattern)) {
return generatedValue;
}
generatedValue = composeString(generatedValue, min, max);
attempts++;
} while (attempts < MAX_ATTEMPTS_GENERATE && !generatedValue.matches(pattern));
} while (attempts < MAX_ATTEMPTS_GENERATE && !generatedValue.matches(originalPattern));
} catch (Exception e) {
LOGGER.debug("RGX generator failed, returning empty.", e);
return ALPHANUMERIC_VALUE;
Expand Down Expand Up @@ -443,6 +465,11 @@ public static String generateValueBasedOnMinMax(Schema<?> property) {
maxLength = minLength;
}

String complexRegexGenerated = generateComplexRegex(property, Math.max(1, maxLength));
if (complexRegexGenerated != null) {
return complexRegexGenerated;
}

return StringGenerator.generate(pattern, minLength, maxLength);
}

Expand Down Expand Up @@ -481,30 +508,103 @@ public static String removeLookaheadAssertions(String regex) {
return regex;
}

/**
 * Generates an email address of the form {@code local@domain.tld} with the requested total length.
 * <p>
 * The domain and TLD are picked from {@code DOMAINS} and {@code TLDS}; the local part is filled
 * with characters from {@code ALPHANUMERIC} until the total length is reached.
 * <p>
 * The local part always has at least one character. When the picked domain leaves no room for it,
 * the domain is shortened (down to one character) so the requested length can still be met.
 * If {@code length} is smaller than the shortest possible address for the picked TLD
 * (one local character, {@code '@'}, one domain character and the TLD), that shortest address is
 * returned and is therefore longer than requested.
 *
 * @param length the desired total length of the email address
 * @return an email address of the given length whenever that length can hold a well-formed address
 */
public static String generateFixedLengthEmail(int length) {
    String domain = DOMAINS[CatsUtil.random().nextInt(DOMAINS.length)];
    String tld = TLDS[CatsUtil.random().nextInt(TLDS.length)];

    // Characters left for the local part once '@', the domain and the TLD are accounted for.
    int localPartLength = length - domain.length() - tld.length() - 1;
    if (localPartLength < 1) {
        // Not enough room: an empty local part would yield an invalid address such as "@example.com".
        // Reserve one local character plus '@' and give the domain whatever is left (at least one character).
        int domainBudget = length - tld.length() - 2;
        domain = domain.substring(0, Math.max(1, Math.min(domain.length(), domainBudget)));
        localPartLength = 1;
    }

    StringBuilder email = new StringBuilder(localPartLength + 1 + domain.length() + tld.length());
    for (int i = 0; i < localPartLength; i++) {
        email.append(ALPHANUMERIC.charAt(CatsUtil.random().nextInt(ALPHANUMERIC.length())));
    }

    return email.append('@').append(domain).append(tld).toString();
}

/**
 * Generates a URI of the form {@code scheme://domain.tld/path} with the requested total length.
 * <p>
 * The scheme, domain and TLD are picked from {@code URI_SCHEMES}, {@code DOMAINS} and {@code TLDS}.
 * When {@code length} is larger than {@code scheme://domain.tld}, a path starting with {@code '/'}
 * and filled with characters from {@code ALPHANUMERIC} is appended to reach the exact length.
 * When {@code length} is not larger, the fixed part is cut to {@code length} characters, which may
 * leave an incomplete URI.
 *
 * @param length the desired total length of the URI
 * @return a string of exactly {@code length} characters, or an empty string when {@code length} is not positive
 */
public static String generateFixedLengthUri(int length) {
    // A negative length would otherwise reach substring(0, length) and throw.
    if (length <= 0) {
        return "";
    }

    String scheme = URI_SCHEMES[CatsUtil.random().nextInt(URI_SCHEMES.length)];
    String domain = DOMAINS[CatsUtil.random().nextInt(DOMAINS.length)];
    String tld = TLDS[CatsUtil.random().nextInt(TLDS.length)];

    String fixedPart = scheme + "://" + domain + tld;
    if (length <= fixedPart.length()) {
        return fixedPart.substring(0, length);
    }

    // The path always starts with '/', the rest is random padding up to the requested length.
    StringBuilder uri = new StringBuilder(length).append(fixedPart).append('/');
    while (uri.length() < length) {
        uri.append(ALPHANUMERIC.charAt(CatsUtil.random().nextInt(ALPHANUMERIC.length())));
    }

    return uri.toString();
}

/**
 * There are complex regexes which will fail to generate a string of a given length, especially for a fixed and large length.
 * This is particularly true for email addresses and URIs where patterns can be quite complex.
 * Sometimes, for large generated strings, the match of the generated string against the given regex will fail with StackOverflowError.
 * <p>
 * This method bypasses the regex-based generators for such cases and builds the value directly.
 * It supports URIs, emails and passwords, as classified by {@code CatsModelUtils.isUri},
 * {@code CatsModelUtils.isEmail} and {@code CatsModelUtils.isPassword} from the schema pattern and the
 * lower-cased field name stored in the {@code X_CATS_FIELD_NAME} schema extension.
 * The generated value is not checked against the schema pattern.
 *
 * @param schema the schema
 * @param length the length
 * @return a string of the given length, or {@code null} when the schema has no pattern, is not one of the
 * supported cases, or is a password shorter than the fixed password prefix; callers are expected to fall
 * back to the regular generators on {@code null}
 */
private static String generateComplexRegex(Schema<?> schema, int length) {
    String pattern = schema.getPattern();
    if (StringUtils.isBlank(pattern)) {
        return null;
    }

    // Null-safe lookup: both the extensions map and the stored field name may be absent.
    String lowerField = Optional.ofNullable(schema.getExtensions())
            .map(extensions -> extensions.get(CatsModelUtils.X_CATS_FIELD_NAME))
            .map(Object::toString)
            .orElse("")
            .toLowerCase(Locale.ROOT);

    if (isUri(pattern, lowerField)) {
        return generateFixedLengthUri(length);
    }
    if (isEmail(pattern, lowerField)) {
        return generateFixedLengthEmail(length);
    }
    if (isPassword(pattern, lowerField)) {
        String prefix = "catsISC00l#";
        if (length < prefix.length()) {
            // The fixed prefix does not fit (and nextPrint rejects negative counts):
            // let the caller fall back to the regex-based generators.
            return null;
        }
        return prefix + RandomStringUtils.secure().nextPrint(length - prefix.length());
    }

    return null;
}

/**
* A record that holds the parameters for the string generator.
*
* @param pattern the pattern to check
* @param min the minimum length
* @param max the maximum length
* @param cleanedPattern the pattern to check
* @param min the minimum length
* @param max the maximum length
* @param originalPattern the original pattern
*/
public record GeneratorParams(String pattern, int min, int max) {
public record GeneratorParams(String cleanedPattern, int min, int max, String originalPattern) {
/**
* Instantiates a new Generator params.
*
* @param pattern the pattern
* @param min the min
* @param max the max
* @param cleanedPattern the pattern
* @param min the min
* @param max the max
* @param originalPattern the original pattern
*/
public GeneratorParams(String pattern, int min, int max) {
public GeneratorParams(String cleanedPattern, int min, int max, String originalPattern) {
this.min = min;
this.max = max;
this.originalPattern = inlineLengthIfNeeded(originalPattern, min, max);
this.cleanedPattern = inlineLengthIfNeeded(cleanedPattern, min, max);
}

private String inlineLengthIfNeeded(String pattern, int min, int max) {
if (!hasLength(pattern) && (min > 0 || max > 0)) {
this.pattern = pattern + "{" + min + "," + max + "}";
} else {
this.pattern = pattern;
return pattern + "{" + min + "," + max + "}";
}
return pattern;
}
}
}
4 changes: 3 additions & 1 deletion src/main/java/com/endava/cats/model/FuzzingData.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package com.endava.cats.model;

import com.endava.cats.http.HttpMethod;
import com.endava.cats.util.JsonUtils;
import com.endava.cats.util.CatsModelUtils;
import com.endava.cats.util.JsonUtils;
import io.github.ludovicianul.prettylogger.PrettyLogger;
import io.github.ludovicianul.prettylogger.PrettyLoggerFactory;
import io.swagger.v3.oas.models.OpenAPI;
Expand Down Expand Up @@ -163,6 +163,8 @@ private Set<CatsField> getFields(Schema<?> schema, String prefix) {

return catsFields.stream()
.filter(catsField -> this.getRequestPropertyTypes().get(catsField.getName()) != null)
//this is a bit of a hack that might be abused in the future to include a full object as extension. currently it only holds the field name
.peek(catsField -> catsField.getSchema().addExtension(CatsModelUtils.X_CATS_FIELD_NAME, catsField.getName()))
.collect(Collectors.toSet());
}

Expand Down
Loading

0 comments on commit 910e422

Please sign in to comment.