Skip to content

Commit

Permalink
fix: parsing langcfg patterns may fail if Oniguruma dialect was used
Browse files Browse the repository at this point in the history
  • Loading branch information
sebthom committed Jan 26, 2024
1 parent 3f9e2b6 commit dc48499
Show file tree
Hide file tree
Showing 9 changed files with 233 additions and 44 deletions.
1 change: 1 addition & 0 deletions org.eclipse.tm4e.core/META-INF/MANIFEST.MF
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Export-Package: org.eclipse.tm4e.core,
org.eclipse.tm4e.core.internal.grammar;x-friends:="org.eclipse.tm4e.core.tests",
org.eclipse.tm4e.core.internal.grammar.tokenattrs;x-friends:="org.eclipse.tm4e.core.tests",
org.eclipse.tm4e.core.internal.matcher;x-friends:="org.eclipse.tm4e.core.tests",
org.eclipse.tm4e.core.internal.oniguruma;x-friends:="org.eclipse.tm4e.languageconfiguration",
org.eclipse.tm4e.core.internal.theme;x-friends:="org.eclipse.tm4e.core.tests",
org.eclipse.tm4e.core.internal.theme.raw;x-friends:="org.eclipse.tm4e.core.tests",
org.eclipse.tm4e.core.internal.utils;x-friends:="org.eclipse.tm4e.core.tests,org.eclipse.tm4e.registry,org.eclipse.tm4e.languageconfiguration,org.eclipse.tm4e.markdown,org.eclipse.tm4e.ui,org.eclipse.tm4e.ui.tests",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

import org.eclipse.jdt.annotation.Nullable;
import org.eclipse.tm4e.core.TMException;
import org.eclipse.tm4e.core.internal.utils.StringUtils;
import org.jcodings.specific.UTF8Encoding;
import org.joni.Matcher;
import org.joni.Option;
Expand All @@ -37,7 +38,7 @@
* @see <a href="https://github.com/atom/node-oniguruma/blob/master/src/onig-reg-exp.cc">
* github.com/atom/node-oniguruma/blob/master/src/onig-reg-exp.cc</a>
*/
final class OnigRegExp {
public final class OnigRegExp {
private static final Logger LOGGER = System.getLogger(OnigRegExp.class.getName());

/**
Expand All @@ -53,23 +54,40 @@ final class OnigRegExp {
@Nullable
private OnigResult lastSearchResult;

private final String pattern;
private final Regex regex;

private final boolean hasGAnchor;

OnigRegExp(final String source) {
hasGAnchor = source.contains("\\G");
final byte[] pattern = source.getBytes(StandardCharsets.UTF_8);
/**
* @throws TMException if parsing fails
*/
public OnigRegExp(final String pattern) {
this(pattern, false);
}

/**
* @throws TMException if parsing fails
*/
public OnigRegExp(final String pattern, final boolean ignoreCase) {
this.pattern = pattern;
hasGAnchor = pattern.contains("\\G");
final byte[] patternBytes = pattern.getBytes(StandardCharsets.UTF_8);
try {
regex = new Regex(pattern, 0, pattern.length, Option.CAPTURE_GROUP, UTF8Encoding.INSTANCE, Syntax.DEFAULT,
int options = Option.CAPTURE_GROUP;
if (ignoreCase)
options |= Option.IGNORECASE;
regex = new Regex(patternBytes, 0, patternBytes.length, options, UTF8Encoding.INSTANCE, Syntax.DEFAULT,
LOGGER.isLoggable(Level.WARNING) ? LOGGER_WARN_CALLBACK : WarnCallback.NONE);
} catch (final SyntaxException ex) {
throw new TMException("Parsing regex pattern \"" + source + "\" failed with " + ex, ex);
throw new TMException("Parsing regex pattern \"" + pattern + "\" failed with " + ex, ex);
}
}

@Nullable
OnigResult search(final OnigString str, final int startPosition) {
/**
* @return null if not found
*/
public @Nullable OnigResult search(final OnigString str, final int startPosition) {
if (hasGAnchor) {
// Should not use caching, because the regular expression
// targets the current search position (\G)
Expand Down Expand Up @@ -99,4 +117,15 @@ private OnigResult search(final byte[] data, final int startPosition, final int
}
return null;
}

public String pattern() {
return pattern;
}

@Override
public String toString() {
return StringUtils.toString(this, sb -> {
sb.append("pattern=").append(pattern);
});
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
* @see <a href="https://github.com/atom/node-oniguruma/blob/master/src/onig-result.cc">
* github.com/atom/node-oniguruma/blob/master/src/onig-result.cc</a>
*/
final class OnigResult {
public final class OnigResult {

private int indexInScanner;
private final Region region;
Expand All @@ -32,25 +32,30 @@ final class OnigResult {
this.indexInScanner = indexInScanner;
}

int getIndex() {
public int getIndex() {
return indexInScanner;
}

void setIndex(final int index) {
indexInScanner = index;
}

int locationAt(final int index) {
public int locationAt(final int index) {
final int bytes = region.getBeg(index);
return bytes > 0 ? bytes : 0;
}

int count() {
public int count() {
return region.getNumRegs();
}

int lengthAt(final int index) {
public int lengthAt(final int index) {
final int bytes = region.getEnd(index) - region.getBeg(index);
return bytes > 0 ? bytes : 0;
}

@Override
public String toString() {
return "OnigResult [indexInScanner=" + indexInScanner + ", region=" + region + "]";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
*/
package org.eclipse.tm4e.languageconfiguration.internal.model;

import java.util.regex.Pattern;

import org.eclipse.tm4e.core.internal.utils.StringUtils;

/**
Expand All @@ -30,10 +28,10 @@ public final class FoldingRules {
* See [wikipedia](https://en.wikipedia.org/wiki/Off-side_rule) for more information.
*/
public final boolean offSide;
public final Pattern markersStart;
public final Pattern markersEnd;
public final RegExPattern markersStart;
public final RegExPattern markersEnd;

public FoldingRules(final boolean offSide, final Pattern markersStart, final Pattern markersEnd) {
public FoldingRules(final boolean offSide, final RegExPattern markersStart, final RegExPattern markersEnd) {
this.offSide = offSide;
this.markersStart = markersStart;
this.markersEnd = markersEnd;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,13 @@
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.eclipse.jdt.annotation.NonNull;
import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
import org.eclipse.tm4e.languageconfiguration.LanguageConfigurationPlugin;
import org.eclipse.tm4e.languageconfiguration.internal.model.EnterAction.IndentAction;
import org.eclipse.tm4e.languageconfiguration.internal.utils.RegExpUtils;

import com.google.gson.GsonBuilder;
import com.google.gson.JsonDeserializer;
Expand Down Expand Up @@ -244,12 +242,7 @@ private static String removeTrailingCommas(String jsonString) {
.fromJson(jsonString, LanguageConfiguration.class);
}

private static @Nullable Pattern getAsPattern(@Nullable final JsonElement element) {
final var pattern = getPattern(element);
return pattern == null ? null : RegExpUtils.create(pattern);
}

private static @Nullable String getPattern(@Nullable final JsonElement element) {
private static @Nullable RegExPattern getAsPattern(@Nullable final JsonElement element) {
if (element == null) {
return null;
}
Expand All @@ -260,10 +253,11 @@ private static String removeTrailingCommas(String jsonString) {
return null;
}
final var flags = getAsString(((JsonObject) element).get("flags"));
return flags != null ? pattern + "(?" + flags + ")" : pattern;
//return flags != null ? pattern + "(?" + flags + ")" : pattern;
return RegExPattern.of(pattern, flags);
}
// ex : "^<\\/([_:\\w][_:\\w-.\\d]*)\\s*>"
return getAsString(element);
return RegExPattern.ofNullable(getAsString(element), null);
}

private static @Nullable String getAsString(@Nullable final JsonElement element) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@
*/
package org.eclipse.tm4e.languageconfiguration.internal.model;

import java.util.regex.Pattern;

import org.eclipse.jdt.annotation.Nullable;
import org.eclipse.tm4e.core.TMException;
import org.eclipse.tm4e.core.internal.utils.StringUtils;

/**
Expand All @@ -29,26 +28,26 @@ public final class OnEnterRule {
/**
* This rule will only execute if the text before the cursor matches this regular expression.
*/
public final Pattern beforeText;
public final RegExPattern beforeText;

/**
* This rule will only execute if the text after the cursor matches this regular expression.
*/
@Nullable
public final Pattern afterText;
public final @Nullable RegExPattern afterText;

/**
* This rule will only execute if the text above the current line matches this regular expression.
*/
@Nullable
public final Pattern previousLineText;

public final @Nullable RegExPattern previousLineText;

/**
* The action to execute.
*/
public final EnterAction action;

public OnEnterRule(final Pattern beforeText, @Nullable final Pattern afterText, @Nullable final Pattern previousLineText, final EnterAction action) {
public OnEnterRule(final RegExPattern beforeText, final @Nullable RegExPattern afterText, final @Nullable RegExPattern previousLineText,
final EnterAction action) {
this.beforeText = beforeText;
this.afterText = afterText;
this.previousLineText = previousLineText;
Expand All @@ -58,12 +57,13 @@ public OnEnterRule(final Pattern beforeText, @Nullable final Pattern afterText,
/**
* Only for unit tests
*
* @throws PatternSyntaxException if beforeText or afterText contain invalid regex pattern
* @throws TMException if beforeText, afterText or previousLineText contain invalid regex pattern
*/
OnEnterRule(final String beforeText, @Nullable final String afterText, @Nullable final String previousLineText, final EnterAction action) {
this.beforeText = Pattern.compile(beforeText);
this.afterText = afterText == null ? null : Pattern.compile(afterText);
this.previousLineText = previousLineText == null ? null : Pattern.compile(previousLineText);
OnEnterRule(final String beforeText, final @Nullable String afterText, final @Nullable String previousLineText,
final EnterAction action) {
this.beforeText = RegExPattern.of(beforeText);
this.afterText = afterText == null ? null : RegExPattern.of(afterText);
this.previousLineText = previousLineText == null ? null : RegExPattern.of(previousLineText);
this.action = action;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/**
* Copyright (c) 2024 Vegard IT GmbH and others.
*
* This program and the accompanying materials are made
* available under the terms of the Eclipse Public License 2.0
* which is available at https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* - Sebastian Thomschke - initial implementation
*/
package org.eclipse.tm4e.languageconfiguration.internal.model;

import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.eclipse.jdt.annotation.Nullable;
import org.eclipse.tm4e.core.TMException;
import org.eclipse.tm4e.core.internal.oniguruma.OnigRegExp;
import org.eclipse.tm4e.core.internal.oniguruma.OnigString;

/**
 * A compiled regular expression that is backed either by {@link Pattern} or, when the expression
 * cannot be compiled by the Java regex engine, by {@link OnigRegExp}.
 * <p>
 * Instances are created through the static factories {@link #of(String)}, {@link #of(String, String)},
 * {@link #ofNullable(String)} and {@link #ofNullable(String, String)}.
 */
public abstract class RegExPattern {

	private static final System.Logger LOGGER = System.getLogger(RegExPattern.class.getName());

	/** Implementation delegating to {@link Pattern}. */
	private static final class JavaRegExPattern extends RegExPattern {
		final Pattern pattern;

		/**
		 * @param flags embedded flag characters understood by {@link Pattern} (e.g. {@code "i"}), or null
		 *
		 * @throws PatternSyntaxException if the pattern or the flags are not accepted by {@link Pattern}
		 */
		JavaRegExPattern(final String pattern, final @Nullable String flags) throws PatternSyntaxException {
			// An embedded flag group only affects the part of the expression that follows it,
			// therefore it has to be placed in front of the pattern to have any effect.
			this.pattern = Pattern.compile(flags == null ? pattern : "(?" + flags + ")" + pattern);
		}

		@Override
		public boolean matchesFully(final String text) {
			return pattern.matcher(text).matches();
		}

		@Override
		public boolean matchesPartially(final String text) {
			return pattern.matcher(text).find();
		}

		/**
		 * @return the expression as compiled, i.e. including the leading flag group if flags were given
		 */
		@Override
		public String pattern() {
			return pattern.pattern();
		}
	}

	/** Implementation delegating to {@link OnigRegExp}. */
	private static final class OnigRegExPattern extends RegExPattern {
		final OnigRegExp regex;

		/**
		 * @param flags only the presence of {@code 'i'} (ignore case) is evaluated, may be null
		 *
		 * @throws TMException if the pattern cannot be parsed
		 */
		OnigRegExPattern(final String pattern, final @Nullable String flags) {
			this.regex = new OnigRegExp(pattern, flags != null && flags.contains("i"));
		}

		/**
		 * Checks whether the first match found spans the complete text.
		 * <p>
		 * NOTE: only the first (leftmost) match reported by the search is inspected. An expression
		 * whose preferred alternative matches a mere prefix of the text is therefore reported as not
		 * matching fully.
		 */
		@Override
		public boolean matchesFully(final String text) {
			final var result = regex.search(OnigString.of(text), 0);
			if (result == null) {
				return false;
			}
			// OnigResult computes locations/lengths from the region of a search over UTF-8 encoded bytes,
			// so they must be compared with the encoded length and not with the UTF-16 based String#length().
			// The number of capture groups (result.count()) is irrelevant for deciding about a full match.
			final int textByteLength = text.getBytes(java.nio.charset.StandardCharsets.UTF_8).length;
			return result.locationAt(0) == 0 && result.lengthAt(0) == textByteLength;
		}

		@Override
		public boolean matchesPartially(final String text) {
			return regex.search(OnigString.of(text), 0) != null;
		}

		@Override
		public String pattern() {
			return regex.pattern();
		}
	}

	/**
	 * @param pattern {@link Pattern} or {@link OnigRegExp} compatible pattern
	 *
	 * @throws TMException if pattern parsing fails
	 */
	public static RegExPattern of(final String pattern) {
		return of(pattern, null);
	}

	/**
	 * Compiles the pattern with the Java regex engine and falls back to Oniguruma if that fails.
	 *
	 * @param pattern {@link Pattern} or {@link OnigRegExp} compatible pattern
	 * @param flags optional regex flags, e.g. {@code "i"}
	 *
	 * @throws TMException if pattern parsing fails
	 */
	public static RegExPattern of(final String pattern, final @Nullable String flags) {
		try {
			return new JavaRegExPattern(pattern, flags);
		} catch (final RuntimeException javaEx) {
			try {
				// try Oniguruma as fallback
				return new OnigRegExPattern(pattern, flags);
			} catch (final RuntimeException onigEx) {
				// keep the reason why the Java engine rejected the pattern available for diagnosis
				onigEx.addSuppressed(javaEx);
				throw onigEx;
			}
		}
	}

	/**
	 * @param pattern {@link Pattern} or {@link OnigRegExp} compatible pattern
	 *
	 * @return null if pattern is null or the pattern is invalid
	 */
	public static @Nullable RegExPattern ofNullable(final @Nullable String pattern) {
		return ofNullable(pattern, null);
	}

	/**
	 * Same as {@link #of(String, String)} but never throws: failures are logged and reported as null.
	 *
	 * @param pattern {@link Pattern} or {@link OnigRegExp} compatible pattern
	 * @param flags optional regex flags, e.g. {@code "i"}
	 *
	 * @return null if pattern is null or the pattern is invalid
	 */
	public static @Nullable RegExPattern ofNullable(final @Nullable String pattern, final @Nullable String flags) {
		if (pattern == null) {
			return null;
		}
		try {
			return of(pattern, flags);
		} catch (final RuntimeException ex) {
			LOGGER.log(System.Logger.Level.WARNING, "Ignoring invalid regex pattern \"" + pattern + "\"", ex);
			return null;
		}
	}

	/**
	 * @return true if the complete text is matched by this pattern
	 */
	public abstract boolean matchesFully(String text);

	/**
	 * @return true if this pattern matches anywhere inside the text
	 */
	public abstract boolean matchesPartially(String text);

	/**
	 * @return the textual regular expression this instance is based on
	 */
	public abstract String pattern();

	@Override
	public String toString() {
		return pattern();
	}
}
Loading

0 comments on commit dc48499

Please sign in to comment.