-
-
Notifications
You must be signed in to change notification settings - Fork 382
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Use Oniguruma-To-ES in the JS engine (#828) #832
Changes from 1 commit
9fc2b85
9695ea0
45bd321
f4dd9dc
b516d72
a56ffbb
aeaea84
46f5331
76c2778
8ae1608
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
This file was deleted.
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,8 +4,7 @@ | |
RegexEngineString, | ||
} from '@shikijs/types' | ||
import type { IOnigMatch } from '@shikijs/vscode-textmate' | ||
import { onigurumaToRegexp } from 'oniguruma-to-js' | ||
import { replacements } from './replacements' | ||
import { toRegExp } from 'oniguruma-to-es' | ||
|
||
export interface JavaScriptRegexEngineOptions { | ||
/** | ||
|
@@ -16,7 +15,7 @@ | |
forgiving?: boolean | ||
|
||
/** | ||
* Use JavaScript to simulate some unsupported regex features. | ||
* Cleanup some grammar patterns before use. | ||
* | ||
* @default true | ||
*/ | ||
|
@@ -30,7 +29,7 @@ | |
/** | ||
* Custom pattern to RegExp constructor. | ||
* | ||
* By default `oniguruma-to-js` is used. | ||
* By default `oniguruma-to-es` is used. | ||
*/ | ||
regexConstructor?: (pattern: string) => RegExp | ||
} | ||
|
@@ -41,18 +40,19 @@ | |
* The default RegExp constructor for JavaScript regex engine. | ||
*/ | ||
export function defaultJavaScriptRegexConstructor(pattern: string): RegExp { | ||
return onigurumaToRegexp( | ||
return toRegExp( | ||
Check failure on line 43 in packages/engine-javascript/src/index.ts GitHub Actions / test (18.x, ubuntu-latest)packages/engine-javascript/test/compare.test.ts > cases > json-basic
Check failure on line 43 in packages/engine-javascript/src/index.ts GitHub Actions / test (18.x, ubuntu-latest)packages/engine-javascript/test/compare.test.ts > cases > html-basic
Check failure on line 43 in packages/engine-javascript/src/index.ts GitHub Actions / test (18.x, ubuntu-latest)packages/engine-javascript/test/compare.test.ts > cases > ts-basic
Check failure on line 43 in packages/engine-javascript/src/index.ts GitHub Actions / test (18.x, ubuntu-latest)packages/engine-javascript/test/compare.test.ts > cases > jsonc
Check failure on line 43 in packages/engine-javascript/src/index.ts GitHub Actions / test (18.x, ubuntu-latest)packages/engine-javascript/test/compare.test.ts > cases > vue
Check failure on line 43 in packages/engine-javascript/src/index.ts GitHub Actions / test (18.x, ubuntu-latest)packages/engine-javascript/test/compare.test.ts > cases > toml
Check failure on line 43 in packages/engine-javascript/src/index.ts GitHub Actions / test (18.x, ubuntu-latest)packages/engine-javascript/test/compare.test.ts > cases > sql
Check failure on line 43 in packages/engine-javascript/src/index.ts GitHub Actions / test (18.x, ubuntu-latest)packages/engine-javascript/test/general.test.ts > should > works
Check failure on line 43 in packages/engine-javascript/src/index.ts GitHub Actions / test (18.x, ubuntu-latest)packages/engine-javascript/test/general.test.ts > should > dynamic load theme and lang
Check failure on line 43 in packages/engine-javascript/src/index.ts GitHub Actions / test (18.x, ubuntu-latest)packages/engine-javascript/test/verify.test.ts
|
||
pattern, | ||
{ | ||
flags: 'dgm', | ||
ignoreContiguousAnchors: true, | ||
accuracy: 'loose', | ||
global: true, | ||
hasIndices: true, | ||
tmGrammar: true, | ||
}, | ||
) | ||
} | ||
|
||
export class JavaScriptScanner implements PatternScanner { | ||
regexps: (RegExp | null)[] | ||
contiguousAnchorSimulation: boolean[] | ||
|
||
constructor( | ||
public patterns: string[], | ||
|
@@ -65,8 +65,7 @@ | |
regexConstructor = defaultJavaScriptRegexConstructor, | ||
} = options | ||
|
||
this.contiguousAnchorSimulation = Array.from({ length: patterns.length }, () => false) | ||
this.regexps = patterns.map((p, idx) => { | ||
this.regexps = patterns.map((p) => { | ||
/** | ||
* vscode-textmate replace anchors to \uFFFF, where we still not sure how to handle it correctly | ||
* | ||
|
@@ -77,10 +76,6 @@ | |
if (simulation) | ||
p = p.replaceAll('(^|\\\uFFFF)', '(^|\\G)') | ||
|
||
// Detect contiguous anchors for simulation | ||
if (simulation && (p.startsWith('(^|\\G)') || p.startsWith('(\\G|^)'))) | ||
this.contiguousAnchorSimulation[idx] = true | ||
|
||
// Cache | ||
const cached = cache?.get(p) | ||
if (cached) { | ||
|
@@ -92,13 +87,7 @@ | |
throw cached | ||
} | ||
try { | ||
let pattern = p | ||
if (simulation) { | ||
for (const [from, to] of replacements) { | ||
pattern = pattern.replaceAll(from, to) | ||
} | ||
} | ||
const regex = regexConstructor(pattern) | ||
const regex = regexConstructor(p) | ||
cache?.set(p, regex) | ||
return regex | ||
} | ||
|
@@ -143,25 +132,18 @@ | |
if (!regexp) | ||
continue | ||
try { | ||
let offset = 0 | ||
regexp.lastIndex = startPosition | ||
let match = regexp.exec(str) | ||
const match = regexp.exec(str) | ||
|
||
// If a regex starts with `(^|\\G)` or `(\\G|^)`, we simulate the behavior by cutting the string | ||
if (!match && this.contiguousAnchorSimulation[i]) { | ||
offset = startPosition | ||
regexp.lastIndex = 0 | ||
match = regexp.exec(str.slice(startPosition)) | ||
} | ||
if (!match) | ||
continue | ||
|
||
// If the match is at the start position, return it immediately | ||
if (match.index === startPosition) { | ||
return toResult(i, match, offset) | ||
return toResult(i, match, 0) | ||
} | ||
// Otherwise, store it for later | ||
pending.push([i, match, offset]) | ||
pending.push([i, match, 0]) | ||
} | ||
catch (e) { | ||
if (this.options.forgiving) | ||
|
@@ -187,9 +169,10 @@ | |
/** | ||
* Use the modern JavaScript RegExp engine to implement the OnigScanner. | ||
* | ||
* As Oniguruma regex is more powerful than JavaScript regex, some patterns may not be supported. | ||
* Errors will be thrown when parsing TextMate grammars with unsupported patterns. | ||
* Set `forgiving` to `true` to ignore these errors and skip the unsupported patterns. | ||
* As Oniguruma supports some features that can't be emulated using native JavaScript regexes, some | ||
* patterns are not supported. Errors will be thrown when parsing TextMate grammars with | ||
* unsupported patterns, and when the grammar includes patterns that use invalid Oniguruma syntax. | ||
* Set `forgiving` to `true` to ignore these errors and skip any unsupported or invalid patterns. | ||
* | ||
* @experimental | ||
*/ | ||
|
This file was deleted.
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure whether `oniguruma-to-es`'s dependencies should also be externalized. I removed the comment about considering bundling because I'd much prefer not to do that if it means these projects will lose much of their download stats (which help add to their credibility, which is really valuable for now, especially while they're new projects).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Totally! If `oniguruma-to-es` gets externalized, then its dependencies will be as well.