diff --git a/docs/references/engine-js-compat.md b/docs/references/engine-js-compat.md index 27a76cb5e..fef12f059 100644 --- a/docs/references/engine-js-compat.md +++ b/docs/references/engine-js-compat.md @@ -11,9 +11,9 @@ | | Count | | :-------------- | --------------------------------: | | Total Languages | 213 | -| Fully Supported | [164](#fully-supported-languages) | -| Mismatched | [20](#mismatched-languages) | -| Unsupported | [29](#unsupported-languages) | +| Fully Supported | [171](#fully-supported-languages) | +| Mismatched | [24](#mismatched-languages) | +| Unsupported | [18](#unsupported-languages) | ## Fully Supported Languages @@ -29,6 +29,7 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam | applescript | ✅ OK | 152 | - | | | ara | ✅ OK | 54 | - | | | asm | ✅ OK | 297 | - | | +| astro | ✅ OK | 1090 | - | | | awk | ✅ OK | 36 | - | | | ballerina | ✅ OK | 230 | - | | | bat | ✅ OK | 58 | - | | @@ -67,6 +68,7 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam | fluent | ✅ OK | 23 | - | | | fortran-fixed-form | ✅ OK | 332 | - | | | fortran-free-form | ✅ OK | 328 | - | | +| fsharp | ✅ OK | 239 | - | | | fsl | ✅ OK | 30 | - | | | gdresource | ✅ OK | 157 | - | | | gdscript | ✅ OK | 93 | - | | @@ -117,6 +119,7 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam | move | ✅ OK | 120 | - | | | narrat | ✅ OK | 34 | - | | | nextflow | ✅ OK | 17 | - | | +| nim | ✅ OK | 1126 | - | | | nix | ✅ OK | 80 | - | | | nushell | ✅ OK | 81 | - | | | objective-c | ✅ OK | 223 | - | | @@ -143,6 +146,7 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam | riscv | ✅ OK | 36 | - | | | rust | ✅ OK | 89 | - | | | sas | ✅ OK | 101 | - | | +| sass | ✅ OK | 69 | - | | | scala | ✅ OK | 112 | - | | | scheme | ✅ OK | 34 | - | | | scss | ✅ OK | 234 | - | | @@ -154,6 +158,7 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam | sql | 
✅ OK | 67 | - | | | ssh-config | ✅ OK | 12 | - | | | stylus | ✅ OK | 107 | - | | +| svelte | ✅ OK | 1491 | - | | | system-verilog | ✅ OK | 102 | - | | | systemd | ✅ OK | 32 | - | | | tasl | ✅ OK | 23 | - | | @@ -176,6 +181,8 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam | verilog | ✅ OK | 33 | - | | | vhdl | ✅ OK | 82 | - | | | viml | ✅ OK | 72 | - | | +| vue | ✅ OK | 1597 | - | | +| vue-html | ✅ OK | 1620 | - | | | vyper | ✅ OK | 238 | - | | | wasm | ✅ OK | 78 | - | | | wenyan | ✅ OK | 18 | - | | @@ -200,12 +207,16 @@ Languages that does not throw with the JavaScript RegExp engine, but will produc | elixir | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=elixir) | 708 | - | 179 | | erlang | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=erlang) | 147 | - | 470 | | glsl | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=glsl) | 186 | - | 306 | +| haml | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=haml) | 1612 | - | 48 | | kusto | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=kusto) | 60 | - | 40 | +| markdown | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=markdown) | 118 | - | 648 | +| mdc | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=mdc) | 784 | - | 407 | | mermaid | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=mermaid) | 129 | - | 38 | | nginx | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=nginx) | 378 | - | 4 | | objective-cpp | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=objective-cpp) | 309 | - | 172 | | php | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=php) | 1131 | - | 605 | | po | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=po) | 23 | - | 336 | +| pug | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=pug) | 1013 | - | 164 | | ruby | [🚧 
Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=ruby) | 1307 | - | 1 | | shellscript | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=shellscript) | 148 | - | 56 | | smalltalk | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=smalltalk) | 35 | - | 40 | @@ -220,18 +231,9 @@ Languages that throws with the JavaScript RegExp engine (contains syntaxes that | Language | Highlight Match | Patterns Parsable | Patterns Failed | Diff | | ---------- | :------------------------------------------------------------------------- | ----------------: | --------------: | ---: | | ada | ✅ OK | 201 | 1 | | -| astro | ✅ OK | 1088 | 2 | | -| sass | ✅ OK | 67 | 2 | | -| fsharp | ✅ OK | 232 | 7 | | -| nim | ✅ OK | 1119 | 7 | | -| svelte | ✅ OK | 1482 | 9 | | -| vue | ✅ OK | 1588 | 9 | | -| vue-html | ✅ OK | 1611 | 9 | | -| asciidoc | ✅ OK | 4388 | 93 | | -| wikitext | ✅ OK | 5208 | 95 | | +| wikitext | ✅ OK | 5217 | 86 | | +| asciidoc | ✅ OK | 4390 | 91 | | | blade | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=blade) | 1124 | 2 | | -| pug | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=pug) | 1011 | 2 | 164 | -| haml | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=haml) | 1603 | 9 | 48 | | rst | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=rst) | 1835 | 22 | 62 | | latex | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=latex) | 2451 | 48 | 25 | | powershell | ❌ Error | 87 | 1 | | @@ -240,8 +242,6 @@ Languages that throws with the JavaScript RegExp engine (contains syntaxes that | swift | ❌ Error | 325 | 4 | 18 | | kotlin | ❌ Error | 52 | 6 | 2986 | | purescript | ❌ Error | 67 | 6 | 1488 | -| markdown | ❌ Error | 111 | 7 | 584 | -| mdc | ❌ Error | 777 | 7 | 377 | | apex | ❌ Error | 173 | 14 | 242 | | haskell | ❌ Error | 136 | 21 | 12 | | cpp | ❌ Error | 490 | 22 | 25 | diff --git a/packages/engine-javascript/scripts/generate.ts 
import fs from 'node:fs/promises'
import { expandRecursiveBackReference } from './utils'

/**
 * A pattern containing a recursive subroutine call (`\g<groupName>`) that is
 * expanded into nested literal copies of itself.
 */
interface ReplacementRecursiveBackReference {
  type: 'recursive-back-reference'
  /** Source pattern, exactly as it appears inside a grammar. */
  regex: string
  /** Name of the capture group that the pattern calls recursively. */
  groupName: string
  /** Pattern substituted for the innermost call once expansion stops. */
  fallback: string
  /** Number of expansion levels; the generator falls back to 2 when omitted. */
  recursive?: number
}

/** A pattern that is swapped verbatim for a hand-written replacement. */
interface ReplacementStatic {
  type: 'static'
  regex: string
  replacement: string
}

type Replacement = ReplacementRecursiveBackReference | ReplacementStatic

const replacements: Replacement[] = [
  {
    // Recursive subroutine references are not supported by the JavaScript regex engine.
    // We expand a few levels of recursion into literals to simulate the behavior (incomplete).
    type: 'recursive-back-reference',
    // The group name must be present both on the group (`(?<square>`) and on the
    // call (`\g<square>`), otherwise the expansion helper has nothing to rewrite.
    regex: '(?<square>[^\\[\\]\\\\]|\\\\.|\\[\\g<square>*+\\])',
    groupName: 'square',
    fallback: '(?:[^\\[\\]\\\\])',
  },
  {
    type: 'recursive-back-reference',
    regex: '(?<url>(?>[^\\s()]+)|\\(\\g<url>*\\))',
    groupName: 'url',
    fallback: '[^\\s\\(\\)]',
  },
]

// Build `[from, to]` pairs: `from` is the original pattern text, `to` is what
// the engine should compile instead.
const result = replacements.map((r) => {
  switch (r.type) {
    case 'recursive-back-reference':
      return [r.regex, expandRecursiveBackReference(r.regex, r.groupName, r.fallback, r.recursive ?? 2)]
    case 'static':
      return [r.regex, r.replacement]
    default:
      throw new Error(`Unknown replacement type: ${(r as any).type}`)
  }
})

// Emit the table as a TypeScript module next to the engine sources.
fs.writeFile(new URL('../src/replacements.ts', import.meta.url), `// Generated by script\n\nexport const replacements = ${JSON.stringify(result, null, 2)} as [string, string][]\n`, 'utf-8')
/**
 * Expand a recursive subroutine call (`\g<name>`) into a fixed number of
 * nested literal copies of the pattern.
 *
 * Every `\g<name>` is replaced by the pattern itself (with its named group
 * turned into a non-capturing group) `recursive` times. Calls still left
 * after that are replaced by `fallback`, and the outer `(?<name>` group is
 * finally made non-capturing as well.
 *
 * @param regex - Pattern source containing `(?<name>` and `\g<name>`.
 * @param name - Name of the group that is called recursively.
 * @param fallback - Pattern used for the innermost, unexpanded call.
 * @param recursive - Number of expansion levels (default 2).
 * @returns The expanded pattern source, free of `\g<name>` calls.
 */
export function expandRecursiveBackReference(
  regex: string,
  name: string,
  fallback: string,
  recursive = 2,
) {
  // Escape the name so regex metacharacters in it cannot change the markers.
  const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  const refMarker = new RegExp(`\\\\g<${escapedName}>`, 'g')
  const groupMarker = new RegExp(`\\(\\?<${escapedName}>`, 'g')

  // Replacer functions are used throughout: a string replacement would treat
  // `$&`, `$$`, `$1`, ... inside the inserted pattern as substitution tokens.
  const normalized = regex.replace(groupMarker, () => '(?:')

  let out = regex
  for (let i = 0; i < recursive; i++) {
    out = out.replace(refMarker, () => normalized)
  }

  return out
    .replace(refMarker, () => fallback)
    .replace(groupMarker, () => '(?:')
}
// Generated by script

/**
 * `[from, to]` pairs: when `from` occurs verbatim inside a grammar pattern it
 * is replaced by `to`, a version with the recursive `\g<name>` call expanded
 * to a few nested levels.
 *
 * The `from` keys must keep their group names (`(?<square>`, `\g<square>`,
 * `(?<url>`, `\g<url>`); without them they never match a grammar pattern.
 */
export const replacements = [
  [
    '(?<square>[^\\[\\]\\\\]|\\\\.|\\[\\g<square>*+\\])',
    '(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*+\\])*+\\])*+\\])',
  ],
  [
    '(?<url>(?>[^\\s()]+)|\\(\\g<url>*\\))',
    '(?:(?>[^\\s()]+)|\\((?:(?>[^\\s()]+)|\\((?:(?>[^\\s()]+)|\\([^\\s\\(\\)]*\\))*\\))*\\))',
  ],
] as [string, string][]
packages/engine-oniguruma: dependencies: @@ -4205,8 +4205,8 @@ packages: resolution: {integrity: sha512-VXJjc87FScF88uafS3JllDgvAm+c/Slfz06lorj2uAY34rlUu0Nt+v8wreiImcrgAjjIHp1rXpTDlLOGw29WwQ==} engines: {node: '>=18'} - oniguruma-to-js@0.4.0: - resolution: {integrity: sha512-GwNFPQygkpDjO9MOr54Rqi01dGS+h9VAS//Qxz9lTN5B09CxqiIc7rydvdV+Ex2Z8Vk+zqfHH7hU6ePn8uf+Mg==} + oniguruma-to-js@0.4.3: + resolution: {integrity: sha512-X0jWUcAlxORhOqqBREgPMgnshB7ZGYszBNspP+tS9hPD3l13CdaXcHbgImoHUHlrvGx/7AvFEkTRhAGYh+jzjQ==} optionator@0.9.4: resolution: {integrity: sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==} @@ -9442,7 +9442,7 @@ snapshots: dependencies: mimic-function: 5.0.1 - oniguruma-to-js@0.4.0: + oniguruma-to-js@0.4.3: dependencies: regex: 4.3.2 diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index d3695b637..9de9d61b9 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -53,7 +53,7 @@ catalog: minimist: ^1.2.8 monaco-editor-core: ^0.51.0 ofetch: ^1.3.4 - oniguruma-to-js: 0.4.0 + oniguruma-to-js: 0.4.3 picocolors: ^1.1.0 pinia: ^2.2.2 pnpm: ^9.10.0