diff --git a/.vscode/settings.json b/.vscode/settings.json index 6dd5e3e39..41441c18a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -36,6 +36,7 @@ ], "references.preferredLocation": "peek", "cSpell.words": [ + "Extenerlaize", "shikijs" ] } diff --git a/docs/references/engine-js-compat.md b/docs/references/engine-js-compat.md index 0400b23dd..a4442aaa6 100644 --- a/docs/references/engine-js-compat.md +++ b/docs/references/engine-js-compat.md @@ -1,6 +1,6 @@ # JavaScript RegExp Engine Compatibility References -> Genreated on Thursday, September 5, 2024 +> Generated on Monday, September 9, 2024 > > Version `1.16.2` > @@ -11,9 +11,9 @@ | | Count | | :-------------- | --------------------------------: | | Total Languages | 213 | -| Fully Supported | [188](#fully-supported-languages) | -| Mismatched | [17](#mismatched-languages) | -| Unsupported | [8](#unsupported-languages) | +| Fully Supported | [173](#fully-supported-languages) | +| Mismatched | [24](#mismatched-languages) | +| Unsupported | [16](#unsupported-languages) | ## Fully Supported Languages @@ -33,12 +33,10 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam | astro | ✅ OK | 59 | - | | awk | ✅ OK | 36 | - | | ballerina | ✅ OK | 230 | - | -| bash | ✅ OK | 146 | - | | bat | ✅ OK | 58 | - | | berry | ✅ OK | 18 | - | | bibtex | ✅ OK | 19 | - | | bicep | ✅ OK | 28 | - | -| c | ✅ OK | 158 | - | | cadence | ✅ OK | 71 | - | | clarity | ✅ OK | 43 | - | | clj | ✅ OK | 38 | - | @@ -50,8 +48,6 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam | coffee | ✅ OK | 120 | - | | common-lisp | ✅ OK | 57 | - | | coq | ✅ OK | 25 | - | -| cpp | ✅ OK | 220 | - | -| crystal | ✅ OK | 140 | - | | css | ✅ OK | 141 | - | | csv | ✅ OK | 1 | - | | cue | ✅ OK | 85 | - | @@ -65,11 +61,9 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam | dotenv | ✅ OK | 9 | - | | dream-maker | ✅ OK | 55 | - | | edge | ✅ OK | 10 | - | -| 
elixir | ✅ OK | 102 | - | | elm | ✅ OK | 67 | - | | emacs-lisp | ✅ OK | 151 | - | | erb | ✅ OK | 6 | - | -| erlang | ✅ OK | 147 | - | | fennel | ✅ OK | 31 | - | | fish | ✅ OK | 22 | - | | fluent | ✅ OK | 23 | - | @@ -85,7 +79,6 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam | gleam | ✅ OK | 26 | - | | glimmer-js | ✅ OK | 74 | - | | glimmer-ts | ✅ OK | 74 | - | -| glsl | ✅ OK | 7 | - | | gnuplot | ✅ OK | 82 | - | | go | ✅ OK | 115 | - | | graphql | ✅ OK | 63 | - | @@ -113,7 +106,6 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam | jsonnet | ✅ OK | 33 | - | | jssm | ✅ OK | 30 | - | | jsx | ✅ OK | 376 | - | -| kotlin | ✅ OK | 58 | - | | lean | ✅ OK | 32 | - | | less | ✅ OK | 280 | - | | liquid | ✅ OK | 69 | - | @@ -125,21 +117,19 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam | marko | ✅ OK | 81 | - | | matlab | ✅ OK | 77 | - | | mojo | ✅ OK | 213 | - | -| move | ✅ OK | 117 | - | +| move | ✅ OK | 120 | - | | narrat | ✅ OK | 34 | - | | nextflow | ✅ OK | 17 | - | | nim | ✅ OK | 114 | - | | nix | ✅ OK | 80 | - | | nushell | ✅ OK | 75 | - | | objective-c | ✅ OK | 217 | - | -| objective-cpp | ✅ OK | 297 | - | | ocaml | ✅ OK | 178 | - | | pascal | ✅ OK | 23 | - | | perl | ✅ OK | 155 | - | | plsql | ✅ OK | 43 | - | | postcss | ✅ OK | 47 | - | | powerquery | ✅ OK | 30 | - | -| powershell | ✅ OK | 88 | - | | prisma | ✅ OK | 26 | - | | prolog | ✅ OK | 26 | - | | proto | ✅ OK | 33 | - | @@ -155,7 +145,6 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam | regexp | ✅ OK | 34 | - | | rel | ✅ OK | 17 | - | | riscv | ✅ OK | 36 | - | -| ruby | ✅ OK | 154 | - | | rust | ✅ OK | 89 | - | | sas | ✅ OK | 32 | - | | sass | ✅ OK | 67 | - | @@ -163,9 +152,7 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam | scheme | ✅ OK | 34 | - | | scss | ✅ OK | 104 | - | | shaderlab | ✅ OK | 38 | - | -| shellscript | ✅ OK | 146 | - 
| | shellsession | ✅ OK | 2 | - | -| smalltalk | ✅ OK | 31 | - | | solidity | ✅ OK | 102 | - | | soy | ✅ OK | 45 | - | | sparql | ✅ OK | 4 | - | @@ -202,49 +189,62 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam | wenyan | ✅ OK | 18 | - | | wgsl | ✅ OK | 44 | - | | wikitext | ✅ OK | 104 | - | -| wolfram | ✅ OK | 501 | - | | xml | ✅ OK | 30 | - | | xsl | ✅ OK | 5 | - | | yaml | ✅ OK | 46 | - | | zenscript | ✅ OK | 21 | - | | zig | ✅ OK | 51 | - | -| zsh | ✅ OK | 146 | - | ## Mismatched Languages Languages that does not throw with the JavaScript RegExp engine, but will produce different result than the WASM engine. Please use with caution. -| Language | Highlight Match | Patterns Parsable | Patterns Failed | -| ------------ | :-------------------------------------------------------------------------------- | ----------------: | --------------: | -| angular-html | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=angular-html) | 2 | - | -| apex | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=apex) | 187 | - | -| beancount | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=beancount) | 39 | - | -| blade | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=blade) | 330 | - | -| haml | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=haml) | 64 | - | -| kusto | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=kusto) | 60 | - | -| latex | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=latex) | 183 | - | -| markdown | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=markdown) | 103 | - | -| mdc | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=mdc) | 27 | - | -| mermaid | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=mermaid) | 129 | - | -| nginx | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=nginx) | 102 | - | -| php | [🚧 
Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=php) | 328 | - | -| po | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=po) | 23 | - | -| pug | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=pug) | 92 | - | -| rst | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=rst) | 61 | - | -| splunk | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=splunk) | 17 | - | -| stata | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=stata) | 189 | - | +| Language | Highlight Match | Patterns Parsable | Patterns Failed | +| ------------- | :--------------------------------------------------------------------------------- | ----------------: | --------------: | +| angular-html | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=angular-html) | 2 | - | +| bash | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=bash) | 146 | - | +| beancount | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=beancount) | 39 | - | +| c | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=c) | 158 | - | +| crystal | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=crystal) | 140 | - | +| elixir | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=elixir) | 102 | - | +| erlang | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=erlang) | 147 | - | +| glsl | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=glsl) | 7 | - | +| haml | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=haml) | 64 | - | +| kusto | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=kusto) | 60 | - | +| latex | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=latex) | 183 | - | +| mermaid | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=mermaid) | 129 | - | +| nginx | [🚧 
Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=nginx) | 102 | - | +| objective-cpp | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=objective-cpp) | 297 | - | +| php | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=php) | 328 | - | +| po | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=po) | 23 | - | +| pug | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=pug) | 92 | - | +| rst | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=rst) | 61 | - | +| ruby | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=ruby) | 154 | - | +| shellscript | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=shellscript) | 146 | - | +| smalltalk | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=smalltalk) | 31 | - | +| splunk | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=splunk) | 17 | - | +| stata | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=stata) | 189 | - | +| zsh | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=zsh) | 146 | - | ## Unsupported Languages Languages that throws with the JavaScript RegExp engine (contains syntaxes that we can't polyfill yet). If you need to use these languages, please use the Oniguruma engine. 
-| Language | Highlight Match | Patterns Parsable | Patterns Failed | -| ---------- | :-------------------------------------------------------------------------- | ----------------: | --------------: | -| ada | ✅ OK | 199 | 1 | -| csharp | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=csharp) | 294 | 1 | -| razor | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=razor) | 83 | 2 | -| mdx | ❌ Error | 181 | - | -| purescript | ❌ Error | 68 | 4 | -| swift | ❌ Error | 302 | 4 | -| julia | ❌ Error | 90 | 5 | -| haskell | ❌ Error | 114 | 43 | +| Language | Highlight Match | Patterns Parsable | Patterns Failed | +| ---------- | :------------------------------------------------------------------------- | ----------------: | --------------: | +| ada | ✅ OK | 199 | 1 | +| blade | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=blade) | 328 | 2 | +| mdc | ❌ Error | 27 | - | +| powershell | ❌ Error | 87 | 1 | +| wolfram | ❌ Error | 500 | 1 | +| razor | ❌ Error | 82 | 3 | +| mdx | ❌ Error | 177 | 4 | +| swift | ❌ Error | 302 | 4 | +| julia | ❌ Error | 90 | 5 | +| kotlin | ❌ Error | 52 | 6 | +| markdown | ❌ Error | 96 | 7 | +| purescript | ❌ Error | 63 | 9 | +| apex | ❌ Error | 173 | 14 | +| cpp | ❌ Error | 198 | 22 | +| csharp | ❌ Error | 263 | 32 | +| haskell | ❌ Error | 113 | 44 | diff --git a/package.json b/package.json index 941d21a64..3997d88b5 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "type": "module", "version": "1.16.2", "private": true, - "packageManager": "pnpm@9.9.0", + "packageManager": "pnpm@9.10.0", "scripts": { "lint": "eslint . 
--cache", "release": "bumpp && pnpm -r publish", @@ -52,6 +52,7 @@ "picocolors": "catalog:", "pnpm": "catalog:", "prettier": "catalog:", + "regex": "catalog:", "rimraf": "catalog:", "rollup": "catalog:", "rollup-plugin-copy": "catalog:", diff --git a/packages/core/package.json b/packages/core/package.json index f4cc04b39..13c010e46 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -67,11 +67,12 @@ }, "dependencies": { "@shikijs/vscode-textmate": "catalog:", - "@types/hast": "catalog:" + "@types/hast": "catalog:", + "oniguruma-to-js": "catalog:", + "regex": "catalog:" }, "devDependencies": { "hast-util-to-html": "catalog:", - "oniguruma-to-js": "catalog:", "vscode-oniguruma": "catalog:" } } diff --git a/packages/core/rollup.config.mjs b/packages/core/rollup.config.mjs index 4796c3f63..414d8ebb6 100644 --- a/packages/core/rollup.config.mjs +++ b/packages/core/rollup.config.mjs @@ -35,6 +35,12 @@ const plugins = [ const external = [ 'hast', '@shikijs/vscode-textmate', + + // Externalize them to make it easier to patch and experiment with + // Versions are pinned to avoid regressions + // Later we might consider bundling them. + 'oniguruma-to-js', + 'regex', ] export default defineConfig([ diff --git a/packages/core/src/engines/javascript.ts b/packages/core/src/engines/javascript.ts index 975b73e87..52ac2f56f 100644 --- a/packages/core/src/engines/javascript.ts +++ b/packages/core/src/engines/javascript.ts @@ -1,4 +1,5 @@ import { onigurumaToRegexp } from 'oniguruma-to-js' +import { rewrite } from 'regex' import type { JavaScriptRegexEngineOptions, PatternScanner, RegexEngine, RegexEngineString } from '../types' const MAX = 4294967295 @@ -7,10 +8,22 @@ const MAX = 4294967295 * The default RegExp constructor for JavaScript regex engine. 
*/ export function defaultJavaScriptRegexConstructor(pattern: string): RegExp { + pattern = pattern + // YAML specific handling; TODO: move to tm-grammars + .replaceAll('[^\\s[-?:,\\[\\]{}#&*!|>\'"%@`]]', '[^\\s\\-?:,\\[\\]{}#&*!|>\'"%@`]') + + const rewritten = rewrite(pattern, { + flags: 'dgm', + unicodeSetsPlugin: null, + disable: { + n: true, + v: true, + x: true, + }, + }) + return onigurumaToRegexp( - pattern - // YAML specific handling; TODO: move to tm-grammars - .replaceAll('[^\\s[-?:,\\[\\]{}#&*!|>\'"%@`]]', '[^\\s\\-?:,\\[\\]{}#&*!|>\'"%@`]'), + rewritten.expression, { flags: 'dgm', ignoreContiguousAnchors: true, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 3f32e7843..b48e8c5a8 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -139,7 +139,7 @@ catalogs: specifier: ^1.3.4 version: 1.3.4 oniguruma-to-js: - specifier: ^0.3.3 + specifier: 0.3.3 version: 0.3.3 picocolors: specifier: ^1.1.0 @@ -148,11 +148,14 @@ catalogs: specifier: ^2.2.2 version: 2.2.2 pnpm: - specifier: ^9.9.0 - version: 9.9.0 + specifier: ^9.10.0 + version: 9.10.0 prettier: specifier: ^3.3.3 version: 3.3.3 + regex: + specifier: 4.3.2 + version: 4.3.2 rehype-raw: specifier: ^7.0.0 version: 7.0.0 @@ -367,10 +370,13 @@ importers: version: 1.1.0 pnpm: specifier: 'catalog:' - version: 9.9.0 + version: 9.10.0 prettier: specifier: 'catalog:' version: 3.3.3 + regex: + specifier: 'catalog:' + version: 4.3.2 rimraf: specifier: 'catalog:' version: 6.0.1 @@ -509,13 +515,16 @@ importers: '@types/hast': specifier: ^3.0.4 version: 3.0.4 + oniguruma-to-js: + specifier: 'catalog:' + version: 0.3.3 + regex: + specifier: 'catalog:' + version: 4.3.2 devDependencies: hast-util-to-html: specifier: 'catalog:' version: 9.0.2 - oniguruma-to-js: - specifier: 'catalog:' - version: 0.3.3 vscode-oniguruma: specifier: ^1.7.0 version: 1.7.0 @@ -1619,7 +1628,7 @@ packages: resolution: {integrity: sha512-lpA3RZ9PdIG7qqhEfv79tBffNaoDuukFDrmhLqg9ifv99u/ehn+lOg30x2zmhf8AQqQUZaMk/B9fZraQ6/acDQ==} engines: {node: 
'>=14.0.0'} peerDependencies: - rollup: ^4.21.2 + rollup: ^1.20.0||^2.0.0||^3.0.0||^4.0.0 peerDependenciesMeta: rollup: optional: true @@ -4283,8 +4292,8 @@ packages: resolution: {integrity: sha512-Nc3IT5yHzflTfbjgqWcCPpo7DaKy4FnpB0l/zCAW0Tc7jxAiuqSxHasntB3D7887LSrA93kDJ9IXovxJYxyLCA==} engines: {node: '>=4'} - pnpm@9.9.0: - resolution: {integrity: sha512-YMGKzROL/2ldM5vmrRP36TbupnRWYNTMSndtUkfFQNDt7hpWNpXBg6ZuuRfviPK0/rH8JfMqetytx6rzQ46ZwQ==} + pnpm@9.10.0: + resolution: {integrity: sha512-c6Ka+jag0JLs5Scd5Rd+y/gxjUVOzXATQxMbjrwMGpHEh9pGq3fI5ZbWrPFGHjWUztS+zt+JIbB0+9hlPtcFHA==} engines: {node: '>=18.12'} hasBin: true @@ -4538,6 +4547,9 @@ packages: resolution: {integrity: sha512-J8rn6v4DBb2nnFqkqwy6/NnTYMcgLA+sLr0iIO41qpv0n+ngb7ksag2tMRl0inb1bbO/esUwzW1vbJi7K0sI0g==} engines: {node: ^12.0.0 || ^14.0.0 || >=16.0.0} + regex@4.3.2: + resolution: {integrity: sha512-kK/AA3A9K6q2js89+VMymcboLOlF5lZRCYJv3gzszXFHBr6kO6qLGzbm+UIugBEV8SMMKCTR59txoY6ctRHYVw==} + regexp-ast-analysis@0.7.1: resolution: {integrity: sha512-sZuz1dYW/ZsfG17WSAG7eS85r5a0dDsvg+7BiiYR5o6lKCAtUrEwdmRmaGF6rwVj3LcmAeYkOWKEPlbPzN3Y3A==} engines: {node: ^12.0.0 || ^14.0.0 || >=16.0.0} @@ -9594,7 +9606,7 @@ snapshots: pluralize@8.0.0: {} - pnpm@9.9.0: {} + pnpm@9.10.0: {} postcss-calc@9.0.1(postcss@8.4.44): dependencies: @@ -9825,6 +9837,8 @@ snapshots: dependencies: '@eslint-community/regexpp': 4.11.0 + regex@4.3.2: {} + regexp-ast-analysis@0.7.1: dependencies: '@eslint-community/regexpp': 4.11.0 diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 4ef17f292..cbdbd4a49 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -51,11 +51,12 @@ catalog: minimist: ^1.2.8 monaco-editor-core: ^0.51.0 ofetch: ^1.3.4 - oniguruma-to-js: ^0.3.3 + oniguruma-to-js: 0.3.3 picocolors: ^1.1.0 pinia: ^2.2.2 - pnpm: ^9.9.0 + pnpm: ^9.10.0 prettier: ^3.3.3 + regex: 4.3.2 rehype-raw: ^7.0.0 rehype-stringify: ^10.0.0 remark-parse: ^11.0.0