Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Dynamic import refinements & fixes, optimization #102

Merged
merged 2 commits into from
Feb 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
node_modules
lib/lexer.wat
lib/lexer.js
dist
package-lock.json
yarn.lock
lib/lexer.emcc.js
34 changes: 21 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,23 @@ import { init, parse } from 'es-module-lexer';
await init;

const source = `
import { name } from 'mod';
import { name } from 'mod\\u1011';
import json from './json.json' assert { type: 'json' }
export var p = 5;
export function q () {
};
// Comments provided to demonstrate edge cases
import /*comment!*/ ('asdf', { assert: { type: 'json' }});
import /*comment!*/ ( 'asdf', { assert: { type: 'json' }});
import /*comment!*/.meta.asdf;
`;

const [imports, exports] = parse(source, 'optional-sourcename');

// Returns "mod"
// Returns "modထ"
imports[0].n
// Returns "mod\u1011"
source.substring(imports[0].s, imports[0].e);
// "s" = start
// "e" = end
Expand All @@ -69,34 +70,41 @@ import { init, parse } from 'es-module-lexer';

// Returns "{ type: 'json' }"
source.substring(imports[1].a, imports[1].se);
// "a" = assert
// "a" = assert, -1 for no assertion

// Returns "p,q"
exports.toString();

// Dynamic imports are indicated by imports[2].d > -1
// In this case the "d" index is the start of the dynamic import
// In this case the "d" index is the start of the dynamic import bracket
// Returns true
imports[2].d > -1;

// Returns "asdf"
// Returns "asdf" (only for string literal dynamic imports)
imports[2].n
// Returns "import /*comment!*/ ( 'asdf', { assert: { type: 'json' } })"
source.substring(imports[2].ss, imports[2].se);
// Returns "'asdf'"
source.substring(imports[2].s, imports[2].e);
// Returns "import /*comment!*/ ("
source.substring(imports[2].d, imports[2].s);
// Returns "import /*comment!*/ ('asdf', { assert: { type: 'json' } })"
source.substring(imports[2].d, imports[2].se + 1);
// Returns "( 'asdf', { assert: { type: 'json' } })"
source.substring(imports[2].d, imports[2].se);
// Returns "{ assert: { type: 'json' } }"
source.substring(imports[2].a, imports[2].e);
// ss is the same as d
// as, ae not used for dynamic imports
source.substring(imports[2].a, imports[2].se - 1);

// For non-string dynamic import expressions:
// - n will be undefined
// - a is currently -1 even if there is an assertion
// - e is currently the character before the closing )

// For nested dynamic imports, the se value of the outer import is -1, as end tracking does not
// currently support nested dynamic imports

// import.meta is indicated by imports[2].d === -2
// Returns true
imports[2].d === -2;
// Returns "import /*comment!*/.meta"
source.substring(imports[2].s, imports[2].e);
// ss and se are the same for import meta
})();
```

Expand Down
130 changes: 73 additions & 57 deletions chompfile.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ default-task = "test"

extensions = ['chomp@0.1:npm', 'chomp@0.1:footprint']

[env]
OPT = 'z'

[env-default]
WASI_PATH = '../wasi-sdk-12.0'
EMSDK_PATH = '../emsdk'
Expand Down Expand Up @@ -33,7 +36,37 @@ run = 'node --expose-gc bench/index.js'
[[task]]
target = 'dist/lexer.asm.js'
deps = ['lib/lexer.asm.js']
run = 'terser lib/lexer.asm.js -c -m -o dist/lexer.asm.js'
engine = 'node'
run = '''
import { readFileSync, writeFileSync } from 'fs';
import terser from 'terser';
const jsSource = readFileSync('lib/lexer.asm.js', 'utf8');
const pjson = JSON.parse(readFileSync('package.json', 'utf8'));
const min = process.env.OPT === 'z';
let minified = min && terser.minify(jsSource, {
module: true,
compress: {
ecma: 6,
unsafe: true,
},
output: {
preamble: `/* es-module-lexer ${pjson.version} */`
}
}).code;
if (minified) {
const replacements = [
[/Int\d+Array:(Int\d+Array)/g, '$1'],
[/Uint\d+Array:(Uint\d+Array)/g, '$1']
];
for (const [from, to] of replacements) {
minified = minified.replace(from, to);
}
}
writeFileSync('dist/lexer.asm.js', minified ? minified : jsSource);
'''

[[task]]
target = 'dist/lexer.cjs'
Expand All @@ -44,25 +77,25 @@ run = 'babel dist/lexer.js | terser -c -m -o dist/lexer.cjs'
target = 'dist/lexer.js'
deps = ['lib/lexer.wasm', 'src/lexer.js', 'package.json']
engine = 'node'
env = { MINIFY = '1' }
run = '''
import { readFileSync, writeFileSync } from 'fs';
import terser from 'terser';
const wasmBuffer = readFileSync('lib/lexer.wasm');
const jsSource = readFileSync('src/lexer.js').toString();
const pjson = JSON.parse(readFileSync('package.json').toString());
const jsSource = readFileSync('src/lexer.js', 'utf8');
const pjson = JSON.parse(readFileSync('package.json', 'utf8'));
const jsSourceProcessed = jsSource.replace('WASM_BINARY', wasmBuffer.toString('base64'));
const minified = process.env.MINIFY && terser.minify(jsSourceProcessed, {
const min = process.env.OPT === 'z';
const minified = min && terser.minify(jsSourceProcessed, {
module: true,
output: {
preamble: `/* es-module-lexer ${pjson.version} */`
}
});
}).code;
writeFileSync('dist/lexer.js', minified ? minified.code : jsSourceProcessed);
writeFileSync('dist/lexer.js', minified ? minified : jsSourceProcessed);
'''

[[task]]
Expand All @@ -83,14 +116,12 @@ env = { PYTHONHOME = '' }
run = """
${{ EMSDK_PATH }}/emsdk install 1.40.1-fastcomp
${{ EMSDK_PATH }}/emsdk activate 1.40.1-fastcomp
echo "PYTHON HOME: $PYTHONHOME"
${{ EMSCRIPTEN_BIN }} ./src/lexer.c -o lib/lexer.emcc.js -s WASM=0 -Oz --closure 1 \
${{ EMSCRIPTEN_BIN }} ./src/lexer.c -o lib/lexer.emcc.js -s WASM=0 -O${{OPT}} --closure 1 \
-s EXPORTED_FUNCTIONS="['_parse','_sa','_e','_ri','_re','_is','_ie','_ss','_ip','_se','_ai','_id','_es','_ee','_f','_setSource']" \
-s ERROR_ON_UNDEFINED_SYMBOLS=0 -s SINGLE_FILE=1 -s TOTAL_STACK=15312 -s --separate-asm
-s ERROR_ON_UNDEFINED_SYMBOLS=0 -s SINGLE_FILE=1 -s TOTAL_STACK=4997968 -s --separate-asm -Wno-logical-op-parentheses -Wno-parentheses
rm lib/lexer.emcc.js
# rm lib/lexer.emcc.js
"""

[[task]]
Expand All @@ -103,56 +134,41 @@ run = '''
const wrapper_start = readFileSync('src/lexer.asm.js', 'utf8');
let source = readFileSync('lib/lexer.emcc.asm.js', 'utf8').trim();
// wrapper
const header = 'Module["asm"]=(/** @suppress {uselessCode} */ function(';
if (!source.startsWith(header))
throw new Error('Unexpected source header');
source = 'function asmInit(' + source.slice(header.length);
if (!source.endsWith(')'))
throw new Error('Unexpected source end');
source = source.slice(0, -1);
// function renaming
const fnStart = source.indexOf('EMSCRIPTEN_END_FUNCS');
if (fnStart === -1)
throw new Error('Expected EMSCRIPTEN_END_FUNCS');
// "Manual" tree-shaking
const endFuncs = 'EMSCRIPTEN_END_FUNCS';
const removeFunc = name => [new RegExp(`function ${name}\\([^]+?}\\s*(function|return\\s?{[^{}]+};?\\s*}\\s*$)`), '$1'];
const replacements = [
[/,_/g, ','],
['stackAlloc', 'sta'],
['setSource', 'ses'],
['parse', 'p'],
['__errno_location:Ia,', ''],
[/function Ia\(.+?}function/, 'function'],
['__apply_relocations:Da,', ''],
[/function Da\(.+?}function/, 'function'],
[',free:A', ''],
[/function A\(.+?}function/, 'function'],
['malloc:y,memcpy:F,memset:G,', ''],
[/function G\(.+?}function/, 'function'],
['emscripten_get_sbrk_ptr:Ga,', ''],
[/function Ga\(.+?}function/, 'function'],
[',stackRestore:Ea,stackSave:Ha', ''],
[/function Ea\(.+?}function/, 'function'],
[/function Ha\(.+?}function/, 'function'],
[/Module\["asm"\]=\s?\(\/\*\* @suppress {uselessCode} \*\/ function\(/, 'function asmInit('],
[/\)$/, ''],
[/,\s?_(\w+):/g, ',$1:', null, endFuncs],
['stackAlloc:', 'sta:', null, endFuncs],
['setSource:', 'ses:', null, endFuncs],
['parse:', 'p:', null, endFuncs],
[/___errno_location:\s?(\w+),/, '', removeFunc, endFuncs],
[/_apply_relocations:\s?(\w+),/, '', removeFunc, endFuncs],
[/,\s?free:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?malloc:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?memcpy:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?memset:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?emscripten_get_sbrk_ptr:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?stackRestore:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?stackSave:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s*\w+\s?=\s?env\.\w+\|0,\s*\w+\s?=\s?env\.\w+\|0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0\.0,\s*\w+\s?=\s?env\.\w+,\s*\w+\s?=\s?env\.\w+,\s*\w+\s?=\s?env\.\w+,\s*\w+\s?=\s?env\.\w+,\s*\w+\s?=\s?env\.\w+,\s*\w+\s?=\s?env\.\w+/, ''],
[/,\s*\w+\s?=\s?\d+,\s*\w+\s?=\s?0.0;/, ';'],
[/function \w+\(\w+\)\s?{[^{}]+{[^{}s]+s\(\)[^{}]+}[^{}]+}/, ''],
[/\s*\/\/ EMSCRIPTEN_END_FUNCS\s*/, ''],
[/\s*\/\/ EMSCRIPTEN_START_FUNCS\s*/, ''],
[/function y\(.+?}function/, 'function'],
[/function F\(.+?}function/, 'function'],
[/function S\(.+?}function/, 'function'],
[',f=env.g|0,g=env.h|0,h=0,i=0,j=0,k=0,l=0,m=0,n=0,o=0.0,p=env.a,q=env.b,r=env.c,s=env.d,t=env.e,u=env.f', ''],
[',w=16128,x=0.0', ''],
[/global/g, 'g'],
[/buffer/g, 'f'],
[',env,', ',n,'],
];
for (const [from, to] of replacements) {
const replaced = source.replace(from, to);
if (source === replaced)
throw new Error(`Error on replacement ${from}`);
for (const [from, to, add, after] of replacements) {
const [matched, match] = source.match(from) || [];
if (!matched) {
console.log(source.slice(0, 1000));
throw new Error(`Match not found for ${from} -> ${to}${after ? `, after ${after}` : ''}`);
}
const afterIndex = after ? source.indexOf(after) : 0;
const replaced = source.slice(0, afterIndex) + source.slice(afterIndex).replace(from, to);
if (add) replacements.push(add(match));
source = replaced;
}
Expand Down
40 changes: 19 additions & 21 deletions lib/lexer.asm.js

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions lib/lexer.emcc.asm.js

Large diffs are not rendered by default.

Binary file modified lib/lexer.wasm
Binary file not shown.
36 changes: 17 additions & 19 deletions src/lexer.asm.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
let asm, asmBuffer, allocSize = 4194304, addr;
let asm, asmBuffer, allocSize = 131072, addr;

const isLE = new Uint8Array(new Uint16Array([1]).buffer)[0] === 1;
/**
 * Copies the UTF-16 code units of `src` into `outBuf16`, one element per code
 * unit, starting at index 0.
 *
 * The implementation is selected once, when this statement runs, by probing
 * the host byte order: the 16-bit value 1 is written and its first byte is
 * read back, which is 1 only on a little-endian host. On such a host the code
 * units are stored unchanged; otherwise the two bytes of every code unit are
 * swapped before storing, so the byte layout in the underlying buffer is the
 * same in both cases.
 *
 * @param {string} src - text whose code units are copied
 * @param {Uint16Array} outBuf16 - destination 16-bit view
 */
const copy = new Uint8Array(new Uint16Array([1]).buffer)[0] === 1
  ? function (src, outBuf16) {
    // Little-endian host: store each code unit as-is.
    for (let pos = 0, end = src.length; pos < end; pos++)
      outBuf16[pos] = src.charCodeAt(pos);
  }
  : function (src, outBuf16) {
    // Other hosts: exchange the low and high byte of each code unit.
    for (let pos = 0, end = src.length; pos < end; pos++) {
      const unit = src.charCodeAt(pos);
      outBuf16[pos] = (unit & 0xff) << 8 | unit >>> 8;
    }
  };
const words = 'xportmportlassetafromssertvoyiedeleinstantyreturdebuggeawaithrwhileforifcatcfinallels';

let source, name;
export function parse (_source, _name = '@') {
Expand All @@ -9,14 +22,15 @@ export function parse (_source, _name = '@') {
if (source.length > allocSize || !asm) {
while (source.length > allocSize) allocSize *= 2;
asmBuffer = new ArrayBuffer(allocSize * 4);
copy(words, new Uint16Array(asmBuffer, 16, words.length));
asm = asmInit({ Int8Array, Int16Array, Int32Array, Uint8Array, Uint16Array }, {}, asmBuffer);
addr = asm.sta(allocSize * 2);
}
const len = source.length + 1;
asm.ses(addr);
asm.sa(len - 1);

(isLE ? copyLE : copyBE)(source, new Uint16Array(asmBuffer, addr, len));
copy(source, new Uint16Array(asmBuffer, addr, len));

if (!asm.p()) {
acornPos = asm.e();
Expand All @@ -39,22 +53,6 @@ export function parse (_source, _name = '@') {
return [imports, exports, !!asm.f()];
}

/**
 * Writes every UTF-16 code unit of `src` into `outBuf16` with its low and
 * high bytes exchanged, one element per code unit, starting at index 0.
 *
 * @param {string} src - text whose code units are copied
 * @param {Uint16Array} outBuf16 - destination 16-bit view
 */
function copyBE (src, outBuf16) {
  for (let pos = 0, end = src.length; pos < end; pos++) {
    const unit = src.charCodeAt(pos);
    // Low byte moves to the high position and vice versa.
    outBuf16[pos] = (unit & 0xff) << 8 | unit >>> 8;
  }
}

/**
 * Writes every UTF-16 code unit of `src` into `outBuf16` unchanged, one
 * element per code unit, starting at index 0.
 *
 * @param {string} src - text whose code units are copied
 * @param {Uint16Array} outBuf16 - destination 16-bit view
 */
function copyLE (src, outBuf16) {
  for (let pos = 0, end = src.length; pos < end; pos++)
    outBuf16[pos] = src.charCodeAt(pos);
}

/*
* Ported from Acorn
*
Expand Down
Loading