Skip to content

Commit

Permalink
Feat: Dynamic import refinements & fixes, optimization (#102)
Browse files Browse the repository at this point in the history
  • Loading branch information
guybedford authored Feb 22, 2022
1 parent f3f855e commit 5e6c44a
Show file tree
Hide file tree
Showing 10 changed files with 248 additions and 235 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
node_modules
lib/lexer.wat
lib/lexer.js
dist
package-lock.json
yarn.lock
lib/lexer.emcc.js
34 changes: 21 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,23 @@ import { init, parse } from 'es-module-lexer';
await init;

const source = `
import { name } from 'mod';
import { name } from 'mod\\u1011';
import json from './json.json' assert { type: 'json' }
export var p = 5;
export function q () {
};
// Comments provided to demonstrate edge cases
import /*comment!*/ ('asdf', { assert: { type: 'json' }});
import /*comment!*/ ( 'asdf', { assert: { type: 'json' }});
import /*comment!*/.meta.asdf;
`;

const [imports, exports] = parse(source, 'optional-sourcename');

// Returns "mod"
// Returns "modထ"
imports[0].n
// Returns "mod\u1011"
source.substring(imports[0].s, imports[0].e);
// "s" = start
// "e" = end
Expand All @@ -69,34 +70,41 @@ import { init, parse } from 'es-module-lexer';

// Returns "{ type: 'json' }"
source.substring(imports[1].a, imports[1].se);
// "a" = assert
// "a" = assert, -1 for no assertion

// Returns "p,q"
exports.toString();

// Dynamic imports are indicated by imports[2].d > -1
// In this case the "d" index is the start of the dynamic import
// In this case the "d" index is the start of the dynamic import bracket
// Returns true
imports[2].d > -1;

// Returns "asdf"
// Returns "asdf" (only for string literal dynamic imports)
imports[2].n
// Returns "import /*comment!*/ ( 'asdf', { assert: { type: 'json' } })"
source.substring(imports[2].ss, imports[2].se);
// Returns "'asdf'"
source.substring(imports[2].s, imports[2].e);
// Returns "import /*comment!*/ ("
source.substring(imports[2].d, imports[2].s);
// Returns "import /*comment!*/ ('asdf', { assert: { type: 'json' } })"
source.substring(imports[2].d, imports[2].se + 1);
// Returns "( 'asdf', { assert: { type: 'json' } })"
source.substring(imports[2].d, imports[2].se);
// Returns "{ assert: { type: 'json' } }"
source.substring(imports[2].a, imports[2].e);
// ss is the same as d
// as, ae not used for dynamic imports
source.substring(imports[2].a, imports[2].se - 1);

// For non-string dynamic import expressions:
// - n will be undefined
// - a is currently -1 even if there is an assertion
// - e is currently the character before the closing )

// For nested dynamic imports, the se value of the outer import is -1 as end tracking does not
// currently support nested dynamic imports

// import.meta is indicated by imports[2].d === -2
// Returns true
imports[2].d === -2;
// Returns "import /*comment!*/.meta"
source.substring(imports[2].s, imports[2].e);
// ss and se are the same for import meta
})();
```

Expand Down
130 changes: 73 additions & 57 deletions chompfile.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ default-task = "test"

extensions = ['chomp@0.1:npm', 'chomp@0.1:footprint']

[env]
OPT = 'z'

[env-default]
WASI_PATH = '../wasi-sdk-12.0'
EMSDK_PATH = '../emsdk'
Expand Down Expand Up @@ -33,7 +36,37 @@ run = 'node --expose-gc bench/index.js'
[[task]]
target = 'dist/lexer.asm.js'
deps = ['lib/lexer.asm.js']
run = 'terser lib/lexer.asm.js -c -m -o dist/lexer.asm.js'
engine = 'node'
run = '''
import { readFileSync, writeFileSync } from 'fs';
import terser from 'terser';
const jsSource = readFileSync('lib/lexer.asm.js', 'utf8');
const pjson = JSON.parse(readFileSync('package.json', 'utf8'));
const min = process.env.OPT === 'z';
let minified = min && terser.minify(jsSource, {
module: true,
compress: {
ecma: 6,
unsafe: true,
},
output: {
preamble: `/* es-module-lexer ${pjson.version} */`
}
}).code;
if (minified) {
const replacements = [
[/Int\d+Array:(Int\d+Array)/g, '$1'],
[/Uint\d+Array:(Uint\d+Array)/g, '$1']
];
for (const [from, to] of replacements) {
minified = minified.replace(from, to);
}
}
writeFileSync('dist/lexer.asm.js', minified ? minified : jsSource);
'''

[[task]]
target = 'dist/lexer.cjs'
Expand All @@ -44,25 +77,25 @@ run = 'babel dist/lexer.js | terser -c -m -o dist/lexer.cjs'
target = 'dist/lexer.js'
deps = ['lib/lexer.wasm', 'src/lexer.js', 'package.json']
engine = 'node'
env = { MINIFY = '1' }
run = '''
import { readFileSync, writeFileSync } from 'fs';
import terser from 'terser';
const wasmBuffer = readFileSync('lib/lexer.wasm');
const jsSource = readFileSync('src/lexer.js').toString();
const pjson = JSON.parse(readFileSync('package.json').toString());
const jsSource = readFileSync('src/lexer.js', 'utf8');
const pjson = JSON.parse(readFileSync('package.json', 'utf8'));
const jsSourceProcessed = jsSource.replace('WASM_BINARY', wasmBuffer.toString('base64'));
const minified = process.env.MINIFY && terser.minify(jsSourceProcessed, {
const min = process.env.OPT === 'z';
const minified = min && terser.minify(jsSourceProcessed, {
module: true,
output: {
preamble: `/* es-module-lexer ${pjson.version} */`
}
});
}).code;
writeFileSync('dist/lexer.js', minified ? minified.code : jsSourceProcessed);
writeFileSync('dist/lexer.js', minified ? minified : jsSourceProcessed);
'''

[[task]]
Expand All @@ -83,14 +116,12 @@ env = { PYTHONHOME = '' }
run = """
${{ EMSDK_PATH }}/emsdk install 1.40.1-fastcomp
${{ EMSDK_PATH }}/emsdk activate 1.40.1-fastcomp
echo "PYTHON HOME: $PYTHONHOME"
${{ EMSCRIPTEN_BIN }} ./src/lexer.c -o lib/lexer.emcc.js -s WASM=0 -Oz --closure 1 \
${{ EMSCRIPTEN_BIN }} ./src/lexer.c -o lib/lexer.emcc.js -s WASM=0 -O${{OPT}} --closure 1 \
-s EXPORTED_FUNCTIONS="['_parse','_sa','_e','_ri','_re','_is','_ie','_ss','_ip','_se','_ai','_id','_es','_ee','_f','_setSource']" \
-s ERROR_ON_UNDEFINED_SYMBOLS=0 -s SINGLE_FILE=1 -s TOTAL_STACK=15312 -s --separate-asm
-s ERROR_ON_UNDEFINED_SYMBOLS=0 -s SINGLE_FILE=1 -s TOTAL_STACK=4997968 -s --separate-asm -Wno-logical-op-parentheses -Wno-parentheses
rm lib/lexer.emcc.js
# rm lib/lexer.emcc.js
"""

[[task]]
Expand All @@ -103,56 +134,41 @@ run = '''
const wrapper_start = readFileSync('src/lexer.asm.js', 'utf8');
let source = readFileSync('lib/lexer.emcc.asm.js', 'utf8').trim();
// wrapper
const header = 'Module["asm"]=(/** @suppress {uselessCode} */ function(';
if (!source.startsWith(header))
throw new Error('Unexpected source header');
source = 'function asmInit(' + source.slice(header.length);
if (!source.endsWith(')'))
throw new Error('Unexpected source end');
source = source.slice(0, -1);
// function renaming
const fnStart = source.indexOf('EMSCRIPTEN_END_FUNCS');
if (fnStart === -1)
throw new Error('Expected EMSCRIPTEN_END_FUNCS');
// "Manual" tree-shaking
const endFuncs = 'EMSCRIPTEN_END_FUNCS';
const removeFunc = name => [new RegExp(`function ${name}\\([^]+?}\\s*(function|return\\s?{[^{}]+};?\\s*}\\s*$)`), '$1'];
const replacements = [
[/,_/g, ','],
['stackAlloc', 'sta'],
['setSource', 'ses'],
['parse', 'p'],
['__errno_location:Ia,', ''],
[/function Ia\(.+?}function/, 'function'],
['__apply_relocations:Da,', ''],
[/function Da\(.+?}function/, 'function'],
[',free:A', ''],
[/function A\(.+?}function/, 'function'],
['malloc:y,memcpy:F,memset:G,', ''],
[/function G\(.+?}function/, 'function'],
['emscripten_get_sbrk_ptr:Ga,', ''],
[/function Ga\(.+?}function/, 'function'],
[',stackRestore:Ea,stackSave:Ha', ''],
[/function Ea\(.+?}function/, 'function'],
[/function Ha\(.+?}function/, 'function'],
[/Module\["asm"\]=\s?\(\/\*\* @suppress {uselessCode} \*\/ function\(/, 'function asmInit('],
[/\)$/, ''],
[/,\s?_(\w+):/g, ',$1:', null, endFuncs],
['stackAlloc:', 'sta:', null, endFuncs],
['setSource:', 'ses:', null, endFuncs],
['parse:', 'p:', null, endFuncs],
[/___errno_location:\s?(\w+),/, '', removeFunc, endFuncs],
[/_apply_relocations:\s?(\w+),/, '', removeFunc, endFuncs],
[/,\s?free:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?malloc:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?memcpy:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?memset:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?emscripten_get_sbrk_ptr:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?stackRestore:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?stackSave:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s*\w+\s?=\s?env\.\w+\|0,\s*\w+\s?=\s?env\.\w+\|0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0\.0,\s*\w+\s?=\s?env\.\w+,\s*\w+\s?=\s?env\.\w+,\s*\w+\s?=\s?env\.\w+,\s*\w+\s?=\s?env\.\w+,\s*\w+\s?=\s?env\.\w+,\s*\w+\s?=\s?env\.\w+/, ''],
[/,\s*\w+\s?=\s?\d+,\s*\w+\s?=\s?0.0;/, ';'],
[/function \w+\(\w+\)\s?{[^{}]+{[^{}s]+s\(\)[^{}]+}[^{}]+}/, ''],
[/\s*\/\/ EMSCRIPTEN_END_FUNCS\s*/, ''],
[/\s*\/\/ EMSCRIPTEN_START_FUNCS\s*/, ''],
[/function y\(.+?}function/, 'function'],
[/function F\(.+?}function/, 'function'],
[/function S\(.+?}function/, 'function'],
[',f=env.g|0,g=env.h|0,h=0,i=0,j=0,k=0,l=0,m=0,n=0,o=0.0,p=env.a,q=env.b,r=env.c,s=env.d,t=env.e,u=env.f', ''],
[',w=16128,x=0.0', ''],
[/global/g, 'g'],
[/buffer/g, 'f'],
[',env,', ',n,'],
];
for (const [from, to] of replacements) {
const replaced = source.replace(from, to);
if (source === replaced)
throw new Error(`Error on replacement ${from}`);
for (const [from, to, add, after] of replacements) {
const [matched, match] = source.match(from) || [];
if (!matched) {
console.log(source.slice(0, 1000));
throw new Error(`Match not found for ${from} -> ${to}${after ? `, after ${after}` : ''}`);
}
const afterIndex = after ? source.indexOf(after) : 0;
const replaced = source.slice(0, afterIndex) + source.slice(afterIndex).replace(from, to);
if (add) replacements.push(add(match));
source = replaced;
}
Expand Down
40 changes: 19 additions & 21 deletions lib/lexer.asm.js

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions lib/lexer.emcc.asm.js

Large diffs are not rendered by default.

Binary file modified lib/lexer.wasm
Binary file not shown.
36 changes: 17 additions & 19 deletions src/lexer.asm.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
let asm, asmBuffer, allocSize = 4194304, addr;
let asm, asmBuffer, allocSize = 131072, addr;

const isLE = new Uint8Array(new Uint16Array([1]).buffer)[0] === 1;
/*
 * copy(src, outBuf16): writes the UTF-16 code units of the string `src` into
 * the 16-bit view `outBuf16`, one unit per element, starting at index 0.
 *
 * The implementation is picked once, at module evaluation, by probing how the
 * host lays out a Uint16 in memory: when the low byte comes first the code
 * units are stored as-is, otherwise the two bytes of every unit are swapped
 * before being stored.
 */
const copy = (function () {
  const probe = new Uint8Array(new Uint16Array([1]).buffer);
  if (probe[0] === 1) {
    // Low byte first: store each code unit directly.
    return function (src, outBuf16) {
      for (let pos = 0, total = src.length; pos < total; pos++)
        outBuf16[pos] = src.charCodeAt(pos);
    };
  }
  // High byte first: swap the two bytes of each code unit before storing.
  return function (src, outBuf16) {
    for (let pos = 0, total = src.length; pos < total; pos++) {
      const unit = src.charCodeAt(pos);
      const lowByte = unit & 0xff;
      const highByte = unit >>> 8;
      outBuf16[pos] = lowByte << 8 | highByte;
    }
  };
})();
const words = 'xportmportlassetafromssertvoyiedeleinstantyreturdebuggeawaithrwhileforifcatcfinallels';

let source, name;
export function parse (_source, _name = '@') {
Expand All @@ -9,14 +22,15 @@ export function parse (_source, _name = '@') {
if (source.length > allocSize || !asm) {
while (source.length > allocSize) allocSize *= 2;
asmBuffer = new ArrayBuffer(allocSize * 4);
copy(words, new Uint16Array(asmBuffer, 16, words.length));
asm = asmInit({ Int8Array, Int16Array, Int32Array, Uint8Array, Uint16Array }, {}, asmBuffer);
addr = asm.sta(allocSize * 2);
}
const len = source.length + 1;
asm.ses(addr);
asm.sa(len - 1);

(isLE ? copyLE : copyBE)(source, new Uint16Array(asmBuffer, addr, len));
copy(source, new Uint16Array(asmBuffer, addr, len));

if (!asm.p()) {
acornPos = asm.e();
Expand All @@ -39,22 +53,6 @@ export function parse (_source, _name = '@') {
return [imports, exports, !!asm.f()];
}

/**
 * Writes the UTF-16 code units of `src` into `outBuf16`, swapping the two
 * bytes of every unit before it is stored.
 *
 * @param {string} src - String whose code units are copied.
 * @param {Uint16Array} outBuf16 - Destination view, filled from index 0.
 */
function copyBE (src, outBuf16) {
  for (let pos = 0, total = src.length; pos < total; pos++) {
    const unit = src.charCodeAt(pos);
    const lowByte = unit & 0xff;
    const highByte = unit >>> 8;
    // The low byte moves to the high position and vice versa.
    outBuf16[pos] = lowByte << 8 | highByte;
  }
}

/**
 * Writes the UTF-16 code units of `src` into `outBuf16` unchanged, one unit
 * per element.
 *
 * @param {string} src - String whose code units are copied.
 * @param {Uint16Array} outBuf16 - Destination view, filled from index 0.
 */
function copyLE (src, outBuf16) {
  for (let pos = 0, total = src.length; pos < total; pos++)
    outBuf16[pos] = src.charCodeAt(pos);
}

/*
* Ported from Acorn
*
Expand Down
Loading

0 comments on commit 5e6c44a

Please sign in to comment.