-
-
Notifications
You must be signed in to change notification settings - Fork 61
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore(scripts): Fix decode logic, add tests
The decode logic is useful for verifying that the encoded trie is sound.
- Loading branch information
Showing
3 changed files
with
176 additions
and
79 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
import { encodeTrie } from "./encode-trie.js"; | ||
import { decodeNode } from "./decode-trie.js"; | ||
|
||
import { getTrie } from "./trie.js"; | ||
|
||
import xmlMap from "../../maps/xml.json"; | ||
import entityMap from "../../maps/entities.json"; | ||
import legacyMap from "../../maps/legacy.json"; | ||
|
||
/**
 * Decodes an encoded trie into a flat object.
 *
 * Thin wrapper around `decodeNode`: starts at index 0 with an empty prefix
 * and collects everything `decodeNode` writes into a fresh result object.
 *
 * @param decodeMap Encoded trie to decode.
 * @returns The object populated by `decodeNode`.
 */
function decode(decodeMap: number[]): Record<string, string> {
    // Annotated explicitly; a bare `{}` literal would be inferred as type `{}`.
    const map: Record<string, string> = {};

    decodeNode(decodeMap, map, "", 0);

    return map;
}
|
||
/**
 * Builds the map a decoded trie is expected to equal.
 *
 * Entries of `legacy` are copied as-is; every key of `map` is added with a
 * trailing `;`. When both produce the same key, the entry from `map` wins.
 *
 * @param map Entries whose keys get a `;` appended.
 * @param legacy Entries whose keys are kept unchanged.
 * @returns A new object; neither argument is modified.
 */
function mergeMaps(
    map: Record<string, string>,
    legacy: Record<string, string>
): Record<string, string> {
    const combined: Record<string, string> = { ...legacy };

    for (const [name, replacement] of Object.entries(map)) {
        combined[`${name};`] = replacement;
    }

    return combined;
}
|
||
/*
 * Round-trip tests: tries are encoded with `encodeTrie`, decoded again with
 * `decode`, and the result is compared against the expected flat map.
 */
describe("decode_trie", () => {
    it("should decode an empty node", () => {
        const decoded = decode([0b0000_0000_0000_0000]);

        expect(decoded).toStrictEqual({});
    });

    it("should decode an empty encode", () => {
        const decoded = decode(encodeTrie({}));

        expect(decoded).toStrictEqual({});
    });

    it("should decode a node with a value", () => {
        const decoded = decode(encodeTrie({ value: "a" }));

        expect(decoded).toStrictEqual({ "": "a" });
    });

    it("should decode a node with a multi-byte value", () => {
        const decoded = decode(encodeTrie({ value: "ab" }));

        expect(decoded).toStrictEqual({ "": "ab" });
    });

    it("should decode a branch of size 1", () => {
        const lowerB = "b".charCodeAt(0);
        const encoded = encodeTrie({
            next: new Map([[lowerB, { value: "a" }]]),
        });

        expect(decode(encoded)).toStrictEqual({ b: "a" });
    });

    it("should decode a dictionary of size 2", () => {
        // "A" and "b" are far apart in char code.
        const upperA = "A".charCodeAt(0);
        const lowerB = "b".charCodeAt(0);
        const encoded = encodeTrie({
            next: new Map([
                [upperA, { value: "a" }],
                [lowerB, { value: "B" }],
            ]),
        });

        expect(decode(encoded)).toStrictEqual({ A: "a", b: "B" });
    });

    it("should decode a jump table of size 2", () => {
        // "a" and "b" have adjacent char codes.
        const lowerA = "a".charCodeAt(0);
        const lowerB = "b".charCodeAt(0);
        const encoded = encodeTrie({
            next: new Map([
                [lowerA, { value: "a" }],
                [lowerB, { value: "B" }],
            ]),
        });

        expect(decode(encoded)).toStrictEqual({ a: "a", b: "B" });
    });

    it("should decode the XML map", () => {
        const encoded = encodeTrie(getTrie(xmlMap, {}));

        expect(decode(encoded)).toStrictEqual(mergeMaps(xmlMap, {}));
    });

    // Slow test, so it is skipped unless explicitly enabled.
    it.skip("should decode the HTML map", () => {
        const encoded = encodeTrie(getTrie(entityMap, legacyMap));

        expect(decode(encoded)).toStrictEqual(mergeMaps(entityMap, legacyMap));
    });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
/**
 * Bit masks for the three fields packed into a node's header word.
 * Together they cover the low 16 bits without overlapping.
 */
enum BinTrieFlags {
    /** Top two bits (14-15): the node's value length. */
    VALUE_LENGTH = 0b11 << 14,
    /** Seven bits (7-13): the node's branch length. */
    BRANCH_LENGTH = 0b111_1111 << 7,
    /** Low seven bits (0-6): the node's jump offset. */
    JUMP_TABLE = 0b111_1111,
}
|
||
/**
 * Recursively decodes the trie node at `startIndex` and records every value
 * found at or below it in `resultMap`, keyed by the characters consumed to
 * reach that value.
 *
 * The node's header word is split with `BinTrieFlags` into a value length
 * (bits 14-15), a branch length (bits 7-13) and a jump offset (bits 0-6).
 *
 * Value length:
 *  - `1`: the value is the single char code stored in the header's lower
 *    14 bits; such a node has no children.
 *  - `2`: the value is the one char code in the word after the header.
 *  - `3`: the value is the two char codes in the two words after the header.
 *
 * Children (starting after the header and any value words):
 *  - branch length `0`, jump offset non-zero: a single child whose character
 *    is the jump offset; the child node follows immediately.
 *  - branch length `n`, jump offset non-zero: a jump table of `n` slots.
 *    Slot `i` stands for char code `jumpOffset + i` and holds the child's
 *    index plus one; a slot of `0` means there is no child for that char.
 *  - branch length `n`, jump offset `0`: a dictionary of `n` char codes
 *    followed by the `n` matching child indices.
 *
 * @param decodeMap Encoded trie.
 * @param resultMap Output object; mutated in place.
 * @param prefix Characters consumed on the way to this node.
 * @param startIndex Index of this node's header word in `decodeMap`.
 */
export function decodeNode(
    decodeMap: number[],
    resultMap: Record<string, string>,
    prefix: string,
    startIndex: number
): void {
    const current = decodeMap[startIndex];
    const valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;

    if (valueLength === 1) {
        // Inline value: strip the length bits; nothing else follows.
        resultMap[prefix] = String.fromCharCode(
            current & ~BinTrieFlags.VALUE_LENGTH
        );
        return;
    }

    if (valueLength === 2) {
        resultMap[prefix] = String.fromCharCode(decodeMap[startIndex + 1]);
    } else if (valueLength > 0) {
        resultMap[prefix] = String.fromCharCode(
            decodeMap[startIndex + 1],
            decodeMap[startIndex + 2]
        );
    }

    const branchLength = (current & BinTrieFlags.BRANCH_LENGTH) >> 7;
    const jumpOffset = current & BinTrieFlags.JUMP_TABLE;

    // No children at all.
    if (branchLength === 0 && jumpOffset === 0) {
        return;
    }

    // Children start after the header (one word) or after header + value words.
    const branchIdx = startIndex + Math.max(valueLength, 1);

    if (branchLength === 0) {
        // Single child: the jump offset is the character itself.
        decodeNode(
            decodeMap,
            resultMap,
            prefix + String.fromCharCode(jumpOffset),
            branchIdx
        );
        return;
    }

    if (jumpOffset !== 0) {
        // Jump table: slots hold `childIndex + 1`, with 0 marking a gap.
        for (let i = 0; i < branchLength; i++) {
            const childIndex = decodeMap[branchIdx + i] - 1;

            if (childIndex !== -1) {
                decodeNode(
                    decodeMap,
                    resultMap,
                    prefix + String.fromCharCode(jumpOffset + i),
                    childIndex
                );
            }
        }
    } else {
        // Dictionary: `branchLength` char codes, then `branchLength` indices.
        for (let i = 0; i < branchLength; i++) {
            const charCode = decodeMap[branchIdx + i];

            decodeNode(
                decodeMap,
                resultMap,
                prefix + String.fromCharCode(charCode),
                decodeMap[branchIdx + branchLength + i]
            );
        }
    }
}
This file was deleted.
Oops, something went wrong.