Skip to content

Commit

Permalink
chore(scripts): Fix decode logic, add tests
Browse files (browse the repository at this point in the history)
The decode logic is useful for verifying that the encoded trie is sound.
  • Loading branch information
fb55 committed Apr 7, 2022
1 parent bf89c65 commit 6cb56f3
Show file tree
Hide file tree
Showing 3 changed files with 176 additions and 79 deletions.
97 changes: 97 additions & 0 deletions scripts/trie/decode-trie.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import { encodeTrie } from "./encode-trie.js";
import { decodeNode } from "./decode-trie.js";

import { getTrie } from "./trie.js";

import xmlMap from "../../maps/xml.json";
import entityMap from "../../maps/entities.json";
import legacyMap from "../../maps/legacy.json";

/**
 * Decodes a complete encoded trie into a flat map.
 *
 * Decoding starts at index 0 of `decodeMap` with an empty prefix, so every key
 * of the result is the full character path from the root to a stored value.
 *
 * @param decodeMap Encoded trie as an array of numbers.
 * @returns Map from character path to the value stored at that path.
 */
function decode(decodeMap: number[]): Record<string, string> {
    // Typed explicitly so the helper's return type is not inferred as `{}`.
    const map: Record<string, string> = {};
    decodeNode(decodeMap, map, "", 0);

    return map;
}

/**
 * Builds the flat map a decoded trie is compared against.
 *
 * Starts from a copy of `legacy`, then adds every entry of `map` under its key
 * with a trailing `;` appended. An entry from `map` replaces a `legacy` entry
 * that already uses the same `;`-terminated key.
 *
 * @param map Entries whose keys get a `;` suffix.
 * @param legacy Entries copied over with their keys unchanged.
 * @returns A new object; neither input is modified.
 */
function mergeMaps(
    map: Record<string, string>,
    legacy: Record<string, string>
): Record<string, string> {
    const merged: Record<string, string> = { ...legacy };

    for (const [name, value] of Object.entries(map)) {
        merged[`${name};`] = value;
    }

    return merged;
}

// Round-trip checks: encode a trie, decode it again, and compare the flat map.
describe("decode_trie", () => {
    /** Char code of a single-character branch key. */
    const code = (char: string): number => char.charCodeAt(0);

    it("should decode an empty node", () => {
        // A header with no bits set carries neither a value nor branches.
        const emptyNode = [0b0000_0000_0000_0000];

        expect(decode(emptyNode)).toStrictEqual({});
    });

    it("should decode an empty encode", () => {
        expect(decode(encodeTrie({}))).toStrictEqual({});
    });

    it("should decode a node with a value", () => {
        expect(decode(encodeTrie({ value: "a" }))).toStrictEqual({ "": "a" });
    });

    it("should decode a node with a multi-byte value", () => {
        expect(decode(encodeTrie({ value: "ab" }))).toStrictEqual({
            "": "ab",
        });
    });

    it("should decode a branch of size 1", () => {
        const encoded = encodeTrie({
            next: new Map([[code("b"), { value: "a" }]]),
        });

        expect(decode(encoded)).toStrictEqual({ b: "a" });
    });

    it("should decode a dictionary of size 2", () => {
        const encoded = encodeTrie({
            next: new Map([
                [code("A"), { value: "a" }],
                [code("b"), { value: "B" }],
            ]),
        });

        expect(decode(encoded)).toStrictEqual({ A: "a", b: "B" });
    });

    it("should decode a jump table of size 2", () => {
        const encoded = encodeTrie({
            next: new Map([
                [code("a"), { value: "a" }],
                [code("b"), { value: "B" }],
            ]),
        });

        expect(decode(encoded)).toStrictEqual({ a: "a", b: "B" });
    });

    it("should decode the XML map", () => {
        const encoded = encodeTrie(getTrie(xmlMap, {}));

        expect(decode(encoded)).toStrictEqual(mergeMaps(xmlMap, {}));
    });

    // Test takes a long time — skipped by default
    it.skip("should decode the HTML map", () => {
        const encoded = encodeTrie(getTrie(entityMap, legacyMap));

        expect(decode(encoded)).toStrictEqual(mergeMaps(entityMap, legacyMap));
    });
});
79 changes: 79 additions & 0 deletions scripts/trie/decode-trie.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/**
 * Bit masks that split the 16-bit header of an encoded trie node into the
 * three fields `decodeNode` reads.
 */
enum BinTrieFlags {
    /** Bits 14–15: how the node's value is stored (0 means no value). */
    VALUE_LENGTH = 0b11 << 14,
    /** Bits 7–13: number of branch entries that follow the node. */
    BRANCH_LENGTH = 0b111_1111 << 7,
    /**
     * Bits 0–6: the char code of the only branch when the branch count is
     * zero, otherwise the char code of the first jump-table entry (zero when
     * the branches are stored as a dictionary).
     */
    JUMP_TABLE = 0b111_1111,
}

/**
 * Recursively decodes the trie node at `startIndex` and records every value
 * reachable from it in `resultMap`, keyed by the character path to that value.
 *
 * The node header (`decodeMap[startIndex]`) is split with `BinTrieFlags`:
 *
 * - Value length 1: the value is a single char code stored in the header's
 *   lower 14 bits; such a node is treated as having no branches.
 * - Value length 2: the value is the char code in the next element.
 * - Value length 3: the value is the two char codes in the next two elements.
 *
 * Branches start right after the header and any out-of-line value:
 *
 * - Branch count 0 with a non-zero jump offset: a single branch whose char
 *   code is the jump offset and whose node follows immediately.
 * - Branch count > 0 with a non-zero jump offset: a jump table. Entry `i`
 *   belongs to char code `jumpOffset + i` and holds the child's index plus
 *   one; an entry of 0 means there is no child for that char code.
 * - Branch count > 0 with a zero jump offset: a dictionary of `branchLength`
 *   char codes followed by `branchLength` child indices.
 *
 * @param decodeMap Encoded trie as an array of numbers.
 * @param resultMap Output map; mutated in place.
 * @param prefix Characters consumed on the way to this node.
 * @param startIndex Index of this node's header within `decodeMap`.
 */
export function decodeNode(
    decodeMap: number[],
    resultMap: Record<string, string>,
    prefix: string,
    startIndex: number
): void {
    const current = decodeMap[startIndex];
    const valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;

    if (valueLength > 0) {
        resultMap[prefix] =
            valueLength === 1
                ? // Inline value: strip the length bits from the header.
                  String.fromCharCode(
                      decodeMap[startIndex] & ~BinTrieFlags.VALUE_LENGTH
                  )
                : valueLength === 2
                ? String.fromCharCode(decodeMap[startIndex + 1])
                : String.fromCharCode(
                      decodeMap[startIndex + 1],
                      decodeMap[startIndex + 2]
                  );
    }

    const branchLength = (current & BinTrieFlags.BRANCH_LENGTH) >> 7;
    const jumpOffset = current & BinTrieFlags.JUMP_TABLE;

    // An inline value occupies the bits that would describe branches, and a
    // header with neither field set is a leaf.
    if (valueLength === 1 || (branchLength === 0 && jumpOffset === 0)) {
        return;
    }

    // Skip the header plus any value stored in the following elements.
    const branchIdx = startIndex + Math.max(valueLength, 1);

    // Single branch: the jump offset is the char code itself.
    if (branchLength === 0) {
        return decodeNode(
            decodeMap,
            resultMap,
            prefix + String.fromCharCode(jumpOffset),
            branchIdx
        );
    }

    if (jumpOffset !== 0) {
        // Jump table: entries are child indices offset by one, 0 means empty.
        for (let i = 0; i < branchLength; i++) {
            const val = decodeMap[branchIdx + i] - 1;
            if (val !== -1) {
                const code = jumpOffset + i;

                decodeNode(
                    decodeMap,
                    resultMap,
                    prefix + String.fromCharCode(code),
                    val
                );
            }
        }
    } else {
        // Dictionary: char codes first, then the matching child indices.
        for (let i = 0; i < branchLength; i++) {
            decodeNode(
                decodeMap,
                resultMap,
                prefix + String.fromCharCode(decodeMap[branchIdx + i]),
                decodeMap[branchIdx + branchLength + i]
            );
        }
    }
}
79 changes: 0 additions & 79 deletions scripts/trie/utils.ts

This file was deleted.

0 comments on commit 6cb56f3

Please sign in to comment.