Skip to content

Commit

Permalink
fix(encode): Handle edge-case with surrogate pairs
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 committed Apr 7, 2022
1 parent aa1dae5 commit 413c48b
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 20 deletions.
42 changes: 22 additions & 20 deletions src/encode-trie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,36 +30,38 @@ export function encodeHTMLTrieRe(regExp: RegExp, str: string): string {

while ((match = regExp.exec(str)) !== null) {
const i = match.index;
ret += str.substring(lastIdx, i);
const char = str.charCodeAt(i);
let next = htmlTrie.get(char);

if (next != null) {
if (typeof next !== "string") {
// We are in a branch. Try to match the next char.
if (i + 1 < str.length) {
const value =
typeof next.n === "number"
? next.n === str.charCodeAt(i + 1)
? next.o
: undefined
: next.n.get(str.charCodeAt(i + 1));
if (typeof next === "object") {
// We are in a branch. Try to match the next char.
if (i + 1 < str.length) {
const nextChar = str.charCodeAt(i + 1);
const value =
typeof next.n === "number"
? next.n === nextChar
? next.o
: undefined
: next.n.get(nextChar);

if (value !== undefined) {
ret += str.substring(lastIdx, i) + value;
lastIdx = regExp.lastIndex += 1;
continue;
}
if (value !== undefined) {
ret += value;
lastIdx = regExp.lastIndex += 1;
continue;
}

// If we have a character without a value, use a numeric entity.
next = next.v ?? `&#x${char.toString(16)};`;
}

ret += str.substring(lastIdx, i) + next;
next = next.v;
}

// We might have a tree node without a value; skip and use a numeric entity.
if (next !== undefined) {
ret += next;
lastIdx = i + 1;
} else {
const cp = getCodePoint(str, i);
ret += `${str.substring(lastIdx, i)}&#x${cp.toString(16)};`;
ret += `&#x${cp.toString(16)};`;
// Increase by 1 if we have a surrogate pair
lastIdx = regExp.lastIndex += Number(cp !== char);
}
Expand Down
3 changes: 3 additions & 0 deletions src/encode.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ describe("Encode->decode test", () => {

it("should encode trailing parts of entities", () =>
expect(entities.encodeHTML("\ud835")).toBe("&#xd835;"));

it("should encode surrogate pair with first surrogate equivalent of entity, without corresponding entity", () =>
expect(entities.encodeHTML("\u{1d4a4}")).toBe("&#x1d4a4;"));
});

describe("encodeNonAsciiHTML", () => {
Expand Down

0 comments on commit 413c48b

Please sign in to comment.