Skip to content

Commit

Permalink
refactor: Inline replacers, xml code map
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 committed Mar 26, 2022
1 parent c86d836 commit ef4958a
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 46 deletions.
9 changes: 9 additions & 0 deletions src/encode.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ describe("Encode->decode test", () => {
"data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAALAAABAAEAAAIBRAA7";
expect(entities.decode(entities.encode(data))).toBe(data);
});

it("should HTML encode all ASCII characters", () => {
    // Round-trip every code unit in the 7-bit range (0..127): decoding the
    // encoded form must give back the original single-character string.
    for (let code = 0; code <= 0x7f; code++) {
        const original = String.fromCharCode(code);
        const roundTripped = entities.decodeHTML(
            entities.encodeHTML(original)
        );
        expect(roundTripped).toBe(original);
    }
});
});

describe("encodeNonAsciiHTML", () => {
Expand Down
56 changes: 10 additions & 46 deletions src/encode.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
import xmlMap from "./maps/xml.json";
import { encodeHTMLTrieRe, getCodePoint } from "./encode-trie";

import htmlMap from "./maps/entities.json";
/*
 * Precomputed character classes used to find the characters that need
 * encoding. All three carry the `g` flag, so they are stateful when used with
 * `exec`/`test` (via `lastIndex`).
 *
 * `htmlReplacer`: tab, newline, form feed, the ASCII punctuation listed in
 * the class (`!`..`,`, `.`, `/`, `:`..`@`, `[`..`` ` ``, `{`..`}`), and every
 * code unit from U+0080 to U+FFFF.
 * NOTE(review): `\f` is the form-feed escape, not the letter "f"; it is kept
 * exactly as it was — confirm whether that is intended.
 *
 * `xmlReplacer`: the five characters `"`, `&`, `'`, `<`, `>` plus every code
 * unit from U+0080 to U+FFFF.
 *
 * `xmlInvalidChars`: only the five characters `&`, `<`, `>`, `'`, `"`.
 *
 * A stray literal `$` used to sit in front of `\x80` in the first two classes.
 * Inside `[...]` a `$` is an ordinary character: it was redundant in
 * `htmlReplacer` (already covered by `!-,`) and made `xmlReplacer` match the
 * plain ASCII dollar sign, which is neither non-ASCII nor XML-significant.
 */
const htmlReplacer = /[\t\n!-,./:-@[-`\f{-}\x80-\uFFFF]/g;
const xmlReplacer = /["&'<>\x80-\uFFFF]/g;
const xmlInvalidChars = /[&<>'"]/g;

// Replacer patterns derived from the entity maps when the module is loaded.
// Passing `true` makes getCharRegExp keep only the ASCII (< 128) start
// characters found in the map and append the `\x80-\uFFFF` range to the
// character class; passing `false` uses the map's start characters alone.
const htmlReplacer = getCharRegExp(htmlMap, true);
const xmlReplacer = getCharRegExp(xmlMap, true);
// Matches only the characters that have an entry in the XML map.
const xmlInvalidChars = getCharRegExp(xmlMap, false);

/**
 * Lookup from the char code of the first character of each `xmlMap` value to
 * the matching named reference (`&name;`).
 */
const xmlCodeMap = new Map<number, string>();
for (const entityName of Object.keys(xmlMap)) {
    const entityValue = (xmlMap as Record<string, string>)[entityName];
    xmlCodeMap.set(entityValue.charCodeAt(0), `&${entityName};`);
}
/** Lookup from a character code to the named entity string written for it. */
const xmlCodeMap = new Map<number, string>([
    [0x22, "&quot;"], // "
    [0x26, "&amp;"], // &
    [0x27, "&apos;"], // '
    [0x3c, "&lt;"], // <
    [0x3e, "&gt;"], // >
]);

/**
* Encodes all non-ASCII characters, as well as characters not valid in XML
Expand Down Expand Up @@ -73,40 +71,6 @@ export function encodeNonAsciiHTML(data: string): string {
return encodeHTMLTrieRe(xmlReplacer, data);
}

/**
 * Builds a global (`g`) character-class regular expression from the first
 * character of every value in `map`.
 *
 * Each start character is prefixed with a backslash, the list is ordered by
 * char code and de-duplicated, and any run of three or more consecutive char
 * codes is collapsed into a `first-last` range.
 *
 * NOTE(review): the backslash is added unconditionally, so a start character
 * that forms a regex escape together with it (e.g. `f` -> `\f`, `d` -> `\d`)
 * is not matched literally. This rewrite keeps that behaviour as-is.
 *
 * @param map Entity name -> entity value; only the first character of each
 *     value is used.
 * @param nonAscii When true, start characters with a char code of 128 or
 *     above are dropped and the range `\x80-\uFFFF` is appended to the class.
 * @returns The assembled regular expression.
 */
function getCharRegExp(map: Record<string, string>, nonAscii: boolean): RegExp {
    // Entries look like `\X`, so the character of interest sits at index 1.
    const codeOf = (entry: string): number => entry.charCodeAt(1);

    let escaped = Object.keys(map).map(
        (name) => `\\${map[name].charAt(0)}`
    );
    if (nonAscii) {
        // The high range is appended wholesale below; keep ASCII only here.
        escaped = escaped.filter((entry) => codeOf(entry) < 128);
    }
    escaped.sort((left, right) => codeOf(left) - codeOf(right));

    // After sorting, duplicates are adjacent: keep the last one of each run.
    const unique = escaped.filter(
        (entry, idx, all) => entry !== all[idx + 1]
    );

    const parts: string[] = [];
    let first = 0;
    while (first < unique.length) {
        // Walk forward while the next char code is exactly one higher.
        let last = first;
        while (
            last + 1 < unique.length &&
            codeOf(unique[last]) + 1 === codeOf(unique[last + 1])
        ) {
            last++;
        }

        if (last - first + 1 >= 3) {
            // A range only pays off for three or more characters.
            parts.push(`${unique[first]}-${unique[last]}`);
            first = last + 1;
        } else {
            parts.push(unique[first]);
            first++;
        }
    }

    const highRange = nonAscii ? "\\x80-\\uFFFF" : "";
    return new RegExp(`[${parts.join("")}${highRange}]`, "g");
}

/**
* Encodes all non-ASCII characters, as well as characters not valid in XML
* documents using numeric hexadecimal reference (eg. `&#xfc;`).
Expand Down

0 comments on commit ef4958a

Please sign in to comment.