diff --git a/test/pdfs/issue18099_reduced.pdf b/test/pdfs/issue18099_reduced.pdf
new file mode 100644
index 00000000000000..02b7ac0ee40303
Binary files /dev/null and b/test/pdfs/issue18099_reduced.pdf differ
diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js
index 44f9de06b93730..9b287d59581f22 100644
--- a/test/unit/api_spec.js
+++ b/test/unit/api_spec.js
@@ -3418,6 +3418,23 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
 
       await loadingTask.destroy();
     });
+
+    it("gets text content, correctly handling documents with toUnicode cmaps that omit leading zeros on hex-encoded UTF-16", async function () {
+      // Regression test; the reduced fixture is named after issue 18099.
+      const loadingTask = getDocument(
+        buildGetDocumentParams("issue18099_reduced.pdf")
+      );
+      const pdfDoc = await loadingTask.promise;
+      const pdfPage = await pdfDoc.getPage(1);
+      const { items } = await pdfPage.getTextContent({
+        disableNormalization: true,
+      });
+      const text = mergeText(items);
+      expect(text).toEqual("Hello world!");
+
+      // Tear down the loading task, as the preceding test does.
+      await loadingTask.destroy();
+    });
 
     it("gets text content, and check that out-of-page text is not present (bug 1755201)", async function () {
       if (isNodeJS) {