Skip to content

Commit

Permalink
Merge pull request #18493 from calixteman/fix_utf16_alt_text
Browse files Browse the repository at this point in the history
[Editor] Correctly save a non-ascii alt text
  • Loading branch information
calixteman authored Jul 24, 2024
2 parents cb73751 + c306562 commit 2efa3e4
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 27 deletions.
33 changes: 11 additions & 22 deletions src/core/annotation.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,12 @@ import {
escapeString,
getInheritableProperty,
getRotationMatrix,
isAscii,
isNumberArray,
lookupMatrix,
lookupNormalRect,
lookupRect,
numberToString,
stringToAsciiOrUTF16BE,
stringToUTF16String,
} from "./core_utils.js";
import {
Expand Down Expand Up @@ -2133,9 +2133,12 @@ class WidgetAnnotation extends Annotation {
value,
};

const encoder = val =>
isAscii(val) ? val : stringToUTF16String(val, /* bigEndian = */ true);
dict.set("V", Array.isArray(value) ? value.map(encoder) : encoder(value));
dict.set(
"V",
Array.isArray(value)
? value.map(stringToAsciiOrUTF16BE)
: stringToAsciiOrUTF16BE(value)
);
this.amendSavedDict(annotationStorage, dict);

const maybeMK = this._getMKDict(rotation);
Expand Down Expand Up @@ -3852,21 +3855,13 @@ class FreeTextAnnotation extends MarkupAnnotation {
freetext.set("Rect", rect);
const da = `/Helv ${fontSize} Tf ${getPdfColor(color, /* isFill */ true)}`;
freetext.set("DA", da);
freetext.set(
"Contents",
isAscii(value)
? value
: stringToUTF16String(value, /* bigEndian = */ true)
);
freetext.set("Contents", stringToAsciiOrUTF16BE(value));
freetext.set("F", 4);
freetext.set("Border", [0, 0, 0]);
freetext.set("Rotate", rotation);

if (user) {
freetext.set(
"T",
isAscii(user) ? user : stringToUTF16String(user, /* bigEndian = */ true)
);
freetext.set("T", stringToAsciiOrUTF16BE(user));
}

if (apRef || ap) {
Expand Down Expand Up @@ -4600,10 +4595,7 @@ class HighlightAnnotation extends MarkupAnnotation {
highlight.set("CA", opacity);

if (user) {
highlight.set(
"T",
isAscii(user) ? user : stringToUTF16String(user, /* bigEndian = */ true)
);
highlight.set("T", stringToAsciiOrUTF16BE(user));
}

if (apRef || ap) {
Expand Down Expand Up @@ -4885,10 +4877,7 @@ class StampAnnotation extends MarkupAnnotation {
stamp.set("Rotate", rotation);

if (user) {
stamp.set(
"T",
isAscii(user) ? user : stringToUTF16String(user, /* bigEndian = */ true)
);
stamp.set("T", stringToAsciiOrUTF16BE(user));
}

if (apRef || ap) {
Expand Down
5 changes: 5 additions & 0 deletions src/core/core_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,10 @@ function getNewAnnotationsMap(annotationStorage) {
return newAnnotationsByPage.size > 0 ? newAnnotationsByPage : null;
}

/**
 * Pick the representation used when writing a string: the string itself if
 * `isAscii` accepts it, otherwise the result of `stringToUTF16String` called
 * with `bigEndian = true`.
 *
 * @param {string} str - The string to convert.
 * @returns {string} `str` unchanged when it is ASCII-only, else its
 *   big-endian UTF-16 form as produced by `stringToUTF16String`.
 */
function stringToAsciiOrUTF16BE(str) {
  if (isAscii(str)) {
    // Nothing to encode: the input can be used as-is.
    return str;
  }
  return stringToUTF16String(str, /* bigEndian = */ true);
}

/**
 * Tell whether every UTF-16 code unit of a string lies in the range
 * 0x00-0x7F.
 *
 * @param {string} str - The string to inspect.
 * @returns {boolean} `true` if no code unit is above 0x7F (this includes the
 *   empty string), `false` otherwise.
 */
function isAscii(str) {
  // Search for a single offending code unit rather than matching the whole
  // string; the answer is the negation of that search.
  const hasNonAscii = /[^\x00-\x7F]/.test(str);
  return !hasNonAscii;
}
Expand Down Expand Up @@ -699,6 +703,7 @@ export {
readUint16,
readUint32,
recoverJsURL,
stringToAsciiOrUTF16BE,
stringToUTF16HexString,
stringToUTF16String,
toRomanNumerals,
Expand Down
9 changes: 5 additions & 4 deletions src/core/struct_tree.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import { AnnotationPrefix, stringToPDFString, warn } from "../shared/util.js";
import { Dict, isName, Name, Ref, RefSetCache } from "./primitives.js";
import { NumberTree } from "./name_number_tree.js";
import { stringToAsciiOrUTF16BE } from "./core_utils.js";
import { writeObject } from "./writer.js";

const MAX_DEPTH = 40;
Expand Down Expand Up @@ -316,19 +317,19 @@ class StructTreeRoot {
tagDict.set("S", Name.get(type));

if (title) {
tagDict.set("T", title);
tagDict.set("T", stringToAsciiOrUTF16BE(title));
}
if (lang) {
tagDict.set("Lang", lang);
}
if (alt) {
tagDict.set("Alt", alt);
tagDict.set("Alt", stringToAsciiOrUTF16BE(alt));
}
if (expanded) {
tagDict.set("E", expanded);
tagDict.set("E", stringToAsciiOrUTF16BE(expanded));
}
if (actualText) {
tagDict.set("ActualText", actualText);
tagDict.set("ActualText", stringToAsciiOrUTF16BE(actualText));
}

await this.#updateParentTag({
Expand Down
47 changes: 46 additions & 1 deletion test/unit/api_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -2524,6 +2524,21 @@ describe("api", function () {
alt: "Hello World",
},
});
// Test if an alt-text using utf-16 is correctly handled.
// The Mahjong tile code is 0x1F000.
pdfDoc.annotationStorage.setValue("pdfjs_internal_editor_1", {
annotationType: AnnotationEditorType.STAMP,
rect: [128, 400, 148, 420],
rotation: 0,
bitmap: structuredClone(bitmap),
bitmapId: "im2",
pageIndex: 0,
structTreeParentId: "p3R_mc14",
accessibilityData: {
type: "Figure",
alt: "Γειά σου with a Mahjong tile 🀀",
},
});

const data = await pdfDoc.saveDocument();
await loadingTask.destroy();
Expand All @@ -2532,7 +2547,7 @@ describe("api", function () {
pdfDoc = await loadingTask.promise;
const page = await pdfDoc.getPage(1);
const tree = await page.getStructTree();
const [predecessor, leaf] = findNode(
let [predecessor, leaf] = findNode(
null,
tree,
0,
Expand Down Expand Up @@ -2560,6 +2575,36 @@ describe("api", function () {
alt: "Hello World",
});

let count = 0;
[predecessor, leaf] = findNode(null, tree, 0, node => {
if (node.role === "Figure") {
count += 1;
return count === 2;
}
return false;
});

expect(predecessor).toEqual({
role: "Span",
children: [
{
type: "content",
id: "p3R_mc14",
},
],
});

expect(leaf).toEqual({
role: "Figure",
children: [
{
type: "annotation",
id: "pdfjs_internal_id_481R",
},
],
alt: "Γειά σου with a Mahjong tile 🀀",
});

await loadingTask.destroy();
});

Expand Down

0 comments on commit 2efa3e4

Please sign in to comment.