Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid re-parsing global images that failed decoding (issue 18042, PR 17428 follow-up) #18047

Merged
merged 1 commit into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 31 additions & 16 deletions src/core/evaluator.js
Original file line number Diff line number Diff line change
Expand Up @@ -793,30 +793,42 @@ class PartialEvaluator {
args = [objId, w, h];
operatorList.addImageOps(OPS.paintImageXObject, args, optionalContent);

// For large (at least 500x500) or more complex images that we'll cache
// globally, check if the image is still cached locally on the main-thread
// to avoid having to re-parse the image (since that can be slow).
if (
cacheGlobally &&
(w * h > 250000 || dict.has("SMask") || dict.has("Mask"))
) {
const localLength = await this.handler.sendWithPromise("commonobj", [
objId,
"CopyLocalImage",
{ imageRef },
]);

if (localLength) {
if (cacheGlobally) {
if (this.globalImageCache.hasDecodeFailed(imageRef)) {
this.globalImageCache.setData(imageRef, {
objId,
fn: OPS.paintImageXObject,
args,
optionalContent,
byteSize: 0, // Temporary entry, to avoid `setData` returning early.
byteSize: 0, // Data is `null`, since decoding failed previously.
});
this.globalImageCache.addByteSize(imageRef, localLength);

this._sendImgData(objId, /* imgData = */ null, cacheGlobally);
return;
}

// For large (at least 500x500) or more complex images that we'll cache
// globally, check if the image is still cached locally on the main-thread
// to avoid having to re-parse the image (since that can be slow).
if (w * h > 250000 || dict.has("SMask") || dict.has("Mask")) {
const localLength = await this.handler.sendWithPromise("commonobj", [
objId,
"CopyLocalImage",
{ imageRef },
]);

if (localLength) {
this.globalImageCache.setData(imageRef, {
objId,
fn: OPS.paintImageXObject,
args,
optionalContent,
byteSize: 0, // Temporary entry, to avoid `setData` returning early.
});
this.globalImageCache.addByteSize(imageRef, localLength);
return;
}
}
}

PDFImage.buildImage({
Expand Down Expand Up @@ -846,6 +858,9 @@ class PartialEvaluator {
.catch(reason => {
warn(`Unable to decode image "${objId}": "${reason}".`);

if (imageRef) {
this.globalImageCache.addDecodeFailed(imageRef);
}
return this._sendImgData(objId, /* imgData = */ null, cacheGlobally);
});

Expand Down
23 changes: 17 additions & 6 deletions src/core/image_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import {
unreachable,
warn,
} from "../shared/util.js";
import { RefSetCache } from "./primitives.js";
import { RefSet, RefSetCache } from "./primitives.js";

class BaseLocalCache {
constructor(options) {
Expand Down Expand Up @@ -178,6 +178,8 @@ class GlobalImageCache {

static MAX_BYTE_SIZE = 5 * MAX_IMAGE_SIZE_TO_CACHE;

#decodeFailedSet = new RefSet();

constructor() {
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
assert(
Expand All @@ -189,19 +191,19 @@ class GlobalImageCache {
this._imageCache = new RefSetCache();
}

get _byteSize() {
get #byteSize() {
let byteSize = 0;
for (const imageData of this._imageCache) {
byteSize += imageData.byteSize;
}
return byteSize;
}

get _cacheLimitReached() {
get #cacheLimitReached() {
if (this._imageCache.size < GlobalImageCache.MIN_IMAGES_TO_CACHE) {
return false;
}
if (this._byteSize < GlobalImageCache.MAX_BYTE_SIZE) {
if (this.#byteSize < GlobalImageCache.MAX_BYTE_SIZE) {
return false;
}
return true;
Expand All @@ -218,12 +220,20 @@ class GlobalImageCache {
if (pageIndexSet.size < GlobalImageCache.NUM_PAGES_THRESHOLD) {
return false;
}
if (!this._imageCache.has(ref) && this._cacheLimitReached) {
if (!this._imageCache.has(ref) && this.#cacheLimitReached) {
return false;
}
return true;
}

addDecodeFailed(ref) {
this.#decodeFailedSet.put(ref);
}

hasDecodeFailed(ref) {
return this.#decodeFailedSet.has(ref);
}

/**
* PLEASE NOTE: Must be called *after* the `setData` method.
*/
Expand Down Expand Up @@ -265,7 +275,7 @@ class GlobalImageCache {
if (this._imageCache.has(ref)) {
return;
}
if (this._cacheLimitReached) {
if (this.#cacheLimitReached) {
warn("GlobalImageCache.setData - cache limit reached.");
return;
}
Expand All @@ -274,6 +284,7 @@ class GlobalImageCache {

clear(onlyData = false) {
if (!onlyData) {
this.#decodeFailedSet.clear();
this._refCache.clear();
}
this._imageCache.clear();
Expand Down
2 changes: 1 addition & 1 deletion src/display/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -2806,7 +2806,7 @@ class WorkerTransport {

for (const pageProxy of this.#pageCache.values()) {
for (const [, data] of pageProxy.objs) {
if (data.ref !== imageRef) {
if (data?.ref !== imageRef) {
continue;
}
if (!data.dataLen) {
Expand Down
1 change: 1 addition & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
!issue17679.pdf
!issue17679_2.pdf
!issue18030.pdf
!issue18042.pdf
!issue14953.pdf
!issue15367.pdf
!issue15372.pdf
Expand Down
Binary file added test/pdfs/issue18042.pdf
Binary file not shown.
59 changes: 59 additions & 0 deletions test/unit/api_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -4203,6 +4203,65 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
firstStatsOverall = null;
});

it("caches image resources at the document/page level, with corrupt images (issue 18042)", async function () {
const { NUM_PAGES_THRESHOLD } = GlobalImageCache;

const loadingTask = getDocument(buildGetDocumentParams("issue18042.pdf"));
const pdfDoc = await loadingTask.promise;
let checkedGlobalDecodeFailed = false;

for (let i = 1; i <= pdfDoc.numPages; i++) {
const pdfPage = await pdfDoc.getPage(i);
const viewport = pdfPage.getViewport({ scale: 1 });

const canvasAndCtx = CanvasFactory.create(
viewport.width,
viewport.height
);
const renderTask = pdfPage.render({
canvasContext: canvasAndCtx.context,
viewport,
});

await renderTask.promise;
const opList = renderTask.getOperatorList();
// The canvas is no longer necessary, since we only care about
// the image-data below.
CanvasFactory.destroy(canvasAndCtx);

const { commonObjs, objs } = pdfPage;
const imgIndex = opList.fnArray.indexOf(OPS.paintImageXObject);
const [objId] = opList.argsArray[imgIndex];

if (i < NUM_PAGES_THRESHOLD) {
expect(objId).toEqual(`img_p${i - 1}_1`);

expect(objs.has(objId)).toEqual(true);
expect(commonObjs.has(objId)).toEqual(false);
} else {
expect(objId).toEqual(
`g_${loadingTask.docId}_img_p${NUM_PAGES_THRESHOLD - 1}_1`
);

expect(objs.has(objId)).toEqual(false);
expect(commonObjs.has(objId)).toEqual(true);
}

// Ensure that the actual image data is identical for all pages.
const objsPool = i >= NUM_PAGES_THRESHOLD ? commonObjs : objs;
const imgData = objsPool.get(objId);

expect(imgData).toBe(null);

if (i === NUM_PAGES_THRESHOLD) {
checkedGlobalDecodeFailed = true;
}
}
expect(checkedGlobalDecodeFailed).toBeTruthy();

await loadingTask.destroy();
});

it("render for printing, with `printAnnotationStorage` set", async function () {
async function getPrintData(printAnnotationStorage = null) {
const canvasAndCtx = CanvasFactory.create(
Expand Down