From c5f92437f7ae45844f4265f7d55b961c55767f6e Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sat, 4 May 2024 15:59:07 +0200 Subject: [PATCH] Avoid re-parsing global images that failed decoding (issue 18042, PR 17428 follow-up) For images that failed to decode once we want to avoid a pointless round-trip to the main-thread, which could otherwise happen for globally cached images. --- src/core/evaluator.js | 47 ++++++++++++++++++++----------- src/core/image_utils.js | 23 +++++++++++---- src/display/api.js | 2 +- test/pdfs/.gitignore | 1 + test/pdfs/issue18042.pdf | Bin 0 -> 1247 bytes test/unit/api_spec.js | 59 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 109 insertions(+), 23 deletions(-) create mode 100644 test/pdfs/issue18042.pdf diff --git a/src/core/evaluator.js b/src/core/evaluator.js index e1bd19434137d..8a822954ac421 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -793,30 +793,42 @@ class PartialEvaluator { args = [objId, w, h]; operatorList.addImageOps(OPS.paintImageXObject, args, optionalContent); - // For large (at least 500x500) or more complex images that we'll cache - // globally, check if the image is still cached locally on the main-thread - // to avoid having to re-parse the image (since that can be slow). - if ( - cacheGlobally && - (w * h > 250000 || dict.has("SMask") || dict.has("Mask")) - ) { - const localLength = await this.handler.sendWithPromise("commonobj", [ - objId, - "CopyLocalImage", - { imageRef }, - ]); - - if (localLength) { + if (cacheGlobally) { + if (this.globalImageCache.hasDecodeFailed(imageRef)) { this.globalImageCache.setData(imageRef, { objId, fn: OPS.paintImageXObject, args, optionalContent, - byteSize: 0, // Temporary entry, to avoid `setData` returning early. + byteSize: 0, // Data is `null`, since decoding failed previously. }); - this.globalImageCache.addByteSize(imageRef, localLength); + + this._sendImgData(objId, /* imgData = */ null, cacheGlobally); return; } + + // For large (at least 500x500) or more complex images that we'll cache + // globally, check if the image is still cached locally on the main-thread + // to avoid having to re-parse the image (since that can be slow). + if (w * h > 250000 || dict.has("SMask") || dict.has("Mask")) { + const localLength = await this.handler.sendWithPromise("commonobj", [ + objId, + "CopyLocalImage", + { imageRef }, + ]); + + if (localLength) { + this.globalImageCache.setData(imageRef, { + objId, + fn: OPS.paintImageXObject, + args, + optionalContent, + byteSize: 0, // Temporary entry, to avoid `setData` returning early. + }); + this.globalImageCache.addByteSize(imageRef, localLength); + return; + } + } } PDFImage.buildImage({ @@ -846,6 +858,9 @@ class PartialEvaluator { .catch(reason => { warn(`Unable to decode image "${objId}": "${reason}".`); + if (imageRef) { + this.globalImageCache.addDecodeFailed(imageRef); + } return this._sendImgData(objId, /* imgData = */ null, cacheGlobally); }); diff --git a/src/core/image_utils.js b/src/core/image_utils.js index 021f4373fcfd4..110277c9e4f16 100644 --- a/src/core/image_utils.js +++ b/src/core/image_utils.js @@ -19,7 +19,7 @@ import { unreachable, warn, } from "../shared/util.js"; -import { RefSetCache } from "./primitives.js"; +import { RefSet, RefSetCache } from "./primitives.js"; class BaseLocalCache { constructor(options) { @@ -178,6 +178,8 @@ class GlobalImageCache { static MAX_BYTE_SIZE = 5 * MAX_IMAGE_SIZE_TO_CACHE; + #decodeFailedSet = new RefSet(); + constructor() { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { assert( @@ -189,7 +191,7 @@ class GlobalImageCache { this._imageCache = new RefSetCache(); } - get _byteSize() { + get #byteSize() { let byteSize = 0; for (const imageData of this._imageCache) { byteSize += imageData.byteSize; @@ -197,11 +199,11 @@ class GlobalImageCache { return byteSize; } - get _cacheLimitReached() { + get #cacheLimitReached() { if (this._imageCache.size < GlobalImageCache.MIN_IMAGES_TO_CACHE) { return false; } - if (this._byteSize < GlobalImageCache.MAX_BYTE_SIZE) { + if (this.#byteSize < GlobalImageCache.MAX_BYTE_SIZE) { return false; } return true; @@ -218,12 +220,20 @@ class GlobalImageCache { if (pageIndexSet.size < GlobalImageCache.NUM_PAGES_THRESHOLD) { return false; } - if (!this._imageCache.has(ref) && this._cacheLimitReached) { + if (!this._imageCache.has(ref) && this.#cacheLimitReached) { return false; } return true; } + addDecodeFailed(ref) { + this.#decodeFailedSet.put(ref); + } + + hasDecodeFailed(ref) { + return this.#decodeFailedSet.has(ref); + } + /** * PLEASE NOTE: Must be called *after* the `setData` method. */ @@ -265,7 +275,7 @@ class GlobalImageCache { if (this._imageCache.has(ref)) { return; } - if (this._cacheLimitReached) { + if (this.#cacheLimitReached) { warn("GlobalImageCache.setData - cache limit reached."); return; } @@ -274,6 +284,7 @@ class GlobalImageCache { clear(onlyData = false) { if (!onlyData) { + this.#decodeFailedSet.clear(); this._refCache.clear(); } this._imageCache.clear(); diff --git a/src/display/api.js b/src/display/api.js index aaedfcf3befaf..644021e495dd4 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -2806,7 +2806,7 @@ class WorkerTransport { for (const pageProxy of this.#pageCache.values()) { for (const [, data] of pageProxy.objs) { - if (data.ref !== imageRef) { + if (data?.ref !== imageRef) { continue; } if (!data.dataLen) { diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 1f1198aa80d8c..e458cc9085a09 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -55,6 +55,7 @@ !issue17679.pdf !issue17679_2.pdf !issue18030.pdf +!issue18042.pdf !issue14953.pdf !issue15367.pdf !issue15372.pdf diff --git a/test/pdfs/issue18042.pdf b/test/pdfs/issue18042.pdf new file mode 100644 index 0000000000000000000000000000000000000000..26c3b9c39895e9b9ce7bced2dcf9b7542f3cc606 GIT binary patch literal 1247 zcmc&!OODe(5Ss5E+!SI3o&@(ze{(k;v*kFN7W=5|EW-KOH zgG0y&%*%I61Tzv!%;f~a(7-a&hI+eoh~%nN;DPz9$ZEI(w{i8Fl{Ks1tk-S0O@>`C z?XU}0DlAnEyzwz#U{;8}TvJ?&UeDTm6e|`L8MZ`6Sj$zF5_4U{tBYiYscQbu7dCX8 zU|~oV?336lNYgzv2Ypzl=Ac`3Y7V;fEr($-S6G2LoC}2mOl3xTe2(QrO~GZlfwiho zER6N7>sj&HqX-t$k4zjg21ru9`G5Xl5@DjY*LJVpxj|TDfs}iJ(}+W%;7Y3 z2bgCa$73#a= NUM_PAGES_THRESHOLD ? commonObjs : objs; + const imgData = objsPool.get(objId); + + expect(imgData).toBe(null); + + if (i === NUM_PAGES_THRESHOLD) { + checkedGlobalDecodeFailed = true; + } + } + expect(checkedGlobalDecodeFailed).toBeTruthy(); + + await loadingTask.destroy(); + }); + it("render for printing, with `printAnnotationStorage` set", async function () { async function getPrintData(printAnnotationStorage = null) { const canvasAndCtx = CanvasFactory.create(