From b72600dbdfd58127a7a4e97c38cbb36da6bfcb83 Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Mon, 15 Jul 2024 22:18:10 +0200 Subject: [PATCH 1/3] Temporarily downscale 8XXXXXX-L.jpg covers The backing IA item is struggling with traffic, but will be re-enabled once it is switched from .tar to .zip in the next week or so --- openlibrary/coverstore/code.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/openlibrary/coverstore/code.py b/openlibrary/coverstore/code.py index 351be881e56..f7a541f98bf 100644 --- a/openlibrary/coverstore/code.py +++ b/openlibrary/coverstore/code.py @@ -258,6 +258,13 @@ def notfound(): if not value or (value and safeint(value) in config.blocked_covers): return notfound() + if 9_000_000 > int(value) >= 8_000_000 and size == "L": + # This item is currently offline due to heavy traffic; + # Fix incoming in the next ~week; See: + # - https://webarchive.jira.com/browse/PBOX-3879 + # - https://github.com/internetarchive/openlibrary/issues/9560 + size = "M" + # redirect to archive.org cluster for large size and original images whenever possible if size in ("L", "") and self.is_cover_in_cluster(value): url = zipview_url_from_id(int(value), size) From 6eb9854a69e7bf4abe2977fb7250daae1be4bc1e Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Tue, 16 Jul 2024 14:59:58 +0200 Subject: [PATCH 2/3] Fix error on cover id not integer The code previously returned notfound in get_details if the value wasn't an int. Instead, push that logic up to avoid erroring unexpectedly, and avoid having to convert to an int multiple times. --- openlibrary/coverstore/code.py | 39 +++++++++++++++------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/openlibrary/coverstore/code.py b/openlibrary/coverstore/code.py index f7a541f98bf..770029030cc 100644 --- a/openlibrary/coverstore/code.py +++ b/openlibrary/coverstore/code.py @@ -255,10 +255,11 @@ def notfound(): elif key != 'id': value = self.query(category, key, value) - if not value or (value and safeint(value) in config.blocked_covers): + value = safeint(value) + if value is None or value in config.blocked_covers: return notfound() - if 9_000_000 > int(value) >= 8_000_000 and size == "L": + if 9_000_000 > value >= 8_000_000 and size == "L": # This item is currently offline due to heavy traffic; # Fix incoming in the next ~week; See: # - https://webarchive.jira.com/browse/PBOX-3879 @@ -267,20 +268,19 @@ def notfound(): # redirect to archive.org cluster for large size and original images whenever possible if size in ("L", "") and self.is_cover_in_cluster(value): - url = zipview_url_from_id(int(value), size) + url = zipview_url_from_id(value, size) return web.found(url) # covers_0008 batches [_00, _82] are tar'd / zip'd in archive.org items - if isinstance(value, int) or value.isnumeric(): # noqa: SIM102 - if 8_820_000 > int(value) >= 8_000_000: - prefix = f"{size.lower()}_" if size else "" - pid = "%010d" % int(value) - item_id = f"{prefix}covers_{pid[:4]}" - item_tar = f"{prefix}covers_{pid[:4]}_{pid[4:6]}.tar" - item_file = f"{pid}{'-' + size.upper() if size else ''}" - path = f"{item_id}/{item_tar}/{item_file}.jpg" - protocol = web.ctx.protocol - return web.found(f"{protocol}://archive.org/download/{path}") + if 8_820_000 > value >= 8_000_000: + prefix = f"{size.lower()}_" if size else "" + pid = "%010d" % value + item_id = f"{prefix}covers_{pid[:4]}" + item_tar = f"{prefix}covers_{pid[:4]}_{pid[4:6]}.tar" + item_file = f"{pid}{'-' + size.upper() if size else ''}" + path = f"{item_id}/{item_tar}/{item_file}.jpg" + protocol = web.ctx.protocol + return web.found(f"{protocol}://archive.org/download/{path}") d = self.get_details(value, size.lower()) if not d: @@ -336,14 +336,9 @@ def get_ia_cover_url(self, identifier, size="M"): h, ) - def get_details(self, coverid, size=""): - try: - coverid = int(coverid) - except ValueError: - return None - + def get_details(self, coverid: int, size=""): # Use tar index if available to avoid db query. We have 0-6M images in tar balls. - if isinstance(coverid, int) and coverid < 6000000 and size in "sml": + if coverid < 6000000 and size in "sml": path = self.get_tar_filename(coverid, size) if path: @@ -357,12 +352,12 @@ def get_details(self, coverid, size=""): return db.details(coverid) - def is_cover_in_cluster(self, coverid): + def is_cover_in_cluster(self, coverid: int): """Returns True if the cover is moved to archive.org cluster. It is found by looking at the config variable max_coveritem_index. """ try: - return int(coverid) < IMAGES_PER_ITEM * config.get("max_coveritem_index", 0) + return coverid < IMAGES_PER_ITEM * config.get("max_coveritem_index", 0) except (TypeError, ValueError): return False From b1083fb5d341c0ea11f6cafd8c7e9de94ddfad7f Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Tue, 16 Jul 2024 17:45:37 +0200 Subject: [PATCH 3/3] Only limit the .tar files --- openlibrary/coverstore/code.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openlibrary/coverstore/code.py b/openlibrary/coverstore/code.py index 770029030cc..66218a1da1a 100644 --- a/openlibrary/coverstore/code.py +++ b/openlibrary/coverstore/code.py @@ -259,7 +259,7 @@ def notfound(): if value is None or value in config.blocked_covers: return notfound() - if 9_000_000 > value >= 8_000_000 and size == "L": + if 8_820_000 > value >= 8_000_000 and size == "L": # This item is currently offline due to heavy traffic; # Fix incoming in the next ~week; See: # - https://webarchive.jira.com/browse/PBOX-3879