Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Temporarily downscale 8XXXXXX-L.jpg covers #9567

Merged
merged 3 commits into from
Jul 18, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 23 additions & 21 deletions openlibrary/coverstore/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,25 +255,32 @@ def notfound():
elif key != 'id':
value = self.query(category, key, value)

if not value or (value and safeint(value) in config.blocked_covers):
value = safeint(value)
if value is None or value in config.blocked_covers:
return notfound()

if 8_820_000 > value >= 8_000_000 and size == "L":
# This item is currently offline due to heavy traffic;
# Fix incoming in the next ~week; See:
# - https://webarchive.jira.com/browse/PBOX-3879
# - https://github.com/internetarchive/openlibrary/issues/9560
size = "M"

# redirect to archive.org cluster for large size and original images whenever possible
if size in ("L", "") and self.is_cover_in_cluster(value):
url = zipview_url_from_id(int(value), size)
url = zipview_url_from_id(value, size)
return web.found(url)

# covers_0008 batches [_00, _82] are tar'd / zip'd in archive.org items
if isinstance(value, int) or value.isnumeric(): # noqa: SIM102
if 8_820_000 > int(value) >= 8_000_000:
prefix = f"{size.lower()}_" if size else ""
pid = "%010d" % int(value)
item_id = f"{prefix}covers_{pid[:4]}"
item_tar = f"{prefix}covers_{pid[:4]}_{pid[4:6]}.tar"
item_file = f"{pid}{'-' + size.upper() if size else ''}"
path = f"{item_id}/{item_tar}/{item_file}.jpg"
protocol = web.ctx.protocol
return web.found(f"{protocol}://archive.org/download/{path}")
if 8_820_000 > value >= 8_000_000:
prefix = f"{size.lower()}_" if size else ""
pid = "%010d" % value
item_id = f"{prefix}covers_{pid[:4]}"
item_tar = f"{prefix}covers_{pid[:4]}_{pid[4:6]}.tar"
item_file = f"{pid}{'-' + size.upper() if size else ''}"
path = f"{item_id}/{item_tar}/{item_file}.jpg"
protocol = web.ctx.protocol
return web.found(f"{protocol}://archive.org/download/{path}")

d = self.get_details(value, size.lower())
if not d:
Expand Down Expand Up @@ -329,14 +336,9 @@ def get_ia_cover_url(self, identifier, size="M"):
h,
)

def get_details(self, coverid, size=""):
try:
coverid = int(coverid)
except ValueError:
return None

def get_details(self, coverid: int, size=""):
# Use tar index if available to avoid db query. We have 0-6M images in tar balls.
if isinstance(coverid, int) and coverid < 6000000 and size in "sml":
if coverid < 6000000 and size in "sml":
path = self.get_tar_filename(coverid, size)

if path:
Expand All @@ -350,12 +352,12 @@ def get_details(self, coverid, size=""):

return db.details(coverid)

def is_cover_in_cluster(self, coverid):
def is_cover_in_cluster(self, coverid: int):
"""Returns True if the cover is moved to archive.org cluster.
It is found by looking at the config variable max_coveritem_index.
"""
try:
return int(coverid) < IMAGES_PER_ITEM * config.get("max_coveritem_index", 0)
return coverid < IMAGES_PER_ITEM * config.get("max_coveritem_index", 0)
except (TypeError, ValueError):
return False

Expand Down
Loading