Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Fixes #10134] New simple renderer to generate thumbnails for PDFs #10135

Merged
merged 10 commits into from
Oct 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions geonode/documents/management/commands/sync_geonode_documents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#########################################################################
#
# Copyright (C) 2022 OSGeo
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#########################################################################

import logging

from django.core.management.base import BaseCommand
from geonode.documents.models import Document
from geonode.documents.tasks import create_document_thumbnail

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Management command that updates existing GeoNode documents.

    The only action currently implemented is ``--updatethumbnails``, which
    (re)creates the thumbnail of every document that does not have one yet.
    """

    help = ("Update documents. For the moment only thumbnails can be updated")

    def add_arguments(self, parser):
        """Register the ``--updatethumbnails`` boolean flag on *parser*."""
        parser.add_argument(
            '--updatethumbnails',
            action='store_true',
            dest="updatethumbnails",
            default=False,
            help="Update the document thumbnails.")

    def handle(self, *args, **options):
        """Create thumbnails for all documents whose ``thumbnail_url`` is empty.

        Does nothing unless ``--updatethumbnails`` was passed. A failure on
        one document is logged (with traceback) and does not stop the run.
        """
        # The flag does not change between documents: check it once and avoid
        # loading every Document row when there is nothing to do.
        if not options.get('updatethumbnails'):
            return

        for doc in Document.objects.all():
            # Covers both None and '' - documents with a thumbnail are skipped.
            if doc.thumbnail_url:
                continue
            try:
                create_document_thumbnail(doc.id)
            except Exception:
                # Best-effort batch job: keep going, but record the traceback
                # so the cause of the failure is not lost.
                logger.exception(f"[ERROR] Thumbnail for [{doc.name}] couldn't be created")
40 changes: 40 additions & 0 deletions geonode/documents/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#########################################################################
import os
import io

from PIL import Image
import fitz

from celery.utils.log import get_task_logger

Expand All @@ -31,6 +33,39 @@
logger = get_task_logger(__name__)


class DocumentRenderer():
    """Render a preview image for documents of a supported file type.

    Supported types are listed in ``FILETYPES``. Each type ``<ext>`` is
    handled by a ``render_<ext>`` method, which :meth:`render` looks up by
    name from the file extension.
    """

    # Lower-case file extensions (without the dot) that can be rendered.
    FILETYPES = ['pdf']

    def __init__(self) -> None:
        pass

    def supports(self, filename):
        """Return True when the extension of *filename* is in ``FILETYPES``."""
        return self._get_filetype(filename) in self.FILETYPES

    def render(self, filename):
        """Return the rendered image bytes for *filename*.

        Returns ``None`` when the file type is not supported or when the
        type-specific renderer could not produce an image.
        """
        content = None
        if self.supports(filename):
            filetype = self._get_filetype(filename)
            render = getattr(self, f'render_{filetype}')
            content = render(filename)
        return content

    def render_pdf(self, filename):
        """Render the first page of the PDF at *filename* as PNG bytes.

        Any failure (unreadable file, no pages, ...) is logged as a warning
        and reported to the caller as ``None``.
        """
        try:
            # Context manager closes the document even when rendering fails.
            with fitz.open(filename) as doc:
                # 2x zoom on both axes gives a sharper image than the default.
                pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                return pix.pil_tobytes(format="PNG")
        except Exception as e:
            logger.warning(f'Could not generate thumbnail for {filename}: {e}')
            return None

    def _get_filetype(self, filname):
        """Return the lower-cased extension of *filname* without the dot.

        Lower-casing makes the lookup case-insensitive, so ``REPORT.PDF`` is
        treated the same as ``report.pdf``. Returns ``''`` when there is no
        extension.
        """
        return os.path.splitext(filname)[1][1:].lower()


# Module-level renderer instance.
doc_renderer = DocumentRenderer()


@app.task(
bind=True,
name='geonode.documents.tasks.create_document_thumbnail',
Expand Down Expand Up @@ -75,6 +110,11 @@ def create_document_thumbnail(self, object_id):
if image_file is not None:
image_file.close()

elif doc_renderer.supports(document.files[0]):
try:
thumbnail_content = doc_renderer.render(document.files[0])
except Exception as e:
print(e)
if not thumbnail_content:
logger.warning(f"Thumbnail for document #{object_id} empty.")
ResourceBase.objects.filter(id=document.id).update(thumbnail_url=None)
Expand Down
18 changes: 18 additions & 0 deletions geonode/documents/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ def test_non_image_documents_thumbnail(self):
def test_image_documents_thumbnail(self):
self.client.login(username='admin', password='admin')
try:
# test image doc
with open(os.path.join(f"{self.project_root}", "tests/data/img.gif"), "rb") as f:
data = {
'title': "img File Doc",
Expand All @@ -303,8 +304,25 @@ def test_image_documents_thumbnail(self):
self.assertEqual(file.size, (400, 200))
# check thumbnail qualty and extention
self.assertEqual(file.format, 'JPEG')
# test pdf doc
with open(os.path.join(f"{self.project_root}", "tests/data/pdf_doc.pdf"), "rb") as f:
data = {
'title': "Pdf File Doc",
'doc_file': f,
'extension': 'pdf',
}
self.client.post(reverse('document_upload'), data=data)
d = Document.objects.get(title='Pdf File Doc')
self.assertIsNotNone(d.thumbnail_url)
thumb_file = os.path.join(
settings.MEDIA_ROOT, f"thumbs/{os.path.basename(urlparse(d.thumbnail_url).path)}"
)
file = Image.open(thumb_file)
# check thumbnail qualty and extention
self.assertEqual(file.format, 'JPEG')
finally:
Document.objects.filter(title='img File Doc').delete()
Document.objects.filter(title='Pdf File Doc').delete()

def test_upload_document_form_size_limit(self):
form_data = {
Expand Down
Binary file added geonode/documents/tests/data/pdf_doc.pdf
Binary file not shown.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ zipstream-new==1.1.8
schema==0.7.5
rdflib==6.1.1
smart_open==6.2.0
PyMuPDF==1.20.2

# Django Apps
django-allauth==0.51.0
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ install_requires =
schema==0.7.5
rdflib==6.1.1
smart_open==6.2.0
PyMuPDF==1.20.2

# Django Apps
django-allauth==0.51.0
Expand Down