Skip to content

Commit

Permalink
[Fixes #10134] New simple renderer to generate thumbnails for PDFs (#…
Browse files Browse the repository at this point in the history
…10135) (#10136)

* PDF thumbnail renderer

* - add unit tests

* command to generate thumbnails for docs

* flake fix

* renamed management command

* add requirement to setup.cfg

* make command similar to other sync commands

* removed unused import

* fix flake8

Co-authored-by: marthamareal <marthamareal@gmail.com>

Co-authored-by: Giovanni Allegri <giohappy@gmail.com>
Co-authored-by: marthamareal <marthamareal@gmail.com>
  • Loading branch information
3 people authored Oct 12, 2022
1 parent b17f801 commit 57b84fc
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 0 deletions.
48 changes: 48 additions & 0 deletions geonode/documents/management/commands/sync_geonode_documents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#########################################################################
#
# Copyright (C) 2022 OSGeo
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#########################################################################

import logging

from django.core.management.base import BaseCommand
from geonode.documents.models import Document
from geonode.documents.tasks import create_document_thumbnail

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Management command that updates stored documents.

    The only operation currently implemented is generating thumbnails for
    documents that do not have one yet (``--updatethumbnails``).
    """

    help = ("Update documents. For the moment only thumbnails can be updated")

    def add_arguments(self, parser):
        """Register the ``--updatethumbnails`` boolean flag (off by default)."""
        parser.add_argument(
            '--updatethumbnails',
            action='store_true',
            dest="updatethumbnails",
            default=False,
            help="Update the document thumbnails.")

    def handle(self, *args, **options):
        """Create thumbnails for documents whose ``thumbnail_url`` is empty.

        Nothing is done unless ``--updatethumbnails`` was passed. A failure
        on one document is logged (with traceback) and does not stop the
        processing of the remaining documents.
        """
        if not options.get('updatethumbnails'):
            # No other operation exists yet, so do not load every document
            # from the database just to skip all of them.
            return

        for doc in Document.objects.all():
            # Both None and '' mean "no thumbnail yet".
            if doc.thumbnail_url:
                continue
            try:
                create_document_thumbnail(doc.id)
            except Exception:
                # Best effort: keep going, but record why this one failed.
                logger.exception(f"[ERROR] Thumbnail for [{doc.name}] couldn't be created")
40 changes: 40 additions & 0 deletions geonode/documents/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#########################################################################
import os
import io

from PIL import Image
import fitz

from celery.utils.log import get_task_logger

Expand All @@ -31,6 +33,39 @@
logger = get_task_logger(__name__)


class DocumentRenderer():
    """Render a preview image for documents of supported file types.

    Every entry in ``FILETYPES`` needs a matching ``render_<filetype>``
    method; ``render`` looks that method up by name.
    """

    # Lower-case file extensions (without the leading dot) that can be rendered.
    FILETYPES = ['pdf']

    def __init__(self) -> None:
        pass

    def supports(self, filename):
        """Return True when the extension of ``filename`` is in ``FILETYPES``."""
        return self._get_filetype(filename) in self.FILETYPES

    def render(self, filename):
        """Return the rendered image content for ``filename``.

        Returns None when the file type is not supported or when the
        type-specific renderer could not produce an image.
        """
        if not self.supports(filename):
            return None
        # Dispatch to render_<filetype>, e.g. render_pdf.
        render = getattr(self, f'render_{self._get_filetype(filename)}')
        return render(filename)

    def render_pdf(self, filename):
        """Return the first page of the PDF ``filename`` as PNG bytes.

        Any error (unreadable file, document without pages, ...) is logged
        as a warning and None is returned instead.
        """
        try:
            # The context manager closes the document even if rendering fails.
            with fitz.open(filename) as doc:
                # Matrix(2.0, 2.0) scales the page by 2 on both axes.
                pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                return pix.pil_tobytes(format="PNG")
        except Exception as e:
            logger.warning(f'Could not generate thumbnail for {filename}: {e}')
            return None

    def _get_filetype(self, filname):
        """Return the lower-cased extension of ``filname`` without the dot.

        Lower-casing makes the check case-insensitive, so ``DOC.PDF`` is
        handled the same way as ``doc.pdf``. Returns '' if there is no
        extension.
        """
        return os.path.splitext(filname)[1][1:].lower()


# Shared module-level renderer instance, used when building document thumbnails.
doc_renderer = DocumentRenderer()


@app.task(
bind=True,
name='geonode.documents.tasks.create_document_thumbnail',
Expand Down Expand Up @@ -75,6 +110,11 @@ def create_document_thumbnail(self, object_id):
if image_file is not None:
image_file.close()

elif doc_renderer.supports(document.files[0]):
try:
thumbnail_content = doc_renderer.render(document.files[0])
except Exception as e:
print(e)
if not thumbnail_content:
logger.warning(f"Thumbnail for document #{object_id} empty.")
ResourceBase.objects.filter(id=document.id).update(thumbnail_url=None)
Expand Down
18 changes: 18 additions & 0 deletions geonode/documents/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ def test_non_image_documents_thumbnail(self):
def test_image_documents_thumbnail(self):
self.client.login(username='admin', password='admin')
try:
# test image doc
with open(os.path.join(f"{self.project_root}", "tests/data/img.gif"), "rb") as f:
data = {
'title': "img File Doc",
Expand All @@ -303,8 +304,25 @@ def test_image_documents_thumbnail(self):
self.assertEqual(file.size, (400, 200))
# check thumbnail quality and extension
self.assertEqual(file.format, 'JPEG')
# test pdf doc
with open(os.path.join(f"{self.project_root}", "tests/data/pdf_doc.pdf"), "rb") as f:
data = {
'title': "Pdf File Doc",
'doc_file': f,
'extension': 'pdf',
}
self.client.post(reverse('document_upload'), data=data)
d = Document.objects.get(title='Pdf File Doc')
self.assertIsNotNone(d.thumbnail_url)
thumb_file = os.path.join(
settings.MEDIA_ROOT, f"thumbs/{os.path.basename(urlparse(d.thumbnail_url).path)}"
)
file = Image.open(thumb_file)
# check thumbnail quality and extension
self.assertEqual(file.format, 'JPEG')
finally:
Document.objects.filter(title='img File Doc').delete()
Document.objects.filter(title='Pdf File Doc').delete()

def test_upload_document_form_size_limit(self):
form_data = {
Expand Down
Binary file added geonode/documents/tests/data/pdf_doc.pdf
Binary file not shown.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ zipstream-new==1.1.8
schema==0.7.5
rdflib==6.1.1
smart_open==6.2.0
PyMuPDF==1.20.2

# Django Apps
django-allauth==0.51.0
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ install_requires =
schema==0.7.5
rdflib==6.1.1
smart_open==6.2.0
PyMuPDF==1.20.2

# Django Apps
django-allauth==0.51.0
Expand Down

0 comments on commit 57b84fc

Please sign in to comment.