From 0c5d086286f613fa9346dbc43db65aab2d044121 Mon Sep 17 00:00:00 2001 From: itasli Date: Thu, 15 Jun 2023 13:33:19 +0000 Subject: [PATCH 1/2] Fix fitz camelCase deprecation and .PDF not being recognized as pdf file --- ppocr/utils/utility.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ppocr/utils/utility.py b/ppocr/utils/utility.py index 18357c8e97..5c53a5edaf 100755 --- a/ppocr/utils/utility.py +++ b/ppocr/utils/utility.py @@ -74,7 +74,7 @@ def get_image_file_list(img_file): def check_and_read(img_path): - if os.path.basename(img_path)[-3:] in ['gif', 'GIF']: + if os.path.basename(img_path)[-3:].lower() in ['gif']: gif = cv2.VideoCapture(img_path) ret, frame = gif.read() if not ret: @@ -85,19 +85,19 @@ def check_and_read(img_path): frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB) imgvalue = frame[:, :, ::-1] return imgvalue, True, False - elif os.path.basename(img_path)[-3:] in ['pdf']: + elif os.path.basename(img_path)[-3:].lower() in ['pdf']: import fitz from PIL import Image imgs = [] with fitz.open(img_path) as pdf: - for pg in range(0, pdf.pageCount): + for pg in range(0, pdf.page_count): page = pdf[pg] mat = fitz.Matrix(2, 2) - pm = page.getPixmap(matrix=mat, alpha=False) + pm = page.get_pixmap(matrix=mat, alpha=False) # if width or height > 2000 pixels, don't enlarge the image if pm.width > 2000 or pm.height > 2000: - pm = page.getPixmap(matrix=fitz.Matrix(1, 1), alpha=False) + pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False) img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples) img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR) From 4d7c6e43918b63b3c93cbfd3c8973b6e413dadc2 Mon Sep 17 00:00:00 2001 From: itasli Date: Tue, 4 Jul 2023 14:40:20 +0200 Subject: [PATCH 2/2] refactor get_image_file_list function --- ppocr/utils/utility.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ppocr/utils/utility.py b/ppocr/utils/utility.py index 5c53a5edaf..ebff2fe7f6 100755 --- a/ppocr/utils/utility.py +++ b/ppocr/utils/utility.py @@ -59,7 +59,6 @@ def get_image_file_list(img_file): if img_file is None or not os.path.exists(img_file): raise Exception("not found any img file in {}".format(img_file)) - img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif', 'pdf'} if os.path.isfile(img_file) and _check_image_file(img_file): imgs_lists.append(img_file) elif os.path.isdir(img_file): @@ -74,7 +73,7 @@ def get_image_file_list(img_file): def check_and_read(img_path): - if os.path.basename(img_path)[-3:].lower() in ['gif']: + if os.path.basename(img_path)[-3:].lower() == 'gif': gif = cv2.VideoCapture(img_path) ret, frame = gif.read() if not ret: @@ -85,7 +84,7 @@ def check_and_read(img_path): frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB) imgvalue = frame[:, :, ::-1] return imgvalue, True, False - elif os.path.basename(img_path)[-3:].lower() in ['pdf']: + elif os.path.basename(img_path)[-3:].lower() == 'pdf': import fitz from PIL import Image imgs = []