From bf650c8e0c6bbf41cb7a476b7439ddbf6f5f6f24 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Wed, 20 Nov 2024 23:18:20 +0800 Subject: [PATCH 01/12] fix: compatible with albumentations=1.4.* --- cnocr/data_utils/transforms.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/cnocr/data_utils/transforms.py b/cnocr/data_utils/transforms.py index a09085a..cbf627b 100644 --- a/cnocr/data_utils/transforms.py +++ b/cnocr/data_utils/transforms.py @@ -138,14 +138,13 @@ def apply(self, img, **params): return img -class RandomStretchAug(alb.Resize): +class RandomStretchAug(ImageOnlyTransform): """保持高度不变的情况下,对图像的宽度进行随机拉伸""" + def __init__( - self, min_ratio=0.9, max_ratio=1.1, min_width=8, always_apply=False, p=1 - ): - super(RandomStretchAug, self).__init__( - height=0, width=0, always_apply=always_apply, p=p - ) + self, min_ratio=0.9, max_ratio=1.1, min_width=8, always_apply=False, p=1 + ): + super().__init__(always_apply=always_apply, p=p) self.min_width = min_width self.min_ratio = min_ratio self.max_ratio = max_ratio @@ -171,7 +170,7 @@ def __init__(self, crop_size, always_apply=False, p=1.0): always_apply (bool): Whether to always apply the crop. Defaults to False. p (float): The probability of applying the crop. Defaults to 1.0. """ - super(CustomRandomCrop, self).__init__(always_apply, p) + super().__init__(always_apply=always_apply, p=p) self.crop_size = crop_size def cal_params(self, img): @@ -210,7 +209,7 @@ class TransparentOverlay(ImageOnlyTransform): def __init__( self, max_height_ratio, max_width_ratio, alpha, always_apply=False, p=1.0 ): - super(TransparentOverlay, self).__init__(always_apply, p) + super().__init__(always_apply=always_apply, p=p) self.max_height_ratio = max_height_ratio self.max_width_ratio = max_width_ratio self.alpha = alpha @@ -316,9 +315,9 @@ def __call__(self, ori_image: torch.Tensor) -> torch.Tensor: _train_alb_transform = alb.Compose( [ - CustomRandomCrop((8, 10), p=0.8), + CustomRandomCrop(crop_size=(8, 10), always_apply=False, p=0.8), alb.OneOf([Erosion((2, 3)), Dilation((2, 3))], p=0.1), - TransparentOverlay(1.0, 0.1, alpha=0.4, p=0.2), # 半透明的矩形框覆盖 + TransparentOverlay(1.0, 0.1, alpha=0.4, always_apply=False, p=0.2), # 半透明的矩形框覆盖 alb.Affine(shear={"x": (0, 3), "y": (-3, 0)}, cval=(255, 255, 255), p=0.03), alb.ShiftScaleRotate( shift_limit_x=(0, 0.04), @@ -382,9 +381,9 @@ def __call__(self, ori_image: torch.Tensor) -> torch.Tensor: _ft_alb_transform = alb.Compose( [ - CustomRandomCrop((4, 4), p=0.8), + CustomRandomCrop(crop_size=(4, 4), always_apply=False, p=0.8), alb.OneOf([Erosion((2, 3)), Dilation((2, 3))], p=0.1), - TransparentOverlay(1.0, 0.1, alpha=0.4, p=0.2), # 半透明的矩形框覆盖 + TransparentOverlay(1.0, 0.1, alpha=0.4, always_apply=False, p=0.2), # 半透明的矩形框覆盖 alb.RandomBrightnessContrast(0.1, 0.1, True, p=0.1), alb.ImageCompression(95, p=0.3), alb.GaussNoise(20, p=0.2), @@ -413,7 +412,7 @@ def __call__(self, ori_image: torch.Tensor) -> torch.Tensor: _test_alb_transform = alb.Compose( [ - CustomRandomCrop((6, 8), p=0.8), + CustomRandomCrop(crop_size=(6, 8), p=0.8), ToSingleChannelGray(always_apply=True), CustomNormalize(always_apply=True), # alb.Normalize(0.456045, 0.224567, always_apply=True), From 1c535d22986dc8c590c653da68696999cacfb823 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Wed, 20 Nov 2024 23:35:42 +0800 Subject: [PATCH 02/12] fix: transform_func fails to serialize when multiple processes are used --- cnocr/dataset_utils.py | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/cnocr/dataset_utils.py b/cnocr/dataset_utils.py index 188baa2..361b9b8 100644 --- a/cnocr/dataset_utils.py +++ b/cnocr/dataset_utils.py @@ -22,6 +22,8 @@ from datasets import Dataset, Image import numpy as np import torch +import os +from pathlib import Path from .consts import IMG_STANDARD_HEIGHT from .utils import read_tsv_file, pad_img_seq @@ -41,6 +43,25 @@ def preprocess(img): return img.resize(target_w_h) +def apply_transforms(img, transforms): + """Apply transforms to a single image.""" + img = np.array(img) + if img.ndim == 2: + img = np.expand_dims(img, 0) + return transforms(torch.from_numpy(img)) + + +def create_transform_func(transforms): + """Create a transform function that can be pickled.""" + def transform_func(examples): + outs = [] + for img in examples['image']: + outs.append(apply_transforms(img, transforms)) + examples['transformed_image'] = outs + return examples + return transform_func + + def gen_dataset( index_fp, img_folder=None, transforms=None, mode='train', num_workers=None ) -> Dataset: @@ -80,18 +101,7 @@ def map_func(examples): dataset = dataset.map(map_func, batched=True, num_proc=num_workers) if transforms is not None: - - def transform_func(examples): - outs = [] - for img in examples['image']: - img = np.array(img) - if img.ndim == 2: - img = np.expand_dims(img, 0) - outs.append(transforms(torch.from_numpy(img))) - examples['transformed_image'] = outs - return examples - - dataset.set_transform(transform_func) + dataset.set_transform(create_transform_func(transforms)) return dataset From 1f50a11abad02ba8d9fc5f04845fc9bc4214198c Mon Sep 17 00:00:00 2001 From: breezedeus Date: Wed, 27 Nov 2024 22:49:28 +0800 Subject: [PATCH 03/12] use PIL instead of cv2 to read images, to support dir with Chinese --- cnocr/utils.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/cnocr/utils.py b/cnocr/utils.py index a1fd5ed..8e34ef0 100644 --- a/cnocr/utils.py +++ b/cnocr/utils.py @@ -23,12 +23,11 @@ from pathlib import Path import logging import platform -import zipfile import requests from typing import Union, Any, Tuple, List, Optional, Dict from tqdm import tqdm -from PIL import Image +from PIL import Image, ImageOps import cv2 import numpy as np import torch @@ -272,16 +271,18 @@ def read_img(path: Union[str, Path], gray=True) -> np.ndarray: * when `gray==True`, return a gray image, with dim [height, width, 1], with values range from 0 to 255 * when `gray==False`, return a color image, with dim [height, width, 3], with values range from 0 to 255 """ + try: + img = Image.open(path) + img = ImageOps.exif_transpose(img) # 识别旋转后的图片(pillow不会自动识别) + except Exception as e: + raise FileNotFoundError(f'Error loading image: {path}') + if gray: - img = cv2.imread(path, cv2.IMREAD_GRAYSCALE) - if img is None: - raise FileNotFoundError(f'Error loading image: {path}') - return np.expand_dims(img, -1) + img = img.convert('L') + return np.expand_dims(np.array(img), -1) else: - img = cv2.imread(path) - if img is None: - raise FileNotFoundError(f'Error loading image: {path}') - return cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = img.convert('RGB') + return np.array(img) def save_img(img: Union[Tensor, np.ndarray], path): From 6b19fbd5b7cc721b2b62ae2528c34973ba010440 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Wed, 27 Nov 2024 22:52:58 +0800 Subject: [PATCH 04/12] Integrated latest PPOCRv4 models based on RapidOCR for faster inference --- cnocr/app.py | 2 +- cnocr/cli.py | 4 +- cnocr/cn_ocr.py | 9 ++- cnocr/consts.py | 12 +++ cnocr/ppocr/__init__.py | 1 + cnocr/ppocr/consts.py | 40 ++++++---- cnocr/ppocr/rapid_recognizer.py | 135 ++++++++++++++++++++++++++++++++ 7 files changed, 179 insertions(+), 24 deletions(-) create mode 100644 cnocr/ppocr/rapid_recognizer.py diff --git a/cnocr/app.py b/cnocr/app.py index c8b643e..268d7d6 100644 --- a/cnocr/app.py +++ b/cnocr/app.py @@ -129,7 +129,7 @@ def main(): det_models.append(('naive_det', 'onnx')) det_models.sort() det_model_name = st.sidebar.selectbox( - '选择检测模型', det_models, index=det_models.index(('ch_PP-OCRv3_det', 'onnx')) + '选择检测模型', det_models, index=det_models.index(('ch_PP-OCRv4_det', 'onnx')) ) all_models = list(REC_AVAILABLE_MODELS.all_models()) diff --git a/cnocr/cli.py b/cnocr/cli.py index e49851f..763f61c 100644 --- a/cnocr/cli.py +++ b/cnocr/cli.py @@ -215,8 +215,8 @@ def visualize_example(example, fp_prefix): '-d', '--det-model-name', type=str, - default='ch_PP-OCRv3_det', - help='检测模型名称。默认值为 ch_PP-OCRv3_det', + default='ch_PP-OCRv4_det', + help='检测模型名称。默认值为 ch_PP-OCRv4_det', ) @click.option( '--det-model-backend', diff --git a/cnocr/cn_ocr.py b/cnocr/cn_ocr.py index 294a895..1d9779d 100644 --- a/cnocr/cn_ocr.py +++ b/cnocr/cn_ocr.py @@ -35,7 +35,7 @@ from .utils import data_dir, read_img from .line_split import line_split from .recognizer import Recognizer -from .ppocr import PPRecognizer, PP_SPACE +from .ppocr import PPRecognizer, RapidRecognizer, PP_SPACE logger = logging.getLogger(__name__) @@ -64,7 +64,7 @@ def __init__( self, rec_model_name: str = 'densenet_lite_136-gru', *, - det_model_name: str = 'ch_PP-OCRv3_det', + det_model_name: str = 'ch_PP-OCRv4_det', cand_alphabet: Optional[Union[Collection, str]] = None, context: str = 'cpu', # ['cpu', 'gpu', 'cuda'] rec_model_fp: Optional[str] = None, @@ -83,7 +83,7 @@ def __init__( Args: rec_model_name (str): 识别模型名称。默认为 `densenet_lite_136-gru` - det_model_name (str): 检测模型名称。默认为 `ch_PP-OCRv3_det` + det_model_name (str): 检测模型名称。默认为 `ch_PP-OCRv4_det` cand_alphabet (Optional[Union[Collection, str]]): 待识别字符所在的候选集合。默认为 `None`,表示不限定识别字符范围 context (str): 'cpu', or 'gpu'。表明预测时是使用CPU还是GPU。默认为 `cpu`。 此参数仅在 `model_backend=='pytorch'` 时有效。 @@ -143,7 +143,8 @@ def __init__( if self.rec_space == REC_AVAILABLE_MODELS.CNOCR_SPACE: rec_cls = Recognizer elif self.rec_space == PP_SPACE: - rec_cls = PPRecognizer + rec_name = REC_AVAILABLE_MODELS.get_value(rec_model_name, rec_model_backend, 'recognizer') + rec_cls = RapidRecognizer if rec_name == 'RapidRecognizer' else PPRecognizer if rec_vocab_fp is not None: logger.warning('param `vocab_fp` is invalid for %s models' % PP_SPACE) else: diff --git a/cnocr/consts.py b/cnocr/consts.py index 3e35c64..1b52295 100644 --- a/cnocr/consts.py +++ b/cnocr/consts.py @@ -335,6 +335,18 @@ def get_vocab_fp( ) return CN_VOCAB_FP + def get_value(self, model_name, model_backend, key) -> Optional[Any]: + if (model_name, model_backend) in self.CNOCR_MODELS: + info = self.CNOCR_MODELS[(model_name, model_backend)] + elif (model_name, model_backend) in self.OUTER_MODELS: + info = self.OUTER_MODELS[(model_name, model_backend)] + else: + logger.warning( + 'no url is found for model %s' % ((model_name, model_backend),) + ) + return None + return info.get(key) + def get_epoch(self, model_name, model_backend) -> Optional[int]: if (model_name, model_backend) in self.CNOCR_MODELS: return self.CNOCR_MODELS[(model_name, model_backend)]['epoch'] diff --git a/cnocr/ppocr/__init__.py b/cnocr/ppocr/__init__.py index 1ecd659..f1e1249 100644 --- a/cnocr/ppocr/__init__.py +++ b/cnocr/ppocr/__init__.py @@ -3,5 +3,6 @@ from ..consts import AVAILABLE_MODELS from .consts import MODEL_LABELS_FILE_DICT, PP_SPACE from .pp_recognizer import PPRecognizer +from .rapid_recognizer import RapidRecognizer AVAILABLE_MODELS.register_models(MODEL_LABELS_FILE_DICT, space=PP_SPACE) diff --git a/cnocr/ppocr/consts.py b/cnocr/ppocr/consts.py index 122e657..c377f7a 100644 --- a/cnocr/ppocr/consts.py +++ b/cnocr/ppocr/consts.py @@ -20,29 +20,35 @@ from pathlib import Path -VOCAB_DIR = Path(__file__).parent / 'utils' +VOCAB_DIR = Path(__file__).parent / "utils" MODEL_LABELS_FILE_DICT = { - ('ch_PP-OCRv3', 'onnx'): { - 'vocab_fp': VOCAB_DIR / 'ppocr_keys_v1.txt', # 简体中英文 - 'url': 'ch_PP-OCRv3_rec_infer-onnx.zip', + ("ch_PP-OCRv3", "onnx"): { + "vocab_fp": VOCAB_DIR / "ppocr_keys_v1.txt", # 简体中英文 + "url": "ch_PP-OCRv3_rec_infer-onnx.zip", }, - ('ch_ppocr_mobile_v2.0', 'onnx'): { - 'vocab_fp': VOCAB_DIR / 'ppocr_keys_v1.txt', - 'url': 'ch_ppocr_mobile_v2.0_rec_infer-onnx.zip', + ("ch_ppocr_mobile_v2.0", "onnx"): { + "vocab_fp": VOCAB_DIR / "ppocr_keys_v1.txt", + "url": "ch_ppocr_mobile_v2.0_rec_infer-onnx.zip", }, - ('en_PP-OCRv3', 'onnx'): { - 'vocab_fp': VOCAB_DIR / 'en_dict.txt', # 英文 - 'url': 'en_PP-OCRv3_rec_infer-onnx.zip', + ("en_number_mobile_v2.0", "onnx"): { + "vocab_fp": VOCAB_DIR / "en_dict.txt", + "url": "en_number_mobile_v2.0_rec_infer-onnx.zip", }, - ('en_number_mobile_v2.0', 'onnx'): { - 'vocab_fp': VOCAB_DIR / 'en_dict.txt', - 'url': 'en_number_mobile_v2.0_rec_infer-onnx.zip', + ("en_PP-OCRv3", "onnx"): { + "vocab_fp": VOCAB_DIR / "en_dict.txt", # 英文 + "url": "en_PP-OCRv3_rec_infer-onnx.zip", + "recognizer": "RapidRecognizer", + "repo": "breezedeus/cnocr-ppocr-en_PP-OCRv3", }, - ('chinese_cht_PP-OCRv3', 'onnx'): { - 'vocab_fp': VOCAB_DIR / 'chinese_cht_dict.txt', # 繁体中文 - 'url': 'chinese_cht_PP-OCRv3_rec_infer-onnx.zip', + ("ch_PP-OCRv4", "onnx"): { + "recognizer": "RapidRecognizer", + "repo": "breezedeus/cnocr-ppocr-ch_PP-OCRv4", + }, + ("ch_PP-OCRv4_server", "onnx"): { + "recognizer": "RapidRecognizer", + "repo": "breezedeus/cnocr-ppocr-ch_PP-OCRv4_server", }, } -PP_SPACE = 'ppocr' +PP_SPACE = "ppocr" diff --git a/cnocr/ppocr/rapid_recognizer.py b/cnocr/ppocr/rapid_recognizer.py new file mode 100644 index 0000000..66e0c68 --- /dev/null +++ b/cnocr/ppocr/rapid_recognizer.py @@ -0,0 +1,135 @@ +# coding: utf-8 +# Copyright (C) 2022-2024, [Breezedeus](https://github.com/breezedeus). +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. + +import os +import logging +from typing import Union, Optional, List, Tuple +from pathlib import Path + +import numpy as np +from rapidocr_onnxruntime.ch_ppocr_rec.text_recognize import TextRecognizer +from cnstd.utils import prepare_model_files + +from ..utils import data_dir, read_img +from ..recognizer import Recognizer +from .consts import PP_SPACE +from ..consts import MODEL_VERSION, AVAILABLE_MODELS + + +logger = logging.getLogger(__name__) + + +class RapidRecognizer(Recognizer): + def __init__( + self, + model_name: str = "ch_PP-OCRv3", + *, + model_fp: Optional[str] = None, + root: Union[str, Path] = data_dir(), + context: str = "cpu", # ['cpu', 'gpu'] + rec_image_shape: str = "3, 48, 320", + **kwargs + ): + """ + 基于 rapidocr_onnxruntime 的文本识别器。 + + Args: + model_name (str): 模型名称。默认为 `ch_PP-OCRv3` + model_fp (Optional[str]): 如果不使用系统自带的模型,可以通过此参数直接指定所使用的模型文件('.onnx' 文件) + root (Union[str, Path]): 模型文件所在的根目录 + context (str): 使用的设备。默认为 `cpu`,可选 `gpu` + rec_image_shape (str): 输入图片尺寸,无需更改使用默认值即可。默认值:`"3, 32, 320"` + **kwargs: 其他参数 + """ + self.rec_image_shape = [int(v) for v in rec_image_shape.split(",")] + self._model_name = model_name + self._model_backend = "onnx" + use_gpu = context.lower() not in ("cpu", "mps") + + self._assert_and_prepare_model_files(model_fp, root) + + config = { + "use_cuda": use_gpu, + "rec_img_shape": self.rec_image_shape, + "rec_batch_num": 6, + "model_path": self._model_fp, + } + self.recognizer = TextRecognizer(config) + + def _assert_and_prepare_model_files(self, model_fp, root): + if model_fp is not None and not os.path.isfile(model_fp): + raise FileNotFoundError("can not find model file %s" % model_fp) + + if model_fp is not None: + self._model_fp = model_fp + return + + root = os.path.join(root, MODEL_VERSION) + self._model_dir = os.path.join(root, PP_SPACE, self._model_name) + model_fp = os.path.join(self._model_dir, "%s_rec_infer.onnx" % self._model_name) + if not os.path.isfile(model_fp): + logger.warning("can not find model file %s" % model_fp) + if (self._model_name, self._model_backend) not in AVAILABLE_MODELS: + raise NotImplementedError( + "%s is not a downloadable model" + % ((self._model_name, self._model_backend),) + ) + remote_repo = AVAILABLE_MODELS.get_value( + self._model_name, self._model_backend, "repo" + ) + model_fp = prepare_model_files(model_fp, remote_repo) + + self._model_fp = model_fp + logger.info("use model: %s" % self._model_fp) + + def recognize( + self, img_list: List[Union[str, Path, np.ndarray]], batch_size: int = 6 + ) -> List[Tuple[str, float]]: + """ + 识别图片中的文字。 + Args: + img_list: 支持以下格式的图片数据: + + 图片路径 + + 已经从图片文件中读入的数据 + batch_size: 待处理图片数据的批大小。 + + Returns: + 列表,每个元素是对应图片的识别结果,由 (text, score) 组成,其中: + + text: 识别出的文本 + + score: 识别结果的得分 + """ + if not isinstance(img_list, (list, tuple)): + img_list = [img_list] + + self.recognizer.rec_batch_num = batch_size + + img_data_list = [] + for img in img_list: + if isinstance(img, (str, Path)): + img = read_img(img, gray=False) + if len(img.shape) == 3 and img.shape[2] == 3: + img = img[..., ::-1] # RGB to BGR + img_data_list.append(img) + + results, _ = self.recognizer(img_data_list) + return results + + def recognize_one_line( + self, img: Union[str, Path, np.ndarray] + ) -> Tuple[str, float]: + """ + 识别图片中的一行文字。 + Args: + img: 支持以下格式的图片数据: + + 图片路径 + + 已经从图片文件中读入的数据 + + Returns: + (text, score): + + text: 识别出的文本 + + score: 识别结果的得分 + """ + results = self.recognize([img]) + return results[0] From b94bcdf7e2e0d428be07f518fe36908afa2cf156 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Wed, 27 Nov 2024 23:09:52 +0800 Subject: [PATCH 05/12] update deps --- docs/requirements.txt | 3 ++- requirements.in | 5 +++-- setup.py | 7 ++++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 77920cd..a00972a 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -20,7 +20,8 @@ torchmetrics>=0.9.0 pillow>=5.3.0 onnx onnxruntime -cnstd>=1.2.3.6 +cnstd>=1.2.5 +rapidocr_onnxruntime<1.4 # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/requirements.in b/requirements.in index 7b79bba..562cf14 100644 --- a/requirements.in +++ b/requirements.in @@ -10,9 +10,10 @@ numpy pytorch-lightning>=2.0.0 wandb datasets[vision] -albumentations +albumentations<1.4.0 torchmetrics>=0.9.0 pillow>=5.3.0 onnx onnxruntime -cnstd>=1.2.3.6 +rapidocr_onnxruntime<1.4 +cnstd>=1.2.5 diff --git a/setup.py b/setup.py index 8b7dd6b..7000262 100644 --- a/setup.py +++ b/setup.py @@ -47,11 +47,12 @@ "torchmetrics", "pillow>=5.3.0", "onnx", - "cnstd>=1.2.3.4", + "cnstd>=1.2.5", + "rapidocr_onnxruntime<1.4", ] extras_require = { - "ort-cpu": ["onnxruntime"], - "ort-gpu": ["onnxruntime-gpu"], + "ort-cpu": ["onnxruntime", "rapidocr_onnxruntime<1.4"], + "ort-gpu": ["onnxruntime-gpu", "rapidocr_onnxruntime<1.4"], "serve": ["uvicorn[standard]", "fastapi", "python-multipart", "pydantic"], "dev": ["albumentations", "pip-tools", "pytest", "datasets[vision]"], } From 08f111198ff76f7aeee72ac1d7e806b17d12c4c6 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Wed, 27 Nov 2024 23:10:01 +0800 Subject: [PATCH 06/12] update docs --- README.md | 17 ++++++++++++++--- README_en.md | 21 ++++++++++++++++----- docs/RELEASE.md | 19 +++++++++++++++++++ docs/models.md | 6 ++++-- 4 files changed, 53 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index dcc0af7..3bb6eba 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,16 @@ --- +### [Update 2024.11.28]:发布 V2.3.1 + +主要变更: + +* 基于 RapidOCR 集成 PPOCRv4 最新版 OCR 模型,提供更多的模型选择 + * 新增支持 PP-OCRv4 识别模型,包括标准版和服务器版 +* 修改读文件实现方式,支持 Windows 的中文路径 +* 修复Bug:当使用多个进程时,transform_func 无法序列化 +* 修复Bug:与 albumentations=1.4.* 兼容 + ### [Update 2023.12.24]:发布 V2.3 主要变更: @@ -376,13 +386,13 @@ print(ocr_out) | ------------------------------------------------------------ | ------------ | --------- | ------------ | ------------ | ------------------------------ | -------------------- | | db_shufflenet_v2 | √ | X | cnocr | 18 M | 简体中文、繁体中文、英文、数字 | √ | | **db_shufflenet_v2_small** | √ | X | cnocr | 12 M | 简体中文、繁体中文、英文、数字 | √ | -| [db_shufflenet_v2_tiny](https://mp.weixin.qq.com/s/fHPNoGyo72EFApVhEgR6Nw) | √ | X | cnocr | 7.5 M | 简体中文、繁体中文、英文、数字 | √ | | db_mobilenet_v3 | √ | X | cnocr | 16 M | 简体中文、繁体中文、英文、数字 | √ | | db_mobilenet_v3_small | √ | X | cnocr | 7.9 M | 简体中文、繁体中文、英文、数字 | √ | | db_resnet34 | √ | X | cnocr | 86 M | 简体中文、繁体中文、英文、数字 | √ | | db_resnet18 | √ | X | cnocr | 47 M | 简体中文、繁体中文、英文、数字 | √ | +| ch_PP-OCRv4_det | X | √ | ppocr | 4.5 M | 简体中文、繁体中文、英文、数字 | √ | +| ch_PP-OCRv4_det_server | X | √ | ppocr | 108 M | 简体中文、繁体中文、英文、数字 | √ | | ch_PP-OCRv3_det | X | √ | ppocr | 2.3 M | 简体中文、繁体中文、英文、数字 | √ | -| ch_PP-OCRv2_det | X | √ | ppocr | 2.2 M | 简体中文、繁体中文、英文、数字 | √ | | **en_PP-OCRv3_det** | X | √ | ppocr | 2.3 M | **英文**、数字 | √ | @@ -419,6 +429,8 @@ print(ocr_out) | **number-densenet_lite_136-fc** 🆕 | √ | √ | cnocr | 2.7 M | **纯数字**(仅包含 `0~9` 十个数字) | X | | **number-densenet_lite_136-gru** 🆕
([星球会员](https://t.zsxq.com/FEYZRJQ)专享) | √ | √ | cnocr | 5.5 M | **纯数字**(仅包含 `0~9` 十个数字) | X | | **number-densenet_lite_666-gru_large** 🆕
(购买链接:[B站](https://gf.bilibili.com/item/detail/1104055055)、[Lemon Squeezy](https://ocr.lemonsqueezy.com/)) | √ | √ | cnocr | 55 M | **纯数字**(仅包含 `0~9` 十个数字) | X | +| ch_PP-OCRv4 | X | √ | ppocr | 10 M | 简体中文、英文、数字 | √ | +| ch_PP-OCRv4_server | X | √ | ppocr | 86 M | 简体中文、英文、数字 | √ | | ch_PP-OCRv3 | X | √ | ppocr | 10 M | 简体中文、英文、数字 | √ | | ch_ppocr_mobile_v2.0 | X | √ | ppocr | 4.2 M | 简体中文、英文、数字 | √ | | en_PP-OCRv3 | X | √ | ppocr | 8.5 M | **英文**、数字 | √ | @@ -452,4 +464,3 @@ print(ocr_out) --- 官方代码库:[https://github.com/breezedeus/cnocr](https://github.com/breezedeus/cnocr)。 - diff --git a/README_en.md b/README_en.md index d9e2aa0..23b737d 100644 --- a/README_en.md +++ b/README_en.md @@ -38,6 +38,16 @@ --- +### [Update 2024.11.28]: Release of V2.3.1 + +Major Changes: + +* Based on RapidOCR, integrate the latest version of PPOCRv4 OCR models, providing more model options + * Add support for PP-OCRv4 recognition models, including standard and server versions +* Modify the implementation of reading files to support Chinese paths on Windows +* Fix bug: When using multiple processes, the transform_func cannot be serialized +* Fix bug: Compatible with albumentations=1.4.* + ### [Update 2023.12.24]: Release of V2.3 Major Changes: @@ -300,13 +310,13 @@ Refer to [CnSTD](https://github.com/breezedeus/CnSTD?tab=readme-ov-file#%E5%B7%B | **en_PP-OCRv3_det** | X | √ | ppocr | 2.3 M | **English**、Numbers | √ | | db_shufflenet_v2 | √ | X | cnocr | 18 M | Simplified Chinese, Traditional Chinese, English, Numbers | √ | | **db_shufflenet_v2_small** | √ | X | cnocr | 12 M | Simplified Chinese, Traditional Chinese, English, Numbers | √ | -| [db_shufflenet_v2_tiny](https://mp.weixin.qq.com/s/fHPNoGyo72EFApVhEgR6Nw) | √ | X | cnocr | 7.5 M | Simplified Chinese, Traditional Chinese, English, Numbers | √ | | db_mobilenet_v3 | √ | X | cnocr | 16 M | Simplified Chinese, Traditional Chinese, English, Numbers | √ | | db_mobilenet_v3_small | √ | X | cnocr | 7.9 M | Simplified Chinese, Traditional Chinese, English, Numbers | √ | | db_resnet34 | √ | X | cnocr | 86 M | Simplified Chinese, Traditional Chinese, English, Numbers | √ | | db_resnet18 | √ | X | cnocr | 47 M | Simplified Chinese, Traditional Chinese, English, Numbers | √ | +| ch_PP-OCRv4_det | X | √ | ppocr | 4.5 M | Simplified Chinese, Traditional Chinese, English, Numbers | √ | +| ch_PP-OCRv4_det_server | X | √ | ppocr | 108 M | Simplified Chinese, Traditional Chinese, English, Numbers | √ | | ch_PP-OCRv3_det | X | √ | ppocr | 2.3 M | Simplified Chinese, Traditional Chinese, English, Numbers | √ | -| ch_PP-OCRv2_det | X | √ | ppocr | 2.2 M | Simplified Chinese, Traditional Chinese, English, Numbers | √ | @@ -342,12 +352,13 @@ For more details, see: [Available Models](https://cnocr.readthedocs.io/zh-cn/sta | **number-densenet_lite_136-fc** 🆕 | √ | √ | cnocr | 2.7 M | **Pure Numeric** (contains only the ten digits `0~9`) | X | | **number-densenet_lite_136-gru** 🆕
([Planet Members](https://t.zsxq.com/FEYZRJQ) Only) | √ | √ | cnocr | 5.5 M | **Pure Numeric** (contains only the ten digits `0~9`) | X | | **number-densenet_lite_666-gru_large** 🆕
([Purchase Link](https://ocr.lemonsqueezy.com)) | √ | √ | cnocr | 56 M | **Pure Numeric** (contains only the ten digits `0~9`) | X | +| ch_PP-OCRv4 | X | √ | ppocr | 10 M | Simplified Chinese, English, Numbers | √ | +| ch_PP-OCRv4_server | X | √ | ppocr | 86 M | Simplified Chinese, English, Numbers | √ | +| ch_PP-OCRv3 | X | √ | ppocr | 10 M | Simplified Chinese, English, Numbers | √ | +| ch_ppocr_mobile_v2.0 | X | √ | ppocr | 4.2 M | Simplified Chinese, English, Numbers | √ | | **en_PP-OCRv3** | X | √ | ppocr | 8.5 M | **English**、Numbers | √ | | **en_number_mobile_v2.0** | X | √ | ppocr | 1.8 M | **English**、Numbers | √ | | **chinese_cht_PP-OCRv3** | X | √ | ppocr | 11 M | **Traditional Chinese**, English, Numbers | X | -| densenet_lite_136-gru | √ | √ | cnocr | 12 M | Simplified Chinese, English, Numbers | X | -| ch_PP-OCRv3 | X | √ | ppocr | 10 M | Simplified Chinese, English, Numbers | √ | -| ch_ppocr_mobile_v2.0 | X | √ | ppocr | 4.2 M | Simplified Chinese, English, Numbers | √ | ## Future work diff --git a/docs/RELEASE.md b/docs/RELEASE.md index 097b3d0..ad2b23c 100644 --- a/docs/RELEASE.md +++ b/docs/RELEASE.md @@ -1,5 +1,24 @@ # Release Notes +### Update 2024.11.28:发布 V2.3.1 + +主要变更: + +* 基于 RapidOCR 集成 PPOCRv4 最新版 OCR 模型,提供更多的模型选择 + * 新增支持 PP-OCRv4 识别模型,包括标准版和服务器版 +* 修改读文件实现方式,支持 Windows 的中文路径 +* 修复Bug:当使用多个进程时,transform_func 无法序列化 +* 修复Bug:与 albumentations=1.4.* 兼容 + +Major Changes: + +* Based on RapidOCR, integrate the latest version of PPOCRv4 OCR models, providing more model options + * Add support for PP-OCRv4 recognition models, including standard and server versions +* Modify the implementation of reading files to support Chinese paths on Windows +* Fix bug: When using multiple processes, the transform_func cannot be serialized +* Fix bug: Compatible with albumentations=1.4.* + + ### Update 2024.06.22:发布 V2.3.0.3 主要变更: diff --git a/docs/models.md b/docs/models.md index ef87d64..70f92be 100644 --- a/docs/models.md +++ b/docs/models.md @@ -14,13 +14,13 @@ | ------------------------------------------------------------ | ------------ | --------- | ------------ | ------------ | ------------------------------ | -------------------- | | db_shufflenet_v2 | √ | X | cnocr | 18 M | 简体中文、繁体中文、英文、数字 | √ | | **db_shufflenet_v2_small** | √ | X | cnocr | 12 M | 简体中文、繁体中文、英文、数字 | √ | -| [db_shufflenet_v2_tiny](https://mp.weixin.qq.com/s/fHPNoGyo72EFApVhEgR6Nw) | √ | X | cnocr | 7.5 M | 简体中文、繁体中文、英文、数字 | √ | | db_mobilenet_v3 | √ | X | cnocr | 16 M | 简体中文、繁体中文、英文、数字 | √ | | db_mobilenet_v3_small | √ | X | cnocr | 7.9 M | 简体中文、繁体中文、英文、数字 | √ | | db_resnet34 | √ | X | cnocr | 86 M | 简体中文、繁体中文、英文、数字 | √ | | db_resnet18 | √ | X | cnocr | 47 M | 简体中文、繁体中文、英文、数字 | √ | +| ch_PP-OCRv4_det | X | √ | ppocr | 4.5 M | 简体中文、繁体中文、英文、数字 | √ | +| ch_PP-OCRv4_det_server | X | √ | ppocr | 108 M | 简体中文、繁体中文、英文、数字 | √ | | ch_PP-OCRv3_det | X | √ | ppocr | 2.3 M | 简体中文、繁体中文、英文、数字 | √ | -| ch_PP-OCRv2_det | X | √ | ppocr | 2.2 M | 简体中文、繁体中文、英文、数字 | √ | | **en_PP-OCRv3_det** | X | √ | ppocr | 2.3 M | **英文**、数字 | √ | @@ -87,6 +87,8 @@ CnOCR 的自有模型从结构上可以分为两阶段:第一阶段是获得oc | `model_name` | PyTorch 版本 | ONNX 版本 | 支持语言 | 是否支持竖排文字识别 | 模型文件大小 | | --------------------- | ------------ | --------- | ------------------------ | -------------------- | ------------ | +| ch_PP-OCRv4 | X | √ | 简体中文、英文、数字 | √ | 10 M | +| ch_PP-OCRv4_server | X | √ | 简体中文、英文、数字 | √ | 86 M | | ch_PP-OCRv3 | X | √ | 简体中文、英文、数字 | √ | 10 M | | ch_ppocr_mobile_v2.0 | X | √ | 简体中文、英文、数字 | √ | 4.2 M | | en_PP-OCRv3 | X | √ | **英文**、数字 | √ | 8.5 M | From eca6bb3606212468af8ac669d6190b13e0b1a6e2 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Wed, 27 Nov 2024 23:10:10 +0800 Subject: [PATCH 07/12] bump version --- cnocr/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cnocr/__version__.py b/cnocr/__version__.py index e1dd690..9efae81 100644 --- a/cnocr/__version__.py +++ b/cnocr/__version__.py @@ -17,4 +17,4 @@ # specific language governing permissions and limitations # under the License. -__version__ = '2.3.0.3' +__version__ = '2.3.1' From aaf83de16b704d2a71c8fd55dc00fb801a0174e4 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Sat, 30 Nov 2024 20:24:45 +0800 Subject: [PATCH 08/12] Add multilingual OCR model support --- .gitignore | 50 +++++++++++++++++++++++++++++++++ README.md | 5 ++++ README_en.md | 15 ++++++---- cnocr/ppocr/consts.py | 24 ++++++++++++++++ cnocr/ppocr/pp_recognizer.py | 2 +- docs/RELEASE.md | 19 +++++++++++-- docs/demo.md | 2 ++ docs/models.md | 5 ++++ tests/test_rapidocr.py | 54 ++++++++++++++++++++++++++++++++++++ 9 files changed, 167 insertions(+), 9 deletions(-) create mode 100644 .gitignore create mode 100644 tests/test_rapidocr.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f0c1ce1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,50 @@ +.idea/.gitignore +.idea/crnn-mxnet-chinese-text-recognition.iml +.idea/git_toolbox_blame.xml +.idea/git_toolbox_prj.xml +.idea/misc.xml +.idea/modules.xml +.idea/vcs.xml +.idea/dictionaries/king.xml +.idea/inspectionProfiles/Project_Default.xml +.pytest_cache/v/cache/lastfailed +.pytest_cache/v/cache/nodeids +.pytest_cache/v/cache/stepwise +.streamlit/* +.ropeproject/* +*/__pycache__/* +*/*/__pycache__/* +*/*/*/__pycache__/* +dist/* +build/* +predict-result*/* +eval_results/* +*/*.pyc +*.ckpt +*/*.ckpt +inference/* +runs/* +s1/* +tmp*/* +wandb/* +debug*/* +docs/fonts/* +docs/predict-outputs/* +data/* +*.onnx +*.jpg +*.png +*.jpeg +*.gif +*.zip +*.pyo +*.pyc +*.egg-info +fonts/* +cnocr.egg-info/dependency_links.txt +cnocr.egg-info/entry_points.txt +cnocr.egg-info/not-zip-safe +cnocr.egg-info/PKG-INFO +cnocr.egg-info/requires.txt +cnocr.egg-info/SOURCES.txt +cnocr.egg-info/top_level.txt diff --git a/README.md b/README.md index 3bb6eba..1aac834 100644 --- a/README.md +++ b/README.md @@ -433,9 +433,14 @@ print(ocr_out) | ch_PP-OCRv4_server | X | √ | ppocr | 86 M | 简体中文、英文、数字 | √ | | ch_PP-OCRv3 | X | √ | ppocr | 10 M | 简体中文、英文、数字 | √ | | ch_ppocr_mobile_v2.0 | X | √ | ppocr | 4.2 M | 简体中文、英文、数字 | √ | +| en_PP-OCRv4 | X | √ | ppocr | 8.6 M | **英文**、数字 | √ | | en_PP-OCRv3 | X | √ | ppocr | 8.5 M | **英文**、数字 | √ | | en_number_mobile_v2.0 | X | √ | ppocr | 1.8 M | **英文**、数字 | √ | | chinese_cht_PP-OCRv3 | X | √ | ppocr | 11 M | **繁体中文**、英文、数字 | X | +| japan_PP-OCRv3 | X | √ | ppocr | 9.6 M | **日文**、英文、数字 | √ | +| korean_PP-OCRv3 | X | √ | ppocr | 9.4 M | **韩文**、英文、数字 | √ | +| latin_PP-OCRv3 | X | √ | ppocr | 8.6 M | **拉丁文**、英文、数字 | √ | +| arabic_PP-OCRv3 | X | √ | ppocr | 8.6 M | **阿拉伯文**、英文、数字 | √ | diff --git a/README_en.md b/README_en.md index 23b737d..331586a 100644 --- a/README_en.md +++ b/README_en.md @@ -354,11 +354,16 @@ For more details, see: [Available Models](https://cnocr.readthedocs.io/zh-cn/sta | **number-densenet_lite_666-gru_large** 🆕
([Purchase Link](https://ocr.lemonsqueezy.com)) | √ | √ | cnocr | 56 M | **Pure Numeric** (contains only the ten digits `0~9`) | X | | ch_PP-OCRv4 | X | √ | ppocr | 10 M | Simplified Chinese, English, Numbers | √ | | ch_PP-OCRv4_server | X | √ | ppocr | 86 M | Simplified Chinese, English, Numbers | √ | -| ch_PP-OCRv3 | X | √ | ppocr | 10 M | Simplified Chinese, English, Numbers | √ | -| ch_ppocr_mobile_v2.0 | X | √ | ppocr | 4.2 M | Simplified Chinese, English, Numbers | √ | -| **en_PP-OCRv3** | X | √ | ppocr | 8.5 M | **English**、Numbers | √ | -| **en_number_mobile_v2.0** | X | √ | ppocr | 1.8 M | **English**、Numbers | √ | -| **chinese_cht_PP-OCRv3** | X | √ | ppocr | 11 M | **Traditional Chinese**, English, Numbers | X | +| ch_PP-OCRv3 | X | √ | ppocr | 10 M | Simplified Chinese, English, Numbers | √ | +| ch_ppocr_mobile_v2.0 | X | √ | ppocr | 4.2 M | Simplified Chinese, English, Numbers | √ | +| **en_PP-OCRv3** | X | √ | ppocr | 8.5 M | **English**、Numbers | √ | +| **en_PP-OCRv4** | X | √ | ppocr | 8.6 M | **English**、Numbers | √ | +| **en_number_mobile_v2.0** | X | √ | ppocr | 1.8 M | **English**、Numbers | √ | +| **chinese_cht_PP-OCRv3** | X | √ | ppocr | 11 M | **Traditional Chinese**, English, Numbers | X | +| **japan_PP-OCRv3** | X | √ | ppocr | 9.6 M | **Japanese**, English, Numbers | √ | +| **korean_PP-OCRv3** | X | √ | ppocr | 9.4 M | **Korean**, English, Numbers | √ | +| **latin_PP-OCRv3** | X | √ | ppocr | 8.6 M | **Latin**, English, Numbers | √ | +| **arabic_PP-OCRv3** | X | √ | ppocr | 8.6 M | **Arabic**, English, Numbers | √ | ## Future work diff --git a/cnocr/ppocr/consts.py b/cnocr/ppocr/consts.py index c377f7a..ecf92f5 100644 --- a/cnocr/ppocr/consts.py +++ b/cnocr/ppocr/consts.py @@ -35,12 +35,36 @@ "vocab_fp": VOCAB_DIR / "en_dict.txt", "url": "en_number_mobile_v2.0_rec_infer-onnx.zip", }, + ("chinese_cht_PP-OCRv3", "onnx"): { + "vocab_fp": VOCAB_DIR / "chinese_cht_dict.txt", # 繁体中文 + "url": "chinese_cht_PP-OCRv3_rec_infer-onnx.zip", + }, + ("japan_PP-OCRv3", "onnx"): { + "recognizer": "RapidRecognizer", + "repo": "breezedeus/cnocr-ppocr-japan_PP-OCRv3", + }, + ("korean_PP-OCRv3", "onnx"): { + "recognizer": "RapidRecognizer", + "repo": "breezedeus/cnocr-ppocr-korean_PP-OCRv3", + }, + ("latin_PP-OCRv3", "onnx"): { + "recognizer": "RapidRecognizer", + "repo": "breezedeus/cnocr-ppocr-latin_PP-OCRv3", + }, + ("arabic_PP-OCRv3", "onnx"): { + "recognizer": "RapidRecognizer", + "repo": "breezedeus/cnocr-ppocr-arabic_PP-OCRv3", + }, ("en_PP-OCRv3", "onnx"): { "vocab_fp": VOCAB_DIR / "en_dict.txt", # 英文 "url": "en_PP-OCRv3_rec_infer-onnx.zip", "recognizer": "RapidRecognizer", "repo": "breezedeus/cnocr-ppocr-en_PP-OCRv3", }, + ("en_PP-OCRv4", "onnx"): { + "recognizer": "RapidRecognizer", + "repo": "breezedeus/cnocr-ppocr-en_PP-OCRv3", + }, ("ch_PP-OCRv4", "onnx"): { "recognizer": "RapidRecognizer", "repo": "breezedeus/cnocr-ppocr-ch_PP-OCRv4", diff --git a/cnocr/ppocr/pp_recognizer.py b/cnocr/ppocr/pp_recognizer.py index c8e7089..f6a4d43 100755 --- a/cnocr/ppocr/pp_recognizer.py +++ b/cnocr/ppocr/pp_recognizer.py @@ -73,6 +73,7 @@ def __init__( vocab_fp = AVAILABLE_MODELS.get_vocab_fp(self._model_name, self._model_backend) self._assert_and_prepare_model_files(model_fp, root) + logger.info('use model: %s' % self._model_fp) postprocess_params = { 'name': 'CTCLabelDecode', 'character_dict_path': vocab_fp, @@ -114,7 +115,6 @@ def _assert_and_prepare_model_files(self, model_fp, root): ) # download the .zip file and unzip self._model_fp = model_fp - logger.info('use model: %s' % self._model_fp) def resize_norm_img(self, img, max_wh_ratio): """ diff --git a/docs/RELEASE.md b/docs/RELEASE.md index ad2b23c..eb5a4c5 100644 --- a/docs/RELEASE.md +++ b/docs/RELEASE.md @@ -5,7 +5,14 @@ 主要变更: * 基于 RapidOCR 集成 PPOCRv4 最新版 OCR 模型,提供更多的模型选择 - * 新增支持 PP-OCRv4 识别模型,包括标准版和服务器版 + * 新增支持 PP-OCRv4 检测和识别模型,包括标准版和服务器版 + * 新增多语言OCR模型支持: + * chinese_cht_PP-OCRv3:繁体中文识别 + * japan_PP-OCRv3:日文识别 + * korean_PP-OCRv3:韩文识别 + * latin_PP-OCRv3:拉丁文识别 + * arabic_PP-OCRv3:阿拉伯文识别 + * en_PP-OCRv4:英文识别(v4版本) * 修改读文件实现方式,支持 Windows 的中文路径 * 修复Bug:当使用多个进程时,transform_func 无法序列化 * 修复Bug:与 albumentations=1.4.* 兼容 @@ -13,12 +20,18 @@ Major Changes: * Based on RapidOCR, integrate the latest version of PPOCRv4 OCR models, providing more model options - * Add support for PP-OCRv4 recognition models, including standard and server versions + * Add support for PP-OCRv4 detection and recognition models, including standard and server versions + * Add multilingual OCR model support: + * chinese_cht_PP-OCRv3: Traditional Chinese recognition + * japan_PP-OCRv3: Japanese recognition + * korean_PP-OCRv3: Korean recognition + * latin_PP-OCRv3: Latin recognition + * arabic_PP-OCRv3: Arabic recognition + * en_PP-OCRv4: English recognition (v4 version) * Modify the implementation of reading files to support Chinese paths on Windows * Fix bug: When using multiple processes, the transform_func cannot be serialized * Fix bug: Compatible with albumentations=1.4.* - ### Update 2024.06.22:发布 V2.3.0.3 主要变更: diff --git a/docs/demo.md b/docs/demo.md index 91548e7..01f2c32 100644 --- a/docs/demo.md +++ b/docs/demo.md @@ -2,6 +2,8 @@ 地址:[https://huggingface.co/spaces/breezedeus/CnOCR-Demo](https://huggingface.co/spaces/breezedeus/CnOCR-Demo) 。 + 国内镜像:[https://hf.qhduan.com/spaces/breezedeus/CnOCR-Demo](https://hf.qhduan.com/spaces/breezedeus/CnOCR-Demo) 。 + ![Demo](figs/demo.jpg) diff --git a/docs/models.md b/docs/models.md index 70f92be..23de0bd 100644 --- a/docs/models.md +++ b/docs/models.md @@ -92,7 +92,12 @@ CnOCR 的自有模型从结构上可以分为两阶段:第一阶段是获得oc | ch_PP-OCRv3 | X | √ | 简体中文、英文、数字 | √ | 10 M | | ch_ppocr_mobile_v2.0 | X | √ | 简体中文、英文、数字 | √ | 4.2 M | | en_PP-OCRv3 | X | √ | **英文**、数字 | √ | 8.5 M | +| en_PP-OCRv4 | X | √ | **英文**、数字 | √ | 8.6 M | | en_number_mobile_v2.0 | X | √ | **英文**、数字 | √ | 1.8 M | | chinese_cht_PP-OCRv3 | X | √ | **繁体中文**、英文、数字 | X | 11 M | +| japan_PP-OCRv3 | X | √ | **日文**、英文、数字 | √ | 9.6 M | +| korean_PP-OCRv3 | X | √ | **韩文**、英文、数字 | √ | 9.4 M | +| latin_PP-OCRv3 | X | √ | **拉丁文**、英文、数字 | √ | 8.6 M | +| arabic_PP-OCRv3 | X | √ | **阿拉伯文**、英文、数字 | √ | 8.6 M | 更多模型可参考 [PaddleOCR/models_list.md](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/doc/doc_ch/models_list.md) 。如有其他外语(如日、韩等)识别需求,可在 **知识星球** [**CnOCR/CnSTD私享群**](https://t.zsxq.com/FEYZRJQ) 中向作者提出建议。 diff --git a/tests/test_rapidocr.py b/tests/test_rapidocr.py new file mode 100644 index 0000000..97f7098 --- /dev/null +++ b/tests/test_rapidocr.py @@ -0,0 +1,54 @@ +# coding: utf-8 +# Copyright (C) 2021, [Breezedeus](https://github.com/breezedeus). +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os +import pytest +import torch +from pathlib import Path + +from rapidocr_onnxruntime import RapidOCR +from rapidocr_onnxruntime.utils import LoadImage +from rapidocr_onnxruntime.ch_ppocr_rec import TextRecognizer +from rapidocr_onnxruntime.ch_ppocr_det import TextDetector +from rapidocr_onnxruntime.utils import LoadImage + +def test_whole_pipeline(): + engine = RapidOCR(det_model_path="en_PP-OCRv3_det_infer.onnx", rec_model_path="en_PP-OCRv4_rec_infer.onnx") + + root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + example_dir = Path(root_dir) / 'docs/examples' + # img_path = example_dir / 'multi-line_cn1.png' + img_path = example_dir / 'en_ticket.jpeg' + result, elapse = engine(img_path) + print(result) + breakpoint() + print(elapse) + + +def test_rec(): + config = {'intra_op_num_threads': -1, 'inter_op_num_threads': -1, 'use_cuda': False, 'use_dml': False, 'model_path': 'en_PP-OCRv4_rec_infer.onnx', 'rec_img_shape': [3, 48, 320], 'rec_batch_num': 6} + # config = dict(det_model_path="en_PP-OCRv3_det_infer.onnx") + engine = TextRecognizer(config) + + root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + example_dir = Path(root_dir) / 'docs/examples' + # img_path = example_dir / 'multi-line_cn1.png' + img_path = example_dir / 'hybrid.png' + result, elapse = engine(LoadImage()(img_path)) + print(result) \ No newline at end of file From 77dd5294f750b317b2a4e9818b4b235661d73226 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Sat, 30 Nov 2024 20:29:11 +0800 Subject: [PATCH 09/12] fix the date --- docs/RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/RELEASE.md b/docs/RELEASE.md index eb5a4c5..ba55f95 100644 --- a/docs/RELEASE.md +++ b/docs/RELEASE.md @@ -1,6 +1,6 @@ # Release Notes -### Update 2024.11.28:发布 V2.3.1 +### Update 2024.11.30:发布 V2.3.1 主要变更: From 588d39ce92efa1156a3145ec112441da2320a222 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Sat, 30 Nov 2024 21:08:11 +0800 Subject: [PATCH 10/12] Update deps --- requirements.in | 2 +- requirements.txt | 208 ++++++++++++++++++++++++++++------------------- setup.py | 2 +- 3 files changed, 126 insertions(+), 86 deletions(-) diff --git a/requirements.in b/requirements.in index 562cf14..34bd8ea 100644 --- a/requirements.in +++ b/requirements.in @@ -16,4 +16,4 @@ pillow>=5.3.0 onnx onnxruntime rapidocr_onnxruntime<1.4 -cnstd>=1.2.5 +cnstd>=1.2.5.1 diff --git a/requirements.txt b/requirements.txt index 0356353..c5a23aa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,9 @@ --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple --extra-index-url https://pypi.org/simple -aiohttp==3.9.1 +aiohappyeyeballs==2.4.3 + # via aiohttp +aiohttp==3.11.8 # via # datasets # fsspec @@ -16,53 +18,51 @@ aiosignal==1.3.1 # via aiohttp albumentations==1.3.1 # via -r requirements.in -appdirs==1.4.4 - # via wandb -async-timeout==4.0.3 +async-timeout==5.0.1 # via aiohttp -attrs==23.1.0 +attrs==24.2.0 # via aiohttp -certifi==2023.11.17 +certifi==2024.8.30 # via # requests # sentry-sdk -charset-normalizer==3.3.2 +charset-normalizer==3.4.0 # via requests click==8.1.7 # via # -r requirements.in # cnstd # wandb -cnstd==1.2.3.6 +cnstd==1.2.5.1 # via -r requirements.in coloredlogs==15.0.1 # via onnxruntime -contourpy==1.2.0 +contourpy==1.3.0 # via matplotlib cycler==0.12.1 # via matplotlib -datasets[vision]==2.16.0 +datasets[vision]==3.1.0 # via -r requirements.in -dill==0.3.7 +dill==0.3.8 # via # datasets # multiprocess docker-pycreds==0.4.0 # via wandb -filelock==3.13.1 +filelock==3.16.1 # via # datasets # huggingface-hub # torch -flatbuffers==23.5.26 +flatbuffers==24.3.25 # via onnxruntime -fonttools==4.47.0 +fonttools==4.55.0 # via matplotlib -frozenlist==1.4.1 +frozenlist==1.5.0 # via # aiohttp # aiosignal -fsspec[http]==2023.10.0 +fsspec[http]==2024.9.0 # via # datasets # huggingface-hub @@ -70,53 +70,54 @@ fsspec[http]==2023.10.0 # torch gitdb==4.0.11 # via gitpython -gitpython==3.1.40 +gitpython==3.1.43 # via wandb -huggingface-hub==0.20.1 +huggingface-hub==0.26.3 # via # cnstd # datasets humanfriendly==10.0 # via coloredlogs -idna==3.6 +idna==3.10 # via # requests # yarl -imageio==2.33.1 +imageio==2.36.1 # via scikit-image -importlib-resources==6.1.1 +importlib-resources==6.4.5 # via matplotlib -jinja2==3.1.2 +jinja2==3.1.4 # via torch -joblib==1.3.2 +joblib==1.4.2 # via scikit-learn -kiwisolver==1.4.5 +kiwisolver==1.4.7 # via matplotlib -lazy-loader==0.3 +lazy-loader==0.4 # via scikit-image -lightning-utilities==0.10.0 +lightning-utilities==0.11.9 # via # pytorch-lightning # torchmetrics -markupsafe==2.1.3 +markupsafe==3.0.2 # via jinja2 -matplotlib==3.8.2 +matplotlib==3.9.3 # via # cnstd # seaborn + # ultralytics mpmath==1.3.0 # via sympy -multidict==6.0.4 +multidict==6.1.0 # via # aiohttp # yarl -multiprocess==0.70.15 +multiprocess==0.70.16 # via datasets networkx==3.2.1 # via # scikit-image # torch -numpy==1.26.2 +numpy==1.26.4 # via # -r requirements.in # albumentations @@ -130,9 +131,8 @@ numpy==1.26.2 # opencv-python # opencv-python-headless # pandas - # pyarrow - # pytorch-lightning # qudida + # rapidocr-onnxruntime # scikit-image # scikit-learn # scipy @@ -141,163 +141,203 @@ numpy==1.26.2 # tifffile # torchmetrics # torchvision -onnx==1.15.0 + # ultralytics + # ultralytics-thop +onnx==1.17.0 # via # -r requirements.in # cnstd -onnxruntime==1.16.3 - # via -r requirements.in -opencv-python==4.8.1.78 - # via cnstd -opencv-python-headless==4.8.1.78 +onnxruntime==1.19.2 + # via + # -r requirements.in + # rapidocr-onnxruntime +opencv-python==4.10.0.84 + # via + # cnstd + # rapidocr-onnxruntime + # ultralytics +opencv-python-headless==4.10.0.84 # via # albumentations # qudida -packaging==23.2 +packaging==24.2 # via # datasets # huggingface-hub + # lazy-loader # lightning-utilities # matplotlib # onnxruntime # pytorch-lightning # scikit-image # torchmetrics -pandas==2.1.4 +pandas==2.2.3 # via # cnstd # datasets # seaborn -pillow==10.1.0 + # ultralytics +pillow==11.0.0 # via # -r requirements.in # cnstd # datasets # imageio # matplotlib + # rapidocr-onnxruntime # scikit-image # torchvision -polygon3==3.0.9.1 - # via cnstd -protobuf==4.25.1 + # ultralytics +platformdirs==4.3.6 + # via wandb +propcache==0.2.0 + # via + # aiohttp + # yarl +protobuf==5.29.0 # via # onnx # onnxruntime # wandb -psutil==5.9.7 - # via wandb -pyarrow==14.0.2 - # via datasets -pyarrow-hotfix==0.6 +psutil==6.1.0 + # via + # ultralytics + # wandb +py-cpuinfo==9.0.0 + # via ultralytics +pyarrow==18.1.0 # via datasets -pyclipper==1.3.0.post5 - # via cnstd -pyparsing==3.1.1 +pyclipper==1.3.0.post6 + # via + # cnstd + # rapidocr-onnxruntime +pyparsing==3.2.0 # via matplotlib -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via # matplotlib # pandas -pytorch-lightning==2.1.3 +pytorch-lightning==2.4.0 # via # -r requirements.in # cnstd -pytz==2023.3.post1 +pytz==2024.2 # via pandas -pyyaml==6.0.1 +pyyaml==6.0.2 # via # albumentations # cnstd # datasets # huggingface-hub # pytorch-lightning + # rapidocr-onnxruntime + # ultralytics # wandb qudida==0.0.4 # via albumentations -requests==2.31.0 +rapidocr-onnxruntime==1.3.25 + # via + # -r requirements.in + # cnstd +requests==2.32.3 # via # datasets - # fsspec # huggingface-hub - # torchvision + # ultralytics # wandb -scikit-image==0.22.0 +scikit-image==0.24.0 # via albumentations -scikit-learn==1.3.2 +scikit-learn==1.5.2 # via qudida -scipy==1.11.4 +scipy==1.13.1 # via # albumentations # cnstd # scikit-image # scikit-learn -seaborn==0.13.0 - # via cnstd -sentry-sdk==1.39.1 + # ultralytics +seaborn==0.13.2 + # via + # cnstd + # ultralytics +sentry-sdk==2.19.0 # via wandb -setproctitle==1.3.3 +setproctitle==1.3.4 # via wandb -shapely==2.0.2 - # via cnstd +shapely==2.0.6 + # via + # cnstd + # rapidocr-onnxruntime six==1.16.0 # via # docker-pycreds # python-dateutil + # rapidocr-onnxruntime smmap==5.0.1 # via gitdb -sympy==1.12 +sympy==1.13.1 # via # onnxruntime # torch -threadpoolctl==3.2.0 +threadpoolctl==3.5.0 # via scikit-learn -tifffile==2023.12.9 +tifffile==2024.8.30 # via scikit-image -torch==2.1.2 +torch==2.5.1 # via # -r requirements.in # cnstd # pytorch-lightning # torchmetrics # torchvision -torchmetrics==1.2.1 + # ultralytics + # ultralytics-thop +torchmetrics==1.6.0 # via # -r requirements.in # pytorch-lightning -torchvision==0.16.2 +torchvision==0.20.1 # via # -r requirements.in # cnstd -tqdm==4.66.1 + # ultralytics +tqdm==4.67.1 # via # -r requirements.in # cnstd # datasets # huggingface-hub # pytorch-lightning -typing-extensions==4.9.0 + # rapidocr-onnxruntime + # ultralytics +typing-extensions==4.12.2 # via # huggingface-hub # lightning-utilities + # multidict # pytorch-lightning # qudida # torch # wandb -tzdata==2023.3 +tzdata==2024.2 # via pandas -unidecode==1.3.7 +ultralytics==8.3.39 + # via cnstd +ultralytics-thop==2.0.12 + # via ultralytics +unidecode==1.3.8 # via cnstd -urllib3==2.1.0 +urllib3==2.2.3 # via # requests # sentry-sdk -wandb==0.16.1 +wandb==0.18.7 # via -r requirements.in -xxhash==3.4.1 +xxhash==3.5.0 # via datasets -yarl==1.9.4 +yarl==1.18.0 # via aiohttp -zipp==3.17.0 +zipp==3.21.0 # via importlib-resources # The following packages are considered to be unsafe in a requirements file: diff --git a/setup.py b/setup.py index 7000262..8c00560 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ "torchmetrics", "pillow>=5.3.0", "onnx", - "cnstd>=1.2.5", + "cnstd>=1.2.5.1", "rapidocr_onnxruntime<1.4", ] extras_require = { From b8cf99600ecc80ebcceb03bf1a1848055c400375 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Sat, 30 Nov 2024 21:10:13 +0800 Subject: [PATCH 11/12] remove unnecessary require --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 8c00560..91ff6ce 100644 --- a/setup.py +++ b/setup.py @@ -51,8 +51,8 @@ "rapidocr_onnxruntime<1.4", ] extras_require = { - "ort-cpu": ["onnxruntime", "rapidocr_onnxruntime<1.4"], - "ort-gpu": ["onnxruntime-gpu", "rapidocr_onnxruntime<1.4"], + "ort-cpu": ["onnxruntime"], + "ort-gpu": ["onnxruntime-gpu"], "serve": ["uvicorn[standard]", "fastapi", "python-multipart", "pydantic"], "dev": ["albumentations", "pip-tools", "pytest", "datasets[vision]"], } From 579dc4d319b087b3416299d40adf36b813398260 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Sat, 30 Nov 2024 21:26:46 +0800 Subject: [PATCH 12/12] fix typo --- cnocr/ppocr/consts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cnocr/ppocr/consts.py b/cnocr/ppocr/consts.py index ecf92f5..ffbc343 100644 --- a/cnocr/ppocr/consts.py +++ b/cnocr/ppocr/consts.py @@ -63,7 +63,7 @@ }, ("en_PP-OCRv4", "onnx"): { "recognizer": "RapidRecognizer", - "repo": "breezedeus/cnocr-ppocr-en_PP-OCRv3", + "repo": "breezedeus/cnocr-ppocr-en_PP-OCRv4", }, ("ch_PP-OCRv4", "onnx"): { "recognizer": "RapidRecognizer",