diff --git a/Makefile b/Makefile index 9f745fb..970404e 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ package: rm -rf build python setup.py sdist bdist_wheel -VERSION = 2.2.4.1 +VERSION = 2.2.4.2 upload: python -m twine upload dist/cnocr-$(VERSION)* --verbose diff --git a/cnocr/__version__.py b/cnocr/__version__.py index 8649b0b..6bd281d 100644 --- a/cnocr/__version__.py +++ b/cnocr/__version__.py @@ -17,4 +17,4 @@ # specific language governing permissions and limitations # under the License. -__version__ = '2.2.4.1' +__version__ = '2.2.4.2' diff --git a/cnocr/consts.py b/cnocr/consts.py index b3c2741..a87dfc8 100644 --- a/cnocr/consts.py +++ b/cnocr/consts.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright (C) 2021, [Breezedeus](https://github.com/breezedeus). +# Copyright (C) 2021-2023, [Breezedeus](https://github.com/breezedeus). # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -17,6 +17,7 @@ # specific language governing permissions and limitations # under the License. +import os import string from collections import OrderedDict from pathlib import Path @@ -32,6 +33,7 @@ # 模型版本只对应到第二层,第三层的改动表示模型兼容。 # 如: __version__ = '2.2.*',对应的 MODEL_VERSION 都是 '2.2' MODEL_VERSION = '.'.join(__version__.split('.', maxsplit=2)[:2]) +DOWNLOAD_SOURCE = os.environ.get('CNOCR_DOWNLOAD_SOURCE', 'CN') IMG_STANDARD_HEIGHT = 32 CN_VOCAB_FP = Path(__file__).parent.absolute() / 'label_cn.txt' @@ -135,20 +137,25 @@ HF_HUB_SUBFOLDER = "models/cnocr/%s" % MODEL_VERSION PAID_HF_HUB_REPO_ID = "breezedeus/paid-models" PAID_HF_HUB_SUBFOLDER = "cnocr/%s" % MODEL_VERSION +CN_OSS_ENDPOINT = ( + "https://sg-models.oss-cn-beijing.aliyuncs.com/cnocr/%s/" % MODEL_VERSION +) def format_hf_hub_url(url: str, is_paid_model=False) -> dict: + out_dict = {'filename': url} + if is_paid_model: repo_id = PAID_HF_HUB_REPO_ID subfolder = PAID_HF_HUB_SUBFOLDER else: repo_id = HF_HUB_REPO_ID subfolder = HF_HUB_SUBFOLDER - return { - 'repo_id': repo_id, - 'subfolder': subfolder, - 'filename': url, - } + out_dict['cn_oss'] = CN_OSS_ENDPOINT + out_dict.update( + {'repo_id': repo_id, 'subfolder': subfolder,} + ) + return out_dict class AvailableModels(object): diff --git a/cnocr/ppocr/pp_recognizer.py b/cnocr/ppocr/pp_recognizer.py index d535017..8d97528 100755 --- a/cnocr/ppocr/pp_recognizer.py +++ b/cnocr/ppocr/pp_recognizer.py @@ -33,7 +33,7 @@ from .postprocess import build_post_process from .utility import create_predictor from .consts import PP_SPACE -from ..consts import MODEL_VERSION, AVAILABLE_MODELS +from ..consts import MODEL_VERSION, AVAILABLE_MODELS, DOWNLOAD_SOURCE logger = logging.getLogger(__name__) @@ -106,7 +106,7 @@ def _assert_and_prepare_model_files(self, model_fp, root): % ((self._model_name, self._model_backend),) ) url = AVAILABLE_MODELS.get_url(self._model_name, self._model_backend) - get_model_file(url, self._model_dir) + get_model_file(url, self._model_dir, download_source=DOWNLOAD_SOURCE) # download the .zip file and unzip self._model_fp = model_fp logger.info('use model: %s' % self._model_fp) diff --git a/cnocr/recognizer.py b/cnocr/recognizer.py index 17fe7a8..b97330c 100644 --- a/cnocr/recognizer.py +++ b/cnocr/recognizer.py @@ -28,7 +28,7 @@ import torch from cnstd.utils import get_model_file -from .consts import MODEL_VERSION, AVAILABLE_MODELS +from .consts import MODEL_VERSION, AVAILABLE_MODELS, DOWNLOAD_SOURCE from .models.ocr_model import OcrModel from .utils import ( data_dir, @@ -177,7 +177,7 @@ def _assert_and_prepare_model_files(self, model_fp, root): % ((self._model_name, self._model_backend),) ) url = AVAILABLE_MODELS.get_url(self._model_name, self._model_backend) - get_model_file(url, self._model_dir) + get_model_file(url, self._model_dir, download_source=DOWNLOAD_SOURCE) # download the .zip file and unzip fps = glob( '%s/%s*.%s' % (self._model_dir, self._model_file_prefix, model_ext) ) diff --git a/docs/RELEASE.md b/docs/RELEASE.md index c49e7dc..3789d44 100644 --- a/docs/RELEASE.md +++ b/docs/RELEASE.md @@ -1,5 +1,11 @@ # Release Notes +### Update 2023.10.09:发布 V2.2.4.2 + +主要变更: + +* 支持基于环境变量 `CNOCR_DOWNLOAD_SOURCE` 的取值,来决定不同的模型下载路径。 + ### Update 2023.10.01:发布 V2.2.4.1 主要变更: diff --git a/docs/models.md b/docs/models.md index 85e28e5..0f99279 100644 --- a/docs/models.md +++ b/docs/models.md @@ -8,7 +8,7 @@ ## 检测模型 -具体说明请参考 **[CnSTD 文档](https://github.com/breezedeus/cnstd)**,以下仅罗列出可用模型: +具体说明请参考 **[CnSTD 文档](https://github.com/breezedeus/CnSTD/tree/master#%E4%BD%BF%E7%94%A8%E6%96%B9%E6%B3%95)**,以下仅罗列出可用模型: | `det_model_name` | PyTorch 版本 | ONNX 版本 | 模型原始来源 | 模型文件大小 | 支持语言 | 是否支持竖排文字识别 | | ------------------------------------------------------------ | ------------ | --------- | ------------ | ------------ | ------------------------------ | -------------------- | diff --git a/docs/usage.md b/docs/usage.md index 5c2cf74..6409809 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -2,11 +2,13 @@ ## 模型文件自动下载 -首次使用 **CnOCR** 时,系统会**自动下载** zip 格式的模型压缩文件,并存于 `~/.cnocr`目录(Windows下默认路径为 `C:\Users\\AppData\Roaming\cnocr`)。 +首次使用 **CnOCR** 时,系统会**自动下载** zip 格式的识别模型压缩文件,并存于 `~/.cnocr`目录(Windows下默认路径为 `C:\Users\\AppData\Roaming\cnocr`)。 下载后的zip文件代码会自动对其解压,然后把解压后的模型相关目录放于`~/.cnocr/2.2`目录中。 如果系统无法自动成功下载zip文件,则需要手动从 **[cnstd-cnocr-models](https://huggingface.co/breezedeus/cnstd-cnocr-models/tree/main)** 下载此zip文件并把它放于 `~/.cnocr/2.2`目录。如果下载太慢,也可以从 [百度云盘](https://pan.baidu.com/s/1wvIjbjw95akE-h_o1HQd9w?pwd=nocr) 下载, 提取码为 ` nocr`。 +检测模型的下载请参考 **[CnSTD 文档](https://github.com/breezedeus/CnSTD/tree/master#%E4%BD%BF%E7%94%A8%E6%96%B9%E6%B3%95)**。 + 放置好 zip 文件后,后面的事代码就会自动执行了。 ## 详细使用说明 diff --git a/requirements.in b/requirements.in index ed6f6ed..cdfca0b 100644 --- a/requirements.in +++ b/requirements.in @@ -14,4 +14,4 @@ torchmetrics>=0.9.0 pillow>=5.3.0 onnx onnxruntime -cnstd>=1.2.3.4 +cnstd>=1.2.3.5 diff --git a/requirements.txt b/requirements.txt index d168342..d1d3b68 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,7 +33,7 @@ click==8.1.7 # -r requirements.in # cnstd # wandb -cnstd==1.2.3.4 +cnstd==1.2.3.5 # via -r requirements.in coloredlogs==15.0.1 # via onnxruntime