Skip to content

Commit

Permalink
Merge pull request #286 from breezedeus/dev
Browse files Browse the repository at this point in the history
use environment variable to determine the download source for models
  • Loading branch information
breezedeus authored Oct 9, 2023
2 parents c8d623d + c4ab3a9 commit 7b59abe
Show file tree
Hide file tree
Showing 10 changed files with 31 additions and 16 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ package:
rm -rf build
python setup.py sdist bdist_wheel

VERSION = 2.2.4.1
VERSION = 2.2.4.2
upload:
python -m twine upload dist/cnocr-$(VERSION)* --verbose

Expand Down
2 changes: 1 addition & 1 deletion cnocr/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@
# specific language governing permissions and limitations
# under the License.

__version__ = '2.2.4.1'
__version__ = '2.2.4.2'
19 changes: 13 additions & 6 deletions cnocr/consts.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# coding: utf-8
# Copyright (C) 2021, [Breezedeus](https://github.com/breezedeus).
# Copyright (C) 2021-2023, [Breezedeus](https://github.com/breezedeus).
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
Expand All @@ -17,6 +17,7 @@
# specific language governing permissions and limitations
# under the License.

import os
import string
from collections import OrderedDict
from pathlib import Path
Expand All @@ -32,6 +33,7 @@
# The model version tracks only the first two components of the package
# version; a change in the third component means the models stay compatible.
# E.g. for __version__ = '2.2.*', the corresponding MODEL_VERSION is always '2.2'.
MODEL_VERSION = '.'.join(__version__.split('.', maxsplit=2)[:2])
# Model download source, read from the CNOCR_DOWNLOAD_SOURCE environment
# variable; falls back to 'CN' when the variable is not set.
DOWNLOAD_SOURCE = os.environ.get('CNOCR_DOWNLOAD_SOURCE', 'CN')

IMG_STANDARD_HEIGHT = 32
CN_VOCAB_FP = Path(__file__).parent.absolute() / 'label_cn.txt'
Expand Down Expand Up @@ -135,20 +137,25 @@
# Sub-folder paired with HF_HUB_REPO_ID, namespaced by MODEL_VERSION.
HF_HUB_SUBFOLDER = "models/cnocr/%s" % MODEL_VERSION
# Repo id and sub-folder that format_hf_hub_url selects when `is_paid_model` is true.
PAID_HF_HUB_REPO_ID = "breezedeus/paid-models"
PAID_HF_HUB_SUBFOLDER = "cnocr/%s" % MODEL_VERSION
# Versioned base URL on an aliyuncs.com OSS host (oss-cn-beijing); stored
# under the 'cn_oss' key by format_hf_hub_url.
CN_OSS_ENDPOINT = (
"https://sg-models.oss-cn-beijing.aliyuncs.com/cnocr/%s/" % MODEL_VERSION
)


def format_hf_hub_url(url: str, is_paid_model=False) -> dict:
# Build the dict of download-location fields for one model file.
#
# Args:
#   url: stored unchanged under the 'filename' key.
#   is_paid_model: when true, PAID_HF_HUB_REPO_ID / PAID_HF_HUB_SUBFOLDER are
#     used; otherwise HF_HUB_REPO_ID / HF_HUB_SUBFOLDER.
#
# Returns:
#   A dict with 'filename', 'repo_id' and 'subfolder'; the `out_dict` lines
#   additionally store CN_OSS_ENDPOINT under 'cn_oss'.
#
# NOTE(review): this span is a diff hunk shown without +/- markers and without
# indentation, so two versions of the function body are interleaved. The
# `out_dict` lines look like the added (new) code and the `return {...}`
# literal like the removed (old) code -- confirm against the repository.
out_dict = {'filename': url}

if is_paid_model:
repo_id = PAID_HF_HUB_REPO_ID
subfolder = PAID_HF_HUB_SUBFOLDER
else:
repo_id = HF_HUB_REPO_ID
subfolder = HF_HUB_SUBFOLDER
# Presumably the pre-change return value (removed by this commit): a fresh
# dict holding only the three hub fields.
return {
'repo_id': repo_id,
'subfolder': subfolder,
'filename': url,
}
# NOTE(review): with indentation lost it cannot be told from here whether this
# assignment sits inside the `else` branch (non-paid models only) or runs for
# every call -- TODO confirm.
out_dict['cn_oss'] = CN_OSS_ENDPOINT
# Merge the selected hub coordinates into the dict that already holds 'filename'.
out_dict.update(
{'repo_id': repo_id, 'subfolder': subfolder,}
)
return out_dict


class AvailableModels(object):
Expand Down
4 changes: 2 additions & 2 deletions cnocr/ppocr/pp_recognizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from .postprocess import build_post_process
from .utility import create_predictor
from .consts import PP_SPACE
from ..consts import MODEL_VERSION, AVAILABLE_MODELS
from ..consts import MODEL_VERSION, AVAILABLE_MODELS, DOWNLOAD_SOURCE


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -106,7 +106,7 @@ def _assert_and_prepare_model_files(self, model_fp, root):
% ((self._model_name, self._model_backend),)
)
url = AVAILABLE_MODELS.get_url(self._model_name, self._model_backend)
get_model_file(url, self._model_dir)
get_model_file(url, self._model_dir, download_source=DOWNLOAD_SOURCE) # download the .zip file and unzip

self._model_fp = model_fp
logger.info('use model: %s' % self._model_fp)
Expand Down
4 changes: 2 additions & 2 deletions cnocr/recognizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import torch
from cnstd.utils import get_model_file

from .consts import MODEL_VERSION, AVAILABLE_MODELS
from .consts import MODEL_VERSION, AVAILABLE_MODELS, DOWNLOAD_SOURCE
from .models.ocr_model import OcrModel
from .utils import (
data_dir,
Expand Down Expand Up @@ -177,7 +177,7 @@ def _assert_and_prepare_model_files(self, model_fp, root):
% ((self._model_name, self._model_backend),)
)
url = AVAILABLE_MODELS.get_url(self._model_name, self._model_backend)
get_model_file(url, self._model_dir)
get_model_file(url, self._model_dir, download_source=DOWNLOAD_SOURCE) # download the .zip file and unzip
fps = glob(
'%s/%s*.%s' % (self._model_dir, self._model_file_prefix, model_ext)
)
Expand Down
6 changes: 6 additions & 0 deletions docs/RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Release Notes

### Update 2023.10.09:发布 V2.2.4.2

主要变更:

* 支持基于环境变量 `CNOCR_DOWNLOAD_SOURCE` 的取值,来决定不同的模型下载路径。

### Update 2023.10.01:发布 V2.2.4.1

主要变更:
Expand Down
2 changes: 1 addition & 1 deletion docs/models.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

## 检测模型

具体说明请参考 **[CnSTD 文档](https://github.com/breezedeus/cnstd)**,以下仅罗列出可用模型:
具体说明请参考 **[CnSTD 文档](https://github.com/breezedeus/CnSTD/tree/master#%E4%BD%BF%E7%94%A8%E6%96%B9%E6%B3%95)**,以下仅罗列出可用模型:

| `det_model_name` | PyTorch 版本 | ONNX 版本 | 模型原始来源 | 模型文件大小 | 支持语言 | 是否支持竖排文字识别 |
| ------------------------------------------------------------ | ------------ | --------- | ------------ | ------------ | ------------------------------ | -------------------- |
Expand Down
4 changes: 3 additions & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

## 模型文件自动下载

首次使用 **CnOCR** 时,系统会**自动下载** zip 格式的模型压缩文件,并存于 `~/.cnocr`目录(Windows下默认路径为 `C:\Users\<username>\AppData\Roaming\cnocr`)。
首次使用 **CnOCR** 时,系统会**自动下载** zip 格式的识别模型压缩文件,并存于 `~/.cnocr`目录(Windows下默认路径为 `C:\Users\<username>\AppData\Roaming\cnocr`)。
zip 文件下载完成后,代码会自动对其解压,并把解压后的模型相关目录放于 `~/.cnocr/2.2` 目录中。

如果系统无法自动成功下载zip文件,则需要手动从 **[cnstd-cnocr-models](https://huggingface.co/breezedeus/cnstd-cnocr-models/tree/main)** 下载此zip文件并把它放于 `~/.cnocr/2.2`目录。如果下载太慢,也可以从 [百度云盘](https://pan.baidu.com/s/1wvIjbjw95akE-h_o1HQd9w?pwd=nocr) 下载,提取码为 `nocr`。

检测模型的下载请参考 **[CnSTD 文档](https://github.com/breezedeus/CnSTD/tree/master#%E4%BD%BF%E7%94%A8%E6%96%B9%E6%B3%95)**

放置好 zip 文件后,后续的解压和加载步骤会由代码自动完成。

## 详细使用说明
Expand Down
2 changes: 1 addition & 1 deletion requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ torchmetrics>=0.9.0
pillow>=5.3.0
onnx
onnxruntime
cnstd>=1.2.3.4
cnstd>=1.2.3.5
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ click==8.1.7
# -r requirements.in
# cnstd
# wandb
cnstd==1.2.3.4
cnstd==1.2.3.5
# via -r requirements.in
coloredlogs==15.0.1
# via onnxruntime
Expand Down

0 comments on commit 7b59abe

Please sign in to comment.