diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..8d35599 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,71 @@ +name: Bug report +description: 反馈与提交BUG / Feedback and submit bugs +title: "[BUG] 标题简要描述 / Brief description of the title" +labels: [ "bug" ] +assignees: [ ] +body: + - type: markdown + attributes: + value: | + 创建一个反馈报告以帮助我们改进 / Create a feedback report to help us improve + - type: checkboxes + id: checkboxes + attributes: + label: 一些验证 / Some verifications + description: 在提交问题之前,请确保您完成以下操作 / Before submitting an issue, please ensure you have completed the following + options: + - label: 是否查看 wiki、issues 后自己尝试解决 / Have you tried to resolve the issue by checking the wiki and existing issues? + required: true + - label: 请 **确保** 您的问题能在 [releases](https://github.com/aoguai/LiYing/releases/latest) 发布的最新版本(包含测试版本)上复现 (如果不是请先更新到最新版本复现后再提交问题) / Please **ensure** your issue can be reproduced on the latest version (including test versions) released in [releases](https://github.com/aoguai/LiYing/releases/latest) (if not, please update to the latest version and reproduce the issue before submitting it) + required: true + - label: 搜索检查是否已经存在请求相同功能的问题/讨论,以避免重复创建问题 / Search to check if there are already issues/discussions requesting the same feature to avoid duplication + required: true + - label: 确认知晓并同意维护者直接关闭不符合 issue 规范的问题 / Acknowledge and agree that maintainers can directly close issues that do not follow the issue guidelines + required: true + - label: 确保提供下列BUG描述及其复现步骤, 否则我同意维护者直接关闭问题 / Ensure to provide the following bug description and reproduction steps, otherwise, I agree that maintainers can directly close the issue + required: true + - type: textarea + id: bug-description + attributes: + label: | + BUG 描述或反馈描述 / Bug description or feedback description + description: | + 请输入 BUG 描述或反馈描述及其复现步骤,请使用尽量准确的描述。 / Please enter the bug description or feedback 
description and its reproduction steps. Use as accurate a description as possible. + validations: + required: true + - type: textarea + id: expected-behavior + attributes: + label: | + 预期的效果 / Expected behavior + description: | + 简明扼要地描述你原来希望的效果。 / Briefly describe what you originally expected to happen. + validations: + required: true + - type: textarea + id: screenshots + attributes: + label: | + 截图 / Screenshots + description: | + 添加截图以帮助解释你的问题。 / Add screenshots to help explain your issue. + validations: + required: true + - type: textarea + id: system-info + attributes: + label: | + 系统信息 / System information + description: | + 请说明您的操作系统: [例如.Windows]以及软件版本 [例如. V1.6] / Please specify your operating system: [e.g., Windows] and software version [e.g., V1.6] + validations: + required: true + - type: textarea + id: additional-info + attributes: + label: | + 额外的信息 / Additional information + description: | + 在此添加关于问题的任何其他背景、猜想、推断。 / Add any other context, assumptions, or inferences about the issue here. + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..cb54aa0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: false +contact_links: + - name: 讨论交流 / Discussions + url: https://github.com/aoguai/LiYing/discussions + about: Use GitHub discussions for message-board style questions and discussions. 
\ No newline at end of file diff --git a/.github/workflows/Create Comment.yml b/.github/workflows/Create Comment.yml new file mode 100644 index 0000000..10b20ac --- /dev/null +++ b/.github/workflows/Create Comment.yml @@ -0,0 +1,21 @@ +name: Create Comment + +on: + issues: + types: [labeled] + +jobs: + create-comment: + runs-on: ubuntu-latest + if: github.event.label.name == 'need info' + steps: + - name: Create comment + uses: actions-cool/issues-helper@v3 + with: + actions: 'create-comment' + token: ${{ secrets.GITHUB_TOKEN }} + issue-number: ${{ github.event.issue.number }} + body: | + Hello ${{ github.event.issue.user.login }}. It seems that more information is needed for this issue. Please provide additional details. + + 你好 ${{ github.event.issue.user.login }}。看起来这个问题需要更多信息。请提供额外的细节。 diff --git a/.github/workflows/Issue Close Question.yml b/.github/workflows/Issue Close Question.yml new file mode 100644 index 0000000..b90ed7a --- /dev/null +++ b/.github/workflows/Issue Close Question.yml @@ -0,0 +1,26 @@ + +name: Issue Close Question + +on: + schedule: + - cron: "0 0 * * *" + +permissions: + contents: read + +jobs: + issue-close-require: + permissions: + issues: write + pull-requests: write + runs-on: ubuntu-latest + steps: + - name: needs more info + uses: actions-cool/issues-helper@v3 + with: + actions: 'close-issues' + labels: 'need info' + inactive-day: 3 + body: | + This issue has been closed automatically because it has not had recent activity for 3 days. If you have any questions, please comment here. 
+ 由于该 Issue 3天未收到回应,现已被自动关闭,若有任何问题,可评论回复。
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..aa857af
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,120 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+.vscode/
+
+.idea/
+
+_myPython
+_myPython/*
+
+data/*.jpg
+data/*.jpeg
+data/*.png
+
+src/tool/ext/*.exe
+src/model/*.onnx
+
+*.zip
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..0ad25db
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,661 @@
+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. 
+ + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. 
Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. 
A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. 
You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. 
+ + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. 
+ + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. 
There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. diff --git a/data/data.ini b/data/data.ini new file mode 100644 index 0000000..65e8f64 --- /dev/null +++ b/data/data.ini @@ -0,0 +1,118 @@ +[小一寸照片] +打印尺寸 = 2.2cm x 3.2cm +电子版尺寸 = 260px x 378px +分辨率 = 300dpi + +[一寸照片] +打印尺寸 = 2.5cm x 3.5cm +电子版尺寸 = 295px x 413px +分辨率 = 300dpi + +[大一寸照片] +打印尺寸 = 3.3cm x 4.8cm +电子版尺寸 = 390px x 567px +分辨率 = 300dpi + +[小二寸照片] +打印尺寸 = 3.5cm x 4.5cm +电子版尺寸 = 413px x 531px +分辨率 = 300dpi + +[二寸照片] +打印尺寸 = 3.5cm x 4.9cm +电子版尺寸 = 413px x 626px +分辨率 = 300dpi + +[五寸照片] +打印尺寸 = 8.9cm x 12.7cm +电子版尺寸 = 1050px x 1500px + +[六寸照片] +打印尺寸 = 10.2cm x 15.2cm +电子版尺寸 = 1300px x 1950px + +[七寸照片] +打印尺寸 = 12.7cm x 17.8cm +电子版尺寸 = 1500px x 2102px + +[驾驶证、驾照] +电子版尺寸 = 260px x 378px +分辨率 = 300dpi +文件格式 = jpg +文件大小 = 14-30KB + +[居民身份证] +电子版尺寸 = 358px x 441px +分辨率 = 350dpi +文件格式 = RGB + +[社保照片] +打印尺寸 = 2.6cm x 3.2cm +电子版尺寸 = 358px x 441px +分辨率 = 350dpi +文件格式 = jpg +文件大小 = 9-20KB + +[成人高等教育考试] +打印尺寸 = 3.3cm x 4.8cm +电子版尺寸 = 390px x 567px +分辨率 = 300dpi +文件格式 = jpg +文件大小 = 20-200KB + +[艺考报名照片] +打印尺寸 = 3.5cm x 4.9cm +电子版尺寸 = 413px x 625px + +[普通话水平考试照片] +打印尺寸 = 3.3cm x 4.8cm +电子版尺寸 = 390px x 567px + +[公务员考试] +打印尺寸 = 3.5cm x 4.5cm +电子版尺寸 = 413px x 531px + +[高考报名照片] +电子版尺寸 = 480px x 640px +文件大小 = 50-120KB + +[全国高等教育自学考试] +打印尺寸 = 1.2cm x 1.6cm +电子版尺寸 = 144px x 192px + +[护士执业资格考试照片] +电子版尺寸 = 295px x 413px +文件格式 = jpg +文件大小 = ≥45KB + +[教师资格证照片] +打印尺寸 = 3cm x 4.1cm +电子版尺寸 = 360px x 480px + +[全国计算机等级考试] +打印尺寸 = 3.3cm x 4.8cm +电子版尺寸 = 144px x 192px +文件格式 = jpg +文件大小 = 20-200KB + +[日本语能力考试] +打印尺寸 = 3cm x 4cm +电子版尺寸 = ≥360px x 480px +文件格式 = jpg/jpeg +文件大小 = 20-200KB + +[全国英语等级考试] +打印尺寸 = 3.3cm x 
4.8cm +电子版尺寸 = 144px x 192px +文件大小 = 20-200KB + +[二级计算机考试] +打印尺寸 = 3.3cm x 4.8cm +电子版尺寸 = ≥144px x 192px +文件格式 = jpg +文件大小 = 20-200KB + +[小学教师资格证] +电子版尺寸 = 150px x 200px +文件格式 = jpg/jpeg +文件大小 = ≤200KB diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md new file mode 100644 index 0000000..f95d690 --- /dev/null +++ b/docs/CONTRIBUTING.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for 
clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. 
No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. 
\ No newline at end of file diff --git a/docs/README-EN.md b/docs/README-EN.md new file mode 100644 index 0000000..870621c --- /dev/null +++ b/docs/README-EN.md @@ -0,0 +1,158 @@ +# LiYing + +[简体中文](./README.md) | English + +LiYing is an automated photo processing program designed for automating general post-processing workflows in photo studios. + +## Introduction + +LiYing can automatically identify human bodies and faces, correct angles, change background colors, crop passport photos to any size, and automatically arrange them. + +LiYing can run completely offline. All image processing operations are performed locally. + +### Simple Workflow Description + +![workflows](../images/workflows.png) + +### Demonstration + +| ![test1](../images/test1.jpg) | ![test2](../images/test2.jpg) | ![test3](../images/test3.jpg) | +| ----------------------------- | ---------------------------- | ---------------------------- | +| ![test1_output_sheet](../images/test1_output_sheet.jpg)(1-inch on 5-inch photo paper - 3x3) | ![test2_output_sheet](../images/test2_output_sheet.jpg)(2-inch on 5-inch photo paper - 2x2) | ![test3_output_sheet](../images/test3_output_sheet.jpg)(1-inch on 6-inch photo paper - 4x2) | + +**Note: This project is specifically for processing passport photos and may not work perfectly on any arbitrary image. The input images should be standard single-person portrait photos.** + +**It is normal for unexpected results to occur if you use complex images to create passport photos.** + +## Getting Started + +### Bundled Package + +If you are a Windows user and do not need to review the code, you can [download the bundled package](https://github.com/aoguai/LiYing/releases/latest) (tested on Windows 7 SP1 & Windows 10). Extract it and drag your images or directory into `run.bat` to start LiYing. + +### Setup and Installation + +You can install and configure LiYing locally by following the instructions below. 
+ +#### Prerequisites + +LiYing depends on AGPicCompress, which in turn requires `mozjpeg` and `pngquant`. + +You may need to manually install `pngquant`. Refer to the [official pngquant documentation](https://pngquant.org/) and add it to the appropriate location. + +LiYing checks for `pngquant` in the following locations, which you can configure: +- Environment variables (recommended) +- LiYing/src directory +- `ext` directory under LiYing/src + +This allows AGPicCompress to locate and use `pngquant` for PNG image compression. + +##### Microsoft Visual C++ Redistributable Dependency + +You need to install the latest [Microsoft Visual C++ Redistributable](https://learn.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist). + +If you are using Windows, your minimum version should be Windows 7 SP1 or higher. + +#### Building from Source + +You can obtain the LiYing project code by running: + +```shell +git clone https://github.com/aoguai/LiYing +cd LiYing ## Enter the LiYing directory +pip install -r requirements.txt # Install Python helpers' dependencies +``` + +**Note: If you are using Windows 7, ensure you have at least Windows 7 SP1 and `onnxruntime==1.14.0`.** + +#### Downloading the Required Models + +Download the models used by the project and place them in `LiYing/src/model`, or specify the model paths in the command line. 
+ +| Purpose | Model Name | Download Link | Source | +|---------------------------|-------------------|----------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------| +| Face Recognition | Yunnet | [Download Link](https://github.com/opencv/opencv_zoo/blob/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx) | [Yunnet](https://github.com/ShiqiYu/libfacedetection) | +| Subject Recognition and Background Replacement | RMBG-1.4 | [Download Link](https://huggingface.co/briaai/RMBG-1.4/blob/main/onnx/model.onnx) | [RMBG-1.4](https://huggingface.co/briaai/RMBG-1.4) | +| Body Recognition | yolov8n-pose | [Download Link](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-pose.pt) | [ultralytics](https://github.com/ultralytics/ultralytics) | + +**Note: For the yolov8n-pose model, you need to export it to an ONNX model. Refer to the [official documentation](https://docs.ultralytics.com/integrations/onnx/) for instructions.** + +#### Running + +```shell +cd LiYing/src +python main.py --help +``` + +For Windows users, the project provides a batch script for convenience: + +```shell +cd LiYing +run_en.bat ./images/test1.jpg +``` + +#### CLI Parameters and Help + +```shell +python main.py --help +Usage: main.py [OPTIONS] IMG_PATH + + Image Processing CLI Tools + + IMG_PATH: Input Image Path + +Options: + -y, --yolov8-model-path PATH Path to YOLOv8 model + -u, --yunet-model-path PATH Path to YuNet model + -r, --rmbg-model-path PATH Path to RMBG model + -b, --bgr-list BGR_LIST BGR channel values list (comma-separated) + for image composition + -s, --save-path PATH Path to save the output image + -p, --photo-type TEXT Type of photo + --photo-sheet-size [5|6] Size of the photo sheet (5-inch or 6-inch) + -c, --compress / --no-compress Whether to compress the image + -sc, --save-corrected / --no-save-corrected + Whether to save the 
corrected image + -bg, --change-background / --no-change-background + Whether to change the background + -sb, --save-background / --no-save-background + Whether to save the image with changed + background + -sr, --sheet-rows INTEGER Number of rows in the photo sheet + -sc, --sheet-cols INTEGER Number of columns in the photo sheet + --rotate / --no-rotate Whether to rotate the photo by 90 degrees + -rs, --resize / --no-resize Whether to resize the image + -srz, --save-resized / --no-save-resized + Whether to save the resized image + --help Show this message and exit. +``` + +## Acknowledgments + +The project was created to help my parents complete their work more easily. I would like to thank my parents for their support. + +### Related Projects + +Special thanks to the following projects and contributors for providing models and theories: + +- [Yunnet](https://github.com/ShiqiYu/libfacedetection) +- [RMBG-1.4](https://huggingface.co/briaai/RMBG-1.4) +- [ultralytics](https://github.com/ultralytics/ultralytics) + +You might also be interested in the image compression part, which is another open-source project of mine: + +- [AGPicCompress](https://github.com/aoguai/AGPicCompress) + +It depends on: + +- [mozjpeg](https://github.com/mozilla/mozjpeg) +- [pngquant](https://github.com/kornelski/pngquant) +- [mozjpeg-lossless-optimization](https://github.com/wanadev/mozjpeg-lossless-optimization) + +## Contribution + +LiYing is an open-source project, and community participation is highly welcomed. To contribute to this project, please follow the [Contribution Guide](./CONTRIBUTING.md). + +## License + +[LiYing](https://github.com/aoguai/LiYing) is open-sourced under the AGPL-3.0 license. For details, please refer to the [LICENSE](../LICENSE) file. 
\ No newline at end of file diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..e7701f5 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,159 @@ +# LiYing + +简体中文 | [English](./README-EN.md) + +LiYing 是一套适用于自动化完成一般照相馆后期流程的照片自动处理的程序。 + +## 介绍 + +LiYing 可以完成人体、人脸自动识别,角度自动纠正,自动更换任意背景色,任意尺寸证件照自动裁切,并自动排版。 + +LiYing 可以完全离线运行。所有图像处理操作都在本地运行。 + +### 简单工作流说明 + +![workflows](../images/workflows.png) + +### 效果展示 + +| ![test1](../images/test1.jpg) | ![test2](../images/test2.jpg) | ![test3](../images/test3.jpg) | +| ----------------------------- | ---------------------------- | ---------------------------- | +| ![test1_output_sheet](../images/test1_output_sheet.jpg)(1寸-5寸相片纸-3*3) | ![test2_output_sheet](../images/test2_output_sheet.jpg)(2寸-5寸相片纸-2*2) | ![test3_output_sheet](../images/test3_output_sheet.jpg)(1寸-6寸相片纸-4*2) | + +**注:本项目仅针对证件照图像处理,而非要求任意照片图像都可以完美执行,所以该项目的输入图片应该是符合一般要求的单人肖像照片。** + +**如果您使用复杂图片制作证件照出现意外情况属于正常现象。** + +## 开始使用 + +### 整合包 + +如果你是 Windows 用户且没有代码阅览需求,可以[下载整合包](https://github.com/aoguai/LiYing/releases/latest)(已在 Windows 7 SP1 & Windows 10),解压将图片或目录拖入 run.bat 即可启动 LiYing。 + +### 设置和安装 + +您可以按照以下说明进行安装和配置,从而在本地环境中使用 LiYing。 + +#### 先决条件 + +LiYing 依赖于 AGPicCompress ,而 AGPicCompress 需要依赖于 mozjpeg 和 pngquant + +其中你可能需要手动安装 pngquant,你可以参考 [pngquant 官方文档](https://pngquant.org/)并将其添加到对应位置 + +LiYing 会在以下位置检测 pngquant 是否存在,你可以自由配置 +- 环境变量(推荐) +- LiYing/src 目录下 +- LiYing/src 目录下的 `ext` 目录 + +以便 AGPicCompress 能够找到 pngquant 并使用它进行 PNG 图片的压缩。 + +#### Microsoft Visual C++ Redistributable 依赖 + +您需要安装最新 [Microsoft Visual C++ Redistributable 依赖](https://learn.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist) + + +如果您使用的是 Windows 系统,您的最低版本应该是 Windows 7 SP1 及以上。 + +#### 从源码构建 + +您可以通过以下方式获取 LiYing 项目的代码: + +```shell +git clone https://github.com/aoguai/LiYing +cd LiYing ## 进入 LiYing 目录 +pip install -r requirements.txt # install Python helpers' dependencies +``` + +注: 如果您使用的是 Windows 7 系统请您至少需要是 Windows 7 SP1 以上版本,且要求 `onnxruntime==1.14.0` 
+
+#### 下载对应模型
+
+您需要下载该项目使用到的模型并将其放置在 `LiYing/src/model` 中。或者您可以在 CLI 中指定模型路径。
+
+| 用途 | 模型名称 | 下载链接 | 来源 |
+|------------------------|--------------------|------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------|
+| 人脸识别 | Yunnet | [下载链接](https://github.com/opencv/opencv_zoo/blob/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx) | [Yunnet](https://github.com/ShiqiYu/libfacedetection) |
+| 主体识别替换背景 | RMBG-1.4 | [下载链接](https://huggingface.co/briaai/RMBG-1.4/blob/main/onnx/model.onnx) | [RMBG-1.4](https://huggingface.co/briaai/RMBG-1.4) |
+| 人体识别 | yolov8n-pose | [下载链接](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-pose.pt) | [ultralytics](https://github.com/ultralytics/ultralytics) |
+
+**注: 对于 yolov8n-pose 模型,您需要将其导出为 ONNX 模型,您可以参考[官方文档](https://docs.ultralytics.com/integrations/onnx/)实现**
+
+#### 运行
+
+```shell
+cd LiYing/src
+python main.py --help
+```
+
+对于 Windows 用户,项目提供了 bat 运行脚本方便您使用:
+
+```shell
+cd LiYing
+run.bat ./images/test1.jpg
+```
+
+#### CLI 参数信息与帮助
+```shell
+python main.py --help
+Usage: main.py [OPTIONS] IMG_PATH
+
+  图像处理 CLI 工具
+
+  IMG_PATH: 输入图像路径
+
+Options:
+  -y, --yolov8-model-path PATH    YOLOv8 模型路径
+  -u, --yunet-model-path PATH     YuNet 模型路径
+  -r, --rmbg-model-path PATH      RMBG 模型路径
+  -b, --bgr-list BGR_LIST         BGR 通道值列表(逗号分隔),用于图像合成
+  -s, --save-path PATH            保存路径
+  -p, --photo-type TEXT           照片类型
+  --photo-sheet-size [5|6]        选择照片表格的尺寸(五寸或六寸)
+  -c, --compress / --no-compress  是否压缩图像
+  -sc, --save-corrected / --no-save-corrected
+                                  是否保存修正图像后的图片
+  -bg, --change-background / --no-change-background
+                                  是否替换背景
+  -sb, --save-background / --no-save-background
+                                  是否保存替换背景后的图像
+  -sr, --sheet-rows INTEGER       照片表格的行数
+  -sc, --sheet-cols INTEGER       照片表格的列数
+  --rotate / --no-rotate          是否旋转照片90度
+  -rs, --resize / --no-resize     是否调整图像尺寸
+  -srz, --save-resized / --no-save-resized
+                                  是否保存调整尺寸后的图像
+  --help                          Show this message and exit.
+ +``` + +## 致谢 + +该项目的制作初衷和项目名称来源于帮助我的父母更轻松的完成他们的工作,在此感谢我的父母。 + +### 相关 + +同时特别感谢以下项目和贡献者: + +提供模型与理论 + +- [Yunnet](https://github.com/ShiqiYu/libfacedetection) +- [RMBG-1.4](https://huggingface.co/briaai/RMBG-1.4) +- [ultralytics](https://github.com/ultralytics/ultralytics) + +或许你会对图片压缩部分感兴趣,那是我另一个开源项目 + +- [AGPicCompress](https://github.com/aoguai/AGPicCompress) + +它依赖于 + +- [mozjpeg](https://github.com/mozilla/mozjpeg) +- [pngquant](https://github.com/kornelski/pngquant) +- [mozjpeg-lossless-optimization](https://github.com/wanadev/mozjpeg-lossless-optimization) + +## 贡献 + +LiYing 是一个开源项目,非常欢迎社区的参与。要为该项目做出贡献,请遵循[贡献指南](./CONTRIBUTING.md)。 + +## License 说明 + +[LiYing](https://github.com/aoguai/LiYing) 使用 AGPL-3.0 license 进行开源,详情请参阅 [LICENSE](../LICENSE) 文件。 diff --git a/images/test1.jpg b/images/test1.jpg new file mode 100644 index 0000000..040c19f Binary files /dev/null and b/images/test1.jpg differ diff --git a/images/test1_output_background.jpg b/images/test1_output_background.jpg new file mode 100644 index 0000000..f0f7166 Binary files /dev/null and b/images/test1_output_background.jpg differ diff --git a/images/test1_output_corrected.jpg b/images/test1_output_corrected.jpg new file mode 100644 index 0000000..ed5070a Binary files /dev/null and b/images/test1_output_corrected.jpg differ diff --git a/images/test1_output_resized.jpg b/images/test1_output_resized.jpg new file mode 100644 index 0000000..1e908df Binary files /dev/null and b/images/test1_output_resized.jpg differ diff --git a/images/test1_output_sheet.jpg b/images/test1_output_sheet.jpg new file mode 100644 index 0000000..32b9717 Binary files /dev/null and b/images/test1_output_sheet.jpg differ diff --git a/images/test2.jpg b/images/test2.jpg new file mode 100644 index 0000000..74b11fb Binary files /dev/null and b/images/test2.jpg differ diff --git a/images/test2_output_sheet.jpg b/images/test2_output_sheet.jpg new file mode 100644 index 0000000..79e6000 Binary files /dev/null and 
b/images/test2_output_sheet.jpg differ diff --git a/images/test3.jpg b/images/test3.jpg new file mode 100644 index 0000000..97ab45b Binary files /dev/null and b/images/test3.jpg differ diff --git a/images/test3_output_sheet.jpg b/images/test3_output_sheet.jpg new file mode 100644 index 0000000..8028e04 Binary files /dev/null and b/images/test3_output_sheet.jpg differ diff --git a/images/workflows.png b/images/workflows.png new file mode 100644 index 0000000..18fa9f1 Binary files /dev/null and b/images/workflows.png differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e690e5a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +click +colorama +mozjpeg_lossless_optimization +onnxruntime>=1.14.0 +Pillow +opencv-python +numpy \ No newline at end of file diff --git a/run.bat b/run.bat new file mode 100644 index 0000000..6edf3e4 --- /dev/null +++ b/run.bat @@ -0,0 +1,126 @@ +@echo off +setlocal enabledelayedexpansion + +REM ȡǰļĿ¼ +set SCRIPT_DIR=%~dp0 + +REM Python·ĿĿ¼ +set PYTHON_EXE=%SCRIPT_DIR%_myPython\python.exe +set SCRIPT_PATH=%SCRIPT_DIR%src\main.py + +REM ǷϷļĿ¼ +if "%~1"=="" ( + echo ϷͼƬļĿ¼˽ű + pause + exit /b +) + +REM ȡϷŵ· +set INPUT_PATH=%~1 + +echo LiYing +echo Github: https://github.com/aoguai/LiYing +echo LICENSE: AGPL-3.0 license +echo ---------------------------------------- +REM ѯû +REM ѯûǷͼߴ +set /p "resize=Ƿͼߴ磨yes/noĬΪ yes" +if /i "!resize!"=="no" ( + set resize=--no-resize + REM ûѡ񲻵ߴ磬Ĭϲߴͼ + set save_resized=--no-save-resized +) else ( + set resize=--resize + REM ûѡߴû루ĬyesѯǷ񱣴ͼ + set /p "save_resized=Ƿ񱣴ߴͼyes/noĬΪ no" + if /i "!save_resized!"=="yes" ( + set save_resized=--save-resized + ) else ( + set save_resized=--no-save-resized + ) + REM ѯƬ + set /p "photo_type= ƬͣĬΪ һƬ" + if "!photo_type!"=="" set photo_type="һƬ" +) + +set /p "photo-sheet-size= Ƭֽߴ磨ĬΪ 5磩" +if "!photo-sheet-size!"=="" set photo-sheet-size="5" + +set /p "compress=Ƿѹͼyes/noĬΪ yes" +if /i "!compress!"=="no" set compress=--no-compress +if /i 
"!compress!"=="yes" set compress=--compress +if /i "!compress!"=="" set compress=--compress + +set /p "save_corrected=Ƿ񱣴ͼͼƬyes/noĬΪ no" +if /i "!save_corrected!"=="yes" set save_corrected=--save-corrected +if /i "!save_corrected!"=="no" set save_corrected=--no-save-corrected + +set /p "change_background=Ƿ滻yes/noĬΪ yes" +if /i "!change_background!"=="no" ( + set change_background=--no-change-background + REM ûѡ滻Ĭϲ滻ͼ + set save_background=--no-save-background +) else ( + set change_background=--change-background + REM ѯʱɫ + set /p "bgr_list= BGR ֵͨбŷָĬΪ 1.0,1.0,1.0" + if "!bgr_list!"=="" set bgr_list=1.0,0.0,0.0 + if "!bgr_list!"=="" set bgr_list=0.05,0.36,0.647 + if "!bgr_list!"=="" set bgr_list=1.0,1.0,1.0 + if "!bgr_list!"=="" set bgr_list=1.0,1.0,1.0 + REM ѯǷ񱣴ͼ + set /p "save_background=Ƿ񱣴滻ͼyes/noĬΪ no" + if /i "!save_background!"=="yes" ( + set save_background=--save-background + ) else ( + set save_background=--no-save-background + ) +) + +set /p "sheet_rows= ƬĬΪ 3" +if "!sheet_rows!"=="" set sheet_rows=3 + +set /p "sheet_cols= ƬĬΪ 3" +if "!sheet_cols!"=="" set sheet_cols=3 + +set /p "rotate=ǷתƬ90ȣyes/noĬΪ no" +if /i "!rotate!"=="yes" set rotate=--rotate +if /i "!rotate!"=="no" set rotate= + +REM жϷŵļĿ¼ +if exist "%INPUT_PATH%\" ( + REM Ŀ¼е jpg png ļ + for %%f in ("%INPUT_PATH%\*.jpg" "%INPUT_PATH%\*.png") do ( + REM ȡļ·ļ + set "INPUT_FILE=%%~ff" + set "OUTPUT_PATH=%%~dpnf_output%%~xf" + + REM ִPythonűͼ + "%PYTHON_EXE%" "%SCRIPT_PATH%" "%%~ff" ^ + -b "%bgr_list%" ^ + -s "%%~dpnf_output%%~xf" ^ + -p "%photo_type%" ^ + --photo-sheet-size "%photo-sheet-size%"^ + %compress% %save_corrected% %change_background% %save_background% ^ + -sr %sheet_rows% -sc %sheet_cols% ^ + %rotate% %resize% %save_resized% + ) +) else ( + REM ļֱӴļ + set INPUT_DIR=%~dp1 + set INPUT_FILE=%~nx1 + set OUTPUT_PATH=%INPUT_DIR%%~n1_output%~x1 + + REM ʹ setlocal enabledelayedexpansion Ҫʹ !! ñ + start "" "%PYTHON_EXE%" "%SCRIPT_PATH%" "!INPUT_PATH!" ^ + -b "%bgr_list%" ^ + -s "!OUTPUT_PATH!" 
^ + -p "%photo_type%" ^ + --photo-sheet-size "%photo-sheet-size%"^ + %compress% %save_corrected% %change_background% %save_background% ^ + -sr %sheet_rows% -sc %sheet_cols% ^ + %rotate% %resize% %save_resized% +) + +pause + diff --git a/run_en.bat b/run_en.bat new file mode 100644 index 0000000..f459c0b --- /dev/null +++ b/run_en.bat @@ -0,0 +1,126 @@ +@echo off +set CLI_LANGUAGE=en +setlocal enabledelayedexpansion + +REM Get the current batch file directory +set SCRIPT_DIR=%~dp0 + +REM Set Python interpreter path and project directory +set PYTHON_EXE=%SCRIPT_DIR%_myPython\python.exe +set SCRIPT_PATH=%SCRIPT_DIR%src\main.py + +REM Check if files or directories were dragged and dropped +if "%~1"=="" ( + echo Please drag and drop image files or directories onto this script + pause + exit /b +) + +REM Get the dropped path +set INPUT_PATH=%~1 + +echo LiYing +echo Github: https://github.com/aoguai/LiYing +echo LICENSE: AGPL-3.0 license +echo ---------------------------------------- +REM Prompt user for input parameters +REM Ask the user whether to resize images +set /p "resize=Resize images (yes/no, default is yes):" +if /i "!resize!"=="no" ( + set resize=--no-resize + REM If the user chooses not to resize, do not save the resized image by default + set save_resized=--no-save-resized +) else ( + set resize=--resize + REM If the user chooses to resize or enters nothing (default yes), ask whether to save the resized image + set /p "save_resized=Save resized images (yes/no, default is no):" + if /i "!save_resized!"=="yes" ( + set save_resized=--save-resized + ) else ( + set save_resized=--no-save-resized + ) + REM Ask for photo type + set /p "photo_type=Enter photo type (default is passport photo):" + if "!photo_type!"=="" set photo_type="passport photo" +) + +set /p "photo-sheet-size=Enter photo sheet size (default is 5 inches):" +if "!photo-sheet-size!"=="" set photo-sheet-size="5 inches" + +set /p "compress=Compress images (yes/no, default is yes):" +if /i 
"!compress!"=="no" set compress=--no-compress +if /i "!compress!"=="yes" set compress=--compress +if /i "!compress!"=="" set compress=--compress + +set /p "save_corrected=Save corrected images (yes/no, default is no):" +if /i "!save_corrected!"=="yes" set save_corrected=--save-corrected +if /i "!save_corrected!"=="no" set save_corrected=--no-save-corrected + +set /p "change_background=Change background (yes/no, default is yes):" +if /i "!change_background!"=="no" ( + set change_background=--no-change-background + REM If the user chooses not to change the background, do not save the changed background image by default + set save_background=--no-save-background +) else ( + set change_background=--change-background + REM Ask for background color + set /p "bgr_list=Enter BGR channel values (comma separated, default is 1.0,1.0,1.0):" + if "!bgr_list!"=="red" set bgr_list=1.0,0.0,0.0 + if "!bgr_list!"=="blue" set bgr_list=0.05,0.36,0.647 + if "!bgr_list!"=="white" set bgr_list=1.0,1.0,1.0 + if "!bgr_list!"=="" set bgr_list=1.0,1.0,1.0 + REM Ask whether to save the changed background image + set /p "save_background=Save images with changed background (yes/no, default is no):" + if /i "!save_background!"=="yes" ( + set save_background=--save-background + ) else ( + set save_background=--no-save-background + ) +) + +set /p "sheet_rows=Enter the number of rows in the photo sheet (default is 3):" +if "!sheet_rows!"=="" set sheet_rows=3 + +set /p "sheet_cols=Enter the number of columns in the photo sheet (default is 3):" +if "!sheet_cols!"=="" set sheet_cols=3 + +set /p "rotate=Rotate photos 90 degrees (yes/no, default is no):" +if /i "!rotate!"=="yes" set rotate=--rotate +if /i "!rotate!"=="no" set rotate= + +REM Check if the dropped item is a file or a directory +if exist "%INPUT_PATH%\" ( + REM If it's a directory, iterate through all jpg and png files in it + for %%f in ("%INPUT_PATH%\*.jpg" "%INPUT_PATH%\*.png") do ( + REM Extract folder path and file name + set 
"INPUT_FILE=%%~ff" + set "OUTPUT_PATH=%%~dpnf_output%%~xf" + + REM Execute Python script to process the image + "%PYTHON_EXE%" "%SCRIPT_PATH%" "%%~ff" ^ + -b "%bgr_list%" ^ + -s "%%~dpnf_output%%~xf" ^ + -p "%photo_type%" ^ + --photo-sheet-size "%photo-sheet-size%"^ + %compress% %save_corrected% %change_background% %save_background% ^ + -sr %sheet_rows% -sc %sheet_cols% ^ + %rotate% %resize% %save_resized% + ) +) else ( + REM If it's a file, process the file directly + set INPUT_DIR=%~dp1 + set INPUT_FILE=%~nx1 + set OUTPUT_PATH=%INPUT_DIR%%~n1_output%~x1 + + REM Due to setlocal enabledelayedexpansion, use !variable_name! to reference variables + start "" "%PYTHON_EXE%" "%SCRIPT_PATH%" "!INPUT_PATH!" ^ + -b "%bgr_list%" ^ + -s "!OUTPUT_PATH!" ^ + -p "%photo_type%" ^ + --photo-sheet-size "%photo-sheet-size%"^ + %compress% %save_corrected% %change_background% %save_background% ^ + -sr %sheet_rows% -sc %sheet_cols% ^ + %rotate% %resize% %save_resized% +) + +pause diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..7c69b92 --- /dev/null +++ b/src/main.py @@ -0,0 +1,145 @@ +import click +import os +import sys + +# 设置项目根目录 +PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__)) +DATA_DIR = os.path.join(PROJECT_ROOT, 'data') +MODEL_DIR = os.path.join(PROJECT_ROOT, 'model') +TOOL_DIR = os.path.join(PROJECT_ROOT, 'tool') +# 将数据目录和模型目录添加到 sys.path 中 +sys.path.append(DATA_DIR) +sys.path.append(MODEL_DIR) +sys.path.append(TOOL_DIR) + +# 设置 src 目录为 PYTHONPATH +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src'))) + +from tool.ImageProcessor import ImageProcessor +from tool.PhotoSheetGenerator import PhotoSheetGenerator + + +class BGRListType(click.ParamType): + name = 'bgr_list' + + def convert(self, value, param, ctx): + if value: + try: + return tuple(float(x) for x in value.split(',')) + except ValueError: + 
self.fail(f'{value} is not a valid BGR list format. Expected format: FLOAT,FLOAT,FLOAT.') + return (1.0, 1.0, 1.0) # 默认值 + + +def get_language(): + return os.getenv('CLI_LANGUAGE', 'en') # 默认语言为英文 + + +# 定义多语言支持的输出信息 +messages = { + 'en': { + 'corrected_saved': 'Corrected image saved to {path}', + 'background_saved': 'Background-changed image saved to {path}', + 'resized_saved': 'Resized image saved to {path}', + 'sheet_saved': 'Photo sheet saved to {path}', + }, + 'zh': { + 'corrected_saved': '裁剪并修正后的图像已保存到 {path}', + 'background_saved': '替换背景后的图像已保存到 {path}', + 'resized_saved': '调整尺寸后的图像已保存到 {path}', + 'sheet_saved': '照片表格已保存到 {path}', + } +} + +def echo_message(key, **kwargs): + lang = get_language() + message = messages.get(lang, messages['en']).get(key, '') + click.echo(message.format(**kwargs)) + + +@click.command() +@click.argument('img_path', type=click.Path(exists=True, resolve_path=True)) +@click.option('-y', '--yolov8-model-path', type=click.Path(), + default=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'model/yolov8n-pose.onnx'), + help='Path to YOLOv8 model' if get_language() == 'en' else 'YOLOv8 模型路径') +@click.option('-u', '--yunet-model-path', type=click.Path(), + default=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'model/face_detection_yunet_2023mar.onnx'), + help='Path to YuNet model' if get_language() == 'en' else 'YuNet 模型路径') +@click.option('-r', '--rmbg-model-path', type=click.Path(), + default=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'model/RMBG-1.4-model.onnx'), + help='Path to RMBG model' if get_language() == 'en' else 'RMBG 模型路径') +@click.option('-b', '--bgr-list', type=BGRListType(), default='1.0,1.0,1.0', + help='BGR channel values list (comma-separated) for image composition' if get_language() == 'en' else 'BGR 通道值列表(逗号分隔),用于图像合成') +@click.option('-s', '--save-path', type=click.Path(), default='output.jpg', + help='Path to save the output image' if get_language() == 'en' else '保存路径') 
+@click.option('-p', '--photo-type', type=str, default='一寸照片', + help='Type of photo' if get_language() == 'en' else '照片类型') +@click.option('--photo-sheet-size', type=click.Choice(['5', '6'], case_sensitive=False), default='5', + help='Size of the photo sheet (5-inch or 6-inch)' if get_language() == 'en' else '选择照片表格的尺寸(五寸或六寸)') +@click.option('-c', '--compress/--no-compress', default=False, + help='Whether to compress the image' if get_language() == 'en' else '是否压缩图像') +@click.option('--save-corrected/--no-save-corrected', default=False, + help='Whether to save the corrected image' if get_language() == 'en' else '是否保存修正图像后的图片') +@click.option('-bg', '--change-background/--no-change-background', default=False, + help='Whether to change the background' if get_language() == 'en' else '是否替换背景') +@click.option('-sb', '--save-background/--no-save-background', default=False, + help='Whether to save the image with changed background' if get_language() == 'en' else '是否保存替换背景后的图像') +@click.option('-sr', '--sheet-rows', type=int, default=3, + help='Number of rows in the photo sheet' if get_language() == 'en' else '照片表格的行数') +@click.option('-sc', '--sheet-cols', type=int, default=3, + help='Number of columns in the photo sheet' if get_language() == 'en' else '照片表格的列数') +@click.option('--rotate/--no-rotate', default=False, + help='Whether to rotate the photo by 90 degrees' if get_language() == 'en' else '是否旋转照片90度') +@click.option('-rs', '--resize/--no-resize', default=True, + help='Whether to resize the image' if get_language() == 'en' else '是否调整图像尺寸') +@click.option('-srz', '--save-resized/--no-save-resized', default=False, + help='Whether to save the resized image' if get_language() == 'en' else '是否保存调整尺寸后的图像') +def cli(img_path, yolov8_model_path, yunet_model_path, rmbg_model_path, bgr_list, save_path, photo_type, + photo_sheet_size, compress, save_corrected, + change_background, save_background, sheet_rows, sheet_cols, rotate, resize, save_resized): + """ + 图像处理 
CLI 工具 + + IMG_PATH: 输入图像路径 + """ + # 创建图像处理器实例 + processor = ImageProcessor(img_path, yolov8_model_path, yunet_model_path, rmbg_model_path, bgr_list, y_b=compress) + + # 裁剪并修正图像 + processor.crop_and_correct_image() + if save_corrected: + corrected_path = os.path.splitext(save_path)[0] + '_corrected' + os.path.splitext(save_path)[1] + processor.save_photos(corrected_path, compress) + echo_message('corrected_saved', path=corrected_path) + + # 可选的替换背景 + if change_background: + processor.change_background() + if save_background: + background_path = os.path.splitext(save_path)[0] + '_background' + os.path.splitext(save_path)[1] + processor.save_photos(background_path, compress) + echo_message('background_saved', path=background_path) + + # 可选的调整尺寸 + if resize: + processor.resize_image(photo_type) + if save_resized: + resized_path = os.path.splitext(save_path)[0] + '_resized' + os.path.splitext(save_path)[1] + processor.save_photos(resized_path, compress) + echo_message('resized_saved', path=resized_path) + + # 生成照片表格 + # 设置照片表格尺寸 + if photo_sheet_size == '5': + sheet_width, sheet_height = 1050, 1500 + else: + sheet_width, sheet_height = 1300, 1950 + generator = PhotoSheetGenerator([sheet_width, sheet_height]) + photo_sheet_cv = generator.generate_photo_sheet(processor.photo.image, sheet_rows, sheet_cols, rotate) + sheet_path = os.path.splitext(save_path)[0] + '_sheet' + os.path.splitext(save_path)[1] + generator.save_photo_sheet(photo_sheet_cv, sheet_path) + echo_message('sheet_saved', path=sheet_path) + + +if __name__ == "__main__": + cli() diff --git a/src/model/.gitignore b/src/model/.gitignore new file mode 100644 index 0000000..c96a04f --- /dev/null +++ b/src/model/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore \ No newline at end of file diff --git a/src/tool/ImageInference.py b/src/tool/ImageInference.py new file mode 100644 index 0000000..9d928a0 --- /dev/null +++ b/src/tool/ImageInference.py @@ -0,0 +1,93 @@ +import cv2 +import numpy as np +import onnxruntime as 
ort + + +class ImageInference: + def __init__(self, model_path, bgr_list=None): + """ + Initialize the image inference object. + + :param model_path: Path to the ONNX model + :param bgr_list: List of BGR channel values for image composition + """ + if bgr_list is None: + bgr_list = [1.0, 0.0, 0.0] + self.model_path = model_path + try: + self.sess = ort.InferenceSession(model_path) + except Exception as e: + raise RuntimeError(f"Failed to load ONNX model from {model_path}: {e}") + self.rec = [np.zeros((1, 1, 1, 1), dtype=np.float32)] * 4 + self.downsample_ratio = np.array([0.25], dtype=np.float32) # Ensure FP32 + self.bgr = np.array(bgr_list, dtype=np.float32).reshape((3, 1, 1)) # BGR + + def update_bgr(self, bgr_list): + """ + Update BGR channel values. + + :param bgr_list: New list of BGR channel values + """ + if bgr_list is None or not isinstance(bgr_list, (list, tuple)) or len(bgr_list) != 3: + raise ValueError("bgr_list must be a list or tuple containing 3 elements.") + self.bgr = np.array(bgr_list, dtype=np.float32).reshape((3, 1, 1)) + + @staticmethod + def normalize(frame: np.ndarray) -> np.ndarray: + """ + Normalize the image. 
+ + :param frame: Input image (H, W) or (H, W, C) + :return: Normalized image (B=1, C, H, W) + :rtype: np.ndarray + """ + if frame is None: + raise ValueError("Input cannot be None.") + if not isinstance(frame, np.ndarray): + raise TypeError("Input must be a numpy array.") + if frame.ndim == 2: + # If the image is grayscale, expand to 3 channels + img = np.expand_dims(frame, axis=-1) + img = np.repeat(img, 3, axis=-1) + elif frame.ndim == 3 and frame.shape[2] == 3: + img = frame + else: + raise ValueError("Input shape must be (H, W) or (H, W, 3).") + + img = img.astype(np.float32) / 255.0 + img = img[:, :, ::-1] # Convert from RGB to BGR + img = np.transpose(img, (2, 0, 1)) # Transpose to (C, H, W) + img = np.expand_dims(img, axis=0) # Expand to (B=1, C, H, W) + return img + + def infer_rvm_frame(self, image: np.ndarray) -> np.ndarray: + """ + Perform inference on the image using the RVM model. + + :param image: Input image (H, W) or (H, W, C) + :return: Inferred image (H, W, 3) + :rtype: np.ndarray + """ + src = self.normalize(image) + + # Perform inference + try: + fgr, pha, *self.rec = self.sess.run(None, { + "src": src, + "r1i": self.rec[0], + "r2i": self.rec[1], + "r3i": self.rec[2], + "r4i": self.rec[3], + "downsample_ratio": self.downsample_ratio + }) + except Exception as e: + raise RuntimeError(f"ONNX model inference failed: {e}") + + # Compose image + merge_frame = fgr * pha + self.bgr * (1. 
- pha) # (1, 3, H, W) + merge_frame = merge_frame[0] * 255.0 # (3, H, W) + merge_frame = merge_frame.astype(np.uint8) # Convert to uint8 + merge_frame = np.transpose(merge_frame, (1, 2, 0)) # Transpose to (H, W, 3) + merge_frame = cv2.cvtColor(merge_frame, cv2.COLOR_BGR2RGB) # Convert from BGR to RGB + + return merge_frame diff --git a/src/tool/ImageProcessor.py b/src/tool/ImageProcessor.py new file mode 100644 index 0000000..c495c14 --- /dev/null +++ b/src/tool/ImageProcessor.py @@ -0,0 +1,266 @@ +import os + +import cv2 as cv +import numpy as np + +from ImageSegmentation import ImageSegmentation +from PhotoEntity import PhotoEntity +from PhotoRequirements import PhotoRequirements + + +def get_model_file(filename): + return os.path.join('model', filename) + + +class ImageProcessor: + """ + Image processing class for cropping and correcting the human region in images. + """ + + def __init__(self, img_path, + yolov8_model_path=get_model_file('yolov8n-pose.onnx'), + yunet_model_path=get_model_file('face_detection_yunet_2023mar.onnx'), + RMBG_model_path=get_model_file('RMBG-1.4-model.onnx'), + bgr_list=None, + y_b=False): + """ + Initialize ImageProcessor instance + + :param img_path: Path to the image + :param yolov8_model_path: Path to the YOLOv8 model + :param yunet_model_path: Path to the YuNet model + :param RMBG_model_path: Path to the RMBG model + :param bgr_list: List of BGR channel values for image composition + """ + if not os.path.exists(img_path): + raise FileNotFoundError(f"Image path does not exist: {img_path}") + if not os.path.exists(yolov8_model_path): + raise FileNotFoundError(f"YOLOv8 model path does not exist: {yolov8_model_path}") + if not os.path.exists(yunet_model_path): + raise FileNotFoundError(f"YuNet model path does not exist: {yunet_model_path}") + if not os.path.exists(RMBG_model_path): + raise FileNotFoundError(f"RMBG model path does not exist: {RMBG_model_path}") + + self.photo = PhotoEntity(img_path, yolov8_model_path, yunet_model_path, 
y_b) + # self.inference = ImageInference(RVM_model_path, bgr_list if bgr_list is not None else [1.0, 1.0, 1.0]) + self.segmentation = ImageSegmentation(model_path=RMBG_model_path, model_input_size=[1024, 1024], + bgr_list=bgr_list if bgr_list is not None else [1.0, 1.0, 1.0]) + self.photo_requirements_detector = PhotoRequirements() + + @staticmethod + def rotate_image(image: np.ndarray, angle: float) -> np.ndarray: + """ + Rotate the image + + :param image: Original image (numpy.ndarray) + :param angle: Rotation angle (degrees) + :return: Rotated image (numpy.ndarray) + """ + if not isinstance(image, np.ndarray): + raise TypeError("The input image must be of type numpy.ndarray") + if not isinstance(angle, (int, float)): + raise TypeError("The rotation angle must be of type int or float") + + height, width = image.shape[:2] + center = (width / 2, height / 2) + matrix = cv.getRotationMatrix2D(center, angle, 1.0) + rotated_image = cv.warpAffine(image, matrix, (width, height), flags=cv.INTER_CUBIC) + return rotated_image + + @staticmethod + def compute_rotation_angle(left_shoulder: tuple, right_shoulder: tuple, image_shape: tuple) -> float: + """ + Compute the rotation angle to align the shoulders horizontally + + :param left_shoulder: Coordinates of the left shoulder keypoint (normalized or pixel coordinates) + :param right_shoulder: Coordinates of the right shoulder keypoint (normalized or pixel coordinates) + :param image_shape: Height and width of the image + :return: Rotation angle (degrees) + :rtype: float + """ + if not (isinstance(left_shoulder, tuple) and len(left_shoulder) == 3): + raise ValueError("The left shoulder keypoint format is incorrect") + if not (isinstance(right_shoulder, tuple) and len(right_shoulder) == 3): + raise ValueError("The right shoulder keypoint format is incorrect") + if not (isinstance(image_shape, tuple) and len(image_shape) == 2): + raise ValueError("The image size format is incorrect") + + height, width = image_shape + + # If 
coordinates are normalized, convert to pixel coordinates + if left_shoulder[2] < 1.0 and right_shoulder[2] < 1.0: + left_shoulder = (left_shoulder[0] * width, left_shoulder[1] * height) + right_shoulder = (right_shoulder[0] * width, right_shoulder[1] * height) + + dx = right_shoulder[0] - left_shoulder[0] + dy = right_shoulder[1] - left_shoulder[1] + angle = np.arctan2(dy, dx) * (180 / np.pi) # Compute the angle + return angle + + def crop_and_correct_image(self) -> PhotoEntity: + """ + Crop and correct the human region in the image + + :return: Updated PhotoEntity instance + :rtype: PhotoEntity + :raises ValueError: If no single person is detected + """ + if self.photo.person_bbox is not None: + height, width = self.photo.image.shape[:2] + + # Get bounding box coordinates and keypoints + bbox_xyxy = self.photo.person_bbox + x1, y1, x2, y2 = bbox_xyxy + bbox_keypoints = self.photo.person_keypoints + bbox_height = y2 - y1 + + # Get shoulder keypoints + left_shoulder = (bbox_keypoints[18], bbox_keypoints[19], + bbox_keypoints[20]) # bbox_keypoints[5] right shoulder + right_shoulder = (bbox_keypoints[15], bbox_keypoints[16], bbox_keypoints[17]) # bbox_keypoints[6] left shoulder + # print(left_shoulder, right_shoulder) + + # Compute rotation angle + angle = self.compute_rotation_angle(left_shoulder, right_shoulder, (height, width)) + + # Rotate the image + rotated_image = self.rotate_image(self.photo.image, angle) if abs(angle) > 5 else self.photo.image + + # Recalculate crop box position in the rotated image + height, width = rotated_image.shape[:2] + x1, y1, x2, y2 = int(x1 * width / width), int(y1 * height / height), int(x2 * width / width), int( + y2 * height / height) + + # Adjust crop area to ensure the top does not exceed the image range + top_margin = bbox_height / 5 + y1 = max(int(y1), 0) if y1 >= top_margin else 0 + + # If y1 is less than 60 pixels from the top of the face detection box, adjust it + if y1 != 0 and self.photo.face_bbox is not None: + if y1 - 
self.photo.face_bbox[1] < max(int(height / 600 * 60), 60): + y1 = max(int(y1 - (int(height / 600 * 60))), 0) + + # Adjust the crop area to ensure the lower body is not too long + shoulder_margin = y1 + bbox_height / max(int(height / 600 * 16), 16) + y2 = min(y2, height - int(shoulder_margin)) if left_shoulder[1] > shoulder_margin or right_shoulder[ + 1] > shoulder_margin else y2 + + # Adjust the crop area to ensure the face is centered in the image + left_eye = [bbox_keypoints[6], bbox_keypoints[7], bbox_keypoints[8]] # bbox_keypoints[2] + right_eye = [bbox_keypoints[3], bbox_keypoints[4], bbox_keypoints[5]] # bbox_keypoints[1] + # print(left_eye, right_eye) + face_center_x = (left_eye[0] + right_eye[0]) / 2 + crop_width = x2 - x1 + + x1 = max(int(face_center_x - crop_width / 2), 0) + x2 = min(int(face_center_x + crop_width / 2), width) + + # Ensure the crop area does not exceed the image range + x1 = 0 if x1 < 0 else x1 + x2 = width if x2 > width else x2 + + # print(x1,x2,y1,y2) + + # Crop the image + cropped_image = rotated_image[y1:y2, x1:x2] + + # Update the PhotoEntity object's image and re-detect + self.photo.image = cropped_image + self.photo.detect() + # Manually set the person bounding box to the full image range + self.photo.person_bbox = [0, 0, cropped_image.shape[1], cropped_image.shape[0]] + return self.photo + else: + raise ValueError('No single person detected.') + + def change_background(self, bgr_list=None) -> PhotoEntity: + """ + Replace the background of the human region in the image + + :param bgr_list: New list of BGR channel values + :return: Updated PhotoEntity instance + :rtype: PhotoEntity + """ + if bgr_list is not None: + if not (isinstance(bgr_list, list) and len(bgr_list) == 3): + raise ValueError("The BGR value format is incorrect") + self.segmentation.bgr_list = bgr_list + + self.photo.image = self.segmentation.infer(self.photo.image) + return self.photo + + def resize_image(self, photo_type): + """ + Resize the image proportionally 
according to the specified photo type. + + :param photo_type: The type of the photo + """ + # Get the target dimensions + width, height, _ = self.photo_requirements_detector.get_resize_image_list(photo_type) + # print(width, height) + + # Get the original image dimensions + orig_height, orig_width = self.photo.image.shape[:2] + # print(orig_width, orig_height) + + # Check if the dimensions are integer multiples + is_width_multiple = (orig_width % width == 0) if orig_width >= width else (width % orig_width == 0) + is_height_multiple = (orig_height % height == 0) if orig_height >= height else (height % orig_height == 0) + + if is_width_multiple and is_height_multiple: + # Resize the image proportionally + self.photo.image = cv.resize(self.photo.image, (width, height), interpolation=cv.INTER_AREA) + return self.photo.image + + def get_crop_coordinates(original_size, aspect_ratio): + original_width, original_height = original_size + crop_width = original_width + crop_height = int(crop_width / aspect_ratio) + + if crop_height > original_height: + crop_height = original_height + crop_width = int(crop_height * aspect_ratio) + + x_start = (original_width - crop_width) // 2 + y_start = 0 + + return x_start, x_start + crop_width, y_start, y_start + crop_height + + x1, x2, y1, y2 = get_crop_coordinates((orig_width, orig_height), width / height) + # print(x1, x2, y1, y2) + + cropped_image = self.photo.image[y1:y2, x1:x2] + + # Update the PhotoEntity object's image + self.photo.image = cropped_image + + # Resize the image proportionally + self.photo.image = cv.resize(self.photo.image, (width, height), interpolation=cv.INTER_AREA) + return self.photo.image + + def save_photos(self, save_path: str, y_b=False) -> None: + """ + Save the image to the specified path. 
+ + :param save_path: The path to save the image + :param y_b: Whether to compress the image + """ + if y_b: + ext = os.path.splitext(save_path)[1].lower() + encode_format = '.jpg' if ext in ['.jpg', '.jpeg'] else '.png' if ext == '.png' else None + if encode_format is None: + raise ValueError(f"Unsupported file format: {ext}") + + is_success, buffer = cv.imencode(encode_format, self.photo.image) + if not is_success: + raise ValueError("Failed to encode the image to bytes") + + image_bytes = buffer.tobytes() + + compressed_bytes = self.photo.ImageCompressor_detector.compress_image_from_bytes(image_bytes) + + compressed_image = cv.imdecode(np.frombuffer(compressed_bytes, np.uint8), cv.IMREAD_COLOR) + self.photo.image = compressed_image + + cv.imwrite(save_path, self.photo.image) + diff --git a/src/tool/ImageSegmentation.py b/src/tool/ImageSegmentation.py new file mode 100644 index 0000000..90e0f98 --- /dev/null +++ b/src/tool/ImageSegmentation.py @@ -0,0 +1,107 @@ +import onnxruntime as ort +import numpy as np +from PIL import Image + + +def bgr_to_rgba(bgr): + if not isinstance(bgr, (list, tuple)) or len(bgr) != 3: + raise ValueError("bgr must be a list or tuple containing three elements") + if any(not (0 <= color <= 1) for color in bgr): + raise ValueError("bgr values must be in the range [0, 1]") + + blue, green, red = bgr + + # Normalize to the 255 range + red = int(red * 255) + green = int(green * 255) + blue = int(blue * 255) + + # Add Alpha channel (fully opaque) + alpha = 255 + + # Return RGBA values + return red, green, blue, alpha + + +class ImageSegmentation: + def __init__(self, model_path: str, model_input_size: list, bgr_list: list): + if not isinstance(model_path, str) or not model_path.endswith('.onnx'): + raise ValueError("model_path must be a valid ONNX model file path") + if not isinstance(model_input_size, list) or len(model_input_size) != 2: + raise ValueError("model_input_size must be a list with two elements") + if any(not isinstance(size, 
int) or size <= 0 for size in model_input_size): + raise ValueError("model_input_size elements must be positive integers") + + # Initialize model path and input size + self.model_path = model_path + self.model_input_size = model_input_size + try: + self.ort_session = ort.InferenceSession(model_path) + except Exception as e: + raise RuntimeError(f"Failed to load ONNX model: {e}") + + self.bgr_list = bgr_to_rgba(bgr_list) + + def preprocess_image(self, im: np.ndarray) -> np.ndarray: + # If the image is grayscale, add a dimension to make it a color image + if len(im.shape) < 3: + im = im[:, :, np.newaxis] + # Resize the image to match the model input size + try: + im_resized = np.array(Image.fromarray(im).resize(self.model_input_size, Image.BILINEAR)) + except Exception as e: + raise RuntimeError(f"Error resizing image: {e}") + # Normalize image pixel values to the [0, 1] range + image = im_resized.astype(np.float32) / 255.0 + # Further normalize image data + mean = np.array([0.5, 0.5, 0.5], dtype=np.float32) + std = np.array([1.0, 1.0, 1.0], dtype=np.float32) + image = (image - mean) / std + # Convert the image to the required shape + image = image.transpose(2, 0, 1) # Change dimension order (channels, height, width) + return np.expand_dims(image, axis=0) # Add batch dimension + + def postprocess_image(self, result: np.ndarray, im_size: list) -> np.ndarray: + # Resize the result image to match the original image size + result = np.squeeze(result) + try: + result = np.array(Image.fromarray(result).resize(im_size, Image.BILINEAR)) + except Exception as e: + raise RuntimeError(f"Error resizing result image: {e}") + # Normalize the result image data + ma = result.max() + mi = result.min() + result = (result - mi) / (ma - mi) + # Convert to uint8 image + im_array = (result * 255).astype(np.uint8) + return im_array + + def infer(self, image: np.ndarray) -> np.ndarray: + # Prepare the input image + orig_im_size = image.shape[0:2] + image_preprocessed = 
self.preprocess_image(image) + + # Perform inference (image segmentation) + ort_inputs = {self.ort_session.get_inputs()[0].name: image_preprocessed} + try: + ort_outs = self.ort_session.run(None, ort_inputs) + except Exception as e: + raise RuntimeError(f"ONNX inference failed: {e}") + result = ort_outs[0] + + # Post-process the result image + result_image = self.postprocess_image(result, orig_im_size) + + # Save the result image + try: + pil_im = Image.fromarray(result_image).convert("L") + orig_image = Image.fromarray(image).convert("RGBA") + pil_im = pil_im.resize(orig_image.size) + except Exception as e: + raise RuntimeError(f"Error processing images: {e}") + no_bg_image = Image.new("RGBA", orig_image.size, self.bgr_list) + no_bg_image.paste(orig_image, mask=pil_im) + + # Convert to OpenCV image + no_bg_image_cv = np.array(no_bg_image) + return no_bg_image_cv diff --git a/src/tool/PhotoEntity.py b/src/tool/PhotoEntity.py new file mode 100644 index 0000000..dab87b6 --- /dev/null +++ b/src/tool/PhotoEntity.py @@ -0,0 +1,194 @@ +import os +import cv2 as cv +from PIL import Image, ExifTags +import numpy as np + +from yolov8_detector import YOLOv8Detector +from YuNet import FaceDetector +from agpic import ImageCompressor + + +class PhotoEntity: + def __init__(self, img_path, yolov8_model_path, yunet_model_path, y_b=False): + """ + Initialize the PhotoEntity class. 
+ + :param img_path: Path to the image + :param yolov8_model_path: Path to the YOLOv8 model + :param yunet_model_path: Path to the YuNet model + :param y_b: Whether to compress the image, defaults to False + """ + self.img_path = img_path + self.image = self._correct_image_orientation(img_path) + self.yolov8_detector = YOLOv8Detector(yolov8_model_path) + self.face_detector = FaceDetector(yunet_model_path) + self.ImageCompressor_detector = ImageCompressor() + if y_b: + self._compress_image() + + # Initialize detection result attributes + self.person_bbox = None + self.person_label = None + self.person_keypoints = None + self.person_width = None + self.person_height = None + self.face_bbox = None + self.face_width = None + self.face_height = None + self.detect() + + def _correct_image_orientation(self, image_path): + # Open the image and read EXIF information + image = Image.open(image_path) + try: + exif = image._getexif() + if exif is not None: + # Get EXIF tags + for tag, value in exif.items(): + if tag in ExifTags.TAGS: + if ExifTags.TAGS[tag] == 'Orientation': + orientation = value + # Adjust the image based on orientation + if orientation == 3: + image = image.rotate(180, expand=True) + elif orientation == 6: + image = image.rotate(270, expand=True) + elif orientation == 8: + image = image.rotate(90, expand=True) + except (AttributeError, KeyError, IndexError) as e: + raise e + + # Convert Pillow image object to OpenCV image object + image_np = np.array(image) + # OpenCV defaults to BGR format, so convert to RGB + image_np = cv.cvtColor(image_np, cv.COLOR_RGB2BGR) + + return image_np + + def _compress_image(self): + """ + Compress the image to reduce memory usage. 
+ """ + ext = os.path.splitext(self.img_path)[1].lower() + encode_format = '.jpg' if ext in ['.jpg', '.jpeg'] else '.png' + + # Convert OpenCV image to byte format + is_success, buffer = cv.imencode(encode_format, self.image) + if not is_success: + raise ValueError("Failed to encode the image to byte format") + + image_bytes = buffer.tobytes() + + # Call compress_image_from_bytes function to compress the image + compressed_bytes = self.ImageCompressor_detector.compress_image_from_bytes(image_bytes) + + # Convert the compressed bytes back to OpenCV image format + self.image = cv.imdecode(np.frombuffer(compressed_bytes, np.uint8), cv.IMREAD_COLOR) + + def detect(self, detect_person=True, detect_face=True): + """ + Detect persons and faces in the image. + + :param detect_person: Whether to detect persons, defaults to True + :param detect_face: Whether to detect faces, defaults to True + """ + if detect_person: + self.detect_person() + if detect_face: + self.detect_face() + + def detect_person(self): + """ + Detect persons in the image. + """ + person_result, original_img = self.yolov8_detector.detect_person(self.img_path) + if person_result: + self.person_bbox = person_result['bbox_xyxy'] + self.person_label = person_result['bbox_label'] + self.person_keypoints = person_result['bbox_keypoints'] + self.person_width = self.person_bbox[2] - self.person_bbox[0] + self.person_height = self.person_bbox[3] - self.person_bbox[1] + else: + self._reset_person_data() + + def detect_face(self): + """ + Detect faces in the image. + """ + face_results = self.face_detector.process_image(self.img_path) + if not (face_results is None) and len(face_results) > 0: + self.face_bbox = face_results[0][:4].astype('uint32') + self.face_width = int(self.face_bbox[2]) - int(self.face_bbox[0]) + self.face_height = int(self.face_bbox[3]) - int(self.face_bbox[1]) + else: + self._reset_face_data() + + def _reset_person_data(self): + """ + Reset person detection data. 
+ """ + self.person_bbox = None + self.person_label = None + self.person_keypoints = None + self.person_width = None + self.person_height = None + + def _reset_face_data(self): + """ + Reset face detection data. + """ + self.face_bbox = None + self.face_width = None + self.face_height = None + + def set_img_path(self, img_path): + """ + Set the image path and re-detect. + + :param img_path: New image path + """ + self.img_path = img_path + self.image = cv.imdecode(np.fromfile(img_path, dtype=np.uint8), cv.IMREAD_COLOR) + self.detect() + + def set_yolov8_model_path(self, model_path): + """ + Set the YOLOv8 model path and re-detect. + + :param model_path: New YOLOv8 model path + """ + self.yolov8_detector = YOLOv8Detector(model_path) + self.detect() + + def set_yunet_model_path(self, model_path): + """ + Set the YuNet model path and re-detect. + + :param model_path: New YuNet model path + """ + self.face_detector = FaceDetector(model_path) + self.detect() + + def manually_set_person_data(self, bbox, label, keypoints): + """ + Manually set person detection data. + + :param bbox: Person bounding box + :param label: Person label + :param keypoints: Person keypoints + """ + self.person_bbox = bbox + self.person_label = label + self.person_keypoints = keypoints + self.person_width = self.person_bbox[2] - self.person_bbox[0] + self.person_height = self.person_bbox[3] - self.person_bbox[1] + + def manually_set_face_data(self, bbox): + """ + Manually set face detection data. 
+
+        :param bbox: Face bounding box
+        """
+        self.face_bbox = bbox
+        self.face_width = self.face_bbox[2] - self.face_bbox[0]
+        self.face_height = self.face_bbox[3] - self.face_bbox[1]
diff --git a/src/tool/PhotoRequirements.py b/src/tool/PhotoRequirements.py
new file mode 100644
index 0000000..c7e5217
--- /dev/null
+++ b/src/tool/PhotoRequirements.py
@@ -0,0 +1,58 @@
+import configparser
+import os
+
+
+class PhotoRequirements:
+    """Look up ID-photo requirements (print size, electronic size, dpi,
+    file format/size) for a named photo type from an INI config file."""
+
+    def __init__(self, config_file=None):
+        # Default config lives at <repo root>/data/data.ini,
+        # i.e. three directory levels above this module.
+        if config_file is None:
+            config_file = os.path.join(
+                os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
+                'data', 'data.ini'
+            )
+
+        if not os.path.isfile(config_file):
+            raise FileNotFoundError(f"配置文件 {config_file} 不存在")
+
+        self.config_file = config_file
+        self.config = configparser.ConfigParser()
+        try:
+            # The config file uses Chinese section names/keys, stored as UTF-8.
+            with open(config_file, 'r', encoding='utf-8') as file:
+                self.config.read_file(file)
+        except Exception as e:
+            raise IOError(f"读取配置文件时出错: {e}")
+
+    def get_requirements(self, photo_type):
+        """Return the requirement dict for *photo_type*, or None when the
+        section does not exist.  Missing keys default to 'N/A'."""
+        if not isinstance(photo_type, str):
+            raise TypeError("photo_type必须是字符串。")
+
+        if photo_type in self.config:
+            requirements = self.config[photo_type]
+            return {
+                '打印尺寸': requirements.get('打印尺寸', 'N/A'),
+                '电子版尺寸': requirements.get('电子版尺寸', 'N/A'),
+                '分辨率': requirements.get('分辨率', 'N/A'),
+                '文件格式': requirements.get('文件格式', 'N/A'),
+                '文件大小': requirements.get('文件大小', 'N/A')
+            }
+        else:
+            return None
+
+    def list_photo_types(self):
+        """Return all configured photo-type section names."""
+        return self.config.sections()
+
+    def get_resize_image_list(self, photo_type):
+        """Parse the electronic size ('电子版尺寸', e.g. '295px x 413px') of
+        *photo_type* and return [width, height, original_size_string].
+
+        NOTE(review): this method has three distinct return shapes -- a list
+        on success, the string "300" when the size is 'N/A' (looks like a dpi
+        fallback), and None for an unknown type.  Confirm callers handle all
+        three.
+        """
+        requirements = self.get_requirements(photo_type)
+        if not requirements:
+            print("未找到指定的照片类型。")
+            return None
+
+        electronic_size = requirements['电子版尺寸'].replace("dpi", "")
+        if electronic_size == 'N/A':
+            return "300"
+
+        try:
+            width, height = map(int, electronic_size.replace("px", "").split(' x '))
+        except ValueError:
+            raise ValueError(f"电子尺寸格式无效: {electronic_size}")
+
+        return [width, height, electronic_size]
\ No newline at end of file
diff --git
a/src/tool/PhotoSheetGenerator.py b/src/tool/PhotoSheetGenerator.py new file mode 100644 index 0000000..233d0e1 --- /dev/null +++ b/src/tool/PhotoSheetGenerator.py @@ -0,0 +1,74 @@ +import cv2 +from PIL import Image, ImageDraw +import numpy as np + + +class PhotoSheetGenerator: + def __init__(self, five_inch_size=(1050, 1500)): + self.five_inch_size = five_inch_size + + @staticmethod + def cv2_to_pillow(cv2_image): + """Convert OpenCV image data to Pillow image""" + cv2_image_rgb = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB) + return Image.fromarray(cv2_image_rgb) + + @staticmethod + def pillow_to_cv2(pillow_image): + """Convert Pillow image to OpenCV image data""" + cv2_image_rgb = cv2.cvtColor(np.array(pillow_image), cv2.COLOR_RGB2BGR) + return cv2_image_rgb + + def generate_photo_sheet(self, one_inch_photo_cv2, rows=3, cols=3, rotate=False): + one_inch_height, one_inch_width = one_inch_photo_cv2.shape[:2] + + # Convert OpenCV image data to Pillow image + one_inch_photo_pillow = self.cv2_to_pillow(one_inch_photo_cv2) + + # Rotate photo + if rotate: + one_inch_photo_pillow = one_inch_photo_pillow.rotate(90, expand=True) + one_inch_height, one_inch_width = one_inch_width, one_inch_height + + # Create photo sheet (white background) + five_inch_photo = Image.new('RGB', self.five_inch_size, 'white') + + # Create photo with black border + bordered_one_inch_photo = Image.new('RGB', (one_inch_width + 2, one_inch_height + 2), 'black') + bordered_one_inch_photo.paste(one_inch_photo_pillow, (1, 1)) + + # Calculate positions for the photos on the sheet + total_width = cols * (one_inch_width + 2) + total_height = rows * (one_inch_height + 2) + + if total_width > self.five_inch_size[0] or total_height > self.five_inch_size[1]: + raise ValueError("The specified layout exceeds the size of the photo sheet") + + start_x = (self.five_inch_size[0] - total_width) // 2 + start_y = (self.five_inch_size[1] - total_height) // 2 + + # Arrange photos on the sheet in an n*m layout + for i in 
range(rows): + for j in range(cols): + x = start_x + j * (one_inch_width + 2) + y = start_y + i * (one_inch_height + 2) + five_inch_photo.paste(bordered_one_inch_photo, (x, y)) + + # Draw alignment lines + draw = ImageDraw.Draw(five_inch_photo) + draw.rectangle([start_x, start_y, self.five_inch_size[0], self.five_inch_size[1]], outline="black") + + # Return the generated photo sheet as a Pillow image + return self.pillow_to_cv2(five_inch_photo) + + def save_photo_sheet(self, photo_sheet_cv, output_path): + """Save the generated photo sheet as an image file""" + if not isinstance(output_path, str): + raise TypeError("output_path must be a string") + if not output_path.lower().endswith(('.png', '.jpg', '.jpeg')): + raise ValueError("output_path must be a valid image file path ending with .png, .jpg, or .jpeg") + try: + photo_sheet = self.cv2_to_pillow(photo_sheet_cv) + photo_sheet.save(output_path) + except Exception as e: + raise IOError(f"Failed to save photo: {e}") diff --git a/src/tool/YuNet.py b/src/tool/YuNet.py new file mode 100644 index 0000000..b8bb16f --- /dev/null +++ b/src/tool/YuNet.py @@ -0,0 +1,168 @@ +import cv2 as cv +import numpy as np + + +class YuNet: + """ + YuNet face detector class. 
+ + :param model_path: Path to the model file + :type model_path: str + :param input_size: Size of the input image, in the form [w, h], default is [320, 320] + :type input_size: list[int] + :param conf_threshold: Confidence threshold, default is 0.6 + :type conf_threshold: float + :param nms_threshold: Non-maximum suppression threshold, default is 0.3 + :type nms_threshold: float + :param top_k: Number of top detections to keep, default is 5000 + :type top_k: int + :param backend_id: ID of the backend to use, default is 0 + :type backend_id: int + :param target_id: ID of the target device, default is 0 + :type target_id: int + :return: None + :rtype: None + """ + + def __init__(self, model_path, input_size=[320, 320], conf_threshold=0.6, nms_threshold=0.3, top_k=5000, + backend_id=0, + target_id=0): + self._model_path = model_path + self._input_size = tuple(input_size) # [w, h] + self._conf_threshold = conf_threshold + self._nms_threshold = nms_threshold + self._top_k = top_k + self._backend_id = backend_id + self._target_id = target_id + + self._model = cv.FaceDetectorYN.create( + model=self._model_path, + config="", + input_size=self._input_size, + score_threshold=self._conf_threshold, + nms_threshold=self._nms_threshold, + top_k=self._top_k, + backend_id=self._backend_id, + target_id=self._target_id) + + @property + def name(self): + return self.__class__.__name__ + + def set_backend_and_target(self, backend_id, target_id): + """ + Set the backend ID and target ID. 
+ + :param backend_id: Backend ID + :type backend_id: int + :param target_id: Target ID + :type target_id: int + :return: None + :rtype: None + """ + self._backend_id = backend_id + self._target_id = target_id + self._model = cv.FaceDetectorYN.create( + model=self._model_path, + config="", + input_size=self._input_size, + score_threshold=self._conf_threshold, + nms_threshold=self._nms_threshold, + top_k=self._top_k, + backend_id=self._backend_id, + target_id=self._target_id) + + def set_input_size(self, input_size): + """ + Set the size of the input image. + + :param input_size: Size of the input image, in the form [w, h] + :type input_size: list[int] + :return: None + :rtype: None + """ + self._model.setInputSize(tuple(input_size)) + + def infer(self, image): + """ + Perform inference to detect faces in the image. + + :param image: The image to be processed + :type image: numpy.ndarray + :return: Detected face information, a numpy array of shape [n, 15], where each row represents a detected face with 15 elements: [x1, y1, x2, y2, score, x3, y3, x4, y4, x5, y5, x6, y6, x7, y7] + :rtype: numpy.ndarray + """ + # Forward inference + faces = self._model.detect(image) + return faces[1] + + +class FaceDetector: + """ + Face detector class. 
+ + :param model_path: Path to the model file + :type model_path: str + :param conf_threshold: Minimum confidence threshold, default is 0.9 + :type conf_threshold: float + :param nms_threshold: Non-maximum suppression threshold, default is 0.3 + :type nms_threshold: float + :param top_k: Number of top detections to keep, default is 5000 + :type top_k: int + :param backend_id: Backend ID, default is cv2.dnn.DNN_BACKEND_OPENCV + :type backend_id: int + :param target_id: Target ID, default is cv2.dnn.DNN_TARGET_CPU + :type target_id: int + :return: None + :rtype: None + """ + + def __init__(self, model_path, conf_threshold=0.9, nms_threshold=0.3, top_k=5000, + backend_id=cv.dnn.DNN_BACKEND_OPENCV, target_id=cv.dnn.DNN_TARGET_CPU): + self.model = YuNet(model_path=model_path, + input_size=[320, 320], + conf_threshold=conf_threshold, + nms_threshold=nms_threshold, + top_k=top_k, + backend_id=backend_id, + target_id=target_id) + + def process_image(self, image_path, origin_size=False): + """ + Process the image for face detection. 
+
+        :param image_path: Path to the image file to be processed
+        :type image_path: str
+        :param origin_size: Whether to keep the original size
+        :type origin_size: bool
+        :return: Detected face information, a numpy array of shape [n, 15]; each row is [x, y, w, h, ten landmark coordinates, score] as produced by cv.FaceDetectorYN
+        :rtype: numpy.ndarray
+        """
+        # np.fromfile + imdecode (instead of cv.imread) so non-ASCII paths work on Windows.
+        image = cv.imdecode(np.fromfile(image_path, dtype=np.uint8), cv.IMREAD_COLOR)
+        h, w, _ = image.shape
+        target_size = 320
+        max_size = 320
+        im_shape = image.shape
+        im_size_min = np.min(im_shape[0:2])
+        im_size_max = np.max(im_shape[0:2])
+        # Scale the short side up/down to target_size, but never let the
+        # long side exceed max_size.
+        resize_factor = float(target_size) / float(im_size_min)
+
+        if np.round(resize_factor * im_size_max) > max_size:
+            resize_factor = float(max_size) / float(im_size_max)
+
+        if origin_size:
+            resize_factor = 1
+
+        if resize_factor != 1:
+            image = cv.resize(image, None, None, fx=resize_factor, fy=resize_factor, interpolation=cv.INTER_LINEAR)
+            h, w, _ = image.shape
+
+        self.model.set_input_size([w, h])
+        results = self.model.infer(image)
+        if results is not None:
+            if resize_factor != 1:
+                # Map detections back to original-image coordinates.
+                # Fix: only the 14 geometry columns (box + landmarks) are
+                # rescaled; column 14 is the confidence score and must not be
+                # divided by resize_factor (the old code scaled it too,
+                # silently shrinking confidences for resized images).
+                results = results[:, :15].copy()
+                results[:, :14] /= resize_factor
+        else:
+            results = []
+
+        return results
diff --git a/src/tool/__init__.py b/src/tool/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/tool/agpic.py b/src/tool/agpic.py
new file mode 100644
index 0000000..cd1db90
--- /dev/null
+++ b/src/tool/agpic.py
@@ -0,0 +1,364 @@
+import os
+import shutil
+import subprocess
+import tempfile
+import time
+import uuid
+import warnings
+from io import BytesIO
+from pathlib import Path
+
+import click
+import mozjpeg_lossless_optimization
+from PIL import Image
+
+
+class QualityInteger(click.ParamType):
+    name = "QualityInteger"
+
+    @staticmethod
+    def _parse_int(value):
+        """Return int(value), or None when value is not a valid integer."""
+        try:
+            return int(value)
+        except ValueError:
+            return None
+
+    def convert(self, value, param, ctx):
+        """
+        Convert a string to an integer or an integer range.
+ + :param value: The value to convert. + :type value: str + :param param: The parameter. + :type param: XXX + :param ctx: The context. + :type ctx: XXX + :return: The converted integer or integer range. + :rtype: int or tuple[int, int] + """ + if value.isdigit(): + return self._parse_int(value) + parts = value.split('-') + if len(parts) != 2: + raise click.BadParameter(f'"{value}": The parameter does not conform to the format like 80-90 or 90') + min_v, max_v = map(self._parse_int, parts) + if min_v is None or max_v is None or min_v > max_v or min_v <= 0 or max_v <= 0: + raise click.BadParameter(f'"{value}": The parameter does not conform to the format like 80-90 or 90') + return min_v, max_v + + +def generate_output_path(fp, output=None): + """ + Generate the output path. + + :param fp: The file path. + :type fp: Path + :param output: The output path. + :type output: Path or None + :return: The output path. + :rtype: Path + """ + uuid_str = get_uuid("".join(["AGPicCompress", str(time.time()), fp.name])) + new_fp = Path(fp.parent, f"{fp.stem}_{uuid_str}_compressed{fp.suffix}") + + if output: + if output.is_dir(): + # If it is a directory, check if it exists, create it if it doesn't + output.mkdir(parents=True, exist_ok=True) + new_fp = output / new_fp.name + elif output.exists(): + # If it is a file, check if it exists, throw an exception if it does + raise FileExistsError(f'"{output}": already exists') + else: + new_fp = output + + return new_fp + + +def optimize_output_path(fp, output=None, force=False): + """ + Optimize the output path. + + :param fp: File path. + :type fp: Path + :param output: Output path. + :type output: Path + :param force: Whether to force overwrite. + :type force: bool + :return: Output path. 
+ :rtype: Path + """ + if force and output: + if output.is_dir(): + output.mkdir(parents=True, exist_ok=True) + new_fp = output / fp.name + else: + new_fp = output + elif not force and output: + new_fp = generate_output_path(fp, output) + else: + new_fp = generate_output_path(fp) + + return new_fp + + +def find_pngquant_cmd(): + """ + Find and return the executable file path of pngquant. + + :return: The executable file path of pngquant, or None if not found. + :rtype: str or None + """ + pngquant_cmd = shutil.which('pngquant') + if pngquant_cmd: + return pngquant_cmd + exe_extension = '.exe' if os.name == 'nt' else '' + search_paths = [Path(__file__).resolve().parent, Path(__file__).resolve().parent / 'ext'] + for search_path in search_paths: + pngquant_exe_path = search_path / f'pngquant{exe_extension}' + if pngquant_exe_path.exists(): + return str(pngquant_exe_path) + return None + + +def get_uuid(name): + """ + Get the UUID string of the specified string. + + :param name: The name string for UUID generation. + :type name: str + :return: The UUID string generated based on the specified name. + :rtype: str + """ + return str(uuid.uuid3(uuid.NAMESPACE_DNS, name)) + + +class ImageCompressor: + def __init__(self): + pass + + @staticmethod + def compress_image(fp, force=False, quality=None, output=None, webp=False): + """ + Compression function. + + :param fp: File name or directory name. + :type fp: Path + :param force: Whether to overwrite if a file with the same name exists. + :type force: bool + :param quality: Compression quality. 80-90, or 90. + :type quality: int or tuple[int, int] + :param output: Output path or output directory + :type output: Path + :param webp: Whether to convert to WebP format. 
+ :type webp: bool + """ + + # Check if the file exists + if not fp.exists(): + raise FileNotFoundError(f'"{fp}": Path or directory does not exist') + if output: + if not output.is_dir(): + if output.suffix == '': + output.mkdir(parents=True, exist_ok=True) + elif output.suffix.lower() not in ['.png', '.jpg', '.jpeg', '.webp']: + raise ValueError(f'"{output.name}": Unsupported output file format') + elif output.suffix.lower() == '.webp': + webp = True + output = output.with_name(f"{output.stem}_2webp{fp.suffix}") + if output.suffix.lower() != fp.suffix.lower(): + raise ValueError('Inconsistent output file format with input file format') + + if fp.is_dir(): + for file in fp.iterdir(): + if file.is_file() and file.suffix.lower() in ['.png', '.jpg', '.jpeg']: + ImageCompressor.compress_image(file, force, quality, output, webp) + return + + ext = fp.suffix.lower() + if ext == '.png': + ImageCompressor._compress_png(fp, force, quality, output, webp) + elif ext in ['.jpg', '.jpeg']: + ImageCompressor._compress_jpg(fp, force, quality, output, webp) + else: + raise ValueError(f'"{fp.name}": Unsupported output file format') + + @staticmethod + def _convert_to_webp(fp): + """ + Convert an image to WebP format. + + :param fp: Image file path. + :type fp: Path + :return: WebP image file path. + :rtype: Path or None + """ + # Check if the file exists + if Path(fp).exists(): + img = Image.open(fp) + webp_fp = Path(fp).with_suffix('.webp') + img.save(webp_fp, 'webp') + # Delete the original image file + os.remove(fp) + return webp_fp + return None + + @staticmethod + def _compress_png(fp, force=False, quality=None, output=None, webp=False): + """ + Compress PNG images and specify compression quality. + + :param fp: Path of the image file. + :type fp: Path + :param force: Whether to overwrite if a file with the same name exists. Defaults to False. + :type force: bool + :param quality: Compression quality. Defaults to None. 
+ :type quality: int or tuple[int, int] + :param output: Output path. + :type output: Path + :param webp: Whether to convert to WebP format. + :type webp: bool + """ + new_fp = optimize_output_path(fp, output, force) + quality_command = f'--quality {quality}' if isinstance(quality, int) else f'--quality {quality[0]}-{quality[1]}' if isinstance(quality, tuple) else '' + pngquant_cmd = find_pngquant_cmd() + if not pngquant_cmd: + raise FileNotFoundError('pngquant not found. Please ensure pngquant is installed or added to the environment variable') + command = f'{pngquant_cmd} {fp} --skip-if-larger -f -o {new_fp} {quality_command}' + subprocess.run(command, shell=True, check=True) + if not new_fp.exists(): + warnings.warn(f'"{fp}": The compressed image file was not generated successfully. It may no longer be compressible or no longer exist', Warning) + return + if webp: + ImageCompressor._convert_to_webp(new_fp) + + @staticmethod + def _compress_jpg(fp, force=False, quality=None, output=None, webp=False): + """ + Compress JPG images and specify compression quality. + + :param fp: Image file path. + :type fp: Path + :param force: Whether to overwrite if a file with the same name already exists, default is False. + :type force: bool + :param quality: Compression quality, default is None. + :type quality: int or None + :param output: Output path. + :type output: Path + :param webp: Whether to convert to WebP format. 
+ :type webp: bool + """ + if quality is not None and not isinstance(quality, int): + raise ValueError(f'"{quality}": Unsupported type for quality parameter') + new_fp = optimize_output_path(fp, output, force) + with Image.open(fp) as img: + img = img.convert("RGB") + with BytesIO() as buffer: + img.save(buffer, format="JPEG", quality=quality if quality else 75) + input_jpeg_bytes = buffer.getvalue() + optimized_jpeg_bytes = mozjpeg_lossless_optimization.optimize(input_jpeg_bytes) + with open(new_fp, "wb") as output_jpeg_file: + output_jpeg_file.write(optimized_jpeg_bytes) + if not new_fp.exists(): + warnings.warn(f'"{fp}": The compressed image file was not generated successfully. It may no longer be compressible or no longer exist', Warning) + return + if webp: + ImageCompressor._convert_to_webp(new_fp) + + @staticmethod + def compress_image_from_bytes(image_bytes, quality=80, output_format='JPEG', webp=False): + """ + Compresses image data and returns the compressed image data. + + :param image_bytes: The byte representation of the image data. + :type image_bytes: bytes + :param quality: The compression quality, ranging from 1 to 100. + :type quality: int + :param output_format: The output format of the image, default is 'JPEG'. + :type output_format: str + :param webp: Whether to convert to WebP format. + :type webp: bool + :return: The byte representation of the compressed image data. 
+ :rtype: bytes + """ + # Load the image data into a PIL image object in memory + with BytesIO(image_bytes) as img_buffer: + img = Image.open(img_buffer).convert('RGB') + output_buffer = BytesIO() + if output_format.upper() == 'JPEG': + img.save(output_buffer, format=output_format, quality=quality, optimize=True) + compressed_img_bytes = output_buffer.getvalue() + compressed_img_bytes = mozjpeg_lossless_optimization.optimize(compressed_img_bytes) + if webp: + output_buffer = BytesIO() + img = Image.open(BytesIO(compressed_img_bytes)) + img.save(output_buffer, format='webp') + compressed_img_bytes = output_buffer.getvalue() + elif output_format.upper() == 'PNG': + with tempfile.TemporaryDirectory() as temp_dir: + temp_png_file_path = Path(temp_dir) / f'temp_{get_uuid(f"AGPicCompress{time.time()}")}.png' + with open(temp_png_file_path, 'wb') as temp_png_file: + temp_png_file.write(image_bytes) + new_fp = optimize_output_path(temp_png_file_path, Path(temp_dir), False) + pngquant_cmd = find_pngquant_cmd() + if not pngquant_cmd: + raise FileNotFoundError('pngquant not found. Please ensure pngquant is installed or added to the environment variable') + quality_command = f'--quality {quality}' if isinstance(quality, int) else f'--quality {quality[0]}-{quality[1]}' if isinstance(quality, tuple) else '' + command = f'{pngquant_cmd} {temp_png_file_path} --skip-if-larger -f -o {new_fp} {quality_command}' + subprocess.run(command, shell=True, check=True) + if new_fp.exists(): + with open(new_fp, 'rb') as compressed_img_file: + compressed_img_bytes = compressed_img_file.read() + else: + warnings.warn('The compressed image file was not generated successfully. 
It may no longer be compressible or no longer exist', Warning) + return None + if webp: + output_buffer = BytesIO() + img = Image.open(BytesIO(compressed_img_bytes)) + img.save(output_buffer, format='webp') + compressed_img_bytes = output_buffer.getvalue() + else: + raise ValueError(f'"{output_format}": Unsupported output file format') + return compressed_img_bytes + + @staticmethod + @click.command() + @click.argument('fp') + @click.option( + "--force", "-f", "--violent", + is_flag=True, + help="Whether to overwrite if a file with the same name exists, defaults to False." + ) + @click.option('--quality', "-q", default="80", type=QualityInteger(), + help="Compression quality. 80-90, or 90, default is 80.") + @click.option('--output', '-o', help='Output path or output directory.') + @click.option('--webp', is_flag=True, help='Convert images to WebP format, default is False.') + def cli_compress(fp, force=False, quality=None, output=None, webp=False): + """ + Compress images via command line. + + :param fp: Image file path or directory path. + :type fp: str + + :param force: Whether to overwrite if a file with the same name exists, defaults to False. + :type force: bool + + :param quality: Compression quality. 80-90, or 90, default is 80. + :type quality: int or tuple[int, int] + + :param output: Output path or output directory. + :type output: str + + :param webp: Convert images to WebP format, default is False. 
+        :type webp: bool
+        """
+        if not fp:
+            raise ValueError(f'"{fp}": The file path or directory cannot be empty')
+
+        fp_path = Path(fp)
+
+        # Fix: output_path was previously assigned only inside "if output:",
+        # so invoking the command without --output raised NameError at the
+        # compress_image call below.  Bind it unconditionally instead.
+        output_path = Path(output) if output else None
+
+        ImageCompressor.compress_image(fp_path, force, quality, output_path, webp)
+        return
\ No newline at end of file
diff --git a/src/tool/ext/.gitignore b/src/tool/ext/.gitignore
new file mode 100644
index 0000000..c96a04f
--- /dev/null
+++ b/src/tool/ext/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
\ No newline at end of file
diff --git a/src/tool/yolov8_detector.py b/src/tool/yolov8_detector.py
new file mode 100644
index 0000000..7d80056
--- /dev/null
+++ b/src/tool/yolov8_detector.py
@@ -0,0 +1,158 @@
+import logging
+import os
+
+import cv2 as cv
+import numpy as np
+import onnxruntime as ort
+
+
+class YOLOv8Detector:
+    def __init__(self, model_path, input_size=(640, 640), box_score=0.25, kpt_score=0.5, nms_thr=0.2):
+        """
+        Initialize the YOLOv8 detector
+
+        Parameters:
+        model_path (str): Path to the model file, can be an absolute or relative path.
+        input_size (tuple): Input image size.
+        box_score (float): Confidence threshold for detection boxes.
+        kpt_score (float): Confidence threshold for keypoints.
+        nms_thr (float): Non-Maximum Suppression (NMS) threshold.
+ """ + assert model_path.endswith('.onnx'), f"invalid onnx model: {model_path}" + assert os.path.exists(model_path), f"model not found: {model_path}" + + # Set log level to ERROR to disable default console info output + logging.getLogger('ultralytics').setLevel(logging.ERROR) + + # Create ONNX Runtime session + self.session = ort.InferenceSession(model_path) + self.input_name = self.session.get_inputs()[0].name + self.input_size = input_size + self.box_score = box_score + self.kpt_score = kpt_score + self.nms_thr = nms_thr + + def preprocess(self, img_path): + # Read the image + img = cv.imread(img_path) + if img is None: + raise ValueError(f"Failed to read image from {img_path}") + + input_w, input_h = self.input_size + padded_img = np.ones((input_h, input_w, 3), dtype=np.uint8) * 114 + r = min(input_w / img.shape[1], input_h / img.shape[0]) + resized_img = cv.resize(img, (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv.INTER_LINEAR).astype(np.uint8) + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose((2, 0, 1))[::-1, ] + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) / 255.0 + return padded_img, r, img + + def postprocess(self, output, ratio): + predict = output[0].squeeze(0).T + predict = predict[predict[:, 4] > self.box_score, :] + scores = predict[:, 4] + boxes = predict[:, 0:4] / ratio + boxes = self.xywh2xyxy(boxes) + kpts = predict[:, 5:] + for i in range(kpts.shape[0]): + for j in range(kpts.shape[1] // 3): + if kpts[i, 3 * j + 2] < self.kpt_score: + kpts[i, 3 * j: 3 * (j + 1)] = [-1, -1, -1] + else: + kpts[i, 3 * j] /= ratio + kpts[i, 3 * j + 1] /= ratio + idxes = self.nms_process(boxes, scores) + result = {'boxes': boxes[idxes, :].astype(int).tolist(), 'kpts': kpts[idxes, :].astype(float).tolist(), + 'scores': scores[idxes].tolist()} + return result + + def xywh2xyxy(self, box): + box_xyxy = box.copy() + box_xyxy[..., 0] = box[..., 0] - box[..., 2] / 2 + 
box_xyxy[..., 1] = box[..., 1] - box[..., 3] / 2 + box_xyxy[..., 2] = box[..., 0] + box[..., 2] / 2 + box_xyxy[..., 3] = box[..., 1] + box[..., 3] / 2 + return box_xyxy + + def nms_process(self, boxes, scores): + sorted_idx = np.argsort(scores)[::-1] + keep_idx = [] + while sorted_idx.size > 0: + idx = sorted_idx[0] + keep_idx.append(idx) + ious = self.compute_iou(boxes[idx, :], boxes[sorted_idx[1:], :]) + rest_idx = np.where(ious < self.nms_thr)[0] + sorted_idx = sorted_idx[rest_idx + 1] + return keep_idx + + def compute_iou(self, box, boxes): + xmin = np.maximum(box[0], boxes[:, 0]) + ymin = np.maximum(box[1], boxes[:, 1]) + xmax = np.minimum(box[2], boxes[:, 2]) + ymax = np.minimum(box[3], boxes[:, 3]) + inter_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin) + box_area = (box[2] - box[0]) * (box[3] - box[1]) + boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + union_area = box_area + boxes_area - inter_area + return inter_area / union_area + + def detect(self, img_path): + """ + Detect objects in an image + + Parameters: + img_path (str): Path to the image file, can be an absolute or relative path. + + Returns: + results: Detection results. + """ + image, ratio, original_img = self.preprocess(img_path) + ort_input = {self.input_name: image[None, :]} + output = self.session.run(None, ort_input) + result = self.postprocess(output, ratio) + return result, original_img + + def detect_person(self, img_path): + """ + Detect if there is only one person in the image + + Parameters: + img_path (str): Path to the image file, can be an absolute or relative path. + + Returns: + dict: Contains the coordinates of the box, predicted class, coordinates of all keypoints, and confidence scores. + If more or fewer than one person is detected, returns None. 
+ """ + result, original_img = self.detect(img_path) + boxes = result['boxes'] + scores = result['scores'] + kpts = result['kpts'] + + # Only handle cases where exactly one person is detected + if len(boxes) == 1: + bbox_xyxy = boxes[0] + bbox_label = 0 # Assuming person class is 0 + bbox_keypoints = kpts[0] + return { + 'bbox_xyxy': bbox_xyxy, + 'bbox_label': bbox_label, + 'bbox_keypoints': bbox_keypoints + }, original_img + return None, original_img + + def draw_result(self, img, result, with_label=False): + boxes, kpts, scores = result['boxes'], result['kpts'], result['scores'] + for box, kpt, score in zip(boxes, kpts, scores): + x1, y1, x2, y2 = box + label_str = "{:.0f}%".format(score * 100) + label_size, baseline = cv.getTextSize(label_str, cv.FONT_HERSHEY_SIMPLEX, 0.5, 2) + cv.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2) + if with_label: + cv.rectangle(img, (x1, y1), (x1 + label_size[0], y1 + label_size[1] + baseline), (0, 0, 255), -1) + cv.putText(img, label_str, (x1, y1 + label_size[1]), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2) + for idx in range(len(kpt) // 3): + x, y, score = kpt[3 * idx: 3 * (idx + 1)] + if score > 0: + cv.circle(img, (int(x), int(y)), 2, (0, 255, 0), -1) + return img