From d903316b9887bd006993244fe94d32667e78eba5 Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Sun, 11 Feb 2024 17:12:07 +0100 Subject: [PATCH] apply formatting --- .clang-format | 3 +- .gitattributes | 2 +- .github/FUNDING.yml | 2 +- .github/workflows/docs.yml | 2 +- .gitignore | 2 +- .pre-commit-config.yaml | 10 ++-- CMakeLists.txt | 60 +++++++++++-------- COPYING | 10 ++-- HISTORY.md | 3 +- Makefile | 2 +- README.md | 2 +- _custom_build/backend.py | 21 ++++--- docs/changelog.rst | 2 +- docs/conf.py | 21 ++++--- docs/index.rst | 5 -- docs/installation.rst | 4 +- make.bat | 20 +++---- pyproject.toml | 1 + setup.py | 18 +++--- src/Levenshtein/CMakeLists.txt | 46 ++++++++------ .../Levenshtein-c/_levenshtein.hpp | 4 +- src/Levenshtein/StringMatcher.py | 13 ++-- src/Levenshtein/__init__.py | 45 ++++++-------- src/Levenshtein/__init__.pyi | 30 +++------- src/Levenshtein/generate.sh | 2 +- tests/test_levenshtein_distance.py | 2 + tests/test_matching_blocks.py | 5 +- tests/test_median.py | 7 +-- tests/test_seq_ratio.py | 2 + tests/test_set_ratio.py | 2 + 30 files changed, 174 insertions(+), 174 deletions(-) diff --git a/.clang-format b/.clang-format index 3962aaa..2a1d4bd 100644 --- a/.clang-format +++ b/.clang-format @@ -5,7 +5,6 @@ AccessModifierOffset: -4 AllowShortIfStatementsOnASingleLine: true PointerAlignment: Left AllowShortBlocksOnASingleLine: Always -AllowShortIfStatementsOnASingleLine: true AllowShortFunctionsOnASingleLine: None AllowShortLambdasOnASingleLine: None BreakBeforeBraces: Custom @@ -27,4 +26,4 @@ BraceWrapping: AllowAllConstructorInitializersOnNextLine: true ConstructorInitializerAllOnOneLineOrOnePerLine: true AllowShortCaseLabelsOnASingleLine: true -IndentPPDirectives: AfterHash \ No newline at end of file +IndentPPDirectives: AfterHash diff --git a/.gitattributes b/.gitattributes index 734912c..c7e7dee 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1 @@ -src/c_levenshtein.c linguist-vendored \ No newline at end of file +src/c_levenshtein.c linguist-vendored diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index ca7edfe..286d9b7 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1,2 +1,2 @@ github: maxbachmann -custom: ["https://www.paypal.com/donate/?hosted_button_id=VGWQBBD5CTWJU"] \ No newline at end of file +custom: ["https://www.paypal.com/donate/?hosted_button_id=VGWQBBD5CTWJU"] diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 57659ab..f8f2939 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -29,4 +29,4 @@ jobs: uses: peaceiris/actions-gh-pages@v3 with: github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: ./build/html \ No newline at end of file + publish_dir: ./build/html diff --git a/.gitignore b/.gitignore index 231aa7e..16f0fdc 100644 --- a/.gitignore +++ b/.gitignore @@ -139,4 +139,4 @@ cython_debug/ # vscode .vscode/ -_skbuild/ \ No newline at end of file +_skbuild/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 52c8fbe..4b08bc7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -112,13 +112,13 @@ repos: hooks: - id: codespell exclude: ".*/test_.*.py" - args: ["-x", ".codespell-ignore-lines"] + #args: ["-x", ".codespell-ignore-lines"] # Check for common shell mistakes -- repo: https://github.com/shellcheck-py/shellcheck-py - rev: "v0.9.0.2" - hooks: - - id: shellcheck +#- repo: https://github.com/shellcheck-py/shellcheck-py +# rev: "v0.9.0.6" +# hooks: +# - id: shellcheck # Disallow some common capitalization mistakes - repo: local diff --git a/CMakeLists.txt b/CMakeLists.txt index dd60a89..665f92d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,48 +6,58 @@ set(SKBUILD_LINK_LIBRARIES_KEYWORD PRIVATE) set(Python_FIND_IMPLEMENTATIONS CPython PyPy) set(THREADS_PREFER_PTHREAD_FLAG ON) -if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") - set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "Minimum OS X deployment version") +if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + set(CMAKE_OSX_DEPLOYMENT_TARGET + "10.9" + CACHE STRING "Minimum OS X deployment version") endif() project(Levenshtein LANGUAGES C CXX) -if (MSVC) - add_compile_options(/W4) +if(MSVC) + add_compile_options(/W4) else() - add_compile_options(-Wall -Wextra -pedantic) + add_compile_options(-Wall -Wextra -pedantic) endif() if(CMAKE_VERSION VERSION_LESS 3.18) - find_package(Python COMPONENTS Interpreter Development REQUIRED) + find_package( + Python + COMPONENTS Interpreter Development + REQUIRED) else() - set(Python_ARTIFACTS_INTERACTIVE TRUE) - find_package(Python COMPONENTS Interpreter Development.Module REQUIRED) + set(Python_ARTIFACTS_INTERACTIVE TRUE) + find_package( + Python + COMPONENTS Interpreter Development.Module + REQUIRED) endif() if(CMAKE_VERSION VERSION_LESS 3.17) - execute_process( - COMMAND "${Python_EXECUTABLE}" -c - "import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX').split('.')[1])" - OUTPUT_VARIABLE Python_SOABI - OUTPUT_STRIP_TRAILING_WHITESPACE COMMAND_ECHO STDOUT) - message(STATUS "Corrected SOABI: ${Python_SOABI}") + execute_process( + COMMAND + "${Python_EXECUTABLE}" -c + "import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX').split('.')[1])" + OUTPUT_VARIABLE Python_SOABI + OUTPUT_STRIP_TRAILING_WHITESPACE COMMAND_ECHO STDOUT) + message(STATUS "Corrected SOABI: ${Python_SOABI}") elseif("${Python_INTERPRETER_ID}" STREQUAL "PyPy") - message(STATUS "PyPy SOABI: ${Python_SOABI}") - execute_process( - COMMAND "${Python_EXECUTABLE}" -c - "import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX').split('.')[1])" - OUTPUT_VARIABLE Python_SOABI - OUTPUT_STRIP_TRAILING_WHITESPACE COMMAND_ECHO STDOUT) - message(STATUS "Corrected SOABI: ${Python_SOABI}") + message(STATUS "PyPy SOABI: ${Python_SOABI}") + execute_process( + COMMAND + "${Python_EXECUTABLE}" -c + "import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX').split('.')[1])" + OUTPUT_VARIABLE Python_SOABI + OUTPUT_STRIP_TRAILING_WHITESPACE COMMAND_ECHO STDOUT) + message(STATUS "Corrected SOABI: ${Python_SOABI}") endif() find_package(rapidfuzz 3.0.0 QUIET) -if (rapidfuzz_FOUND) - message(STATUS "Using system supplied version of rapidfuzz-cpp") +if(rapidfuzz_FOUND) + message(STATUS "Using system supplied version of rapidfuzz-cpp") else() - message(STATUS "Using packaged version of rapidfuzz-cpp") - add_subdirectory(extern/rapidfuzz-cpp) + message(STATUS "Using packaged version of rapidfuzz-cpp") + add_subdirectory(extern/rapidfuzz-cpp) endif() set(LEV_BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src/Levenshtein) diff --git a/COPYING b/COPYING index 3be4dc3..b99c25d 100644 --- a/COPYING +++ b/COPYING @@ -56,7 +56,7 @@ patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. - + GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION @@ -111,7 +111,7 @@ above, provided that you also meet all of these conditions: License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) - + These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in @@ -169,7 +169,7 @@ access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. - + 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is @@ -226,7 +226,7 @@ impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. - + 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License @@ -279,7 +279,7 @@ PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS - + Appendix: How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest diff --git a/HISTORY.md b/HISTORY.md index 2c139bb..4662f82 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -262,5 +262,4 @@ v0.8.0 set was added ### v0.1.0 -* Inital release - +* Initial release diff --git a/Makefile b/Makefile index 64c3158..b97de95 100644 --- a/Makefile +++ b/Makefile @@ -17,4 +17,4 @@ help: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/README.md b/README.md index 8e7da43..557fc50 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@

Continous Integration + alt="Continuous Integration"> = _version.parse("3.12"): @@ -21,6 +24,7 @@ def _cmake_required(): return True + def _ninja_required(): if _platform.system() == "Windows": print("Ninja is part of the MSVC installation on Windows") @@ -28,7 +32,7 @@ def _ninja_required(): for generator in ("ninja", "make"): try: - _subprocess.check_output([generator, '--version']) + _subprocess.check_output([generator, "--version"]) print(f"Using System version of {generator}") return False except (OSError, _subprocess.CalledProcessError): @@ -36,11 +40,12 @@ def _ninja_required(): return True + def get_requires_for_build_wheel(config_settings=None): packages = [] if _cmake_required(): - packages.append('cmake') + packages.append("cmake") if _ninja_required(): - packages.append('ninja') + packages.append("ninja") return _orig.get_requires_for_build_wheel(config_settings) + packages diff --git a/docs/changelog.rst b/docs/changelog.rst index d343267..e121a7d 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1 +1 @@ -.. include:: ../HISTORY.txt \ No newline at end of file +.. include:: ../HISTORY.txt diff --git a/docs/conf.py b/docs/conf.py index 4d96f77..92b54dd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,25 +16,24 @@ # -- Project information ----------------------------------------------------- +from __future__ import annotations -project = 'Levenshtein' -copyright = '2021, Max Bachmann' -author = 'Max Bachmann' +project = "Levenshtein" +copyright = "2021, Max Bachmann" +author = "Max Bachmann" # The full version, including alpha/beta/rc tags -release = '0.23.0' +release = "0.23.0" # -- General configuration --------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = [ - 'sphinx.ext.autodoc', 'sphinx.ext.napoleon' -] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -47,13 +46,13 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] napoleon_google_docstring = False -# -- Extension configuration ------------------------------------------------- \ No newline at end of file +# -- Extension configuration ------------------------------------------------- diff --git a/docs/index.rst b/docs/index.rst index c59a58f..d26b4c2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,8 +1,3 @@ -.. RapidFuzz documentation master file, created by - sphinx-quickstart on Fri Jan 1 19:02:29 2021. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - Welcome to Levenshtein's documentation! ======================================= diff --git a/docs/installation.rst b/docs/installation.rst index 57b2ff7..f7fa66b 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -1,12 +1,12 @@ Installation ============ -The recommended method to install Levenshtein is by using `pip` (the Python package manager) +The recommended method to install Levenshtein is by using ``pip`` (the Python package manager) using pip --------- -Levenshtein can be installed with `pip`: +Levenshtein can be installed with ``pip``: .. code-block:: sh diff --git a/make.bat b/make.bat index 07b36dd..cfa9f8a 100644 --- a/make.bat +++ b/make.bat @@ -5,7 +5,7 @@ pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build + set SPHINXBUILD=sphinx-build ) set SOURCEDIR=docs set BUILDDIR=build @@ -14,15 +14,15 @@ if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% diff --git a/pyproject.toml b/pyproject.toml index b3eacbf..bb711c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,6 +78,7 @@ extend-ignore = [ "E501", # Line too long "PT004", # Use underscore for non-returning fixture (use usefixture instead) "PTH123", # use pathlib instead of builtin open + "PLC1901", # simply not always correct ] target-version = "py38" src = ["src"] diff --git a/setup.py b/setup.py index 098529a..b10c4b2 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,8 @@ +from __future__ import annotations + from skbuild import setup -with open('README.md', 'rt', encoding="utf8") as f: +with open("README.md", encoding="utf8") as f: readme = f.read() setup( @@ -13,9 +15,8 @@ description="Python extension for computing string edit distances and similarities.", long_description=readme, long_description_content_type="text/markdown", - license="GPL", - license_file = "COPYING", + license_file="COPYING", classifiers=[ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.8", @@ -23,13 +24,10 @@ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", - "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)" + "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)", ], - packages=["Levenshtein"], - package_dir={'':'src'}, - package_data={ - "Levenshtein": ["*.pyi", "py.typed"] - }, - python_requires=">=3.8" + package_dir={"": "src"}, + package_data={"Levenshtein": ["*.pyi", "py.typed"]}, + python_requires=">=3.8", ) diff --git a/src/Levenshtein/CMakeLists.txt b/src/Levenshtein/CMakeLists.txt index 973d58a..5bd14ca 100644 --- a/src/Levenshtein/CMakeLists.txt +++ b/src/Levenshtein/CMakeLists.txt @@ -1,30 +1,38 @@ - function(create_cython_target _name) - if(EXISTS ${CMAKE_CURRENT_LIST_DIR}/${_name}.cxx) - set(${_name} ${CMAKE_CURRENT_LIST_DIR}/${_name}.cxx PARENT_SCOPE) - else() - find_package(Cython REQUIRED) - add_cython_target(${_name} CXX) - set(${_name} ${_name} PARENT_SCOPE) - endif() + if(EXISTS ${CMAKE_CURRENT_LIST_DIR}/${_name}.cxx) + set(${_name} + ${CMAKE_CURRENT_LIST_DIR}/${_name}.cxx + PARENT_SCOPE) + else() + find_package(Cython REQUIRED) + add_cython_target(${_name} CXX) + set(${_name} + ${_name} + PARENT_SCOPE) + endif() endfunction(create_cython_target) function(rf_add_library name) - if(CMAKE_VERSION VERSION_LESS 3.17) - Python_add_library(${name} MODULE ${ARGV}) - get_property(suffix TARGET ${name} PROPERTY SUFFIX) - if (NOT suffix) - set (suffix "${CMAKE_SHARED_MODULE_SUFFIX}") - endif() - set_property (TARGET ${name} PROPERTY SUFFIX ".${Python_SOABI}${suffix}") - else() - Python_add_library(${name} MODULE WITH_SOABI ${ARGV}) + if(CMAKE_VERSION VERSION_LESS 3.17) + python_add_library(${name} MODULE ${ARGV}) + get_property( + suffix + TARGET ${name} + PROPERTY SUFFIX) + if(NOT suffix) + set(suffix "${CMAKE_SHARED_MODULE_SUFFIX}") endif() + set_property(TARGET ${name} PROPERTY SUFFIX ".${Python_SOABI}${suffix}") + else() + python_add_library(${name} MODULE WITH_SOABI ${ARGV}) + endif() endfunction(rf_add_library) create_cython_target(levenshtein_cpp) -rf_add_library(levenshtein_cpp ${levenshtein_cpp} ${LEV_BASE_DIR}/Levenshtein-c/_levenshtein.cpp) +rf_add_library(levenshtein_cpp ${levenshtein_cpp} + ${LEV_BASE_DIR}/Levenshtein-c/_levenshtein.cpp) target_compile_features(levenshtein_cpp PUBLIC cxx_std_17) -target_include_directories(levenshtein_cpp PRIVATE ${LEV_BASE_DIR}/Levenshtein-c) +target_include_directories(levenshtein_cpp + PRIVATE ${LEV_BASE_DIR}/Levenshtein-c) target_link_libraries(levenshtein_cpp PRIVATE rapidfuzz::rapidfuzz) install(TARGETS levenshtein_cpp LIBRARY DESTINATION src/Levenshtein) diff --git a/src/Levenshtein/Levenshtein-c/_levenshtein.hpp b/src/Levenshtein/Levenshtein-c/_levenshtein.hpp index 1d2af80..d409785 100644 --- a/src/Levenshtein/Levenshtein-c/_levenshtein.hpp +++ b/src/Levenshtein/Levenshtein-c/_levenshtein.hpp @@ -60,7 +60,7 @@ static inline bool is_valid_string(PyObject* py_str) if (PyBytes_Check(py_str)) is_string = true; else if (PyUnicode_Check(py_str)) { - // PEP 623 deprecates legacy strings and therefor + // PEP 623 deprecates legacy strings and therefore // deprecates e.g. PyUnicode_READY in Python 3.10 #if PY_VERSION_HEX < PYTHON_VERSION(3, 10, 0) if (PyUnicode_READY(py_str)) @@ -580,7 +580,7 @@ static inline double lev_edit_seq_distance(const std::vector& strings if (strings1_start == strings1_end) return (double)std::distance(strings2_start, strings2_end); if (strings2_start == strings2_end) return (double)std::distance(strings1_start, strings1_end); - /* initalize first row */ + /* initialize first row */ size_t n1 = std::distance(strings1_start, strings1_end); size_t n2 = std::distance(strings2_start, strings2_end); auto row = std::make_unique(n2 + 1); diff --git a/src/Levenshtein/StringMatcher.py b/src/Levenshtein/StringMatcher.py index aa462da..269b98a 100644 --- a/src/Levenshtein/StringMatcher.py +++ b/src/Levenshtein/StringMatcher.py @@ -1,6 +1,9 @@ -from Levenshtein import * +from __future__ import annotations + from warnings import warn +from Levenshtein import distance, editops, matching_blocks, opcodes, ratio + class StringMatcher: """A SequenceMatcher-like class built on the top of Levenshtein""" @@ -11,9 +14,9 @@ def _reset_cache(self): def __init__(self, isjunk=None, seq1="", seq2="", autojunk=False): if isjunk: - warn("isjunk NOT implemented, it will be ignored") + warn("isjunk NOT implemented, it will be ignored", stacklevel=1) if autojunk: - warn("autojunk NOT implemented, it will be ignored") + warn("autojunk NOT implemented, it will be ignored", stacklevel=1) self._str1, self._str2 = seq1, seq2 self._reset_cache() @@ -47,9 +50,7 @@ def get_editops(self): def get_matching_blocks(self): if not self._matching_blocks: - self._matching_blocks = matching_blocks( - self.get_opcodes(), self._str1, self._str2 - ) + self._matching_blocks = matching_blocks(self.get_opcodes(), self._str1, self._str2) return self._matching_blocks def ratio(self): diff --git a/src/Levenshtein/__init__.py b/src/Levenshtein/__init__.py index f992e21..e585128 100644 --- a/src/Levenshtein/__init__.py +++ b/src/Levenshtein/__init__.py @@ -13,31 +13,34 @@ arguments to a function (method) have to be of the same type (or its subclasses). """ +from __future__ import annotations __author__: str = "Max Bachmann" __license__: str = "GPL" -import rapidfuzz.distance.Levenshtein as _Levenshtein -import rapidfuzz.distance.Indel as _Indel +import importlib.metadata + import rapidfuzz.distance.Hamming as _Hamming +import rapidfuzz.distance.Indel as _Indel import rapidfuzz.distance.Jaro as _Jaro import rapidfuzz.distance.JaroWinkler as _JaroWinkler +import rapidfuzz.distance.Levenshtein as _Levenshtein from rapidfuzz.distance import ( Editops as _Editops, +) +from rapidfuzz.distance import ( Opcodes as _Opcodes, ) from Levenshtein.levenshtein_cpp import ( - quickmedian, median, median_improve, + quickmedian, + seqratio, setmedian, setratio, - seqratio, ) -import importlib.metadata - try: __version__: str = importlib.metadata.version(__package__ or __name__) except importlib.metadata.PackageNotFoundError: @@ -64,9 +67,7 @@ ] -def distance( - s1, s2, *, weights=(1, 1, 1), processor=None, score_cutoff=None, score_hint=None -): +def distance(s1, s2, *, weights=(1, 1, 1), processor=None, score_cutoff=None, score_hint=None): """ Calculates the minimum number of insertions, deletions, and substitutions required to change one sequence into the other according to Levenshtein with custom @@ -139,7 +140,7 @@ def ratio(s1, s2, *, processor=None, score_cutoff=None): Calculates a normalized indel similarity in the range [0, 1]. The indel distance calculates the minimum number of insertions and deletions required to change one sequence into the other. - + This is calculated as ``1 - (distance / (len1 + len2))`` Parameters @@ -180,9 +181,7 @@ def ratio(s1, s2, *, processor=None, score_cutoff=None): >>> ratio(["lewenstein"], ["levenshtein"], processor=lambda s: s[0]) 0.8571428571428572 """ - return _Indel.normalized_similarity( - s1, s2, processor=processor, score_cutoff=score_cutoff - ) + return _Indel.normalized_similarity(s1, s2, processor=processor, score_cutoff=score_cutoff) def hamming(s1, s2, *, pad=True, processor=None, score_cutoff=None): @@ -250,9 +249,7 @@ def jaro(s1, s2, *, processor=None, score_cutoff=None) -> float: return _Jaro.similarity(s1, s2, processor=processor, score_cutoff=score_cutoff) -def jaro_winkler( - s1, s2, *, prefix_weight=0.1, processor=None, score_cutoff=None -) -> float: +def jaro_winkler(s1, s2, *, prefix_weight=0.1, processor=None, score_cutoff=None) -> float: """ Calculates the jaro winkler similarity @@ -329,7 +326,7 @@ def editops(*args): The result is a list of triples (operation, spos, dpos), where operation is one of 'equal', 'replace', 'insert', or 'delete'; spos and dpos are position of characters in the first (source) and the - second (destination) strings. These are operations on signle + second (destination) strings. These are operations on single characters. In fact the returned list doesn't contain the 'equal', but all the related functions accept both lists with and without 'equal's. @@ -433,11 +430,7 @@ def matching_blocks(edit_operations, source_string, destination_string): 'ees' """ len1 = source_string if isinstance(source_string, int) else len(source_string) - len2 = ( - destination_string - if isinstance(destination_string, int) - else len(destination_string) - ) + len2 = destination_string if isinstance(destination_string, int) else len(destination_string) if not edit_operations or len(edit_operations[0]) == 3: return _Editops(edit_operations, len1, len2).as_matching_blocks() @@ -480,13 +473,9 @@ def apply_edit(edit_operations, source_string, destination_string): len2 = len(destination_string) if len(edit_operations[0]) == 3: - return _Editops(edit_operations, len1, len2).apply( - source_string, destination_string - ) + return _Editops(edit_operations, len1, len2).apply(source_string, destination_string) - return _Opcodes(edit_operations, len1, len2).apply( - source_string, destination_string - ) + return _Opcodes(edit_operations, len1, len2).apply(source_string, destination_string) def subtract_edit(edit_operations, subsequence): diff --git a/src/Levenshtein/__init__.pyi b/src/Levenshtein/__init__.pyi index c71aba9..30176cc 100644 --- a/src/Levenshtein/__init__.pyi +++ b/src/Levenshtein/__init__.pyi @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import Callable, Hashable, Sequence, overload __author__: str @@ -31,32 +33,18 @@ def matching_blocks( source_string: Sequence[Hashable] | int, destination_string: Sequence[Hashable] | int, ) -> _MatchingBlocks: ... -def subtract_edit( - edit_operations: _EditopsList, subsequence: _EditopsList -) -> _EditopsList: ... -def apply_edit( - edit_operations: _AnyEditops, source_string: str, destination_string: str -) -> str: ... -def median( - strlist: list[str | bytes], wlist: list[float] | None = None -) -> str: ... -def quickmedian( - strlist: list[str | bytes], wlist: list[float] | None = None -) -> str: ... +def subtract_edit(edit_operations: _EditopsList, subsequence: _EditopsList) -> _EditopsList: ... +def apply_edit(edit_operations: _AnyEditops, source_string: str, destination_string: str) -> str: ... +def median(strlist: list[str | bytes], wlist: list[float] | None = None) -> str: ... +def quickmedian(strlist: list[str | bytes], wlist: list[float] | None = None) -> str: ... def median_improve( string: str | bytes, strlist: list[str | bytes], wlist: list[float] | None = None, ) -> str: ... -def setmedian( - strlist: list[str | bytes], wlist: list[float] | None = None -) -> str: ... -def setratio( - strlist1: list[str | bytes], strlist2: list[str | bytes] -) -> float: ... -def seqratio( - strlist1: list[str | bytes], strlist2: list[str | bytes] -) -> float: ... +def setmedian(strlist: list[str | bytes], wlist: list[float] | None = None) -> str: ... +def setratio(strlist1: list[str | bytes], strlist2: list[str | bytes]) -> float: ... +def seqratio(strlist1: list[str | bytes], strlist2: list[str | bytes]) -> float: ... def distance( s1: Sequence[Hashable], s2: Sequence[Hashable], diff --git a/src/Levenshtein/generate.sh b/src/Levenshtein/generate.sh index 8466a27..0209ee8 100644 --- a/src/Levenshtein/generate.sh +++ b/src/Levenshtein/generate.sh @@ -7,4 +7,4 @@ generate_cython() echo "Generated $curdir/$1.cxx" } -generate_cython levenshtein_cpp \ No newline at end of file +generate_cython levenshtein_cpp diff --git a/tests/test_levenshtein_distance.py b/tests/test_levenshtein_distance.py index 2f3b4c1..f6e960d 100644 --- a/tests/test_levenshtein_distance.py +++ b/tests/test_levenshtein_distance.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import Levenshtein diff --git a/tests/test_matching_blocks.py b/tests/test_matching_blocks.py index f30c6ec..e2256fd 100644 --- a/tests/test_matching_blocks.py +++ b/tests/test_matching_blocks.py @@ -1,6 +1,9 @@ -from Levenshtein import * +from __future__ import annotations + from rapidfuzz.distance import MatchingBlock +from Levenshtein import editops, matching_blocks + def test_simple(): a, b = "spam", "park" diff --git a/tests/test_median.py b/tests/test_median.py index 5da62af..eafd9fa 100644 --- a/tests/test_median.py +++ b/tests/test_median.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import Levenshtein @@ -28,10 +30,7 @@ def test_documented(): assert Levenshtein.median(fixme) == "Levenshtein" assert Levenshtein.quickmedian(fixme) == "Levnshein" assert Levenshtein.median_improve("spam", fixme) == "enhtein" - assert ( - Levenshtein.median_improve(Levenshtein.median_improve("spam", fixme), fixme) - == "Levenshtein" - ) + assert Levenshtein.median_improve(Levenshtein.median_improve("spam", fixme), fixme) == "Levenshtein" assert ( Levenshtein.setmedian( [ diff --git a/tests/test_seq_ratio.py b/tests/test_seq_ratio.py index 1efb3b3..f95ea66 100644 --- a/tests/test_seq_ratio.py +++ b/tests/test_seq_ratio.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import Levenshtein diff --git a/tests/test_set_ratio.py b/tests/test_set_ratio.py index 7e9ca4b..9ed9e32 100644 --- a/tests/test_set_ratio.py +++ b/tests/test_set_ratio.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import Levenshtein