From ba243a8cb287cdfa38a19681d1d922a7420c9710 Mon Sep 17 00:00:00 2001 From: bvandercar-vt Date: Thu, 10 Oct 2024 11:04:11 -0600 Subject: [PATCH] reduce len() calls --- bench/benchmark_cdist.py | 10 ++++++---- bench/benchmark_cpdist.py | 11 +++++++---- bench/benchmark_scorer.py | 10 ++++++---- src/rapidfuzz/fuzz_py.py | 9 +++++---- src/rapidfuzz/process_cpp_impl.pyx | 10 ++++++---- src/rapidfuzz/process_py.py | 7 +++++-- 6 files changed, 35 insertions(+), 22 deletions(-) diff --git a/bench/benchmark_cdist.py b/bench/benchmark_cdist.py index 171e9b72..2264bc12 100644 --- a/bench/benchmark_cdist.py +++ b/bench/benchmark_cdist.py @@ -58,13 +58,15 @@ def get_platform(): def benchmark(): words = ["".join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) for _ in range(10000)] - sample_rate = len(words) // 100 + len_words = len(words) + sample_rate = len_words // 100 sample = words[::sample_rate] - total = len(words) * len(sample) + len_sample = len(sample) + total = len_words * len_sample print("System:", get_platform()) - print("Words :", len(words)) - print("Sample:", len(sample)) + print("Words :", len_words) + print("Sample:", len_sample) print("Total : %s calls\n" % total) def wrap_cdist(scorer, processor): diff --git a/bench/benchmark_cpdist.py b/bench/benchmark_cpdist.py index 8e67b8ae..68c243f9 100644 --- a/bench/benchmark_cpdist.py +++ b/bench/benchmark_cpdist.py @@ -47,14 +47,17 @@ def get_platform(): def benchmark(): words = ["".join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) for _ in range(1000000)] - sample_rate = len(words) // 2 + len_words = len(words) + sample_rate = len_words // 2 words1 = words[:sample_rate] words2 = words[sample_rate::] - total = len(words1) + len_words1 = len(words1) + len_words2 = len(words2) + total = len_words1 print("System:", get_platform()) - print("Words :", len(words1)) - print("Sample:", len(words2)) + print("Words :", len_words1) + print("Sample:", len_words2) print("Total : %s calls\n" % total) def wrap_cpdist(scorer): diff --git a/bench/benchmark_scorer.py b/bench/benchmark_scorer.py index ae3b98e0..55884291 100644 --- a/bench/benchmark_scorer.py +++ b/bench/benchmark_scorer.py @@ -47,14 +47,16 @@ def get_platform(): def benchmark(): words = ["".join(random.choice(string.ascii_letters + string.digits) for _ in range(10)) for _ in range(10000)] - sample_rate = len(words) // 100 + len_words = len(words) + sample_rate = len_words // 100 sample = words[::sample_rate] + len_sample = len(sample) - total = len(words) * len(sample) + total = len_words * len_sample print("System:", get_platform()) - print("Words :", len(words)) - print("Sample:", len(sample)) + print("Words :", len_words) + print("Sample:", len_sample) print("Total : %s calls\n" % total) def wrap(f): diff --git a/src/rapidfuzz/fuzz_py.py b/src/rapidfuzz/fuzz_py.py index 36eda853..3f8fab3d 100644 --- a/src/rapidfuzz/fuzz_py.py +++ b/src/rapidfuzz/fuzz_py.py @@ -316,9 +316,10 @@ def partial_ratio_alignment( if not s1 and not s2: return ScoreAlignment(100.0, 0, 0, 0, 0) - s1, s2 = conv_sequences(s1, s2) - if len(s1) <= len(s2): + len1 = len(s1) + len2 = len(s2) + if len1 <= len2: shorter = s1 longer = s2 else: @@ -326,7 +327,7 @@ def partial_ratio_alignment( longer = s1 res = _partial_ratio_impl(shorter, longer, score_cutoff / 100) - if res.score != 100 and len(s1) == len(s2): + if res.score != 100 and len1 == len2: score_cutoff = max(score_cutoff, res.score) res2 = _partial_ratio_impl(longer, shorter, score_cutoff / 100) if res2.score > res.score: @@ -335,7 +336,7 @@ def partial_ratio_alignment( if res.score < score_cutoff: return None - if len(s1) <= len(s2): + if len1 <= len2: return res return ScoreAlignment(res.score, res.dest_start, res.dest_end, res.src_start, res.src_end) diff --git a/src/rapidfuzz/process_cpp_impl.pyx b/src/rapidfuzz/process_cpp_impl.pyx index 90bae64d..9c4bf218 100644 --- a/src/rapidfuzz/process_cpp_impl.pyx +++ b/src/rapidfuzz/process_cpp_impl.pyx @@ -1208,6 +1208,7 @@ def extract(query, choices, *, scorer=WRatio, processor=None, limit=5, score_cut cdef RF_Scorer* scorer_context = NULL cdef RF_ScorerFlags scorer_flags cdef int64_t c_limit + cdef int64_t choices_len = len(choices) scorer_kwargs = scorer_kwargs.copy() if scorer_kwargs else {} setupPandas() @@ -1216,14 +1217,15 @@ def extract(query, choices, *, scorer=WRatio, processor=None, limit=5, score_cut return [] try: - if limit is None or limit > len(choices): - limit = len(choices) + if limit is None or limit > choices_len: + limit = choices_len except TypeError: # handle generators. In Theory we could retrieve the length later on while # preprocessing the choices, but this is good enough for now choices = list(choices) - if limit is None or limit > len(choices): - limit = len(choices) + choices_len = len(choices) + if limit is None or limit > choices_len: + limit = choices_len c_limit = limit if c_limit == 1: diff --git a/src/rapidfuzz/process_py.py b/src/rapidfuzz/process_py.py index 69e1f49f..4d482dda 100644 --- a/src/rapidfuzz/process_py.py +++ b/src/rapidfuzz/process_py.py @@ -643,14 +643,17 @@ def cpdist( """ import numpy as np - if len(queries) != len(choices): + len_queries = len(queries) + len_choices = len(choices) + + if len_queries != len_choices: error_message = "Length of queries and choices must be the same!" raise ValueError(error_message) _ = workers, score_hint scorer_kwargs = scorer_kwargs or {} dtype = _dtype_to_type_num(dtype, scorer, scorer_kwargs) - results = np.zeros((len(queries),), dtype=dtype) + results = np.zeros((len_queries,), dtype=dtype) setupPandas()