From 0c2d65f186ff3bce6c7b6dfac140764b3a78d9ce Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Sun, 8 Oct 2023 17:07:45 +0200 Subject: [PATCH] fix some bugs in simd Jaro/JaroWinkler implementation --- extras/rapidfuzz_amalgamated.hpp | 82 +++++++++++++++-------------- rapidfuzz/details/distance.hpp | 20 +++---- rapidfuzz/details/intrinsics.hpp | 2 +- rapidfuzz/details/simd_avx2.hpp | 2 +- rapidfuzz/details/simd_sse2.hpp | 4 +- rapidfuzz/distance/Indel.hpp | 2 +- rapidfuzz/distance/Jaro.hpp | 8 +-- rapidfuzz/distance/JaroWinkler.hpp | 14 ++--- rapidfuzz/distance/Jaro_impl.hpp | 22 ++++---- rapidfuzz/distance/LCSseq.hpp | 2 +- rapidfuzz/distance/Levenshtein.hpp | 2 +- rapidfuzz/distance/OSA.hpp | 2 +- test/distance/tests-JaroWinkler.cpp | 13 +++-- 13 files changed, 91 insertions(+), 84 deletions(-) diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index 35bc7820..9ea2a6b7 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,7 +1,7 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2023-10-08 12:45:00.456286 +// Generated: 2023-10-08 17:39:04.586178 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. @@ -1375,7 +1375,7 @@ T bit_mask_lsb(int n) { T mask = static_cast(-1); if (n < static_cast(sizeof(T) * 8)) { - mask += static_cast(1) << n; + mask += static_cast(1) << static_cast(n); } return mask; } @@ -2345,7 +2345,7 @@ static inline native_simd operator>(const native_simd& a, static inline native_simd operator>(const native_simd& a, const native_simd& b) noexcept { - __m256i signbit = _mm256_set1_epi32(0x80000000); + __m256i signbit = _mm256_set1_epi32(static_cast(0x80000000)); __m256i a1 = _mm256_xor_si256(a, signbit); __m256i b1 = _mm256_xor_si256(b, signbit); return _mm256_cmpgt_epi32(a1, b1); // signed compare @@ -2934,7 +2934,7 @@ static inline native_simd operator>(const native_simd& a, static inline native_simd operator>(const native_simd& a, const native_simd& b) noexcept { - __m128i signbit = _mm_set1_epi32(0x80000000); + __m128i signbit = _mm_set1_epi32(static_cast(0x80000000)); __m128i a1 = _mm_xor_si128(a, signbit); __m128i b1 = _mm_xor_si128(b, signbit); return _mm_cmpgt_epi32(a1, b1); // signed compare @@ -2943,7 +2943,7 @@ static inline native_simd operator>(const native_simd& a, static inline native_simd operator>(const native_simd& a, const native_simd& b) noexcept { - __m128i sign32 = _mm_set1_epi32(0x80000000); // sign bit of each dword + __m128i sign32 = _mm_set1_epi32(static_cast(0x80000000)); // sign bit of each dword __m128i aflip = _mm_xor_si128(a, sign32); // a with sign bits flipped to use signed compare __m128i bflip = _mm_xor_si128(b, sign32); // b with sign bits flipped to use signed compare __m128i equal = _mm_cmpeq_epi32(a, b); // a == b, dwords @@ -3322,7 +3322,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { friend T; }; -template +template struct MultiNormalizedMetricBase { template void normalized_distance(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, @@ -3362,23 +3362,23 @@ struct MultiNormalizedMetricBase { throw std::invalid_argument("scores has to have >= result_count() elements"); // reinterpretation only works when the types have the same size - int64_t* scores_i64 = nullptr; - if constexpr (sizeof(double) == sizeof(int64_t)) - scores_i64 = reinterpret_cast(scores); + ResType* scores_orig = nullptr; + if constexpr (sizeof(double) == sizeof(ResType)) + scores_orig = reinterpret_cast(scores); else - scores_i64 = new int64_t[derived.result_count()]; + scores_orig = new ResType[derived.result_count()]; Range s2_(s2); - derived.distance(scores_i64, derived.result_count(), s2_); + derived.distance(scores_orig, derived.result_count(), s2_); for (size_t i = 0; i < derived.get_input_count(); ++i) { auto maximum = derived.maximum(i, s2); double norm_dist = - (maximum != 0) ? static_cast(scores_i64[i]) / static_cast(maximum) : 0.0; + (maximum != 0) ? static_cast(scores_orig[i]) / static_cast(maximum) : 0.0; scores[i] = (norm_dist <= score_cutoff) ? norm_dist : 1.0; } - if constexpr (sizeof(double) != sizeof(int64_t)) delete[] scores_i64; + if constexpr (sizeof(double) != sizeof(ResType)) delete[] scores_orig; } template @@ -3400,7 +3400,7 @@ struct MultiNormalizedMetricBase { }; template -struct MultiDistanceBase : public MultiNormalizedMetricBase { +struct MultiDistanceBase : public MultiNormalizedMetricBase { template void distance(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2, ResType score_cutoff = static_cast(WorstDistance)) const @@ -3451,7 +3451,7 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase { }; template -struct MultiSimilarityBase : public MultiNormalizedMetricBase { +struct MultiSimilarityBase : public MultiNormalizedMetricBase { template void distance(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2, ResType score_cutoff = static_cast(WorstDistance)) const @@ -4726,7 +4726,7 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, int std::numeric_limits::max()> { private: friend detail::MultiSimilarityBase, int64_t, 0, std::numeric_limits::max()>; - friend detail::MultiNormalizedMetricBase>; + friend detail::MultiNormalizedMetricBase, int64_t>; constexpr static size_t get_vec_size() { @@ -5012,7 +5012,7 @@ struct MultiIndel : public detail::MultiDistanceBase, int64_t, 0, std::numeric_limits::max()> { private: friend detail::MultiDistanceBase, int64_t, 0, std::numeric_limits::max()>; - friend detail::MultiNormalizedMetricBase>; + friend detail::MultiNormalizedMetricBase, int64_t>; public: MultiIndel(size_t count) : scorer(count) @@ -5570,7 +5570,7 @@ double jaro_similarity(const BlockPatternMatchVector& PM, Range P, Ran #ifdef RAPIDFUZZ_SIMD template void jaro_similarity_simd(Range scores, const detail::BlockPatternMatchVector& block, - const std::vector& s1_lengths, Range s2, + const std::vector& s1_lengths, Range s2, double score_cutoff) noexcept { # ifdef RAPIDFUZZ_AVX2 @@ -5588,15 +5588,15 @@ void jaro_similarity_simd(Range scores, const detail::BlockPatternMatch size_t result_index = 0; if (score_cutoff > 1.0) { - for (int64_t i = 0; i < s1_lengths.size(); i++) + for (int64_t i = 0; i < static_cast(s1_lengths.size()); i++) scores[i] = 0.0; return; } if (s2.empty()) { - for (int64_t i = 0; i < s1_lengths.size(); i++) - scores[i] = s1_lengths[i] ? 0.0 : 1.0; + for (size_t i = 0; i < s1_lengths.size(); i++) + scores[static_cast(i)] = s1_lengths[i] ? 0.0 : 1.0; return; } @@ -5617,8 +5617,8 @@ void jaro_similarity_simd(Range scores, const detail::BlockPatternMatch if (Bound > maxBound) maxBound = Bound; - boundMaskSize_[i] = bit_mask_lsb(2 * Bound); - boundMask_[i] = bit_mask_lsb(Bound + 1); + boundMaskSize_[i] = bit_mask_lsb(static_cast(2 * Bound)); + boundMask_[i] = bit_mask_lsb(static_cast(Bound + 1)); }); if (s2_cur.size() > lastRelevantChar) s2_cur.remove_suffix(s2_cur.size() - lastRelevantChar); @@ -5649,7 +5649,7 @@ void jaro_similarity_simd(Range scores, const detail::BlockPatternMatch P_flag.store(P_flags.data()); alignas(32) std::array T_flags; T_flag.store(T_flags.data()); - for (int64_t i = 0; i < vec_width; ++i) { + for (size_t i = 0; i < vec_width; ++i) { VecType CommonChars = counts[i]; if (!jaro_common_char_filter(s1_lengths[result_index], s2.size(), CommonChars, score_cutoff)) { scores[static_cast(result_index)] = 0.0; @@ -5661,10 +5661,11 @@ void jaro_similarity_simd(Range scores, const detail::BlockPatternMatch VecType T_flag_cur = T_flags[i]; size_t Transpositions = 0; - int64_t cur_block = i / 4; - int64_t offset = 8 * (i % 4); + static constexpr size_t vecs_per_word = vec_width / vecs; + int64_t cur_block = static_cast(i / vecs_per_word); + int64_t offset = static_cast(sizeof(VecType) * 8 * (i % vecs_per_word)); while (T_flag_cur) { - uint64_t PatternFlagMask = blsi(P_flag_cur); + VecType PatternFlagMask = blsi(P_flag_cur); Transpositions += !(block.get(cur_block, s2[countr_zero(T_flag_cur)]) & (PatternFlagMask << offset)); @@ -5675,6 +5676,7 @@ void jaro_similarity_simd(Range scores, const detail::BlockPatternMatch double Sim = jaro_calculate_similarity(s1_lengths[result_index], s2.size(), CommonChars, Transpositions); + scores[static_cast(result_index)] = (Sim >= score_cutoff) ? Sim : 0; result_index++; } @@ -5763,7 +5765,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, private: friend detail::MultiSimilarityBase, double, 0, 1>; - friend detail::MultiNormalizedMetricBase>; + friend detail::MultiNormalizedMetricBase, double>; constexpr static size_t get_vec_size() { @@ -5829,7 +5831,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, if (pos >= input_count) throw std::invalid_argument("out of bounds insert"); - str_lens[pos] = static_cast(len); + str_lens[pos] = len; for (; first1 != last1; ++first1) { PM.insert(block, *first1, block_pos); block_pos++; @@ -5857,7 +5859,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, } template - double maximum(size_t s1_idx, detail::Range) const + double maximum([[maybe_unused]] size_t s1_idx, detail::Range) const { return 1.0; } @@ -5870,7 +5872,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, size_t input_count; size_t pos = 0; detail::BlockPatternMatchVector PM; - std::vector str_lens; + std::vector str_lens; }; } /* namespace experimental */ @@ -6070,7 +6072,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase, double, 0, 1>; - friend detail::MultiNormalizedMetricBase>; + friend detail::MultiNormalizedMetricBase, double>; public: MultiJaroWinkler(size_t count, double prefix_weight_) : scorer(count), prefix_weight(prefix_weight_) @@ -6100,10 +6102,10 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase(std::distance(first1, last1)); + int64_t len = std::distance(first1, last1); std::array prefix; - for (size_t i = 0; i < std::min(len, 4); ++i) - prefix[i] = (uint64_t)first1[i]; + for (size_t i = 0; i < std::min(static_cast(len), 4); ++i) + prefix[i] = static_cast(first1[i]); str_lens.push_back(len); prefixes.push_back(prefix); @@ -6117,7 +6119,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase= result_count() elements"); - scorer.similarity(scores, score_count, s2, score_cutoff); + scorer.similarity(scores, score_count, s2, std::min(0.7, score_cutoff)); for (size_t i = 0; i < get_input_count(); ++i) { if (scores[i] > 0.7) { @@ -6135,7 +6137,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase - double maximum(size_t s1_idx, detail::Range) const + double maximum([[maybe_unused]] size_t s1_idx, detail::Range) const { return 1.0; } @@ -6145,7 +6147,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase str_lens; + std::vector str_lens; // todo this could lead to incorrect results when comparing uint64_t with int64_t std::vector> prefixes; MultiJaro scorer; @@ -7674,7 +7676,7 @@ struct MultiLevenshtein : public detail::MultiDistanceBase, int64_t, 0, std::numeric_limits::max()>; - friend detail::MultiNormalizedMetricBase>; + friend detail::MultiNormalizedMetricBase, int64_t>; constexpr static size_t get_vec_size() { @@ -8240,7 +8242,7 @@ struct MultiOSA : public detail::MultiDistanceBase, int64_t, 0, std::numeric_limits::max()> { private: friend detail::MultiDistanceBase, int64_t, 0, std::numeric_limits::max()>; - friend detail::MultiNormalizedMetricBase>; + friend detail::MultiNormalizedMetricBase, int64_t>; constexpr static size_t get_vec_size() { diff --git a/rapidfuzz/details/distance.hpp b/rapidfuzz/details/distance.hpp index 815b1ccf..467e8c32 100644 --- a/rapidfuzz/details/distance.hpp +++ b/rapidfuzz/details/distance.hpp @@ -362,7 +362,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { friend T; }; -template +template struct MultiNormalizedMetricBase { template void normalized_distance(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, @@ -402,23 +402,23 @@ struct MultiNormalizedMetricBase { throw std::invalid_argument("scores has to have >= result_count() elements"); // reinterpretation only works when the types have the same size - int64_t* scores_i64 = nullptr; - if constexpr (sizeof(double) == sizeof(int64_t)) - scores_i64 = reinterpret_cast(scores); + ResType* scores_orig = nullptr; + if constexpr (sizeof(double) == sizeof(ResType)) + scores_orig = reinterpret_cast(scores); else - scores_i64 = new int64_t[derived.result_count()]; + scores_orig = new ResType[derived.result_count()]; Range s2_(s2); - derived.distance(scores_i64, derived.result_count(), s2_); + derived.distance(scores_orig, derived.result_count(), s2_); for (size_t i = 0; i < derived.get_input_count(); ++i) { auto maximum = derived.maximum(i, s2); double norm_dist = - (maximum != 0) ? static_cast(scores_i64[i]) / static_cast(maximum) : 0.0; + (maximum != 0) ? static_cast(scores_orig[i]) / static_cast(maximum) : 0.0; scores[i] = (norm_dist <= score_cutoff) ? norm_dist : 1.0; } - if constexpr (sizeof(double) != sizeof(int64_t)) delete[] scores_i64; + if constexpr (sizeof(double) != sizeof(ResType)) delete[] scores_orig; } template @@ -440,7 +440,7 @@ struct MultiNormalizedMetricBase { }; template -struct MultiDistanceBase : public MultiNormalizedMetricBase { +struct MultiDistanceBase : public MultiNormalizedMetricBase { template void distance(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2, ResType score_cutoff = static_cast(WorstDistance)) const @@ -491,7 +491,7 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase { }; template -struct MultiSimilarityBase : public MultiNormalizedMetricBase { +struct MultiSimilarityBase : public MultiNormalizedMetricBase { template void distance(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2, ResType score_cutoff = static_cast(WorstDistance)) const diff --git a/rapidfuzz/details/intrinsics.hpp b/rapidfuzz/details/intrinsics.hpp index 327ec652..4b95677b 100644 --- a/rapidfuzz/details/intrinsics.hpp +++ b/rapidfuzz/details/intrinsics.hpp @@ -21,7 +21,7 @@ T bit_mask_lsb(int n) { T mask = static_cast(-1); if (n < static_cast(sizeof(T) * 8)) { - mask += static_cast(1) << n; + mask += static_cast(1) << static_cast(n); } return mask; } diff --git a/rapidfuzz/details/simd_avx2.hpp b/rapidfuzz/details/simd_avx2.hpp index b8cddacf..fb076654 100644 --- a/rapidfuzz/details/simd_avx2.hpp +++ b/rapidfuzz/details/simd_avx2.hpp @@ -533,7 +533,7 @@ static inline native_simd operator>(const native_simd& a, static inline native_simd operator>(const native_simd& a, const native_simd& b) noexcept { - __m256i signbit = _mm256_set1_epi32(0x80000000); + __m256i signbit = _mm256_set1_epi32(static_cast(0x80000000)); __m256i a1 = _mm256_xor_si256(a, signbit); __m256i b1 = _mm256_xor_si256(b, signbit); return _mm256_cmpgt_epi32(a1, b1); // signed compare diff --git a/rapidfuzz/details/simd_sse2.hpp b/rapidfuzz/details/simd_sse2.hpp index 6ccc1f17..f156a4f0 100644 --- a/rapidfuzz/details/simd_sse2.hpp +++ b/rapidfuzz/details/simd_sse2.hpp @@ -562,7 +562,7 @@ static inline native_simd operator>(const native_simd& a, static inline native_simd operator>(const native_simd& a, const native_simd& b) noexcept { - __m128i signbit = _mm_set1_epi32(0x80000000); + __m128i signbit = _mm_set1_epi32(static_cast(0x80000000)); __m128i a1 = _mm_xor_si128(a, signbit); __m128i b1 = _mm_xor_si128(b, signbit); return _mm_cmpgt_epi32(a1, b1); // signed compare @@ -571,7 +571,7 @@ static inline native_simd operator>(const native_simd& a, static inline native_simd operator>(const native_simd& a, const native_simd& b) noexcept { - __m128i sign32 = _mm_set1_epi32(0x80000000); // sign bit of each dword + __m128i sign32 = _mm_set1_epi32(static_cast(0x80000000)); // sign bit of each dword __m128i aflip = _mm_xor_si128(a, sign32); // a with sign bits flipped to use signed compare __m128i bflip = _mm_xor_si128(b, sign32); // b with sign bits flipped to use signed compare __m128i equal = _mm_cmpeq_epi32(a, b); // a == b, dwords diff --git a/rapidfuzz/distance/Indel.hpp b/rapidfuzz/distance/Indel.hpp index d0c39111..a51c241c 100644 --- a/rapidfuzz/distance/Indel.hpp +++ b/rapidfuzz/distance/Indel.hpp @@ -81,7 +81,7 @@ struct MultiIndel : public detail::MultiDistanceBase, int64_t, 0, std::numeric_limits::max()> { private: friend detail::MultiDistanceBase, int64_t, 0, std::numeric_limits::max()>; - friend detail::MultiNormalizedMetricBase>; + friend detail::MultiNormalizedMetricBase, int64_t>; public: MultiIndel(size_t count) : scorer(count) diff --git a/rapidfuzz/distance/Jaro.hpp b/rapidfuzz/distance/Jaro.hpp index 717f8979..e2d6ab52 100644 --- a/rapidfuzz/distance/Jaro.hpp +++ b/rapidfuzz/distance/Jaro.hpp @@ -68,7 +68,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, private: friend detail::MultiSimilarityBase, double, 0, 1>; - friend detail::MultiNormalizedMetricBase>; + friend detail::MultiNormalizedMetricBase, double>; constexpr static size_t get_vec_size() { @@ -134,7 +134,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, if (pos >= input_count) throw std::invalid_argument("out of bounds insert"); - str_lens[pos] = static_cast(len); + str_lens[pos] = len; for (; first1 != last1; ++first1) { PM.insert(block, *first1, block_pos); block_pos++; @@ -162,7 +162,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, } template - double maximum(size_t s1_idx, detail::Range) const + double maximum([[maybe_unused]] size_t s1_idx, detail::Range) const { return 1.0; } @@ -175,7 +175,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, size_t input_count; size_t pos = 0; detail::BlockPatternMatchVector PM; - std::vector str_lens; + std::vector str_lens; }; } /* namespace experimental */ diff --git a/rapidfuzz/distance/JaroWinkler.hpp b/rapidfuzz/distance/JaroWinkler.hpp index 4db0efd1..54420095 100644 --- a/rapidfuzz/distance/JaroWinkler.hpp +++ b/rapidfuzz/distance/JaroWinkler.hpp @@ -80,7 +80,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase, double, 0, 1>; - friend detail::MultiNormalizedMetricBase>; + friend detail::MultiNormalizedMetricBase, double>; public: MultiJaroWinkler(size_t count, double prefix_weight_) : scorer(count), prefix_weight(prefix_weight_) @@ -110,10 +110,10 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase(std::distance(first1, last1)); + int64_t len = std::distance(first1, last1); std::array prefix; - for (size_t i = 0; i < std::min(len, 4); ++i) - prefix[i] = (uint64_t)first1[i]; + for (size_t i = 0; i < std::min(static_cast(len), 4); ++i) + prefix[i] = static_cast(first1[i]); str_lens.push_back(len); prefixes.push_back(prefix); @@ -127,7 +127,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase= result_count() elements"); - scorer.similarity(scores, score_count, s2, score_cutoff); + scorer.similarity(scores, score_count, s2, std::min(0.7, score_cutoff)); for (size_t i = 0; i < get_input_count(); ++i) { if (scores[i] > 0.7) { @@ -145,7 +145,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase - double maximum(size_t s1_idx, detail::Range) const + double maximum([[maybe_unused]] size_t s1_idx, detail::Range) const { return 1.0; } @@ -155,7 +155,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase str_lens; + std::vector str_lens; // todo this could lead to incorrect results when comparing uint64_t with int64_t std::vector> prefixes; MultiJaro scorer; diff --git a/rapidfuzz/distance/Jaro_impl.hpp b/rapidfuzz/distance/Jaro_impl.hpp index 78882266..e4156ac1 100644 --- a/rapidfuzz/distance/Jaro_impl.hpp +++ b/rapidfuzz/distance/Jaro_impl.hpp @@ -453,7 +453,7 @@ double jaro_similarity(const BlockPatternMatchVector& PM, Range P, Ran #ifdef RAPIDFUZZ_SIMD template void jaro_similarity_simd(Range scores, const detail::BlockPatternMatchVector& block, - const std::vector& s1_lengths, Range s2, + const std::vector& s1_lengths, Range s2, double score_cutoff) noexcept { # ifdef RAPIDFUZZ_AVX2 @@ -471,15 +471,15 @@ void jaro_similarity_simd(Range scores, const detail::BlockPatternMatch size_t result_index = 0; if (score_cutoff > 1.0) { - for (int64_t i = 0; i < s1_lengths.size(); i++) + for (int64_t i = 0; i < static_cast(s1_lengths.size()); i++) scores[i] = 0.0; return; } if (s2.empty()) { - for (int64_t i = 0; i < s1_lengths.size(); i++) - scores[i] = s1_lengths[i] ? 0.0 : 1.0; + for (size_t i = 0; i < s1_lengths.size(); i++) + scores[static_cast(i)] = s1_lengths[i] ? 0.0 : 1.0; return; } @@ -500,8 +500,8 @@ void jaro_similarity_simd(Range scores, const detail::BlockPatternMatch if (Bound > maxBound) maxBound = Bound; - boundMaskSize_[i] = bit_mask_lsb(2 * Bound); - boundMask_[i] = bit_mask_lsb(Bound + 1); + boundMaskSize_[i] = bit_mask_lsb(static_cast(2 * Bound)); + boundMask_[i] = bit_mask_lsb(static_cast(Bound + 1)); }); if (s2_cur.size() > lastRelevantChar) s2_cur.remove_suffix(s2_cur.size() - lastRelevantChar); @@ -532,7 +532,7 @@ void jaro_similarity_simd(Range scores, const detail::BlockPatternMatch P_flag.store(P_flags.data()); alignas(32) std::array T_flags; T_flag.store(T_flags.data()); - for (int64_t i = 0; i < vec_width; ++i) { + for (size_t i = 0; i < vec_width; ++i) { VecType CommonChars = counts[i]; if (!jaro_common_char_filter(s1_lengths[result_index], s2.size(), CommonChars, score_cutoff)) { scores[static_cast(result_index)] = 0.0; @@ -544,10 +544,11 @@ void jaro_similarity_simd(Range scores, const detail::BlockPatternMatch VecType T_flag_cur = T_flags[i]; size_t Transpositions = 0; - int64_t cur_block = i / 4; - int64_t offset = 8 * (i % 4); + static constexpr size_t vecs_per_word = vec_width / vecs; + int64_t cur_block = static_cast(i / vecs_per_word); + int64_t offset = static_cast(sizeof(VecType) * 8 * (i % vecs_per_word)); while (T_flag_cur) { - uint64_t PatternFlagMask = blsi(P_flag_cur); + VecType PatternFlagMask = blsi(P_flag_cur); Transpositions += !(block.get(cur_block, s2[countr_zero(T_flag_cur)]) & (PatternFlagMask << offset)); @@ -558,6 +559,7 @@ void jaro_similarity_simd(Range scores, const detail::BlockPatternMatch double Sim = jaro_calculate_similarity(s1_lengths[result_index], s2.size(), CommonChars, Transpositions); + scores[static_cast(result_index)] = (Sim >= score_cutoff) ? Sim : 0; result_index++; } diff --git a/rapidfuzz/distance/LCSseq.hpp b/rapidfuzz/distance/LCSseq.hpp index 67178852..4b16f23b 100644 --- a/rapidfuzz/distance/LCSseq.hpp +++ b/rapidfuzz/distance/LCSseq.hpp @@ -82,7 +82,7 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, int std::numeric_limits::max()> { private: friend detail::MultiSimilarityBase, int64_t, 0, std::numeric_limits::max()>; - friend detail::MultiNormalizedMetricBase>; + friend detail::MultiNormalizedMetricBase, int64_t>; constexpr static size_t get_vec_size() { diff --git a/rapidfuzz/distance/Levenshtein.hpp b/rapidfuzz/distance/Levenshtein.hpp index 305c6c4a..b952f415 100644 --- a/rapidfuzz/distance/Levenshtein.hpp +++ b/rapidfuzz/distance/Levenshtein.hpp @@ -302,7 +302,7 @@ struct MultiLevenshtein : public detail::MultiDistanceBase, int64_t, 0, std::numeric_limits::max()>; - friend detail::MultiNormalizedMetricBase>; + friend detail::MultiNormalizedMetricBase, int64_t>; constexpr static size_t get_vec_size() { diff --git a/rapidfuzz/distance/OSA.hpp b/rapidfuzz/distance/OSA.hpp index f74cc4d6..9214890b 100644 --- a/rapidfuzz/distance/OSA.hpp +++ b/rapidfuzz/distance/OSA.hpp @@ -117,7 +117,7 @@ struct MultiOSA : public detail::MultiDistanceBase, int64_t, 0, std::numeric_limits::max()> { private: friend detail::MultiDistanceBase, int64_t, 0, std::numeric_limits::max()>; - friend detail::MultiNormalizedMetricBase>; + friend detail::MultiNormalizedMetricBase, int64_t>; constexpr static size_t get_vec_size() { diff --git a/test/distance/tests-JaroWinkler.cpp b/test/distance/tests-JaroWinkler.cpp index da17e735..c3e48e1d 100644 --- a/test/distance/tests-JaroWinkler.cpp +++ b/test/distance/tests-JaroWinkler.cpp @@ -24,10 +24,12 @@ double jaro_winkler_similarity(const Sentence1& s1, const Sentence2& s2, double #ifdef RAPIDFUZZ_SIMD std::vector results(256 / 8); if (s1.size() <= 8) { - rapidfuzz::experimental::MultiJaroWinkler<8> simd_scorer(1, prefix_weight); - simd_scorer.insert(s1); + rapidfuzz::experimental::MultiJaroWinkler<8> simd_scorer(32, prefix_weight); + for(unsigned int i = 0; i < 32; ++i) + simd_scorer.insert(s1); simd_scorer.similarity(&results[0], results.size(), s2, score_cutoff); - REQUIRE(res1 == Approx(results[0])); + for(unsigned int i = 0; i < 32; ++i) + REQUIRE(res1 == Approx(results[i])); } if (s1.size() <= 16) { rapidfuzz::experimental::MultiJaroWinkler<16> simd_scorer(1, prefix_weight); @@ -89,10 +91,11 @@ double jaro_winkler_distance(const Sentence1& s1, const Sentence2& s2, double pr */ TEST_CASE("JaroWinklerTest") { - std::array names = {"james", "robert", "john", "michael", "william", + std::array names = {"james", "robert", "john", "michael", "william", "david", "joseph", "thomas", "charles", "mary", "patricia", "jennifer", "linda", "elizabeth", "barbara", - "susan", "jessica", "sarah", "karen", ""}; + "susan", "jessica", "sarah", "karen", "" + "aaaaaaaa", "aabaaab"}; SECTION("testFullResultWithScoreCutoff") {