Skip to content

Commit

Permalink
fix compiler errors
Browse files Browse the repository at this point in the history
  • Loading branch information
maxbachmann committed Sep 3, 2022
1 parent 21fc816 commit 9c3ac35
Show file tree
Hide file tree
Showing 12 changed files with 216 additions and 67 deletions.
54 changes: 28 additions & 26 deletions extras/rapidfuzz_amalgamated.hpp
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
// SPDX-License-Identifier: MIT
// RapidFuzz v1.0.2
// Generated: 2022-09-03 19:36:32.122526
// Generated: 2022-09-03 21:47:43.168879
// ----------------------------------------------------------
// This file is an amalgamation of multiple different files.
// You probably shouldn't edit it directly.
// ----------------------------------------------------------
#ifndef RAPIDFUZZ_AMALGAMATED_HPP_INCLUDED
#define RAPIDFUZZ_AMALGAMATED_HPP_INCLUDED

#include <algorithm>
#include <cmath>

#include <cassert>
Expand All @@ -34,11 +35,10 @@ struct GrowingHashmap {
using size_type = unsigned int;

private:
static constexpr value_type _empty_val = value_type();
static constexpr size_type min_size = 8;
struct MapElem {
key_type key;
value_type value = _empty_val;
value_type value = value_type();
};

int used;
Expand Down Expand Up @@ -95,7 +95,7 @@ struct GrowingHashmap {

value_type get(key_type key) const noexcept
{
if (m_map == NULL) return _empty_val;
if (m_map == NULL) return value_type();

return m_map[lookup(key)].value;
}
Expand All @@ -106,7 +106,7 @@ struct GrowingHashmap {

size_t i = lookup(key);

if (m_map[i].value == _empty_val) {
if (m_map[i].value == value_type()) {
/* resize when 2/3 full */
if (++fill * 3 >= (mask + 1) * 2) {
grow((used + 1) * 2);
Expand Down Expand Up @@ -136,12 +136,12 @@ struct GrowingHashmap {
size_t hash = static_cast<size_t>(key);
size_t i = hash & static_cast<size_t>(mask);

if (m_map[i].value == _empty_val || m_map[i].key == key) return i;
if (m_map[i].value == value_type() || m_map[i].key == key) return i;

size_t perturb = hash;
while (true) {
i = (i * 5 + perturb + 1) & static_cast<size_t>(mask);
if (m_map[i].value == _empty_val || m_map[i].key == key) return i;
if (m_map[i].value == value_type() || m_map[i].key == key) return i;

perturb >>= 5;
}
Expand All @@ -160,7 +160,7 @@ struct GrowingHashmap {
mask = newSize - 1;

for (int i = 0; used > 0; i++)
if (oldMap[i].value != _empty_val) {
if (oldMap[i].value != value_type()) {
size_t j = lookup(oldMap[i].key);

m_map[j].key = oldMap[i].key;
Expand Down Expand Up @@ -382,9 +382,9 @@ struct BitMatrix {
} // namespace detail
} // namespace rapidfuzz

#include <iostream>
#include <iterator>
#include <limits>
#include <ostream>
#include <stdexcept>
#include <vector>

Expand Down Expand Up @@ -2249,6 +2249,20 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase<T> {
namespace rapidfuzz {
namespace detail {

/**
 * @brief Thin wrapper around an integer row index whose default value is -1.
 *
 * Used as the mapped type of the `last_row_id` hashmap in
 * damerau_levenshtein_distance_zhao. Two RowIds compare equal exactly when
 * their `val` members compare equal.
 *
 * NOTE(review): -1 presumably marks "no row recorded yet" so that a
 * value-initialized RowId can act as the hashmap's empty value — confirm
 * against the hashmap implementation.
 *
 * @tparam IntType integer type used to store the row index
 */
template <typename IntType>
struct RowId {
    IntType val = -1;

    /** Equality is defined purely by the stored index. */
    friend bool operator==(const RowId& a, const RowId& b)
    {
        return a.val == b.val;
    }

    /** Exact negation of operator==. */
    friend bool operator!=(const RowId& a, const RowId& b)
    {
        return !(a.val == b.val);
    }
};

/*
* based on the paper
* "Linear space string correction algorithm using the Damerau-Levenshtein distance"
Expand All @@ -2262,20 +2276,7 @@ int64_t damerau_levenshtein_distance_zhao(Range<InputIt1> s1, Range<InputIt2> s2
IntType maxVal = static_cast<IntType>(std::max(len1, len2) + 1);
assert(std::numeric_limits<IntType>::max() > maxVal);

struct RowId {
IntType val = -1;
bool operator==(const RowId& other)
{
return val == other.val;
}

bool operator!=(const RowId& other)
{
return !(*this == other);
}
};

HybridGrowingHashmap<typename Range<InputIt1>::value_type, RowId> last_row_id;
HybridGrowingHashmap<typename Range<InputIt1>::value_type, RowId<IntType>> last_row_id;
size_t size = static_cast<size_t>(s2.size() + 2);
assume(size != 0);
std::vector<IntType> FR_arr(size, maxVal);
Expand Down Expand Up @@ -2489,7 +2490,7 @@ struct CachedDamerauLevenshtein : public detail::CachedDistanceBase<CachedDamera
template <typename InputIt2>
int64_t maximum(detail::Range<InputIt2> s2) const
{
return std::max(static_cast<int64_t>(s1.size()), s2.size());
return std::max(static_cast<ptrdiff_t>(s1.size()), s2.size());
}

template <typename InputIt2>
Expand Down Expand Up @@ -3292,6 +3293,7 @@ class LCSseq : public SimilarityBase<LCSseq> {
} // namespace detail
} // namespace rapidfuzz

#include <algorithm>
#include <cmath>
#include <limits>

Expand Down Expand Up @@ -3379,7 +3381,7 @@ struct CachedLCSseq : detail::CachedSimilarityBase<CachedLCSseq<CharT1>> {
template <typename InputIt2>
int64_t maximum(detail::Range<InputIt2> s2) const
{
return std::max(static_cast<int64_t>(s1.size()), s2.size());
return std::max(static_cast<ptrdiff_t>(s1.size()), s2.size());
}

template <typename InputIt2>
Expand Down Expand Up @@ -3481,7 +3483,7 @@ template <typename InputIt1, typename InputIt2>
int64_t indel_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
int64_t score_cutoff = 0.0)
{
return detail::Indel::normalized_distance(first1, last1, first2, last2, score_cutoff);
return detail::Indel::similarity(first1, last1, first2, last2, score_cutoff);
}

template <typename Sentence1, typename Sentence2>
Expand Down
13 changes: 6 additions & 7 deletions rapidfuzz/details/GrowingHashmap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,10 @@ struct GrowingHashmap {
using size_type = unsigned int;

private:
static constexpr value_type _empty_val = value_type();
static constexpr size_type min_size = 8;
struct MapElem {
key_type key;
value_type value = _empty_val;
value_type value = value_type();
};

int used;
Expand Down Expand Up @@ -80,7 +79,7 @@ struct GrowingHashmap {

value_type get(key_type key) const noexcept
{
if (m_map == NULL) return _empty_val;
if (m_map == NULL) return value_type();

return m_map[lookup(key)].value;
}
Expand All @@ -91,7 +90,7 @@ struct GrowingHashmap {

size_t i = lookup(key);

if (m_map[i].value == _empty_val) {
if (m_map[i].value == value_type()) {
/* resize when 2/3 full */
if (++fill * 3 >= (mask + 1) * 2) {
grow((used + 1) * 2);
Expand Down Expand Up @@ -121,12 +120,12 @@ struct GrowingHashmap {
size_t hash = static_cast<size_t>(key);
size_t i = hash & static_cast<size_t>(mask);

if (m_map[i].value == _empty_val || m_map[i].key == key) return i;
if (m_map[i].value == value_type() || m_map[i].key == key) return i;

size_t perturb = hash;
while (true) {
i = (i * 5 + perturb + 1) & static_cast<size_t>(mask);
if (m_map[i].value == _empty_val || m_map[i].key == key) return i;
if (m_map[i].value == value_type() || m_map[i].key == key) return i;

perturb >>= 5;
}
Expand All @@ -145,7 +144,7 @@ struct GrowingHashmap {
mask = newSize - 1;

for (int i = 0; used > 0; i++)
if (oldMap[i].value != _empty_val) {
if (oldMap[i].value != value_type()) {
size_t j = lookup(oldMap[i].key);

m_map[j].key = oldMap[i].key;
Expand Down
2 changes: 1 addition & 1 deletion rapidfuzz/details/Range.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

#pragma once

#include <iostream>
#include <iterator>
#include <limits>
#include <ostream>
#include <stdexcept>
#include <vector>

Expand Down
3 changes: 2 additions & 1 deletion rapidfuzz/distance/DamerauLevenshtein.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: MIT */
/* Copyright © 2022-present Max Bachmann */

#include <algorithm>
#include <cmath>
#include <rapidfuzz/distance/DamerauLevenshtein_impl.hpp>

Expand Down Expand Up @@ -125,7 +126,7 @@ struct CachedDamerauLevenshtein : public detail::CachedDistanceBase<CachedDamera
template <typename InputIt2>
int64_t maximum(detail::Range<InputIt2> s2) const
{
return std::max(static_cast<int64_t>(s1.size()), s2.size());
return std::max(static_cast<ptrdiff_t>(s1.size()), s2.size());
}

template <typename InputIt2>
Expand Down
29 changes: 15 additions & 14 deletions rapidfuzz/distance/DamerauLevenshtein_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,20 @@
namespace rapidfuzz {
namespace detail {

/**
 * @brief Thin wrapper around an integer row index whose default value is -1.
 *
 * Used as the mapped type of the `last_row_id` hashmap in
 * damerau_levenshtein_distance_zhao. Two RowIds compare equal exactly when
 * their `val` members compare equal.
 *
 * NOTE(review): -1 presumably marks "no row recorded yet" so that a
 * value-initialized RowId can act as the hashmap's empty value — confirm
 * against the hashmap implementation.
 *
 * @tparam IntType integer type used to store the row index
 */
template <typename IntType>
struct RowId {
    IntType val = -1;

    /** Equality is defined purely by the stored index. */
    friend bool operator==(const RowId& a, const RowId& b)
    {
        return a.val == b.val;
    }

    /** Exact negation of operator==. */
    friend bool operator!=(const RowId& a, const RowId& b)
    {
        return !(a.val == b.val);
    }
};

/*
* based on the paper
* "Linear space string correction algorithm using the Damerau-Levenshtein distance"
Expand All @@ -29,20 +43,7 @@ int64_t damerau_levenshtein_distance_zhao(Range<InputIt1> s1, Range<InputIt2> s2
IntType maxVal = static_cast<IntType>(std::max(len1, len2) + 1);
assert(std::numeric_limits<IntType>::max() > maxVal);

struct RowId {
IntType val = -1;
bool operator==(const RowId& other)
{
return val == other.val;
}

bool operator!=(const RowId& other)
{
return !(*this == other);
}
};

HybridGrowingHashmap<typename Range<InputIt1>::value_type, RowId> last_row_id;
HybridGrowingHashmap<typename Range<InputIt1>::value_type, RowId<IntType>> last_row_id;
size_t size = static_cast<size_t>(s2.size() + 2);
assume(size != 0);
std::vector<IntType> FR_arr(size, maxVal);
Expand Down
2 changes: 1 addition & 1 deletion rapidfuzz/distance/Indel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ template <typename InputIt1, typename InputIt2>
int64_t indel_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
int64_t score_cutoff = 0.0)
{
return detail::Indel::normalized_distance(first1, last1, first2, last2, score_cutoff);
return detail::Indel::similarity(first1, last1, first2, last2, score_cutoff);
}

template <typename Sentence1, typename Sentence2>
Expand Down
3 changes: 2 additions & 1 deletion rapidfuzz/distance/LCSseq.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#pragma once
#include <rapidfuzz/distance/LCSseq_impl.hpp>

#include <algorithm>
#include <cmath>
#include <limits>

Expand Down Expand Up @@ -91,7 +92,7 @@ struct CachedLCSseq : detail::CachedSimilarityBase<CachedLCSseq<CharT1>> {
template <typename InputIt2>
int64_t maximum(detail::Range<InputIt2> s2) const
{
return std::max(static_cast<int64_t>(s1.size()), s2.size());
return std::max(static_cast<ptrdiff_t>(s1.size()), s2.size());
}

template <typename InputIt2>
Expand Down
45 changes: 43 additions & 2 deletions test/distance/tests-DamerauLevenshtein.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,45 @@ int64_t damerau_levenshtein_distance(const Sentence1& s1, const Sentence2& s2,
int64_t max = std::numeric_limits<int64_t>::max())
{
int64_t res1 = rapidfuzz::experimental::damerau_levenshtein_distance(s1, s2, max);
int64_t res2 = rapidfuzz::experimental::damerau_levenshtein_distance(s1.begin(), s1.end(), s2.begin(),
s2.end(), max);
rapidfuzz::experimental::CachedDamerauLevenshtein<typename Sentence1::value_type> scorer(s1);
int64_t res2 = scorer.distance(s2, max);
int64_t res3 = scorer.distance(s2, max);
int64_t res4 = scorer.distance(s2.begin(), s2.end(), max);
REQUIRE(res1 == res2);
REQUIRE(res1 == res3);
REQUIRE(res1 == res4);
return res1;
}

/*
 * Test helper: computes the Damerau-Levenshtein similarity of s1 and s2
 * through four entry points and REQUIREs that they all agree:
 *   res1 - free function taking both sentences
 *   res2 - free function taking begin/end iterator pairs
 *   res3 - CachedDamerauLevenshtein scorer built from s1, called with s2
 *   res4 - the same scorer, called with s2's begin/end iterators
 *
 * s1  - first sequence; also used to construct the cached scorer
 * s2  - second sequence
 * max - forwarded unchanged as the last argument of every call
 *       NOTE(review): named `max` but defaults to 0; for a similarity this
 *       presumably acts as a lower score cutoff - confirm against the API.
 *
 * Returns res1, the result of the sentence-based free function.
 */
template <typename Sentence1, typename Sentence2>
int64_t damerau_levenshtein_similarity(const Sentence1& s1, const Sentence2& s2, int64_t max = 0)
{
int64_t res1 = rapidfuzz::experimental::damerau_levenshtein_similarity(s1, s2, max);
int64_t res2 = rapidfuzz::experimental::damerau_levenshtein_similarity(s1.begin(), s1.end(), s2.begin(),
s2.end(), max);
/* the scorer is templated on s1's element type and is constructed from s1 only */
rapidfuzz::experimental::CachedDamerauLevenshtein<typename Sentence1::value_type> scorer(s1);
int64_t res3 = scorer.similarity(s2, max);
int64_t res4 = scorer.similarity(s2.begin(), s2.end(), max);
/* every entry point is compared against res1 with exact integer equality */
REQUIRE(res1 == res2);
REQUIRE(res1 == res3);
REQUIRE(res1 == res4);
return res1;
}

/*
 * Test helper: computes the normalized Damerau-Levenshtein distance of s1 and
 * s2 through four entry points and REQUIREs that they all agree:
 *   res1 - free function taking both sentences
 *   res2 - free function taking begin/end iterator pairs
 *   res3 - CachedDamerauLevenshtein scorer built from s1, called with s2
 *   res4 - the same scorer, called with s2's begin/end iterators
 *
 * s1           - first sequence; also used to construct the cached scorer
 * s2           - second sequence
 * score_cutoff - forwarded unchanged as the last argument of every call
 *                (defaults to 1.0)
 *
 * Returns res1, the result of the sentence-based free function.
 */
template <typename Sentence1, typename Sentence2>
double damerau_levenshtein_normalized_distance(const Sentence1& s1, const Sentence2& s2,
double score_cutoff = 1.0)
{
double res1 = rapidfuzz::experimental::damerau_levenshtein_normalized_distance(s1, s2, score_cutoff);
double res2 = rapidfuzz::experimental::damerau_levenshtein_normalized_distance(
s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff);
/* the scorer is templated on s1's element type and is constructed from s1 only */
rapidfuzz::experimental::CachedDamerauLevenshtein<typename Sentence1::value_type> scorer(s1);
double res3 = scorer.normalized_distance(s2, score_cutoff);
double res4 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff);
/* results are doubles, so they are compared via Catch::Approx with epsilon 0.0001 instead of == */
REQUIRE(res1 == Catch::Approx(res2).epsilon(0.0001));
REQUIRE(res1 == Catch::Approx(res3).epsilon(0.0001));
REQUIRE(res1 == Catch::Approx(res4).epsilon(0.0001));
return res1;
}

Expand All @@ -33,9 +69,14 @@ double damerau_levenshtein_normalized_similarity(const Sentence1& s1, const Sent
double score_cutoff = 0.0)
{
double res1 = rapidfuzz::experimental::damerau_levenshtein_normalized_similarity(s1, s2, score_cutoff);
double res2 = rapidfuzz::experimental::damerau_levenshtein_normalized_similarity(
s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff);
rapidfuzz::experimental::CachedDamerauLevenshtein<typename Sentence1::value_type> scorer(s1);
double res2 = scorer.normalized_similarity(s2, score_cutoff);
double res3 = scorer.normalized_similarity(s2, score_cutoff);
double res4 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff);
REQUIRE(res1 == Catch::Approx(res2).epsilon(0.0001));
REQUIRE(res1 == Catch::Approx(res3).epsilon(0.0001));
REQUIRE(res1 == Catch::Approx(res4).epsilon(0.0001));
return res1;
}

Expand Down
Loading

0 comments on commit 9c3ac35

Please sign in to comment.