Skip to content

Commit

Permalink
fix #523: {MarisaDict,DartsDict}::Match() does not check for exact match
Browse files Browse the repository at this point in the history
if query is longer than the longest word in the dictionary.
  • Loading branch information
lotem committed Dec 20, 2020
1 parent 7a60db1 commit f753bb5
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 2 deletions.
5 changes: 4 additions & 1 deletion src/DartsDict.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,13 @@ size_t DartsDict::KeyMaxLength() const { return maxLength; }

Optional<const DictEntry*> DartsDict::Match(const char* word,
size_t len) const {
if (len > maxLength) {
return Optional<const DictEntry*>::Null();
}
Darts::DoubleArray& dict = *internal->doubleArray;
Darts::DoubleArray::result_pair_type result;

dict.exactMatchSearch(word, result, (std::min)(maxLength, len));
dict.exactMatchSearch(word, result, len);
if (result.value != -1) {
return Optional<const DictEntry*>(
lexicon->At(static_cast<size_t>(result.value)));
Expand Down
12 changes: 12 additions & 0 deletions src/DartsDictTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/

#include "DartsDict.hpp"
#include "TestUtilsUTF8.hpp"
#include "TextDictTestBase.hpp"

namespace opencc {
Expand Down Expand Up @@ -56,4 +57,15 @@ TEST_F(DartsDictTest, Deserialization) {
TestDict(deserializedTextDict);
}

// Checks that Match() only reports exact matches: a key that the test
// dictionary is expected to contain is found, while a longer query that merely
// starts with that key is rejected.
TEST_F(DartsDictTest, ExactMatch) {
  // The second argument is the key length in bytes (4 characters, 12 bytes,
  // assuming this source file is UTF-8 encoded).
  auto there = dartsDict->Match("積羽沉舟", 12);
  // Fatal assertion: the entry obtained from Get() is dereferenced below, so
  // the test must stop here on a miss instead of continuing.
  ASSERT_FALSE(there.IsNull());
  auto dictEntry = there.Get();
  EXPECT_EQ(1, dictEntry->NumValues());
  EXPECT_EQ(utf8("羣輕折軸"), dictEntry->GetDefault());

  // Same key followed by four more characters (24 bytes in total); this
  // longer query must not be reported as a match.
  auto nowhere = dartsDict->Match("積羽沉舟衆口鑠金", 24);
  EXPECT_TRUE(nowhere.IsNull());
}

} // namespace opencc
5 changes: 4 additions & 1 deletion src/MarisaDict.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,12 @@ size_t MarisaDict::KeyMaxLength() const { return maxLength; }

Optional<const DictEntry*> MarisaDict::Match(const char* word,
size_t len) const {
if (len > maxLength) {
return Optional<const DictEntry*>::Null();
}
const marisa::Trie& trie = *internal->marisa;
marisa::Agent agent;
agent.set_query(word, (std::min)(maxLength, len));
agent.set_query(word, len);
if (trie.lookup(agent)) {
return Optional<const DictEntry*>(lexicon->At(agent.key().id()));
} else {
Expand Down
12 changes: 12 additions & 0 deletions src/MarisaDictTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/

#include "MarisaDict.hpp"
#include "TestUtilsUTF8.hpp"
#include "TextDictTestBase.hpp"

namespace opencc {
Expand Down Expand Up @@ -52,4 +53,15 @@ TEST_F(MarisaDictTest, Deserialization) {
}
}

// Checks that Match() only reports exact matches: a key that the test
// dictionary is expected to contain is found, while a longer query that merely
// starts with that key is rejected.
TEST_F(MarisaDictTest, ExactMatch) {
  // The second argument is the key length in bytes (4 characters, 12 bytes,
  // assuming this source file is UTF-8 encoded).
  auto there = dict->Match("積羽沉舟", 12);
  // Fatal assertion: the entry obtained from Get() is dereferenced below, so
  // the test must stop here on a miss instead of continuing.
  ASSERT_FALSE(there.IsNull());
  auto dictEntry = there.Get();
  EXPECT_EQ(1, dictEntry->NumValues());
  EXPECT_EQ(utf8("羣輕折軸"), dictEntry->GetDefault());

  // Same key followed by four more characters (24 bytes in total); this
  // longer query must not be reported as a match.
  auto nowhere = dict->Match("積羽沉舟衆口鑠金", 24);
  EXPECT_TRUE(nowhere.IsNull());
}

} // namespace opencc
12 changes: 12 additions & 0 deletions src/TextDictTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
* limitations under the License.
*/

#include "TestUtilsUTF8.hpp"
#include "TextDictTestBase.hpp"

namespace opencc {
Expand All @@ -39,4 +40,15 @@ TEST_F(TextDictTest, Deserialization) {
TestDict(deserialized);
}

// Checks that Match() only reports exact matches: a key that the test
// dictionary is expected to contain is found, while a longer query that merely
// starts with that key is rejected.
TEST_F(TextDictTest, ExactMatch) {
  // The second argument is the key length in bytes (4 characters, 12 bytes,
  // assuming this source file is UTF-8 encoded).
  auto there = textDict->Match("積羽沉舟", 12);
  // Fatal assertion: the entry obtained from Get() is dereferenced below, so
  // the test must stop here on a miss instead of continuing.
  ASSERT_FALSE(there.IsNull());
  auto dictEntry = there.Get();
  EXPECT_EQ(1, dictEntry->NumValues());
  EXPECT_EQ(utf8("羣輕折軸"), dictEntry->GetDefault());

  // Same key followed by four more characters (24 bytes in total); this
  // longer query must not be reported as a match.
  auto nowhere = textDict->Match("積羽沉舟衆口鑠金", 24);
  EXPECT_TRUE(nowhere.IsNull());
}

} // namespace opencc

0 comments on commit f753bb5

Please sign in to comment.