Skip to content

Commit

Permalink
Merge pull request #19 from kuke/fix_decoder_dev
Browse files Browse the repository at this point in the history
Fix some problems in the ctc beam search decoder
  • Loading branch information
Yibing Liu authored Nov 17, 2017
2 parents 994c591 + 5fdd0d0 commit e1013dc
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 14 deletions.
2 changes: 1 addition & 1 deletion decoders/swig/path_trie.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ PathTrie* PathTrie::get_path_trie(int new_char, bool reset) {
} else {
if (has_dictionary_) {
matcher_->SetState(dictionary_state_);
bool found = matcher_->Find(new_char);
bool found = matcher_->Find(new_char + 1);
if (!found) {
// Adding this character would produce a word that is not in the dictionary
auto FSTZERO = fst::TropicalWeight::Zero();
Expand Down
16 changes: 6 additions & 10 deletions decoders/swig/scorer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,15 @@ void Scorer::set_char_map(const std::vector<std::string>& char_list) {
char_list_ = char_list;
char_map_.clear();

// Set the char map for the FST for spelling correction
for (size_t i = 0; i < char_list_.size(); i++) {
if (char_list_[i] == " ") {
SPACE_ID_ = i;
char_map_[' '] = i;
} else if (char_list_[i].size() == 1) {
char_map_[char_list_[i][0]] = i;
}
// The initial state of the FST is state 0, so the character indices used
// in the FST must start from 1 to avoid a conflict with the initial
// state; otherwise the decoder would produce wrong results.
char_map_[char_list_[i]] = i + 1;
}
}

Expand Down Expand Up @@ -193,17 +195,11 @@ std::vector<std::string> Scorer::make_ngram(PathTrie* prefix) {

void Scorer::fill_dictionary(bool add_space) {
fst::StdVectorFst dictionary;
// First reverse char_list so ints can be accessed by chars
std::unordered_map<std::string, int> char_map;
for (size_t i = 0; i < char_list_.size(); i++) {
char_map[char_list_[i]] = i;
}

// Convert each unigram to its integer sequence and insert it into the trie
int dict_size = 0;
for (const auto& word : vocabulary_) {
bool added = add_word_to_dictionary(
word, char_map, add_space, SPACE_ID_, &dictionary);
word, char_map_, add_space, SPACE_ID_ + 1, &dictionary);
dict_size += added ? 1 : 0;
}

Expand Down
2 changes: 1 addition & 1 deletion decoders/swig/scorer.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class Scorer {

int SPACE_ID_;
std::vector<std::string> char_list_;
std::unordered_map<char, int> char_map_;
std::unordered_map<std::string, int> char_map_;

std::vector<std::string> vocabulary_;
};
Expand Down
2 changes: 1 addition & 1 deletion decoders/swig/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def compile_test(header, library):

setup(
name='swig_decoders',
version='1.0',
version='1.1',
description="""CTC decoders""",
ext_modules=decoders_module,
py_modules=['swig_decoders'], )
2 changes: 1 addition & 1 deletion setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ if [ $? != 0 ]; then
fi

# install decoders
python -c "import swig_decoders"
python -c "import pkg_resources; pkg_resources.require(\"swig_decoders==1.1\")"
if [ $? != 0 ]; then
cd decoders/swig > /dev/null
sh setup.sh
Expand Down

0 comments on commit e1013dc

Please sign in to comment.