Skip to content

Commit

Permalink
[update] fix melotts english generate.
Browse files Browse the repository at this point in the history
  • Loading branch information
Abandon-ht committed Dec 2, 2024
1 parent fb8feab commit 71fe7f2
Showing 1 changed file with 37 additions and 1 deletion.
38 changes: 37 additions & 1 deletion projects/llm_framework/main_melotts/src/runner/Lexicon.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,45 @@ class Lexicon {
return words;
}

// Reports whether `s` consists of exactly one ASCII letter ('A'-'Z' or 'a'-'z').
//
// Any string whose size is not 1 (including the empty string) yields false.
// The argument is taken by const reference so a call does not copy the string.
bool is_english(const std::string& s) {
    if (s.size() != 1)
        return false;
    const char c = s[0];
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

// Collapses every run of consecutive single-letter elements into one
// lowercase word.
//
// `splitted_text` is scanned from front to back. Each maximal run of
// elements for which is_english() returns true is concatenated, converted
// to lowercase and emitted as a single element. Every other element is
// copied to the output unchanged. Relative order is preserved and the
// input is not modified.
//
// Returns the merged sequence.
std::vector<std::string> merge_english(const std::vector<std::string>& splitted_text) {
    std::vector<std::string> words;
    // Merging can only shrink the element count, so the input size is an upper bound.
    words.reserve(splitted_text.size());

    auto it = splitted_text.begin();
    const auto last = splitted_text.end();
    while (it != last) {
        if (!is_english(*it)) {
            words.push_back(*it);
            ++it;
            continue;
        }

        // Build the merged word directly inside the output vector.
        words.emplace_back();
        std::string& word = words.back();
        while (it != last && is_english(*it)) {
            word += *it;
            ++it;
        }
        // The unsigned char parameter keeps the argument to std::tolower in its valid range.
        std::transform(word.begin(), word.end(), word.begin(),
                       [](unsigned char c) { return std::tolower(c); });
    }
    return words;
}

void convert(const std::string& text, std::vector<int>& phones, std::vector<int>& tones) {
auto splitted_text = splitEachChar(text);
for (auto c : splitted_text) {
auto zh_mix_en = merge_english(splitted_text);
for (auto c : zh_mix_en) {
std::string s{c};
if (s == "")
s = ",";
Expand Down

0 comments on commit 71fe7f2

Please sign in to comment.