Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TextServer] Fix get_word_breaks and its uses. #79054

Merged
merged 1 commit on Jun 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions doc/classes/TextServer.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1739,8 +1739,9 @@
When [param chars_per_line] is greater than zero, line break boundaries are returned instead.
[codeblock]
var ts = TextServerManager.get_primary_interface()
print(ts.string_get_word_breaks("Godot Engine")) # Prints [0, 5, 6, 12]
print(ts.string_get_word_breaks("Godot Engine", "en", 5)) # Prints [0, 5, 6, 11, 11, 12]
print(ts.string_get_word_breaks("The Godot Engine, 4")) # Prints [0, 3, 4, 9, 10, 16, 18, 19], which corresponds to the following substrings: "The", "Godot", "Engine", "4"
print(ts.string_get_word_breaks("The Godot Engine, 4", "en", 5)) # Prints [0, 3, 4, 9, 10, 15, 15, 19], which corresponds to the following substrings: "The", "Godot", "Engin", "e, 4"
print(ts.string_get_word_breaks("The Godot Engine, 4", "en", 10)) # Prints [0, 9, 10, 19], which corresponds to the following substrings: "The Godot", "Engine, 4"
[/codeblock]
</description>
</method>
Expand Down
2 changes: 1 addition & 1 deletion editor/debugger/script_editor_debugger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,7 @@ void ScriptEditorDebugger::_set_reason_text(const String &p_reason, MessageType
for (int i = 0; i < boundaries.size(); i += 2) {
const int start = boundaries[i];
const int end = boundaries[i + 1];
lines.append(p_reason.substr(start, end - start + 1));
lines.append(p_reason.substr(start, end - start));
}

reason->set_tooltip_text(String("\n").join(lines));
Expand Down
154 changes: 93 additions & 61 deletions modules/text_server_adv/text_server_adv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7048,10 +7048,10 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str

HashSet<int> breaks;
UErrorCode err = U_ZERO_ERROR;
UBreakIterator *bi = ubrk_open(UBRK_LINE, lang.ascii().get_data(), (const UChar *)utf16.get_data(), utf16.length(), &err);
UBreakIterator *bi = ubrk_open(UBRK_WORD, lang.ascii().get_data(), (const UChar *)utf16.get_data(), utf16.length(), &err);
if (U_SUCCESS(err)) {
while (ubrk_next(bi) != UBRK_DONE) {
int pos = _convert_pos(p_string, utf16, ubrk_current(bi)) - 1;
int pos = _convert_pos(p_string, utf16, ubrk_current(bi));
if (pos != p_string.length() - 1) {
breaks.insert(pos);
}
Expand All @@ -7061,79 +7061,111 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str

PackedInt32Array ret;

int line_start = 0;
int line_end = 0; // End of last word on current line.
int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word.
int word_length = 0;
if (p_chars_per_line > 0) {
int line_start = 0;
int last_break = -1;
int line_length = 0;

for (int i = 0; i < p_string.length(); i++) {
const char32_t c = p_string[i];
for (int i = 0; i < p_string.length(); i++) {
const char32_t c = p_string[i];

if (is_linebreak(c)) {
// Force newline.
ret.push_back(line_start);
ret.push_back(i);
line_start = i + 1;
line_end = line_start;
word_start = line_start;
word_length = 0;
} else if (c == 0xfffc) {
continue;
} else if ((u_ispunct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) {
// A whitespace ends current word.
if (word_length > 0) {
line_end = i - 1;
word_start = -1;
word_length = 0;
}
} else if (breaks.has(i)) {
// End current word, no space.
if (word_length > 0) {
line_end = i;
word_start = i + 1;
word_length = 0;
}
if (p_chars_per_line <= 0) {
ret.push_back(line_start);
ret.push_back(line_end + 1);
line_start = word_start;
line_end = line_start;
}
} else {
if (word_start == -1) {
word_start = i;
if (p_chars_per_line <= 0) {
bool is_lb = is_linebreak(c);
bool is_ws = is_whitespace(c);
bool is_p = (u_ispunct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || c == 0xfffc;

if (is_lb) {
if (line_length > 0) {
ret.push_back(line_start);
ret.push_back(line_end + 1);
line_start = word_start;
line_end = line_start;
ret.push_back(i);
}
line_start = i;
line_length = 0;
last_break = -1;
continue;
} else if (breaks.has(i) || is_ws || is_p) {
last_break = i;
}
word_length += 1;

if (p_chars_per_line > 0) {
if (word_length > p_chars_per_line) {
// Word too long: wrap before current character.
if (line_length == p_chars_per_line) {
if (last_break != -1) {
int last_break_w_spaces = last_break;
while (last_break > line_start && is_whitespace(p_string[last_break - 1])) {
last_break--;
}
if (line_start != last_break) {
ret.push_back(line_start);
ret.push_back(last_break);
}
while (last_break_w_spaces < p_string.length() && is_whitespace(p_string[last_break_w_spaces])) {
last_break_w_spaces++;
}
line_start = last_break_w_spaces;
if (last_break_w_spaces < i) {
line_length = i - last_break_w_spaces;
} else {
i = last_break_w_spaces;
line_length = 0;
}
} else {
ret.push_back(line_start);
ret.push_back(i);
line_start = i;
line_end = i;
line_length = 0;
}
last_break = -1;
}
line_length++;
}
if (line_length > 0) {
ret.push_back(line_start);
ret.push_back(p_string.length());
}
} else {
int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word.
int word_length = 0;

for (int i = 0; i < p_string.length(); i++) {
const char32_t c = p_string[i];

bool is_lb = is_linebreak(c);
bool is_ws = is_whitespace(c);
bool is_p = (u_ispunct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || c == 0xfffc;

if (word_start == -1) {
if (!is_lb && !is_ws && !is_p) {
word_start = i;
word_length = 1;
} else if (i - line_start + 1 > p_chars_per_line) {
// Line too long: wrap after the last word.
ret.push_back(line_start);
ret.push_back(line_end + 1);
line_start = word_start;
line_end = line_start;
}
continue;
}

if (is_lb) {
if (word_start != -1 && word_length > 0) {
ret.push_back(word_start);
ret.push_back(i);
}
word_start = -1;
word_length = 0;
} else if (breaks.has(i) || is_ws || is_p) {
if (word_start != -1 && word_length > 0) {
ret.push_back(word_start);
ret.push_back(i);
}
if (is_ws || is_p) {
word_start = -1;
} else {
word_start = i;
}
word_length = 0;
}

word_length++;
}
if (word_start != -1 && word_length > 0) {
ret.push_back(word_start);
ret.push_back(p_string.length());
}
}
if (line_start < p_string.length()) {
ret.push_back(line_start);
ret.push_back(p_string.length());
}

return ret;
}

Expand Down
132 changes: 86 additions & 46 deletions modules/text_server_fb/text_server_fb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4492,65 +4492,105 @@ String TextServerFallback::_string_to_title(const String &p_string, const String
PackedInt32Array TextServerFallback::_string_get_word_breaks(const String &p_string, const String &p_language, int64_t p_chars_per_line) const {
PackedInt32Array ret;

int line_start = 0;
int line_end = 0; // End of last word on current line.
int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word.
int word_length = 0;
if (p_chars_per_line > 0) {
int line_start = 0;
int last_break = -1;
int line_length = 0;

for (int i = 0; i < p_string.length(); i++) {
const char32_t c = p_string[i];
for (int i = 0; i < p_string.length(); i++) {
const char32_t c = p_string[i];

if (is_linebreak(c)) {
// Force newline.
ret.push_back(line_start);
ret.push_back(i);
line_start = i + 1;
line_end = line_start;
word_start = line_start;
word_length = 0;
} else if (c == 0xfffc) {
continue;
} else if ((is_punct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) {
// A whitespace ends current word.
if (word_length > 0) {
line_end = i - 1;
word_start = -1;
word_length = 0;
}
} else {
if (word_start == -1) {
word_start = i;
if (p_chars_per_line <= 0) {
bool is_lb = is_linebreak(c);
bool is_ws = is_whitespace(c);
bool is_p = (is_punct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || c == 0xfffc;

if (is_lb) {
if (line_length > 0) {
ret.push_back(line_start);
ret.push_back(line_end + 1);
line_start = word_start;
line_end = line_start;
ret.push_back(i);
}
line_start = i;
line_length = 0;
last_break = -1;
continue;
} else if (is_ws || is_p) {
last_break = i;
}
word_length += 1;

if (p_chars_per_line > 0) {
if (word_length > p_chars_per_line) {
// Word too long: wrap before current character.
if (line_length == p_chars_per_line) {
if (last_break != -1) {
int last_break_w_spaces = last_break;
while (last_break > line_start && is_whitespace(p_string[last_break - 1])) {
last_break--;
}
if (line_start != last_break) {
ret.push_back(line_start);
ret.push_back(last_break);
}
while (last_break_w_spaces < p_string.length() && is_whitespace(p_string[last_break_w_spaces])) {
last_break_w_spaces++;
}
line_start = last_break_w_spaces;
if (last_break_w_spaces < i) {
line_length = i - last_break_w_spaces;
} else {
i = last_break_w_spaces;
line_length = 0;
}
} else {
ret.push_back(line_start);
ret.push_back(i);
line_start = i;
line_end = i;
line_length = 0;
}
last_break = -1;
}
line_length++;
}
if (line_length > 0) {
ret.push_back(line_start);
ret.push_back(p_string.length());
}
} else {
int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word.
int word_length = 0;

for (int i = 0; i < p_string.length(); i++) {
const char32_t c = p_string[i];

bool is_lb = is_linebreak(c);
bool is_ws = is_whitespace(c);
bool is_p = (is_punct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || c == 0xfffc;

if (word_start == -1) {
if (!is_lb && !is_ws && !is_p) {
word_start = i;
word_length = 1;
} else if (i - line_start + 1 > p_chars_per_line) {
// Line too long: wrap after the last word.
ret.push_back(line_start);
ret.push_back(line_end + 1);
line_start = word_start;
line_end = line_start;
}
continue;
}

if (is_lb) {
if (word_start != -1 && word_length > 0) {
ret.push_back(word_start);
ret.push_back(i);
}
word_start = -1;
word_length = 0;
} else if (is_ws || is_p) {
if (word_start != -1 && word_length > 0) {
ret.push_back(word_start);
ret.push_back(i);
}
word_start = -1;
word_length = 0;
}

word_length++;
}
if (word_start != -1 && word_length > 0) {
ret.push_back(word_start);
ret.push_back(p_string.length());
}
}
if (line_start < p_string.length()) {
ret.push_back(line_start);
ret.push_back(p_string.length());
}
return ret;
}
Expand Down
8 changes: 7 additions & 1 deletion platform/linuxbsd/tts_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,12 +149,18 @@ void TTS_Linux::_speech_event(int p_msg_id, int p_type) {
}

PackedInt32Array breaks = TS->string_get_word_breaks(message.text, language);
int prev_end = -1;
for (int i = 0; i < breaks.size(); i += 2) {
const int start = breaks[i];
const int end = breaks[i + 1];
text += message.text.substr(start, end - start + 1);
if (prev_end != -1 && prev_end != start) {
text += message.text.substr(prev_end, start - prev_end);
}
text += message.text.substr(start, end - start);
text += "<mark name=\"" + String::num_int64(end, 10) + "\"/>";
prev_end = end;
}

spd_set_synthesis_voice(synth, message.voice.utf8().get_data());
spd_set_volume(synth, message.volume * 2 - 100);
spd_set_voice_pitch(synth, (message.pitch - 1) * 100);
Expand Down
Loading