Skip to content

Commit

Permalink
Unicode normalization follow up, adding character navigation and seve…
Browse files Browse the repository at this point in the history
…ral fixes (#16622)

Fixes #16622
fixes #16640

Summary of the issue:
It has been discussed that normalization would also be helpful for character navigation. There's also an issue where character descriptions and symbol pronunciation didn't work correctly because normalization took place before symbol processing. Furthermore, for the UnicodeNormalizationOffsetConverter used for braille, it was discovered that diffing didn't turn out to be accurate enough.

Description of user facing changes
When normalization is enabled, there's an extra option Report normalized when navigating by character in the speech settings.
Added global commands for speech and braille normalization (without assigned gesture)
When normalization is enabled, characters will now always be normalized as well.
Braille Unicode normalization is more reliable now.
Description of development approach
Normalization is now always applied to speech, rather than only for object and text info speech. I also changed some helper functions to be able to report normalized when navigating by character.
Added a SuppressUnicodeNormalizationCommand that allows you to suppress global normalization within a speech sequence. This command is used when creating a spelling sequence, because spelling has its own normalization logic now. It also ensures that when spelling a character or providing a character description (i.e. NVDA+. double press), normalization does not occur. It can even be used to disable character normalization altogether if necessary.
Rewrote UnicodeNormalizationOffsetConverter to use a new function in NVDAHelper local that uses uniscribe to give a list of offsets for character boundaries. This allows us to split a string into glyphs and then apply normalization to every glyph, which results in more reliable offset calculation and less complex code.
  • Loading branch information
LeonarddeR authored Jun 11, 2024
1 parent 014a6e7 commit 6b366fe
Show file tree
Hide file tree
Showing 16 changed files with 513 additions and 187 deletions.
1 change: 1 addition & 0 deletions nvdaHelper/local/nvdaHelperLocal.def
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ EXPORTS
displayModel_requestTextChangeNotificationsForWindow
calculateWordOffsets
calculateCharacterOffsets
calculateCharacterBoundaries
findWindowWithClassInThread
registerUIAProperty
registerUIAAnnotationType
Expand Down
8 changes: 8 additions & 0 deletions nvdaHelper/local/nvdaHelperLocal.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,13 @@ LRESULT cancellableSendMessageTimeout(HWND hwnd, UINT Msg, WPARAM wParam, LPARAM
*/
void nvdaHelperLocal_initialize(bool secureMode);
void nvdaHelperLocal_terminate();
/*
* Calculate the start offsets for characters in a string.
* @param text: The text to calculate offsets for.
* @param textLength: The length of the provided text, encluding a terminating NULL character.
* @param offsets: An array of size textLength allocated by the caller to fill with offsets.
* @param offsetsCount: The number of offsets in the array after calculation.
*/
bool calculateCharacterBoundaries(const wchar_t* text, int textLength, int* offsets, int* offsetsCount);

#endif
77 changes: 53 additions & 24 deletions nvdaHelper/local/textUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,52 +14,82 @@ This license can be found at:

#include <windows.h>
#include <usp10.h>
#include <vector>
#include <common/log.h>

using namespace std;

// Text units accepted by _calculateUniscribeOffsets; any other value is rejected there as unsupported.
enum UNIT {
// Offsets are resolved using the fCharStop flags of the Uniscribe logical attributes.
UNIT_CHARACTER,
// Offsets are resolved at word granularity.
UNIT_WORD
};

/*
 * Run Uniscribe itemization and break analysis over a string.
 * @param text: The text to analyze.
 * @param textLength: The number of wchar_t units of text to analyze.
 * @return One SCRIPT_LOGATTR per analyzed unit of text,
 *   or an empty vector when text is null, textLength is not positive, or a Uniscribe call fails.
 */
vector<SCRIPT_LOGATTR> _getLogAttrArray(const wchar_t* text, int textLength) {
	if (textLength <= 0 || !text) {
		return {};
	}
	// ScriptItemize fails with E_INVALIDARG when cMaxItems is below 2,
	// so a one-character string still has to offer room for two items.
	// The item buffer must hold cMaxItems + 1 entries (the extra one is the terminal item).
	const int maxItems = textLength < 2 ? 2 : textLength;
	vector<SCRIPT_ITEM> items(maxItems + 1);
	int numItems = 0;
	if (ScriptItemize(text, textLength, maxItems, nullptr, nullptr, items.data(), &numItems) != S_OK || numItems == 0) {
		return {};
	}

	vector<SCRIPT_LOGATTR> logAttrArray(textLength);
	// Walk the items back to front: the last item ends at the end of the text,
	// and every other item ends where the item following it starts.
	int nextICharPos = textLength;
	for (int itemIndex = numItems - 1; itemIndex >= 0; --itemIndex) {
		const int iCharPos = items[itemIndex].iCharPos;
		const int iCharLength = nextICharPos - iCharPos;
		if (ScriptBreak(text + iCharPos, iCharLength, &(items[itemIndex].a), logAttrArray.data() + iCharPos) != S_OK) {
			return {};
		}
		// Without this update every item would be analyzed up to the end of the text,
		// letting earlier items overwrite the attributes of later items with the wrong script analysis.
		nextICharPos = iCharPos;
	}
	return logAttrArray;
}

bool calculateCharacterBoundaries(const wchar_t* text, int textLength, int* offsets, int* offsetsCount) {
if (!offsets) {
return false;
}
vector<SCRIPT_LOGATTR> logAttrArray = _getLogAttrArray(text, textLength);
if (logAttrArray.empty()) {
return false;
}
int count = 0;
for (int i = 0; i < textLength; ++i) {
if (logAttrArray[i].fCharStop) {
offsets[count++] = i;
}
}
*offsetsCount = count;
return true;
}

bool _calculateUniscribeOffsets(enum UNIT unit, wchar_t* text, int textLength, int offset, int* startOffset, int* endOffset) {
if(unit!=UNIT_CHARACTER&&unit!=UNIT_WORD) {
LOG_ERROR(L"Unsupported unit");
return false;
}
if(textLength<=0) return false;
if(offset<0) return false;
if(offset>=textLength) {
*startOffset=offset;
*endOffset=offset+1;
if (offset < 0 || !text) {
return false;
}
if (offset >= textLength) {
*startOffset = offset;
*endOffset = offset + 1;
return true;
}
SCRIPT_ITEM* pItems=new SCRIPT_ITEM[textLength+1];
int numItems=0;
if(ScriptItemize(text,textLength,textLength,NULL,NULL,pItems,&numItems)!=S_OK||numItems==0) {
delete[] pItems;
vector<SCRIPT_LOGATTR> logAttrArray = _getLogAttrArray(text, textLength);
if (logAttrArray.empty()) {
return false;
}
SCRIPT_LOGATTR* logAttrArray=new SCRIPT_LOGATTR[textLength];
int nextICharPos=textLength;
for(int itemIndex=numItems-1;itemIndex>=0;--itemIndex) {
int iCharPos=pItems[itemIndex].iCharPos;
int iCharLength=nextICharPos-iCharPos;
if(ScriptBreak(text+iCharPos,iCharLength,&(pItems[itemIndex].a),logAttrArray+iCharPos)!=S_OK) {
delete[] pItems;
delete[] logAttrArray;
return false;
}
}
delete[] pItems;
if(unit==UNIT_CHARACTER) {
for(int i=offset;i>=0;--i) {
if(logAttrArray[i].fCharStop) {
if (logAttrArray[i].fCharStop) {
*startOffset=i;
break;
}
}
for(int i=offset+1;i<textLength;++i) {
if(logAttrArray[i].fCharStop) {
if (logAttrArray[i].fCharStop) {
*endOffset=i;
break;
}
Expand Down Expand Up @@ -110,7 +140,6 @@ bool _calculateUniscribeOffsets(enum UNIT unit, wchar_t* text, int textLength, i
}
}
}
delete[] logAttrArray;
return true;
}

Expand Down
1 change: 1 addition & 0 deletions source/config/configSpec.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
symbolLevel = integer(default=100)
trustVoiceLanguage = boolean(default=true)
unicodeNormalization = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="disabled")
reportNormalizedForCharacterNavigation = boolean(default=true)
includeCLDR = boolean(default=True)
beepSpeechModePitch = integer(default=10000,min=50,max=11025)
outputDevice = string(default=default)
Expand Down
56 changes: 56 additions & 0 deletions source/globalCommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -3607,6 +3607,34 @@ def script_braille_cycleShowSelection(self, gesture: inputCore.InputGesture) ->
braille.handler.initialDisplay()
ui.message(msg)

@script(
	# Translators: Input help mode message for Braille Unicode normalization command.
	description=_("Cycle through the braille Unicode normalization states"),
	category=SCRCAT_BRAILLE
)
def script_braille_cycleUnicodeNormalization(self, gesture: inputCore.InputGesture) -> None:
	# Moves the braille Unicode normalization flag to its next state, stores it in the config
	# and announces the resulting state.
	confSection = config.conf["braille"]
	currentFlag: FeatureFlag = confSection["unicodeNormalization"]
	flagEnum: BoolFlag = currentFlag.enumClassType
	memberCount = len([member.value for member in flagEnum])
	# The modulo wraps the last member back around; the result always lies in 1..memberCount.
	upcomingValue = (currentFlag.value.value % memberCount) + 1
	upcomingName: str = flagEnum(upcomingValue).name
	config.conf["braille"]["unicodeNormalization"] = upcomingName
	# Read the flag back from the config so the announcement reflects what was actually stored.
	storedFlag = config.conf["braille"]["unicodeNormalization"]
	if not storedFlag.isDefault():
		# Translators: Used when reporting braille Unicode normalization state
		# (disabled or enabled).
		msg = _("Braille Unicode normalization {state}").format(
			state=BoolFlag[upcomingName].displayString
		)
	else:
		# Translators: Used when reporting braille Unicode normalization state
		# (default behavior).
		msg = _("Braille Unicode normalization default ({default})").format(
			default=storedFlag.behaviorOfDefault.displayString
		)
	ui.message(msg)

@script(
description=_(
# Translators: Input help mode message for report clipboard text command.
Expand Down Expand Up @@ -4383,6 +4411,34 @@ def script_toggleReportCLDR(self, gesture):
characterProcessing.clearSpeechSymbols()
ui.message(state)

@script(
	# Translators: Input help mode message for speech Unicode normalization command.
	description=_("Cycle through the speech Unicode normalization states"),
	category=SCRCAT_SPEECH
)
def script_speech_cycleUnicodeNormalization(self, gesture: inputCore.InputGesture) -> None:
	# Moves the speech Unicode normalization flag to its next state, stores it in the config
	# and announces the resulting state.
	confSection = config.conf["speech"]
	currentFlag: FeatureFlag = confSection["unicodeNormalization"]
	flagEnum: BoolFlag = currentFlag.enumClassType
	memberCount = len([member.value for member in flagEnum])
	# The modulo wraps the last member back around; the result always lies in 1..memberCount.
	upcomingValue = (currentFlag.value.value % memberCount) + 1
	upcomingName: str = flagEnum(upcomingValue).name
	config.conf["speech"]["unicodeNormalization"] = upcomingName
	# Read the flag back from the config so the announcement reflects what was actually stored.
	storedFlag = config.conf["speech"]["unicodeNormalization"]
	if not storedFlag.isDefault():
		# Translators: Used when reporting speech Unicode normalization state
		# (disabled or enabled).
		msg = _("Speech Unicode normalization {state}").format(
			state=BoolFlag[upcomingName].displayString
		)
	else:
		# Translators: Used when reporting speech Unicode normalization state
		# (default behavior).
		msg = _("Speech Unicode normalization default ({default})").format(
			default=storedFlag.behaviorOfDefault.displayString
		)
	ui.message(msg)

_tempEnableScreenCurtain = True
_waitingOnScreenCurtainWarningDialog: Optional[wx.Dialog] = None
_toggleScreenCurtainMessage: Optional[str] = None
Expand Down
27 changes: 16 additions & 11 deletions source/gui/nvdaControls.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: UTF-8 -*-
# A part of NonVisual Desktop Access (NVDA)
# Copyright (C) 2016-2024 NV Access Limited, Derek Riemer, Cyrille Bougot, Luke Davis
# Copyright (C) 2016-2024 NV Access Limited, Derek Riemer, Cyrille Bougot, Luke Davis, Leonard de Ruijter
# This file is covered by the GNU General Public License.
# See the file COPYING for more details.
import collections
Expand Down Expand Up @@ -426,16 +425,18 @@ def __init__(
style=0,
validator=wx.DefaultValidator,
name=wx.ChoiceNameStr,
onChoiceEventHandler: typing.Callable[[wx.CommandEvent], None] | None = None,
):
"""
@param parent: The parent window.
@param keyPath: The list of keys required to get to the config value.
@param conf: The config.conf object.
@param pos: The position of the control. Forwarded to wx.Choice
@param size: The size of the control. Forwarded to wx.Choice
@param style: The style of the control. Forwarded to wx.Choice
@param validator: The validator for the control. Forwarded to wx.Choice
@param name: The name of the control. Forwarded to wx.Choice
:param parent: The parent window.
:param keyPath: The list of keys required to get to the config value.
:param conf: The config.conf object.
:param pos: The position of the control. Forwarded to wx.Choice
:param size: The size of the control. Forwarded to wx.Choice
:param style: The style of the control. Forwarded to wx.Choice
:param validator: The validator for the control. Forwarded to wx.Choice
:param name: The name of the control. Forwarded to wx.Choice
:param onChoiceEventHandler: Event handler bound for EVT_CHOICE
"""
self._confPath = keyPath
self._conf = conf
Expand All @@ -462,7 +463,11 @@ def __init__(
validator=validator,
name=name,
)

if onChoiceEventHandler is not None:
self.Bind(
wx.EVT_CHOICE,
onChoiceEventHandler
)
self.SetSelection(self._getChoiceIndex(configValue.value))
self.defaultValue = self._getConfSpecDefaultValue()
"""The default value of the config spec. Not the "behavior of default".
Expand Down
24 changes: 24 additions & 0 deletions source/gui/settingsDialogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1597,9 +1597,24 @@ def makeSettings(self, settingsSizer):
wxCtrlClass=nvdaControls.FeatureFlagCombo,
keyPath=["speech", "unicodeNormalization"],
conf=config.conf,
onChoiceEventHandler=self._onUnicodeNormalizationChange,
)
self.bindHelpEvent("SpeechUnicodeNormalization", self.unicodeNormalizationCombo)

# Translators: This is the label for a checkbox in the
# speech settings panel.
reportNormalizedForCharacterNavigationText = _("Report '&Normalized' when navigating by character")
self.reportNormalizedForCharacterNavigationCheckBox = settingsSizerHelper.addItem(
wx.CheckBox(self, label=reportNormalizedForCharacterNavigationText)
)
self.bindHelpEvent(
"SpeechReportNormalizedForCharacterNavigation",
self.reportNormalizedForCharacterNavigationCheckBox
)
self.reportNormalizedForCharacterNavigationCheckBox.SetValue(
config.conf["speech"]["reportNormalizedForCharacterNavigation"]
)

includeCLDRText = _(
# Translators: This is the label for a checkbox in the
# voice settings panel (if checked, data from the unicode CLDR will be used
Expand Down Expand Up @@ -1713,6 +1728,9 @@ def onSave(self):
].value
config.conf["speech"]["trustVoiceLanguage"] = self.trustVoiceLanguageCheckbox.IsChecked()
self.unicodeNormalizationCombo.saveCurrentValueToConf()
config.conf["speech"]["reportNormalizedForCharacterNavigation"] = (
self.reportNormalizedForCharacterNavigationCheckBox.IsChecked()
)
currentIncludeCLDR = config.conf["speech"]["includeCLDR"]
config.conf["speech"]["includeCLDR"] = newIncludeCldr = self.includeCLDRCheckbox.IsChecked()
if currentIncludeCLDR is not newIncludeCldr:
Expand Down Expand Up @@ -1753,6 +1771,12 @@ def _onSpeechModesListChange(self, evt: wx.CommandEvent):
+ [self._allSpeechModes.index(speech.SpeechMode.talk)]
)

def _onUnicodeNormalizationChange(self, evt: wx.CommandEvent):
	"""Handle a selection change in the speech Unicode normalization combo box.
	The "report normalized" check box is enabled only while the flag currently selected in the combo is truthy.
	"""
	# Skip first so the event continues to be processed by other handlers.
	evt.Skip()
	selectedFlag = self.unicodeNormalizationCombo._getControlCurrentFlag()
	shouldEnable = bool(selectedFlag)
	self.reportNormalizedForCharacterNavigationCheckBox.Enable(shouldEnable)

def isValid(self) -> bool:
enabledSpeechModes = self.speechModesList.CheckedItems
if len(enabledSpeechModes) < 2:
Expand Down
20 changes: 19 additions & 1 deletion source/speech/commands.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# -*- coding: UTF-8 -*-
# A part of NonVisual Desktop Access (NVDA)
# This file is covered by the GNU General Public License.
# See the file COPYING for more details.
Expand Down Expand Up @@ -202,6 +201,25 @@ class EndUtteranceCommand(SpeechCommand):
def __repr__(self):
return "EndUtteranceCommand()"


class SuppressUnicodeNormalizationCommand(SpeechCommand):
	"""Switches suppression of Unicode normalization on or off from a given point in a speech sequence.
	With state set to True, normalization is suppressed for the text that follows the command.
	With state set to False, the regular normalization behavior applies again.
	The command has no effect when normalization is disabled.
	"""

	# True to suppress normalization for subsequent text, False to stop suppressing it.
	state: bool

	def __init__(self, state: bool = True):
		"""
		:param state: True suppresses normalization, False lifts the suppression.
		"""
		self.state = state

	def __repr__(self):
		return f"SuppressUnicodeNormalizationCommand({self.state!r})"


class BaseProsodyCommand(SynthParamCommand):
"""Base class for commands which change voice prosody; i.e. pitch, rate, etc.
The change to the setting is specified using either an offset or a multiplier, but not both.
Expand Down
3 changes: 3 additions & 0 deletions source/speech/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# Commands that are used in this file.
EndUtteranceCommand,
LangChangeCommand,
SuppressUnicodeNormalizationCommand,
SynthParamCommand,
BaseCallbackCommand,
ConfigProfileTriggerCommand,
Expand Down Expand Up @@ -363,6 +364,8 @@ def _processSpeechSequence(self, inSeq: SpeechSequence):
continue
if isinstance(command, SynthParamCommand):
paramTracker.update(command)
if isinstance(command, SuppressUnicodeNormalizationCommand):
continue # Not handled by speech manager
outSeq.append(command)
# Add the last sequence and make sure the sequence ends the utterance.
self._ensureEndUtterance(outSeq, outSeqs, paramsToReplay, paramTracker)
Expand Down
Loading

0 comments on commit 6b366fe

Please sign in to comment.