Unicode normalization follow up, adding character navigation and seve…

…ral fixes (#16622) Fixes #16622 fixes #16640 Summary of the issue: It has been discussed that normalization would also be helpful for character navigation. There's also an issue where character descriptions and symbol pronunciation didn't work correctly because normalization took place before symbol processing. Furthermore, for the UnicodeNormalizationOffsetConverter used for braille, it was discovered that diffing didn't turn out to be accurate enough. Description of user-facing changes: When normalization is enabled, there's an extra option, "Report 'Normalized' when navigating by character", in the speech settings. Added global commands for speech and braille normalization (without assigned gesture). When normalization is enabled, characters will now always be normalized as well. Braille Unicode normalization is more reliable now. Description of development approach: Normalization is now always applied to speech, rather than only for object and text info speech. I also changed some helper functions to be able to report "normalized" when navigating by character. Added a SuppressUnicodeNormalizationCommand that allows you to suppress global normalization within a speech sequence. This command is used when creating a spelling sequence, because spelling has its own normalization logic now. It also ensures that when spelling a character or providing a character description (i.e. NVDA+. double press), normalization does not occur. It can even be used to disable character normalization altogether if necessary. Rewrote UnicodeNormalizationOffsetConverter to use a new function in NVDAHelper local that uses Uniscribe to give a list of offsets for character boundaries. This allows us to split a string into glyphs and then apply normalization to every glyph, which results in more reliable offset calculation and less complex code.
nvaccess · Jun 11, 2024 · 6b366fe · 6b366fe
1 parent 014a6e7
commit 6b366fe
Show file tree

Hide file tree

Showing 16 changed files with 513 additions and 187 deletions.
diff --git a/nvdaHelper/local/nvdaHelperLocal.def b/nvdaHelper/local/nvdaHelperLocal.def
@@ -59,6 +59,7 @@ EXPORTS
 	displayModel_requestTextChangeNotificationsForWindow
 	calculateWordOffsets
 	calculateCharacterOffsets
+	calculateCharacterBoundaries
 	findWindowWithClassInThread
 	registerUIAProperty
 	registerUIAAnnotationType

diff --git a/nvdaHelper/local/nvdaHelperLocal.h b/nvdaHelper/local/nvdaHelperLocal.h
@@ -24,5 +24,13 @@ LRESULT cancellableSendMessageTimeout(HWND hwnd, UINT Msg, WPARAM wParam, LPARAM
  */
 void nvdaHelperLocal_initialize(bool secureMode);
 void nvdaHelperLocal_terminate();
+/*
+ * Calculate the start offsets for characters in a string.
+ * @param text: The text to calculate offsets for.
+ * @param textLength: The length of the provided text, encluding a terminating NULL character.
+ * @param offsets: An array of size textLength allocated by the caller to fill with offsets.
+ * @param offsetsCount: The number of offsets in the array after calculation.
+ */
+bool calculateCharacterBoundaries(const wchar_t* text, int textLength, int* offsets, int* offsetsCount);
 
 #endif
diff --git a/nvdaHelper/local/textUtils.cpp b/nvdaHelper/local/textUtils.cpp
@@ -14,52 +14,82 @@ This license can be found at:
 
 #include <windows.h>
 #include <usp10.h>
+#include <vector>
 #include <common/log.h>
 
+using namespace std;
+
 enum UNIT {
 	UNIT_CHARACTER,
 	UNIT_WORD
 };
 
+vector<SCRIPT_LOGATTR> _getLogAttrArray(const wchar_t* text, int textLength) {
+	if (textLength <= 0 || !text) {
+		return {};
+	}
+	vector<SCRIPT_ITEM> items(textLength + 1);
+	int numItems = 0;
+	if (ScriptItemize(text, textLength, textLength, nullptr, nullptr, items.data(), &numItems) != S_OK || numItems == 0) {
+		return {};
+	}
+
+	vector<SCRIPT_LOGATTR> logAttrArray(textLength);
+	int nextICharPos = textLength;
+	for (int itemIndex = numItems - 1; itemIndex >= 0; --itemIndex) {
+		int iCharPos = items[itemIndex].iCharPos;
+		int iCharLength = nextICharPos - iCharPos;
+		if (ScriptBreak(text + iCharPos, iCharLength, &(items[itemIndex].a), logAttrArray.data() + iCharPos) != S_OK) {
+			return {};
+		}
+	}
+	return logAttrArray;
+}
+
+bool calculateCharacterBoundaries(const wchar_t* text, int textLength, int* offsets, int* offsetsCount) {
+	if (!offsets) {
+		return false;
+	}
+	vector<SCRIPT_LOGATTR> logAttrArray = _getLogAttrArray(text, textLength);
+	if (logAttrArray.empty()) {
+		return false;
+	}
+	int count = 0;
+	for (int i = 0; i < textLength; ++i) {
+		if (logAttrArray[i].fCharStop) {
+			offsets[count++] = i;
+		}
+	}
+	*offsetsCount = count;
+	return true;
+}
+
 bool _calculateUniscribeOffsets(enum UNIT unit, wchar_t* text, int textLength, int offset, int* startOffset, int* endOffset) {
 	if(unit!=UNIT_CHARACTER&&unit!=UNIT_WORD) {
 		LOG_ERROR(L"Unsupported unit");
 		return false;
 	}
-	if(textLength<=0) return false;
-	if(offset<0) return false;
-	if(offset>=textLength) {
-		*startOffset=offset;
-		*endOffset=offset+1;
+	if (offset < 0 || !text) {
+		return false;
+	}
+	if (offset >= textLength) {
+		*startOffset = offset;
+		*endOffset = offset + 1;
 		return true;
 	}
-	SCRIPT_ITEM* pItems=new SCRIPT_ITEM[textLength+1];
-	int numItems=0;
-	if(ScriptItemize(text,textLength,textLength,NULL,NULL,pItems,&numItems)!=S_OK||numItems==0) {
-		delete[] pItems;
+	vector<SCRIPT_LOGATTR> logAttrArray = _getLogAttrArray(text, textLength);
+	if (logAttrArray.empty()) {
 		return false;
 	}
-	SCRIPT_LOGATTR* logAttrArray=new SCRIPT_LOGATTR[textLength];
-	int nextICharPos=textLength;
-	for(int itemIndex=numItems-1;itemIndex>=0;--itemIndex) {
-		int iCharPos=pItems[itemIndex].iCharPos;
-		int iCharLength=nextICharPos-iCharPos;
-		if(ScriptBreak(text+iCharPos,iCharLength,&(pItems[itemIndex].a),logAttrArray+iCharPos)!=S_OK) {
-			delete[] pItems;
-			delete[] logAttrArray;
-			return false;
-		}
-	}
-	delete[] pItems;
 	if(unit==UNIT_CHARACTER) {
 		for(int i=offset;i>=0;--i) {
-			if(logAttrArray[i].fCharStop) {
+			if (logAttrArray[i].fCharStop) {
 				*startOffset=i;
 				break;
 			}
 		}
 		for(int i=offset+1;i<textLength;++i) {
-			if(logAttrArray[i].fCharStop) {
+			if (logAttrArray[i].fCharStop) {
 				*endOffset=i;
 				break;
 			}
@@ -110,7 +140,6 @@ bool _calculateUniscribeOffsets(enum UNIT unit, wchar_t* text, int textLength, i
 			}
 		}
 	}
-	delete[] logAttrArray;
 	return true;
 }
 

diff --git a/source/config/configSpec.py b/source/config/configSpec.py
@@ -36,6 +36,7 @@
 	symbolLevel = integer(default=100)
 	trustVoiceLanguage = boolean(default=true)
 	unicodeNormalization = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="disabled")
+	reportNormalizedForCharacterNavigation = boolean(default=true)
 	includeCLDR = boolean(default=True)
 	beepSpeechModePitch = integer(default=10000,min=50,max=11025)
 	outputDevice = string(default=default)

diff --git a/source/globalCommands.py b/source/globalCommands.py
@@ -3607,6 +3607,34 @@ def script_braille_cycleShowSelection(self, gesture: inputCore.InputGesture) ->
 		braille.handler.initialDisplay()
 		ui.message(msg)
 
+	@script(
+		# Translators: Input help mode message for Braille Unicode normalization command.
+		description=_("Cycle through the braille Unicode normalization states"),
+		category=SCRCAT_BRAILLE
+	)
+	def script_braille_cycleUnicodeNormalization(self, gesture: inputCore.InputGesture) -> None:
+		featureFlag: FeatureFlag = config.conf["braille"]["unicodeNormalization"]
+		boolFlag: BoolFlag = featureFlag.enumClassType
+		values = [x.value for x in boolFlag]
+		currentValue = featureFlag.value.value
+		nextValueIndex = (currentValue % len(values)) + 1
+		nextName: str = boolFlag(nextValueIndex).name
+		config.conf["braille"]["unicodeNormalization"] = nextName
+		featureFlag = config.conf["braille"]["unicodeNormalization"]
+		if featureFlag.isDefault():
+			# Translators: Used when reporting braille Unicode normalization state
+			# (default behavior).
+			msg = _("Braille Unicode normalization default ({default})").format(
+				default=featureFlag.behaviorOfDefault.displayString
+			)
+		else:
+			# Translators: Used when reporting braille Unicode normalization state
+			# (disabled or enabled).
+			msg = _("Braille Unicode normalization {state}").format(
+				state=BoolFlag[nextName].displayString
+			)
+		ui.message(msg)
+
 	@script(
 		description=_(
 			# Translators: Input help mode message for report clipboard text command.
@@ -4383,6 +4411,34 @@ def script_toggleReportCLDR(self, gesture):
 		characterProcessing.clearSpeechSymbols()
 		ui.message(state)
 
+	@script(
+		# Translators: Input help mode message for speech Unicode normalization command.
+		description=_("Cycle through the speech Unicode normalization states"),
+		category=SCRCAT_SPEECH
+	)
+	def script_speech_cycleUnicodeNormalization(self, gesture: inputCore.InputGesture) -> None:
+		featureFlag: FeatureFlag = config.conf["speech"]["unicodeNormalization"]
+		boolFlag: BoolFlag = featureFlag.enumClassType
+		values = [x.value for x in boolFlag]
+		currentValue = featureFlag.value.value
+		nextValueIndex = (currentValue % len(values)) + 1
+		nextName: str = boolFlag(nextValueIndex).name
+		config.conf["speech"]["unicodeNormalization"] = nextName
+		featureFlag = config.conf["speech"]["unicodeNormalization"]
+		if featureFlag.isDefault():
+			# Translators: Used when reporting speech Unicode normalization state
+			# (default behavior).
+			msg = _("Speech Unicode normalization default ({default})").format(
+				default=featureFlag.behaviorOfDefault.displayString
+			)
+		else:
+			# Translators: Used when reporting speech Unicode normalization state
+			# (disabled or enabled).
+			msg = _("Speech Unicode normalization {state}").format(
+				state=BoolFlag[nextName].displayString
+			)
+		ui.message(msg)
+
 	_tempEnableScreenCurtain = True
 	_waitingOnScreenCurtainWarningDialog: Optional[wx.Dialog] = None
 	_toggleScreenCurtainMessage: Optional[str] = None

diff --git a/source/gui/nvdaControls.py b/source/gui/nvdaControls.py
@@ -1,6 +1,5 @@
-# -*- coding: UTF-8 -*-
 # A part of NonVisual Desktop Access (NVDA)
-# Copyright (C) 2016-2024 NV Access Limited, Derek Riemer, Cyrille Bougot, Luke Davis
+# Copyright (C) 2016-2024 NV Access Limited, Derek Riemer, Cyrille Bougot, Luke Davis, Leonard de Ruijter
 # This file is covered by the GNU General Public License.
 # See the file COPYING for more details.
 import collections
@@ -426,16 +425,18 @@ def __init__(
 			style=0,
 			validator=wx.DefaultValidator,
 			name=wx.ChoiceNameStr,
+			onChoiceEventHandler: typing.Callable[[wx.CommandEvent], None] | None = None,
 	):
 		"""
-		@param parent: The parent window.
-		@param keyPath: The list of keys required to get to the config value.
-		@param conf: The config.conf object.
-		@param pos: The position of the control. Forwarded to wx.Choice
-		@param size: The size of the control. Forwarded to wx.Choice
-		@param style: The style of the control. Forwarded to wx.Choice
-		@param validator: The validator for the control. Forwarded to wx.Choice
-		@param name: The name of the control. Forwarded to wx.Choice
+		:param parent: The parent window.
+		:param keyPath: The list of keys required to get to the config value.
+		:param conf: The config.conf object.
+		:param pos: The position of the control. Forwarded to wx.Choice
+		:param size: The size of the control. Forwarded to wx.Choice
+		:param style: The style of the control. Forwarded to wx.Choice
+		:param validator: The validator for the control. Forwarded to wx.Choice
+		:param name: The name of the control. Forwarded to wx.Choice
+		:param onChoiceEventHandler: Event handler bound for EVT_CHOICE
 		"""
 		self._confPath = keyPath
 		self._conf = conf
@@ -462,7 +463,11 @@ def __init__(
 			validator=validator,
 			name=name,
 		)
-
+		if onChoiceEventHandler is not None:
+			self.Bind(
+				wx.EVT_CHOICE,
+				onChoiceEventHandler
+			)
 		self.SetSelection(self._getChoiceIndex(configValue.value))
 		self.defaultValue = self._getConfSpecDefaultValue()
 		"""The default value of the config spec. Not the "behavior of default".

diff --git a/source/gui/settingsDialogs.py b/source/gui/settingsDialogs.py
@@ -1597,9 +1597,24 @@ def makeSettings(self, settingsSizer):
 			wxCtrlClass=nvdaControls.FeatureFlagCombo,
 			keyPath=["speech", "unicodeNormalization"],
 			conf=config.conf,
+			onChoiceEventHandler=self._onUnicodeNormalizationChange,
 		)
 		self.bindHelpEvent("SpeechUnicodeNormalization", self.unicodeNormalizationCombo)
 
+		# Translators: This is the label for a checkbox in the
+		# speech settings panel.
+		reportNormalizedForCharacterNavigationText = _("Report '&Normalized' when navigating by character")
+		self.reportNormalizedForCharacterNavigationCheckBox = settingsSizerHelper.addItem(
+			wx.CheckBox(self, label=reportNormalizedForCharacterNavigationText)
+		)
+		self.bindHelpEvent(
+			"SpeechReportNormalizedForCharacterNavigation",
+			self.reportNormalizedForCharacterNavigationCheckBox
+		)
+		self.reportNormalizedForCharacterNavigationCheckBox.SetValue(
+			config.conf["speech"]["reportNormalizedForCharacterNavigation"]
+		)
+
 		includeCLDRText = _(
 			# Translators: This is the label for a checkbox in the
 			# voice settings panel (if checked, data from the unicode CLDR will be used
@@ -1713,6 +1728,9 @@ def onSave(self):
 		].value
 		config.conf["speech"]["trustVoiceLanguage"] = self.trustVoiceLanguageCheckbox.IsChecked()
 		self.unicodeNormalizationCombo.saveCurrentValueToConf()
+		config.conf["speech"]["reportNormalizedForCharacterNavigation"] = (
+			self.reportNormalizedForCharacterNavigationCheckBox.IsChecked()
+		)
 		currentIncludeCLDR = config.conf["speech"]["includeCLDR"]
 		config.conf["speech"]["includeCLDR"] = newIncludeCldr = self.includeCLDRCheckbox.IsChecked()
 		if currentIncludeCLDR is not newIncludeCldr:
@@ -1753,6 +1771,12 @@ def _onSpeechModesListChange(self, evt: wx.CommandEvent):
 					+ [self._allSpeechModes.index(speech.SpeechMode.talk)]
 				)
 
+	def _onUnicodeNormalizationChange(self, evt: wx.CommandEvent):
+		evt.Skip()
+		self.reportNormalizedForCharacterNavigationCheckBox.Enable(
+			bool(self.unicodeNormalizationCombo._getControlCurrentFlag())
+		)
+
 	def isValid(self) -> bool:
 		enabledSpeechModes = self.speechModesList.CheckedItems
 		if len(enabledSpeechModes) < 2:

diff --git a/source/speech/commands.py b/source/speech/commands.py
@@ -1,4 +1,3 @@
-#  -*- coding: UTF-8 -*-
 # A part of NonVisual Desktop Access (NVDA)
 # This file is covered by the GNU General Public License.
 # See the file COPYING for more details.
@@ -202,6 +201,25 @@ class EndUtteranceCommand(SpeechCommand):
 	def __repr__(self):
 		return "EndUtteranceCommand()"
 
+
+class SuppressUnicodeNormalizationCommand(SpeechCommand):
+	"""Suppresses Unicode normalization at a point in a speech sequence.
+	For any text after this, Unicode normalization will be suppressed when state is True.
+	When state is False, original behavior of normalization will be restored.
+	This command is a no-op when normalization is disabled.
+	"""
+	state: bool
+
+	def __init__(self, state: bool = True):
+		"""
+		:param state: Suppress normalization if True, don't suppress when False
+		"""
+		self.state = state
+
+	def __repr__(self):
+		return f"SuppressUnicodeNormalizationCommand({self.state!r})"
+
+
 class BaseProsodyCommand(SynthParamCommand):
 	"""Base class for commands which change voice prosody; i.e. pitch, rate, etc.
 	The change to the setting is specified using either an offset or a multiplier, but not both.

diff --git a/source/speech/manager.py b/source/speech/manager.py
@@ -14,6 +14,7 @@
 	# Commands that are used in this file.
 	EndUtteranceCommand,
 	LangChangeCommand,
+	SuppressUnicodeNormalizationCommand,
 	SynthParamCommand,
 	BaseCallbackCommand,
 	ConfigProfileTriggerCommand,
@@ -363,6 +364,8 @@ def _processSpeechSequence(self, inSeq: SpeechSequence):
 				continue
 			if isinstance(command, SynthParamCommand):
 				paramTracker.update(command)
+			if isinstance(command, SuppressUnicodeNormalizationCommand):
+				continue  # Not handled by speech manager
 			outSeq.append(command)
 		# Add the last sequence and make sure the sequence ends the utterance.
 		self._ensureEndUtterance(outSeq, outSeqs, paramsToReplay, paramTracker)