From 1bd83708649e9d787b46d6e32c5b4a8e496a142d Mon Sep 17 00:00:00 2001 From: Veronica Berglyd Olsen <1619840+vkbo@users.noreply.github.com> Date: Tue, 12 Nov 2024 23:10:57 +0100 Subject: [PATCH 1/2] Update dialogue highlighting settings text in Preferences --- novelwriter/dialogs/preferences.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/novelwriter/dialogs/preferences.py b/novelwriter/dialogs/preferences.py index 444729ad1..b3c15b2e2 100644 --- a/novelwriter/dialogs/preferences.py +++ b/novelwriter/dialogs/preferences.py @@ -574,7 +574,7 @@ def buildForm(self) -> None: self.dialogLine.setText(CONFIG.dialogLine) self.mainForm.addRow( self.tr("Dialogue line symbols"), self.dialogLine, - self.tr("Lines starting with these symbols are always dialogue.") + self.tr("Lines starting with any of these symbols are dialogue.") ) self.narratorBreak = QLineEdit(self) @@ -583,8 +583,8 @@ def buildForm(self) -> None: self.narratorBreak.setAlignment(QtAlignCenter) self.narratorBreak.setText(CONFIG.narratorBreak) self.mainForm.addRow( - self.tr("Dialogue narrator break symbol"), self.narratorBreak, - self.tr("Symbol to indicate injected narrator break in dialogue") + self.tr("Narrator break symbol"), self.narratorBreak, + self.tr("Symbol to indicate a narrator break in dialogue") ) self.narratorDialog = QLineEdit(self) From c2b3eb4ac258c24a040c4199ec18f7d20a23dfd3 Mon Sep 17 00:00:00 2001 From: Veronica Berglyd Olsen <1619840+vkbo@users.noreply.github.com> Date: Wed, 13 Nov 2024 00:03:38 +0100 Subject: [PATCH 2/2] Fix issue of alternative unicode symbols being replaced too early --- novelwriter/constants.py | 5 +++++ novelwriter/formats/tokenizer.py | 32 ++++++++++++++++++-------------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/novelwriter/constants.py b/novelwriter/constants.py index 25326f641..2d67b5a7f 100644 --- a/novelwriter/constants.py +++ b/novelwriter/constants.py @@ -542,6 +542,11 @@ class nwUnicode: U_LTRI = "\u25c0" # Left-pointing triangle U_LTRIS = "\u25c2" # Left-pointing triangle, small + # Special + U_UNKN = "\ufffd" # Unknown character + U_NAC1 = "\ufffe" # Not a character + U_NAC2 = "\uffff" # Not a character + # HTML Equivalents # ================ diff --git a/novelwriter/formats/tokenizer.py b/novelwriter/formats/tokenizer.py index 33cb4a8bf..9af172164 100644 --- a/novelwriter/formats/tokenizer.py +++ b/novelwriter/formats/tokenizer.py @@ -490,22 +490,14 @@ def setText(self, tHandle: str, text: str | None = None) -> None: return def doPreProcessing(self) -> None: - """Run trough the various replace dictionaries.""" + """Run pre-processing jobs before the text is tokenized.""" # Process the user's auto-replace dictionary - autoReplace = self._project.data.autoReplace - if len(autoReplace) > 0: + if autoReplace := self._project.data.autoReplace: repDict = {} for aKey, aVal in autoReplace.items(): repDict[f"<{aKey}>"] = aVal xRep = re.compile("|".join([re.escape(k) for k in repDict.keys()]), flags=re.DOTALL) self._text = xRep.sub(lambda x: repDict[x.group(0)], self._text) - - # Process the translation map for placeholder characters - self._text = self._text.translate(str.maketrans({ - nwUnicode.U_MAPOS: nwUnicode.U_RSQUO, - nwUnicode.U_HBAR: nwUnicode.U_EMDASH, - })) - return def tokenizeText(self) -> None: @@ -538,13 +530,25 @@ def tokenizeText(self) -> None: firstIndent = self._firstIndent # Replace all instances of [br] with a placeholder character - text = REGEX_PATTERNS.lineBreak.sub("\uffff", self._text) + text = REGEX_PATTERNS.lineBreak.sub(nwUnicode.U_NAC2, self._text) + + # Translation Maps + transMapA = str.maketrans({ + nwUnicode.U_NAC2: "", # Used when [br] is ignored + nwUnicode.U_MAPOS: nwUnicode.U_RSQUO, + nwUnicode.U_HBAR: nwUnicode.U_EMDASH, + }) + transMapB = str.maketrans({ + nwUnicode.U_NAC2: "\n", # Used when [br] is not ignored + nwUnicode.U_MAPOS: nwUnicode.U_RSQUO, + nwUnicode.U_HBAR: nwUnicode.U_EMDASH, + }) nHead = 0 tHandle = self._handle or "" tBlocks: list[T_Block] = [B_EMPTY] for bLine in text.splitlines(): - aLine = bLine.replace("\uffff", "") # Remove placeholder characters + aLine = bLine.translate(transMapA) sLine = aLine.strip().lower() # Check for blank lines @@ -884,7 +888,7 @@ def tokenizeText(self) -> None: if doJustify and not cStyle & BlockFmt.ALIGNED: cStyle |= BlockFmt.JUSTIFY - pTxt = pLines[0][2].replace("\uffff", "\n") + pTxt = pLines[0][2].translate(transMapB) sBlocks.append(( BlockTyp.TEXT, pLines[0][1], pTxt, pLines[0][3], cStyle )) @@ -901,7 +905,7 @@ def tokenizeText(self) -> None: tFmt.extend((p+tLen, fmt, key) for p, fmt, key in aBlock[3]) cStyle |= aBlock[4] - pTxt = tTxt[:-1].replace("\uffff", "\n") + pTxt = tTxt[:-1].translate(transMapB) sBlocks.append(( BlockTyp.TEXT, pLines[0][1], pTxt, tFmt, cStyle ))