From 1bd83708649e9d787b46d6e32c5b4a8e496a142d Mon Sep 17 00:00:00 2001
From: Veronica Berglyd Olsen <1619840+vkbo@users.noreply.github.com>
Date: Tue, 12 Nov 2024 23:10:57 +0100
Subject: [PATCH 1/2] Update dialogue highlighting settings text in Preferences

---
 novelwriter/dialogs/preferences.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/novelwriter/dialogs/preferences.py b/novelwriter/dialogs/preferences.py
index 444729ad1..b3c15b2e2 100644
--- a/novelwriter/dialogs/preferences.py
+++ b/novelwriter/dialogs/preferences.py
@@ -574,7 +574,7 @@ def buildForm(self) -> None:
         self.dialogLine.setText(CONFIG.dialogLine)
         self.mainForm.addRow(
             self.tr("Dialogue line symbols"), self.dialogLine,
-            self.tr("Lines starting with these symbols are always dialogue.")
+            self.tr("Lines starting with any of these symbols are dialogue.")
         )
 
         self.narratorBreak = QLineEdit(self)
@@ -583,8 +583,8 @@ def buildForm(self) -> None:
         self.narratorBreak.setAlignment(QtAlignCenter)
         self.narratorBreak.setText(CONFIG.narratorBreak)
         self.mainForm.addRow(
-            self.tr("Dialogue narrator break symbol"), self.narratorBreak,
-            self.tr("Symbol to indicate injected narrator break in dialogue")
+            self.tr("Narrator break symbol"), self.narratorBreak,
+            self.tr("Symbol to indicate a narrator break in dialogue")
         )
 
         self.narratorDialog = QLineEdit(self)

From c2b3eb4ac258c24a040c4199ec18f7d20a23dfd3 Mon Sep 17 00:00:00 2001
From: Veronica Berglyd Olsen <1619840+vkbo@users.noreply.github.com>
Date: Wed, 13 Nov 2024 00:03:38 +0100
Subject: [PATCH 2/2] Fix issue of alternative unicode symbols being replaced
 too early

---
 novelwriter/constants.py         |  5 +++++
 novelwriter/formats/tokenizer.py | 32 ++++++++++++++++++--------------
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/novelwriter/constants.py b/novelwriter/constants.py
index 25326f641..2d67b5a7f 100644
--- a/novelwriter/constants.py
+++ b/novelwriter/constants.py
@@ -542,6 +542,11 @@ class nwUnicode:
     U_LTRI   = "\u25c0"  # Left-pointing triangle
     U_LTRIS  = "\u25c2"  # Left-pointing triangle, small
 
+    # Special
+    U_UNKN   = "\ufffd"  # Replacement character (stands in for unknown input)
+    U_NAC1   = "\ufffe"  # Noncharacter, reserved for internal use
+    U_NAC2   = "\uffff"  # Noncharacter, reserved for internal use
+
     # HTML Equivalents
     # ================
 
diff --git a/novelwriter/formats/tokenizer.py b/novelwriter/formats/tokenizer.py
index 33cb4a8bf..9af172164 100644
--- a/novelwriter/formats/tokenizer.py
+++ b/novelwriter/formats/tokenizer.py
@@ -490,22 +490,14 @@ def setText(self, tHandle: str, text: str | None = None) -> None:
         return
 
     def doPreProcessing(self) -> None:
-        """Run trough the various replace dictionaries."""
+        """Run pre-processing jobs before the text is tokenized."""
         # Process the user's auto-replace dictionary
-        autoReplace = self._project.data.autoReplace
-        if len(autoReplace) > 0:
+        if autoReplace := self._project.data.autoReplace:
             repDict = {}
             for aKey, aVal in autoReplace.items():
                 repDict[f"<{aKey}>"] = aVal
             xRep = re.compile("|".join([re.escape(k) for k in repDict.keys()]), flags=re.DOTALL)
             self._text = xRep.sub(lambda x: repDict[x.group(0)], self._text)
-
-        # Process the translation map for placeholder characters
-        self._text = self._text.translate(str.maketrans({
-            nwUnicode.U_MAPOS: nwUnicode.U_RSQUO,
-            nwUnicode.U_HBAR: nwUnicode.U_EMDASH,
-        }))
-
         return
 
     def tokenizeText(self) -> None:
@@ -538,13 +530,25 @@ def tokenizeText(self) -> None:
         firstIndent = self._firstIndent
 
         # Replace all instances of [br] with a placeholder character
-        text = REGEX_PATTERNS.lineBreak.sub("\uffff", self._text)
+        text = REGEX_PATTERNS.lineBreak.sub(nwUnicode.U_NAC2, self._text)
+
+        # Translation Maps
+        transMapA = str.maketrans({
+            nwUnicode.U_NAC2:  "",  # Used when [br] is ignored
+            nwUnicode.U_MAPOS: nwUnicode.U_RSQUO,
+            nwUnicode.U_HBAR:  nwUnicode.U_EMDASH,
+        })
+        transMapB = str.maketrans({
+            nwUnicode.U_NAC2:  "\n",  # Used when [br] is not ignored
+            nwUnicode.U_MAPOS: nwUnicode.U_RSQUO,
+            nwUnicode.U_HBAR:  nwUnicode.U_EMDASH,
+        })
 
         nHead = 0
         tHandle = self._handle or ""
         tBlocks: list[T_Block] = [B_EMPTY]
         for bLine in text.splitlines():
-            aLine = bLine.replace("\uffff", "")  # Remove placeholder characters
+            aLine = bLine.translate(transMapA)
             sLine = aLine.strip().lower()
 
             # Check for blank lines
@@ -884,7 +888,7 @@ def tokenizeText(self) -> None:
                         if doJustify and not cStyle & BlockFmt.ALIGNED:
                             cStyle |= BlockFmt.JUSTIFY
 
-                        pTxt = pLines[0][2].replace("\uffff", "\n")
+                        pTxt = pLines[0][2].translate(transMapB)
                         sBlocks.append((
                             BlockTyp.TEXT, pLines[0][1], pTxt, pLines[0][3], cStyle
                         ))
@@ -901,7 +905,7 @@ def tokenizeText(self) -> None:
                             tFmt.extend((p+tLen, fmt, key) for p, fmt, key in aBlock[3])
                             cStyle |= aBlock[4]
 
-                        pTxt = tTxt[:-1].replace("\uffff", "\n")
+                        pTxt = tTxt[:-1].translate(transMapB)
                         sBlocks.append((
                             BlockTyp.TEXT, pLines[0][1], pTxt, tFmt, cStyle
                         ))