From 3742586f8411102a3293c774f9fda6571e37534c Mon Sep 17 00:00:00 2001
From: Jules Bertholet <julesbertholet@quoi.xyz>
Date: Mon, 20 May 2024 01:10:09 -0400
Subject: [PATCH] Mark more `Prepended_Concatenation_Mark`s as non-advancing

---
 scripts/unicode.py | 11 +++++++++--
 src/lib.rs         |  9 +++++++--
 src/tables.rs      |  6 +++---
 tests/tests.rs     | 22 +++++++++++++++-------
 4 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/scripts/unicode.py b/scripts/unicode.py
index f748711..80affa5 100755
--- a/scripts/unicode.py
+++ b/scripts/unicode.py
@@ -241,10 +241,17 @@ def load_zero_widths() -> "list[bool]":
     # width 2. Therefore, we treat it as having width 2.
     zw_map[0x115F] = False
 
-    # Syriac abbreviation mark
-    # This is a `Prepended_Concatenation_Mark`, but unlike the others it's zero-width
+    # Syriac abbreviation mark:
+    # Zero-width `Prepended_Concatenation_Mark`
     zw_map[0x070F] = True
 
+    # Some Arabic `Prepended_Concatenation_Mark`s
+    # https://www.unicode.org/versions/Unicode15.0.0/ch09.pdf#G27820
+    zw_map[0x0605] = True
+    zw_map[0x0890] = True
+    zw_map[0x0891] = True
+    zw_map[0x08E2] = True
+
     # U+A8FA DEVANAGARI CARET
     # https://www.unicode.org/versions/Unicode15.0.0/ch12.pdf#G667447
     zw_map[0xA8FA] = True
diff --git a/src/lib.rs b/src/lib.rs
index eb3900e..8e3c4ab 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -57,7 +57,12 @@
 //!      - [`'\u{1B43}'` BALINESE VOWEL SIGN PEPET TEDUNG](https://util.unicode.org/UnicodeJsps/character.jsp?a=1B43).
 //!    - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BHangul_Syllable_Type%3DV%7D%5Cp%7BHangul_Syllable_Type%3DT%7D)
 //!       with a [`Hangul_Syllable_Type`] of `Vowel_Jamo` (`V`) or `Trailing_Jamo` (`T`).
-//!    - `'\u{070F}'` [SYRIAC] ABBREVIATION MARK.
+//!    - The following [`Prepended_Concatenation_Mark`]s:
+//!      - [`'\u{0605}'` NUMBER MARK ABOVE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0605),
+//!      - [`'\u{070F}'` SYRIAC ABBREVIATION MARK](https://util.unicode.org/UnicodeJsps/character.jsp?a=070F),
+//!      - [`'\u{0890}'` POUND MARK ABOVE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0890),
+//!      - [`'\u{0891}'` PIASTRE MARK ABOVE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0891), and
+//!      - [`'\u{08E2}'` DISPUTED END OF AYAH](https://util.unicode.org/UnicodeJsps/character.jsp?a=08E2).
 //!    - [`'\u{A8FA}'` DEVANAGARI CARET](https://util.unicode.org/UnicodeJsps/character.jsp?a=A8FA).
 //! 7. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D)
 //!    with an [`East_Asian_Width`] of [`Fullwidth`] or [`Wide`] have width 2.
@@ -70,6 +75,7 @@
 //! [`Emoji_Presentation`]: https://unicode.org/reports/tr51/#def_emoji_presentation
 //! [`Grapheme_Extend`]: https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G52443
 //! [`Hangul_Syllable_Type`]: https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G45593
+//! [`Prepended_Concatenation_Mark`]: https://www.unicode.org/versions/Unicode15.0.0/ch23.pdf#G37908
 //!
 //! [`Fullwidth`]: https://www.unicode.org/reports/tr11/#ED2
 //! [`Wide`]: https://www.unicode.org/reports/tr11/#ED4
@@ -80,7 +86,6 @@
 //!
 //! [Enclosed Ideographic Supplement]: https://unicode.org/charts/PDF/U1F200.pdf
 //!
-//! [Syriac]: https://www.unicode.org/versions/Unicode15.0.0/ch09.pdf#G13006
 //! [Lisu tone letter]: https://www.unicode.org/versions/Unicode15.0.0/ch18.pdf#G42078
 //!
 //! ## Canonical equivalence
diff --git a/src/tables.rs b/src/tables.rs
index f4464f7..521bd8e 100644
--- a/src/tables.rs
+++ b/src/tables.rs
@@ -320,7 +320,7 @@ pub mod charwidth {
         0x00, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
         0x55, 0x55, 0x55, 0x55, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x10, 0x41, 0x10, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
-        0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x00, 0x00, 0x40, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55,
+        0x55, 0x55, 0x55, 0x51, 0x55, 0x55, 0x00, 0x00, 0x40, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55,
         0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55,
         0x54, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05, 0x00, 0x14, 0x00, 0x14, 0x04,
         0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x51, 0x55, 0x55, 0x55, 0x55, 0x55,
@@ -330,8 +330,8 @@ pub mod charwidth {
         0x55, 0x55, 0x55, 0x15, 0x00, 0x00, 0x55, 0x55, 0x51, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05,
         0x10, 0x00, 0x00, 0x01, 0x01, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
         0x55, 0x01, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
-        0x55, 0x55, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05,
-        0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x55,
+        0x50, 0x55, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x55,
         0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x45, 0x54, 0x01,
         0x00, 0x54, 0x51, 0x01, 0x00, 0x55, 0x55, 0x05, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
         0x51, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
diff --git a/tests/tests.rs b/tests/tests.rs
index 97aa497..44c2aa9 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -99,14 +99,22 @@ fn test_jamo() {
 
 #[test]
 fn test_prepended_concatenation_marks() {
-    assert_eq!('\u{0600}'.width(), Some(1));
-    assert_eq!('\u{08E2}'.width(), Some(1));
-    assert_eq!('\u{110BD}'.width(), Some(1));
-}
+    for c in [
+        '\u{0600}',
+        '\u{0601}',
+        '\u{0602}',
+        '\u{0603}',
+        '\u{0604}',
+        '\u{06DD}',
+        '\u{110BD}',
+        '\u{110CD}',
+    ] {
+        assert_eq!(c.width(), Some(1), "{c:?} should have width 1");
+    }
 
-#[test]
-fn test_syriac_abbreviation_mark() {
-    assert_eq!('\u{070F}'.width(), Some(0));
+    for c in ['\u{0605}', '\u{070F}', '\u{0890}', '\u{0891}', '\u{08E2}'] {
+        assert_eq!(c.width(), Some(0), "{c:?} should have width 0");
+    }
 }
 
 #[test]