From 70c9af0338878be37289e65525b8efc1e5eb6143 Mon Sep 17 00:00:00 2001
From: IKEDA Soji <ikeda@conversion.co.jp>
Date: Sun, 25 Jun 2017 12:07:36 +0900
Subject: [PATCH 1/4] Emphasis and East Asian text: code changes.

---
 src/inlines.c | 10 ++++++++--
 src/utf8.c    | 19 +++++++++++++++++++
 src/utf8.h    |  1 +
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/src/inlines.c b/src/inlines.c
index bbda78f3b..4c21e8137 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -421,17 +421,23 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open,
   }
   left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
                   (!cmark_utf8proc_is_punctuation(after_char) ||
+                   cmark_utf8proc_is_eastasian_punctuation(after_char) ||
                    cmark_utf8proc_is_space(before_char) ||
                    cmark_utf8proc_is_punctuation(before_char));
   right_flanking = numdelims > 0 && !cmark_utf8proc_is_space(before_char) &&
                    (!cmark_utf8proc_is_punctuation(before_char) ||
+                    cmark_utf8proc_is_eastasian_punctuation(before_char) ||
                     cmark_utf8proc_is_space(after_char) ||
                     cmark_utf8proc_is_punctuation(after_char));
   if (c == '_') {
     *can_open = left_flanking &&
-                (!right_flanking || cmark_utf8proc_is_punctuation(before_char));
+                (!right_flanking ||
+                 cmark_utf8proc_is_punctuation(before_char) ||
+                 cmark_utf8proc_is_eastasian_punctuation(after_char));
     *can_close = right_flanking &&
-                 (!left_flanking || cmark_utf8proc_is_punctuation(after_char));
+                 (!left_flanking ||
+                  cmark_utf8proc_is_punctuation(after_char) ||
+                  cmark_utf8proc_is_eastasian_punctuation(before_char));
   } else if (c == '\'' || c == '"') {
     *can_open = left_flanking && !right_flanking &&
 	         before_char != ']' && before_char != ')';
diff --git a/src/utf8.c b/src/utf8.c
index c29bbf770..6cfd676b9 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -315,3 +315,22 @@ int cmark_utf8proc_is_punctuation(int32_t uc) {
       uc == 92917 || (uc >= 92983 && uc <= 92987) || uc == 92996 ||
       uc == 113823);
 }
+
+// Returns nonzero iff uc is punctuation whose East_Asian_Width is A, F, H or W.
+int cmark_utf8proc_is_eastasian_punctuation(int32_t uc) {
+  return (
+      uc == 161 || uc == 183 || uc == 191 || uc == 8208 ||
+      (uc >= 8211 && uc <= 8214) || uc == 8216 || uc == 8217 || uc == 8220 ||
+      uc == 8221 || (uc >= 8224 && uc <= 8226) || (uc >= 8228 && uc <= 8231) ||
+      uc == 8240 || uc == 8242 || uc == 8243 || uc == 8245 || uc == 8251 ||
+      uc == 8254 || uc == 9001 || uc == 9002 || (uc >= 12289 && uc <= 12291) ||
+      (uc >= 12296 && uc <= 12305) || (uc >= 12308 && uc <= 12319) ||
+      uc == 12336 || uc == 12349 || uc == 12448 || uc == 12539 ||
+      (uc >= 65040 && uc <= 65049) || (uc >= 65072 && uc <= 65106) ||
+      (uc >= 65108 && uc <= 65121) || uc == 65123 || uc == 65128 ||
+      uc == 65130 || uc == 65131 || (uc >= 65281 && uc <= 65283) ||
+      (uc >= 65285 && uc <= 65290) || (uc >= 65292 && uc <= 65295) ||
+      uc == 65306 || uc == 65307 || uc == 65311 || uc == 65312 ||
+      (uc >= 65339 && uc <= 65341) || uc == 65343 || uc == 65371 ||
+      uc == 65373 || (uc >= 65375 && uc <= 65381));
+}
diff --git a/src/utf8.h b/src/utf8.h
index 8e45714d4..91b8a6b01 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -16,6 +16,7 @@ void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line,
                           bufsize_t size);
 int cmark_utf8proc_is_space(int32_t uc);
 int cmark_utf8proc_is_punctuation(int32_t uc);
+int cmark_utf8proc_is_eastasian_punctuation(int32_t uc);
 
 #ifdef __cplusplus
 }

From d529d6c8f001167bfb91d856299c0eab7c748afc Mon Sep 17 00:00:00 2001
From: IKEDA Soji <ikeda@conversion.co.jp>
Date: Sun, 25 Jun 2017 12:12:37 +0900
Subject: [PATCH 2/4] Emphasis and East Asian text: Proposed changes to spec.

---
 test/spec.txt | 41 +++++++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/test/spec.txt b/test/spec.txt
index 686283830..fd914c943 100644
--- a/test/spec.txt
+++ b/test/spec.txt
@@ -335,6 +335,9 @@ A [punctuation character](@) is an [ASCII
 punctuation character] or anything in
 the general Unicode categories  `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
 
+An [East Asian punctuation character](@) is a [punctuation character] with
+the East_Asian_Width property values `A`, `F`, `H` or `W`.
+
 ## Tabs
 
 Tabs in lines are not expanded to [spaces].  However,
@@ -5984,14 +5987,16 @@ a non-backslash-escaped `_` character.
 
 A [left-flanking delimiter run](@) is
 a [delimiter run] that is (a) not followed by [Unicode whitespace],
-and (b) not followed by a [punctuation character], or
+and (b) not followed by a [punctuation character] which is not
+[East Asian punctuation character], or
 preceded by [Unicode whitespace] or a [punctuation character].
 For purposes of this definition, the beginning and the end of
 the line count as Unicode whitespace.
 
 A [right-flanking delimiter run](@) is
 a [delimiter run] that is (a) not preceded by [Unicode whitespace],
-and (b) not preceded by a [punctuation character], or
+and (b) not preceded by a [punctuation character] which is not
+[East Asian punctuation character], or
 followed by [Unicode whitespace] or a [punctuation character].
 For purposes of this definition, the beginning and the end of
 the line count as Unicode whitespace.
@@ -6045,36 +6050,44 @@ The following rules define emphasis and strong emphasis:
 
 2.  A single `_` character [can open emphasis] iff
     it is part of a [left-flanking delimiter run]
-    and either (a) not part of a [right-flanking delimiter run]
-    or (b) part of a [right-flanking delimiter run]
-    preceded by punctuation.
+    and either (a) not part of a [right-flanking delimiter run],
+    (b) part of a [right-flanking delimiter run]
+    preceded by punctuation
+    or (c) part of a [right-flanking delimiter run]
+    followed by East Asian punctuation.
 
 3.  A single `*` character [can close emphasis](@)
     iff it is part of a [right-flanking delimiter run].
 
 4.  A single `_` character [can close emphasis] iff
     it is part of a [right-flanking delimiter run]
-    and either (a) not part of a [left-flanking delimiter run]
-    or (b) part of a [left-flanking delimiter run]
-    followed by punctuation.
+    and either (a) not part of a [left-flanking delimiter run],
+    (b) part of a [left-flanking delimiter run]
+    followed by punctuation
+    or (c) part of a [left-flanking delimiter run]
+    preceded by East Asian punctuation.
 
 5.  A double `**` [can open strong emphasis](@)
     iff it is part of a [left-flanking delimiter run].
 
 6.  A double `__` [can open strong emphasis] iff
     it is part of a [left-flanking delimiter run]
-    and either (a) not part of a [right-flanking delimiter run]
-    or (b) part of a [right-flanking delimiter run]
-    preceded by punctuation.
+    and either (a) not part of a [right-flanking delimiter run],
+    (b) part of a [right-flanking delimiter run]
+    preceded by punctuation
+    or (c) part of a [right-flanking delimiter run]
+    followed by East Asian punctuation.
 
 7.  A double `**` [can close strong emphasis](@)
     iff it is part of a [right-flanking delimiter run].
 
 8.  A double `__` [can close strong emphasis] iff
     it is part of a [right-flanking delimiter run]
-    and either (a) not part of a [left-flanking delimiter run]
-    or (b) part of a [left-flanking delimiter run]
-    followed by punctuation.
+    and either (a) not part of a [left-flanking delimiter run],
+    (b) part of a [left-flanking delimiter run]
+    followed by punctuation
+    or (c) part of a [left-flanking delimiter run]
+    preceded by East Asian punctuation.
 
 9.  Emphasis begins with a delimiter that [can open emphasis] and ends
     with a delimiter that [can close emphasis], and that uses the same

From ff2a079b0091041547b7374f92b4e942dc0a117a Mon Sep 17 00:00:00 2001
From: IKEDA Soji <ikeda@conversion.co.jp>
Date: Sun, 25 Jun 2017 12:18:09 +0900
Subject: [PATCH 3/4] Emphasis and East Asian text: Adding test cases.

---
 test/spec.txt | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)

diff --git a/test/spec.txt b/test/spec.txt
index fd914c943..b8cb27b5e 100644
--- a/test/spec.txt
+++ b/test/spec.txt
@@ -6177,6 +6177,24 @@ a*"foo"*
 ````````````````````````````````
 
 
+This is emphasis, because the opening `*` is preceded
+by an alphanumeric and followed by East Asian punctuation, and hence
+part of a [left-flanking delimiter run]:
+
+```````````````````````````````` example
+a*「foo」*
+.
+<p>a<em>「foo」</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+『a』*「foo」*
+.
+<p>『a』<em>「foo」</em></p>
+````````````````````````````````
+
+
 Unicode nonbreaking spaces count as whitespace, too:
 
 ```````````````````````````````` example
@@ -6202,6 +6220,16 @@ foo*bar*
 ````````````````````````````````
 
 
+Emphasis including East Asian punctuation without leading whitespace is
+permitted:
+
+```````````````````````````````` example
+5*「6」*78
+.
+<p>5<em>「6」</em>78</p>
+````````````````````````````````
+
+
 Rule 2:
 
 ```````````````````````````````` example
@@ -6231,6 +6259,25 @@ a_"foo"_
 ````````````````````````````````
 
 
+This is emphasis, because the opening `_` is preceded
+by an alphanumeric and followed by East Asian punctuation, and hence
+part of a [left-flanking delimiter run]:
+
+```````````````````````````````` example
+a_「foo」_
+.
+<p>a<em>「foo」</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+『a』_「foo」_
+.
+<p>『a』<em>「foo」</em></p>
+````````````````````````````````
+
+
+
 Emphasis with `_` is not allowed inside words:
 
 ```````````````````````````````` example
@@ -6254,6 +6301,13 @@ foo_bar_
 ````````````````````````````````
 
 
+```````````````````````````````` example
+五_六_七八
+.
+<p>五_六_七八</p>
+````````````````````````````````
+
+
 Here `_` does not generate emphasis, because the first delimiter run
 is right-flanking and the second left-flanking:
 
@@ -6393,6 +6447,13 @@ _foo_bar_baz_
 ````````````````````````````````
 
 
+```````````````````````````````` example
+_み_か_ん_
+.
+<p><em>み_か_ん</em></p>
+````````````````````````````````
+
+
 This is emphasis, even though the closing delimiter is
 both left- and right-flanking, because it is followed by
 punctuation:
@@ -6434,6 +6495,24 @@ a**"foo"**
 ````````````````````````````````
 
 
+This is strong emphasis, because the opening `**` is preceded
+by an alphanumeric and followed by East Asian punctuation, and hence
+part of a [left-flanking delimiter run]:
+
+```````````````````````````````` example
+a**「foo」**
+.
+<p>a<strong>「foo」</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+『a』**「foo」**
+.
+<p>『a』<strong>「foo」</strong></p>
+````````````````````````````````
+
+
 Intraword strong emphasis with `**` is permitted:
 
 ```````````````````````````````` example
@@ -6482,6 +6561,23 @@ a__"foo"__
 ````````````````````````````````
 
 
+This is strong emphasis, because the opening `__` is preceded
+by an alphanumeric and followed by East Asian punctuation:
+
+```````````````````````````````` example
+a__「foo」__
+.
+<p>a<strong>「foo」</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+『a』__「foo」__
+.
+<p>『a』<strong>「foo」</strong></p>
+````````````````````````````````
+
+
 Intraword strong emphasis is forbidden with `__`:
 
 ```````````````````````````````` example
@@ -6505,6 +6601,13 @@ foo__bar__
 ````````````````````````````````
 
 
+```````````````````````````````` example
+五__六__七八
+.
+<p>五__六__七八</p>
+````````````````````````````````
+
+
 ```````````````````````````````` example
 __foo, __bar__, baz__
 .
@@ -6512,6 +6615,15 @@ __foo, __bar__, baz__
 ````````````````````````````````
 
 
+East Asian punctuation introduces ambiguity:
+
+```````````````````````````````` example
+__foo、__bar__、baz__
+.
+<p><strong>foo、</strong>bar<strong>、baz</strong></p>
+````````````````````````````````
+
+
 This is strong emphasis, even though the opening delimiter is
 both left- and right-flanking, because it is preceded by
 punctuation:
@@ -6549,6 +6661,15 @@ preceded by punctuation and followed by an alphanumeric:
 ````````````````````````````````
 
 
+East Asian punctuation is an exception:
+
+```````````````````````````````` example
+**〔**foo〕
+.
+<p><strong>〔</strong>foo〕</p>
+````````````````````````````````
+
+
 The point of this restriction is more easily appreciated
 with these examples:
 
@@ -6575,6 +6696,13 @@ with these examples:
 ````````````````````````````````
 
 
+```````````````````````````````` example
+**foo「*bar*」foo**
+.
+<p><strong>foo「<em>bar</em>」foo</strong></p>
+````````````````````````````````
+
+
 Intraword emphasis:
 
 ```````````````````````````````` example
@@ -6606,6 +6734,15 @@ __(__foo)
 ````````````````````````````````
 
 
+East Asian punctuation is an exception:
+
+```````````````````````````````` example
+__〔__foo〕
+.
+<p><strong>〔</strong>foo〕</p>
+````````````````````````````````
+
+
 The point of this restriction is more easily appreciated
 with this example:
 
@@ -6632,6 +6769,13 @@ __пристаням__стремятся
 ````````````````````````````````
 
 
+```````````````````````````````` example
+__か__き
+.
+<p>__か__き</p>
+````````````````````````````````
+
+
 ```````````````````````````````` example
 __foo__bar__baz__
 .

From 9a24044403268ab6c50177b6f3bf7724f8481700 Mon Sep 17 00:00:00 2001
From: IKEDA Soji <ikeda@conversion.co.jp>
Date: Tue, 27 Jun 2017 23:48:53 +0900
Subject: [PATCH 4/4] Copyedit on proposed spec.

---
 test/spec.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/spec.txt b/test/spec.txt
index b8cb27b5e..273c843eb 100644
--- a/test/spec.txt
+++ b/test/spec.txt
@@ -337,6 +337,8 @@ the general Unicode categories  `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
 
 An [East Asian punctuation character](@) is a [punctuation character] with
 the East_Asian_Width property values `A`, `F`, `H` or `W`.
+A [non-East Asian punctuation character](@) is a [punctuation character] with
+the East_Asian_Width property values `N` or `Na`.
 
 ## Tabs
 
@@ -5987,16 +5989,14 @@ a non-backslash-escaped `_` character.
 
 A [left-flanking delimiter run](@) is
 a [delimiter run] that is (a) not followed by [Unicode whitespace],
-and (b) not followed by a [punctuation character] which is not
-[East Asian punctuation character], or
+and (b) not followed by a [non-East Asian punctuation character], or
 preceded by [Unicode whitespace] or a [punctuation character].
 For purposes of this definition, the beginning and the end of
 the line count as Unicode whitespace.
 
 A [right-flanking delimiter run](@) is
 a [delimiter run] that is (a) not preceded by [Unicode whitespace],
-and (b) not preceded by a [punctuation character] which is not
-[East Asian punctuation character], or
+and (b) not preceded by a [non-East Asian punctuation character], or
 followed by [Unicode whitespace] or a [punctuation character].
 For purposes of this definition, the beginning and the end of
 the line count as Unicode whitespace.