From a3aa71a79e7c33005664377cff41fdf2995a9ec1 Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Sat, 25 Apr 2020 02:31:09 +0200 Subject: [PATCH] util,readline: NFC-normalize strings before getStringWidth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The assumption here is that decomposed characters render like their composed character equivalents, and that working with the former comes with a risk of over-estimating string widths given that we compute them on a per-code-point basis. The regression test added here (한글 vs 한글) is an example of that happening. PR-URL: https://github.com/nodejs/node/pull/33052 Reviewed-By: Gus Caplan Reviewed-By: Michaël Zasso Reviewed-By: Anto Aravinth Reviewed-By: Colin Ihrig Reviewed-By: James M Snell --- lib/internal/util/inspect.js | 15 ++++++++++----- test/parallel/test-icu-stringwidth.js | 9 +++++++++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/lib/internal/util/inspect.js b/lib/internal/util/inspect.js index 862aed58b3d753..10242f2b5ef569 100644 --- a/lib/internal/util/inspect.js +++ b/lib/internal/util/inspect.js @@ -1917,6 +1917,13 @@ function formatWithOptionsInternal(inspectOptions, ...args) { return str; } +function prepareStringForGetStringWidth(str, removeControlChars) { + str = str.normalize('NFC'); + if (removeControlChars) + str = stripVTControlCharacters(str); + return str; +} + if (internalBinding('config').hasIntl) { const icu = internalBinding('icu'); // icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence) @@ -1926,8 +1933,8 @@ if (internalBinding('config').hasIntl) { // the receiving end supports. getStringWidth = function getStringWidth(str, removeControlChars = true) { let width = 0; - if (removeControlChars) - str = stripVTControlCharacters(str); + + str = prepareStringForGetStringWidth(str, removeControlChars); for (let i = 0; i < str.length; i++) { // Try to avoid calling into C++ by first handling the ASCII portion of // the string. If it is fully ASCII, we skip the C++ part. @@ -1947,9 +1954,7 @@ if (internalBinding('config').hasIntl) { getStringWidth = function getStringWidth(str, removeControlChars = true) { let width = 0; - if (removeControlChars) - str = stripVTControlCharacters(str); - + str = prepareStringForGetStringWidth(str, removeControlChars); for (const char of str) { const code = char.codePointAt(0); if (isFullWidthCodePoint(code)) { diff --git a/test/parallel/test-icu-stringwidth.js b/test/parallel/test-icu-stringwidth.js index 66d75c4cbe2cb2..4e8389961d0d6d 100644 --- a/test/parallel/test-icu-stringwidth.js +++ b/test/parallel/test-icu-stringwidth.js @@ -87,3 +87,12 @@ for (let i = 0; i < 256; i++) { assert.strictEqual(getStringWidth(char), 1); } } + +{ + const a = '한글'.normalize('NFD'); // 한글 + const b = '한글'.normalize('NFC'); // 한글 + assert.strictEqual(a.length, 6); + assert.strictEqual(b.length, 2); + assert.strictEqual(getStringWidth(a), 4); + assert.strictEqual(getStringWidth(b), 4); +}