Skip to content

Commit

Permalink
util,readline: NFC-normalize strings before getStringWidth
Browse files Browse the repository at this point in the history
The assumption here is that decomposed characters render like their
composed character equivalents, and that working with the former
comes with a risk of over-estimating string widths given that
we compute them on a per-code-point basis. The regression test
added here (한글 vs 한글) is an example of that happening.

PR-URL: #33052
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Michaël Zasso <targos@protonmail.com>
Reviewed-By: Anto Aravinth <anto.aravinth.cse@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
  • Loading branch information
addaleax authored and targos committed Apr 30, 2020
1 parent 8e02d77 commit 4f89c77
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 5 deletions.
15 changes: 10 additions & 5 deletions lib/internal/util/inspect.js
Original file line number Diff line number Diff line change
Expand Up @@ -1914,6 +1914,13 @@ function formatWithOptions(inspectOptions, ...args) {
return str;
}

function prepareStringForGetStringWidth(str, removeControlChars) {
str = str.normalize('NFC');
if (removeControlChars)
str = stripVTControlCharacters(str);
return str;
}

if (internalBinding('config').hasIntl) {
const icu = internalBinding('icu');
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
Expand All @@ -1923,8 +1930,8 @@ if (internalBinding('config').hasIntl) {
// the receiving end supports.
getStringWidth = function getStringWidth(str, removeControlChars = true) {
let width = 0;
if (removeControlChars)
str = stripVTControlCharacters(str);

str = prepareStringForGetStringWidth(str, removeControlChars);
for (let i = 0; i < str.length; i++) {
// Try to avoid calling into C++ by first handling the ASCII portion of
// the string. If it is fully ASCII, we skip the C++ part.
Expand All @@ -1944,9 +1951,7 @@ if (internalBinding('config').hasIntl) {
getStringWidth = function getStringWidth(str, removeControlChars = true) {
let width = 0;

if (removeControlChars)
str = stripVTControlCharacters(str);

str = prepareStringForGetStringWidth(str, removeControlChars);
for (const char of str) {
const code = char.codePointAt(0);
if (isFullWidthCodePoint(code)) {
Expand Down
9 changes: 9 additions & 0 deletions test/parallel/test-icu-stringwidth.js
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,12 @@ for (let i = 0; i < 256; i++) {
assert.strictEqual(getStringWidth(char), 1);
}
}

{
const a = '한글'.normalize('NFD'); // 한글
const b = '한글'.normalize('NFC'); // 한글
assert.strictEqual(a.length, 6);
assert.strictEqual(b.length, 2);
assert.strictEqual(getStringWidth(a), 4);
assert.strictEqual(getStringWidth(b), 4);
}

0 comments on commit 4f89c77

Please sign in to comment.