From 7e2e2ef2be72a51e8fb77b3d821e58c0525100d8 Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Sun, 25 Aug 2019 03:07:09 +0200 Subject: [PATCH 1/4] lib: add ASCII fast path to getStringWidth() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A lot of strings that are going to be passed to `getStringWidth()` are ASCII strings, for which the calculation is rather easy and calling into C++ can be skipped. confidence improvement accuracy (*) (**) (***) misc/getstringwidth.js n=100000 type='ascii' *** 328.99 % ±21.73% ±29.25% ±38.77% misc/getstringwidth.js n=100000 type='emojiseq' 2.94 % ±7.66% ±10.19% ±13.26% misc/getstringwidth.js n=100000 type='fullwidth' 4.70 % ±5.64% ±7.50% ±9.76% --- benchmark/misc/getstringwidth.js | 25 +++++++++++++++++++++ lib/internal/readline/utils.js | 32 +++++++++++++++++++++------ test/parallel/test-icu-stringwidth.js | 14 ++++++++++++ 3 files changed, 64 insertions(+), 7 deletions(-) create mode 100644 benchmark/misc/getstringwidth.js diff --git a/benchmark/misc/getstringwidth.js b/benchmark/misc/getstringwidth.js new file mode 100644 index 00000000000000..8d3a763f9a7b94 --- /dev/null +++ b/benchmark/misc/getstringwidth.js @@ -0,0 +1,25 @@ +'use strict'; + +const common = require('../common.js'); + +const bench = common.createBenchmark(main, { + type: ['ascii', 'emojiseq', 'fullwidth'], + n: [10e4] +}, { + flags: ['--expose-internals'] +}); + +function main({ n, type }) { + const { getStringWidth } = require('internal/readline/utils'); + + const str = ({ + ascii: 'foobar'.repeat(100), + emojiseq: '👨‍👨‍👧‍👦👨‍👩‍👦‍👦👨‍👩‍👧‍👧👩‍👩‍👧‍👦'.repeat(10), + fullwidth: '你好'.repeat(150) + })[type]; + + bench.start(); + for (let j = 0; j < n; j += 1) + getStringWidth(str); + bench.end(n); +} diff --git a/lib/internal/readline/utils.js b/lib/internal/readline/utils.js index c6cd13a6bd19eb..fdae68b94e8edd 100644 --- a/lib/internal/readline/utils.js +++ b/lib/internal/readline/utils.js @@ -34,13 +34,31 @@ if (internalBinding('config').hasIntl) { const icu = internalBinding('icu'); getStringWidth = function getStringWidth(str, options) { options = options || {}; - if (!Number.isInteger(str)) - str = stripVTControlCharacters(String(str)); - return icu.getStringWidth( - str, - Boolean(options.ambiguousAsFullWidth), - Boolean(options.expandEmojiSequence) - ); + if (Number.isInteger(str)) { + return icu.getStringWidth( + str, + Boolean(options.ambiguousAsFullWidth), + Boolean(options.expandEmojiSequence) + ); + } + str = stripVTControlCharacters(String(str)); + let width = 0; + for (let i = 0; i < str.length; i++) { + // Try to avoid calling into C++ by first handling the ASCII portion of + // the string. If it is fully ASCII, we skip the C++ part. + const code = str.charCodeAt(i); + if (code < 127) { + width += code >= 32; + continue; + } + width += icu.getStringWidth( + str.slice(i), + Boolean(options.ambiguousAsFullWidth), + Boolean(options.expandEmojiSequence) + ); + break; + } + return width; }; isFullWidthCodePoint = function isFullWidthCodePoint(code, options) { diff --git a/test/parallel/test-icu-stringwidth.js b/test/parallel/test-icu-stringwidth.js index 0620d3af3934ca..02739e41041bb5 100644 --- a/test/parallel/test-icu-stringwidth.js +++ b/test/parallel/test-icu-stringwidth.js @@ -69,3 +69,17 @@ assert.strictEqual( // Control chars and combining chars are zero assert.strictEqual(readline.getStringWidth('\u200E\n\u220A\u20D2'), 1); + +// Test that the fast path for ASCII characters yields results consistent +// with the 'slow' path. +for (const ambiguousAsFullWidth of [ false, true ]) { + for (let i = 0; i < 256; i++) { + const char = String.fromCharCode(i); + assert.strictEqual( + readline.getStringWidth(i, { ambiguousAsFullWidth }), + readline.getStringWidth(char, { ambiguousAsFullWidth })); + assert.strictEqual( + readline.getStringWidth(char + '🎉', { ambiguousAsFullWidth }), + readline.getStringWidth(char, { ambiguousAsFullWidth }) + 2); + } +} From 9f726cbb54756684580a7fb5f15704573d35b90a Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Sun, 25 Aug 2019 14:21:38 +0200 Subject: [PATCH 2/4] fixup! lib: add ASCII fast path to getStringWidth() --- lib/internal/readline/utils.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/internal/readline/utils.js b/lib/internal/readline/utils.js index fdae68b94e8edd..f72a03bb3915f4 100644 --- a/lib/internal/readline/utils.js +++ b/lib/internal/readline/utils.js @@ -35,10 +35,11 @@ if (internalBinding('config').hasIntl) { getStringWidth = function getStringWidth(str, options) { options = options || {}; if (Number.isInteger(str)) { + // Provide information about the character with code point 'str'. return icu.getStringWidth( str, Boolean(options.ambiguousAsFullWidth), - Boolean(options.expandEmojiSequence) + false ); } str = stripVTControlCharacters(String(str)); From 8afd07e623069e26d80dfc9728e69d9695444c42 Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Sun, 25 Aug 2019 14:44:56 +0200 Subject: [PATCH 3/4] fixup! lib: add ASCII fast path to getStringWidth() --- test/parallel/test-icu-stringwidth.js | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/parallel/test-icu-stringwidth.js b/test/parallel/test-icu-stringwidth.js index 02739e41041bb5..48384f916d9126 100644 --- a/test/parallel/test-icu-stringwidth.js +++ b/test/parallel/test-icu-stringwidth.js @@ -81,5 +81,13 @@ for (const ambiguousAsFullWidth of [ false, true ]) { assert.strictEqual( readline.getStringWidth(char + '🎉', { ambiguousAsFullWidth }), readline.getStringWidth(char, { ambiguousAsFullWidth }) + 2); + + if (i < 32 || (i >= 127 && i < 160)) { // Control character + assert.strictEqual( + readline.getStringWidth(i, { ambiguousAsFullWidth }), 0); + } else if (i < 127) { // Regular ASCII character + assert.strictEqual( + readline.getStringWidth(i, { ambiguousAsFullWidth }), 1); + } } } From 1a90c4cf5710644198230f6c8e5b38feceae7e09 Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Fri, 30 Aug 2019 18:56:03 +0200 Subject: [PATCH 4/4] fixup! lib: add ASCII fast path to getStringWidth() --- benchmark/misc/getstringwidth.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmark/misc/getstringwidth.js b/benchmark/misc/getstringwidth.js index 8d3a763f9a7b94..12f071c60dd7eb 100644 --- a/benchmark/misc/getstringwidth.js +++ b/benchmark/misc/getstringwidth.js @@ -3,7 +3,7 @@ const common = require('../common.js'); const bench = common.createBenchmark(main, { - type: ['ascii', 'emojiseq', 'fullwidth'], + type: ['ascii', 'mixed', 'emojiseq', 'fullwidth'], n: [10e4] }, { flags: ['--expose-internals'] @@ -14,6 +14,7 @@ function main({ n, type }) { const str = ({ ascii: 'foobar'.repeat(100), + mixed: 'foo'.repeat(100) + '😀' + 'bar'.repeat(100), emojiseq: '👨‍👨‍👧‍👦👨‍👩‍👦‍👦👨‍👩‍👧‍👧👩‍👩‍👧‍👦'.repeat(10), fullwidth: '你好'.repeat(150) })[type];