From 3be258c98eb16ca8f4e09f6f3b6883c8be818a3c Mon Sep 17 00:00:00 2001 From: Trevor Norris Date: Tue, 26 Jan 2016 16:37:31 -0700 Subject: [PATCH] buffer: add encoding parameter to fill() Can now call fill() using the following parameters if value is a String: fill(string[, start[, end]][, encoding]) And with the following if value is a Buffer: fill(buffer[, start[, end]]) The encoding is ignored if value is not a String. All other non-Buffer values are coerced to a uint32. Multibyte strings will simply be copied into the Buffer until the number of bytes runs out, meaning partial strings can be left behind: Buffer(3).fill('\u0222'); // returns: <Buffer c8 a2 c8> In some encoding cases, such as 'hex', fill() will throw if the input string is not valid. PR-URL: https://github.com/nodejs/node/pull/4935 Reviewed-By: James M Snell --- doc/api/buffer.markdown | 27 +++- lib/buffer.js | 46 ++++-- src/node_buffer.cc | 79 ++++++++-- src/util.cc | 23 +++ src/util.h | 27 ++++ test/parallel/test-buffer-fill.js | 241 ++++++++++++++++++++++++++++++ 6 files changed, 411 insertions(+), 32 deletions(-) create mode 100644 test/parallel/test-buffer-fill.js diff --git a/doc/api/buffer.markdown b/doc/api/buffer.markdown index 1da5bb83cb4303..3163aeac8ebc3d 100644 --- a/doc/api/buffer.markdown +++ b/doc/api/buffer.markdown @@ -471,16 +471,19 @@ console.log(buf1.equals(buf3)); // Prints: false ``` -### buf.fill(value[, offset[, end]]) +### buf.fill(value[, offset[, end]][, encoding]) -* `value` {String|Number} +* `value` {String|Buffer|Number} * `offset` {Number} Default: 0 -* `end` {Number} Default: `buffer.length` +* `end` {Number} Default: `buf.length` +* `encoding` {String} Default: `'utf8'` * Return: {Buffer} -Fills the Buffer with the specified value. If the `offset` and `end` are not -given it will fill the entire Buffer. The method returns a reference to the -Buffer so calls can be chained. +Fills the Buffer with the specified value. 
If the `offset` (defaults to `0`) +and `end` (defaults to `buf.length`) are not given the entire buffer will be +filled. The method returns a reference to the Buffer, so calls can be chained. +This is meant as a small simplification to creating a Buffer, allowing the +creation and fill of the Buffer to be done on a single line: ```js const b = new Buffer(50).fill('h'); @@ -488,6 +491,18 @@ console.log(b.toString()); // Prints: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh ``` +`encoding` is only relevant if `value` is a string. Otherwise it is ignored. +`value` is coerced to a `uint32` value if it is not a String or Number. + +The `fill()` operation writes bytes into the Buffer dumbly. If the final write +falls in between a multi-byte character then whatever bytes fit into the buffer +are written. + +```js +Buffer(3).fill('\u0222'); + // Prints: <Buffer c8 a2 c8> +``` + ### buf.indexOf(value[, byteOffset][, encoding]) * `value` {String|Buffer|Number} diff --git a/lib/buffer.js b/lib/buffer.js index 57725cc1fc7baa..766a82c2f21f0e 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -498,24 +498,48 @@ Buffer.prototype.includes = function includes(val, byteOffset, encoding) { }; -Buffer.prototype.fill = function fill(val, start, end) { - start = start >> 0; - end = (end === undefined) ? 
this.length : end >> 0; +// Usage: +// buffer.fill(number[, offset[, end]]) +// buffer.fill(buffer[, offset[, end]]) +// buffer.fill(string[, offset[, end]][, encoding]) +Buffer.prototype.fill = function fill(val, start, end, encoding) { + // Handle string cases: + if (typeof val === 'string') { + if (typeof start === 'string') { + encoding = start; + start = 0; + end = this.length; + } else if (typeof end === 'string') { + encoding = end; + end = this.length; + } + if (val.length === 1) { + var code = val.charCodeAt(0); + if (code < 256) + val = code; + } + if (encoding !== undefined && typeof encoding !== 'string') { + throw new TypeError('encoding must be a string'); + } + if (typeof encoding === 'string' && !Buffer.isEncoding(encoding)) { + throw new TypeError('Unknown encoding: ' + encoding); + } + } else if (typeof val === 'number') { + val = val & 255; + } + + // Invalid ranges are not set to a default, so can range check early. if (start < 0 || end > this.length) throw new RangeError('Out of range index'); + if (end <= start) return this; - if (typeof val !== 'string') { - val = val >>> 0; - } else if (val.length === 1) { - var code = val.charCodeAt(0); - if (code < 256) - val = code; - } + start = start >>> 0; + end = end === undefined ? 
this.length : end >>> 0; - binding.fill(this, val, start, end); + binding.fill(this, val, start, end, encoding); return this; }; diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 988e41dbc9aa22..026a04028a1eb8 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -570,42 +570,91 @@ void Copy(const FunctionCallbackInfo &args) { void Fill(const FunctionCallbackInfo& args) { - THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]); + Environment* env = Environment::GetCurrent(args); + + THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); SPREAD_ARG(args[0], ts_obj); size_t start = args[2]->Uint32Value(); size_t end = args[3]->Uint32Value(); - size_t length = end - start; - CHECK(length + start <= ts_obj_length); + size_t fill_length = end - start; + Local str_obj; + size_t str_length; + enum encoding enc; + CHECK(fill_length + start <= ts_obj_length); + + // First check if Buffer has been passed. + if (Buffer::HasInstance(args[1])) { + SPREAD_ARG(args[1], fill_obj); + str_length = fill_obj_length; + memcpy(ts_obj_data + start, fill_obj_data, MIN(str_length, fill_length)); + goto start_fill; + } - if (args[1]->IsNumber()) { + // Then coerce everything that's not a string. + if (!args[1]->IsString()) { int value = args[1]->Uint32Value() & 255; - memset(ts_obj_data + start, value, length); + memset(ts_obj_data + start, value, fill_length); return; } - node::Utf8Value str(args.GetIsolate(), args[1]); - size_t str_length = str.length(); - size_t in_there = str_length; - char* ptr = ts_obj_data + start + str_length; + str_obj = args[1]->ToString(env->isolate()); + enc = ParseEncoding(env->isolate(), args[4], UTF8); + str_length = + enc == UTF8 ? str_obj->Utf8Length() : + enc == UCS2 ? 
str_obj->Length() * sizeof(uint16_t) : str_obj->Length(); + + if (enc == HEX && str_length % 2 != 0) + return env->ThrowTypeError("Invalid hex string"); if (str_length == 0) return; - memcpy(ts_obj_data + start, *str, MIN(str_length, length)); + // Can't use StringBytes::Write() in all cases. For example if attempting + // to write a two byte character into a one byte Buffer. + if (enc == UTF8) { + node::Utf8Value str(env->isolate(), args[1]); + memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length)); - if (str_length >= length) + } else if (enc == UCS2) { + node::TwoByteValue str(env->isolate(), args[1]); + memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length)); + + } else { + // Write initial String to Buffer, then use that memory to copy remainder + // of string. Correct the string length for cases like HEX where less than + // the total string length is written. + str_length = StringBytes::Write(env->isolate(), + ts_obj_data + start, + fill_length, + str_obj, + enc, + nullptr); + // This check is also needed in case Write() returns that no bytes could + // be written. + // TODO(trevnorris): Should this throw? Because of the string length was + // greater than 0 but couldn't be written then the string was invalid. 
+ if (str_length == 0) + return; + } + + start_fill: + + if (str_length >= fill_length) return; - while (in_there < length - in_there) { + + size_t in_there = str_length; + char* ptr = ts_obj_data + start + str_length; + + while (in_there < fill_length - in_there) { memcpy(ptr, ts_obj_data + start, in_there); ptr += in_there; in_there *= 2; } - if (in_there < length) { - memcpy(ptr, ts_obj_data + start, length - in_there); - in_there = length; + if (in_there < fill_length) { + memcpy(ptr, ts_obj_data + start, fill_length - in_there); } } diff --git a/src/util.cc b/src/util.cc index 903fbbba134bd9..095e5582db0dfa 100644 --- a/src/util.cc +++ b/src/util.cc @@ -25,4 +25,27 @@ Utf8Value::Utf8Value(v8::Isolate* isolate, v8::Local value) str_[length_] = '\0'; } + +TwoByteValue::TwoByteValue(v8::Isolate* isolate, v8::Local value) + : length_(0), str_(str_st_) { + if (value.IsEmpty()) + return; + + v8::Local string = value->ToString(isolate); + if (string.IsEmpty()) + return; + + // Allocate enough space to include the null terminator + size_t len = StringBytes::StorageSize(isolate, string, UCS2) + 1; + if (len > sizeof(str_st_)) { + str_ = static_cast(malloc(len)); + CHECK_NE(str_, nullptr); + } + + const int flags = + v8::String::NO_NULL_TERMINATION | v8::String::REPLACE_INVALID_UTF8; + length_ = string->Write(str_, 0, len, flags); + str_[length_] = '\0'; +} + } // namespace node diff --git a/src/util.h b/src/util.h index 7b2bc0f1a270ba..84d0b5a170d94c 100644 --- a/src/util.h +++ b/src/util.h @@ -205,6 +205,33 @@ class Utf8Value { char str_st_[1024]; }; +class TwoByteValue { + public: + explicit TwoByteValue(v8::Isolate* isolate, v8::Local value); + + ~TwoByteValue() { + if (str_ != str_st_) + free(str_); + } + + uint16_t* operator*() { + return str_; + }; + + const uint16_t* operator*() const { + return str_; + }; + + size_t length() const { + return length_; + }; + + private: + size_t length_; + uint16_t* str_; + uint16_t str_st_[1024]; +}; + } // namespace node #endif 
// SRC_UTIL_H_ diff --git a/test/parallel/test-buffer-fill.js b/test/parallel/test-buffer-fill.js new file mode 100644 index 00000000000000..4ff0b12a46c227 --- /dev/null +++ b/test/parallel/test-buffer-fill.js @@ -0,0 +1,241 @@ +'use strict'; + +require('../common'); +const assert = require('assert'); +const os = require('os'); +const SIZE = 28; + +const buf1 = Buffer(SIZE); +const buf2 = Buffer(SIZE); + + +// Default encoding +testBufs('abc'); +testBufs('\u0222aa'); +testBufs('a\u0234b\u0235c\u0236'); +testBufs('abc', 4); +testBufs('abc', 5); +testBufs('abc', SIZE); +testBufs('\u0222aa', 2); +testBufs('\u0222aa', 8); +testBufs('a\u0234b\u0235c\u0236', 4); +testBufs('a\u0234b\u0235c\u0236', 12); +testBufs('abc', 4, -1); +testBufs('abc', 4, 1); +testBufs('abc', 5, 1); +testBufs('\u0222aa', 2, -1); +testBufs('\u0222aa', 8, 1); +testBufs('a\u0234b\u0235c\u0236', 4, -1); +testBufs('a\u0234b\u0235c\u0236', 4, 1); +testBufs('a\u0234b\u0235c\u0236', 12, 1); + + +// UTF8 +testBufs('abc', 'utf8'); +testBufs('\u0222aa', 'utf8'); +testBufs('a\u0234b\u0235c\u0236', 'utf8'); +testBufs('abc', 4, 'utf8'); +testBufs('abc', 5, 'utf8'); +testBufs('abc', SIZE, 'utf8'); +testBufs('\u0222aa', 2, 'utf8'); +testBufs('\u0222aa', 8, 'utf8'); +testBufs('a\u0234b\u0235c\u0236', 4, 'utf8'); +testBufs('a\u0234b\u0235c\u0236', 12, 'utf8'); +testBufs('abc', 4, -1, 'utf8'); +testBufs('abc', 4, 1, 'utf8'); +testBufs('abc', 5, 1, 'utf8'); +testBufs('\u0222aa', 2, -1, 'utf8'); +testBufs('\u0222aa', 8, 1, 'utf8'); +testBufs('a\u0234b\u0235c\u0236', 4, -1, 'utf8'); +testBufs('a\u0234b\u0235c\u0236', 4, 1, 'utf8'); +testBufs('a\u0234b\u0235c\u0236', 12, 1, 'utf8'); +assert.equal(Buffer(1).fill(0).fill('\u0222')[0], 0xc8); + + +// BINARY +testBufs('abc', 'binary'); +testBufs('\u0222aa', 'binary'); +testBufs('a\u0234b\u0235c\u0236', 'binary'); +testBufs('abc', 4, 'binary'); +testBufs('abc', 5, 'binary'); +testBufs('abc', SIZE, 'binary'); +testBufs('\u0222aa', 2, 'binary'); +testBufs('\u0222aa', 8, 
'binary'); +testBufs('a\u0234b\u0235c\u0236', 4, 'binary'); +testBufs('a\u0234b\u0235c\u0236', 12, 'binary'); +testBufs('abc', 4, -1, 'binary'); +testBufs('abc', 4, 1, 'binary'); +testBufs('abc', 5, 1, 'binary'); +testBufs('\u0222aa', 2, -1, 'binary'); +testBufs('\u0222aa', 8, 1, 'binary'); +testBufs('a\u0234b\u0235c\u0236', 4, -1, 'binary'); +testBufs('a\u0234b\u0235c\u0236', 4, 1, 'binary'); +testBufs('a\u0234b\u0235c\u0236', 12, 1, 'binary'); + + +// UCS2 +testBufs('abc', 'ucs2'); +testBufs('\u0222aa', 'ucs2'); +testBufs('a\u0234b\u0235c\u0236', 'ucs2'); +testBufs('abc', 4, 'ucs2'); +testBufs('abc', SIZE, 'ucs2'); +testBufs('\u0222aa', 2, 'ucs2'); +testBufs('\u0222aa', 8, 'ucs2'); +testBufs('a\u0234b\u0235c\u0236', 4, 'ucs2'); +testBufs('a\u0234b\u0235c\u0236', 12, 'ucs2'); +testBufs('abc', 4, -1, 'ucs2'); +testBufs('abc', 4, 1, 'ucs2'); +testBufs('abc', 5, 1, 'ucs2'); +testBufs('\u0222aa', 2, -1, 'ucs2'); +testBufs('\u0222aa', 8, 1, 'ucs2'); +testBufs('a\u0234b\u0235c\u0236', 4, -1, 'ucs2'); +testBufs('a\u0234b\u0235c\u0236', 4, 1, 'ucs2'); +testBufs('a\u0234b\u0235c\u0236', 12, 1, 'ucs2'); +assert.equal(Buffer(1).fill('\u0222', 'ucs2')[0], + os.endianness() === 'LE' ? 
0x22 : 0x02); + + +// HEX +testBufs('616263', 'hex'); +testBufs('c8a26161', 'hex'); +testBufs('61c8b462c8b563c8b6', 'hex'); +testBufs('616263', 4, 'hex'); +testBufs('616263', 5, 'hex'); +testBufs('616263', SIZE, 'hex'); +testBufs('c8a26161', 2, 'hex'); +testBufs('c8a26161', 8, 'hex'); +testBufs('61c8b462c8b563c8b6', 4, 'hex'); +testBufs('61c8b462c8b563c8b6', 12, 'hex'); +testBufs('616263', 4, -1, 'hex'); +testBufs('616263', 4, 1, 'hex'); +testBufs('616263', 5, 1, 'hex'); +testBufs('c8a26161', 2, -1, 'hex'); +testBufs('c8a26161', 8, 1, 'hex'); +testBufs('61c8b462c8b563c8b6', 4, -1, 'hex'); +testBufs('61c8b462c8b563c8b6', 4, 1, 'hex'); +testBufs('61c8b462c8b563c8b6', 12, 1, 'hex'); +// Make sure this operation doesn't go on forever +buf1.fill('yKJh', 'hex'); +assert.throws(() => buf1.fill('\u0222', 'hex')); + + +// BASE64 +testBufs('YWJj', 'ucs2'); +testBufs('yKJhYQ==', 'ucs2'); +testBufs('Yci0Ysi1Y8i2', 'ucs2'); +testBufs('YWJj', 4, 'ucs2'); +testBufs('YWJj', SIZE, 'ucs2'); +testBufs('yKJhYQ==', 2, 'ucs2'); +testBufs('yKJhYQ==', 8, 'ucs2'); +testBufs('Yci0Ysi1Y8i2', 4, 'ucs2'); +testBufs('Yci0Ysi1Y8i2', 12, 'ucs2'); +testBufs('YWJj', 4, -1, 'ucs2'); +testBufs('YWJj', 4, 1, 'ucs2'); +testBufs('YWJj', 5, 1, 'ucs2'); +testBufs('yKJhYQ==', 2, -1, 'ucs2'); +testBufs('yKJhYQ==', 8, 1, 'ucs2'); +testBufs('Yci0Ysi1Y8i2', 4, -1, 'ucs2'); +testBufs('Yci0Ysi1Y8i2', 4, 1, 'ucs2'); +testBufs('Yci0Ysi1Y8i2', 12, 1, 'ucs2'); + + +// Buffer +const buf2Fill = Buffer(1).fill(2); +assert.deepEqual(genBuffer(4, [buf2Fill]), [2, 2, 2, 2]); +assert.deepEqual(genBuffer(4, [buf2Fill, 1]), [0, 2, 2, 2]); +assert.deepEqual(genBuffer(4, [buf2Fill, 1, 3]), [0, 2, 2, 0]); +assert.deepEqual(genBuffer(4, [buf2Fill, 1, 1]), [0, 0, 0, 0]); +assert.deepEqual(genBuffer(4, [buf2Fill, 1, -1]), [0, 0, 0, 0]); +const hexBufFill = Buffer(2).fill(0).fill('0102', 'hex'); +assert.deepEqual(genBuffer(4, [hexBufFill]), [1, 2, 1, 2]); +assert.deepEqual(genBuffer(4, [hexBufFill, 1]), [0, 1, 2, 1]); 
+assert.deepEqual(genBuffer(4, [hexBufFill, 1, 3]), [0, 1, 2, 0]); +assert.deepEqual(genBuffer(4, [hexBufFill, 1, 1]), [0, 0, 0, 0]); +assert.deepEqual(genBuffer(4, [hexBufFill, 1, -1]), [0, 0, 0, 0]); + + +// Check exceptions +assert.throws(() => buf1.fill(0, -1)); +assert.throws(() => buf1.fill(0, 0, buf1.length + 1)); +assert.throws(() => buf1.fill('', -1)); +assert.throws(() => buf1.fill('', 0, buf1.length + 1)); +assert.throws(() => buf1.fill('a', 0, buf1.length, 'node rocks!')); +assert.throws(() => buf1.fill('a', 0, 0, NaN)); +assert.throws(() => buf1.fill('a', 0, 0, null)); +assert.throws(() => buf1.fill('a', 0, 0, 'foo')); + + +function genBuffer(size, args) { + const b = Buffer(size); + return b.fill(0).fill.apply(b, args); +} + + +function bufReset() { + buf1.fill(0); + buf2.fill(0); +} + + +// This is mostly accurate. Except write() won't write partial bytes to the +// string while fill() blindly copies bytes into memory. To account for that an +// error will be thrown if not all the data can be written, and the SIZE has +// been massaged to work with the input characters. +function writeToFill(string, offset, end, encoding) { + if (typeof offset === 'string') { + encoding = offset; + offset = 0; + end = buf2.length; + } else if (typeof end === 'string') { + encoding = end; + end = buf2.length; + } else if (end === undefined) { + end = buf2.length; + } + + if (offset < 0 || end > buf2.length) + throw new RangeError('Out of range index'); + + if (end <= offset) + return buf2; + + offset >>>= 0; + end >>>= 0; + assert(offset <= buf2.length); + + // Convert "end" to "length" (which write understands). + const length = end - offset < 0 ? 0 : end - offset; + + var wasZero = false; + do { + const written = buf2.write(string, offset, length, encoding); + offset += written; + // Safety check in case write falls into infinite loop. 
+ if (written === 0) { + if (wasZero) + throw new Error('Could not write all data to Buffer'); + else + wasZero = true; + } + } while (offset < buf2.length); + + // Correction for UCS2 operations. + if (os.endianness() === 'BE' && encoding === 'ucs2') { + for (var i = 0; i < buf2.length; i += 2) { + var tmp = buf2[i]; + buf2[i] = buf2[i + 1]; + buf2[i + 1] = tmp; + } + } + + return buf2; +} + + +function testBufs(string, offset, length, encoding) { + bufReset(); + buf1.fill.apply(buf1, arguments); + // Swap bytes on BE archs for ucs2 encoding. + assert.deepStrictEqual(buf1.fill.apply(buf1, arguments), + writeToFill.apply(null, arguments)); +}