Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

buffer: improve creation performance #6893

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 59 additions & 34 deletions lib/buffer.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,14 @@
'use strict';

const binding = process.binding('buffer');
const { isArrayBuffer } = process.binding('util');
const bindingObj = {};

class FastBuffer extends Uint8Array {}

FastBuffer.prototype.constructor = Buffer;
Buffer.prototype = FastBuffer.prototype;

exports.Buffer = Buffer;
exports.SlowBuffer = SlowBuffer;
exports.INSPECT_MAX_BYTES = 50;
Expand Down Expand Up @@ -63,24 +69,18 @@ Buffer.prototype.swap32 = function swap32() {
// do not own the ArrayBuffer allocator. Zero fill is always on in that case.
const zeroFill = bindingObj.zeroFill || [0];

function createBuffer(size, noZeroFill) {
if (noZeroFill)
zeroFill[0] = 0;

function createUnsafeBuffer(size) {
zeroFill[0] = 0;
try {
var ui8 = new Uint8Array(size);
return new FastBuffer(size);
} finally {
if (noZeroFill)
zeroFill[0] = 1;
zeroFill[0] = 1;
}

Object.setPrototypeOf(ui8, Buffer.prototype);
return ui8;
}

function createPool() {
poolSize = Buffer.poolSize;
allocPool = createBuffer(poolSize, true);
allocPool = createUnsafeBuffer(poolSize);
poolOffset = 0;
}
createPool();
Expand Down Expand Up @@ -138,7 +138,6 @@ Buffer.from = function(value, encodingOrOffset, length) {
return fromObject(value);
};

Object.setPrototypeOf(Buffer.prototype, Uint8Array.prototype);
Object.setPrototypeOf(Buffer, Uint8Array);

function assertSize(size) {
Expand All @@ -158,18 +157,16 @@ function assertSize(size) {
**/
Buffer.alloc = function(size, fill, encoding) {
assertSize(size);
if (size <= 0)
return createBuffer(size);
if (fill !== undefined) {
if (size > 0 && fill !== undefined) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does a separate check for 0 make sense here, i.e. size > 0 && fill !== undefined && fill !== 0?

Buffer.alloc(size, 0) is equivalent to Buffer.alloc(size), so just new FastBuffer(size) should work faster in that case.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, I think that would require benchmarking – createUnsafeBuffer() returns a slice from the pool, so I’d actually expect that to be faster than an extra typed array allocation.

Copy link
Member

@ChALkeR ChALkeR Jun 3, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@addaleax It's not just createUnsafeBuffer, it's createUnsafeBuffer + fill(0).
I believe that has been discussed before, and allocation was proven to be faster — that's why simple Buffer.alloc(size) does not use the pool.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ChALkeR I know there’s the extra fill() in there. But if you say it’s faster, I believe that :)

Copy link
Member

@ChALkeR ChALkeR Jun 3, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@addaleax Btw, nothing in this function uses slices from the pool. Perhaps it should?
Both new FastBuffer and createUnsafeBuffer just directly allocate a new instance.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, right. Maybe, but I’d leave that open for another PR, too, especially as it would introduce the subtle change that the return values of Buffer.alloc() would share their buffer property.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'm down for that change. The kernel can probably optimize calls to calloc() a hair better. Though not going to consider it a blocking change.

// Since we are filling anyway, don't zero fill initially.
// Only pay attention to encoding if it's a string. This
// prevents accidentally sending in a number that would
// be interpretted as a start offset.
return typeof encoding === 'string' ?
createBuffer(size, true).fill(fill, encoding) :
createBuffer(size, true).fill(fill);
if (typeof encoding !== 'string')
encoding = undefined;
return createUnsafeBuffer(size).fill(fill, encoding);
}
return createBuffer(size);
return new FastBuffer(size);
};

/**
Expand All @@ -188,15 +185,15 @@ Buffer.allocUnsafe = function(size) {
**/
Buffer.allocUnsafeSlow = function(size) {
assertSize(size);
return createBuffer(size, true);
return createUnsafeBuffer(size);
};

// If --zero-fill-buffers command line argument is set, a zero-filled
// buffer is returned.
function SlowBuffer(length) {
if (+length != length)
length = 0;
return createBuffer(+length, true);
return createUnsafeBuffer(+length);
}

Object.setPrototypeOf(SlowBuffer.prototype, Uint8Array.prototype);
Expand All @@ -205,7 +202,7 @@ Object.setPrototypeOf(SlowBuffer, Uint8Array);

function allocate(size) {
if (size <= 0) {
return createBuffer(0);
return new FastBuffer();
}
if (size < (Buffer.poolSize >>> 1)) {
if (size > (poolSize - poolOffset))
Expand All @@ -218,7 +215,7 @@ function allocate(size) {
// Even though this is checked above, the conditional is a safety net and
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Side note: this comment is irrelevant now, probably since dd67608, I overlooked it.
/cc @trevnorris

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like it can be removed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like it has returned after a rebase =).
It's not critical, though, that could be removed later.

// sanity check to prevent any subsequent typed array allocation from not
// being zero filled.
return createBuffer(size, true);
return createUnsafeBuffer(size);
}
}

Expand All @@ -231,7 +228,7 @@ function fromString(string, encoding) {
throw new TypeError('"encoding" must be a valid string encoding');

if (string.length === 0)
return Buffer.alloc(0);
return new FastBuffer();

var length = byteLength(string, encoding);

Expand All @@ -251,18 +248,30 @@ function fromArrayLike(obj) {
const length = obj.length;
const b = allocate(length);
for (var i = 0; i < length; i++)
b[i] = obj[i] & 255;
b[i] = obj[i];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why change this?

Copy link
Member Author

@RReverser RReverser May 22, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just another unnecessary indirection that slightly changes performance and, well, readability (we inherit from Uint8Array, which already performs this exact same normalization).

return b;
}

function fromArrayBuffer(obj, byteOffset, length) {
if (!isArrayBuffer(obj))
throw new TypeError('argument is not an ArrayBuffer');
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Basically, this error can only be raised if someone’s messing with the prototype of obj to make it look like an ArrayBuffer, right?

If the others in @nodejs/buffer agree I’d suggest dropping this check then.

Copy link
Member Author

@RReverser RReverser May 22, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, initially I didn't put it in here, but there's a test against it: https://github.com/nodejs/node/blob/master/test/parallel/test-buffer-arraybuffer.js#L39-L46

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That test even passes without this check now, the expected TypeError is thrown when accessing byteLength below, although with a different message (which would technically make dropping this a semver-major change…).

I assume the isArrayBuffer check was only added in the first place to avoid crashes on the C++ side, but that doesn’t seem to be a concern anymore if everything’s done in JS.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the expected TypeError is thrown when accessing byteLength below, although with a different message (which would technically make dropping this a semver-major change…)

Yes, that's what I meant by not passing the test. Anyway, I totally don't mind changing it if that's what others want.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume the isArrayBuffer check was only added in the first place to avoid crashes on the C++ side, but that doesn’t seem to be a concern anymore if everything’s done in JS.

Question is if we want to enforce an actual ArrayBuffer, or if users could passed a munged up object that looks like one. Personally I prefer to leave these in where possible b/c it makes code transitions between C++ and JS smoother (buffer has bounced all over the place in the last 3 years).


byteOffset >>>= 0;

if (typeof length === 'undefined')
return binding.createFromArrayBuffer(obj, byteOffset);
const maxLength = obj.byteLength - byteOffset;

if (maxLength <= 0)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be <. I know this is taken directly from the C++ code, but I think 85ab4a5 actually introduced a regression here:

Node v5.9.1:

> new Buffer(new Uint8Array().buffer)
<Buffer >

Node v5.10.1:

> new Buffer(new Uint8Array().buffer)
RangeError: 'offset' is out of bounds

@jasnell?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy to change if confirmed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup. Is regression. Nice catch.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should I change as part of this PR or as a separate one?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@RReverser Could always do a second commit here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually... Should I add a regression test for this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would be great, yes.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I got this correct — what happened here?

throw new RangeError("'offset' is out of bounds");

if (length === undefined) {
length = maxLength;
} else {
length >>>= 0;
if (length > maxLength)
throw new RangeError("'length' is out of bounds");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style nit: ' not " for strings.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, ESLint rules configured for the JS code seem to prohibit " except when otherwise leads to quote escaping inside of the string, so I tried to follow. Do you think '\'length\' is out of bounds' will look better? If so, happy to change, but believe that linter rules should be changed too to avoid confusion for future contributors.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we prefer

`'length' is out of bounds`

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ESLint complains against it too. I guess leaving as-is until the project styling rules are changed is the best option for now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure why '"length" is out of bounds' or just 'length is out of bounds' would be unreasonable. Not like not having quotes would lead to confusion.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thought the same reasoning as below:

It probably would be considered a major change. Landing that in a separate PR sounds good to me.

(given that error message becomes slightly different, and this might be a breaking change)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah bugger. annoying that such minor changes would cause this, but you're right.

}

length >>>= 0;
return binding.createFromArrayBuffer(obj, byteOffset, length);
return new FastBuffer(obj, byteOffset, length);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that you're directly extending Uint8Array, the checks above would only seem necessary if we're trying to preserve the error messages. Otherwise the Uint8Array constructor would catch it.

Copy link
Member Author

@RReverser RReverser May 22, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup. Similar to the isArrayBuffer discussion above:

the expected TypeError is thrown when accessing byteLength below, although with a different message (which would technically make dropping this a semver-major change…)

Would it be a semver-major change if I remove those checks and amend messages in tests correspondingly? If so, should I submit that in a separate PR so that this one could land as a minor change?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It probably would be considered a major change. Landing that in a separate PR sounds good to me.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool, will do in a separate one then.

}

function fromObject(obj) {
Expand All @@ -279,7 +288,7 @@ function fromObject(obj) {
if (obj) {
if (obj.buffer instanceof ArrayBuffer || 'length' in obj) {
if (typeof obj.length !== 'number' || obj.length !== obj.length) {
return allocate(0);
return new FastBuffer();
}
return fromArrayLike(obj);
}
Expand Down Expand Up @@ -346,7 +355,7 @@ Buffer.concat = function(list, length) {
throw new TypeError('"list" argument must be an Array of Buffers');

if (list.length === 0)
return Buffer.alloc(0);
return new FastBuffer();

if (length === undefined) {
length = 0;
Expand Down Expand Up @@ -823,10 +832,26 @@ Buffer.prototype.toJSON = function() {
};


function adjustOffset(offset, length) {
offset = +offset;
if (offset === 0 || Number.isNaN(offset)) {
return 0;
}
if (offset < 0) {
offset += length;
return offset > 0 ? offset : 0;
} else {
return offset < length ? offset : length;
}
}


Buffer.prototype.slice = function slice(start, end) {
const buffer = this.subarray(start, end);
Object.setPrototypeOf(buffer, Buffer.prototype);
return buffer;
const srcLength = this.length;
start = adjustOffset(start, srcLength);
end = end !== undefined ? adjustOffset(end, srcLength) : srcLength;
const newLength = end > start ? end - start : 0;
return new FastBuffer(this.buffer, this.byteOffset + start, newLength);
};


Expand Down
28 changes: 0 additions & 28 deletions src/node_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -427,33 +427,6 @@ void CreateFromString(const FunctionCallbackInfo<Value>& args) {
}


void CreateFromArrayBuffer(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
if (!args[0]->IsArrayBuffer())
return env->ThrowTypeError("argument is not an ArrayBuffer");
Local<ArrayBuffer> ab = args[0].As<ArrayBuffer>();

size_t ab_length = ab->ByteLength();
size_t offset;
size_t max_length;

CHECK_NOT_OOB(ParseArrayIndex(args[1], 0, &offset));
CHECK_NOT_OOB(ParseArrayIndex(args[2], ab_length - offset, &max_length));

if (offset >= ab_length)
return env->ThrowRangeError("'offset' is out of bounds");
if (max_length > ab_length - offset)
return env->ThrowRangeError("'length' is out of bounds");

Local<Uint8Array> ui = Uint8Array::New(ab, offset, max_length);
Maybe<bool> mb =
ui->SetPrototype(env->context(), env->buffer_prototype_object());
if (!mb.FromMaybe(false))
return env->ThrowError("Unable to set Object prototype");
args.GetReturnValue().Set(ui);
}


template <encoding encoding>
void StringSlice(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
Expand Down Expand Up @@ -1242,7 +1215,6 @@ void Initialize(Local<Object> target,

env->SetMethod(target, "setupBufferJS", SetupBufferJS);
env->SetMethod(target, "createFromString", CreateFromString);
env->SetMethod(target, "createFromArrayBuffer", CreateFromArrayBuffer);

env->SetMethod(target, "byteLengthUtf8", ByteLengthUtf8);
env->SetMethod(target, "compare", Compare);
Expand Down