Skip to content

Commit

Permalink
fs: improve promise based readFile performance for big files
Browse files Browse the repository at this point in the history
This significantly reduces the peak memory for the promise
based readFile operation by reusing a single memory chunk after
each read and stringifying that chunk immediately.

Signed-off-by: Ruben Bridgewater <ruben@bridgewater.de>
  • Loading branch information
BridgeAR committed Aug 20, 2022
1 parent 5e57d24 commit e72802b
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 38 deletions.
14 changes: 11 additions & 3 deletions benchmark/fs/readfile-promises.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,19 @@ const filename = path.resolve(tmpdir.path,

const bench = common.createBenchmark(main, {
duration: [5],
len: [1024, 16 * 1024 * 1024],
len: [
1024,
512 * 1024,
4 * 1024 ** 2,
8 * 1024 ** 2,
16 * 1024 ** 2,
32 * 1024 ** 2,
],
encoding: ['', 'utf8'],
concurrent: [1, 10]
});

function main({ len, duration, concurrent }) {
function main({ len, duration, encoding, concurrent }) {
try {
fs.unlinkSync(filename);
} catch {
Expand All @@ -44,7 +52,7 @@ function main({ len, duration, concurrent }) {
}, duration * 1000);

function read() {
fs.promises.readFile(filename)
fs.promises.readFile(filename, { encoding })
.then((res) => afterRead(undefined, res))
.catch((err) => afterRead(err));
}
Expand Down
6 changes: 6 additions & 0 deletions lib/fs.js
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,9 @@ function readFileAfterStat(err, stats) {
if (err)
return context.close(err);

// TODO(BridgeAR): Check if allocating a smaller chunk is better
// performance-wise, similar to the promise based version (less peak memory
// and chunked stringify operations vs multiple C++/JS boundary crossings).
const size = context.size = isFileType(stats, S_IFREG) ? stats[8] : 0;

if (size > kIoMaxLength) {
Expand All @@ -352,6 +355,9 @@ function readFileAfterStat(err, stats) {

try {
if (size === 0) {
// TODO(BridgeAR): We are able to optimize this in case an encoding is used. If
// that's the case, let's use the StringDecoder to directly concat the
// result and reuse the former chunk instead of allocating a new one.
context.buffers = [];
} else {
context.buffer = Buffer.allocUnsafeSlow(size);
Expand Down
84 changes: 49 additions & 35 deletions lib/internal/fs/promises.js
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ const {
promisify,
} = require('internal/util');
const { EventEmitterMixin } = require('internal/event_target');
const { StringDecoder } = require('string_decoder');
const { watch } = require('internal/fs/watchers');
const { isIterable } = require('internal/streams/utils');
const assert = require('internal/assert');
Expand Down Expand Up @@ -416,63 +417,76 @@ async function writeFileHandle(filehandle, data, signal, encoding) {

async function readFileHandle(filehandle, options) {
const signal = options?.signal;
const encoding = options?.encoding;
const decoder = encoding && new StringDecoder(encoding);

checkAborted(signal);

const statFields = await binding.fstat(filehandle.fd, false, kUsePromises);

checkAborted(signal);

let size;
let size = 0;
let length = 0;
if ((statFields[1/* mode */] & S_IFMT) === S_IFREG) {
size = statFields[8/* size */];
} else {
size = 0;
length = encoding ? MathMin(size, kReadFileBufferLength) : size;
}
if (length === 0) {
length = kReadFileUnknownBufferLength;
}

if (size > kIoMaxLength)
throw new ERR_FS_FILE_TOO_LARGE(size);

let endOfFile = false;
let totalRead = 0;
const noSize = size === 0;
const buffers = [];
const fullBuffer = noSize ? undefined : Buffer.allocUnsafeSlow(size);
do {
let buffer = Buffer.allocUnsafeSlow(length);
let result = '';
let offset = 0;
let buffers;

while (true) {
checkAborted(signal);
let buffer;
let offset;
let length;
if (noSize) {
buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
offset = 0;
length = kReadFileUnknownBufferLength;
} else {
buffer = fullBuffer;
offset = totalRead;
length = MathMin(size - totalRead, kReadFileBufferLength);
}

const bytesRead = (await binding.read(filehandle.fd, buffer, offset,
length, -1, kUsePromises)) || 0;
length, -1, kUsePromises)) ?? 0;
totalRead += bytesRead;
endOfFile = bytesRead === 0 || totalRead === size;
if (noSize && bytesRead > 0) {
const isBufferFull = bytesRead === kReadFileUnknownBufferLength;
const chunkBuffer = isBufferFull ? buffer : buffer.slice(0, bytesRead);
ArrayPrototypePush(buffers, chunkBuffer);

if (bytesRead === 0 || totalRead === size || bytesRead !== buffer.length) {
const singleRead = bytesRead === totalRead;

if (bytesRead !== buffer.length) {
buffer = buffer.slice(0, bytesRead);
}

if (!encoding) {
if (size === 0 && !singleRead) {
ArrayPrototypePush(buffers, buffer);
return Buffer.concat(buffers, totalRead);
}
return buffer;
}

if (singleRead) {
return buffer.toString(encoding);
}
result += decoder.end(buffer);
return result;
}
} while (!endOfFile);

let result;
if (size > 0) {
result = totalRead === size ? fullBuffer : fullBuffer.slice(0, totalRead);
} else {
result = buffers.length === 1 ? buffers[0] : Buffer.concat(buffers,
totalRead);
if (encoding) {
result += decoder.write(buffer);
} else if (size !== 0) {
// TODO(BridgeAR): This condition needs a test. A file should be written
// that is chunked without encoding.
offset += bytesRead;
} else {
buffers ??= [];
// Unknown file size requires chunks.
ArrayPrototypePush(buffers, buffer);
buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
}
}

return options.encoding ? result.toString(options.encoding) : result;
}

// All of the functions are defined as async in order to ensure that errors
Expand Down

0 comments on commit e72802b

Please sign in to comment.