From 3f028d6e0cf22d8429c2c2f4bc99464a1ed3089c Mon Sep 17 00:00:00 2001
From: Ruben Bridgewater
Date: Fri, 19 Aug 2022 17:07:19 +0200
Subject: [PATCH] fs: improve promise based readFile performance for big files

This significantly reduces the peak memory for the promise based readFile
operation by reusing a single memory chunk after each read and stringifying
that chunk immediately.

Signed-off-by: Ruben Bridgewater
---
 benchmark/fs/readfile-promises.js |  9 +++-
 lib/fs.js                         |  5 ++
 lib/internal/fs/promises.js       | 89 +++++++++++++++++++------------
 3 files changed, 69 insertions(+), 34 deletions(-)

diff --git a/benchmark/fs/readfile-promises.js b/benchmark/fs/readfile-promises.js
index 0fa92fdffad78d..af14509e710f7a 100644
--- a/benchmark/fs/readfile-promises.js
+++ b/benchmark/fs/readfile-promises.js
@@ -16,7 +16,14 @@ const filename = path.resolve(tmpdir.path,
 const bench = common.createBenchmark(main, {
   duration: [5],
   encoding: ['', 'utf-8'],
-  len: [1024, 16 * 1024 * 1024],
+  len: [
+    1024,
+    512 * 1024,
+    4 * 1024 ** 2,
+    8 * 1024 ** 2,
+    16 * 1024 ** 2,
+    32 * 1024 ** 2,
+  ],
   concurrent: [1, 10]
 });
 
diff --git a/lib/fs.js b/lib/fs.js
index fc9b630e81208c..5e110eef17dd8b 100644
--- a/lib/fs.js
+++ b/lib/fs.js
@@ -331,6 +331,9 @@ function readFileAfterStat(err, stats) {
   if (err)
     return context.close(err);
 
+  // TODO(BridgeAR): Check if allocating a smaller chunk is better performance
+  // wise, similar to the promise based version (less peak memory and chunked
+  // stringify operations vs multiple C++/JS boundary crossings).
   const size = context.size = isFileType(stats, S_IFREG) ? stats[8] : 0;
 
   if (size > kIoMaxLength) {
@@ -340,6 +343,8 @@
 
   try {
     if (size === 0) {
+      // TODO(BridgeAR): If an encoding is set, use the StringDecoder to concat
+      // the result and reuse the buffer instead of allocating a new one.
       context.buffers = [];
     } else {
       context.buffer = Buffer.allocUnsafeSlow(size);
diff --git a/lib/internal/fs/promises.js b/lib/internal/fs/promises.js
index 39f9cfc6c2e1c7..dacaea2eef7b93 100644
--- a/lib/internal/fs/promises.js
+++ b/lib/internal/fs/promises.js
@@ -87,6 +87,7 @@ const {
   promisify,
 } = require('internal/util');
 const { EventEmitterMixin } = require('internal/event_target');
+const { StringDecoder } = require('string_decoder');
 const { watch } = require('internal/fs/watchers');
 const { isIterable } = require('internal/streams/utils');
 const assert = require('internal/assert');
@@ -419,6 +420,8 @@ async function writeFileHandle(filehandle, data, signal, encoding) {
 
 async function readFileHandle(filehandle, options) {
   const signal = options?.signal;
+  const encoding = options?.encoding;
+  const decoder = encoding && new StringDecoder(encoding);
 
   checkAborted(signal);
 
@@ -426,56 +429,76 @@ async function readFileHandle(filehandle, options) {
 
   checkAborted(signal);
 
-  let size;
+  let size = 0;
+  let length = 0;
   if ((statFields[1/* mode */] & S_IFMT) === S_IFREG) {
     size = statFields[8/* size */];
-  } else {
-    size = 0;
+    length = encoding ? MathMin(size, kReadFileBufferLength) : size;
+  }
+  if (length === 0) {
+    length = kReadFileUnknownBufferLength;
   }
 
   if (size > kIoMaxLength)
     throw new ERR_FS_FILE_TOO_LARGE(size);
 
-  let endOfFile = false;
   let totalRead = 0;
-  const noSize = size === 0;
-  const buffers = [];
-  const fullBuffer = noSize ? undefined : Buffer.allocUnsafeSlow(size);
-  do {
+  let buffer = Buffer.allocUnsafeSlow(length);
+  let result = '';
+  let offset = 0;
+  let buffers;
+  const chunkedRead = length > kReadFileBufferLength;
+
+  while (true) {
     checkAborted(signal);
-    let buffer;
-    let offset;
-    let length;
-    if (noSize) {
-      buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
-      offset = 0;
-      length = kReadFileUnknownBufferLength;
-    } else {
-      buffer = fullBuffer;
-      offset = totalRead;
+
+    if (chunkedRead) {
       length = MathMin(size - totalRead, kReadFileBufferLength);
     }
 
     const bytesRead = (await binding.read(filehandle.fd, buffer, offset,
-                                          length, -1, kUsePromises)) || 0;
+                                          length, -1, kUsePromises)) ?? 0;
     totalRead += bytesRead;
-    endOfFile = bytesRead === 0 || totalRead === size;
-    if (noSize && bytesRead > 0) {
-      const isBufferFull = bytesRead === kReadFileUnknownBufferLength;
-      const chunkBuffer = isBufferFull ? buffer : buffer.slice(0, bytesRead);
-      ArrayPrototypePush(buffers, chunkBuffer);
+
+    if (bytesRead === 0 ||
+        totalRead === size ||
+        (bytesRead !== buffer.length && !chunkedRead)) {
+      const singleRead = bytesRead === totalRead;
+
+      const bytesToCheck = chunkedRead ? totalRead : bytesRead;
+
+      if (bytesToCheck !== buffer.length) {
+        buffer = buffer.slice(0, bytesToCheck);
+      }
+
+      if (!encoding) {
+        if (size === 0 && !singleRead) {
+          ArrayPrototypePush(buffers, buffer);
+          return Buffer.concat(buffers, totalRead);
+        }
+        return buffer;
+      }
+
+      if (singleRead) {
+        return buffer.toString(encoding);
+      }
+      result += decoder.end(buffer);
+      return result;
     }
-  } while (!endOfFile);
 
-  let result;
-  if (size > 0) {
-    result = totalRead === size ? fullBuffer : fullBuffer.slice(0, totalRead);
-  } else {
-    result = buffers.length === 1 ? buffers[0] : Buffer.concat(buffers,
-                                                               totalRead);
+    if (encoding) {
+      result += decoder.write(buffer);
+    } else if (size !== 0) {
+      // TODO(BridgeAR): This condition needs a test. A file should be read
+      // that is chunked without encoding.
+      offset = totalRead;
+    } else {
+      buffers ??= [];
+      // Unknown file size requires chunks.
+      ArrayPrototypePush(buffers, buffer);
+      buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
+    }
   }
-
-  return options.encoding ? result.toString(options.encoding) : result;
 }
 
 // All of the functions are defined as async in order to ensure that errors
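
For context, here is a rough userland approximation of the approach readFileHandle takes above when an encoding is given. It is a minimal sketch, not the internal implementation: core calls binding.read and its kReadFileBufferLength constant directly, while the name readFileChunked and the 512 KiB chunk size below are made up for illustration. The point it shows is the same as the patch: one chunk-sized buffer is reused for every read and each chunk is stringified immediately through a StringDecoder, so peak memory stays near a single chunk instead of the whole file.

'use strict';
const { open } = require('node:fs/promises');
const { StringDecoder } = require('node:string_decoder');

// Illustrative chunk size; the internal kReadFileBufferLength plays this role
// in the patch.
const kChunkSize = 512 * 1024;

async function readFileChunked(path, encoding = 'utf8') {
  const decoder = new StringDecoder(encoding);
  const filehandle = await open(path, 'r');
  try {
    // One buffer is allocated up front and reused for every read.
    const buffer = Buffer.allocUnsafe(kChunkSize);
    let result = '';
    while (true) {
      // Read the next chunk from the current file position.
      const { bytesRead } = await filehandle.read(buffer, 0, kChunkSize, null);
      if (bytesRead === 0) {
        // Flush any partial character bytes still held by the decoder.
        return result + decoder.end();
      }
      // Stringify the chunk immediately; the buffer is reused on the next pass.
      result += decoder.write(buffer.subarray(0, bytesRead));
    }
  } finally {
    await filehandle.close();
  }
}

The patch only takes this route when an encoding is set; without one it keeps returning a Buffer as before, using a single full-size allocation when the file size is known and concatenated chunks when it is not.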