From 4d4997b1ce966a37dda31b56caa246801e98101a Mon Sep 17 00:00:00 2001
From: Ruben Bridgewater
Date: Fri, 19 Aug 2022 17:07:19 +0200
Subject: [PATCH] fs: improve promise based readFile performance for big files

This significantly reduces peak memory usage for the promise-based
readFile operation by reusing a single memory chunk after each read and
stringifying that chunk immediately.

Signed-off-by: Ruben Bridgewater
---
 benchmark/fs/readfile-promises.js          |  9 ++-
 lib/fs.js                                  |  5 ++
 lib/internal/fs/promises.js                | 87 ++++++++++++++--------
 test/parallel/test-fs-promises-readfile.js | 18 ++++-
 4 files changed, 82 insertions(+), 37 deletions(-)

diff --git a/benchmark/fs/readfile-promises.js b/benchmark/fs/readfile-promises.js
index 0fa92fdffad78d..af14509e710f7a 100644
--- a/benchmark/fs/readfile-promises.js
+++ b/benchmark/fs/readfile-promises.js
@@ -16,7 +16,14 @@ const filename = path.resolve(tmpdir.path,
 const bench = common.createBenchmark(main, {
   duration: [5],
   encoding: ['', 'utf-8'],
-  len: [1024, 16 * 1024 * 1024],
+  len: [
+    1024,
+    512 * 1024,
+    4 * 1024 ** 2,
+    8 * 1024 ** 2,
+    16 * 1024 ** 2,
+    32 * 1024 ** 2,
+  ],
   concurrent: [1, 10]
 });
 
diff --git a/lib/fs.js b/lib/fs.js
index fc9b630e81208c..5e110eef17dd8b 100644
--- a/lib/fs.js
+++ b/lib/fs.js
@@ -331,6 +331,9 @@ function readFileAfterStat(err, stats) {
   if (err)
     return context.close(err);
 
+  // TODO(BridgeAR): Check if allocating a smaller chunk is better performance
+  // wise, similar to the promise based version (less peak memory and chunked
+  // stringify operations vs multiple C++/JS boundary crossings).
   const size = context.size = isFileType(stats, S_IFREG) ? stats[8] : 0;
 
   if (size > kIoMaxLength) {
@@ -340,6 +343,8 @@ function readFileAfterStat(err, stats) {
 
   try {
     if (size === 0) {
+      // TODO(BridgeAR): If an encoding is set, use the StringDecoder to concat
+      // the result and reuse the buffer instead of allocating a new one.
      context.buffers = [];
     } else {
       context.buffer = Buffer.allocUnsafeSlow(size);
diff --git a/lib/internal/fs/promises.js b/lib/internal/fs/promises.js
index 39f9cfc6c2e1c7..ab731f9e77fef1 100644
--- a/lib/internal/fs/promises.js
+++ b/lib/internal/fs/promises.js
@@ -87,6 +87,7 @@ const {
   promisify,
 } = require('internal/util');
 const { EventEmitterMixin } = require('internal/event_target');
+const { StringDecoder } = require('string_decoder');
 const { watch } = require('internal/fs/watchers');
 const { isIterable } = require('internal/streams/utils');
 const assert = require('internal/assert');
@@ -419,6 +420,8 @@ async function writeFileHandle(filehandle, data, signal, encoding) {
 
 async function readFileHandle(filehandle, options) {
   const signal = options?.signal;
+  const encoding = options?.encoding;
+  const decoder = encoding && new StringDecoder(encoding);
 
   checkAborted(signal);
 
@@ -426,56 +429,74 @@ async function readFileHandle(filehandle, options) {
 
   checkAborted(signal);
 
-  let size;
+  let size = 0;
+  let length = 0;
   if ((statFields[1/* mode */] & S_IFMT) === S_IFREG) {
     size = statFields[8/* size */];
-  } else {
-    size = 0;
+    length = encoding ? MathMin(size, kReadFileBufferLength) : size;
+  }
+  if (length === 0) {
+    length = kReadFileUnknownBufferLength;
   }
 
   if (size > kIoMaxLength)
     throw new ERR_FS_FILE_TOO_LARGE(size);
 
-  let endOfFile = false;
   let totalRead = 0;
-  const noSize = size === 0;
-  const buffers = [];
-  const fullBuffer = noSize ? undefined : Buffer.allocUnsafeSlow(size);
-  do {
+  let buffer = Buffer.allocUnsafeSlow(length);
+  let result = '';
+  let offset = 0;
+  let buffers;
+  const chunkedRead = length > kReadFileBufferLength;
+
+  while (true) {
     checkAborted(signal);
-    let buffer;
-    let offset;
-    let length;
-    if (noSize) {
-      buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
-      offset = 0;
-      length = kReadFileUnknownBufferLength;
-    } else {
-      buffer = fullBuffer;
-      offset = totalRead;
+
+    if (chunkedRead) {
       length = MathMin(size - totalRead, kReadFileBufferLength);
     }
 
     const bytesRead = (await binding.read(filehandle.fd, buffer, offset,
-                                          length, -1, kUsePromises)) || 0;
+                                          length, -1, kUsePromises)) ?? 0;
     totalRead += bytesRead;
-    endOfFile = bytesRead === 0 || totalRead === size;
-    if (noSize && bytesRead > 0) {
-      const isBufferFull = bytesRead === kReadFileUnknownBufferLength;
-      const chunkBuffer = isBufferFull ? buffer : buffer.slice(0, bytesRead);
-      ArrayPrototypePush(buffers, chunkBuffer);
+
+    if (bytesRead === 0 ||
+        totalRead === size ||
+        (bytesRead !== buffer.length && !chunkedRead)) {
+      const singleRead = bytesRead === totalRead;
+
+      const bytesToCheck = chunkedRead ? totalRead : bytesRead;
+
+      if (bytesToCheck !== buffer.length) {
+        buffer = buffer.subarray(0, bytesToCheck);
+      }
+
+      if (!encoding) {
+        if (size === 0 && !singleRead) {
+          ArrayPrototypePush(buffers, buffer);
+          return Buffer.concat(buffers, totalRead);
+        }
+        return buffer;
+      }
+
+      if (singleRead) {
+        return buffer.toString(encoding);
+      }
+      result += decoder.end(buffer);
+      return result;
     }
-  } while (!endOfFile);
 
-  let result;
-  if (size > 0) {
-    result = totalRead === size ? fullBuffer : fullBuffer.slice(0, totalRead);
-  } else {
-    result = buffers.length === 1 ? buffers[0] : Buffer.concat(buffers,
-                                                               totalRead);
+    if (encoding) {
+      result += decoder.write(buffer);
+    } else if (size !== 0) {
+      offset = totalRead;
+    } else {
+      buffers ??= [];
+      // Unknown file size requires chunks.
+      ArrayPrototypePush(buffers, buffer);
+      buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
+    }
   }
-
-  return options.encoding ? result.toString(options.encoding) : result;
 }
 
 // All of the functions are defined as async in order to ensure that errors
diff --git a/test/parallel/test-fs-promises-readfile.js b/test/parallel/test-fs-promises-readfile.js
index a7fc46e5a6544a..5f2a8e236a7cea 100644
--- a/test/parallel/test-fs-promises-readfile.js
+++ b/test/parallel/test-fs-promises-readfile.js
@@ -1,3 +1,4 @@
+// Flags: --expose-internals
 'use strict';
 
 const common = require('../common');
@@ -6,15 +7,15 @@ const assert = require('assert');
 const path = require('path');
 const { writeFile, readFile } = require('fs').promises;
 const tmpdir = require('../common/tmpdir');
+const { internalBinding } = require('internal/test/binding');
+const fsBinding = internalBinding('fs');
 tmpdir.refresh();
 
 const fn = path.join(tmpdir.path, 'large-file');
 
 // Creating large buffer with random content
 const largeBuffer = Buffer.from(
-  Array.apply(null, { length: 16834 * 2 })
-    .map(Math.random)
-    .map((number) => (number * (1 << 8)))
+  Array.from({ length: 1024 ** 2 + 199999 }, (_, index) => index)
 );
 
 async function createLargeFile() {
@@ -69,6 +70,16 @@ async function validateWrongSignalParam() {
 
 }
 
+async function validateZeroByteLiar() {
+  const originalFStat = fsBinding.fstat;
+  fsBinding.fstat = common.mustCall(
+    () => (/* stat fields */ [0, 1, 2, 3, 4, 5, 6, 7, 0 /* size */])
+  );
+  const readBuffer = await readFile(fn);
+  assert.strictEqual(readBuffer.toString(), largeBuffer.toString());
+  fsBinding.fstat = originalFStat;
+}
+
 (async () => {
   await createLargeFile();
   await validateReadFile();
@@ -76,4 +87,5 @@ async function validateWrongSignalParam() {
   await validateReadFileAbortLogicBefore();
   await validateReadFileAbortLogicDuring();
   await validateWrongSignalParam();
+  await validateZeroByteLiar();
 })().then(common.mustCall());
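
Note: the sketch below is not part of the patch; it is a standalone
illustration of the technique the readFileHandle rewrite applies when an
encoding is supplied. One fixed-size buffer is reused for every read and
decoded immediately with StringDecoder, so peak memory stays near a single
chunk plus the accumulated string rather than one allocation the size of
the whole file. Only public APIs are used; the helper name readFileChunked
and the 512 KiB chunk size (assumed to mirror kReadFileBufferLength) are
illustrative.

'use strict';
const { open } = require('fs').promises;
const { StringDecoder } = require('string_decoder');

// Illustrative chunk size, assumed to mirror kReadFileBufferLength.
const kChunkSize = 512 * 1024;

async function readFileChunked(path, encoding = 'utf8') {
  const decoder = new StringDecoder(encoding);
  // A single buffer is reused for every read instead of allocating one
  // buffer as large as the whole file.
  const buffer = Buffer.allocUnsafeSlow(kChunkSize);
  const handle = await open(path, 'r');
  let result = '';
  try {
    while (true) {
      const { bytesRead } = await handle.read(buffer, 0, kChunkSize, null);
      if (bytesRead === 0) break; // EOF
      // Stringify immediately; StringDecoder holds back an incomplete
      // multi-byte character that straddles a chunk boundary.
      result += decoder.write(buffer.subarray(0, bytesRead));
    }
    return result + decoder.end();
  } finally {
    await handle.close();
  }
}

// Usage:
// readFileChunked('/tmp/large-file').then((text) => console.log(text.length));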
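
Note: validateZeroByteLiar stubs the internal fstat binding so the reported
size is 0 while the file still has content; the read loop must then grow a
list of chunks instead of trusting the stat size. The same situation occurs
naturally with procfs on Linux, as this standalone, Linux-only sketch (also
not part of the patch) shows:

'use strict';
const { readFile, stat } = require('fs').promises;

(async () => {
  // procfs files typically stat as 0 bytes yet still yield content, so
  // readFile cannot rely on the size reported by stat/fstat.
  const { size } = await stat('/proc/version');
  const text = await readFile('/proc/version', 'utf8');
  console.log(size, text.length); // e.g. prints "0 60"
})();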