Skip to content

Commit

Permalink
fs: improve promise based readFile performance for big files
Browse files Browse the repository at this point in the history
This significantly reduces the peak memory for the promise
based readFile operation by reusing a single memory chunk after
each read and strinigifying that chunk immediately.

Signed-off-by: Ruben Bridgewater <[email protected]>
PR-URL: #44295
Reviewed-By: Matteo Collina <[email protected]>
Reviewed-By: James M Snell <[email protected]>
  • Loading branch information
BridgeAR authored Oct 6, 2022
1 parent 37f1e4b commit d3dd49f
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 37 deletions.
9 changes: 8 additions & 1 deletion benchmark/fs/readfile-promises.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,14 @@ const filename = path.resolve(tmpdir.path,
const bench = common.createBenchmark(main, {
duration: [5],
encoding: ['', 'utf-8'],
len: [1024, 16 * 1024 * 1024],
len: [
1024,
512 * 1024,
4 * 1024 ** 2,
8 * 1024 ** 2,
16 * 1024 ** 2,
32 * 1024 ** 2,
],
concurrent: [1, 10]
});

Expand Down
5 changes: 5 additions & 0 deletions lib/fs.js
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,9 @@ function readFileAfterStat(err, stats) {
if (err)
return context.close(err);

// TODO(BridgeAR): Check if allocating a smaller chunk is better performance
// wise, similar to the promise based version (less peak memory and chunked
// stringify operations vs multiple C++/JS boundary crossings).
const size = context.size = isFileType(stats, S_IFREG) ? stats[8] : 0;

if (size > kIoMaxLength) {
Expand All @@ -340,6 +343,8 @@ function readFileAfterStat(err, stats) {

try {
if (size === 0) {
// TODO(BridgeAR): If an encoding is set, use the StringDecoder to concat
// the result and reuse the buffer instead of allocating a new one.
context.buffers = [];
} else {
context.buffer = Buffer.allocUnsafeSlow(size);
Expand Down
87 changes: 54 additions & 33 deletions lib/internal/fs/promises.js
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ const {
promisify,
} = require('internal/util');
const { EventEmitterMixin } = require('internal/event_target');
const { StringDecoder } = require('string_decoder');
const { watch } = require('internal/fs/watchers');
const { isIterable } = require('internal/streams/utils');
const assert = require('internal/assert');
Expand Down Expand Up @@ -419,63 +420,83 @@ async function writeFileHandle(filehandle, data, signal, encoding) {

async function readFileHandle(filehandle, options) {
const signal = options?.signal;
const encoding = options?.encoding;
const decoder = encoding && new StringDecoder(encoding);

checkAborted(signal);

const statFields = await binding.fstat(filehandle.fd, false, kUsePromises);

checkAborted(signal);

let size;
let size = 0;
let length = 0;
if ((statFields[1/* mode */] & S_IFMT) === S_IFREG) {
size = statFields[8/* size */];
} else {
size = 0;
length = encoding ? MathMin(size, kReadFileBufferLength) : size;
}
if (length === 0) {
length = kReadFileUnknownBufferLength;
}

if (size > kIoMaxLength)
throw new ERR_FS_FILE_TOO_LARGE(size);

let endOfFile = false;
let totalRead = 0;
const noSize = size === 0;
const buffers = [];
const fullBuffer = noSize ? undefined : Buffer.allocUnsafeSlow(size);
do {
let buffer = Buffer.allocUnsafeSlow(length);
let result = '';
let offset = 0;
let buffers;
const chunkedRead = length > kReadFileBufferLength;

while (true) {
checkAborted(signal);
let buffer;
let offset;
let length;
if (noSize) {
buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
offset = 0;
length = kReadFileUnknownBufferLength;
} else {
buffer = fullBuffer;
offset = totalRead;

if (chunkedRead) {
length = MathMin(size - totalRead, kReadFileBufferLength);
}

const bytesRead = (await binding.read(filehandle.fd, buffer, offset,
length, -1, kUsePromises)) || 0;
length, -1, kUsePromises)) ?? 0;
totalRead += bytesRead;
endOfFile = bytesRead === 0 || totalRead === size;
if (noSize && bytesRead > 0) {
const isBufferFull = bytesRead === kReadFileUnknownBufferLength;
const chunkBuffer = isBufferFull ? buffer : buffer.slice(0, bytesRead);
ArrayPrototypePush(buffers, chunkBuffer);

if (bytesRead === 0 ||
totalRead === size ||
(bytesRead !== buffer.length && !chunkedRead)) {
const singleRead = bytesRead === totalRead;

const bytesToCheck = chunkedRead ? totalRead : bytesRead;

if (bytesToCheck !== buffer.length) {
buffer = buffer.subarray(0, bytesToCheck);
}

if (!encoding) {
if (size === 0 && !singleRead) {
ArrayPrototypePush(buffers, buffer);
return Buffer.concat(buffers, totalRead);
}
return buffer;
}

if (singleRead) {
return buffer.toString(encoding);
}
result += decoder.end(buffer);
return result;
}
} while (!endOfFile);

let result;
if (size > 0) {
result = totalRead === size ? fullBuffer : fullBuffer.slice(0, totalRead);
} else {
result = buffers.length === 1 ? buffers[0] : Buffer.concat(buffers,
totalRead);
if (encoding) {
result += decoder.write(buffer);
} else if (size !== 0) {
offset = totalRead;
} else {
buffers ??= [];
// Unknown file size requires chunks.
ArrayPrototypePush(buffers, buffer);
buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
}
}

return options.encoding ? result.toString(options.encoding) : result;
}

// All of the functions are defined as async in order to ensure that errors
Expand Down
18 changes: 15 additions & 3 deletions test/parallel/test-fs-promises-readfile.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// Flags: --expose-internals
'use strict';

const common = require('../common');
Expand All @@ -6,15 +7,15 @@ const assert = require('assert');
const path = require('path');
const { writeFile, readFile } = require('fs').promises;
const tmpdir = require('../common/tmpdir');
const { internalBinding } = require('internal/test/binding');
const fsBinding = internalBinding('fs');
tmpdir.refresh();

const fn = path.join(tmpdir.path, 'large-file');

// Creating large buffer with random content
const largeBuffer = Buffer.from(
Array.apply(null, { length: 16834 * 2 })
.map(Math.random)
.map((number) => (number * (1 << 8)))
Array.from({ length: 1024 ** 2 + 19 }, (_, index) => index)
);

async function createLargeFile() {
Expand Down Expand Up @@ -69,11 +70,22 @@ async function validateWrongSignalParam() {

}

async function validateZeroByteLiar() {
const originalFStat = fsBinding.fstat;
fsBinding.fstat = common.mustCall(
() => (/* stat fields */ [0, 1, 2, 3, 4, 5, 6, 7, 0 /* size */])
);
const readBuffer = await readFile(fn);
assert.strictEqual(readBuffer.toString(), largeBuffer.toString());
fsBinding.fstat = originalFStat;
}

(async () => {
await createLargeFile();
await validateReadFile();
await validateReadFileProc();
await validateReadFileAbortLogicBefore();
await validateReadFileAbortLogicDuring();
await validateWrongSignalParam();
await validateZeroByteLiar();
})().then(common.mustCall());

0 comments on commit d3dd49f

Please sign in to comment.