Skip to content

Commit

Permalink
[Reporting] Fix performance of CSV generation (elastic#120309)
Browse files Browse the repository at this point in the history
* [Reporting] Fix performance of CSV generation

* use a for loop with 1 operation instead of 3 chained maps

* do without the callback

* update comment
  • Loading branch information
tsullivan authored and kibanamachine committed Dec 4, 2021
1 parent 6deaeb5 commit 6144481
Showing 1 changed file with 33 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ export class CsvGenerator {
/*
* Format a Datatable into rows of CSV content
*/
private generateRows(
private async generateRows(
columns: string[],
table: Datatable,
builder: MaxSizeStringBuilder,
Expand All @@ -255,14 +255,38 @@ export class CsvGenerator {
break;
}

const row =
columns
.map((f) => ({ column: f, data: dataTableRow[f] }))
.map(this.formatCellValues(formatters))
.map(this.escapeValues(settings))
.join(settings.separator) + '\n';
/*
* Intrinsically, generating the rows is a synchronous process. Awaiting
* on a setImmediate call here partititions what could be a very long and
* CPU-intenstive synchronous process into an asychronous process. This
* give NodeJS to process other asychronous events that wait on the Event
* Loop.
*
* See: https://nodejs.org/en/docs/guides/dont-block-the-event-loop/
*
* It's likely this creates a lot of context switching, and adds to the
* time it would take to generate the CSV. There are alternatives to the
* chosen performance solution:
*
* 1. Partition the synchronous process with fewer partitions, by using
* the loop counter to call setImmediate only every N amount of rows.
* Testing is required to see what the best N value for most data will
* be.
*
* 2. Use a C++ add-on to generate the CSV using the Node Worker Pool
* instead of using the Event Loop
*/
await new Promise(setImmediate);

const rowDefinition: string[] = [];
const format = this.formatCellValues(formatters);
const escape = this.escapeValues(settings);

for (const column of columns) {
rowDefinition.push(escape(format({ column, data: dataTableRow[column] })));
}

if (!builder.tryAppend(row)) {
if (!builder.tryAppend(rowDefinition.join(settings.separator) + '\n')) {
this.logger.warn(`Max Size Reached after ${this.csvRowCount} rows.`);
this.maxSizeReached = true;
if (this.cancellationToken) {
Expand Down Expand Up @@ -377,7 +401,7 @@ export class CsvGenerator {
}

const formatters = this.getFormatters(table);
this.generateRows(columns, table, builder, formatters, settings);
await this.generateRows(columns, table, builder, formatters, settings);

// update iterator
currentRecord += table.rows.length;
Expand Down

0 comments on commit 6144481

Please sign in to comment.