From b752da9c2369a3c780cf9341978f5d4fb21b2f13 Mon Sep 17 00:00:00 2001 From: Kibana Machine <42973632+kibanamachine@users.noreply.github.com> Date: Sat, 4 Dec 2021 18:39:01 -0500 Subject: [PATCH] [Reporting] Fix performance of CSV generation (#120309) (#120446) * [Reporting] Fix performance of CSV generation * use a for loop with 1 operation instead of 3 chained maps * do without the callback * update comment Co-authored-by: Tim Sullivan --- .../generate_csv/generate_csv.ts | 42 +++++++++++++++---- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/x-pack/plugins/reporting/server/export_types/csv_searchsource/generate_csv/generate_csv.ts b/x-pack/plugins/reporting/server/export_types/csv_searchsource/generate_csv/generate_csv.ts index 76172da3e99c..4f2eb9109c95 100644 --- a/x-pack/plugins/reporting/server/export_types/csv_searchsource/generate_csv/generate_csv.ts +++ b/x-pack/plugins/reporting/server/export_types/csv_searchsource/generate_csv/generate_csv.ts @@ -242,7 +242,7 @@ export class CsvGenerator { /* * Format a Datatable into rows of CSV content */ - private generateRows( + private async generateRows( columns: string[], table: Datatable, builder: MaxSizeStringBuilder, @@ -255,14 +255,38 @@ export class CsvGenerator { break; } - const row = - columns - .map((f) => ({ column: f, data: dataTableRow[f] })) - .map(this.formatCellValues(formatters)) - .map(this.escapeValues(settings)) - .join(settings.separator) + '\n'; + /* + * Intrinsically, generating the rows is a synchronous process. Awaiting + * on a setImmediate call here partititions what could be a very long and + * CPU-intenstive synchronous process into an asychronous process. This + * give NodeJS to process other asychronous events that wait on the Event + * Loop. + * + * See: https://nodejs.org/en/docs/guides/dont-block-the-event-loop/ + * + * It's likely this creates a lot of context switching, and adds to the + * time it would take to generate the CSV. There are alternatives to the + * chosen performance solution: + * + * 1. Partition the synchronous process with fewer partitions, by using + * the loop counter to call setImmediate only every N amount of rows. + * Testing is required to see what the best N value for most data will + * be. + * + * 2. Use a C++ add-on to generate the CSV using the Node Worker Pool + * instead of using the Event Loop + */ + await new Promise(setImmediate); + + const rowDefinition: string[] = []; + const format = this.formatCellValues(formatters); + const escape = this.escapeValues(settings); + + for (const column of columns) { + rowDefinition.push(escape(format({ column, data: dataTableRow[column] }))); + } - if (!builder.tryAppend(row)) { + if (!builder.tryAppend(rowDefinition.join(settings.separator) + '\n')) { this.logger.warn(`Max Size Reached after ${this.csvRowCount} rows.`); this.maxSizeReached = true; if (this.cancellationToken) { @@ -377,7 +401,7 @@ export class CsvGenerator { } const formatters = this.getFormatters(table); - this.generateRows(columns, table, builder, formatters, settings); + await this.generateRows(columns, table, builder, formatters, settings); // update iterator currentRecord += table.rows.length;