diff --git a/libs/execution/src/lib/types/value-types/internal-representation-parsing.ts b/libs/execution/src/lib/types/value-types/internal-representation-parsing.ts index 39c2b032..ee1c7ad4 100644 --- a/libs/execution/src/lib/types/value-types/internal-representation-parsing.ts +++ b/libs/execution/src/lib/types/value-types/internal-representation-parsing.ts @@ -16,10 +16,27 @@ import { ValueTypeVisitor, } from '@jvalue/jayvee-language-server'; +export interface ParseOpts { + skipLeadingWhitespace: boolean; + skipTrailingWhitespace: boolean; +} + +const DEFAULT_PARSE_OPTS: ParseOpts = { + skipLeadingWhitespace: true, + skipTrailingWhitespace: true, +}; + export function parseValueToInternalRepresentation< I extends InternalValueRepresentation, ->(value: string, valueType: ValueType): I | undefined { - const visitor = new InternalRepresentationParserVisitor(value); +>( + value: string, + valueType: ValueType, + parseOpts?: Partial<ParseOpts>, +): I | undefined { + const visitor = new InternalRepresentationParserVisitor(value, { + ...DEFAULT_PARSE_OPTS, + ...parseOpts, + }); const result = valueType.acceptVisitor(visitor); if (!valueType.isInternalValueRepresentation(result)) { return undefined; @@ -30,20 +47,33 @@ export function parseValueToInternalRepresentation< class InternalRepresentationParserVisitor extends ValueTypeVisitor< InternalValueRepresentation | undefined > { - constructor(private value: string) { + constructor(private value: string, private parseOpts: ParseOpts) { super(); } + private applyTrimOptions(value: string): string { + // BUG: https://github.com/jvalue/jayvee/issues/646 + if (typeof this.value === 'string') { + if (this.parseOpts.skipLeadingWhitespace) { + value = value.trimStart(); + } + if (this.parseOpts.skipTrailingWhitespace) { + value = value.trimEnd(); + } + } + return value; + } + visitBoolean(vt: BooleanValuetype): boolean | undefined { - return vt.fromString(this.value); + return vt.fromString(this.applyTrimOptions(this.value)); } 
visitDecimal(vt: DecimalValuetype): number | undefined { - return vt.fromString(this.value); + return vt.fromString(this.applyTrimOptions(this.value)); } visitInteger(vt: IntegerValuetype): number | undefined { - return vt.fromString(this.value); + return vt.fromString(this.applyTrimOptions(this.value)); } visitText(vt: TextValuetype): string { diff --git a/libs/extensions/tabular/exec/src/lib/table-interpreter-executor.spec.ts b/libs/extensions/tabular/exec/src/lib/table-interpreter-executor.spec.ts index cf41dfec..8f135063 100644 --- a/libs/extensions/tabular/exec/src/lib/table-interpreter-executor.spec.ts +++ b/libs/extensions/tabular/exec/src/lib/table-interpreter-executor.spec.ts @@ -299,5 +299,57 @@ describe('Validation of TableInterpreterExecutor', () => { expect(result.right.getNumberOfRows()).toEqual(0); } }); + + it('should skip leading and trailing whitespace on numeric columns but not text columns', async () => { + const text = readJvTestAsset('valid-without-header.jv'); + + const testWorkbook = await readTestWorkbook('test-with-whitespace.xlsx'); + const result = await parseAndExecuteExecutor( + text, + testWorkbook.getSheetByName('Sheet1') as R.Sheet, + ); + + expect(R.isErr(result)).toEqual(false); + assert(R.isOk(result)); + + expect(result.right.ioType).toEqual(IOType.TABLE); + expect(result.right.getNumberOfColumns()).toEqual(3); + expect(result.right.getNumberOfRows()).toEqual(3); + + expect([...result.right.getColumns().keys()]).toStrictEqual([ + 'index', + 'name', + 'flag', + ]); + + const row = result.right.getRow(0); + const index = row.get('index'); + expect(index).toBe(0); + const name = row.get('name'); + expect(name).toBe(' text with leading whitespace'); + + for (let rowIdx = 1; rowIdx < result.right.getNumberOfRows(); rowIdx++) { + const row = result.right.getRow(rowIdx); + const index = row.get('index'); + expect(index).toBe(rowIdx); + } + }); + + it('should not skip leading or trailing whitespace if the relevant block properties are 
false', async () => { + const text = readJvTestAsset('valid-without-header-without-trim.jv'); + + const testWorkbook = await readTestWorkbook('test-with-whitespace.xlsx'); + const result = await parseAndExecuteExecutor( + text, + testWorkbook.getSheetByName('Sheet1') as R.Sheet, + ); + + expect(R.isErr(result)).toEqual(false); + if (R.isOk(result)) { + expect(result.right.ioType).toEqual(IOType.TABLE); + expect(result.right.getNumberOfColumns()).toEqual(3); + expect(result.right.getNumberOfRows()).toEqual(0); + } + }); }); }); diff --git a/libs/extensions/tabular/exec/src/lib/table-interpreter-executor.ts b/libs/extensions/tabular/exec/src/lib/table-interpreter-executor.ts index ffab27b8..c581241c 100644 --- a/libs/extensions/tabular/exec/src/lib/table-interpreter-executor.ts +++ b/libs/extensions/tabular/exec/src/lib/table-interpreter-executor.ts @@ -58,6 +58,14 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor< context.valueTypeProvider.Primitives.ValuetypeAssignment, ), ); + const skipLeadingWhitespace = context.getPropertyValue( + 'skipLeadingWhitespace', + context.valueTypeProvider.Primitives.Boolean, + ); + const skipTrailingWhitespace = context.getPropertyValue( + 'skipTrailingWhitespace', + context.valueTypeProvider.Primitives.Boolean, + ); let columnEntries: ColumnDefinitionEntry[]; @@ -107,6 +115,8 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor< inputSheet, header, columnEntries, + skipLeadingWhitespace, + skipTrailingWhitespace, context, ); context.logger.logDebug( @@ -119,6 +129,8 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor< sheet: Sheet, header: boolean, columnEntries: ColumnDefinitionEntry[], + skipLeadingWhitespace: boolean, + skipTrailingWhitespace: boolean, context: ExecutionContext, ): Table { const table = new Table(); @@ -141,6 +153,8 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor< sheetRow, sheetRowIndex, columnEntries, + skipLeadingWhitespace, + 
skipTrailingWhitespace, context, ); if (tableRow === undefined) { @@ -158,6 +172,8 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor< sheetRow: string[], sheetRowIndex: number, columnEntries: ColumnDefinitionEntry[], + skipLeadingWhitespace: boolean, + skipTrailingWhitespace: boolean, context: ExecutionContext, ): R.TableRow | undefined { let invalidRow = false; @@ -168,7 +184,13 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor< const value = sheetRow[sheetColumnIndex]!; const valueType = columnEntry.valueType; - const parsedValue = this.parseAndValidateValue(value, valueType, context); + const parsedValue = this.parseAndValidateValue( + value, + valueType, + skipLeadingWhitespace, + skipTrailingWhitespace, + context, + ); if (parsedValue === undefined) { const currentCellIndex = new CellIndex(sheetColumnIndex, sheetRowIndex); context.logger.logDebug( @@ -192,9 +214,14 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor< private parseAndValidateValue( value: string, valueType: ValueType, + skipLeadingWhitespace: boolean, + skipTrailingWhitespace: boolean, context: ExecutionContext, ): InternalValueRepresentation | undefined { - const parsedValue = parseValueToInternalRepresentation(value, valueType); + const parsedValue = parseValueToInternalRepresentation(value, valueType, { + skipLeadingWhitespace, + skipTrailingWhitespace, + }); if (parsedValue === undefined) { return undefined; } diff --git a/libs/extensions/tabular/exec/test/assets/table-interpreter-executor/test-with-whitespace.xlsx b/libs/extensions/tabular/exec/test/assets/table-interpreter-executor/test-with-whitespace.xlsx new file mode 100644 index 00000000..b880fe19 Binary files /dev/null and b/libs/extensions/tabular/exec/test/assets/table-interpreter-executor/test-with-whitespace.xlsx differ diff --git a/libs/extensions/tabular/exec/test/assets/table-interpreter-executor/test-with-whitespace.xlsx.license 
b/libs/extensions/tabular/exec/test/assets/table-interpreter-executor/test-with-whitespace.xlsx.license new file mode 100644 index 00000000..e39adc51 --- /dev/null +++ b/libs/extensions/tabular/exec/test/assets/table-interpreter-executor/test-with-whitespace.xlsx.license @@ -0,0 +1,3 @@ +SPDX-FileCopyrightText: 2025 Friedrich-Alexander-Universitat Erlangen-Nurnberg + +SPDX-License-Identifier: AGPL-3.0-only diff --git a/libs/extensions/tabular/exec/test/assets/table-interpreter-executor/valid-without-header-without-trim.jv b/libs/extensions/tabular/exec/test/assets/table-interpreter-executor/valid-without-header-without-trim.jv new file mode 100644 index 00000000..ec1fc6b7 --- /dev/null +++ b/libs/extensions/tabular/exec/test/assets/table-interpreter-executor/valid-without-header-without-trim.jv @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: 2025 Friedrich-Alexander-Universitat Erlangen-Nurnberg +// +// SPDX-License-Identifier: AGPL-3.0-only + +pipeline TestPipeline { + + block TestExtractor oftype TestSheetExtractor { } + + block TestBlock oftype TableInterpreter { + header: false; + columns: [ + "index" oftype integer, + "name" oftype text, + "flag" oftype boolean + ]; + skipLeadingWhitespace: false; + skipTrailingWhitespace: false; + } + + block TestLoader oftype TestTableLoader { } + + TestExtractor + -> TestBlock + -> TestLoader; +} diff --git a/libs/language-server/src/stdlib/builtin-block-types/TableInterpreter.jv b/libs/language-server/src/stdlib/builtin-block-types/TableInterpreter.jv index 866eacec..0d60cd70 100644 --- a/libs/language-server/src/stdlib/builtin-block-types/TableInterpreter.jv +++ b/libs/language-server/src/stdlib/builtin-block-types/TableInterpreter.jv @@ -4,7 +4,7 @@ /** * Interprets a `Sheet` as a `Table`. In case a header row is present in the sheet, its names can be matched with the provided column names. Otherwise, the provided column names are assigned in order. 
-* +* @example Interprets a `Sheet` about cars with a topmost header row and interprets it as a `Table` by assigning a primitive value type to each column. The column names are matched to the header, so the order of the type assignments does not matter. * block CarsTableInterpreter oftype TableInterpreter { * header: true; @@ -14,7 +14,7 @@ * "cyl" oftype integer, * ]; * } -* +* * @example Interprets a `Sheet` about cars without a topmost header row and interprets it as a `Table` by sequentially assigning a name and a primitive value type to each column of the sheet. Note that the order of columns matters here. The first column (column `A`) will be named "name", the second column (column `B`) will be named "mpg" etc. * block CarsTableInterpreter oftype TableInterpreter { * header: false; @@ -28,14 +28,24 @@ publish builtin blocktype TableInterpreter { input default oftype Sheet; output default oftype Table; - - /** - * Whether the first row should be interpreted as header row. + + /** + * Whether the first row should be interpreted as header row. + */ + property header oftype boolean: true; + + /** + * Collection of value type assignments. Uses column names (potentially matched with the header or by sequence depending on the `header` property) to assign a primitive value type to each column. */ - property header oftype boolean: true; - - /** - * Collection of value type assignments. Uses column names (potentially matched with the header or by sequence depending on the `header` property) to assign a primitive value type to each column. + property columns oftype Collection<ValuetypeAssignment>; + + /** + * Whether to ignore whitespace before values. Does not apply to `text` cells. + */ + property skipLeadingWhitespace oftype boolean: true; + + /** + * Whether to ignore whitespace after values. Does not apply to `text` cells. */ - property columns oftype Collection<ValuetypeAssignment>; -} \ No newline at end of file + property skipTrailingWhitespace oftype boolean: true; +}