Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BUG] Enable TableInterpreter to trim leading and trailing whitespace #647

Merged
merged 4 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,27 @@ import {
ValueTypeVisitor,
} from '@jvalue/jayvee-language-server';

export interface ParseOpts {
skipLeadingWhitespace: boolean;
skipTrailingWhitespace: boolean;
}

const DEFAULT_PARSE_OPTS: ParseOpts = {
skipLeadingWhitespace: true,
skipTrailingWhitespace: true,
};

export function parseValueToInternalRepresentation<
I extends InternalValueRepresentation,
>(value: string, valueType: ValueType<I>): I | undefined {
const visitor = new InternalRepresentationParserVisitor(value);
>(
value: string,
valueType: ValueType<I>,
parseOpts?: Partial<ParseOpts>,
): I | undefined {
const visitor = new InternalRepresentationParserVisitor(value, {
...DEFAULT_PARSE_OPTS,
...parseOpts,
});
const result = valueType.acceptVisitor(visitor);
if (!valueType.isInternalValueRepresentation(result)) {
return undefined;
Expand All @@ -30,20 +47,33 @@ export function parseValueToInternalRepresentation<
class InternalRepresentationParserVisitor extends ValueTypeVisitor<
InternalValueRepresentation | undefined
> {
constructor(private value: string) {
constructor(private value: string, private parseOpts: ParseOpts) {
super();
}

private applyTrimOptions(value: string): string {
// BUG: https://github.com/jvalue/jayvee/issues/646
if (typeof this.value === 'string') {
if (this.parseOpts.skipLeadingWhitespace) {
value = value.trimStart();
}
if (this.parseOpts.skipTrailingWhitespace) {
value = value.trimEnd();
}
}
return value;
}

visitBoolean(vt: BooleanValuetype): boolean | undefined {
return vt.fromString(this.value);
return vt.fromString(this.applyTrimOptions(this.value));
}

visitDecimal(vt: DecimalValuetype): number | undefined {
return vt.fromString(this.value);
return vt.fromString(this.applyTrimOptions(this.value));
}

visitInteger(vt: IntegerValuetype): number | undefined {
return vt.fromString(this.value);
return vt.fromString(this.applyTrimOptions(this.value));
}

visitText(vt: TextValuetype): string {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -299,5 +299,57 @@ describe('Validation of TableInterpreterExecutor', () => {
expect(result.right.getNumberOfRows()).toEqual(0);
}
});

it('should skip leading and trailing whitespace on numeric columns but not text columns', async () => {
const text = readJvTestAsset('valid-without-header.jv');

const testWorkbook = await readTestWorkbook('test-with-whitespace.xlsx');
const result = await parseAndExecuteExecutor(
text,
testWorkbook.getSheetByName('Sheet1') as R.Sheet,
);

expect(R.isErr(result)).toEqual(false);
assert(R.isOk(result));

expect(result.right.ioType).toEqual(IOType.TABLE);
expect(result.right.getNumberOfColumns()).toEqual(3);
expect(result.right.getNumberOfRows()).toEqual(3);

expect([...result.right.getColumns().keys()]).toStrictEqual([
'index',
'name',
'flag',
]);

const row = result.right.getRow(0);
const index = row.get('index');
expect(index).toBe(0);
const name = row.get('name');
expect(name).toBe(' text with leading whitespace');

for (let rowIdx = 1; rowIdx < result.right.getNumberOfRows(); rowIdx++) {
const row = result.right.getRow(rowIdx);
const index = row.get('index');
expect(index).toBe(rowIdx);
}
});

it('should not skip leading or trailing whitespace if the relevant block properties are false', async () => {
const text = readJvTestAsset('valid-without-header-without-trim.jv');

const testWorkbook = await readTestWorkbook('test-with-whitespace.xlsx');
const result = await parseAndExecuteExecutor(
text,
testWorkbook.getSheetByName('Sheet1') as R.Sheet,
);

expect(R.isErr(result)).toEqual(false);
if (R.isOk(result)) {
expect(result.right.ioType).toEqual(IOType.TABLE);
expect(result.right.getNumberOfColumns()).toEqual(3);
expect(result.right.getNumberOfRows()).toEqual(0);
}
});
});
});
31 changes: 29 additions & 2 deletions libs/extensions/tabular/exec/src/lib/table-interpreter-executor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor<
context.valueTypeProvider.Primitives.ValuetypeAssignment,
),
);
const skipLeadingWhitespace = context.getPropertyValue(
'skipLeadingWhitespace',
context.valueTypeProvider.Primitives.Boolean,
);
const skipTrailingWhitespace = context.getPropertyValue(
'skipTrailingWhitespace',
context.valueTypeProvider.Primitives.Boolean,
);

let columnEntries: ColumnDefinitionEntry[];

Expand Down Expand Up @@ -107,6 +115,8 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor<
inputSheet,
header,
columnEntries,
skipLeadingWhitespace,
skipTrailingWhitespace,
context,
);
context.logger.logDebug(
Expand All @@ -119,6 +129,8 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor<
sheet: Sheet,
header: boolean,
columnEntries: ColumnDefinitionEntry[],
skipLeadingWhitespace: boolean,
skipTrailingWhitespace: boolean,
context: ExecutionContext,
): Table {
const table = new Table();
Expand All @@ -141,6 +153,8 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor<
sheetRow,
sheetRowIndex,
columnEntries,
skipLeadingWhitespace,
skipTrailingWhitespace,
context,
);
if (tableRow === undefined) {
Expand All @@ -158,6 +172,8 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor<
sheetRow: string[],
sheetRowIndex: number,
columnEntries: ColumnDefinitionEntry[],
skipLeadingWhitespace: boolean,
skipTrailingWhitespace: boolean,
context: ExecutionContext,
): R.TableRow | undefined {
let invalidRow = false;
Expand All @@ -168,7 +184,13 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor<
const value = sheetRow[sheetColumnIndex]!;
const valueType = columnEntry.valueType;

const parsedValue = this.parseAndValidateValue(value, valueType, context);
const parsedValue = this.parseAndValidateValue(
value,
valueType,
skipLeadingWhitespace,
skipTrailingWhitespace,
context,
);
if (parsedValue === undefined) {
const currentCellIndex = new CellIndex(sheetColumnIndex, sheetRowIndex);
context.logger.logDebug(
Expand All @@ -192,9 +214,14 @@ export class TableInterpreterExecutor extends AbstractBlockExecutor<
private parseAndValidateValue(
value: string,
valueType: ValueType,
skipLeadingWhitespace: boolean,
skipTrailingWhitespace: boolean,
context: ExecutionContext,
): InternalValueRepresentation | undefined {
const parsedValue = parseValueToInternalRepresentation(value, valueType);
const parsedValue = parseValueToInternalRepresentation(value, valueType, {
skipLeadingWhitespace,
skipTrailingWhitespace,
});
if (parsedValue === undefined) {
return undefined;
}
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2025 Friedrich-Alexander-Universitat Erlangen-Nurnberg

SPDX-License-Identifier: AGPL-3.0-only
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// SPDX-FileCopyrightText: 2025 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

pipeline TestPipeline {

block TestExtractor oftype TestSheetExtractor { }

block TestBlock oftype TableInterpreter {
header: false;
columns: [
"index" oftype integer,
"name" oftype text,
"flag" oftype boolean
];
skipLeadingWhitespace: false;
skipTrailingWhitespace: false;
}

block TestLoader oftype TestTableLoader { }

TestExtractor
-> TestBlock
-> TestLoader;
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

/**
* Interprets a `Sheet` as a `Table`. In case a header row is present in the sheet, its names can be matched with the provided column names. Otherwise, the provided column names are assigned in order.
*
*
* @example Interprets a `Sheet` about cars with a topmost header row and interprets it as a `Table` by assigning a primitive value type to each column. The column names are matched to the header, so the order of the type assignments does not matter.
* block CarsTableInterpreter oftype TableInterpreter {
* header: true;
Expand All @@ -14,7 +14,7 @@
* "cyl" oftype integer,
* ];
* }
*
*
* @example Interprets a `Sheet` about cars without a topmost header row and interprets it as a `Table` by sequentially assigning a name and a primitive value type to each column of the sheet. Note that the order of columns matters here. The first column (column `A`) will be named "name", the second column (column `B`) will be named "mpg" etc.
* block CarsTableInterpreter oftype TableInterpreter {
* header: false;
Expand All @@ -28,14 +28,24 @@
publish builtin blocktype TableInterpreter {
input default oftype Sheet;
output default oftype Table;

/**
* Whether the first row should be interpreted as header row.

/**
* Whether the first row should be interpreted as header row.
*/
property header oftype boolean: true;

/**
* Collection of value type assignments. Uses column names (potentially matched with the header or by sequence depending on the `header` property) to assign a primitive value type to each column.
*/
property header oftype boolean: true;

/**
* Collection of value type assignments. Uses column names (potentially matched with the header or by sequence depending on the `header` property) to assign a primitive value type to each column.
property columns oftype Collection<ValuetypeAssignment>;

/**
* Whether to ignore whitespace before values. Does not apply to `text` cells
*/
property skipLeadingWhitespace oftype boolean: true;

/**
* Whether to ignore whitespace after values. Does not apply to `text` cells
*/
property columns oftype Collection<ValuetypeAssignment>;
}
property skipTrailingWhitespace oftype boolean: true;
}