Skip to content

Commit

Permalink
Merge pull request #642 from jvalue/608-csv-newlines
Browse files Browse the repository at this point in the history
[FIX] Cannot parse CSV with newlines
  • Loading branch information
TungstnBallon authored Jan 28, 2025
2 parents 186dcb7 + 2663484 commit 902ba93
Showing 21 changed files with 292 additions and 167 deletions.
Original file line number Diff line number Diff line change
@@ -11,7 +11,7 @@ import {
} from './io-type-implementation';

export class TextFile
extends FileSystemFile<string[]>
extends FileSystemFile<string>
implements IOTypeImplementation<IOType.TEXT_FILE>
{
public readonly ioType = IOType.TEXT_FILE;
65 changes: 64 additions & 1 deletion libs/execution/src/lib/util/file-util.spec.ts
Original file line number Diff line number Diff line change
@@ -2,14 +2,25 @@
//
// SPDX-License-Identifier: AGPL-3.0-only

import { FileExtension, MimeType } from '../types';
import * as R from '../blocks';
import { FileExtension, MimeType, TextFile } from '../types';

import {
inferFileExtensionFromContentTypeString,
inferFileExtensionFromFileExtensionString,
inferMimeTypeFromFileExtensionString,
transformTextFileLines,
} from './file-util';

/**
 * Builds a plain-text `TextFile` fixture for the tests below.
 *
 * @param content the complete text content the fixture file should hold
 * @returns a `TextFile` named `exampleTextFile` with the TXT extension and the
 *   plain-text mime type
 */
function exampleTextFile(content: string): TextFile {
  const fixtureName = 'exampleTextFile';
  const fixture = new TextFile(
    fixtureName,
    FileExtension.TXT,
    MimeType.TEXT_PLAIN,
    content,
  );
  return fixture;
}

describe('Validation of file-util', () => {
describe('Function inferMimeTypeFromContentTypeString', () => {
it('should diagnose no error on known mimeType', () => {
@@ -68,4 +79,56 @@ describe('Validation of file-util', () => {
expect(result).toEqual(undefined);
});
});
describe('Function transformTextFileLines', () => {
  // Every case passes an identity transform wrapped in a spy, so the output
  // file must equal the input file and the spy records the lines it was given.

  it('should diagnose no error without newline', async () => {
    const file = exampleTextFile('some text content without a newline');
    // eslint-disable-next-line @typescript-eslint/require-await
    const spy = vi.fn(async (lines: string[]) => R.ok(lines));
    const result = await transformTextFileLines(file, /\r?\n/, spy);

    expect(spy).toHaveBeenCalledOnce();
    expect(spy).toHaveBeenCalledWith(['some text content without a newline']);

    expect(R.isOk(result)).toBe(true);
    assert(R.isOk(result));

    expect(result.right).toStrictEqual(file);
  });

  it('should diagnose no error on empty file', async () => {
    const file = exampleTextFile('');

    // eslint-disable-next-line @typescript-eslint/require-await
    const spy = vi.fn(async (lines: string[]) => R.ok(lines));
    const result = await transformTextFileLines(file, /\r?\n/, spy);

    expect(spy).toHaveBeenCalledOnce();
    // An empty file is handed to the transform as zero lines.
    expect(spy).toHaveBeenCalledWith([]);

    expect(R.isOk(result)).toBe(true);
    assert(R.isOk(result));

    expect(result.right).toStrictEqual(file);
  });

  it('should diagnose no error on file with trailing newline', async () => {
    // The fixture is spelled out with explicit escapes: the second line ends
    // with a space, which would be invisible (and easily stripped by editors
    // or formatters) inside a multi-line template literal.
    const file = exampleTextFile(
      'some text content\nwith a \ntrailing newline\n',
    );
    // eslint-disable-next-line @typescript-eslint/require-await
    const spy = vi.fn(async (lines: string[]) => R.ok(lines));
    const result = await transformTextFileLines(file, /\r?\n/, spy);

    expect(spy).toHaveBeenCalledOnce();
    // The trailing line break must not produce an additional empty line.
    expect(spy).toHaveBeenCalledWith([
      'some text content',
      'with a ',
      'trailing newline',
    ]);

    expect(R.isOk(result)).toBe(true);
    assert(R.isOk(result));

    expect(result.right).toStrictEqual(file);
  });
});
});
33 changes: 32 additions & 1 deletion libs/execution/src/lib/util/file-util.ts
Original file line number Diff line number Diff line change
@@ -4,7 +4,8 @@

import * as mime from 'mime-types';

import { FileExtension, MimeType } from '../types';
import * as R from '../blocks';
import { FileExtension, MimeType, TextFile } from '../types';

export function inferMimeTypeFromFileExtensionString(
fileExtension: string | undefined,
@@ -50,3 +51,33 @@ export function inferFileExtensionFromContentTypeString(
}
return undefined;
}

/**
 * Splits `content` at every match of `pattern`.
 *
 * Follows the separator rules of `String.prototype.split` (no split for an
 * empty match at the start of a segment or at the very end of the input), but
 * never splices capture-group values into the result. A line break pattern
 * such as `/(\r\n|\n)/` therefore yields only the text between line breaks.
 */
function splitAtLineBreaks(content: string, pattern: RegExp): string[] {
  // `matchAll` requires the global flag; dropping `y` and using a fresh copy
  // keeps the scan independent of the caller's `lastIndex`.
  const flags = `${pattern.flags.replace(/[gy]/g, '')}g`;
  const scanner = new RegExp(pattern.source, flags);

  const segments: string[] = [];
  let segmentStart = 0;
  for (const match of content.matchAll(scanner)) {
    const matchStart = match.index ?? 0;
    const matchEnd = matchStart + match[0].length;
    // Skip empty matches at the end of the input or at the current segment
    // start, exactly as `split` does.
    if (matchStart >= content.length || matchEnd === segmentStart) {
      continue;
    }
    segments.push(content.slice(segmentStart, matchStart));
    segmentStart = matchEnd;
  }
  segments.push(content.slice(segmentStart));
  return segments;
}

/**
 * Applies `transformFn` to the lines of a text file and reassembles the result
 * into a new `TextFile`.
 *
 * The content is split at every match of `lineBreakPattern`. An empty last
 * element (caused by a trailing line break, or by an empty file) is removed
 * before calling `transformFn` and a trailing line break is appended again
 * afterwards. The transformed lines are joined with the first line break
 * matched in the original content; if the content contains no line break, the
 * lines are joined without a separator.
 *
 * @param file the text file whose content is transformed; it is not modified
 * @param lineBreakPattern pattern that separates lines; capture groups in the
 *   pattern do not show up as lines
 * @param transformFn receives the lines and returns the new lines or an error
 * @returns a new `TextFile` with the name, extension and mime type of `file`
 *   and the transformed content, or the error returned by `transformFn`
 */
export async function transformTextFileLines(
  file: TextFile,
  lineBreakPattern: RegExp,
  transformFn: (lines: string[]) => Promise<R.Result<string[]>>,
): Promise<R.Result<TextFile>> {
  const lines = splitAtLineBreaks(file.content, lineBreakPattern);
  const lineBreak = file.content.match(lineBreakPattern)?.[0] ?? '';

  // There may be an additional empty line due to the previous splitting
  let emptyNewline = false;
  if (lines[lines.length - 1] === '') {
    emptyNewline = true;
    lines.pop();
  }

  const newLines = await transformFn(lines);
  if (R.isErr(newLines)) {
    return newLines;
  }

  let newContent = newLines.right.join(lineBreak);
  if (emptyNewline) {
    newContent += lineBreak;
  }

  return R.ok(
    new TextFile(file.name, file.extension, file.mimeType, newContent),
  );
}
1 change: 0 additions & 1 deletion libs/execution/src/lib/util/index.ts
Original file line number Diff line number Diff line change
@@ -4,4 +4,3 @@

export * from './implements-static-decorator';
export * from './file-util';
export * from './string-util';
14 changes: 0 additions & 14 deletions libs/execution/src/lib/util/string-util.ts

This file was deleted.

3 changes: 1 addition & 2 deletions libs/execution/test/utils/file-util.ts
Original file line number Diff line number Diff line change
@@ -12,7 +12,6 @@ import {
TextFile,
inferFileExtensionFromFileExtensionString,
inferMimeTypeFromFileExtensionString,
splitLines,
} from '../../src';

export function createBinaryFileFromLocalFile(fileName: string): BinaryFile {
@@ -39,6 +38,6 @@ export function createTextFileFromLocalFile(fileName: string): TextFile {
path.basename(fileName),
fileExtension,
mimeType,
splitLines(fileContent, /\r?\n/),
fileContent,
);
}
36 changes: 15 additions & 21 deletions libs/extensions/std/exec/src/text-file-interpreter-executor.spec.ts
Original file line number Diff line number Diff line change
@@ -92,9 +92,9 @@ describe('Validation of TextFileInterpreterExecutor', () => {
expect(R.isErr(result)).toEqual(false);
if (R.isOk(result)) {
expect(result.right.ioType).toEqual(IOType.TEXT_FILE);
expect(result.right.content).toEqual(
expect.arrayContaining(['Multiline ', 'Test File']),
);
expect(result.right.content).toBe(`Multiline
Test File
`);
}
});

@@ -107,24 +107,18 @@ describe('Validation of TextFileInterpreterExecutor', () => {
expect(R.isErr(result)).toEqual(false);
if (R.isOk(result)) {
expect(result.right.ioType).toEqual(IOType.TEXT_FILE);
expect(result.right.content).toEqual(
expect.arrayContaining(['vehicle:268435857"0']),
);
}
});

it('should diagnose no error on custom lineBreak', async () => {
const text = readJvTestAsset('valid-custom-line-break.jv');

const testFile = readTestFile('test.txt');
const result = await parseAndExecuteExecutor(text, testFile);

expect(R.isErr(result)).toEqual(false);
if (R.isOk(result)) {
expect(result.right.ioType).toEqual(IOType.TEXT_FILE);
expect(result.right.content).toEqual(
expect.arrayContaining(['Multiline \nTest', 'File\n']),
);
const expectedBytes = Buffer.from([
0xa, 0xd, 0xa, 0x3, 0x32, 0x2e, 0x30, 0x10, 0x0, 0x18, 0xe9, 0xa9, 0xba,
0xef, 0xbf, 0xbd, 0x6, 0x12, 0x45, 0xa, 0x11, 0x76, 0x65, 0x68, 0x69,
0x63, 0x6c, 0x65, 0x3a, 0x32, 0x36, 0x38, 0x34, 0x33, 0x35, 0x38, 0x35,
0x37, 0x22, 0x30, 0xa, 0xe, 0xa, 0x8, 0x31, 0x35, 0x39, 0x32, 0x33,
0x34, 0x37, 0x34, 0x2a, 0x2, 0x31, 0x30, 0x12, 0xf, 0xd, 0x27, 0xef,
0xbf, 0xbd, 0x39, 0x42, 0x15, 0xef, 0xbf, 0xbd, 0xf, 0x1f, 0xef, 0xbf,
0xbd, 0x1d, 0x0, 0x0, 0x2c, 0x43, 0x28, 0x0, 0x42, 0xb, 0xa, 0x9, 0x32,
0x36, 0x38, 0x34, 0x33, 0x35, 0x38, 0x35, 0x37,
]);
const actualBytes = Buffer.from(result.right.content);
expect(actualBytes).toStrictEqual(expectedBytes);
}
});
});
15 changes: 2 additions & 13 deletions libs/extensions/std/exec/src/text-file-interpreter-executor.ts
Original file line number Diff line number Diff line change
@@ -12,7 +12,6 @@ import {
type ExecutionContext,
TextFile,
implementsStatic,
splitLines,
} from '@jvalue/jayvee-execution';
import { IOType } from '@jvalue/jayvee-language-server';

@@ -36,25 +35,15 @@ export class TextFileInterpreterExecutor extends AbstractBlockExecutor<
'encoding',
context.valueTypeProvider.Primitives.Text,
);
const lineBreak = context.getPropertyValue(
'lineBreak',
context.valueTypeProvider.Primitives.Regex,
);

const decoder = new TextDecoder(encoding);
context.logger.logDebug(
`Decoding file content using encoding "${encoding}"`,
);
const textContent = decoder.decode(file.content);

context.logger.logDebug(
`Splitting lines using line break /${lineBreak.source}/`,
return R.ok(
new TextFile(file.name, file.extension, file.mimeType, textContent),
);
const lines = splitLines(textContent, lineBreak);
context.logger.logDebug(
`Lines were split successfully, the resulting text file has ${lines.length} lines`,
);

return R.ok(new TextFile(file.name, file.extension, file.mimeType, lines));
}
}
16 changes: 10 additions & 6 deletions libs/extensions/std/exec/src/text-line-deleter-executor.spec.ts
Original file line number Diff line number Diff line change
@@ -92,8 +92,9 @@ describe('Validation of TextLineDeleterExecutor', () => {
expect(R.isErr(result)).toEqual(false);
if (R.isOk(result)) {
expect(result.right.ioType).toEqual(IOType.TEXT_FILE);
expect(result.right.content).toEqual(
expect.arrayContaining(['Test File']),
expect(result.right.content).toBe(
`Test File
`,
);
}
});
@@ -107,8 +108,10 @@ describe('Validation of TextLineDeleterExecutor', () => {
expect(R.isErr(result)).toEqual(false);
if (R.isOk(result)) {
expect(result.right.ioType).toEqual(IOType.TEXT_FILE);
expect(result.right.content).toEqual(
expect.arrayContaining(['Multiline', 'Test File']),
expect(result.right.content).toBe(
`Multiline
Test File
`,
);
}
});
@@ -136,8 +139,9 @@ describe('Validation of TextLineDeleterExecutor', () => {
expect(R.isErr(result)).toEqual(false);
if (R.isOk(result)) {
expect(result.right.ioType).toEqual(IOType.TEXT_FILE);
expect(result.right.content).toEqual(
expect.arrayContaining(['Test File']),
expect(result.right.content).toBe(
`Test File
`,
);
}
});
73 changes: 41 additions & 32 deletions libs/extensions/std/exec/src/text-line-deleter-executor.ts
Original file line number Diff line number Diff line change
@@ -12,6 +12,40 @@ import {
} from '@jvalue/jayvee-execution';
import { IOType } from '@jvalue/jayvee-language-server';

/**
 * Removes the lines with the given 1-based line numbers from `lines`.
 *
 * Every requested line number must lie within `1..lines.length`; the first one
 * that does not yields an error whose diagnostic points at the offending entry
 * of the `lines` property. Duplicate line numbers are deleted only once.
 *
 * @param lines the lines of the text file; this array is not modified
 * @param deleteIdxs 1-based numbers of the lines to delete
 * @param context used for debug logging and to locate the `lines` property
 *   for diagnostics
 * @returns a new array without the deleted lines, or an error if a line
 *   number does not exist in the file
 */
// eslint-disable-next-line @typescript-eslint/require-await
async function deleteLines(
  lines: string[],
  deleteIdxs: number[],
  context: ExecutionContext,
): Promise<R.Result<string[]>> {
  for (const [position, deleteIdx] of deleteIdxs.entries()) {
    // Numbers below 1 are rejected as well: passed on to an index-based
    // removal they would address lines counted from the end of the file.
    if (deleteIdx < 1 || deleteIdx > lines.length) {
      return R.err({
        message: `Line ${deleteIdx} does not exist in the text file, only ${lines.length} line(s) are present`,
        diagnostic: {
          node: context.getOrFailProperty('lines').value,
          property: 'values',
          index: position,
        },
      });
    }
  }

  const distinctLines = new Set(deleteIdxs);
  // Sorted only for a readable log message.
  const sortedLines = [...distinctLines].sort((a, b) => a - b);

  context.logger.logDebug(`Deleting line(s) ${sortedLines.join(', ')}`);

  // Build a new array instead of splicing the caller's array in place.
  const remainingLines = lines.filter(
    (_line, idx) => !distinctLines.has(idx + 1),
  );

  return R.ok(remainingLines);
}

@implementsStatic<BlockExecutorClass>()
export class TextLineDeleterExecutor extends AbstractBlockExecutor<
IOType.TEXT_FILE,
@@ -23,48 +57,23 @@ export class TextLineDeleterExecutor extends AbstractBlockExecutor<
super(IOType.TEXT_FILE, IOType.TEXT_FILE);
}

// eslint-disable-next-line @typescript-eslint/require-await
async doExecute(
file: TextFile,
context: ExecutionContext,
): Promise<R.Result<TextFile>> {
const lines = context.getPropertyValue(
const deleteIdxs = context.getPropertyValue(
'lines',
context.valueTypeProvider.createCollectionValueTypeOf(
context.valueTypeProvider.Primitives.Integer,
),
);
const numberOfLines = file.content.length;

let lineIndex = 0;
for (const lineNumber of lines) {
if (lineNumber > numberOfLines) {
return R.err({
message: `Line ${lineNumber} does not exist in the text file, only ${file.content.length} line(s) are present`,
diagnostic: {
node: context.getOrFailProperty('lines').value,
property: 'values',
index: lineIndex,
},
});
}

++lineIndex;
}

const distinctLines = new Set(lines);
const sortedLines = [...distinctLines].sort((a, b) => a - b);

context.logger.logDebug(`Deleting line(s) ${sortedLines.join(', ')}`);

const reversedLines = sortedLines.reverse();
const newContent = [...file.content];
for (const lineToDelete of reversedLines) {
newContent.splice(lineToDelete - 1, 1);
}
const lineBreakPattern = context.getPropertyValue(
'lineBreak',
context.valueTypeProvider.Primitives.Regex,
);

return R.ok(
new TextFile(file.name, file.extension, file.mimeType, newContent),
return R.transformTextFileLines(file, lineBreakPattern, (lines) =>
deleteLines(lines, deleteIdxs, context),
);
}
}
Loading

0 comments on commit 902ba93

Please sign in to comment.