From 9c763238b05113b8d4807f240692588e85b9ebd2 Mon Sep 17 00:00:00 2001 From: dvirtz Date: Sun, 28 Jan 2024 16:26:37 +0000 Subject: [PATCH] fix: opening large files VSCode only supports files up to 50MB so stop before resulting JSON gets to this size. See https://github.com/microsoft/vscode/issues/31078 Fixes #114, #74 --- README.md | 10 +++++++++- src/parquet-document.ts | 10 ++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7f56416..345df55 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,14 @@ The following setting options are available: |`parquet-viewer.jsonSpace`|0|JSON indentation space, passed to `JSON.stringify` as is, see [mdn](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/stringify#parameters) for details. Doesn't apply when `parquet-viewer.backend` is `parquet-tools`.| |`parquet-viewer.parquetToolsPath`|`parquet-tools`|The name of the parquet-tools executable or a path to the parquet-tools jar| -### What's new +## Notes + +### Size limit + +VSCode allows extensions to work on files smaller than 50MB. +If the data is larger, it will be truncated and a message indicating that will be appended to the output. +See https://github.com/microsoft/vscode/issues/31078 for details. 
+ +## What's new See [CHANGELOG.md](CHANGELOG.md) diff --git a/src/parquet-document.ts b/src/parquet-document.ts index ea5947e..a1a55e6 100644 --- a/src/parquet-document.ts +++ b/src/parquet-document.ts @@ -64,6 +64,10 @@ export default class ParquetDocument implements vscode.Disposable { this._lastMod = mtimeMs; const lines: string[] = []; + const encoder = new TextEncoder(); + const FILE_SIZE_MB_LIMIT = 50; + const limitExceededMsg = JSON.stringify({warning: `file size exceeds ${FILE_SIZE_MB_LIMIT}MB limit`}); + let totalByteLength = encoder.encode(limitExceededMsg).byteLength; await vscode.window.withProgress({ location: vscode.ProgressLocation.Notification, @@ -72,6 +76,12 @@ export default class ParquetDocument implements vscode.Disposable { }, async (progress, token) => { for await (const line of this._backend.toJson(this._parquetPath, token)) { + const lineByteLength = encoder.encode(`${line}${os.EOL}`).byteLength; + totalByteLength += lineByteLength; + if (totalByteLength >= FILE_SIZE_MB_LIMIT * 1024 * 1024) { + lines.push(limitExceededMsg); + break; + } lines.push(line); } }