Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Brotli compression #144

Merged
merged 20 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ dist
!test/reference-test/files/*.parquet
examples/server/package-lock.json
test/browser/*.js
main.js
main.js.map
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,15 @@ var writer = await parquet.ParquetWriter.openFile(schema, 'fruits.parquet');
writer.setRowGroupSize(8192);
```

## Browser Tests

To run the browser tests (folder: `test/browser`) in a specific browser:

1. `npm i`
2. `npm run build:browser`
3. `npx serve .`
4. Open `http://localhost:3000/test/browser/` in your preferred browser (the trailing `/` is required)

## Dependencies

Parquet uses [thrift](https://thrift.apache.org/) to encode the schema and other
Expand Down
73 changes: 0 additions & 73 deletions esbuild-plugins.js

This file was deleted.

12 changes: 12 additions & 0 deletions esbuild-plugins.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import path from 'node:path';
/**
 * esbuild plugin that swaps the compression module for its browser variant.
 *
 * Any import whose specifier is exactly `./compression` is resolved to
 * `browser/compression.ts` inside the directory of the importing file, so the
 * browser bundles pick up the browser-specific implementation.
 */
export const compressionBrowserPlugin = {
  name: 'compressionBrowser',
  setup(build) {
    // Anchored on both ends: only the bare `./compression` specifier matches.
    const compressionImport = { filter: /^\.\/compression$/ };

    build.onResolve(compressionImport, ({ resolveDir, path: specifier }) => {
      const browserSpecifier = specifier.replace('compression', 'browser/compression.ts');
      return { path: path.join(resolveDir, browserSpecifier) };
    });
  },
};
13 changes: 8 additions & 5 deletions esbuild-serve.js → esbuild-serve.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,27 @@
* It attaches the parquet.js exports to a "parquetjs" global variable.
* See the example server for how to use it.
*/
const { compressionBrowserPlugin, wasmPlugin } = require('./esbuild-plugins');
import { compressionBrowserPlugin } from './esbuild-plugins.mjs';
import watPlugin from 'esbuild-plugin-wat';
import esbuild from 'esbuild';
// esbuild has TypeScript support by default. It will use tsconfig.json
require('esbuild')
esbuild
.context({
entryPoints: ['parquet.ts'],
outfile: 'main.js',
define: { 'process.env.NODE_DEBUG': 'false', 'process.env.NODE_ENV': '"production"', global: 'window' },
platform: 'browser',
plugins: [compressionBrowserPlugin, wasmPlugin],
plugins: [compressionBrowserPlugin, watPlugin()],
sourcemap: 'external',
bundle: true,
minify: false,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Keeping minify off here for the local serve.

globalName: 'parquetjs',
inject: ['./esbuild-shims.js'],
inject: ['./esbuild-shims.mjs'],
})
.then((context) => {
context
.serve({
servedir: __dirname,
servedir: './',
})
.then((server) => {
console.log('serving parquetjs', server);
Expand Down
2 changes: 0 additions & 2 deletions esbuild-shims.js

This file was deleted.

2 changes: 2 additions & 0 deletions esbuild-shims.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Shim module referenced by the `inject` option of the esbuild configurations.
// It re-exports the `Buffer` class of the `buffer` package under the name `Buffer`.
// NOTE(review): presumably esbuild substitutes references to a global `Buffer`
// in the bundled code with this export — confirm against the esbuild `inject` docs.
import { Buffer as buffer } from 'buffer/';
export let Buffer = buffer;
40 changes: 27 additions & 13 deletions esbuild.js → esbuild.mjs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
const esbuild = require('esbuild');
const path = require('path');
const { compressionBrowserPlugin, wasmPlugin } = require('./esbuild-plugins');
import esbuild from 'esbuild';
import watPlugin from 'esbuild-plugin-wat';
import { compressionBrowserPlugin } from './esbuild-plugins.mjs';
// esbuild has TypeScript support by default
const baseConfig = {
bundle: true,
Expand All @@ -10,11 +10,11 @@ const baseConfig = {
'process.env.NODE_ENV': '"production"',
global: 'window',
},
inject: ['./esbuild-shims.js'],
inject: ['./esbuild-shims.mjs'],
minify: true,
mainFields: ['browser', 'module', 'main'],
platform: 'browser', // default
plugins: [compressionBrowserPlugin, wasmPlugin],
plugins: [compressionBrowserPlugin, watPlugin()],
target: 'es2020', // default
};
// configuration for generating test code in browser
Expand All @@ -26,39 +26,53 @@ const testConfig = {
'process.env.NODE_ENV': '"production"',
global: 'window',
},
inject: ['./esbuild-shims.js'],
inject: ['./esbuild-shims.mjs'],
minify: false,
mainFields: ['browser', 'module', 'main'],
platform: 'browser', // default
plugins: [compressionBrowserPlugin, wasmPlugin],
plugins: [compressionBrowserPlugin, watPlugin()],
target: 'es2020', // default
};
const targets = [
{
...baseConfig,
globalName: 'parquetjs',
outdir: path.resolve(__dirname, 'dist', 'browser'),
outdir: './dist/browser',
},
{
...baseConfig,
format: 'esm',
outfile: path.resolve(__dirname, 'dist', 'browser', 'parquet.esm.js'),
outfile: 'dist/browser/parquet.esm.js',
},
{
...baseConfig,
format: 'cjs',
outfile: path.resolve(__dirname, 'dist', 'browser', 'parquet.cjs.js'),
outfile: 'dist/browser/parquet.cjs.js',
},
// Browser test code below
];

// Browser test code below is only in ESM
const testTargets = [
{
...testConfig,
outfile: path.resolve(__dirname, 'test', 'browser', 'main.js'),
format: 'esm',
mainFields: ['module', 'main'],
outfile: 'test/browser/main.js',
},
];

Promise.all(targets.map(esbuild.build))
.then((results) => {
if (results.reduce((m, r) => m && !r.warnings.length, true)) {
console.log('built with no errors or warnings');
console.log('built dist targets with no errors or warnings');
}
})
.then(() => {
return Promise.all(testTargets.map(esbuild.build));
})
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has to run after the other targets are built, because it depends on them.

.then((results) => {
if (results.reduce((m, r) => m && !r.warnings.length, true)) {
console.log('built test targets with no errors or warnings');
}
})
.catch((e) => {
Expand Down
3 changes: 3 additions & 0 deletions eslint.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ export default tseslint.config(
mochaPlugin.configs.flat.recommended,
...tseslint.configs.strict,
...tseslint.configs.stylistic,
{
ignores: ['dist/*', 'test/browser/main.js'],
},
{
rules: {
// TODO: Fix/ignore in tests and remove
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
13 changes: 13 additions & 0 deletions examples/server/views/parquetFiles.ejs
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,38 @@
<script>
const port = "<%= port %>"
/**
 * Reads every record of a parquet file served by the example server and
 * prints each record into the `#output` element and to the console.
 *
 * Record and error text is written through `textContent`, never `innerHTML`,
 * so values stored in the file are shown literally instead of being parsed as
 * markup. The reader is closed whether or not reading succeeds.
 *
 * @param {string} filename - Name of the file below the server's `/files/` route.
 * @returns {Promise<void>} Settles once all records were printed or the error
 *   was reported in the output element; it does not reject for read errors.
 */
const readit = async function (filename) {
  const output = document.getElementById('output');
  // JSON.stringify throws on BigInt values, so render them as decimal strings.
  const bigintAsString = (_, v) => (typeof v === 'bigint' ? v.toString() : v);

  try {
    const reader = await parquetjs.ParquetReader.openUrl(`http://localhost:${port}/files/${filename}`);
    try {
      const cursor = await reader.getCursor();

      output.textContent = "";
      let record = null;
      while ((record = await cursor.next())) {
        output.textContent += `\nRecord: ${JSON.stringify(record, bigintAsString, 2)}`;
        console.log("record", record);
      }
    } finally {
      // Release the reader even when iterating the cursor failed.
      await reader.close();
    }
  } catch (e) {
    output.textContent += `\nERROR: ${e}`;
    console.log("error", e);
  }
};

const search = async function (filename, columnName, searchTerm) {
const output = document.getElementById('output');
const rdr = await parquetjs.ParquetReader.openUrl(`http://localhost:${port}/files/${filename}`)
const bfs = await rdr.getBloomFiltersFor([columnName])
if (!bfs[columnName]) return false
output.innerHTML = "";
Promise.all(bfs[columnName].map(async item => item.sbbf.check(searchTerm)))
.then(values => {
const res = values.some((el) => (el === true));
output.innerHTML += `\nResponse "${searchTerm}": ${JSON.stringify(res, null, 2)}`;
console.log("res: ", res)
return res
})
Expand All @@ -54,10 +61,16 @@
<li><button onclick="readit('fruits-bloomfilter.parquet')" >with Bloom filter</button></li>
<li><button onclick="readit('alltypes_dictionary.parquet')" >all types w/ dictionary</button></li>
<li><button onclick="readit('fruits.parquet')" >With (unsupported) Brötli compression</button></li>
<li><button onclick="readit('gzip-nation.impala.parquet')" >With gzip compression</button></li>
<li><button onclick="readit('snappy-compressed.parquet')" >With snappy compression</button></li>
<li><button onclick="readit('sample_brotli_compressed.parquet')" >With brotli compression</button></li>
<li><button onclick="readit('list.parquet')" >With list elements</button></li>
</ul>
<h3>Search fruits-bloomfilter.parquet "name" column for</h3>
<button onclick="search('fruits-bloomfilter.parquet', 'name', 'kiwi')">Search fruits-bloomfilter.parquet for "kiwi"</button>
<button onclick="search('fruits-bloomfilter.parquet', 'name', 'xxx')">Search fruits-bloomfilter.parquet for "xxx"</button>

<h2>Output</h2>
<pre id="output"></pre>
</body>
</html>
Loading
Loading