diff --git a/package.json b/package.json index 194d53d..be1d264 100644 --- a/package.json +++ b/package.json @@ -20,6 +20,7 @@ "@react-buddy/ide-toolbox": "^2.4.0", "@react-buddy/palette-mui": "^5.0.1", "@tanstack/react-query": "^5", + "@zip.js/zip.js": "^2.7.52", "apache-arrow": "^17.0.0", "array-move": "^4.0.0", "arrow-js-ffi": "^0.4.2", @@ -28,6 +29,7 @@ "dayjs": "^1.11.12", "dotenv": "^16.4.5", "immer": "^10.1.1", + "native-file-system-adapter": "^3.0.1", "parquet-wasm": "0.6.1", "react": "^18.3.1", "react-code-blocks": "^0.1.6", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7688122..0d3eb40 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -50,6 +50,9 @@ dependencies: '@tanstack/react-query': specifier: ^5 version: 5.51.11(react@18.3.1) + '@zip.js/zip.js': + specifier: ^2.7.52 + version: 2.7.52 apache-arrow: specifier: ^17.0.0 version: 17.0.0 @@ -74,6 +77,9 @@ dependencies: immer: specifier: ^10.1.1 version: 10.1.1 + native-file-system-adapter: + specifier: ^3.0.1 + version: 3.0.1 parquet-wasm: specifier: 0.6.1 version: 0.6.1 @@ -4395,6 +4401,11 @@ packages: tslib: 1.14.1 dev: true + /@zip.js/zip.js@2.7.52: + resolution: {integrity: sha512-+5g7FQswvrCHwYKNMd/KFxZSObctLSsQOgqBSi0LzwHo3li9Eh1w5cF5ndjQw9Zbr3ajVnd2+XyiX85gAetx1Q==} + engines: {bun: '>=0.7.0', deno: '>=1.0.0', node: '>=16.5.0'} + dev: false + /accepts@1.3.8: resolution: {integrity: sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==} engines: {node: '>= 0.6'} @@ -6022,6 +6033,16 @@ packages: pend: 1.2.0 dev: true + /fetch-blob@3.2.0: + resolution: {integrity: sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==} + engines: {node: ^12.20 || >= 14.13} + requiresBuild: true + dependencies: + node-domexception: 1.0.0 + web-streams-polyfill: 3.3.3 + dev: false + optional: true + /figures@3.2.0: resolution: {integrity: sha512-yaduQFRKLXYOGgEn6AZau90j3ggSOyiqXU0F9JZfeXYhNa+Jk4X+s45A2zg5jns87GAFa34BBm2kXw4XpNcbdg==} engines: {node: '>=8'} @@ -7705,6 +7726,13 @@ packages: engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1} hasBin: true + /native-file-system-adapter@3.0.1: + resolution: {integrity: sha512-ocuhsYk2SY0906LPc3QIMW+rCV3MdhqGiy7wV5Bf0e8/5TsMjDdyIwhNiVPiKxzTJLDrLT6h8BoV9ERfJscKhw==} + engines: {node: '>=14.8.0'} + optionalDependencies: + fetch-blob: 3.2.0 + dev: false + /natural-compare@1.4.0: resolution: {integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==} dev: true @@ -7725,6 +7753,13 @@ packages: minimatch: 3.1.2 dev: true + /node-domexception@1.0.0: + resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} + engines: {node: '>=10.5.0'} + requiresBuild: true + dev: false + optional: true + /node-fetch-native@1.6.4: resolution: {integrity: sha512-IhOigYzAKHd244OC0JIMIUrjzctirCmPkaIfhDeGcEETWof5zKYUW7e7MYvChGWh/4CJeXEgsRyGzuF334rOOQ==} dev: true @@ -9799,6 +9834,13 @@ packages: defaults: 1.0.4 dev: true + /web-streams-polyfill@3.3.3: + resolution: {integrity: sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==} + engines: {node: '>= 8'} + requiresBuild: true + dev: false + optional: true + /web-vitals@4.2.2: resolution: {integrity: sha512-nYfoOqb4EmElljyXU2qdeE76KsvoHdftQKY4DzA9Aw8DervCg2bG634pHLrJ/d6+B4mE3nWTSJv8Mo7B2mbZkw==} dev: false diff --git a/src/Components/AuthFile.tsx b/src/Components/AuthFile.tsx index 2cc48b4..9fe2575 100644 --- a/src/Components/AuthFile.tsx +++ b/src/Components/AuthFile.tsx @@ -2,13 +2,49 @@ import React, { useEffect, useState } from 'react' import { useCurrentUser } from './CurrentUserContext' import { useQuery } from '@tanstack/react-query' -import axios from 'axios' +import axios, {AxiosResponse} from 'axios' import { Link } from 'react-router-dom' import CircularProgress from '@mui/material/CircularProgress' import Button from '@mui/material/Button' import { ICONS } from '../constants' import SafeTooltip from './SafeTooltip' +const clean_filename = (filename: string) => { + return filename.replace(/\.parquet.*$/, '.parquet') +} + +export async function fetchAuthFile({ + url, + headers, +}: { + url: string + headers: Record +}): Promise<{filename: string, content: AxiosResponse}> { + let filename: string = 'file' + const response = await axios.get(url, { + headers, + responseType: 'blob', + }) + const redirect_url = response.headers['galv-storage-redirect-url'] + if (redirect_url) { + filename = redirect_url.split('/').pop() ?? filename + } else { + const disposition = response.headers['content-disposition'] + if (disposition) { + filename = + disposition.split('filename=')[1].split('"')[0] ?? filename + } else { + filename = url.split('/').pop() ?? filename + } + } + return { + filename: clean_filename(filename), + content: redirect_url + ? await axios.get(redirect_url, { responseType: 'blob' }) + : response, + } +} + export default function AuthFile({ url }: { url: string }) { const [dataUrl, setDataUrl] = useState('') const [filename, setFilename] = useState('file') @@ -20,32 +56,13 @@ export default function AuthFile({ url }: { url: string }) { const query = useQuery({ queryKey: [url], queryFn: async () => { - const response = await axios.get(url, { - headers, - responseType: 'blob', - }) - const redirect_url = response.headers['galv-storage-redirect-url'] - if (redirect_url) { - setFilename(redirect_url.split('/').pop() ?? 'file') - } else { - const disposition = response.headers['content-disposition'] - if (disposition) { - setFilename(disposition.split('filename=')[1].split('"')[0]) - } else { - setFilename(url.split('/').pop() ?? 'file') - } - } - return redirect_url - ? axios.get(redirect_url, { responseType: 'blob' }) - : response + const { filename, content } = await fetchAuthFile({ url, headers }) + setFilename(filename) + return content }, enabled: downloading, }) - const clean_filename = (filename: string) => { - return filename.replace(/\.parquet.*$/, '.parquet') - } - useEffect(() => { if (query.data) { setDataUrl(URL.createObjectURL(query.data.data)) diff --git a/src/Components/Dev.tsx b/src/Components/Dev.tsx index e9d99ce..40287e5 100644 --- a/src/Components/Dev.tsx +++ b/src/Components/Dev.tsx @@ -1,8 +1,20 @@ import React from 'react' +import DownloadDataset from './DownloadDataset' export function Dev() { if (!import.meta.env.DEV) { return <> } - return <> + return ( + <> + + + ) } diff --git a/src/Components/DownloadDataset.tsx b/src/Components/DownloadDataset.tsx new file mode 100644 index 0000000..73f3c3b --- /dev/null +++ b/src/Components/DownloadDataset.tsx @@ -0,0 +1,158 @@ +import React, { useState } from 'react' +import { showSaveFilePicker } from 'native-file-system-adapter' +import { BlobReader, BlobWriter, ZipWriter } from '@zip.js/zip.js' +import { useCurrentUser } from './CurrentUserContext' +import { Configuration, ObservedFile, ParquetPartitionsApi } from '@galv/galv' +import { get_url_components, has } from './misc' +import { fetchAuthFile } from './AuthFile' +import CircularProgress from '@mui/material/CircularProgress' +import { MdDownload } from 'react-icons/md' +import Button, { ButtonProps } from '@mui/material/Button' +import IconButton, { IconButtonProps } from '@mui/material/IconButton' + +// Don't use useQueries or useFetchResource here because we have a complex async flow + +const pathSplitterRegEx = /[/\\]/ + +type ZipBlobOptions = { + file: ObservedFile + api_config: Configuration + headers: Record + // if inDirectory is true use the file's name as the directory name + in_directory?: string | boolean +} +const zipBlobs = async ({ + file, + api_config, + headers, + in_directory = true, +}: ZipBlobOptions) => { + const dname = in_directory === true ? getFileName(file) : in_directory || '' + const dir_name = dname && !/\/$/.test(dname) ? `${dname}/` : dname + const partitions = await Promise.all( + file.parquet_partitions.map((partition_url) => { + // First, look up the ParquetPartitions for the file and get their file URLs + const components = get_url_components(partition_url) + if (!components?.resourceId) { + return Promise.resolve(undefined) + } + return new ParquetPartitionsApi(api_config) + .parquetPartitionsRetrieve({ id: components.resourceId }) + .then((response) => { + // Second, fetch the ParquetPartition file via getAuthFile + if ( + !has(response.data, 'parquet_file') || + response.data.parquet_file === null + ) { + return undefined + } + return fetchAuthFile({ + url: response.data.parquet_file, + headers, + }) + }) + }), + ) + const zipWriter = new ZipWriter(new BlobWriter('application/zip')) + await Promise.all( + partitions + .filter((p) => p !== undefined) + .map((p) => + zipWriter.add( + `${dir_name}${p.filename}`, + new BlobReader(p.content.data), + ), + ), + ) + return await zipWriter.close() +} + +const getFileName = ({ name, path, id }: ObservedFile) => { + // Return name, or basename without extension, or id + if (name) { + return name + } + if (path) { + const basename = path.split(pathSplitterRegEx).pop() + if (basename) { + return basename.split('.')[0] + } + } + return id +} + +const downloadZip = async (zipBlob: Blob, filename: string) => { + // Use showSaveFilePicker to prompt user for a file save location + const handle = await showSaveFilePicker({ + suggestedName: `${filename}.zip`, + types: [ + { + description: 'ZIP Archive', + accept: { 'application/zip': ['.zip'] }, + }, + ], + }) + + const writableStream = await handle.createWritable() + await writableStream.write(zipBlob) + await writableStream.close() +} + +type DownloadDatasetProps = UseIconButton extends true + ? { file: ObservedFile; iconButton: UseIconButton } & { + buttonProps: Partial> + } + : { file: ObservedFile; iconButton: UseIconButton } & { + buttonProps: Partial> + } + +export default function DownloadDataset({ + file, + iconButton, + ...buttonProps +}: DownloadDatasetProps) { + const { user, api_config } = useCurrentUser() + const [loading, setLoading] = useState(false) + const headers = { + authorization: `Bearer ${user?.token}`, + 'Galv-Storage-No-Redirect': true, + } + + const downloadZippedBlobs = async () => { + setLoading(true) + setTimeout(() => { + zipBlobs({ file, api_config, headers }) + .then((zipBlob) => downloadZip(zipBlob, getFileName(file))) + .finally(() => setLoading(false)) + }) + } + + if (!user) { + return + } + + if (iconButton) + return ( + + {loading ? : } + + ) + + return ( + + ) +}