Skip to content

Commit

Permalink
Athena: Remove duplicate first row (#3101)
Browse files Browse the repository at this point in the history
  • Loading branch information
fiskus authored Sep 26, 2022
1 parent cde94d0 commit 2c622d5
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 28 deletions.
2 changes: 1 addition & 1 deletion catalog/app/containers/Bucket/Queries/Athena/Athena.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ function ResultsContainer({
workgroup={workgroup}
>
{!!queryResults.rows.length && (
<CreatePackage bucket={bucket} rows={queryResults.rows} />
<CreatePackage bucket={bucket} queryResults={queryResults} />
)}
</ResultsBreadcrumbs>
{/* eslint-disable-next-line no-nested-ternary */}
Expand Down
39 changes: 20 additions & 19 deletions catalog/app/containers/Bucket/Queries/Athena/CreatePackage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,25 +24,25 @@ function SeeDocsForCreatingPackage() {
}

function doQueryResultsContainManifestEntries(
rows: string[][],
): rows is [ManifestKey[], ...string[][]] {
const [head] = rows
queryResults: requests.athena.QueryResultsResponse,
): queryResults is requests.athena.QueryManifestsResponse {
const columnNames = queryResults.columns.map(({ name }) => name)
return (
head.includes('size') &&
head.includes('physical_keys') &&
head.includes('logical_key')
columnNames.includes('size') &&
columnNames.includes('physical_keys') &&
columnNames.includes('logical_key')
)
}

function rowToManifestEntryStringified(
row: string[],
head: ManifestKey[],
columns: requests.athena.QueryResultsColumns,
): ManifestEntryStringified {
return row.reduce((acc, value, index) => {
if (!head[index]) return acc
if (!columns[index].name) return acc
return {
...acc,
[head[index]]: value,
[columns[index].name]: value,
}
}, {} as ManifestEntryStringified)
}
Expand Down Expand Up @@ -72,11 +72,10 @@ function parseManifestEntryStringified(entry: ManifestEntryStringified): {
}

function parseQueryResults(
rows: [ManifestKey[], ...string[][]],
queryResults: requests.athena.QueryManifestsResponse,
): Record<string, Model.S3File> {
const [head, ...tail] = rows
const manifestEntries: ManifestEntryStringified[] = tail.reduce(
(memo, row) => memo.concat(rowToManifestEntryStringified(row, head)),
const manifestEntries: ManifestEntryStringified[] = queryResults.rows.reduce(
(memo, row) => memo.concat(rowToManifestEntryStringified(row, queryResults.columns)),
[] as ManifestEntryStringified[],
)
return manifestEntries.reduce(
Expand All @@ -90,26 +89,28 @@ function parseQueryResults(

interface CreatePackageProps {
bucket: string
rows: requests.athena.QueryResultsRows
queryResults: requests.athena.QueryResultsResponse
}

export default function CreatePackage({ bucket, rows }: CreatePackageProps) {
export default function CreatePackage({ bucket, queryResults }: CreatePackageProps) {
const addToPackage = AddToPackage.use()
const createDialog = usePackageCreationDialog({
bucket,
delayHashing: true,
disableStateDisplay: true,
})
const onPackage = React.useCallback(() => {
if (!doQueryResultsContainManifestEntries(rows)) return
if (!doQueryResultsContainManifestEntries(queryResults)) return

// TODO: make it lazy, and disable button
const entries = parseQueryResults(rows)
const entries = parseQueryResults(queryResults)
addToPackage?.merge(entries)
createDialog.open()
}, [addToPackage, createDialog, rows])
}, [addToPackage, createDialog, queryResults])

if (!doQueryResultsContainManifestEntries(rows)) return <SeeDocsForCreatingPackage />
if (!doQueryResultsContainManifestEntries(queryResults)) {
return <SeeDocsForCreatingPackage />
}

return (
<>
Expand Down
24 changes: 16 additions & 8 deletions catalog/app/containers/Bucket/Queries/requests/athena.ts
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,12 @@ export interface QueryResultsResponse {
rows: QueryResultsRows
}

type ManifestKey = 'hash' | 'logical_key' | 'meta' | 'physical_keys' | 'size'

export interface QueryManifestsResponse extends QueryResultsResponse {
rows: [ManifestKey[], ...string[][]]
}

interface QueryResultsArgs {
athena: Athena
queryExecutionId: string
Expand Down Expand Up @@ -361,15 +367,17 @@ async function fetchQueryResults({
(row) => row?.Data?.map((item) => item?.VarCharValue || '') || emptyRow,
) || emptyList
const rows = [...(prev?.rows || emptyList), ...parsed]
const columns =
queryResultsOutput.ResultSet?.ResultSetMetadata?.ColumnInfo?.map(
({ Name, Type }) => ({
name: Name,
type: Type,
}),
) || emptyColumns
const isHeadColumns = columns.every(({ name }, index) => name === rows[0][index])
return {
rows,
columns:
queryResultsOutput.ResultSet?.ResultSetMetadata?.ColumnInfo?.map(
({ Name, Type }) => ({
name: Name,
type: Type,
}),
) || emptyColumns,
rows: isHeadColumns ? rows.slice(1) : rows,
columns,
next: queryResultsOutput.NextToken,
queryExecution,
}
Expand Down
2 changes: 2 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
* [Added] `status_reports` lambda ([#2989](https://github.com/quiltdata/quilt/pull/2989), [#3088](https://github.com/quiltdata/quilt/pull/3088))
* [Added] Stack Status Admin UI: reports ([#3068](https://github.com/quiltdata/quilt/pull/3068))
* [Added] Edit button for text files in packages ([#3070](https://github.com/quiltdata/quilt/pull/3070))
* [Added] Add execution context for Athena query execution ([#3062](https://github.com/quiltdata/quilt/pull/3062))
* [Fixed] Fix package creation in S3 buckets with SSE-KMS enabled ([#2754](https://github.com/quiltdata/quilt/pull/2754))
* [Fixed] Fix creation of packages with large (4+ GiB) files ([#2933](https://github.com/quiltdata/quilt/pull/2933))
* [Fixed] Fix pre-population of default dates when using "dateformat" + {"format": "date"} ([#3082](https://github.com/quiltdata/quilt/pull/3082))
Expand All @@ -42,6 +43,7 @@
* [Changed] Rework package indexing: now package indexes have documents only for current versions of package pointer objects, documents for 'latest' pointers have `package_hash`, `package_stats`, `comment`, `metadata` fields properly populated ([#2897](https://github.com/quiltdata/quilt/pull/2897))
* [Changed] Remove ClientRequestToken (idempotency token) for making Athena queries ([#2992](https://github.com/quiltdata/quilt/pull/2992))
* [Changed] Fixed config and docs mistyping: `ui.athena.defaultWorkflow` should be `ui.athena.defaultWorkgroup` ([#3067](https://github.com/quiltdata/quilt/pull/3067))
* [Changed] Use dedicated columns field instead of first row, fix duplicated first row in table results ([#3101](https://github.com/quiltdata/quilt/pull/3101))

## Docs
* [Added] Fix four-deep headers so auto-link generation works ([#3100](https://github.com/quiltdata/quilt/pull/3100))
Expand Down

0 comments on commit 2c622d5

Please sign in to comment.