Skip to content

Commit

Permalink
[feat] improvements to duckDB column type handling
Browse files Browse the repository at this point in the history
Signed-off-by: Ihor Dykhta <[email protected]>
  • Loading branch information
igorDykhta committed Feb 2, 2025
1 parent 9a38508 commit e0322a1
Show file tree
Hide file tree
Showing 17 changed files with 365 additions and 134 deletions.
2 changes: 1 addition & 1 deletion src/components/src/common/data-table/header-cell.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ const HeaderCellFactory = (FieldToken: React.FC<FieldTokenProps>) => {
const firstCell = columnIndex === 0;
const isFormatted = Boolean(colMeta[column]?.displayFormat);
const formatLabels = isFormatted ? getFieldFormatLabels(colMeta[column].type) : [];
const onSortTable = useCallback(() => sortTableColumn(column), [sortTableColumn, column]);
const onSortTable = useCallback(() => sortTableColumn?.(column), [sortTableColumn, column]);
const onToggleOptionMenu = useCallback(
() => toggleMoreOptions(column),
[toggleMoreOptions, column]
Expand Down
13 changes: 13 additions & 0 deletions src/constants/src/default-settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1578,3 +1578,16 @@ export const SYNC_TIMELINE_MODES: Record<string, SyncTimelineMode> = {
start: 0,
end: 1
};

/**
 * String names of the GeoArrow extension types.
 *
 * Each member's value is the extension name itself (for example `geoarrow.wkb`),
 * i.e. the kind of string that is stored under an Arrow field's
 * `ARROW:extension:name` metadata key to tag a column as GeoArrow geometry.
 */
export enum GEOARROW_EXTENSIONS {
POINT = 'geoarrow.point',
LINESTRING = 'geoarrow.linestring',
POLYGON = 'geoarrow.polygon',
MULTIPOINT = 'geoarrow.multipoint',
MULTILINESTRING = 'geoarrow.multilinestring',
MULTIPOLYGON = 'geoarrow.multipolygon',
// Geometry serialized as well-known binary rather than a native GeoArrow layout.
WKB = 'geoarrow.wkb'
}
1 change: 1 addition & 0 deletions src/deckgl-arrow-layers/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
],
"dependencies": {
"@geoarrow/geoarrow-js": "^0.3.0",
"@kepler.gl/constants": "^3.1.0",
"@math.gl/core": "^4.0.0",
"@math.gl/polygon": "^4.0.0",
"@math.gl/types": "^4.0.0",
Expand Down
13 changes: 0 additions & 13 deletions src/deckgl-arrow-layers/src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,4 @@
// SPDX-License-Identifier: MIT
// Copyright (c) vis.gl contributors

/**
* Enum holding GeoArrow extension type names
*/
export enum EXTENSION_NAME {
POINT = 'geoarrow.point',
LINESTRING = 'geoarrow.linestring',
POLYGON = 'geoarrow.polygon',
MULTIPOINT = 'geoarrow.multipoint',
MULTILINESTRING = 'geoarrow.multilinestring',
MULTIPOLYGON = 'geoarrow.multipolygon',
WKB = 'geoarrow.wkb'
}

export const DEFAULT_COLOR: [number, number, number, number] = [0, 0, 0, 255];
2 changes: 0 additions & 2 deletions src/deckgl-arrow-layers/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
// SPDX-License-Identifier: MIT
// Copyright contributors to the kepler.gl project

export {EXTENSION_NAME} from './constants';

export {GeoArrowScatterplotLayer} from './layers/geo-arrow-scatterplot-layer';
export {GeoArrowTextLayer} from './layers/geo-arrow-text-layer';
export {GeoArrowArcLayer} from './layers/geo-arrow-arc-layer';
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@ import {
} from '../utils/utils';
import {GeoArrowExtraPickingProps, computeChunkOffsets, getPickingInfo} from '../utils/picking';
import {ColorAccessor, FloatAccessor, GeoArrowPickingInfo, ExtensionProps} from '../types';
import {EXTENSION_NAME} from '../constants';
import {validateAccessors} from '../utils/validate';

import {GEOARROW_EXTENSIONS} from '@kepler.gl/constants';

/** All properties supported by GeoArrowScatterplotLayer */
export type GeoArrowScatterplotLayerProps = Omit<
ScatterplotLayerProps<arrow.Table>,
Expand Down Expand Up @@ -121,12 +122,12 @@ export class GeoArrowScatterplotLayer<ExtraProps extends object = object> extend

throw new Error('getPosition should pass in an arrow Vector of Point or MultiPoint type');
} else {
const pointVector = getGeometryVector(table, EXTENSION_NAME.POINT);
const pointVector = getGeometryVector(table, GEOARROW_EXTENSIONS.POINT);
if (pointVector !== null) {
return this._renderLayersPoint(pointVector);
}

const multiPointVector = getGeometryVector(table, EXTENSION_NAME.MULTIPOINT);
const multiPointVector = getGeometryVector(table, GEOARROW_EXTENSIONS.MULTIPOINT);
if (multiPointVector !== null) {
return this._renderLayersMultiPoint(multiPointVector);
}
Expand Down
6 changes: 4 additions & 2 deletions src/deckgl-arrow-layers/src/layers/geo-arrow-text-layer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ import {TextLayer} from '@deck.gl/layers/typed';
import type {TextLayerProps} from '@deck.gl/layers';
import * as arrow from 'apache-arrow';
import * as ga from '@geoarrow/geoarrow-js';

import {GEOARROW_EXTENSIONS} from '@kepler.gl/constants';

import {
assignAccessor,
expandArrayToCoords,
Expand All @@ -26,7 +29,6 @@ import {
} from '../utils/utils';
import {GeoArrowExtraPickingProps, computeChunkOffsets, getPickingInfo} from '../utils/picking';
import {ColorAccessor, FloatAccessor, GeoArrowPickingInfo, ExtensionProps} from '../types';
import {EXTENSION_NAME} from '../constants';
import {validateAccessors} from '../utils/validate';

/** All properties supported by GeoArrowTextLayer */
Expand Down Expand Up @@ -167,7 +169,7 @@ export class GeoArrowTextLayer<ExtraProps extends object = object> extends Compo

throw new Error('getPosition should pass in an arrow Vector of Point type');
} else {
const pointVector = getGeometryVector(table, EXTENSION_NAME.POINT);
const pointVector = getGeometryVector(table, GEOARROW_EXTENSIONS.POINT);
if (pointVector !== null) {
return this._renderLayersPoint(pointVector);
}
Expand Down
19 changes: 12 additions & 7 deletions src/duckdb/src/components/preview-data-panel.tsx
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
// SPDX-License-Identifier: MIT
// Copyright contributors to the kepler.gl project

import * as arrow from 'apache-arrow';
import React, {useCallback, useMemo, useState, CSSProperties} from 'react';
import {withTheme} from 'styled-components';

import {DataTable, renderedSize} from '@kepler.gl/components';
import {parseFieldValue, createDataContainer} from '@kepler.gl/utils';
import {arrowSchemaToFields} from '@kepler.gl/processors';
import {DataForm} from '@kepler.gl/utils';
import {withTheme} from 'styled-components';
import {parseFieldValue, createDataContainer, DataForm} from '@kepler.gl/utils';

type BaseComponentProps = {
className?: string;
Expand Down Expand Up @@ -39,14 +39,19 @@ export type DataTableStyle = {
optionsButton?: number;
};

/**
 * Result of running a SQL query in the DuckDB panel: the Arrow table with the rows
 * plus the DuckDB column types that Arrow's own schema does not carry.
 */
export type QueryResult = {
/** Arrow table returned by the query. */
table: arrow.Table;
/** DuckDB type name of each column, keyed by column name. */
duckDbTypesMap: Record<string, string>;
};

export type PreviewDataPanelProps = BaseComponentProps & {
result: any;
result: QueryResult;
rowsToCalculatePreview?: number;
theme?: any;
setColumnDisplayFormat?: (formats: {[key: string]: string}) => void;
defaultPinnedColumns?: string[];
dataTableStyle: DataTableStyle;
onAddResultToMap: (result: any) => void;
onAddResultToMap: (result: QueryResult) => void;
};

const PreviewDataPanelWOTheme: React.FC<PreviewDataPanelProps> = ({
Expand All @@ -57,9 +62,9 @@ const PreviewDataPanelWOTheme: React.FC<PreviewDataPanelProps> = ({
theme
}) => {
const [pinnedColumns, setPinnedColumns] = useState<string[]>(defaultPinnedColumns);
const fields = useMemo(() => arrowSchemaToFields(result.schema), [result.schema]);
const fields = useMemo(() => arrowSchemaToFields(result.table, result.duckDbTypesMap), [result]);
const dataContainer = useMemo(() => {
const cols = [...Array(result.numCols).keys()].map(i => result.getChildAt(i));
const cols = [...Array(result.table.numCols).keys()].map(i => result.table.getChildAt(i));

const dataContainer = createDataContainer(cols, {
fields,
Expand Down
52 changes: 37 additions & 15 deletions src/duckdb/src/components/sql-panel.tsx
Original file line number Diff line number Diff line change
@@ -1,22 +1,30 @@
// SPDX-License-Identifier: MIT
// Copyright contributors to the kepler.gl project

import * as arrow from 'apache-arrow';
import React, {useCallback, useState, useEffect} from 'react';
import {useDispatch} from 'react-redux';
import styled from 'styled-components';
import {Panel, PanelGroup, PanelResizeHandle} from 'react-resizable-panels';
import MonacoEditor from './monaco-editor';
import {SchemaPanel} from './schema-panel';
import {PreviewDataPanel} from './preview-data-panel';
import {getDuckDB} from '../init';
import {Button, IconButton, Tooltip} from '@kepler.gl/components';
import {generateHashId} from '@kepler.gl/common-utils';

import {addDataToMap} from '@kepler.gl/actions';
import {Icons, LoadingSpinner} from '@kepler.gl/components';
import {generateHashId} from '@kepler.gl/common-utils';
import {Button, IconButton, Icons, LoadingSpinner, Tooltip} from '@kepler.gl/components';

import {arrowSchemaToFields} from '@kepler.gl/processors';
import {sidePanelBg, panelBorderColor} from '@kepler.gl/styles';
import {isAppleDevice} from '@kepler.gl/utils';
import {arrowSchemaToFields} from '@kepler.gl/processors';

import MonacoEditor from './monaco-editor';
import {SchemaPanel} from './schema-panel';
import {PreviewDataPanel, QueryResult} from './preview-data-panel';
import {getDuckDB} from '../init';
import {
constructST_asWKBQuery,
getDuckDBColumnTypes,
getDuckDBColumnTypesMap,
getGeometryColumns,
setGeoArrowWKBExtension
} from '../table/duckdb-table-utils';

const StyledSqlPanel = styled.div`
display: flex;
Expand Down Expand Up @@ -125,7 +133,7 @@ export const SqlPanel: React.FC<SqlPanelProps> = ({initialSql = ''}) => {
const params = new URLSearchParams(window.location.search);
return params.get('sql') || initialSql;
});
const [result, setResult] = useState<null | arrow.Table>(null);
const [result, setResult] = useState<null | QueryResult>(null);
const [error, setError] = useState<Error | null>(null);
const [counter, setCounter] = useState(0);
const [tableSchema, setTableSchema] = useState([]);
Expand Down Expand Up @@ -154,8 +162,22 @@ export const SqlPanel: React.FC<SqlPanelProps> = ({initialSql = ''}) => {
const db = await getDuckDB();
const connection = await db.connect();

const arrowResult = await connection.query(sql);
setResult(arrowResult);
// FIND a cheap way to get DuckDb types with a single query - temp table? cte?
const tableName = 'temp_keplergl_table';

await connection.query(`CREATE OR REPLACE TABLE '${tableName}' AS ${sql}`);

const duckDbColumns = await getDuckDBColumnTypes(connection, tableName);
const columnsToConvertToWKB = getGeometryColumns(duckDbColumns);
const adjustedQuery = constructST_asWKBQuery(tableName, columnsToConvertToWKB);
const arrowResult = await connection.query(adjustedQuery);
setGeoArrowWKBExtension(arrowResult, duckDbColumns);

await connection.query(`DROP TABLE ${tableName};`);

const duckDbTypesMap = getDuckDBColumnTypesMap(duckDbColumns);

setResult({table: arrowResult, duckDbTypesMap});
setError(null);

connection.close();
Expand All @@ -176,13 +198,13 @@ export const SqlPanel: React.FC<SqlPanelProps> = ({initialSql = ''}) => {
const onAddResultToMap = useCallback(() => {
if (!result) return;

const keplerFields = arrowSchemaToFields(result.schema);
const keplerFields = arrowSchemaToFields(result.table, result.duckDbTypesMap);

const datasetToAdd = {
data: {
fields: keplerFields,
// TODO type AddDataToMapPayload -> rows -> + arrow.Table
rows: result as any
rows: result.table as any
},
info: {
id: generateHashId(),
Expand Down Expand Up @@ -250,7 +272,7 @@ export const SqlPanel: React.FC<SqlPanelProps> = ({initialSql = ''}) => {
<Button secondary onClick={onAddResultToMap}>
Add to Map
</Button>
<div>{result.numRows} rows</div>
<div>{result.table.numRows} rows</div>
</StyledResultActions>
<PreviewDataPanel
result={result}
Expand Down
72 changes: 71 additions & 1 deletion src/duckdb/src/table/duckdb-table-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,79 @@

// Copied from loaders.gl/geoarrow

// TODO: Remove once Kepler.gl is upgraded to loaders.gl 4.4+
// TODO: Remove isGeoArrow* once Kepler.gl is upgraded to loaders.gl 4.4+

import * as arrow from 'apache-arrow';
import {DataType} from 'apache-arrow/type';
import {AsyncDuckDBConnection} from '@duckdb/duckdb-wasm';

import {GEOARROW_EXTENSIONS} from '@kepler.gl/constants';

export type DuckDbColumnDesc = {name: string; type: string};

/**
 * Queries DuckDB for the column names and column types of a table.
 *
 * Runs `DESCRIBE` on the table and reads the first column of the result as the
 * column name and the second column as the DuckDB type.
 * @param connection An active DuckDB connection.
 * @param tableName Name of the DuckDB table to describe. It is emitted as a
 * double-quoted identifier; embedded double quotes are escaped by doubling.
 * @returns One `{name, type}` entry per row of the DESCRIBE result, in result order.
 */
export async function getDuckDBColumnTypes(
  connection: AsyncDuckDBConnection,
  tableName: string
): Promise<DuckDbColumnDesc[]> {
  // A raw `"` inside the name would terminate the identifier early and break the statement.
  const quotedTableName = `"${tableName.replace(/"/g, '""')}"`;
  const resDescribe = await connection.query(`DESCRIBE ${quotedTableName};`);

  // Resolve the two result columns once instead of on every row.
  const nameColumn = resDescribe.getChildAt(0);
  const typeColumn = resDescribe.getChildAt(1);

  const duckDbTypes: DuckDbColumnDesc[] = [];
  const numRows = resDescribe.numRows;
  for (let i = 0; i < numRows; ++i) {
    duckDbTypes.push({
      name: nameColumn?.get(i),
      type: typeColumn?.get(i)
    });
  }

  return duckDbTypes;
}

/**
 * Turns a list of column descriptions into a lookup from column name to DuckDB type.
 * When the same name appears more than once, the later entry wins.
 * @param columns Column descriptions, e.g. as returned by a DESCRIBE query.
 * @returns A plain object mapping each column name to its DuckDB type string.
 */
export function getDuckDBColumnTypesMap(columns: DuckDbColumnDesc[]) {
  const typesByName: Record<string, string> = {};
  for (const column of columns) {
    typesByName[column.name] = column.type;
  }
  return typesByName;
}

/**
 * Builds a `SELECT` over a table that returns the given GEOMETRY columns as WKB.
 *
 * The listed columns are removed from `*` with `EXCLUDE (...)` and re-added as
 * `ST_AsWKB(col) AS col`, so they keep their names but end up after the other columns.
 * @param tableName Name of the table to select from (emitted as a single-quoted
 * literal; embedded single quotes are escaped by doubling).
 * @param columnsToConvertToWKB Names of the columns to wrap in `ST_AsWKB`. Each is
 * emitted as a double-quoted identifier with embedded double quotes doubled.
 * @returns The SQL statement text, terminated with `;`.
 */
export function constructST_asWKBQuery(tableName: string, columnsToConvertToWKB: string[]): string {
  const quotedTable = `'${tableName.replace(/'/g, "''")}'`;

  // Nothing to convert: a plain star select is enough.
  if (columnsToConvertToWKB.length === 0) {
    return `SELECT * FROM ${quotedTable};`;
  }

  // Quote identifiers so names with spaces, punctuation or reserved words stay valid.
  const quotedColumns = columnsToConvertToWKB.map(column => `"${column.replace(/"/g, '""')}"`);

  // The parenthesized form is required for EXCLUDE to cover more than one column;
  // without it only the first name is excluded and the rest are selected twice.
  const exclude = `EXCLUDE (${quotedColumns.join(', ')})`;
  const asWKB = quotedColumns.map(column => `ST_AsWKB(${column}) AS ${column}`).join(', ');

  return `SELECT * ${exclude}, ${asWKB} FROM ${quotedTable};`;
}

/**
 * Lists the names of the columns whose DuckDB type is exactly `GEOMETRY`.
 * @param columns Column descriptions to scan.
 * @returns Matching column names, in the same order as the input.
 */
export function getGeometryColumns(columns: DuckDbColumnDesc[]): string[] {
  return columns.filter(column => column.type === 'GEOMETRY').map(column => column.name);
}

/**
 * Tags the Arrow fields that correspond to DuckDB GEOMETRY columns as GeoArrow WKB.
 *
 * For every field in the table schema, the first column description with the same
 * name is looked up; when its type is `GEOMETRY`, the field's
 * `ARROW:extension:name` metadata entry is set to the GeoArrow WKB extension name.
 * The schema's field metadata is mutated in place and nothing is returned.
 * @param table Arrow table whose schema fields are updated.
 * @param columns DuckDB column descriptions used to decide which fields are geometry.
 */
export function setGeoArrowWKBExtension(table: arrow.Table, columns: DuckDbColumnDesc[]) {
  for (const arrowField of table.schema.fields) {
    const matchingColumn = columns.find(column => column.name === arrowField.name);
    if (matchingColumn?.type !== 'GEOMETRY') {
      continue;
    }
    arrowField.metadata.set('ARROW:extension:name', GEOARROW_EXTENSIONS.WKB);
  }
}

/** Checks whether the given Apache Arrow JS type is a Point data type */
export function isGeoArrowPoint(type: DataType) {
Expand Down
Loading

0 comments on commit e0322a1

Please sign in to comment.