From 22255fa5dc75d5747eaf1c5be7953ae92fe1a668 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Sat, 26 Aug 2023 13:28:38 +0200 Subject: [PATCH 1/2] Apply @Tishj's arrow_conversion_refactor.patch --- .../src/spatial/gdal/functions/st_read.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/spatial/src/spatial/gdal/functions/st_read.cpp b/spatial/src/spatial/gdal/functions/st_read.cpp index afac84e3..76a5c753 100644 --- a/spatial/src/spatial/gdal/functions/st_read.cpp +++ b/spatial/src/spatial/gdal/functions/st_read.cpp @@ -94,12 +94,12 @@ struct GdalScanFunctionData : public TableFunctionData { vector layer_creation_options; unique_ptr spatial_filter; GDALDatasetUniquePtr dataset; - unordered_map> arrow_convert_data; idx_t max_threads; // before they are renamed vector all_names; vector all_types; atomic lines_read; + ArrowTableType arrow_table; }; struct GdalScanLocalState : ArrowScanLocalState { @@ -351,16 +351,19 @@ unique_ptr GdalTableFunction::Bind(ClientContext &context, TableFu ':', 'e', 'x', 't', 'e', 'n', 's', 'i', 'o', 'n', ':', 'n', 'a', 'm', 'e', '\a', '\0', '\0', '\0', 'o', 'g', 'c', '.', 'w', 'k', 'b'}; + auto arrow_type = GetArrowLogicalType(attribute); if (attribute.metadata != nullptr && strncmp(attribute.metadata, ogc_flag, sizeof(ogc_flag)) == 0) { // This is a WKB geometry blob - GetArrowLogicalType(attribute, result->arrow_convert_data, col_idx); + result->arrow_table.AddColumn(col_idx, std::move(arrow_type)); return_types.emplace_back(core::GeoTypes::WKB_BLOB()); } else if (attribute.dictionary) { - result->arrow_convert_data[col_idx] = - make_uniq(GetArrowLogicalType(attribute, result->arrow_convert_data, col_idx)); - return_types.emplace_back(GetArrowLogicalType(*attribute.dictionary, result->arrow_convert_data, col_idx)); + auto dictionary_type = GetArrowLogicalType(attribute); + return_types.emplace_back(dictionary_type->GetDuckType()); + arrow_type->SetDictionary(std::move(dictionary_type)); + 
result->arrow_table.AddColumn(col_idx, std::move(arrow_type)); } else { - return_types.emplace_back(GetArrowLogicalType(attribute, result->arrow_convert_data, col_idx)); + return_types.emplace_back(arrow_type->GetDuckType()); + result->arrow_table.AddColumn(col_idx, std::move(arrow_type)); } // keep these around for projection/filter pushdown later @@ -492,12 +495,12 @@ void GdalTableFunction::Scan(ClientContext &context, TableFunctionInput &input, if (global_state.CanRemoveFilterColumns()) { state.all_columns.Reset(); state.all_columns.SetCardinality(output_size); - ArrowToDuckDB(state, data.arrow_convert_data, state.all_columns, data.lines_read - output_size, false); + ArrowToDuckDB(state, data.arrow_table.GetColumns(), state.all_columns, data.lines_read - output_size, false); output.ReferenceColumns(state.all_columns, global_state.projection_ids); } else { output.SetCardinality(output_size); - ArrowToDuckDB(state, data.arrow_convert_data, output, data.lines_read - output_size, false); + ArrowToDuckDB(state, data.arrow_table.GetColumns(), output, data.lines_read - output_size, false); } output.Verify(); From 0b8fb3ecf0dd112cdcae8755c4df9bcb1f5197a7 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Sat, 26 Aug 2023 13:29:51 +0200 Subject: [PATCH 2/2] Update to duckdb/duckdb@ade3443f --- duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb b/duckdb index a532702b..ade3443f 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit a532702b9a50df8f21c56ed48e6cf0914cf0a1e1 +Subproject commit ade3443f4a8003fd726a74e5fbe2c176e85d7c9a