Skip to content

Commit

Permalink
ADBC: add spatial support for DuckDB databases and GeoParquet
Browse files Browse the repository at this point in the history
  • Loading branch information
rouault committed Dec 7, 2024
1 parent 04b227e commit 1f78ff8
Show file tree
Hide file tree
Showing 5 changed files with 955 additions and 42 deletions.
78 changes: 72 additions & 6 deletions autotest/ogr/ogr_adbc.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,14 +246,58 @@ def test_ogr_adbc_duckdb_parquet_with_spatial():
allowed_drivers=["ADBC"],
open_options=[
"PRELUDE_STATEMENTS=INSTALL spatial",
"PRELUDE_STATEMENTS=LOAD spatial",
],
) as ds:
with ds.ExecuteSQL(
"SELECT ST_AsText(geometry) FROM read_parquet('data/parquet/poly.parquet')"
) as sql_lyr:
f = sql_lyr.GetNextFeature()
assert f.GetField(0).startswith("POLYGON")
lyr = ds.GetLayer(0)
assert lyr.GetGeomType() == ogr.wkbPolygon
assert lyr.TestCapability(ogr.OLCFastGetExtent)
minx, maxx, miny, maxy = lyr.GetExtent()
assert (minx, maxx, miny, maxy) == (
478315.53125,
481645.3125,
4762880.5,
4765610.5,
)
assert lyr.GetExtent3D() == (
478315.53125,
481645.3125,
4762880.5,
4765610.5,
float("inf"),
float("-inf"),
)
assert lyr.GetSpatialRef().GetAuthorityCode(None) == "27700"
f = lyr.GetNextFeature()
assert f.GetGeometryRef().ExportToWkt().startswith("POLYGON ((")

assert lyr.GetFeatureCount() == 10
lyr.SetAttributeFilter("false")

assert lyr.GetFeatureCount() == 0
lyr.SetAttributeFilter("true")

lyr.SetAttributeFilter(None)
assert lyr.GetFeatureCount() == 10
lyr.SetSpatialFilterRect(minx, miny, maxx, maxy)
assert lyr.GetFeatureCount() == 10
lyr.SetSpatialFilterRect(minx, miny, minx, maxy)
assert lyr.GetFeatureCount() < 10
lyr.SetSpatialFilterRect(maxx, miny, maxx, maxy)
assert lyr.GetFeatureCount() < 10
lyr.SetSpatialFilterRect(minx, miny, maxx, miny)
assert lyr.GetFeatureCount() < 10
lyr.SetSpatialFilterRect(minx, maxy, maxx, maxy)
assert lyr.GetFeatureCount() < 10

lyr.SetAttributeFilter("true")
lyr.SetSpatialFilter(None)
assert lyr.GetFeatureCount() == 10
lyr.SetSpatialFilterRect(minx, miny, maxx, maxy)
assert lyr.GetFeatureCount() == 10

lyr.SetAttributeFilter("false")
lyr.SetSpatialFilterRect(minx, miny, maxx, maxy)
assert lyr.GetFeatureCount() == 0


###############################################################################
Expand Down Expand Up @@ -325,6 +369,28 @@ def test_ogr_adbc_test_ogrsf_parquet_filename_with_glob():
assert "ERROR" not in ret


###############################################################################
# Run test_ogrsf on a GeoParquet file


def test_ogr_adbc_test_ogrsf_geoparquet():
    """Run the test_ogrsf utility in read-only mode on a GeoParquet file
    opened through the ADBC driver, and check that its output reports
    "INFO" and no "ERROR".

    Skipped when libduckdb or the test_ogrsf binary is not available.
    """

    if not _has_libduckdb():
        pytest.skip("libduckdb.so missing")

    import test_cli_utilities

    if test_cli_utilities.get_test_ogrsf_path() is None:
        pytest.skip()

    # Command line: <test_ogrsf> -ro ADBC:<geoparquet file>
    command_line = (
        test_cli_utilities.get_test_ogrsf_path()
        + " -ro ADBC:data/parquet/poly.parquet"
    )
    output = gdaltest.runexternal(command_line)

    assert "INFO" in output
    assert "ERROR" not in output


###############################################################################
# Test DATETIME_AS_STRING=YES GetArrowStream() option

Expand Down
14 changes: 13 additions & 1 deletion doc/source/drivers/vector/adbc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@ Consult the `installation instruction <https://arrow.apache.org/adbc/current/dri
for the various ADBC drivers. At time of writing, there are drivers for
SQLite3, PostgreSQL, Snowflake, BigQuery, DuckDB, Flight SQL, etc.

The driver is read-only, and there is no support for spatial data currently.
The driver is read-only.

Spatial support is available when the underlying ADBC driver is DuckDB and
its spatial extension is installed, for native spatial DuckDB databases and
GeoParquet datasets.

Connection string
-----------------
Expand Down Expand Up @@ -106,6 +110,14 @@ GDAL ADBC driver as a way of locating and loading the ADBC driver if GDAL was
not built with ADBC Driver Manager support or if an embedding application has
an updated or augmented collection of drivers available.

Filtering
---------

Attribute filters are passed to the underlying ADBC engine.

Spatial filters are passed to DuckDB when it is the underlying ADBC engine,
for DuckDB spatial databases and GeoParquet datasets.

Examples
--------

Expand Down
49 changes: 47 additions & 2 deletions ogr/ogrsf_frmts/adbc/ogr_adbc.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ class OGRADBCLayer final : public OGRLayer,
friend class OGRADBCDataset;

OGRADBCDataset *m_poDS = nullptr;
const std::string m_osBaseStatement{}; // as provided by user
std::string m_osModifiedBaseStatement{}; // above tuned to use ST_AsWKB()
std::string m_osModifiedSelect{}; // SELECT part of above
std::string m_osAttributeFilter{};
std::unique_ptr<AdbcStatement> m_statement{};
std::unique_ptr<OGRArrowArrayToOGRFeatureAdapterLayer> m_poAdapterLayer{};
std::unique_ptr<OGRArrowArrayStream> m_stream{};
Expand All @@ -103,17 +107,36 @@ class OGRADBCLayer final : public OGRLayer,
GIntBig m_nFeatureID = 0;
bool m_bIsParquetLayer = false;

//! Describe the bbox column of a geometry column
//!
//! Holds four strings, one per bound of a 2D bounding box (X/Y min/max).
//! NOTE(review): the strings presumably hold the column names or SQL
//! expressions used to read each bound - confirm against the .cpp.
struct GeomColBBOX
{
// Minimum X bound; also acts as the "has bbox" marker.
std::string osXMin{}; // empty if no bbox column
// Minimum Y bound.
std::string osYMin{};
// Maximum X bound.
std::string osXMax{};
// Maximum Y bound.
std::string osYMax{};
};

std::vector<GeomColBBOX>
m_geomColBBOX{}; // same size as GetGeomFieldCount()
std::vector<OGREnvelope3D> m_extents{}; // same size as GetGeomFieldCount()

OGRFeature *GetNextRawFeature();
bool GetArrowStreamInternal(struct ArrowArrayStream *out_stream);
GIntBig GetFeatureCountParquet();

void BuildLayerDefn(bool bInternalUse);
bool ReplaceStatement(const char *pszNewStatement);
bool UpdateStatement();
std::string GetCurrentStatement() const;

CPL_DISALLOW_COPY_ASSIGN(OGRADBCLayer)

public:
OGRADBCLayer(OGRADBCDataset *poDS, const char *pszName,
const char *pszStatement,
std::unique_ptr<AdbcStatement> poStatement,
std::unique_ptr<OGRArrowArrayStream> poStream,
ArrowSchema *schema);
ArrowSchema *schema, bool bInternalUse);
~OGRADBCLayer() override;

OGRFeatureDefn *GetLayerDefn() override
Expand All @@ -128,6 +151,20 @@ class OGRADBCLayer final : public OGRLayer,
bool GetArrowStream(struct ArrowArrayStream *out_stream,
CSLConstList papszOptions = nullptr) override;
GIntBig GetFeatureCount(int bForce) override;

/** Single-argument spatial filter setter.
 *
 * Forwards to SetSpatialFilter(int, OGRGeometry *) using geometry field
 * index 0.
 *
 * @param poGeom geometry passed through unchanged to the two-argument
 *               overload.
 */
void SetSpatialFilter(OGRGeometry *poGeom) override
{
    constexpr int iFirstGeomField = 0;
    SetSpatialFilter(iFirstGeomField, poGeom);
}

OGRErr SetAttributeFilter(const char *pszFilter) override;
void SetSpatialFilter(int iGeomField, OGRGeometry *poGeom) override;

OGRErr GetExtent(OGREnvelope *psExtent, int bForce = TRUE) override;
OGRErr GetExtent(int iGeomField, OGREnvelope *psExtent,
int bForce = TRUE) override;
OGRErr GetExtent3D(int iGeomField, OGREnvelope3D *psExtent,
int bForce = TRUE) override;
};

/************************************************************************/
Expand All @@ -143,6 +180,8 @@ class OGRADBCDataset final : public GDALDataset
std::unique_ptr<AdbcConnection> m_connection{};
std::vector<std::unique_ptr<OGRLayer>> m_apoLayers{};
std::string m_osParquetFilename{};
bool m_bIsDuckDB = false;
bool m_bSpatialLoaded = false;

public:
OGRADBCDataset() = default;
Expand All @@ -164,7 +203,13 @@ class OGRADBCDataset final : public GDALDataset
OGRLayer *GetLayerByName(const char *pszName) override;

std::unique_ptr<OGRADBCLayer> CreateLayer(const char *pszStatement,
const char *pszLayerName);
const char *pszLayerName,
bool bInternalUse);

/** Create a layer for internal use from the given statement.
 *
 * Forwards to CreateLayer() with the fixed layer name "temp" and the
 * bInternalUse flag set to true.
 *
 * @param pszStatement statement passed through unchanged to CreateLayer().
 * @return whatever CreateLayer() returns.
 */
std::unique_ptr<OGRADBCLayer> CreateInternalLayer(const char *pszStatement)
{
    constexpr bool bForInternalUse = true;
    return CreateLayer(pszStatement, "temp", bForInternalUse);
}

OGRLayer *ExecuteSQL(const char *pszStatement, OGRGeometry *poSpatialFilter,
const char *pszDialect) override;
Expand Down
Loading

0 comments on commit 1f78ff8

Please sign in to comment.