From 6aa054cbfd934bdd3827ba41b3209b9c86225515 Mon Sep 17 00:00:00 2001 From: Giorgio Basile <4904905+giorgiobasile@users.noreply.github.com> Date: Mon, 27 May 2024 15:41:09 +0200 Subject: [PATCH 1/2] ENH: add from_wkt and from_wkb tools --- dask_geopandas/__init__.py | 6 ++++ dask_geopandas/core.py | 50 +++++++++++++++++++++++++++++ dask_geopandas/expr.py | 50 +++++++++++++++++++++++++++++ dask_geopandas/tests/test_core.py | 26 +++++++++++++++ doc/source/docs/reference/tools.rst | 2 ++ 5 files changed, 134 insertions(+) diff --git a/dask_geopandas/__init__.py b/dask_geopandas/__init__.py index f9c21464..ec8867eb 100644 --- a/dask_geopandas/__init__.py +++ b/dask_geopandas/__init__.py @@ -5,6 +5,8 @@ if backends.QUERY_PLANNING_ON: from .expr import ( points_from_xy, + from_wkt, + from_wkb, GeoDataFrame, GeoSeries, from_geopandas, @@ -13,6 +15,8 @@ else: from .core import ( points_from_xy, + from_wkt, + from_wkb, GeoDataFrame, GeoSeries, from_geopandas, @@ -30,6 +34,8 @@ __all__ = [ "points_from_xy", + "from_wkt", + "from_wkb", "GeoDataFrame", "GeoSeries", "from_geopandas", diff --git a/dask_geopandas/core.py b/dask_geopandas/core.py index c8358025..b7619e6c 100644 --- a/dask_geopandas/core.py +++ b/dask_geopandas/core.py @@ -884,6 +884,56 @@ def func(data, x, y, z): ) +def from_wkt(wkt, crs=None): + """ + Convert dask.dataframe.Series of WKT objects to a GeoSeries. + + Parameters + ---------- + wkt: dask Series + A dask Series containing WKT objects. + crs: value, optional + Coordinate Reference System of the geometry objects. Can be anything + accepted by + :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`, + such as an authority string (eg "EPSG:4326") or a WKT string. 
+ + Returns + ------- + GeoSeries + """ + + def func(data): + return geopandas.GeoSeries.from_wkt(data, index=data.index, crs=crs) + + return wkt.map_partitions(func, meta=geopandas.GeoSeries(), token="from_wkt") + + +def from_wkb(wkb, crs=None): + """ + Convert dask.dataframe.Series of WKB objects to a GeoSeries. + + Parameters + ---------- + wkb: dask Series + A dask Series containing WKB objects. + crs: value, optional + Coordinate Reference System of the geometry objects. Can be anything + accepted by + :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`, + such as an authority string (eg "EPSG:4326") or a WKT string. + + Returns + ------- + GeoSeries + """ + + def func(data): + return geopandas.GeoSeries.from_wkb(data, index=data.index, crs=crs) + + return wkb.map_partitions(func, meta=geopandas.GeoSeries(), token="from_wkb") + + for name in [ "area", "geom_type", diff --git a/dask_geopandas/expr.py b/dask_geopandas/expr.py index 5d22728a..aaad9840 100644 --- a/dask_geopandas/expr.py +++ b/dask_geopandas/expr.py @@ -916,6 +916,56 @@ def func(data, x, y, z): ) +def from_wkt(wkt, crs=None): + """ + Convert dask.dataframe.Series of WKT objects to a GeoSeries. + + Parameters + ---------- + wkt: dask Series + A dask Series containing WKT objects. + crs: value, optional + Coordinate Reference System of the geometry objects. Can be anything + accepted by + :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`, + such as an authority string (eg "EPSG:4326") or a WKT string. + + Returns + ------- + GeoSeries + """ + + def func(data): + return geopandas.GeoSeries.from_wkt(data, index=data.index, crs=crs) + + return wkt.map_partitions(func, meta=geopandas.GeoSeries(), token="from_wkt") + + +def from_wkb(wkb, crs=None): + """ + Convert dask.dataframe.Series of WKB objects to a GeoSeries. + + Parameters + ---------- + wkb: dask Series + A dask Series containing WKB objects. + crs: value, optional + Coordinate Reference System of the geometry objects. 
Can be anything + accepted by + :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`, + such as an authority string (eg "EPSG:4326") or a WKT string. + + Returns + ------- + GeoSeries + """ + + def func(data): + return geopandas.GeoSeries.from_wkb(data, index=data.index, crs=crs) + + return wkb.map_partitions(func, meta=geopandas.GeoSeries(), token="from_wkb") + + for name in [ "area", "geom_type", diff --git a/dask_geopandas/tests/test_core.py b/dask_geopandas/tests/test_core.py index 4220460f..dffd6de9 100644 --- a/dask_geopandas/tests/test_core.py +++ b/dask_geopandas/tests/test_core.py @@ -177,6 +177,32 @@ def test_points_from_xy_with_crs(): assert_geoseries_equal(actual.compute(), expected) +def test_from_wkt(): + wkt = [ + "POLYGON ((-64.8 32.3, -65.5 18.3, -80.3 25.2, -64.8 32.3))", + "POLYGON ((-81.079102 35.496456, -81.166992 31.914868, -75.541992 31.914868, -75.629883 35.675147, -81.079102 35.496456))", # noqa E501 + ] + expected = geopandas.GeoSeries.from_wkt(wkt, crs="EPSG:4326") + df = pd.DataFrame({"wkt": wkt}) + ddf = dd.from_pandas(df, npartitions=2) + actual = dask_geopandas.from_wkt(ddf["wkt"], crs="EPSG:4326") + assert isinstance(actual, dask_geopandas.GeoSeries) + assert_geoseries_equal(actual.compute(), expected) + + +def test_from_wkb(): + wkb = [ + "0103000000010000000400000033333333333350c0666666666626404000000000006050c0cdcccccccc4c324033333333331354c0333333333333394033333333333350c06666666666264040", # noqa E501 + "0103000000010000000500000016c3d501104554c095f3c5de8bbf414064ac36ffaf4a54c02c280cca34ea3f4064ac36ffafe252c02c280cca34ea3f409c53c90050e852c00b7f86376bd6414016c3d501104554c095f3c5de8bbf4140", # noqa E501 + ] + expected = geopandas.GeoSeries.from_wkb(wkb, crs="EPSG:4326") + df = pd.DataFrame({"wkb": wkb}) + ddf = dd.from_pandas(df, npartitions=2) + actual = dask_geopandas.from_wkb(ddf["wkb"], crs="EPSG:4326") + assert isinstance(actual, dask_geopandas.GeoSeries) + assert_geoseries_equal(actual.compute(), expected) + + +def 
test_geodataframe_crs(geodf_points_crs): df = geodf_points_crs original = df.crs diff --git a/doc/source/docs/reference/tools.rst b/doc/source/docs/reference/tools.rst index e0b6ed05..5296a606 100644 --- a/doc/source/docs/reference/tools.rst +++ b/doc/source/docs/reference/tools.rst @@ -9,3 +9,5 @@ Tools sjoin clip points_from_xy + from_wkt + from_wkb \ No newline at end of file From 1360d00c0df15a22850553d2e81a2406035deaf3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 24 Jun 2024 12:35:42 +0200 Subject: [PATCH 2/2] linting --- dask_geopandas/tests/test_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dask_geopandas/tests/test_core.py b/dask_geopandas/tests/test_core.py index 9dfb2c30..f5f089c2 100644 --- a/dask_geopandas/tests/test_core.py +++ b/dask_geopandas/tests/test_core.py @@ -198,8 +198,8 @@ def test_from_wkt(): def test_from_wkb(): wkb = [ - "0103000000010000000400000033333333333350c0666666666626404000000000006050c0cdcccccccc4c324033333333331354c0333333333333394033333333333350c06666666666264040", # noqa E501 - "0103000000010000000500000016c3d501104554c095f3c5de8bbf414064ac36ffaf4a54c02c280cca34ea3f4064ac36ffafe252c02c280cca34ea3f409c53c90050e852c00b7f86376bd6414016c3d501104554c095f3c5de8bbf4140", # noqa E501 + "0103000000010000000400000033333333333350c0666666666626404000000000006050c0cdcccccccc4c324033333333331354c0333333333333394033333333333350c06666666666264040", + "0103000000010000000500000016c3d501104554c095f3c5de8bbf414064ac36ffaf4a54c02c280cca34ea3f4064ac36ffafe252c02c280cca34ea3f409c53c90050e852c00b7f86376bd6414016c3d501104554c095f3c5de8bbf4140", ] expected = geopandas.GeoSeries.from_wkb(wkb, crs="EPSG:4326") df = pd.DataFrame({"wkb": wkb})