From b7c50ded2d1c7ebda219c0581d0fe297ccccbadd Mon Sep 17 00:00:00 2001 From: Giorgio Basile <4904905+giorgiobasile@users.noreply.github.com> Date: Mon, 27 May 2024 15:41:09 +0200 Subject: [PATCH] ENH: add from_wkt and from_wkb functions --- dask_geopandas/__init__.py | 6 ++++ dask_geopandas/core.py | 54 +++++++++++++++++++++++++++++++ dask_geopandas/expr.py | 54 +++++++++++++++++++++++++++++++ dask_geopandas/tests/test_core.py | 26 +++++++++++++++ 4 files changed, 140 insertions(+) diff --git a/dask_geopandas/__init__.py b/dask_geopandas/__init__.py index f9c21464..ec8867eb 100644 --- a/dask_geopandas/__init__.py +++ b/dask_geopandas/__init__.py @@ -5,6 +5,8 @@ if backends.QUERY_PLANNING_ON: from .expr import ( points_from_xy, + from_wkt, + from_wkb, GeoDataFrame, GeoSeries, from_geopandas, @@ -13,6 +15,8 @@ else: from .core import ( points_from_xy, + from_wkt, + from_wkb, GeoDataFrame, GeoSeries, from_geopandas, @@ -30,6 +34,8 @@ __all__ = [ "points_from_xy", + "from_wkt", + "from_wkb", "GeoDataFrame", "GeoSeries", "from_geopandas", diff --git a/dask_geopandas/core.py b/dask_geopandas/core.py index c8358025..fd572197 100644 --- a/dask_geopandas/core.py +++ b/dask_geopandas/core.py @@ -884,6 +884,60 @@ def func(data, x, y, z): ) +def from_wkt(df, wkt="wkt", crs=None): + """ + Convert dask.dataframe of WKT objects to a GeoSeries. + + Parameters + ---------- + df : dask DataFrame + A dask DataFrame containing WKT objects. + wkt : str + The name of the column in `df` containing the WKT objects. + crs : value, optional + Coordinate Reference System of the geometry objects. Can be anything + accepted by + :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`, + such as an authority string (eg "EPSG:4326") or a WKT string. 
+ + Returns + ------- + GeoSeries + """ + + def func(data, wkt): + return geopandas.GeoSeries.from_wkt(data[wkt], index=data.index, crs=crs) + + return df.map_partitions(func, wkt, meta=geopandas.GeoSeries(), token="from_wkt") + + +def from_wkb(df, wkb="wkb", crs=None): + """ + Convert dask.dataframe of WKB objects to a GeoSeries. + + Parameters + ---------- + df : dask DataFrame + A dask DataFrame containing WKB objects. + wkb : str + The name of the column in `df` containing the WKB objects. + crs : value, optional + Coordinate Reference System of the geometry objects. Can be anything + accepted by + :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`, + such as an authority string (eg "EPSG:4326") or a WKT string. + + Returns + ------- + GeoSeries + """ + + def func(data, wkb): + return geopandas.GeoSeries.from_wkb(data[wkb], index=data.index, crs=crs) + + return df.map_partitions(func, wkb, meta=geopandas.GeoSeries(), token="from_wkb") + + for name in [ "area", "geom_type", diff --git a/dask_geopandas/expr.py b/dask_geopandas/expr.py index 5d22728a..4ee07284 100644 --- a/dask_geopandas/expr.py +++ b/dask_geopandas/expr.py @@ -916,6 +916,60 @@ def func(data, x, y, z): ) +def from_wkt(df, wkt="wkt", crs=None): + """ + Convert dask.dataframe of WKT objects to a GeoSeries. + + Parameters + ---------- + df : dask DataFrame + A dask DataFrame containing WKT objects. + wkt : str + The name of the column in `df` containing the WKT objects. + crs : value, optional + Coordinate Reference System of the geometry objects. Can be anything + accepted by + :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`, + such as an authority string (eg "EPSG:4326") or a WKT string. + + Returns + ------- + GeoSeries + """ + + def func(data, wkt): + return geopandas.GeoSeries.from_wkt(data[wkt], index=data.index, crs=crs) + + return df.map_partitions(func, wkt, meta=geopandas.GeoSeries(), token="from_wkt") + + +def from_wkb(df, wkb="wkb", crs=None): + """ + Convert dask.dataframe of WKB objects to a GeoSeries. 
+ + Parameters + ---------- + df : dask DataFrame + A dask DataFrame containing WKB objects. + wkb : str + The name of the column in `df` containing the WKB objects. + crs : value, optional + Coordinate Reference System of the geometry objects. Can be anything + accepted by + :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`, + such as an authority string (eg "EPSG:4326") or a WKT string. + + Returns + ------- + GeoSeries + """ + + def func(data, wkb): + return geopandas.GeoSeries.from_wkb(data[wkb], index=data.index, crs=crs) + + return df.map_partitions(func, wkb, meta=geopandas.GeoSeries(), token="from_wkb") + + for name in [ "area", "geom_type", diff --git a/dask_geopandas/tests/test_core.py b/dask_geopandas/tests/test_core.py index 4220460f..e4b44263 100644 --- a/dask_geopandas/tests/test_core.py +++ b/dask_geopandas/tests/test_core.py @@ -177,6 +177,32 @@ def test_points_from_xy_with_crs(): assert_geoseries_equal(actual.compute(), expected) +def test_from_wkt(): + wkt = [ + "POLYGON ((-64.8 32.3, -65.5 18.3, -80.3 25.2, -64.8 32.3))", + "POLYGON ((-81.079102 35.496456, -81.166992 31.914868, -75.541992 31.914868, -75.629883 35.675147, -81.079102 35.496456))", # noqa E501 + ] + expected = geopandas.GeoSeries.from_wkt(wkt, crs="EPSG:4326") + df = pd.DataFrame({"wkt": wkt}) + ddf = dd.from_pandas(df, npartitions=2) + actual = dask_geopandas.from_wkt(ddf, "wkt", crs="EPSG:4326") + assert isinstance(actual, dask_geopandas.GeoSeries) + assert_geoseries_equal(actual.compute(), expected) + + +def test_from_wkb(): + wkb = [ + "0103000000010000000400000033333333333350c0666666666626404000000000006050c0cdcccccccc4c324033333333331354c0333333333333394033333333333350c06666666666264040", # noqa E501 + "0103000000010000000500000016c3d501104554c095f3c5de8bbf414064ac36ffaf4a54c02c280cca34ea3f4064ac36ffafe252c02c280cca34ea3f409c53c90050e852c00b7f86376bd6414016c3d501104554c095f3c5de8bbf4140", # noqa E501 + ] + expected = geopandas.GeoSeries.from_wkb(wkb, crs="EPSG:4326") + df = 
pd.DataFrame({"wkb": wkb}) + ddf = dd.from_pandas(df, npartitions=2) + actual = dask_geopandas.from_wkb(ddf, "wkb", crs="EPSG:4326") + assert isinstance(actual, dask_geopandas.GeoSeries) + assert_geoseries_equal(actual.compute(), expected) + + def test_geodataframe_crs(geodf_points_crs): df = geodf_points_crs original = df.crs