From 052e0c4f51c752cf158d29c9d252a5b943a8f6fa Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 21 Sep 2022 10:49:38 -0500 Subject: [PATCH 1/7] Get points and multipoints working. --- python/cuspatial/cuspatial/core/geoseries.py | 62 ++++++++++++++++--- .../cuspatial/tests/test_geoseries.py | 30 +++++++++ 2 files changed, 85 insertions(+), 7 deletions(-) diff --git a/python/cuspatial/cuspatial/core/geoseries.py b/python/cuspatial/cuspatial/core/geoseries.py index fe4fceb67..0e84bbba9 100644 --- a/python/cuspatial/cuspatial/core/geoseries.py +++ b/python/cuspatial/cuspatial/core/geoseries.py @@ -118,28 +118,66 @@ def type(self): return result class GeoColumnAccessor: - def __init__(self, list_series): + def __init__(self, list_series, meta): self._series = list_series self._col = self._series._column + self._meta = meta + self._type = Feature_Enum.POINT + """ Frozen as a working version of points and multipoints + Linestrings and Polygons need another level of indirection @property def x(self): - return cudf.Series(self._col.leaves().values[0::2]) + types = self._meta.input_types + offsets = self._meta.union_offsets + indices = offsets[types == self._type.value] + result = self._col.take(indices._column).leaves().values + return cudf.Series(result[::2]) @property def y(self): - return cudf.Series(self._col.leaves().values[1::2]) + types = self._meta.input_types + offsets = self._meta.union_offsets + indices = offsets[types == self._type.value] + result = self._col.take(indices._column).leaves().values + return cudf.Series(result[1::2]) + """ + + @property + def x(self): + types = self._meta.input_types + offsets = self._meta.union_offsets + indices = offsets[types == self._type.value] + result = self._col.take(indices._column).leaves().values + breakpoint() + return cudf.Series(result[::2]) + + @property + def y(self): + types = self._meta.input_types + offsets = self._meta.union_offsets + indices = offsets[types == self._type.value] + result = 
self._col.take(indices._column).leaves().values + return cudf.Series(result[1::2]) @property def xy(self): return cudf.Series(self._col.leaves().values) class MultiPointGeoColumnAccessor(GeoColumnAccessor): + def __init__(self, list_series, meta): + super().__init__(list_series, meta) + self._type = Feature_Enum.MULTIPOINT + @property def geometry_offset(self): return cudf.Series(self._col.offsets.values) class LineStringGeoColumnAccessor(GeoColumnAccessor): + def __init__(self, list_series, meta): + super().__init__(list_series, meta) + self._type = Feature_Enum.LINESTRING + @property def geometry_offset(self): return cudf.Series(self._col.offsets.values) @@ -149,6 +187,10 @@ def part_offset(self): return cudf.Series(self._col.elements.offsets.values) class PolygonGeoColumnAccessor(GeoColumnAccessor): + def __init__(self, list_series, meta): + super().__init__(list_series, meta) + self._type = Feature_Enum.POLYGON + @property def geometry_offset(self): return cudf.Series(self._col.offsets.values) @@ -166,28 +208,34 @@ def points(self): """ Access the `PointsArray` of the underlying `GeoArrowBuffers`. """ - return self.GeoColumnAccessor(self._column.points) + return self.GeoColumnAccessor(self._column.points, self._column._meta) @property def multipoints(self): """ Access the `MultiPointArray` of the underlying `GeoArrowBuffers`. """ - return self.MultiPointGeoColumnAccessor(self._column.mpoints) + return self.MultiPointGeoColumnAccessor( + self._column.mpoints, self._column._meta + ) @property def lines(self): """ Access the `LineArray` of the underlying `GeoArrowBuffers`. """ - return self.LineStringGeoColumnAccessor(self._column.lines) + return self.LineStringGeoColumnAccessor( + self._column.lines, self._column._meta + ) @property def polygons(self): """ Access the `PolygonArray` of the underlying `GeoArrowBuffers`. 
""" - return self.PolygonGeoColumnAccessor(self._column.polygons) + return self.PolygonGeoColumnAccessor( + self._column.polygons, self._column._meta + ) def __repr__(self): # TODO: Implement Iloc with slices so that we can use `Series.__repr__` diff --git a/python/cuspatial/cuspatial/tests/test_geoseries.py b/python/cuspatial/cuspatial/tests/test_geoseries.py index c14c584a8..cd29d6493 100644 --- a/python/cuspatial/cuspatial/tests/test_geoseries.py +++ b/python/cuspatial/cuspatial/tests/test_geoseries.py @@ -346,6 +346,36 @@ def test_size(gs, series_slice): assert len(gi) == len(cugs) +def test_geometry_point_slicing(gs): + cugs = cuspatial.from_geopandas(gs) + assert (cugs[:1].points.x == cudf.Series([-1])).all() + assert (cugs[:1].points.y == cudf.Series([0])).all() + assert (cugs[3:].points.x == cudf.Series([9])).all() + assert (cugs[3:].points.y == cudf.Series([10])).all() + assert (cugs[0:4].points.x == cudf.Series([-1, 9])).all() + assert (cugs[0:4].points.y == cudf.Series([0, 10])).all() + + +def test_geometry_multipoint_slicing(gs): + cugs = cuspatial.from_geopandas(gs) + assert (cugs[:2].multipoints.x == cudf.Series([1, 3])).all() + assert (cugs[:2].multipoints.y == cudf.Series([2, 4])).all() + assert (cugs[2:].multipoints.x == cudf.Series([5, 7])).all() + assert (cugs[2:].multipoints.y == cudf.Series([6, 8])).all() + assert (cugs[0:4].multipoints.x == cudf.Series([1, 3, 5, 7])).all() + assert (cugs[0:4].multipoints.y == cudf.Series([2, 4, 6, 8])).all() + + +def test_geometry_linestring_slicing(gs): + cugs = cuspatial.from_geopandas(gs) + assert (cugs[:5].lines.x == cudf.Series([11, 13])).all() + assert (cugs[:5].lines.y == cudf.Series([12, 14])).all() + assert (cugs[5:].lines.x == cudf.Series([31, 33])).all() + assert (cugs[5:].lines.y == cudf.Series([32, 34])).all() + assert (cugs[0:4].lines.x == cudf.Series([11, 13, 31, 33])).all() + assert (cugs[0:4].lines.y == cudf.Series([12, 14, 33, 34])).all() + + def test_loc(gs): index = ["a", "b", "c", "d", 
"e", "f", "g", "h", "i", "j", "k", "l"] gs.index = index From 7f032f6891bd7e2ef085d77062d53659a265ff14 Mon Sep 17 00:00:00 2001 From: Thomson Comer Date: Wed, 21 Sep 2022 11:06:43 -0500 Subject: [PATCH 2/7] Pass tests. --- python/cuspatial/cuspatial/core/geoseries.py | 31 ++----- .../cuspatial/tests/test_geoseries.py | 80 ++++++++++++++++++- 2 files changed, 84 insertions(+), 27 deletions(-) diff --git a/python/cuspatial/cuspatial/core/geoseries.py b/python/cuspatial/cuspatial/core/geoseries.py index 0e84bbba9..a9999cf06 100644 --- a/python/cuspatial/cuspatial/core/geoseries.py +++ b/python/cuspatial/cuspatial/core/geoseries.py @@ -100,7 +100,9 @@ def __init__( index = data.index if index is None: index = cudf.RangeIndex(0, len(column)) - super().__init__(column, index, dtype, name, nan_as_null) + super().__init__( + column, index, dtype=dtype, name=name, nan_as_null=nan_as_null + ) @property def type(self): @@ -124,8 +126,6 @@ def __init__(self, list_series, meta): self._meta = meta self._type = Feature_Enum.POINT - """ Frozen as a working version of points and multipoints - Linestrings and Polygons need another level of indirection @property def x(self): types = self._meta.input_types @@ -134,24 +134,6 @@ def x(self): result = self._col.take(indices._column).leaves().values return cudf.Series(result[::2]) - @property - def y(self): - types = self._meta.input_types - offsets = self._meta.union_offsets - indices = offsets[types == self._type.value] - result = self._col.take(indices._column).leaves().values - return cudf.Series(result[1::2]) - """ - - @property - def x(self): - types = self._meta.input_types - offsets = self._meta.union_offsets - indices = offsets[types == self._type.value] - result = self._col.take(indices._column).leaves().values - breakpoint() - return cudf.Series(result[::2]) - @property def y(self): types = self._meta.input_types @@ -320,9 +302,11 @@ def __getitem__(self, item): ) if isinstance(item, Integral): - return 
GeoSeries(column).to_shapely() + return GeoSeries(column, name=self._sr.name).to_shapely() else: - return GeoSeries(column, index=self._sr.index[indexes]) + return GeoSeries( + column, index=self._sr.index[indexes], name=self._sr.name + ) def from_arrow(union): column = GeoColumn( @@ -364,6 +348,7 @@ def to_geopandas(self, nullable=False): return gpGeoSeries( final_union_slice.to_shapely(), index=self.index.to_pandas(), + name=self.name, ) def to_pandas(self): diff --git a/python/cuspatial/cuspatial/tests/test_geoseries.py b/python/cuspatial/cuspatial/tests/test_geoseries.py index cd29d6493..81f368d2f 100644 --- a/python/cuspatial/cuspatial/tests/test_geoseries.py +++ b/python/cuspatial/cuspatial/tests/test_geoseries.py @@ -370,10 +370,82 @@ def test_geometry_linestring_slicing(gs): cugs = cuspatial.from_geopandas(gs) assert (cugs[:5].lines.x == cudf.Series([11, 13])).all() assert (cugs[:5].lines.y == cudf.Series([12, 14])).all() - assert (cugs[5:].lines.x == cudf.Series([31, 33])).all() - assert (cugs[5:].lines.y == cudf.Series([32, 34])).all() - assert (cugs[0:4].lines.x == cudf.Series([11, 13, 31, 33])).all() - assert (cugs[0:4].lines.y == cudf.Series([12, 14, 33, 34])).all() + assert (cugs[:6].lines.x == cudf.Series([11, 13, 15, 17, 19, 21])).all() + assert (cugs[:6].lines.y == cudf.Series([12, 14, 16, 18, 20, 22])).all() + assert (cugs[7:].lines.x == cudf.Series([31, 33])).all() + assert (cugs[7:].lines.y == cudf.Series([32, 34])).all() + assert (cugs[6:].lines.x == cudf.Series([23, 25, 27, 29, 31, 33])).all() + assert (cugs[6:].lines.y == cudf.Series([24, 26, 28, 30, 32, 34])).all() + + +def test_geometry_polygon_slicing(gs): + cugs = cuspatial.from_geopandas(gs) + assert (cugs[:9].polygons.x == cudf.Series([35, 37, 39, 41, 35])).all() + assert (cugs[:9].polygons.y == cudf.Series([36, 38, 40, 42, 36])).all() + assert ( + cugs[:10].polygons.x + == cudf.Series( + [ + 35, + 37, + 39, + 41, + 35, + 43, + 45, + 47, + 43, + 49, + 51, + 53, + 49, + 55, + 57, + 59, + 
55, + 61, + 63, + 65, + 61, + ] + ) + ).all() + assert ( + cugs[:10].polygons.y + == cudf.Series( + [ + 36, + 38, + 40, + 42, + 36, + 44, + 46, + 48, + 44, + 50, + 52, + 54, + 50, + 56, + 58, + 60, + 56, + 62, + 64, + 66, + 62, + ] + ) + ).all() + assert ( + cugs[11:].polygons.x + == cudf.Series([97, 99, 102, 101, 97, 106, 108, 110, 113, 106]) + ).all() + assert ( + cugs[11:].polygons.y + == cudf.Series([98, 101, 103, 108, 98, 107, 109, 111, 108, 107]) + ).all() def test_loc(gs): From 88f8452dd0969906a397bd3fb47e74feba023f73 Mon Sep 17 00:00:00 2001 From: Thomson Comer Date: Wed, 21 Sep 2022 11:21:47 -0500 Subject: [PATCH 3/7] Add xy accessor and tests for points and multipoints. --- python/cuspatial/cuspatial/core/geoseries.py | 6 +++++- python/cuspatial/cuspatial/tests/test_geoseries.py | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/python/cuspatial/cuspatial/core/geoseries.py b/python/cuspatial/cuspatial/core/geoseries.py index a9999cf06..e592af6c5 100644 --- a/python/cuspatial/cuspatial/core/geoseries.py +++ b/python/cuspatial/cuspatial/core/geoseries.py @@ -144,7 +144,11 @@ def y(self): @property def xy(self): - return cudf.Series(self._col.leaves().values) + types = self._meta.input_types + offsets = self._meta.union_offsets + indices = offsets[types == self._type.value] + result = self._col.take(indices._column).leaves().values + return cudf.Series(result) class MultiPointGeoColumnAccessor(GeoColumnAccessor): def __init__(self, list_series, meta): diff --git a/python/cuspatial/cuspatial/tests/test_geoseries.py b/python/cuspatial/cuspatial/tests/test_geoseries.py index 81f368d2f..84cd176cc 100644 --- a/python/cuspatial/cuspatial/tests/test_geoseries.py +++ b/python/cuspatial/cuspatial/tests/test_geoseries.py @@ -350,30 +350,44 @@ def test_geometry_point_slicing(gs): cugs = cuspatial.from_geopandas(gs) assert (cugs[:1].points.x == cudf.Series([-1])).all() assert (cugs[:1].points.y == cudf.Series([0])).all() + assert 
(cugs[:1].points.xy == cudf.Series([-1, 0])).all() assert (cugs[3:].points.x == cudf.Series([9])).all() assert (cugs[3:].points.y == cudf.Series([10])).all() + assert (cugs[3:].points.xy == cudf.Series([9, 10])).all() assert (cugs[0:4].points.x == cudf.Series([-1, 9])).all() assert (cugs[0:4].points.y == cudf.Series([0, 10])).all() + assert (cugs[0:4].points.xy == cudf.Series([-1, 0, 9, 10])).all() def test_geometry_multipoint_slicing(gs): cugs = cuspatial.from_geopandas(gs) assert (cugs[:2].multipoints.x == cudf.Series([1, 3])).all() assert (cugs[:2].multipoints.y == cudf.Series([2, 4])).all() + assert (cugs[:2].multipoints.xy == cudf.Series([1, 2, 3, 4])).all() assert (cugs[2:].multipoints.x == cudf.Series([5, 7])).all() assert (cugs[2:].multipoints.y == cudf.Series([6, 8])).all() + assert (cugs[2:].multipoints.xy == cudf.Series([5, 6, 7, 8])).all() assert (cugs[0:4].multipoints.x == cudf.Series([1, 3, 5, 7])).all() assert (cugs[0:4].multipoints.y == cudf.Series([2, 4, 6, 8])).all() + assert ( + cugs[0:4].multipoints.xy == cudf.Series([1, 2, 3, 4, 5, 6, 7, 8]) + ).all() def test_geometry_linestring_slicing(gs): cugs = cuspatial.from_geopandas(gs) assert (cugs[:5].lines.x == cudf.Series([11, 13])).all() assert (cugs[:5].lines.y == cudf.Series([12, 14])).all() + assert (cugs[:5].lines.xy == cudf.Series([11, 12, 13, 14])).all() assert (cugs[:6].lines.x == cudf.Series([11, 13, 15, 17, 19, 21])).all() assert (cugs[:6].lines.y == cudf.Series([12, 14, 16, 18, 20, 22])).all() + assert ( + cugs[:6].lines.xy + == cudf.Series([11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]) + ).all() assert (cugs[7:].lines.x == cudf.Series([31, 33])).all() assert (cugs[7:].lines.y == cudf.Series([32, 34])).all() + assert (cugs[7:].lines.xy == cudf.Series([31, 32, 33, 34])).all() assert (cugs[6:].lines.x == cudf.Series([23, 25, 27, 29, 31, 33])).all() assert (cugs[6:].lines.y == cudf.Series([24, 26, 28, 30, 32, 34])).all() From 3cb08f2502e2f145f7c9f5d664c8668499af350e Mon Sep 17 00:00:00 
2001 From: Thomson Comer Date: Wed, 21 Sep 2022 11:25:46 -0500 Subject: [PATCH 4/7] Add xy tests for polygons --- .../cuspatial/tests/test_geoseries.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/python/cuspatial/cuspatial/tests/test_geoseries.py b/python/cuspatial/cuspatial/tests/test_geoseries.py index 84cd176cc..c8f9cf857 100644 --- a/python/cuspatial/cuspatial/tests/test_geoseries.py +++ b/python/cuspatial/cuspatial/tests/test_geoseries.py @@ -390,12 +390,20 @@ def test_geometry_linestring_slicing(gs): assert (cugs[7:].lines.xy == cudf.Series([31, 32, 33, 34])).all() assert (cugs[6:].lines.x == cudf.Series([23, 25, 27, 29, 31, 33])).all() assert (cugs[6:].lines.y == cudf.Series([24, 26, 28, 30, 32, 34])).all() + assert ( + cugs[6:].lines.xy + == cudf.Series([23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34]) + ).all() def test_geometry_polygon_slicing(gs): cugs = cuspatial.from_geopandas(gs) assert (cugs[:9].polygons.x == cudf.Series([35, 37, 39, 41, 35])).all() assert (cugs[:9].polygons.y == cudf.Series([36, 38, 40, 42, 36])).all() + assert ( + cugs[:9].polygons.xy + == cudf.Series([35, 36, 37, 38, 39, 40, 41, 42, 35, 36]) + ).all() assert ( cugs[:10].polygons.x == cudf.Series( @@ -460,6 +468,33 @@ def test_geometry_polygon_slicing(gs): cugs[11:].polygons.y == cudf.Series([98, 101, 103, 108, 98, 107, 109, 111, 108, 107]) ).all() + assert ( + cugs[11:].polygons.xy + == cudf.Series( + [ + 97, + 98, + 99, + 101, + 102, + 103, + 101, + 108, + 97, + 98, + 106, + 107, + 108, + 109, + 110, + 111, + 113, + 108, + 106, + 107, + ] + ) + ).all() def test_loc(gs): From 8af5b9a5226e15d518ba77ea0a140510218098e7 Mon Sep 17 00:00:00 2001 From: "H. 
Thomson Comer" Date: Wed, 21 Sep 2022 17:29:00 -0500 Subject: [PATCH 5/7] Update python/cuspatial/cuspatial/core/geoseries.py Co-authored-by: Michael Wang --- python/cuspatial/cuspatial/core/geoseries.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/python/cuspatial/cuspatial/core/geoseries.py b/python/cuspatial/cuspatial/core/geoseries.py index e592af6c5..4b9fb970e 100644 --- a/python/cuspatial/cuspatial/core/geoseries.py +++ b/python/cuspatial/cuspatial/core/geoseries.py @@ -136,11 +136,7 @@ def x(self): @property def y(self): - types = self._meta.input_types - offsets = self._meta.union_offsets - indices = offsets[types == self._type.value] - result = self._col.take(indices._column).leaves().values - return cudf.Series(result[1::2]) + return self.xy[1::2] @property def xy(self): From 308cbc32f1f642f3ad05502ae829be7fd9722129 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 21 Sep 2022 17:29:05 -0500 Subject: [PATCH 6/7] Update python/cuspatial/cuspatial/core/geoseries.py Co-authored-by: Michael Wang --- python/cuspatial/cuspatial/core/geoseries.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/python/cuspatial/cuspatial/core/geoseries.py b/python/cuspatial/cuspatial/core/geoseries.py index 4b9fb970e..2fc8a36c7 100644 --- a/python/cuspatial/cuspatial/core/geoseries.py +++ b/python/cuspatial/cuspatial/core/geoseries.py @@ -128,11 +128,7 @@ def __init__(self, list_series, meta): @property def x(self): - types = self._meta.input_types - offsets = self._meta.union_offsets - indices = offsets[types == self._type.value] - result = self._col.take(indices._column).leaves().values - return cudf.Series(result[::2]) + return self.xy[::2] @property def y(self): From d273619309d88e1ecc98544a58ee1bc4eecce4eb Mon Sep 17 00:00:00 2001 From: Thomson Comer Date: Fri, 23 Sep 2022 13:02:58 -0500 Subject: [PATCH 7/7] Simplify x/y indexing and break out a few tests as I went down the index rabbithole.
--- python/cuspatial/cuspatial/core/geoseries.py | 4 ++-- .../cuspatial/tests/test_geodataframe.py | 20 +++++++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/python/cuspatial/cuspatial/core/geoseries.py b/python/cuspatial/cuspatial/core/geoseries.py index cce53284c..a5289863b 100644 --- a/python/cuspatial/cuspatial/core/geoseries.py +++ b/python/cuspatial/cuspatial/core/geoseries.py @@ -130,11 +130,11 @@ def __init__(self, list_series, meta): @property def x(self): - return self.xy[::2] + return self.xy[::2].reset_index(drop=True) @property def y(self): - return self.xy[1::2] + return self.xy[1::2].reset_index(drop=True) @property def xy(self): diff --git a/python/cuspatial/cuspatial/tests/test_geodataframe.py b/python/cuspatial/cuspatial/tests/test_geodataframe.py index d6c85a95b..9dc76af81 100644 --- a/python/cuspatial/cuspatial/tests/test_geodataframe.py +++ b/python/cuspatial/cuspatial/tests/test_geodataframe.py @@ -135,13 +135,19 @@ def test_interleaved_point(gpdf, polys): cugs = cugpdf["geometry"] gs = gpdf["geometry"] pd.testing.assert_series_equal( - cugs.points.x.to_pandas(), + cugs.points.x.to_pandas().reset_index(drop=True), gs[gs.type == "Point"].x.reset_index(drop=True), ) pd.testing.assert_series_equal( - cugs.points.y.to_pandas(), + cugs.points.y.to_pandas().reset_index(drop=True), gs[gs.type == "Point"].y.reset_index(drop=True), ) + + +def test_interleaved_multipoint(gpdf, polys): + cugpdf = cuspatial.from_geopandas(gpdf) + cugs = cugpdf["geometry"] + gs = gpdf["geometry"] cudf.testing.assert_series_equal( cudf.Series.from_arrow(cugs.multipoints.x.to_arrow()), cudf.Series( @@ -164,6 +170,11 @@ def test_interleaved_point(gpdf, polys): ).flatten() ), ) + + +def test_interleaved_lines(gpdf, polys): + cugpdf = cuspatial.from_geopandas(gpdf) + cugs = cugpdf["geometry"] cudf.testing.assert_series_equal( cudf.Series.from_arrow(cugs.lines.x.to_arrow()), cudf.Series( @@ -178,6 +189,11 @@ def test_interleaved_point(gpdf, polys): 
dtype="float64", ), ) + + +def test_interleaved_polygons(gpdf, polys): + cugpdf = cuspatial.from_geopandas(gpdf) + cugs = cugpdf["geometry"] cudf.testing.assert_series_equal( cudf.Series.from_arrow(cugs.polygons.x.to_arrow()), cudf.Series(polys[:, 0], dtype="float64"),