diff --git a/python/cuspatial/cuspatial/core/binops/__init__.py b/python/cuspatial/cuspatial/core/binpreds/__init__.py similarity index 100% rename from python/cuspatial/cuspatial/core/binops/__init__.py rename to python/cuspatial/cuspatial/core/binpreds/__init__.py diff --git a/python/cuspatial/cuspatial/core/binpreds/binpreds.py b/python/cuspatial/cuspatial/core/binpreds/binpreds.py new file mode 100644 index 000000000..7466e3627 --- /dev/null +++ b/python/cuspatial/cuspatial/core/binpreds/binpreds.py @@ -0,0 +1,272 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +from abc import ABC, abstractmethod + +import cudf + +from cuspatial.core._column.geocolumn import GeoColumn +from cuspatial.core.binpreds.contains import contains_properly +from cuspatial.utils.column_utils import ( + contains_only_linestrings, + contains_only_multipoints, + contains_only_polygons, + has_same_geometry, +) + + +class BinaryPredicate(ABC): + @abstractmethod + def preprocess(self, lhs, rhs): + """Preprocess the input data for the binary predicate. This method + should be implemented by subclasses. Preprocess and postprocess are + used to implement the discrete math rules of the binary predicates. + + Notes + ----- + Should only be called once. + + Parameters + ---------- + op : str + The binary predicate to perform. + lhs : GeoSeries + The left-hand-side of the GeoSeries-level binary predicate. + rhs : GeoSeries + The right-hand-side of the GeoSeries-level binary predicate. + + Returns + ------- + GeoSeries + The left-hand-side of the internal binary predicate, may be + reordered. + GeoSeries + The right-hand-side of the internal binary predicate, may be + reordered. + """ + pass + + @abstractmethod + def postprocess(self, point_indices, point_result): + """Postprocess the output data for the binary predicate. This method + should be implemented by subclasses. + + Postprocess converts the raw results of the binary predicate into + the final result. This is where the discrete math rules are applied. + + Parameters + ---------- + op : str + The binary predicate to post process. Determines for example the + set predicate to use for computing the result. + point_indices : cudf.Series + The indices of the points in the original GeoSeries. + point_result : cudf.Series + The raw result of the binary predicate. + + Returns + ------- + cudf.Series + The output of the post processing, True/False results for + the specified binary op. + """ + pass + + def __init__(self, lhs, rhs, align=True): + """Compute the binary predicate `op` on `lhs` and `rhs`. + + There are ten binary predicates supported by cuspatial: + - `.equals` + - `.disjoint` + - `.touches` + - `.contains` + - `.contains_properly` + - `.covers` + - `.intersects` + - `.within` + - `.crosses` + - `.overlaps` + + There are thirty-six ordering combinations of `lhs` and `rhs`, the + unordered pairs of each `point`, `multipoint`, `linestring`, + `multilinestring`, `polygon`, and `multipolygon`. The ordering of + `lhs` and `rhs` is important because the result of the binary + predicate is not symmetric. For example, `A.contains(B)` is not + the same as `B.contains(A)`. + + Parameters + ---------- + op : str + The binary predicate to perform. + lhs : GeoSeries + The left-hand-side of the binary predicate. + rhs : GeoSeries + The right-hand-side of the binary predicate. + align : bool + If True, align the indices of `lhs` and `rhs` before performing + the binary predicate. If False, `lhs` and `rhs` must have the + same index. + + Returns + ------- + GeoSeries + A GeoSeries containing the result of the binary predicate. + """ + (self.lhs, self.rhs) = lhs.align(rhs) if align else (lhs, rhs) + self.align = align + + def __call__(self) -> cudf.Series: + """Return the result of the binary predicate.""" + # Type disambiguation + # Type disambiguation has a large effect on the decisions of the + # algorithm. + (lhs, rhs, indices) = self.preprocess(self.lhs, self.rhs) + + # Binpred call + point_result = self._op(lhs, rhs) + + # Postprocess: Apply discrete math rules to identify relationships. + return self.postprocess(indices, point_result) + + +class ContainsProperlyBinpred(BinaryPredicate): + def preprocess(self, lhs, rhs): + """Preprocess the input GeoSeries to ensure that they are of the + correct type for the predicate.""" + # RHS conditioning: + point_indices = None + # point in polygon + if contains_only_linestrings(rhs): + # condition for linestrings + geom = rhs.lines + elif contains_only_polygons(rhs) is True: + # polygon in polygon + geom = rhs.polygons + elif contains_only_multipoints(rhs) is True: + # mpoint in polygon + geom = rhs.multipoints + else: + # no conditioning is required + geom = rhs.points + xy_points = geom.xy + + # Arrange into shape for calling point-in-polygon, intersection, or + # equals + point_indices = geom.point_indices() + from cuspatial.core.geoseries import GeoSeries + + final_rhs = GeoSeries( + GeoColumn._from_points_xy(xy_points._column) + ).points + return (lhs, final_rhs, point_indices) + + def _op(self, lhs, points): + """Compute the contains_properly relationship between two GeoSeries. + A feature A contains another feature B if no points of B lie in the + exterior of A, and at least one point of the interior of B lies in the + interior of A. This is the inverse of `within`.""" + if not contains_only_polygons(lhs): + raise TypeError( + "`.contains` can only be called with polygon series." + ) + + # call pip on the three subtypes on the right: + point_result = contains_properly( + points.x, + points.y, + lhs.polygons.part_offset[:-1], + lhs.polygons.ring_offset[:-1], + lhs.polygons.x, + lhs.polygons.y, + ) + return point_result + + def postprocess(self, point_indices, point_result): + """Postprocess the output GeoSeries to ensure that they are of the + correct type for the predicate.""" + result = cudf.DataFrame({"idx": point_indices, "pip": point_result}) + df_result = result + # Discrete math recombination + if ( + contains_only_linestrings(self.rhs) + or contains_only_polygons(self.rhs) + or contains_only_multipoints(self.rhs) + ): + # process for completed linestrings, polygons, and multipoints. + # Not necessary for points. + df_result = ( + result.groupby("idx").sum().sort_index() + == result.groupby("idx").count().sort_index() + ) + point_result = cudf.Series( + df_result["pip"], index=cudf.RangeIndex(0, len(df_result)) + ) + point_result.name = None + return point_result + + +class OverlapsBinpred(ContainsProperlyBinpred): + def postprocess(self, point_indices, point_result): + # Same as contains_properly, but we need to check that the + # dimensions are the same. + # TODO: Maybe change this to intersection + if not has_same_geometry(self.lhs, self.rhs): + return cudf.Series([False] * len(self.lhs)) + result = cudf.DataFrame({"idx": point_indices, "pip": point_result}) + df_result = result + # Discrete math recombination + if contains_only_linestrings(self.rhs): + df_result = ( + result.groupby("idx").sum().sort_index() + == result.groupby("idx").count().sort_index() + ) + elif contains_only_polygons(self.rhs) or contains_only_multipoints( + self.rhs + ): + partial_result = result.groupby("idx").sum() + df_result = (partial_result > 0) & ( + partial_result < len(point_result) + ) + else: + df_result = result.groupby("idx").sum() > 1 + point_result = cudf.Series( + df_result["pip"], index=cudf.RangeIndex(0, len(df_result)) + ) + point_result.name = None + return point_result + + +class IntersectsBinpred(ContainsProperlyBinpred): + def preprocess(self, lhs, rhs): + if contains_only_polygons(rhs): + (lhs, rhs) = (rhs, lhs) + return super().preprocess(lhs, rhs) + + +class WithinBinpred(ContainsProperlyBinpred): + def preprocess(self, lhs, rhs): + if contains_only_polygons(rhs): + (lhs, rhs) = (rhs, lhs) + return super().preprocess(lhs, rhs) + + def postprocess(self, point_indices, point_result): + """Postprocess the output GeoSeries to ensure that they are of the + correct type for the predicate.""" + result = cudf.DataFrame({"idx": point_indices, "pip": point_result}) + df_result = result + # Discrete math recombination + if ( + contains_only_linestrings(self.rhs) + or contains_only_polygons(self.rhs) + or contains_only_multipoints(self.rhs) + ): + # process for completed linestrings, polygons, and multipoints. + # Not necessary for points. + df_result = ( + result.groupby("idx").sum().sort_index() + == result.groupby("idx").count().sort_index() + ) + point_result = cudf.Series( + df_result["pip"], index=cudf.RangeIndex(0, len(df_result)) + ) + point_result.name = None + return point_result diff --git a/python/cuspatial/cuspatial/core/binops/contains.py b/python/cuspatial/cuspatial/core/binpreds/contains.py similarity index 100% rename from python/cuspatial/cuspatial/core/binops/contains.py rename to python/cuspatial/cuspatial/core/binpreds/contains.py diff --git a/python/cuspatial/cuspatial/core/geoseries.py b/python/cuspatial/cuspatial/core/geoseries.py index dc219887d..3029da457 100644 --- a/python/cuspatial/cuspatial/core/geoseries.py +++ b/python/cuspatial/cuspatial/core/geoseries.py @@ -25,11 +25,11 @@ import cuspatial.io.pygeoarrow as pygeoarrow from cuspatial.core._column.geocolumn import GeoColumn from cuspatial.core._column.geometa import Feature_Enum, GeoMeta -from cuspatial.core.binops.contains import contains_properly -from cuspatial.utils.column_utils import ( - contains_only_linestrings, - contains_only_multipoints, - contains_only_polygons, +from cuspatial.core.binpreds.binpreds import ( + ContainsProperlyBinpred, + IntersectsBinpred, + OverlapsBinpred, + WithinBinpred, ) T = TypeVar("T", bound="GeoSeries") @@ -168,9 +168,12 @@ def _get_current_features(self, type): def point_indices(self): # Return a cupy.ndarray containing the index values that each # point belongs to. + """ offsets = cp.arange(0, len(self.xy) + 1, 2) sizes = offsets[1:] - offsets[:-1] return cp.repeat(self._series.index, sizes) + """ + return self._series.index class MultiPointGeoColumnAccessor(GeoColumnAccessor): def __init__(self, list_series, meta): @@ -692,7 +695,10 @@ def _gather( return self.iloc[gather_map] def contains_properly(self, other, align=True): - """Compute from a GeoSeries of points and a GeoSeries of polygons which + """Returns a `Series` of `dtype('bool')` with value `True` for each + aligned geometry that contains _other_. + + Compute from a GeoSeries of points and a GeoSeries of polygons which points are properly contained within the corresponding polygon. Polygon A contains Point B properly if B intersects the interior of A but not the boundary (or exterior). @@ -768,63 +774,73 @@ def contains_properly(self, other, align=True): A Series of boolean values indicating whether each point falls within the corresponding polygon in the input. """ - if not contains_only_polygons(self): - raise TypeError( - "`.contains` can only be called with polygon series." - ) + return ContainsProperlyBinpred(self, other, align)() - (lhs, rhs) = self.align(other) if align else (self, other) - - # RHS conditioning: - mode = "POINTS" - # point in polygon - if contains_only_linestrings(rhs): - # condition for linestrings - mode = "LINESTRINGS" - geom = rhs.lines - elif contains_only_polygons(rhs) is True: - # polygon in polygon - mode = "POLYGONS" - geom = rhs.polygons - elif contains_only_multipoints(rhs) is True: - # mpoint in polygon - mode = "MULTIPOINTS" - geom = rhs.multipoints - else: - # no conditioning is required - geom = rhs.points - xy_points = geom.xy - point_indices = geom.point_indices() - points = GeoSeries(GeoColumn._from_points_xy(xy_points._column)).points - - # call pip on the three subtypes on the right: - point_result = contains_properly( - points.x, - points.y, - lhs.polygons.part_offset[:-1], - lhs.polygons.ring_offset[:-1], - lhs.polygons.x, - lhs.polygons.y, - ) - if ( - mode == "LINESTRINGS" - or mode == "POLYGONS" - or mode == "MULTIPOINTS" - ): - # process for completed linestrings, polygons, and multipoints. - # Not necessary for points. - result = cudf.DataFrame( - {"idx": point_indices, "pip": point_result} - ) - # if the number of points in the polygon is equal to the number of - # points, then the requirements for `.contains_properly` are met - # for this geometry type. - df_result = ( - result.groupby("idx").sum().sort_index() - == result.groupby("idx").count().sort_index() - ) - point_result = cudf.Series( - df_result["pip"], index=cudf.RangeIndex(0, len(df_result)) - ) - point_result.name = None - return point_result + def intersects(self, other, align=True): + """Returns a `Series` of `dtype('bool')` with value `True` for each + aligned geometry that intersects _other_. + + A geometry intersects another geometry if its boundary or interior + intersect in any way with the other geometry. + + Parameters + ---------- + other + a cuspatial.GeoSeries + align=True + align the GeoSeries indexes before calling the binpred + + Returns + ------- + result : cudf.Series + A Series of boolean values indicating whether the geometries of + each row intersect. + """ + return IntersectsBinpred(self, other, align)() + + def within(self, other, align=True): + """Returns a `Series` of `dtype('bool')` with value `True` for each + aligned geometry that is within _other_. + + A geometry is within another geometry if at least one of its points is + located in the interior of the other geometry and no points are + located in the exterior of the other geometry. + + Parameters + ---------- + other + a cuspatial.GeoSeries + align=True + align the GeoSeries indexes before calling the binpred + + Returns + ------- + result : cudf.Series + A Series of boolean values indicating whether each feature falls + within the corresponding polygon in the input. + """ + return WithinBinpred(self, other, align)() + + def overlaps(self, other, align=True): + """Returns True for all aligned geometries that overlap other, else + False. + + Geometries overlap if they have more than one but not all points in + common, have the same dimension, and the intersection of the + interiors of the geometries has the same dimension as the geometries + themselves. + + Parameters + ---------- + other + a cuspatial.GeoSeries + align=True + align the GeoSeries indexes before calling the binpred + + Returns + ------- + result : cudf.Series + A Series of boolean values indicating whether each geometry + overlaps the corresponding geometry in the input.""" + # Overlaps has the same requirement as crosses. + return OverlapsBinpred(self, other, align=align)() diff --git a/python/cuspatial/cuspatial/tests/binops/test_contains_properly.py b/python/cuspatial/cuspatial/tests/binpreds/test_contains_properly.py similarity index 100% rename from python/cuspatial/cuspatial/tests/binops/test_contains_properly.py rename to python/cuspatial/cuspatial/tests/binpreds/test_contains_properly.py diff --git a/python/cuspatial/cuspatial/tests/binpreds/test_pip_only_binpreds.py b/python/cuspatial/cuspatial/tests/binpreds/test_pip_only_binpreds.py new file mode 100644 index 000000000..545629402 --- /dev/null +++ b/python/cuspatial/cuspatial/tests/binpreds/test_pip_only_binpreds.py @@ -0,0 +1,194 @@ +import geopandas as gpd +from shapely.geometry import LineString, Point, Polygon + +import cuspatial + +"""Overlaps, Within, and Intersects""" + + +def test_polygon_overlaps_point(): + gpdpolygon = gpd.GeoSeries( + Polygon([[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]) + ) + gpdpoint = gpd.GeoSeries([Point(0.5, 0.5)]) + polygon = cuspatial.from_geopandas(gpdpolygon) + point = cuspatial.from_geopandas(gpdpoint) + got = polygon.overlaps(point).values_host + expected = gpdpolygon.overlaps(gpdpoint).values + assert (got == expected).all() + + +def test_max_polygons_overlaps_max_points(polygon_generator, point_generator): + gpdpolygon = gpd.GeoSeries([*polygon_generator(31, 0)]) + gpdpoint = gpd.GeoSeries([*point_generator(31)]) + polygon = cuspatial.from_geopandas(gpdpolygon) + point = cuspatial.from_geopandas(gpdpoint) + got = polygon.overlaps(point).values_host + expected = gpdpolygon.overlaps(gpdpoint).values + assert (got == expected).all() + + +def test_polygon_overlaps_polygon_partially(): + gpdpolygon1 = gpd.GeoSeries( + Polygon([[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]) + ) + gpdpolygon2 = gpd.GeoSeries( + Polygon([[0.5, 0.5], [0.5, 1.5], [1.5, 1.5], [1.5, 0.5], [0.5, 0.5]]) + ) + polygon1 = cuspatial.from_geopandas(gpdpolygon1) + polygon2 = cuspatial.from_geopandas(gpdpolygon2) + got = polygon1.overlaps(polygon2).values_host + expected = gpdpolygon1.overlaps(gpdpolygon2).values + assert (got == expected).all() + + +def test_polygon_overlaps_polygon_completely(): + gpdpolygon1 = gpd.GeoSeries( + Polygon([[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]) + ) + gpdpolygon2 = gpd.GeoSeries( + Polygon( + [[0.25, 0.25], [0.25, 0.5], [0.5, 0.5], [0.5, 0.25], [0.25, 0.25]] + ) + ) + polygon1 = cuspatial.from_geopandas(gpdpolygon1) + polygon2 = cuspatial.from_geopandas(gpdpolygon2) + got = polygon1.overlaps(polygon2).values_host + expected = gpdpolygon1.overlaps(gpdpolygon2).values + assert (got == expected).all() + + +def test_polygon_overlaps_polygon_no_overlap(): + gpdpolygon1 = gpd.GeoSeries( + Polygon([[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]) + ) + gpdpolygon2 = gpd.GeoSeries( + Polygon([[2, 2], [2, 3], [3, 3], [3, 2], [2, 2]]) + ) + polygon1 = cuspatial.from_geopandas(gpdpolygon1) + polygon2 = cuspatial.from_geopandas(gpdpolygon2) + got = polygon1.overlaps(polygon2).values_host + expected = gpdpolygon1.overlaps(gpdpolygon2).values + assert (got == expected).all() + + +def test_max_polygon_overlaps_max_points(polygon_generator, point_generator): + gpdpolygon = gpd.GeoSeries([*polygon_generator(31, 0)]) + gpdpoint = gpd.GeoSeries([*point_generator(31)]) + polygon = cuspatial.from_geopandas(gpdpolygon) + point = cuspatial.from_geopandas(gpdpoint) + got = polygon.overlaps(point).values_host + expected = gpdpolygon.overlaps(gpdpoint).values + assert (got == expected).all() + + +def test_point_intersects_polygon_interior(): + gpdpoint = gpd.GeoSeries([Point(0.5, 0.5)]) + gpdpolygon = gpd.GeoSeries( + Polygon([[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]) + ) + point = cuspatial.from_geopandas(gpdpoint) + polygon = cuspatial.from_geopandas(gpdpolygon) + got = point.intersects(polygon).values_host + expected = gpdpoint.intersects(gpdpolygon).values + assert (got == expected).all() + + +def test_max_points_intersects_max_polygons_interior( + polygon_generator, point_generator +): + gpdpolygon = gpd.GeoSeries([*polygon_generator(31, 0)]) + gpdpoint = gpd.GeoSeries([*point_generator(31)]) + polygon = cuspatial.from_geopandas(gpdpolygon) + point = cuspatial.from_geopandas(gpdpoint) + got = point.intersects(polygon).values_host + expected = gpdpoint.intersects(gpdpolygon).values + assert (got == expected).all() + + +def test_point_within_polygon(): + gpdpoint = gpd.GeoSeries([Point(0, 0)]) + gpdpolygon = gpd.GeoSeries(Polygon([[0, 0], [1, 0], [1, 1], [0, 0]])) + point = cuspatial.from_geopandas(gpdpoint) + polygon = cuspatial.from_geopandas(gpdpolygon) + got = point.within(polygon).values_host + expected = gpdpoint.within(gpdpolygon).values + assert (got == expected).all() + + +def test_max_points_within_max_polygons(polygon_generator, point_generator): + gpdpolygon = gpd.GeoSeries([*polygon_generator(31, 0)]) + gpdpoint = gpd.GeoSeries([*point_generator(31)]) + polygon = cuspatial.from_geopandas(gpdpolygon) + point = cuspatial.from_geopandas(gpdpoint) + got = point.within(polygon).values_host + expected = gpdpoint.within(gpdpolygon).values + assert (got == expected).all() + + +def test_linestring_within_polygon(): + gpdline = gpd.GeoSeries([LineString([(0, 0), (1, 1)])]) + gpdpolygon = gpd.GeoSeries(Polygon([[0, 0], [1, 0], [1, 1], [0, 0]])) + line = cuspatial.from_geopandas(gpdline) + polygon = cuspatial.from_geopandas(gpdpolygon) + got = line.within(polygon).values_host + expected = gpdline.within(gpdpolygon).values + assert (got == expected).all() + + +def test_max_linestring_within_max_polygon( + polygon_generator, linestring_generator +): + gpdpolygon = gpd.GeoSeries([*polygon_generator(31, 0)]) + gpdline = gpd.GeoSeries([*linestring_generator(31, 5)]) + polygon = cuspatial.from_geopandas(gpdpolygon) + line = cuspatial.from_geopandas(gpdline) + got = line.within(polygon).values_host + expected = gpdline.within(gpdpolygon).values + assert (got == expected).all() + + +def test_polygon_within_polygon(): + gpdpolygon1 = gpd.GeoSeries( + Polygon([[0, 0], [-1, 1], [1, 1], [1, -2], [0, 0]]) + ) + gpdpolygon2 = gpd.GeoSeries(Polygon([[-1, -1], [-2, 2], [2, 2], [2, -2]])) + polygon1 = cuspatial.from_geopandas(gpdpolygon1) + polygon2 = cuspatial.from_geopandas(gpdpolygon2) + got = polygon1.within(polygon2).values_host + expected = gpdpolygon1.within(gpdpolygon2).values + assert (got == expected).all() + + +def test_max_polygons_within_max_polygons(polygon_generator): + gpdpolygon1 = gpd.GeoSeries([*polygon_generator(31, 0)]) + gpdpolygon2 = gpd.GeoSeries([*polygon_generator(31, 1)]) + polygon1 = cuspatial.from_geopandas(gpdpolygon1) + polygon2 = cuspatial.from_geopandas(gpdpolygon2) + got = polygon1.within(polygon2).values_host + expected = gpdpolygon1.within(gpdpolygon2).values + assert (got == expected).all() + + +def test_polygon_overlaps_linestring(): + gpdpolygon = gpd.GeoSeries( + Polygon([[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]) + ) + gpdline = gpd.GeoSeries([LineString([(0.5, 0.5), (1.5, 1.5)])]) + polygon = cuspatial.from_geopandas(gpdpolygon) + line = cuspatial.from_geopandas(gpdline) + got = polygon.overlaps(line).values_host + expected = gpdpolygon.overlaps(gpdline).values + assert (got == expected).all() + + +def test_max_polygons_overlaps_max_linestrings( + polygon_generator, linestring_generator +): + gpdpolygon = gpd.GeoSeries([*polygon_generator(31, 0)]) + gpdline = gpd.GeoSeries([*linestring_generator(31, 5)]) + polygon = cuspatial.from_geopandas(gpdpolygon) + line = cuspatial.from_geopandas(gpdline) + got = polygon.overlaps(line).values_host + expected = gpdpolygon.overlaps(gpdline).values + assert (got == expected).all() diff --git a/python/cuspatial/cuspatial/utils/column_utils.py b/python/cuspatial/cuspatial/utils/column_utils.py index fab7ff0e4..808eda840 100644 --- a/python/cuspatial/cuspatial/utils/column_utils.py +++ b/python/cuspatial/cuspatial/utils/column_utils.py @@ -97,3 +97,21 @@ def contains_only_polygons(gs: GeoSeries): """ return contain_single_type_geometry(gs) and len(gs.polygons.xy) > 0 + + +def has_same_geometry(lhs: GeoSeries, rhs: GeoSeries): + """ + Returns true if `lhs` and `rhs` have only features of the same homogeneous + geometry type. + """ + + if contains_only_points(lhs) and contains_only_points(rhs): + return True + elif contains_only_multipoints(lhs) and contains_only_multipoints(rhs): + return True + elif contains_only_linestrings(lhs) and contains_only_linestrings(rhs): + return True + elif contains_only_polygons(lhs) and contains_only_polygons(rhs): + return True + else: + return False