diff --git a/python/cuspatial/cuspatial/core/_column/geocolumn.py b/python/cuspatial/cuspatial/core/_column/geocolumn.py index 67b896959..01f51f03b 100644 --- a/python/cuspatial/cuspatial/core/_column/geocolumn.py +++ b/python/cuspatial/cuspatial/core/_column/geocolumn.py @@ -1,4 +1,6 @@ -# Copyright (c) 2021-2022 NVIDIA CORPORATION +# Copyright (c) 2021-2023 NVIDIA CORPORATION + +from enum import Enum from functools import cached_property from typing import Tuple, TypeVar @@ -11,6 +13,15 @@ from cuspatial.core._column.geometa import Feature_Enum, GeoMeta from cuspatial.utils.column_utils import empty_geometry_column + +class ColumnType(Enum): + MIXED = 0 + POINT = 1 + MULTIPOINT = 2 + LINESTRING = 3 + POLYGON = 4 + + T = TypeVar("T", bound="GeoColumn") diff --git a/python/cuspatial/cuspatial/core/binpreds/binpred_dispatch.py b/python/cuspatial/cuspatial/core/binpreds/binpred_dispatch.py new file mode 100644 index 000000000..5a53abaca --- /dev/null +++ b/python/cuspatial/cuspatial/core/binpreds/binpred_dispatch.py @@ -0,0 +1,31 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +"""`binpred_dispatch.py` contains a collection of dictionaries that +are used to dispatch binary predicate functions to the correct +implementation. + +The dictionaries are collected here to make using the dispatch +functionality easier. +""" + +from cuspatial.core.binpreds.feature_contains import ( # NOQA F401 + DispatchDict as CONTAINS_DISPATCH, +) +from cuspatial.core.binpreds.feature_covers import ( # NOQA F401 + DispatchDict as COVERS_DISPATCH, +) +from cuspatial.core.binpreds.feature_crosses import ( # NOQA F401 + DispatchDict as CROSSES_DISPATCH, +) +from cuspatial.core.binpreds.feature_equals import ( # NOQA F401 + DispatchDict as EQUALS_DISPATCH, +) +from cuspatial.core.binpreds.feature_intersects import ( # NOQA F401 + DispatchDict as INTERSECTS_DISPATCH, +) +from cuspatial.core.binpreds.feature_overlaps import ( # NOQA F401 + DispatchDict as OVERLAPS_DISPATCH, +) +from cuspatial.core.binpreds.feature_within import ( # NOQA F401 + DispatchDict as WITHIN_DISPATCH, +) diff --git a/python/cuspatial/cuspatial/core/binpreds/binpred_interface.py b/python/cuspatial/cuspatial/core/binpreds/binpred_interface.py new file mode 100644 index 000000000..35db8c827 --- /dev/null +++ b/python/cuspatial/cuspatial/core/binpreds/binpred_interface.py @@ -0,0 +1,443 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +from typing import TYPE_CHECKING + +from cudf import Series + +if TYPE_CHECKING: + from cuspatial.core.geoseries import GeoSeries + + +class BinPredConfig: + """Configuration for a binary predicate. + + Parameters + ---------- + align : bool + Whether to align the left-hand and right-hand GeoSeries before + computing the binary predicate. Defaults to True. + allpairs : bool + Whether to compute the binary predicate between all pairs of + features in the left-hand and right-hand GeoSeries. Defaults to + False. Only available with the contains predicate. + """ + + def __init__(self, **kwargs): + self.align = kwargs.get("align", True) + + def __repr__(self): + return f"BinpredConfig(align={self.align}, allpairs={self.allpairs})" + + def __str__(self): + return self.__repr__() + + +class PreprocessorResult: + """Result of a binary predicate preprocessor. The following classes + are used to give all implementors of `BinaryItf` a common interface + for preprocessor results. + + Parameters + ---------- + lhs : GeoSeries + The left-hand GeoSeries. + rhs : GeoSeries + The right-hand GeoSeries. + final_rhs : GeoSeries + The rhs GeoSeries, if modified by the preprocessor. For example + the contains preprocessor converts any complex feature type into + a collection of points. + point_indices : cudf.Series + A cudf.Series of indices that map each point in `points` to its + corresponding feature in the right-hand GeoSeries. + """ + + def __init__( + self, + lhs: "GeoSeries", + rhs: "GeoSeries", + final_rhs: "GeoSeries" = None, + point_indices: Series = None, + ): + self.lhs = lhs + self.rhs = rhs + self.final_rhs = final_rhs + self.point_indices = point_indices + + def __repr__(self): + return f"PreprocessorResult(lhs={self.lhs}, rhs={self.rhs}, \ + points={self.points}, point_indices={self.point_indices})" + + def __str__(self): + return self.__repr__() + + +class OpResult: + """Result of a binary predicate operation.""" + + pass + + +class ContainsOpResult(OpResult): + """Result of a Contains binary predicate operation. + + Parameters + ---------- + result : cudf.DataFrame + A cudf.DataFrame containing two columns: "polygon_index" and + Point_index". The "polygon_index" column contains the index of + the polygon that contains each point. The "point_index" column + contains the index of each point that is contained by a polygon. + points : GeoSeries + A GeoSeries of points. + point_indices : cudf.Series + A cudf.Series of indices that map each point in `points` to its + corresponding feature in the right-hand GeoSeries. + """ + + def __init__( + self, + result: Series, + points: "GeoSeries" = None, + point_indices: Series = None, + ): + self.result = result + self.points = points + self.point_indices = point_indices + + def __repr__(self): + return f"OpResult(result={self.result}, points={self.points}, \ + point_indices={self.point_indices})" + + def __str__(self): + return self.__repr__() + + +class EqualsOpResult(OpResult): + """Result of an Equals binary predicate operation. + + Parameters + ---------- + result : cudf.Series + A cudf.Series of boolean values indicating whether each feature in + the right-hand GeoSeries is equal to the point in the left-hand + GeoSeries. + point_indices: cudf.Series + A cudf.Series of indices that map each point in `points` to its + corresponding feature in the right-hand GeoSeries. + """ + + def __init__(self, result: Series, point_indices: Series): + self.result = result + self.point_indices = point_indices + + def __repr__(self): + return f"OpResult(result={self.result}) \ + point_indices={self.point_indices}" + + def __str__(self): + return self.__repr__() + + +class BinPred: + """Base class for binary predicates. This class is an abstract base class + and can not be instantiated directly. `BinPred` is the base class that + implements this interface in the most general case. Child classes exist + for each combination of left-hand and right-hand GeoSeries types and binary + predicates. For example, a `PointPointContains` predicate is a child class + of `BinPred` that implements the `contains_properly` predicate for two + `Point` GeoSeries. These classes are found in the `feature_` + files found in this directory. + + Notes + ----- + BinPred classes are selected using the appropriate dispatch function. For + example, the `contains_properly` predicate is selected using the + `CONTAINS_DISPATCH` dispatch function from `binpred_dispatch.py`. The + dispatch function selects the appropriate BinPred class based on the + left-hand and right-hand GeoSeries types. + + This enables customized behavior for each combination of left-hand and + right-hand GeoSeries types and binary predicates. For example, the + `contains_properly` predicate for two `Point` GeoSeries is implemented + using a `PointPointContains` BinPred class. The `contains_properly` + predicate for a `Point` GeoSeries and a `Polygon` GeoSeries is implemented + using a `PointPolygonContains` BinPred class. Most subclasses will be able + to use all or 2/3rds of the methods defined in the `RootContains(BinPred) + class`. + + The `RootContains` class implements the `contains_properly` predicate for + the most common combination of left-hand and right-hand GeoSeries types. + The `RootContains` class can be used as a template for implementing the + `contains_properly` predicate for other combinations of left-hand and + right-hand GeoSeries types. + + Examples + -------- + >>> from cuspatial.core.binpreds.binpred_dispatch import CONTAINS_DISPATCH + >>> from cuspatial.core.geoseries import GeoSeries + >>> from shapely.geometry import Point, Polygon + >>> predicate = CONTAINS_DISPATCH[( + ... lhs.column_type, rhs.column_type + ... )](align=True, allpairs=False) + >>> lhs = GeoSeries([Polygon([(0, 0), (1, 1), (1, 0)])]) + >>> rhs = GeoSeries([Point(0, 0), Point(1, 1)]) + >>> print(predicate(lhs, rhs)) + 0 False + dtype: bool + """ + + def __init__(self, **kwargs): + """Initialize a binary predicate. Collects any arguments passed + to the binary predicate to be used at runtime. + + This class stores the config object that can be passed to the binary + predicate at runtime. The lhs and rhs are set at runtime using the + __call__ method so that the same binary predicate can be used for + multiple left-hand and right-hand GeoSeries. + + Parameters + ---------- + **kwargs + Any additional arguments to be used at runtime. + + Attributes + ---------- + kwargs : dict + Any additional arguments to be used at runtime. + + Methods + ------- + __call__(self, lhs, rhs) + System call for the binary predicate. Calls the _call method, which + is implemented by the subclass. + _call(self, lhs, rhs) + Call the binary predicate. This method is implemented by the + subclass. + _preprocess(self, lhs, rhs) + Preprocess the left-hand and right-hand GeoSeries. This method is + implemented by the subclass. + _compute_predicate(self, lhs, rhs) + Compute the binary predicate between two GeoSeries. This method is + implemented by the subclass. + _postprocess(self, lhs, rhs, point_indices, op_result) + Postprocess the output GeoSeries to ensure that they are of the + correct type for the predicate. This method is implemented by the + subclass. + + Examples + -------- + >>> from cuspatial.core.binpreds.binpred_dispatch import ( + ... CONTAINS_DISPATCH + ... ) + >>> from cuspatial.core.geoseries import GeoSeries + >>> from shapely.geometry import Point, Polygon + >>> predicate = CONTAINS_DISPATCH[( + ... lhs.column_type, rhs.column_type + ... )](align=True, allpairs=False) + >>> lhs = GeoSeries([Polygon([(0, 0), (1, 1), (1, 0)])]) + >>> rhs = GeoSeries([Point(0, 0), Point(1, 1)]) + >>> print(predicate(lhs, rhs)) + 0 False + dtype: bool + """ + self.kwargs = kwargs + + def __call__(self, lhs: "GeoSeries", rhs: "GeoSeries") -> Series: + """System call for the binary predicate. Calls the _call method, + which is implemented by the subclass. Executing the binary predicate + returns the results of the binary predicate as a GeoSeries. + + Parameters + ---------- + lhs : GeoSeries + The left-hand GeoSeries. + rhs : GeoSeries + The right-hand GeoSeries. + + Returns + ------- + result : Series + The results of the binary predicate. + + Examples + -------- + >>> from cuspatial.core.binpreds.binpred_dispatch import ( + ... CONTAINS_DISPATCH + ... ) + >>> from cuspatial.core.geoseries import GeoSeries + >>> from shapely.geometry import Point, Polygon + >>> predicate = CONTAINS_DISPATCH[( + ... lhs.column_type, rhs.column_type + ... )](align=True, allpairs=False) + >>> lhs = GeoSeries([Polygon([(0, 0), (1, 1), (1, 0)])]) + >>> rhs = GeoSeries([Point(0, 0), Point(1, 1)]) + >>> print(predicate(lhs, rhs)) + 0 False + dtype: bool + """ + return self._call(lhs, rhs) + + def _call(self, lhs: "GeoSeries", rhs: "GeoSeries") -> Series: + """Call the binary predicate. This method is implemented by the + subclass. + + Parameters + ---------- + lhs : GeoSeries + The left-hand GeoSeries. + rhs : GeoSeries + The right-hand GeoSeries. + + Returns + ------- + result : Series + A cudf.Series of boolean values indicating whether each feature in + the right-hand GeoSeries satisfies the requirements of a binary + predicate with its corresponding feature in the left-hand + GeoSeries. + """ + return self._preprocess(lhs, rhs) + + def _preprocess(self, lhs: "GeoSeries", rhs: "GeoSeries") -> Series: + """Preprocess the left-hand and right-hand GeoSeries. This method + is implemented by the subclass. + + Preprocessing is used to ensure that the left-hand and right-hand + GeoSeries are of the correct type for each of the three basic + predicates: 'contains', 'intersects', and 'equals'. For example, + `contains` requires that the left-hand GeoSeries be polygons or + multipolygons and the right-hand GeoSeries be points or multipoints. + `intersects` requires that the left-hand GeoSeries be linestrings or + points and the right-hand GeoSeries be linestrings or points. + `equals` requires that the left-hand and right-hand GeoSeries be + points. + + Subclasses that implement `_preprocess` are responsible for calling + `_compute_predicate` to continue the execution of the binary predicate. + The last line of `_preprocess` as implemented by any subclass should be + + return self._compute_predicate(lhs, rhs, points, point_indices) + + Parameters + ---------- + lhs : GeoSeries + The original left-hand GeoSeries. + rhs : GeoSeries + The original right-hand GeoSeries. + + Returns + ------- + result : Series + A cudf.Series of boolean values indicating whether each feature in + the right-hand GeoSeries satisfies the requirements of a binary + predicate with its corresponding feature in the left-hand + GeoSeries. + """ + raise NotImplementedError + + def _compute_predicate( + self, + lhs: "GeoSeries", + rhs: "GeoSeries", + preprocessor_result: PreprocessorResult, + ) -> Series: + """Compute the binary predicate between two GeoSeries. This method + is implemented by the subclass. This method is called by `_preprocess` + to continue the execution of the binary predicate. `_compute_predicate` + is responsible for calling `_postprocess` to complete the execution of + the binary predicate. + + `compute_predicate` is used to compute the binary predicate, or + composition of binary predicates, between two GeoSeries. The left-hand + GeoSeries is considered the "base" GeoSeries and the right-hand + GeoSeries is considered the "other" GeoSeries. The binary predicate is + computed between each feature in the base GeoSeries and the other + GeoSeries. The result is a GeoSeries of boolean values indicating + whether each feature in the other GeoSeries satisfies the requirements + of a binary predicate with its corresponding feature in the base + GeoSeries. + + Subclasses that implement `_compute_predicate` are responsible for + calling `_postprocess` to complete the execution of the binary + predicate. The last line of `_compute_predicate` should be + + return self._postprocess( + lhs, + rhs, + OpResult(modified_rhs, point_indices) + ) + + where `modified_rhs` is a GeoSeries of points and `point_indices` is a + cudf.Series of indices that map each point in `points` to its + corresponding feature in the right-hand GeoSeries. + + Parameters + ---------- + lhs : GeoSeries + The left-hand GeoSeries. + rhs : GeoSeries + The right-hand GeoSeries. + preprocessor_result : PreprocessorResult + The result of the preprocessing step. + """ + raise NotImplementedError + + def _postprocess( + self, + lhs: "GeoSeries", + rhs: "GeoSeries", + op_result: OpResult, + ) -> Series: + """Postprocess the output GeoSeries to ensure that they are of the + correct return type for the predicate. This method is implemented by + the subclass. + + Postprocessing is used to convert the results of the + `_compute_predicate` call into countable values. This step converts the + results of one of the three binary predicates `contains`, `intersects`, + or `equals` into a `Series` of boolean values. When the `rhs` is a + non-point type, `_postprocess` is responsible for aggregating the + results of the `_compute_predicate` call into a single boolean value + for each feature in the `lhs`. + + Parameters + ---------- + lhs : GeoSeries + The left-hand GeoSeries. + rhs : GeoSeries + The right-hand GeoSeries. + op_result : cudf.Series + The result of the `_compute_predicate` call. + + Returns + ------- + result : Series + A Series of boolean values indicating whether each feature in + the right-hand GeoSeries satisfies the requirements of a binary + predicate with its corresponding feature in the left-hand + GeoSeries. + + Notes + ----- + Arithmetic rules incorporated into `_postprocess` classes: + + (a, b) -> a contains b iff for all points p in b, p is in a + (a, b) -> a intersects b iff for any point p in b, p is in a + + I'm currently looking into refactoring these arithmetics into a + syntax that more closely resembles it. + """ + raise NotImplementedError + + +class NotImplementedPredicate(BinPred): + """A class that is used to raise an error when a binary predicate is + not implemented for a given combination of left-hand and right-hand + GeoSeries types. This is useful for delineating which binary predicates + are implemented for which GeoSeries types in their appropriate + `DispatchDict`. + """ + + def __init__(self, *args, **kwargs): + raise NotImplementedError diff --git a/python/cuspatial/cuspatial/core/binpreds/binpreds.py b/python/cuspatial/cuspatial/core/binpreds/binpreds.py deleted file mode 100644 index 3e6c1654e..000000000 --- a/python/cuspatial/cuspatial/core/binpreds/binpreds.py +++ /dev/null @@ -1,782 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. - -from abc import ABC, abstractmethod - -import cupy as cp - -import cudf - -import cuspatial -from cuspatial.core._column.geocolumn import GeoColumn -from cuspatial.core.binpreds.contains import contains_properly -from cuspatial.utils.column_utils import ( - contains_only_linestrings, - contains_only_multipoints, - contains_only_points, - contains_only_polygons, - has_multipolygons, - has_same_geometry, -) - - -class PreprocessorOutput: - """The output of the preprocess method of a binary predicate. - - This makes it possible to create a class that matches the necessary - signature of a geoseries.GeoColumnAccessor object. In some cases the - preprocessor may need to reorder the input data, in which case the - preprocessor will return a PreprocessorOutput object instead of a - GeoColumnAccessor.""" - - def __init__(self, coords, indices) -> None: - self.vertices = coords - self.indices = indices - - @property - def xy(self): - return self.vertices - - def point_indices(self): - return self.indices - - -class BinaryPredicate(ABC): - """BinaryPredicate is an abstract class that implements the binary - predicate algorithm. The binary predicate algorithm is used to compute - the relationship between two GeoSeries. - - The algorithm is implemented in three steps: `preprocess`, `_op`, and - `postprocess`. The `preprocess` step is used to ensure that the input - GeoSeries are of the correct type for the binary predicate. The - `_op` step is used to compute the relationship between the points - in the input GeoSeries. The `postprocess` step is used to compute - the relationship between the input GeoSeries. - """ - - @abstractmethod - def preprocess(self, lhs, rhs): - """Preprocess the input data for the binary predicate. This method - should be implemented by subclasses. Preprocess and postprocess are - used to implement the discrete math rules of the binary predicates. - - Notes - ----- - Should only be called once. - - Parameters - ---------- - op : str - The binary predicate to perform. - lhs : GeoSeries - The left-hand-side of the GeoSeries-level binary predicate. - rhs : GeoSeries - The right-hand-side of the GeoSeries-level binary predicate. - - Returns - ------- - GeoSeries - The left-hand-side of the internal binary predicate, may be - reordered. - GeoSeries - The right-hand-side of the internal binary predicate, may be - reordered. - """ - return (lhs, rhs, cudf.RangeIndex(len(rhs))) - - @abstractmethod - def postprocess( - self, point_indices: cudf.Series, point_result: cudf.Series - ) -> cudf.Series: - """Postprocess the output data for the binary predicate. This method - should be implemented by subclasses. - - Postprocess converts the raw results of the binary predicate into - the final result. This is where the discrete math rules are applied. - The binary predicate operation does not compute any relationships - between features in the input GeoSeries', it only computes the - relationship between the points in the input geometries. The - postprocess method uses the discrete math rules to compute the - relationship between the input geometries. - - Parameters - ---------- - op : str - The binary predicate to post process. Determines for example the - set predicate to use for computing the result. - point_indices : cudf.Series - The indices of the points in the original GeoSeries. - point_result : cudf.Series - The raw result of the binary predicate. - - Returns - ------- - cudf.Series - The output of the post processing, True/False results for - the specified binary op. - """ - pass - - def __init__(self, lhs, rhs, align=True): - """Compute the binary predicate `op` on `lhs` and `rhs`. - - There are ten binary predicates supported by cuspatial: - - `.equals` - - `.disjoint` - - `.touches` - - `.contains` - - `.contains_properly` - - `.covers` - - `.intersects` - - `.within` - - `.crosses` - - `.overlaps` - - There are thirty-six ordering combinations of `lhs` and `rhs`, the - unordered pairs of each `point`, `multipoint`, `linestring`, - `multilinestring`, `polygon`, and `multipolygon`. The ordering of - `lhs` and `rhs` is important because the result of the binary - predicate is not symmetric. For example, `A.contains(B)` is not - the same as `B.contains(A)`. - - Parameters - ---------- - op : str - The binary predicate to perform. - lhs : GeoSeries - The left-hand-side of the binary predicate. - rhs : GeoSeries - The right-hand-side of the binary predicate. - align : bool - If True, align the indices of `lhs` and `rhs` before performing - the binary predicate. If False, `lhs` and `rhs` must have the - same index. - - Returns - ------- - GeoSeries - A GeoSeries containing the result of the binary predicate. - """ - (self.lhs, self.rhs) = lhs.align(rhs) if align else (lhs, rhs) - self.align = align - - def _cancel_op(self, lhs, rhs, result): - """Used to disable computation of the binary predicate. - - This occurs when the binary predicate is not supported for the - input types, and a final result can be computed only using - `preprocess` and `postprocess`.""" - self._op = lambda x, y: result - return (lhs, rhs, result) - - def __call__(self) -> cudf.Series: - """Return the result of the binary predicate.""" - # Type disambiguation - # Type disambiguation has a large effect on the decisions of the - # algorithm. - (lhs, rhs, indices) = self.preprocess(self.lhs, self.rhs) - - # Binpred call - point_result = self._op(lhs, rhs) - - # Postprocess: Apply discrete math rules to identify relationships. - return self.postprocess(indices, point_result) - - -class ContainsProperlyBinpred(BinaryPredicate): - def __init__(self, lhs, rhs, align=True, allpairs=False): - super().__init__(lhs, rhs, align=align) - self.allpairs = allpairs - if allpairs: - self.align = False - - def preprocess(self, lhs, rhs): - """Preprocess the input GeoSeries to ensure that they are of the - correct type for the predicate.""" - # RHS conditioning: - point_indices = None - # point in polygon - if contains_only_linestrings(rhs): - # condition for linestrings - geom = rhs.lines - elif contains_only_polygons(rhs) is True: - # polygon in polygon - geom = rhs.polygons - elif contains_only_multipoints(rhs) is True: - # mpoint in polygon - geom = rhs.multipoints - else: - # no conditioning is required - geom = rhs.points - xy_points = geom.xy - - # Arrange into shape for calling point-in-polygon, intersection, or - # equals - point_indices = geom.point_indices() - final_rhs = cuspatial.core.geoseries.GeoSeries( - GeoColumn._from_points_xy(xy_points._column) - ) - return (lhs, final_rhs, cudf.Series(point_indices)) - - def _should_use_quadtree(self): - """Determine if the quadtree should be used for the binary predicate. - - Returns - ------- - bool - True if the quadtree should be used, False otherwise. - - Notes - ----- - 1. Quadtree is always used if user requests `allpairs=True`. - 2. If the number of polygons in the lhs is less than 32, we use the - byte-limited algorithm because it is faster and has less memory - overhead. - 3. If the lhs contains more than 32 polygons, we use the quadtree - because it does not have a polygon-count limit. - 4. If the lhs contains multipolygons, we use quadtree because the - performance between quadtree and byte-limited is similar, but - code complexity would be higher if we did multipolygon - reconstruction on both code paths. - """ - return ( - len(self.lhs) >= 32 or has_multipolygons(self.lhs) or self.allpairs - ) - - def _op(self, lhs, points): - """Compute the contains_properly relationship between two GeoSeries. - A feature A contains another feature B if no points of B lie in the - exterior of A, and at least one point of the interior of B lies in the - interior of A. This is the inverse of `within`.""" - if not contains_only_polygons(lhs): - raise TypeError( - "`.contains` can only be called with polygon series." - ) - if self._should_use_quadtree(): - return contains_properly(lhs, points, how="quadtree") - else: - return contains_properly(lhs, points, how="byte-limited") - - def _convert_quadtree_result_from_part_to_polygon_indices( - self, point_result - ): - """Convert the result of a quadtree contains_properly call from - part indices to polygon indices. - - Parameters - ---------- - point_result : cudf.Series - The result of a quadtree contains_properly call. This result - contains the `part_index` of the polygon that contains the - point, not the polygon index. - - Returns - ------- - cudf.Series - The result of a quadtree contains_properly call. This result - contains the `polygon_index` of the polygon that contains the - point, not the part index. - """ - # Get the length of each part, map it to indices, and store - # the result in a dataframe. - if not contains_only_polygons(self.lhs): - raise TypeError( - "`.contains` can only be called with polygon series." - ) - rings_to_parts = cp.array(self.lhs.polygons.part_offset) - part_sizes = rings_to_parts[1:] - rings_to_parts[:-1] - parts_map = cudf.Series( - cp.arange(len(part_sizes)), name="part_index" - ).repeat(part_sizes) - parts_index_mapping_df = parts_map.reset_index(drop=True).reset_index() - # Map the length of each polygon in a similar fashion, then - # join them below. - parts_to_geoms = cp.array(self.lhs.polygons.geometry_offset) - geometry_sizes = parts_to_geoms[1:] - parts_to_geoms[:-1] - geometry_map = cudf.Series( - cp.arange(len(geometry_sizes)), name="polygon_index" - ).repeat(geometry_sizes) - geom_index_mapping_df = geometry_map.reset_index(drop=True) - geom_index_mapping_df.index.name = "part_index" - geom_index_mapping_df = geom_index_mapping_df.reset_index() - # Replace the part index with the polygon index by join - part_result = parts_index_mapping_df.merge( - point_result, on="part_index" - ) - # Replace the polygon index with the row index by join - return geom_index_mapping_df.merge(part_result, on="part_index")[ - ["polygon_index", "point_index"] - ] - - def _count_results_in_multipoint_geometries( - self, point_indices, point_result - ): - """Count the number of points in each multipoint geometry. - - Parameters - ---------- - point_indices : cudf.Series - The indices of the points in the original (rhs) GeoSeries. - point_result : cudf.DataFrame - The result of a contains_properly call. - - Returns - ------- - cudf.Series - The number of points that fell within a particular polygon id. - cudf.Series - The number of points in each multipoint geometry. - """ - point_indices_df = cudf.Series( - point_indices, - name="rhs_index", - index=cudf.RangeIndex(len(point_indices), name="point_index"), - ).reset_index() - with_rhs_indices = point_result.merge( - point_indices_df, on="point_index" - ) - points_grouped_by_original_polygon = with_rhs_indices[ - ["point_index", "rhs_index"] - ].drop_duplicates() - hits = ( - points_grouped_by_original_polygon.groupby("rhs_index") - .count() - .sort_index() - ) - expected_count = ( - point_indices_df.groupby("rhs_index").count().sort_index() - ) - return hits, expected_count - - def _postprocess_quadtree_result(self, point_indices, point_result): - if len(point_result) == 0: - return cudf.Series([False] * len(self.lhs)) - - # Convert the quadtree part indices df into a polygon indices df - polygon_indices = ( - self._convert_quadtree_result_from_part_to_polygon_indices( - point_result - ) - ) - # Because the quadtree contains_properly call returns a list of - # points that are contained in each part, parts can be duplicated - # once their index is converted to a polygon index. - allpairs_result = polygon_indices.drop_duplicates() - - # Replace the polygon index with the original index - allpairs_result["polygon_index"] = allpairs_result[ - "polygon_index" - ].replace( - cudf.Series(self.lhs.index, index=cp.arange(len(self.lhs.index))) - ) - - # If the user wants all pairs, return the result. Otherwise, - # return a boolean series indicating whether each point is - # contained in the corresponding polygon. - if self.allpairs: - return allpairs_result - else: - # for each input pair i: result[i] =  true iff point[i] is - # contained in at least one polygon of multipolygon[i]. - if ( - contains_only_linestrings(self.rhs) - or contains_only_polygons(self.rhs) - or contains_only_multipoints(self.rhs) - ): - ( - hits, - expected_count, - ) = self._count_results_in_multipoint_geometries( - point_indices, allpairs_result - ) - result_df = hits.reset_index().merge( - expected_count.reset_index(), on="rhs_index" - ) - result_df["feature_in_polygon"] = ( - result_df["point_index_x"] >= result_df["point_index_y"] - ) - final_result = cudf.Series( - [False] * (point_indices.max().item() + 1) - ) # point_indices is zero index - final_result.loc[ - result_df["rhs_index"][result_df["feature_in_polygon"]] - ] = True - return final_result - else: - # pairwise - if len(self.lhs) == len(self.rhs): - matches = ( - allpairs_result["polygon_index"] - == allpairs_result["point_index"] - ) - final_result = cudf.Series([False] * len(point_indices)) - final_result.loc[ - allpairs_result["polygon_index"][matches] - ] = True - return final_result - else: - final_result = cudf.Series([False] * len(point_indices)) - final_result.loc[allpairs_result["polygon_index"]] = True - return final_result - - def _postprocess_brute_force_result(self, point_indices, point_result): - # If there are 31 or fewer polygons in the input, the result - # is a dataframe with one row per point and one column per - # polygon. - - # Result can be: - # A Dataframe of booleans with n_points rows and up to 31 columns. - if ( - contains_only_linestrings(self.rhs) - or contains_only_polygons(self.rhs) - or contains_only_multipoints(self.rhs) - ): - # Compute the set of results for each point-in-polygon predicate. - # Group them by the original index, and sum the results. If the - # sum of points in the rhs feature is equal to the number of - # points found in the polygon, then the polygon contains the - # feature. - point_result["idx"] = point_indices - group_result = ( - point_result.groupby("idx").sum().sort_index() - == point_result.groupby("idx").count().sort_index() - ) - else: - group_result = point_result - - # If there is only one column, the result is a series with - # one row per point. If it is a dataframe, the result needs - # to be converted from each matching row/column value to a - # series using `cp.diag`. - boolean_series_output = cudf.Series([False] * len(self.lhs)) - boolean_series_output.name = None - if len(point_result.columns) > 1: - boolean_series_output[group_result.index] = cp.diag( - group_result.values - ) - else: - boolean_series_output[group_result.index] = group_result[ - group_result.columns[0] - ] - return boolean_series_output - - def postprocess(self, point_indices, point_result): - """Postprocess the output GeoSeries to ensure that they are of the - correct type for the predicate. - - Postprocess for contains_properly has to handle multiple input and - output configurations. - - The input can be a single polygon, a single multipolygon, or a - GeoSeries containing a mix of polygons and multipolygons. - - The input to postprocess is `point_indices`, which can be either a - cudf.DataFrame with one row per point and one column per polygon or - a cudf.DataFrame containing the point index and the part index for - each point in the polygon. - """ - return self._postprocess_quadtree_result(point_indices, point_result) - - -class OverlapsBinpred(ContainsProperlyBinpred): - def postprocess(self, point_indices, point_result): - # Same as contains_properly, but we need to check that the - # dimensions are the same. - # TODO: Maybe change this to intersection - if not has_same_geometry(self.lhs, self.rhs): - return cudf.Series([False] * len(self.lhs)) - if len(point_result) == 0: - return cudf.Series([False] * len(self.lhs)) - polygon_indices = ( - self._convert_quadtree_result_from_part_to_polygon_indices( - point_result - ) - ) - group_counts = polygon_indices.groupby("polygon_index").count() - point_counts = ( - cudf.DataFrame( - {"point_indices": point_indices, "input_size": True} - ) - .groupby("point_indices") - .count() - ) - result = (group_counts["point_index"] > 0) & ( - group_counts["point_index"] < point_counts["input_size"] - ) - return result - - -class IntersectsBinpred(ContainsProperlyBinpred): - def preprocess(self, lhs, rhs): - if contains_only_polygons(rhs): - (self.lhs, self.rhs) = (rhs, lhs) - return super().preprocess(rhs, lhs) - - def postprocess(self, point_indices, point_result): - # Same as contains_properly, but we need to check that the - # dimensions are the same. - contains_result = super().postprocess(point_indices, point_result) - return contains_result - - -class WithinBinpred(ContainsProperlyBinpred): - def preprocess(self, lhs, rhs): - if contains_only_polygons(rhs): - (self.lhs, self.rhs) = (rhs, lhs) - return super().preprocess(rhs, lhs) - - def postprocess(self, point_indices, point_result): - """Postprocess the output GeoSeries to ensure that they are of the - correct type for the predicate.""" - (hits, expected_count,) = self._count_results_in_multipoint_geometries( - point_indices, point_result - ) - result_df = hits.reset_index().merge( - expected_count.reset_index(), on="rhs_index" - ) - result_df["feature_in_polygon"] = ( - result_df["point_index_x"] >= result_df["point_index_y"] - ) - final_result = cudf.Series( - [False] * (point_indices.max().item() + 1) - ) # point_indices is zero index - final_result.loc[ - result_df["rhs_index"][result_df["feature_in_polygon"]] - ] = True - return final_result - - -class EqualsBinpred(BinaryPredicate): - def _offset_equals(self, lhs, rhs): - """Compute the pairwise length equality of two offset arrays""" - lhs_lengths = lhs[:-1] - lhs[1:] - rhs_lengths = rhs[:-1] - rhs[1:] - return lhs_lengths == rhs_lengths - - def _sort_interleaved_points_by_offset(self, coords, offsets, sort_order): - """Sort xy according to bins defined by offset. Sort order is a list - of column names to sort by. - - `_sort_interleaved_points_by_offset` creates a dataframe with the - following columns: - "sizes": an index for each object represented in `coords`. - "points": an index for each point in `coords`. - "xy_key": an index that maintains x/y ordering. - "xy": the x/y coordinates in `coords`. - - The dataframe is sorted according to keys passed in by the caller. - For sorting multipoints, the keys in order are "object_key", "xy", - "xy_key". This sorts the points in each multipoint into the same - bin defined by "object_key", then sorts the points in each bin by - x/y coordinates, and finally sorts the points in each bin by the - `xy_key` which maintains that the x coordinate precedes the y - coordinate. - - For sorting linestrings, the keys in order are "object_key", - "point_key", "xy_key". This sorts the points in each linestring - into the same bin defined by "object_key", then sorts the points - in each bin by point ordering, and finally sorts the points in - each bin by x/y ordering. - - Parameters - ---------- - coords : cudf.Series - interleaved x,y coordinates - offsets : cudf.Series - offsets into coords - sort_order : list - list of column names to sort by. One of "object_key", "point_key", - "xy_key", and "xy". - - Returns - ------- - cudf.Series - sorted interleaved x,y coordinates - """ - sizes = offsets[1:] - offsets[:-1] - object_key = ( - cudf.Series(cp.arange(len(sizes))) - .repeat(sizes * 2) - .reset_index(drop=True) - ) - point_key = cp.arange(len(coords) // 2).repeat(2)[::-1] - xy_key = cp.tile([0, 1], len(coords) // 2) - sorting_df = cudf.DataFrame( - { - "object_key": object_key, - "point_key": point_key, - "xy_key": xy_key, - "xy": coords, - } - ) - sorted_df = sorting_df.sort_values(by=sort_order).reset_index( - drop=True - ) - return sorted_df["xy"] - - def _sort_multipoint_series(self, coords, offsets): - """Sort xy according to bins defined by offset""" - result = self._sort_interleaved_points_by_offset( - coords, offsets, ["object_key", "xy", "xy_key"] - ) - result.name = None - return result - - def _sort_multipoints(self, lhs, rhs, initial): - lhs_sorted = self._sort_multipoint_series( - lhs.multipoints.xy, lhs.multipoints.geometry_offset - ) - rhs_sorted = self._sort_multipoint_series( - rhs.multipoints.xy, rhs.multipoints.geometry_offset - ) - lhs_result = cuspatial.core.geoseries.GeoSeries.from_multipoints_xy( - lhs_sorted, lhs.multipoints.geometry_offset - ) - rhs_result = cuspatial.core.geoseries.GeoSeries.from_multipoints_xy( - rhs_sorted, rhs.multipoints.geometry_offset - ) - lhs_result.index = lhs.index - rhs_result.index = rhs.index - return ( - lhs_result, - rhs_result, - initial, - ) - - def _reverse_linestrings(self, coords, offsets): - """Reverse the order of coordinates in a Arrow buffer of coordinates - and offsets.""" - result = self._sort_interleaved_points_by_offset( - coords, offsets, ["object_key", "point_key", "xy_key"] - ) - result.name = None - return result - - def _compare_linestrings_and_reversed(self, lhs, rhs, initial): - """Compare linestrings with their reversed counterparts.""" - lhs_xy = self._reverse_linestrings(lhs.xy, lhs.part_offset) - rhs_xy = self._reverse_linestrings(rhs.xy, rhs.part_offset) - return ( - PreprocessorOutput(lhs_xy, lhs.point_indices()), - PreprocessorOutput(rhs_xy, rhs.point_indices()), - initial, - ) - - def preprocess(self, lhs, rhs): - # Compare types - type_compare = lhs.feature_types == rhs.feature_types - # Any unmatched type is not equal - if (type_compare == False).all(): # noqa: E712 - # Override _op so that it will not be run. - return self._cancel_op(lhs, rhs, type_compare) - # Get indices of matching types - if contains_only_multipoints(lhs): - lengths_equal = self._offset_equals( - lhs.multipoints.geometry_offset, - rhs.multipoints.geometry_offset, - ) - if lengths_equal.any(): - # Multipoints are equal if they contains the - # same unordered points. - return self._sort_multipoints( - lhs[lengths_equal], - rhs[lengths_equal], - lengths_equal, - ) - else: - # No lengths are equal, so none can be equal. - return self._cancel_op(lhs, rhs, lengths_equal) - elif contains_only_linestrings(lhs): - lengths_equal = self._offset_equals( - lhs.lines.part_offset, rhs.lines.part_offset - ) - if lengths_equal.any(): - return ( - lhs[lengths_equal], - rhs[lengths_equal], - lengths_equal, - ) - else: - return self._cancel_op(lhs, rhs, lengths_equal) - elif contains_only_polygons(lhs): - raise NotImplementedError - elif contains_only_points(lhs): - return (lhs, rhs, type_compare) - - def postprocess(self, lengths_equal, point_result): - # if point_result is not a Series, preprocessing terminated - # the results early. - if isinstance(point_result, cudf.Series): - point_result = point_result.sort_index() - lengths_equal[point_result.index] = point_result - return cudf.Series(lengths_equal) - - def _vertices_equals(self, lhs, rhs): - """Compute the equals relationship between interleaved xy - coordinate buffers.""" - length = min(len(lhs), len(rhs)) - a = lhs[:length:2]._column == rhs[:length:2]._column - b = rhs[1:length:2]._column == lhs[1:length:2]._column - return a & b - - def _op(self, lhs, rhs): - if contains_only_linestrings(lhs): - # Linestrings can be compared either forward or - # reversed. We need to compare both directions. - lhs_reversed = self._reverse_linestrings( - lhs.lines.xy, lhs.lines.part_offset - ) - forward_result = self._vertices_equals(lhs.lines.xy, rhs.lines.xy) - reverse_result = self._vertices_equals(lhs_reversed, rhs.lines.xy) - result = forward_result | reverse_result - elif contains_only_multipoints(lhs): - result = self._vertices_equals( - lhs.multipoints.xy, rhs.multipoints.xy - ) - elif contains_only_points(lhs): - result = self._vertices_equals(lhs.points.xy, rhs.points.xy) - elif contains_only_polygons(lhs): - raise NotImplementedError - indices = lhs.point_indices - result_df = cudf.DataFrame( - {"idx": indices[: len(result)], "equals": result} - ) - gb_idx = result_df.groupby("idx") - result = (gb_idx.sum().sort_index() == gb_idx.count().sort_index())[ - "equals" - ] - result.index = lhs.index - result.index.name = None - result.name = None - return result - - -class CrossesBinpred(EqualsBinpred): - """An object is said to cross other if its interior intersects the - interior of the other but does not contain it, and the dimension of - the intersection is less than the dimension of the one or the other. - - This is only implemented for `points.crosses(points)` at this time. - """ - - def postprocess(self, point_indices, point_result): - if has_same_geometry(self.lhs, self.rhs) and contains_only_points( - self.lhs - ): - return cudf.Series([False] * len(self.lhs)) - df_result = cudf.DataFrame({"idx": point_indices, "pip": point_result}) - point_result = cudf.Series( - df_result["pip"], index=cudf.RangeIndex(0, len(df_result)) - ) - point_result.name = None - return point_result - - -class CoversBinpred(EqualsBinpred): - """An object A covers another B if no points of B lie in the exterior - of A, and at least one point of the interior of B lies in the interior - of A. - - This is only implemented for `points.covers(points)` at this time.""" - - def postprocess(self, point_indices, point_result): - return cudf.Series(point_result, index=point_indices) diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_contains.py b/python/cuspatial/cuspatial/core/binpreds/feature_contains.py new file mode 100644 index 000000000..28e783c7d --- /dev/null +++ b/python/cuspatial/cuspatial/core/binpreds/feature_contains.py @@ -0,0 +1,367 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +from typing import Generic, TypeVar, Union + +import cupy as cp + +import cudf +from cudf.core.dataframe import DataFrame +from cudf.core.series import Series + +from cuspatial.core._column.geocolumn import GeoColumn +from cuspatial.core.binpreds.binpred_interface import ( + BinPred, + BinPredConfig, + ContainsOpResult, + NotImplementedPredicate, + PreprocessorResult, +) +from cuspatial.core.binpreds.contains import contains_properly +from cuspatial.core.binpreds.feature_equals import ( + DispatchDict as EQUALS_DISPATCH_DICT, +) +from cuspatial.utils.binpred_utils import ( + LineString, + MultiPoint, + Point, + Polygon, + _count_results_in_multipoint_geometries, + _false_series, +) +from cuspatial.utils.column_utils import ( + contains_only_linestrings, + contains_only_multipoints, + contains_only_polygons, + has_multipolygons, +) + +GeoSeries = TypeVar("GeoSeries") + + +class ContainsPredicateBase(BinPred, Generic[GeoSeries]): + """Base class for binary predicates that are defined in terms of a + `contains` basic predicate. This class implements the logic that underlies + `polygon.contains` primarily, and is implemented for many cases. + + Subclasses are selected using the `DispatchDict` located at the end + of this file. + """ + + def __init__(self, **kwargs): + """`ContainsPredicateBase` constructor. + + Parameters + ---------- + allpairs: bool + Whether to compute all pairs of features in the left-hand and + right-hand GeoSeries. If False, the feature will be compared in a + 1:1 fashion with the corresponding feature in the other GeoSeries. + """ + self.config = BinPredConfig(**kwargs) + self.config.allpairs = kwargs.get("allpairs", False) + + def _preprocess(self, lhs, rhs): + """Flatten any rhs into only its points xy array. This is necessary + because the basic predicate for contains, point-in-polygon, + only accepts points. + + Parameters + ---------- + lhs : GeoSeries + The left-hand GeoSeries. + rhs : GeoSeries + The right-hand GeoSeries. + + Returns + ------- + result : GeoSeries + A GeoSeries of boolean values indicating whether each feature in + the right-hand GeoSeries satisfies the requirements of the point- + in-polygon basic predicate with its corresponding feature in the + left-hand GeoSeries. + """ + # RHS conditioning: + point_indices = None + # point in polygon + if contains_only_linestrings(rhs): + # condition for linestrings + geom = rhs.lines + elif contains_only_polygons(rhs) is True: + # polygon in polygon + geom = rhs.polygons + elif contains_only_multipoints(rhs) is True: + # mpoint in polygon + geom = rhs.multipoints + else: + # no conditioning is required + geom = rhs.points + xy_points = geom.xy + + # Arrange into shape for calling point-in-polygon, intersection, or + # equals + point_indices = geom.point_indices() + from cuspatial.core.geoseries import GeoSeries + + final_rhs = GeoSeries(GeoColumn._from_points_xy(xy_points._column)) + preprocess_result = PreprocessorResult( + lhs, rhs, final_rhs, point_indices + ) + return self._compute_predicate(lhs, rhs, preprocess_result) + + def _should_use_quadtree(self, lhs): + """Determine if the quadtree should be used for the binary predicate. + + Returns + ------- + bool + True if the quadtree should be used, False otherwise. + + Notes + ----- + 1. Quadtree is always used if user requests `allpairs=True`. + 2. If the number of polygons in the lhs is less than 32, we use the + byte-limited algorithm because it is faster and has less memory + overhead. + 3. If the lhs contains more than 32 polygons, we use the quadtree + because it does not have a polygon-count limit. + 4. If the lhs contains multipolygons, we use quadtree because the + performance between quadtree and byte-limited is similar, but + code complexity would be higher if we did multipolygon + reconstruction on both code paths. + """ + return len(lhs) >= 32 or has_multipolygons(lhs) or self.config.allpairs + + def _compute_predicate( + self, + lhs: "GeoSeries", + rhs: "GeoSeries", + preprocessor_result: PreprocessorResult, + ): + """Compute the contains_properly relationship between two GeoSeries. + A feature A contains another feature B if no points of B lie in the + exterior of A, and at least one point of the interior of B lies in the + interior of A. This is the inverse of `within`.""" + if not contains_only_polygons(lhs): + raise TypeError( + "`.contains` can only be called with polygon series." + ) + points = preprocessor_result.final_rhs + point_indices = preprocessor_result.point_indices + if self._should_use_quadtree(lhs): + result = contains_properly(lhs, points, how="quadtree") + else: + result = contains_properly(lhs, points, how="byte-limited") + op_result = ContainsOpResult(result, points, point_indices) + return self._postprocess(lhs, rhs, op_result) + + def _convert_quadtree_result_from_part_to_polygon_indices( + self, lhs, point_result + ): + """Convert the result of a quadtree contains_properly call from + part indices to polygon indices. + + Parameters + ---------- + point_result : cudf.Series + The result of a quadtree contains_properly call. This result + contains the `part_index` of the polygon that contains the + point, not the polygon index. + + Returns + ------- + cudf.Series + The result of a quadtree contains_properly call. This result + contains the `polygon_index` of the polygon that contains the + point, not the part index. + """ + # Get the length of each part, map it to indices, and store + # the result in a dataframe. + rings_to_parts = cp.array(lhs.polygons.part_offset) + part_sizes = rings_to_parts[1:] - rings_to_parts[:-1] + parts_map = cudf.Series( + cp.arange(len(part_sizes)), name="part_index" + ).repeat(part_sizes) + parts_index_mapping_df = parts_map.reset_index(drop=True).reset_index() + # Map the length of each polygon in a similar fashion, then + # join them below. + parts_to_geoms = cp.array(lhs.polygons.geometry_offset) + geometry_sizes = parts_to_geoms[1:] - parts_to_geoms[:-1] + geometry_map = cudf.Series( + cp.arange(len(geometry_sizes)), name="polygon_index" + ).repeat(geometry_sizes) + geom_index_mapping_df = geometry_map.reset_index(drop=True) + geom_index_mapping_df.index.name = "part_index" + geom_index_mapping_df = geom_index_mapping_df.reset_index() + # Replace the part index with the polygon index by join + part_result = parts_index_mapping_df.merge( + point_result, on="part_index" + ) + # Replace the polygon index with the row index by join + return geom_index_mapping_df.merge(part_result, on="part_index")[ + ["polygon_index", "point_index"] + ] + + def _reindex_allpairs(self, lhs, op_result) -> Union[Series, DataFrame]: + """Prepare the allpairs result of a contains_properly call as + the first step of postprocessing. + + Parameters + ---------- + lhs : GeoSeries + The left-hand side of the binary predicate. + op_result : ContainsOpResult + The result of the contains_properly call. + + Returns + ------- + cudf.DataFrame + + """ + # Convert the quadtree part indices df into a polygon indices df + polygon_indices = ( + self._convert_quadtree_result_from_part_to_polygon_indices( + lhs, op_result.result + ) + ) + # Because the quadtree contains_properly call returns a list of + # points that are contained in each part, parts can be duplicated + # once their index is converted to a polygon index. + allpairs_result = polygon_indices.drop_duplicates() + + # Replace the polygon index with the original index + allpairs_result["polygon_index"] = allpairs_result[ + "polygon_index" + ].replace(Series(lhs.index, index=cp.arange(len(lhs.index)))) + + return allpairs_result + + def _postprocess(self, lhs, rhs, op_result): + """Postprocess the output GeoSeries to ensure that they are of the + correct type for the predicate. + + Postprocess for contains_properly has to handle multiple input and + output configurations. + + The input can be a single polygon, a single multipolygon, or a + GeoSeries containing a mix of polygons and multipolygons. + + The input to postprocess is `point_indices`, which can be either a + cudf.DataFrame with one row per point and one column per polygon or + a cudf.DataFrame containing the point index and the part index for + each point in the polygon. + + Parameters + ---------- + lhs : GeoSeries + The left-hand side of the binary predicate. + rhs : GeoSeries + The right-hand side of the binary predicate. + preprocessor_output : ContainsOpResult + The result of the contains_properly call. + + Returns + ------- + cudf.Series or cudf.DataFrame + A Series of boolean values indicating whether each feature in + the rhs GeoSeries is contained in the lhs GeoSeries in the + case of allpairs=False. Otherwise, a DataFrame containing the + point index and the polygon index for each point in the + polygon. + """ + if len(op_result.result) == 0: + return _false_series(len(lhs)) + + # Convert the quadtree part indices df into a polygon indices df. + # Helps with handling multipolygons. + allpairs_result = self._reindex_allpairs(lhs, op_result) + + # If the user wants all pairs, return the result. Otherwise, + # return a boolean series indicating whether each point is + # contained in the corresponding polygon. + if self.config.allpairs: + return allpairs_result + else: + # for each input pair i: result[i] =  true iff point[i] is + # contained in at least one polygon of multipolygon[i]. + # pairwise + final_result = _false_series(len(rhs)) + if len(lhs) == len(rhs): + matches = ( + allpairs_result["polygon_index"] + == allpairs_result["point_index"] + ) + polygon_indexes = allpairs_result["polygon_index"][matches] + final_result.loc[ + op_result.point_indices[polygon_indexes] + ] = True + return final_result + else: + final_result.loc[allpairs_result["polygon_index"]] = True + return final_result + + +class PolygonComplexContains(ContainsPredicateBase): + """Base class for contains operations that use a complex object on + the right hand side. + + This class is shared by the Polygon*Contains classes that use + a non-points object on the right hand side: MultiPoint, LineString, + MultiLineString, Polygon, and MultiPolygon. + """ + + def _postprocess(self, lhs, rhs, preprocessor_output): + # for each input pair i: result[i] =  true iff point[i] is + # contained in at least one polygon of multipolygon[i]. + # pairwise + point_indices = preprocessor_output.point_indices + allpairs_result = self._reindex_allpairs(lhs, preprocessor_output) + if isinstance(allpairs_result, Series): + return allpairs_result + + (hits, expected_count,) = _count_results_in_multipoint_geometries( + point_indices, allpairs_result + ) + result_df = hits.reset_index().merge( + expected_count.reset_index(), on="rhs_index" + ) + result_df["feature_in_polygon"] = ( + result_df["point_index_x"] >= result_df["point_index_y"] + ) + final_result = _false_series(len(rhs)) + final_result.loc[ + result_df["rhs_index"][result_df["feature_in_polygon"]] + ] = True + return final_result + + +class PointPointContains(ContainsPredicateBase): + def _preprocess(self, lhs, rhs): + """PointPointContains that simply calls the equals predicate on the + points.""" + + predicate = EQUALS_DISPATCH_DICT[(lhs.column_type, rhs.column_type)]( + align=self.config.align + ) + return predicate(lhs, rhs) + + +"""DispatchDict listing the classes to use for each combination of + left and right hand side types. """ +DispatchDict = { + (Point, Point): PointPointContains, + (Point, MultiPoint): NotImplementedPredicate, + (Point, LineString): NotImplementedPredicate, + (Point, Polygon): NotImplementedPredicate, + (MultiPoint, Point): NotImplementedPredicate, + (MultiPoint, MultiPoint): NotImplementedPredicate, + (MultiPoint, LineString): NotImplementedPredicate, + (MultiPoint, Polygon): NotImplementedPredicate, + (LineString, Point): NotImplementedPredicate, + (LineString, MultiPoint): NotImplementedPredicate, + (LineString, LineString): NotImplementedPredicate, + (LineString, Polygon): NotImplementedPredicate, + (Polygon, Point): ContainsPredicateBase, + (Polygon, MultiPoint): PolygonComplexContains, + (Polygon, LineString): PolygonComplexContains, + (Polygon, Polygon): PolygonComplexContains, +} diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_covers.py b/python/cuspatial/cuspatial/core/binpreds/feature_covers.py new file mode 100644 index 000000000..02a0dcf3d --- /dev/null +++ b/python/cuspatial/cuspatial/core/binpreds/feature_covers.py @@ -0,0 +1,52 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +from cuspatial.core.binpreds.binpred_interface import NotImplementedPredicate +from cuspatial.core.binpreds.feature_equals import EqualsPredicateBase +from cuspatial.utils.binpred_utils import ( + LineString, + MultiPoint, + Point, + Polygon, +) + + +class CoversPredicateBase(EqualsPredicateBase): + """Implements the covers predicate across different combinations of + geometry types. For example, a Point-Polygon covers predicate is + defined in terms of a Point-Point equals predicate. The initial release + implements covers predicates that depend only on the equals predicate, or + depend on no predicate, such as impossible cases like + `LineString.covers(Polygon)`. + + For this initial release, cover is supported for the following types: + + Point.covers(Point) + Point.covers(Polygon) + LineString.covers(Polygon) + Polygon.covers(Point) + Polygon.covers(MultiPoint) + Polygon.covers(LineString) + Polygon.covers(Polygon) + """ + + pass + + +DispatchDict = { + (Point, Point): CoversPredicateBase, + (Point, MultiPoint): NotImplementedPredicate, + (Point, LineString): NotImplementedPredicate, + (Point, Polygon): CoversPredicateBase, + (MultiPoint, Point): NotImplementedPredicate, + (MultiPoint, MultiPoint): NotImplementedPredicate, + (MultiPoint, LineString): NotImplementedPredicate, + (MultiPoint, Polygon): NotImplementedPredicate, + (LineString, Point): NotImplementedPredicate, + (LineString, MultiPoint): NotImplementedPredicate, + (LineString, LineString): NotImplementedPredicate, + (LineString, Polygon): CoversPredicateBase, + (Polygon, Point): CoversPredicateBase, + (Polygon, MultiPoint): CoversPredicateBase, + (Polygon, LineString): CoversPredicateBase, + (Polygon, Polygon): CoversPredicateBase, +} diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_crosses.py b/python/cuspatial/cuspatial/core/binpreds/feature_crosses.py new file mode 100644 index 000000000..0d3c0fe08 --- /dev/null +++ b/python/cuspatial/cuspatial/core/binpreds/feature_crosses.py @@ -0,0 +1,46 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +from cuspatial.core.binpreds.binpred_interface import NotImplementedPredicate +from cuspatial.core.binpreds.feature_equals import EqualsPredicateBase +from cuspatial.utils.binpred_utils import ( + LineString, + MultiPoint, + Point, + Polygon, + _false_series, +) + + +class CrossesPredicateBase(EqualsPredicateBase): + """Base class for binary predicates that are defined in terms of a + the equals binary predicate. For example, a Point-Point Crosses + predicate is defined in terms of a Point-Point Equals predicate. + """ + + pass + + +class PointPointCrosses(CrossesPredicateBase): + def _preprocess(self, lhs, rhs): + """Points can't cross other points, so we return False.""" + return _false_series(len(lhs)) + + +DispatchDict = { + (Point, Point): PointPointCrosses, + (Point, MultiPoint): NotImplementedPredicate, + (Point, LineString): NotImplementedPredicate, + (Point, Polygon): CrossesPredicateBase, + (MultiPoint, Point): NotImplementedPredicate, + (MultiPoint, MultiPoint): NotImplementedPredicate, + (MultiPoint, LineString): NotImplementedPredicate, + (MultiPoint, Polygon): NotImplementedPredicate, + (LineString, Point): NotImplementedPredicate, + (LineString, MultiPoint): NotImplementedPredicate, + (LineString, LineString): NotImplementedPredicate, + (LineString, Polygon): NotImplementedPredicate, + (Polygon, Point): CrossesPredicateBase, + (Polygon, MultiPoint): CrossesPredicateBase, + (Polygon, LineString): CrossesPredicateBase, + (Polygon, Polygon): CrossesPredicateBase, +} diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_equals.py b/python/cuspatial/cuspatial/core/binpreds/feature_equals.py new file mode 100644 index 000000000..8b1480b97 --- /dev/null +++ b/python/cuspatial/cuspatial/core/binpreds/feature_equals.py @@ -0,0 +1,341 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +from __future__ import annotations + +from typing import Generic, TypeVar + +import cupy as cp + +import cudf +from cudf import Series + +import cuspatial +from cuspatial.core.binpreds.binpred_interface import ( + BinPred, + EqualsOpResult, + NotImplementedPredicate, + PreprocessorResult, +) +from cuspatial.utils.binpred_utils import ( + LineString, + MultiPoint, + Point, + Polygon, + _false_series, +) + +GeoSeries = TypeVar("GeoSeries") + + +class EqualsPredicateBase(BinPred, Generic[GeoSeries]): + """Base class for binary predicates that are defined in terms of the equals + basic predicate. `EqualsPredicateBase` implements utility functions that + are used within many equals-related binary predicates. + """ + + def _offset_equals(self, lhs, rhs): + """Compute the pairwise length equality of two offset arrays. Consider + the following example: + + lhs = [0, 3, 5, 7] + rhs = [0, 2, 4, 6] + + _offset_equals(lhs, rhs) returns [False, True, True, True]. The first + element is False because the first object in lhs has 3 points, while + the first object in rhs has 2 points. The remaining elements are True + because the remaining objects in lhs and rhs have the same number of + points. + + Parameters + ---------- + lhs : cudf.Series + left-hand-side offset array + rhs : cudf.Series + right-hand-side offset array + + Returns + ------- + cudf.Series + pairwise length equality + """ + lhs_lengths = lhs[:-1] - lhs[1:] + rhs_lengths = rhs[:-1] - rhs[1:] + return lhs_lengths == rhs_lengths + + def _sort_interleaved_points_by_offset(self, coords, offsets, sort_order): + """Sort xy according to bins defined by offset. Sort order is a list + of column names to sort by. + + `_sort_interleaved_points_by_offset` creates a dataframe with the + following columns: + "sizes": an index for each object represented in `coords`. + "points": an index for each point in `coords`. + "xy_key": an index that maintains x/y ordering. + "xy": the x/y coordinates in `coords`. + + The dataframe is sorted according to keys passed in by the caller. + For sorting multipoints, the keys in order are "object_key", "xy", + "xy_key". This sorts the points in each multipoint into the same + bin defined by "object_key", then sorts the points in each bin by + x/y coordinates, and finally sorts the points in each bin by the + `xy_key` which maintains that the x coordinate precedes the y + coordinate. + + For sorting linestrings, the keys in order are "object_key", + "point_key", "xy_key". This sorts the points in each linestring + into the same bin defined by "object_key", then sorts the points + in each bin by point ordering, and finally sorts the points in + each bin by x/y ordering. + + Parameters + ---------- + coords : cudf.Series + interleaved x,y coordinates + offsets : cudf.Series + offsets into coords + sort_order : list + list of column names to sort by. One of "object_key", "point_key", + "xy_key", and "xy". + + Returns + ------- + cudf.Series + sorted interleaved x,y coordinates + """ + sizes = offsets[1:] - offsets[:-1] + object_key = ( + cudf.Series(cp.arange(len(sizes))) + .repeat(sizes * 2) + .reset_index(drop=True) + ) + point_key = cp.arange(len(coords) // 2).repeat(2)[::-1] + xy_key = cp.tile([0, 1], len(coords) // 2) + sorting_df = cudf.DataFrame( + { + "object_key": object_key, + "point_key": point_key, + "xy_key": xy_key, + "xy": coords, + } + ) + sorted_df = sorting_df.sort_values(by=sort_order).reset_index( + drop=True + ) + return sorted_df["xy"] + + def _sort_multipoint_series(self, coords, offsets): + """Sort xy according to bins defined by offset. Consider an xy buffer + of 20 values and an offset buffer [0, 5]. This means that the first + multipoint has 5 points and the second multipoint has 5 points. The + first multipoint is sorted by x/y coordinates and the second + multipoint is sorted by x/y coordinates. The resultant sorted values + are stored in the same offset region, or bin, as the original + unsorted values. + + Parameters + ---------- + coords : cudf.Series + interleaved x,y coordinates + offsets : cudf.Series + offsets into coords + + Returns + ------- + cudf.Series + Coordinates sorted according to the bins defined by offsets. + """ + result = self._sort_interleaved_points_by_offset( + coords, offsets, ["object_key", "xy", "xy_key"] + ) + result.name = None + return result + + def _sort_multipoints(self, lhs, rhs): + """Sort the coordinates of the multipoints in the left-hand and + right-hand GeoSeries. + + Parameters + ---------- + lhs : GeoSeries + The left-hand GeoSeries. + rhs : GeoSeries + The right-hand GeoSeries. + + Returns + ------- + lhs_result : Tuple + A tuple containing the sorted left-hand GeoSeries and the + sorted right-hand GeoSeries. + """ + lhs_sorted = self._sort_multipoint_series( + lhs.multipoints.xy, lhs.multipoints.geometry_offset + ) + rhs_sorted = self._sort_multipoint_series( + rhs.multipoints.xy, rhs.multipoints.geometry_offset + ) + lhs_result = cuspatial.core.geoseries.GeoSeries.from_multipoints_xy( + lhs_sorted, lhs.multipoints.geometry_offset + ) + rhs_result = cuspatial.core.geoseries.GeoSeries.from_multipoints_xy( + rhs_sorted, rhs.multipoints.geometry_offset + ) + lhs_result.index = lhs.index + rhs_result.index = rhs.index + return ( + lhs_result, + rhs_result, + ) + + def _reverse_linestrings(self, coords, offsets): + """Reverse the order of coordinates in a Arrow buffer of coordinates + and offsets.""" + result = self._sort_interleaved_points_by_offset( + coords, offsets, ["object_key", "point_key", "xy_key"] + ) + result.name = None + return result + + def _preprocess(self, lhs: "GeoSeries", rhs: "GeoSeries"): + """Convert the input geometry types into buffers of points that can + then be compared with the equals basic predicate. + + The equals basic predicate is simply the value-equality operation + on the coordinates of the points in the geometries. This means that + we can convert any geometry type into a buffer of points and then + compare the buffers with the equals basic predicate. + + Parameters + ---------- + lhs : GeoSeries + The left-hand GeoSeries. + rhs : GeoSeries + The right-hand GeoSeries. + + Returns + ------- + result : GeoSeries + A GeoSeries of boolean values indicating whether each feature in + the right-hand GeoSeries satisfies the requirements of a binary + predicate with its corresponding feature in the left-hand + GeoSeries. + """ + # Any unmatched type is not equal + if (lhs.feature_types != rhs.feature_types).any(): + return _false_series(len(lhs)) + return self._compute_predicate( + lhs, rhs, PreprocessorResult(None, rhs.point_indices) + ) + + def _vertices_equals(self, lhs: Series, rhs: Series): + """Compute the equals relationship between interleaved xy + coordinate buffers.""" + if not isinstance(lhs, Series): + raise TypeError("lhs must be a cudf.Series") + if not isinstance(rhs, Series): + raise TypeError("rhs must be a cudf.Series") + length = min(len(lhs), len(rhs)) + a = lhs[:length:2]._column == rhs[:length:2]._column + b = rhs[1:length:2]._column == lhs[1:length:2]._column + return a & b + + def _compute_predicate(self, lhs, rhs, preprocessor_result): + """Perform the binary predicate operation on the input GeoSeries. + The lhs and rhs are `GeoSeries` of points, and the point_indices + are the indices of the points in the rhs GeoSeries that correspond + to each feature in the rhs GeoSeries. + """ + result = self._vertices_equals(lhs.points.xy, rhs.points.xy) + return self._postprocess( + lhs, rhs, EqualsOpResult(result, preprocessor_result.point_indices) + ) + + def _postprocess(self, lhs, rhs, op_result): + """Postprocess the output GeoSeries to combine the resulting + comparisons into a single boolean value for each feature in the + rhs GeoSeries. + """ + return cudf.Series(op_result.result, dtype="bool") + + +class PolygonComplexEquals(EqualsPredicateBase): + def _postprocess(self, lhs, rhs, op_result): + """Postprocess the output GeoSeries to combine the resulting + comparisons into a single boolean value for each feature in the + rhs GeoSeries. + """ + if len(op_result.result) == 0: + return _false_series(len(lhs)) + result_df = cudf.DataFrame( + {"idx": op_result.point_indices, "equals": op_result.result} + ) + gb_idx = result_df.groupby("idx") + feature_equals_linestring = ( + gb_idx.sum().sort_index() == gb_idx.count().sort_index() + )["equals"] + result = _false_series(len(lhs)) + result[ + feature_equals_linestring.index + ] = feature_equals_linestring.values + return result + + +class MultiPointMultiPointEquals(PolygonComplexEquals): + def _preprocess(self, lhs, rhs): + """Sort the multipoints by their coordinates. This is necessary + because the order of the points in a multipoint is not significant + for the equals predicate.""" + (lhs_result, rhs_result) = self._sort_multipoints(lhs, rhs) + return self._compute_predicate( + lhs_result, rhs_result, rhs_result.point_indices + ) + + def _compute_predicate(self, lhs, rhs, point_indices): + result = self._vertices_equals(lhs.multipoints.xy, rhs.multipoints.xy) + return self._postprocess( + lhs, rhs, EqualsOpResult(result, point_indices) + ) + + +class LineStringLineStringEquals(PolygonComplexEquals): + def _compute_predicate(self, lhs, rhs, preprocessor_result): + """Linestrings can be compared either forward or reversed. We need + to compare both directions.""" + lengths_equal = self._offset_equals( + lhs.lines.part_offset, rhs.lines.part_offset + ) + lhs_lengths_equal = lhs[lengths_equal] + rhs_lengths_equal = rhs[lengths_equal] + lhs_reversed = self._reverse_linestrings( + lhs_lengths_equal.lines.xy, lhs_lengths_equal.lines.part_offset + ) + forward_result = self._vertices_equals( + lhs_lengths_equal.lines.xy, rhs_lengths_equal.lines.xy + ) + reverse_result = self._vertices_equals( + lhs_reversed, rhs_lengths_equal.lines.xy + ) + result = forward_result | reverse_result + return self._postprocess( + lhs, rhs, EqualsOpResult(result, lhs_lengths_equal.point_indices) + ) + + +"""DispatchDict for Equals operations.""" +DispatchDict = { + (Point, Point): EqualsPredicateBase, + (Point, MultiPoint): NotImplementedPredicate, + (Point, LineString): NotImplementedPredicate, + (Point, Polygon): EqualsPredicateBase, + (MultiPoint, Point): NotImplementedPredicate, + (MultiPoint, MultiPoint): MultiPointMultiPointEquals, + (MultiPoint, LineString): NotImplementedPredicate, + (MultiPoint, Polygon): NotImplementedPredicate, + (LineString, Point): NotImplementedPredicate, + (LineString, MultiPoint): NotImplementedPredicate, + (LineString, LineString): LineStringLineStringEquals, + (LineString, Polygon): EqualsPredicateBase, + (Polygon, Point): EqualsPredicateBase, + (Polygon, MultiPoint): EqualsPredicateBase, + (Polygon, LineString): EqualsPredicateBase, + (Polygon, Polygon): EqualsPredicateBase, +} diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py b/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py new file mode 100644 index 000000000..ca339b1dd --- /dev/null +++ b/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py @@ -0,0 +1,54 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +from cuspatial.core.binpreds.binpred_interface import NotImplementedPredicate +from cuspatial.core.binpreds.feature_contains import ContainsPredicateBase +from cuspatial.core.binpreds.feature_equals import EqualsPredicateBase +from cuspatial.utils.binpred_utils import ( + LineString, + MultiPoint, + Point, + Polygon, +) + + +class IntersectsPredicateBase(EqualsPredicateBase): + """Base class for binary predicates that are defined in terms of + the intersects basic predicate. These predicates are defined in terms + of the equals basic predicate. The type dispatches here that depend + on `IntersectsPredicateBase` use the `PredicateEquals` class for their + complete implementation, unmodified. + + point.intersects(polygon) is equivalent to polygon.contains(point) + with the left and right hand sides reversed. + """ + + pass + + +class PointPolygonIntersects(ContainsPredicateBase): + def _preprocess(self, lhs, rhs): + """Swap LHS and RHS and call the normal contains processing.""" + self.lhs = rhs + self.rhs = lhs + return super()._preprocess(rhs, lhs) + + +""" Type dispatch dictionary for intersects binary predicates. """ +DispatchDict = { + (Point, Point): IntersectsPredicateBase, + (Point, MultiPoint): NotImplementedPredicate, + (Point, LineString): NotImplementedPredicate, + (Point, Polygon): PointPolygonIntersects, + (MultiPoint, Point): NotImplementedPredicate, + (MultiPoint, MultiPoint): NotImplementedPredicate, + (MultiPoint, LineString): NotImplementedPredicate, + (MultiPoint, Polygon): NotImplementedPredicate, + (LineString, Point): NotImplementedPredicate, + (LineString, MultiPoint): NotImplementedPredicate, + (LineString, LineString): NotImplementedPredicate, + (LineString, Polygon): NotImplementedPredicate, + (Polygon, Point): IntersectsPredicateBase, + (Polygon, MultiPoint): IntersectsPredicateBase, + (Polygon, LineString): IntersectsPredicateBase, + (Polygon, Polygon): IntersectsPredicateBase, +} diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_overlaps.py b/python/cuspatial/cuspatial/core/binpreds/feature_overlaps.py new file mode 100644 index 000000000..6956c9cbf --- /dev/null +++ b/python/cuspatial/cuspatial/core/binpreds/feature_overlaps.py @@ -0,0 +1,75 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +import cudf + +from cuspatial.core.binpreds.binpred_interface import NotImplementedPredicate +from cuspatial.core.binpreds.feature_contains import ContainsPredicateBase +from cuspatial.core.binpreds.feature_equals import EqualsPredicateBase +from cuspatial.utils.binpred_utils import ( + LineString, + MultiPoint, + Point, + Polygon, + _false_series, +) +from cuspatial.utils.column_utils import has_same_geometry + + +class OverlapsPredicateBase(EqualsPredicateBase): + """Base class for overlaps binary predicate. Depends on the + equals predicate for all implementations up to this point. + For example, a Point-Point Crosses predicate is defined in terms + of a Point-Point Equals predicate. + """ + + pass + + +class PointPointOverlaps(OverlapsPredicateBase): + def _preprocess(self, lhs, rhs): + """Points can't overlap other points, so we return False.""" + return _false_series(len(lhs)) + + +class PolygonPointOverlaps(ContainsPredicateBase): + def _postprocess(self, lhs, rhs, op_result): + if not has_same_geometry(lhs, rhs) or len(op_result.point_result) == 0: + return _false_series(len(lhs)) + polygon_indices = ( + self._convert_quadtree_result_from_part_to_polygon_indices( + op_result.point_result + ) + ) + group_counts = polygon_indices.groupby("polygon_index").count() + point_counts = ( + cudf.DataFrame( + {"point_indices": op_result.point_indices, "input_size": True} + ) + .groupby("point_indices") + .count() + ) + result = (group_counts["point_index"] > 0) & ( + group_counts["point_index"] < point_counts["input_size"] + ) + return result + + +"""Dispatch table for overlaps binary predicate.""" +DispatchDict = { + (Point, Point): PointPointOverlaps, + (Point, MultiPoint): NotImplementedPredicate, + (Point, LineString): NotImplementedPredicate, + (Point, Polygon): OverlapsPredicateBase, + (MultiPoint, Point): NotImplementedPredicate, + (MultiPoint, MultiPoint): NotImplementedPredicate, + (MultiPoint, LineString): NotImplementedPredicate, + (MultiPoint, Polygon): NotImplementedPredicate, + (LineString, Point): NotImplementedPredicate, + (LineString, MultiPoint): NotImplementedPredicate, + (LineString, LineString): NotImplementedPredicate, + (LineString, Polygon): NotImplementedPredicate, + (Polygon, Point): OverlapsPredicateBase, + (Polygon, MultiPoint): OverlapsPredicateBase, + (Polygon, LineString): OverlapsPredicateBase, + (Polygon, Polygon): OverlapsPredicateBase, +} diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_within.py b/python/cuspatial/cuspatial/core/binpreds/feature_within.py new file mode 100644 index 000000000..e438e5cea --- /dev/null +++ b/python/cuspatial/cuspatial/core/binpreds/feature_within.py @@ -0,0 +1,82 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +import cudf + +from cuspatial.core.binpreds.binpred_interface import NotImplementedPredicate +from cuspatial.core.binpreds.feature_contains import ContainsPredicateBase +from cuspatial.core.binpreds.feature_equals import EqualsPredicateBase +from cuspatial.utils import binpred_utils +from cuspatial.utils.binpred_utils import ( + LineString, + MultiPoint, + Point, + Polygon, + _false_series, +) + + +class RootWithin(EqualsPredicateBase): + """Base class for binary predicates that are defined in terms of a + root-level binary predicate. For example, a Point-Point Within + predicate is defined in terms of a Point-Point Contains predicate. + """ + + pass + + +class PointPointWithin(RootWithin): + def _postprocess(self, lhs, rhs, op_result): + return cudf.Series(op_result.result) + + +class PointPolygonWithin(ContainsPredicateBase): + def _preprocess(self, lhs, rhs): + # Note the order of arguments is reversed. + return super()._preprocess(rhs, lhs) + + +class ComplexPolygonWithin(ContainsPredicateBase): + def _preprocess(self, lhs, rhs): + # Note the order of arguments is reversed. + return super()._preprocess(rhs, lhs) + + def _postprocess(self, lhs, rhs, op_result): + """Postprocess the output GeoSeries to ensure that they are of the + correct type for the predicate.""" + ( + hits, + expected_count, + ) = binpred_utils._count_results_in_multipoint_geometries( + op_result.point_indices, op_result.result + ) + result_df = hits.reset_index().merge( + expected_count.reset_index(), on="rhs_index" + ) + result_df["feature_in_polygon"] = ( + result_df["point_index_x"] >= result_df["point_index_y"] + ) + final_result = _false_series(len(lhs)) + final_result.loc[ + result_df["rhs_index"][result_df["feature_in_polygon"]] + ] = True + return final_result + + +DispatchDict = { + (Point, Point): PointPointWithin, + (Point, MultiPoint): NotImplementedPredicate, + (Point, LineString): NotImplementedPredicate, + (Point, Polygon): PointPolygonWithin, + (MultiPoint, Point): NotImplementedPredicate, + (MultiPoint, MultiPoint): NotImplementedPredicate, + (MultiPoint, LineString): NotImplementedPredicate, + (MultiPoint, Polygon): ComplexPolygonWithin, + (LineString, Point): NotImplementedPredicate, + (LineString, MultiPoint): NotImplementedPredicate, + (LineString, LineString): NotImplementedPredicate, + (LineString, Polygon): ComplexPolygonWithin, + (Polygon, Point): RootWithin, + (Polygon, MultiPoint): RootWithin, + (Polygon, LineString): RootWithin, + (Polygon, Polygon): ComplexPolygonWithin, +} diff --git a/python/cuspatial/cuspatial/core/geoseries.py b/python/cuspatial/cuspatial/core/geoseries.py index 41d5f7eb8..944a12486 100644 --- a/python/cuspatial/cuspatial/core/geoseries.py +++ b/python/cuspatial/cuspatial/core/geoseries.py @@ -24,16 +24,28 @@ from cudf.core.column.column import as_column import cuspatial.io.pygeoarrow as pygeoarrow -from cuspatial.core._column.geocolumn import GeoColumn +from cuspatial.core._column.geocolumn import ColumnType, GeoColumn from cuspatial.core._column.geometa import Feature_Enum, GeoMeta -from cuspatial.core.binpreds.binpreds import ( - ContainsProperlyBinpred, - CoversBinpred, - CrossesBinpred, - EqualsBinpred, - IntersectsBinpred, - OverlapsBinpred, - WithinBinpred, +from cuspatial.core.binpreds.feature_contains import ( + DispatchDict as CONTAINS_DISPATCH, +) +from cuspatial.core.binpreds.feature_covers import ( + DispatchDict as COVERS_DISPATCH, +) +from cuspatial.core.binpreds.feature_crosses import ( + DispatchDict as CROSSES_DISPATCH, +) +from cuspatial.core.binpreds.feature_equals import ( + DispatchDict as EQUALS_DISPATCH, +) +from cuspatial.core.binpreds.feature_intersects import ( + DispatchDict as INTERSECTS_DISPATCH, +) +from cuspatial.core.binpreds.feature_overlaps import ( + DispatchDict as OVERLAPS_DISPATCH, +) +from cuspatial.core.binpreds.feature_within import ( + DispatchDict as WITHIN_DISPATCH, ) from cuspatial.utils.column_utils import ( contains_only_linestrings, @@ -121,6 +133,23 @@ def type(self): ) return result + @property + def column_type(self): + """This is used to determine the type of the GeoColumn. + It is a value returning method that produces the same result as + the various `contains_only_*` methods, except as an Enum instead + of many booleans.""" + if contains_only_polygons(self): + return ColumnType.POLYGON + elif contains_only_linestrings(self): + return ColumnType.LINESTRING + elif contains_only_multipoints(self): + return ColumnType.MULTIPOINT + elif contains_only_points(self): + return ColumnType.POINT + else: + return ColumnType.MIXED + @property def point_indices(self): if contains_only_polygons(self): @@ -132,6 +161,8 @@ def point_indices(self): elif contains_only_points(self): return self.points.point_indices() else: + if len(self) == 0: + return cudf.Series([0], dtype="int32") raise TypeError( "GeoSeries must contain only Points, MultiPoints, Lines, or " "Polygons to return point indices." @@ -1017,9 +1048,10 @@ def contains_properly(self, other, align=False, allpairs=False): `point_indices` and `polygon_indices`, each of which is a `Series` of `dtype('int32')` in the case of `allpairs=True`. """ - if contains_only_points(self) and contains_only_points(self): - return EqualsBinpred(self, other, align)() - return ContainsProperlyBinpred(self, other, align, allpairs)() + predicate = CONTAINS_DISPATCH[(self.column_type, other.column_type)]( + align=align, allpairs=allpairs + ) + return predicate(self, other) def geom_equals(self, other, align=True): """Compute if a GeoSeries of features A is equal to a GeoSeries of @@ -1064,7 +1096,10 @@ def geom_equals(self, other, align=True): A Series of boolean values indicating whether each feature in A is equal to the corresponding feature in B. """ - return EqualsBinpred(self, other, align)() + predicate = EQUALS_DISPATCH[(self.column_type, other.column_type)]( + align=align + ) + return predicate(self, other) def covers(self, other, align=True): """Compute if a GeoSeries of features A covers a second GeoSeries of @@ -1084,7 +1119,10 @@ def covers(self, other, align=True): input GeoSeries covers the corresponding feature in the other GeoSeries. """ - return CoversBinpred(self, other, align)() + predicate = COVERS_DISPATCH[(self.column_type, other.column_type)]( + align=align + ) + return predicate(self, other) def intersects(self, other, align=True): """Returns a `Series` of `dtype('bool')` with value `True` for each @@ -1106,10 +1144,10 @@ def intersects(self, other, align=True): A Series of boolean values indicating whether the geometries of each row intersect. """ - if contains_only_points(self) and contains_only_points(other): - return cudf.Series(EqualsBinpred(self, other, align)()) - else: - return IntersectsBinpred(self, other, align)() + predicate = INTERSECTS_DISPATCH[(self.column_type, other.column_type)]( + align=align + ) + return predicate(self, other) def within(self, other, align=True): """Returns a `Series` of `dtype('bool')` with value `True` for each @@ -1132,10 +1170,10 @@ def within(self, other, align=True): A Series of boolean values indicating whether each feature falls within the corresponding polygon in the input. """ - if contains_only_points(self) and contains_only_points(other): - return cudf.Series(EqualsBinpred(self, other, align)()) - else: - return WithinBinpred(self, other, align)() + predicate = WITHIN_DISPATCH[(self.column_type, other.column_type)]( + align=align + ) + return predicate(self, other) def overlaps(self, other, align=True): """Returns True for all aligned geometries that overlap other, else @@ -1158,11 +1196,10 @@ def overlaps(self, other, align=True): result : cudf.Series A Series of boolean values indicating whether each geometry overlaps the corresponding geometry in the input.""" - # Overlaps has the same requirement as crosses. - if contains_only_points(self) and contains_only_points(other): - return cudf.Series([False] * len(self)) - else: - return OverlapsBinpred(self, other, align=align)() + predicate = OVERLAPS_DISPATCH[(self.column_type, other.column_type)]( + align=align + ) + return predicate(self, other) def crosses(self, other, align=True): """Returns True for all aligned geometries that cross other, else @@ -1185,7 +1222,7 @@ def crosses(self, other, align=True): result : cudf.Series A Series of boolean values indicating whether each geometry crosses the corresponding geometry in the input.""" - if contains_only_points(self) and contains_only_points(other): - return cudf.Series([False] * len(self)) - else: - return CrossesBinpred(self, other, align=align)() + predicate = CROSSES_DISPATCH[(self.column_type, other.column_type)]( + align=align + ) + return predicate(self, other) diff --git a/python/cuspatial/cuspatial/tests/binpreds/test_binpred_internals.py b/python/cuspatial/cuspatial/tests/binpreds/test_binpred_internals.py index 1c59e2882..7d18530ac 100644 --- a/python/cuspatial/cuspatial/tests/binpreds/test_binpred_internals.py +++ b/python/cuspatial/cuspatial/tests/binpreds/test_binpred_internals.py @@ -4,6 +4,7 @@ from shapely.geometry import LineString import cuspatial +from cuspatial.core.binpreds.binpred_dispatch import EQUALS_DISPATCH def test_internal_reversed_linestrings(): @@ -17,10 +18,10 @@ def test_internal_reversed_linestrings(): LineString([(0, 0), (1, 0), (1, 1), (0, 0)]), ] ) - from cuspatial.core.binpreds.binpreds import EqualsBinpred - - bp = EqualsBinpred(linestring1, linestring2) - got = bp._reverse_linestrings( + predicate = EQUALS_DISPATCH[ + (linestring1.column_type, linestring2.column_type) + ]() + got = predicate._reverse_linestrings( linestring1.lines.xy, linestring1.lines.part_offset ).to_pandas() expected = linestring2.lines.xy.to_pandas() @@ -40,10 +41,10 @@ def test_internal_reversed_linestrings_pair(): LineString([(1, 0), (1, 1), (0, 0)]), ] ) - from cuspatial.core.binpreds.binpreds import EqualsBinpred - - bp = EqualsBinpred(linestring1, linestring2) - got = bp._reverse_linestrings( + predicate = EQUALS_DISPATCH[ + (linestring1.column_type, linestring2.column_type) + ]() + got = predicate._reverse_linestrings( linestring1.lines.xy, linestring1.lines.part_offset ).to_pandas() expected = linestring2.lines.xy.to_pandas() @@ -65,10 +66,10 @@ def test_internal_reversed_linestrings_triple(): LineString([(1, 1), (0, 0), (1, 0), (1, 1), (0, 0)]), ] ) - from cuspatial.core.binpreds.binpreds import EqualsBinpred - - bp = EqualsBinpred(linestring1, linestring2) - got = bp._reverse_linestrings( + predicate = EQUALS_DISPATCH[ + (linestring1.column_type, linestring2.column_type) + ]() + got = predicate._reverse_linestrings( linestring1.lines.xy, linestring1.lines.part_offset ).to_pandas() expected = linestring2.lines.xy.to_pandas() diff --git a/python/cuspatial/cuspatial/tests/binpreds/test_equals_only_binpreds.py b/python/cuspatial/cuspatial/tests/binpreds/test_equals_only_binpreds.py index 84c4e0f25..3dfdbf2a8 100644 --- a/python/cuspatial/cuspatial/tests/binpreds/test_equals_only_binpreds.py +++ b/python/cuspatial/cuspatial/tests/binpreds/test_equals_only_binpreds.py @@ -106,8 +106,8 @@ def test_3_linestrings_equals_3_linestrings_one_equal(lhs): def test_10_linestrings_geom_equals_10_linestrings(linestring_generator): - gpdlines1 = gpd.GeoSeries([*linestring_generator(11, 5)]) - gpdlines2 = gpd.GeoSeries([*linestring_generator(11, 5)]) + gpdlines1 = gpd.GeoSeries([*linestring_generator(10, 5)]) + gpdlines2 = gpd.GeoSeries([*linestring_generator(10, 5)]) lines1 = cuspatial.from_geopandas(gpdlines1) lines2 = cuspatial.from_geopandas(gpdlines2) got = lines1.geom_equals(lines2) @@ -732,50 +732,6 @@ def test_linestring_orders(): pd.testing.assert_series_equal(expected, got.to_pandas()) -def test_internal_reversed_linestrings(): - linestring1 = cuspatial.GeoSeries( - [ - LineString([(0, 0), (1, 1), (1, 0), (0, 0)]), - ] - ) - linestring2 = cuspatial.GeoSeries( - [ - LineString([(0, 0), (1, 0), (1, 1), (0, 0)]), - ] - ) - from cuspatial.core.binpreds.binpreds import EqualsBinpred - - bp = EqualsBinpred(linestring1, linestring2) - got = bp._reverse_linestrings( - linestring1.lines.xy, linestring1.lines.part_offset - ).to_pandas() - expected = linestring2.lines.xy.to_pandas() - pd.testing.assert_series_equal(got, expected) - - -def test_internal_reversed_linestrings_pair(): - linestring1 = cuspatial.GeoSeries( - [ - LineString([(0, 0), (1, 1), (1, 0), (0, 0)]), - LineString([(0, 0), (1, 1), (1, 0)]), - ] - ) - linestring2 = cuspatial.GeoSeries( - [ - LineString([(0, 0), (1, 0), (1, 1), (0, 0)]), - LineString([(1, 0), (1, 1), (0, 0)]), - ] - ) - from cuspatial.core.binpreds.binpreds import EqualsBinpred - - bp = EqualsBinpred(linestring1, linestring2) - got = bp._reverse_linestrings( - linestring1.lines.xy, linestring1.lines.part_offset - ).to_pandas() - expected = linestring2.lines.xy.to_pandas() - pd.testing.assert_series_equal(got, expected) - - def test_from_points_xy_large(): points = cuspatial.GeoSeries( cuspatial.core._column.geocolumn.GeoColumn._from_points_xy( diff --git a/python/cuspatial/cuspatial/utils/binpred_utils.py b/python/cuspatial/cuspatial/utils/binpred_utils.py new file mode 100644 index 000000000..8cbddbda2 --- /dev/null +++ b/python/cuspatial/cuspatial/utils/binpred_utils.py @@ -0,0 +1,56 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +import cupy as cp + +import cudf + +from cuspatial.core._column.geocolumn import ColumnType + +"""Column-Type objects to use for simple syntax in the `DispatchDict` contained +in each `feature_.py` file. For example, instead of writing out +`ColumnType.POINT`, we can just write `Point`. +""" +Point = ColumnType.POINT +MultiPoint = ColumnType.MULTIPOINT +LineString = ColumnType.LINESTRING +Polygon = ColumnType.POLYGON + + +def _false_series(size): + """Return a Series of False values""" + return cudf.Series(cp.zeros(size, dtype=cp.bool_)) + + +def _count_results_in_multipoint_geometries(point_indices, point_result): + """Count the number of points in each multipoint geometry. + + Parameters + ---------- + point_indices : cudf.Series + The indices of the points in the original (rhs) GeoSeries. + point_result : cudf.DataFrame + The result of a contains_properly call. + + Returns + ------- + cudf.Series + The number of points that fell within a particular polygon id. + cudf.Series + The number of points in each multipoint geometry. + """ + point_indices_df = cudf.Series( + point_indices, + name="rhs_index", + index=cudf.RangeIndex(len(point_indices), name="point_index"), + ).reset_index() + with_rhs_indices = point_result.merge(point_indices_df, on="point_index") + points_grouped_by_original_polygon = with_rhs_indices[ + ["point_index", "rhs_index"] + ].drop_duplicates() + hits = ( + points_grouped_by_original_polygon.groupby("rhs_index") + .count() + .sort_index() + ) + expected_count = point_indices_df.groupby("rhs_index").count().sort_index() + return hits, expected_count