From f87c2d8c407dbe380fe949dc86723dcffacd45c1 Mon Sep 17 00:00:00 2001 From: Navneet Verma Date: Thu, 15 Dec 2022 13:41:27 -0800 Subject: [PATCH] Added Integration tests for geotile and geohash aggregations on geoshapes Signed-off-by: Navneet Verma --- CHANGELOG.md | 1 + modules/geo/build.gradle | 2 +- .../geo/GeoModulePluginIntegTestCase.java | 4 + .../AbstractBucketAggregationIntegTest.java | 75 +++++ .../aggregations/bucket/GeoHashGridIT.java | 309 +++++++++++------- .../aggregations/bucket/GeoTileGridIT.java | 249 ++++++++++++++ .../bucket/geogrid/cells/CellValues.java | 4 - .../aggregations/bucket/GeoTileUtils.java | 14 +- 8 files changed, 540 insertions(+), 118 deletions(-) create mode 100644 modules/geo/src/internalClusterTest/java/org/opensearch/geo/search/aggregations/bucket/AbstractBucketAggregationIntegTest.java create mode 100644 modules/geo/src/internalClusterTest/java/org/opensearch/geo/search/aggregations/bucket/GeoTileGridIT.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c7aa5b1667ee..446c4e2625709 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -94,6 +94,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Added - Prevent deletion of snapshots that are backing searchable snapshot indexes ([#5069](https://github.com/opensearch-project/OpenSearch/pull/5069)) - Add max_shard_size parameter for shrink API ([#5229](https://github.com/opensearch-project/OpenSearch/pull/5229)) +- Add GeoTile and GeoHash Grid aggregations on GeoShapes. ([#5589](https://github.com/opensearch-project/OpenSearch/pull/5589)) ### Dependencies - Bumps `bcpg-fips` from 1.0.5.1 to 1.0.7.1 diff --git a/modules/geo/build.gradle b/modules/geo/build.gradle index 6b00709f08bf9..7ab6f80b65ca2 100644 --- a/modules/geo/build.gradle +++ b/modules/geo/build.gradle @@ -31,7 +31,7 @@ apply plugin: 'opensearch.yaml-rest-test' apply plugin: 'opensearch.internal-cluster-test' opensearchplugin { - description 'Plugin for geospatial features in OpenSearch. Registering the geo_shape and aggregations GeoBounds on Geo_Shape and Geo_Point' + description 'Plugin for geospatial features in OpenSearch. Registering the geo_shape and aggregations on GeoShape and GeoPoint' classname 'org.opensearch.geo.GeoModulePlugin' } diff --git a/modules/geo/src/internalClusterTest/java/org/opensearch/geo/GeoModulePluginIntegTestCase.java b/modules/geo/src/internalClusterTest/java/org/opensearch/geo/GeoModulePluginIntegTestCase.java index 31ff2ef4689bd..b17f4804d4d50 100644 --- a/modules/geo/src/internalClusterTest/java/org/opensearch/geo/GeoModulePluginIntegTestCase.java +++ b/modules/geo/src/internalClusterTest/java/org/opensearch/geo/GeoModulePluginIntegTestCase.java @@ -8,6 +8,8 @@ package org.opensearch.geo; +import org.opensearch.geometry.utils.StandardValidator; +import org.opensearch.geometry.utils.WellKnownText; import org.opensearch.index.mapper.GeoShapeFieldMapper; import org.opensearch.plugins.Plugin; import org.opensearch.test.OpenSearchIntegTestCase; @@ -24,6 +26,8 @@ public abstract class GeoModulePluginIntegTestCase extends OpenSearchIntegTestCa protected static final double GEOHASH_TOLERANCE = 1E-5D; + protected static final WellKnownText WKT = new WellKnownText(true, new StandardValidator(true)); + /** * Returns a collection of plugins that should be loaded on each node for doing the integration tests. As this * geo plugin is not getting packaged in a zip, we need to load it before the tests run. diff --git a/modules/geo/src/internalClusterTest/java/org/opensearch/geo/search/aggregations/bucket/AbstractBucketAggregationIntegTest.java b/modules/geo/src/internalClusterTest/java/org/opensearch/geo/search/aggregations/bucket/AbstractBucketAggregationIntegTest.java new file mode 100644 index 0000000000000..b9d9c48aadf90 --- /dev/null +++ b/modules/geo/src/internalClusterTest/java/org/opensearch/geo/search/aggregations/bucket/AbstractBucketAggregationIntegTest.java @@ -0,0 +1,75 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.geo.search.aggregations.bucket; + +import org.opensearch.Version; +import org.opensearch.action.index.IndexRequestBuilder; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.geo.GeoModulePluginIntegTestCase; +import org.opensearch.geometry.Geometry; +import org.opensearch.geometry.Rectangle; +import org.opensearch.test.VersionUtils; + +import java.util.List; + +import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; + +/** + * This is the base class for all the Bucket Aggregation related integration tests. Use this class to add common + * methods which can be used across different bucket aggregations. If there is any common code that can be used + * across other integration test too then this is not the class. Use {@link GeoModulePluginIntegTestCase} + */ +public abstract class AbstractBucketAggregationIntegTest extends GeoModulePluginIntegTestCase { + + protected static final int MAX_PRECISION_FOR_GEO_SHAPES_AGG_TESTING = 4; + + protected static final int NUM_DOCS = 100; + + protected static final String GEO_SHAPE_INDEX_NAME = "geoshape_index"; + + // We don't want to generate this BB at random as it may lead to generation of very large or very small BB. + // Hence, we are making the parameters of this BB static for simplicity. + protected static final Rectangle BOUNDING_RECTANGLE_FOR_GEO_SHAPES_AGG = new Rectangle(-4.1, 20.9, 21.9, -3.1); + + protected static final String AGG_NAME = "geohashgrid"; + + protected static final String GEO_SHAPE_FIELD_NAME = "location_geo_shape"; + + protected static final String GEO_POINT_FIELD_NAME = "location"; + + protected static final String KEYWORD_FIELD_NAME = "city"; + + protected final Version version = VersionUtils.randomIndexCompatibleVersion(random()); + + @Override + protected boolean forbidPrivateIndexSettings() { + return false; + } + + protected IndexRequestBuilder indexGeoShape(final String index, final Geometry geometry) throws Exception { + XContentBuilder source = jsonBuilder().startObject(); + source = source.field(GEO_SHAPE_FIELD_NAME, WKT.toWKT(geometry)); + source = source.endObject(); + return client().prepareIndex(index).setSource(source); + } + + protected IndexRequestBuilder indexCity(final String index, final String name, final List latLon) throws Exception { + XContentBuilder source = jsonBuilder().startObject().field(KEYWORD_FIELD_NAME, name); + if (latLon != null) { + source = source.field(GEO_POINT_FIELD_NAME, latLon); + } + source = source.endObject(); + return client().prepareIndex(index).setSource(source); + } + + protected IndexRequestBuilder indexCity(final String index, final String name, final String latLon) throws Exception { + return indexCity(index, name, List.of(latLon)); + } + +} diff --git a/modules/geo/src/internalClusterTest/java/org/opensearch/geo/search/aggregations/bucket/GeoHashGridIT.java b/modules/geo/src/internalClusterTest/java/org/opensearch/geo/search/aggregations/bucket/GeoHashGridIT.java index 6ab7dd5254679..67d4f5299f0d3 100644 --- a/modules/geo/src/internalClusterTest/java/org/opensearch/geo/search/aggregations/bucket/GeoHashGridIT.java +++ b/modules/geo/src/internalClusterTest/java/org/opensearch/geo/search/aggregations/bucket/GeoHashGridIT.java @@ -34,24 +34,27 @@ import com.carrotsearch.hppc.ObjectIntHashMap; import com.carrotsearch.hppc.ObjectIntMap; import com.carrotsearch.hppc.cursors.ObjectIntCursor; -import org.opensearch.Version; import org.opensearch.action.index.IndexRequestBuilder; import org.opensearch.action.search.SearchResponse; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.geo.GeoBoundingBox; import org.opensearch.common.geo.GeoPoint; +import org.opensearch.common.geo.GeoShapeDocValue; import org.opensearch.common.settings.Settings; -import org.opensearch.common.xcontent.XContentBuilder; -import org.opensearch.geo.GeoModulePluginIntegTestCase; import org.opensearch.geo.search.aggregations.bucket.geogrid.GeoGrid; +import org.opensearch.geo.search.aggregations.bucket.geogrid.GeoGridAggregationBuilder; +import org.opensearch.geo.search.aggregations.common.GeoBoundsHelper; import org.opensearch.geo.tests.common.AggregationBuilders; +import org.opensearch.geo.tests.common.RandomGeoGeometryGenerator; +import org.opensearch.geometry.Geometry; +import org.opensearch.geometry.Rectangle; +import org.opensearch.geometry.utils.Geohash; import org.opensearch.index.query.GeoBoundingBoxQueryBuilder; import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.bucket.filter.Filter; import org.opensearch.test.OpenSearchIntegTestCase; -import org.opensearch.test.VersionUtils; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Random; @@ -59,110 +62,39 @@ import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; -import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; import static org.opensearch.geometry.utils.Geohash.PRECISION; import static org.opensearch.geometry.utils.Geohash.stringEncode; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse; @OpenSearchIntegTestCase.SuiteScopeTestCase -public class GeoHashGridIT extends GeoModulePluginIntegTestCase { +public class GeoHashGridIT extends AbstractBucketAggregationIntegTest { - @Override - protected boolean forbidPrivateIndexSettings() { - return false; - } + private static ObjectIntMap expectedDocCountsForGeohash; - private Version version = VersionUtils.randomIndexCompatibleVersion(random()); + private static ObjectIntMap multiValuedExpectedDocCountsForGeohash; - static ObjectIntMap expectedDocCountsForGeoHash = null; - static ObjectIntMap multiValuedExpectedDocCountsForGeoHash = null; - static int numDocs = 100; + private static ObjectIntMap expectedDocCountsForGeoshapeGeohash; - static String smallestGeoHash = null; - - private static IndexRequestBuilder indexCity(String index, String name, List latLon) throws Exception { - XContentBuilder source = jsonBuilder().startObject().field("city", name); - if (latLon != null) { - source = source.field("location", latLon); - } - source = source.endObject(); - return client().prepareIndex(index).setSource(source); - } - - private static IndexRequestBuilder indexCity(String index, String name, String latLon) throws Exception { - return indexCity(index, name, Arrays.asList(latLon)); - } + private static String SMALLEST_GEO_HASH = null; @Override public void setupSuiteScopeCluster() throws Exception { - createIndex("idx_unmapped"); - - Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, version).build(); - - assertAcked(prepareCreate("idx").setSettings(settings).setMapping("location", "type=geo_point", "city", "type=keyword")); - - List cities = new ArrayList<>(); Random random = random(); - expectedDocCountsForGeoHash = new ObjectIntHashMap<>(numDocs * 2); - for (int i = 0; i < numDocs; i++) { - // generate random point - double lat = (180d * random.nextDouble()) - 90d; - double lng = (360d * random.nextDouble()) - 180d; - String randomGeoHash = stringEncode(lng, lat, PRECISION); - // Index at the highest resolution - cities.add(indexCity("idx", randomGeoHash, lat + ", " + lng)); - expectedDocCountsForGeoHash.put(randomGeoHash, expectedDocCountsForGeoHash.getOrDefault(randomGeoHash, 0) + 1); - // Update expected doc counts for all resolutions.. - for (int precision = PRECISION - 1; precision > 0; precision--) { - String hash = stringEncode(lng, lat, precision); - if ((smallestGeoHash == null) || (hash.length() < smallestGeoHash.length())) { - smallestGeoHash = hash; - } - expectedDocCountsForGeoHash.put(hash, expectedDocCountsForGeoHash.getOrDefault(hash, 0) + 1); - } - } - indexRandom(true, cities); - - assertAcked( - prepareCreate("multi_valued_idx").setSettings(settings).setMapping("location", "type=geo_point", "city", "type=keyword") - ); - - cities = new ArrayList<>(); - multiValuedExpectedDocCountsForGeoHash = new ObjectIntHashMap<>(numDocs * 2); - for (int i = 0; i < numDocs; i++) { - final int numPoints = random.nextInt(4); - List points = new ArrayList<>(); - Set geoHashes = new HashSet<>(); - for (int j = 0; j < numPoints; ++j) { - double lat = (180d * random.nextDouble()) - 90d; - double lng = (360d * random.nextDouble()) - 180d; - points.add(lat + "," + lng); - // Update expected doc counts for all resolutions.. - for (int precision = PRECISION; precision > 0; precision--) { - final String geoHash = stringEncode(lng, lat, precision); - geoHashes.add(geoHash); - } - } - cities.add(indexCity("multi_valued_idx", Integer.toString(i), points)); - for (String hash : geoHashes) { - multiValuedExpectedDocCountsForGeoHash.put(hash, multiValuedExpectedDocCountsForGeoHash.getOrDefault(hash, 0) + 1); - } - } - indexRandom(true, cities); - - ensureSearchable(); + prepareSingleValueGeoPointIndex(random); + prepareMultiValuedGeoPointIndex(random); + prepareGeoShapeIndex(random); } public void testSimple() throws Exception { for (int precision = 1; precision <= PRECISION; precision++) { SearchResponse response = client().prepareSearch("idx") - .addAggregation(AggregationBuilders.geohashGrid("geohashgrid").field("location").precision(precision)) + .addAggregation(AggregationBuilders.geohashGrid(AGG_NAME).field(GEO_POINT_FIELD_NAME).precision(precision)) .get(); assertSearchResponse(response); - GeoGrid geoGrid = response.getAggregations().get("geohashgrid"); + GeoGrid geoGrid = response.getAggregations().get(AGG_NAME); List buckets = geoGrid.getBuckets(); Object[] propertiesKeys = (Object[]) ((InternalAggregation) geoGrid).getProperty("_key"); Object[] propertiesDocCounts = (Object[]) ((InternalAggregation) geoGrid).getProperty("_count"); @@ -171,7 +103,7 @@ public void testSimple() throws Exception { String geohash = cell.getKeyAsString(); long bucketCount = cell.getDocCount(); - int expectedBucketCount = expectedDocCountsForGeoHash.get(geohash); + int expectedBucketCount = expectedDocCountsForGeohash.get(geohash); assertNotSame(bucketCount, 0); assertEquals("Geohash " + geohash + " has wrong doc count ", expectedBucketCount, bucketCount); GeoPoint geoPoint = (GeoPoint) propertiesKeys[i]; @@ -181,20 +113,50 @@ public void testSimple() throws Exception { } } + public void testGeoShapes() { + final GeoBoundingBox boundingBox = new GeoBoundingBox( + new GeoPoint(BOUNDING_RECTANGLE_FOR_GEO_SHAPES_AGG.getMaxLat(), BOUNDING_RECTANGLE_FOR_GEO_SHAPES_AGG.getMinLon()), + new GeoPoint(BOUNDING_RECTANGLE_FOR_GEO_SHAPES_AGG.getMinLat(), BOUNDING_RECTANGLE_FOR_GEO_SHAPES_AGG.getMaxLon()) + ); + for (int precision = 1; precision <= MAX_PRECISION_FOR_GEO_SHAPES_AGG_TESTING; precision++) { + GeoGridAggregationBuilder builder = AggregationBuilders.geohashGrid(AGG_NAME).field(GEO_SHAPE_FIELD_NAME).precision(precision); + // This makes sure that for only higher precision we are providing the GeoBounding Box. This also ensures + // that we are able to test both bounded and unbounded aggregations + if (precision > 2) { + builder.setGeoBoundingBox(boundingBox); + } + final SearchResponse response = client().prepareSearch(GEO_SHAPE_INDEX_NAME).addAggregation(builder).get(); + final GeoGrid geoGrid = response.getAggregations().get(AGG_NAME); + final List buckets = geoGrid.getBuckets(); + final Object[] propertiesKeys = (Object[]) ((InternalAggregation) geoGrid).getProperty("_key"); + final Object[] propertiesDocCounts = (Object[]) ((InternalAggregation) geoGrid).getProperty("_count"); + for (int i = 0; i < buckets.size(); i++) { + final GeoGrid.Bucket cell = buckets.get(i); + final String geohash = cell.getKeyAsString(); + + final long bucketCount = cell.getDocCount(); + final int expectedBucketCount = expectedDocCountsForGeoshapeGeohash.get(geohash); + assertNotSame(bucketCount, 0); + assertEquals("Geohash " + geohash + " has wrong doc count ", expectedBucketCount, bucketCount); + final GeoPoint geoPoint = (GeoPoint) propertiesKeys[i]; + assertThat(stringEncode(geoPoint.lon(), geoPoint.lat(), precision), equalTo(geohash)); + assertThat((long) propertiesDocCounts[i], equalTo(bucketCount)); + } + } + } + public void testMultivalued() throws Exception { for (int precision = 1; precision <= PRECISION; precision++) { SearchResponse response = client().prepareSearch("multi_valued_idx") - .addAggregation(AggregationBuilders.geohashGrid("geohashgrid").field("location").precision(precision)) + .addAggregation(AggregationBuilders.geohashGrid(AGG_NAME).field(GEO_POINT_FIELD_NAME).precision(precision)) .get(); - assertSearchResponse(response); - - GeoGrid geoGrid = response.getAggregations().get("geohashgrid"); + GeoGrid geoGrid = response.getAggregations().get(AGG_NAME); for (GeoGrid.Bucket cell : geoGrid.getBuckets()) { String geohash = cell.getKeyAsString(); long bucketCount = cell.getDocCount(); - int expectedBucketCount = multiValuedExpectedDocCountsForGeoHash.get(geohash); + int expectedBucketCount = multiValuedExpectedDocCountsForGeohash.get(geohash); assertNotSame(bucketCount, 0); assertEquals("Geohash " + geohash + " has wrong doc count ", expectedBucketCount, bucketCount); } @@ -202,13 +164,13 @@ public void testMultivalued() throws Exception { } public void testFiltered() throws Exception { - GeoBoundingBoxQueryBuilder bbox = new GeoBoundingBoxQueryBuilder("location"); - bbox.setCorners(smallestGeoHash).queryName("bbox"); + GeoBoundingBoxQueryBuilder bbox = new GeoBoundingBoxQueryBuilder(GEO_POINT_FIELD_NAME); + bbox.setCorners(SMALLEST_GEO_HASH).queryName("bbox"); for (int precision = 1; precision <= PRECISION; precision++) { SearchResponse response = client().prepareSearch("idx") .addAggregation( org.opensearch.search.aggregations.AggregationBuilders.filter("filtered", bbox) - .subAggregation(AggregationBuilders.geohashGrid("geohashgrid").field("location").precision(precision)) + .subAggregation(AggregationBuilders.geohashGrid(AGG_NAME).field(GEO_POINT_FIELD_NAME).precision(precision)) ) .get(); @@ -216,13 +178,13 @@ public void testFiltered() throws Exception { Filter filter = response.getAggregations().get("filtered"); - GeoGrid geoGrid = filter.getAggregations().get("geohashgrid"); + GeoGrid geoGrid = filter.getAggregations().get(AGG_NAME); for (GeoGrid.Bucket cell : geoGrid.getBuckets()) { String geohash = cell.getKeyAsString(); long bucketCount = cell.getDocCount(); - int expectedBucketCount = expectedDocCountsForGeoHash.get(geohash); + int expectedBucketCount = expectedDocCountsForGeohash.get(geohash); assertNotSame(bucketCount, 0); - assertTrue("Buckets must be filtered", geohash.startsWith(smallestGeoHash)); + assertTrue("Buckets must be filtered", geohash.startsWith(SMALLEST_GEO_HASH)); assertEquals("Geohash " + geohash + " has wrong doc count ", expectedBucketCount, bucketCount); } @@ -232,12 +194,12 @@ public void testFiltered() throws Exception { public void testUnmapped() throws Exception { for (int precision = 1; precision <= PRECISION; precision++) { SearchResponse response = client().prepareSearch("idx_unmapped") - .addAggregation(AggregationBuilders.geohashGrid("geohashgrid").field("location").precision(precision)) + .addAggregation(AggregationBuilders.geohashGrid(AGG_NAME).field(GEO_POINT_FIELD_NAME).precision(precision)) .get(); assertSearchResponse(response); - GeoGrid geoGrid = response.getAggregations().get("geohashgrid"); + GeoGrid geoGrid = response.getAggregations().get(AGG_NAME); assertThat(geoGrid.getBuckets().size(), equalTo(0)); } @@ -246,17 +208,17 @@ public void testUnmapped() throws Exception { public void testPartiallyUnmapped() throws Exception { for (int precision = 1; precision <= PRECISION; precision++) { SearchResponse response = client().prepareSearch("idx", "idx_unmapped") - .addAggregation(AggregationBuilders.geohashGrid("geohashgrid").field("location").precision(precision)) + .addAggregation(AggregationBuilders.geohashGrid(AGG_NAME).field(GEO_POINT_FIELD_NAME).precision(precision)) .get(); assertSearchResponse(response); - GeoGrid geoGrid = response.getAggregations().get("geohashgrid"); + GeoGrid geoGrid = response.getAggregations().get(AGG_NAME); for (GeoGrid.Bucket cell : geoGrid.getBuckets()) { String geohash = cell.getKeyAsString(); long bucketCount = cell.getDocCount(); - int expectedBucketCount = expectedDocCountsForGeoHash.get(geohash); + int expectedBucketCount = expectedDocCountsForGeohash.get(geohash); assertNotSame(bucketCount, 0); assertEquals("Geohash " + geohash + " has wrong doc count ", expectedBucketCount, bucketCount); } @@ -267,20 +229,20 @@ public void testTopMatch() throws Exception { for (int precision = 1; precision <= PRECISION; precision++) { SearchResponse response = client().prepareSearch("idx") .addAggregation( - AggregationBuilders.geohashGrid("geohashgrid").field("location").size(1).shardSize(100).precision(precision) + AggregationBuilders.geohashGrid(AGG_NAME).field(GEO_POINT_FIELD_NAME).size(1).shardSize(100).precision(precision) ) .get(); assertSearchResponse(response); - GeoGrid geoGrid = response.getAggregations().get("geohashgrid"); + GeoGrid geoGrid = response.getAggregations().get(AGG_NAME); // Check we only have one bucket with the best match for that resolution assertThat(geoGrid.getBuckets().size(), equalTo(1)); for (GeoGrid.Bucket cell : geoGrid.getBuckets()) { String geohash = cell.getKeyAsString(); long bucketCount = cell.getDocCount(); int expectedBucketCount = 0; - for (ObjectIntCursor cursor : expectedDocCountsForGeoHash) { + for (ObjectIntCursor cursor : expectedDocCountsForGeohash) { if (cursor.key.length() == precision) { expectedBucketCount = Math.max(expectedBucketCount, cursor.value); } @@ -297,10 +259,10 @@ public void testSizeIsZero() { IllegalArgumentException exception = expectThrows( IllegalArgumentException.class, () -> client().prepareSearch("idx") - .addAggregation(AggregationBuilders.geohashGrid("geohashgrid").field("location").size(size).shardSize(shardSize)) + .addAggregation(AggregationBuilders.geohashGrid(AGG_NAME).field(GEO_POINT_FIELD_NAME).size(size).shardSize(shardSize)) .get() ); - assertThat(exception.getMessage(), containsString("[size] must be greater than 0. Found [0] in [geohashgrid]")); + assertThat(exception.getMessage(), containsString("[size] must be greater than 0. Found [0] in [" + AGG_NAME + "]")); } public void testShardSizeIsZero() { @@ -309,10 +271,137 @@ public void testShardSizeIsZero() { IllegalArgumentException exception = expectThrows( IllegalArgumentException.class, () -> client().prepareSearch("idx") - .addAggregation(AggregationBuilders.geohashGrid("geohashgrid").field("location").size(size).shardSize(shardSize)) + .addAggregation(AggregationBuilders.geohashGrid(AGG_NAME).field(GEO_POINT_FIELD_NAME).size(size).shardSize(shardSize)) .get() ); - assertThat(exception.getMessage(), containsString("[shardSize] must be greater than 0. Found [0] in [geohashgrid]")); + assertThat(exception.getMessage(), containsString("[shardSize] must be greater than 0. Found [0] in [" + AGG_NAME + "]")); + } + + private void prepareSingleValueGeoPointIndex(final Random random) throws Exception { + createIndex("idx_unmapped"); + final Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, version) + .put("index.number_of_shards", 4) + .put("index.number_of_replicas", 0) + .build(); + assertAcked( + prepareCreate("idx").setSettings(settings) + .setMapping(GEO_POINT_FIELD_NAME, "type=geo_point", KEYWORD_FIELD_NAME, "type=keyword") + ); + final List cities = new ArrayList<>(); + expectedDocCountsForGeohash = new ObjectIntHashMap<>(NUM_DOCS * 2); + for (int i = 0; i < NUM_DOCS; i++) { + // generate random point + double lat = (180d * random.nextDouble()) - 90d; + double lng = (360d * random.nextDouble()) - 180d; + String randomGeoHash = stringEncode(lng, lat, PRECISION); + // Index at the highest resolution + cities.add(indexCity("idx", randomGeoHash, lat + ", " + lng)); + expectedDocCountsForGeohash.put(randomGeoHash, expectedDocCountsForGeohash.getOrDefault(randomGeoHash, 0) + 1); + // Update expected doc counts for all resolutions.. + for (int precision = PRECISION - 1; precision > 0; precision--) { + String hash = stringEncode(lng, lat, precision); + if ((SMALLEST_GEO_HASH == null) || (hash.length() < SMALLEST_GEO_HASH.length())) { + SMALLEST_GEO_HASH = hash; + } + expectedDocCountsForGeohash.put(hash, expectedDocCountsForGeohash.getOrDefault(hash, 0) + 1); + } + } + indexRandom(true, cities); + ensureGreen("idx_unmapped", "idx"); + } + + private void prepareMultiValuedGeoPointIndex(final Random random) throws Exception { + final Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, version).build(); + final List cities = new ArrayList<>(); + assertAcked( + prepareCreate("multi_valued_idx").setSettings(settings) + .setMapping(GEO_POINT_FIELD_NAME, "type=geo_point", KEYWORD_FIELD_NAME, "type=keyword") + ); + multiValuedExpectedDocCountsForGeohash = new ObjectIntHashMap<>(NUM_DOCS * 2); + for (int i = 0; i < NUM_DOCS; i++) { + final int numPoints = random.nextInt(4); + final List points = new ArrayList<>(); + final Set geoHashes = new HashSet<>(); + for (int j = 0; j < numPoints; ++j) { + final double lat = (180d * random.nextDouble()) - 90d; + final double lng = (360d * random.nextDouble()) - 180d; + points.add(lat + "," + lng); + // Update expected doc counts for all resolutions.. + for (int precision = PRECISION; precision > 0; precision--) { + final String geoHash = stringEncode(lng, lat, precision); + geoHashes.add(geoHash); + } + } + cities.add(indexCity("multi_valued_idx", Integer.toString(i), points)); + for (final String hash : geoHashes) { + multiValuedExpectedDocCountsForGeohash.put(hash, multiValuedExpectedDocCountsForGeohash.getOrDefault(hash, 0) + 1); + } + } + indexRandom(true, cities); + ensureGreen("multi_valued_idx"); + } + + private void prepareGeoShapeIndex(final Random random) throws Exception { + final Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, version).build(); + + final List geoshapes = new ArrayList<>(); + assertAcked(prepareCreate(GEO_SHAPE_INDEX_NAME).setSettings(settings).setMapping(GEO_SHAPE_FIELD_NAME, "type" + "=geo_shape")); + expectedDocCountsForGeoshapeGeohash = new ObjectIntHashMap<>(); + boolean isShapeIntersectingBB = false; + for (int i = 0; i < NUM_DOCS;) { + final Geometry geometry = RandomGeoGeometryGenerator.randomGeometry(random); + final GeoShapeDocValue geometryDocValue = GeoShapeDocValue.createGeometryDocValue(geometry); + // make sure that there is 1 shape is intersecting with the bounding box + if (!isShapeIntersectingBB) { + isShapeIntersectingBB = geometryDocValue.isIntersectingRectangle(BOUNDING_RECTANGLE_FOR_GEO_SHAPES_AGG); + if (!isShapeIntersectingBB && i == NUM_DOCS - 1) { + continue; + } + } + i++; + final GeoPoint topLeft = new GeoPoint(); + final GeoPoint bottomRight = new GeoPoint(); + assert geometry != null; + GeoBoundsHelper.updateBoundsForGeometry(geometry, topLeft, bottomRight); + final Set geoHashes = new HashSet<>(); + for (int precision = MAX_PRECISION_FOR_GEO_SHAPES_AGG_TESTING; precision > 0; precision--) { + if (precision > 2 && !geometryDocValue.isIntersectingRectangle(BOUNDING_RECTANGLE_FOR_GEO_SHAPES_AGG)) { + continue; + } + final GeoPoint topRight = new GeoPoint(topLeft.getLat(), bottomRight.getLon()); + String currentGeoHash = Geohash.stringEncode(topLeft.getLon(), topLeft.getLat(), precision); + String startingRowGeoHash = currentGeoHash; + String endGeoHashForCurrentRow = Geohash.stringEncode(topRight.getLon(), topRight.getLat(), precision); + String terminatingGeoHash = Geohash.stringEncode(bottomRight.getLon(), bottomRight.getLat(), precision); + while (true) { + final Rectangle currentRectangle = Geohash.toBoundingBox(currentGeoHash); + if (geometryDocValue.isIntersectingRectangle(currentRectangle)) { + geoHashes.add(currentGeoHash); + } + assert currentGeoHash != null; + if (currentGeoHash.equals(terminatingGeoHash)) { + break; + } + if (currentGeoHash.equals(endGeoHashForCurrentRow)) { + // move in south direction + currentGeoHash = Geohash.getNeighbor(startingRowGeoHash, precision, 0, -1); + startingRowGeoHash = currentGeoHash; + endGeoHashForCurrentRow = Geohash.getNeighbor(endGeoHashForCurrentRow, precision, 0, -1); + } else { + // move in East direction + currentGeoHash = Geohash.getNeighbor(currentGeoHash, precision, 1, 0); + } + } + } + + geoshapes.add(indexGeoShape(GEO_SHAPE_INDEX_NAME, geometry)); + for (final String hash : geoHashes) { + expectedDocCountsForGeoshapeGeohash.put(hash, expectedDocCountsForGeoshapeGeohash.getOrDefault(hash, 0) + 1); + } + } + indexRandom(true, geoshapes); + ensureGreen(GEO_SHAPE_INDEX_NAME); } } diff --git a/modules/geo/src/internalClusterTest/java/org/opensearch/geo/search/aggregations/bucket/GeoTileGridIT.java b/modules/geo/src/internalClusterTest/java/org/opensearch/geo/search/aggregations/bucket/GeoTileGridIT.java new file mode 100644 index 0000000000000..e5723876b74cc --- /dev/null +++ b/modules/geo/src/internalClusterTest/java/org/opensearch/geo/search/aggregations/bucket/GeoTileGridIT.java @@ -0,0 +1,249 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.geo.search.aggregations.bucket; + +import com.carrotsearch.hppc.ObjectIntHashMap; +import com.carrotsearch.hppc.ObjectIntMap; +import org.hamcrest.MatcherAssert; +import org.opensearch.Version; +import org.opensearch.action.index.IndexRequestBuilder; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.geo.GeoBoundingBox; +import org.opensearch.common.geo.GeoPoint; +import org.opensearch.common.geo.GeoShapeDocValue; +import org.opensearch.common.settings.Settings; +import org.opensearch.geo.search.aggregations.bucket.geogrid.GeoGrid; +import org.opensearch.geo.search.aggregations.bucket.geogrid.GeoGridAggregationBuilder; +import org.opensearch.geo.search.aggregations.common.GeoBoundsHelper; +import org.opensearch.geo.tests.common.AggregationBuilders; +import org.opensearch.geo.tests.common.RandomGeoGenerator; +import org.opensearch.geo.tests.common.RandomGeoGeometryGenerator; +import org.opensearch.geometry.Geometry; +import org.opensearch.geometry.Rectangle; +import org.opensearch.search.aggregations.InternalAggregation; +import org.opensearch.search.aggregations.bucket.GeoTileUtils; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.VersionUtils; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.hamcrest.Matchers.equalTo; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse; + +@OpenSearchIntegTestCase.SuiteScopeTestCase +public class GeoTileGridIT extends AbstractBucketAggregationIntegTest { + + private static final int GEOPOINT_MAX_PRECISION = 17; + + private static final String AGG_NAME = "geotilegrid"; + + private static final String GEO_SHAPE_INDEX_NAME = "geoshape_index"; + + private static final String GEO_SHAPE_FIELD_NAME = "location_geo_shape"; + + private final Version version = VersionUtils.randomIndexCompatibleVersion(random()); + + private static final Rectangle BOUNDING_RECTANGLE_FOR_GEO_SHAPES_AGG = new Rectangle(-4.1, 20.9, 21.9, -3.1); + + private static ObjectIntMap expectedDocCountsForGeoshapeGeoTiles; + + private static ObjectIntMap expectedDocCountsForGeoTiles; + + private static ObjectIntHashMap multiValuedExpectedDocCountsForGeoTiles; + + @Override + public void setupSuiteScopeCluster() throws Exception { + final Random random = random(); + prepareSingleValueGeoPointIndex(random); + prepareMultiValuedGeoPointIndex(random); + prepareGeoShapeIndex(random); + ensureSearchable(); + } + + public void testGeoShapes() { + final GeoBoundingBox boundingBox = new GeoBoundingBox( + new GeoPoint(BOUNDING_RECTANGLE_FOR_GEO_SHAPES_AGG.getMaxLat(), BOUNDING_RECTANGLE_FOR_GEO_SHAPES_AGG.getMinLon()), + new GeoPoint(BOUNDING_RECTANGLE_FOR_GEO_SHAPES_AGG.getMinLat(), BOUNDING_RECTANGLE_FOR_GEO_SHAPES_AGG.getMaxLon()) + ); + for (int precision = 1; precision <= MAX_PRECISION_FOR_GEO_SHAPES_AGG_TESTING; precision++) { + GeoGridAggregationBuilder builder = AggregationBuilders.geotileGrid(AGG_NAME).field(GEO_SHAPE_FIELD_NAME).precision(precision); + // This makes sure that for only higher precision we are providing the GeoBounding Box. This also ensures + // that we are able to test both bounded and unbounded aggregations + if (precision > 2) { + builder.setGeoBoundingBox(boundingBox); + } + final SearchResponse response = client().prepareSearch(GEO_SHAPE_INDEX_NAME).addAggregation(builder).get(); + final GeoGrid geoGrid = response.getAggregations().get(AGG_NAME); + final List buckets = geoGrid.getBuckets(); + final Object[] propertiesKeys = (Object[]) ((InternalAggregation) geoGrid).getProperty("_key"); + final Object[] propertiesDocCounts = (Object[]) ((InternalAggregation) geoGrid).getProperty("_count"); + for (int i = 0; i < buckets.size(); i++) { + final GeoGrid.Bucket cell = buckets.get(i); + final String geoTile = cell.getKeyAsString(); + + final long bucketCount = cell.getDocCount(); + final int expectedBucketCount = expectedDocCountsForGeoshapeGeoTiles.get(geoTile); + assertNotSame(bucketCount, 0); + assertEquals("Geotile " + geoTile + " has wrong doc count ", expectedBucketCount, bucketCount); + final GeoPoint geoPoint = (GeoPoint) propertiesKeys[i]; + MatcherAssert.assertThat(GeoTileUtils.stringEncode(geoPoint.lon(), geoPoint.lat(), precision), equalTo(geoTile)); + MatcherAssert.assertThat((long) propertiesDocCounts[i], equalTo(bucketCount)); + } + } + } + + public void testSimpleGeoPointsAggregation() { + for (int precision = 1; precision <= GEOPOINT_MAX_PRECISION; precision++) { + SearchResponse response = client().prepareSearch("idx") + .addAggregation(AggregationBuilders.geotileGrid(AGG_NAME).field(GEO_POINT_FIELD_NAME).precision(precision)) + .get(); + + assertSearchResponse(response); + + GeoGrid geoGrid = response.getAggregations().get(AGG_NAME); + List buckets = geoGrid.getBuckets(); + Object[] propertiesKeys = (Object[]) ((InternalAggregation) geoGrid).getProperty("_key"); + Object[] propertiesDocCounts = (Object[]) ((InternalAggregation) geoGrid).getProperty("_count"); + for (int i = 0; i < buckets.size(); i++) { + GeoGrid.Bucket cell = buckets.get(i); + String geoTile = cell.getKeyAsString(); + + long bucketCount = cell.getDocCount(); + int expectedBucketCount = expectedDocCountsForGeoTiles.get(geoTile); + assertNotSame(bucketCount, 0); + assertEquals("GeoTile " + geoTile + " has wrong doc count ", expectedBucketCount, bucketCount); + GeoPoint geoPoint = (GeoPoint) propertiesKeys[i]; + assertThat(GeoTileUtils.stringEncode(geoPoint.lon(), geoPoint.lat(), precision), equalTo(geoTile)); + assertThat((long) propertiesDocCounts[i], equalTo(bucketCount)); + } + } + } + + public void testMultivaluedGeoPointsAggregation() throws Exception { + for (int precision = 1; precision <= GEOPOINT_MAX_PRECISION; precision++) { + SearchResponse response = client().prepareSearch("multi_valued_idx") + .addAggregation(AggregationBuilders.geotileGrid(AGG_NAME).field(GEO_POINT_FIELD_NAME).precision(precision)) + .get(); + + assertSearchResponse(response); + + GeoGrid geoGrid = response.getAggregations().get(AGG_NAME); + for (GeoGrid.Bucket cell : geoGrid.getBuckets()) { + String geohash = cell.getKeyAsString(); + + long bucketCount = cell.getDocCount(); + int expectedBucketCount = multiValuedExpectedDocCountsForGeoTiles.get(geohash); + assertNotSame(bucketCount, 0); + assertEquals("Geohash " + geohash + " has wrong doc count ", expectedBucketCount, bucketCount); + } + } + } + + private void prepareGeoShapeIndex(final Random random) throws Exception { + final Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, version).build(); + final List geoshapes = new ArrayList<>(); + assertAcked(prepareCreate(GEO_SHAPE_INDEX_NAME).setSettings(settings).setMapping(GEO_SHAPE_FIELD_NAME, "type" + "=geo_shape")); + expectedDocCountsForGeoshapeGeoTiles = new ObjectIntHashMap<>(); + boolean isShapeIntersectingBB = false; + for (int i = 0; i < NUM_DOCS;) { + final Geometry geometry = RandomGeoGeometryGenerator.randomGeometry(random); + final GeoShapeDocValue geometryDocValue = GeoShapeDocValue.createGeometryDocValue(geometry); + // make sure that there is 1 shape is intersecting with the bounding box + if (!isShapeIntersectingBB) { + isShapeIntersectingBB = geometryDocValue.isIntersectingRectangle(BOUNDING_RECTANGLE_FOR_GEO_SHAPES_AGG); + if (!isShapeIntersectingBB && i == NUM_DOCS - 1) { + continue; + } + } + i++; + final GeoPoint topLeft = new GeoPoint(); + final GeoPoint bottomRight = new GeoPoint(); + assert geometry != null; + GeoBoundsHelper.updateBoundsForGeometry(geometry, topLeft, bottomRight); + final Set geoTiles = new HashSet<>(); + for (int precision = MAX_PRECISION_FOR_GEO_SHAPES_AGG_TESTING; precision > 0; precision--) { + geoTiles.addAll( + GeoTileUtils.encodeShape(geometryDocValue, precision) + .stream() + .map(GeoTileUtils::stringEncode) + .collect(Collectors.toSet()) + ); + } + + geoshapes.add(indexGeoShape(GEO_SHAPE_INDEX_NAME, geometry)); + for (final String hash : geoTiles) { + expectedDocCountsForGeoshapeGeoTiles.put(hash, expectedDocCountsForGeoshapeGeoTiles.getOrDefault(hash, 0) + 1); + } + } + indexRandom(true, geoshapes); + ensureGreen(GEO_SHAPE_INDEX_NAME); + } + + private void prepareSingleValueGeoPointIndex(final Random random) throws Exception { + final Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, version).build(); + assertAcked( + prepareCreate("idx").setSettings(settings) + .setMapping(GEO_POINT_FIELD_NAME, "type=geo_point", KEYWORD_FIELD_NAME, "type=keyword") + ); + final List cities = new ArrayList<>(); + expectedDocCountsForGeoTiles = new ObjectIntHashMap<>(); + for (int i = 0; i < NUM_DOCS; i++) { + // generate random point + final GeoPoint geoPoint = RandomGeoGenerator.randomPoint(random); + final String cityName = GeoTileUtils.stringEncode(geoPoint.getLon(), geoPoint.getLat(), GEOPOINT_MAX_PRECISION); + // Index at the highest resolution + cities.add(indexCity("idx", cityName, geoPoint.getLat() + ", " + geoPoint.getLon())); + // Update expected doc counts for all resolutions.. + for (int precision = GEOPOINT_MAX_PRECISION; precision > 0; precision--) { + String tile = GeoTileUtils.stringEncode(geoPoint.getLon(), geoPoint.getLat(), precision); + expectedDocCountsForGeoTiles.put(tile, expectedDocCountsForGeoTiles.getOrDefault(tile, 0) + 1); + } + } + indexRandom(true, cities); + ensureGreen("idx"); + } + + private void prepareMultiValuedGeoPointIndex(final Random random) throws Exception { + final Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, version).build(); + final List cities = new ArrayList<>(); + assertAcked( + prepareCreate("multi_valued_idx").setSettings(settings) + .setMapping(GEO_POINT_FIELD_NAME, "type=geo_point", KEYWORD_FIELD_NAME, "type=keyword") + ); + multiValuedExpectedDocCountsForGeoTiles = new ObjectIntHashMap<>(NUM_DOCS * 2); + for (int i = 0; i < NUM_DOCS; i++) { + final int numPoints = random.nextInt(4); + final List points = new ArrayList<>(); + final Set geoTiles = new HashSet<>(); + for (int j = 0; j < numPoints; ++j) { + // generate random point + final GeoPoint geoPoint = RandomGeoGenerator.randomPoint(random); + points.add(geoPoint.getLat() + "," + geoPoint.getLon()); + // Update expected doc counts for all resolutions.. + for (int precision = GEOPOINT_MAX_PRECISION; precision > 0; precision--) { + final String tile = GeoTileUtils.stringEncode(geoPoint.getLon(), geoPoint.getLat(), precision); + geoTiles.add(tile); + } + } + cities.add(indexCity("multi_valued_idx", Integer.toString(i), points)); + for (final String hash : geoTiles) { + multiValuedExpectedDocCountsForGeoTiles.put(hash, multiValuedExpectedDocCountsForGeoTiles.getOrDefault(hash, 0) + 1); + } + } + indexRandom(true, cities); + ensureGreen("multi_valued_idx"); + } +} diff --git a/modules/geo/src/main/java/org/opensearch/geo/search/aggregations/bucket/geogrid/cells/CellValues.java b/modules/geo/src/main/java/org/opensearch/geo/search/aggregations/bucket/geogrid/cells/CellValues.java index 2a828e6484ed2..0b69040ec977a 100644 --- a/modules/geo/src/main/java/org/opensearch/geo/search/aggregations/bucket/geogrid/cells/CellValues.java +++ b/modules/geo/src/main/java/org/opensearch/geo/search/aggregations/bucket/geogrid/cells/CellValues.java @@ -54,16 +54,12 @@ protected CellValues(MultiGeoPointValues geoValues, int precision, CellIdSource. this.encoder = encoder; } - // we will need a way to clear the values array, as it is storing the states @Override public boolean advanceExact(int docId) throws IOException { if (geoValues.advanceExact(docId)) { int docValueCount = geoValues.docValueCount(); resize(docValueCount); int j = 0; - // we will need a way to convert the shape into a set of GeoTiles. Now, rather than iterating over the - // points as in GeoPoints, we need to add the list of tiles which shape is intersecting with like we are - // doing for the points. for (int i = 0; i < docValueCount; i++) { j = advanceValue(geoValues.nextValue(), j); } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/GeoTileUtils.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/GeoTileUtils.java index 9c0c55eafaed3..4b3784bd9912b 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/GeoTileUtils.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/GeoTileUtils.java @@ -252,6 +252,13 @@ public static String stringEncode(long hash) { return "" + res[0] + "/" + res[1] + "/" + res[2]; } + /** + * Encode lon/lat to the geotile based string format which is "zoom/x/y" + */ + public static String stringEncode(double longitude, double latitude, int precision) { + return stringEncode(longEncode(longitude, latitude, precision)); + } + /** * Decode long hash as a GeoPoint (center of the tile) */ @@ -295,9 +302,10 @@ public static List encodeShape(final GeoShapeDocValue geoShapeDocValue, fi final long totalTilesAtPrecision = 1L << checkPrecisionRange(precision); int maxXTile = getXTile(boundingRectangle.getMaxX(), totalTilesAtPrecision); int minXTile = getXTile(boundingRectangle.getMinX(), totalTilesAtPrecision); - - int maxYTile = getYTile(boundingRectangle.getMaxY(), totalTilesAtPrecision); - int minYTile = getYTile(boundingRectangle.getMinY(), totalTilesAtPrecision); + // as tuples in tiles are x,y and y(lat) increases from north to south in tiles, so for minYTile we need to + // take maxY and for maxYTile we need to take minY. + int minYTile = getYTile(boundingRectangle.getMaxY(), totalTilesAtPrecision); + int maxYTile = getYTile(boundingRectangle.getMinY(), totalTilesAtPrecision); final List encodedValues = new ArrayList<>(); for (int x = minXTile; x <= maxXTile; x++) { for (int y = minYTile; y <= maxYTile; y++) {