From 3384fb403dee6315ce8515009d35f093ade54f1d Mon Sep 17 00:00:00 2001 From: Heemin Kim Date: Wed, 10 May 2023 09:36:22 -0700 Subject: [PATCH] Set User-Agent in http request (#300) Signed-off-by: Heemin Kim --- .../ip2geo/common/DatasourceManifest.java | 36 ++++++++++++------ .../ip2geo/common/GeoIpDataFacade.java | 34 ++++++++++------- .../ip2geo/common/Ip2GeoSettings.java | 3 +- .../geospatial/shared/Constants.java | 15 ++++++++ .../action/RestPutDatasourceHandlerTests.java | 2 +- .../common/DatasourceManifestTests.java | 38 +++++++++++++++++++ .../ip2geo/common/GeoIpDataFacadeTests.java | 25 ++++++++++++ src/test/resources/ip2geo/manifest.json | 2 +- .../ip2geo/manifest_invalid_url.json | 2 +- .../resources/ip2geo/manifest_template.json | 2 +- 10 files changed, 128 insertions(+), 31 deletions(-) create mode 100644 src/main/java/org/opensearch/geospatial/shared/Constants.java create mode 100644 src/test/java/org/opensearch/geospatial/ip2geo/common/DatasourceManifestTests.java diff --git a/src/main/java/org/opensearch/geospatial/ip2geo/common/DatasourceManifest.java b/src/main/java/org/opensearch/geospatial/ip2geo/common/DatasourceManifest.java index fca40213..5382aa56 100644 --- a/src/main/java/org/opensearch/geospatial/ip2geo/common/DatasourceManifest.java +++ b/src/main/java/org/opensearch/geospatial/ip2geo/common/DatasourceManifest.java @@ -9,6 +9,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.net.URL; +import java.net.URLConnection; import java.nio.CharBuffer; import java.security.AccessController; import java.security.PrivilegedAction; @@ -25,6 +26,8 @@ import org.opensearch.core.xcontent.DeprecationHandler; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.geospatial.annotation.VisibleForTesting; +import org.opensearch.geospatial.shared.Constants; /** * Ip2Geo datasource manifest file object @@ -39,7 +42,7 @@ public class DatasourceManifest { private static final ParseField DB_NAME_FIELD = new ParseField("db_name"); private static final ParseField SHA256_HASH_FIELD = new ParseField("sha256_hash"); private static final ParseField VALID_FOR_IN_DAYS_FIELD = new ParseField("valid_for_in_days"); - private static final ParseField UPDATED_AT_FIELD = new ParseField("updated_at"); + private static final ParseField UPDATED_AT_FIELD = new ParseField("updated_at_in_epoch_milli"); private static final ParseField PROVIDER_FIELD = new ParseField("provider"); /** @@ -114,20 +117,31 @@ public static class Builder { public static DatasourceManifest build(final URL url) { SpecialPermission.check(); return AccessController.doPrivileged((PrivilegedAction) () -> { - try (BufferedReader reader = new BufferedReader(new InputStreamReader(url.openStream()))) { - CharBuffer charBuffer = CharBuffer.allocate(MANIFEST_FILE_MAX_BYTES); - reader.read(charBuffer); - charBuffer.flip(); - XContentParser parser = JsonXContent.jsonXContent.createParser( - NamedXContentRegistry.EMPTY, - DeprecationHandler.IGNORE_DEPRECATIONS, - charBuffer.toString() - ); - return PARSER.parse(parser, null); + try { + URLConnection connection = url.openConnection(); + return internalBuild(connection); } catch (IOException e) { throw new RuntimeException(e); } }); } + + @VisibleForTesting + @SuppressForbidden(reason = "Need to connect to http endpoint to read manifest file") + protected static DatasourceManifest internalBuild(final URLConnection connection) throws IOException { + connection.addRequestProperty(Constants.USER_AGENT_KEY, Constants.USER_AGENT_VALUE); + InputStreamReader inputStreamReader = new InputStreamReader(connection.getInputStream()); + try (BufferedReader reader = new BufferedReader(inputStreamReader)) { + CharBuffer charBuffer = CharBuffer.allocate(MANIFEST_FILE_MAX_BYTES); + reader.read(charBuffer); + charBuffer.flip(); + XContentParser parser = JsonXContent.jsonXContent.createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.IGNORE_DEPRECATIONS, + charBuffer.toString() + ); + return PARSER.parse(parser, null); + } + } } } diff --git a/src/main/java/org/opensearch/geospatial/ip2geo/common/GeoIpDataFacade.java b/src/main/java/org/opensearch/geospatial/ip2geo/common/GeoIpDataFacade.java index 95ae8003..dfd2f099 100644 --- a/src/main/java/org/opensearch/geospatial/ip2geo/common/GeoIpDataFacade.java +++ b/src/main/java/org/opensearch/geospatial/ip2geo/common/GeoIpDataFacade.java @@ -12,6 +12,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; +import java.net.URLConnection; import java.nio.charset.StandardCharsets; import java.security.AccessController; import java.security.PrivilegedAction; @@ -53,6 +54,8 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.common.xcontent.XContentHelper; import org.opensearch.common.xcontent.XContentType; +import org.opensearch.geospatial.annotation.VisibleForTesting; +import org.opensearch.geospatial.shared.Constants; import org.opensearch.geospatial.shared.StashedThreadContext; import org.opensearch.index.query.QueryBuilders; @@ -139,26 +142,29 @@ public CSVParser getDatabaseReader(final DatasourceManifest manifest) { return AccessController.doPrivileged((PrivilegedAction) () -> { try { URL zipUrl = new URL(manifest.getUrl()); - ZipInputStream zipIn = new ZipInputStream(zipUrl.openStream()); - ZipEntry zipEntry = zipIn.getNextEntry(); - while (zipEntry != null) { - if (zipEntry.getName().equalsIgnoreCase(manifest.getDbName()) == false) { - zipEntry = zipIn.getNextEntry(); - continue; - } - return new CSVParser(new BufferedReader(new InputStreamReader(zipIn)), CSVFormat.RFC4180); - } + return internalGetDatabaseReader(manifest, zipUrl.openConnection()); } catch (IOException e) { throw new OpenSearchException("failed to read geoip data from {}", manifest.getUrl(), e); } - throw new OpenSearchException( - "database file [{}] does not exist in the zip file [{}]", - manifest.getDbName(), - manifest.getUrl() - ); }); } + @VisibleForTesting + @SuppressForbidden(reason = "Need to connect to http endpoint to read GeoIP database file") + protected CSVParser internalGetDatabaseReader(final DatasourceManifest manifest, final URLConnection connection) throws IOException { + connection.addRequestProperty(Constants.USER_AGENT_KEY, Constants.USER_AGENT_VALUE); + ZipInputStream zipIn = new ZipInputStream(connection.getInputStream()); + ZipEntry zipEntry = zipIn.getNextEntry(); + while (zipEntry != null) { + if (zipEntry.getName().equalsIgnoreCase(manifest.getDbName()) == false) { + zipEntry = zipIn.getNextEntry(); + continue; + } + return new CSVParser(new BufferedReader(new InputStreamReader(zipIn)), CSVFormat.RFC4180); + } + throw new OpenSearchException("database file [{}] does not exist in the zip file [{}]", manifest.getDbName(), manifest.getUrl()); + } + /** * Create a document in json string format to ingest in datasource database index * diff --git a/src/main/java/org/opensearch/geospatial/ip2geo/common/Ip2GeoSettings.java b/src/main/java/org/opensearch/geospatial/ip2geo/common/Ip2GeoSettings.java index db565dd8..b236e95c 100644 --- a/src/main/java/org/opensearch/geospatial/ip2geo/common/Ip2GeoSettings.java +++ b/src/main/java/org/opensearch/geospatial/ip2geo/common/Ip2GeoSettings.java @@ -23,8 +23,7 @@ public class Ip2GeoSettings { */ public static final Setting DATASOURCE_ENDPOINT = Setting.simpleString( "plugins.geospatial.ip2geo.datasource.endpoint", - // TODO: This value is not correct. Update it later once CDN server is ready. - "https://geoip.maps.opensearch.org/v1/geolite-2/manifest.json", + "https://geoip.maps.opensearch.org/v1/geolite2-city/manifest.json", new DatasourceEndpointValidator(), Setting.Property.NodeScope, Setting.Property.Dynamic diff --git a/src/main/java/org/opensearch/geospatial/shared/Constants.java b/src/main/java/org/opensearch/geospatial/shared/Constants.java new file mode 100644 index 00000000..7b6488a4 --- /dev/null +++ b/src/main/java/org/opensearch/geospatial/shared/Constants.java @@ -0,0 +1,15 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.geospatial.shared; + +import java.util.Locale; + +import org.opensearch.Version; + +public class Constants { + public static final String USER_AGENT_KEY = "User-Agent"; + public static final String USER_AGENT_VALUE = String.format(Locale.ROOT, "OpenSearch/%s vanilla", Version.CURRENT.toString()); +} diff --git a/src/test/java/org/opensearch/geospatial/ip2geo/action/RestPutDatasourceHandlerTests.java b/src/test/java/org/opensearch/geospatial/ip2geo/action/RestPutDatasourceHandlerTests.java index 97c3cb7d..3ec81de5 100644 --- a/src/test/java/org/opensearch/geospatial/ip2geo/action/RestPutDatasourceHandlerTests.java +++ b/src/test/java/org/opensearch/geospatial/ip2geo/action/RestPutDatasourceHandlerTests.java @@ -69,7 +69,7 @@ public void testPrepareRequestDefaultValue() { verifyingClient.setExecuteLocallyVerifier((actionResponse, actionRequest) -> { assertTrue(actionRequest instanceof PutDatasourceRequest); PutDatasourceRequest putDatasourceRequest = (PutDatasourceRequest) actionRequest; - assertEquals("https://geoip.maps.opensearch.org/v1/geolite-2/manifest.json", putDatasourceRequest.getEndpoint()); + assertEquals("https://geoip.maps.opensearch.org/v1/geolite2-city/manifest.json", putDatasourceRequest.getEndpoint()); assertEquals(TimeValue.timeValueDays(3), putDatasourceRequest.getUpdateInterval()); assertEquals(datasourceName, putDatasourceRequest.getName()); isExecuted.set(true); diff --git a/src/test/java/org/opensearch/geospatial/ip2geo/common/DatasourceManifestTests.java b/src/test/java/org/opensearch/geospatial/ip2geo/common/DatasourceManifestTests.java new file mode 100644 index 00000000..f7b689e1 --- /dev/null +++ b/src/test/java/org/opensearch/geospatial/ip2geo/common/DatasourceManifestTests.java @@ -0,0 +1,38 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.geospatial.ip2geo.common; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.File; +import java.io.FileInputStream; +import java.net.URLConnection; + +import lombok.SneakyThrows; + +import org.opensearch.common.SuppressForbidden; +import org.opensearch.geospatial.ip2geo.Ip2GeoTestCase; +import org.opensearch.geospatial.shared.Constants; + +@SuppressForbidden(reason = "unit test") +public class DatasourceManifestTests extends Ip2GeoTestCase { + + @SneakyThrows + public void testInternalBuild_whenCalled_thenCorrectUserAgentValueIsSet() { + URLConnection connection = mock(URLConnection.class); + File manifestFile = new File(this.getClass().getClassLoader().getResource("ip2geo/manifest.json").getFile()); + when(connection.getInputStream()).thenReturn(new FileInputStream(manifestFile)); + + // Run + DatasourceManifest manifest = DatasourceManifest.Builder.internalBuild(connection); + + // Verify + verify(connection).addRequestProperty(Constants.USER_AGENT_KEY, Constants.USER_AGENT_VALUE); + assertEquals("https://test.com/db.zip", manifest.getUrl()); + } +} diff --git a/src/test/java/org/opensearch/geospatial/ip2geo/common/GeoIpDataFacadeTests.java b/src/test/java/org/opensearch/geospatial/ip2geo/common/GeoIpDataFacadeTests.java index 54c98a41..5a269539 100644 --- a/src/test/java/org/opensearch/geospatial/ip2geo/common/GeoIpDataFacadeTests.java +++ b/src/test/java/org/opensearch/geospatial/ip2geo/common/GeoIpDataFacadeTests.java @@ -14,6 +14,8 @@ import static org.opensearch.geospatial.ip2geo.jobscheduler.Datasource.IP2GEO_DATA_INDEX_NAME_PREFIX; import java.io.File; +import java.io.FileInputStream; +import java.net.URLConnection; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.time.Instant; @@ -55,6 +57,7 @@ import org.opensearch.common.bytes.BytesReference; import org.opensearch.geospatial.GeospatialTestHelper; import org.opensearch.geospatial.ip2geo.Ip2GeoTestCase; +import org.opensearch.geospatial.shared.Constants; import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; @@ -139,6 +142,28 @@ public void testGetDatabaseReaderNoFile() throws Exception { assertTrue(exception.getMessage().contains("does not exist")); } + @SneakyThrows + public void testInternalGetDatabaseReader_whenCalled_thenSetUserAgent() { + File zipFile = new File(this.getClass().getClassLoader().getResource("ip2geo/sample_valid.zip").getFile()); + DatasourceManifest manifest = new DatasourceManifest( + zipFile.toURI().toURL().toExternalForm(), + "sample_valid.csv", + "fake_sha256", + 1l, + Instant.now().toEpochMilli(), + "tester" + ); + + URLConnection connection = mock(URLConnection.class); + when(connection.getInputStream()).thenReturn(new FileInputStream(zipFile)); + + // Run + noOpsGeoIpDataFacade.internalGetDatabaseReader(manifest, connection); + + // Verify + verify(connection).addRequestProperty(Constants.USER_AGENT_KEY, Constants.USER_AGENT_VALUE); + } + public void testDeleteIp2GeoDataIndex() { String index = String.format(Locale.ROOT, "%s.%s", IP2GEO_DATA_INDEX_NAME_PREFIX, GeospatialTestHelper.randomLowerCaseString()); verifyingClient.setExecuteVerifier((actionResponse, actionRequest) -> { diff --git a/src/test/resources/ip2geo/manifest.json b/src/test/resources/ip2geo/manifest.json index 4986fbd8..86a76e47 100644 --- a/src/test/resources/ip2geo/manifest.json +++ b/src/test/resources/ip2geo/manifest.json @@ -3,6 +3,6 @@ "db_name": "sample_valid.csv", "sha256_hash": "safasdfaskkkesadfasdf", "valid_for_in_days": 30, - "updated_at": 3134012341236, + "updated_at_in_epoch_milli": 3134012341236, "provider": "sample_provider" } \ No newline at end of file diff --git a/src/test/resources/ip2geo/manifest_invalid_url.json b/src/test/resources/ip2geo/manifest_invalid_url.json index 4e806f49..c9f1723e 100644 --- a/src/test/resources/ip2geo/manifest_invalid_url.json +++ b/src/test/resources/ip2geo/manifest_invalid_url.json @@ -3,6 +3,6 @@ "db_name": "sample_valid.csv", "sha256_hash": "safasdfaskkkesadfasdf", "valid_for_in_days": 30, - "updated_at": 3134012341236, + "updated_at_in_epoch_milli": 3134012341236, "provider": "sample_provider" } \ No newline at end of file diff --git a/src/test/resources/ip2geo/manifest_template.json b/src/test/resources/ip2geo/manifest_template.json index 92ceb590..39665b74 100644 --- a/src/test/resources/ip2geo/manifest_template.json +++ b/src/test/resources/ip2geo/manifest_template.json @@ -3,6 +3,6 @@ "db_name": "sample_valid.csv", "sha256_hash": "safasdfaskkkesadfasdf", "valid_for_in_days": 30, - "updated_at": 3134012341236, + "updated_at_in_epoch_milli": 3134012341236, "provider": "maxmind" } \ No newline at end of file