From 45881c4c9f70554247504f94b376684056b8c404 Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Tue, 15 Oct 2024 07:20:01 -0400 Subject: [PATCH] Support IPinfo databases in the ip_location processor (#114735) (#114776) --- .../src/main/java/module-info.java | 2 + .../ingest/geoip/ConfigDatabases.java | 4 +- .../ingest/geoip/GeoIpProcessor.java | 20 ++- .../ingest/geoip/IpDataLookupFactories.java | 73 +++------ .../ingest/geoip/IpinfoIpDataLookups.java | 79 ++++++++++ .../ingest/geoip/MaxmindIpDataLookups.java | 55 +++++++ .../ingest/geoip/GeoIpProcessorTests.java | 144 +++++++++++++----- .../geoip/IpinfoIpDataLookupsTests.java | 103 +++++++++++++ 8 files changed, 383 insertions(+), 97 deletions(-) diff --git a/modules/ingest-geoip/src/main/java/module-info.java b/modules/ingest-geoip/src/main/java/module-info.java index f64c30c060b08..0703d9fb449aa 100644 --- a/modules/ingest-geoip/src/main/java/module-info.java +++ b/modules/ingest-geoip/src/main/java/module-info.java @@ -18,4 +18,6 @@ exports org.elasticsearch.ingest.geoip.direct to org.elasticsearch.server; exports org.elasticsearch.ingest.geoip.stats to org.elasticsearch.server; + + exports org.elasticsearch.ingest.geoip to com.maxmind.db; } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/ConfigDatabases.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/ConfigDatabases.java index 865d0c5a9eca0..3d2b54b04695f 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/ConfigDatabases.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/ConfigDatabases.java @@ -73,14 +73,14 @@ void updateDatabase(Path file, boolean update) { String databaseFileName = file.getFileName().toString(); try { if (update) { - logger.info("database file changed [{}], reload database...", file); + logger.info("database file changed [{}], reloading database...", file); DatabaseReaderLazyLoader loader = new DatabaseReaderLazyLoader(cache, file, null); DatabaseReaderLazyLoader existing = configDatabases.put(databaseFileName, loader); if (existing != null) { existing.shutdown(); } } else { - logger.info("database file removed [{}], close database...", file); + logger.info("database file removed [{}], closing database...", file); DatabaseReaderLazyLoader existing = configDatabases.remove(databaseFileName); assert existing != null; existing.shutdown(); diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java index 6c64cb755bb32..9508bf0346058 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java @@ -196,13 +196,19 @@ public IpDatabase get() throws IOException { } if (Assertions.ENABLED) { - // Only check whether the suffix has changed and not the entire database type. - // To sanity check whether a city db isn't overwriting with a country or asn db. - // For example overwriting a geoip lite city db with geoip city db is a valid change, but the db type is slightly different, - // by checking just the suffix this assertion doesn't fail. - String expectedSuffix = databaseType.substring(databaseType.lastIndexOf('-')); - assert loader.getDatabaseType().endsWith(expectedSuffix) - : "database type [" + loader.getDatabaseType() + "] doesn't match with expected suffix [" + expectedSuffix + "]"; + // Note that the expected suffix might be null for providers that aren't amenable to using dashes as separator for + // determining the database type. + int last = databaseType.lastIndexOf('-'); + final String expectedSuffix = last == -1 ? null : databaseType.substring(last); + + // If the entire database type matches, then that's a match. Otherwise, if there's a suffix to compare on, then + // check whether the suffix has changed (not the entire database type). + // This is to sanity check, for example, that a city db isn't overwritten with a country or asn db. + // But there are permissible overwrites that make sense, for example overwriting a geolite city db with a geoip city db + // is a valid change, but the db type is slightly different -- by checking just the suffix this assertion won't fail. + final String loaderType = loader.getDatabaseType(); + assert loaderType.equals(databaseType) || expectedSuffix == null || loaderType.endsWith(expectedSuffix) + : "database type [" + loaderType + "] doesn't match with expected suffix [" + expectedSuffix + "]"; } return loader; } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpDataLookupFactories.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpDataLookupFactories.java index 3379fdff0633a..e879f0e0e3514 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpDataLookupFactories.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpDataLookupFactories.java @@ -13,9 +13,16 @@ import org.elasticsearch.core.Nullable; import java.util.List; +import java.util.Locale; import java.util.Set; import java.util.function.Function; +import static org.elasticsearch.ingest.geoip.IpinfoIpDataLookups.IPINFO_PREFIX; +import static org.elasticsearch.ingest.geoip.IpinfoIpDataLookups.getIpinfoDatabase; +import static org.elasticsearch.ingest.geoip.IpinfoIpDataLookups.getIpinfoLookup; +import static org.elasticsearch.ingest.geoip.MaxmindIpDataLookups.getMaxmindDatabase; +import static org.elasticsearch.ingest.geoip.MaxmindIpDataLookups.getMaxmindLookup; + final class IpDataLookupFactories { private IpDataLookupFactories() { @@ -26,78 +33,44 @@ interface IpDataLookupFactory { IpDataLookup create(List properties); } - private static final String CITY_DB_SUFFIX = "-City"; - private static final String COUNTRY_DB_SUFFIX = "-Country"; - private static final String ASN_DB_SUFFIX = "-ASN"; - private static final String ANONYMOUS_IP_DB_SUFFIX = "-Anonymous-IP"; - private static final String CONNECTION_TYPE_DB_SUFFIX = "-Connection-Type"; - private static final String DOMAIN_DB_SUFFIX = "-Domain"; - private static final String ENTERPRISE_DB_SUFFIX = "-Enterprise"; - private static final String ISP_DB_SUFFIX = "-ISP"; - - @Nullable - private static Database getMaxmindDatabase(final String databaseType) { - if (databaseType.endsWith(CITY_DB_SUFFIX)) { - return Database.City; - } else if (databaseType.endsWith(COUNTRY_DB_SUFFIX)) { - return Database.Country; - } else if (databaseType.endsWith(ASN_DB_SUFFIX)) { - return Database.Asn; - } else if (databaseType.endsWith(ANONYMOUS_IP_DB_SUFFIX)) { - return Database.AnonymousIp; - } else if (databaseType.endsWith(CONNECTION_TYPE_DB_SUFFIX)) { - return Database.ConnectionType; - } else if (databaseType.endsWith(DOMAIN_DB_SUFFIX)) { - return Database.Domain; - } else if (databaseType.endsWith(ENTERPRISE_DB_SUFFIX)) { - return Database.Enterprise; - } else if (databaseType.endsWith(ISP_DB_SUFFIX)) { - return Database.Isp; - } else { - return null; // no match was found - } - } - /** * Parses the passed-in databaseType and return the Database instance that is * associated with that databaseType. * * @param databaseType the database type String from the metadata of the database file - * @return the Database instance that is associated with the databaseType + * @return the Database instance that is associated with the databaseType (or null) */ @Nullable static Database getDatabase(final String databaseType) { Database database = null; if (Strings.hasText(databaseType)) { - database = getMaxmindDatabase(databaseType); + final String databaseTypeLowerCase = databaseType.toLowerCase(Locale.ROOT); + if (databaseTypeLowerCase.startsWith(IPINFO_PREFIX)) { + database = getIpinfoDatabase(databaseTypeLowerCase); // all lower case! + } else { + // for historical reasons, fall back to assuming maxmind-like type parsing + database = getMaxmindDatabase(databaseType); + } } return database; } - @Nullable - static Function, IpDataLookup> getMaxmindLookup(final Database database) { - return switch (database) { - case City -> MaxmindIpDataLookups.City::new; - case Country -> MaxmindIpDataLookups.Country::new; - case Asn -> MaxmindIpDataLookups.Asn::new; - case AnonymousIp -> MaxmindIpDataLookups.AnonymousIp::new; - case ConnectionType -> MaxmindIpDataLookups.ConnectionType::new; - case Domain -> MaxmindIpDataLookups.Domain::new; - case Enterprise -> MaxmindIpDataLookups.Enterprise::new; - case Isp -> MaxmindIpDataLookups.Isp::new; - default -> null; - }; - } - static IpDataLookupFactory get(final String databaseType, final String databaseFile) { final Database database = getDatabase(databaseType); if (database == null) { throw new IllegalArgumentException("Unsupported database type [" + databaseType + "] for file [" + databaseFile + "]"); } - final Function, IpDataLookup> factoryMethod = getMaxmindLookup(database); + final Function, IpDataLookup> factoryMethod; + final String databaseTypeLowerCase = databaseType.toLowerCase(Locale.ROOT); + if (databaseTypeLowerCase.startsWith(IPINFO_PREFIX)) { + factoryMethod = getIpinfoLookup(database); + } else { + // for historical reasons, fall back to assuming maxmind-like types + factoryMethod = getMaxmindLookup(database); + } if (factoryMethod == null) { throw new IllegalArgumentException("Unsupported database type [" + databaseType + "] for file [" + databaseFile + "]"); diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpinfoIpDataLookups.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpinfoIpDataLookups.java index efc6734b3bd93..5a13ea93ff032 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpinfoIpDataLookups.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpinfoIpDataLookups.java @@ -23,10 +23,14 @@ import java.io.IOException; import java.net.InetAddress; +import java.util.Arrays; import java.util.HashMap; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; /** * A collection of {@link IpDataLookup} implementations for IPinfo databases @@ -43,6 +47,81 @@ private IpinfoIpDataLookups() { // prefix dispatch and checks case-insensitive, so that works out nicely static final String IPINFO_PREFIX = "ipinfo"; + private static final Set IPINFO_TYPE_STOP_WORDS = Set.of( + "ipinfo", + "extended", + "free", + "generic", + "ip", + "sample", + "standard", + "mmdb" + ); + + /** + * Cleans up the database_type String from an ipinfo database by splitting on punctuation, removing stop words, and then joining + * with an underscore. + *

+ * e.g. "ipinfo free_foo_sample.mmdb" -> "foo" + * + * @param type the database_type from an ipinfo database + * @return a cleaned up database_type string + */ + // n.b. this is just based on observation of the types from a survey of such databases -- it's like browser user agent sniffing, + // there aren't necessarily any amazing guarantees about this behavior + static String ipinfoTypeCleanup(String type) { + List parts = Arrays.asList(type.split("[ _.]")); + return parts.stream().filter((s) -> IPINFO_TYPE_STOP_WORDS.contains(s) == false).collect(Collectors.joining("_")); + } + + @Nullable + static Database getIpinfoDatabase(final String databaseType) { + // for ipinfo the database selection is more along the lines of user-agent sniffing than + // string-based dispatch. the specific database_type strings could change in the future, + // hence the somewhat loose nature of this checking. + + final String cleanedType = ipinfoTypeCleanup(databaseType); + + // early detection on any of the 'extended' types + if (databaseType.contains("extended")) { + // which are not currently supported + logger.trace("returning null for unsupported database_type [{}]", databaseType); + return null; + } + + // early detection on 'country_asn' so the 'country' and 'asn' checks don't get faked out + if (cleanedType.contains("country_asn")) { + // but it's not currently supported + logger.trace("returning null for unsupported database_type [{}]", databaseType); + return null; + } + + if (cleanedType.contains("asn")) { + return Database.AsnV2; + } else if (cleanedType.contains("country")) { + return Database.CountryV2; + } else if (cleanedType.contains("location")) { // note: catches 'location' and 'geolocation' ;) + return Database.CityV2; + } else if (cleanedType.contains("privacy")) { + return Database.PrivacyDetection; + } else { + // no match was found + logger.trace("returning null for unsupported database_type [{}]", databaseType); + return null; + } + } + + @Nullable + static Function, IpDataLookup> getIpinfoLookup(final Database database) { + return switch (database) { + case AsnV2 -> IpinfoIpDataLookups.Asn::new; + case CountryV2 -> IpinfoIpDataLookups.Country::new; + case CityV2 -> IpinfoIpDataLookups.Geolocation::new; + case PrivacyDetection -> IpinfoIpDataLookups.PrivacyDetection::new; + default -> null; + }; + } + /** * Lax-ly parses a string that (ideally) looks like 'AS123' into a Long like 123L (or null, if such parsing isn't possible). * @param asn a potentially empty (or null) ASN string that is expected to contain 'AS' and then a parsable long diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/MaxmindIpDataLookups.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/MaxmindIpDataLookups.java index 4297413073e52..8bc74c0e4aac4 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/MaxmindIpDataLookups.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/MaxmindIpDataLookups.java @@ -26,6 +26,8 @@ import com.maxmind.geoip2.record.Postal; import com.maxmind.geoip2.record.Subdivision; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.common.network.InetAddresses; import org.elasticsearch.common.network.NetworkAddress; import org.elasticsearch.core.Nullable; @@ -37,6 +39,7 @@ import java.util.Locale; import java.util.Map; import java.util.Set; +import java.util.function.Function; /** * A collection of {@link IpDataLookup} implementations for MaxMind databases @@ -47,11 +50,63 @@ private MaxmindIpDataLookups() { // utility class } + private static final Logger logger = LogManager.getLogger(MaxmindIpDataLookups.class); + // the actual prefixes from the metadata are cased like the literal strings, but // prefix dispatch and checks case-insensitive, so the actual constants are lowercase static final String GEOIP2_PREFIX = "GeoIP2".toLowerCase(Locale.ROOT); static final String GEOLITE2_PREFIX = "GeoLite2".toLowerCase(Locale.ROOT); + // note: the secondary dispatch on suffix happens to be case sensitive + private static final String CITY_DB_SUFFIX = "-City"; + private static final String COUNTRY_DB_SUFFIX = "-Country"; + private static final String ASN_DB_SUFFIX = "-ASN"; + private static final String ANONYMOUS_IP_DB_SUFFIX = "-Anonymous-IP"; + private static final String CONNECTION_TYPE_DB_SUFFIX = "-Connection-Type"; + private static final String DOMAIN_DB_SUFFIX = "-Domain"; + private static final String ENTERPRISE_DB_SUFFIX = "-Enterprise"; + private static final String ISP_DB_SUFFIX = "-ISP"; + + @Nullable + static Database getMaxmindDatabase(final String databaseType) { + if (databaseType.endsWith(CITY_DB_SUFFIX)) { + return Database.City; + } else if (databaseType.endsWith(COUNTRY_DB_SUFFIX)) { + return Database.Country; + } else if (databaseType.endsWith(ASN_DB_SUFFIX)) { + return Database.Asn; + } else if (databaseType.endsWith(ANONYMOUS_IP_DB_SUFFIX)) { + return Database.AnonymousIp; + } else if (databaseType.endsWith(CONNECTION_TYPE_DB_SUFFIX)) { + return Database.ConnectionType; + } else if (databaseType.endsWith(DOMAIN_DB_SUFFIX)) { + return Database.Domain; + } else if (databaseType.endsWith(ENTERPRISE_DB_SUFFIX)) { + return Database.Enterprise; + } else if (databaseType.endsWith(ISP_DB_SUFFIX)) { + return Database.Isp; + } else { + // no match was found + logger.trace("returning null for unsupported database_type [{}]", databaseType); + return null; + } + } + + @Nullable + static Function, IpDataLookup> getMaxmindLookup(final Database database) { + return switch (database) { + case City -> MaxmindIpDataLookups.City::new; + case Country -> MaxmindIpDataLookups.Country::new; + case Asn -> MaxmindIpDataLookups.Asn::new; + case AnonymousIp -> MaxmindIpDataLookups.AnonymousIp::new; + case ConnectionType -> MaxmindIpDataLookups.ConnectionType::new; + case Domain -> MaxmindIpDataLookups.Domain::new; + case Enterprise -> MaxmindIpDataLookups.Enterprise::new; + case Isp -> MaxmindIpDataLookups.Isp::new; + default -> null; + }; + } + static class AnonymousIp extends AbstractBase { AnonymousIp(final Set properties) { super( diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java index e96bdbd6314b2..640480ed277c5 100644 --- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java @@ -27,6 +27,7 @@ import static org.elasticsearch.ingest.IngestDocumentMatcher.assertIngestDocument; import static org.elasticsearch.ingest.geoip.GeoIpProcessor.GEOIP_TYPE; +import static org.elasticsearch.ingest.geoip.GeoIpProcessor.IP_LOCATION_TYPE; import static org.elasticsearch.ingest.geoip.GeoIpTestUtils.copyDatabase; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -37,10 +38,6 @@ public class GeoIpProcessorTests extends ESTestCase { - private static IpDataLookup ipDataLookupAll(final Database database) { - return IpDataLookupFactories.getMaxmindLookup(database).apply(database.properties()); - } - // a temporary directory that mmdb files can be copied to and read from private Path tmpDir; @@ -54,6 +51,66 @@ public void cleanup() throws IOException { IOUtils.rm(tmpDir); } + public void testMaxmindCity() throws Exception { + String ip = "2602:306:33d3:8000::3257:9652"; + GeoIpProcessor processor = new GeoIpProcessor( + GEOIP_TYPE, // n.b. this is a "geoip" processor + randomAlphaOfLength(10), + null, + "source_field", + loader("GeoLite2-City.mmdb"), + () -> true, + "target_field", + getMaxmindCityLookup(), + false, + false, + "filename" + ); + + Map document = new HashMap<>(); + document.put("source_field", ip); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + processor.execute(ingestDocument); + + assertThat(ingestDocument.getSourceAndMetadata().get("source_field"), equalTo(ip)); + @SuppressWarnings("unchecked") + Map data = (Map) ingestDocument.getSourceAndMetadata().get("target_field"); + assertThat(data, notNullValue()); + assertThat(data.get("ip"), equalTo(ip)); + assertThat(data.get("city_name"), equalTo("Homestead")); + // see MaxmindIpDataLookupsTests for more tests of the data lookup behavior + } + + public void testIpinfoGeolocation() throws Exception { + String ip = "13.107.39.238"; + GeoIpProcessor processor = new GeoIpProcessor( + IP_LOCATION_TYPE, // n.b. this is an "ip_location" processor + randomAlphaOfLength(10), + null, + "source_field", + loader("ipinfo/ip_geolocation_sample.mmdb"), + () -> true, + "target_field", + getIpinfoGeolocationLookup(), + false, + false, + "filename" + ); + + Map document = new HashMap<>(); + document.put("source_field", ip); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + processor.execute(ingestDocument); + + assertThat(ingestDocument.getSourceAndMetadata().get("source_field"), equalTo(ip)); + @SuppressWarnings("unchecked") + Map data = (Map) ingestDocument.getSourceAndMetadata().get("target_field"); + assertThat(data, notNullValue()); + assertThat(data.get("ip"), equalTo(ip)); + assertThat(data.get("city_name"), equalTo("Des Moines")); + // see IpinfoIpDataLookupsTests for more tests of the data lookup behavior + } + public void testNullValueWithIgnoreMissing() throws Exception { GeoIpProcessor processor = new GeoIpProcessor( GEOIP_TYPE, @@ -63,7 +120,7 @@ public void testNullValueWithIgnoreMissing() throws Exception { loader("GeoLite2-City.mmdb"), () -> true, "target_field", - ipDataLookupAll(Database.City), + getMaxmindCityLookup(), true, false, "filename" @@ -86,7 +143,7 @@ public void testNonExistentWithIgnoreMissing() throws Exception { loader("GeoLite2-City.mmdb"), () -> true, "target_field", - ipDataLookupAll(Database.City), + getMaxmindCityLookup(), true, false, "filename" @@ -106,7 +163,7 @@ public void testNullWithoutIgnoreMissing() { loader("GeoLite2-City.mmdb"), () -> true, "target_field", - ipDataLookupAll(Database.City), + getMaxmindCityLookup(), false, false, "filename" @@ -129,7 +186,7 @@ public void testNonExistentWithoutIgnoreMissing() { loader("GeoLite2-City.mmdb"), () -> true, "target_field", - ipDataLookupAll(Database.City), + getMaxmindCityLookup(), false, false, "filename" @@ -149,7 +206,7 @@ public void testAddressIsNotInTheDatabase() throws Exception { loader("GeoLite2-City.mmdb"), () -> true, "target_field", - ipDataLookupAll(Database.City), + getMaxmindCityLookup(), false, false, "filename" @@ -174,7 +231,7 @@ public void testExceptionPropagates() { loader("GeoLite2-City.mmdb"), () -> true, "target_field", - ipDataLookupAll(Database.City), + getMaxmindCityLookup(), false, false, "filename" @@ -196,7 +253,7 @@ public void testListAllValid() throws Exception { loader("GeoLite2-City.mmdb"), () -> true, "target_field", - ipDataLookupAll(Database.City), + getMaxmindCityLookup(), false, false, "filename" @@ -208,11 +265,11 @@ public void testListAllValid() throws Exception { processor.execute(ingestDocument); @SuppressWarnings("unchecked") - List> geoData = (List>) ingestDocument.getSourceAndMetadata().get("target_field"); - assertThat(geoData, notNullValue()); - assertThat(geoData.size(), equalTo(2)); - assertThat(geoData.get(0).get("location"), equalTo(Map.of("lat", 37.751d, "lon", -97.822d))); - assertThat(geoData.get(1).get("city_name"), equalTo("Hoensbroek")); + List> data = (List>) ingestDocument.getSourceAndMetadata().get("target_field"); + assertThat(data, notNullValue()); + assertThat(data.size(), equalTo(2)); + assertThat(data.get(0).get("location"), equalTo(Map.of("lat", 37.751d, "lon", -97.822d))); + assertThat(data.get(1).get("city_name"), equalTo("Hoensbroek")); } public void testListPartiallyValid() throws Exception { @@ -224,7 +281,7 @@ public void testListPartiallyValid() throws Exception { loader("GeoLite2-City.mmdb"), () -> true, "target_field", - ipDataLookupAll(Database.City), + getMaxmindCityLookup(), false, false, "filename" @@ -236,11 +293,11 @@ public void testListPartiallyValid() throws Exception { processor.execute(ingestDocument); @SuppressWarnings("unchecked") - List> geoData = (List>) ingestDocument.getSourceAndMetadata().get("target_field"); - assertThat(geoData, notNullValue()); - assertThat(geoData.size(), equalTo(2)); - assertThat(geoData.get(0).get("location"), equalTo(Map.of("lat", 37.751d, "lon", -97.822d))); - assertThat(geoData.get(1), nullValue()); + List> data = (List>) ingestDocument.getSourceAndMetadata().get("target_field"); + assertThat(data, notNullValue()); + assertThat(data.size(), equalTo(2)); + assertThat(data.get(0).get("location"), equalTo(Map.of("lat", 37.751d, "lon", -97.822d))); + assertThat(data.get(1), nullValue()); } public void testListNoMatches() throws Exception { @@ -252,7 +309,7 @@ public void testListNoMatches() throws Exception { loader("GeoLite2-City.mmdb"), () -> true, "target_field", - ipDataLookupAll(Database.City), + getMaxmindCityLookup(), false, false, "filename" @@ -272,7 +329,7 @@ public void testListDatabaseReferenceCounting() throws Exception { GeoIpProcessor processor = new GeoIpProcessor(GEOIP_TYPE, randomAlphaOfLength(10), null, "source_field", () -> { loader.preLookup(); return loader; - }, () -> true, "target_field", ipDataLookupAll(Database.City), false, false, "filename"); + }, () -> true, "target_field", getMaxmindCityLookup(), false, false, "filename"); Map document = new HashMap<>(); document.put("source_field", List.of("8.8.8.8", "82.171.64.0")); @@ -280,11 +337,11 @@ public void testListDatabaseReferenceCounting() throws Exception { processor.execute(ingestDocument); @SuppressWarnings("unchecked") - List> geoData = (List>) ingestDocument.getSourceAndMetadata().get("target_field"); - assertThat(geoData, notNullValue()); - assertThat(geoData.size(), equalTo(2)); - assertThat(geoData.get(0).get("location"), equalTo(Map.of("lat", 37.751d, "lon", -97.822d))); - assertThat(geoData.get(1).get("city_name"), equalTo("Hoensbroek")); + List> data = (List>) ingestDocument.getSourceAndMetadata().get("target_field"); + assertThat(data, notNullValue()); + assertThat(data.size(), equalTo(2)); + assertThat(data.get(0).get("location"), equalTo(Map.of("lat", 37.751d, "lon", -97.822d))); + assertThat(data.get(1).get("city_name"), equalTo("Hoensbroek")); // Check the loader's reference count and attempt to close assertThat(loader.current(), equalTo(0)); @@ -301,7 +358,7 @@ public void testListFirstOnly() throws Exception { loader("GeoLite2-City.mmdb"), () -> true, "target_field", - ipDataLookupAll(Database.City), + getMaxmindCityLookup(), false, true, "filename" @@ -313,9 +370,9 @@ public void testListFirstOnly() throws Exception { processor.execute(ingestDocument); @SuppressWarnings("unchecked") - Map geoData = (Map) ingestDocument.getSourceAndMetadata().get("target_field"); - assertThat(geoData, notNullValue()); - assertThat(geoData.get("location"), equalTo(Map.of("lat", 37.751d, "lon", -97.822d))); + Map data = (Map) ingestDocument.getSourceAndMetadata().get("target_field"); + assertThat(data, notNullValue()); + assertThat(data.get("location"), equalTo(Map.of("lat", 37.751d, "lon", -97.822d))); } public void testListFirstOnlyNoMatches() throws Exception { @@ -327,7 +384,7 @@ public void testListFirstOnlyNoMatches() throws Exception { loader("GeoLite2-City.mmdb"), () -> true, "target_field", - ipDataLookupAll(Database.City), + getMaxmindCityLookup(), false, true, "filename" @@ -350,7 +407,7 @@ public void testInvalidDatabase() throws Exception { loader("GeoLite2-City.mmdb"), () -> false, "target_field", - ipDataLookupAll(Database.City), + getMaxmindCityLookup(), false, true, "filename" @@ -374,7 +431,7 @@ public void testNoDatabase() throws Exception { () -> null, () -> true, "target_field", - ipDataLookupAll(Database.City), + getMaxmindCityLookup(), false, false, "GeoLite2-City" @@ -398,7 +455,7 @@ public void testNoDatabase_ignoreMissing() throws Exception { () -> null, () -> true, "target_field", - ipDataLookupAll(Database.City), + getMaxmindCityLookup(), true, false, "GeoLite2-City" @@ -412,13 +469,24 @@ public void testNoDatabase_ignoreMissing() throws Exception { assertIngestDocument(originalIngestDocument, ingestDocument); } + private static IpDataLookup getMaxmindCityLookup() { + final var database = Database.City; + return MaxmindIpDataLookups.getMaxmindLookup(database).apply(database.properties()); + } + + private static IpDataLookup getIpinfoGeolocationLookup() { + final var database = Database.CityV2; + return IpinfoIpDataLookups.getIpinfoLookup(database).apply(database.properties()); + } + private CheckedSupplier loader(final String path) { var loader = loader(path, null); return () -> loader; } private DatabaseReaderLazyLoader loader(final String databaseName, final AtomicBoolean closed) { - Path path = tmpDir.resolve(databaseName); + int last = databaseName.lastIndexOf("/"); + final Path path = tmpDir.resolve(last == -1 ? databaseName : databaseName.substring(last + 1)); copyDatabase(databaseName, path); final GeoIpCache cache = new GeoIpCache(1000); diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IpinfoIpDataLookupsTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IpinfoIpDataLookupsTests.java index 4167170567f52..e998748efbcad 100644 --- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IpinfoIpDataLookupsTests.java +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IpinfoIpDataLookupsTests.java @@ -31,12 +31,14 @@ import static java.util.Map.entry; import static org.elasticsearch.ingest.geoip.GeoIpTestUtils.copyDatabase; +import static org.elasticsearch.ingest.geoip.IpinfoIpDataLookups.ipinfoTypeCleanup; import static org.elasticsearch.ingest.geoip.IpinfoIpDataLookups.parseAsn; import static org.elasticsearch.ingest.geoip.IpinfoIpDataLookups.parseBoolean; import static org.elasticsearch.ingest.geoip.IpinfoIpDataLookups.parseLocationDouble; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.nullValue; import static org.hamcrest.Matchers.startsWith; @@ -308,6 +310,107 @@ public void testPrivacyDetectionInvariants() { } } + public void testIpinfoTypeCleanup() { + Map typesToCleanedTypes = Map.ofEntries( + // database_type strings from upstream: + // abuse.mmdb + entry("ipinfo standard_abuse_mmdb_v4.mmdb", "abuse_v4"), + // asn.mmdb + entry("ipinfo generic_asn_mmdb_v4.mmdb", "asn_v4"), + // carrier.mmdb + entry("ipinfo standard_carrier_mmdb.mmdb", "carrier"), + // location_extended_v2.mmdb + entry("ipinfo extended_location_v2.mmdb", "location_v2"), + // privacy_extended_v2.mmdb + entry("ipinfo extended_privacy_v2.mmdb", "privacy_v2"), + // standard_company.mmdb + entry("ipinfo standard_company.mmdb", "company"), + // standard_ip_hosted_domains_sample.mmdb + entry("ipinfo standard_ip_hosted_domains_sample.mmdb", "hosted_domains"), + // standard_location.mmdb + entry("ipinfo standard_location_mmdb_v4.mmdb", "location_v4"), + // standard_privacy.mmdb + entry("ipinfo standard_privacy.mmdb", "privacy"), + + // database_type strings from test files: + // ip_asn_sample.mmdb + entry("ipinfo ip_asn_sample.mmdb", "asn"), + // ip_country_asn_sample.mmdb + entry("ipinfo ip_country_asn_sample.mmdb", "country_asn"), + // ip_geolocation_sample.mmdb + entry("ipinfo ip_geolocation_sample.mmdb", "geolocation"), + // abuse_contact_sample.mmdb + entry("ipinfo abuse_contact_sample.mmdb", "abuse_contact"), + // asn_sample.mmdb + entry("ipinfo asn_sample.mmdb", "asn"), + // hosted_domains_sample.mmdb + entry("ipinfo hosted_domains_sample.mmdb", "hosted_domains"), + // ip_carrier_sample.mmdb + entry("ipinfo ip_carrier_sample.mmdb", "carrier"), + // ip_company_sample.mmdb + entry("ipinfo ip_company_sample.mmdb", "company"), + // ip_country_sample.mmdb + entry("ipinfo ip_country_sample.mmdb", "country"), + // ip_geolocation_extended_ipv4_sample.mmdb + entry("ipinfo ip_geolocation_extended_ipv4_sample.mmdb", "geolocation_ipv4"), + // ip_geolocation_extended_ipv6_sample.mmdb + entry("ipinfo ip_geolocation_extended_ipv6_sample.mmdb", "geolocation_ipv6"), + // ip_geolocation_extended_sample.mmdb + entry("ipinfo ip_geolocation_extended_sample.mmdb", "geolocation"), + // ip_rdns_domains_sample.mmdb + entry("ipinfo ip_rdns_domains_sample.mmdb", "rdns_domains"), + // ip_rdns_hostnames_sample.mmdb + entry("ipinfo ip_rdns_hostnames_sample.mmdb", "rdns_hostnames"), + // privacy_detection_extended_sample.mmdb + entry("ipinfo privacy_detection_extended_sample.mmdb", "privacy_detection"), + // privacy_detection_sample.mmdb + entry("ipinfo privacy_detection_sample.mmdb", "privacy_detection"), + + // database_type strings from downloaded (free) files: + // asn.mmdb + entry("ipinfo generic_asn_free.mmdb", "asn"), + // country.mmdb + entry("ipinfo generic_country_free.mmdb", "country"), + // country_asn.mmdb + entry("ipinfo generic_country_free_country_asn.mmdb", "country_country_asn") + ); + + for (var entry : typesToCleanedTypes.entrySet()) { + String type = entry.getKey(); + String cleanedType = entry.getValue(); + assertThat(ipinfoTypeCleanup(type), equalTo(cleanedType)); + } + } + + public void testDatabaseTypeParsing() throws IOException { + // this test is a little bit overloaded -- it's testing that we're getting the expected sorts of + // database_type strings from these files, *and* it's also testing that we dispatch on those strings + // correctly and associated those files with the correct high-level Elasticsearch Database type. + // down the road it would probably make sense to split these out and find a better home for some of the + // logic, but for now it's probably more valuable to have the test *somewhere* than to get especially + // pedantic about where precisely it should be. + + copyDatabase("ipinfo/ip_asn_sample.mmdb", tmpDir.resolve("ip_asn_sample.mmdb")); + copyDatabase("ipinfo/ip_geolocation_sample.mmdb", tmpDir.resolve("ip_geolocation_sample.mmdb")); + copyDatabase("ipinfo/asn_sample.mmdb", tmpDir.resolve("asn_sample.mmdb")); + copyDatabase("ipinfo/ip_country_sample.mmdb", tmpDir.resolve("ip_country_sample.mmdb")); + copyDatabase("ipinfo/privacy_detection_sample.mmdb", tmpDir.resolve("privacy_detection_sample.mmdb")); + + assertThat(parseDatabaseFromType("ip_asn_sample.mmdb"), is(Database.AsnV2)); + assertThat(parseDatabaseFromType("ip_geolocation_sample.mmdb"), is(Database.CityV2)); + assertThat(parseDatabaseFromType("asn_sample.mmdb"), is(Database.AsnV2)); + assertThat(parseDatabaseFromType("ip_country_sample.mmdb"), is(Database.CountryV2)); + assertThat(parseDatabaseFromType("privacy_detection_sample.mmdb"), is(Database.PrivacyDetection)); + + // additional cases where we're bailing early on types we don't support + assertThat(IpDataLookupFactories.getDatabase("ipinfo ip_country_asn_sample.mmdb"), nullValue()); + assertThat(IpDataLookupFactories.getDatabase("ipinfo privacy_detection_extended_sample.mmdb"), nullValue()); + } + + private Database parseDatabaseFromType(String databaseFile) throws IOException { + return IpDataLookupFactories.getDatabase(MMDBUtil.getDatabaseType(tmpDir.resolve(databaseFile))); + } + private static void assertDatabaseInvariants(final Path databasePath, final BiConsumer> rowConsumer) { try (Reader reader = new Reader(pathToFile(databasePath))) { Networks networks = reader.networks(Map.class);