Skip to content

Commit

Permalink
Remove default maxmind geoip databases from distribution (#78362)
Browse files Browse the repository at this point in the history
* Adjusted integration tests to use geoip test fixture or to use test databases provided via config dirs (for qa module / docs).
* Kept the geolite2-databases dependency for most of the unit tests only.
* Made fallback_to_default_databases parameter on geoip processor a noop and emit deprecation warning upon using it.
* If no geoip databases are available yet to a node, then the geoip processor factory returns a processor implementation that flags documents to indicate that databases are unavailable. This allows these documents to be reindexed later with a pipeline. These documents will have a tags string array field, which contains a string _geoip_database_unavailable_{database_name} for each missing database in a pipeline.
* Added reload pipeline capabilities in IngestService, so that when databases are available again on a node, pipelines with geoip processor definitions can be reloaded.

Relates to #68920
  • Loading branch information
martijnvg authored Oct 13, 2021
1 parent ae50acd commit 04e5823
Show file tree
Hide file tree
Showing 26 changed files with 715 additions and 339 deletions.
9 changes: 7 additions & 2 deletions docs/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,6 @@ testClusters.matching { it.name == "integTest"}.configureEach {
if (singleNode().testDistribution == DEFAULT) {
setting 'xpack.license.self_generated.type', 'trial'
setting 'indices.lifecycle.history_index_enabled', 'false'
setting 'ingest.geoip.downloader.enabled', 'false'
systemProperty 'es.geoip_v2_feature_flag_enabled', 'true'
keystorePassword 'keystore-password'
}

Expand All @@ -89,6 +87,9 @@ testClusters.matching { it.name == "integTest"}.configureEach {
// Whitelist reindexing from the local node so we can test it.
setting 'reindex.remote.whitelist', '127.0.0.1:*'

extraConfigFile 'ingest-geoip/GeoLite2-City.mmdb', file("${project.projectDir}/src/test/resources/GeoLite2-City.mmdb")
extraConfigFile 'ingest-geoip/GeoLite2-Country.mmdb', file("${project.projectDir}/src/test/resources/GeoLite2-Country.mmdb")

// TODO: remove this once cname is prepended to transport.publish_address by default in 8.0
systemProperty 'es.transport.cname_in_publish_address', 'true'

Expand All @@ -114,6 +115,10 @@ tasks.named("integTest").configure {
}
}

tasks.named("forbiddenPatterns").configure {
exclude '**/*.mmdb'
}

tasks.named("buildRestTests").configure {
docs = docsFileTree
}
Expand Down
24 changes: 12 additions & 12 deletions docs/reference/ingest/common-log-format-example.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ PUT _index_template/my-data-stream-template
----
POST my-data-stream/_doc?pipeline=my-pipeline
{
"message": "212.87.37.154 - - [05/May/2099:16:21:15 +0000] \"GET /favicon.ico HTTP/1.1\" 200 3638 \"-\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36\""
"message": "89.160.20.128 - - [05/May/2099:16:21:15 +0000] \"GET /favicon.ico HTTP/1.1\" 200 3638 \"-\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36\""
}
----
// TEST[s/my-pipeline/my-pipeline&refresh=wait_for/]
Expand Down Expand Up @@ -216,21 +216,21 @@ The API returns:
"version": "1.1"
},
"source": {
"ip": "212.87.37.154",
"ip": "89.160.20.128",
"geo": {
"continent_name": "Europe",
"region_iso_code": "DE-BE",
"city_name": "Berlin",
"country_iso_code": "DE",
"country_name": "Germany",
"region_name": "Land Berlin",
"location": {
"lon": 13.4978,
"lat": 52.411
"continent_name" : "Europe",
"country_name" : "Sweden",
"country_iso_code" : "SE",
"city_name" : "Linköping",
"region_iso_code" : "SE-E",
"region_name" : "Östergötland County",
"location" : {
"lon" : 15.6167,
"lat" : 58.4167
}
}
},
"message": "212.87.37.154 - - [05/May/2099:16:21:15 +0000] \"GET /favicon.ico HTTP/1.1\" 200 3638 \"-\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36\"",
"message": "89.160.20.128 - - [05/May/2099:16:21:15 +0000] \"GET /favicon.ico HTTP/1.1\" 200 3638 \"-\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36\"",
"url": {
"original": "/favicon.ico"
},
Expand Down
46 changes: 26 additions & 20 deletions docs/reference/ingest/processors/geoip.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ PUT _ingest/pipeline/geoip
}
PUT my-index-000001/_doc/my_id?pipeline=geoip
{
"ip": "8.8.8.8"
"ip": "89.160.20.128"
}
GET my-index-000001/_doc/my_id
--------------------------------------------------
Expand All @@ -86,12 +86,15 @@ Which returns:
"_seq_no": 55,
"_primary_term": 1,
"_source": {
"ip": "8.8.8.8",
"ip": "89.160.20.128",
"geoip": {
"continent_name": "North America",
"country_name": "United States",
"country_iso_code": "US",
"location": { "lat": 37.751, "lon": -97.822 }
"continent_name": "Europe",
"country_name": "Sweden",
"country_iso_code": "SE",
"city_name" : "Linköping",
"region_iso_code" : "SE-E",
"region_name" : "Östergötland County",
"location": { "lat": 58.4167, "lon": 15.6167 }
}
}
}
Expand Down Expand Up @@ -119,7 +122,7 @@ PUT _ingest/pipeline/geoip
}
PUT my-index-000001/_doc/my_id?pipeline=geoip
{
"ip": "8.8.8.8"
"ip": "89.160.20.128"
}
GET my-index-000001/_doc/my_id
--------------------------------------------------
Expand All @@ -136,11 +139,11 @@ returns this:
"_seq_no": 65,
"_primary_term": 1,
"_source": {
"ip": "8.8.8.8",
"ip": "89.160.20.128",
"geo": {
"continent_name": "North America",
"country_name": "United States",
"country_iso_code": "US"
"continent_name": "Europe",
"country_name": "Sweden",
"country_iso_code": "SE"
}
}
}
Expand Down Expand Up @@ -236,7 +239,7 @@ PUT _ingest/pipeline/geoip
PUT my_ip_locations/_doc/1?refresh=true&pipeline=geoip
{
"ip": "8.8.8.8"
"ip": "89.160.20.128"
}
GET /my_ip_locations/_search
Expand All @@ -250,8 +253,8 @@ GET /my_ip_locations/_search
"geo_distance": {
"distance": "1m",
"geoip.location": {
"lon": -97.822,
"lat": 37.751
"lon": 15.6167,
"lat": 58.4167
}
}
}
Expand Down Expand Up @@ -285,15 +288,18 @@ GET /my_ip_locations/_search
"_score" : 1.0,
"_source" : {
"geoip" : {
"continent_name" : "North America",
"country_name" : "United States",
"country_iso_code" : "US",
"continent_name" : "Europe",
"country_name" : "Sweden",
"country_iso_code" : "SE",
"city_name" : "Linköping",
"region_iso_code" : "SE-E",
"region_name" : "Östergötland County",
"location" : {
"lon" : -97.822,
"lat" : 37.751
"lon" : 15.6167,
"lat" : 58.4167
}
},
"ip" : "8.8.8.8"
"ip" : "89.160.20.128"
}
}
]
Expand Down
28 changes: 28 additions & 0 deletions docs/reference/migration/migrate_8_0/ingest.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,32 @@ Common Schema (ECS)] fields, regardless of the `ecs` value.
To avoid deprecation warnings, remove the parameter from your ingest pipelines.
If a pipeline specifies an `ecs` value, the value is ignored.
====

.The default Maxmind geoip databases have been removed.
[%collapsible]
====
*Details* +
The default Maxmind geoip databases that shipped by default with Elasticsearch
have been removed. These databases are outdated and stale, and using these
databases will likely result in incorrect geoip lookups.
By default since 7.13, these pre-packaged geoip databases
were used in case no databases were specified in the config directory or before
the geoip downloader downloaded the geoip databases. When the geoip database
downloader completed downloading the new databases, these pre-packaged
databases were no longer used.
*Impact* +
If the geoip downloader is disabled and no geoip databases are provided
in the config directory of each ingest node, then the geoip processor will
no longer perform geoip lookups and will instead tag these documents to indicate
that the requested database is no longer available.
After a cluster has been started and before the geoip downloader has completed
downloading the most up-to-date databases, the geoip processor will not perform
any geoip lookups and will tag documents to indicate that the requested database
is not available.
After the geoip downloader has completed downloading the most up-to-date databases,
the geoip processor will function as normal. The window of time during which the
geoip processor can't do geoip lookups after cluster startup should be very small.
====
//end::notable-breaking-changes[]
Binary file added docs/src/test/resources/GeoLite2-City.mmdb
Binary file not shown.
Binary file added docs/src/test/resources/GeoLite2-Country.mmdb
Binary file not shown.
24 changes: 16 additions & 8 deletions modules/ingest-geoip/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ apply plugin: 'elasticsearch.yaml-rest-compat-test'
apply plugin: 'elasticsearch.internal-cluster-test'

esplugin {
description 'Ingest processor that uses lookup geo data based on IP adresses using the MaxMind geo database'
description 'Ingest processor that uses lookup geo data based on IP addresses using the MaxMind geo database'
classname 'org.elasticsearch.ingest.geoip.IngestGeoIpPlugin'
}

Expand Down Expand Up @@ -57,14 +57,7 @@ tasks.named("internalClusterTest").configure {
}
}

tasks.register("copyDefaultGeoIp2DatabaseFiles", Copy) {
from { zipTree(configurations.testCompileClasspath.files.find { it.name.contains('geolite2-databases') }) }
into "${project.buildDir}/ingest-geoip"
include "*.mmdb"
}

tasks.named("bundlePlugin").configure {
dependsOn("copyDefaultGeoIp2DatabaseFiles")
from("${project.buildDir}/ingest-geoip") {
into '/'
}
Expand Down Expand Up @@ -107,3 +100,18 @@ tasks.named("dependencyLicenses").configure {
mapping from: /maxmind-db.*/, to: 'maxmind-db-reader'
ignoreFile 'elastic-geoip-database-service-agreement-LICENSE.txt'
}

testClusters.configureEach {
// Needed for database downloader, uses delete-by-query to cleanup old databases from org.elasticsearch.ingest.geoip database system index
module ':modules:reindex'
// The downloader is enabled by default, but the build disables it by default in test clusters.
// In this module, however, the downloader should be enabled by default.
systemProperty 'ingest.geoip.downloader.enabled.default', 'true'
if (useFixture) {
setting 'ingest.geoip.downloader.endpoint', { "${-> fixtureAddress()}" }
}
}
tasks.named("yamlRestTestV7CompatTransform").configure { task ->
task.skipTestsByFilePattern("**/ingest_geoip/20_geoip_processor.yml", "from 8.0 yaml rest tests use geoip test fixture and default geoip are no longer packaged. In 7.x yaml tests used default databases which makes tests results very different, so skipping these tests")
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,25 @@
*/
package org.elasticsearch.ingest.geoip;

import org.apache.http.util.EntityUtils;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.core.PathUtils;
import org.elasticsearch.common.settings.SecureString;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.ThreadContext;
import org.elasticsearch.core.PathUtils;
import org.elasticsearch.xcontent.ObjectPath;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.xcontent.json.JsonXContent;
import org.elasticsearch.test.rest.ESRestTestCase;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;

import static org.hamcrest.Matchers.either;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.nullValue;

public class UpdateDatabasesIT extends ESRestTestCase {

Expand All @@ -35,8 +35,14 @@ public void test() throws Exception {
Request simulatePipelineRequest = new Request("POST", "/_ingest/pipeline/_simulate");
simulatePipelineRequest.setJsonEntity(body);
{
Map<String, Object> response = toMap(client().performRequest(simulatePipelineRequest));
assertThat(ObjectPath.eval("docs.0.doc._source.geoip.city_name", response), equalTo("Tumba"));
Map<String, Object> response = entityAsMap(client().performRequest(simulatePipelineRequest));
assertThat(ObjectPath.eval("docs.0.doc._source.tags.0", response), equalTo("_geoip_database_unavailable_GeoLite2-City.mmdb"));
}

// Ensure no config databases have been setup:
{
Map<?, ?> stats = getGeoIpStatsForSingleNode();
assertThat(stats, nullValue());
}

Path configPath = PathUtils.get(System.getProperty("tests.config.dir"));
Expand All @@ -46,14 +52,25 @@ public void test() throws Exception {
Files.copy(UpdateDatabasesIT.class.getResourceAsStream("/GeoLite2-City-Test.mmdb"),
ingestGeoipDatabaseDir.resolve("GeoLite2-City.mmdb"));

assertBusy(() -> {
Map<String, Object> response = toMap(client().performRequest(simulatePipelineRequest));
assertThat(ObjectPath.eval("docs.0.doc._source.geoip.city_name", response), equalTo("Linköping"));
});
// Ensure that a config database has been setup:
{
assertBusy(() -> {
Map<?, ?> stats = getGeoIpStatsForSingleNode();
assertThat(stats, notNullValue());
assertThat(stats.get("config_databases"), equalTo(List.of("GeoLite2-City.mmdb")));
});
}

Map<String, Object> response = entityAsMap(client().performRequest(simulatePipelineRequest));
assertThat(ObjectPath.eval("docs.0.doc._source.geoip.city_name", response), equalTo("Linköping"));
}

private static Map<String, Object> toMap(Response response) throws IOException {
return XContentHelper.convertToMap(JsonXContent.jsonXContent, EntityUtils.toString(response.getEntity()), false);
/**
 * Requests {@code GET /_ingest/geoip/stats} and returns the stats entry of the single node listed
 * under the {@code nodes} key of the response.
 * <p>
 * Asserts that the response lists either zero or one node.
 *
 * @return the stats map of the only listed node, or {@code null} when no node is listed
 * @throws IOException declared by this method; presumably propagated from the REST request
 */
private static Map<?, ?> getGeoIpStatsForSingleNode() throws IOException {
    Map<String, Object> statsResponse = entityAsMap(client().performRequest(new Request("GET", "/_ingest/geoip/stats")));
    Map<?, ?> perNodeStats = (Map<?, ?>) statsResponse.get("nodes");
    assertThat(perNodeStats.size(), either(equalTo(0)).or(equalTo(1)));
    if (perNodeStats.isEmpty()) {
        return null;
    }
    return (Map<?, ?>) perNodeStats.values().iterator().next();
}

@Override
Expand Down
Loading

0 comments on commit 04e5823

Please sign in to comment.