Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[7.x] Add support for .tgz files in GeoIpDownloader (#70725) #70976

Merged
merged 5 commits into from
Mar 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,12 @@
import org.elasticsearch.test.junit.annotations.TestLogging;
import org.junit.After;

import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
Expand All @@ -50,15 +49,12 @@
import java.util.stream.StreamSupport;
import java.util.zip.GZIPInputStream;

import static java.nio.file.StandardOpenOption.CREATE;
import static java.nio.file.StandardOpenOption.TRUNCATE_EXISTING;
import static java.nio.file.StandardOpenOption.WRITE;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.endsWith;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;

public class GeoIpDownloaderIT extends AbstractGeoIpIT {

Expand Down Expand Up @@ -124,15 +120,16 @@ public void testGeoIpDatabasesDownload() throws Exception {
data.add((byte[]) hit.getSourceAsMap().get("data"));
}

GZIPInputStream stream = new GZIPInputStream(new MultiByteArrayInputStream(data));
Path tempFile = createTempFile();
try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(tempFile, TRUNCATE_EXISTING, WRITE, CREATE))) {
byte[] bytes = new byte[4096];
int read;
while ((read = stream.read(bytes)) != -1) {
os.write(bytes, 0, read);
}
TarInputStream stream = new TarInputStream(new GZIPInputStream(new MultiByteArrayInputStream(data)));
TarInputStream.TarEntry entry;
while ((entry = stream.getNextEntry()) != null) {
if (entry.getName().endsWith(".mmdb")) {
break;
}
}

Path tempFile = createTempFile();
Files.copy(stream, tempFile, StandardCopyOption.REPLACE_EXISTING);
parseDatabase(tempFile);
} catch (Exception e) {
throw new AssertionError(e);
Expand All @@ -143,6 +140,7 @@ public void testGeoIpDatabasesDownload() throws Exception {

@TestLogging(value = "org.elasticsearch.ingest.geoip:TRACE", reason = "https://github.com/elastic/elasticsearch/issues/69972")
public void testUseGeoIpProcessorWithDownloadedDBs() throws Exception {
assumeTrue("only test with fixture to have stable results", ENDPOINT != null);
// setup:
BytesReference bytes;
try (XContentBuilder builder = JsonXContent.contentBuilder()) {
Expand Down Expand Up @@ -240,9 +238,11 @@ public void testUseGeoIpProcessorWithDownloadedDBs() throws Exception {
assertBusy(() -> {
for (Path geoipTmpDir : geoipTmpDirs) {
try (Stream<Path> list = Files.list(geoipTmpDir)) {
List<String> files = list.map(Path::toString).collect(Collectors.toList());
assertThat(files, containsInAnyOrder(endsWith("GeoLite2-City.mmdb"), endsWith("GeoLite2-Country.mmdb"),
endsWith("GeoLite2-ASN.mmdb")));
List<String> files = list.map(Path::getFileName).map(Path::toString).collect(Collectors.toList());
assertThat(files, containsInAnyOrder("GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "GeoLite2-ASN.mmdb",
"GeoLite2-City.mmdb_COPYRIGHT.txt","GeoLite2-Country.mmdb_COPYRIGHT.txt","GeoLite2-ASN.mmdb_COPYRIGHT.txt",
"GeoLite2-City.mmdb_LICENSE.txt","GeoLite2-Country.mmdb_LICENSE.txt","GeoLite2-ASN.mmdb_LICENSE.txt",
"GeoLite2-ASN.mmdb_README.txt"));
}
}
});
Expand All @@ -253,6 +253,7 @@ public void testUseGeoIpProcessorWithDownloadedDBs() throws Exception {
assertThat(simulateResponse.getPipelineId(), equalTo("_id"));
assertThat(simulateResponse.getResults().size(), equalTo(1));
SimulateDocumentBaseResult result = (SimulateDocumentBaseResult) simulateResponse.getResults().get(0);
assertThat(result.getFailure(), nullValue());
assertThat(result.getIngestDocument().getFieldValue("ip-city.city_name", String.class), equalTo("Linköping"));
assertThat(result.getIngestDocument().getFieldValue("ip-asn.organization_name", String.class), equalTo("Bredband2 AB"));
assertThat(result.getIngestDocument().getFieldValue("ip-country.country_name", String.class), equalTo("Sweden"));
Expand All @@ -265,7 +266,7 @@ public void testUseGeoIpProcessorWithDownloadedDBs() throws Exception {
assertBusy(() -> {
for (Path geoipTmpDir : geoipTmpDirs) {
try (Stream<Path> list = Files.list(geoipTmpDir)) {
List<String> files = list.map(Path::toString).collect(Collectors.toList());
List<String> files = list.map(Path::toString).filter(p -> p.endsWith(".mmdb")).collect(Collectors.toList());
assertThat(files, empty());
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.watcher.ResourceWatcherService;

import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
Expand Down Expand Up @@ -261,6 +262,25 @@ void retrieveAndUpdateDatabase(String databaseName, GeoIpTaskState.Metadata meta
decompress(databaseTmpGzFile, databaseTmpFile);

Path databaseFile = geoipTmpDirectory.resolve(databaseName);
// tarball contains <database_name>.mmdb, LICENSE.txt, COPYRIGHTS.txt and optional README.txt files.
// we store mmdb file as is and prepend database name to all other entries to avoid conflicts
try (TarInputStream is = new TarInputStream(new BufferedInputStream(Files.newInputStream(databaseTmpFile)))) {
TarInputStream.TarEntry entry;
while ((entry = is.getNextEntry()) != null) {
//there might be ./ entry in tar, we should skip it
if (entry.isNotFile()) {
continue;
}
// flatten structure, remove any directories present from the path (should be ./ only)
String name = entry.getName().substring(entry.getName().lastIndexOf('/') + 1);
if (name.startsWith(databaseName)) {
Files.copy(is, databaseTmpFile, StandardCopyOption.REPLACE_EXISTING);
} else {
Files.copy(is, geoipTmpDirectory.resolve(databaseName + "_" + name), StandardCopyOption.REPLACE_EXISTING);
}
}
}

LOGGER.debug("moving database from [{}] to [{}]", databaseTmpFile, databaseFile);
Files.move(databaseTmpFile, databaseFile, StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING);
updateDatabase(databaseName, recordedMd5, databaseFile);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,9 @@ void updateDatabases() throws IOException {
logger.info("updating geoip databases");
List<Map<String, Object>> response = fetchDatabasesOverview();
for (Map<String, Object> res : response) {
processDatabase(res);
if (res.get("name").toString().endsWith(".tgz")) {
processDatabase(res);
}
}
}

Expand All @@ -121,7 +123,7 @@ private <T> List<T> fetchDatabasesOverview() throws IOException {

//visible for testing
void processDatabase(Map<String, Object> databaseInfo) {
String name = databaseInfo.get("name").toString().replace(".gz", "");
String name = databaseInfo.get("name").toString().replace(".tgz", "") + ".mmdb";
String md5 = (String) databaseInfo.get("md5_hash");
if (state.contains(name) && Objects.equals(md5, state.get(name).getMd5())) {
updateTimestamp(name, state.get(name));
Expand Down Expand Up @@ -234,7 +236,7 @@ protected void onCancelled() {

@Override
public GeoIpDownloaderStats getStatus() {
return isCancelled() || isCompleted() ? null: stats;
return isCancelled() || isCompleted() ? null : stats;
}

private void scheduleNextRun(TimeValue time) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.ingest.geoip;

import java.io.EOFException;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

/**
* {@link InputStream} with very basic support for tar format, just enough to parse archives provided by GeoIP database service from Infra.
* This class is not suitable for general purpose tar processing!
*/
/**
 * {@link InputStream} with very basic support for tar format, just enough to parse archives provided by GeoIP database service from Infra.
 * This class is not suitable for general purpose tar processing!
 */
class TarInputStream extends FilterInputStream {

    private TarEntry currentEntry;
    // bytes of the current entry's payload that have not yet been consumed by read()
    private long remaining;
    // payload size modulo 512; non-zero means the entry is followed by zero padding up to the next 512-byte block
    private long remainder;
    // scratch buffer holding the 512-byte header block of the current entry
    private final byte[] buf = new byte[512];

    TarInputStream(InputStream in) {
        super(in);
    }

    /**
     * Skips the rest of the current entry (payload plus block padding) and reads the next header.
     *
     * @return the next entry, or {@code null} at end of archive (an all-zero terminator block or a clean end of stream)
     * @throws EOFException if the stream ends in the middle of a 512-byte header block or while skipping entry data
     */
    public TarEntry getNextEntry() throws IOException {
        if (currentEntry != null) {
            //go to the end of the current entry
            skipN(remaining);
            if (remainder != 0) {
                skipN(512 - remainder);
            }
        }
        int read = readNBytes(in, buf, 0, 512);
        if (read == 0) {
            // stream ended exactly on a block boundary - treat as end of archive
            return null;
        }
        if (read != 512) {
            throw new EOFException();
        }

        // an all-zero block is the end-of-archive marker
        boolean allZero = true;
        for (byte b : buf) {
            if (b != 0) {
                allZero = false;
                break;
            }
        }

        if (allZero) {
            return null;
        }

        String name = getString(0, 100);

        // type flag at offset 156: NUL or '0' marks a regular file; a trailing '/' in the name marks a directory
        boolean notFile = (buf[156] != 0 && buf[156] != '0') || name.endsWith("/");

        if (notFile) {
            remaining = 0;
            remainder = 0;
        } else {
            // size field at offset 124 is a 12-byte, NUL/space padded octal number
            String sizeString = getString(124, 12);
            remaining = sizeString.isEmpty() ? 0 : Long.parseLong(sizeString, 8);
            remainder = remaining % 512;
        }

        currentEntry = new TarEntry(name, notFile);
        return currentEntry;
    }

    @Override
    public int read() throws IOException {
        if (remaining == 0) {
            return -1;
        }
        int read = in.read();
        if (read == -1) {
            // truncated archive: surface EOF without decrementing remaining, keeping the entry state consistent
            return -1;
        }
        remaining--;
        return read;
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if (remaining <= 0) {
            return -1;
        }
        // never read past the end of the current entry; cast is safe because remaining <= Integer.MAX_VALUE bounds the min
        int read = in.read(b, off, remaining > Integer.MAX_VALUE ? len : (int) Math.min(len, remaining));
        if (read == -1) {
            // truncated archive: previously this corrupted state via remaining -= -1; propagate EOF untouched instead
            return -1;
        }
        remaining -= read;
        return read;
    }

    // decodes a header field, trimming the NUL/space padding tar uses in fixed-width fields
    private String getString(int offset, int maxLen) {
        return new String(buf, offset, maxLen, StandardCharsets.UTF_8).trim();
    }

    // skips exactly n bytes; skip() may legally skip fewer, so fall back to single-byte reads to detect EOF
    private void skipN(long n) throws IOException {
        while (n > 0) {
            long skip = in.skip(n);
            if (skip < n) {
                int read = in.read();
                if (read == -1) {
                    throw new EOFException();
                }
                n--;
            }
            n -= skip;
        }
    }

    // reads up to len bytes, looping until the buffer is full or the stream ends; returns the count actually read
    private int readNBytes(InputStream in, byte[] b, int off, int len) throws IOException {
        int n = 0;
        while (n < len) {
            int count = in.read(b, off + n, len - n);
            if (count < 0)
                break;
            n += count;
        }
        return n;
    }

    /**
     * Minimal view of a tar archive entry: just its name and whether it is a regular file.
     */
    static class TarEntry {
        private final String name;
        private final boolean notFile;

        TarEntry(String name, boolean notFile) {
            this.name = name;
            this.notFile = notFile;
        }

        public String getName() {
            return name;
        }

        public boolean isNotFile() {
            return notFile;
        }
    }
}

Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ public void testRetrieveDatabaseCorruption() throws Exception {

private String mockSearches(String databaseName, int firstChunk, int lastChunk) throws IOException {
String dummyContent = "test: " + databaseName;
List<byte[]> data = gzip(dummyContent, lastChunk - firstChunk + 1);
List<byte[]> data = gzip(databaseName, dummyContent, lastChunk - firstChunk + 1);
assertThat(gunzip(data), equalTo(dummyContent));

for (int i = firstChunk; i <= lastChunk; i++) {
Expand Down Expand Up @@ -302,10 +302,20 @@ private static RoutingTable createIndexRoutingTable() {
return RoutingTable.builder().add(IndexRoutingTable.builder(index).addIndexShard(table).build()).build();
}

private static List<byte[]> gzip(String content, int chunks) throws IOException {
private static List<byte[]> gzip(String name, String content, int chunks) throws IOException {
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
GZIPOutputStream gzipOutputStream = new GZIPOutputStream(bytes);
gzipOutputStream.write(content.getBytes(StandardCharsets.UTF_8));
byte[] header = new byte[512];
byte[] nameBytes = name.getBytes(StandardCharsets.UTF_8);
byte[] contentBytes = content.getBytes(StandardCharsets.UTF_8);
byte[] sizeBytes = String.format(Locale.ROOT, "%1$012o", contentBytes.length).getBytes(StandardCharsets.UTF_8);
System.arraycopy(nameBytes, 0, header, 0, nameBytes.length);
System.arraycopy(sizeBytes, 0, header, 124, 12);
gzipOutputStream.write(header);
gzipOutputStream.write(contentBytes);
gzipOutputStream.write(512 - contentBytes.length);
gzipOutputStream.write(new byte[512]);
gzipOutputStream.write(new byte[512]);
gzipOutputStream.close();

byte[] all = bytes.toByteArray();
Expand All @@ -321,9 +331,9 @@ private static List<byte[]> gzip(String content, int chunks) throws IOException
from = to;
}

if (data.size() > chunks) {
while (data.size() > chunks) {
byte[] last = data.remove(data.size() - 1);
byte[] secondLast = data.remove(data.size() -1);
byte[] secondLast = data.remove(data.size() - 1);
byte[] merged = new byte[secondLast.length + last.length];
System.arraycopy(secondLast, 0, merged, 0, secondLast.length);
System.arraycopy(last, 0, merged, secondLast.length, last.length);
Expand All @@ -341,7 +351,8 @@ private static String gunzip(List<byte[]> chunks) throws IOException {
System.arraycopy(chunk, 0, gzippedContent, written, chunk.length);
written += chunk.length;
}
GZIPInputStream gzipInputStream = new GZIPInputStream(new ByteArrayInputStream(gzippedContent));
TarInputStream gzipInputStream = new TarInputStream(new GZIPInputStream(new ByteArrayInputStream(gzippedContent)));
gzipInputStream.getNextEntry();
return Streams.readFully(gzipInputStream).utf8ToString();
}

Expand Down
Loading