From f4391512551c68227df81ef78cb185bd3f8be2cb Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Fri, 5 Aug 2022 20:13:43 +0200 Subject: [PATCH] Add TableMetadata support for statistics information --- .../org/apache/iceberg/MetadataUpdate.java | 34 +++++++++++++++++++ .../org/apache/iceberg/TableMetadata.java | 26 ++++++++++++++ .../apache/iceberg/TableMetadataParser.java | 1 + .../org/apache/iceberg/TestTableMetadata.java | 11 ++++++ 4 files changed, 72 insertions(+) diff --git a/core/src/main/java/org/apache/iceberg/MetadataUpdate.java b/core/src/main/java/org/apache/iceberg/MetadataUpdate.java index 1bb467840be0..53e7c57c2f17 100644 --- a/core/src/main/java/org/apache/iceberg/MetadataUpdate.java +++ b/core/src/main/java/org/apache/iceberg/MetadataUpdate.java @@ -178,6 +178,40 @@ public void applyTo(TableMetadata.Builder metadataBuilder) { } } + class SetStatistics implements MetadataUpdate { + private final StatisticsFile statisticsFile; + + public SetStatistics(StatisticsFile statisticsFile) { + this.statisticsFile = statisticsFile; + } + + public StatisticsFile statisticsFile() { + return statisticsFile; + } + + @Override + public void applyTo(TableMetadata.Builder metadataBuilder) { + metadataBuilder.setStatistics(statisticsFile); + } + } + + class RemoveStatistics implements MetadataUpdate { + private final long snapshotId; + + public RemoveStatistics(long snapshotId) { + this.snapshotId = snapshotId; + } + + public long snapshotId() { + return snapshotId; + } + + @Override + public void applyTo(TableMetadata.Builder metadataBuilder) { + metadataBuilder.removeStatistics(snapshotId); + } + } + class AddSnapshot implements MetadataUpdate { private final Snapshot snapshot; diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java index 0266e83bd553..9ab3bfd4d4c6 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadata.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java @@ -243,6 +243,7 @@ public String toString() { private final List snapshotLog; private final List previousFiles; private final Map refs; + private final List statistics; private final List changes; @SuppressWarnings("checkstyle:CyclomaticComplexity") @@ -267,6 +268,7 @@ public String toString() { List snapshotLog, List previousFiles, Map refs, + List statistics, List changes) { Preconditions.checkArgument( specs != null && !specs.isEmpty(), "Partition specs cannot be null or empty"); @@ -314,6 +316,7 @@ public String toString() { this.specsById = indexSpecs(specs); this.sortOrdersById = indexSortOrders(sortOrders); this.refs = validateRefs(currentSnapshotId, refs, snapshotsById); + this.statistics = ImmutableList.copyOf(statistics); HistoryEntry last = null; for (HistoryEntry logEntry : snapshotLog) { @@ -489,6 +492,10 @@ public Map refs() { return refs; } + public List statistics() { + return statistics; + } + public List snapshotLog() { return snapshotLog; } @@ -817,6 +824,7 @@ public static class Builder { private long currentSnapshotId; private List snapshots; private final Map refs; + private final Map> statistics; // change tracking private final List changes; @@ -853,6 +861,7 @@ private Builder() { this.snapshotLog = Lists.newArrayList(); this.previousFiles = Lists.newArrayList(); this.refs = Maps.newHashMap(); + this.statistics = Maps.newHashMap(); this.snapshotsById = Maps.newHashMap(); this.schemasById = Maps.newHashMap(); this.specsById = Maps.newHashMap(); @@ -884,6 +893,8 @@ private Builder(TableMetadata base) { this.previousFileLocation = base.metadataFileLocation; this.previousFiles = base.previousFiles; this.refs = Maps.newHashMap(base.refs); + this.statistics = + base.statistics.stream().collect(Collectors.groupingBy(StatisticsFile::snapshotId)); this.snapshotsById = Maps.newHashMap(base.snapshotsById); this.schemasById = Maps.newHashMap(base.schemasById); @@ -1176,6 +1187,20 @@ public Builder removeBranch(String branch) { return this; } + public Builder setStatistics(StatisticsFile statisticsFile) { + Preconditions.checkNotNull(statisticsFile, "statisticsFile is null"); + this.statistics.put(statisticsFile.snapshotId(), ImmutableList.of(statisticsFile)); + changes.add(new MetadataUpdate.SetStatistics(statisticsFile)); + return this; + } + + public Builder removeStatistics(long snapshotId) { + Preconditions.checkNotNull(snapshotId, "snapshotId is null"); + this.statistics.remove(snapshotId); + changes.add(new MetadataUpdate.RemoveStatistics(snapshotId)); + return this; + } + public Builder removeSnapshots(List snapshotsToRemove) { Set idsToRemove = snapshotsToRemove.stream().map(Snapshot::snapshotId).collect(Collectors.toSet()); @@ -1313,6 +1338,7 @@ public TableMetadata build() { ImmutableList.copyOf(newSnapshotLog), ImmutableList.copyOf(metadataHistory), ImmutableMap.copyOf(refs), + statistics.values().stream().flatMap(List::stream).collect(Collectors.toList()), discardChanges ? ImmutableList.of() : ImmutableList.copyOf(changes)); } diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java index 2abfba95c0b6..5cbb3b66b87d 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java @@ -498,6 +498,7 @@ static TableMetadata fromJson(FileIO io, String metadataLocation, JsonNode node) entries.build(), metadataEntries.build(), refs, + ImmutableList.of(), /* TODO: support statistics */ ImmutableList.of() /* no changes from the file */); } diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java index e3c69f82993a..67eb2a3a4846 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java @@ -152,6 +152,7 @@ public void testJsonConversion() throws Exception { snapshotLog, ImmutableList.of(), refs, + ImmutableList.of(), ImmutableList.of()); String asJson = TableMetadataParser.toJson(expected); @@ -272,6 +273,7 @@ public void testBackwardCompat() throws Exception { ImmutableList.of(), ImmutableList.of(), ImmutableMap.of(), + ImmutableList.of(), ImmutableList.of()); String asJson = toJsonWithoutSpecAndSchemaList(expected); @@ -415,6 +417,7 @@ public void testInvalidMainBranch() { snapshotLog, ImmutableList.of(), refs, + ImmutableList.of(), ImmutableList.of())); } @@ -464,6 +467,7 @@ public void testMainWithoutCurrent() { ImmutableList.of(), ImmutableList.of(), refs, + ImmutableList.of(), ImmutableList.of())); } @@ -502,6 +506,7 @@ public void testBranchSnapshotMissing() { ImmutableList.of(), ImmutableList.of(), refs, + ImmutableList.of(), ImmutableList.of())); } @@ -607,6 +612,7 @@ public void testJsonWithPreviousMetadataLog() throws Exception { reversedSnapshotLog, ImmutableList.copyOf(previousMetadataLog), ImmutableMap.of(), + ImmutableList.of(), ImmutableList.of()); String asJson = TableMetadataParser.toJson(base); @@ -686,6 +692,7 @@ public void testAddPreviousMetadataRemoveNone() { reversedSnapshotLog, ImmutableList.copyOf(previousMetadataLog), ImmutableMap.of(), + ImmutableList.of(), ImmutableList.of()); previousMetadataLog.add(latestPreviousMetadata); @@ -783,6 +790,7 @@ public void testAddPreviousMetadataRemoveOne() { reversedSnapshotLog, ImmutableList.copyOf(previousMetadataLog), ImmutableMap.of(), + ImmutableList.of(), ImmutableList.of()); previousMetadataLog.add(latestPreviousMetadata); @@ -886,6 +894,7 @@ public void testAddPreviousMetadataRemoveMultiple() { reversedSnapshotLog, ImmutableList.copyOf(previousMetadataLog), ImmutableMap.of(), + ImmutableList.of(), ImmutableList.of()); previousMetadataLog.add(latestPreviousMetadata); @@ -935,6 +944,7 @@ public void testV2UUIDValidation() { ImmutableList.of(), ImmutableList.of(), ImmutableMap.of(), + ImmutableList.of(), ImmutableList.of())); } @@ -967,6 +977,7 @@ public void testVersionValidation() { ImmutableList.of(), ImmutableList.of(), ImmutableMap.of(), + ImmutableList.of(), ImmutableList.of())); }