diff --git a/api/src/main/java/org/apache/iceberg/Transaction.java b/api/src/main/java/org/apache/iceberg/Transaction.java index 609f86d1a098..e616666de5f0 100644 --- a/api/src/main/java/org/apache/iceberg/Transaction.java +++ b/api/src/main/java/org/apache/iceberg/Transaction.java @@ -137,6 +137,14 @@ default AppendFiles newFastAppend() { */ DeleteFiles newDelete(); + /** + * Create a new {@link UpdateTableStatistics update table statistics API} to add or remove statistics + * files in this table. + * + * @return a new {@link UpdateTableStatistics} + */ + UpdateTableStatistics newUpdateTableStatistics(); + /** * Create a new {@link ExpireSnapshots expire API} to manage snapshots in this table. * diff --git a/api/src/main/java/org/apache/iceberg/UpdateTableStatistics.java b/api/src/main/java/org/apache/iceberg/UpdateTableStatistics.java new file mode 100644 index 000000000000..9599ae3ea7c1 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/UpdateTableStatistics.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg; + +/** + * API for adding or removing statistics files from a table. + */ +public interface UpdateTableStatistics extends PendingUpdate { + /** + * Delete a statistics file path from the underlying table. + *

+ * To remove a statistics file from the table, this path must equal a path in the table's metadata. Paths + * that are different but equivalent will not be removed. For example, file:/path/file.avro is + * equivalent to file:///path/file.avro, but would not remove the latter path from the table. + * + * @param path a fully-qualified file path to remove from the table + * @return this for method chaining + */ + UpdateTableStatistics deleteStatisticsFile(CharSequence path); + + /** + * Add a statistics file to the underlying table. + * + * @return this for method chaining + */ + UpdateTableStatistics addStatisticsFile(StatisticsFile statisticsFile); +} diff --git a/core/src/main/java/org/apache/iceberg/BaseTransaction.java b/core/src/main/java/org/apache/iceberg/BaseTransaction.java index 70410b682434..3a5c5e1d424c 100644 --- a/core/src/main/java/org/apache/iceberg/BaseTransaction.java +++ b/core/src/main/java/org/apache/iceberg/BaseTransaction.java @@ -215,6 +215,14 @@ public DeleteFiles newDelete() { return delete; } + @Override + public UpdateTableStatistics newUpdateTableStatistics() { + checkLastOperationCommitted("UpdateTableStatistics"); + BaseUpdateTableStatistics updateStatistics = new BaseUpdateTableStatistics(transactionOps); + updates.add(updateStatistics); + return updateStatistics; + } + @Override public ExpireSnapshots expireSnapshots() { checkLastOperationCommitted("ExpireSnapshots"); diff --git a/core/src/main/java/org/apache/iceberg/BaseUpdateTableStatistics.java b/core/src/main/java/org/apache/iceberg/BaseUpdateTableStatistics.java new file mode 100644 index 000000000000..052cd2a76fd6 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/BaseUpdateTableStatistics.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg; + +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; + +/** + * {@link UpdateTableStatistics Update table statistics} implementation. + */ +class BaseUpdateTableStatistics extends SnapshotProducer implements UpdateTableStatistics { + + private final TableOperations ops; + private final SnapshotSummary.Builder summaryBuilder = SnapshotSummary.builder(); + private final Set removedStatisticsFiles = Sets.newHashSet(); + private final Set addedStatisticsFiles = Sets.newHashSet(); + + protected BaseUpdateTableStatistics(TableOperations ops) { + super(ops); + this.ops = ops; + } + + @Override + protected BaseUpdateTableStatistics self() { + return this; + } + + @Override + public UpdateTableStatistics deleteStatisticsFile(CharSequence path) { + Preconditions.checkNotNull(path, "path is null"); + removedStatisticsFiles.add(path.toString()); + return this; + } + + @Override + public UpdateTableStatistics addStatisticsFile(StatisticsFile statisticsFile) { + Preconditions.checkNotNull(statisticsFile, "statisticsFile is null"); + addedStatisticsFiles.add(statisticsFile); + return null; + } + + @Override + public BaseUpdateTableStatistics set(String property, String value) { + summaryBuilder.set(property, value); + return this; + } + + @Override + protected void cleanUncommitted(Set committed) { + // TODO should i delete files pointed to by addedStatisticsFiles? + } + + @Override + protected String operation() { + return "update statistics"; + } + + @Override + public Snapshot apply() { + TableMetadata base = refresh(); + Long parentSnapshotId = base.currentSnapshot() != null ? + base.currentSnapshot().snapshotId() : null; + long sequenceNumber = base.nextSequenceNumber(); + + validate(base); + + List statisticsFiles = Lists.newArrayList(); + if (base.currentSnapshot() != null && base.currentSnapshot().statisticsFiles() != null) { + base.currentSnapshot().statisticsFiles().forEach(statisticsFiles::add); + } + statisticsFiles.removeIf(statisticsFile -> removedStatisticsFiles.contains(statisticsFile.location())); + statisticsFiles.addAll(addedStatisticsFiles); + + return new BaseSnapshot(ops.io(), + sequenceNumber, snapshotId(), parentSnapshotId, System.currentTimeMillis(), operation(), summary(base), + base.currentSchemaId(), base.currentSnapshot().manifestListLocation(), statisticsFiles); + } + + @Override + protected void validate(TableMetadata currentMetadata) { + ValidationException.check(currentMetadata.formatVersion() >= 2, + "Table statistics are not supported in format version %s", currentMetadata.formatVersion()); + } + + @Override + protected List apply(TableMetadata metadataToUpdate) { + // The method is not invoked, since apply() is overridden. + throw new UnsupportedOperationException(); + } + + @Override + protected Map summary() { + return summaryBuilder.build(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/CommitCallbackTransaction.java b/core/src/main/java/org/apache/iceberg/CommitCallbackTransaction.java index bd15744a87ad..233c730b1c4d 100644 --- a/core/src/main/java/org/apache/iceberg/CommitCallbackTransaction.java +++ b/core/src/main/java/org/apache/iceberg/CommitCallbackTransaction.java @@ -102,6 +102,11 @@ public DeleteFiles newDelete() { return wrapped.newDelete(); } + @Override + public UpdateTableStatistics newUpdateTableStatistics() { + return wrapped.newUpdateTableStatistics(); + } + @Override public ExpireSnapshots expireSnapshots() { return wrapped.expireSnapshots(); diff --git a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java index 6fcd1be745de..8983b05aa963 100644 --- a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java +++ b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java @@ -219,7 +219,7 @@ sequenceNumber, snapshotId(), parentSnapshotId, System.currentTimeMillis(), oper /** * Returns the snapshot summary from the implementation and updates totals. */ - private Map summary(TableMetadata previous) { + protected Map summary(TableMetadata previous) { Map summary = summary(); if (summary == null) {