Skip to content

Commit

Permalink
Add statistics update API
Browse files Browse the repository at this point in the history
  • Loading branch information
findepi committed May 10, 2022
1 parent 0c21561 commit e35cf57
Show file tree
Hide file tree
Showing 6 changed files with 183 additions and 1 deletion.
8 changes: 8 additions & 0 deletions api/src/main/java/org/apache/iceberg/Transaction.java
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,14 @@ default AppendFiles newFastAppend() {
*/
DeleteFiles newDelete();

/**
* Create a new {@link UpdateTableStatistics update table statistics API} to add or remove statistics
* files in this table.
*
* @return a new {@link UpdateTableStatistics}
*/
UpdateTableStatistics newUpdateTableStatistics();

/**
* Create a new {@link ExpireSnapshots expire API} to manage snapshots in this table.
*
Expand Down
44 changes: 44 additions & 0 deletions api/src/main/java/org/apache/iceberg/UpdateTableStatistics.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg;

/**
* API for adding or removing statistics files from a table.
*/
public interface UpdateTableStatistics extends PendingUpdate<Snapshot> {
/**
* Delete a statistics file path from the underlying table.
* <p>
* To remove a statistics file from the table, this path must equal a path in the table's metadata. Paths
* that are different but equivalent will not be removed. For example, file:/path/file.avro is
* equivalent to file:///path/file.avro, but would not remove the latter path from the table.
*
* @param path a fully-qualified file path to remove from the table
* @return this for method chaining
*/
UpdateTableStatistics deleteStatisticsFile(CharSequence path);

/**
* Add a statistics file to the underlying table.
*
* @return this for method chaining
*/
UpdateTableStatistics addStatisticsFile(StatisticsFile statisticsFile);
}
8 changes: 8 additions & 0 deletions core/src/main/java/org/apache/iceberg/BaseTransaction.java
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,14 @@ public DeleteFiles newDelete() {
return delete;
}

@Override
public UpdateTableStatistics newUpdateTableStatistics() {
checkLastOperationCommitted("UpdateTableStatistics");
BaseUpdateTableStatistics updateStatistics = new BaseUpdateTableStatistics(transactionOps);
updates.add(updateStatistics);
return updateStatistics;
}

@Override
public ExpireSnapshots expireSnapshots() {
checkLastOperationCommitted("ExpireSnapshots");
Expand Down
117 changes: 117 additions & 0 deletions core/src/main/java/org/apache/iceberg/BaseUpdateTableStatistics.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg;

import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;

/**
* {@link UpdateTableStatistics Update table statistics} implementation.
*/
class BaseUpdateTableStatistics extends SnapshotProducer<BaseUpdateTableStatistics> implements UpdateTableStatistics {

private final TableOperations ops;
private final SnapshotSummary.Builder summaryBuilder = SnapshotSummary.builder();
private final Set<String> removedStatisticsFiles = Sets.newHashSet();
private final Set<StatisticsFile> addedStatisticsFiles = Sets.newHashSet();

protected BaseUpdateTableStatistics(TableOperations ops) {
super(ops);
this.ops = ops;
}

@Override
protected BaseUpdateTableStatistics self() {
return this;
}

@Override
public UpdateTableStatistics deleteStatisticsFile(CharSequence path) {
Preconditions.checkNotNull(path, "path is null");
removedStatisticsFiles.add(path.toString());
return this;
}

@Override
public UpdateTableStatistics addStatisticsFile(StatisticsFile statisticsFile) {
Preconditions.checkNotNull(statisticsFile, "statisticsFile is null");
addedStatisticsFiles.add(statisticsFile);
return null;
}

@Override
public BaseUpdateTableStatistics set(String property, String value) {
summaryBuilder.set(property, value);
return this;
}

@Override
protected void cleanUncommitted(Set<ManifestFile> committed) {
// TODO should i delete files pointed to by addedStatisticsFiles?
}

@Override
protected String operation() {
return "update statistics";
}

@Override
public Snapshot apply() {
TableMetadata base = refresh();
Long parentSnapshotId = base.currentSnapshot() != null ?
base.currentSnapshot().snapshotId() : null;
long sequenceNumber = base.nextSequenceNumber();

validate(base);

List<StatisticsFile> statisticsFiles = Lists.newArrayList();
if (base.currentSnapshot() != null && base.currentSnapshot().statisticsFiles() != null) {
base.currentSnapshot().statisticsFiles().forEach(statisticsFiles::add);
}
statisticsFiles.removeIf(statisticsFile -> removedStatisticsFiles.contains(statisticsFile.location()));
statisticsFiles.addAll(addedStatisticsFiles);

return new BaseSnapshot(ops.io(),
sequenceNumber, snapshotId(), parentSnapshotId, System.currentTimeMillis(), operation(), summary(base),
base.currentSchemaId(), base.currentSnapshot().manifestListLocation(), statisticsFiles);
}

@Override
protected void validate(TableMetadata currentMetadata) {
ValidationException.check(currentMetadata.formatVersion() >= 2,
"Table statistics are not supported in format version %s", currentMetadata.formatVersion());
}

@Override
protected List<ManifestFile> apply(TableMetadata metadataToUpdate) {
// The method is not invoked, since apply() is overridden.
throw new UnsupportedOperationException();
}

@Override
protected Map<String, String> summary() {
return summaryBuilder.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,11 @@ public DeleteFiles newDelete() {
return wrapped.newDelete();
}

@Override
public UpdateTableStatistics newUpdateTableStatistics() {
return wrapped.newUpdateTableStatistics();
}

@Override
public ExpireSnapshots expireSnapshots() {
return wrapped.expireSnapshots();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ sequenceNumber, snapshotId(), parentSnapshotId, System.currentTimeMillis(), oper
/**
* Returns the snapshot summary from the implementation and updates totals.
*/
private Map<String, String> summary(TableMetadata previous) {
protected Map<String, String> summary(TableMetadata previous) {
Map<String, String> summary = summary();

if (summary == null) {
Expand Down

0 comments on commit e35cf57

Please sign in to comment.