-
Notifications
You must be signed in to change notification settings - Fork 2.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support setting table statistics #5794
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.apache.iceberg; | ||
|
||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Optional; | ||
import org.apache.iceberg.relocated.com.google.common.base.Preconditions; | ||
import org.apache.iceberg.relocated.com.google.common.collect.Maps; | ||
|
||
public class SetStatistics implements UpdateStatistics { | ||
private final TableOperations ops; | ||
private final Map<Long, Optional<StatisticsFile>> statisticsToSet = Maps.newHashMap(); | ||
|
||
public SetStatistics(TableOperations ops) { | ||
this.ops = ops; | ||
} | ||
|
||
@Override | ||
public UpdateStatistics setStatistics(long snapshotId, StatisticsFile statisticsFile) { | ||
Preconditions.checkArgument(snapshotId == statisticsFile.snapshotId()); | ||
statisticsToSet.put(snapshotId, Optional.of(statisticsFile)); | ||
return this; | ||
} | ||
|
||
@Override | ||
public UpdateStatistics removeStatistics(long snapshotId) { | ||
statisticsToSet.put(snapshotId, Optional.empty()); | ||
return this; | ||
} | ||
|
||
@Override | ||
public List<StatisticsFile> apply() { | ||
return internalApply(ops.current()).statisticsFiles(); | ||
} | ||
|
||
@Override | ||
public void commit() { | ||
TableMetadata base = ops.current(); | ||
TableMetadata newMetadata = internalApply(base); | ||
ops.commit(base, newMetadata); | ||
} | ||
|
||
private TableMetadata internalApply(TableMetadata base) { | ||
TableMetadata.Builder builder = TableMetadata.buildFrom(base); | ||
statisticsToSet.forEach( | ||
(snapshotId, statistics) -> { | ||
if (statistics.isPresent()) { | ||
builder.setStatistics(snapshotId, statistics.get()); | ||
} else { | ||
builder.removeStatistics(snapshotId); | ||
} | ||
}); | ||
return builder.build(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.apache.iceberg; | ||
|
||
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; | ||
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; | ||
import org.junit.Assert; | ||
import org.junit.Test; | ||
import org.junit.runner.RunWith; | ||
import org.junit.runners.Parameterized; | ||
|
||
@RunWith(Parameterized.class) | ||
public class TestSetStatistics extends TableTestBase { | ||
@Parameterized.Parameters(name = "formatVersion = {0}") | ||
public static Object[] parameters() { | ||
return new Object[] {1, 2}; | ||
} | ||
|
||
/**
 * Creates the test instance for one parameterized run.
 *
 * @param formatVersion table format version, passed through to the superclass constructor
 */
public TestSetStatistics(int formatVersion) {
  super(formatVersion);
}
|
||
@Test | ||
public void testEmptyUpdateStatistics() { | ||
Assert.assertEquals("Table should be on version 0", 0, (int) version()); | ||
TableMetadata base = readMetadata(); | ||
|
||
table.updateStatistics().commit(); | ||
|
||
Assert.assertSame( | ||
"Base metadata should not change when commit is created", base, table.ops().current()); | ||
Assert.assertEquals("Table should be on version 1", 1, (int) version()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why did the table version change? It looks correct as long as the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not sure why. I copied the version-related assertions from BTW if i add the following to @Test
public void testNoopUpdateBumpsVersion() {
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).withSpecId(5).identity("data").build();
SortOrder order =
SortOrder.builderFor(SCHEMA)
.withOrderId(10)
.asc("s.id", NULLS_LAST)
.desc(truncate("data", 10), NULLS_FIRST)
.build();
TestTables.TestTable table =
TestTables.create(tableDir, "test", SCHEMA, spec, order, formatVersion);
Assert.assertEquals("Table should be on version 0", 0, (int) TestTables.metadataVersion("test"));
table.replaceSortOrder().commit();
Assert.assertEquals("Table should be on version 1", 1, (int) TestTables.metadataVersion("test"));
} |
||
} | ||
|
||
@Test | ||
public void testEmptyTransactionalUpdateStatistics() { | ||
Assert.assertEquals("Table should be on version 0", 0, (int) version()); | ||
TableMetadata base = readMetadata(); | ||
|
||
Transaction transaction = table.newTransaction(); | ||
transaction.updateStatistics().commit(); | ||
transaction.commitTransaction(); | ||
|
||
Assert.assertSame( | ||
"Base metadata should not change when commit is created", base, table.ops().current()); | ||
Assert.assertEquals("Table should be on version 0", 0, (int) version()); | ||
} | ||
|
||
@Test | ||
public void testUpdateStatistics() { | ||
// Create a snapshot | ||
table.newFastAppend().commit(); | ||
Assert.assertEquals("Table should be on version 1", 1, (int) version()); | ||
|
||
TableMetadata base = readMetadata(); | ||
long snapshotId = base.currentSnapshot().snapshotId(); | ||
GenericStatisticsFile statisticsFile = | ||
new GenericStatisticsFile( | ||
snapshotId, | ||
"/some/statistics/file.puffin", | ||
100, | ||
42, | ||
ImmutableList.of( | ||
new GenericBlobMetadata( | ||
"stats-type", | ||
snapshotId, | ||
base.lastSequenceNumber(), | ||
ImmutableList.of(1, 2), | ||
ImmutableMap.of("a-property", "some-property-value")))); | ||
|
||
table.updateStatistics().setStatistics(snapshotId, statisticsFile).commit(); | ||
|
||
TableMetadata metadata = readMetadata(); | ||
Assert.assertEquals("Table should be on version 2", 2, (int) version()); | ||
Assert.assertEquals( | ||
"Table snapshot should be the same after setting statistics file", | ||
snapshotId, | ||
metadata.currentSnapshot().snapshotId()); | ||
Assert.assertEquals( | ||
"Table metadata should have statistics files", | ||
ImmutableList.of(statisticsFile), | ||
metadata.statisticsFiles()); | ||
} | ||
|
||
@Test | ||
public void testRemoveStatistics() { | ||
// Create a snapshot | ||
table.newFastAppend().commit(); | ||
Assert.assertEquals("Table should be on version 1", 1, (int) version()); | ||
|
||
TableMetadata base = readMetadata(); | ||
long snapshotId = base.currentSnapshot().snapshotId(); | ||
GenericStatisticsFile statisticsFile = | ||
new GenericStatisticsFile( | ||
snapshotId, "/some/statistics/file.puffin", 100, 42, ImmutableList.of()); | ||
|
||
table.updateStatistics().setStatistics(snapshotId, statisticsFile).commit(); | ||
|
||
TableMetadata metadata = readMetadata(); | ||
Assert.assertEquals("Table should be on version 2", 2, (int) version()); | ||
Assert.assertEquals( | ||
"Table metadata should have statistics files", | ||
ImmutableList.of(statisticsFile), | ||
metadata.statisticsFiles()); | ||
|
||
table.updateStatistics().removeStatistics(snapshotId).commit(); | ||
|
||
metadata = readMetadata(); | ||
Assert.assertEquals("Table should be on version 3", 3, (int) version()); | ||
Assert.assertEquals( | ||
"Table metadata should have no statistics files", | ||
ImmutableList.of(), | ||
metadata.statisticsFiles()); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The intent for this method is to give a preview of what will be committed for validation, like a "dry run" option. We use it for that purpose in tests. That's why `apply` is typically called from `commit` and the result is used to update the table metadata.

I think it's a good idea to have the output of `apply` be the final list of statistics files. But that leaves a strange case where you wouldn't want to call `apply` from `commit` because the `TableMetadata.Builder` methods are responsible for applying changes. The logic in `apply` should be the same as the logic in `commit`, though.

I'd solve this by adding a common method, `internalApply`, that returns the `TableMetadata` and is called here and by `commit` to ensure consistency.

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will do!