Skip to content

Commit

Permalink
Fix Iceberg $history table when using REST Catalog
Browse files Browse the repository at this point in the history
In the current version of Iceberg library, when not all the
snapshots are initially loaded (for the REST session catalog),
the `snapshotsLog` does not get reinitialized, causing to
expose only the current snapshots in the `history()` method
of the Iceberg table.
  • Loading branch information
findinpath committed May 14, 2023
1 parent a30ee90 commit ea3b9e0
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import io.trino.spi.connector.SystemTable;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.type.TimeZoneKey;
import org.apache.iceberg.HistoryEntry;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.Table;
import org.apache.iceberg.util.SnapshotUtil;
Expand Down Expand Up @@ -77,14 +76,13 @@ public RecordCursor cursor(ConnectorTransactionHandle transactionHandle, Connect

Set<Long> ancestorIds = ImmutableSet.copyOf(SnapshotUtil.currentAncestorIds(icebergTable));
TimeZoneKey timeZoneKey = session.getTimeZoneKey();
for (HistoryEntry historyEntry : icebergTable.history()) {
long snapshotId = historyEntry.snapshotId();
Snapshot snapshot = icebergTable.snapshot(snapshotId);
for (Snapshot snapshot : icebergTable.snapshots()) {
long snapshotId = snapshot.snapshotId();

table.addRow(
packDateTimeWithZone(historyEntry.timestampMillis(), timeZoneKey),
packDateTimeWithZone(snapshot.timestampMillis(), timeZoneKey),
snapshotId,
snapshot != null ? snapshot.parentId() : null,
snapshot.parentId(),
ancestorIds.contains(snapshotId));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,33 @@ public void testDropTableWithNonExistentTableLocation()
assertFalse(getQueryRunner().tableExists(getSession(), tableName));
}

// Verify the accuracy of Trino metadata tables while retrieving Iceberg table metadata from the underlying `TrinoCatalog` implementation
@Test
public void testMetadataTables()
{
try (TestTable table = new TestTable(
getQueryRunner()::execute,
"test_metadata_tables",
"(id int, part varchar) WITH (partitioning = ARRAY['part'])",
ImmutableList.of("1, 'p1'", "2, 'p1'", "3, 'p2'"))) {
List<Long> snapshotIds = computeActual("SELECT snapshot_id FROM \"" + table.getName() + "$snapshots\" ORDER BY committed_at DESC")
.getOnlyColumn()
.map(Long.class::cast)
.collect(toImmutableList());
List<Long> historySnapshotIds = computeActual("SELECT snapshot_id FROM \"" + table.getName() + "$history\" ORDER BY made_current_at DESC")
.getOnlyColumn()
.map(Long.class::cast)
.collect(toImmutableList());
long filesCount = (long) computeScalar("SELECT count(*) FROM \"" + table.getName() + "$files\"");
long partitionsCount = (long) computeScalar("SELECT count(*) FROM \"" + table.getName() + "$partitions\"");

assertThat(snapshotIds).hasSize(4);
assertThat(snapshotIds).hasSameElementsAs(historySnapshotIds);
assertThat(filesCount).isEqualTo(3L);
assertThat(partitionsCount).isEqualTo(2L);
}
}

protected abstract boolean isFileSorted(Location path, String sortColumnName);

private String getTableLocation(String tableName)
Expand Down

0 comments on commit ea3b9e0

Please sign in to comment.