From f1773d4eb68d4f4e77fe14b762518b0120f9bf64 Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Thu, 29 Feb 2024 15:38:42 +0900 Subject: [PATCH] Add support for v2Checkpoint in Delta Lake --- docs/src/main/sphinx/connector/delta-lake.md | 2 + .../CheckpointMetadataEntry.java | 33 ++++ .../DeltaLakeSchemaSupport.java | 2 + .../DeltaLakeTransactionLogEntry.java | 58 +++++-- .../transactionlog/SidecarEntry.java | 34 ++++ .../transactionlog/TableSnapshot.java | 152 +++++++++++++++--- .../transactionlog/V2Checkpoint.java | 24 +++ .../checkpoint/CheckpointEntryIterator.java | 35 +++- .../checkpoint/CheckpointSchemaManager.java | 16 +- .../checkpoint/CheckpointWriterManager.java | 2 +- .../checkpoint/LastCheckpoint.java | 5 +- ...stDeltaLakeAlluxioCacheFileOperations.java | 66 ++++++++ .../plugin/deltalake/TestDeltaLakeBasic.java | 47 ++++++ .../TestDeltaLakeFileOperations.java | 48 +++++- .../v2_checkpoint_json/README.md | 11 ++ .../_delta_log/00000000000000000000.json | 3 + ....ae4ea00c-afe0-4e8b-ad87-28125262bd44.json | 4 + .../_delta_log/00000000000000000001.json | 2 + .../_delta_log/_last_checkpoint | 1 + ...e66709-d0ca-402d-9375-0e3c4fcf8d73.parquet | Bin 0 -> 12007 bytes ...4fde-860e-a0d90082de75-c000.snappy.parquet | Bin 0 -> 788 bytes .../v2_checkpoint_parquet/README.md | 13 ++ .../_delta_log/00000000000000000000.json | 3 + ...785ba8-b035-4760-b814-1f115ccb6167.parquet | Bin 0 -> 23272 bytes .../_delta_log/00000000000000000001.json | 2 + .../_delta_log/_last_checkpoint | 1 + ...992ab9-b145-4179-b0ee-2ecef614db23.parquet | Bin 0 -> 12008 bytes ...4089-8212-a563edf37f5b-c000.snappy.parquet | Bin 0 -> 788 bytes .../deltalake/v2_checkpoint_json/README.md | 11 ++ .../_delta_log/00000000000000000000.json | 3 + ....73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json | 4 + .../_delta_log/00000000000000000001.json | 2 + .../_delta_log/_last_checkpoint | 1 + ...cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet | Bin 0 -> 9176 bytes ...4ce6-b554-3e1d596857ba-c000.snappy.parquet | Bin 0 -> 666 bytes .../deltalake/v2_checkpoint_parquet/README.md | 13 ++ .../_delta_log/00000000000000000000.json | 3 + ...6b3304-76b2-49c3-a9a1-626f07df27c9.parquet | Bin 0 -> 19019 bytes .../_delta_log/00000000000000000001.json | 2 + .../_delta_log/_last_checkpoint | 1 + ...288d7e-af16-44ed-829c-196064a71812.parquet | Bin 0 -> 9415 bytes ...4e07-8769-345baa22e4b3-c000.snappy.parquet | Bin 0 -> 666 bytes ...TestDeltaLakeCheckpointsCompatibility.java | 42 +++++ 43 files changed, 606 insertions(+), 40 deletions(-) create mode 100644 plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/CheckpointMetadataEntry.java create mode 100644 plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/SidecarEntry.java create mode 100644 plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/V2Checkpoint.java create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/README.md create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000000.json create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000001.checkpoint.ae4ea00c-afe0-4e8b-ad87-28125262bd44.json create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000001.json create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/_last_checkpoint create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.59e66709-d0ca-402d-9375-0e3c4fcf8d73.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/part-00000-946b010f-db20-4fde-860e-a0d90082de75-c000.snappy.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/README.md create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000000.json create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000001.checkpoint.b6785ba8-b035-4760-b814-1f115ccb6167.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000001.json create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/_last_checkpoint create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.25992ab9-b145-4179-b0ee-2ecef614db23.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/part-00000-15e6800a-1995-4089-8212-a563edf37f5b-c000.snappy.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/README.md create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000000.json create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000001.json create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/_last_checkpoint create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/part-00000-fad997ee-643c-4ce6-b554-3e1d596857ba-c000.snappy.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/README.md create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/00000000000000000000.json create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/00000000000000000001.json create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/_last_checkpoint create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/part-00000-77e34410-74c8-4e07-8769-345baa22e4b3-c000.snappy.parquet diff --git a/docs/src/main/sphinx/connector/delta-lake.md b/docs/src/main/sphinx/connector/delta-lake.md index 6a78ab104e13..ca80b767a332 100644 --- a/docs/src/main/sphinx/connector/delta-lake.md +++ b/docs/src/main/sphinx/connector/delta-lake.md @@ -351,6 +351,8 @@ features](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#table-featur - Readers only * - Timestamp without time zone - Readers and writers +* - V2 checkpoint + - Readers only ::: No other features are supported. diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/CheckpointMetadataEntry.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/CheckpointMetadataEntry.java new file mode 100644 index 000000000000..335a2c55b086 --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/CheckpointMetadataEntry.java @@ -0,0 +1,33 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake.transactionlog; + +import com.google.common.collect.ImmutableMap; + +import java.util.Map; +import java.util.Optional; + +import static com.google.common.base.Preconditions.checkArgument; +import static java.util.Objects.requireNonNull; + +// https://github.com/delta-io/delta/blob/master/PROTOCOL.md#checkpoint-metadata +public record CheckpointMetadataEntry(long version, Optional> tags) +{ + public CheckpointMetadataEntry + { + checkArgument(version > 0, "version is not positive: %s", version); + requireNonNull(tags, "tags is null"); + tags = tags.map(ImmutableMap::copyOf); + } +} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeSchemaSupport.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeSchemaSupport.java index bbbd958cf122..8fa4193c5563 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeSchemaSupport.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeSchemaSupport.java @@ -104,11 +104,13 @@ private DeltaLakeSchemaSupport() {} private static final String IDENTITY_COLUMNS_FEATURE_NAME = "identityColumns"; private static final String INVARIANTS_FEATURE_NAME = "invariants"; public static final String TIMESTAMP_NTZ_FEATURE_NAME = "timestampNtz"; + public static final String V2_CHECKPOINT_FEATURE_NAME = "v2Checkpoint"; private static final Set SUPPORTED_READER_FEATURES = ImmutableSet.builder() .add(COLUMN_MAPPING_FEATURE_NAME) .add(TIMESTAMP_NTZ_FEATURE_NAME) .add(DELETION_VECTORS_FEATURE_NAME) + .add(V2_CHECKPOINT_FEATURE_NAME) .build(); private static final Set SUPPORTED_WRITER_FEATURES = ImmutableSet.builder() .add(APPEND_ONLY_FEATURE_NAME) diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeTransactionLogEntry.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeTransactionLogEntry.java index cb86f1c99f4e..b6f9f36017ba 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeTransactionLogEntry.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeTransactionLogEntry.java @@ -30,6 +30,8 @@ public class DeltaLakeTransactionLogEntry private final ProtocolEntry protocol; private final CommitInfoEntry commitInfo; private final CdcEntry cdcEntry; + private final SidecarEntry sidecar; + private final CheckpointMetadataEntry checkpointMetadata; private DeltaLakeTransactionLogEntry( TransactionEntry txn, @@ -38,7 +40,9 @@ private DeltaLakeTransactionLogEntry( MetadataEntry metaData, ProtocolEntry protocol, CommitInfoEntry commitInfo, - CdcEntry cdcEntry) + CdcEntry cdcEntry, + SidecarEntry sidecar, + CheckpointMetadataEntry checkpointMetadata) { this.txn = txn; this.add = add; @@ -47,6 +51,8 @@ private DeltaLakeTransactionLogEntry( this.protocol = protocol; this.commitInfo = commitInfo; this.cdcEntry = cdcEntry; + this.sidecar = sidecar; + this.checkpointMetadata = checkpointMetadata; } @JsonCreator @@ -57,51 +63,59 @@ public static DeltaLakeTransactionLogEntry fromJson( @JsonProperty("metaData") MetadataEntry metaData, @JsonProperty("protocol") ProtocolEntry protocol, @JsonProperty("commitInfo") CommitInfoEntry commitInfo, - @JsonProperty("cdc") CdcEntry cdcEntry) + @JsonProperty("cdc") CdcEntry cdcEntry, + @JsonProperty("sidecar") SidecarEntry sidecarEntry, + @JsonProperty("checkpointMetadata") CheckpointMetadataEntry checkpointMetadata) { - return new DeltaLakeTransactionLogEntry(txn, add, remove, metaData, protocol, commitInfo, cdcEntry); + return new DeltaLakeTransactionLogEntry(txn, add, remove, metaData, protocol, commitInfo, cdcEntry, sidecarEntry, checkpointMetadata); } public static DeltaLakeTransactionLogEntry transactionEntry(TransactionEntry transaction) { requireNonNull(transaction, "transaction is null"); - return new DeltaLakeTransactionLogEntry(transaction, null, null, null, null, null, null); + return new DeltaLakeTransactionLogEntry(transaction, null, null, null, null, null, null, null, null); } public static DeltaLakeTransactionLogEntry commitInfoEntry(CommitInfoEntry commitInfo) { requireNonNull(commitInfo, "commitInfo is null"); - return new DeltaLakeTransactionLogEntry(null, null, null, null, null, commitInfo, null); + return new DeltaLakeTransactionLogEntry(null, null, null, null, null, commitInfo, null, null, null); } public static DeltaLakeTransactionLogEntry protocolEntry(ProtocolEntry protocolEntry) { requireNonNull(protocolEntry, "protocolEntry is null"); - return new DeltaLakeTransactionLogEntry(null, null, null, null, protocolEntry, null, null); + return new DeltaLakeTransactionLogEntry(null, null, null, null, protocolEntry, null, null, null, null); } public static DeltaLakeTransactionLogEntry metadataEntry(MetadataEntry metadataEntry) { requireNonNull(metadataEntry, "metadataEntry is null"); - return new DeltaLakeTransactionLogEntry(null, null, null, metadataEntry, null, null, null); + return new DeltaLakeTransactionLogEntry(null, null, null, metadataEntry, null, null, null, null, null); } public static DeltaLakeTransactionLogEntry addFileEntry(AddFileEntry addFileEntry) { requireNonNull(addFileEntry, "addFileEntry is null"); - return new DeltaLakeTransactionLogEntry(null, addFileEntry, null, null, null, null, null); + return new DeltaLakeTransactionLogEntry(null, addFileEntry, null, null, null, null, null, null, null); } public static DeltaLakeTransactionLogEntry removeFileEntry(RemoveFileEntry removeFileEntry) { requireNonNull(removeFileEntry, "removeFileEntry is null"); - return new DeltaLakeTransactionLogEntry(null, null, removeFileEntry, null, null, null, null); + return new DeltaLakeTransactionLogEntry(null, null, removeFileEntry, null, null, null, null, null, null); } public static DeltaLakeTransactionLogEntry cdcEntry(CdcEntry cdcEntry) { requireNonNull(cdcEntry, "cdcEntry is null"); - return new DeltaLakeTransactionLogEntry(null, null, null, null, null, null, cdcEntry); + return new DeltaLakeTransactionLogEntry(null, null, null, null, null, null, cdcEntry, null, null); + } + + public static DeltaLakeTransactionLogEntry sidecarEntry(SidecarEntry sidecarEntry) + { + requireNonNull(sidecarEntry, "sidecarEntry is null"); + return new DeltaLakeTransactionLogEntry(null, null, null, null, null, null, null, sidecarEntry, null); } @Nullable @@ -153,9 +167,23 @@ public CdcEntry getCDC() return cdcEntry; } + @Nullable + @JsonProperty + public SidecarEntry getSidecar() + { + return sidecar; + } + + @Nullable + @JsonProperty + public CheckpointMetadataEntry getCheckpointMetadata() + { + return checkpointMetadata; + } + public DeltaLakeTransactionLogEntry withCommitInfo(CommitInfoEntry commitInfo) { - return new DeltaLakeTransactionLogEntry(txn, add, remove, metaData, protocol, commitInfo, cdcEntry); + return new DeltaLakeTransactionLogEntry(txn, add, remove, metaData, protocol, commitInfo, cdcEntry, sidecar, checkpointMetadata); } @Override @@ -174,18 +202,20 @@ public boolean equals(Object o) Objects.equals(metaData, that.metaData) && Objects.equals(protocol, that.protocol) && Objects.equals(commitInfo, that.commitInfo) && - Objects.equals(cdcEntry, that.cdcEntry); + Objects.equals(cdcEntry, that.cdcEntry) && + Objects.equals(sidecar, that.sidecar) && + Objects.equals(checkpointMetadata, that.checkpointMetadata); } @Override public int hashCode() { - return Objects.hash(txn, add, remove, metaData, protocol, commitInfo, cdcEntry); + return Objects.hash(txn, add, remove, metaData, protocol, commitInfo, cdcEntry, sidecar, checkpointMetadata); } @Override public String toString() { - return String.format("DeltaLakeTransactionLogEntry{%s, %s, %s, %s, %s, %s, %s}", txn, add, remove, metaData, protocol, commitInfo, cdcEntry); + return String.format("DeltaLakeTransactionLogEntry{%s, %s, %s, %s, %s, %s, %s, %s, %s}", txn, add, remove, metaData, protocol, commitInfo, cdcEntry, sidecar, checkpointMetadata); } } diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/SidecarEntry.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/SidecarEntry.java new file mode 100644 index 000000000000..3e2a43519df9 --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/SidecarEntry.java @@ -0,0 +1,34 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake.transactionlog; + +import com.google.common.collect.ImmutableMap; + +import java.util.Map; +import java.util.Optional; + +import static com.google.common.base.Preconditions.checkArgument; +import static java.util.Objects.requireNonNull; + +// https://github.com/delta-io/delta/blob/master/PROTOCOL.md#sidecar-file-information +public record SidecarEntry(String path, long sizeInBytes, long modificationTime, Optional> tags) +{ + public SidecarEntry + { + checkArgument(sizeInBytes > 0, "sizeInBytes is not positive: %s", sizeInBytes); + requireNonNull(path, "path is null"); + requireNonNull(tags, "tags is null"); + tags = tags.map(ImmutableMap::copyOf); + } +} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java index 548ed5d7e652..84a3b6c6b010 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java @@ -14,7 +14,7 @@ package io.trino.plugin.deltalake.transactionlog; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Streams; +import com.google.common.collect.ImmutableSet; import io.trino.filesystem.Location; import io.trino.filesystem.TrinoFileSystem; import io.trino.filesystem.TrinoInputFile; @@ -40,11 +40,15 @@ import java.util.stream.Stream; import static com.google.common.base.Preconditions.checkState; +import static com.google.common.base.Verify.verify; +import static com.google.common.collect.Streams.stream; import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_FILESYSTEM_ERROR; import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_SCHEMA; import static io.trino.plugin.deltalake.transactionlog.TransactionLogParser.readLastCheckpoint; import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogDir; import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.ADD; +import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.SIDECAR; +import static io.trino.plugin.deltalake.transactionlog.checkpoint.TransactionLogTail.getEntriesFromJson; import static java.lang.String.format; import static java.util.Objects.requireNonNull; @@ -199,22 +203,19 @@ public Stream getCheckpointTransactionLogEntries( return getCheckpointPartPaths(checkpoint).stream() .map(fileSystem::newInputFile) - .flatMap(checkpointFile -> { - CheckpointEntryIterator checkpointEntryIterator = getCheckpointTransactionLogEntries( - session, - entryTypes, - metadataAndProtocol.map(MetadataAndProtocolEntry::metadataEntry), - metadataAndProtocol.map(MetadataAndProtocolEntry::protocolEntry), - checkpointSchemaManager, - typeManager, - stats, - checkpoint, - checkpointFile, - partitionConstraint, - addStatsMinMaxColumnFilter); - return Streams.stream(checkpointEntryIterator) - .onClose(checkpointEntryIterator::close); - }); + .flatMap(checkpointFile -> getCheckpointTransactionLogEntries( + session, + fileSystem, + entryTypes, + metadataAndProtocol.map(MetadataAndProtocolEntry::metadataEntry), + metadataAndProtocol.map(MetadataAndProtocolEntry::protocolEntry), + checkpointSchemaManager, + typeManager, + stats, + checkpoint, + checkpointFile, + partitionConstraint, + addStatsMinMaxColumnFilter)); } public Optional getLastCheckpointVersion() @@ -222,8 +223,9 @@ public Optional getLastCheckpointVersion() return lastCheckpoint.map(LastCheckpoint::version); } - private CheckpointEntryIterator getCheckpointTransactionLogEntries( + private Stream getCheckpointTransactionLogEntries( ConnectorSession session, + TrinoFileSystem fileSystem, Set entryTypes, Optional metadataEntry, Optional protocolEntry, @@ -245,7 +247,23 @@ private CheckpointEntryIterator getCheckpointTransactionLogEntries( catch (IOException e) { throw new TrinoException(DELTA_LAKE_FILESYSTEM_ERROR, format("Unexpected IO exception occurred while retrieving the length of the file: %s for the table %s", checkpoint, table), e); } - return new CheckpointEntryIterator( + if (checkpoint.v2Checkpoint().isPresent()) { + return getV2CheckpointTransactionLogEntriesFrom( + session, + entryTypes, + metadataEntry, + protocolEntry, + checkpointSchemaManager, + typeManager, + stats, + checkpoint, + checkpointFile, + partitionConstraint, + addStatsMinMaxColumnFilter, + fileSystem, + fileSize); + } + CheckpointEntryIterator checkpointEntryIterator = new CheckpointEntryIterator( checkpointFile, session, fileSize, @@ -260,6 +278,96 @@ private CheckpointEntryIterator getCheckpointTransactionLogEntries( domainCompactionThreshold, partitionConstraint, addStatsMinMaxColumnFilter); + return stream(checkpointEntryIterator).onClose(checkpointEntryIterator::close); + } + + private Stream getV2CheckpointTransactionLogEntriesFrom( + ConnectorSession session, + Set entryTypes, + Optional metadataEntry, + Optional protocolEntry, + CheckpointSchemaManager checkpointSchemaManager, + TypeManager typeManager, + FileFormatDataSourceStats stats, + LastCheckpoint checkpoint, + TrinoInputFile checkpointFile, + TupleDomain partitionConstraint, + Optional> addStatsMinMaxColumnFilter, + TrinoFileSystem fileSystem, + long fileSize) + { + return getV2CheckpointEntries(session, entryTypes, metadataEntry, protocolEntry, checkpointSchemaManager, typeManager, stats, checkpoint, checkpointFile, partitionConstraint, addStatsMinMaxColumnFilter, fileSystem, fileSize) + .mapMulti((entry, builder) -> { + if (entry.getSidecar() == null) { + builder.accept(entry); + return; + } + Location sidecar = checkpointFile.location().sibling("_sidecars").appendPath(entry.getSidecar().path()); + CheckpointEntryIterator iterator = new CheckpointEntryIterator( + fileSystem.newInputFile(sidecar), + session, + fileSize, + checkpointSchemaManager, + typeManager, + entryTypes, + metadataEntry, + protocolEntry, + stats, + parquetReaderOptions, + checkpointRowStatisticsWritingEnabled, + domainCompactionThreshold, + partitionConstraint, + addStatsMinMaxColumnFilter); + stream(iterator).onClose(iterator::close).forEach(builder); + }); + } + + private Stream getV2CheckpointEntries( + ConnectorSession session, + Set entryTypes, + Optional metadataEntry, + Optional protocolEntry, + CheckpointSchemaManager checkpointSchemaManager, + TypeManager typeManager, + FileFormatDataSourceStats stats, + LastCheckpoint checkpoint, + TrinoInputFile checkpointFile, + TupleDomain partitionConstraint, + Optional> addStatsMinMaxColumnFilter, + TrinoFileSystem fileSystem, + long fileSize) + { + if (checkpointFile.location().fileName().endsWith(".json")) { + try { + return getEntriesFromJson(checkpoint.version(), checkpointFile).stream().flatMap(List::stream); + } + catch (IOException e) { + throw new TrinoException(DELTA_LAKE_FILESYSTEM_ERROR, format("Unexpected IO exception occurred while reading the entries of the file: %s for the table %s", checkpoint, table), e); + } + } + if (checkpointFile.location().fileName().endsWith(".parquet")) { + CheckpointEntryIterator checkpointEntryIterator = new CheckpointEntryIterator( + fileSystem.newInputFile(checkpointFile.location()), + session, + fileSize, + checkpointSchemaManager, + typeManager, + ImmutableSet.builder() + .addAll(entryTypes) + .add(SIDECAR) + .build(), + metadataEntry, + protocolEntry, + stats, + parquetReaderOptions, + checkpointRowStatisticsWritingEnabled, + domainCompactionThreshold, + partitionConstraint, + addStatsMinMaxColumnFilter); + return stream(checkpointEntryIterator) + .onClose(checkpointEntryIterator::close); + } + throw new IllegalArgumentException("Unsupported v2 checkpoint file format: " + checkpointFile.location()); } public record MetadataAndProtocolEntry(MetadataEntry metadataEntry, ProtocolEntry protocolEntry) @@ -275,7 +383,11 @@ private List getCheckpointPartPaths(LastCheckpoint checkpoint) { Location transactionLogDir = Location.of(getTransactionLogDir(tableLocation)); ImmutableList.Builder paths = ImmutableList.builder(); - if (checkpoint.parts().isEmpty()) { + if (checkpoint.v2Checkpoint().isPresent()) { + verify(checkpoint.parts().isEmpty(), "v2 checkpoint should not have multi-part checkpoints"); + paths.add(transactionLogDir.appendPath(checkpoint.v2Checkpoint().get().path())); + } + else if (checkpoint.parts().isEmpty()) { paths.add(transactionLogDir.appendPath("%020d.checkpoint.parquet".formatted(checkpoint.version()))); } else { diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/V2Checkpoint.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/V2Checkpoint.java new file mode 100644 index 000000000000..70b52c05fded --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/V2Checkpoint.java @@ -0,0 +1,24 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake.transactionlog; + +import static java.util.Objects.requireNonNull; + +public record V2Checkpoint(String path) +{ + public V2Checkpoint + { + requireNonNull(path, "path is null"); + } +} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java index a1c9a8c452bf..a6481b4a9496 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java @@ -33,6 +33,7 @@ import io.trino.plugin.deltalake.transactionlog.MetadataEntry; import io.trino.plugin.deltalake.transactionlog.ProtocolEntry; import io.trino.plugin.deltalake.transactionlog.RemoveFileEntry; +import io.trino.plugin.deltalake.transactionlog.SidecarEntry; import io.trino.plugin.deltalake.transactionlog.TransactionEntry; import io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeParquetFileStatistics; import io.trino.plugin.hive.FileFormatDataSourceStats; @@ -92,6 +93,7 @@ import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.METADATA; import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.PROTOCOL; import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.REMOVE; +import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.SIDECAR; import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.TRANSACTION; import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; import static io.trino.spi.type.BigintType.BIGINT; @@ -117,7 +119,9 @@ public enum EntryType REMOVE("remove"), METADATA("metadata"), PROTOCOL("protocol"), - COMMIT("commitinfo"); + COMMIT("commitinfo"), + SIDECAR("sidecar"), + /**/; private final String columnName; @@ -152,6 +156,7 @@ public String getColumnName() private final Optional metadataType; private final Optional protocolType; private final Optional commitType; + private final Optional sidecarType; private MetadataEntry metadataEntry; private ProtocolEntry protocolEntry; @@ -244,6 +249,7 @@ public CheckpointEntryIterator( metadataType = getParquetType(fields, METADATA); protocolType = getParquetType(fields, PROTOCOL); commitType = getParquetType(fields, COMMIT); + sidecarType = getParquetType(fields, SIDECAR); } catch (Exception e) { try { @@ -303,6 +309,7 @@ private CheckpointFieldExtractor createCheckpointFieldExtractor(EntryType entryT case METADATA -> (session, pagePosition, blocks) -> buildMetadataEntry(session, pagePosition, blocks[0]); case PROTOCOL -> (session, pagePosition, blocks) -> buildProtocolEntry(session, pagePosition, blocks[0]); case COMMIT -> (session, pagePosition, blocks) -> buildCommitInfoEntry(session, pagePosition, blocks[0]); + case SIDECAR -> (session, pagePosition, blocks) -> buildSidecarEntry(session, pagePosition, blocks[0]); }; } @@ -320,6 +327,7 @@ private DeltaLakeColumnHandle buildColumnHandle( case METADATA -> schemaManager.getMetadataEntryType(); case PROTOCOL -> schemaManager.getProtocolEntryType(true, true); case COMMIT -> schemaManager.getCommitInfoEntryType(); + case SIDECAR -> schemaManager.getSidecarEntryType(); }; return new DeltaLakeColumnHandle(entryType.getColumnName(), type, OptionalInt.empty(), entryType.getColumnName(), type, REGULAR, Optional.empty()); } @@ -340,7 +348,7 @@ private TupleDomain buildTupleDomainColumnHandle(EntryType ent field = "version"; type = BIGINT; } - case ADD, REMOVE -> { + case ADD, REMOVE, SIDECAR -> { field = "path"; type = VARCHAR; } @@ -538,6 +546,29 @@ private DeltaLakeTransactionLogEntry buildRemoveEntry(ConnectorSession session, return DeltaLakeTransactionLogEntry.removeFileEntry(result); } + private DeltaLakeTransactionLogEntry buildSidecarEntry(ConnectorSession session, int pagePosition, Block block) + { + log.debug("Building sidecar entry from %s pagePosition %d", block, pagePosition); + if (block.isNull(pagePosition)) { + return null; + } + int sidecarFields = 4; + SqlRow sidecarEntryRow = getRow(block, pagePosition); + if (sidecarEntryRow.getFieldCount() != sidecarFields) { + throw new TrinoException( + DELTA_LAKE_INVALID_SCHEMA, + format("Expected block %s to have %d children, but found %s", block, sidecarFields, sidecarEntryRow.getFieldCount())); + } + RowType type = sidecarType.orElseThrow(); + CheckpointFieldReader sidecar = new CheckpointFieldReader(session, sidecarEntryRow, type); + SidecarEntry result = new SidecarEntry( + sidecar.getString("path"), + sidecar.getLong("sizeInBytes"), + sidecar.getLong("modificationTime"), + Optional.ofNullable(sidecar.getMap(stringMap, "tags"))); + return DeltaLakeTransactionLogEntry.sidecarEntry(result); + } + private class AddFileEntryExtractor implements CheckpointFieldExtractor { diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointSchemaManager.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointSchemaManager.java index bdeaa8727dff..0422ef4df9c8 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointSchemaManager.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointSchemaManager.java @@ -64,6 +64,7 @@ public class CheckpointSchemaManager private final RowType metadataEntryType; private final RowType commitInfoEntryType; private final RowType removeEntryType; + private final RowType sidecarEntryType; private final ArrayType stringList; @Inject @@ -111,6 +112,13 @@ public CheckpointSchemaManager(TypeManager typeManager) RowType.field("partitionValues", stringMap), RowType.field("deletionTimestamp", BIGINT), RowType.field("dataChange", BOOLEAN))); + + sidecarEntryType = RowType.from(ImmutableList.builder() + .add(RowType.field("path", VARCHAR)) + .add(RowType.field("sizeInBytes", BIGINT)) + .add(RowType.field("modificationTime", BIGINT)) + .add(RowType.field("tags", stringMap)) + .build()); } public RowType getMetadataEntryType() @@ -236,4 +244,10 @@ public RowType getProtocolEntryType(boolean requireReaderFeatures, boolean requi public RowType getCommitInfoEntryType() { return commitInfoEntryType; - }} + } + + public RowType getSidecarEntryType() + { + return sidecarEntryType; + } +} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriterManager.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriterManager.java index 19bf8cf622af..663349d12578 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriterManager.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriterManager.java @@ -158,7 +158,7 @@ public void writeCheckpoint(ConnectorSession session, TableSnapshot snapshot) checkpointWriter.write(checkpointEntries, checkpointFile); // update last checkpoint file - LastCheckpoint newLastCheckpoint = new LastCheckpoint(newCheckpointVersion, checkpointEntries.size(), Optional.empty()); + LastCheckpoint newLastCheckpoint = new LastCheckpoint(newCheckpointVersion, checkpointEntries.size(), Optional.empty(), Optional.empty()); Location checkpointPath = transactionLogDir.appendPath(LAST_CHECKPOINT_FILENAME); TrinoOutputFile outputFile = fileSystem.newOutputFile(checkpointPath); outputFile.createOrOverwrite(lastCheckpointCodec.toJsonBytes(newLastCheckpoint)); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/LastCheckpoint.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/LastCheckpoint.java index 3754e17692db..aa7bc85bd99d 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/LastCheckpoint.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/LastCheckpoint.java @@ -13,14 +13,17 @@ */ package io.trino.plugin.deltalake.transactionlog.checkpoint; +import io.trino.plugin.deltalake.transactionlog.V2Checkpoint; + import java.util.Optional; import static java.util.Objects.requireNonNull; -public record LastCheckpoint(long version, long size, Optional parts) +public record LastCheckpoint(long version, long size, Optional parts, Optional v2Checkpoint) { public LastCheckpoint { requireNonNull(parts, "parts is null"); + requireNonNull(v2Checkpoint, "v2Checkpoint is null"); } } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAlluxioCacheFileOperations.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAlluxioCacheFileOperations.java index d43fbdf6349a..b48b21506b7d 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAlluxioCacheFileOperations.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAlluxioCacheFileOperations.java @@ -266,6 +266,72 @@ public void testChangeDataFileOperations() .build()); } + @Test + public void testReadV2CheckpointJson() + { + registerTable("v2_checkpoint_json", "deltalake/v2_checkpoint_json"); + assertUpdate("CALL system.flush_metadata_cache(schema_name => CURRENT_SCHEMA, table_name => 'v2_checkpoint_json')"); + assertFileSystemAccesses( + "SELECT * FROM v2_checkpoint_json", + ImmutableMultiset.builder() + .addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json", 0, 765), 2) + .addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 0, 9176), 2) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 1224, 143)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 4, 727)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 1994, 188)) + .add(new CacheOperation("Alluxio.writeCache", "data", 0, 666)) + .add(new CacheOperation("Alluxio.readCached", "data", 0, 666)) + .add(new CacheOperation("Alluxio.readExternal", "data", 0, 666)) + .build()); + assertFileSystemAccesses( + "SELECT * FROM v2_checkpoint_json", + ImmutableMultiset.builder() + .addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json", 0, 765), 2) + .addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 0, 9176), 2) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 1224, 143)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 4, 727)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 1994, 188)) + .add(new CacheOperation("Alluxio.readCached", "data", 0, 666)) + .build()); + } + + @Test + public void testReadV2CheckpointParquet() + { + registerTable("v2_checkpoint_parquet", "deltalake/v2_checkpoint_parquet"); + assertUpdate("CALL system.flush_metadata_cache(schema_name => CURRENT_SCHEMA, table_name => 'v2_checkpoint_parquet')"); + assertFileSystemAccesses( + "SELECT * FROM v2_checkpoint_parquet", + ImmutableMultiset.builder() + .addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 0, 19019), 2) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 100, 2470)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 1304, 1266)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 3155, 87)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 3829, 143)) + .addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 0, 9415), 2) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 4, 758)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 1255, 143)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 2040, 199)) + .add(new CacheOperation("Alluxio.writeCache", "data", 0, 666)) + .add(new CacheOperation("Alluxio.readCached", "data", 0, 666)) + .add(new CacheOperation("Alluxio.readExternal", "data", 0, 666)) + .build()); + assertFileSystemAccesses( + "SELECT * FROM v2_checkpoint_parquet", + ImmutableMultiset.builder() + .addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 0, 19019), 2) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 100, 2470)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 1304, 1266)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 3155, 87)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 3829, 143)) + .addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 0, 9415), 2) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 4, 758)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 1255, 143)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 2040, 199)) + .add(new CacheOperation("Alluxio.readCached", "data", 0, 666)) + .build()); + } + private void assertFileSystemAccesses(@Language("SQL") String query, Multiset expectedCacheAccesses) { assertUpdate("CALL system.flush_metadata_cache()"); diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java index 4f2f2a055ed7..04a2e614fce2 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java @@ -1323,6 +1323,53 @@ private void assertPartitionValuesParsedCondition(String tableName, int id, @Lan .matches("VALUES " + id); } + @Test + public void testReadV2Checkpoint() + throws Exception + { + testReadV2Checkpoint("deltalake/v2_checkpoint_json"); + testReadV2Checkpoint("deltalake/v2_checkpoint_parquet"); + testReadV2Checkpoint("databricks133/v2_checkpoint_json"); + testReadV2Checkpoint("databricks133/v2_checkpoint_parquet"); + } + + private void testReadV2Checkpoint(String resourceName) + throws Exception + { + String tableName = "test_v2_checkpoint_" + randomNameSuffix(); + Path tableLocation = Files.createTempFile(tableName, null); + Path source = new File(Resources.getResource(resourceName).toURI()).toPath(); + copyDirectoryContents(source, tableLocation); + assertThat(source.resolve("_delta_log/_last_checkpoint")) + .content().contains("v2Checkpoint").contains("sidecar"); + + assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(getSession().getSchema().orElseThrow(), tableName, tableLocation.toUri())); + assertThat(query("DESCRIBE " + tableName)) + .result() + .projected("Column", "Type") + .skippingTypesCheck() + .matches("VALUES ('a', 'integer'), ('b', 'integer')"); + assertQuery("SELECT * FROM " + tableName, "VALUES (1, 2)"); + + // Write-operations should fail + assertQueryFails( + "INSERT INTO " + tableName + " VALUES (3, 4)", + "\\QUnsupported writer features: [v2Checkpoint]"); + assertQueryFails( + "UPDATE " + tableName + " SET a = 10", + "\\QUnsupported writer features: [v2Checkpoint]"); + assertQueryFails( + "DELETE FROM " + tableName, + "\\QUnsupported writer features: [v2Checkpoint]"); + assertQueryFails( + "TRUNCATE TABLE " + tableName, + "\\QUnsupported writer features: [v2Checkpoint]"); + assertQueryFails( + "MERGE INTO " + tableName + " USING (VALUES 42) t(dummy) ON false WHEN NOT MATCHED THEN INSERT VALUES (3, 4)", + "\\QUnsupported writer features: [v2Checkpoint]"); + assertQuery("SELECT * FROM " + tableName, "VALUES (1, 2)"); + } + private static MetadataEntry loadMetadataEntry(long entryNumber, Path tableLocation) throws IOException { diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java index 56b20ac6e613..55d3c7239c84 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java @@ -731,6 +731,52 @@ public void testReadMultipartCheckpoint() .build()); } + @Test + public void testV2CheckpointJson() + throws Exception + { + String tableName = "test_v2_checkpoint_json_" + randomNameSuffix(); + Path tableLocation = Files.createTempFile(tableName, null); + copyDirectoryContents(new File(Resources.getResource("deltalake/v2_checkpoint_json").toURI()).toPath(), tableLocation); + + assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(getSession().getSchema().orElseThrow(), tableName, tableLocation.toUri())); + assertFileSystemAccesses("SELECT * FROM " + tableName, + ImmutableMultiset.builder() + .add(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", "InputFile.newStream")) + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json", "InputFile.length"), 2) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json", "InputFile.newStream"), 2) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", "InputFile.length"), 2) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", "InputFile.newInput"), 2) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .add(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.newStream")) // TransactionLogTail.getEntriesFromJson access non-existing file as end of tail + .add(new FileOperation(DATA, "no partition", "InputFile.newInput")) + .build()); + + assertUpdate("DROP TABLE " + tableName); + } + + @Test + public void testV2CheckpointParquet() + throws Exception + { + String tableName = "test_v2_checkpoint_parquet_" + randomNameSuffix(); + Path tableLocation = Files.createTempFile(tableName, null); + copyDirectoryContents(new File(Resources.getResource("deltalake/v2_checkpoint_parquet").toURI()).toPath(), tableLocation); + + assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(getSession().getSchema().orElseThrow(), tableName, tableLocation.toUri())); + assertFileSystemAccesses("SELECT * FROM " + tableName, + ImmutableMultiset.builder() + .add(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", "InputFile.newStream")) + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", "InputFile.length"), 4) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", "InputFile.newInput"), 2) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", "InputFile.length"), 2) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", "InputFile.newInput"), 2) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .add(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.newStream")) // TransactionLogTail.getEntriesFromJson access non-existing file as end of tail + .add(new FileOperation(DATA, "no partition", "InputFile.newInput")) + .build()); + + assertUpdate("DROP TABLE " + tableName); + } + private int countCdfFilesForKey(String partitionValue) { String path = (String) computeScalar("SELECT \"$path\" FROM table_changes_file_system_access WHERE key = '" + partitionValue + "'"); @@ -774,7 +820,7 @@ public static FileOperation create(String path, String operationType) if (path.matches(".*/_delta_log/_last_checkpoint")) { return new FileOperation(LAST_CHECKPOINT, fileName, operationType); } - if (path.matches(".*/_delta_log/\\d+\\.checkpoint(\\.\\d+\\.\\d+)?\\.parquet")) { + if (path.matches(".*/_delta_log/.*.checkpoint.*")) { return new FileOperation(CHECKPOINT, fileName, operationType); } if (path.matches(".*/_delta_log/\\d+\\.json")) { diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/README.md b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/README.md new file mode 100644 index 000000000000..8e56e908a5cf --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/README.md @@ -0,0 +1,11 @@ +Data generated using Databricks 13.3: + +```sql +CREATE TABLE test_v2_checkpoint_json +(a INT, b INT) +USING delta +LOCATION ? +TBLPROPERTIES ('delta.checkpointPolicy' = 'v2', 'delta.checkpointInterval' = '1'); + +INSERT INTO test_v2_checkpoint_json VALUES (1, 2); +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..1ff67d3dd29d --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1708995805151,"userId":"7853186923043731","userName":"yuya.ebihara@starburstdata.com","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.checkpointPolicy\":\"v2\",\"delta.checkpointInterval\":\"1\"}","statsOnLoad":false},"notebook":{"notebookId":"1841155838656679"},"clusterId":"0830-081135-p4ddj2po","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"fefea473-91a9-4c85-9f52-8fea8e3ac6ff"}} +{"metaData":{"id":"460c25e0-e22c-4e5c-8d79-0c3205dfcc71","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1708995803969}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000001.checkpoint.ae4ea00c-afe0-4e8b-ad87-28125262bd44.json b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000001.checkpoint.ae4ea00c-afe0-4e8b-ad87-28125262bd44.json new file mode 100644 index 000000000000..7372736e50ca --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000001.checkpoint.ae4ea00c-afe0-4e8b-ad87-28125262bd44.json @@ -0,0 +1,4 @@ +{"checkpointMetadata":{"version":1,"tags":{"sidecarNumActions":"1","sidecarSizeInBytes":"12007","numOfAddFiles":"1","sidecarFileSchema":"{\"type\":\"struct\",\"fields\":[{\"name\":\"add\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partitionValues\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"size\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"modificationTime\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"dataChange\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"tags\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionVector\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"storageType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"pathOrInlineDv\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"offset\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sizeInBytes\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cardinality\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"maxRowIndex\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"baseRowId\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"defaultRowCommitVersion\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clusteringProvider\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"stats\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"stats_parsed\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"numRecords\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"minValues\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"maxValues\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"nullCount\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"remove\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionTimestamp\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"dataChange\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"extendedFileMetadata\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partitionValues\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"size\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionVector\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"storageType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"pathOrInlineDv\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"offset\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sizeInBytes\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cardinality\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"maxRowIndex\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"baseRowId\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"defaultRowCommitVersion\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}"}}} +{"sidecar":{"path":"00000000000000000001.checkpoint.0000000001.0000000001.59e66709-d0ca-402d-9375-0e3c4fcf8d73.parquet","sizeInBytes":12007,"modificationTime":1708995845000}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}} +{"metaData":{"id":"460c25e0-e22c-4e5c-8d79-0c3205dfcc71","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1708995803969}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..2ed5dbf9fdd6 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1708995837635,"userId":"7853186923043731","userName":"yuya.ebihara@starburstdata.com","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"1841155838656679"},"clusterId":"0830-081135-p4ddj2po","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"788"},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"700f9c0d-5e49-47a0-b590-de31699f3604"}} +{"add":{"path":"part-00000-946b010f-db20-4fde-860e-a0d90082de75-c000.snappy.parquet","partitionValues":{},"size":788,"modificationTime":1708995838000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"a\":1,\"b\":2},\"maxValues\":{\"a\":1,\"b\":2},\"nullCount\":{\"a\":0,\"b\":0}}","tags":{"INSERTION_TIME":"1708995838000000","MIN_INSERTION_TIME":"1708995838000000","MAX_INSERTION_TIME":"1708995838000000","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/_last_checkpoint b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/_last_checkpoint new file mode 100644 index 000000000000..b40f0f66f774 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":1,"size":5,"sizeInBytes":16914,"numOfAddFiles":1,"v2Checkpoint":{"path":"00000000000000000001.checkpoint.ae4ea00c-afe0-4e8b-ad87-28125262bd44.json","sizeInBytes":4907,"modificationTime":1708995845000,"nonFileActions":[{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}},{"metaData":{"id":"460c25e0-e22c-4e5c-8d79-0c3205dfcc71","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1708995803969}},{"checkpointMetadata":{"version":1,"tags":{"sidecarNumActions":"1","sidecarSizeInBytes":"12007","numOfAddFiles":"1","sidecarFileSchema":"{\"type\":\"struct\",\"fields\":[{\"name\":\"add\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partitionValues\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"size\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"modificationTime\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"dataChange\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"tags\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionVector\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"storageType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"pathOrInlineDv\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"offset\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sizeInBytes\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cardinality\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"maxRowIndex\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"baseRowId\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"defaultRowCommitVersion\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clusteringProvider\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"stats\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"stats_parsed\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"numRecords\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"minValues\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"maxValues\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"nullCount\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"remove\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionTimestamp\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"dataChange\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"extendedFileMetadata\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partitionValues\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"size\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionVector\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"storageType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"pathOrInlineDv\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"offset\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sizeInBytes\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cardinality\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"maxRowIndex\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"baseRowId\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"defaultRowCommitVersion\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}"}}}],"sidecarFiles":[{"path":"00000000000000000001.checkpoint.0000000001.0000000001.59e66709-d0ca-402d-9375-0e3c4fcf8d73.parquet","sizeInBytes":12007,"modificationTime":1708995845000}]},"checksum":"26ed24a15897b93a27541c56e784b421"} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.59e66709-d0ca-402d-9375-0e3c4fcf8d73.parquet b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.59e66709-d0ca-402d-9375-0e3c4fcf8d73.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e02ce0972d27bec28dc94c9de172ad2ddd499c69 GIT binary patch literal 12007 zcmeHNeQXrR72iD{=8TQ8jc3`T7vdanHK~0$e_~q}0t_ieU@XT)6-{I9-L1`u`*7~o z1d5OlaS%FD$x11rDkUgt5NfN)%^y)ktOmKrtsG=EC`DF;NL5jZR<2~xsETMr``+yC z%(8dbcQz={#TOXym@=wTYBn<_E8EzJS7Rtbb#1A&(Mu)49SHWUa1 z{HeGyIC$6(tgjBK8o;h5Yskkxp82=alu{@N%L5LS0>4xijPj5wGCKA8O^0PvX`?>M zsgDw1*c##`fBoW*_Y2Kopz?|q1Evx`;l~*F*+&x}lw=zh5LVs!p-9J-kxFv%%!eN{ zDz4X=n*75d%d)sc&v+Y-nn1Y6dO5#7BmIcjFXT%LQCisq)RbQq)@O ziUhF$^(`e87#TkG*rz>SKn_76{;m^asYtunEqce*HtX1;7_eYUpypKOATby7J3+D=k{{ zG3Wq~5~zx-BR@)i`ZLiYt*qVDNLQRn%y}UC+r68=|673t(eOtK^tk&m82#z3&!(A; z?ZJp@fx&3zt#@u)M?DaXbUQz8WuIz%ce%qYZ`Mx`I2Qs7J3jA(S$}BM-Ef#RgMLg)| zG>f_C_y~J(ZRZP+7&Cc3)Pu@45Stz6(CB(YE-3D zP(xZGNlNsY7N_7r8Blu<52~h$Xn|6+z@B7hJQ|6s+YXvt4(KXO^!KM!4UPnOD8{LD z#;*gRIN8(B}q8&CUkVp0;Uh9m9)k8X^D5#`VB!%_oRbjPX8H#E^>PW<5 z5pA!UOo2gk9V;Y1!h`8pZj3VQfe5ke$>v( zc+QIkEn+z@t{P56RE*D{-me_e1N%ezSx$`oxRA(CKPi7wsmkH$6!*mdz#oOO&pPFu1*0sLqtW9DChd5)5O0R!om@6Io+TAvcqf364htI?o&!`-Gi6T?6cwDEn|RQ-`uYEYV*10m zFJU<2cAWPS=QhPT2E|(|j-J3!)?tS82&kr0a?2&-8LQ5zL!AZ-IrfP_0sLqsv@ai|5a! zQnu~sLHvMFp3I60mHMKz6FA!SqJaJ%q|v*-{$!PM52f5%OZAhO=GxIQaw`dmS<8C@R$GZHLAogA~|Sv%+r z1WkVv%{2z^L2?CUNtHvwM0u>zrt1vCUZSvRcoSo$vE~x8Tp6vFCN}2dF97odWu63P zGtn}b=gYMaa?%cY79nrz`bhIZW=prIqLNL~cMNi|S}cyNYNR zC|Y{ed}tX-7gcbTO~FqQ^0&Hz0BIf}*%d6d6j7KLPTC=75%Q+4r93aBJ@ukm$~G-e z1OW1$t|f;wx2!GuqNcMWoI`}O8*m}ga0Rrv+nfm))yQqr=sm=HAMvC#yy5V1%;PKjX`+){w9kIQ8|*5Vig0z)MXp7D55f>A&OQ7XKyHMD?S@95DQCe2p7 zWI#-EOc?|sT;QVpA?DdCMK@|lahTDnji zUS8!0!9*geDsdp>I1wz$GWM^^nr0z!FD>h4sxyZhT1vl+ZvozAtrIUCM=vUK{zzO? z2h=1Ga$F5dA$k!gmblAN^7K(&?amM4`?>ESRzJ^A0uKrgDY!2bF0(?|FS1dJ`;=)}HAlTUP4xVXHXKA1Oe9!>@cgZ>VL3CYEbPwaFsj?k|$ zEp$5hg#pXb#LV4!_|4Rv$ zxlmuH6a2k4ca6N`^VjKwaT|I>AyAN?&A5!|_qlC~rebu#E8RZWk@zL;+E}v6xXe}6 z`RnTa4Ye3Qg2_neK&lqLh*n=!9UO{8!&UW-EsaWRQ#h#BH7h`GP#e@>eQT&bP}kqu g+TY)*DjPS}sp{K(|eztCPxDDEX~DJyyHib%2}ts8!|MeECX z$_Oq36~KcK{bi6kJvWFz&Hfdl;F72hM8&YBEr|X`SBcIr5PdI*=$S!r)1;q`I%s;$ z_#_((IhCM8EF}pYCF*dJDt67 zl7wu9ic+n{DuYBii(?tYP=`k4*cY63Z@ySKm%YJ%_r6&mc5g*OxrHZEs{t&nSj1ta zsFh@mY6T5fxSr^=3T;6ag-3a--EMo0^;SXeWu%7?` literal 0 HcmV?d00001 diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/README.md b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/README.md new file mode 100644 index 000000000000..209b0a013e8f --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/README.md @@ -0,0 +1,13 @@ +Data generated using Databricks 13.3: + +```sql +SET spark.databricks.delta.checkpointV2.topLevelFileFormat = parquet; + +CREATE TABLE test_v2_checkpoint_parquet +(a INT, b INT) +USING delta +LOCATION ? +TBLPROPERTIES ('delta.checkpointPolicy' = 'v2', 'delta.checkpointInterval' = '1'); + +INSERT INTO test_v2_checkpoint_parquet VALUES (1, 2); +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..19b4bd4ba32d --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1708995910270,"userId":"7853186923043731","userName":"yuya.ebihara@starburstdata.com","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.checkpointPolicy\":\"v2\",\"delta.checkpointInterval\":\"1\"}","statsOnLoad":false},"notebook":{"notebookId":"1841155838656679"},"clusterId":"0830-081135-p4ddj2po","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"6d516ff6-63ea-4da3-8a3f-7ca80eb33a40"}} +{"metaData":{"id":"c6274bfd-507a-4515-ac7f-37c482d06d83","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1708995909994}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000001.checkpoint.b6785ba8-b035-4760-b814-1f115ccb6167.parquet b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000001.checkpoint.b6785ba8-b035-4760-b814-1f115ccb6167.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ece6e2cbb0b8ea054c846189c80409286c4f4f47 GIT binary patch literal 23272 zcmeHP4RBP~b$)NPTCLCzLe_okGU89cifARFT}dEO*#-+!;b1`lV<$DZZ+D;Y;{B0# zSJ>FYRQ6y8+k}veOC5#5nH|iym}E3AqcO}*N=Qnn$Aj$*(GZooSfz~XW~hd7sN*VX z&$;itd*8lYNpBG*4wfwByZ7Dmopb-rJ?Gr>cKyl?9wLxZQcga<_)q_tS4o`k(LtPo zP~gfF1Ofitg6DI{Jo2m8{_TQI=kE}Nk_H>)&!wm&3&?L?d;PMpRcVvj%C5?KD}I@!OhtFCdGzp2F=XjmF(Y1DM%cVD~qJ*Jho?p&d(qlum`A~ob#-)H-s z206-CQ8`Wt1t3Sa9DAZiNdPNUfR~;fd6qQ=HJPpe9mvsQv!B|?XCfJX{-Y0y4Xj@i zN5a}5yaLRhsY>kZR;rd)#S^g}f1;|ksw=330`aQlfB8&RL=NL48EUmw!AL^cro^Bk z(h~~Fz7RYILZ!AUtR&=soRF)QKeKCBEu-~S)z0z>!2@lAeakLQ_RBB-JXxxXl+i^y zcJT#~Y$xA+_r(|B#F_!09R%97yXaX^mM1#8PvuD{#!5PL}zNj;lP)k*0AiNqe?^q=Y#ZTv7?vi8jGo)jpTP1V8eB!YpVS4+a#!9P8`}ukt1RM3ge z7xPbf8yZ@+0*Iw;5C~M&melNP5Q@9y#AC!Ge)C7c$Tn+xL42_V_IV zL!Mp~mb-!eab@Qwao`iX1ueP0^W2g}VR65BgpdRz6flCt=IKpII%`z$68N5jhl0>~z0-@~~}@{M3f% zlWQUY<*9*oLD+I+RbH`Ajw_-f1O|M<56Gr~(k1tV640tW8V&~&o0V8R7+v}My550L zg$jSDC!SDNF0WW0i|z;p?mSU)zdfE1T}K|uD~>1R#LkBY?-42b>$+RPU8i8+cKnV9VQ$duT``M?~_ozaQK^!?4@J zwN(E&1s`mUhSj3b;4vu3dNN<|)E5cIyv?nxUfI`L=kqi**EM;TL8(De>b#0y>00V( z3i!N@_55VQef^Ig4qUMtIBEOZDSTWHtl$S^!gfFY@ehw>ZMVLIX->3s{`KV(hm6NS znnOTO0X*mq{`w!DXKsu6umFAQo+Y|n0{jx2TXIf+oE{D9i^1l0EwW+-T5N_Z&@t%> z1f~Vr#^NQ*q$@0)R1+9%Zr4;H!s7@F9dnrtGTjPuyCm}?)mGE7fLQc^QMSWtsB!AH zmfOXOhR1f)bZ!^@pRpRPDyXK$l{O^+U%bQ1?I+Vm1GSZAIqflE?bIzFL(#ypB+Y_x zyUI1I$6)6eTH`QTn?kf#G{fDrYQAQ`ThWmDlXhgV$DVpakslx+4y)&dTp;`gah&>T z0%N5YjgR$11Rm=}{qcm;QYksf3iKJ=l~M;Q?Gah=EHCb{k+oXW^ET3{6_a_a?JzGU z?Yx#3llctcYCc0qIau3KUQ8A6T3$?XaYuPERmj`&VrmAj<;7GHujR#5F|RFVLY^&Q zLLMk(4F*eDgXB!sU}Pq1&|k)iBfQu@i`VjEe>tz^#r`{Z?Hy#lsEu_mVb!O2u~#CS zwFZL{ljrbkR=hTw9JOgJ4$fiKpUfeBI`ZyH;?r7OsU+*Q;=o+ep%uq?@ytB3U90V% z&x(Bum~2D5*td|^E@ZeztH^ns^Tm25{aZ`O1-<$u4{JTdi$}d|=&yU#j=(Y>^>q}- z;qZ9;(YWg0lnKo?S68dO_)eQEU+(T+6Sz}YTZsml9cl`8P{TT+ekuyd@x+7O z;HfJCN-GBfL{e$(=+%_=xnJV z^r>Ehg4of8QKcx;{TFnHz=45k0b0z!00MPmpn`&+sUPPe*}N&1y&?UP4IVkDTa*C96FwAS=QO8O8)<_}q}71$p*uxt zO(D8OSQSt|&(7`?!Q7!N<*HK@1E*8JjOgG=R$Sq}jGm7P`SyA$JPt6eWD83Ruu^H6ZrHAZBB|Q>9N*tQ(;WM8gpM zc|hBGx@LE%)yB1uK5+UI+{6W{HlQlL9F7VtKFTmbX{VhHH)j_XXof3}G+n%g14W&i z3@w>?65Iq)B&WO5J-VVA;tU{b^B zOf@;9JZq+WA1Mz}$~_Q2E_UAu-R;N&LbJZ+XEvVI6tP2ETfN6-lI02_rYPchtBCok zEr3INztmZM-i$elm_rmZ3DF!jxY$2j6Rdy9Jd-rve*h>yqm;uY%8a$cP=?JYZy?Hn zW*pxn#HIv&a)H0W=wS3oyGe?lBK>hnKMeH7`N^j?3L_mhlb%P?U#hI(2Es5w^xPX~ zR+9N92|huX5ejp4LKxFjTr~srb^_48r8pER2-9m_X~7j^vW<*35$j*dVG`^B;vJ)S zqZ8wqY$*mgYKA<9kngA>!o7)2ovmylYBm^yN)?!-JB?5utI|yjWwhWJ-l!Sx0^+@} zjE)|t7i}SD6RpT#|1rFjS?OFwyu%c4bP_y+^~mr>&3L=l0p4j~9)Ok!L^qzkGTfTZgX4Jn%%88t)fSq~7`Rbe2)Z-U5b zd2@M+P4XN#3buG2>U*TJpeI( z(=4PP0S8J=#FvoxDkUBS;&hoNQlf)qr0a;ZcR3DOa^@sRrYEBA_~>LknEDtFX|IBz1T}k!}19Z%ayxu zSWaalH+s+v<&+tvcO#(ez8g_`B_>1*8(fF#aWhYwB5ja1L+dtn!NUqLTKc6At26fb z0CG#-O_mGQgOnRu@eiAs={|fGn-Q((b|ZQ1oD6$_Tl^mr|=QnOAb z%{tq%%ebX3Yy>s(>OJI;O{hLNM_>INY>{R}tZ0-Ypt{%TDI|%SE5`WIR7PevA(kj00T#hH10M)UxrDVUhK-nL8ItIy19OTVg4{{+m_Zm83r z8il!z!t7l|I>AtzpGBDT_+)lr`sd#yVfH=WNXB9_rLWoOIwZLf5yz zwD*z-^TvI+_lCxYNniO4ZODxFHlm%OXnhO0H)aw_52Z4kzJ(^K&LPfaRVu*Iw@Q|{ zYg!-?H_XsN*f4r_059Q&$*eWb?q%fp+-lq~N2}QEW?`A#KPr{3iqd%zXmR&vNjPw& zCnXqJfBqoool~pHBSQ7Xddk`k5y#0{GsROXU0f<1F9Pc7v9DDE{}1O_vI)k|XUJ+p zVDOnGKwy5e?-YPKc=Vv_RIO_o5dGN7SQyhl{CstuLKyiPT5dY9!yYK)RH zAs`FXYT_wMVZLp>Q5=3f%Mr9}Q>d_*5sp{5jQkHZn)+p^nhl#N&IH-;hKpiGcLS@- zc&mKlIO-KNr?+S+@%((lh0GLC453brlH(fXcjLK1Y_YzF*ll}m9QbIBb3K2faGCe{ z94S|QMQ$bn{QW0pI=VojMcH$~a8T2GLI`ma@Zm z=eWgg%E6s2FNh12vmJ0d8Cd#6O`8)u3yv?1oQ;xcb3%LF@+PT38rrFa*NN6a3K%=Ir$O9+22~iopL>khhMDjwMiF5Xw&BM@dR&H$|cj=!rMl-f(Q85w@FWo zj`nvtCzcF$iSBc0^ zSp+prkI`G{vHtQLBg&bn_O2ZRU|es>rEv}`!uT>DlQ!q>GH*qD_m3g5e#^0#yc4Rm zcmK?{)r2#n>psG@1Cy}PwJaeth< zHNizlhyX(9$_JN5G)f6V35sYNMOHo_6_JQqDXkpjR1_hEQdCjYAjAhZLQ#v_zBju& zv+N!Aooy6oV_Ux2H}ih)&6_uG-n_l;7kh$4Aa9H0q)6^x|ILB<9^wE(F>wh(*SaD> z5Q-c^2Yd`FDXl&riZ|a)wfv^oG<=xjU>EM4U}!&(AT+RV_)x!J>TEh+xIfGiIc2{-_LH{`D;-- zJit_+YpsC6!PQ{nw&u2AOJJTDC;>`COKVeOb5nB*XyGM3a`1OoGhi(ja8acy*Xc@8 zYpo~|#3IzUoK%soe{ph;#a{DeIzJJo_y5}cCy|^H$v-aMJ+#nbndtk*%D)Qs-s?>a z#e39nG8IXC*97al@n~YZ5*t$CX|GrDii=l-ylWcvL5H$u$5NptF%*k+B!?2(!ls%) zofkR-`}Tp2(PuB7fA5Ud#|&RA62J}V4~&yx6TE%=jgQ$D0ADPtqss~ZaG){hlV5#M zZPA*KK?j&ipc=A*{OsuUUy2rK744=*y5dw~-UHF!{(0l~Ulv&q4S%FSkGmg((VyS> z%Qa?WdoZF}U@*G-&U?44qaF%Ix}6`lvQIU>yVBv7*Xbt+oC|@O9U-%aND+ry5(Iap z=#~Ze!$(K_5Tnk* zIRc51LA~N6VmJ`VA=#;)HC!37s`9+L+xPnnHSCAfKz^O{soUA144C-hVYfkd!=weP?%&c~y znprJF*nn}~%t)Ndo0%n?%9|OPQ+YGv)~VpX4_0p~OG5zP)t3(MRO1RHr_BzzNQp!U zgwe8<4%s=VXggO5TRdP4=vsu)KB`5NMxF$>3dfc@>~qQ`yVT*n-IzWB3@{6F5l{?X z3O0}SniMC(r=ven1%l0rcrp_0kA@XQ?~TTREC6{yMA4Lvol0UrMFpIiGLW8yRDdXt zs4NIOOspT))U-aM(y)?>L=#FZstp?y$Rm4_uXiRQ>K>g^5>nDCk|O%^l8D-`48=4c zbtL2QsJ2~ArNJP&j#c5?^pKdf&CHvEGNc(Tu9`npOn8irN)!HRZ^AhPjpnJ zxf;0Spl^IId)=ucRI7V56$}@7DH>C^s+xk1@%Vi5sdW^|;r6&6)cRZ=Qs!=U3B~SO z+|YC*6NluGE{oEH=ySEp9kWf{;PyW!Ad*n~#OMO27~HR52MB_6brq4MaW^^X5Pa9* zhE(QW3Fh2{=ClVw9{e!qQ9Eyvwg6Xq3#6hP@-9N&`3_ko_}+!PM_!0}*rb@7r7qyJ zVIFuMFpqc;a}qF})>X%#L^-!e-O$_4Q()c#^RvX5Q>|K`KnZ6&WUb)42=_Xxgsia5 zq+KimX=X;vTPQ7?YL)f@O1nj+jlhjsu*}DD8c;cFrS-lMXFH_j@CV5B(~32QSf`&M zUcq+-uq^hbCrQ?_xupA|G$z8}_7o=xKTl+7>=_ageEXp=1>&5B!`Q^J zHFZhnMCrT;!+j@;Js^``2;{8mg5a*h3KBRccqo1d(j;A)WuzM>NF~xO--N-9WJ%$avH%dQLr%p zvmP!v@)CwKX2&^)IJYUzfwG5j^aO^o9(2HE!grvoU4C`8w@_NaV`++B+FQfQcaIw% zqS8}718ac0Tn7Q};B)jyybgs`>yCLS-L#~cbUpF>xxNri1oP(3TVSCaRIAk6=rX65 z299>UD4_obY4q-|KUt+bKq)tu6Tjg5Do-gPDrJMT z2{=Fs))@5vD1|>+rHs^rlw&JU%K1f8&c#f;Eps`)D9^dPk5bN4DTgcalfvh6xFXNF zTtF#zsFcef#c+lk>%w-QF_$i>$9EZ|u)+y%p*MgT=?|w`EqDc`o>+Z!^ZO1*@zE-#a;?FfeFvg$g-D}S zHl#NZX_O*O0+N{=8A$g0h$>@;_%%XI=*le4g2;K#;`%JM>2ndm?&g1Uv zPufARA!z2iXs!$J9wc8-mQ*<;Oq4HF+jPB$uxBZ32HwP&X{@<~ELTRWWr&UW;0wTf zi87A^v$6iI2J>{e7DA5OA>TsC+qyo|bdb5yEvl$wQ}jbbJy?sz8wXS~-O8<~HRGZ> z+R;8mw9^zVvt&B7tfY%7xWuO54TSteR}di0BP6$irIsQJ^TKgE0Yk}r8MsvZ(2(Y zg*9)Tw?C@J@J;KFAfr>_*af!~pzGuE8IQF%#(=<334>=mUX)M_5J8j*uWJo0AlKWs zuZ~Hx6)zbOlN?tDfe06PECTA9>jG%uGAFSknb4GIBHaZl=;GmZai02Mz+p;wW6471 zVy~9Y6o;2rIYKCzjHyZj2zgEfi?WRUo3f@^NZd}#y4mVX;f9vdFXNkmcUkMiGsn@3 z%A7x%(9{7n1%y0T!%~Qz1&Sr^a+Csnlvle8gZOd&dx+J~3zNWu!XpaqGlk2p&~v0h z;}tF6&ywA!{xcM|JEzzV{0#wPD>OQ>FVYke-4!mbV5bii%v*qyLBgQF17Sk)G2;_E z9gHLNYfLkpPGModvNZWHPP;C%AHa9N{&uP$JIpQzF;6M>x{Jv!aEr~!!JX@!8ds>` zJm7yR!7>->+jN4z*XFKKaD4tYoiJ`ga})vv`PrJl>8jI94G_^Jl|1V_G9L4|u literal 0 HcmV?d00001 diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/part-00000-15e6800a-1995-4089-8212-a563edf37f5b-c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/part-00000-15e6800a-1995-4089-8212-a563edf37f5b-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..56c4e214ada72ef43590bcdc5f139d4d89c65b86 GIT binary patch literal 788 zcmb7D&1%~~5MFKFRQ{K(|eztCPxDDEX~DJyyHib%2}ts8!|MeECX z$_Oq36~KcK{bi6kJvWFz&Hfdl;F72hM8&YBEr|X`SBcIr5PdI*=$S!r)1;q`I%s;$ z_#_((IhCM8EF}pYCF*dJDt67 zl7wu9ic+n{DuYBii(?tYP=`k4*cY63Z@ySKm%YJ%_r6&mc5g*OxrHZEs{t&nSj1ta zsFh@mY6T5fxSr^=3T;6ag-3a--EMo0^;SXeWu%7?` literal 0 HcmV?d00001 diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/README.md b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/README.md new file mode 100644 index 000000000000..d0a8eccace27 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/README.md @@ -0,0 +1,11 @@ +Data generated using Delta Lake 3.0.0: + +```sql +CREATE TABLE test_v2_checkpoint_json +(a INT, b INT) +USING delta +LOCATION ? +TBLPROPERTIES ('delta.checkpointPolicy' = 'v2', 'delta.checkpointInterval' = '1'); + +INSERT INTO test_v2_checkpoint_json VALUES (1, 2); +``` diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..2edfc7c94a46 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1708994964212,"operation":"CREATE TABLE","operationParameters":{"isManaged":"false","description":null,"partitionBy":"[]","properties":"{\"delta.checkpointPolicy\":\"v2\",\"delta.checkpointInterval\":\"1\"}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.0.0","txnId":"c20c5431-d81d-4dfc-8550-9a6d9560c57d"}} +{"metaData":{"id":"9a6e4cf5-bb0f-4d34-b948-b16e9c7a35e3","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1708994964035}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json new file mode 100644 index 000000000000..292073f6465e --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json @@ -0,0 +1,4 @@ +{"checkpointMetadata":{"version":1}} +{"sidecar":{"path":"00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet","sizeInBytes":9176,"modificationTime":1708994974000}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}} +{"metaData":{"id":"9a6e4cf5-bb0f-4d34-b948-b16e9c7a35e3","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1708994964035}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..f2cdb13da87a --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1708994972848,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"666"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.0.0","txnId":"09f1521a-abb3-4bf1-af00-2e336d2957eb"}} +{"add":{"path":"part-00000-fad997ee-643c-4ce6-b554-3e1d596857ba-c000.snappy.parquet","partitionValues":{},"size":666,"modificationTime":1708994972000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"a\":1,\"b\":2},\"maxValues\":{\"a\":1,\"b\":2},\"nullCount\":{\"a\":0,\"b\":0}}"}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/_last_checkpoint b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/_last_checkpoint new file mode 100644 index 000000000000..9c62a0227c93 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":1,"size":5,"sizeInBytes":9941,"numOfAddFiles":1,"v2Checkpoint":{"path":"00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json","sizeInBytes":765,"modificationTime":1708994974000,"nonFileActions":[{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}},{"metaData":{"id":"9a6e4cf5-bb0f-4d34-b948-b16e9c7a35e3","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1708994964035}},{"checkpointMetadata":{"version":1}}],"sidecarFiles":[{"path":"00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet","sizeInBytes":9176,"modificationTime":1708994974000}]},"checksum":"dcee9d7788d3f4e92f932cf56e554b93"} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet new file mode 100644 index 0000000000000000000000000000000000000000..db1fd2f3d889fe795d8cbf7d2b9c6ddcab323baa GIT binary patch literal 9176 zcmeI2duUtN9mlV(Wk;^xj?b->X_=}vB4xIaX3MdS30XWwQoJ;Tl0Q27xY9j#F4e=8 zuG4tIFuX0xAL}McSGF#srnteBP`0vZqXb$^Ve2SC4cTTw$%+Xf3nr9on9!B7-|yUe z&%HhJL$+hqlqLcnoyX^V&+GR)=Vu@O_K`S|$Qchg>mfhA^t)@#K_Wv&9q~!hp?x)y zB-O~$5d0WZ^F~+9PgYwu4E$7)74GmTB5(VFR>@7cBNvm zShSE;$Hpe2(Dd1HY5;Zz*-1Y5<>G(6b}x03)D)Ak7x>Tmf?7cmBEP-(qYq_Qt>zxH zme;H$LA9MEOkQ33&GWT(HIUrWXJKB(>hRC1_lJcGi}x*8@9FpIZ#?f|^;$^>IsMYx zA8>KN^93y)f>8ZGuAF+$V}nHX9;}7xr~X*Fv!Z&lw`S5#?)>`f37Z9-ZS%9v80AY_ ze91%3dC0%#7Jt&@8oEAf&GVD6W+%hh@yrpL%H_2}cp%;x&gj{rYI>Z)x07Kt?AiWM zGCc6DNhnZHJibE;WyjO$q1<@ZXzB^YI>S&Ho1BEDz)Ae%!$1G4#?_NG$!x{9x*Rvm z-(LCWyn8ErZo4xTxM3DwdF`6J*5|gnxsA74>^;p7w8;VgKGQ~l<%w;~NNpaGCvrfM zq(GY|;FsWoU**cLAl7GFC|A;MjK#KLawT2G9)N?LsH(HorEFa(uE<(P8Be!!mfeTk zEZIEb!7uhC-M#4#Dn3&7z~++zL1o@UhOOj`Ob%Je+cG(9CFg2*@&zy1Z{=S0@@CVu zyxFZJombjobmD81t&RO&vUQVlX+Wkse$aX`dD(3rA8;$NtT+p z%Bg0qvfRw;7F&3->?Dh=POg(IwmG>@ve@qAT1nVPpMLW|V*!lJGfx##$7n_+UMV5_ zYgA1m9TI8WCHuW&s&Q}D|xsR%e?;jXXwOJ$I-0;1{zEL z8pyHQGFUAuwR60PFX%s@62W9^Ca38mdP+t5V|oU%CCI){Qw??Kn3^4>D8Or|qlHcM z3W#QnrYRc&O&Pg7X*5ULP=GJ$D1B^VjM@@$2AXjO9?c)jru8iS&T+d)hN8OM$Vh=2 zV7A~(0}kb2cK?Jy3+9L#Qfgk)vuav5CafIr??-aa9n5OrX` zXi?ZaA5ey&)nG5&MI7Yoh!m?fV z3RC5h<6Q@ywGWq_J(pwSyz3(G4Dv28-VE5w)p#zq$ayn9LEoFmyTW)24v*Pi$BXjj z3u5!XVDmey`E)&(!?kr z4)Cym88#T;p@FUD!{NWjN8mPF|9OD;mEQ-L=P5Nft7qM&9A`eL?2D8dgr)yk7?%Fh z*Kp~Vps{VPt;Ur%F18bpv?P*VMbgKNRBCigu|*QCF%#1$i1`yDeja3EDiO=x>Kr{H z>>|R>Ggt{?S(Y8PZh6o>?er5!LVh(3ah}L6?~v& zv2pQ1XngAHIBUfgzW#mINY`=|xDdq_LC-lv&M;(oP2_Uk0;y$@dI70(rqEW$#4j(@ z?J$_eRzc$>q|H3nF8DRGORR{qT669+XR)LA@a;lT%7XSNYQgJ zl_K)RhXDB&Ll)b)%H7s93eJOxY8OPkj)ZwrR3%|m|G^ZAymyedggoV~c14c(l+)Wg zAkfk2_viAXQFTm(_jr^&&7;T77ofkkJDP~bB>%$|H44wB?cYg6^SS4u8S4S(|3$~i zumSJc@Ibg=*mzYi`+&0rB+k09TyS519<5c38I*0ZDV9lev9Oal?R zq7cirRDiRZob2Si2@wPi;yq{7G023AKEA|F*}3S!El#;xD}2 z26mS_ShI1C8QF2#>sf=2(mZ5Txf-r$&?YD@i|J6-xksIl)A|rkRDXte5NmxsI7VTG zsMkhuEKKG!uFgcQW4vbAjd#Eg+bX+zb4JrXRy>Z!`(CHTx+|7uxnQ9qXx)bE%j6O4 zh5E|=Ini)uYpmOfer5lx>svK8e@z4S#H8eZ1Wp_N@k&=FAF^(ip?Eaj9gT-}C&%@) z7SfVE@xJbOtY1qc_jLErRHDB-ruFTKr8Jesd*iex)~9Jn^@(5^dr5tLy``| H-}L+s$p*Ye literal 0 HcmV?d00001 diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/part-00000-fad997ee-643c-4ce6-b554-3e1d596857ba-c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/part-00000-fad997ee-643c-4ce6-b554-3e1d596857ba-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d076e248c28967d349f8b3e68a143da1873f2ff7 GIT binary patch literal 666 zcmb7CO>5gg5FKqCqo%zCyvr`=V3yV-pe_=!6UXF|9zy8BgkD1_EAJ*XmA@ppB^aL) z=q1No`X_qMxzL}`|B+u1Ih#N}Z1CBRG_4{14dITiatw?JPxvhtw-6n0>jFVa7-8?&VawewHz*^6t<^qdg%_a< z)B#|(4sEb&hbn0Gp9sYdLVXk}mMvo;^jJL=x+Ey{s}N$B4kIm>T{#W(m_*(_1$em1wcN+;jc$mZJUT=Q#ZA@lR8l|7l|>3on& zU!D$aVlvq~S;(;-6_Cbm#Tt%tduCH?jOR(B;{tn4*I6-Fv-ZpHIqVwxsyhutU)_Dm<9}tHM-873u(4S+-Rrpla$+*Nv<&h$_S?#4LtbS6S1(@7{ZN z&u1rjFDZdRLmSTb?)!be`|i8%?%jRv=G*QH5Q$Wh1?1AXH~vvlO+4_?O+1oR<}HyV z3I5K&^F?Ga`SYQ_|J7;ecT3Wo1}D|eSyYo6^6{Y;&zoCSHfyb18rPX1JDIV*R-Kuw zv)}aRGdkEas_dISvOgvrq3p;&bsE- zrp7v@v$44@*cxbB)zs3ms6CrwWtmLyl}rH^Ko> z{;)5-vrF}@_NCIv?oisd!q*v5qv4cq^^YF*#g!O7QlM6E6^W=Z^~1Y%tzfbpz7_L3q)2a@+~KEA^p!)zE!i3E;Ll5sid)dN(eTsy6`B zZ4)~4B>0;s|{~G#bhLMCJpBvPzR(j9& z|NInMLOYo(A}1d|_}VO^GtjQQmbRk#PruGw3_815 zBPh8_-Rm4u1?9Jvb#$siUvyAF%l+=3Z@%a@^;Wl>Q$q9N50J-5_?OY1HyuVh=yBeL z!GW#N(I5TI@r!1=N6{dcmrLMVI4c7%5SB2^Vt6Cabnw8c-Fwj`fXDIgX?L!y;R&Ma-p5c++ zz^DR0z#Q8h$X@z6xE$M%>2~l)o(S_Gw8OhkzV)pHlQ62zVqA1*!B)u<=FvP zRLyr(c_sKmoK>>t%Ev3#n6fiCtGr9CERGhNp+mYSbx2TJrq458vMjo0y6ae#ce)U< zamYf)yy7}#EKXHkS$w1|VFnfmI|8s)?PxUBc#I{e%FBwT$1aR}RbKi(b2TnDP)*ky zeMkVlVn-uiR)mixX)DcaO*v5QF%|?<(NH-t76`9tDE!F_@a%D*-dtqye)^J63C!(*dpJf5(t)v|}I!L3eJwcO51dmXGe!i#&IWRu?XjFa4{7yC@Q^qpOi9$jGML1@nWWo*YaY9)6MZ>W)^SDi<#NHmKQVSyp|U;6}+~BksO)B zNcL8;24^c-gZ{a!L3S={FgTADv%EMspV#u@-~wLDi-TA3T3#GnNcKB)vkw#2Vz#=yDLHHK8C}L25BOQ}lAp<)T&~H1!{Yp^3u?-CsL50$5!XJI zN0n6i`(5y=p@ylf5)KntlXZ0|>3elifSn79(vdWXZc(D$YKl0CbJGfdUFq`9x=-DC z54;|9s|2=K70V=V3Dg*BDG~bAs6isGROBI5V=*t52uC_2AqCOzj>Mo^f)=y!4QTDX zN_-nsP>@twNvAj^S6bON9UjoDN?0*e+9@@ZP9#Z%rl~vyACztC-S9eRkVXcTXp`?I zH^ifnxVmnK@yH2}+=nqY z$`Vv$I`fJ0tI%T6_tNN?oWML5KGr6p-LW|8ac^qV;)g35Nu`NX>tGqo$e0>WbJ;ne zM7%Sytvg8r#B>8sc_^u3yrY2z{0(i9PJ|LsIOnI6cU~+Kze`nMKC$_Ra~i9Ekc_~L zGpaFu02VY{4~X4K8WgQ|s`3GfH5JKlBBn&*#$+^Ny<4j``$E3qj3+p)%e44FgZ=`3 zok)b5heeNlAma*;pnHLY8kvfwwHjQ z7{U1}j;l9CY?n9r_c{eG2i5@Mz8laEXRIQYYP$n8?MC@dkP|Qm5OaWH_QSypGZ6L< z*HYHM{t`iQ7$rxj=qx_z4p-sj> z-aI~9L}#L7qUfWyfoMM!?S~UC(*V(xaGX);FBKR(hcLqw=939wgrWFE1Uia9?@^!( zoIL7X38KdcyCw53iS;kz7Koidyd1^LO^he(t{7xagd9c4z8lfn88`}dYNJ(*qP{C* zP?<7;T^6BUrBJzvq0F&~;pIfUp4$QMtVRvOi|)w9sFj;`gbXhu2AzY5_nF3T5 zlHuh%nE8cIB7ReG`{_0;+a{3xkgZYkc#I7@$q)63@j&ty@z0b)VR!Mo>q&V zR%CO_a5A$6QlBBt#~LXUj=l%ZkB>P~_=&Y3{6rl!2pmKk2GNBJ_sN__xj^FuL^?u| za)4yS5FM#l7|Dqc#}VSBMh5Pg1c;)0an7eg;ByMW&TD)COb;wN9gD#~CxVV4=%e*$ zu8}#`L4}|*GDqNg5n*4Tu)T16FN{L|63|;I2tTzBgomi`SrE?e=tM#EtcY|NkxgxUVr3~7-Dm`L`&$X6SrPIyLOypB8h>ye(whi!yl6T&Paygs zq8_KHSwJp=rK+TG1IYc|7ahyLHTG5y~IA0+6%(nsZQ;i@%8Zosv!J=U`D?%PY z$o&n}k_#qfiP)&9lT0lbOG@$ zXp8{Q*p%pugnda?l-|7_r1u7>i57Bc_Cg9TqBQpGbIAK0{=tQ9*6um1wUXVnmTpa~ zp_zB_z3YireknkXIwb!vT#6c2Vf~_S)GVqF6TzrQ(JW^g(JZ^=BI4%vZ-%m47En$g z%BK`%n4<(43AgsX&fe20(p~Zv|FGP~u69@ftYuvHusU;}M^W4UCbC-c_g;;h@g3lY zqB7$IpvaCB_w2n_3w_8U>RU}{iSu?)g}`}U#O=8Qa6h8B$Gk->oOi-N4U=`s15W=j zZ=3gkW4d=&7IH9b=5K7>^%bQBZga(6t9=&JaJKF;PpN}xd-ORL-rIP=pYxz>g%x6BN zqsH1KZ-$m_{En8Fz>V3>Ew43eyNKG}ZzZ=#{?9?1zQd_yCw~PeepKZ;xgDCfX?MTc zZ%{>Vg0)X(6;HK;isyr*Uh)qu9!Evq=^;}wv=~(2J+k&2Rq#({6^Bv9DXOAp2_F&T z_)<8EEcCbS@^*jElD7Oy>ikcjJnwEg|EZbG>qzEOklZQxv&G2dpKcZ>lf7JIP9vF- zRY+#E1}&qV=~d<*lc4e5w>Vwaq(*#0k^qg9&5g1@ID1n8I4~=pI3x+0{5< zE`Uz`J*bGviQcX*_ymg#A?nea(Z0v8w}HxHigtK-{CWZR4C0=mxOo9Ws*x{SNLF`xx+jp*y6 z%fQ!-yb)ePUoWgRYkPDfc+cKj(ANk2h02N_BBRCuKWH@aPk0G67S@_IK8G4lQjM2D zqdvQg-c2;6F8Kx5KZ;O$ZpA5ea`|LX!aO>;TmUg32 zlH2N)E(LCi>S@M7y`DzkEp4h_S>GUeZzyPyx<6{77bg8>4Th0DvV~^N6w;aC`wynf8YVXd=QO9` zOdUtd+2QgQUj_L$GE8|s(?BuZ1aTCbF3yvQ>_ONFKrB7Uo(68Maq^j_3Rjo_#gVK{ zD;u5H5+;}eHuENM_2~s*V~(*5EvDOM^t&ukt5_NsCuBPaew|jAD-ju1|NO*A?itqR zArBE(BJwU9LFua&k*Bn`vC&^68It*zl`tIP+h*Gv4f>@`_JvKm&TLA>WQ=W7&~Ad5 zypfylbY7`OgG~ zn&y$!m$+@UVIw_b+?HXc_uf_XmEPvdxbS@@#@Z7 z(z6>c7)G{*u23<;RQmv+Zu(MVTsEYa5$qmkaOzTW)@y1HXMaaG;7kH+HNU5Q8} z63(U7v9ZZ8G<{*58i3tJc9V~Oyz&pP-Aj!mHAH0W1^%_+80*9rRm&f5pSXWc88*R57t8UXWqVaZ&meXZ;hmt-22IeQ#K1a+vaDTG0OjI z@y8zWhKKz1%F6c}Tr;E3SZx5)M|wP&9L$WTjfPl9q%#PGk*O(IDV*caZoKyw_nx=bfNhAaF30`! z``7-o=-w&!od%`?_tVO2KmXKS>%P-q?((e``#|#pEpotr$TVSKn80*x#D;AiktK3K zk)%M2C*YUhgJ0$1uOQZE8!%VWZj8mY-Et*e#U6lz87Zr?wWa*qN^ybKJK1>HoulnO z^JdBB5sy4jbGC`aX;*yYst5L>6lhl#J!HsA&dTJNmAosHVJms1iYL!`$zdz^nwK}5 zspidYSCfNQg^M*jdCN)8ZR5GiPIA`AbC;atOfAp7=_IGOlSNy5p^hA~x|^@($%2!d zZ*X!OxZul;TyVaLS6Fb8`DQ2AN#qE%kZlk@_5%pXZ;;B~H?m zO1x5!?5|QajVKbS-y{3IW2$j-k2KT{D*{DLXsdKX&sd%fE@kpiCzg5rwa?SZ=O)nM z0R|dN{wm0^+A>%*E46dHh|lTYrV_zqb1I|hBYHwb`jdJJvL(pguBnDPcv4M|QWW4d z)Y04~dIdzIMw65cfhLShmeiXgt;@j|b(B6iIYw=XI0KD115ad+rjvS_etp6&lA)+3 zGcuB+1{gH>Qinr1nm#;f(40A6fEY1AB zQ==p5c+!BZ!AvTp8z*Qs2NPmyY)vG`a|X@o>CxeAWTv)yyz>EEjU|W2?62cwdGjT)`7AcS#G234ayhK|YCf66W@-g3ze3nG z2D=F`w=XE+!a3)r$hnG~yNolv{eF(=>siz9$#7E+O>ZCYKfBqN3JXv0xP%$E=;yJC zt>%LfdB8{DHh;@O;#YpSi+Qj@owIt@t<1R;3@C>}g*ssi{S%k|&34?*^YzYtv>F%R z+}KV)(tN!@`q3jm`jC+dK(bA`MG~z#6C)yifrwkC7>Iufh?2KMM^A&G=Q6@x`YMjB z05Hp*TXM?u?FA9~2ZX-CpqCrVhYBv^a-*Pi1+niiY`&>z71-;A4>aE-G(PhvG+ttj z7of52AFPqC=W=i%7DVKEM1E&C&RxEFJ!C1bgVcPpAoNY7USQM(pxR!iv{1L>VH!o+ zJ4pMrsj+1}T9NNz0$T)u*AaQ$6bMM$SCkf5C%PY|QbgWC$*tM12L7Yg37ad z0Qu@exHt#JR{`WxG0;jrXOO8+5|;!A@5`4DTP)=4*8UK+jc-` zrPJ@vWJkm5m7zckET#}l?BDiKjzFDuF`uOhV4G;{kUigmQNsT9g2whPK zrduk&SxrrK^4^3%1qbn-Q|cIG!h-=H{HAiV0~VwxnK_tA8>*hp9Rmqw_Yll4Qx84{ zxQJjfQ^{Dw$jC-HPAJ78#50*BRnw4B=0Gq2!<+x7X44K_oM0iGVmWJULsyuGx7)z( za>s-=&M^Z>PJ2CV&{3L&j51fl6?EDJ#bq%aN`-sWiELGdc&7X_#A9WZ_23wVRf209 z#T7^698#tBiiPefZC=6LnV~@50YA8FoRrNOVEc&j*dp({LhJX1Sgle4Oh-_~M(}^h zyVwi$r7d*=_|Dd-*dl*vORcM5IsRi^1NO|6;CTpud($im& Kq+$3Irhfr^^z8Zo literal 0 HcmV?d00001 diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/part-00000-77e34410-74c8-4e07-8769-345baa22e4b3-c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/part-00000-77e34410-74c8-4e07-8769-345baa22e4b3-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..5006c66afbcf6d5cb328da0fe696904f09e407d0 GIT binary patch literal 666 zcmb7CO>5gg5S`sPMooJOc$XFEV3yV-pe_=!6UXF|9zy8BgkD1_EAJ*XmA@ppB^aL) z=q1No`X_qMxzL}`|B+u1I`{fBI`$w;` zsZPIq$_Ah9NV6Kk-%vqxm1E%4^<2N@pbEEBcknWz8Re9}VR`z6;r}IHB zeR(>xiOFQ|WFg0TRE#usE7WkD+cTSDV?0k19Va-)MeECZPT5;5m;S?U{NKADmmuC( zYTu61W@WD#sxVSvb1R+?6Vo(tJM2bbuxC2)Zq&Ae&R!Il?rtzJ+J-x!Z3kUrV!iG9 O9sqt&$Cp0Fzw$4EN{8eC literal 0 HcmV?d00001 diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeCheckpointsCompatibility.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeCheckpointsCompatibility.java index 3796fae2ac0e..ea8a6bd1753e 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeCheckpointsCompatibility.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeCheckpointsCompatibility.java @@ -679,6 +679,43 @@ private void testWriteStatsAsStructEnabled(Consumer sqlExecutor, String } } + @Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS}) + public void testV2CheckpointMultipleSidecars() + { + testV2CheckpointMultipleSidecars("json"); + testV2CheckpointMultipleSidecars("parquet"); + } + + private void testV2CheckpointMultipleSidecars(String format) + { + String tableName = "test_dl_v2_checkpoint_multiple_sidecars_" + randomNameSuffix(); + String tableDirectory = "delta-compatibility-test-" + tableName; + + onDelta().executeQuery("SET spark.databricks.delta.checkpointV2.topLevelFileFormat = " + format); + onDelta().executeQuery("SET spark.databricks.delta.checkpoint.partSize = 1"); + + onDelta().executeQuery("CREATE TABLE default." + tableName + + "(id INT, part STRING)" + + "USING delta " + + "PARTITIONED BY (part)" + + "LOCATION 's3://" + bucketName + "/" + tableDirectory + "'" + + "TBLPROPERTIES ('delta.checkpointPolicy' = 'v2', 'delta.checkpointInterval' = '1')"); + try { + onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (1, 'part1'), (2, 'part2')"); + + List expectedRows = ImmutableList.of(row(1, "part1"), row(2, "part2")); + assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName)) + .containsOnly(expectedRows); + assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName)) + .containsOnly(expectedRows); + + assertThat(listSidecarFiles(bucketName, tableDirectory)).hasSize(2); + } + finally { + dropDeltaTableWithRetry("default." + tableName); + } + } + private void fillWithInserts(String tableName, String values, int toCreate) { for (int i = 0; i < toCreate; i++) { @@ -694,6 +731,11 @@ private List listCheckpointFiles(String bucketName, String tableDirector .collect(toImmutableList()); } + private List listSidecarFiles(String bucketName, String tableDirectory) + { + return listS3Directory(bucketName, tableDirectory + "/_delta_log/_sidecars"); + } + private List listS3Directory(String bucketName, String directory) { ImmutableList.Builder result = ImmutableList.builder();