diff --git a/docs/src/main/sphinx/connector/delta-lake.md b/docs/src/main/sphinx/connector/delta-lake.md index 6a78ab104e13..ca80b767a332 100644 --- a/docs/src/main/sphinx/connector/delta-lake.md +++ b/docs/src/main/sphinx/connector/delta-lake.md @@ -351,6 +351,8 @@ features](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#table-featur - Readers only * - Timestamp without time zone - Readers and writers +* - V2 checkpoint + - Readers only ::: No other features are supported. diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/CheckpointMetadataEntry.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/CheckpointMetadataEntry.java new file mode 100644 index 000000000000..335a2c55b086 --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/CheckpointMetadataEntry.java @@ -0,0 +1,33 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake.transactionlog; + +import com.google.common.collect.ImmutableMap; + +import java.util.Map; +import java.util.Optional; + +import static com.google.common.base.Preconditions.checkArgument; +import static java.util.Objects.requireNonNull; + +// https://github.com/delta-io/delta/blob/master/PROTOCOL.md#checkpoint-metadata +public record CheckpointMetadataEntry(long version, Optional> tags) +{ + public CheckpointMetadataEntry + { + checkArgument(version > 0, "version is not positive: %s", version); + requireNonNull(tags, "tags is null"); + tags = tags.map(ImmutableMap::copyOf); + } +} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeSchemaSupport.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeSchemaSupport.java index bbbd958cf122..8fa4193c5563 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeSchemaSupport.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeSchemaSupport.java @@ -104,11 +104,13 @@ private DeltaLakeSchemaSupport() {} private static final String IDENTITY_COLUMNS_FEATURE_NAME = "identityColumns"; private static final String INVARIANTS_FEATURE_NAME = "invariants"; public static final String TIMESTAMP_NTZ_FEATURE_NAME = "timestampNtz"; + public static final String V2_CHECKPOINT_FEATURE_NAME = "v2Checkpoint"; private static final Set SUPPORTED_READER_FEATURES = ImmutableSet.builder() .add(COLUMN_MAPPING_FEATURE_NAME) .add(TIMESTAMP_NTZ_FEATURE_NAME) .add(DELETION_VECTORS_FEATURE_NAME) + .add(V2_CHECKPOINT_FEATURE_NAME) .build(); private static final Set SUPPORTED_WRITER_FEATURES = ImmutableSet.builder() .add(APPEND_ONLY_FEATURE_NAME) diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeTransactionLogEntry.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeTransactionLogEntry.java index cb86f1c99f4e..b6f9f36017ba 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeTransactionLogEntry.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeTransactionLogEntry.java @@ -30,6 +30,8 @@ public class DeltaLakeTransactionLogEntry private final ProtocolEntry protocol; private final CommitInfoEntry commitInfo; private final CdcEntry cdcEntry; + private final SidecarEntry sidecar; + private final CheckpointMetadataEntry checkpointMetadata; private DeltaLakeTransactionLogEntry( TransactionEntry txn, @@ -38,7 +40,9 @@ private DeltaLakeTransactionLogEntry( MetadataEntry metaData, ProtocolEntry protocol, CommitInfoEntry commitInfo, - CdcEntry cdcEntry) + CdcEntry cdcEntry, + SidecarEntry sidecar, + CheckpointMetadataEntry checkpointMetadata) { this.txn = txn; this.add = add; @@ -47,6 +51,8 @@ private DeltaLakeTransactionLogEntry( this.protocol = protocol; this.commitInfo = commitInfo; this.cdcEntry = cdcEntry; + this.sidecar = sidecar; + this.checkpointMetadata = checkpointMetadata; } @JsonCreator @@ -57,51 +63,59 @@ public static DeltaLakeTransactionLogEntry fromJson( @JsonProperty("metaData") MetadataEntry metaData, @JsonProperty("protocol") ProtocolEntry protocol, @JsonProperty("commitInfo") CommitInfoEntry commitInfo, - @JsonProperty("cdc") CdcEntry cdcEntry) + @JsonProperty("cdc") CdcEntry cdcEntry, + @JsonProperty("sidecar") SidecarEntry sidecarEntry, + @JsonProperty("checkpointMetadata") CheckpointMetadataEntry checkpointMetadata) { - return new DeltaLakeTransactionLogEntry(txn, add, remove, metaData, protocol, commitInfo, cdcEntry); + return new DeltaLakeTransactionLogEntry(txn, add, remove, metaData, protocol, commitInfo, cdcEntry, sidecarEntry, checkpointMetadata); } public static DeltaLakeTransactionLogEntry transactionEntry(TransactionEntry transaction) { requireNonNull(transaction, "transaction is null"); - return new DeltaLakeTransactionLogEntry(transaction, null, null, null, null, null, null); + return new DeltaLakeTransactionLogEntry(transaction, null, null, null, null, null, null, null, null); } public static DeltaLakeTransactionLogEntry commitInfoEntry(CommitInfoEntry commitInfo) { requireNonNull(commitInfo, "commitInfo is null"); - return new DeltaLakeTransactionLogEntry(null, null, null, null, null, commitInfo, null); + return new DeltaLakeTransactionLogEntry(null, null, null, null, null, commitInfo, null, null, null); } public static DeltaLakeTransactionLogEntry protocolEntry(ProtocolEntry protocolEntry) { requireNonNull(protocolEntry, "protocolEntry is null"); - return new DeltaLakeTransactionLogEntry(null, null, null, null, protocolEntry, null, null); + return new DeltaLakeTransactionLogEntry(null, null, null, null, protocolEntry, null, null, null, null); } public static DeltaLakeTransactionLogEntry metadataEntry(MetadataEntry metadataEntry) { requireNonNull(metadataEntry, "metadataEntry is null"); - return new DeltaLakeTransactionLogEntry(null, null, null, metadataEntry, null, null, null); + return new DeltaLakeTransactionLogEntry(null, null, null, metadataEntry, null, null, null, null, null); } public static DeltaLakeTransactionLogEntry addFileEntry(AddFileEntry addFileEntry) { requireNonNull(addFileEntry, "addFileEntry is null"); - return new DeltaLakeTransactionLogEntry(null, addFileEntry, null, null, null, null, null); + return new DeltaLakeTransactionLogEntry(null, addFileEntry, null, null, null, null, null, null, null); } public static DeltaLakeTransactionLogEntry removeFileEntry(RemoveFileEntry removeFileEntry) { requireNonNull(removeFileEntry, "removeFileEntry is null"); - return new DeltaLakeTransactionLogEntry(null, null, removeFileEntry, null, null, null, null); + return new DeltaLakeTransactionLogEntry(null, null, removeFileEntry, null, null, null, null, null, null); } public static DeltaLakeTransactionLogEntry cdcEntry(CdcEntry cdcEntry) { requireNonNull(cdcEntry, "cdcEntry is null"); - return new DeltaLakeTransactionLogEntry(null, null, null, null, null, null, cdcEntry); + return new DeltaLakeTransactionLogEntry(null, null, null, null, null, null, cdcEntry, null, null); + } + + public static DeltaLakeTransactionLogEntry sidecarEntry(SidecarEntry sidecarEntry) + { + requireNonNull(sidecarEntry, "sidecarEntry is null"); + return new DeltaLakeTransactionLogEntry(null, null, null, null, null, null, null, sidecarEntry, null); } @Nullable @@ -153,9 +167,23 @@ public CdcEntry getCDC() return cdcEntry; } + @Nullable + @JsonProperty + public SidecarEntry getSidecar() + { + return sidecar; + } + + @Nullable + @JsonProperty + public CheckpointMetadataEntry getCheckpointMetadata() + { + return checkpointMetadata; + } + public DeltaLakeTransactionLogEntry withCommitInfo(CommitInfoEntry commitInfo) { - return new DeltaLakeTransactionLogEntry(txn, add, remove, metaData, protocol, commitInfo, cdcEntry); + return new DeltaLakeTransactionLogEntry(txn, add, remove, metaData, protocol, commitInfo, cdcEntry, sidecar, checkpointMetadata); } @Override @@ -174,18 +202,20 @@ public boolean equals(Object o) Objects.equals(metaData, that.metaData) && Objects.equals(protocol, that.protocol) && Objects.equals(commitInfo, that.commitInfo) && - Objects.equals(cdcEntry, that.cdcEntry); + Objects.equals(cdcEntry, that.cdcEntry) && + Objects.equals(sidecar, that.sidecar) && + Objects.equals(checkpointMetadata, that.checkpointMetadata); } @Override public int hashCode() { - return Objects.hash(txn, add, remove, metaData, protocol, commitInfo, cdcEntry); + return Objects.hash(txn, add, remove, metaData, protocol, commitInfo, cdcEntry, sidecar, checkpointMetadata); } @Override public String toString() { - return String.format("DeltaLakeTransactionLogEntry{%s, %s, %s, %s, %s, %s, %s}", txn, add, remove, metaData, protocol, commitInfo, cdcEntry); + return String.format("DeltaLakeTransactionLogEntry{%s, %s, %s, %s, %s, %s, %s, %s, %s}", txn, add, remove, metaData, protocol, commitInfo, cdcEntry, sidecar, checkpointMetadata); } } diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/SidecarEntry.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/SidecarEntry.java new file mode 100644 index 000000000000..3e2a43519df9 --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/SidecarEntry.java @@ -0,0 +1,34 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake.transactionlog; + +import com.google.common.collect.ImmutableMap; + +import java.util.Map; +import java.util.Optional; + +import static com.google.common.base.Preconditions.checkArgument; +import static java.util.Objects.requireNonNull; + +// https://github.com/delta-io/delta/blob/master/PROTOCOL.md#sidecar-file-information +public record SidecarEntry(String path, long sizeInBytes, long modificationTime, Optional> tags) +{ + public SidecarEntry + { + checkArgument(sizeInBytes > 0, "sizeInBytes is not positive: %s", sizeInBytes); + requireNonNull(path, "path is null"); + requireNonNull(tags, "tags is null"); + tags = tags.map(ImmutableMap::copyOf); + } +} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java index 548ed5d7e652..84a3b6c6b010 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java @@ -14,7 +14,7 @@ package io.trino.plugin.deltalake.transactionlog; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Streams; +import com.google.common.collect.ImmutableSet; import io.trino.filesystem.Location; import io.trino.filesystem.TrinoFileSystem; import io.trino.filesystem.TrinoInputFile; @@ -40,11 +40,15 @@ import java.util.stream.Stream; import static com.google.common.base.Preconditions.checkState; +import static com.google.common.base.Verify.verify; +import static com.google.common.collect.Streams.stream; import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_FILESYSTEM_ERROR; import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_SCHEMA; import static io.trino.plugin.deltalake.transactionlog.TransactionLogParser.readLastCheckpoint; import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogDir; import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.ADD; +import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.SIDECAR; +import static io.trino.plugin.deltalake.transactionlog.checkpoint.TransactionLogTail.getEntriesFromJson; import static java.lang.String.format; import static java.util.Objects.requireNonNull; @@ -199,22 +203,19 @@ public Stream getCheckpointTransactionLogEntries( return getCheckpointPartPaths(checkpoint).stream() .map(fileSystem::newInputFile) - .flatMap(checkpointFile -> { - CheckpointEntryIterator checkpointEntryIterator = getCheckpointTransactionLogEntries( - session, - entryTypes, - metadataAndProtocol.map(MetadataAndProtocolEntry::metadataEntry), - metadataAndProtocol.map(MetadataAndProtocolEntry::protocolEntry), - checkpointSchemaManager, - typeManager, - stats, - checkpoint, - checkpointFile, - partitionConstraint, - addStatsMinMaxColumnFilter); - return Streams.stream(checkpointEntryIterator) - .onClose(checkpointEntryIterator::close); - }); + .flatMap(checkpointFile -> getCheckpointTransactionLogEntries( + session, + fileSystem, + entryTypes, + metadataAndProtocol.map(MetadataAndProtocolEntry::metadataEntry), + metadataAndProtocol.map(MetadataAndProtocolEntry::protocolEntry), + checkpointSchemaManager, + typeManager, + stats, + checkpoint, + checkpointFile, + partitionConstraint, + addStatsMinMaxColumnFilter)); } public Optional getLastCheckpointVersion() @@ -222,8 +223,9 @@ public Optional getLastCheckpointVersion() return lastCheckpoint.map(LastCheckpoint::version); } - private CheckpointEntryIterator getCheckpointTransactionLogEntries( + private Stream getCheckpointTransactionLogEntries( ConnectorSession session, + TrinoFileSystem fileSystem, Set entryTypes, Optional metadataEntry, Optional protocolEntry, @@ -245,7 +247,23 @@ private CheckpointEntryIterator getCheckpointTransactionLogEntries( catch (IOException e) { throw new TrinoException(DELTA_LAKE_FILESYSTEM_ERROR, format("Unexpected IO exception occurred while retrieving the length of the file: %s for the table %s", checkpoint, table), e); } - return new CheckpointEntryIterator( + if (checkpoint.v2Checkpoint().isPresent()) { + return getV2CheckpointTransactionLogEntriesFrom( + session, + entryTypes, + metadataEntry, + protocolEntry, + checkpointSchemaManager, + typeManager, + stats, + checkpoint, + checkpointFile, + partitionConstraint, + addStatsMinMaxColumnFilter, + fileSystem, + fileSize); + } + CheckpointEntryIterator checkpointEntryIterator = new CheckpointEntryIterator( checkpointFile, session, fileSize, @@ -260,6 +278,96 @@ private CheckpointEntryIterator getCheckpointTransactionLogEntries( domainCompactionThreshold, partitionConstraint, addStatsMinMaxColumnFilter); + return stream(checkpointEntryIterator).onClose(checkpointEntryIterator::close); + } + + private Stream getV2CheckpointTransactionLogEntriesFrom( + ConnectorSession session, + Set entryTypes, + Optional metadataEntry, + Optional protocolEntry, + CheckpointSchemaManager checkpointSchemaManager, + TypeManager typeManager, + FileFormatDataSourceStats stats, + LastCheckpoint checkpoint, + TrinoInputFile checkpointFile, + TupleDomain partitionConstraint, + Optional> addStatsMinMaxColumnFilter, + TrinoFileSystem fileSystem, + long fileSize) + { + return getV2CheckpointEntries(session, entryTypes, metadataEntry, protocolEntry, checkpointSchemaManager, typeManager, stats, checkpoint, checkpointFile, partitionConstraint, addStatsMinMaxColumnFilter, fileSystem, fileSize) + .mapMulti((entry, builder) -> { + if (entry.getSidecar() == null) { + builder.accept(entry); + return; + } + Location sidecar = checkpointFile.location().sibling("_sidecars").appendPath(entry.getSidecar().path()); + CheckpointEntryIterator iterator = new CheckpointEntryIterator( + fileSystem.newInputFile(sidecar), + session, + fileSize, + checkpointSchemaManager, + typeManager, + entryTypes, + metadataEntry, + protocolEntry, + stats, + parquetReaderOptions, + checkpointRowStatisticsWritingEnabled, + domainCompactionThreshold, + partitionConstraint, + addStatsMinMaxColumnFilter); + stream(iterator).onClose(iterator::close).forEach(builder); + }); + } + + private Stream getV2CheckpointEntries( + ConnectorSession session, + Set entryTypes, + Optional metadataEntry, + Optional protocolEntry, + CheckpointSchemaManager checkpointSchemaManager, + TypeManager typeManager, + FileFormatDataSourceStats stats, + LastCheckpoint checkpoint, + TrinoInputFile checkpointFile, + TupleDomain partitionConstraint, + Optional> addStatsMinMaxColumnFilter, + TrinoFileSystem fileSystem, + long fileSize) + { + if (checkpointFile.location().fileName().endsWith(".json")) { + try { + return getEntriesFromJson(checkpoint.version(), checkpointFile).stream().flatMap(List::stream); + } + catch (IOException e) { + throw new TrinoException(DELTA_LAKE_FILESYSTEM_ERROR, format("Unexpected IO exception occurred while reading the entries of the file: %s for the table %s", checkpoint, table), e); + } + } + if (checkpointFile.location().fileName().endsWith(".parquet")) { + CheckpointEntryIterator checkpointEntryIterator = new CheckpointEntryIterator( + fileSystem.newInputFile(checkpointFile.location()), + session, + fileSize, + checkpointSchemaManager, + typeManager, + ImmutableSet.builder() + .addAll(entryTypes) + .add(SIDECAR) + .build(), + metadataEntry, + protocolEntry, + stats, + parquetReaderOptions, + checkpointRowStatisticsWritingEnabled, + domainCompactionThreshold, + partitionConstraint, + addStatsMinMaxColumnFilter); + return stream(checkpointEntryIterator) + .onClose(checkpointEntryIterator::close); + } + throw new IllegalArgumentException("Unsupported v2 checkpoint file format: " + checkpointFile.location()); } public record MetadataAndProtocolEntry(MetadataEntry metadataEntry, ProtocolEntry protocolEntry) @@ -275,7 +383,11 @@ private List getCheckpointPartPaths(LastCheckpoint checkpoint) { Location transactionLogDir = Location.of(getTransactionLogDir(tableLocation)); ImmutableList.Builder paths = ImmutableList.builder(); - if (checkpoint.parts().isEmpty()) { + if (checkpoint.v2Checkpoint().isPresent()) { + verify(checkpoint.parts().isEmpty(), "v2 checkpoint should not have multi-part checkpoints"); + paths.add(transactionLogDir.appendPath(checkpoint.v2Checkpoint().get().path())); + } + else if (checkpoint.parts().isEmpty()) { paths.add(transactionLogDir.appendPath("%020d.checkpoint.parquet".formatted(checkpoint.version()))); } else { diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/V2Checkpoint.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/V2Checkpoint.java new file mode 100644 index 000000000000..70b52c05fded --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/V2Checkpoint.java @@ -0,0 +1,24 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake.transactionlog; + +import static java.util.Objects.requireNonNull; + +public record V2Checkpoint(String path) +{ + public V2Checkpoint + { + requireNonNull(path, "path is null"); + } +} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java index a1c9a8c452bf..a6481b4a9496 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java @@ -33,6 +33,7 @@ import io.trino.plugin.deltalake.transactionlog.MetadataEntry; import io.trino.plugin.deltalake.transactionlog.ProtocolEntry; import io.trino.plugin.deltalake.transactionlog.RemoveFileEntry; +import io.trino.plugin.deltalake.transactionlog.SidecarEntry; import io.trino.plugin.deltalake.transactionlog.TransactionEntry; import io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeParquetFileStatistics; import io.trino.plugin.hive.FileFormatDataSourceStats; @@ -92,6 +93,7 @@ import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.METADATA; import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.PROTOCOL; import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.REMOVE; +import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.SIDECAR; import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.TRANSACTION; import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; import static io.trino.spi.type.BigintType.BIGINT; @@ -117,7 +119,9 @@ public enum EntryType REMOVE("remove"), METADATA("metadata"), PROTOCOL("protocol"), - COMMIT("commitinfo"); + COMMIT("commitinfo"), + SIDECAR("sidecar"), + /**/; private final String columnName; @@ -152,6 +156,7 @@ public String getColumnName() private final Optional metadataType; private final Optional protocolType; private final Optional commitType; + private final Optional sidecarType; private MetadataEntry metadataEntry; private ProtocolEntry protocolEntry; @@ -244,6 +249,7 @@ public CheckpointEntryIterator( metadataType = getParquetType(fields, METADATA); protocolType = getParquetType(fields, PROTOCOL); commitType = getParquetType(fields, COMMIT); + sidecarType = getParquetType(fields, SIDECAR); } catch (Exception e) { try { @@ -303,6 +309,7 @@ private CheckpointFieldExtractor createCheckpointFieldExtractor(EntryType entryT case METADATA -> (session, pagePosition, blocks) -> buildMetadataEntry(session, pagePosition, blocks[0]); case PROTOCOL -> (session, pagePosition, blocks) -> buildProtocolEntry(session, pagePosition, blocks[0]); case COMMIT -> (session, pagePosition, blocks) -> buildCommitInfoEntry(session, pagePosition, blocks[0]); + case SIDECAR -> (session, pagePosition, blocks) -> buildSidecarEntry(session, pagePosition, blocks[0]); }; } @@ -320,6 +327,7 @@ private DeltaLakeColumnHandle buildColumnHandle( case METADATA -> schemaManager.getMetadataEntryType(); case PROTOCOL -> schemaManager.getProtocolEntryType(true, true); case COMMIT -> schemaManager.getCommitInfoEntryType(); + case SIDECAR -> schemaManager.getSidecarEntryType(); }; return new DeltaLakeColumnHandle(entryType.getColumnName(), type, OptionalInt.empty(), entryType.getColumnName(), type, REGULAR, Optional.empty()); } @@ -340,7 +348,7 @@ private TupleDomain buildTupleDomainColumnHandle(EntryType ent field = "version"; type = BIGINT; } - case ADD, REMOVE -> { + case ADD, REMOVE, SIDECAR -> { field = "path"; type = VARCHAR; } @@ -538,6 +546,29 @@ private DeltaLakeTransactionLogEntry buildRemoveEntry(ConnectorSession session, return DeltaLakeTransactionLogEntry.removeFileEntry(result); } + private DeltaLakeTransactionLogEntry buildSidecarEntry(ConnectorSession session, int pagePosition, Block block) + { + log.debug("Building sidecar entry from %s pagePosition %d", block, pagePosition); + if (block.isNull(pagePosition)) { + return null; + } + int sidecarFields = 4; + SqlRow sidecarEntryRow = getRow(block, pagePosition); + if (sidecarEntryRow.getFieldCount() != sidecarFields) { + throw new TrinoException( + DELTA_LAKE_INVALID_SCHEMA, + format("Expected block %s to have %d children, but found %s", block, sidecarFields, sidecarEntryRow.getFieldCount())); + } + RowType type = sidecarType.orElseThrow(); + CheckpointFieldReader sidecar = new CheckpointFieldReader(session, sidecarEntryRow, type); + SidecarEntry result = new SidecarEntry( + sidecar.getString("path"), + sidecar.getLong("sizeInBytes"), + sidecar.getLong("modificationTime"), + Optional.ofNullable(sidecar.getMap(stringMap, "tags"))); + return DeltaLakeTransactionLogEntry.sidecarEntry(result); + } + private class AddFileEntryExtractor implements CheckpointFieldExtractor { diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointSchemaManager.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointSchemaManager.java index bdeaa8727dff..0422ef4df9c8 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointSchemaManager.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointSchemaManager.java @@ -64,6 +64,7 @@ public class CheckpointSchemaManager private final RowType metadataEntryType; private final RowType commitInfoEntryType; private final RowType removeEntryType; + private final RowType sidecarEntryType; private final ArrayType stringList; @Inject @@ -111,6 +112,13 @@ public CheckpointSchemaManager(TypeManager typeManager) RowType.field("partitionValues", stringMap), RowType.field("deletionTimestamp", BIGINT), RowType.field("dataChange", BOOLEAN))); + + sidecarEntryType = RowType.from(ImmutableList.builder() + .add(RowType.field("path", VARCHAR)) + .add(RowType.field("sizeInBytes", BIGINT)) + .add(RowType.field("modificationTime", BIGINT)) + .add(RowType.field("tags", stringMap)) + .build()); } public RowType getMetadataEntryType() @@ -236,4 +244,10 @@ public RowType getProtocolEntryType(boolean requireReaderFeatures, boolean requi public RowType getCommitInfoEntryType() { return commitInfoEntryType; - }} + } + + public RowType getSidecarEntryType() + { + return sidecarEntryType; + } +} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriterManager.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriterManager.java index 19bf8cf622af..663349d12578 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriterManager.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriterManager.java @@ -158,7 +158,7 @@ public void writeCheckpoint(ConnectorSession session, TableSnapshot snapshot) checkpointWriter.write(checkpointEntries, checkpointFile); // update last checkpoint file - LastCheckpoint newLastCheckpoint = new LastCheckpoint(newCheckpointVersion, checkpointEntries.size(), Optional.empty()); + LastCheckpoint newLastCheckpoint = new LastCheckpoint(newCheckpointVersion, checkpointEntries.size(), Optional.empty(), Optional.empty()); Location checkpointPath = transactionLogDir.appendPath(LAST_CHECKPOINT_FILENAME); TrinoOutputFile outputFile = fileSystem.newOutputFile(checkpointPath); outputFile.createOrOverwrite(lastCheckpointCodec.toJsonBytes(newLastCheckpoint)); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/LastCheckpoint.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/LastCheckpoint.java index 3754e17692db..aa7bc85bd99d 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/LastCheckpoint.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/LastCheckpoint.java @@ -13,14 +13,17 @@ */ package io.trino.plugin.deltalake.transactionlog.checkpoint; +import io.trino.plugin.deltalake.transactionlog.V2Checkpoint; + import java.util.Optional; import static java.util.Objects.requireNonNull; -public record LastCheckpoint(long version, long size, Optional parts) +public record LastCheckpoint(long version, long size, Optional parts, Optional v2Checkpoint) { public LastCheckpoint { requireNonNull(parts, "parts is null"); + requireNonNull(v2Checkpoint, "v2Checkpoint is null"); } } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAlluxioCacheFileOperations.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAlluxioCacheFileOperations.java index d43fbdf6349a..b48b21506b7d 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAlluxioCacheFileOperations.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAlluxioCacheFileOperations.java @@ -266,6 +266,72 @@ public void testChangeDataFileOperations() .build()); } + @Test + public void testReadV2CheckpointJson() + { + registerTable("v2_checkpoint_json", "deltalake/v2_checkpoint_json"); + assertUpdate("CALL system.flush_metadata_cache(schema_name => CURRENT_SCHEMA, table_name => 'v2_checkpoint_json')"); + assertFileSystemAccesses( + "SELECT * FROM v2_checkpoint_json", + ImmutableMultiset.builder() + .addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json", 0, 765), 2) + .addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 0, 9176), 2) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 1224, 143)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 4, 727)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 1994, 188)) + .add(new CacheOperation("Alluxio.writeCache", "data", 0, 666)) + .add(new CacheOperation("Alluxio.readCached", "data", 0, 666)) + .add(new CacheOperation("Alluxio.readExternal", "data", 0, 666)) + .build()); + assertFileSystemAccesses( + "SELECT * FROM v2_checkpoint_json", + ImmutableMultiset.builder() + .addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json", 0, 765), 2) + .addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 0, 9176), 2) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 1224, 143)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 4, 727)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 1994, 188)) + .add(new CacheOperation("Alluxio.readCached", "data", 0, 666)) + .build()); + } + + @Test + public void testReadV2CheckpointParquet() + { + registerTable("v2_checkpoint_parquet", "deltalake/v2_checkpoint_parquet"); + assertUpdate("CALL system.flush_metadata_cache(schema_name => CURRENT_SCHEMA, table_name => 'v2_checkpoint_parquet')"); + assertFileSystemAccesses( + "SELECT * FROM v2_checkpoint_parquet", + ImmutableMultiset.builder() + .addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 0, 19019), 2) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 100, 2470)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 1304, 1266)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 3155, 87)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 3829, 143)) + .addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 0, 9415), 2) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 4, 758)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 1255, 143)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 2040, 199)) + .add(new CacheOperation("Alluxio.writeCache", "data", 0, 666)) + .add(new CacheOperation("Alluxio.readCached", "data", 0, 666)) + .add(new CacheOperation("Alluxio.readExternal", "data", 0, 666)) + .build()); + assertFileSystemAccesses( + "SELECT * FROM v2_checkpoint_parquet", + ImmutableMultiset.builder() + .addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 0, 19019), 2) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 100, 2470)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 1304, 1266)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 3155, 87)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 3829, 143)) + .addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 0, 9415), 2) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 4, 758)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 1255, 143)) + .add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 2040, 199)) + .add(new CacheOperation("Alluxio.readCached", "data", 0, 666)) + .build()); + } + private void assertFileSystemAccesses(@Language("SQL") String query, Multiset expectedCacheAccesses) { assertUpdate("CALL system.flush_metadata_cache()"); diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java index 4f2f2a055ed7..04a2e614fce2 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java @@ -1323,6 +1323,53 @@ private void assertPartitionValuesParsedCondition(String tableName, int id, @Lan .matches("VALUES " + id); } + @Test + public void testReadV2Checkpoint() + throws Exception + { + testReadV2Checkpoint("deltalake/v2_checkpoint_json"); + testReadV2Checkpoint("deltalake/v2_checkpoint_parquet"); + testReadV2Checkpoint("databricks133/v2_checkpoint_json"); + testReadV2Checkpoint("databricks133/v2_checkpoint_parquet"); + } + + private void testReadV2Checkpoint(String resourceName) + throws Exception + { + String tableName = "test_v2_checkpoint_" + randomNameSuffix(); + Path tableLocation = Files.createTempFile(tableName, null); + Path source = new File(Resources.getResource(resourceName).toURI()).toPath(); + copyDirectoryContents(source, tableLocation); + assertThat(source.resolve("_delta_log/_last_checkpoint")) + .content().contains("v2Checkpoint").contains("sidecar"); + + assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(getSession().getSchema().orElseThrow(), tableName, tableLocation.toUri())); + assertThat(query("DESCRIBE " + tableName)) + .result() + .projected("Column", "Type") + .skippingTypesCheck() + .matches("VALUES ('a', 'integer'), ('b', 'integer')"); + assertQuery("SELECT * FROM " + tableName, "VALUES (1, 2)"); + + // Write-operations should fail + assertQueryFails( + "INSERT INTO " + tableName + " VALUES (3, 4)", + "\\QUnsupported writer features: [v2Checkpoint]"); + assertQueryFails( + "UPDATE " + tableName + " SET a = 10", + "\\QUnsupported writer features: [v2Checkpoint]"); + assertQueryFails( + "DELETE FROM " + tableName, + "\\QUnsupported writer features: [v2Checkpoint]"); + assertQueryFails( + "TRUNCATE TABLE " + tableName, + "\\QUnsupported writer features: [v2Checkpoint]"); + assertQueryFails( + "MERGE INTO " + tableName + " USING (VALUES 42) t(dummy) ON false WHEN NOT MATCHED THEN INSERT VALUES (3, 4)", + "\\QUnsupported writer features: [v2Checkpoint]"); + assertQuery("SELECT * FROM " + tableName, "VALUES (1, 2)"); + } + private static MetadataEntry loadMetadataEntry(long entryNumber, Path tableLocation) throws IOException { diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java index 56b20ac6e613..55d3c7239c84 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java @@ -731,6 +731,52 @@ public void testReadMultipartCheckpoint() .build()); } + @Test + public void testV2CheckpointJson() + throws Exception + { + String tableName = "test_v2_checkpoint_json_" + randomNameSuffix(); + Path tableLocation = Files.createTempFile(tableName, null); + copyDirectoryContents(new File(Resources.getResource("deltalake/v2_checkpoint_json").toURI()).toPath(), tableLocation); + + assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(getSession().getSchema().orElseThrow(), tableName, tableLocation.toUri())); + assertFileSystemAccesses("SELECT * FROM " + tableName, + ImmutableMultiset.builder() + .add(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", "InputFile.newStream")) + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json", "InputFile.length"), 2) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json", "InputFile.newStream"), 2) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", "InputFile.length"), 2) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", "InputFile.newInput"), 2) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .add(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.newStream")) // TransactionLogTail.getEntriesFromJson access non-existing file as end of tail + .add(new FileOperation(DATA, "no partition", "InputFile.newInput")) + .build()); + + assertUpdate("DROP TABLE " + tableName); + } + + @Test + public void testV2CheckpointParquet() + throws Exception + { + String tableName = "test_v2_checkpoint_parquet_" + randomNameSuffix(); + Path tableLocation = Files.createTempFile(tableName, null); + copyDirectoryContents(new File(Resources.getResource("deltalake/v2_checkpoint_parquet").toURI()).toPath(), tableLocation); + + assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(getSession().getSchema().orElseThrow(), tableName, tableLocation.toUri())); + assertFileSystemAccesses("SELECT * FROM " + tableName, + ImmutableMultiset.builder() + .add(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", "InputFile.newStream")) + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", "InputFile.length"), 4) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", "InputFile.newInput"), 2) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", "InputFile.length"), 2) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", "InputFile.newInput"), 2) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .add(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.newStream")) // TransactionLogTail.getEntriesFromJson access non-existing file as end of tail + .add(new FileOperation(DATA, "no partition", "InputFile.newInput")) + .build()); + + assertUpdate("DROP TABLE " + tableName); + } + private int countCdfFilesForKey(String partitionValue) { String path = (String) computeScalar("SELECT \"$path\" FROM table_changes_file_system_access WHERE key = '" + partitionValue + "'"); @@ -774,7 +820,7 @@ public static FileOperation create(String path, String operationType) if (path.matches(".*/_delta_log/_last_checkpoint")) { return new FileOperation(LAST_CHECKPOINT, fileName, operationType); } - if (path.matches(".*/_delta_log/\\d+\\.checkpoint(\\.\\d+\\.\\d+)?\\.parquet")) { + if (path.matches(".*/_delta_log/.*.checkpoint.*")) { return new FileOperation(CHECKPOINT, fileName, operationType); } if (path.matches(".*/_delta_log/\\d+\\.json")) { diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/README.md b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/README.md new file mode 100644 index 000000000000..8e56e908a5cf --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/README.md @@ -0,0 +1,11 @@ +Data generated using Databricks 13.3: + +```sql +CREATE TABLE test_v2_checkpoint_json +(a INT, b INT) +USING delta +LOCATION ? +TBLPROPERTIES ('delta.checkpointPolicy' = 'v2', 'delta.checkpointInterval' = '1'); + +INSERT INTO test_v2_checkpoint_json VALUES (1, 2); +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..1ff67d3dd29d --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1708995805151,"userId":"7853186923043731","userName":"yuya.ebihara@starburstdata.com","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.checkpointPolicy\":\"v2\",\"delta.checkpointInterval\":\"1\"}","statsOnLoad":false},"notebook":{"notebookId":"1841155838656679"},"clusterId":"0830-081135-p4ddj2po","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"fefea473-91a9-4c85-9f52-8fea8e3ac6ff"}} +{"metaData":{"id":"460c25e0-e22c-4e5c-8d79-0c3205dfcc71","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1708995803969}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000001.checkpoint.ae4ea00c-afe0-4e8b-ad87-28125262bd44.json b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000001.checkpoint.ae4ea00c-afe0-4e8b-ad87-28125262bd44.json new file mode 100644 index 000000000000..7372736e50ca --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000001.checkpoint.ae4ea00c-afe0-4e8b-ad87-28125262bd44.json @@ -0,0 +1,4 @@ +{"checkpointMetadata":{"version":1,"tags":{"sidecarNumActions":"1","sidecarSizeInBytes":"12007","numOfAddFiles":"1","sidecarFileSchema":"{\"type\":\"struct\",\"fields\":[{\"name\":\"add\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partitionValues\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"size\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"modificationTime\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"dataChange\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"tags\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionVector\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"storageType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"pathOrInlineDv\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"offset\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sizeInBytes\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cardinality\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"maxRowIndex\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"baseRowId\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"defaultRowCommitVersion\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clusteringProvider\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"stats\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"stats_parsed\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"numRecords\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"minValues\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"maxValues\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"nullCount\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"remove\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionTimestamp\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"dataChange\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"extendedFileMetadata\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partitionValues\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"size\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionVector\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"storageType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"pathOrInlineDv\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"offset\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sizeInBytes\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cardinality\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"maxRowIndex\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"baseRowId\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"defaultRowCommitVersion\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}"}}} +{"sidecar":{"path":"00000000000000000001.checkpoint.0000000001.0000000001.59e66709-d0ca-402d-9375-0e3c4fcf8d73.parquet","sizeInBytes":12007,"modificationTime":1708995845000}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}} +{"metaData":{"id":"460c25e0-e22c-4e5c-8d79-0c3205dfcc71","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1708995803969}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..2ed5dbf9fdd6 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1708995837635,"userId":"7853186923043731","userName":"yuya.ebihara@starburstdata.com","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"1841155838656679"},"clusterId":"0830-081135-p4ddj2po","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"788"},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"700f9c0d-5e49-47a0-b590-de31699f3604"}} +{"add":{"path":"part-00000-946b010f-db20-4fde-860e-a0d90082de75-c000.snappy.parquet","partitionValues":{},"size":788,"modificationTime":1708995838000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"a\":1,\"b\":2},\"maxValues\":{\"a\":1,\"b\":2},\"nullCount\":{\"a\":0,\"b\":0}}","tags":{"INSERTION_TIME":"1708995838000000","MIN_INSERTION_TIME":"1708995838000000","MAX_INSERTION_TIME":"1708995838000000","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/_last_checkpoint b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/_last_checkpoint new file mode 100644 index 000000000000..b40f0f66f774 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":1,"size":5,"sizeInBytes":16914,"numOfAddFiles":1,"v2Checkpoint":{"path":"00000000000000000001.checkpoint.ae4ea00c-afe0-4e8b-ad87-28125262bd44.json","sizeInBytes":4907,"modificationTime":1708995845000,"nonFileActions":[{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}},{"metaData":{"id":"460c25e0-e22c-4e5c-8d79-0c3205dfcc71","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1708995803969}},{"checkpointMetadata":{"version":1,"tags":{"sidecarNumActions":"1","sidecarSizeInBytes":"12007","numOfAddFiles":"1","sidecarFileSchema":"{\"type\":\"struct\",\"fields\":[{\"name\":\"add\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partitionValues\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"size\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"modificationTime\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"dataChange\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"tags\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionVector\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"storageType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"pathOrInlineDv\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"offset\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sizeInBytes\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cardinality\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"maxRowIndex\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"baseRowId\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"defaultRowCommitVersion\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clusteringProvider\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"stats\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"stats_parsed\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"numRecords\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"minValues\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"maxValues\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"nullCount\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"remove\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionTimestamp\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"dataChange\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"extendedFileMetadata\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partitionValues\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"size\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionVector\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"storageType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"pathOrInlineDv\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"offset\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sizeInBytes\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cardinality\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"maxRowIndex\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"baseRowId\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"defaultRowCommitVersion\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}"}}}],"sidecarFiles":[{"path":"00000000000000000001.checkpoint.0000000001.0000000001.59e66709-d0ca-402d-9375-0e3c4fcf8d73.parquet","sizeInBytes":12007,"modificationTime":1708995845000}]},"checksum":"26ed24a15897b93a27541c56e784b421"} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.59e66709-d0ca-402d-9375-0e3c4fcf8d73.parquet b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.59e66709-d0ca-402d-9375-0e3c4fcf8d73.parquet new file mode 100644 index 000000000000..e02ce0972d27 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.59e66709-d0ca-402d-9375-0e3c4fcf8d73.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/part-00000-946b010f-db20-4fde-860e-a0d90082de75-c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/part-00000-946b010f-db20-4fde-860e-a0d90082de75-c000.snappy.parquet new file mode 100644 index 000000000000..56c4e214ada7 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_json/part-00000-946b010f-db20-4fde-860e-a0d90082de75-c000.snappy.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/README.md b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/README.md new file mode 100644 index 000000000000..209b0a013e8f --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/README.md @@ -0,0 +1,13 @@ +Data generated using Databricks 13.3: + +```sql +SET spark.databricks.delta.checkpointV2.topLevelFileFormat = parquet; + +CREATE TABLE test_v2_checkpoint_parquet +(a INT, b INT) +USING delta +LOCATION ? +TBLPROPERTIES ('delta.checkpointPolicy' = 'v2', 'delta.checkpointInterval' = '1'); + +INSERT INTO test_v2_checkpoint_parquet VALUES (1, 2); +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..19b4bd4ba32d --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1708995910270,"userId":"7853186923043731","userName":"yuya.ebihara@starburstdata.com","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.checkpointPolicy\":\"v2\",\"delta.checkpointInterval\":\"1\"}","statsOnLoad":false},"notebook":{"notebookId":"1841155838656679"},"clusterId":"0830-081135-p4ddj2po","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"6d516ff6-63ea-4da3-8a3f-7ca80eb33a40"}} +{"metaData":{"id":"c6274bfd-507a-4515-ac7f-37c482d06d83","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1708995909994}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000001.checkpoint.b6785ba8-b035-4760-b814-1f115ccb6167.parquet b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000001.checkpoint.b6785ba8-b035-4760-b814-1f115ccb6167.parquet new file mode 100644 index 000000000000..ece6e2cbb0b8 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000001.checkpoint.b6785ba8-b035-4760-b814-1f115ccb6167.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..ef0efd984f5c --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1708995916065,"userId":"7853186923043731","userName":"yuya.ebihara@starburstdata.com","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"1841155838656679"},"clusterId":"0830-081135-p4ddj2po","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"788"},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"d1af6b88-f6a9-4a40-8f96-efd0ebf4c2a5"}} +{"add":{"path":"part-00000-15e6800a-1995-4089-8212-a563edf37f5b-c000.snappy.parquet","partitionValues":{},"size":788,"modificationTime":1708995916000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"a\":1,\"b\":2},\"maxValues\":{\"a\":1,\"b\":2},\"nullCount\":{\"a\":0,\"b\":0}}","tags":{"INSERTION_TIME":"1708995916000000","MIN_INSERTION_TIME":"1708995916000000","MAX_INSERTION_TIME":"1708995916000000","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/_last_checkpoint b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/_last_checkpoint new file mode 100644 index 000000000000..c93903d31a50 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":1,"size":5,"sizeInBytes":35280,"numOfAddFiles":1,"checkpointSchema":{"type":"struct","fields":[{"name":"txn","type":{"type":"struct","fields":[{"name":"appId","type":"string","nullable":true,"metadata":{}},{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"lastUpdated","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"add","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"stats","type":"string","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"clusteringProvider","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"remove","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"deletionTimestamp","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"extendedFileMetadata","type":"boolean","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"metaData","type":{"type":"struct","fields":[{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"name","type":"string","nullable":true,"metadata":{}},{"name":"description","type":"string","nullable":true,"metadata":{}},{"name":"format","type":{"type":"struct","fields":[{"name":"provider","type":"string","nullable":true,"metadata":{}},{"name":"options","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"schemaString","type":"string","nullable":true,"metadata":{}},{"name":"partitionColumns","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"configuration","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"createdTime","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"protocol","type":{"type":"struct","fields":[{"name":"minReaderVersion","type":"integer","nullable":true,"metadata":{}},{"name":"minWriterVersion","type":"integer","nullable":true,"metadata":{}},{"name":"readerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"writerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"domainMetadata","type":{"type":"struct","fields":[{"name":"domain","type":"string","nullable":true,"metadata":{}},{"name":"configuration","type":"string","nullable":true,"metadata":{}},{"name":"removed","type":"boolean","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"checkpointMetadata","type":{"type":"struct","fields":[{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"sidecar","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"v2Checkpoint":{"path":"00000000000000000001.checkpoint.b6785ba8-b035-4760-b814-1f115ccb6167.parquet","sizeInBytes":23272,"modificationTime":1708995923000,"nonFileActions":[{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}},{"metaData":{"id":"c6274bfd-507a-4515-ac7f-37c482d06d83","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1708995909994}},{"checkpointMetadata":{"version":1,"tags":{"sidecarNumActions":"1","sidecarSizeInBytes":"12008","numOfAddFiles":"1","sidecarFileSchema":"{\"type\":\"struct\",\"fields\":[{\"name\":\"add\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partitionValues\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"size\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"modificationTime\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"dataChange\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"tags\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionVector\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"storageType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"pathOrInlineDv\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"offset\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sizeInBytes\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cardinality\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"maxRowIndex\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"baseRowId\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"defaultRowCommitVersion\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clusteringProvider\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"stats\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"stats_parsed\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"numRecords\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"minValues\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"maxValues\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"nullCount\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"remove\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionTimestamp\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"dataChange\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"extendedFileMetadata\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"partitionValues\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"size\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"deletionVector\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"storageType\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"pathOrInlineDv\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"offset\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sizeInBytes\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cardinality\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"maxRowIndex\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"baseRowId\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"defaultRowCommitVersion\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}"}}}],"sidecarFiles":[{"path":"00000000000000000001.checkpoint.0000000001.0000000001.25992ab9-b145-4179-b0ee-2ecef614db23.parquet","sizeInBytes":12008,"modificationTime":1708995922000}]},"checksum":"7108678b561724d7520ae909ecdf1c40"} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.25992ab9-b145-4179-b0ee-2ecef614db23.parquet b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.25992ab9-b145-4179-b0ee-2ecef614db23.parquet new file mode 100644 index 000000000000..dff3e99b1d63 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.25992ab9-b145-4179-b0ee-2ecef614db23.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/part-00000-15e6800a-1995-4089-8212-a563edf37f5b-c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/part-00000-15e6800a-1995-4089-8212-a563edf37f5b-c000.snappy.parquet new file mode 100644 index 000000000000..56c4e214ada7 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/databricks133/v2_checkpoint_parquet/part-00000-15e6800a-1995-4089-8212-a563edf37f5b-c000.snappy.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/README.md b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/README.md new file mode 100644 index 000000000000..d0a8eccace27 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/README.md @@ -0,0 +1,11 @@ +Data generated using Delta Lake 3.0.0: + +```sql +CREATE TABLE test_v2_checkpoint_json +(a INT, b INT) +USING delta +LOCATION ? +TBLPROPERTIES ('delta.checkpointPolicy' = 'v2', 'delta.checkpointInterval' = '1'); + +INSERT INTO test_v2_checkpoint_json VALUES (1, 2); +``` diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..2edfc7c94a46 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1708994964212,"operation":"CREATE TABLE","operationParameters":{"isManaged":"false","description":null,"partitionBy":"[]","properties":"{\"delta.checkpointPolicy\":\"v2\",\"delta.checkpointInterval\":\"1\"}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.0.0","txnId":"c20c5431-d81d-4dfc-8550-9a6d9560c57d"}} +{"metaData":{"id":"9a6e4cf5-bb0f-4d34-b948-b16e9c7a35e3","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1708994964035}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json new file mode 100644 index 000000000000..292073f6465e --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json @@ -0,0 +1,4 @@ +{"checkpointMetadata":{"version":1}} +{"sidecar":{"path":"00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet","sizeInBytes":9176,"modificationTime":1708994974000}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}} +{"metaData":{"id":"9a6e4cf5-bb0f-4d34-b948-b16e9c7a35e3","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1708994964035}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..f2cdb13da87a --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1708994972848,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"666"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.0.0","txnId":"09f1521a-abb3-4bf1-af00-2e336d2957eb"}} +{"add":{"path":"part-00000-fad997ee-643c-4ce6-b554-3e1d596857ba-c000.snappy.parquet","partitionValues":{},"size":666,"modificationTime":1708994972000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"a\":1,\"b\":2},\"maxValues\":{\"a\":1,\"b\":2},\"nullCount\":{\"a\":0,\"b\":0}}"}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/_last_checkpoint b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/_last_checkpoint new file mode 100644 index 000000000000..9c62a0227c93 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":1,"size":5,"sizeInBytes":9941,"numOfAddFiles":1,"v2Checkpoint":{"path":"00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json","sizeInBytes":765,"modificationTime":1708994974000,"nonFileActions":[{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}},{"metaData":{"id":"9a6e4cf5-bb0f-4d34-b948-b16e9c7a35e3","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1708994964035}},{"checkpointMetadata":{"version":1}}],"sidecarFiles":[{"path":"00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet","sizeInBytes":9176,"modificationTime":1708994974000}]},"checksum":"dcee9d7788d3f4e92f932cf56e554b93"} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet new file mode 100644 index 000000000000..db1fd2f3d889 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/part-00000-fad997ee-643c-4ce6-b554-3e1d596857ba-c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/part-00000-fad997ee-643c-4ce6-b554-3e1d596857ba-c000.snappy.parquet new file mode 100644 index 000000000000..d076e248c289 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_json/part-00000-fad997ee-643c-4ce6-b554-3e1d596857ba-c000.snappy.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/README.md b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/README.md new file mode 100644 index 000000000000..229e066f0dd0 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/README.md @@ -0,0 +1,13 @@ +Data generated using Delta Lake 3.0.0: + +```sql +SET spark.databricks.delta.checkpointV2.topLevelFileFormat = parquet; + +CREATE TABLE test_v2_checkpoint_parquet +(a INT, b INT) +USING delta +LOCATION ? +TBLPROPERTIES ('delta.checkpointPolicy' = 'v2', 'delta.checkpointInterval' = '1'); + +INSERT INTO test_v2_checkpoint_parquet VALUES (1, 2); +``` diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..4562856cccf7 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1709286514469,"operation":"CREATE TABLE","operationParameters":{"isManaged":"false","description":null,"partitionBy":"[]","properties":"{\"delta.checkpointPolicy\":\"v2\",\"delta.checkpointInterval\":\"1\"}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.1.0","txnId":"a1e3581d-f04a-4da7-8d04-2b1f0a115178"}} +{"metaData":{"id":"8e13e838-c2df-4732-af24-8713936697b5","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1709286514437}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet new file mode 100644 index 000000000000..0a152b66c66a Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..922ceece4d3b --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1709286516064,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"666"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.1.0","txnId":"ec4f8114-f44d-432b-a712-179cf8bda44f"}} +{"add":{"path":"part-00000-77e34410-74c8-4e07-8769-345baa22e4b3-c000.snappy.parquet","partitionValues":{},"size":666,"modificationTime":1709286516000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"a\":1,\"b\":2},\"maxValues\":{\"a\":1,\"b\":2},\"nullCount\":{\"a\":0,\"b\":0}}"}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/_last_checkpoint b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/_last_checkpoint new file mode 100644 index 000000000000..35c4727b6f3c --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":1,"size":5,"sizeInBytes":28434,"numOfAddFiles":1,"checkpointSchema":{"type":"struct","fields":[{"name":"txn","type":{"type":"struct","fields":[{"name":"appId","type":"string","nullable":true,"metadata":{}},{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"lastUpdated","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"add","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"stats","type":"string","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"clusteringProvider","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"remove","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"deletionTimestamp","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"extendedFileMetadata","type":"boolean","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"stats","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"metaData","type":{"type":"struct","fields":[{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"name","type":"string","nullable":true,"metadata":{}},{"name":"description","type":"string","nullable":true,"metadata":{}},{"name":"format","type":{"type":"struct","fields":[{"name":"provider","type":"string","nullable":true,"metadata":{}},{"name":"options","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"schemaString","type":"string","nullable":true,"metadata":{}},{"name":"partitionColumns","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"configuration","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"createdTime","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"protocol","type":{"type":"struct","fields":[{"name":"minReaderVersion","type":"integer","nullable":true,"metadata":{}},{"name":"minWriterVersion","type":"integer","nullable":true,"metadata":{}},{"name":"readerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"writerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"domainMetadata","type":{"type":"struct","fields":[{"name":"domain","type":"string","nullable":true,"metadata":{}},{"name":"configuration","type":"string","nullable":true,"metadata":{}},{"name":"removed","type":"boolean","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"checkpointMetadata","type":{"type":"struct","fields":[{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"sidecar","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"v2Checkpoint":{"path":"00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet","sizeInBytes":19019,"modificationTime":1709286517000,"nonFileActions":[{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["v2Checkpoint"],"writerFeatures":["v2Checkpoint"]}},{"metaData":{"id":"8e13e838-c2df-4732-af24-8713936697b5","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointPolicy":"v2","delta.checkpointInterval":"1"},"createdTime":1709286514437}},{"checkpointMetadata":{"version":1}}],"sidecarFiles":[{"path":"00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet","sizeInBytes":9415,"modificationTime":1709286517000}]},"checksum":"9d9fbc4697cd4da83d3eb5fca85aa5cc"} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet new file mode 100644 index 000000000000..d969343ee057 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/_delta_log/_sidecars/00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/part-00000-77e34410-74c8-4e07-8769-345baa22e4b3-c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/part-00000-77e34410-74c8-4e07-8769-345baa22e4b3-c000.snappy.parquet new file mode 100644 index 000000000000..5006c66afbcf Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/v2_checkpoint_parquet/part-00000-77e34410-74c8-4e07-8769-345baa22e4b3-c000.snappy.parquet differ diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeCheckpointsCompatibility.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeCheckpointsCompatibility.java index 3796fae2ac0e..ea8a6bd1753e 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeCheckpointsCompatibility.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeCheckpointsCompatibility.java @@ -679,6 +679,43 @@ private void testWriteStatsAsStructEnabled(Consumer sqlExecutor, String } } + @Test(groups = {DELTA_LAKE_OSS, PROFILE_SPECIFIC_TESTS}) + public void testV2CheckpointMultipleSidecars() + { + testV2CheckpointMultipleSidecars("json"); + testV2CheckpointMultipleSidecars("parquet"); + } + + private void testV2CheckpointMultipleSidecars(String format) + { + String tableName = "test_dl_v2_checkpoint_multiple_sidecars_" + randomNameSuffix(); + String tableDirectory = "delta-compatibility-test-" + tableName; + + onDelta().executeQuery("SET spark.databricks.delta.checkpointV2.topLevelFileFormat = " + format); + onDelta().executeQuery("SET spark.databricks.delta.checkpoint.partSize = 1"); + + onDelta().executeQuery("CREATE TABLE default." + tableName + + "(id INT, part STRING)" + + "USING delta " + + "PARTITIONED BY (part)" + + "LOCATION 's3://" + bucketName + "/" + tableDirectory + "'" + + "TBLPROPERTIES ('delta.checkpointPolicy' = 'v2', 'delta.checkpointInterval' = '1')"); + try { + onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (1, 'part1'), (2, 'part2')"); + + List expectedRows = ImmutableList.of(row(1, "part1"), row(2, "part2")); + assertThat(onDelta().executeQuery("SELECT * FROM default." + tableName)) + .containsOnly(expectedRows); + assertThat(onTrino().executeQuery("SELECT * FROM delta.default." + tableName)) + .containsOnly(expectedRows); + + assertThat(listSidecarFiles(bucketName, tableDirectory)).hasSize(2); + } + finally { + dropDeltaTableWithRetry("default." + tableName); + } + } + private void fillWithInserts(String tableName, String values, int toCreate) { for (int i = 0; i < toCreate; i++) { @@ -694,6 +731,11 @@ private List listCheckpointFiles(String bucketName, String tableDirector .collect(toImmutableList()); } + private List listSidecarFiles(String bucketName, String tableDirectory) + { + return listS3Directory(bucketName, tableDirectory + "/_delta_log/_sidecars"); + } + private List listS3Directory(String bucketName, String directory) { ImmutableList.Builder result = ImmutableList.builder();