From a9480bd1e5fbdad87b62f33c4d9323a021874963 Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Tue, 27 Sep 2022 18:57:23 +0900 Subject: [PATCH] Support converting column stats on row type to json in Delta Lake --- .../DeltaLakeParquetStatisticsUtils.java | 52 +++++++++++++++- .../checkpoint/CheckpointWriter.java | 3 +- .../BaseDeltaLakeConnectorSmokeTest.java | 58 ++++++++++++++++++ .../json_stats_on_row_type/README.md | 21 +++++++ .../00000000000000000002.checkpoint.parquet | Bin 0 -> 15470 bytes .../_delta_log/00000000000000000002.json | 2 + .../_delta_log/00000000000000000003.json | 2 + .../_delta_log/_last_checkpoint | 1 + ...4926-a939-dc6a01571d9f-c000.snappy.parquet | Bin 0 -> 1181 bytes ...4af2-a37f-68a39a1e2a5d-c000.snappy.parquet | Bin 0 -> 1074 bytes 10 files changed, 136 insertions(+), 3 deletions(-) create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/README.md create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/_delta_log/00000000000000000002.checkpoint.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/_delta_log/00000000000000000002.json create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/_delta_log/00000000000000000003.json create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/_delta_log/_last_checkpoint create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/part-00000-4137b6c1-34fd-4926-a939-dc6a01571d9f-c000.snappy.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/part-00000-df481541-fe59-4af2-a37f-68a39a1e2a5d-c000.snappy.parquet diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeParquetStatisticsUtils.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeParquetStatisticsUtils.java index 74f0d7526d23..7a294e9b7926 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeParquetStatisticsUtils.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeParquetStatisticsUtils.java @@ -13,10 +13,13 @@ */ package io.trino.plugin.deltalake.transactionlog; +import com.google.common.collect.ImmutableMap; import io.airlift.log.Logger; import io.airlift.slice.Slice; import io.trino.plugin.base.type.DecodedTimestamp; +import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; +import io.trino.spi.block.ColumnarRow; import io.trino.spi.block.RowBlockBuilder; import io.trino.spi.type.ArrayType; import io.trino.spi.type.DateType; @@ -56,6 +59,7 @@ import static com.google.common.collect.ImmutableMap.toImmutableMap; import static io.airlift.slice.Slices.utf8Slice; import static io.trino.parquet.ParquetTimestampUtils.decodeInt96Timestamp; +import static io.trino.spi.block.ColumnarRow.toColumnarRow; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.BooleanType.BOOLEAN; import static io.trino.spi.type.DateTimeEncoding.unpackMillisUtc; @@ -67,6 +71,7 @@ import static io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MILLIS; import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_MILLISECOND; import static io.trino.spi.type.TinyintType.TINYINT; +import static io.trino.spi.type.TypeUtils.readNativeValue; import static io.trino.spi.type.TypeUtils.writeNativeValue; import static java.lang.Float.floatToRawIntBits; import static java.lang.Float.intBitsToFloat; @@ -76,6 +81,7 @@ import static java.time.format.DateTimeFormatter.ISO_INSTANT; import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE; import static java.time.temporal.ChronoUnit.MILLIS; +import static java.util.Objects.requireNonNull; public class DeltaLakeParquetStatisticsUtils { @@ -154,8 +160,7 @@ public static Map toJsonValues(Map columnTypeMappi Map jsonValues = new HashMap<>(); for (Map.Entry value : values.entrySet()) { Type type = columnTypeMapping.get(value.getKey()); - // TODO: Add support for row type - if (type instanceof ArrayType || type instanceof MapType || type instanceof RowType) { + if (type instanceof ArrayType || type instanceof MapType) { continue; } jsonValues.put(value.getKey(), toJsonValue(columnTypeMapping.get(value.getKey()), value.getValue())); @@ -197,6 +202,19 @@ private static Object toJsonValue(Type type, @Nullable Object value) Instant ts = Instant.ofEpochMilli(unpackMillisUtc((long) value)); return ISO_INSTANT.format(ZonedDateTime.ofInstant(ts, UTC)); } + if (type instanceof RowType rowType) { + Block rowBlock = (Block) value; + ImmutableMap.Builder fieldValues = ImmutableMap.builder(); + for (int i = 0; i < rowBlock.getPositionCount(); i++) { + RowType.Field field = rowType.getFields().get(i); + Object fieldValue = readNativeValue(field.getType(), rowBlock.getChildren().get(i), i); + Object jsonValue = toJsonValue(field.getType(), fieldValue); + if (jsonValue != null) { + fieldValues.put(field.getName().orElseThrow(), jsonValue); + } + } + return fieldValues.buildOrThrow(); + } throw new UnsupportedOperationException("Unsupported type: " + type); } @@ -222,6 +240,36 @@ private static Map jsonEncode(Map .collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().get())); } + public static Map toNullCounts(Map columnTypeMapping, Map values) + { + ImmutableMap.Builder nullCounts = ImmutableMap.builderWithExpectedSize(values.size()); + for (Map.Entry value : values.entrySet()) { + Type type = columnTypeMapping.get(value.getKey()); + requireNonNull(type, "type is null"); + nullCounts.put(value.getKey(), toNullCount(type, value.getValue())); + } + return nullCounts.buildOrThrow(); + } + + private static Object toNullCount(Type type, Object value) + { + if (type instanceof RowType rowType) { + ColumnarRow row = toColumnarRow((Block) value); + ImmutableMap.Builder nullCounts = ImmutableMap.builderWithExpectedSize(row.getPositionCount()); + for (int i = 0; i < row.getPositionCount(); i++) { + RowType.Field field = rowType.getFields().get(i); + if (field.getType() instanceof RowType) { + nullCounts.put(field.getName().orElseThrow(), toNullCount(field.getType(), row.getField(i))); + } + else { + nullCounts.put(field.getName().orElseThrow(), BIGINT.getLong(row.getField(i), 0)); + } + } + return nullCounts.buildOrThrow(); + } + return value; + } + private static Optional getMin(Type type, Statistics statistics) { if (statistics.genericGetMin() == null || !statistics.hasNonNullValue()) { diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java index dbaf9392ba4d..0bd25679d9c9 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java @@ -58,6 +58,7 @@ import static io.trino.plugin.deltalake.DeltaLakeSchemaProperties.buildHiveSchema; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeParquetStatisticsUtils.jsonValueToTrinoValue; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeParquetStatisticsUtils.toJsonValues; +import static io.trino.plugin.deltalake.transactionlog.DeltaLakeParquetStatisticsUtils.toNullCounts; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractSchema; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.serializeStatsAsJson; import static io.trino.plugin.deltalake.transactionlog.MetadataEntry.DELTA_CHECKPOINT_WRITE_STATS_AS_JSON_PROPERTY; @@ -244,7 +245,7 @@ private void writeJsonStats(BlockBuilder entryBlockBuilder, RowType entryType, A parquetFileStatistics.getNumRecords(), parquetFileStatistics.getMinValues().map(values -> toJsonValues(columnTypeMapping, values)), parquetFileStatistics.getMaxValues().map(values -> toJsonValues(columnTypeMapping, values)), - parquetFileStatistics.getNullCount()); + parquetFileStatistics.getNullCount().map(nullCounts -> toNullCounts(columnTypeMapping, nullCounts))); statsJson = getStatsString(jsonFileStatistics).orElse(null); } else { diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeConnectorSmokeTest.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeConnectorSmokeTest.java index 2e7433f9f2b5..eb9f80cb8f6c 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeConnectorSmokeTest.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseDeltaLakeConnectorSmokeTest.java @@ -22,6 +22,7 @@ import io.trino.Session; import io.trino.execution.QueryManager; import io.trino.operator.OperatorStats; +import io.trino.plugin.deltalake.transactionlog.AddFileEntry; import io.trino.plugin.hive.TestingHivePlugin; import io.trino.plugin.hive.containers.HiveHadoop; import io.trino.plugin.hive.containers.HiveMinioDataLake; @@ -40,11 +41,13 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.BiConsumer; +import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableSet.toImmutableSet; import static com.google.common.collect.MoreCollectors.onlyElement; import static com.google.common.collect.Sets.union; @@ -65,6 +68,7 @@ import static io.trino.tpch.TpchTable.LINE_ITEM; import static io.trino.tpch.TpchTable.ORDERS; import static java.lang.String.format; +import static java.util.Comparator.comparing; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.SECONDS; import static org.assertj.core.api.Assertions.assertThat; @@ -93,6 +97,7 @@ public abstract class BaseDeltaLakeConnectorSmokeTest "old_timestamps", "nested_timestamps", "nested_timestamps_parquet_stats", + "json_stats_on_row_type", "parquet_stats_missing", "uppercase_columns", "default_partitions", @@ -831,6 +836,59 @@ public void testSelectNestedTimestamps() assertQuery("SELECT CAST(col1[1].ts AS VARCHAR) FROM nested_timestamps_parquet_stats LIMIT 1", "VALUES '2010-02-03 12:11:10.000 UTC'"); } + @Test + public void testConvertJsonStatisticsToParquetOnRowType() + throws Exception + { + verifySupportsInsert(); + + assertQuery("SELECT count(*) FROM json_stats_on_row_type", "VALUES 2"); + String transactionLogDirectory = "json_stats_on_row_type/_delta_log"; + String newTransactionFile = getLocationForTable(bucketName, "json_stats_on_row_type") + "/_delta_log/00000000000000000004.json"; + String newCheckpointFile = getLocationForTable(bucketName, "json_stats_on_row_type") + "/_delta_log/00000000000000000004.checkpoint.parquet"; + assertThat(getTableFiles(transactionLogDirectory)) + .doesNotContain(newTransactionFile, newCheckpointFile); + + assertUpdate("INSERT INTO json_stats_on_row_type SELECT CAST(row(3) AS row(x bigint)), CAST(row(row('test insert')) AS row(y row(nested varchar)))", 1); + assertThat(getTableFiles(transactionLogDirectory)) + .contains(newTransactionFile, newCheckpointFile); + assertThat(getAddFileEntries("json_stats_on_row_type")).hasSize(3); + + // The first two entries created by Databricks have column stats. The last one doesn't have column stats because the connector doesn't support collecting it on row columns. + List addFileEntries = getAddFileEntries("json_stats_on_row_type").stream().sorted(comparing(AddFileEntry::getModificationTime)).collect(toImmutableList()); + assertThat(addFileEntries).hasSize(3); + assertJsonStatistics( + addFileEntries.get(0), + "{" + + "\"numRecords\":1," + + "\"minValues\":{\"nested_struct_col\":{\"y\":{\"nested\":\"test\"}},\"struct_col\":{\"x\":1}}," + + "\"maxValues\":{\"nested_struct_col\":{\"y\":{\"nested\":\"test\"}},\"struct_col\":{\"x\":1}}," + + "\"nullCount\":{\"struct_col\":{\"x\":0},\"nested_struct_col\":{\"y\":{\"nested\":0}}}" + + "}"); + assertJsonStatistics( + addFileEntries.get(1), + "{" + + "\"numRecords\":1," + + "\"minValues\":{\"nested_struct_col\":{\"y\":{}},\"struct_col\":{}}," + + "\"maxValues\":{\"nested_struct_col\":{\"y\":{}},\"struct_col\":{}}," + + "\"nullCount\":{\"struct_col\":{\"x\":1},\"nested_struct_col\":{\"y\":{\"nested\":1}}}" + + "}"); + assertJsonStatistics( + addFileEntries.get(2), + "{\"numRecords\":1,\"minValues\":{},\"maxValues\":{},\"nullCount\":{}}"); + } + + private List getAddFileEntries(String tableName) + throws IOException + { + return TestingDeltaLakeUtils.getAddFileEntries(getLocationForTable(bucketName, tableName)); + } + + private void assertJsonStatistics(AddFileEntry addFileEntry, @Language("JSON") String jsonStatistics) + { + assertEquals(addFileEntry.getStatsString().orElseThrow(), jsonStatistics); + } + @Test public void testMissingParquetStats() { diff --git a/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/README.md b/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/README.md new file mode 100644 index 000000000000..71797d6728d1 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/README.md @@ -0,0 +1,21 @@ +Data generated using Databricks 10.4: + +```sql +CREATE TABLE default.json_stats_on_row_type + (struct_col struct, nested_struct_col struct>) +USING DELTA +LOCATION 's3://bucket/table' +TBLPROPERTIES ( + delta.checkpointInterval = 2, + delta.checkpoint.writeStatsAsJson = false, + delta.checkpoint.writeStatsAsStruct = true +); + +INSERT INTO default.json_stats_on_row_type SELECT named_struct('x', 1), named_struct('y', named_struct('nested', 'test')); +INSERT INTO default.json_stats_on_row_type SELECT named_struct('x', NULL), named_struct('y', named_struct('nested', NULL)); + +ALTER TABLE default.json_stats_on_row_type SET TBLPROPERTIES ( + 'delta.checkpoint.writeStatsAsJson' = true, + 'delta.checkpoint.writeStatsAsStruct' = false +); +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/_delta_log/00000000000000000002.checkpoint.parquet b/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/_delta_log/00000000000000000002.checkpoint.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a505133457e92b797b76562936da9bae4b73ad44 GIT binary patch literal 15470 zcmeHO4Qx}_6@Jfg9EUg|3AvBwkwOT;P21RY5<5RAny7#^L6oA9GSro(FZOfraO~7} zQYd0V?G!P@B9{IP720MKWejDs+PaQ0)Kv(jC>n@XZC7oKW&N3Go3WNvbfSpuocrFp z@5OdvJJ2#sq?BA=pYxrcd+#~t{;=k*_FBS`B2r2&pMCwKoN{7?j}~I(xI8h3<2d;L zI(&b~ME01-&;IqrM|lPbPw`8sh+%FSNa~fMai^Q-$DN+q`o^vXU#+v=6Yx7d&2Gw5A?pkkSt-m?o^ts(`S1c^`_HJ%)C=e97J1S%1LORO|KBIs>w|+3Ar2 zbxx_iG2m=ylIokKTDea0`cv_Ok>4Qm$-kd?_I0bKqJ`rM-6rY;RZpNQClzGxGf$o_ zG*A_J6e=^)Za1UHT%?Yi`t$3bnKd%-=-l-x%(F-cKXvMFm45Jz=btjAqCfcSr7t|r zbKLIVD!+lgn3R!EPVMIn}b1ZMH%$my9Qcq_pj*exU2o{6&;;xp$+gAq>8+M?1|mBa$*Aj3jtpgw@@pqsgzG= z^EtENo9}|07oWK_klzACAjjKO45+mY4KQq;raG@Xmv`r*sSQn@dauXZP`8j18({3< z2jo%R3!DuB^A8|LePKawo1V z8f0)~O+5d?2S%z%D+^o?ZvR?^JxIF9&tLleK(1jj+RzPgAp}zN;?a@u*5)Jo%FsXB%+T087IK8a3VbxK93Z=AX|P!)NJo zs)vos13lCb2%^x9SKiurwFKUl~s5W07L_(S8eIgoQu^}t?OH(M*? zTYBZnC6%#ww9gl>tf>qH<&ZyCx#T-rE5lNceO_6oFA}P(DZG>0T4)V7a@=O(+EW>d zgu8(_+!qQ-T_FGfcDbgqM~+K=DK1qm*}84phHZ~7>Ihi3a2H+PD;t-4$C)OwHp4e*E##?Z)*zyGix?9S2w<5Fl^8ePYLJv$XMsm{Fj_XcG-Cosj?+LannGDHY~i$_5W}Bc6ccG!Fw`d`Ux$+vVxA@n zu&r7|Gc*lfn{hP&7ydGK)7T}N@t4|NE?CJ@0jbM{R@U0Vv*t;jjG0t|Av0N}HqV*K zYPC6%OCD02yDY3ZVPO!5tfXBfxSUVcsm+}QWVzZrUBC#2ZLE1wZ=NV*y-ydB^(xy1 zy*WCYNgF6;%~8EMP@?yiFe`>inHA%uj9_>UBbb=O2#%JK-MlJ!j4 zeA^?DS)A7-=EOILAvF`*Vw0E)+r2jbVs2GAj<^|m^7`dyEEowZp9(@!EPh`v)DNbV3G)DGJq@Z80b2 zK#wMtDnh^2dQcH_EcmFbXi?k~@dpDzpM>Zg!5--5fMOPwHOn?i;cgjqGsmUwYmo{N zwiuQnonT2!_QRHkdSq6(ucux1MWX%~iplQ@Y8IOD^DHGUVEiB^g@fOWUzDg7)}-&D zpWlLXXtYJ4(0!QSBW+f_cm1)kFkBYt3&&v$X0A65jjA`ISfX-Iq+d=oC#dsEAb!c( zuz`1bdXvYF4rZCWIWC9&vj1DbkbI9?v!aBrW?ljCb(k+!EUuTsEF{Ish?;_IG2sB4 zpNsS!^emUlg27A#X`mkF9k!X*E<3gu}wKp2+5BlY(PLUU(EY~uto|i*S5AYl(4A+w~VX%VC=Nzr&#B5;H0wZH>O<;W)Sw|@A z4%jOhd5vL|@$cXh08Z8uK^UU|Cjo$I4KbI-TFQCWB#fCHCxO>oy$pxVNef`-ObUKH z;}eY_Gd(CYdhlF55ry*=udu<`Oz) zU=a?=xl9prm2`vSyi!;Vd%+ij*;9JeUl@yosVFy zW?LV<*XW3!qJ-m%$TvC1F3V&INePWf+hqZ1+MY?@NTIakUZb=lC~bQcN}B*_oHld8 z9z&j_a&X>HfE=EKKY&bs8?jC!)~i*dl5-rQ%V)7!9YGp1lZMSEoaBWwJPl`jw@Cat?vWH zq+MbPiI?dhEhcL?$4)qsfoWidNYR9|by6{4C<3St`-SfTgX7Gsa(!p1w{W<${ifcV zLi}zo7(C>_Q9G@O$3V3{NxWWk7|IQPGgW1^&<@NnZIUGlmehLa@$LXBaE2-{jAzEm zwAna~0?B0!zeOzwm^#C@7G>M4{0(Z*O|rna9IZ^`yQD6DrpT{Ovl zNZj;i@;uafjRSBM6*yRp_FvEyNHP9QfpqqxIu~ZD&XWzG&flpzqj0pVr9idH)UzOl zu+iBGA^i|C@4pSx#6U4~X0^(kT#2$^I8dB`^co@^r%0oKqylK^sz<&-FG{>Yp67HP!@WX#5z=(pz9ul zJxXE60ai<_+7PnbibFV_$b1-?-=)mMa}sq*oqVhU)(pdQ64W?>NS7$mL^`AdlO__e z{)AXh-hrMvTBgrRR{cZOPseT-FYM-lq$RV1vdpL;cxH6$mKmCH?SI+`mD(@wAcuL* zF$UKh+G=BV(d`SHm188q#XFk7E#o!lmYqThcH?G;q3jeAP@Y4Ss}yBSM`>b8sGB3U zDWXX2!aB#8(8A6JSp&SKofon`qs=d)w0%ypgmVnf)h5{bL}A)?E2+{^C)?Zv8^#dz z9VdEXA{$i0#KTEEk%&8vxSvzpBVq~F+r8nQ$XU z2ZG4W)idoK^o<>#o2%!4dG#36?WHCtF@{|9Flcz5mTy~GzD+uKq=${#AtTLbczQED zFCpO$HbY{3$&Cs3LJHW)XOc--+$QXUQ#2k}TN*6YQ(m!Ua6xCZb$>J1dfE+_K%C>q zEy=b{r4%ayY(l@#53HyZz0VW*I-`JNX!N;SQq4II&(|w!=1k++DRtc{VGU5i&QQAy z)LRhZQuG?7y@}EW>TqXx6{MNe10{AKn{*IMMJ^XwfgEl*->ILjPnGaTP^eL=nPZm0)#Pg@Foqre}XZv`3=dE~!_7 zn=u!?By+{#X6gP&SQhhE#C><(*Rc%ViX{<+A?<~wD;jykrM=J*4^?JT*{ONyD$(lk(>9>I%C#*no`*jWH-K?0 zEOXCwN#LoT@aPISO_CAX-Wv#jToQnZ4sb9<9A&VJdd@shd&DvbHlGW#B42jt-MWYfP-mA~c z5;sZk#%#v>qe<`6k|lpF9@m$c;e8v^C8JoK#h__#=9mEH=4mzriSOzdb0z#E9l2YM zLPv%hIe?mUY?C!qT3a`4I1c2cfHiy)FcPG>H|_&v$v}&(CmhP1?vyUZur>f amE;EBZSb}b9!=knKipX-$E|^XY5ZS{@P?xR literal 0 HcmV?d00001 diff --git a/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/_delta_log/00000000000000000002.json b/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/_delta_log/00000000000000000002.json new file mode 100644 index 000000000000..8209e6a87069 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"add":{"path":"part-00000-df481541-fe59-4af2-a37f-68a39a1e2a5d-c000.snappy.parquet","partitionValues":{},"size":1074,"modificationTime":1664924826000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"struct_col\":{},\"nested_struct_col\":{\"y\":{}}},\"maxValues\":{\"struct_col\":{},\"nested_struct_col\":{\"y\":{}}},\"nullCount\":{\"struct_col\":{\"x\":1},\"nested_struct_col\":{\"y\":{\"nested\":1}}}}","tags":{"INSERTION_TIME":"1664924826000000","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"commitInfo":{"timestamp":1664924826032,"userId":"7853186923043731","userName":"yuya.ebihara@starburstdata.com","operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"notebook":{"notebookId":"2299734316069194"},"clusterId":"0620-043712-o6vqr39c","readVersion":1,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"1074"},"engineInfo":"Databricks-Runtime/10.4.x-scala2.12","txnId":"974a8474-26b8-42fc-a2c6-4d6af29109a2"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/_delta_log/00000000000000000003.json b/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/_delta_log/00000000000000000003.json new file mode 100644 index 000000000000..f59e47e72c43 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/_delta_log/00000000000000000003.json @@ -0,0 +1,2 @@ +{"metaData":{"id":"0e741658-b990-49a7-a25d-d148943b2f44","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"struct_col\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"x\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"nested_struct_col\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"y\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"nested\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpoint.writeStatsAsStruct":"false","delta.checkpoint.writeStatsAsJson":"true","delta.checkpointInterval":"2"},"createdTime":1664924821035}} +{"commitInfo":{"timestamp":1664924827940,"userId":"7853186923043731","userName":"yuya.ebihara@starburstdata.com","operation":"SET TBLPROPERTIES","operationParameters":{"properties":"{\"delta.checkpoint.writeStatsAsJson\":\"true\",\"delta.checkpoint.writeStatsAsStruct\":\"false\"}"},"notebook":{"notebookId":"2299734316069194"},"clusterId":"0620-043712-o6vqr39c","readVersion":2,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Databricks-Runtime/10.4.x-scala2.12","txnId":"f37d7c4a-027d-4bf6-9bf3-0f26b919018c"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/_delta_log/_last_checkpoint b/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/_delta_log/_last_checkpoint new file mode 100644 index 000000000000..598bce9f5a30 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":2,"size":4} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/part-00000-4137b6c1-34fd-4926-a939-dc6a01571d9f-c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/part-00000-4137b6c1-34fd-4926-a939-dc6a01571d9f-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..bff0b7e1a032f85e84961d75d4af8d3bd88b649a GIT binary patch literal 1181 zcmb_cO^ee&7=Dx18l)l%JHrHmmcW)Sv@zXo*S0KzsGzV&kqTvXDU)UkVOKDR%)IkFGtc`q(|qu}On}@bI{EVR+vki% zWHd8G0k~C50YFLy35V!g^1lCm_w{Cymx45MLJ@@OmaH=6V%Nr$$SMU`!082%Er#W? zMitUT98`_A;~65}lWZ{J{%I$$UyD8`X{gHVMiBaAJM7p)kH|pMJ2I0d5wJBtebMdw zh2y$3jd)q)Wn#t-9BC*;WmQv)c|{{>wF*i`Eo>YuO^8fodPJLfQg$bHDSHIj`rYDq7p>C&&2V*4;oPW(ruMXF+JQ{g;P_#qJk=m&|) zd4+zGwlRmVm^$=(1|M9bb-fB`U; zAiNQN;0_1uTE*Bi%D{FLjNr^Oux4jt5gPvRtjrw`{x5HCCZcgAA-3w+LJl3JBdoh&G3kaqJ_N^^~3S9u%*7ATXsUOY?Sv5 zvlNxs@?HBhD4C|YpDS2n*X!mgPPuAUc)eB=j#Y8ERj*o>Q#bc(<*p-4H1AcpIIwrs QB3{YpR{p|=r-={KA3EhZt^fc4 literal 0 HcmV?d00001 diff --git a/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/part-00000-df481541-fe59-4af2-a37f-68a39a1e2a5d-c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/databricks/json_stats_on_row_type/part-00000-df481541-fe59-4af2-a37f-68a39a1e2a5d-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..91e55214c8b61aedbb90b729c76e95d113460e7b GIT binary patch literal 1074 zcmb_cU2D@&7(Pk6E=Un3JBJepvjkQ)XtQKpYg@*cs6|0!NWGbka*~|XuzYk$y0w&g z>3ik3ll67k#xyHRoZVs!U|LfIf2_}0k4y{0^UH?o`TDnZ#y4dm@eX7 zDeaTPYVR+l%T(!OszlC-Se3||b?Cq=Du>ZfV64&R)VZiC?qr}>I(SLxb zk|TM1$gJs;XZ=o#B1CD9aL%LH4FkH;&~`Ng(d`@~KJzr__}Q2zS`=PrK2Mm<5=P$_ zW@<7WakZ;v{Z(tK<8sfA)$Yrg8ZiGCl#T}AjD8ch$&#&lVK4+M7<(QwJEnU(+x&NVTyDu4h0 literal 0 HcmV?d00001