Skip to content

Commit

Permalink
Test reading liquid clustering tables in Delta Lake
Browse files Browse the repository at this point in the history
  • Loading branch information
ebyhr committed Jun 9, 2024
1 parent 86c41ea commit f198b32
Show file tree
Hide file tree
Showing 9 changed files with 47 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ public class TestDeltaLakeBasic
new ResourceTable("stats_with_minmax_nulls", "deltalake/stats_with_minmax_nulls"),
new ResourceTable("no_column_stats", "databricks73/no_column_stats"),
new ResourceTable("deletion_vectors", "databricks122/deletion_vectors"),
new ResourceTable("liquid_clustering", "deltalake/liquid_clustering"),
new ResourceTable("timestamp_ntz", "databricks131/timestamp_ntz"),
new ResourceTable("timestamp_ntz_partition", "databricks131/timestamp_ntz_partition"));

Expand Down Expand Up @@ -940,6 +941,24 @@ public void testDeletionVectors()
assertQuery("SELECT * FROM deletion_vectors", "VALUES (1, 11)");
}

/**
 * Verifies that a table created with liquid clustering can be read (latest snapshot,
 * filtered reads and every historical version) and that writes to it are rejected.
 *
 * @see deltalake.liquid_clustering
 */
@Test
public void testLiquidClustering()
{
    String firstRow = "VALUES ('test 1', 2024, 1)";
    String bothRows = firstRow + ", ('test 2', 2024, 2)";
    String monthFilter = "SELECT data FROM liquid_clustering WHERE year = 2024 AND month = ";
    String versionAsOf = "SELECT * FROM liquid_clustering FOR VERSION AS OF ";

    // Latest snapshot, then one filtered read per row
    assertQuery("SELECT * FROM liquid_clustering", bothRows);
    assertQuery(monthFilter + 1, "VALUES 'test 1'");
    assertQuery(monthFilter + 2, "VALUES 'test 2'");

    // Time travel: version 0 has no rows, each following version is checked in turn;
    // versions 2 and 3 are expected to return the same rows
    assertQueryReturnsEmptyResult(versionAsOf + 0);
    assertQuery(versionAsOf + 1, firstRow);
    assertQuery(versionAsOf + 2, bothRows);
    assertQuery(versionAsOf + 3, bothRows);

    // Writing is expected to fail with an unsupported-writer-features error
    assertQueryFails("INSERT INTO liquid_clustering VALUES ('test 3', 2024, 3)", "Unsupported writer features: .*");
}

@Test
public void testCorruptedManagedTableLocation()
throws Exception
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Data generated using OSS Delta Lake 3.2.0.
The table is clustered by two columns because Hilbert clustering requires at least two.

```sql
CREATE TABLE test_liquid
(data string, year int, month int)
USING delta
CLUSTER BY (year, month)
LOCATION ?;

INSERT INTO test_liquid VALUES ('test 1', 2024, 1);

INSERT INTO test_liquid VALUES ('test 2', 2024, 2);

OPTIMIZE test_liquid;
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"commitInfo":{"timestamp":1717803968268,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[\"year\",\"month\"]","description":null,"isManaged":"false","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.2.0","txnId":"f324a7bf-37b9-4561-9345-6ba50a89eee4"}}
{"metaData":{"id":"89e6e7c2-d77a-4deb-af28-9209972fb56d","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"data\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"year\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"month\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1717803968252}}
{"protocol":{"minReaderVersion":1,"minWriterVersion":7,"writerFeatures":["domainMetadata","clustering"]}}
{"domainMetadata":{"domain":"delta.clustering","configuration":"{\"clusteringColumns\":[[\"year\"],[\"month\"]],\"domainName\":\"delta.clustering\"}","removed":false}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"commitInfo":{"timestamp":1717803973164,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"934"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.2.0","txnId":"38cd1832-75ea-4acd-920b-d9d1ce89c913"}}
{"add":{"path":"part-00000-be853604-1a95-499c-8bd3-0817e117e934-c000.snappy.parquet","partitionValues":{},"size":934,"modificationTime":1717803973000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"data\":\"test 1\",\"year\":2024,\"month\":1},\"maxValues\":{\"data\":\"test 1\",\"year\":2024,\"month\":1},\"nullCount\":{\"data\":0,\"year\":0,\"month\":0}}"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"commitInfo":{"timestamp":1717803973452,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"934"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.2.0","txnId":"69693df0-524f-44e8-9048-1ac8ffa3b07d"}}
{"add":{"path":"part-00000-dbbc308b-0878-4d8e-a814-d0b55ec894d4-c000.snappy.parquet","partitionValues":{},"size":934,"modificationTime":1717803973000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"data\":\"test 2\",\"year\":2024,\"month\":2},\"maxValues\":{\"data\":\"test 2\",\"year\":2024,\"month\":2},\"nullCount\":{\"data\":0,\"year\":0,\"month\":0}}"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"commitInfo":{"timestamp":1717803979381,"operation":"OPTIMIZE","operationParameters":{"predicate":"[]","zOrderBy":"[]","clusterBy":"[\"year\",\"month\"]","auto":false},"readVersion":2,"isolationLevel":"SnapshotIsolation","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"2","numRemovedBytes":"1868","p25FileSize":"966","numDeletionVectorsRemoved":"0","minFileSize":"966","numAddedFiles":"1","maxFileSize":"966","p75FileSize":"966","p50FileSize":"966","numAddedBytes":"966"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.2.0","txnId":"2c6d1d35-dd91-40df-b507-0683a3fe650f"}}
{"add":{"path":"part-00000-ba4afeda-e581-4193-879d-12f07682e4d1-c000.snappy.parquet","partitionValues":{},"size":966,"modificationTime":1717803979000,"dataChange":false,"stats":"{\"numRecords\":2,\"minValues\":{\"data\":\"test 1\",\"year\":2024,\"month\":1},\"maxValues\":{\"data\":\"test 2\",\"year\":2024,\"month\":2},\"nullCount\":{\"data\":0,\"year\":0,\"month\":0}}","tags":{"ZCUBE_ID":"8d97f1d1-6b6d-44da-9cba-f961c89869bd","ZCUBE_ZORDER_BY":"[\"year\",\"month\"]"},"clusteringProvider":"liquid"}}
{"remove":{"path":"part-00000-dbbc308b-0878-4d8e-a814-d0b55ec894d4-c000.snappy.parquet","deletionTimestamp":1717803978631,"dataChange":false,"extendedFileMetadata":true,"partitionValues":{},"size":934,"stats":"{\"numRecords\":1}"}}
{"remove":{"path":"part-00000-be853604-1a95-499c-8bd3-0817e117e934-c000.snappy.parquet","deletionTimestamp":1717803978631,"dataChange":false,"extendedFileMetadata":true,"partitionValues":{},"size":934,"stats":"{\"numRecords\":1}"}}
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 comments on commit f198b32

Please sign in to comment.