Skip to content

Commit

Permalink
Update hudi test resource for hudi_non_part_cow table
Browse files Browse the repository at this point in the history
This change keeps the schema of `hudi_non_part_cow` in sync with that of `hudi_cow_pt_tbl`.
  • Loading branch information
krvikash authored and Praveen2112 committed Dec 22, 2023
1 parent 5f4e7b6 commit 261322f
Show file tree
Hide file tree
Showing 10 changed files with 109 additions and 92 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ protected QueryRunner createQueryRunner()
public void testReadNonPartitionedTable()
{
assertQuery(
"SELECT rowid, name FROM " + HUDI_NON_PART_COW,
"SELECT * FROM VALUES ('row_1', 'bob'), ('row_2', 'john'), ('row_3', 'tom')");
"SELECT id, name FROM " + HUDI_NON_PART_COW,
"SELECT * FROM VALUES (1, 'a1'), (2, 'a2')");
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,11 @@ public Map<String, String> getPartitions()
private static List<Column> nonPartitionRegularColumns()
{
return ImmutableList.of(
column("rowid", HIVE_STRING),
column("partitionid", HIVE_STRING),
column("precomb", HIVE_LONG),
column("id", HIVE_LONG),
column("name", HIVE_STRING),
column("versionid", HIVE_STRING),
column("tobedeletedstr", HIVE_STRING),
column("inttolong", HIVE_INT),
column("longtoint", HIVE_LONG));
column("ts", HIVE_LONG),
column("dt", HIVE_STRING),
column("hh", HIVE_STRING));
}

private static List<Column> stockTicksRegularColumns()
Expand Down
47 changes: 47 additions & 0 deletions plugin/trino-hudi/src/test/resources/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Hudi Test Resources

## Generating Hudi Resources

Follow these steps to create the `hudi_non_part_cow` test table and use it for testing. The `hudi_non_part_cow` resource was generated using Trino version `423`.

### Start the Hudi environment

Execute the following command in the terminal to initiate the Hudi environment:

```shell
testing/bin/ptl env up --environment singlenode-hudi
```

### Generate Resources

* Open a terminal in the `ptl-spark` container and start the `spark-sql` shell.
* Execute the following Spark SQL queries to create the `hudi_non_part_cow` table:

```
spark-sql> CREATE TABLE default.hudi_non_part_cow (
id bigint,
name string,
ts bigint,
dt string,
hh string
)
USING hudi
TBLPROPERTIES (
type = 'cow',
primaryKey = 'id',
preCombineField = 'ts'
)
LOCATION 's3://test-bucket/hudi_non_part_cow';
spark-sql> INSERT INTO default.hudi_non_part_cow (id, name, ts, dt, hh) VALUES
(1, 'a1', 1000, '2021-12-09', '10'),
(2, 'a2', 2000, '2021-12-09', '11');
```

### Download Resources

Download the `hudi_non_part_cow` table from the MinIO console at http://localhost:9001/buckets/test-bucket/browse.

### Use Resources

Unzip the downloaded `hudi_non_part_cow.zip`, then remove any unnecessary files so the resource is ready for testing.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"partitionToWriteStats" : {
"" : [ {
"fileId" : "05b0f4ec-00fb-49f2-a1e2-7f510f3da93b-0",
"path" : "05b0f4ec-00fb-49f2-a1e2-7f510f3da93b-0_0-27-28_20231127051653361.parquet",
"prevCommit" : "null",
"numWrites" : 2,
"numDeletes" : 0,
"numUpdateWrites" : 0,
"numInserts" : 2,
"totalWriteBytes" : 435338,
"totalWriteErrors" : 0,
"tempPath" : null,
"partitionPath" : "",
"totalLogRecords" : 0,
"totalLogFilesCompacted" : 0,
"totalLogSizeCompacted" : 0,
"totalUpdatedRecordsCompacted" : 0,
"totalLogBlocks" : 0,
"totalCorruptLogBlock" : 0,
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 435338,
"minEventTime" : null,
"maxEventTime" : null,
"runtimeStats" : {
"totalScanTime" : 0,
"totalUpsertTime" : 0,
"totalCreateTime" : 856
}
} ]
},
"compacted" : false,
"extraMetadata" : {
"schema" : "{\"type\":\"record\",\"name\":\"hudi_non_part_cow_record\",\"namespace\":\"hoodie.hudi_non_part_cow\",\"fields\":[{\"name\":\"id\",\"type\":[\"null\",\"long\"],\"default\":null},{\"name\":\"name\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"ts\",\"type\":[\"null\",\"long\"],\"default\":null},{\"name\":\"dt\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"hh\",\"type\":[\"null\",\"string\"],\"default\":null}]}"
},
"operationType" : "UPSERT"
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"numWrites" : 0,
"numDeletes" : 0,
"numUpdateWrites" : 0,
"numInserts" : 3,
"numInserts" : 2,
"totalWriteBytes" : 0,
"totalWriteErrors" : 0,
"tempPath" : null,
Expand All @@ -21,28 +21,11 @@
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 0,
"minEventTime" : null,
"maxEventTime" : null
"maxEventTime" : null,
"runtimeStats" : null
} ]
},
"compacted" : false,
"extraMetadata" : { },
"operationType" : "INSERT",
"writePartitionPaths" : [ "" ],
"fileIdAndRelativePaths" : {
"" : null
},
"totalRecordsDeleted" : 0,
"totalLogRecordsCompacted" : 0,
"totalLogFilesCompacted" : 0,
"totalCompactedRecordsUpdated" : 0,
"totalLogFilesSize" : 0,
"totalScanTime" : 0,
"totalCreateTime" : 0,
"totalUpsertTime" : 0,
"minAndMaxEventTime" : {
"Optional.empty" : {
"val" : null,
"present" : false
}
}
"operationType" : "UPSERT"
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
#Properties saved on Fri Dec 17 11:05:14 UTC 2021
#Fri Dec 17 11:05:14 UTC 2021
hoodie.table.precombine.field=preComb
hoodie.table.partition.fields=
#Updated at 2023-11-27T05:16:58.380652Z
#Mon Nov 27 05:16:58 UTC 2023
hoodie.table.type=COPY_ON_WRITE
hoodie.table.metadata.partitions=files
hoodie.table.precombine.field=ts
hoodie.archivelog.folder=archived
hoodie.populate.meta.fields=true
hoodie.table.create.schema={"type"\:"record","name"\:"hudi_non_part_cow_record","namespace"\:"hoodie.hudi_non_part_cow","fields"\:[{"name"\:"_hoodie_commit_time","type"\:["string","null"]},{"name"\:"_hoodie_commit_seqno","type"\:["string","null"]},{"name"\:"_hoodie_record_key","type"\:["string","null"]},{"name"\:"_hoodie_partition_path","type"\:["string","null"]},{"name"\:"_hoodie_file_name","type"\:["string","null"]},{"name"\:"id","type"\:["long","null"]},{"name"\:"name","type"\:["string","null"]},{"name"\:"ts","type"\:["long","null"]},{"name"\:"dt","type"\:["string","null"]},{"name"\:"hh","type"\:["string","null"]}]}
hoodie.timeline.layout.version=1
hoodie.table.version=3
hoodie.table.recordkey.fields=rowId
hoodie.table.base.file.format=PARQUET
hoodie.table.keygenerator.class=org.apache.hudi.keygen.NonpartitionedKeyGenerator
hoodie.table.checksum=2968816715
hoodie.datasource.write.drop.partition.columns=false
hoodie.table.recordkey.fields=id
hoodie.table.name=hudi_non_part_cow
hoodie.datasource.write.hive_style_partitioning=false
hoodie.datasource.write.hive_style_partitioning=true
hoodie.table.keygenerator.class=org.apache.hudi.keygen.NonpartitionedKeyGenerator
hoodie.database.name=default
hoodie.datasource.write.partitionpath.urlencode=false
hoodie.table.version=5
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#partition metadata
#Fri Dec 17 11:05:23 UTC 2021
commitTime=20211217110514527
#Mon Nov 27 05:16:59 UTC 2023
commitTime=20231127051653361
partitionDepth=0
Binary file not shown.

0 comments on commit 261322f

Please sign in to comment.