From 38631a635fbfe05f69fd243df03868ec1f23d3c5 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Wed, 15 Dec 2021 08:29:05 -0600 Subject: [PATCH] Fix the java build after parquet partitioning support (#9908) This fixes the java build after #9810 went in. There is a lot of copy/paste in this first draft, because I just wanted to get something to work. Not sure if it is worth going back to make it common everywhere. Authors: - Robert (Bobby) Evans (https://github.com/revans2) Approvers: - Jason Lowe (https://github.com/jlowe) URL: https://github.com/rapidsai/cudf/pull/9908 --- java/src/main/native/src/TableJni.cpp | 63 +++++++++++++++++++-------- 1 file changed, 45 insertions(+), 18 deletions(-) diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 5bae4f5f399..0914c8a23f7 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -705,15 +705,12 @@ int set_column_metadata(cudf::io::column_in_metadata &column_metadata, void createTableMetaData(JNIEnv *env, jint num_children, jobjectArray &j_col_names, jintArray &j_children, jbooleanArray &j_col_nullability, - jobjectArray &j_metadata_keys, jobjectArray &j_metadata_values, jbooleanArray &j_is_int96, jintArray &j_precisions, jbooleanArray &j_is_map, cudf::io::table_input_metadata &metadata) { cudf::jni::auto_set_device(env); cudf::jni::native_jstringArray col_names(env, j_col_names); cudf::jni::native_jbooleanArray col_nullability(env, j_col_nullability); cudf::jni::native_jbooleanArray is_int96(env, j_is_int96); - cudf::jni::native_jstringArray meta_keys(env, j_metadata_keys); - cudf::jni::native_jstringArray meta_values(env, j_metadata_values); cudf::jni::native_jintArray precisions(env, j_precisions); cudf::jni::native_jintArray children(env, j_children); cudf::jni::native_jbooleanArray is_map(env, j_is_map); @@ -742,9 +739,6 @@ void createTableMetaData(JNIEnv *env, jint num_children, jobjectArray &j_col_nam is_int96, precisions, is_map, children, childs_children, read_index); } } - for (auto i = 0; i < meta_keys.size(); ++i) { - metadata.user_data[meta_keys[i].get()] = meta_values[i].get(); - } } // Check that window parameters are valid. @@ -1364,15 +1358,23 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetBufferBegin( using namespace cudf::jni; sink_info sink{data_sink.get()}; table_input_metadata metadata; - createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, - j_metadata_keys, j_metadata_values, j_isInt96, j_precisions, j_is_map, - metadata); + createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, j_isInt96, + j_precisions, j_is_map, metadata); + + cudf::jni::native_jstringArray meta_keys(env, j_metadata_keys); + cudf::jni::native_jstringArray meta_values(env, j_metadata_values); + + std::map kv_metadata; + for (auto i = 0; i < meta_keys.size(); ++i) { + kv_metadata[meta_keys[i].get()] = meta_values[i].get(); + } chunked_parquet_writer_options opts = chunked_parquet_writer_options::builder(sink) .metadata(&metadata) .compression(static_cast(j_compression)) .stats_level(static_cast(j_stats_freq)) + .key_value_metadata({kv_metadata}) .build(); auto writer_ptr = std::make_unique(opts); cudf::jni::native_parquet_writer_handle *ret = @@ -1398,15 +1400,24 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetFileBegin( using namespace cudf::io; using namespace cudf::jni; table_input_metadata metadata; - createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, - j_metadata_keys, j_metadata_values, j_isInt96, j_precisions, j_is_map, - metadata); + createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, j_isInt96, + j_precisions, j_is_map, metadata); + + cudf::jni::native_jstringArray meta_keys(env, j_metadata_keys); + cudf::jni::native_jstringArray meta_values(env, j_metadata_values); + + std::map kv_metadata; + for (auto i = 0; i < meta_keys.size(); ++i) { + kv_metadata[meta_keys[i].get()] = meta_values[i].get(); + } + sink_info sink{output_path.get()}; chunked_parquet_writer_options opts = chunked_parquet_writer_options::builder(sink) .metadata(&metadata) .compression(static_cast(j_compression)) .stats_level(static_cast(j_stats_freq)) + .key_value_metadata({kv_metadata}) .build(); auto writer_ptr = std::make_unique(opts); @@ -1519,9 +1530,16 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCBufferBegin( table_input_metadata metadata; // ORC has no `j_is_int96`, but `createTableMetaData` needs a lvalue. jbooleanArray j_is_int96 = NULL; - createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, - j_metadata_keys, j_metadata_values, j_is_int96, j_precisions, j_is_map, - metadata); + createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, j_is_int96, + j_precisions, j_is_map, metadata); + + cudf::jni::native_jstringArray meta_keys(env, j_metadata_keys); + cudf::jni::native_jstringArray meta_values(env, j_metadata_values); + + std::map kv_metadata; + for (auto i = 0; i < meta_keys.size(); ++i) { + kv_metadata[meta_keys[i].get()] = meta_values[i].get(); + } std::unique_ptr data_sink( new cudf::jni::jni_writer_data_sink(env, consumer)); @@ -1530,6 +1548,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCBufferBegin( .metadata(&metadata) .compression(static_cast(j_compression)) .enable_statistics(true) + .key_value_metadata(kv_metadata) .build(); auto writer_ptr = std::make_unique(opts); cudf::jni::native_orc_writer_handle *ret = @@ -1556,15 +1575,23 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin( table_input_metadata metadata; // ORC has no `j_is_int96`, but `createTableMetaData` needs a lvalue. jbooleanArray j_is_int96 = NULL; - createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, - j_metadata_keys, j_metadata_values, j_is_int96, j_precisions, j_is_map, - metadata); + createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, j_is_int96, + j_precisions, j_is_map, metadata); + + cudf::jni::native_jstringArray meta_keys(env, j_metadata_keys); + cudf::jni::native_jstringArray meta_values(env, j_metadata_values); + + std::map kv_metadata; + for (auto i = 0; i < meta_keys.size(); ++i) { + kv_metadata[meta_keys[i].get()] = meta_values[i].get(); + } sink_info sink{output_path.get()}; chunked_orc_writer_options opts = chunked_orc_writer_options::builder(sink) .metadata(&metadata) .compression(static_cast(j_compression)) .enable_statistics(true) + .key_value_metadata(kv_metadata) .build(); auto writer_ptr = std::make_unique(opts); cudf::jni::native_orc_writer_handle *ret =