From 1078326535c9989a2e904d78ceb708a097be989b Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Wed, 10 Jan 2024 13:43:58 -0800 Subject: [PATCH] Write cuDF version in Parquet "created_by" metadata field (#14721) Populate the informational `created_by` field in the Parquet file metadata. Identifying the source of a parquet file can help with tracking down interoperability problems. Authors: - Ed Seidl (https://github.com/etseidl) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14721 --- cpp/CMakeLists.txt | 8 +++++++- cpp/src/io/parquet/writer_impl.cu | 9 ++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a7c34ca489c..cb1fdb1f557 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -658,6 +658,12 @@ set_source_files_properties( PROPERTIES COMPILE_DEFINITIONS "_FILE_OFFSET_BITS=64" ) +set_property( + SOURCE src/io/parquet/writer_impl.cu + APPEND + PROPERTY COMPILE_DEFINITIONS "CUDF_VERSION=${PROJECT_VERSION}" +) + set_target_properties( cudf PROPERTIES BUILD_RPATH "\$ORIGIN" diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index c452f632cd6..279a814a4e1 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -55,6 +55,10 @@ #include #include +#ifndef CUDF_VERSION +#error "CUDF_VERSION is not defined" +#endif + namespace cudf::io::parquet::detail { using namespace cudf::io::detail; @@ -108,7 +112,7 @@ struct aggregate_writer_metadata { meta.num_rows = this->files[part].num_rows; meta.row_groups = this->files[part].row_groups; meta.key_value_metadata = this->files[part].key_value_metadata; - meta.created_by = this->created_by; + meta.created_by = "cudf version " CUDF_STRINGIFY(CUDF_VERSION); meta.column_orders = this->column_orders; return meta; } @@ -171,7 +175,6 @@ struct aggregate_writer_metadata { std::vector> column_indexes; }; std::vector files; - std::string created_by = ""; thrust::optional> column_orders = thrust::nullopt; };