Skip to content

Commit

Permalink
Write cuDF version in Parquet "created_by" metadata field (#14721)
Browse files Browse the repository at this point in the history
Populate the informational `created_by` field in the Parquet file metadata. Identifying the source of a Parquet file can help track down interoperability problems.

Authors:
  - Ed Seidl (https://github.com/etseidl)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Nghia Truong (https://github.com/ttnghia)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #14721
  • Loading branch information
etseidl authored Jan 10, 2024
1 parent 3f19d04 commit 1078326
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 4 deletions.
8 changes: 7 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2018-2023, NVIDIA CORPORATION.
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -658,6 +658,12 @@ set_source_files_properties(
PROPERTIES COMPILE_DEFINITIONS "_FILE_OFFSET_BITS=64"
)

# Pass the project version to the Parquet writer as the CUDF_VERSION compile definition.
# writer_impl.cu stringifies it into the Parquet "created_by" metadata field and raises a
# compile-time #error if the definition is missing. APPEND adds to the COMPILE_DEFINITIONS
# property of this source file rather than replacing its current value.
set_property(
SOURCE src/io/parquet/writer_impl.cu
APPEND
PROPERTY COMPILE_DEFINITIONS "CUDF_VERSION=${PROJECT_VERSION}"
)

set_target_properties(
cudf
PROPERTIES BUILD_RPATH "\$ORIGIN"
Expand Down
9 changes: 6 additions & 3 deletions cpp/src/io/parquet/writer_impl.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -55,6 +55,10 @@
#include <numeric>
#include <utility>

// CUDF_VERSION is expected to arrive as a compile definition for this translation unit.
// It is stringified into the Parquet "created_by" metadata field further down, so stop the
// build with a clear message here instead of failing later at the point of use.
#ifndef CUDF_VERSION
#error "CUDF_VERSION is not defined"
#endif

namespace cudf::io::parquet::detail {

using namespace cudf::io::detail;
Expand Down Expand Up @@ -108,7 +112,7 @@ struct aggregate_writer_metadata {
meta.num_rows = this->files[part].num_rows;
meta.row_groups = this->files[part].row_groups;
meta.key_value_metadata = this->files[part].key_value_metadata;
meta.created_by = this->created_by;
meta.created_by = "cudf version " CUDF_STRINGIFY(CUDF_VERSION);
meta.column_orders = this->column_orders;
return meta;
}
Expand Down Expand Up @@ -171,7 +175,6 @@ struct aggregate_writer_metadata {
std::vector<std::vector<uint8_t>> column_indexes;
};
std::vector<per_file_metadata> files;
std::string created_by = "";
thrust::optional<std::vector<ColumnOrder>> column_orders = thrust::nullopt;
};

Expand Down

0 comments on commit 1078326

Please sign in to comment.