From 93e61034066de340d71ff586a00166dbb0a6ec12 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 10 Jun 2024 13:04:29 -0400 Subject: [PATCH 1/2] Promote IO support queries to cudf API --- cpp/include/cudf/io/config_utils.hpp | 53 +++++++++ cpp/include/cudf/io/nvcomp_adapter.hpp | 106 ++++++++++++++++++ cpp/include/cudf/utilities/logger.hpp | 8 +- cpp/src/io/comp/nvcomp_adapter.cpp | 8 +- cpp/src/io/comp/nvcomp_adapter.hpp | 67 +---------- cpp/src/io/orc/reader_impl_decode.cu | 2 +- cpp/src/io/orc/stripe_enc.cu | 2 +- cpp/src/io/orc/writer_impl.cu | 1 + cpp/src/io/parquet/reader_impl_chunking.cu | 6 +- cpp/src/io/parquet/writer_impl.cu | 2 +- cpp/src/io/text/bgzip_data_chunk_source.cu | 2 +- cpp/src/io/utilities/config_utils.cpp | 8 +- cpp/src/io/utilities/data_sink.cpp | 5 +- cpp/src/io/utilities/datasource.cpp | 7 +- cpp/src/io/utilities/file_io_utilities.cpp | 4 +- .../{config_utils.hpp => getenv_or.hpp} | 42 +------ 16 files changed, 199 insertions(+), 124 deletions(-) create mode 100644 cpp/include/cudf/io/config_utils.hpp create mode 100644 cpp/include/cudf/io/nvcomp_adapter.hpp rename cpp/src/io/utilities/{config_utils.hpp => getenv_or.hpp} (63%) diff --git a/cpp/include/cudf/io/config_utils.hpp b/cpp/include/cudf/io/config_utils.hpp new file mode 100644 index 00000000000..1827ba0e3e6 --- /dev/null +++ b/cpp/include/cudf/io/config_utils.hpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +namespace CUDF_EXPORT cudf { +namespace io::cufile_integration { + +/** + * @brief Returns true if cuFile and its compatibility mode are enabled. + */ +bool is_always_enabled(); + +/** + * @brief Returns true if only direct IO through cuFile is enabled (compatibility mode is disabled). + */ +bool is_gds_enabled(); + +/** + * @brief Returns true if KvikIO is enabled. + */ +bool is_kvikio_enabled(); + +} // namespace io::cufile_integration + +namespace io::nvcomp_integration { + +/** + * @brief Returns true if all nvCOMP uses are enabled. + */ +bool is_all_enabled(); + +/** + * @brief Returns true if stable nvCOMP use is enabled. + */ +bool is_stable_enabled(); + +} // namespace io::nvcomp_integration +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/nvcomp_adapter.hpp b/cpp/include/cudf/io/nvcomp_adapter.hpp new file mode 100644 index 00000000000..1e9bdac257e --- /dev/null +++ b/cpp/include/cudf/io/nvcomp_adapter.hpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include + +namespace CUDF_EXPORT cudf { +namespace io::nvcomp { + +enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4 }; + +/** + * @brief Set of parameters that impact whether the use nvCOMP features is enabled. + * + */ +struct feature_status_parameters { + int lib_major_version; ///< major version + int lib_minor_version; ///< minor version + int lib_patch_version; ///< patch version + bool are_all_integrations_enabled; ///< all integrations + bool are_stable_integrations_enabled; ///< stable integrations + int compute_capability_major; ///< cuda compute major version + + /** + * @brief Default Constructor + */ + feature_status_parameters(); + + /**pht + * @brief feature_status_parameters Constructor + * + * @param major positive integer representing major value + * @param minor positive integer representing minor value + * @param patch positive integer representing patch value + * @param all_enabled if all integrations are enabled + * @param stable_enabled if stable integrations are enabled + * @param cc_major cuda compute capability + */ + feature_status_parameters( + int major, int minor, int patch, bool all_enabled, bool stable_enabled, int cc_major) + : lib_major_version{major}, + lib_minor_version{minor}, + lib_patch_version{patch}, + are_all_integrations_enabled{all_enabled}, + are_stable_integrations_enabled{stable_enabled}, + compute_capability_major{cc_major} + { + } +}; + +/** + * @brief Equality operator overload. Required to use `feature_status_parameters` as a map key. + */ +inline bool operator==(feature_status_parameters const& lhs, feature_status_parameters const& rhs) +{ + return lhs.lib_major_version == rhs.lib_major_version and + lhs.lib_minor_version == rhs.lib_minor_version and + lhs.lib_patch_version == rhs.lib_patch_version and + lhs.are_all_integrations_enabled == rhs.are_all_integrations_enabled and + lhs.are_stable_integrations_enabled == rhs.are_stable_integrations_enabled and + lhs.compute_capability_major == rhs.compute_capability_major; +} + +/** + * @brief If a compression type is disabled through nvCOMP, returns the reason as a string. + * + * Result cab depend on nvCOMP version and environment variables. + * + * @param compression Compression type + * @param params Optional parameters to query status with different configurations + * @returns Reason for the feature disablement, `std::nullopt` if the feature is enabled + */ +[[nodiscard]] std::optional is_compression_disabled( + compression_type compression, feature_status_parameters params = feature_status_parameters()); + +/** + * @brief If a decompression type is disabled through nvCOMP, returns the reason as a string. + * + * Result can depend on nvCOMP version and environment variables. + * + * @param compression Compression type + * @param params Optional parameters to query status with different configurations + * @returns Reason for the feature disablement, `std::nullopt` if the feature is enabled + */ +[[nodiscard]] std::optional is_decompression_disabled( + compression_type compression, feature_status_parameters params = feature_status_parameters()); + +} // namespace io::nvcomp +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/utilities/logger.hpp b/cpp/include/cudf/utilities/logger.hpp index a39df064f44..45d5d1b12e1 100644 --- a/cpp/include/cudf/utilities/logger.hpp +++ b/cpp/include/cudf/utilities/logger.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,9 +16,11 @@ #pragma once +#include + #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @brief Returns the global logger. @@ -43,4 +45,4 @@ namespace cudf { */ spdlog::logger& logger(); -} // namespace cudf +} // namespace CUDF_EXPORT cudf diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp index f8920bf82c2..0e34c96debd 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cpp +++ b/cpp/src/io/comp/nvcomp_adapter.cpp @@ -13,11 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "nvcomp_adapter.hpp" -#include "io/utilities/config_utils.hpp" #include "nvcomp_adapter.cuh" +#include +#include #include #include @@ -472,8 +474,8 @@ feature_status_parameters::feature_status_parameters() : lib_major_version{NVCOMP_MAJOR_VERSION}, lib_minor_version{NVCOMP_MINOR_VERSION}, lib_patch_version{NVCOMP_PATCH_VERSION}, - are_all_integrations_enabled{detail::nvcomp_integration::is_all_enabled()}, - are_stable_integrations_enabled{detail::nvcomp_integration::is_stable_enabled()} + are_all_integrations_enabled{nvcomp_integration::is_all_enabled()}, + are_stable_integrations_enabled{nvcomp_integration::is_stable_enabled()} { int device; CUDF_CUDA_TRY(cudaGetDevice(&device)); diff --git a/cpp/src/io/comp/nvcomp_adapter.hpp b/cpp/src/io/comp/nvcomp_adapter.hpp index 1a680a050fd..43c79e32375 100644 --- a/cpp/src/io/comp/nvcomp_adapter.hpp +++ b/cpp/src/io/comp/nvcomp_adapter.hpp @@ -17,8 +17,9 @@ #pragma once #include "gpuinflate.hpp" -#include "io/utilities/config_utils.hpp" +#include +#include #include #include @@ -27,70 +28,6 @@ #include namespace cudf::io::nvcomp { - -enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4 }; - -/** - * @brief Set of parameters that impact whether the use nvCOMP features is enabled. - */ -struct feature_status_parameters { - int lib_major_version; - int lib_minor_version; - int lib_patch_version; - bool are_all_integrations_enabled; - bool are_stable_integrations_enabled; - int compute_capability_major; - - feature_status_parameters(); - feature_status_parameters( - int major, int minor, int patch, bool all_enabled, bool stable_enabled, int cc_major) - : lib_major_version{major}, - lib_minor_version{minor}, - lib_patch_version{patch}, - are_all_integrations_enabled{all_enabled}, - are_stable_integrations_enabled{stable_enabled}, - compute_capability_major{cc_major} - { - } -}; - -/** - * @brief Equality operator overload. Required to use `feature_status_parameters` as a map key. - */ -inline bool operator==(feature_status_parameters const& lhs, feature_status_parameters const& rhs) -{ - return lhs.lib_major_version == rhs.lib_major_version and - lhs.lib_minor_version == rhs.lib_minor_version and - lhs.lib_patch_version == rhs.lib_patch_version and - lhs.are_all_integrations_enabled == rhs.are_all_integrations_enabled and - lhs.are_stable_integrations_enabled == rhs.are_stable_integrations_enabled and - lhs.compute_capability_major == rhs.compute_capability_major; -} - -/** - * @brief If a compression type is disabled through nvCOMP, returns the reason as a string. - * - * Result cab depend on nvCOMP version and environment variables. - * - * @param compression Compression type - * @param params Optional parameters to query status with different configurations - * @returns Reason for the feature disablement, `std::nullopt` if the feature is enabled - */ -[[nodiscard]] std::optional is_compression_disabled( - compression_type compression, feature_status_parameters params = feature_status_parameters()); - -/** - * @brief If a decompression type is disabled through nvCOMP, returns the reason as a string. - * - * Result can depend on nvCOMP version and environment variables. - * - * @param compression Compression type - * @param params Optional parameters to query status with different configurations - * @returns Reason for the feature disablement, `std::nullopt` if the feature is enabled - */ -[[nodiscard]] std::optional is_decompression_disabled( - compression_type compression, feature_status_parameters params = feature_status_parameters()); - /** * @brief Device batch decompression of given type. * diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 72eb41b1360..8e20505d3ff 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -19,13 +19,13 @@ #include "io/orc/reader_impl.hpp" #include "io/orc/reader_impl_chunking.hpp" #include "io/orc/reader_impl_helpers.hpp" -#include "io/utilities/config_utils.hpp" #include "io/utilities/hostdevice_span.hpp" #include #include #include #include +#include #include #include diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index b6fc4e3510f..805959327ac 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -16,12 +16,12 @@ #include "io/comp/nvcomp_adapter.hpp" #include "io/utilities/block_utils.cuh" -#include "io/utilities/config_utils.hpp" #include "io/utilities/time_utils.cuh" #include "orc_gpu.hpp" #include #include +#include #include #include #include diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index e9e031a407a..4cb20bb7518 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu index d371ef5de93..3da303e6928 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.cu +++ b/cpp/src/io/parquet/reader_impl_chunking.cu @@ -16,7 +16,6 @@ #include "compact_protocol_reader.hpp" #include "io/comp/nvcomp_adapter.hpp" -#include "io/utilities/config_utils.hpp" #include "io/utilities/time_utils.cuh" #include "reader_impl.hpp" #include "reader_impl_chunking.hpp" @@ -25,6 +24,7 @@ #include #include #include +#include #include @@ -862,7 +862,7 @@ std::vector compute_page_splits_by_row(device_span #include #include +#include #include #include #include diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index 0e3ce779089..badcd3f58f9 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -16,12 +16,12 @@ #include "io/comp/nvcomp_adapter.hpp" #include "io/text/device_data_chunks.hpp" -#include "io/utilities/config_utils.hpp" #include #include #include #include +#include #include #include #include diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp index 20ac89b4d53..a3afbd52896 100644 --- a/cpp/src/io/utilities/config_utils.cpp +++ b/cpp/src/io/utilities/config_utils.cpp @@ -14,14 +14,16 @@ * limitations under the License. */ -#include "config_utils.hpp" +#include "getenv_or.hpp" +#include #include #include +#include #include -namespace cudf::io::detail { +namespace cudf::io { namespace cufile_integration { @@ -80,4 +82,4 @@ bool is_stable_enabled() { return is_all_enabled() or get_env_policy() == usage_ } // namespace nvcomp_integration -} // namespace cudf::io::detail +} // namespace cudf::io diff --git a/cpp/src/io/utilities/data_sink.cpp b/cpp/src/io/utilities/data_sink.cpp index a6cbbcd84a6..1dbb9369115 100644 --- a/cpp/src/io/utilities/data_sink.cpp +++ b/cpp/src/io/utilities/data_sink.cpp @@ -15,8 +15,9 @@ */ #include "file_io_utilities.hpp" -#include "io/utilities/config_utils.hpp" +#include +#include #include #include @@ -40,7 +41,7 @@ class file_sink : public data_sink { _output_stream.open(filepath, std::ios::out | std::ios::binary | std::ios::trunc); if (!_output_stream.is_open()) { detail::throw_on_file_open_failure(filepath, true); } - if (detail::cufile_integration::is_kvikio_enabled()) { + if (cufile_integration::is_kvikio_enabled()) { _kvikio_file = kvikio::FileHandle(filepath, "w"); CUDF_LOG_INFO("Writing a file using kvikIO, with compatibility mode {}.", _kvikio_file.is_compat_mode_on() ? "on" : "off"); diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index ca8932322bf..c8a438fc40b 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -15,9 +15,10 @@ */ #include "file_io_utilities.hpp" -#include "io/utilities/config_utils.hpp" +#include #include +#include #include #include #include @@ -44,7 +45,7 @@ class file_source : public datasource { explicit file_source(char const* filepath) : _file(filepath, O_RDONLY) { detail::force_init_cuda_context(); - if (detail::cufile_integration::is_kvikio_enabled()) { + if (cufile_integration::is_kvikio_enabled()) { _kvikio_file = kvikio::FileHandle(filepath); CUDF_LOG_INFO("Reading a file using kvikIO, with compatibility mode {}.", _kvikio_file.is_compat_mode_on() ? "on" : "off"); @@ -433,7 +434,7 @@ std::unique_ptr datasource::create(std::string const& filepath, size_t size) { #ifdef CUFILE_FOUND - if (detail::cufile_integration::is_always_enabled()) { + if (cufile_integration::is_always_enabled()) { // avoid mmap as GDS is expected to be used for most reads return std::make_unique(filepath.c_str()); } diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp index a9d4f19c848..9fe5959436d 100644 --- a/cpp/src/io/utilities/file_io_utilities.cpp +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -16,9 +16,11 @@ #include "file_io_utilities.hpp" -#include "io/utilities/config_utils.hpp" +#include "getenv_or.hpp" #include +#include +#include #include diff --git a/cpp/src/io/utilities/config_utils.hpp b/cpp/src/io/utilities/getenv_or.hpp similarity index 63% rename from cpp/src/io/utilities/config_utils.hpp rename to cpp/src/io/utilities/getenv_or.hpp index 74df1375e6f..3fd97a00b61 100644 --- a/cpp/src/io/utilities/config_utils.hpp +++ b/cpp/src/io/utilities/getenv_or.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,15 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #pragma once #include +#include #include #include -namespace cudf::io::detail { - +namespace { /** * @brief Returns the value of the environment variable, or a default value if the variable is not * present. @@ -45,37 +46,4 @@ T getenv_or(std::string_view env_var_name, T default_val) return converted_val; } -namespace cufile_integration { - -/** - * @brief Returns true if cuFile and its compatibility mode are enabled. - */ -bool is_always_enabled(); - -/** - * @brief Returns true if only direct IO through cuFile is enabled (compatibility mode is disabled). - */ -bool is_gds_enabled(); - -/** - * @brief Returns true if KvikIO is enabled. - */ -bool is_kvikio_enabled(); - -} // namespace cufile_integration - -namespace nvcomp_integration { - -/** - * @brief Returns true if all nvCOMP uses are enabled. - */ -bool is_all_enabled(); - -/** - * @brief Returns true if stable nvCOMP use is enabled. - */ -bool is_stable_enabled(); - -} // namespace nvcomp_integration - -} // namespace cudf::io::detail +} // namespace From 764af1e266e7e0ec994bcbb9ee1573c92afd0dbf Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 9 Jul 2024 16:09:02 -0400 Subject: [PATCH 2/2] Apply documentation improvements from code review Co-authored-by: Bradley Dice --- cpp/include/cudf/io/nvcomp_adapter.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/include/cudf/io/nvcomp_adapter.hpp b/cpp/include/cudf/io/nvcomp_adapter.hpp index 1e9bdac257e..f3260d0cb53 100644 --- a/cpp/include/cudf/io/nvcomp_adapter.hpp +++ b/cpp/include/cudf/io/nvcomp_adapter.hpp @@ -27,7 +27,7 @@ namespace io::nvcomp { enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4 }; /** - * @brief Set of parameters that impact whether the use nvCOMP features is enabled. + * @brief Set of parameters that impact whether nvCOMP features are enabled. * */ struct feature_status_parameters { @@ -43,15 +43,15 @@ struct feature_status_parameters { */ feature_status_parameters(); - /**pht + /** * @brief feature_status_parameters Constructor * - * @param major positive integer representing major value - * @param minor positive integer representing minor value - * @param patch positive integer representing patch value + * @param major positive integer representing major value of nvcomp + * @param minor positive integer representing minor value of nvcomp + * @param patch positive integer representing patch value of nvcomp * @param all_enabled if all integrations are enabled * @param stable_enabled if stable integrations are enabled - * @param cc_major cuda compute capability + * @param cc_major CUDA compute capability */ feature_status_parameters( int major, int minor, int patch, bool all_enabled, bool stable_enabled, int cc_major) @@ -81,7 +81,7 @@ inline bool operator==(feature_status_parameters const& lhs, feature_status_para /** * @brief If a compression type is disabled through nvCOMP, returns the reason as a string. * - * Result cab depend on nvCOMP version and environment variables. + * Result depends on nvCOMP version and environment variables. * * @param compression Compression type * @param params Optional parameters to query status with different configurations @@ -93,7 +93,7 @@ inline bool operator==(feature_status_parameters const& lhs, feature_status_para /** * @brief If a decompression type is disabled through nvCOMP, returns the reason as a string. * - * Result can depend on nvCOMP version and environment variables. + * Result depends on nvCOMP version and environment variables. * * @param compression Compression type * @param params Optional parameters to query status with different configurations