From acc0b00271fd003ec5e0c632adafbcdd6eabc491 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 10 Jun 2024 13:04:29 -0400 Subject: [PATCH] Promote IO support queries to cudf API --- cpp/include/cudf/io/config_utils.hpp | 53 +++++++++++ cpp/include/cudf/io/nvcomp_adapter.hpp | 92 +++++++++++++++++++ cpp/include/cudf/utilities/logger.hpp | 4 +- cpp/src/io/comp/nvcomp_adapter.cpp | 8 +- cpp/src/io/comp/nvcomp_adapter.hpp | 67 +------------- cpp/src/io/orc/reader_impl_decode.cu | 2 +- cpp/src/io/orc/stripe_enc.cu | 2 +- cpp/src/io/orc/writer_impl.cu | 1 + cpp/src/io/parquet/reader_impl_chunking.cu | 6 +- cpp/src/io/parquet/writer_impl.cu | 2 +- cpp/src/io/text/bgzip_data_chunk_source.cu | 2 +- cpp/src/io/utilities/config_utils.cpp | 8 +- cpp/src/io/utilities/data_sink.cpp | 5 +- cpp/src/io/utilities/datasource.cpp | 7 +- cpp/src/io/utilities/file_io_utilities.cpp | 7 +- .../{config_utils.hpp => getenv_or.hpp} | 42 +-------- 16 files changed, 184 insertions(+), 124 deletions(-) create mode 100644 cpp/include/cudf/io/config_utils.hpp create mode 100644 cpp/include/cudf/io/nvcomp_adapter.hpp rename cpp/src/io/utilities/{config_utils.hpp => getenv_or.hpp} (63%) diff --git a/cpp/include/cudf/io/config_utils.hpp b/cpp/include/cudf/io/config_utils.hpp new file mode 100644 index 00000000000..430f9caf0f6 --- /dev/null +++ b/cpp/include/cudf/io/config_utils.hpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +namespace CUDF_EXPORT cudf { +namespace io::cufile_integration { + +/** + * @brief Returns true if cuFile and its compatibility mode are enabled. + */ +bool is_always_enabled(); + +/** + * @brief Returns true if only direct IO through cuFile is enabled (compatibility mode is disabled). + */ +bool is_gds_enabled(); + +/** + * @brief Returns true if KvikIO is enabled. + */ +bool is_kvikio_enabled(); + +} // namespace io::cufile_integration + +namespace io::nvcomp_integration { + +/** + * @brief Returns true if all nvCOMP uses are enabled. + */ +bool is_all_enabled(); + +/** + * @brief Returns true if stable nvCOMP use is enabled. + */ +bool is_stable_enabled(); + +} // namespace io::nvcomp_integration +} // namespace cudf diff --git a/cpp/include/cudf/io/nvcomp_adapter.hpp b/cpp/include/cudf/io/nvcomp_adapter.hpp new file mode 100644 index 00000000000..ed50cc59334 --- /dev/null +++ b/cpp/include/cudf/io/nvcomp_adapter.hpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include + + +namespace CUDF_EXPORT cudf { +namespace io::nvcomp { + +enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4 }; + +/** + * @brief Set of parameters that impact whether the use nvCOMP features is enabled. 
+ */ +struct feature_status_parameters { + int lib_major_version; + int lib_minor_version; + int lib_patch_version; + bool are_all_integrations_enabled; + bool are_stable_integrations_enabled; + int compute_capability_major; + + feature_status_parameters(); + feature_status_parameters( + int major, int minor, int patch, bool all_enabled, bool stable_enabled, int cc_major) + : lib_major_version{major}, + lib_minor_version{minor}, + lib_patch_version{patch}, + are_all_integrations_enabled{all_enabled}, + are_stable_integrations_enabled{stable_enabled}, + compute_capability_major{cc_major} + { + } +}; + +/** + * @brief Equality operator overload. Required to use `feature_status_parameters` as a map key. + */ +inline bool operator==(feature_status_parameters const& lhs, feature_status_parameters const& rhs) +{ + return lhs.lib_major_version == rhs.lib_major_version and + lhs.lib_minor_version == rhs.lib_minor_version and + lhs.lib_patch_version == rhs.lib_patch_version and + lhs.are_all_integrations_enabled == rhs.are_all_integrations_enabled and + lhs.are_stable_integrations_enabled == rhs.are_stable_integrations_enabled and + lhs.compute_capability_major == rhs.compute_capability_major; +} + +/** + * @brief If a compression type is disabled through nvCOMP, returns the reason as a string. + * + * Result can depend on nvCOMP version and environment variables. + * + * @param compression Compression type + * @param params Optional parameters to query status with different configurations + * @returns Reason for the feature disablement, `std::nullopt` if the feature is enabled + */ +[[nodiscard]] std::optional is_compression_disabled( + compression_type compression, feature_status_parameters params = feature_status_parameters()); + +/** + * @brief If a decompression type is disabled through nvCOMP, returns the reason as a string. + * + * Result can depend on nvCOMP version and environment variables. 
+ * + * @param compression Compression type + * @param params Optional parameters to query status with different configurations + * @returns Reason for the feature disablement, `std::nullopt` if the feature is enabled + */ +[[nodiscard]] std::optional is_decompression_disabled( + compression_type compression, feature_status_parameters params = feature_status_parameters()); + +} // namespace io::nvcomp +} // namespace cudf diff --git a/cpp/include/cudf/utilities/logger.hpp b/cpp/include/cudf/utilities/logger.hpp index a39df064f44..3083ffd5281 100644 --- a/cpp/include/cudf/utilities/logger.hpp +++ b/cpp/include/cudf/utilities/logger.hpp @@ -16,9 +16,11 @@ #pragma once +#include + #include -namespace cudf { +namespace CUDF_EXPORT cudf { /** * @brief Returns the global logger. diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp index f8920bf82c2..0e34c96debd 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cpp +++ b/cpp/src/io/comp/nvcomp_adapter.cpp @@ -13,11 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "nvcomp_adapter.hpp" -#include "io/utilities/config_utils.hpp" #include "nvcomp_adapter.cuh" +#include +#include #include #include @@ -472,8 +474,8 @@ feature_status_parameters::feature_status_parameters() : lib_major_version{NVCOMP_MAJOR_VERSION}, lib_minor_version{NVCOMP_MINOR_VERSION}, lib_patch_version{NVCOMP_PATCH_VERSION}, - are_all_integrations_enabled{detail::nvcomp_integration::is_all_enabled()}, - are_stable_integrations_enabled{detail::nvcomp_integration::is_stable_enabled()} + are_all_integrations_enabled{nvcomp_integration::is_all_enabled()}, + are_stable_integrations_enabled{nvcomp_integration::is_stable_enabled()} { int device; CUDF_CUDA_TRY(cudaGetDevice(&device)); diff --git a/cpp/src/io/comp/nvcomp_adapter.hpp b/cpp/src/io/comp/nvcomp_adapter.hpp index 1a680a050fd..9d43e1289de 100644 --- a/cpp/src/io/comp/nvcomp_adapter.hpp +++ b/cpp/src/io/comp/nvcomp_adapter.hpp @@ -15,10 +15,11 @@ */ #pragma once +#include #include "gpuinflate.hpp" -#include "io/utilities/config_utils.hpp" +#include #include #include @@ -27,70 +28,6 @@ #include namespace cudf::io::nvcomp { - -enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4 }; - -/** - * @brief Set of parameters that impact whether the use nvCOMP features is enabled. - */ -struct feature_status_parameters { - int lib_major_version; - int lib_minor_version; - int lib_patch_version; - bool are_all_integrations_enabled; - bool are_stable_integrations_enabled; - int compute_capability_major; - - feature_status_parameters(); - feature_status_parameters( - int major, int minor, int patch, bool all_enabled, bool stable_enabled, int cc_major) - : lib_major_version{major}, - lib_minor_version{minor}, - lib_patch_version{patch}, - are_all_integrations_enabled{all_enabled}, - are_stable_integrations_enabled{stable_enabled}, - compute_capability_major{cc_major} - { - } -}; - -/** - * @brief Equality operator overload. Required to use `feature_status_parameters` as a map key. 
- */ -inline bool operator==(feature_status_parameters const& lhs, feature_status_parameters const& rhs) -{ - return lhs.lib_major_version == rhs.lib_major_version and - lhs.lib_minor_version == rhs.lib_minor_version and - lhs.lib_patch_version == rhs.lib_patch_version and - lhs.are_all_integrations_enabled == rhs.are_all_integrations_enabled and - lhs.are_stable_integrations_enabled == rhs.are_stable_integrations_enabled and - lhs.compute_capability_major == rhs.compute_capability_major; -} - -/** - * @brief If a compression type is disabled through nvCOMP, returns the reason as a string. - * - * Result cab depend on nvCOMP version and environment variables. - * - * @param compression Compression type - * @param params Optional parameters to query status with different configurations - * @returns Reason for the feature disablement, `std::nullopt` if the feature is enabled - */ -[[nodiscard]] std::optional is_compression_disabled( - compression_type compression, feature_status_parameters params = feature_status_parameters()); - -/** - * @brief If a decompression type is disabled through nvCOMP, returns the reason as a string. - * - * Result can depend on nvCOMP version and environment variables. - * - * @param compression Compression type - * @param params Optional parameters to query status with different configurations - * @returns Reason for the feature disablement, `std::nullopt` if the feature is enabled - */ -[[nodiscard]] std::optional is_decompression_disabled( - compression_type compression, feature_status_parameters params = feature_status_parameters()); - /** * @brief Device batch decompression of given type. 
* diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 72eb41b1360..8e20505d3ff 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -19,13 +19,13 @@ #include "io/orc/reader_impl.hpp" #include "io/orc/reader_impl_chunking.hpp" #include "io/orc/reader_impl_helpers.hpp" -#include "io/utilities/config_utils.hpp" #include "io/utilities/hostdevice_span.hpp" #include #include #include #include +#include #include #include diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index b6fc4e3510f..805959327ac 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -16,12 +16,12 @@ #include "io/comp/nvcomp_adapter.hpp" #include "io/utilities/block_utils.cuh" -#include "io/utilities/config_utils.hpp" #include "io/utilities/time_utils.cuh" #include "orc_gpu.hpp" #include #include +#include #include #include #include diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index e9e031a407a..4cb20bb7518 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu index d371ef5de93..3da303e6928 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.cu +++ b/cpp/src/io/parquet/reader_impl_chunking.cu @@ -16,7 +16,6 @@ #include "compact_protocol_reader.hpp" #include "io/comp/nvcomp_adapter.hpp" -#include "io/utilities/config_utils.hpp" #include "io/utilities/time_utils.cuh" #include "reader_impl.hpp" #include "reader_impl_chunking.hpp" @@ -25,6 +24,7 @@ #include #include #include +#include #include @@ -862,7 +862,7 @@ std::vector compute_page_splits_by_row(device_span #include #include +#include #include #include #include diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu index 
0e3ce779089..cafc545da71 100644 --- a/cpp/src/io/text/bgzip_data_chunk_source.cu +++ b/cpp/src/io/text/bgzip_data_chunk_source.cu @@ -16,7 +16,6 @@ #include "io/comp/nvcomp_adapter.hpp" #include "io/text/device_data_chunks.hpp" -#include "io/utilities/config_utils.hpp" #include #include @@ -24,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp index 20ac89b4d53..a3afbd52896 100644 --- a/cpp/src/io/utilities/config_utils.cpp +++ b/cpp/src/io/utilities/config_utils.cpp @@ -14,14 +14,16 @@ * limitations under the License. */ -#include "config_utils.hpp" +#include "getenv_or.hpp" +#include #include #include +#include #include -namespace cudf::io::detail { +namespace cudf::io { namespace cufile_integration { @@ -80,4 +82,4 @@ bool is_stable_enabled() { return is_all_enabled() or get_env_policy() == usage_ } // namespace nvcomp_integration -} // namespace cudf::io::detail +} // namespace cudf::io diff --git a/cpp/src/io/utilities/data_sink.cpp b/cpp/src/io/utilities/data_sink.cpp index a6cbbcd84a6..92b2bb57a7f 100644 --- a/cpp/src/io/utilities/data_sink.cpp +++ b/cpp/src/io/utilities/data_sink.cpp @@ -15,9 +15,10 @@ */ #include "file_io_utilities.hpp" -#include "io/utilities/config_utils.hpp" +#include #include +#include #include #include @@ -40,7 +41,7 @@ class file_sink : public data_sink { _output_stream.open(filepath, std::ios::out | std::ios::binary | std::ios::trunc); if (!_output_stream.is_open()) { detail::throw_on_file_open_failure(filepath, true); } - if (detail::cufile_integration::is_kvikio_enabled()) { + if (cufile_integration::is_kvikio_enabled()) { _kvikio_file = kvikio::FileHandle(filepath, "w"); CUDF_LOG_INFO("Writing a file using kvikIO, with compatibility mode {}.", _kvikio_file.is_compat_mode_on() ? 
"on" : "off"); diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index ca8932322bf..c8a438fc40b 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -15,9 +15,10 @@ */ #include "file_io_utilities.hpp" -#include "io/utilities/config_utils.hpp" +#include #include +#include #include #include #include @@ -44,7 +45,7 @@ class file_source : public datasource { explicit file_source(char const* filepath) : _file(filepath, O_RDONLY) { detail::force_init_cuda_context(); - if (detail::cufile_integration::is_kvikio_enabled()) { + if (cufile_integration::is_kvikio_enabled()) { _kvikio_file = kvikio::FileHandle(filepath); CUDF_LOG_INFO("Reading a file using kvikIO, with compatibility mode {}.", _kvikio_file.is_compat_mode_on() ? "on" : "off"); @@ -433,7 +434,7 @@ std::unique_ptr datasource::create(std::string const& filepath, size_t size) { #ifdef CUFILE_FOUND - if (detail::cufile_integration::is_always_enabled()) { + if (cufile_integration::is_always_enabled()) { // avoid mmap as GDS is expected to be used for most reads return std::make_unique(filepath.c_str()); } diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp index a9d4f19c848..2033a8dbd41 100644 --- a/cpp/src/io/utilities/file_io_utilities.cpp +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -4,7 +4,7 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at - * +* * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software @@ -15,10 +15,11 @@ */ #include "file_io_utilities.hpp" +#include "getenv_or.hpp" -#include "io/utilities/config_utils.hpp" - +#include #include +#include #include diff --git a/cpp/src/io/utilities/config_utils.hpp b/cpp/src/io/utilities/getenv_or.hpp similarity index 63% rename from cpp/src/io/utilities/config_utils.hpp rename to cpp/src/io/utilities/getenv_or.hpp index 74df1375e6f..3fd97a00b61 100644 --- a/cpp/src/io/utilities/config_utils.hpp +++ b/cpp/src/io/utilities/getenv_or.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,15 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #pragma once #include +#include #include #include -namespace cudf::io::detail { - +namespace { /** * @brief Returns the value of the environment variable, or a default value if the variable is not * present. @@ -45,37 +46,4 @@ T getenv_or(std::string_view env_var_name, T default_val) return converted_val; } -namespace cufile_integration { - -/** - * @brief Returns true if cuFile and its compatibility mode are enabled. - */ -bool is_always_enabled(); - -/** - * @brief Returns true if only direct IO through cuFile is enabled (compatibility mode is disabled). - */ -bool is_gds_enabled(); - -/** - * @brief Returns true if KvikIO is enabled. - */ -bool is_kvikio_enabled(); - -} // namespace cufile_integration - -namespace nvcomp_integration { - -/** - * @brief Returns true if all nvCOMP uses are enabled. - */ -bool is_all_enabled(); - -/** - * @brief Returns true if stable nvCOMP use is enabled. 
- */ -bool is_stable_enabled(); - -} // namespace nvcomp_integration - -} // namespace cudf::io::detail +} // namespace