diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 533828bd577..31d4184993f 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -25,6 +25,7 @@ #include "timezone.cuh" #include +#include #include #include @@ -372,15 +373,13 @@ rmm::device_buffer reader::impl::decompress_stripe_data( // Dispatch batches of blocks to decompress if (num_compressed_blocks > 0) { - auto env_use_nvcomp = std::getenv("LIBCUDF_USE_NVCOMP"); - bool use_nvcomp = env_use_nvcomp != nullptr ? std::atoi(env_use_nvcomp) : 0; switch (decompressor->GetKind()) { case orc::ZLIB: CUDA_TRY( gpuinflate(inflate_in.data(), inflate_out.data(), num_compressed_blocks, 0, stream)); break; case orc::SNAPPY: - if (use_nvcomp) { + if (nvcomp_integration::is_stable_enabled()) { device_span inflate_in_view{inflate_in.data(), num_compressed_blocks}; device_span inflate_out_view{inflate_out.data(), diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index b234ea868ba..ff7b642be0e 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -1309,9 +1310,7 @@ void CompressOrcDataStreams(uint8_t* compressed_data, gpuInitCompressionBlocks<<>>( strm_desc, enc_streams, comp_in, comp_out, compressed_data, comp_blk_size, max_comp_blk_size); if (compression == SNAPPY) { - auto env_use_nvcomp = std::getenv("LIBCUDF_USE_NVCOMP"); - bool use_nvcomp = env_use_nvcomp != nullptr ? 
std::atoi(env_use_nvcomp) : 0; - if (use_nvcomp) { + if (detail::nvcomp_integration::is_stable_enabled()) { try { size_t temp_size; nvcompStatus_t nvcomp_status = nvcompBatchedSnappyCompressGetTempSize( diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 06a696d6751..f144a02bc89 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -22,6 +22,7 @@ #include "reader_impl.hpp" #include +#include #include #include @@ -1154,9 +1155,6 @@ rmm::device_buffer reader::impl::decompress_page_data( cudaMemcpyHostToDevice, stream.value())); - auto env_use_nvcomp = std::getenv("LIBCUDF_USE_NVCOMP"); - bool use_nvcomp = env_use_nvcomp != nullptr ? std::atoi(env_use_nvcomp) : 0; - switch (codec.compression_type) { case parquet::GZIP: CUDA_TRY(gpuinflate(inflate_in.device_ptr(start_pos), @@ -1166,7 +1164,7 @@ rmm::device_buffer reader::impl::decompress_page_data( stream)) break; case parquet::SNAPPY: - if (use_nvcomp) { + if (nvcomp_integration::is_stable_enabled()) { snappy_decompress(inflate_in_view.subspan(start_pos, argc - start_pos), inflate_out_view.subspan(start_pos, argc - start_pos), codec.max_decompressed_size, diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 5b60a81e09b..2ab5d7d696b 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -23,6 +23,7 @@ #include "writer_impl.hpp" #include +#include #include "compact_protocol_writer.hpp" #include @@ -990,11 +991,9 @@ void writer::impl::encode_pages(hostdevice_2dvector& chunks device_span comp_stat{compression_status.data(), compression_status.size()}; gpu::EncodePages(batch_pages, comp_in, comp_stat, stream); - auto env_use_nvcomp = std::getenv("LIBCUDF_USE_NVCOMP"); - bool use_nvcomp = env_use_nvcomp != nullptr ? 
std::atoi(env_use_nvcomp) : 0; switch (compression_) { case parquet::Compression::SNAPPY: - if (use_nvcomp) { + if (nvcomp_integration::is_stable_enabled()) { snappy_compress(comp_in, comp_stat, max_page_uncomp_data_size, stream); } else { CUDA_TRY(gpu_snap(comp_in.data(), comp_stat.data(), pages_in_batch, stream)); diff --git a/cpp/src/io/utilities/config_utils.hpp b/cpp/src/io/utilities/config_utils.hpp new file mode 100644 index 00000000000..a1d8e747e44 --- /dev/null +++ b/cpp/src/io/utilities/config_utils.hpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +namespace cudf::io::detail { + +/** + * @brief Returns the value of the environment variable, or a default value if the variable is not + * present. + */ +inline std::string getenv_or(std::string const& env_var_name, std::string_view default_val) +{ + auto const env_val = std::getenv(env_var_name.c_str()); + return std::string{(env_val == nullptr) ? default_val : env_val}; +} + +namespace nvcomp_integration { + +namespace { +/** + * @brief Defines which nvCOMP usage to enable: `OFF`, `STABLE` or `ALWAYS`. + */ +enum class usage_policy : uint8_t { OFF, STABLE, ALWAYS }; + +/** + * @brief Get the current usage policy from the `LIBCUDF_NVCOMP_POLICY` environment variable. The variable is read on the first call and kept in a static local; `OFF` and `ALWAYS` select those policies, any other value (or no value set) selects `STABLE`. 
+ */ +inline usage_policy get_env_policy() +{ + static auto const env_val = getenv_or("LIBCUDF_NVCOMP_POLICY", "STABLE"); + if (env_val == "OFF") return usage_policy::OFF; + if (env_val == "ALWAYS") return usage_policy::ALWAYS; + return usage_policy::STABLE; +} +} // namespace + +/** + * @brief Returns true if all nvCOMP uses are enabled, i.e. the policy is `ALWAYS`. + */ +inline bool is_all_enabled() { return get_env_policy() == usage_policy::ALWAYS; } + +/** + * @brief Returns true if stable nvCOMP use is enabled, i.e. the policy is `STABLE` or `ALWAYS`. + */ +inline bool is_stable_enabled() +{ + return is_all_enabled() or get_env_policy() == usage_policy::STABLE; +} + +} // namespace nvcomp_integration +} // namespace cudf::io::detail diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp index 5f151a959bf..387452e171a 100644 --- a/cpp/src/io/utilities/file_io_utilities.cpp +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -15,6 +15,7 @@ */ #include "file_io_utilities.hpp" #include +#include #include @@ -48,12 +49,6 @@ file_wrapper::file_wrapper(std::string const& filepath, int flags, mode_t mode) file_wrapper::~file_wrapper() { close(fd); } -std::string getenv_or(std::string const& env_var_name, std::string const& default_val) -{ - auto const env_val = std::getenv(env_var_name.c_str()); - return (env_val == nullptr) ? default_val : std::string(env_val); -} - #ifdef CUFILE_FOUND cufile_config::cufile_config() : policy{getenv_or("LIBCUDF_CUFILE_POLICY", default_policy)} diff --git a/docs/cudf/source/basics/io-gds-integration.rst b/docs/cudf/source/basics/io-gds-integration.rst index d4d41d02dee..29cbc2024fc 100644 --- a/docs/cudf/source/basics/io-gds-integration.rst +++ b/docs/cudf/source/basics/io-gds-integration.rst @@ -7,10 +7,13 @@ GDS also has a compatibility mode that allows the library to fall back to copyin The SDK is available for download `here `_. 
Use of GPUDirect Storage in cuDF is disabled by default, and can be enabled through environment variable ``LIBCUDF_CUFILE_POLICY``. -This variable also controls the GDS compatibility mode. There are two special values for the environment variable: +This variable also controls the GDS compatibility mode. -- "GDS": Use of GDS is enabled; GDS compatibility mode is *off*. -- "ALWAYS": Use of GDS is enabled; GDS compatibility mode is *on*. +There are three special values for the environment variable: + +- "GDS": Enable GDS use; GDS compatibility mode is *off*. +- "ALWAYS": Enable GDS use; GDS compatibility mode is *on*. +- "OFF": Completely disable GDS use. Any other value (or no value set) will keep the GDS disabled for use in cuDF and IO will be done using cuDF's CPU bounce buffers. @@ -28,4 +31,4 @@ Operations that support the use of GPUDirect Storage: - `to_parquet` - `to_orc` -NOTE: current GDS integration is not fully optimized and enabling GDS will not lead to performance improvements in all cases. \ No newline at end of file +NOTE: current GDS integration is not fully optimized and enabling GDS will not lead to performance improvements in all cases. diff --git a/docs/cudf/source/basics/io-nvcomp-integration.rst b/docs/cudf/source/basics/io-nvcomp-integration.rst new file mode 100644 index 00000000000..af89ab5285f --- /dev/null +++ b/docs/cudf/source/basics/io-nvcomp-integration.rst @@ -0,0 +1,26 @@ +nvCOMP Integration +============================= + +Some types of compression/decompression can be performed using either `nvCOMP library `_ or the internal implementation. + +Which implementation is used by default depends on the data format and the compression type. Behavior can be influenced through environment variable ``LIBCUDF_NVCOMP_POLICY``. + +There are three special values for the environment variable: + +- "STABLE": Only enable nvCOMP in places where it has been deemed stable for production use. 
+- "ALWAYS": Enable all available uses of nvCOMP, including new, experimental combinations. +- "OFF": Disable nvCOMP use whenever possible and use the internal implementations instead. + +Any other value (or no value set) will result in the same behavior as the "STABLE" option. + + +.. table:: Current policy for nvCOMP use for different types + :widths: 20 15 15 15 15 15 15 15 15 15 + + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+ + | | CSV | Parquet | JSON | ORC | AVRO | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+ + | Compression Type | Writer | Reader | Writer | Reader | Writer¹ | Reader | Writer | Reader | Reader | + +=======================+========+========+========+========+=========+========+========+========+========+ + | snappy | ❌ | ❌ | Stable | Stable | ❌ | ❌ | Stable | Stable | ❌ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+ diff --git a/docs/cudf/source/basics/io.rst b/docs/cudf/source/basics/io.rst index e88162d8f52..ee3d997d664 100644 --- a/docs/cudf/source/basics/io.rst +++ b/docs/cudf/source/basics/io.rst @@ -9,4 +9,5 @@ This page contains Input / Output related APIs in cuDF. :caption: Contents: io-supported-types.rst - io-gds-integration.rst \ No newline at end of file + io-gds-integration.rst + io-nvcomp-integration.rst \ No newline at end of file