From 3584739a301fa8ab98caa4b7a887aab26712f9d4 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 13 Mar 2023 22:58:20 +0100 Subject: [PATCH] Setting a threshold for KvikIO IO (#12841) For small reads and writes, the overhead of using cuFile and/or KvikIO becomes significant. This PR applies the threshold already used by the `GDS` backend to the `KVIKIO` backend as well. Closes #12780 ### Future work Let's optimize KvikIO for small reads and writes so we don't need this threshold. Tracking here: https://github.com/rapidsai/kvikio/issues/178 # Authors: - Mads R. B. Kristensen (https://github.com/madsbk) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/12841 --- cpp/src/io/utilities/data_sink.cpp | 8 ++++-- cpp/src/io/utilities/datasource.cpp | 6 ++-- cpp/src/io/utilities/file_io_utilities.hpp | 32 ++-------------------- 3 files changed, 12 insertions(+), 34 deletions(-) diff --git a/cpp/src/io/utilities/data_sink.cpp b/cpp/src/io/utilities/data_sink.cpp index cba45f693f9..40b70986eca 100644 --- a/cpp/src/io/utilities/data_sink.cpp +++ b/cpp/src/io/utilities/data_sink.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -63,8 +63,8 @@ class file_sink : public data_sink { [[nodiscard]] bool is_device_write_preferred(size_t size) const override { - return !_kvikio_file.closed() || - (_cufile_out != nullptr && _cufile_out->is_cufile_io_preferred(size)); + if (size < _gds_write_preferred_threshold) { return false; } + return supports_device_write(); } std::future device_write_async(void const* gpu_data, @@ -96,6 +96,8 @@ class file_sink : public data_sink { size_t _bytes_written = 0; std::unique_ptr _cufile_out; kvikio::FileHandle _kvikio_file; + // The write size above which GDS is faster than d2h-copy + posix-write + static constexpr size_t _gds_write_preferred_threshold = 128 << 10; // 128KB }; /** diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 71d64900398..e2cea7a56ff 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -56,8 +56,8 @@ class file_source : public datasource { [[nodiscard]] bool is_device_read_preferred(size_t size) const override { - return !_kvikio_file.closed() || - (_cufile_in != nullptr && _cufile_in->is_cufile_io_preferred(size)); + if (size < _gds_read_preferred_threshold) { return false; } + return supports_device_read(); } std::future device_read_async(size_t offset, @@ -98,6 +98,8 @@ class file_source : public datasource { private: std::unique_ptr _cufile_in; kvikio::FileHandle _kvikio_file; + // The read size above which GDS is faster than posix-read + h2d-copy + static constexpr size_t _gds_read_preferred_threshold = 128 << 10; // 128KB }; /** diff --git a/cpp/src/io/utilities/file_io_utilities.hpp b/cpp/src/io/utilities/file_io_utilities.hpp index 38674892966..b55dd3b1583 100644 --- a/cpp/src/io/utilities/file_io_utilities.hpp +++ b/cpp/src/io/utilities/file_io_utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,36 +49,10 @@ class file_wrapper { [[nodiscard]] auto desc() const { return fd; } }; -/** - * @brief Base class for cuFile input/output. - * - * Contains the common API for cuFile input and output classes. - */ -class cufile_io_base { - public: - /** - * @brief Returns an estimate of whether the cuFile operation is the optimal option. - * - * @param size Read/write operation size, in bytes. - * @return Whether a cuFile operation with the given size is expected to be faster than a host - * read + H2D copy - */ - static bool is_cufile_io_preferred(size_t size) { return size > op_size_threshold; } - - protected: - /** - * @brief The read/write size above which cuFile is faster then host read + copy - * - * This may not be the optimal threshold for all systems. Derived `is_cufile_io_preferred` - * implementations can use a different logic. - */ - static constexpr size_t op_size_threshold = 128 << 10; -}; - /** * @brief Interface class for cufile input. */ -class cufile_input : public cufile_io_base { +class cufile_input { public: /** * @brief Asynchronously reads into existing device memory. @@ -101,7 +75,7 @@ class cufile_input : public cufile_io_base { /** * @brief Interface class for cufile output. */ -class cufile_output : public cufile_io_base { +class cufile_output { public: /** * @brief Asynchronously writes the data from a device buffer into a file.