Skip to content

Commit

Permalink
Setting a threshold for KvikIO IO (#12841)
Browse files Browse the repository at this point in the history
For small reads and writes, the overhead of using cuFile and/or KvikIO becomes significant. This PR applies the size threshold already used by the `GDS` backend to the `KVIKIO` backend as well.

Closes #12780

### Future work
Let's optimize KvikIO for small reads and writes so we don't need this threshold. 
Tracking here: rapidsai/kvikio#178

#

Authors:
  - Mads R. B. Kristensen (https://github.com/madsbk)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Nghia Truong (https://github.com/ttnghia)

URL: #12841
  • Loading branch information
madsbk authored Mar 13, 2023
1 parent 9c9dd54 commit 3584739
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 34 deletions.
8 changes: 5 additions & 3 deletions cpp/src/io/utilities/data_sink.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -63,8 +63,8 @@ class file_sink : public data_sink {

/**
 * @brief Estimates whether a write of `size` bytes should go through the device write path.
 *
 * Writes smaller than `_gds_write_preferred_threshold` are never preferred, regardless of which
 * backend is active. For all other sizes the answer is whatever `supports_device_write()`
 * reports.
 *
 * @param size Write operation size, in bytes.
 * @return `false` if `size` is below the threshold; otherwise the result of
 * `supports_device_write()`.
 */
[[nodiscard]] bool is_device_write_preferred(size_t size) const override
{
  // The size check has to run before any backend check; returning early on an open KvikIO
  // handle would bypass the threshold for that backend.
  if (size < _gds_write_preferred_threshold) { return false; }
  return supports_device_write();
}

std::future<void> device_write_async(void const* gpu_data,
Expand Down Expand Up @@ -96,6 +96,8 @@ class file_sink : public data_sink {
size_t _bytes_written = 0;
std::unique_ptr<detail::cufile_output_impl> _cufile_out;
kvikio::FileHandle _kvikio_file;
// The write size above which GDS is faster than d2h-copy + posix-write
static constexpr size_t _gds_write_preferred_threshold = 128 << 10; // 128KB
};

/**
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/io/utilities/datasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ class file_source : public datasource {

/**
 * @brief Estimates whether a read of `size` bytes should go through the device read path.
 *
 * Reads smaller than `_gds_read_preferred_threshold` are never preferred, regardless of which
 * backend is active. For all other sizes the answer is whatever `supports_device_read()`
 * reports.
 *
 * @param size Read operation size, in bytes.
 * @return `false` if `size` is below the threshold; otherwise the result of
 * `supports_device_read()`.
 */
[[nodiscard]] bool is_device_read_preferred(size_t size) const override
{
  // The size check has to run before any backend check; returning early on an open KvikIO
  // handle would bypass the threshold for that backend.
  if (size < _gds_read_preferred_threshold) { return false; }
  return supports_device_read();
}

std::future<size_t> device_read_async(size_t offset,
Expand Down Expand Up @@ -98,6 +98,8 @@ class file_source : public datasource {
private:
std::unique_ptr<detail::cufile_input_impl> _cufile_in;
kvikio::FileHandle _kvikio_file;
// The read size above which GDS is faster than posix-read + h2d-copy
static constexpr size_t _gds_read_preferred_threshold = 128 << 10; // 128KB
};

/**
Expand Down
32 changes: 3 additions & 29 deletions cpp/src/io/utilities/file_io_utilities.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -49,36 +49,10 @@ class file_wrapper {
[[nodiscard]] auto desc() const { return fd; }
};

/**
 * @brief Base class for cuFile input/output.
 *
 * Contains the common API for cuFile input and output classes.
 */
class cufile_io_base {
 public:
  /**
   * @brief Returns an estimate of whether the cuFile operation is the optimal option.
   *
   * The comparison is strict: an operation of exactly `op_size_threshold` bytes is not
   * reported as preferred.
   *
   * @param size Read/write operation size, in bytes.
   * @return Whether a cuFile operation with the given size is expected to be faster than a host
   * read + H2D copy
   */
  [[nodiscard]] static constexpr bool is_cufile_io_preferred(size_t size) noexcept
  {
    return size > op_size_threshold;
  }

 protected:
  /**
   * @brief The read/write size above which cuFile is faster than host read + copy
   *
   * This may not be the optimal threshold for all systems. Derived `is_cufile_io_preferred`
   * implementations can use a different logic.
   */
  static constexpr size_t op_size_threshold = 128 << 10;  // 131072 bytes
};

/**
* @brief Interface class for cufile input.
*/
class cufile_input : public cufile_io_base {
class cufile_input {
public:
/**
* @brief Asynchronously reads into existing device memory.
Expand All @@ -101,7 +75,7 @@ class cufile_input : public cufile_io_base {
/**
* @brief Interface class for cufile output.
*/
class cufile_output : public cufile_io_base {
class cufile_output {
public:
/**
* @brief Asynchronously writes the data from a device buffer into a file.
Expand Down

0 comments on commit 3584739

Please sign in to comment.