rapidsai · rapids-bot · Dec 4, 2023 · Nov 25, 2023 · Nov 30, 2023 · Nov 30, 2023
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <chrono>
 #include <iostream>
 
 #include <cuda_runtime_api.h>
@@ -27,6 +28,25 @@
 
 using namespace std;
 
+// Scope timer: records the time at construction and prints the elapsed time as "(N us)" on
+// destruction. Uses steady_clock, which is monotonic and therefore suited to measuring intervals.
+class Timer {
+ public:
+  Timer() : start(std::chrono::steady_clock::now()) {}
+  Timer(Timer const&)            = delete;  // each copy would print its own report when destroyed
+  Timer& operator=(Timer const&) = delete;
+
+  ~Timer()
+  {
+    auto const elapsed = std::chrono::steady_clock::now() - start;
+    auto const elapsed_us = std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();
+    cout << "(" << elapsed_us << " us)" << endl;
+  }
+
+ private:
+  std::chrono::steady_clock::time_point start;
+};
+
 void check(bool condition)
 {
   if (!condition) {
@@ -82,6 +102,8 @@ int main()
   check(kvikio::is_host_memory(c_dev) == false);
 
   {
+    cout << endl;
+    Timer timer;
     kvikio::FileHandle f("/tmp/test-file", "w");
     check(cudaMemcpy(a_dev, a, SIZE, cudaMemcpyHostToDevice) == cudaSuccess);
     size_t written = f.pwrite(a_dev, SIZE, 0, 1).get();
@@ -90,6 +112,8 @@ int main()
     cout << "Write: " << written << endl;
   }
   {
+    std::cout << std::endl;
+    Timer timer;
     kvikio::FileHandle f("/tmp/test-file", "r");
     size_t read = f.pread(b_dev, SIZE, 0, 1).get();
     check(read == SIZE);
@@ -102,6 +126,8 @@ int main()
   }
   kvikio::defaults::thread_pool_nthreads_reset(16);
   {
+    std::cout << std::endl;
+    Timer timer;
     kvikio::FileHandle f("/tmp/test-file", "w");
     size_t written = f.pwrite(a_dev, SIZE).get();
     check(written == SIZE);
@@ -110,16 +136,20 @@ int main()
          << " threads): " << written << endl;
   }
   {
+    std::cout << std::endl;
+    Timer timer;
     kvikio::FileHandle f("/tmp/test-file", "r");
     size_t read = f.pread(b_dev, SIZE, 0).get();
-    cout << "Parallel write (" << kvikio::defaults::thread_pool_nthreads() << " threads): " << read
+    cout << "Parallel read (" << kvikio::defaults::thread_pool_nthreads() << " threads): " << read
          << endl;
     check(cudaMemcpy(b, b_dev, SIZE, cudaMemcpyDeviceToHost) == cudaSuccess);
     for (int i = 0; i < NELEM; ++i) {
       check(a[i] == b[i]);
     }
   }
   {
+    std::cout << std::endl;
+    Timer timer;
     kvikio::FileHandle f("/tmp/test-file", "r+", kvikio::FileHandle::m644);
     kvikio::buffer_register(c_dev, SIZE);
     size_t read = f.pread(c_dev, SIZE).get();
@@ -129,6 +159,8 @@ int main()
     cout << "Read buffer registered data: " << read << endl;
   }
   {
+    std::cout << std::endl;
+    Timer timer;
     kvikio::FileHandle f("/tmp/test-file", "w");
     size_t written = f.pwrite(a, SIZE).get();
     check(written == SIZE);
@@ -137,6 +169,8 @@ int main()
          << " threads): " << written << endl;
   }
   {
+    std::cout << std::endl;
+    Timer timer;
     kvikio::FileHandle f("/tmp/test-file", "r");
     size_t read = f.pread(b, SIZE).get();
     check(read == SIZE);
@@ -148,6 +182,8 @@ int main()
          << " threads): " << read << endl;
   }
   if (kvikio::is_batch_and_stream_available() && !kvikio::defaults::compat_mode()) {
+    std::cout << std::endl;
+    Timer timer;
     // Here we use the batch API to read "/tmp/test-file" into `b_dev` by
     // submitting 4 batch operations.
     constexpr int num_ops_in_batch = 4;
@@ -198,6 +234,8 @@ int main()
     cout << "The batch API isn't available, requires CUDA 12.2+" << endl;
   }
   {
+    std::cout << std::endl;
+    Timer timer;
     cout << "Performing async I/O using by-reference arguments" << endl;
     off_t f_off{0};
     off_t d_off{0};
@@ -231,6 +269,8 @@ int main()
     check(cudaFreeHost((void*)bytes_done_p) == cudaSuccess);
   }
   {
+    std::cout << std::endl;
+    Timer timer;
     cout << "Performing async I/O using by-value arguments" << endl;
 
     // Let's create a new stream and submit an async write