From b85459814e84ef783389dbaabdea345de9b93513 Mon Sep 17 00:00:00 2001
From: Mark Harris <mharris@nvidia.com>
Date: Thu, 25 Mar 2021 11:28:02 +1100
Subject: [PATCH] Convert cudf::concatenate APIs to use spans and
 device_uvector (#7621)

Contributes to #7287

This PR replaces `std::vector` with `host_span` in public and detail `cudf::concatenate` functions, and replaces `rmm::device_vector` with `rmm::device_uvector` in the concatenate implementations.

It also strengthens the SFINAE restrictions on `cudf::host_span` and `cudf::device_span` so that they cannot be constructed from containers unless the container's value_type is the same as the span's value_type.

This PR also:
 - [x] Updates Cython.
 - [x] Benchmarks before and after.

Authors:
  - Mark Harris (@harrism)

Approvers:
  - Jake Hemstad (@jrhemstad)
  - Vukasin Milovanovic (@vuule)
  - Ashwin Srinath (@shwina)

URL: https://github.com/rapidsai/cudf/pull/7621
---
 cpp/include/cudf/column/column_factories.hpp  |   1 -
 cpp/include/cudf/concatenate.hpp              |  18 +-
 cpp/include/cudf/detail/concatenate.cuh       |   9 +-
 cpp/include/cudf/detail/concatenate.hpp       |  11 +-
 .../cudf/dictionary/detail/concatenate.hpp    |   5 +-
 cpp/include/cudf/lists/detail/concatenate.hpp |   5 +-
 .../cudf/strings/detail/concatenate.hpp       |   5 +-
 .../cudf/structs/detail/concatenate.hpp       |   5 +-
 cpp/include/cudf/utilities/span.hpp           |  35 +++-
 cpp/src/copying/concatenate.cu                |  61 +++----
 cpp/src/dictionary/detail/concatenate.cu      |   5 +-
 cpp/src/interop/from_arrow.cpp                |   3 +-
 cpp/src/join/hash_join.cu                     |   4 +-
 cpp/src/lists/copying/concatenate.cu          |   6 +-
 cpp/src/replace/replace.cu                    |   3 +-
 cpp/src/strings/copying/concatenate.cu        |  50 +++--
 cpp/src/structs/copying/concatenate.cu        |   6 +-
 cpp/src/structs/utilities.cu                  |   3 +-
 cpp/src/structs/utilities.hpp                 |   5 +-
 cpp/tests/copying/concatenate_tests.cu        | 172 +++++++++++-------
 cpp/tests/io/orc_test.cpp                     |  15 +-
 cpp/tests/io/parquet_test.cpp                 |  31 ++--
 cpp/tests/merge/merge_test.cpp                |   2 +-
 python/cudf/cudf/_lib/cpp/concatenate.pxd     |  14 +-
 .../cudf/_lib/cpp/utilities/host_span.pxd     |   8 +
 25 files changed, 282 insertions(+), 200 deletions(-)
 create mode 100644 python/cudf/cudf/_lib/cpp/utilities/host_span.pxd
diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp
index 31196824845..43c2407d629 100644
--- a/cpp/include/cudf/column/column_factories.hpp
+++ b/cpp/include/cudf/column/column_factories.hpp
@@ -21,7 +21,6 @@
 #include <cudf/utilities/traits.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
-#include <rmm/device_vector.hpp>
 
 namespace cudf {
 /**
diff --git a/cpp/include/cudf/concatenate.hpp b/cpp/include/cudf/concatenate.hpp
index 8333cf41b77..182cbbdc3ec 100644
--- a/cpp/include/cudf/concatenate.hpp
+++ b/cpp/include/cudf/concatenate.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 
 #include <cudf/column/column_view.hpp>
 #include <cudf/table/table_view.hpp>
+#include <cudf/utilities/span.hpp>
 
 #include <memory>
-#include <vector>
 
 namespace cudf {
 /**
@@ -36,13 +36,13 @@ namespace cudf {
  *
  * Returns empty `device_buffer` if the column is not nullable
  *
- * @param views Vector of column views whose bitmask will to be concatenated
+ * @param views host_span of column views whose bitmasks will be concatenated
  * @param mr Device memory resource used for allocating the new device_buffer
  * @return rmm::device_buffer A `device_buffer` containing the bitmasks of all
  * the column views in the views vector
  */
 rmm::device_buffer concatenate_masks(
-  std::vector<column_view> const& views,
+  host_span<column_view const> views,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -51,14 +51,13 @@ rmm::device_buffer concatenate_masks(
  * @throws cudf::logic_error
  * If types of the input columns mismatch
  *
- * @param columns_to_concat The column views to be concatenated into a single
- * column
+ * @param columns_to_concat host_span of column views to be concatenated into a single column
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return Unique pointer to a single table having all the rows from the
  * elements of `columns_to_concat` respectively in the same order.
  */
 std::unique_ptr<column> concatenate(
-  std::vector<column_view> const& columns_to_concat,
+  host_span<column_view const> columns_to_concat,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -82,14 +81,13 @@ std::unique_ptr<column> concatenate(
  * @throws cudf::logic_error
  * If number of columns mismatch
  *
- * @param tables_to_concat The table views to be concatenated into a single
- * table
+ * @param tables_to_concat host_span of table views to be concatenated into a single table
  * @param mr Device memory resource used to allocate the returned table's device memory.
  * @return Unique pointer to a single table having all the rows from the
  * elements of `tables_to_concat` respectively in the same order.
  */
 std::unique_ptr<table> concatenate(
-  std::vector<table_view> const& tables_to_concat,
+  host_span<table_view const> tables_to_concat,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /** @} */  // end of group
diff --git a/cpp/include/cudf/detail/concatenate.cuh b/cpp/include/cudf/detail/concatenate.cuh
index a30ad6e853d..5f0399d6172 100644
--- a/cpp/include/cudf/detail/concatenate.cuh
+++ b/cpp/include/cudf/detail/concatenate.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@
 #include <cudf/concatenate.hpp>
 #include <cudf/detail/concatenate.hpp>
 #include <cudf/table/table_view.hpp>
+#include <cudf/utilities/span.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 
@@ -34,8 +35,8 @@ namespace detail {
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-void concatenate_masks(rmm::device_vector<column_device_view> const& d_views,
-                       rmm::device_vector<size_t> const& d_offsets,
+void concatenate_masks(device_span<column_device_view const> d_views,
+                       device_span<size_t const> d_offsets,
                        bitmask_type* dest_mask,
                        size_type output_size,
                        rmm::cuda_stream_view stream);
@@ -45,7 +46,7 @@ void concatenate_masks(rmm::device_vector<column_device_view> const& d_views,
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-void concatenate_masks(std::vector<column_view> const& views,
+void concatenate_masks(host_span<column_view const> views,
                        bitmask_type* dest_mask,
                        rmm::cuda_stream_view stream);
 
diff --git a/cpp/include/cudf/detail/concatenate.hpp b/cpp/include/cudf/detail/concatenate.hpp
index 43eb5203b37..f7f5567cd76 100644
--- a/cpp/include/cudf/detail/concatenate.hpp
+++ b/cpp/include/cudf/detail/concatenate.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 #include <cudf/column/column_view.hpp>
 #include <cudf/concatenate.hpp>
 #include <cudf/table/table_view.hpp>
+#include <cudf/utilities/span.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 
@@ -27,22 +28,22 @@ namespace cudf {
 //! Inner interfaces and implementations
 namespace detail {
 /**
- * @copydoc cudf::concatenate(std::vector<column_view> const&,rmm::mr::device_memory_resource*)
+ * @copydoc cudf::concatenate(host_span<column_view const>,rmm::mr::device_memory_resource*)
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> concatenate(
-  std::vector<column_view> const& columns_to_concat,
+  host_span<column_view const> columns_to_concat,
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @copydoc cudf::concatenate(std::vector<table_view> const&,rmm::mr::device_memory_resource*)
+ * @copydoc cudf::concatenate(host_span<table_view const>,rmm::mr::device_memory_resource*)
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<table> concatenate(
-  std::vector<table_view> const& tables_to_concat,
+  host_span<table_view const> tables_to_concat,
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
diff --git a/cpp/include/cudf/dictionary/detail/concatenate.hpp b/cpp/include/cudf/dictionary/detail/concatenate.hpp
index ae2e0f0ba38..c2fe2dce1fe 100644
--- a/cpp/include/cudf/dictionary/detail/concatenate.hpp
+++ b/cpp/include/cudf/dictionary/detail/concatenate.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 
 #include <cudf/column/column.hpp>
 #include <cudf/dictionary/dictionary_column_view.hpp>
+#include <cudf/utilities/span.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 
@@ -36,7 +37,7 @@ namespace detail {
  * @return New column with concatenated results.
  */
 std::unique_ptr<column> concatenate(
-  std::vector<column_view> const& columns,
+  host_span<column_view const> columns,
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
diff --git a/cpp/include/cudf/lists/detail/concatenate.hpp b/cpp/include/cudf/lists/detail/concatenate.hpp
index f9adc893b8e..30797443c35 100644
--- a/cpp/include/cudf/lists/detail/concatenate.hpp
+++ b/cpp/include/cudf/lists/detail/concatenate.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 #include <cudf/column/column.hpp>
 #include <cudf/lists/lists_column_view.hpp>
 #include <cudf/table/table_view.hpp>
+#include <cudf/utilities/span.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 
@@ -42,7 +43,7 @@ namespace detail {
  * @return New column with concatenated results.
  */
 std::unique_ptr<column> concatenate(
-  std::vector<column_view> const& columns,
+  host_span<column_view const> columns,
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
diff --git a/cpp/include/cudf/strings/detail/concatenate.hpp b/cpp/include/cudf/strings/detail/concatenate.hpp
index 3e6fc6d67fc..0740039e896 100644
--- a/cpp/include/cudf/strings/detail/concatenate.hpp
+++ b/cpp/include/cudf/strings/detail/concatenate.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 #include <cudf/column/column.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/table/table_view.hpp>
+#include <cudf/utilities/span.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 
@@ -41,7 +42,7 @@ namespace detail {
  * @return New column with concatenated results.
  */
 std::unique_ptr<column> concatenate(
-  std::vector<column_view> const& columns,
+  host_span<column_view const> columns,
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
diff --git a/cpp/include/cudf/structs/detail/concatenate.hpp b/cpp/include/cudf/structs/detail/concatenate.hpp
index ef3da82cfeb..a098703e4b0 100644
--- a/cpp/include/cudf/structs/detail/concatenate.hpp
+++ b/cpp/include/cudf/structs/detail/concatenate.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 #include <cudf/column/column.hpp>
 #include <cudf/structs/structs_column_view.hpp>
 #include <cudf/table/table_view.hpp>
+#include <cudf/utilities/span.hpp>
 
 namespace cudf {
 namespace structs {
@@ -48,7 +49,7 @@ namespace detail {
  * @return        New column with concatenated results.
  */
 std::unique_ptr<column> concatenate(
-  std::vector<column_view> const& columns,
+  host_span<column_view const> columns,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp
index c13e5ce44ae..999306d4ee7 100644
--- a/cpp/include/cudf/utilities/span.hpp
+++ b/cpp/include/cudf/utilities/span.hpp
@@ -126,16 +126,31 @@ struct host_span : public cudf::detail::span_base<T, Extent, host_span<T, Extent
 
   constexpr host_span() noexcept : base() {}  // required to compile on centos
 
-  template <typename C, std::enable_if_t<is_host_span_supported_container<C>::value>* = nullptr>
+  // Constructor from container
+  template <
+    typename C,
+    // Only supported containers of types convertible to T
+    std::enable_if_t<is_host_span_supported_container<C>::value &&
+                     std::is_convertible<std::remove_pointer_t<decltype(thrust::raw_pointer_cast(
+                                           std::declval<C&>().data()))> (*)[],
+                                         T (*)[]>::value>* = nullptr>
   constexpr host_span(C& in) : base(in.data(), in.size())
   {
   }
 
-  template <typename C, std::enable_if_t<is_host_span_supported_container<C>::value>* = nullptr>
+  // Constructor from const container
+  template <
+    typename C,
+    // Only supported containers of types convertible to T
+    std::enable_if_t<is_host_span_supported_container<C>::value &&
+                     std::is_convertible<std::remove_pointer_t<decltype(thrust::raw_pointer_cast(
+                                           std::declval<C&>().data()))> (*)[],
+                                         T (*)[]>::value>* = nullptr>
   constexpr host_span(C const& in) : base(in.data(), in.size())
   {
   }
 
+  // Copy construction to support const conversion
   template <typename OtherT,
             std::size_t OtherExtent,
             typename std::enable_if<(Extent == OtherExtent || Extent == dynamic_extent) &&
@@ -175,12 +190,24 @@ struct device_span : public cudf::detail::span_base<T, Extent, device_span<T, Ex
 
   constexpr device_span() noexcept : base() {}  // required to compile on centos
 
-  template <typename C, std::enable_if_t<is_device_span_supported_container<C>::value>* = nullptr>
+  template <
+    typename C,
+    // Only supported containers of types convertible to T
+    std::enable_if_t<is_device_span_supported_container<C>::value &&
+                     std::is_convertible<std::remove_pointer_t<decltype(thrust::raw_pointer_cast(
+                                           std::declval<C&>().data()))> (*)[],
+                                         T (*)[]>::value>* = nullptr>
   constexpr device_span(C& in) : base(thrust::raw_pointer_cast(in.data()), in.size())
   {
   }
 
-  template <typename C, std::enable_if_t<is_device_span_supported_container<C>::value>* = nullptr>
+  template <
+    typename C,
+    // Only supported containers of types convertible to T
+    std::enable_if_t<is_device_span_supported_container<C>::value &&
+                     std::is_convertible<std::remove_pointer_t<decltype(thrust::raw_pointer_cast(
+                                           std::declval<C&>().data()))> (*)[],
+                                         T (*)[]>::value>* = nullptr>
   constexpr device_span(C const& in) : base(thrust::raw_pointer_cast(in.data()), in.size())
   {
   }
diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu
index 8cf9db465f3..1b948083982 100644
--- a/cpp/src/copying/concatenate.cu
+++ b/cpp/src/copying/concatenate.cu
@@ -21,6 +21,7 @@
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/cuda.cuh>
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/dictionary/detail/concatenate.hpp>
 #include <cudf/lists/detail/concatenate.hpp>
 #include <cudf/strings/detail/concatenate.hpp>
@@ -29,7 +30,6 @@
 #include <cudf/table/table_device_view.cuh>
 
 #include <rmm/cuda_stream_view.hpp>
-#include <rmm/device_vector.hpp>
 #include <rmm/exec_policy.hpp>
 
 #include <thrust/binary_search.h>
@@ -50,19 +50,18 @@ constexpr bool use_fused_kernel_heuristic(bool const has_nulls, size_t const num
   return has_nulls || num_columns > 4;
 }
 
-auto create_device_views(std::vector<column_view> const& views, rmm::cuda_stream_view stream)
+auto create_device_views(host_span<column_view const> views, rmm::cuda_stream_view stream)
 {
   // Create device views for each input view
   using CDViewPtr = decltype(
     column_device_view::create(std::declval<column_view>(), std::declval<rmm::cuda_stream_view>()));
   auto device_view_owners = std::vector<CDViewPtr>(views.size());
-  std::transform(
-    views.cbegin(), views.cend(), device_view_owners.begin(), [stream](auto const& col) {
-      // TODO creating this device view can invoke null count computation
-      // even though it isn't used. See this issue:
-      // https://github.com/rapidsai/cudf/issues/4368
-      return column_device_view::create(col, stream);
-    });
+  std::transform(views.begin(), views.end(), device_view_owners.begin(), [stream](auto const& col) {
+    // TODO creating this device view can invoke null count computation
+    // even though it isn't used. See this issue:
+    // https://github.com/rapidsai/cudf/issues/4368
+    return column_device_view::create(col, stream);
+  });
 
   // Assemble contiguous array of device views
   auto device_views = thrust::host_vector<column_device_view>();
@@ -74,7 +73,7 @@ auto create_device_views(std::vector<column_view> const& views, rmm::cuda_stream
   // TODO each of these device vector copies invoke stream synchronization
   // which appears to add unnecessary overhead. See this issue:
   // https://github.com/rapidsai/rmm/issues/120
-  auto d_views = rmm::device_vector<column_device_view>{device_views};
+  auto d_views = make_device_uvector_async(device_views);
 
   // Compute the partition offsets
   auto offsets = thrust::host_vector<size_t>(views.size() + 1);
@@ -85,7 +84,7 @@ auto create_device_views(std::vector<column_view> const& views, rmm::cuda_stream
     std::next(offsets.begin()),
     [](auto const& col) { return col.size(); },
     thrust::plus<size_t>{});
-  auto const d_offsets   = rmm::device_vector<size_t>{offsets};
+  auto d_offsets         = make_device_uvector_async(offsets);
   auto const output_size = offsets.back();
 
   return std::make_tuple(
@@ -132,8 +131,8 @@ __global__ void concatenate_masks_kernel(column_device_view const* views,
   }
 }
 
-void concatenate_masks(rmm::device_vector<column_device_view> const& d_views,
-                       rmm::device_vector<size_t> const& d_offsets,
+void concatenate_masks(device_span<column_device_view const> d_views,
+                       device_span<size_t const> d_offsets,
                        bitmask_type* dest_mask,
                        size_type output_size,
                        rmm::cuda_stream_view stream)
@@ -141,14 +140,14 @@ void concatenate_masks(rmm::device_vector<column_device_view> const& d_views,
   constexpr size_type block_size{256};
   cudf::detail::grid_1d config(output_size, block_size);
   concatenate_masks_kernel<<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(
-    d_views.data().get(),
-    d_offsets.data().get(),
+    d_views.data(),
+    d_offsets.data(),
     static_cast<size_type>(d_views.size()),
     dest_mask,
     output_size);
 }
 
-void concatenate_masks(std::vector<column_view> const& views,
+void concatenate_masks(host_span<column_view const> views,
                        bitmask_type* dest_mask,
                        rmm::cuda_stream_view stream)
 {
@@ -214,7 +213,7 @@ __global__ void fused_concatenate_kernel(column_device_view const* input_views,
 }
 
 template <typename T>
-std::unique_ptr<column> fused_concatenate(std::vector<column_view> const& views,
+std::unique_ptr<column> fused_concatenate(host_span<column_view const> views,
                                           bool const has_nulls,
                                           rmm::cuda_stream_view stream,
                                           rmm::mr::device_memory_resource* mr)
@@ -245,8 +244,8 @@ std::unique_ptr<column> fused_concatenate(std::vector<column_view> const& views,
   auto const kernel = has_nulls ? fused_concatenate_kernel<T, block_size, true>
                                 : fused_concatenate_kernel<T, block_size, false>;
   kernel<<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(
-    d_views.data().get(),
-    d_offsets.data().get(),
+    d_views.data(),
+    d_offsets.data(),
     static_cast<size_type>(d_views.size()),
     *d_out_view,
     d_valid_count.data());
@@ -257,7 +256,7 @@ std::unique_ptr<column> fused_concatenate(std::vector<column_view> const& views,
 }
 
 template <typename T>
-std::unique_ptr<column> for_each_concatenate(std::vector<column_view> const& views,
+std::unique_ptr<column> for_each_concatenate(host_span<column_view const> views,
                                              bool const has_nulls,
                                              rmm::cuda_stream_view stream,
                                              rmm::mr::device_memory_resource* mr)
@@ -289,7 +288,7 @@ std::unique_ptr<column> for_each_concatenate(std::vector<column_view> const& vie
 }
 
 struct concatenate_dispatch {
-  std::vector<column_view> const& views;
+  host_span<column_view const> views;
   rmm::cuda_stream_view stream;
   rmm::mr::device_memory_resource* mr;
 
@@ -298,7 +297,7 @@ struct concatenate_dispatch {
   std::unique_ptr<column> operator()()
   {
     bool const has_nulls =
-      std::any_of(views.cbegin(), views.cend(), [](auto const& col) { return col.has_nulls(); });
+      std::any_of(views.begin(), views.end(), [](auto const& col) { return col.has_nulls(); });
 
     // Use a heuristic to guess when the fused kernel will be faster
     if (use_fused_kernel_heuristic(has_nulls, views.size())) {
@@ -392,7 +391,7 @@ void bounds_and_type_check(ColIter begin, ColIter end)
 }  // anonymous namespace
 
 // Concatenates the elements from a vector of column_views
-std::unique_ptr<column> concatenate(std::vector<column_view> const& columns_to_concat,
+std::unique_ptr<column> concatenate(host_span<column_view const> columns_to_concat,
                                     rmm::cuda_stream_view stream,
                                     rmm::mr::device_memory_resource* mr)
 {
@@ -411,15 +410,15 @@ std::unique_ptr<column> concatenate(std::vector<column_view> const& columns_to_c
     columns_to_concat.front().type(), concatenate_dispatch{columns_to_concat, stream, mr});
 }
 
-std::unique_ptr<table> concatenate(std::vector<table_view> const& tables_to_concat,
+std::unique_ptr<table> concatenate(host_span<table_view const> tables_to_concat,
                                    rmm::cuda_stream_view stream,
                                    rmm::mr::device_memory_resource* mr)
 {
   if (tables_to_concat.empty()) { return std::make_unique<table>(); }
 
   table_view const first_table = tables_to_concat.front();
-  CUDF_EXPECTS(std::all_of(tables_to_concat.cbegin(),
-                           tables_to_concat.cend(),
+  CUDF_EXPECTS(std::all_of(tables_to_concat.begin(),
+                           tables_to_concat.end(),
                            [&first_table](auto const& t) {
                              return t.num_columns() == first_table.num_columns();
                            }),
@@ -428,8 +427,8 @@ std::unique_ptr<table> concatenate(std::vector<table_view> const& tables_to_conc
   std::vector<std::unique_ptr<column>> concat_columns;
   for (size_type i = 0; i < first_table.num_columns(); ++i) {
     std::vector<column_view> cols;
-    std::transform(tables_to_concat.cbegin(),
-                   tables_to_concat.cend(),
+    std::transform(tables_to_concat.begin(),
+                   tables_to_concat.end(),
                    std::back_inserter(cols),
                    [i](auto const& t) { return t.column(i); });
 
@@ -442,7 +441,7 @@ std::unique_ptr<table> concatenate(std::vector<table_view> const& tables_to_conc
 
 }  // namespace detail
 
-rmm::device_buffer concatenate_masks(std::vector<column_view> const& views,
+rmm::device_buffer concatenate_masks(host_span<column_view const> views,
                                      rmm::mr::device_memory_resource* mr)
 {
   bool const has_nulls =
@@ -465,14 +464,14 @@ rmm::device_buffer concatenate_masks(std::vector<column_view> const& views,
 }
 
 // Concatenates the elements from a vector of column_views
-std::unique_ptr<column> concatenate(std::vector<column_view> const& columns_to_concat,
+std::unique_ptr<column> concatenate(host_span<column_view const> columns_to_concat,
                                     rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::concatenate(columns_to_concat, rmm::cuda_stream_default, mr);
 }
 
-std::unique_ptr<table> concatenate(std::vector<table_view> const& tables_to_concat,
+std::unique_ptr<table> concatenate(host_span<table_view const> tables_to_concat,
                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu
index 05349a5f968..cdf086e3f4a 100644
--- a/cpp/src/dictionary/detail/concatenate.cu
+++ b/cpp/src/dictionary/detail/concatenate.cu
@@ -62,8 +62,7 @@ struct compute_children_offsets_fn {
    *
    * @param columns The input dictionary columns.
    */
-  compute_children_offsets_fn(std::vector<column_view> const& columns)
-    : columns_ptrs{columns.size()}
+  compute_children_offsets_fn(host_span<column_view const> columns) : columns_ptrs{columns.size()}
   {
     std::transform(
       columns.begin(), columns.end(), columns_ptrs.begin(), [](auto& cv) { return &cv; });
@@ -187,7 +186,7 @@ struct dispatch_compute_indices {
 
 }  // namespace
 
-std::unique_ptr<column> concatenate(std::vector<column_view> const& columns,
+std::unique_ptr<column> concatenate(host_span<column_view const> columns,
                                     rmm::cuda_stream_view stream,
                                     rmm::mr::device_memory_resource* mr)
 {
diff --git a/cpp/src/interop/from_arrow.cpp b/cpp/src/interop/from_arrow.cpp
index 729b98d85a8..612e2111b66 100644
--- a/cpp/src/interop/from_arrow.cpp
+++ b/cpp/src/interop/from_arrow.cpp
@@ -150,8 +150,7 @@ struct dispatch_to_cudf_column {
 
 std::unique_ptr<column> get_empty_type_column(size_type size)
 {
-  return std::make_unique<column>(
-    data_type(type_id::EMPTY), size, std::move(rmm::device_buffer(0)));
+  return std::make_unique<column>(data_type(type_id::EMPTY), size, rmm::device_buffer(0));
 }
 
 /**
diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index b64e91c18bd..d827d03a6c0 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -442,7 +442,9 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<table>> construct_join_output_
                                               stream,
                                               rmm::mr::get_current_device_resource());
       common_table           = cudf::detail::concatenate(
-        {common_from_build->view(), common_from_probe->view()}, stream, mr);
+        std::vector<table_view>({common_from_build->view(), common_from_probe->view()}),
+        stream,
+        mr);
     }
     joined_indices = concatenate_vector_pairs(complement_indices, joined_indices);
   } else {
diff --git a/cpp/src/lists/copying/concatenate.cu b/cpp/src/lists/copying/concatenate.cu
index c6ca56085c8..facf2827f56 100644
--- a/cpp/src/lists/copying/concatenate.cu
+++ b/cpp/src/lists/copying/concatenate.cu
@@ -48,7 +48,7 @@ namespace {
  * @param[in] mr                    Device memory resource used to allocate the
  * returned column's device memory.
  */
-std::unique_ptr<column> merge_offsets(std::vector<lists_column_view> const& columns,
+std::unique_ptr<column> merge_offsets(host_span<lists_column_view const> columns,
                                       size_type total_list_count,
                                       rmm::cuda_stream_view stream,
                                       rmm::mr::device_memory_resource* mr)
@@ -90,7 +90,7 @@ std::unique_ptr<column> merge_offsets(std::vector<lists_column_view> const& colu
  * @copydoc cudf::lists::detail::concatenate
  */
 std::unique_ptr<column> concatenate(
-  std::vector<column_view> const& columns,
+  host_span<column_view const> columns,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
@@ -119,7 +119,7 @@ std::unique_ptr<column> concatenate(
 
   // if any of the input columns have nulls, construct the output mask
   bool const has_nulls =
-    std::any_of(columns.cbegin(), columns.cend(), [](auto const& col) { return col.has_nulls(); });
+    std::any_of(columns.begin(), columns.end(), [](auto const& col) { return col.has_nulls(); });
   rmm::device_buffer null_mask = create_null_mask(
     total_list_count, has_nulls ? mask_state::UNINITIALIZED : mask_state::UNALLOCATED);
   if (has_nulls) {
diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu
index 783e0b4b1de..cb142c2c1e2 100644
--- a/cpp/src/replace/replace.cu
+++ b/cpp/src/replace/replace.cu
@@ -450,7 +450,8 @@ std::unique_ptr<cudf::column> replace_kernel_forwarder::operator()<cudf::diction
   auto replacements = cudf::dictionary_column_view(replacement_values);
 
   auto matched_input = [&] {
-    auto new_keys = cudf::detail::concatenate({values.keys(), replacements.keys()}, stream);
+    auto new_keys = cudf::detail::concatenate(
+      std::vector<cudf::column_view>({values.keys(), replacements.keys()}), stream);
     return cudf::dictionary::detail::add_keys(input, new_keys->view(), stream, mr);
   }();
   auto matched_view   = cudf::dictionary_column_view(matched_input->view());
diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu
index 65c6c8f2836..48358cb4a38 100644
--- a/cpp/src/strings/copying/concatenate.cu
+++ b/cpp/src/strings/copying/concatenate.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@
 #include <cudf/detail/concatenate.cuh>
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/utilities/cuda.cuh>
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/strings/detail/concatenate.hpp>
 #include <cudf/strings/detail/utilities.hpp>
 #include <cudf/strings/strings_column_view.hpp>
@@ -27,6 +28,7 @@
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/exec_policy.hpp>
+#include <thrust/iterator/transform_iterator.h>
 
 #include <thrust/binary_search.h>
 #include <thrust/for_each.h>
@@ -65,8 +67,7 @@ struct chars_size_transform {
   }
 };
 
-auto create_strings_device_views(std::vector<column_view> const& views,
-                                 rmm::cuda_stream_view stream)
+auto create_strings_device_views(host_span<column_view const> views, rmm::cuda_stream_view stream)
 {
   CUDF_FUNC_RANGE();
   // Assemble contiguous array of device views
@@ -77,33 +78,30 @@ auto create_strings_device_views(std::vector<column_view> const& views,
 
   // Compute the partition offsets and size of offset column
   // Note: Using 64-bit size_t so we can detect overflow of 32-bit size_type
-  auto input_offsets = thrust::host_vector<size_t>(views.size() + 1);
+  auto input_offsets = std::vector<size_t>(views.size() + 1);
   auto offset_it     = std::next(input_offsets.begin());
   thrust::transform(
-    thrust::host, views.cbegin(), views.cend(), offset_it, [](auto const& col) -> size_t {
+    thrust::host, views.begin(), views.end(), offset_it, [](auto const& col) -> size_t {
       return static_cast<size_t>(col.size());
     });
   thrust::inclusive_scan(thrust::host, offset_it, input_offsets.end(), offset_it);
-  auto const d_input_offsets = rmm::device_vector<size_t>{input_offsets};
-  auto const output_size     = input_offsets.back();
+  auto d_input_offsets   = cudf::detail::make_device_uvector_async(input_offsets, stream);
+  auto const output_size = input_offsets.back();
 
   // Compute the partition offsets and size of chars column
   // Note: Using 64-bit size_t so we can detect overflow of 32-bit size_type
-  // Note: Using separate transform and inclusive_scan because
-  // transform_inclusive_scan fails to compile with:
-  // error: the default constructor of "cudf::column_device_view" cannot be
-  // referenced -- it is a deleted function
-  auto d_partition_offsets = rmm::device_vector<size_t>(views.size() + 1);
-  thrust::transform(rmm::exec_policy(stream),
-                    device_views_ptr,
-                    device_views_ptr + views.size(),
-                    std::next(d_partition_offsets.begin()),
-                    chars_size_transform{});
-  thrust::inclusive_scan(rmm::exec_policy(stream),
-                         d_partition_offsets.cbegin(),
-                         d_partition_offsets.cend(),
-                         d_partition_offsets.begin());
-  auto const output_chars_size = d_partition_offsets.back();
+  auto d_partition_offsets = rmm::device_uvector<size_t>(views.size() + 1, stream);
+  size_t zero{0};
+  d_partition_offsets.set_element_async(0, zero, stream);  // zero first element
+
+  thrust::transform_inclusive_scan(rmm::exec_policy(stream),
+                                   device_views_ptr,
+                                   device_views_ptr + views.size(),
+                                   std::next(d_partition_offsets.begin()),
+                                   chars_size_transform{},
+                                   thrust::plus<size_t>{});
+  auto const output_chars_size = d_partition_offsets.back_element(stream);
+  stream.synchronize();  // ensure copy of output_chars_size is complete before returning
 
   return std::make_tuple(std::move(device_view_owners),
                          device_views_ptr,
@@ -205,7 +203,7 @@ __global__ void fused_concatenate_string_chars_kernel(column_device_view const*
   }
 }
 
-std::unique_ptr<column> concatenate(std::vector<column_view> const& columns,
+std::unique_ptr<column> concatenate(host_span<column_view const> columns,
                                     rmm::cuda_stream_view stream,
                                     rmm::mr::device_memory_resource* mr)
 {
@@ -257,8 +255,8 @@ std::unique_ptr<column> concatenate(std::vector<column_view> const& columns,
                                   : fused_concatenate_string_offset_kernel<block_size, false>;
     kernel<<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(
       d_views,
-      d_input_offsets.data().get(),
-      d_partition_offsets.data().get(),
+      d_input_offsets.data(),
+      d_partition_offsets.data(),
       static_cast<size_type>(columns.size()),
       strings_count,
       d_new_offsets,
@@ -277,7 +275,7 @@ std::unique_ptr<column> concatenate(std::vector<column_view> const& columns,
       auto const kernel = fused_concatenate_string_chars_kernel;
       kernel<<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(
         d_views,
-        d_partition_offsets.data().get(),
+        d_partition_offsets.data(),
         static_cast<size_type>(columns.size()),
         total_bytes,
         d_new_chars);
diff --git a/cpp/src/structs/copying/concatenate.cu b/cpp/src/structs/copying/concatenate.cu
index b2f861c7c8d..6f18c4bcbd4 100644
--- a/cpp/src/structs/copying/concatenate.cu
+++ b/cpp/src/structs/copying/concatenate.cu
@@ -36,7 +36,7 @@ namespace detail {
 /**
  * @copydoc cudf::structs::detail::concatenate
  */
-std::unique_ptr<column> concatenate(std::vector<column_view> const& columns,
+std::unique_ptr<column> concatenate(host_span<column_view const> columns,
                                     rmm::cuda_stream_view stream,
                                     rmm::mr::device_memory_resource* mr)
 {
@@ -49,7 +49,7 @@ std::unique_ptr<column> concatenate(std::vector<column_view> const& columns,
   std::transform(ordered_children.begin(),
                  ordered_children.end(),
                  std::back_inserter(children),
-                 [mr, stream](std::vector<column_view> const& cols) {
+                 [mr, stream](host_span<column_view const> cols) {
                    return cudf::detail::concatenate(cols, stream, mr);
                  });
 
@@ -57,7 +57,7 @@ std::unique_ptr<column> concatenate(std::vector<column_view> const& columns,
 
   // if any of the input columns have nulls, construct the output mask
   bool const has_nulls =
-    std::any_of(columns.cbegin(), columns.cend(), [](auto const& col) { return col.has_nulls(); });
+    std::any_of(columns.begin(), columns.end(), [](auto const& col) { return col.has_nulls(); });
   rmm::device_buffer null_mask =
     create_null_mask(total_length, has_nulls ? mask_state::UNINITIALIZED : mask_state::UNALLOCATED);
   if (has_nulls) {
diff --git a/cpp/src/structs/utilities.cu b/cpp/src/structs/utilities.cu
index 09e6c5d949d..274a88d3a05 100644
--- a/cpp/src/structs/utilities.cu
+++ b/cpp/src/structs/utilities.cu
@@ -18,6 +18,7 @@
 
 #include <cudf/structs/structs_column_view.hpp>
 #include <cudf/utilities/error.hpp>
+#include <cudf/utilities/span.hpp>
 
 namespace cudf {
 namespace structs {
@@ -27,7 +28,7 @@ namespace detail {
  * @copydoc cudf::structs::detail::extract_ordered_struct_children
  */
 std::vector<std::vector<column_view>> extract_ordered_struct_children(
-  std::vector<column_view> const& struct_cols)
+  host_span<column_view const> struct_cols)
 {
   auto const num_children = struct_cols[0].num_children();
   auto const num_cols     = static_cast<size_type>(struct_cols.size());
diff --git a/cpp/src/structs/utilities.hpp b/cpp/src/structs/utilities.hpp
index 1e0511cfd83..613754fc765 100644
--- a/cpp/src/structs/utilities.hpp
+++ b/cpp/src/structs/utilities.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@
 #pragma once
 
 #include <cudf/structs/structs_column_view.hpp>
+#include <cudf/utilities/span.hpp>
 
 namespace cudf {
 namespace structs {
@@ -45,7 +46,7 @@ namespace detail {
  * @return New column with concatenated results.
  */
 std::vector<std::vector<column_view>> extract_ordered_struct_children(
-  std::vector<column_view> const& struct_cols);
+  host_span<column_view const> struct_cols);
 
 }  // namespace detail
 }  // namespace structs
diff --git a/cpp/tests/copying/concatenate_tests.cu b/cpp/tests/copying/concatenate_tests.cu
index e63cbac1e72..cea53326895 100644
--- a/cpp/tests/copying/concatenate_tests.cu
+++ b/cpp/tests/copying/concatenate_tests.cu
@@ -99,7 +99,7 @@ TYPED_TEST(TypedColumnTest, ConcatenateNoColumns)
 
 TYPED_TEST(TypedColumnTest, ConcatenateColumnView)
 {
-  cudf::column original{this->type(), this->num_elements(), this->data, this->mask};
+  column original{this->type(), this->num_elements(), this->data, this->mask};
   std::vector<cudf::size_type> indices{0,
                                        this->num_elements() / 3,
                                        this->num_elements() / 3,
@@ -223,7 +223,7 @@ TEST_F(TableTest, ConcatenateTables)
   cols_table2.push_back(col3_table2.release());
   Table t2(std::move(cols_table2));
 
-  auto concat_table = cudf::concatenate({t1.view(), t2.view()});
+  auto concat_table = cudf::concatenate(std::vector<TView>({t1, t2}));
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*concat_table, gold_table);
 }
@@ -341,7 +341,8 @@ TEST_F(TableTest, SizeOverflowTest)
     auto many_chars = cudf::make_fixed_width_column(cudf::data_type{cudf::type_id::INT8}, size);
 
     cudf::table_view tbl({*many_chars});
-    EXPECT_THROW(cudf::concatenate({tbl, tbl, tbl, tbl, tbl, tbl}), cudf::logic_error);
+    EXPECT_THROW(cudf::concatenate(std::vector<TView>({tbl, tbl, tbl, tbl, tbl, tbl})),
+                 cudf::logic_error);
   }
 
   // string column, overflow on chars
@@ -356,7 +357,8 @@ TEST_F(TableTest, SizeOverflowTest)
       1, offsets.release(), std::move(many_chars), 0, rmm::device_buffer{0});
 
     cudf::table_view tbl({*col});
-    EXPECT_THROW(cudf::concatenate({tbl, tbl, tbl, tbl, tbl, tbl}), cudf::logic_error);
+    EXPECT_THROW(cudf::concatenate(std::vector<TView>({tbl, tbl, tbl, tbl, tbl, tbl})),
+                 cudf::logic_error);
   }
 
   // string column, overflow on offsets (rows)
@@ -372,7 +374,8 @@ TEST_F(TableTest, SizeOverflowTest)
       size, std::move(many_offsets), chars.release(), 0, rmm::device_buffer{0});
 
     cudf::table_view tbl({*col});
-    EXPECT_THROW(cudf::concatenate({tbl, tbl, tbl, tbl, tbl, tbl}), cudf::logic_error);
+    EXPECT_THROW(cudf::concatenate(std::vector<TView>({tbl, tbl, tbl, tbl, tbl, tbl})),
+                 cudf::logic_error);
   }
 
   // list<struct>, structs too long
@@ -395,8 +398,8 @@ TEST_F(TableTest, SizeOverflowTest)
       1, offsets.release(), std::move(struct_col), 0, rmm::device_buffer{0});
 
     cudf::table_view tbl({*col});
-    EXPECT_THROW(cudf::concatenate({tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl}),
-                 cudf::logic_error);
+    auto tables = std::vector<TView>({tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl});
+    EXPECT_THROW(cudf::concatenate(tables), cudf::logic_error);
   }
 
   // struct<int, list>, list child too long
@@ -419,8 +422,8 @@ TEST_F(TableTest, SizeOverflowTest)
     auto col = cudf::make_structs_column(size, std::move(children), 0, rmm::device_buffer{0});
 
     cudf::table_view tbl({*col});
-    EXPECT_THROW(cudf::concatenate({tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl}),
-                 cudf::logic_error);
+    auto tables = std::vector<TView>({tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl});
+    EXPECT_THROW(cudf::concatenate(tables), cudf::logic_error);
   }
 }
 
@@ -463,12 +466,14 @@ TEST_F(StructsColumnTest, ConcatenateStructs)
 
   // build expected output
   std::vector<std::unique_ptr<column>> expected_children;
-  expected_children.push_back(
-    cudf::concatenate({name_cols[0], name_cols[1], name_cols[2], name_cols[3]}));
-  expected_children.push_back(
-    cudf::concatenate({age_cols[0], age_cols[1], age_cols[2], age_cols[3]}));
-  expected_children.push_back(
-    cudf::concatenate({is_human_cols[0], is_human_cols[1], is_human_cols[2], is_human_cols[3]}));
+  auto name_col_vec =
+    std::vector<column_view>({name_cols[0], name_cols[1], name_cols[2], name_cols[3]});
+  auto age_col_vec = std::vector<column_view>({age_cols[0], age_cols[1], age_cols[2], age_cols[3]});
+  auto is_human_col_vec = std::vector<column_view>(
+    {is_human_cols[0], is_human_cols[1], is_human_cols[2], is_human_cols[3]});
+  expected_children.push_back(cudf::concatenate(name_col_vec));
+  expected_children.push_back(cudf::concatenate(age_col_vec));
+  expected_children.push_back(cudf::concatenate(is_human_col_vec));
   std::vector<bool> struct_validity({1, 0, 1, 1, 1, 0});
   auto expected = make_structs_column(
     6,
@@ -484,7 +489,7 @@ TEST_F(StructsColumnTest, ConcatenateStructs)
   src.push_back(structs_column_wrapper({name_cols[3], age_cols[3], is_human_cols[3]}, {1, 0}));
 
   // concatenate
-  auto result = cudf::concatenate({src[0], src[1], src[2], src[3]});
+  auto result = cudf::concatenate(std::vector<column_view>({src[0], src[1], src[2], src[3]}));
   cudf::test::expect_columns_equivalent(*result, *expected);
 }
 
@@ -536,9 +541,13 @@ TEST_F(StructsColumnTest, ConcatenateSplitStructs)
 
   // build expected output
   std::vector<std::unique_ptr<column>> expected_children;
-  expected_children.push_back(cudf::concatenate({split_names_cols[0], split_names_cols[1]}));
-  expected_children.push_back(cudf::concatenate({split_ages_cols[0], split_ages_cols[1]}));
-  expected_children.push_back(cudf::concatenate({split_is_human_cols[0], split_is_human_cols[1]}));
+  auto expected_names = std::vector<column_view>({split_names_cols[0], split_names_cols[1]});
+  auto expected_ages  = std::vector<column_view>({split_ages_cols[0], split_ages_cols[1]});
+  auto expected_is_human =
+    std::vector<column_view>({split_is_human_cols[0], split_is_human_cols[1]});
+  expected_children.push_back(cudf::concatenate(expected_names));
+  expected_children.push_back(cudf::concatenate(expected_ages));
+  expected_children.push_back(cudf::concatenate(expected_is_human));
   auto expected = make_structs_column(7, std::move(expected_children), 0, rmm::device_buffer{});
 
   // concatenate as structs
@@ -552,7 +561,8 @@ TEST_F(StructsColumnTest, ConcatenateSplitStructs)
   }
 
   // concatenate
-  auto result = cudf::concatenate({src[0], src[1]});
+
+  auto result = cudf::concatenate(std::vector<column_view>({src[0], src[1]}));
   cudf::test::expect_columns_equivalent(*result, *expected);
 }
 
@@ -607,8 +617,11 @@ TEST_F(StructsColumnTest, ConcatenateStructsNested)
 
   // build expected output
   std::vector<std::unique_ptr<column>> expected_children;
-  expected_children.push_back(cudf::concatenate({inner_structs[0], inner_structs[1]}));
-  expected_children.push_back(cudf::concatenate({inner_lists[0], inner_lists[1]}));
+
+  expected_children.push_back(
+    cudf::concatenate(std::vector<column_view>({inner_structs[0], inner_structs[1]})));
+  expected_children.push_back(
+    cudf::concatenate(std::vector<column_view>({inner_lists[0], inner_lists[1]})));
   auto expected = make_structs_column(11, std::move(expected_children), 0, rmm::device_buffer{});
 
   // concatenate as structs
@@ -621,7 +634,7 @@ TEST_F(StructsColumnTest, ConcatenateStructsNested)
   }
 
   // concatenate
-  auto result = cudf::concatenate({src[0], src[1]});
+  auto result = cudf::concatenate(std::vector<column_view>({src[0], src[1]}));
   cudf::test::expect_columns_equivalent(*result, *expected);
 }
 
@@ -635,7 +648,7 @@ TEST_F(ListsColumnTest, ConcatenateLists)
     cudf::test::lists_column_wrapper<int> b{4, 5, 6, 7, 8, 9, 10};
     cudf::test::lists_column_wrapper<int> expected{{0, 1, 2, 3}, {4, 5, 6, 7, 8, 9, 10}};
 
-    auto result = cudf::concatenate({a, b});
+    auto result = cudf::concatenate(std::vector<column_view>({a, b}));
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
@@ -646,7 +659,7 @@ TEST_F(ListsColumnTest, ConcatenateLists)
     cudf::test::lists_column_wrapper<int> expected{
       {0, 1, 1}, {2, 3}, {4, 5}, {6}, {8, 9, 9, 9}, {10, 11}};
 
-    auto result = cudf::concatenate({a, b});
+    auto result = cudf::concatenate(std::vector<column_view>({a, b}));
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
@@ -657,7 +670,7 @@ TEST_F(ListsColumnTest, ConcatenateLists)
     cudf::test::lists_column_wrapper<int> expected{
       {0, 1}, {2, 3, 4, 5}, {6, 7, 8}, {9}, {10, 11}, {12, 13, 14, 15}};
 
-    auto result = cudf::concatenate({a, b});
+    auto result = cudf::concatenate(std::vector<column_view>({a, b}));
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
@@ -674,7 +687,7 @@ TEST_F(ListsColumnTest, ConcatenateEmptyLists)
     cudf::test::lists_column_wrapper<int> b{4, 5, 6, 7};
     cudf::test::lists_column_wrapper<int> expected{4, 5, 6, 7};
 
-    auto result = cudf::concatenate({a, b});
+    auto result = cudf::concatenate(std::vector<column_view>({a, b}));
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
@@ -684,7 +697,7 @@ TEST_F(ListsColumnTest, ConcatenateEmptyLists)
     cudf::test::lists_column_wrapper<int> d{4, 5, 6, 7};
     cudf::test::lists_column_wrapper<int> expected{4, 5, 6, 7};
 
-    auto result = cudf::concatenate({a, b, c, d});
+    auto result = cudf::concatenate(std::vector<column_view>({a, b, c, d}));
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
@@ -694,7 +707,7 @@ TEST_F(ListsColumnTest, ConcatenateEmptyLists)
     cudf::test::lists_column_wrapper<int> b{4, 5, 6, 7};
     cudf::test::lists_column_wrapper<int> expected{LCW{}, {4, 5, 6, 7}};
 
-    auto result = cudf::concatenate({a, b});
+    auto result = cudf::concatenate(std::vector<column_view>({a, b}));
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
@@ -704,7 +717,7 @@ TEST_F(ListsColumnTest, ConcatenateEmptyLists)
     cudf::test::lists_column_wrapper<int> d{4, 5, 6, 7};
     cudf::test::lists_column_wrapper<int> expected{LCW{}, LCW{}, LCW{}, {4, 5, 6, 7}};
 
-    auto result = cudf::concatenate({a, b, c, d});
+    auto result = cudf::concatenate(std::vector<column_view>({a, b, c, d}));
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
@@ -715,7 +728,7 @@ TEST_F(ListsColumnTest, ConcatenateEmptyLists)
     cudf::test::lists_column_wrapper<int> d{4, 5, 6, 7};
     cudf::test::lists_column_wrapper<int> expected{{1, 2}, LCW{}, LCW{}, {4, 5, 6, 7}};
 
-    auto result = cudf::concatenate({a, b, c, d});
+    auto result = cudf::concatenate(std::vector<column_view>({a, b, c, d}));
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
@@ -732,7 +745,7 @@ TEST_F(ListsColumnTest, ConcatenateListsWithNulls)
     cudf::test::lists_column_wrapper<int> b{{{4, 6, 7}, valids}};
     cudf::test::lists_column_wrapper<int> expected{{{0, 1, 2, 3}, valids}, {{4, 6, 7}, valids}};
 
-    auto result = cudf::concatenate({a, b});
+    auto result = cudf::concatenate(std::vector<column_view>({a, b}));
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
@@ -746,7 +759,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedLists)
     cudf::test::lists_column_wrapper<int> expected{
       {{0, 1}, {2}}, {{4, 5, 6, 7, 8, 9, 10}}, {{6, 7}}, {{8, 9, 10}, {11, 12}}};
 
-    auto result = cudf::concatenate({a, b});
+    auto result = cudf::concatenate(std::vector<column_view>({a, b}));
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
@@ -770,7 +783,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedLists)
       {{{31, 32}, {33, 34}}, {{35, 36}, {37, 38}}, {{39, 40}}},
       {{{71, 72}, {74}}, {{75, 76, 77, 78}, {77, 78}}, {{79, 80, 81}}}};
 
-    auto result = cudf::concatenate({a, b});
+    auto result = cudf::concatenate(std::vector<column_view>({a, b}));
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
@@ -789,7 +802,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedEmptyLists)
     cudf::test::lists_column_wrapper<int> expected{
       {{LCW{}}}, {{0, 1}, {2, 3}}, {{6, 7}}, {LCW{}, {11, 12}}};
 
-    auto result = cudf::concatenate({a, b});
+    auto result = cudf::concatenate(std::vector<column_view>({a, b}));
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
@@ -815,7 +828,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedEmptyLists)
       {{{31, 32}, {33, 34}}, {{35, 36}, {37, 38}, {1, 2}}, {{39, 40}}},
       {{{LCW{}}}}};
 
-    auto result = cudf::concatenate({a, b});
+    auto result = cudf::concatenate(std::vector<column_view>({a, b}));
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
@@ -834,7 +847,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedListsWithNulls)
     cudf::test::lists_column_wrapper<int> expected{{{{0, 1}, {2, 3}}, valids},
                                                    {{{4}, {6, 7}}, valids}};
 
-    auto result = cudf::concatenate({a, b});
+    auto result = cudf::concatenate(std::vector<column_view>({a, b}));
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
@@ -848,7 +861,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedListsWithNulls)
                                                    {{6, 7}},
                                                    {{{{8, 9, 10}, valids}, {11, 12}}, valids}};
 
-    auto result = cudf::concatenate({a, b});
+    auto result = cudf::concatenate(std::vector<column_view>({a, b}));
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
@@ -864,7 +877,8 @@ TEST_F(ListsColumnTest, ConcatenateMismatchedHierarchies)
     cudf::test::lists_column_wrapper<int> a{{{{LCW{}}}}};
     cudf::test::lists_column_wrapper<int> b{{{LCW{}}}};
     cudf::test::lists_column_wrapper<int> c{{LCW{}}};
-    EXPECT_THROW(cudf::concatenate({a, b, c}), cudf::logic_error);
+
+    EXPECT_THROW(cudf::concatenate(std::vector<column_view>({a, b, c})), cudf::logic_error);
   }
 
   {
@@ -872,20 +886,23 @@ TEST_F(ListsColumnTest, ConcatenateMismatchedHierarchies)
     cudf::test::lists_column_wrapper<int> a{{{{{LCW{}}}}, valids.begin()}};
     cudf::test::lists_column_wrapper<int> b{{{LCW{}}}};
     cudf::test::lists_column_wrapper<int> c{{LCW{}}};
-    EXPECT_THROW(cudf::concatenate({a, b, c}), cudf::logic_error);
+
+    EXPECT_THROW(cudf::concatenate(std::vector<column_view>({a, b, c})), cudf::logic_error);
   }
 
   {
     cudf::test::lists_column_wrapper<int> a{{{{LCW{}}}}};
     cudf::test::lists_column_wrapper<int> b{1, 2, 3};
     cudf::test::lists_column_wrapper<int> c{{3, 4, 5}};
-    EXPECT_THROW(cudf::concatenate({a, b, c}), cudf::logic_error);
+
+    EXPECT_THROW(cudf::concatenate(std::vector<column_view>({a, b, c})), cudf::logic_error);
   }
 
   {
     cudf::test::lists_column_wrapper<int> a{{{1, 2, 3}}};
     cudf::test::lists_column_wrapper<int> b{{4, 5}};
-    EXPECT_THROW(cudf::concatenate({a, b}), cudf::logic_error);
+
+    EXPECT_THROW(cudf::concatenate(std::vector<column_view>({a, b})), cudf::logic_error);
   }
 }
 
@@ -910,14 +927,16 @@ TEST_F(ListsColumnTest, SlicedColumns)
                                                     {{4, 4, 4}, {5, 5}, {6, 6}},
                                                     {{-1, -1, -1, -1}, {-2}},
                                                     {{-3, -3, -3, -3}, {-4}}};
-    auto result0 = cudf::concatenate({split_a[0], split_b[0]});
+
+    auto result0 = cudf::concatenate(std::vector<column_view>({split_a[0], split_b[0]}));
     cudf::test::expect_columns_equivalent(*result0, expected0);
 
     cudf::test::lists_column_wrapper<int> expected1{{{1, 1, 1}, {2, 2}, {3, 3}},
                                                     {{4, 4, 4}, {5, 5}, {6, 6}},
                                                     {{-5, -5, -5, -5}, {-6}},
                                                     {{-7, -7, -7, -7}, {-8}}};
-    auto result1 = cudf::concatenate({split_a[0], split_b[1]});
+
+    auto result1 = cudf::concatenate(std::vector<column_view>({split_a[0], split_b[1]}));
     cudf::test::expect_columns_equivalent(*result1, expected1);
 
     cudf::test::lists_column_wrapper<int> expected2{
@@ -926,14 +945,16 @@ TEST_F(ListsColumnTest, SlicedColumns)
       {{-1, -1, -1, -1}, {-2}},
       {{-3, -3, -3, -3}, {-4}},
     };
-    auto result2 = cudf::concatenate({split_a[1], split_b[0]});
+
+    auto result2 = cudf::concatenate(std::vector<column_view>({split_a[1], split_b[0]}));
     cudf::test::expect_columns_equivalent(*result2, expected2);
 
     cudf::test::lists_column_wrapper<int> expected3{{{7, 7, 7}, {8, 8}, {9, 9}},
                                                     {{10, 10, 10}, {11, 11}, {12, 12}},
                                                     {{-5, -5, -5, -5}, {-6}},
                                                     {{-7, -7, -7, -7}, {-8}}};
-    auto result3 = cudf::concatenate({split_a[1], split_b[1]});
+
+    auto result3 = cudf::concatenate(std::vector<column_view>({split_a[1], split_b[1]}));
     cudf::test::expect_columns_equivalent(*result3, expected3);
   }
 
@@ -958,7 +979,9 @@ TEST_F(ListsColumnTest, SlicedColumns)
       {LCW{}, {LCW{}}, {{6, 6}, {2}}},
       {{LCW{}}},
       {LCW{}, {LCW{}}}};
-    auto result0 = cudf::concatenate({split_a[0], split_b[0]});
+
+    auto result0 = cudf::concatenate(std::vector<column_view>({split_a[0], split_b[0]}));
+
     cudf::test::expect_columns_equivalent(*result0, expected0);
 
     cudf::test::lists_column_wrapper<int> expected1{
@@ -967,7 +990,8 @@ TEST_F(ListsColumnTest, SlicedColumns)
       {{{1, 2, 9}, LCW{}}, {{5, 6, 7, 8, 9}, {0}, {15, 17}}},
       {{LCW{}}},
     };
-    auto result1 = cudf::concatenate({split_a[0], split_b[1]});
+
+    auto result1 = cudf::concatenate(std::vector<column_view>({split_a[0], split_b[1]}));
     cudf::test::expect_columns_equivalent(*result1, expected1);
 
     cudf::test::lists_column_wrapper<int> expected2{
@@ -975,7 +999,8 @@ TEST_F(ListsColumnTest, SlicedColumns)
       {LCW{}, LCW{}, {{10, 10, 10}, {11, 11}, {12, 12}}, LCW{}},
       {{LCW{}}},
       {LCW{}, {LCW{}}}};
-    auto result2 = cudf::concatenate({split_a[1], split_b[0]});
+
+    auto result2 = cudf::concatenate(std::vector<column_view>({split_a[1], split_b[0]}));
     cudf::test::expect_columns_equivalent(*result2, expected2);
 
     cudf::test::lists_column_wrapper<int> expected3{
@@ -984,7 +1009,8 @@ TEST_F(ListsColumnTest, SlicedColumns)
       {{{1, 2, 9}, LCW{}}, {{5, 6, 7, 8, 9}, {0}, {15, 17}}},
       {{LCW{}}},
     };
-    auto result3 = cudf::concatenate({split_a[1], split_b[1]});
+
+    auto result3 = cudf::concatenate(std::vector<column_view>({split_a[1], split_b[1]}));
     cudf::test::expect_columns_equivalent(*result3, expected3);
   }
 }
@@ -1015,14 +1041,16 @@ TEST_F(ListsColumnTest, SlicedColumnsWithNulls)
                                                     {{{{-1, -1, -1, -1}, valids}, {-2}}, valids},
                                                     {{{{-3, -3, -3, -3}, valids}, {-4}}, valids},
                                                     {{{{-5, -5, -5, -5}, valids}, {-6}}, valids}};
-    auto result0 = cudf::concatenate({split_a[0], split_b[0]});
+
+    auto result0 = cudf::concatenate(std::vector<column_view>({split_a[0], split_b[0]}));
     cudf::test::expect_columns_equivalent(*result0, expected0);
 
     cudf::test::lists_column_wrapper<int> expected1{{{{1, 1, 1}, valids}, {2, 2}, {{3, 3}, valids}},
                                                     {{{4, 4, 4}, {{5, 5}, valids}, {6, 6}}, valids},
                                                     {{7, 7, 7}, {8, 8}, {9, 9}},
                                                     {{{{-7, -7, -7, -7}, valids}, {-8}}, valids}};
-    auto result1 = cudf::concatenate({split_a[0], split_b[1]});
+
+    auto result1 = cudf::concatenate(std::vector<column_view>({split_a[0], split_b[1]}));
     cudf::test::expect_columns_equivalent(*result1, expected1);
 
     cudf::test::lists_column_wrapper<int> expected2{
@@ -1030,13 +1058,15 @@ TEST_F(ListsColumnTest, SlicedColumnsWithNulls)
       {{{{-1, -1, -1, -1}, valids}, {-2}}, valids},
       {{{{-3, -3, -3, -3}, valids}, {-4}}, valids},
       {{{{-5, -5, -5, -5}, valids}, {-6}}, valids}};
-    auto result2 = cudf::concatenate({split_a[1], split_b[0]});
+
+    auto result2 = cudf::concatenate(std::vector<column_view>({split_a[1], split_b[0]}));
     cudf::test::expect_columns_equivalent(*result2, expected2);
 
     cudf::test::lists_column_wrapper<int> expected3{
       {{{10, 10, 10}, {11, 11}, {{12, 12}, valids}}, valids},
       {{{{-7, -7, -7, -7}, valids}, {-8}}, valids}};
-    auto result3 = cudf::concatenate({split_a[1], split_b[1]});
+
+    auto result3 = cudf::concatenate(std::vector<column_view>({split_a[1], split_b[1]}));
     cudf::test::expect_columns_equivalent(*result3, expected3);
   }
 
@@ -1068,7 +1098,8 @@ TEST_F(ListsColumnTest, SlicedColumnsWithNulls)
       {{LCW{}, {{LCW{}}, valids}}, valids},
       {{{{1, 2, 9}, LCW{}}, {{5, 6, 7, 8, 9}, {0}, {15, 17}}}, valids},
     };
-    auto result0 = cudf::concatenate({split_a[0], split_b[0]});
+
+    auto result0 = cudf::concatenate(std::vector<column_view>({split_a[0], split_b[0]}));
     cudf::test::expect_columns_equivalent(*result0, expected0);
 
     cudf::test::lists_column_wrapper<int> expected1{
@@ -1079,7 +1110,8 @@ TEST_F(ListsColumnTest, SlicedColumnsWithNulls)
       {{{LCW{}, LCW{}}, valids}},
       {{LCW{}}},
     };
-    auto result1 = cudf::concatenate({split_a[0], split_b[1]});
+
+    auto result1 = cudf::concatenate(std::vector<column_view>({split_a[0], split_b[1]}));
     cudf::test::expect_columns_equivalent(*result1, expected1);
 
     cudf::test::lists_column_wrapper<int> expected2{
@@ -1088,14 +1120,16 @@ TEST_F(ListsColumnTest, SlicedColumnsWithNulls)
       {{LCW{}, {{LCW{}}, valids}}, valids},
       {{{{1, 2, 9}, LCW{}}, {{5, 6, 7, 8, 9}, {0}, {15, 17}}}, valids},
     };
-    auto result2 = cudf::concatenate({split_a[1], split_b[0]});
+
+    auto result2 = cudf::concatenate(std::vector<column_view>({split_a[1], split_b[0]}));
     cudf::test::expect_columns_equivalent(*result2, expected2);
 
     cudf::test::lists_column_wrapper<int> expected3{
       {LCW{}, LCW{}, {{{10, 10, 10}, {{11, 11}, valids}, {12, 12}}, valids}, LCW{}},
       {{LCW{}}},
     };
-    auto result3 = cudf::concatenate({split_a[1], split_b[1]});
+
+    auto result3 = cudf::concatenate(std::vector<column_view>({split_a[1], split_b[1]}));
     cudf::test::expect_columns_equivalent(*result3, expected3);
   }
 }
@@ -1140,11 +1174,12 @@ TEST_F(ListsColumnTest, ListOfStructs)
   }
 
   // build expected output
-  auto expected_child =
-    cudf::concatenate({inner_structs[0], inner_structs[1], inner_structs[2], inner_structs[3]});
+  auto struct_views = std::vector<column_view>(
+    {inner_structs[0], inner_structs[1], inner_structs[2], inner_structs[3]});
+  auto expected_child = cudf::concatenate(struct_views);
   fixed_width_column_wrapper<int> offsets_w{0, 1, 1, 1, 1, 4, 6, 6, 6, 10, 11};
-  auto expected = make_lists_column(
-    10, std::move(offsets_w.release()), std::move(expected_child), 0, rmm::device_buffer{});
+  auto expected =
+    make_lists_column(10, offsets_w.release(), std::move(expected_child), 0, rmm::device_buffer{});
 
   // lists
   std::vector<fixed_width_column_wrapper<int>> offsets;
@@ -1154,7 +1189,7 @@ TEST_F(ListsColumnTest, ListOfStructs)
   offsets.push_back({0, 0, 4, 5});
 
   // concatenate as lists
-  std::vector<std::unique_ptr<cudf::column>> src;
+  std::vector<std::unique_ptr<column>> src;
   for (size_t idx = 0; idx < inner_structs.size(); idx++) {
     int size = static_cast<column_view>(offsets[idx]).size() - 1;
     src.push_back(make_lists_column(
@@ -1162,7 +1197,7 @@ TEST_F(ListsColumnTest, ListOfStructs)
   }
 
   // concatenate
-  auto result = cudf::concatenate({*src[0], *src[1], *src[2], *src[3]});
+  auto result = cudf::concatenate(std::vector<column_view>({*src[0], *src[1], *src[2], *src[3]}));
   cudf::test::expect_columns_equivalent(*result, *expected);
 }
 
@@ -1189,8 +1224,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointConcatentate)
   auto const b = fw_wrapper(vec.begin() + 300, vec.begin() + 700);
   auto const c = fw_wrapper(vec.begin() + 700, vec.end());
 
-  auto const columns  = std::vector<cudf::column_view>{a, b, c};
-  auto const results  = cudf::concatenate(columns);
+  auto const results  = cudf::concatenate(std::vector<cudf::column_view>{a, b, c});
   auto const expected = fw_wrapper(vec.begin(), vec.end());
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
@@ -1208,8 +1242,7 @@ TEST_F(FixedPointTest, FixedPointConcatentate)
   auto const b = fp_wrapper(vec.begin() + 300, vec.begin() + 700, scale_type{-2});
   auto const c = fp_wrapper(vec.begin() + 700, vec.end(), /*****/ scale_type{-2});
 
-  auto const columns  = std::vector<cudf::column_view>{a, b, c};
-  auto const results  = cudf::concatenate(columns);
+  auto const results  = cudf::concatenate(std::vector<cudf::column_view>{a, b, c});
   auto const expected = fp_wrapper(vec.begin(), vec.end(), scale_type{-2});
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
@@ -1227,8 +1260,7 @@ TEST_F(FixedPointTest, FixedPointScaleMismatch)
   auto const b = fp_wrapper(vec.begin() + 300, vec.begin() + 700, scale_type{-2});
   auto const c = fp_wrapper(vec.begin() + 700, vec.end(), /*****/ scale_type{-3});
 
-  auto const columns = std::vector<cudf::column_view>{a, b, c};
-  EXPECT_THROW(cudf::concatenate(columns), cudf::logic_error);
+  EXPECT_THROW(cudf::concatenate(std::vector<cudf::column_view>{a, b, c}), cudf::logic_error);
 }
 
 struct DictionaryConcatTest : public cudf::test::BaseFixture {
diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp
index b0dc01ea001..108befa80a7 100644
--- a/cpp/tests/io/orc_test.cpp
+++ b/cpp/tests/io/orc_test.cpp
@@ -29,6 +29,7 @@
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
+#include <cudf/utilities/span.hpp>
 
 #include <type_traits>
 
@@ -395,7 +396,7 @@ TEST_F(OrcWriterTest, MultiColumnWithNulls)
   auto col3_mask =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i == (num_rows - 1)); });
   auto col4_mask =
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 40 || i <= 60); });
+    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 40 && i <= 60); });
   auto col5_mask =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 80); });
 
@@ -657,7 +658,7 @@ TEST_F(OrcChunkedWriterTest, SimpleTable)
   auto table1 = create_random_fixed_table<int>(5, 5, true);
   auto table2 = create_random_fixed_table<int>(5, 5, true);
 
-  auto full_table = cudf::concatenate({*table1, *table2});
+  auto full_table = cudf::concatenate(std::vector<table_view>({*table1, *table2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedSimple.orc");
   cudf_io::chunked_orc_writer_options opts =
@@ -677,7 +678,7 @@ TEST_F(OrcChunkedWriterTest, LargeTables)
   auto table1 = create_random_fixed_table<int>(512, 4096, true);
   auto table2 = create_random_fixed_table<int>(512, 8192, true);
 
-  auto full_table = cudf::concatenate({*table1, *table2});
+  auto full_table = cudf::concatenate(std::vector<table_view>({*table1, *table2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedLarge.orc");
   cudf_io::chunked_orc_writer_options opts =
@@ -737,7 +738,7 @@ TEST_F(OrcChunkedWriterTest, Strings)
   cols.push_back(strings2.release());
   cudf::table tbl2(std::move(cols));
 
-  auto expected = cudf::concatenate({tbl1, tbl2});
+  auto expected = cudf::concatenate(std::vector<table_view>({tbl1, tbl2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedStrings.orc");
   cudf_io::chunked_orc_writer_options opts =
@@ -799,7 +800,7 @@ TEST_F(OrcChunkedWriterTest, ReadStripes)
   auto table1 = create_random_fixed_table<int>(5, 5, true);
   auto table2 = create_random_fixed_table<int>(5, 5, true);
 
-  auto full_table = cudf::concatenate({*table2, *table1, *table2});
+  auto full_table = cudf::concatenate(std::vector<table_view>({*table2, *table1, *table2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedStripes.orc");
   cudf_io::chunked_orc_writer_options opts =
@@ -863,7 +864,7 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize)
   cols.push_back(c2b_w.release());
   cudf::table tbl2(std::move(cols));
 
-  auto expected = cudf::concatenate({tbl1, tbl2});
+  auto expected = cudf::concatenate(std::vector<table_view>({tbl1, tbl2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.orc");
   cudf_io::chunked_orc_writer_options opts =
@@ -910,7 +911,7 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize2)
   cols.push_back(c2b_w.release());
   cudf::table tbl2(std::move(cols));
 
-  auto expected = cudf::concatenate({tbl1, tbl2});
+  auto expected = cudf::concatenate(std::vector<table_view>({tbl1, tbl2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.orc");
   cudf_io::chunked_orc_writer_options opts =
diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp
index 013457d8ed6..880f11aaeb2 100644
--- a/cpp/tests/io/parquet_test.cpp
+++ b/cpp/tests/io/parquet_test.cpp
@@ -23,6 +23,7 @@
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
+#include <cudf/utilities/span.hpp>
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
@@ -472,7 +473,7 @@ TEST_F(ParquetWriterTest, MultiColumnWithNulls)
   auto col3_mask =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i == (num_rows - 1)); });
   auto col4_mask =
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 40 || i <= 60); });
+    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 40 && i <= 60); });
   auto col5_mask =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 80); });
   auto col6_mask =
@@ -1218,7 +1219,7 @@ TEST_F(ParquetChunkedWriterTest, SimpleTable)
   auto table1 = create_random_fixed_table<int>(5, 5, true);
   auto table2 = create_random_fixed_table<int>(5, 5, true);
 
-  auto full_table = cudf::concatenate({*table1, *table2});
+  auto full_table = cudf::concatenate(std::vector<table_view>({*table1, *table2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedSimple.parquet");
   cudf_io::chunked_parquet_writer_options args =
@@ -1238,7 +1239,7 @@ TEST_F(ParquetChunkedWriterTest, LargeTables)
   auto table1 = create_random_fixed_table<int>(512, 4096, true);
   auto table2 = create_random_fixed_table<int>(512, 8192, true);
 
-  auto full_table = cudf::concatenate({*table1, *table2});
+  auto full_table = cudf::concatenate(std::vector<table_view>({*table1, *table2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedLarge.parquet");
   cudf_io::chunked_parquet_writer_options args =
@@ -1300,7 +1301,7 @@ TEST_F(ParquetChunkedWriterTest, Strings)
   cols.push_back(strings2.release());
   cudf::table tbl2(std::move(cols));
 
-  auto expected = cudf::concatenate({tbl1, tbl2});
+  auto expected = cudf::concatenate(std::vector<table_view>({tbl1, tbl2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedStrings.parquet");
   cudf_io::chunked_parquet_writer_options args =
@@ -1359,7 +1360,7 @@ TEST_F(ParquetChunkedWriterTest, ListColumn)
   auto tbl0 = table_view({col0_tbl0, col1_tbl0, col2_tbl0});
   auto tbl1 = table_view({col0_tbl1, col1_tbl1, col2_tbl1});
 
-  auto expected = cudf::concatenate({tbl0, tbl1});
+  auto expected = cudf::concatenate(std::vector<table_view>({tbl0, tbl1}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet");
   cudf_io::chunked_parquet_writer_options args =
@@ -1413,7 +1414,7 @@ TEST_F(ParquetChunkedWriterTest, ListOfStruct)
 
   auto table_2 = table_view({*list_col_2});
 
-  auto full_table = cudf::concatenate({table_1, table_2});
+  auto full_table = cudf::concatenate(std::vector<table_view>({table_1, table_2}));
 
   cudf_io::table_input_metadata expected_metadata(table_1);
   expected_metadata.column_metadata[0].set_name("family");
@@ -1504,7 +1505,7 @@ TEST_F(ParquetChunkedWriterTest, ListOfStructOfStructOfListOfList)
 
   auto table_2 = table_view({*list_col_2});
 
-  auto full_table = cudf::concatenate({table_1, table_2});
+  auto full_table = cudf::concatenate(std::vector<table_view>({table_1, table_2}));
 
   cudf_io::table_input_metadata expected_metadata(table_1);
   expected_metadata.column_metadata[0].set_name("family");
@@ -1639,7 +1640,7 @@ TEST_F(ParquetChunkedWriterTest, DifferentNullability)
   auto table1 = create_random_fixed_table<int>(5, 5, true);
   auto table2 = create_random_fixed_table<int>(5, 5, false);
 
-  auto full_table = cudf::concatenate({*table1, *table2});
+  auto full_table = cudf::concatenate(std::vector<table_view>({*table1, *table2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedNullable.parquet");
   cudf_io::chunked_parquet_writer_options args =
@@ -1678,7 +1679,7 @@ TEST_F(ParquetChunkedWriterTest, DifferentNullabilityStruct)
   auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}};
   auto table_2    = cudf::table_view({struct_2_2});
 
-  auto full_table = cudf::concatenate({table_1, table_2});
+  auto full_table = cudf::concatenate(std::vector<table_view>({table_1, table_2}));
 
   cudf_io::table_input_metadata expected_metadata(table_1);
   expected_metadata.column_metadata[0].set_name("being");
@@ -1707,7 +1708,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullability)
   auto table1 = create_random_fixed_table<int>(5, 5, false);
   auto table2 = create_random_fixed_table<int>(5, 5, false);
 
-  auto full_table = cudf::concatenate({*table1, *table2});
+  auto full_table = cudf::concatenate(std::vector<table_view>({*table1, *table2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedNoNullable.parquet");
 
@@ -1764,7 +1765,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList)
   auto table1 = table_view({col00, col10});
   auto table2 = table_view({col01, col11});
 
-  auto full_table = cudf::concatenate({table1, table2});
+  auto full_table = cudf::concatenate(std::vector<table_view>({table1, table2}));
 
   cudf_io::table_input_metadata metadata(table1);
   metadata.column_metadata[0].set_nullability(true);  // List is nullable at first (root) level
@@ -1809,7 +1810,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityStruct)
   auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}};
   auto table_2    = cudf::table_view({struct_2_2});
 
-  auto full_table = cudf::concatenate({table_1, table_2});
+  auto full_table = cudf::concatenate(std::vector<table_view>({table_1, table_2}));
 
   cudf_io::table_input_metadata expected_metadata(table_1);
   expected_metadata.column_metadata[0].set_name("being").set_nullability(false);
@@ -1838,7 +1839,7 @@ TEST_F(ParquetChunkedWriterTest, ReadRowGroups)
   auto table1 = create_random_fixed_table<int>(5, 5, true);
   auto table2 = create_random_fixed_table<int>(5, 5, true);
 
-  auto full_table = cudf::concatenate({*table2, *table1, *table2});
+  auto full_table = cudf::concatenate(std::vector<table_view>({*table2, *table1, *table2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedRowGroups.parquet");
   cudf_io::chunked_parquet_writer_options args =
@@ -1951,7 +1952,7 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize)
   cols.push_back(c2b_w.release());
   cudf::table tbl2(std::move(cols));
 
-  auto expected = cudf::concatenate({tbl1, tbl2});
+  auto expected = cudf::concatenate(std::vector<table_view>({tbl1, tbl2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.parquet");
   cudf_io::chunked_parquet_writer_options args =
@@ -1998,7 +1999,7 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize2)
   cols.push_back(c2b_w.release());
   cudf::table tbl2(std::move(cols));
 
-  auto expected = cudf::concatenate({tbl1, tbl2});
+  auto expected = cudf::concatenate(std::vector<table_view>({tbl1, tbl2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.parquet");
   cudf_io::chunked_parquet_writer_options args =
diff --git a/cpp/tests/merge/merge_test.cpp b/cpp/tests/merge/merge_test.cpp
index 451fa82d5a3..b7d98704aff 100644
--- a/cpp/tests/merge/merge_test.cpp
+++ b/cpp/tests/merge/merge_test.cpp
@@ -705,7 +705,7 @@ TEST_F(MergeTest, KeysWithNulls)
   auto valids2 = cudf::detail::make_counting_transform_iterator(
     0, [](auto row) { return (row % 15 == 0) ? false : true; });
   cudf::test::fixed_width_column_wrapper<int32_t> data2(data_iter, data_iter + nrows, valids2);
-  auto all_data = cudf::concatenate({data1, data2});
+  auto all_data = cudf::concatenate(std::vector<cudf::column_view>{{data1, data2}});
 
   std::vector<cudf::order> column_orders{cudf::order::ASCENDING, cudf::order::DESCENDING};
   std::vector<cudf::null_order> null_precedences{cudf::null_order::AFTER, cudf::null_order::BEFORE};
diff --git a/python/cudf/cudf/_lib/cpp/concatenate.pxd b/python/cudf/cudf/_lib/cpp/concatenate.pxd
index b5ec3bcb7d4..c776d23aa85 100644
--- a/python/cudf/cudf/_lib/cpp/concatenate.pxd
+++ b/python/cudf/cudf/_lib/cpp/concatenate.pxd
@@ -5,12 +5,22 @@ from libcpp.vector cimport vector
 
 from cudf._lib.cpp.column.column cimport column, column_view
 from cudf._lib.cpp.table.table cimport table, table_view
-from rmm._lib.device_buffer cimport device_buffer
+from cudf._lib.cpp.utilities.host_span cimport host_span
 
+from rmm._lib.device_buffer cimport device_buffer
 
 cdef extern from "cudf/concatenate.hpp" namespace "cudf" nogil:
+    # The versions of concatenate taking vectors don't exist in libcudf
+    # C++, but passing a vector works because a host_span is implicitly
+    # constructible from a vector. In case they are needed in the future,
+    # host_span versions can be added, e.g:
+    #
+    # cdef device_buffer concatenate_masks "cudf::concatenate_masks"(
+    #    host_span[column_view] views
+    # ) except +
+
     cdef device_buffer concatenate_masks "cudf::concatenate_masks"(
-        const vector[column_view] columns
+        const vector[column_view] views
     ) except +
     cdef unique_ptr[column] concatenate_columns "cudf::concatenate"(
         const vector[column_view] columns
diff --git a/python/cudf/cudf/_lib/cpp/utilities/host_span.pxd b/python/cudf/cudf/_lib/cpp/utilities/host_span.pxd
new file mode 100644
index 00000000000..cbbe3710347
--- /dev/null
+++ b/python/cudf/cudf/_lib/cpp/utilities/host_span.pxd
@@ -0,0 +1,8 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+
+from libcpp.vector cimport vector
+
+cdef extern from "cudf/utilities/span.hpp" namespace "cudf" nogil:
+    cdef cppclass host_span[T]:  # declares the C++ template cudf::host_span<T>
+        host_span() except +  # default ctor; C++ exceptions raise in Python
+        host_span(vector[T]) except +  # from vector[T]; non-owning? confirm