Header-only refactoring of derive_trajectories #628

Merged
Changes from 13 commits

97 changes: 97 additions & 0 deletions cpp/include/cuspatial/experimental/derive_trajectories.cuh
@@ -0,0 +1,97 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

#include <cstdint>
#include <iterator>
#include <memory>

namespace cuspatial {

/**
* @addtogroup trajectory_api
* @{
*/

/**
* @brief Derive trajectories from object ids, points, and timestamps.
*
* Groups the input object ids to determine unique trajectories, and writes all input data to the
Review comment (Contributor): This reads like the input data will be reordered in-place.
* output ranges, grouped by object ID and ordered by timestamp within each group. Returns a vector
* containing the offset index of the first object of each trajectory in the output.
*
* @tparam IdInputIt Iterator over object IDs. Must meet the requirements of
* [LegacyRandomAccessIterator][LinkLRAI] and be device-readable.
* @tparam PointInputIt Iterator over points. Must meet the requirements of
* [LegacyRandomAccessIterator][LinkLRAI] and be device-readable.
* @tparam TimestampInputIt Iterator over timestamps. Must meet the requirements of
* [LegacyRandomAccessIterator][LinkLRAI] and be device-readable.
* @tparam IdOutputIt Iterator over output object IDs. Must meet the requirements of
* [LegacyRandomAccessIterator][LinkLRAI] and be device-writeable.
* @tparam PointOutputIt Iterator over output points. Must meet the requirements of
* [LegacyRandomAccessIterator][LinkLRAI] and be device-writeable.
* @tparam TimestampOutputIt Iterator over output timestamps. Must meet the requirements of
* [LegacyRandomAccessIterator][LinkLRAI] and be device-writeable.
*
* @param ids_first beginning of the range of input object ids
* @param ids_last end of the range of input object ids
* @param points_first beginning of the range of input point (x,y) coordinates
* @param timestamps_first beginning of the range of input timestamps
* @param ids_out_first beginning of the range of output object ids
* @param points_out_first beginning of the range of output point (x,y) coordinates
* @param timestamps_out_first beginning of the range of output timestamps
* @param stream the CUDA stream on which to perform computations and allocate memory.
* @param mr optional resource to use for output device memory allocations
*
* @return a unique_ptr to a device_vector containing the offset index of the first object of each
* trajectory in the sorted output. These offsets can be used to access the sorted output data.
*
* @pre There must be no overlap between any of the input and output ranges.
* @pre The type of the object IDs and timestamps must support strict weak ordering via comparison
* operators.
*
* [LinkLRAI]: https://en.cppreference.com/w/cpp/named_req/RandomAccessIterator
* "LegacyRandomAccessIterator"
*/
template <typename IdInputIt,
typename PointInputIt,
typename TimestampInputIt,
typename IdOutputIt,
typename PointOutputIt,
typename TimestampOutputIt,
typename OffsetType = std::int32_t>
std::unique_ptr<rmm::device_vector<OffsetType>> derive_trajectories(
IdInputIt ids_first,
IdInputIt ids_last,
PointInputIt points_first,
TimestampInputIt timestamps_first,
IdOutputIt ids_out_first,
PointOutputIt points_out_first,
TimestampOutputIt timestamps_out_first,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @} // end of doxygen group
*/

} // namespace cuspatial

#include <cuspatial/experimental/detail/derive_trajectories.cuh>
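
Editor's note (illustrative, not part of the PR): the sketch below shows one way the header-only API declared above might be called. It follows the same zip-iterator pattern that the cudf wrapper in cpp/src/trajectory/derive_trajectories.cu uses for the (x, y) coordinates; all container names, sizes, and values are hypothetical.

// example_derive_trajectories.cu -- hypothetical usage sketch, not part of this PR
#include <cuspatial/experimental/derive_trajectories.cuh>

#include <rmm/device_vector.hpp>

#include <thrust/iterator/zip_iterator.h>

#include <cstdint>
#include <vector>

void example()
{
  // Three points from two objects (ids 1, 0, 0), deliberately out of order.
  auto h_ids = std::vector<std::int32_t>{1, 0, 0};
  auto h_xs  = std::vector<double>{3.0, 1.0, 2.0};
  auto h_ys  = std::vector<double>{3.0, 1.0, 2.0};
  auto h_ts  = std::vector<std::int64_t>{30, 10, 20};

  auto ids = rmm::device_vector<std::int32_t>(h_ids.begin(), h_ids.end());
  auto xs  = rmm::device_vector<double>(h_xs.begin(), h_xs.end());
  auto ys  = rmm::device_vector<double>(h_ys.begin(), h_ys.end());
  auto ts  = rmm::device_vector<std::int64_t>(h_ts.begin(), h_ts.end());

  // Output ranges must be pre-allocated and must not overlap the inputs.
  auto ids_out = rmm::device_vector<std::int32_t>(ids.size());
  auto xs_out  = rmm::device_vector<double>(ids.size());
  auto ys_out  = rmm::device_vector<double>(ids.size());
  auto ts_out  = rmm::device_vector<std::int64_t>(ids.size());

  // The returned vector holds, for each trajectory, the index of its first point in the
  // sorted output, so offsets->size() is the number of trajectories.
  auto offsets =
    cuspatial::derive_trajectories(ids.begin(),
                                   ids.end(),
                                   thrust::make_zip_iterator(xs.begin(), ys.begin()),
                                   ts.begin(),
                                   ids_out.begin(),
                                   thrust::make_zip_iterator(xs_out.begin(), ys_out.begin()),
                                   ts_out.begin());
}
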
155 changes: 155 additions & 0 deletions cpp/include/cuspatial/experimental/detail/derive_trajectories.cuh
@@ -0,0 +1,155 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/exec_policy.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

#include <thrust/gather.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/reduce.h>
#include <thrust/scan.h>
#include <thrust/sort.h>

#include <cub/device/device_merge_sort.cuh>

#include <cstdint>
#include <iterator>
#include <memory>

namespace cuspatial {

namespace detail {

template <typename Tuple>
struct trajectory_comparator {
__device__ bool operator()(Tuple const& lhs, Tuple const& rhs)
{
auto lhs_id = thrust::get<0>(lhs);
auto rhs_id = thrust::get<0>(rhs);
auto lhs_ts = thrust::get<1>(lhs);
auto rhs_ts = thrust::get<1>(rhs);
return (lhs_id < rhs_id) || ((lhs_id == rhs_id) && (lhs_ts < rhs_ts));
};
};

template <typename IdInputIt,
typename PointInputIt,
typename TimestampInputIt,
typename IdOutputIt,
typename PointOutputIt,
typename TimestampOutputIt>
void order_trajectories(IdInputIt ids_first,
IdInputIt ids_last,
PointInputIt points_first,
TimestampInputIt timestamps_first,
IdOutputIt ids_out_first,
PointOutputIt points_out_first,
TimestampOutputIt timestamps_out_first,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
using id_type = typename std::iterator_traits<IdInputIt>::value_type;
using timestamp_type = typename std::iterator_traits<TimestampInputIt>::value_type;
using tuple_type = thrust::tuple<id_type, timestamp_type>;

auto keys_first = thrust::make_zip_iterator(ids_first, timestamps_first);
auto keys_out_first = thrust::make_zip_iterator(ids_out_first, timestamps_out_first);

std::size_t temp_storage_bytes = 0;
cub::DeviceMergeSort::SortPairsCopy(nullptr,
temp_storage_bytes,
keys_first,
points_first,
keys_out_first,
points_out_first,
std::distance(ids_first, ids_last),
trajectory_comparator<tuple_type>{},
stream);

auto temp_storage = rmm::device_buffer(temp_storage_bytes, stream, mr);

cub::DeviceMergeSort::SortPairsCopy(temp_storage.data(),
temp_storage_bytes,
keys_first,
points_first,
keys_out_first,
points_out_first,
std::distance(ids_first, ids_last),
trajectory_comparator<tuple_type>{},
stream);

stream.synchronize();
}

} // namespace detail

template <typename IdInputIt,
typename PointInputIt,
typename TimestampInputIt,
typename IdOutputIt,
typename PointOutputIt,
typename TimestampOutputIt,
typename OffsetType>
std::unique_ptr<rmm::device_vector<OffsetType>> derive_trajectories(
IdInputIt ids_first,
IdInputIt ids_last,
PointInputIt points_first,
TimestampInputIt timestamps_first,
IdOutputIt ids_out_first,
PointOutputIt points_out_first,
TimestampOutputIt timestamps_out_first,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
detail::order_trajectories(ids_first,
ids_last,
points_first,
timestamps_first,
ids_out_first,
points_out_first,
timestamps_out_first,
stream,
mr);

auto const num_points = std::distance(ids_first, ids_last);
Review thread:

Contributor: I noticed here that you're computing the number of points based on the number of ids. I suggest that you error check here that the number of ids, points, and trajectories are all equal.

Member Author: That's not possible -- we only get a last iterator for ids. Assuming (not asserting) the iterator ranges are equal is standard practice in iterator-based APIs like this (and Thrust, and STL).

Member Author (@harrism, Sep 21, 2022): Also, the beauty of an iterator-based API is flexibility / fusability. The caller is free to pass anything that looks like an iterator -- it might be, for example, a counting iterator, or some other dynamic iterator type that generates an open-ended sequence. There is no end. :) (An illustrative sketch of this point appears after this file's diff.)

auto lengths = rmm::device_uvector<OffsetType>(num_points, stream);
auto grouped = thrust::reduce_by_key(rmm::exec_policy(stream),
ids_out_first,
ids_out_first + num_points,
thrust::make_constant_iterator(1),
thrust::make_discard_iterator(),
lengths.begin());

auto const num_trajectories = std::distance(lengths.begin(), grouped.second);
auto offsets = std::make_unique<rmm::device_vector<OffsetType>>(
num_trajectories, rmm::mr::thrust_allocator<OffsetType>(stream, mr));

thrust::exclusive_scan(rmm::exec_policy(stream),
lengths.begin(),
lengths.begin() + num_trajectories,
offsets->begin());

return offsets;
}

/**
* @} // end of doxygen group
*/

} // namespace cuspatial
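
Editor's note (illustrative, not part of the PR): the review exchange above about iterator ranges is easy to demonstrate. Because the API is iterator-based, only the id range [ids_first, ids_last) has an end; the other inputs may be generated on the fly. The sketch below passes a thrust::counting_iterator as the timestamp range, so no timestamp buffer is ever materialized. All names and values are hypothetical.

// counting_iterator_timestamps.cu -- hypothetical sketch of a generated (non-materialized) input range
#include <cuspatial/experimental/derive_trajectories.cuh>

#include <rmm/device_vector.hpp>

#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/zip_iterator.h>

#include <cstdint>
#include <vector>

void counting_iterator_example()
{
  auto h_ids = std::vector<std::int32_t>{0, 0, 1, 1};
  auto h_xs  = std::vector<double>{0.0, 1.0, 2.0, 3.0};
  auto h_ys  = std::vector<double>{0.5, 1.5, 2.5, 3.5};

  auto ids = rmm::device_vector<std::int32_t>(h_ids.begin(), h_ids.end());
  auto xs  = rmm::device_vector<double>(h_xs.begin(), h_xs.end());
  auto ys  = rmm::device_vector<double>(h_ys.begin(), h_ys.end());

  auto ids_out = rmm::device_vector<std::int32_t>(ids.size());
  auto xs_out  = rmm::device_vector<double>(ids.size());
  auto ys_out  = rmm::device_vector<double>(ids.size());
  auto ts_out  = rmm::device_vector<std::int32_t>(ids.size());

  // Synthetic timestamps 0, 1, 2, ... generated on demand; the sequence has no end, and
  // exactly ids.size() elements of it are read.
  auto timestamps = thrust::make_counting_iterator<std::int32_t>(0);

  auto offsets =
    cuspatial::derive_trajectories(ids.begin(),
                                   ids.end(),
                                   thrust::make_zip_iterator(xs.begin(), ys.begin()),
                                   timestamps,
                                   ids_out.begin(),
                                   thrust::make_zip_iterator(xs_out.begin(), ys_out.begin()),
                                   ts_out.begin());
}
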
3 changes: 3 additions & 0 deletions cpp/include/cuspatial/trajectory.hpp
@@ -16,6 +16,9 @@

#pragma once

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/table/table.hpp>
#include <cudf/types.hpp>

#include <rmm/mr/device/per_device_resource.hpp>
102 changes: 64 additions & 38 deletions cpp/src/trajectory/derive_trajectories.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,30 +14,75 @@
* limitations under the License.
*/

#include <cudf/utilities/type_dispatcher.hpp>
#include <cuspatial/error.hpp>
#include <cuspatial/trajectory.hpp>
#include <cuspatial/experimental/derive_trajectories.cuh>
#include <cuspatial/experimental/type_utils.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/copying.hpp>
#include <cudf/detail/sorting.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/distance.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/reduce.h>
#include <thrust/scan.h>

#include <memory>
#include <vector>

namespace cuspatial {
namespace detail {

struct derive_trajectories_dispatch {
template <
typename T,
typename Timestamp,
std::enable_if_t<std::is_floating_point_v<T> and cudf::is_timestamp<Timestamp>()>* = nullptr>
std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::column>> operator()(
cudf::column_view const& object_id,
cudf::column_view const& x,
cudf::column_view const& y,
cudf::column_view const& timestamp,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto cols = std::vector<std::unique_ptr<cudf::column>>{};
cols.reserve(4);
cols.push_back(cudf::allocate_like(object_id, cudf::mask_allocation_policy::NEVER, mr));
cols.push_back(cudf::allocate_like(x, cudf::mask_allocation_policy::NEVER, mr));
cols.push_back(cudf::allocate_like(y, cudf::mask_allocation_policy::NEVER, mr));
cols.push_back(cudf::allocate_like(timestamp, cudf::mask_allocation_policy::NEVER, mr));

auto points_begin = thrust::make_zip_iterator(x.begin<T>(), y.begin<T>());
auto points_out_begin = thrust::make_zip_iterator(cols[1]->mutable_view().begin<T>(),
cols[2]->mutable_view().begin<T>());

auto offsets = derive_trajectories(object_id.begin<std::int32_t>(),
object_id.end<std::int32_t>(),
points_begin,
timestamp.begin<Timestamp>(),
cols[0]->mutable_view().begin<std::int32_t>(),
points_out_begin,
cols[3]->mutable_view().begin<Timestamp>(),
stream,
mr);

auto result_table = std::make_unique<cudf::table>(std::move(cols));
auto offsets_column = std::make_unique<cudf::column>(cudf::column_view(
cudf::data_type(cudf::type_id::INT32), offsets->size(), offsets->data().get()));

return {std::move(result_table), std::move(offsets_column)};
}

template <typename T,
typename Timestamp,
std::enable_if_t<not(std::is_floating_point_v<T> and
cudf::is_timestamp<Timestamp>())>* = nullptr>
std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::column>> operator()(...)
{
CUSPATIAL_FAIL("Unsupported data type");
}
};

std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::column>> derive_trajectories(
cudf::column_view const& object_id,
cudf::column_view const& x,
@@ -46,34 +91,15 @@ std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::column>> derive_tr
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto sorted = cudf::detail::sort_by_key(cudf::table_view{{object_id, x, y, timestamp}},
cudf::table_view{{object_id, timestamp}},
{},
{},
stream,
mr);

auto sorted_id = sorted->get_column(0).view();
rmm::device_vector<int32_t> lengths(object_id.size());
auto grouped = thrust::reduce_by_key(rmm::exec_policy(stream),
sorted_id.begin<int32_t>(),
sorted_id.end<int32_t>(),
thrust::make_constant_iterator(1),
thrust::make_discard_iterator(),
lengths.begin());

auto offsets = cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT32},
thrust::distance(lengths.begin(), grouped.second),
cudf::mask_state::UNALLOCATED,
stream,
mr);

thrust::exclusive_scan(rmm::exec_policy(stream),
lengths.begin(),
lengths.end(),
offsets->mutable_view().begin<int32_t>());

return std::make_pair(std::move(sorted), std::move(offsets));
return cudf::double_type_dispatcher(x.type(),
timestamp.type(),
derive_trajectories_dispatch{},
object_id,
x,
y,
timestamp,
stream,
mr);
}
} // namespace detail
