-
Notifications
You must be signed in to change notification settings - Fork 154
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Header-only refactoring of derive_trajectories #628
Changes from 13 commits
a189125
f176d77
0a2e458
f7084bf
8da63a7
6a171f4
d3a4c86
26f3e16
1983158
ac9355d
7093d27
3fd8794
ad78166
b7acee6
528c656
1a2a814
235ceb3
4843126
efaca35
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
/* | ||
* Copyright (c) 2022, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <rmm/cuda_stream_view.hpp> | ||
#include <rmm/device_vector.hpp> | ||
#include <rmm/mr/device/device_memory_resource.hpp> | ||
|
||
#include <iterator> | ||
#include <memory> | ||
|
||
namespace cuspatial { | ||
|
||
/** | ||
* @addtogroup trajectory_api | ||
* @{ | ||
*/ | ||
|
||
/** | ||
* @brief Derive trajectories from object ids, points, and timestamps. | ||
* | ||
* Groups the input object ids to determine unique trajectories, and copies all input data to the | ||
* output ranges, grouped by object ID and ordered by timestamp within groups. The input ranges are | ||
* not modified. Returns a vector containing the offset index of the first object of each | ||
* trajectory in the output. | ||
* | ||
* @tparam IdInputIt Iterator over object IDs. Must meet the requirements of | ||
* [LegacyRandomAccessIterator][LinkLRAI] and be device-readable. | ||
* @tparam PointInputIt Iterator over points. Must meet the requirements of | ||
* [LegacyRandomAccessIterator][LinkLRAI] and be device-readable. | ||
* @tparam TimestampInputIt Iterator over timestamps. Must meet the requirements of | ||
* [LegacyRandomAccessIterator][LinkLRAI] and be device-readable. | ||
* @tparam IdOutputIt Iterator over output object IDs. Must meet the requirements of | ||
* [LegacyRandomAccessIterator][LinkLRAI] and be device-writeable. | ||
* @tparam PointOutputIt Iterator over output points. Must meet the requirements of | ||
* [LegacyRandomAccessIterator][LinkLRAI] and be device-writeable. | ||
* @tparam TimestampOutputIt Iterator over output timestamps. Must meet the requirements of | ||
* [LegacyRandomAccessIterator][LinkLRAI] and be device-writeable. | ||
* | ||
* @param ids_first beginning of the range of input object ids | ||
* @param ids_last end of the range of input object ids | ||
harrism marked this conversation as resolved.
Show resolved
Hide resolved
|
||
* @param points_first beginning of the range of input point (x,y) coordinates | ||
* @param timestamps_first beginning of the range of input timestamps | ||
* @param ids_out_first beginning of the range of output object ids | ||
* @param points_out_first beginning of the range of output point (x,y) coordinates | ||
* @param timestamps_out_first beginning of the range of output timestamps | ||
* @param stream the CUDA stream on which to perform computations and allocate memory. | ||
* @param mr optional resource to use for output device memory allocations | ||
* | ||
* @return a unique_ptr to a device_vector containing the offset index of the first object of each | ||
* trajectory in the sorted output. These offsets can be used to access the sorted output data. | ||
* | ||
* @pre There must be no overlap between any of the input and output ranges. | ||
* @pre The type of the object IDs and timestamps must support strict weak ordering via comparison | ||
* operators. | ||
* | ||
* [LinkLRAI]: https://en.cppreference.com/w/cpp/named_req/RandomAccessIterator | ||
* "LegacyRandomAccessIterator" | ||
*/ | ||
template <typename IdInputIt, | ||
typename PointInputIt, | ||
typename TimestampInputIt, | ||
typename IdOutputIt, | ||
typename PointOutputIt, | ||
typename TimestampOutputIt, | ||
typename OffsetType = std::int32_t> | ||
std::unique_ptr<rmm::device_vector<OffsetType>> derive_trajectories( | ||
IdInputIt ids_first, | ||
IdInputIt ids_last, | ||
PointInputIt points_first, | ||
TimestampInputIt timestamps_first, | ||
IdOutputIt ids_output_first, | ||
PointOutputIt points_output_first, | ||
TimestampOutputIt timestamps_output_first, | ||
rmm::cuda_stream_view stream = rmm::cuda_stream_default, | ||
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); | ||
|
||
/** | ||
* @} // end of doxygen group | ||
*/ | ||
|
||
} // namespace cuspatial | ||
|
||
#include <cuspatial/experimental/detail/derive_trajectories.cuh> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
/* | ||
* Copyright (c) 2022, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <rmm/cuda_stream_view.hpp> | ||
#include <rmm/device_uvector.hpp> | ||
#include <rmm/device_vector.hpp> | ||
#include <rmm/exec_policy.hpp> | ||
|
||
#include <thrust/gather.h> | ||
#include <thrust/iterator/constant_iterator.h> | ||
#include <thrust/iterator/discard_iterator.h> | ||
#include <thrust/iterator/zip_iterator.h> | ||
#include <thrust/reduce.h> | ||
#include <thrust/sort.h> | ||
|
||
#include <cub/device/device_merge_sort.cuh> | ||
|
||
#include <cstdint> | ||
|
||
namespace cuspatial { | ||
|
||
namespace detail { | ||
|
||
template <typename Tuple> | ||
struct trajectory_comparator { | ||
__device__ bool operator()(Tuple const& lhs, Tuple const& rhs) | ||
{ | ||
auto lhs_id = thrust::get<0>(lhs); | ||
auto rhs_id = thrust::get<0>(rhs); | ||
auto lhs_ts = thrust::get<1>(lhs); | ||
auto rhs_ts = thrust::get<1>(rhs); | ||
return (lhs_id < rhs_id) || ((lhs_id == rhs_id) && (lhs_ts < rhs_ts)); | ||
}; | ||
}; | ||
|
||
template <typename IdInputIt, | ||
typename PointInputIt, | ||
typename TimestampInputIt, | ||
typename IdOutputIt, | ||
typename PointOutputIt, | ||
typename TimestampOutputIt> | ||
void order_trajectories(IdInputIt ids_first, | ||
IdInputIt ids_last, | ||
PointInputIt points_first, | ||
TimestampInputIt timestamps_first, | ||
IdOutputIt ids_out_first, | ||
PointOutputIt points_out_first, | ||
TimestampOutputIt timestamps_out_first, | ||
rmm::cuda_stream_view stream, | ||
rmm::mr::device_memory_resource* mr) | ||
{ | ||
using id_type = typename std::iterator_traits<IdInputIt>::value_type; | ||
using timestamp_type = typename std::iterator_traits<TimestampInputIt>::value_type; | ||
harrism marked this conversation as resolved.
Show resolved
Hide resolved
|
||
using tuple_type = thrust::tuple<id_type, timestamp_type>; | ||
|
||
auto keys_first = thrust::make_zip_iterator(ids_first, timestamps_first); | ||
auto keys_out_first = thrust::make_zip_iterator(ids_out_first, timestamps_out_first); | ||
|
||
std::size_t temp_storage_bytes = 0; | ||
cub::DeviceMergeSort::SortPairsCopy(nullptr, | ||
temp_storage_bytes, | ||
keys_first, | ||
points_first, | ||
keys_out_first, | ||
points_out_first, | ||
std::distance(ids_first, ids_last), | ||
trajectory_comparator<tuple_type>{}, | ||
stream); | ||
|
||
auto temp_storage = rmm::device_buffer(temp_storage_bytes, stream, mr); | ||
|
||
cub::DeviceMergeSort::SortPairsCopy(temp_storage.data(), | ||
temp_storage_bytes, | ||
keys_first, | ||
points_first, | ||
keys_out_first, | ||
points_out_first, | ||
std::distance(ids_first, ids_last), | ||
trajectory_comparator<tuple_type>{}, | ||
stream); | ||
|
||
stream.synchronize(); | ||
} | ||
|
||
} // namespace detail | ||
|
||
template <typename IdInputIt, | ||
typename PointInputIt, | ||
typename TimestampInputIt, | ||
typename IdOutputIt, | ||
typename PointOutputIt, | ||
typename TimestampOutputIt, | ||
typename OffsetType> | ||
std::unique_ptr<rmm::device_vector<OffsetType>> derive_trajectories( | ||
harrism marked this conversation as resolved.
Show resolved
Hide resolved
|
||
IdInputIt ids_first, | ||
IdInputIt ids_last, | ||
PointInputIt points_first, | ||
TimestampInputIt timestamps_first, | ||
IdOutputIt ids_out_first, | ||
PointOutputIt points_out_first, | ||
TimestampOutputIt timestamps_out_first, | ||
rmm::cuda_stream_view stream, | ||
rmm::mr::device_memory_resource* mr) | ||
{ | ||
detail::order_trajectories(ids_first, | ||
ids_last, | ||
points_first, | ||
timestamps_first, | ||
ids_out_first, | ||
points_out_first, | ||
timestamps_out_first, | ||
stream, | ||
mr); | ||
|
||
auto const num_points = std::distance(ids_first, ids_last); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I noticed here that you're computing the number of points based on the number of ids. I suggest that you error check here that the number of ids, points, and trajectories are all equal. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's not possible -- we only get a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, the beauty of an iterator-based API is flexibility / fusability. The caller is free to pass anything that looks like an iterator -- it might be, for example, a counting iterator, or some other dynamic iterator type that generates an open-ended sequence. There is no end. :) |
||
auto lengths = rmm::device_uvector<OffsetType>(num_points, stream); | ||
auto grouped = thrust::reduce_by_key(rmm::exec_policy(stream), | ||
ids_out_first, | ||
ids_out_first + num_points, | ||
thrust::make_constant_iterator(1), | ||
thrust::make_discard_iterator(), | ||
lengths.begin()); | ||
|
||
auto const num_trajectories = std::distance(lengths.begin(), grouped.second); | ||
auto offsets = std::make_unique<rmm::device_vector<OffsetType>>( | ||
num_trajectories, rmm::mr::thrust_allocator<OffsetType>(stream, mr)); | ||
|
||
thrust::exclusive_scan(rmm::exec_policy(stream), | ||
lengths.begin(), | ||
lengths.begin() + num_trajectories, | ||
offsets->begin()); | ||
|
||
return offsets; | ||
} | ||
|
||
/** | ||
* @} // end of doxygen group | ||
*/ | ||
|
||
} // namespace cuspatial |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This reads like the input data will be reordered in-place.