-
Notifications
You must be signed in to change notification settings - Fork 66
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding timeAdd with timezone kernel #1700
Changes from 4 commits
81c1fd3
9205162
c698d96
94a4891
c9e121c
6b09249
b3723e3
d710ce2
eb48460
9e7e6df
43d4ff6
0bddee8
ef08f54
fbadd79
758f2fa
eadce5c
6fd2fb7
f483608
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
/* Copyright (c) 2023, NVIDIA CORPORATION. | ||
/* Copyright (c) 2023-2024, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
|
@@ -63,8 +63,9 @@ Java_com_nvidia_spark_rapids_jni_GpuTimeZoneDB_timeAddCS(JNIEnv* env, | |
JNI_NULL_CHECK(env, transitions_handle, "column is null", 0); | ||
try { | ||
cudf::jni::auto_set_device(env); | ||
auto const input = reinterpret_cast<cudf::column_view const*>(input_handle); | ||
auto const duration = reinterpret_cast<cudf::numeric_scalar<int64_t> const*>(duration_handle); | ||
auto const input = reinterpret_cast<cudf::column_view const*>(input_handle); | ||
auto const duration = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is problematic because c++ is going to say that
Beyond that you will need to use dynamic_cast where C++ will verify that it is the type you expect. Note that dynamic_cast returns nullptr if it is not the right type. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks. I moved the |
||
reinterpret_cast<cudf::duration_scalar<cudf::duration_us> const*>(duration_handle); | ||
auto const transitions = reinterpret_cast<cudf::table_view const*>(transitions_handle); | ||
auto const index = static_cast<cudf::size_type>(tz_index); | ||
return cudf::jni::ptr_as_jlong( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* Copyright (c) 2024, NVIDIA CORPORATION. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 2023-2024? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should be 2023-2024, not 2024. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done. |
||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
|
@@ -35,10 +35,10 @@ using column = cudf::column; | |
using column_device_view = cudf::column_device_view; | ||
using column_view = cudf::column_view; | ||
using lists_column_device_view = cudf::detail::lists_column_device_view; | ||
using scalar_i64 = cudf::numeric_scalar<int64_t>; | ||
using size_type = cudf::size_type; | ||
using struct_view = cudf::struct_view; | ||
using table_view = cudf::table_view; | ||
// using scalar_i64 = cudf::numeric_scalar<cudf::duration_us>; | ||
revans2 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
using size_type = cudf::size_type; | ||
using struct_view = cudf::struct_view; | ||
using table_view = cudf::table_view; | ||
|
||
namespace { | ||
|
||
|
@@ -120,20 +120,19 @@ auto convert_timestamp_tz(column_view const& input, | |
return results; | ||
} | ||
|
||
template <typename timestamp_type> | ||
struct time_add_functor { | ||
using duration_type = typename timestamp_type::duration; | ||
using duration_type = typename cudf::timestamp_us::duration; | ||
|
||
lists_column_device_view const transitions; | ||
|
||
size_type const tz_index; | ||
|
||
int64_t const duration_scalar; | ||
cudf::duration_us const duration_scalar; | ||
|
||
__device__ inline timestamp_type plus_with_tz(timestamp_type const& timestamp, | ||
int64_t const& duration) const | ||
__device__ inline cudf::timestamp_us plus_with_tz(cudf::timestamp_us const& timestamp, | ||
cudf::duration_us const& duration) const | ||
{ | ||
if (duration == 0L) { return timestamp; } | ||
if (duration == cudf::duration_us{0}) { return timestamp; } | ||
|
||
auto const utc_instants = transitions.child().child(0); | ||
auto const tz_instants = transitions.child().child(1); | ||
|
@@ -155,6 +154,7 @@ struct time_add_functor { | |
static_cast<size_t>(list_size)); | ||
|
||
// step 1: Binary search on utc to find the correct offset to convert utc to local | ||
// Not use convert_timestamp_tz_functor because offset is needed | ||
auto const utc_it = thrust::upper_bound( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't we do this some place else? Shouldn't transitioning between time zones like this be a static inline function? And if not can you add comments as to why this is different from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The difference between this and There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I still think there is a enough code to make it common. If we take the time to make the proper abstractions on the CUDA side then adding DST support should have code changes restricted primarily to one common place. But I will leave that up to you and the rest of the people working on timezone support. |
||
thrust::seq, transition_times_utc.begin(), transition_times_utc.end(), epoch_seconds_utc); | ||
auto const utc_idx = | ||
|
@@ -166,17 +166,15 @@ struct time_add_functor { | |
auto const to_local_offset_duration = cuda::std::chrono::duration_cast<duration_type>( | ||
cudf::duration_s{static_cast<int64_t>(to_local_offset)}); | ||
|
||
auto const duration_typed = cuda::std::chrono::duration_cast<duration_type>( | ||
cudf::duration_us{static_cast<int64_t>(duration)}); | ||
|
||
// step 2: add the duration to the local timestamp | ||
auto const local_timestamp_res = timestamp + to_local_offset_duration + duration_typed; | ||
auto const local_timestamp_res = timestamp + to_local_offset_duration + duration; | ||
|
||
auto const result_epoch_seconds = static_cast<int64_t>( | ||
cuda::std::chrono::duration_cast<cudf::duration_s>(local_timestamp_res.time_since_epoch()) | ||
.count()); | ||
|
||
// step 3: Binary search on local to find the correct offset to convert local to utc | ||
// Not use convert_timestamp_tz_functor because idx may need to be adjusted after upper_bound | ||
auto const local_it = thrust::upper_bound( | ||
revans2 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
thrust::seq, transition_times_tz.begin(), transition_times_tz.end(), result_epoch_seconds); | ||
auto local_idx = | ||
|
@@ -191,8 +189,8 @@ struct time_add_functor { | |
auto const temp_list_offset = tz_transitions.element_offset(local_idx); | ||
auto const temp_offset = static_cast<int64_t>(utc_offsets.element<int32_t>(temp_list_offset)); | ||
|
||
// We don't want to check this if the idx is the first or last | ||
if (local_idx != 0 && transition_times_utc[local_idx] != INT64_MAX && | ||
// We don't want to check this if the idx is the last because they are just endpoints | ||
if (transition_times_utc[local_idx] != std::numeric_limits<int64_t>::max() && | ||
transition_times_utc[local_idx] + temp_offset <= result_epoch_seconds) { | ||
local_idx += 1; | ||
} | ||
|
@@ -204,7 +202,8 @@ struct time_add_functor { | |
// step 4: if the result is in the overlap, try to select the original offset if possible | ||
auto const early_offset = static_cast<int64_t>(upper_bound_epoch - upper_bound_utc); | ||
bool const is_gap = (upper_bound_utc + to_utc_offset == upper_bound_epoch); | ||
if (!is_gap && upper_bound_utc != INT64_MIN && upper_bound_utc != INT64_MAX) { // overlap | ||
if (!is_gap && upper_bound_utc != std::numeric_limits<int64_t>::min() && | ||
upper_bound_utc != std::numeric_limits<int64_t>::max()) { // overlap | ||
// The overlap range is [utcInstant + offsetBefore, utcInstant + offsetAfter] | ||
auto const overlap_before = static_cast<int64_t>(upper_bound_utc + to_utc_offset); | ||
if (result_epoch_seconds >= overlap_before && result_epoch_seconds <= upper_bound_epoch) { | ||
|
@@ -221,21 +220,20 @@ struct time_add_functor { | |
return local_timestamp_res - to_utc_offset_duration; | ||
} | ||
|
||
__device__ timestamp_type operator()(timestamp_type const& timestamp) const | ||
__device__ cudf::timestamp_us operator()(cudf::timestamp_us const& timestamp) const | ||
{ | ||
return plus_with_tz(timestamp, duration_scalar); | ||
} | ||
|
||
__device__ timestamp_type operator()(timestamp_type const& timestamp, | ||
int64_t const& interval) const | ||
__device__ cudf::timestamp_us operator()(cudf::timestamp_us const& timestamp, | ||
cudf::duration_us const& interval) const | ||
{ | ||
return plus_with_tz(timestamp, interval); | ||
} | ||
}; | ||
|
||
template <typename timestamp_type> | ||
auto time_add_with_tz(column_view const& input, | ||
scalar_i64 const& duration, | ||
cudf::duration_scalar<cudf::duration_us> const& duration, | ||
table_view const& transitions, | ||
size_type tz_index, | ||
rmm::cuda_stream_view stream, | ||
|
@@ -259,17 +257,15 @@ auto time_add_with_tz(column_view const& input, | |
stream, | ||
mr); | ||
|
||
thrust::transform( | ||
rmm::exec_policy(stream), | ||
input.begin<timestamp_type>(), | ||
input.end<timestamp_type>(), | ||
results->mutable_view().begin<timestamp_type>(), | ||
time_add_functor<timestamp_type>{fixed_transitions, tz_index, duration.value()}); | ||
thrust::transform(rmm::exec_policy(stream), | ||
input.begin<cudf::timestamp_us>(), | ||
input.end<cudf::timestamp_us>(), | ||
results->mutable_view().begin<cudf::timestamp_us>(), | ||
time_add_functor{fixed_transitions, tz_index, duration.value()}); | ||
|
||
return results; | ||
} | ||
|
||
template <typename timestamp_type> | ||
auto time_add_with_tz(column_view const& input, | ||
column_view const& duration, | ||
table_view const& transitions, | ||
|
@@ -288,11 +284,11 @@ auto time_add_with_tz(column_view const& input, | |
input.type(), input.size(), rmm::device_buffer(null_mask, stream), null_count, stream, mr); | ||
|
||
thrust::transform(rmm::exec_policy(stream), | ||
input.begin<timestamp_type>(), | ||
input.end<timestamp_type>(), | ||
duration.begin<int64_t>(), | ||
results->mutable_view().begin<timestamp_type>(), | ||
time_add_functor<timestamp_type>{fixed_transitions, tz_index, 0L}); | ||
input.begin<cudf::timestamp_us>(), | ||
input.end<cudf::timestamp_us>(), | ||
duration.begin<cudf::duration_us>(), | ||
results->mutable_view().begin<cudf::timestamp_us>(), | ||
time_add_functor{fixed_transitions, tz_index, cudf::duration_us{0}}); | ||
|
||
return results; | ||
} | ||
|
@@ -343,26 +339,16 @@ std::unique_ptr<column> convert_utc_timestamp_to_timezone(column_view const& inp | |
} | ||
|
||
std::unique_ptr<column> time_add(column_view const& input, | ||
scalar_i64 const& duration, | ||
cudf::duration_scalar<cudf::duration_us> const& duration, | ||
table_view const& transitions, | ||
size_type tz_index, | ||
rmm::cuda_stream_view stream, | ||
rmm::mr::device_memory_resource* mr) | ||
{ | ||
auto const type = input.type().id(); | ||
|
||
switch (type) { | ||
case cudf::type_id::TIMESTAMP_SECONDS: | ||
return time_add_with_tz<cudf::timestamp_s>( | ||
input, duration, transitions, tz_index, stream, mr); | ||
case cudf::type_id::TIMESTAMP_MILLISECONDS: | ||
return time_add_with_tz<cudf::timestamp_ms>( | ||
input, duration, transitions, tz_index, stream, mr); | ||
case cudf::type_id::TIMESTAMP_MICROSECONDS: | ||
return time_add_with_tz<cudf::timestamp_us>( | ||
input, duration, transitions, tz_index, stream, mr); | ||
default: CUDF_FAIL("Unsupported timestamp unit for timezone conversion"); | ||
if (input.type().id() != cudf::type_id::TIMESTAMP_MICROSECONDS) { | ||
CUDF_FAIL("Unsupported timestamp unit for time add with timezone"); | ||
} | ||
return time_add_with_tz(input, duration, transitions, tz_index, stream, mr); | ||
} | ||
|
||
std::unique_ptr<column> time_add(column_view const& input, | ||
|
@@ -372,20 +358,10 @@ std::unique_ptr<column> time_add(column_view const& input, | |
rmm::cuda_stream_view stream, | ||
rmm::mr::device_memory_resource* mr) | ||
{ | ||
auto const type = input.type().id(); | ||
|
||
switch (type) { | ||
case cudf::type_id::TIMESTAMP_SECONDS: | ||
return time_add_with_tz<cudf::timestamp_s>( | ||
input, duration, transitions, tz_index, stream, mr); | ||
case cudf::type_id::TIMESTAMP_MILLISECONDS: | ||
return time_add_with_tz<cudf::timestamp_ms>( | ||
input, duration, transitions, tz_index, stream, mr); | ||
case cudf::type_id::TIMESTAMP_MICROSECONDS: | ||
return time_add_with_tz<cudf::timestamp_us>( | ||
input, duration, transitions, tz_index, stream, mr); | ||
default: CUDF_FAIL("Unsupported timestamp unit for timezone conversion"); | ||
if (input.type().id() != cudf::type_id::TIMESTAMP_MICROSECONDS) { | ||
CUDF_FAIL("Unsupported timestamp unit for time add with timezone"); | ||
} | ||
return time_add_with_tz(input, duration, transitions, tz_index, stream, mr); | ||
} | ||
|
||
} // namespace spark_rapids_jni |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* Copyright (c) 2023-2024, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
|
@@ -81,7 +81,7 @@ std::unique_ptr<cudf::column> convert_utc_timestamp_to_timezone( | |
*/ | ||
std::unique_ptr<cudf::column> time_add( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: duration parameter has 2 types: |
||
cudf::column_view const& input, | ||
cudf::numeric_scalar<int64_t> const& duration, | ||
cudf::duration_scalar<cudf::duration_us> const& duration, | ||
cudf::table_view const& transitions, | ||
cudf::size_type tz_index, | ||
rmm::cuda_stream_view stream = cudf::get_default_stream(), | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need a null check on the duration handle too.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.