Skip to content

Commit

Permalink
Merge branch 'branch-21.12' into split-up-incl-scan
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Oct 1, 2021
2 parents 2d6f52c + 91f1dea commit 7c98115
Show file tree
Hide file tree
Showing 21 changed files with 500 additions and 491 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ repos:
hooks:
- id: mypy
args: ["--config-file=python/cudf/setup.cfg", "python/cudf/cudf"]
pass_filenames: false
- repo: https://github.com/pycqa/pydocstyle
rev: 6.0.0
hooks:
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/get_cucollections.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ function(find_and_configure_cucollections)
GLOBAL_TARGETS cuco::cuco
CPM_ARGS
GITHUB_REPOSITORY NVIDIA/cuCollections
GIT_TAG 0d602ae21ea4f38d23ed816aa948453d97b2ee4e
GIT_TAG 729857a5698a0e8d8f812e0464f65f37854ae17b
OPTIONS "BUILD_TESTS OFF"
"BUILD_BENCHMARKS OFF"
"BUILD_EXAMPLES OFF"
Expand Down
91 changes: 87 additions & 4 deletions cpp/include/cudf/detail/indexalator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -502,17 +502,32 @@ struct indexalator_factory {
iter = make_input_iterator(col);
}

__device__ thrust::pair<size_type, bool> operator()(size_type i) const
{
return {iter[i], (has_nulls ? bit_is_set(null_mask, i + offset) : true)};
}
};

/**
* @brief An index accessor that returns a validity flag along with the index value.
*
* This is suitable as a `pair_iterator`.
*/
struct scalar_nullable_index_accessor {
// Iterator over the scalar's index value; dereferenced on every call.
input_indexalator iter;
// True when the source scalar is NOT valid (initialised from !input.is_valid()).
bool const is_null;

/**
* @brief Create an accessor from a scalar.
*/
nullable_index_accessor(scalar const& input) : has_nulls{!input.is_valid()}
scalar_nullable_index_accessor(scalar const& input) : is_null{!input.is_valid()}
{
iter = indexalator_factory::make_input_iterator(input);
}

// The position argument is unnamed and ignored: the same scalar value is returned each time.
__device__ thrust::pair<size_type, bool> operator()(size_type i) const
__device__ thrust::pair<size_type, bool> operator()(size_type) const
{
return {iter[i], (has_nulls ? bit_is_set(null_mask, i + offset) : true)};
// NOTE(review): the struct is documented as returning a validity flag, yet the second
// member here is `is_null`, which is true when the scalar is invalid. This looks inverted
// relative to "true means valid" -- confirm against the consumers of this pair.
return {*iter, is_null};
}
};

Expand All @@ -530,7 +545,75 @@ struct indexalator_factory {
static auto make_input_pair_iterator(scalar const& input)
{
// The constant iterator hands 0 to the accessor on every dereference; the accessor is
// constructed from the scalar and produces the (index, flag) pair.
return thrust::make_transform_iterator(thrust::make_constant_iterator<size_type>(0),
nullable_index_accessor{input});
scalar_nullable_index_accessor{input});
}

/**
 * @brief Index accessor producing an optional index for each row of a column.
 *
 * The result for a row holds the index value when the row's validity bit is set
 * (or when null checking is disabled) and is empty otherwise.
 *
 * This is suitable as an `optional_iterator`.
 */
struct optional_index_accessor {
  input_indexalator iter;
  bitmask_type const* null_mask{};
  size_type const offset{};
  bool const has_nulls{};

  /**
   * @brief Build the accessor from a column_view.
   *
   * @param col Column of index values to read.
   * @param has_nulls Whether the validity mask must be consulted. When true,
   *                  the column is required to be nullable.
   */
  optional_index_accessor(column_view const& col, bool has_nulls = false)
    : null_mask{col.null_mask()}, offset{col.offset()}, has_nulls{has_nulls}
  {
    // Validate before building the iterator so a bad request fails early.
    if (has_nulls) { CUDF_EXPECTS(col.nullable(), "Unexpected non-nullable column."); }
    iter = make_input_iterator(col);
  }

  /**
   * @brief Return the index at row `i`, or an empty optional if that row is null.
   */
  __device__ thrust::optional<size_type> operator()(size_type i) const
  {
    // The mask is only read when null checking was requested at construction.
    bool const row_is_null = has_nulls && !bit_is_set(null_mask, i + offset);
    if (row_is_null) { return thrust::nullopt; }
    return thrust::make_optional(iter[i]);
  }
};

/**
 * @brief Index accessor producing an optional index from a single scalar.
 *
 * Every call yields the scalar's value when the scalar is valid and an empty
 * optional otherwise.
 *
 * This is suitable as an `optional_iterator`.
 */
struct scalar_optional_index_accessor {
  input_indexalator iter;
  bool const is_null;

  /**
   * @brief Build the accessor from a scalar.
   *
   * @param input Scalar holding the index value; its validity is captured here.
   */
  scalar_optional_index_accessor(scalar const& input) : is_null{!input.is_valid()}
  {
    iter = indexalator_factory::make_input_iterator(input);
  }

  /**
   * @brief Return the scalar's index value, or an empty optional if the scalar is null.
   *
   * The position argument is ignored.
   */
  __device__ thrust::optional<size_type> operator()(size_type) const
  {
    if (is_null) { return thrust::nullopt; }
    return thrust::make_optional(*iter);
  }
};

/**
 * @brief Create an index iterator with an optional index accessor for a column.
 *
 * Null checking is enabled in the accessor only when the column reports nulls.
 */
static auto make_input_optional_iterator(column_view const& col)
{
  bool const check_validity = col.has_nulls();
  return make_counting_transform_iterator(0, optional_index_accessor{col, check_validity});
}

/**
 * @brief Create an index iterator with an optional index accessor for a scalar.
 *
 * The underlying constant iterator always supplies 0; the accessor ignores it
 * and reports the scalar's value (or an empty optional) on every dereference.
 */
static auto make_input_optional_iterator(scalar const& input)
{
  auto zero_source = thrust::make_constant_iterator<size_type>(0);
  return thrust::make_transform_iterator(zero_source, scalar_optional_index_accessor{input});
}
};

Expand Down
6 changes: 5 additions & 1 deletion cpp/include/cudf/wrappers/durations.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -59,6 +59,8 @@ using duration_us = cuda::std::chrono::duration<int64_t, cuda::std::chrono::micr
using duration_ns = cuda::std::chrono::duration<int64_t, cuda::std::chrono::nanoseconds::period>;

static_assert(sizeof(duration_D) == sizeof(typename duration_D::rep), "");
static_assert(sizeof(duration_h) == sizeof(typename duration_h::rep), "");
static_assert(sizeof(duration_m) == sizeof(typename duration_m::rep), "");
static_assert(sizeof(duration_s) == sizeof(typename duration_s::rep), "");
static_assert(sizeof(duration_ms) == sizeof(typename duration_ms::rep), "");
static_assert(sizeof(duration_us) == sizeof(typename duration_us::rep), "");
Expand All @@ -85,6 +87,8 @@ namespace std {
}

DURATION_LIMITS(cudf::duration_D);
DURATION_LIMITS(cudf::duration_h);
DURATION_LIMITS(cudf::duration_m);
DURATION_LIMITS(cudf::duration_s);
DURATION_LIMITS(cudf::duration_ms);
DURATION_LIMITS(cudf::duration_us);
Expand Down
42 changes: 23 additions & 19 deletions cpp/include/cudf/wrappers/timestamps.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@

#pragma once

#include <limits>

#include <cuda/std/chrono>
#include <cudf/wrappers/durations.hpp>

/**
* @file timestamps.hpp
Expand All @@ -42,33 +40,37 @@ using timestamp = time_point<Duration>;
*/

/**
* @brief Type alias representing an int32_t duration of days since the unix
* epoch.
* @brief Type alias representing a cudf::duration_D (int32_t) since the unix epoch.
*/
using timestamp_D = detail::timestamp<cudf::duration_D>;
/**
* @brief Type alias representing a cudf::duration_h (int32_t) since the unix epoch.
*/
using timestamp_h = detail::timestamp<cudf::duration_h>;
/**
* @brief Type alias representing a cudf::duration_m (int32_t) since the unix epoch.
*/
using timestamp_D =
detail::timestamp<cuda::std::chrono::duration<int32_t, cuda::std::ratio<86400>>>;
using timestamp_m = detail::timestamp<cudf::duration_m>;
/**
* @brief Type alias representing an int64_t duration of seconds since the
* unix epoch.
* @brief Type alias representing a cudf::duration_s (int64_t) since the unix epoch.
*/
using timestamp_s = detail::timestamp<cuda::std::chrono::duration<int64_t, cuda::std::ratio<1>>>;
using timestamp_s = detail::timestamp<cudf::duration_s>;
/**
* @brief Type alias representing an int64_t duration of milliseconds since
* the unix epoch.
* @brief Type alias representing a cudf::duration_ms (int64_t) since the unix epoch.
*/
using timestamp_ms = detail::timestamp<cuda::std::chrono::duration<int64_t, cuda::std::milli>>;
using timestamp_ms = detail::timestamp<cudf::duration_ms>;
/**
* @brief Type alias representing an int64_t duration of microseconds since
* the unix epoch.
* @brief Type alias representing a cudf::duration_us (int64_t) since the unix epoch.
*/
using timestamp_us = detail::timestamp<cuda::std::chrono::duration<int64_t, cuda::std::micro>>;
using timestamp_us = detail::timestamp<cudf::duration_us>;
/**
* @brief Type alias representing an int64_t duration of nanoseconds since
* the unix epoch.
* @brief Type alias representing a cudf::duration_ns (int64_t) since the unix epoch.
*/
using timestamp_ns = detail::timestamp<cuda::std::chrono::duration<int64_t, cuda::std::nano>>;
using timestamp_ns = detail::timestamp<cudf::duration_ns>;

static_assert(sizeof(timestamp_D) == sizeof(typename timestamp_D::rep), "");
static_assert(sizeof(timestamp_h) == sizeof(typename timestamp_h::rep), "");
static_assert(sizeof(timestamp_m) == sizeof(typename timestamp_m::rep), "");
static_assert(sizeof(timestamp_s) == sizeof(typename timestamp_s::rep), "");
static_assert(sizeof(timestamp_ms) == sizeof(typename timestamp_ms::rep), "");
static_assert(sizeof(timestamp_us) == sizeof(typename timestamp_us::rep), "");
Expand All @@ -95,6 +97,8 @@ namespace std {
}

TIMESTAMP_LIMITS(cudf::timestamp_D);
TIMESTAMP_LIMITS(cudf::timestamp_h);
TIMESTAMP_LIMITS(cudf::timestamp_m);
TIMESTAMP_LIMITS(cudf::timestamp_s);
TIMESTAMP_LIMITS(cudf::timestamp_ms);
TIMESTAMP_LIMITS(cudf::timestamp_us);
Expand Down
10 changes: 6 additions & 4 deletions cpp/src/groupby/sort/group_tdigest.cu
Original file line number Diff line number Diff line change
Expand Up @@ -535,10 +535,12 @@ struct get_scalar_minmax {

// Returns, as doubles, the column elements at the first and last valid positions of the
// given group. Presumably the group's values are sorted so these are its min and max --
// TODO confirm against the caller.
__device__ thrust::tuple<double, double> operator()(size_type group_index)
{
// note: .element<T>() is taking care of fixed-point conversions for us.
return {static_cast<double>(col.element<T>(group_offsets[group_index])),
static_cast<double>(
col.element<T>(group_offsets[group_index] + (group_valid_counts[group_index] - 1)))};
// A group with no valid rows has nothing to read (valid_count - 1 would index one position
// before the group's offset), so it yields (0.0, 0.0) instead of touching the column.
auto const valid_count = group_valid_counts[group_index];
return valid_count > 0
? thrust::make_tuple(
static_cast<double>(col.element<T>(group_offsets[group_index])),
static_cast<double>(col.element<T>(group_offsets[group_index] + valid_count - 1)))
: thrust::make_tuple(0.0, 0.0);
}
};

Expand Down
82 changes: 12 additions & 70 deletions cpp/src/io/csv/csv_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -299,89 +299,31 @@ __inline__ __device__ T decode_value(char const* begin,
return cudf::io::parse_numeric<T, base>(begin, end, opts);
}

template <typename T>
template <typename T,
std::enable_if_t<!cudf::is_timestamp<T>() and !cudf::is_duration<T>()>* = nullptr>
__inline__ __device__ T decode_value(char const* begin,
char const* end,
parse_options_view const& opts)
{
return cudf::io::parse_numeric<T>(begin, end, opts);
}

template <>
__inline__ __device__ cudf::timestamp_D decode_value(char const* begin,
char const* end,
parse_options_view const& opts)
{
return timestamp_D{cudf::duration_D{to_date(begin, end, opts.dayfirst)}};
}

template <>
__inline__ __device__ cudf::timestamp_s decode_value(char const* begin,
char const* end,
parse_options_view const& opts)
{
auto milli = to_date_time(begin, end, opts.dayfirst);
if (milli == -1) {
return timestamp_s{cudf::duration_s{to_non_negative_integer<int64_t>(begin, end)}};
} else {
return timestamp_s{cudf::duration_s{milli / 1000}};
}
}

template <>
__inline__ __device__ cudf::timestamp_ms decode_value(char const* begin,
char const* end,
parse_options_view const& opts)
{
auto milli = to_date_time(begin, end, opts.dayfirst);
if (milli == -1) {
return timestamp_ms{cudf::duration_ms{to_non_negative_integer<int64_t>(begin, end)}};
} else {
return timestamp_ms{cudf::duration_ms{milli}};
}
}

template <>
__inline__ __device__ cudf::timestamp_us decode_value(char const* begin,
char const* end,
parse_options_view const& opts)
template <typename T, std::enable_if_t<cudf::is_timestamp<T>()>* = nullptr>
__inline__ __device__ T decode_value(char const* begin,
char const* end,
parse_options_view const& opts)
{
auto milli = to_date_time(begin, end, opts.dayfirst);
if (milli == -1) {
return timestamp_us{cudf::duration_us{to_non_negative_integer<int64_t>(begin, end)}};
} else {
return timestamp_us{cudf::duration_us{milli * 1000}};
}
return to_timestamp<T>(begin, end, opts.dayfirst);
}

template <>
__inline__ __device__ cudf::timestamp_ns decode_value(char const* begin,
char const* end,
parse_options_view const& opts)
template <typename T, std::enable_if_t<cudf::is_duration<T>()>* = nullptr>
__inline__ __device__ T decode_value(char const* begin,
char const* end,
parse_options_view const& opts)
{
auto milli = to_date_time(begin, end, opts.dayfirst);
if (milli == -1) {
return timestamp_ns{cudf::duration_ns{to_non_negative_integer<int64_t>(begin, end)}};
} else {
return timestamp_ns{cudf::duration_ns{milli * 1000000}};
}
return to_duration<T>(begin, end);
}

#ifndef DURATION_DECODE_VALUE
#define DURATION_DECODE_VALUE(Type) \
template <> \
__inline__ __device__ Type decode_value( \
const char* begin, const char* end, parse_options_view const& opts) \
{ \
return Type{to_time_delta<Type>(begin, end)}; \
}
#endif
DURATION_DECODE_VALUE(duration_D)
DURATION_DECODE_VALUE(duration_s)
DURATION_DECODE_VALUE(duration_ms)
DURATION_DECODE_VALUE(duration_us)
DURATION_DECODE_VALUE(duration_ns)

// The purpose of this is merely to allow compilation ONLY
// TODO : make this work for csv
template <>
Expand Down
Loading

0 comments on commit 7c98115

Please sign in to comment.