Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/branch-22.04' into refactor/scans
Browse files Browse the repository at this point in the history
  • Loading branch information
vyasr committed Mar 2, 2022
2 parents 47a3575 + 7120694 commit bc4e342
Show file tree
Hide file tree
Showing 95 changed files with 1,785 additions and 1,459 deletions.
1 change: 1 addition & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ test:
- test -f $PREFIX/include/cudf/io/orc_metadata.hpp
- test -f $PREFIX/include/cudf/io/orc.hpp
- test -f $PREFIX/include/cudf/io/parquet.hpp
- test -f $PREFIX/include/cudf/io/text/byte_range_info.hpp
- test -f $PREFIX/include/cudf/io/text/data_chunk_source_factories.hpp
- test -f $PREFIX/include/cudf/io/text/data_chunk_source.hpp
- test -f $PREFIX/include/cudf/io/text/detail/multistate.hpp
Expand Down
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ add_library(
src/io/parquet/writer_impl.cu
src/io/statistics/orc_column_statistics.cu
src/io/statistics/parquet_column_statistics.cu
src/io/text/byte_range_info.cpp
src/io/text/multibyte_split.cu
src/io/utilities/column_buffer.cpp
src/io/utilities/config_utils.cpp
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ ConfigureBench(

# ##################################################################################################
# * json benchmark -------------------------------------------------------------------
ConfigureBench(JSON_BENCH string/json.cpp)
ConfigureBench(JSON_BENCH string/json.cu)

# ##################################################################################################
# * io benchmark ---------------------------------------------------------------------
Expand Down
12 changes: 5 additions & 7 deletions cpp/benchmarks/common/generate_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ struct random_value_fn;
* @brief Creates a random timestamp/duration value
*/
template <typename T>
struct random_value_fn<T, typename std::enable_if_t<cudf::is_chrono<T>()>> {
struct random_value_fn<T, std::enable_if_t<cudf::is_chrono<T>()>> {
std::function<int64_t(std::mt19937&)> seconds_gen;
std::function<int64_t(std::mt19937&)> nanoseconds_gen;

Expand Down Expand Up @@ -164,7 +164,7 @@ struct random_value_fn<T, typename std::enable_if_t<cudf::is_chrono<T>()>> {
* @brief Creates a random fixed_point value. Not implemented yet.
*/
template <typename T>
struct random_value_fn<T, typename std::enable_if_t<cudf::is_fixed_point<T>()>> {
struct random_value_fn<T, std::enable_if_t<cudf::is_fixed_point<T>()>> {
using rep = typename T::rep;
rep const lower_bound;
rep const upper_bound;
Expand Down Expand Up @@ -194,9 +194,7 @@ struct random_value_fn<T, typename std::enable_if_t<cudf::is_fixed_point<T>()>>
* @brief Creates a random numeric value with the given distribution.
*/
template <typename T>
struct random_value_fn<
T,
typename std::enable_if_t<!std::is_same_v<T, bool> && cudf::is_numeric<T>()>> {
struct random_value_fn<T, std::enable_if_t<!std::is_same_v<T, bool> && cudf::is_numeric<T>()>> {
T const lower_bound;
T const upper_bound;
distribution_fn<T> dist;
Expand All @@ -219,7 +217,7 @@ struct random_value_fn<
* @brief Creates a boolean value with the given probability of returning `true`.
*/
template <typename T>
struct random_value_fn<T, typename std::enable_if_t<std::is_same_v<T, bool>>> {
struct random_value_fn<T, std::enable_if_t<std::is_same_v<T, bool>>> {
std::bernoulli_distribution b_dist;

random_value_fn(distribution_params<bool> const& desc) : b_dist{desc.probability_true} {}
Expand Down Expand Up @@ -260,7 +258,7 @@ struct stored_as {

// Use `int8_t` for bools because that's how they're stored in columns
template <typename T>
struct stored_as<T, typename std::enable_if_t<std::is_same_v<T, bool>>> {
struct stored_as<T, std::enable_if_t<std::is_same_v<T, bool>>> {
using type = int8_t;
};

Expand Down
27 changes: 12 additions & 15 deletions cpp/benchmarks/common/generate_input.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,7 @@ struct distribution_params;
* @brief Numeric values are parameterized with a distribution type and bounds of the same type.
*/
template <typename T>
struct distribution_params<
T,
typename std::enable_if_t<!std::is_same_v<T, bool> && cudf::is_numeric<T>()>> {
struct distribution_params<T, std::enable_if_t<!std::is_same_v<T, bool> && cudf::is_numeric<T>()>> {
distribution_id id;
T lower_bound;
T upper_bound;
Expand All @@ -140,15 +138,15 @@ struct distribution_params<
* @brief Booleans are parameterized with the probability of getting a `true` value.
*/
template <typename T>
struct distribution_params<T, typename std::enable_if_t<std::is_same_v<T, bool>>> {
struct distribution_params<T, std::enable_if_t<std::is_same_v<T, bool>>> {
double probability_true;
};

/**
* @brief Timestamps and durations are parameterized with a distribution type and int64_t bounds.
*/
template <typename T>
struct distribution_params<T, typename std::enable_if_t<cudf::is_chrono<T>()>> {
struct distribution_params<T, std::enable_if_t<cudf::is_chrono<T>()>> {
distribution_id id;
int64_t lower_bound;
int64_t upper_bound;
Expand All @@ -158,7 +156,7 @@ struct distribution_params<T, typename std::enable_if_t<cudf::is_chrono<T>()>> {
* @brief Strings are parameterized by the distribution of their length, as an integral value.
*/
template <typename T>
struct distribution_params<T, typename std::enable_if_t<std::is_same_v<T, cudf::string_view>>> {
struct distribution_params<T, std::enable_if_t<std::is_same_v<T, cudf::string_view>>> {
distribution_params<uint32_t> length_params;
};

Expand All @@ -167,15 +165,15 @@ struct distribution_params<T, typename std::enable_if_t<std::is_same_v<T, cudf::
* the element type.
*/
template <typename T>
struct distribution_params<T, typename std::enable_if_t<std::is_same_v<T, cudf::list_view>>> {
struct distribution_params<T, std::enable_if_t<std::is_same_v<T, cudf::list_view>>> {
cudf::type_id element_type;
distribution_params<uint32_t> length_params;
cudf::size_type max_depth;
};

// Present for compilation only. To be implemented once reader/writers support the fixed width type.
template <typename T>
struct distribution_params<T, typename std::enable_if_t<cudf::is_fixed_point<T>()>> {
struct distribution_params<T, std::enable_if_t<cudf::is_fixed_point<T>()>> {
};

/**
Expand Down Expand Up @@ -225,8 +223,7 @@ class data_profile {

public:
template <typename T,
typename std::enable_if_t<!std::is_same_v<T, bool> && cuda::std::is_integral_v<T>, T>* =
nullptr>
std::enable_if_t<!std::is_same_v<T, bool> && cuda::std::is_integral_v<T>, T>* = nullptr>
distribution_params<T> get_distribution_params() const
{
auto it = int_params.find(cudf::type_to_id<T>());
Expand All @@ -239,7 +236,7 @@ class data_profile {
}
}

template <typename T, typename std::enable_if_t<std::is_floating_point_v<T>, T>* = nullptr>
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, T>* = nullptr>
distribution_params<T> get_distribution_params() const
{
auto it = float_params.find(cudf::type_to_id<T>());
Expand All @@ -258,7 +255,7 @@ class data_profile {
return distribution_params<T>{bool_probability};
}

template <typename T, typename std::enable_if_t<cudf::is_chrono<T>()>* = nullptr>
template <typename T, std::enable_if_t<cudf::is_chrono<T>()>* = nullptr>
distribution_params<T> get_distribution_params() const
{
auto it = int_params.find(cudf::type_to_id<T>());
Expand All @@ -284,7 +281,7 @@ class data_profile {
return list_dist_desc;
}

template <typename T, typename std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
distribution_params<typename T::rep> get_distribution_params() const
{
using rep = typename T::rep;
Expand All @@ -307,7 +304,7 @@ class data_profile {
// discrete distributions (integers, strings, lists). Otherwise the call will have no effect.
template <typename T,
typename Type_enum,
typename std::enable_if_t<cuda::std::is_integral_v<T>, T>* = nullptr>
std::enable_if_t<cuda::std::is_integral_v<T>, T>* = nullptr>
void set_distribution_params(Type_enum type_or_group,
distribution_id dist,
T lower_bound,
Expand All @@ -331,7 +328,7 @@ class data_profile {
// have continuous distributions (floating point types). Otherwise the call will have no effect.
template <typename T,
typename Type_enum,
typename std::enable_if_t<std::is_floating_point_v<T>, T>* = nullptr>
std::enable_if_t<std::is_floating_point_v<T>, T>* = nullptr>
void set_distribution_params(Type_enum type_or_group,
distribution_id dist,
T lower_bound,
Expand Down
8 changes: 4 additions & 4 deletions cpp/benchmarks/common/random_distribution_factory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
/**
* @brief Generates a normal(binomial) distribution between zero and upper_bound.
*/
template <typename T, typename std::enable_if_t<cuda::std::is_integral_v<T>, T>* = nullptr>
template <typename T, std::enable_if_t<cuda::std::is_integral_v<T>, T>* = nullptr>
auto make_normal_dist(T upper_bound)
{
using uT = typename std::make_unsigned<T>::type;
Expand All @@ -42,7 +42,7 @@ auto make_normal_dist(T upper_bound)
return std::normal_distribution<T>(mean, stddev);
}

template <typename T, typename std::enable_if_t<cuda::std::is_integral_v<T>, T>* = nullptr>
template <typename T, std::enable_if_t<cuda::std::is_integral_v<T>, T>* = nullptr>
auto make_uniform_dist(T range_start, T range_end)
{
return std::uniform_int_distribution<T>(range_start, range_end);
Expand All @@ -62,7 +62,7 @@ double geometric_dist_p(T range_size)
return p ? p : std::numeric_limits<double>::epsilon();
}

template <typename T, typename std::enable_if_t<cuda::std::is_integral_v<T>, T>* = nullptr>
template <typename T, std::enable_if_t<cuda::std::is_integral_v<T>, T>* = nullptr>
auto make_geometric_dist(T range_start, T range_end)
{
using uT = typename std::make_unsigned<T>::type;
Expand All @@ -82,7 +82,7 @@ auto make_geometric_dist(T range_start, T range_end)
/**
 * @brief Alias for a type-erased callable that takes a `std::mt19937` engine by reference and
 * returns a value of type `T`.
 *
 * @tparam T Type of the value returned by the callable
 */
template <typename T>
using distribution_fn = std::function<T(std::mt19937&)>;

template <typename T, typename std::enable_if_t<cuda::std::is_integral_v<T>, T>* = nullptr>
template <typename T, std::enable_if_t<cuda::std::is_integral_v<T>, T>* = nullptr>
distribution_fn<T> make_distribution(distribution_id did, T lower_bound, T upper_bound)
{
switch (did) {
Expand Down
140 changes: 0 additions & 140 deletions cpp/benchmarks/string/json.cpp

This file was deleted.

Loading

0 comments on commit bc4e342

Please sign in to comment.