Skip to content

Commit

Permalink
Add string scalar support in AST (#13061)
Browse files Browse the repository at this point in the history
Adding string scalar support in AST.
A new generic scalar device view class is added in AST to support numeric, timestamp, duration and string scalars.

Register count did not change, and benchmark results are almost the same.
Compile time: there is a major increase of 15% for join.cu; other files are in the range of -2% to +7%.
 
Addresses part of #8858

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #13061
  • Loading branch information
karthikeyann authored Apr 20, 2023
1 parent 1df6894 commit 267bc6a
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 22 deletions.
3 changes: 1 addition & 2 deletions cpp/include/cudf/ast/detail/expression_evaluator.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,7 +21,6 @@
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/utilities/assert.cuh>
#include <cudf/scalar/scalar_device_view.cuh>
#include <cudf/table/table_device_view.cuh>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
Expand Down
15 changes: 7 additions & 8 deletions cpp/include/cudf/ast/detail/expression_parser.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -104,7 +104,7 @@ using IntermediateDataType = possibly_null_value_t<std::int64_t, has_nulls>;
*/
struct expression_device_view {
device_span<detail::device_data_reference const> data_references;
device_span<cudf::detail::fixed_width_scalar_device_view_base const> literals;
device_span<generic_scalar_device_view const> literals;
device_span<ast_operator const> operators;
device_span<cudf::size_type const> operator_source_indices;
cudf::size_type num_intermediates;
Expand Down Expand Up @@ -281,11 +281,10 @@ class expression_parser {
reinterpret_cast<detail::device_data_reference const*>(device_data_buffer_ptr +
buffer_offsets[0]),
_data_references.size());
device_expression_data.literals =
device_span<cudf::detail::fixed_width_scalar_device_view_base const>(
reinterpret_cast<cudf::detail::fixed_width_scalar_device_view_base const*>(
device_data_buffer_ptr + buffer_offsets[1]),
_literals.size());
device_expression_data.literals = device_span<generic_scalar_device_view const>(
reinterpret_cast<generic_scalar_device_view const*>(device_data_buffer_ptr +
buffer_offsets[1]),
_literals.size());
device_expression_data.operators = device_span<ast_operator const>(
reinterpret_cast<ast_operator const*>(device_data_buffer_ptr + buffer_offsets[2]),
_operators.size());
Expand Down Expand Up @@ -335,7 +334,7 @@ class expression_parser {
std::vector<detail::device_data_reference> _data_references;
std::vector<ast_operator> _operators;
std::vector<cudf::size_type> _operator_source_indices;
std::vector<cudf::detail::fixed_width_scalar_device_view_base> _literals;
std::vector<generic_scalar_device_view> _literals;
};

} // namespace detail
Expand Down
115 changes: 103 additions & 12 deletions cpp/include/cudf/ast/expressions.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -150,6 +150,96 @@ enum class table_reference {
OUTPUT ///< Column index in the output table
};

/**
* @brief A type-erased scalar_device_view where the value is a fixed width type or a string
*/
class generic_scalar_device_view : public cudf::detail::scalar_device_view_base {
public:
/**
* @brief Returns the stored value.
*
* @tparam T The desired type
* @returns The stored value
*/
template <typename T>
__device__ T const value() const noexcept
{
if constexpr (std::is_same_v<T, cudf::string_view>) {
return string_view(static_cast<char const*>(_data), _size);
}
return *static_cast<T const*>(_data);
}

/** @brief Construct a new generic scalar device view object from a numeric scalar
*
* @param s The numeric scalar to construct from
*/
template <typename T>
generic_scalar_device_view(numeric_scalar<T>& s)
: generic_scalar_device_view(s.type(), s.data(), s.validity_data())
{
}

/** @brief Construct a new generic scalar device view object from a timestamp scalar
*
* @param s The timestamp scalar to construct from
*/
template <typename T>
generic_scalar_device_view(timestamp_scalar<T>& s)
: generic_scalar_device_view(s.type(), s.data(), s.validity_data())
{
}

/** @brief Construct a new generic scalar device view object from a duration scalar
*
* @param s The duration scalar to construct from
*/
template <typename T>
generic_scalar_device_view(duration_scalar<T>& s)
: generic_scalar_device_view(s.type(), s.data(), s.validity_data())
{
}

/** @brief Construct a new generic scalar device view object from a string scalar
*
* @param s The string scalar to construct from
*/
generic_scalar_device_view(string_scalar& s)
: generic_scalar_device_view(s.type(), s.data(), s.validity_data(), s.size())
{
}

protected:
void const* _data{}; ///< Pointer to device memory containing the value
size_type const _size{}; ///< Size of the string in bytes for string scalar

/**
* @brief Construct a new fixed width scalar device view object
*
* @param type The data type of the value
* @param data The pointer to the data in device memory
* @param is_valid The pointer to the bool in device memory that indicates the
* validity of the stored value
*/
generic_scalar_device_view(data_type type, void const* data, bool* is_valid)
: cudf::detail::scalar_device_view_base(type, is_valid), _data(data)
{
}

/** @brief Construct a new string scalar device view object
*
* @param type The data type of the value
* @param data The pointer to the data in device memory
* @param is_valid The pointer to the bool in device memory that indicates the
* validity of the stored value
* @param size The size of the string in bytes
*/
generic_scalar_device_view(data_type type, void const* data, bool* is_valid, size_type size)
: cudf::detail::scalar_device_view_base(type, is_valid), _data(data), _size(size)
{
}
};

/**
* @brief A literal value used in an abstract syntax tree.
*/
Expand All @@ -162,8 +252,7 @@ class literal : public expression {
* @param value A numeric scalar value
*/
template <typename T>
literal(cudf::numeric_scalar<T>& value)
: scalar(value), value(cudf::get_scalar_device_view(value))
literal(cudf::numeric_scalar<T>& value) : scalar(value), value(value)
{
}

Expand All @@ -174,8 +263,7 @@ class literal : public expression {
* @param value A timestamp scalar value
*/
template <typename T>
literal(cudf::timestamp_scalar<T>& value)
: scalar(value), value(cudf::get_scalar_device_view(value))
literal(cudf::timestamp_scalar<T>& value) : scalar(value), value(value)
{
}

Expand All @@ -186,11 +274,17 @@ class literal : public expression {
* @param value A duration scalar value
*/
template <typename T>
literal(cudf::duration_scalar<T>& value)
: scalar(value), value(cudf::get_scalar_device_view(value))
literal(cudf::duration_scalar<T>& value) : scalar(value), value(value)
{
}

/**
* @brief Construct a new literal object.
*
* @param value A string scalar value
*/
literal(cudf::string_scalar& value) : scalar(value), value(value) {}

/**
* @brief Get the data type.
*
Expand All @@ -203,10 +297,7 @@ class literal : public expression {
*
* @return The device scalar object
*/
[[nodiscard]] cudf::detail::fixed_width_scalar_device_view_base get_value() const
{
return value;
}
[[nodiscard]] generic_scalar_device_view get_value() const { return value; }

/**
* @brief Accepts a visitor class.
Expand Down Expand Up @@ -236,7 +327,7 @@ class literal : public expression {

private:
cudf::scalar const& scalar;
cudf::detail::fixed_width_scalar_device_view_base const value;
generic_scalar_device_view const value;
};

/**
Expand Down
42 changes: 42 additions & 0 deletions cpp/tests/ast/transform_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,48 @@ TEST_F(TransformTest, StringComparison)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity);
}

// Compares a nullable strings column against a string scalar literal with the LESS operator,
// first with a valid literal and then with the same literal marked invalid (null).
TEST_F(TransformTest, StringScalarComparison)
{
  auto c_0 =
    cudf::test::strings_column_wrapper({"1", "12", "123", "23"}, {true, true, false, true});
  auto table = cudf::table_view{{c_0}};

  auto literal_value = cudf::string_scalar("2");
  auto literal       = cudf::ast::literal(literal_value);

  auto col_ref_0  = cudf::ast::column_reference(0);
  auto expression = cudf::ast::operation(cudf::ast::ast_operator::LESS, col_ref_0, literal);

  // The third input row is null, so the third output row is expected to be null as well.
  auto expected = column_wrapper<bool>{{true, true, true, false}, {true, true, false, true}};
  auto result   = cudf::compute_column(table, expression);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity);

  // compare with null literal: the same expression is re-evaluated after the scalar it was
  // built from is marked invalid, and every output row is expected to be null.
  literal_value.set_valid_async(false);
  auto expected2 = column_wrapper<bool>{{false, false, false, false}, {false, false, false, false}};
  auto result2   = cudf::compute_column(table, expression);

  // Check the null-literal result against its own expectation (not the first result again).
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result2->view(), verbosity);
}

// Checks that an int32 column can be compared against a numeric scalar literal with LESS.
TEST_F(TransformTest, NumericScalarComparison)
{
  auto input_column = column_wrapper<int32_t>{1, 12, 123, 23};
  auto input_table  = cudf::table_view{{input_column}};

  auto threshold_scalar  = cudf::numeric_scalar<int32_t>(2);
  auto threshold_literal = cudf::ast::literal(threshold_scalar);

  auto first_column = cudf::ast::column_reference(0);
  auto less_than =
    cudf::ast::operation(cudf::ast::ast_operator::LESS, first_column, threshold_literal);

  // Only the first element (1) is less than the literal value 2.
  auto expected = column_wrapper<bool>{true, false, false, false};
  auto result   = cudf::compute_column(input_table, less_than);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity);
}

TEST_F(TransformTest, CopyColumn)
{
auto c_0 = column_wrapper<int32_t>{3, 0, 1, 50};
Expand Down

0 comments on commit 267bc6a

Please sign in to comment.