Skip to content

Commit

Permalink
Address PR comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
vyasr committed Jun 29, 2021
1 parent 92e5353 commit 6c9251b
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 82 deletions.
9 changes: 6 additions & 3 deletions cpp/include/cudf/ast/detail/linearizer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,9 @@ class linearizer {
/**
* @brief Construct a new linearizer object
*
* @param table The table used for evaluating the abstract syntax tree.
* @param expr The expression to create an evaluable linearizer for.
* @param left The left table used for evaluating the abstract syntax tree.
* @param right The right table used for evaluating the abstract syntax tree.
*/
linearizer(detail::node const& expr, cudf::table_view left, cudf::table_view right)
: _left(left), _right(right), _node_count(0), _intermediate_counter()
Expand All @@ -114,10 +116,11 @@ class linearizer {
/**
* @brief Construct a new linearizer object
*
* @param expr The expression to create an evaluable linearizer for.
* @param table The table used for evaluating the abstract syntax tree.
*/
linearizer(detail::node const& expr, cudf::table_view left)
: _left(left), _right(left), _node_count(0), _intermediate_counter()
linearizer(detail::node const& expr, cudf::table_view table)
: _left(table), _right(table), _node_count(0), _intermediate_counter()
{
expr.accept(*this);
}
Expand Down
93 changes: 51 additions & 42 deletions cpp/include/cudf/ast/detail/transform.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ using IntermediateDataType = possibly_null_value_t<std::int64_t, has_nulls>;
* subclasses to get around the lack of device-side polymorphism.
*
* @tparam Subclass The subclass to dispatch methods to.
* @tparam has_nulls Whether or not the result data is nullable.
* @tparam T The underlying data type.
* @tparam has_nulls Whether or not the result data is nullable.
*/
template <typename Subclass, typename T, bool has_nulls>
struct expression_result {
Expand All @@ -89,7 +89,12 @@ struct expression_result {
// TODO: The index is ignored by the value subclass, but is included in this
// signature because it is required by the implementation in the template
// specialization for column views. It would be nice to clean this up, see
// the related TODO below.
// the related TODO below. Note that storing the index in the class on
// construction (which would result in a cleaner delineation of the API for
// the derived types) results in a significant performance penalty because
// the index is pushed down the memory hierarchy by the time it needs to be
// used, whereas passing it as a parameter keeps it in registers for fast
// access at the point where indexing occurs.
template <typename Element>
__device__ void set_value(cudf::size_type index, possibly_null_value_t<Element, has_nulls> result)
{
Expand All @@ -108,8 +113,8 @@ struct expression_result {
* (possibly nullable) scalar type that can be written to by the
* expression_evaluator. The data (and its validity) can then be accessed.
*
* @tparam has_nulls Whether or not the result data is nullable.
* @tparam T The underlying data type.
* @tparam has_nulls Whether or not the result data is nullable.
*/
template <typename T, bool has_nulls>
struct value_expression_result
Expand Down Expand Up @@ -165,7 +170,6 @@ struct value_expression_result
* column. Not all methods are implemented
*
* @tparam has_nulls Whether or not the result data is nullable.
* @tparam T The underlying data type.
*/
template <bool has_nulls>
struct mutable_column_expression_result
Expand Down Expand Up @@ -217,10 +221,10 @@ struct mutable_column_expression_result
*
* This struct should never be instantiated directly. It is created by the
* `ast_plan` on construction, and the resulting member is publicly accessible
* for passing to kernels for constructing `expression_evaluators`.
* for passing to kernels for constructing an `expression_evaluator`.
*
*/
struct dev_ast_plan {
struct device_ast_plan {
device_span<const detail::device_data_reference> data_references;
device_span<const cudf::detail::fixed_width_scalar_device_view_base> literals;
device_span<const ast_operator> operators;
Expand Down Expand Up @@ -263,22 +267,22 @@ struct ast_plan {
rmm::mr::device_memory_resource* mr)
: _linearizer(expr, left, right)
{
std::vector<cudf::size_type> _sizes;
std::vector<const void*> _data_pointers;
std::vector<cudf::size_type> sizes;
std::vector<const void*> data_pointers;

extract_size_and_pointer(_linearizer.data_references(), _sizes, _data_pointers);
extract_size_and_pointer(_linearizer.literals(), _sizes, _data_pointers);
extract_size_and_pointer(_linearizer.operators(), _sizes, _data_pointers);
extract_size_and_pointer(_linearizer.operator_source_indices(), _sizes, _data_pointers);
extract_size_and_pointer(_linearizer.data_references(), sizes, data_pointers);
extract_size_and_pointer(_linearizer.literals(), sizes, data_pointers);
extract_size_and_pointer(_linearizer.operators(), sizes, data_pointers);
extract_size_and_pointer(_linearizer.operator_source_indices(), sizes, data_pointers);

// Create device buffer
auto const buffer_size = std::accumulate(_sizes.cbegin(), _sizes.cend(), 0);
auto buffer_offsets = std::vector<int>(_sizes.size());
thrust::exclusive_scan(_sizes.cbegin(), _sizes.cend(), buffer_offsets.begin(), 0);
auto const buffer_size = std::accumulate(sizes.cbegin(), sizes.cend(), 0);
auto buffer_offsets = std::vector<int>(sizes.size());
thrust::exclusive_scan(sizes.cbegin(), sizes.cend(), buffer_offsets.begin(), 0);

auto h_data_buffer = std::make_unique<char[]>(buffer_size);
for (unsigned int i = 0; i < _data_pointers.size(); ++i) {
std::memcpy(h_data_buffer.get() + buffer_offsets[i], _data_pointers[i], _sizes[i]);
for (unsigned int i = 0; i < data_pointers.size(); ++i) {
std::memcpy(h_data_buffer.get() + buffer_offsets[i], data_pointers[i], sizes[i]);
}

_device_data_buffer = rmm::device_buffer(h_data_buffer.get(), buffer_size, stream, mr);
Expand Down Expand Up @@ -311,40 +315,42 @@ struct ast_plan {
* @brief Construct an AST plan for an expression operating on one table.
*
* @param expr The expression for which to construct a plan.
* @param left The left table on which the expression acts.
* @param table The table on which the expression acts.
* @param has_nulls Boolean indicator of whether or not the data contains nulls.
* @param stream Stream view on which to allocate resources and queue execution.
* @param mr Device memory resource used to allocate the returned column's device.
*/
ast_plan(detail::node const& expr,
cudf::table_view left,
cudf::table_view table,
bool has_nulls,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
: ast_plan(expr, left, left, has_nulls, stream, mr)
: ast_plan(expr, table, table, has_nulls, stream, mr)
{
}

cudf::data_type output_type() const { return _linearizer.root_data_type(); }

dev_ast_plan
device_ast_plan
dev_plan; ///< The collection of data required to evaluate the expression on the device.

private:
/**
* @brief Helper function for adding components (operators, literals, etc) to AST plan
*
* @tparam T The underlying type of the input `std::vector`
* @param v The `std::vector` containing components (operators, literals, etc)
* @param[in] v The `std::vector` containing components (operators, literals, etc).
* @param[in,out] sizes The `std::vector` containing the size of each data buffer.
* @param[in,out] data_pointers The `std::vector` containing pointers to each data buffer.
*/
template <typename T>
void extract_size_and_pointer(std::vector<T> const& v,
std::vector<cudf::size_type>& _sizes,
std::vector<const void*>& _data_pointers)
std::vector<cudf::size_type>& sizes,
std::vector<const void*>& data_pointers)
{
auto const data_size = sizeof(T) * v.size();
_sizes.push_back(data_size);
_data_pointers.push_back(v.data());
sizes.push_back(data_size);
data_pointers.push_back(v.data());
}

rmm::device_buffer
Expand All @@ -366,39 +372,42 @@ struct expression_evaluator {
/**
* @brief Construct an expression evaluator acting on two tables.
*
* @param left View on the left table view used for evaluation.
* @param left View of the left table view used for evaluation.
* @param right View of the right table view used for evaluation.
* @param plan The collection of device references representing the expression to evaluate.
* @param thread_intermediate_storage Pointer to this thread's portion of shared memory for
* storing intermediates.
* @param left View on the right table view used for evaluation.
* @param compare_nulls Whether the equality operator returns true or false for two nulls.
*/
__device__ expression_evaluator(table_device_view const& left,
table_device_view const& right,
dev_ast_plan const& plan,
device_ast_plan const& plan,
IntermediateDataType<has_nulls>* thread_intermediate_storage,
null_equality compare_nulls = null_equality::EQUAL)
: left(left),
right(right),
plan(plan),
thread_intermediate_storage(thread_intermediate_storage),
right(right),
compare_nulls(compare_nulls)
{
}

/**
* @brief Construct an expression evaluator acting on one table.
*
* @param left View on the left table view used for evaluation.
* @param table View of the table view used for evaluation.
* @param plan The collection of device references representing the expression to evaluate.
* @param thread_intermediate_storage Pointer to this thread's portion of shared memory for
* storing intermediates.
* @param compare_nulls Whether the equality operator returns true or false for two nulls.
*/
__device__ expression_evaluator(table_device_view const& left,
dev_ast_plan const& plan,
__device__ expression_evaluator(table_device_view const& table,
device_ast_plan const& plan,
IntermediateDataType<has_nulls>* thread_intermediate_storage,
null_equality compare_nulls = null_equality::EQUAL)
: left(left),
right(left),
: left(table),
right(table),
plan(plan),
thread_intermediate_storage(thread_intermediate_storage),
compare_nulls(compare_nulls)
Expand All @@ -424,7 +433,7 @@ struct expression_evaluator {
{
auto const data_index = device_data_reference.data_index;
auto const ref_type = device_data_reference.reference_type;
// TODO: Everywhere in the code assumes that the tbale reference is either
// TODO: Everywhere in the code assumes that the table reference is either
// left or right. Should we error-check somewhere to prevent
// table_reference::OUTPUT from being specified?
auto const& table = device_data_reference.table_source == table_reference::LEFT ? left : right;
Expand Down Expand Up @@ -731,7 +740,7 @@ struct expression_evaluator {
using OperatorFunctor = detail::operator_functor<op>;
using Out = cuda::std::invoke_result_t<OperatorFunctor, Input>;
if constexpr (has_nulls) {
auto result = input.has_value()
auto const result = input.has_value()
? possibly_null_value_t<Out, has_nulls>(OperatorFunctor{}(*input))
: possibly_null_value_t<Out, has_nulls>();
this->template resolve_output<Out>(output_object, output, output_row_index, result);
Expand All @@ -758,7 +767,7 @@ struct expression_evaluator {
* @brief Subclass of the expression output handler for binary operations.
*
* This functor's call operator is specialized to handle binary operations,
* which require a two operands.
* which require two operands.
*/
template <typename LHS, typename RHS>
struct binary_expression_output_handler : public expression_output_handler {
Expand Down Expand Up @@ -814,11 +823,11 @@ struct expression_evaluator {
? possibly_null_value_t<Out, has_nulls>(OperatorFunctor{}(*lhs, *rhs))
: possibly_null_value_t<Out, has_nulls>();
this->template resolve_output<Out>(output_object, output, output_row_index, result);
} // if constexpr (op == ast_operator::EQUAL) {
}
} else {
this->template resolve_output<Out>(
output_object, output, output_row_index, OperatorFunctor{}(lhs, rhs));
} // if constexpr (has_nulls) {
}
}

template <ast_operator op,
Expand All @@ -837,13 +846,13 @@ struct expression_evaluator {

table_device_view const& left; ///< The left table to operate on.
table_device_view const& right; ///< The right table to operate on.
dev_ast_plan const&
device_ast_plan const&
plan; ///< The container of device data representing the expression to evaluate.
IntermediateDataType<has_nulls>*
thread_intermediate_storage; ///< The shared memory store of intermediates produced during
///< evaluation.
null_equality
compare_nulls; ///< Whether the equality operators returns true or false for two nulls.
compare_nulls; ///< Whether the equality operator returns true or false for two nulls.
};

/**
Expand Down
45 changes: 20 additions & 25 deletions cpp/include/cudf/join.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -673,11 +673,10 @@ class hash_join {
* @throw cudf::logic_error if number of elements in `left_keys` or `right_keys`
* mismatch.
*
* @param[in] left_keys The left table
* @param[in] right_keys The right table
* @param[in] binary_predicate The condition on which to join.
* @param[in] compare_nulls controls whether null join-key values
* should match or not.
* @param left The left table
* @param right The right table
* @param binary_predicate The condition on which to join.
* @param compare_nulls Whether the equality operator returns true or false for two nulls.
* @param mr Device memory resource used to allocate the returned table and columns' device memory
*
* @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct
Expand Down Expand Up @@ -718,11 +717,10 @@ conditional_inner_join(
* @throw cudf::logic_error if number of elements in `left_keys` or `right_keys`
* mismatch.
*
* @param[in] left_keys The left table
* @param[in] right_keys The right table
* @param[in] binary_predicate The condition on which to join.
* @param[in] compare_nulls controls whether null join-key values
* should match or not.
* @param left The left table
* @param right The right table
* @param binary_predicate The condition on which to join.
* @param compare_nulls Whether the equality operator returns true or false for two nulls.
* @param mr Device memory resource used to allocate the returned table and columns' device memory
*
* @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct
Expand Down Expand Up @@ -761,11 +759,10 @@ conditional_left_join(table_view left,
* @throw cudf::logic_error if number of elements in `left_keys` or `right_keys`
* mismatch.
*
* @param[in] left_keys The left table
* @param[in] right_keys The right table
* @param[in] binary_predicate The condition on which to join.
* @param[in] compare_nulls controls whether null join-key values
* should match or not.
* @param left The left table
* @param right The right table
* @param binary_predicate The condition on which to join.
* @param compare_nulls Whether the equality operator returns true or false for two nulls.
* @param mr Device memory resource used to allocate the returned table and columns' device memory
*
* @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct
Expand Down Expand Up @@ -799,11 +796,10 @@ conditional_full_join(table_view left,
* @throw cudf::logic_error if number of elements in `left_keys` or `right_keys`
* mismatch.
*
* @param[in] left_keys The left table
* @param[in] right_keys The right table
* @param[in] binary_predicate The condition on which to join.
* @param[in] compare_nulls controls whether null join-key values
* should match or not.
* @param left The left table
* @param right The right table
* @param binary_predicate The condition on which to join.
* @param compare_nulls Whether the equality operator returns true or false for two nulls.
* @param mr Device memory resource used to allocate the returned table and columns' device memory
*
* @return A vector `left_indices` that can be used to construct the result of
Expand Down Expand Up @@ -837,11 +833,10 @@ std::unique_ptr<rmm::device_uvector<size_type>> conditional_left_semi_join(
* @throw cudf::logic_error if number of elements in `left_keys` or `right_keys`
* mismatch.
*
* @param[in] left_keys The left table
* @param[in] right_keys The right table
* @param[in] binary_predicate The condition on which to join.
* @param[in] compare_nulls controls whether null join-key values
* should match or not.
* @param left The left table
* @param right The right table
* @param binary_predicate The condition on which to join.
* @param compare_nulls Whether the equality operator returns true or false for two nulls.
* @param mr Device memory resource used to allocate the returned table and columns' device memory
*
* @return A vector `left_indices` that can be used to construct the result of
Expand Down
12 changes: 6 additions & 6 deletions cpp/src/ast/transform.cu
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ namespace detail {
* This evaluates an expression over a table to produce a new column. Also called an n-ary
* transform.
*
* @tparam block_size The size of the thread block, used to set launch bounds and minimize register
* usage.
* @tparam max_block_size The size of the thread block, used to set launch
* bounds and minimize register usage.
* @tparam has_nulls whether or not the output column may contain nulls.
*
* @param table The table device view used for evaluation.
Expand All @@ -60,7 +60,7 @@ namespace detail {
template <cudf::size_type max_block_size, bool has_nulls>
__launch_bounds__(max_block_size) __global__
void compute_column_kernel(table_device_view const table,
dev_ast_plan plan,
device_ast_plan plan,
mutable_column_device_view output_column)
{
// The (required) extern storage of the shared memory array leads to
Expand Down Expand Up @@ -93,15 +93,15 @@ std::unique_ptr<column> compute_column(table_view const table,
// If none of the input columns actually contain nulls, we can still use the
// non-nullable version of the expression evaluation code path for
// performance, so we capture that information as well.
auto nullable =
auto const nullable =
std::any_of(table.begin(), table.end(), [](column_view c) { return c.nullable(); });
auto has_nulls = nullable && std::any_of(table.begin(), table.end(), [](column_view c) {
auto const has_nulls = nullable && std::any_of(table.begin(), table.end(), [](column_view c) {
return c.nullable() && c.has_nulls();
});

auto const plan = ast_plan{expr, table, has_nulls, stream, mr};

auto output_column_mask_state =
auto const output_column_mask_state =
nullable ? (has_nulls ? mask_state::UNINITIALIZED : mask_state::ALL_VALID)
: mask_state::UNALLOCATED;

Expand Down
Loading

0 comments on commit 6c9251b

Please sign in to comment.