Skip to content

Commit

Permalink
Abstract Syntax Tree Cleanup and Tests (#7418)
Browse files Browse the repository at this point in the history
  • Loading branch information
codereport authored May 11, 2021
1 parent 9328c56 commit 9a063b6
Show file tree
Hide file tree
Showing 9 changed files with 177 additions and 183 deletions.
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ test:
- test -f $PREFIX/include/cudf/ast/transform.hpp
- test -f $PREFIX/include/cudf/ast/detail/linearizer.hpp
- test -f $PREFIX/include/cudf/ast/detail/operators.hpp
- test -f $PREFIX/include/cudf/ast/linearizer.hpp
- test -f $PREFIX/include/cudf/ast/nodes.hpp
- test -f $PREFIX/include/cudf/ast/operators.hpp
- test -f $PREFIX/include/cudf/binaryop.hpp
- test -f $PREFIX/include/cudf/labeling/label_bins.hpp
Expand Down
9 changes: 1 addition & 8 deletions cpp/include/cudf/ast/detail/linearizer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,7 @@ class linearizer;
* This class is a part of a "visitor" pattern with the `linearizer` class.
* Nodes inheriting from this class can accept visitors.
*/
class node {
friend class detail::linearizer;

private:
struct node {
  // Visitor-pattern hook: accepts a detail::linearizer visitor traversing
  // the expression tree. NOTE(review): the returned cudf::size_type appears
  // to be an index assigned by the linearizer for this node — confirm
  // against the linearizer implementation, which is not visible here.
  virtual cudf::size_type accept(detail::linearizer& visitor) const = 0;
};

Expand All @@ -102,10 +99,6 @@ class node {
* resolved into intermediate data storage in shared memory.
*/
class linearizer {
friend class literal;
friend class column_reference;
friend class expression;

public:
/**
* @brief Construct a new linearizer object
Expand Down
47 changes: 0 additions & 47 deletions cpp/include/cudf/ast/detail/operators.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -753,43 +753,6 @@ struct operator_functor<ast_operator::NOT> {
}
};

#if 0
/**
* @brief Functor used to double-type-dispatch binary operators.
*
* This functor's `operator()` is templated to validate calls to its operators based on the input
* type, as determined by the `is_valid_binary_op` trait.
*
* @tparam OperatorFunctor Binary operator functor.
*/
template <typename OperatorFunctor>
struct double_dispatch_binary_operator_types {
template <typename LHS,
typename RHS,
typename F,
typename... Ts,
std::enable_if_t<is_valid_binary_op<OperatorFunctor, LHS, RHS>>* = nullptr>
CUDA_HOST_DEVICE_CALLABLE void operator()(F&& f, Ts&&... args)
{
f.template operator()<OperatorFunctor, LHS, RHS>(std::forward<Ts>(args)...);
}

template <typename LHS,
typename RHS,
typename F,
typename... Ts,
std::enable_if_t<!is_valid_binary_op<OperatorFunctor, LHS, RHS>>* = nullptr>
CUDA_HOST_DEVICE_CALLABLE void operator()(F&& f, Ts&&... args)
{
#ifndef __CUDA_ARCH__
CUDF_FAIL("Invalid binary operation.");
#else
cudf_assert(false && "Invalid binary operation.");
#endif
}
};
#endif

/**
* @brief Functor used to single-type-dispatch binary operators.
*
Expand Down Expand Up @@ -856,16 +819,6 @@ struct type_dispatch_binary_op {
F&& f,
Ts&&... args)
{
#if 0
// Double dispatch
/*
double_type_dispatcher(lhs_type,
rhs_type,
detail::double_dispatch_binary_operator_types<operator_functor<op>>{},
std::forward<F>(f),
std::forward<Ts>(args)...);
*/
#endif
// Single dispatch (assume lhs_type == rhs_type)
type_dispatcher(lhs_type,
detail::single_dispatch_binary_operator_types<operator_functor<op>>{},
Expand Down
119 changes: 80 additions & 39 deletions cpp/include/cudf/ast/detail/transform.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
*/
#pragma once

#include <cudf/ast/detail/linearizer.hpp>
#include <cudf/ast/detail/operators.hpp>
#include <cudf/ast/linearizer.hpp>
#include <cudf/ast/nodes.hpp>
#include <cudf/ast/operators.hpp>
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
Expand All @@ -25,6 +26,7 @@
#include <cudf/table/table_device_view.cuh>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>
#include <cudf/utilities/traits.hpp>

#include <rmm/cuda_stream_view.hpp>
Expand Down Expand Up @@ -155,10 +157,11 @@ struct row_evaluator {
* storing intermediates.
* @param output_column The output column where results are stored.
*/
__device__ row_evaluator(table_device_view const& table,
const cudf::detail::fixed_width_scalar_device_view_base* literals,
std::int64_t* thread_intermediate_storage,
mutable_column_device_view* output_column)
__device__ row_evaluator(
table_device_view const& table,
device_span<const cudf::detail::fixed_width_scalar_device_view_base> literals,
std::int64_t* thread_intermediate_storage,
mutable_column_device_view* output_column)
: table(table),
literals(literals),
thread_intermediate_storage(thread_intermediate_storage),
Expand Down Expand Up @@ -264,7 +267,7 @@ struct row_evaluator {

private:
table_device_view const& table;
const cudf::detail::fixed_width_scalar_device_view_base* literals;
device_span<const cudf::detail::fixed_width_scalar_device_view_base> literals;
std::int64_t* thread_intermediate_storage;
mutable_column_device_view* output_column;
};
Expand Down Expand Up @@ -298,15 +301,15 @@ __device__ void row_output::resolve_output(detail::device_data_reference device_
* @param num_operators Number of operators.
* @param row_index Row index of data column(s).
*/
__device__ void evaluate_row_expression(detail::row_evaluator const& evaluator,
const detail::device_data_reference* data_references,
const ast_operator* operators,
const cudf::size_type* operator_source_indices,
cudf::size_type num_operators,
cudf::size_type row_index)
__device__ void evaluate_row_expression(
detail::row_evaluator const& evaluator,
device_span<const detail::device_data_reference> data_references,
device_span<const ast_operator> operators,
device_span<const cudf::size_type> operator_source_indices,
cudf::size_type row_index)
{
auto operator_source_index = cudf::size_type(0);
for (cudf::size_type operator_index(0); operator_index < num_operators; operator_index++) {
auto operator_source_index = static_cast<cudf::size_type>(0);
for (cudf::size_type operator_index = 0; operator_index < operators.size(); operator_index++) {
// Execute operator
auto const op = operators[operator_index];
auto const arity = ast_operator_arity(op);
Expand Down Expand Up @@ -336,41 +339,79 @@ __device__ void evaluate_row_expression(detail::row_evaluator const& evaluator,
}
}

/**
* @brief The AST plan creates a device buffer of data needed to execute an AST.
*
* On construction, an AST plan creates a single "packed" host buffer of all necessary data arrays,
* and copies that to the device with a single host-device memory copy. Because the plan tends to be
* small, this is the most efficient approach for low latency.
*
*/
struct ast_plan {
public:
ast_plan() : sizes(), data_pointers() {}
ast_plan(linearizer const& expr_linearizer,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
: _sizes{}, _data_pointers{}
{
add_to_plan(expr_linearizer.data_references());
add_to_plan(expr_linearizer.literals());
add_to_plan(expr_linearizer.operators());
add_to_plan(expr_linearizer.operator_source_indices());

// Create device buffer
auto const buffer_size = std::accumulate(_sizes.cbegin(), _sizes.cend(), 0);
auto buffer_offsets = std::vector<int>(_sizes.size());
thrust::exclusive_scan(_sizes.cbegin(), _sizes.cend(), buffer_offsets.begin(), 0);

auto h_data_buffer = std::make_unique<char[]>(buffer_size);
for (unsigned int i = 0; i < _data_pointers.size(); ++i) {
std::memcpy(h_data_buffer.get() + buffer_offsets[i], _data_pointers[i], _sizes[i]);
}

using buffer_type = std::pair<std::unique_ptr<char[]>, int>;
_device_data_buffer = rmm::device_buffer(h_data_buffer.get(), buffer_size, stream, mr);

stream.synchronize();

// Create device pointers to components of plan
auto device_data_buffer_ptr = static_cast<const char*>(_device_data_buffer.data());
_device_data_references = device_span<const detail::device_data_reference>(
reinterpret_cast<const detail::device_data_reference*>(device_data_buffer_ptr +
buffer_offsets[0]),
expr_linearizer.data_references().size());
_device_literals = device_span<const cudf::detail::fixed_width_scalar_device_view_base>(
reinterpret_cast<const cudf::detail::fixed_width_scalar_device_view_base*>(
device_data_buffer_ptr + buffer_offsets[1]),
expr_linearizer.literals().size());
_device_operators = device_span<const ast_operator>(
reinterpret_cast<const ast_operator*>(device_data_buffer_ptr + buffer_offsets[2]),
expr_linearizer.operators().size());
_device_operator_source_indices = device_span<const cudf::size_type>(
reinterpret_cast<const cudf::size_type*>(device_data_buffer_ptr + buffer_offsets[3]),
expr_linearizer.operator_source_indices().size());
}

/**
* @brief Helper function for adding components (operators, literals, etc) to AST plan
*
* @tparam T The underlying type of the input `std::vector`
* @param v The `std::vector` containing components (operators, literals, etc)
*/
template <typename T>
void add_to_plan(std::vector<T> const& v)
{
auto const data_size = sizeof(T) * v.size();
sizes.push_back(data_size);
data_pointers.push_back(v.data());
_sizes.push_back(data_size);
_data_pointers.push_back(v.data());
}

buffer_type get_host_data_buffer() const
{
auto const total_size = std::accumulate(sizes.cbegin(), sizes.cend(), 0);
auto host_data_buffer = std::make_unique<char[]>(total_size);
auto const offsets = get_offsets();
for (unsigned int i = 0; i < data_pointers.size(); ++i) {
std::memcpy(host_data_buffer.get() + offsets[i], data_pointers[i], sizes[i]);
}
return std::make_pair(std::move(host_data_buffer), total_size);
}
std::vector<cudf::size_type> _sizes;
std::vector<const void*> _data_pointers;

std::vector<cudf::size_type> get_offsets() const
{
auto offsets = std::vector<int>(sizes.size());
thrust::exclusive_scan(sizes.cbegin(), sizes.cend(), offsets.begin(), 0);
return offsets;
}

private:
std::vector<cudf::size_type> sizes;
std::vector<const void*> data_pointers;
rmm::device_buffer _device_data_buffer;
device_span<const detail::device_data_reference> _device_data_references;
device_span<const cudf::detail::fixed_width_scalar_device_view_base> _device_literals;
device_span<const ast_operator> _device_operators;
device_span<const cudf::size_type> _device_operator_source_indices;
};

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,10 @@ enum class table_reference {
OUTPUT // Column index in the output table
};

// Forward declaration
class literal;
class column_reference;
class expression;

/**
* @brief A literal value used in an abstract syntax tree.
*/
class literal : public detail::node {
friend class detail::linearizer;

public:
/**
* @brief Construct a new literal object.
Expand Down Expand Up @@ -90,7 +83,6 @@ class literal : public detail::node {
*/
cudf::data_type get_data_type() const { return get_value().type(); }

private:
/**
* @brief Get the value object.
*
Expand All @@ -106,15 +98,14 @@ class literal : public detail::node {
*/
cudf::size_type accept(detail::linearizer& visitor) const override;

private:
const cudf::detail::fixed_width_scalar_device_view_base value;
};

/**
* @brief A node referring to data from a column in a table.
*/
class column_reference : public detail::node {
friend class detail::linearizer;

public:
/**
* @brief Construct a new column reference object
Expand Down Expand Up @@ -175,7 +166,6 @@ class column_reference : public detail::node {
return table.column(get_column_index()).type();
}

private:
/**
* @brief Accepts a visitor class.
*
Expand All @@ -184,6 +174,7 @@ class column_reference : public detail::node {
*/
cudf::size_type accept(detail::linearizer& visitor) const override;

private:
cudf::size_type column_index;
table_reference table_source;
};
Expand All @@ -192,8 +183,6 @@ class column_reference : public detail::node {
* @brief An expression node holds an operator and zero or more operands.
*/
class expression : public detail::node {
friend class detail::linearizer;

public:
/**
* @brief Construct a new unary expression object.
Expand All @@ -208,11 +197,6 @@ class expression : public detail::node {
}
}

/**
* @brief `expression` doesn't accept r-value references for expression nodes
*/
expression(ast_operator op, node&& input) = delete;

/**
* @brief Construct a new binary expression object.
*
Expand All @@ -227,19 +211,11 @@ class expression : public detail::node {
}
}

/**
* @brief `expression` doesn't accept r-value references for expression nodes
*/
expression(ast_operator op, node&& left, node&& right) = delete;

/**
* @brief `expression` doesn't accept r-value references for expression nodes
*/
// expression only stores references to nodes, so it does not accept r-value
// references: the calling code must own the nodes.
expression(ast_operator op, node&& input) = delete;
expression(ast_operator op, node&& left, node&& right) = delete;
expression(ast_operator op, node&& left, node const& right) = delete;

/**
* @brief `expression` doesn't accept r-value references for expression nodes
*/
expression(ast_operator op, node const& left, node&& right) = delete;

/**
Expand All @@ -256,7 +232,6 @@ class expression : public detail::node {
*/
std::vector<std::reference_wrapper<const node>> get_operands() const { return operands; }

private:
/**
* @brief Accepts a visitor class.
*
Expand All @@ -265,6 +240,7 @@ class expression : public detail::node {
*/
cudf::size_type accept(detail::linearizer& visitor) const override;

private:
const ast_operator op;
const std::vector<std::reference_wrapper<const node>> operands;
};
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/ast/transform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/
#pragma once

#include <cudf/ast/linearizer.hpp>
#include <cudf/ast/nodes.hpp>
#include <cudf/table/table_view.hpp>

namespace cudf {
Expand Down
Loading

0 comments on commit 9a063b6

Please sign in to comment.