Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Abstract Syntax Tree Cleanup and Tests #7418

Merged
merged 22 commits into from
May 11, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
c2cec05
Large unit tests
codereport Feb 22, 2021
1eae304
Move AST plan initialization to ctor
codereport Feb 22, 2021
5af4c59
AST Plan docs + more cleanup
codereport Feb 22, 2021
6ce1add
Calculate offsets once
codereport Feb 23, 2021
ae896e5
Move more work to AST ctor, currently failing
codereport Feb 24, 2021
393cdb2
temp
codereport Mar 2, 2021
1697eb3
Merge branch 'branch-0.20' into ast-tests-cleanup
codereport Apr 8, 2021
33ce7cf
Merge remote-tracking branch 'origin/branch-0.20' into ast-tests-cleanup
vyasr Apr 17, 2021
c9315c5
Keep device buffer from going out of scope before compute.
vyasr Apr 17, 2021
314358b
Clean up comments.
vyasr Apr 19, 2021
9c004d0
Inline offset and host buffer creation.
vyasr Apr 19, 2021
2534773
Remove preprocessor elided code.
vyasr Apr 19, 2021
97e84f2
Merge remote-tracking branch 'origin/branch-0.20' into ast-tests-cleanup
vyasr Apr 21, 2021
f924671
Remove unnecessary forward declarations and friend relationships betw…
vyasr Apr 21, 2021
459eae1
Add more informative comment for why r-value expression constructors …
vyasr Apr 21, 2021
33bf98d
Move linearizer.hpp to nodes.hpp.
vyasr Apr 21, 2021
8781a3b
Make ast plan internals public and use them directly.
vyasr Apr 21, 2021
4ed23dd
Apply rename in meta.yaml.
vyasr Apr 21, 2021
8931591
Merge branch 'branch-0.20' into ast-tests-cleanup
vyasr May 3, 2021
f0a5c66
Change raw pointers in ast_plan to span.
vyasr May 3, 2021
d2a5e8b
Pass the spans through the code.
vyasr May 3, 2021
98b219a
Address PR comments.
vyasr May 6, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 49 additions & 2 deletions cpp/include/cudf/ast/detail/transform.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -317,16 +317,60 @@ __device__ void evaluate_row_expression(detail::row_evaluator const& evaluator,
}
}

/**
* @brief The AST plan creates a device buffer of data needed to execute an AST.
*
* On construction, an AST plan creates a single "packed" host buffer of all necessary data arrays,
* and copies that to the device with a single host-device memory copy. Because the plan tends to be
* small, this is the most efficient approach for low latency.
*
* TODO: Remove comment below depending on final design
* The stream is not synchronized automatically, so a stream sync must be performed manually (or by
* another function) before the device data can be used safely.
*
*/
struct ast_plan {
public:
ast_plan(linearizer const& expr_linearizer) : _sizes{}, _data_pointers{}
ast_plan(linearizer const& expr_linearizer,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
: _sizes{}, _data_pointers{}
{
add_to_plan(expr_linearizer.data_references());
add_to_plan(expr_linearizer.literals());
add_to_plan(expr_linearizer.operators());
add_to_plan(expr_linearizer.operator_source_indices());

// Create device buffer
auto const h_data_buffer = host_data_buffer();
auto const buffer_offsets = offsets();
auto const buffer_size = h_data_buffer.second;
auto device_data_buffer =
rmm::device_buffer(h_data_buffer.first.get(), buffer_size, stream, mr);

// To reduce overhead, we don't call a stream sync here.
// The stream is synced later when the table_device_view is created.
// ^^^^ this comment will be removed, we are synchronizing vvvv
stream.synchronize(); // this doesn't seem to work

// Create device pointers to components of plan
auto const device_data_buffer_ptr = static_cast<const char*>(device_data_buffer.data());
_device_data_references = reinterpret_cast<const detail::device_data_reference*>(
device_data_buffer_ptr + buffer_offsets[0]);
_device_literals = reinterpret_cast<const cudf::detail::fixed_width_scalar_device_view_base*>(
device_data_buffer_ptr + buffer_offsets[1]);
_device_operators =
reinterpret_cast<const ast_operator*>(device_data_buffer_ptr + buffer_offsets[2]);
_device_operator_source_indices =
reinterpret_cast<const cudf::size_type*>(device_data_buffer_ptr + buffer_offsets[3]);
}

auto device_data_references() const { return _device_data_references; }
vyasr marked this conversation as resolved.
Show resolved Hide resolved
auto device_literals() const { return _device_literals; }
auto device_operators() const { return _device_operators; }
auto device_operator_source_indices() const { return _device_operator_source_indices; }

private:
using buffer_type = std::pair<std::unique_ptr<char[]>, int>;
vyasr marked this conversation as resolved.
Show resolved Hide resolved

/**
Expand Down Expand Up @@ -371,9 +415,12 @@ struct ast_plan {
return offsets;
}

private:
std::vector<cudf::size_type> _sizes;
std::vector<const void*> _data_pointers;
const detail::device_data_reference* _device_data_references;
const cudf::detail::fixed_width_scalar_device_view_base* _device_literals;
const ast_operator* _device_operators;
const cudf::size_type* _device_operator_source_indices;
vyasr marked this conversation as resolved.
Show resolved Hide resolved
};

/**
Expand Down
40 changes: 9 additions & 31 deletions cpp/src/ast/transform.cu
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,12 @@ __launch_bounds__(max_block_size) __global__
{
extern __shared__ std::int64_t intermediate_storage[];
auto thread_intermediate_storage = &intermediate_storage[threadIdx.x * num_intermediates];
auto const start_idx = cudf::size_type(threadIdx.x + blockIdx.x * blockDim.x);
auto const stride = cudf::size_type(blockDim.x * gridDim.x);
auto const num_rows = table.num_rows();
auto const start_idx = static_cast<cudf::size_type>(threadIdx.x + blockIdx.x * blockDim.x);
auto const stride = static_cast<cudf::size_type>(blockDim.x * gridDim.x);
auto const evaluator =
cudf::ast::detail::row_evaluator(table, literals, thread_intermediate_storage, &output_column);

for (cudf::size_type row_index = start_idx; row_index < num_rows; row_index += stride) {
for (cudf::size_type row_index = start_idx; row_index < table.num_rows(); row_index += stride) {
evaluate_row_expression(
evaluator, data_references, operators, operator_source_indices, num_operators, row_index);
}
Expand All @@ -90,29 +89,8 @@ std::unique_ptr<column> compute_column(table_view const table,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto const expr_linearizer = linearizer(expr, table); // Linearize the AST
auto const plan = ast_plan(expr_linearizer); // Create ast_plan

// Create device buffer
auto const host_data_buffer = plan.host_data_buffer();
auto const buffer_offsets = plan.offsets();
auto const buffer_size = host_data_buffer.second;
auto device_data_buffer =
rmm::device_buffer(host_data_buffer.first.get(), buffer_size, stream, mr);
// To reduce overhead, we don't call a stream sync here.
// The stream is synced later when the table_device_view is created.

// Create device pointers to components of plan
auto const device_data_buffer_ptr = static_cast<const char*>(device_data_buffer.data());
auto const device_data_references = reinterpret_cast<const detail::device_data_reference*>(
device_data_buffer_ptr + buffer_offsets[0]);
auto const device_literals =
reinterpret_cast<const cudf::detail::fixed_width_scalar_device_view_base*>(
device_data_buffer_ptr + buffer_offsets[1]);
auto const device_operators =
reinterpret_cast<const ast_operator*>(device_data_buffer_ptr + buffer_offsets[2]);
auto const device_operator_source_indices =
reinterpret_cast<const cudf::size_type*>(device_data_buffer_ptr + buffer_offsets[3]);
auto const expr_linearizer = linearizer(expr, table); // Linearize the AST
auto const plan = ast_plan{expr_linearizer, stream, mr}; // Create ast_plan

// Create table device view
auto table_device = table_device_view::create(table, stream);
Expand Down Expand Up @@ -145,11 +123,11 @@ std::unique_ptr<column> compute_column(table_view const table,
cudf::ast::detail::compute_column_kernel<MAX_BLOCK_SIZE>
<<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
*table_device,
device_literals,
plan.device_literals(),
*mutable_output_device,
device_data_references,
device_operators,
device_operator_source_indices,
plan.device_data_references(),
plan.device_operators(),
plan.device_operator_source_indices(),
num_operators,
num_intermediates);
CHECK_CUDA(stream.value());
Expand Down