From 4aeb8c8aecaa73abeb563b96a25c0d81e04360f4 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 9 Feb 2021 11:43:31 -0500 Subject: [PATCH 1/7] Initial refactorings --- cpp/include/cudf/ast/detail/linearizer.hpp | 4 +- cpp/src/ast/linearizer.cpp | 53 +++++++++------------- 2 files changed, 24 insertions(+), 33 deletions(-) diff --git a/cpp/include/cudf/ast/detail/linearizer.hpp b/cpp/include/cudf/ast/detail/linearizer.hpp index 44910bcaad1..f973db1f7aa 100644 --- a/cpp/include/cudf/ast/detail/linearizer.hpp +++ b/cpp/include/cudf/ast/detail/linearizer.hpp @@ -64,8 +64,8 @@ struct alignas(8) device_data_reference { const device_data_reference_type reference_type; // Source of data const cudf::data_type data_type; // Type of data - const cudf::size_type - data_index; // The column index of a table, index of a literal, or index of an intermediate + const cudf::size_type data_index; // The column index of a table, index of a + // literal, or index of an intermediate const table_reference table_source; inline bool operator==(const device_data_reference& rhs) const diff --git a/cpp/src/ast/linearizer.cpp b/cpp/src/ast/linearizer.cpp index 40a419642c2..a7bbe3b6c5d 100644 --- a/cpp/src/ast/linearizer.cpp +++ b/cpp/src/ast/linearizer.cpp @@ -23,6 +23,8 @@ #include #include +#include + #include #include #include @@ -64,10 +66,10 @@ cudf::size_type linearizer::intermediate_counter::take() void linearizer::intermediate_counter::give(cudf::size_type value) { + // TODO: add comment auto const lower_bound = std::lower_bound(used_values.cbegin(), used_values.cend(), value); - if ((*lower_bound == value) && (lower_bound != used_values.cend())) { + if ((lower_bound != used_values.cend()) && (*lower_bound == value)) used_values.erase(lower_bound); - } } /** @@ -83,22 +85,13 @@ void linearizer::intermediate_counter::give(cudf::size_type value) */ cudf::size_type linearizer::intermediate_counter::find_first_missing() const { - if ((used_values.empty()) || (used_values.front() != 0)) { - // Handle cases where the container is empty or first value is non-zero. - return 0; - } else { - // Search for the first non-contiguous pair of elements. - auto found = std::adjacent_find(used_values.cbegin(), - used_values.cend(), - [](auto const& a, auto const& b) { return a != b - 1; }); - if (found != used_values.cend()) { - // A missing value was found and is returned. - return *found + 1; - } else { - // No missing elements. Return the next element in the sequence. - return used_values.size(); - } - } + if (used_values.empty() || (used_values.front() != 0)) { return 0; } + // Search for the first non-contiguous pair of elements. + auto diff_not_one = [](auto a, auto b) { return a != b - 1; }; + auto it = std::adjacent_find(used_values.cbegin(), used_values.cend(), diff_not_one); + return it != used_values.cend() + ? *it + 1 // A missing value was found and is returned. + : used_values.size(); // No missing elements. Return the next element in the sequence. } cudf::size_type linearizer::visit(literal const& expr) @@ -137,24 +130,23 @@ cudf::size_type linearizer::visit(expression const& expr) // Increment the node index auto const node_index = node_count++; // Visit children (operands) of this node - auto const operand_data_reference_indices = visit_operands(expr.get_operands()); + auto const operand_data_ref_indices = visit_operands(expr.get_operands()); // Resolve operand types - auto operand_types = std::vector(operand_data_reference_indices.size()); - std::transform(operand_data_reference_indices.cbegin(), - operand_data_reference_indices.cend(), - operand_types.begin(), - [this](auto const& data_reference_index) -> cudf::data_type { - return get_data_references()[data_reference_index].data_type; - }); + auto data_ref = [this](auto const& index) { return get_data_references()[index].data_type; }; + auto begin = thrust::make_transform_iterator(operand_data_ref_indices.cbegin(), data_ref); + auto end = begin + operand_data_ref_indices.size(); + auto const operand_types = std::vector(begin, end); + // Validate types of operand data references match if (std::adjacent_find(operand_types.cbegin(), operand_types.cend(), std::not_equal_to<>()) != operand_types.cend()) { CUDF_FAIL("An AST expression was provided non-matching operand types."); } + // Give back intermediate storage locations that are consumed by this operation std::for_each( - operand_data_reference_indices.cbegin(), - operand_data_reference_indices.cend(), + operand_data_ref_indices.cbegin(), + operand_data_ref_indices.cend(), [this](auto const& data_reference_index) { auto const operand_source = get_data_references()[data_reference_index]; if (operand_source.reference_type == detail::device_data_reference_type::INTERMEDIATE) { @@ -165,7 +157,6 @@ cudf::size_type linearizer::visit(expression const& expr) // Resolve node type auto const op = expr.get_operator(); auto const data_type = cudf::ast::detail::ast_operator_return_type(op, operand_types); - // Push operator operators.push_back(op); // Push data reference auto const output = [&]() { @@ -189,8 +180,8 @@ cudf::size_type linearizer::visit(expression const& expr) auto const index = add_data_reference(output); // Insert source indices from all operands (sources) and this operator (destination) operator_source_indices.insert(operator_source_indices.end(), - operand_data_reference_indices.cbegin(), - operand_data_reference_indices.cend()); + operand_data_ref_indices.cbegin(), + operand_data_ref_indices.cend()); operator_source_indices.push_back(index); return index; } From 76d7b8dec38fa7083f611245d2cb97592445d4e9 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 9 Feb 2021 12:42:48 -0500 Subject: [PATCH 2/7] More refactorings, clang-format fix --- cpp/include/cudf/ast/detail/linearizer.hpp | 34 ++++++------ cpp/src/ast/linearizer.cpp | 61 ++++++++++------------ cpp/src/ast/transform.cu | 12 ++--- 3 files changed, 51 insertions(+), 56 deletions(-) diff --git a/cpp/include/cudf/ast/detail/linearizer.hpp b/cpp/include/cudf/ast/detail/linearizer.hpp index f973db1f7aa..c83a0b2029b 100644 --- a/cpp/include/cudf/ast/detail/linearizer.hpp +++ b/cpp/include/cudf/ast/detail/linearizer.hpp @@ -114,7 +114,7 @@ class linearizer { * @param table The table used for evaluating the abstract syntax tree. */ linearizer(detail::node const& expr, cudf::table_view table) - : table(table), node_count(0), intermediate_counter() + : _table(table), _node_count(0), _intermediate_counter() { expr.accept(*this); } @@ -124,23 +124,23 @@ class linearizer { * * @return cudf::data_type */ - cudf::data_type get_root_data_type() const; + cudf::data_type root_data_type() const; /** * @brief Get the maximum number of intermediates stored by the abstract syntax tree. * * @return cudf::size_type */ - cudf::size_type get_intermediate_count() const { return intermediate_counter.get_max_used(); } + cudf::size_type intermediate_count() const { return _intermediate_counter.get_max_used(); } /** * @brief Get the device data references. * * @return std::vector */ - std::vector const& get_data_references() const + std::vector const& data_references() const { - return data_references; + return _data_references; } /** @@ -148,16 +148,16 @@ class linearizer { * * @return std::vector */ - std::vector const& get_operators() const { return operators; } + std::vector const& operators() const { return _operators; } /** * @brief Get the operator source indices. * * @return std::vector */ - std::vector const& get_operator_source_indices() const + std::vector const& operator_source_indices() const { - return operator_source_indices; + return _operator_source_indices; } /** @@ -165,9 +165,9 @@ class linearizer { * * @return std::vector */ - std::vector const& get_literals() const + std::vector const& literals() const { - return literals; + return _literals; } /** @@ -225,13 +225,13 @@ class linearizer { cudf::size_type add_data_reference(detail::device_data_reference data_ref); // State information about the "linearized" GPU execution plan - cudf::table_view table; - cudf::size_type node_count; - intermediate_counter intermediate_counter; - std::vector data_references; - std::vector operators; - std::vector operator_source_indices; - std::vector literals; + cudf::table_view _table; + cudf::size_type _node_count; + intermediate_counter _intermediate_counter; + std::vector _data_references; + std::vector _operators; + std::vector _operator_source_indices; + std::vector _literals; }; } // namespace detail diff --git a/cpp/src/ast/linearizer.cpp b/cpp/src/ast/linearizer.cpp index a7bbe3b6c5d..caa5024a30e 100644 --- a/cpp/src/ast/linearizer.cpp +++ b/cpp/src/ast/linearizer.cpp @@ -96,27 +96,22 @@ cudf::size_type linearizer::intermediate_counter::find_first_missing() const cudf::size_type linearizer::visit(literal const& expr) { - // Increment the node index - node_count++; - // Resolve node type - auto const data_type = expr.get_data_type(); - // Construct a scalar device view - auto device_view = expr.get_value(); - // Push literal - auto const literal_index = cudf::size_type(literals.size()); - literals.push_back(device_view); - // Push data reference + _node_count++; // Increment the node index + auto const data_type = expr.get_data_type(); // Resolve node type + auto device_view = expr.get_value(); // Construct a scalar device view + auto const literal_index = cudf::size_type(_literals.size()); // Push literal + _literals.push_back(device_view); auto const source = detail::device_data_reference( - detail::device_data_reference_type::LITERAL, data_type, literal_index); + detail::device_data_reference_type::LITERAL, data_type, literal_index); // Push data reference return add_data_reference(source); } cudf::size_type linearizer::visit(column_reference const& expr) { // Increment the node index - node_count++; + _node_count++; // Resolve node type - auto const data_type = expr.get_data_type(table); + auto const data_type = expr.get_data_type(_table); // Push data reference auto const source = detail::device_data_reference(detail::device_data_reference_type::COLUMN, data_type, @@ -128,13 +123,13 @@ cudf::size_type linearizer::visit(column_reference const& expr) cudf::size_type linearizer::visit(expression const& expr) { // Increment the node index - auto const node_index = node_count++; + auto const node_index = _node_count++; // Visit children (operands) of this node auto const operand_data_ref_indices = visit_operands(expr.get_operands()); // Resolve operand types - auto data_ref = [this](auto const& index) { return get_data_references()[index].data_type; }; - auto begin = thrust::make_transform_iterator(operand_data_ref_indices.cbegin(), data_ref); - auto end = begin + operand_data_ref_indices.size(); + auto data_ref = [this](auto const& index) { return data_references()[index].data_type; }; + auto begin = thrust::make_transform_iterator(operand_data_ref_indices.cbegin(), data_ref); + auto end = begin + operand_data_ref_indices.size(); auto const operand_types = std::vector(begin, end); // Validate types of operand data references match @@ -148,16 +143,16 @@ cudf::size_type linearizer::visit(expression const& expr) operand_data_ref_indices.cbegin(), operand_data_ref_indices.cend(), [this](auto const& data_reference_index) { - auto const operand_source = get_data_references()[data_reference_index]; + auto const operand_source = data_references()[data_reference_index]; if (operand_source.reference_type == detail::device_data_reference_type::INTERMEDIATE) { auto const intermediate_index = operand_source.data_index; - intermediate_counter.give(intermediate_index); + _intermediate_counter.give(intermediate_index); } }); // Resolve node type auto const op = expr.get_operator(); auto const data_type = cudf::ast::detail::ast_operator_return_type(op, operand_types); - operators.push_back(op); + _operators.push_back(op); // Push data reference auto const output = [&]() { if (node_index == 0) { @@ -174,24 +169,24 @@ cudf::size_type linearizer::visit(expression const& expr) CUDF_FAIL("The output data type is too large to be stored in an intermediate."); } return detail::device_data_reference( - detail::device_data_reference_type::INTERMEDIATE, data_type, intermediate_counter.take()); + detail::device_data_reference_type::INTERMEDIATE, data_type, _intermediate_counter.take()); } }(); auto const index = add_data_reference(output); // Insert source indices from all operands (sources) and this operator (destination) - operator_source_indices.insert(operator_source_indices.end(), - operand_data_ref_indices.cbegin(), - operand_data_ref_indices.cend()); - operator_source_indices.push_back(index); + _operator_source_indices.insert(_operator_source_indices.end(), + operand_data_ref_indices.cbegin(), + operand_data_ref_indices.cend()); + _operator_source_indices.push_back(index); return index; } -cudf::data_type linearizer::get_root_data_type() const +cudf::data_type linearizer::root_data_type() const { - if (get_data_references().empty()) { + if (data_references().empty()) { return cudf::data_type(cudf::type_id::EMPTY); } else { - return get_data_references().back().data_type; + return data_references().back().data_type; } } @@ -210,12 +205,12 @@ cudf::size_type linearizer::add_data_reference(detail::device_data_reference dat { // If an equivalent data reference already exists, return its index. Otherwise add this data // reference and return the new index. - auto const it = std::find(data_references.cbegin(), data_references.cend(), data_ref); - if (it != data_references.cend()) { - return std::distance(data_references.cbegin(), it); + auto const it = std::find(_data_references.cbegin(), _data_references.cend(), data_ref); + if (it != _data_references.cend()) { + return std::distance(_data_references.cbegin(), it); } else { - data_references.push_back(data_ref); - return data_references.size() - 1; + _data_references.push_back(data_ref); + return _data_references.size() - 1; } } diff --git a/cpp/src/ast/transform.cu b/cpp/src/ast/transform.cu index b61f48f7a84..f49b5c644e8 100644 --- a/cpp/src/ast/transform.cu +++ b/cpp/src/ast/transform.cu @@ -94,12 +94,12 @@ std::unique_ptr compute_column(table_view const table, { // Linearize the AST auto const expr_linearizer = linearizer(expr, table); - auto const data_references = expr_linearizer.get_data_references(); - auto const literals = expr_linearizer.get_literals(); - auto const operators = expr_linearizer.get_operators(); + auto const data_references = expr_linearizer.data_references(); + auto const literals = expr_linearizer.literals(); + auto const operators = expr_linearizer.operators(); auto const num_operators = cudf::size_type(operators.size()); - auto const operator_source_indices = expr_linearizer.get_operator_source_indices(); - auto const expr_data_type = expr_linearizer.get_root_data_type(); + auto const operator_source_indices = expr_linearizer.operator_source_indices(); + auto const expr_data_type = expr_linearizer.root_data_type(); // Create ast_plan and device buffer auto plan = ast_plan(); @@ -138,7 +138,7 @@ std::unique_ptr compute_column(table_view const table, cudf::mutable_column_device_view::create(output_column->mutable_view(), stream); // Configure kernel parameters - auto const num_intermediates = expr_linearizer.get_intermediate_count(); + auto const num_intermediates = expr_linearizer.intermediate_count(); auto const shmem_size_per_thread = static_cast(sizeof(std::int64_t) * num_intermediates); int device_id; CUDA_TRY(cudaGetDevice(&device_id)); From ff056ba3d80987a29f40a891614fd8f5e6399f75 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 9 Feb 2021 16:15:46 -0500 Subject: [PATCH 3/7] Conditional expression --- cpp/src/ast/linearizer.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/cpp/src/ast/linearizer.cpp b/cpp/src/ast/linearizer.cpp index caa5024a30e..54a622a5811 100644 --- a/cpp/src/ast/linearizer.cpp +++ b/cpp/src/ast/linearizer.cpp @@ -183,11 +183,8 @@ cudf::size_type linearizer::visit(expression const& expr) cudf::data_type linearizer::root_data_type() const { - if (data_references().empty()) { - return cudf::data_type(cudf::type_id::EMPTY); - } else { - return data_references().back().data_type; - } + return data_references().empty() ? cudf::data_type(cudf::type_id::EMPTY) + : data_references().back().data_type; } std::vector linearizer::visit_operands( From 396964f3660f81c7390c27e357ab337071a9a3be Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Tue, 9 Feb 2021 19:24:10 -0500 Subject: [PATCH 4/7] Collapsing namespaces --- cpp/include/cudf/ast/detail/linearizer.hpp | 1 - cpp/include/cudf/ast/linearizer.hpp | 1 - cpp/src/ast/transform.cu | 2 -- 3 files changed, 4 deletions(-) diff --git a/cpp/include/cudf/ast/detail/linearizer.hpp b/cpp/include/cudf/ast/detail/linearizer.hpp index c83a0b2029b..fab69dd1b8c 100644 --- a/cpp/include/cudf/ast/detail/linearizer.hpp +++ b/cpp/include/cudf/ast/detail/linearizer.hpp @@ -23,7 +23,6 @@ #include namespace cudf { - namespace ast { // Forward declaration diff --git a/cpp/include/cudf/ast/linearizer.hpp b/cpp/include/cudf/ast/linearizer.hpp index 594dd0a73ce..cab27522520 100644 --- a/cpp/include/cudf/ast/linearizer.hpp +++ b/cpp/include/cudf/ast/linearizer.hpp @@ -25,7 +25,6 @@ #include namespace cudf { - namespace ast { /** diff --git a/cpp/src/ast/transform.cu b/cpp/src/ast/transform.cu index f49b5c644e8..7e044e726ae 100644 --- a/cpp/src/ast/transform.cu +++ b/cpp/src/ast/transform.cu @@ -40,9 +40,7 @@ #include namespace cudf { - namespace ast { - namespace detail { /** From 1bd09098e0a894a23bfd636a2142ec7c8a9717b0 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 10 Feb 2021 15:54:06 -0500 Subject: [PATCH 5/7] Clean up --- cpp/src/ast/transform.cu | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/cpp/src/ast/transform.cu b/cpp/src/ast/transform.cu index 7e044e726ae..9e7df2963c0 100644 --- a/cpp/src/ast/transform.cu +++ b/cpp/src/ast/transform.cu @@ -136,24 +136,23 @@ std::unique_ptr compute_column(table_view const table, cudf::mutable_column_device_view::create(output_column->mutable_view(), stream); // Configure kernel parameters - auto const num_intermediates = expr_linearizer.intermediate_count(); - auto const shmem_size_per_thread = static_cast(sizeof(std::int64_t) * num_intermediates); + auto const num_intermediates = expr_linearizer.intermediate_count(); + // size_per_thread/block, limit_per_block are for shmem (shared memory) + auto const size_per_thread = static_cast(sizeof(std::int64_t) * num_intermediates); int device_id; CUDA_TRY(cudaGetDevice(&device_id)); - int shmem_per_block_limit; - CUDA_TRY( - cudaDeviceGetAttribute(&shmem_per_block_limit, cudaDevAttrMaxSharedMemoryPerBlock, device_id)); + int limit_per_block; + CUDA_TRY(cudaDeviceGetAttribute(&limit_per_block, cudaDevAttrMaxSharedMemoryPerBlock, device_id)); auto constexpr MAX_BLOCK_SIZE = 128; - auto const block_size = - (shmem_size_per_thread > 0) - ? std::min(MAX_BLOCK_SIZE, shmem_per_block_limit / shmem_size_per_thread) - : MAX_BLOCK_SIZE; - cudf::detail::grid_1d config(table_num_rows, block_size); - auto const shmem_size_per_block = shmem_size_per_thread * config.num_threads_per_block; + auto const block_size = size_per_thread != 0 + ? std::min(MAX_BLOCK_SIZE, limit_per_block / size_per_thread) + : MAX_BLOCK_SIZE; + auto const config = cudf::detail::grid_1d{table_num_rows, block_size}; + auto const size_per_block = size_per_thread * config.num_threads_per_block; // Execute the kernel cudf::ast::detail::compute_column_kernel - <<>>( + <<>>( *table_device, device_literals, *mutable_output_device, From d084f88e61976f1d9705c08db7f302c14ffb863e Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Wed, 17 Feb 2021 15:17:58 -0500 Subject: [PATCH 6/7] Copyright --- cpp/include/cudf/ast/detail/linearizer.hpp | 2 +- cpp/include/cudf/ast/linearizer.hpp | 2 +- cpp/src/ast/linearizer.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/include/cudf/ast/detail/linearizer.hpp b/cpp/include/cudf/ast/detail/linearizer.hpp index fab69dd1b8c..68319a24e5d 100644 --- a/cpp/include/cudf/ast/detail/linearizer.hpp +++ b/cpp/include/cudf/ast/detail/linearizer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/cudf/ast/linearizer.hpp b/cpp/include/cudf/ast/linearizer.hpp index cab27522520..e5ccb2e8069 100644 --- a/cpp/include/cudf/ast/linearizer.hpp +++ b/cpp/include/cudf/ast/linearizer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/ast/linearizer.cpp b/cpp/src/ast/linearizer.cpp index 54a622a5811..cc70845e1ff 100644 --- a/cpp/src/ast/linearizer.cpp +++ b/cpp/src/ast/linearizer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From b52f687ce661f6243f1413be15578c67939231b6 Mon Sep 17 00:00:00 2001 From: Conor Hoekstra Date: Thu, 18 Feb 2021 10:20:30 -0500 Subject: [PATCH 7/7] Add back shmem_ --- cpp/src/ast/transform.cu | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/cpp/src/ast/transform.cu b/cpp/src/ast/transform.cu index 9e7df2963c0..bc055d46869 100644 --- a/cpp/src/ast/transform.cu +++ b/cpp/src/ast/transform.cu @@ -136,23 +136,24 @@ std::unique_ptr compute_column(table_view const table, cudf::mutable_column_device_view::create(output_column->mutable_view(), stream); // Configure kernel parameters - auto const num_intermediates = expr_linearizer.intermediate_count(); - // size_per_thread/block, limit_per_block are for shmem (shared memory) - auto const size_per_thread = static_cast(sizeof(std::int64_t) * num_intermediates); + auto const num_intermediates = expr_linearizer.intermediate_count(); + auto const shmem_size_per_thread = static_cast(sizeof(std::int64_t) * num_intermediates); int device_id; CUDA_TRY(cudaGetDevice(&device_id)); - int limit_per_block; - CUDA_TRY(cudaDeviceGetAttribute(&limit_per_block, cudaDevAttrMaxSharedMemoryPerBlock, device_id)); + int shmem_limit_per_block; + CUDA_TRY( + cudaDeviceGetAttribute(&shmem_limit_per_block, cudaDevAttrMaxSharedMemoryPerBlock, device_id)); auto constexpr MAX_BLOCK_SIZE = 128; - auto const block_size = size_per_thread != 0 - ? std::min(MAX_BLOCK_SIZE, limit_per_block / size_per_thread) - : MAX_BLOCK_SIZE; - auto const config = cudf::detail::grid_1d{table_num_rows, block_size}; - auto const size_per_block = size_per_thread * config.num_threads_per_block; + auto const block_size = + shmem_size_per_thread != 0 + ? std::min(MAX_BLOCK_SIZE, shmem_limit_per_block / shmem_size_per_thread) + : MAX_BLOCK_SIZE; + auto const config = cudf::detail::grid_1d{table_num_rows, block_size}; + auto const shmem_size_per_block = shmem_size_per_thread * config.num_threads_per_block; // Execute the kernel cudf::ast::detail::compute_column_kernel - <<>>( + <<>>( *table_device, device_literals, *mutable_output_device,