From 6e34bf84d1bc8017967e6f29a969f0d86264d2e7 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 19 Jun 2024 10:35:51 -0700 Subject: [PATCH 001/124] Add basic cudf code to project / select --- cpp/examples/build.sh | 9 ++--- cpp/examples/tpch/CMakeLists.txt | 22 ++++++++++++ cpp/examples/tpch/q1.cpp | 62 ++++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 4 deletions(-) create mode 100644 cpp/examples/tpch/CMakeLists.txt create mode 100644 cpp/examples/tpch/q1.cpp diff --git a/cpp/examples/build.sh b/cpp/examples/build.sh index bde6ef7d69c..b6bb6b94fad 100755 --- a/cpp/examples/build.sh +++ b/cpp/examples/build.sh @@ -56,7 +56,8 @@ build_example() { fi } -build_example basic -build_example strings -build_example nested_types -build_example parquet_io +# build_example basic +build_example tpch +# build_example strings +# build_example nested_types +# build_example parquet_io diff --git a/cpp/examples/tpch/CMakeLists.txt b/cpp/examples/tpch/CMakeLists.txt new file mode 100644 index 00000000000..7c2cd74970e --- /dev/null +++ b/cpp/examples/tpch/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. + +cmake_minimum_required(VERSION 3.26.4) + +include(../set_cuda_architecture.cmake) + +# initialize cuda architecture +rapids_cuda_init_architectures(tpch_example) +rapids_cuda_set_architectures(RAPIDS) + +project( + tpch_example + VERSION 0.0.1 + LANGUAGES CXX CUDA +) + +include(../fetch_dependencies.cmake) + +# Configure your project here +add_executable(tpch_q1 q1.cpp) +target_link_libraries(tpch_q1 PRIVATE cudf::cudf) +target_compile_features(tpch_q1 PRIVATE cxx_std_17) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp new file mode 100644 index 00000000000..a188a37a129 --- /dev/null +++ b/cpp/examples/tpch/q1.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void read_parquet_file(std::vector column_names) { + std::string path = "/home/jayjeetc/tpch_sf1/lineitem/part-0.parquet"; + auto source = cudf::io::source_info(path); + auto builder = cudf::io::parquet_reader_options_builder(source); + + auto col_ref = cudf::ast::column_reference(3); + + auto literal_value = cudf::numeric_scalar(2); + auto literal = cudf::ast::literal(literal_value); + + auto filter_expr = cudf::ast::operation( + cudf::ast::ast_operator::LESS, + col_ref, + literal + ); + + builder.columns(column_names); + builder.filter(filter_expr); + + auto options = builder.build(); + cudf::io::table_with_metadata result = cudf::io::read_parquet(options); + + std::cout << result.tbl->num_columns() << std::endl; + std::cout << result.tbl->num_rows() << std::endl; +} + +int main() { + std::vector column_names = {"l_orderkey", "l_partkey", "l_suppkey", "l_linenumber"}; + read_parquet_file(column_names); + return 0; +} From b20a8e640d9d19ea6e70d6aea21f98df80440ac3 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Mon, 24 Jun 2024 21:43:15 -0700 Subject: [PATCH 002/124] wip q1 --- cpp/examples/tpch/q1.cpp | 138 ++++++++++++++++++++++++++++++++++----- 1 file changed, 123 insertions(+), 15 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index a188a37a129..acf365c1251 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -15,6 +15,7 @@ */ #include +#include #include #include #include @@ -28,35 +29,142 @@ #include #include #include +#include +#include +#include -void read_parquet_file(std::vector column_names) { +/* + select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, // done + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order + from + lineitem + where + l_shipdate <= date '1998-09-02' + group by + l_returnflag, + l_linestatus + order by + l_returnflag, + l_linestatus; + +*/ +void write_parquet(cudf::table_view input, + std::string filepath) +{ + auto sink_info = cudf::io::sink_info(filepath); + auto builder = cudf::io::parquet_writer_options::builder(sink_info, input); + auto options = builder.build(); + cudf::io::write_parquet(options); +} + +std::unique_ptr append_col_to_table(std::unique_ptr table, std::unique_ptr col) { + std::vector> columns; + for (size_t i = 0; i < table->num_columns(); i++) { + columns.push_back(std::make_unique(table->get_column(i))); + } + columns.push_back(std::move(col)); + return std::make_unique(std::move(columns)); +} + +std::unique_ptr read_filter_project(std::vector& projection_cols) { std::string path = "/home/jayjeetc/tpch_sf1/lineitem/part-0.parquet"; auto source = cudf::io::source_info(path); auto builder = cudf::io::parquet_reader_options_builder(source); - auto col_ref = cudf::ast::column_reference(3); + // auto col_ref = cudf::ast::column_reference(5); - auto literal_value = cudf::numeric_scalar(2); - auto literal = cudf::ast::literal(literal_value); + // auto literal_value = cudf::timestamp_scalar(1719255747, true); + // auto literal = cudf::ast::literal(literal_value); - auto filter_expr = cudf::ast::operation( - cudf::ast::ast_operator::LESS, - col_ref, - literal - ); + // auto filter_expr = cudf::ast::operation( + // cudf::ast::ast_operator::LESS, + // col_ref, + // literal + // ); - builder.columns(column_names); - builder.filter(filter_expr); + builder.columns(projection_cols); + // builder.filter(filter_expr); auto options = builder.build(); cudf::io::table_with_metadata result = cudf::io::read_parquet(options); + write_parquet(result.tbl->view(), "file.parquet"); + return std::move(result.tbl); +} + +std::unique_ptr calc_disc_price(std::unique_ptr& table) { + auto one = cudf::fixed_point_scalar(1); + auto disc = table->get_column(4).view(); + auto one_minus_disc = cudf::binary_operation(one, disc, cudf::binary_operator::SUB, disc.type()); + auto extended_price = table->get_column(3).view(); + auto disc_price = cudf::binary_operation(extended_price, one_minus_disc->view(), cudf::binary_operator::MUL, extended_price.type()); + return disc_price; +} + +std::unique_ptr calc_charge(std::unique_ptr& table) { + auto one = cudf::fixed_point_scalar(1); + auto disc = table->get_column(4).view(); + auto one_minus_disc = cudf::binary_operation(one, disc, cudf::binary_operator::SUB, disc.type()); + auto extended_price = table->get_column(3).view(); + auto disc_price = cudf::binary_operation(extended_price, one_minus_disc->view(), cudf::binary_operator::MUL, extended_price.type()); + auto tax = table->get_column(7).view(); + auto one_plus_tax = cudf::binary_operation(one, tax, cudf::binary_operator::ADD, tax.type()); + auto charge = cudf::binary_operation(disc_price->view(), one_plus_tax->view(), cudf::binary_operator::MUL, disc_price->type()); + return charge; +} + +std::vector make_single_aggregation_request( + std::unique_ptr&& agg, cudf::column_view value) { + std::vector requests; + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[0].aggregations.push_back(std::move(agg)); + requests[0].values = value; + return requests; +} - std::cout << result.tbl->num_columns() << std::endl; - std::cout << result.tbl->num_rows() << std::endl; +std::unique_ptr group_by(std::unique_ptr &table, int32_t groupby_key_index, int32_t groupby_value_index) { + auto tbl_view = table->view(); + auto keys = cudf::table_view{{tbl_view.column(0), tbl_view.column(1}}; + auto val = tbl_view.column(groupby_value_index); + cudf::groupby::groupby grpby_obj(keys); + auto requests = make_single_aggregation_request(cudf::make_sum_aggregation(), val); + auto agg_results = grpby_obj.aggregate(requests); + + auto result_key = std::move(agg_results.first); + auto result_val = std::move(agg_results.second[0].results[0]); + std::vector columns{result_key->get_column(0), *result_val}; + return std::make_unique(cudf::table_view(columns)); } int main() { - std::vector column_names = {"l_orderkey", "l_partkey", "l_suppkey", "l_linenumber"}; - read_parquet_file(column_names); + // 1. Project 2. Filter 3. GroupBy 4. Aggregation + std::vector column_names = { + "l_returnflag", + "l_linestatus", + "l_quantity", + "l_extendedprice", + "l_discount", + "l_shipdate", + "l_orderkey", + "l_tax" + }; + + std::unique_ptr t1 = read_filter_project(column_names); + std::unique_ptr disc_price_col = calc_disc_price(t1); + std::unique_ptr charge_col = calc_charge(t1); + auto t2 = append_col_to_table(std::move(t1), std::move(disc_price_col)); + auto t3 = append_col_to_table(std::move(t2), std::move(charge_col)); + + std::cout << "Table after appending columns: " << std::endl; + std::cout << t3->num_rows() << " " << t3->num_columns() << std::endl; + return 0; } From c36cc169a1fb56250035d0b64f427c38ccd729d8 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 25 Jun 2024 00:40:46 -0700 Subject: [PATCH 003/124] q1 done --- cpp/examples/tpch/q1.cpp | 91 ++++++++++++++++++++++++++++------------ 1 file changed, 65 insertions(+), 26 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index acf365c1251..94c2c6803f5 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -31,15 +31,14 @@ #include #include #include -#include - +#include /* select l_returnflag, l_linestatus, sum(l_quantity) as sum_qty, sum(l_extendedprice) as sum_base_price, - sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, // done + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, avg(l_quantity) as avg_qty, avg(l_extendedprice) as avg_price, @@ -55,8 +54,8 @@ order by l_returnflag, l_linestatus; - */ + void write_parquet(cudf::table_view input, std::string filepath) { @@ -80,11 +79,9 @@ std::unique_ptr read_filter_project(std::vector& proje auto source = cudf::io::source_info(path); auto builder = cudf::io::parquet_reader_options_builder(source); - // auto col_ref = cudf::ast::column_reference(5); - - // auto literal_value = cudf::timestamp_scalar(1719255747, true); + auto col_ref = cudf::ast::column_reference(5); + auto literal_value = cudf::timestamp_scalar(1719255747, true); // auto literal = cudf::ast::literal(literal_value); - // auto filter_expr = cudf::ast::operation( // cudf::ast::ast_operator::LESS, // col_ref, @@ -95,7 +92,7 @@ std::unique_ptr read_filter_project(std::vector& proje // builder.filter(filter_expr); auto options = builder.build(); - cudf::io::table_with_metadata result = cudf::io::read_parquet(options); + auto result = cudf::io::read_parquet(options); write_parquet(result.tbl->view(), "file.parquet"); return std::move(result.tbl); } @@ -121,31 +118,71 @@ std::unique_ptr calc_charge(std::unique_ptr& table) { return charge; } -std::vector make_single_aggregation_request( - std::unique_ptr&& agg, cudf::column_view value) { +std::unique_ptr group_by(std::unique_ptr& table) { + auto tbl_view = table->view(); + auto keys = cudf::table_view{{tbl_view.column(0), tbl_view.column(1)}}; + + auto l_quantity = tbl_view.column(2); + auto l_extendedprice = tbl_view.column(3); + auto l_discount = tbl_view.column(4); + auto l_discprice = tbl_view.column(8); + auto l_charge = tbl_view.column(9); + + cudf::groupby::groupby groupby_obj(keys); std::vector requests; + + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[0].aggregations.push_back(cudf::make_sum_aggregation()); + requests[0].aggregations.push_back(cudf::make_mean_aggregation()); + requests[0].values = l_quantity; + + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[1].aggregations.push_back(cudf::make_sum_aggregation()); + requests[1].aggregations.push_back(cudf::make_mean_aggregation()); + requests[1].values = l_extendedprice; + + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[2].aggregations.push_back(cudf::make_mean_aggregation()); + requests[2].values = l_discount; + + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[3].aggregations.push_back(cudf::make_sum_aggregation()); + requests[3].values = l_discprice; + + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[4].aggregations.push_back(cudf::make_sum_aggregation()); + requests[4].values = l_charge; + requests.emplace_back(cudf::groupby::aggregation_request()); - requests[0].aggregations.push_back(std::move(agg)); - requests[0].values = value; - return requests; + requests[5].aggregations.push_back(cudf::make_count_aggregation()); + requests[5].values = l_charge; + + auto agg_results = groupby_obj.aggregate(requests); + auto result_key = std::move(agg_results.first); + std::vector columns{ + result_key->get_column(0), + result_key->get_column(1), + *agg_results.second[0].results[0], + *agg_results.second[0].results[1], + *agg_results.second[1].results[0], + *agg_results.second[1].results[1], + *agg_results.second[2].results[0], + *agg_results.second[3].results[0], + *agg_results.second[4].results[0], + *agg_results.second[5].results[0] + }; + return std::make_unique(cudf::table_view(columns)); } -std::unique_ptr group_by(std::unique_ptr &table, int32_t groupby_key_index, int32_t groupby_value_index) { +std::unique_ptr sort_by_key(std::unique_ptr& table, std::vector& keys) { auto tbl_view = table->view(); - auto keys = cudf::table_view{{tbl_view.column(0), tbl_view.column(1}}; - auto val = tbl_view.column(groupby_value_index); - cudf::groupby::groupby grpby_obj(keys); - auto requests = make_single_aggregation_request(cudf::make_sum_aggregation(), val); - auto agg_results = grpby_obj.aggregate(requests); - - auto result_key = std::move(agg_results.first); - auto result_val = std::move(agg_results.second[0].results[0]); - std::vector columns{result_key->get_column(0), *result_val}; - return std::make_unique(cudf::table_view(columns)); + return cudf::sort_by_key( + tbl_view, + cudf::table_view{{tbl_view.column(0), tbl_view.column(1)}} + ); } int main() { - // 1. Project 2. Filter 3. GroupBy 4. Aggregation std::vector column_names = { "l_returnflag", "l_linestatus", @@ -166,5 +203,7 @@ int main() { std::cout << "Table after appending columns: " << std::endl; std::cout << t3->num_rows() << " " << t3->num_columns() << std::endl; + group_by(t3); + return 0; } From 90fbcb3cd564b4c44cdfbdd15516f4f08ad9a2b9 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 25 Jun 2024 08:38:53 -0700 Subject: [PATCH 004/124] filtering using timestamp --- cpp/examples/tpch/q1.cpp | 44 ++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 94c2c6803f5..6e2cd2ee54c 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -22,9 +22,8 @@ #include #include #include +#include #include -#include -#include #include #include #include @@ -56,16 +55,15 @@ l_linestatus; */ -void write_parquet(cudf::table_view input, - std::string filepath) -{ - auto sink_info = cudf::io::sink_info(filepath); - auto builder = cudf::io::parquet_writer_options::builder(sink_info, input); +void write_parquet(cudf::table_view input, std::string filepath) { + auto sink_info = cudf::io::sink_info(filepath); + auto builder = cudf::io::parquet_writer_options::builder(sink_info, input); auto options = builder.build(); cudf::io::write_parquet(options); } -std::unique_ptr append_col_to_table(std::unique_ptr table, std::unique_ptr col) { +std::unique_ptr append_col_to_table( + std::unique_ptr table, std::unique_ptr col) { std::vector> columns; for (size_t i = 0; i < table->num_columns(); i++) { columns.push_back(std::make_unique(table->get_column(i))); @@ -74,26 +72,25 @@ std::unique_ptr append_col_to_table(std::unique_ptr ta return std::make_unique(std::move(columns)); } -std::unique_ptr read_filter_project(std::vector& projection_cols) { +std::unique_ptr read_parquet(std::vector& projection_cols) { std::string path = "/home/jayjeetc/tpch_sf1/lineitem/part-0.parquet"; auto source = cudf::io::source_info(path); auto builder = cudf::io::parquet_reader_options_builder(source); - auto col_ref = cudf::ast::column_reference(5); - auto literal_value = cudf::timestamp_scalar(1719255747, true); - // auto literal = cudf::ast::literal(literal_value); - // auto filter_expr = cudf::ast::operation( - // cudf::ast::ast_operator::LESS, - // col_ref, - // literal - // ); + auto l_shipdate = cudf::ast::column_reference(5); + auto date_scalar = cudf::timestamp_scalar(1719255747L, true); + auto date = cudf::ast::literal(date_scalar); + auto filter_expr = cudf::ast::operation( + cudf::ast::ast_operator::LESS, + l_shipdate, + date + ); builder.columns(projection_cols); - // builder.filter(filter_expr); + builder.filter(filter_expr); auto options = builder.build(); auto result = cudf::io::read_parquet(options); - write_parquet(result.tbl->view(), "file.parquet"); return std::move(result.tbl); } @@ -174,7 +171,8 @@ std::unique_ptr group_by(std::unique_ptr& table) { return std::make_unique(cudf::table_view(columns)); } -std::unique_ptr sort_by_key(std::unique_ptr& table, std::vector& keys) { +std::unique_ptr sort_by_key( + std::unique_ptr& table, std::vector& keys) { auto tbl_view = table->view(); return cudf::sort_by_key( tbl_view, @@ -199,11 +197,9 @@ int main() { std::unique_ptr charge_col = calc_charge(t1); auto t2 = append_col_to_table(std::move(t1), std::move(disc_price_col)); auto t3 = append_col_to_table(std::move(t2), std::move(charge_col)); + auto t4 = group_by(t3); - std::cout << "Table after appending columns: " << std::endl; - std::cout << t3->num_rows() << " " << t3->num_columns() << std::endl; - - group_by(t3); + write_parquet(t4->view(), "q1.parquet"); return 0; } From 6ec4748015741be6145688a99450287d8ca3b5e4 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 25 Jun 2024 09:29:22 -0700 Subject: [PATCH 005/124] Finish q1 --- cpp/examples/tpch/q1.cpp | 56 ++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 6e2cd2ee54c..4c592ab175d 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -31,6 +31,7 @@ #include #include #include + /* select l_returnflag, @@ -56,10 +57,10 @@ */ void write_parquet(cudf::table_view input, std::string filepath) { - auto sink_info = cudf::io::sink_info(filepath); - auto builder = cudf::io::parquet_writer_options::builder(sink_info, input); - auto options = builder.build(); - cudf::io::write_parquet(options); + auto sink_info = cudf::io::sink_info(filepath); + auto builder = cudf::io::parquet_writer_options::builder(sink_info, input); + auto options = builder.build(); + cudf::io::write_parquet(options); } std::unique_ptr append_col_to_table( @@ -72,16 +73,27 @@ std::unique_ptr append_col_to_table( return std::make_unique(std::move(columns)); } -std::unique_ptr read_parquet(std::vector& projection_cols) { - std::string path = "/home/jayjeetc/tpch_sf1/lineitem/part-0.parquet"; - auto source = cudf::io::source_info(path); +std::unique_ptr scan_filter_project() { + std::string lineitem = "/home/jayjeetc/tpch_sf1/lineitem/part-0.parquet"; + auto source = cudf::io::source_info(lineitem); auto builder = cudf::io::parquet_reader_options_builder(source); + std::vector projection_cols = { + "l_returnflag", + "l_linestatus", + "l_quantity", + "l_extendedprice", + "l_discount", + "l_shipdate", + "l_orderkey", + "l_tax" + }; + auto l_shipdate = cudf::ast::column_reference(5); - auto date_scalar = cudf::timestamp_scalar(1719255747L, true); + auto date_scalar = cudf::timestamp_scalar(10471, true); auto date = cudf::ast::literal(date_scalar); auto filter_expr = cudf::ast::operation( - cudf::ast::ast_operator::LESS, + cudf::ast::ast_operator::LESS_EQUAL, l_shipdate, date ); @@ -115,7 +127,7 @@ std::unique_ptr calc_charge(std::unique_ptr& table) { return charge; } -std::unique_ptr group_by(std::unique_ptr& table) { +std::unique_ptr calc_group_by(std::unique_ptr& table) { auto tbl_view = table->view(); auto keys = cudf::table_view{{tbl_view.column(0), tbl_view.column(1)}}; @@ -171,8 +183,8 @@ std::unique_ptr group_by(std::unique_ptr& table) { return std::make_unique(cudf::table_view(columns)); } -std::unique_ptr sort_by_key( - std::unique_ptr& table, std::vector& keys) { +std::unique_ptr sort( + std::unique_ptr& table) { auto tbl_view = table->view(); return cudf::sort_by_key( tbl_view, @@ -181,25 +193,13 @@ std::unique_ptr sort_by_key( } int main() { - std::vector column_names = { - "l_returnflag", - "l_linestatus", - "l_quantity", - "l_extendedprice", - "l_discount", - "l_shipdate", - "l_orderkey", - "l_tax" - }; - - std::unique_ptr t1 = read_filter_project(column_names); + std::unique_ptr t1 = scan_filter_project(); std::unique_ptr disc_price_col = calc_disc_price(t1); std::unique_ptr charge_col = calc_charge(t1); auto t2 = append_col_to_table(std::move(t1), std::move(disc_price_col)); auto t3 = append_col_to_table(std::move(t2), std::move(charge_col)); - auto t4 = group_by(t3); - - write_parquet(t4->view(), "q1.parquet"); - + auto t4 = calc_group_by(t3); + auto t5 = sort(t4); + write_parquet(t5->view(), "q1.parquet"); return 0; } From 18a92098b839ab4f658d882a87f7e3a22d857332 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 25 Jun 2024 09:29:48 -0700 Subject: [PATCH 006/124] Finish q1 --- cpp/examples/tpch/q1.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 4c592ab175d..da38202e0a9 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -45,7 +45,7 @@ avg(l_discount) as avg_disc, count(*) as count_order from - lineitem + '~/tpch_sf1/lineitem/part-0.parquet' where l_shipdate <= date '1998-09-02' group by From 4f1aad8ef5abf9b2f800981acf14e73fc37da9bd Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 25 Jun 2024 10:02:29 -0700 Subject: [PATCH 007/124] Add column names --- cpp/examples/tpch/q1.cpp | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index da38202e0a9..e792b2d58b5 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -58,7 +58,25 @@ void write_parquet(cudf::table_view input, std::string filepath) { auto sink_info = cudf::io::sink_info(filepath); + + cudf::io::table_metadata metadata; + std::vector column_names; + column_names.push_back(cudf::io::column_name_info("l_returnflag")); + column_names.push_back(cudf::io::column_name_info("l_linestatus")); + column_names.push_back(cudf::io::column_name_info("sum_qty")); + column_names.push_back(cudf::io::column_name_info("avg_qty")); + column_names.push_back(cudf::io::column_name_info("sum_base_price")); + column_names.push_back(cudf::io::column_name_info("avg_price")); + column_names.push_back(cudf::io::column_name_info("avg_disc")); + column_names.push_back(cudf::io::column_name_info("sum_disc_price")); + column_names.push_back(cudf::io::column_name_info("sum_charge")); + column_names.push_back(cudf::io::column_name_info("count_order")); + + metadata.schema_info = column_names; + auto table_metadata = cudf::io::table_input_metadata{metadata}; + auto builder = cudf::io::parquet_writer_options::builder(sink_info, input); + builder.metadata(table_metadata); auto options = builder.build(); cudf::io::write_parquet(options); } @@ -193,9 +211,9 @@ std::unique_ptr sort( } int main() { - std::unique_ptr t1 = scan_filter_project(); - std::unique_ptr disc_price_col = calc_disc_price(t1); - std::unique_ptr charge_col = calc_charge(t1); + auto t1 = scan_filter_project(); + auto disc_price_col = calc_disc_price(t1); + auto charge_col = calc_charge(t1); auto t2 = append_col_to_table(std::move(t1), std::move(disc_price_col)); auto t3 = append_col_to_table(std::move(t2), std::move(charge_col)); auto t4 = calc_group_by(t3); From c86ce5e74d1c3a55e3969f2c5f331d62d978fd41 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 25 Jun 2024 14:53:51 -0700 Subject: [PATCH 008/124] Remove unnecessary memory copies --- cpp/examples/tpch/q1.cpp | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index e792b2d58b5..5ad2e2d50d2 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -108,6 +108,8 @@ std::unique_ptr scan_filter_project() { }; auto l_shipdate = cudf::ast::column_reference(5); + + // TODO: Fix the date representation for '1998-09-02' auto date_scalar = cudf::timestamp_scalar(10471, true); auto date = cudf::ast::literal(date_scalar); auto filter_expr = cudf::ast::operation( @@ -186,19 +188,23 @@ std::unique_ptr calc_group_by(std::unique_ptr& table) auto agg_results = groupby_obj.aggregate(requests); auto result_key = std::move(agg_results.first); - std::vector columns{ - result_key->get_column(0), - result_key->get_column(1), - *agg_results.second[0].results[0], - *agg_results.second[0].results[1], - *agg_results.second[1].results[0], - *agg_results.second[1].results[1], - *agg_results.second[2].results[0], - *agg_results.second[3].results[0], - *agg_results.second[4].results[0], - *agg_results.second[5].results[0] - }; - return std::make_unique(cudf::table_view(columns)); + + auto l_returnflag = std::make_unique(result_key->get_column(0)); + auto l_linestatus = std::make_unique(result_key->get_column(1)); + + std::vector> columns; + columns.push_back(std::move(l_returnflag)); + columns.push_back(std::move(l_linestatus)); + columns.push_back(std::move(agg_results.second[0].results[0])); + columns.push_back(std::move(agg_results.second[0].results[1])); + columns.push_back(std::move(agg_results.second[1].results[0])); + columns.push_back(std::move(agg_results.second[1].results[1])); + columns.push_back(std::move(agg_results.second[2].results[0])); + columns.push_back(std::move(agg_results.second[3].results[0])); + columns.push_back(std::move(agg_results.second[4].results[0])); + columns.push_back(std::move(agg_results.second[5].results[0])); + + return std::make_unique(std::move(columns)); } std::unique_ptr sort( From 8174b999c3678b76cf6610e76effafe5262fee3e Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 25 Jun 2024 15:13:20 -0700 Subject: [PATCH 009/124] Add a query plan diagram --- cpp/examples/build.sh | 8 ++--- cpp/examples/tpch/q1.cpp | 73 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/cpp/examples/build.sh b/cpp/examples/build.sh index b6bb6b94fad..dce81fb1677 100755 --- a/cpp/examples/build.sh +++ b/cpp/examples/build.sh @@ -56,8 +56,8 @@ build_example() { fi } -# build_example basic +build_example basic build_example tpch -# build_example strings -# build_example nested_types -# build_example parquet_io +build_example strings +build_example nested_types +build_example parquet_io diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 5ad2e2d50d2..e446c29d8d1 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -33,6 +33,9 @@ #include /* + +Query: + select l_returnflag, l_linestatus, @@ -54,6 +57,76 @@ order by l_returnflag, l_linestatus; + +Plan: + + ┌─────────────────────────────┐ + │┌───────────────────────────┐│ + ││ Physical Plan ││ + │└───────────────────────────┘│ + └─────────────────────────────┘ + ┌───────────────────────────┐ + │ ORDER_BY │ + │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ + │ ORDERS: │ + │ "part-0".l_returnflag ASC │ + │ "part-0".l_linestatus ASC │ + └─────────────┬─────────────┘ + ┌─────────────┴─────────────┐ + │ HASH_GROUP_BY │ + │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ + │ #0 │ + │ #1 │ + │ sum(#2) │ + │ sum(#3) │ + │ sum(#4) │ + │ sum(#5) │ + │ avg(#6) │ + │ avg(#7) │ + │ avg(#8) │ + │ count_star() │ + └─────────────┬─────────────┘ + ┌─────────────┴─────────────┐ + │ PROJECTION │ + │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ + │ l_returnflag │ + │ l_linestatus │ + │ l_quantity │ + │ l_extendedprice │ + │ #4 │ + │ (#4 * (1.00 + l_tax)) │ + │ l_quantity │ + │ l_extendedprice │ + │ l_discount │ + └─────────────┬─────────────┘ + ┌─────────────┴─────────────┐ + │ PROJECTION │ + │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ + │ l_returnflag │ + │ l_linestatus │ + │ l_quantity │ + │ l_extendedprice │ + │ (l_extendedprice * (1.00 -│ + │ l_discount)) │ + │ l_tax │ + │ l_discount │ + └─────────────┬─────────────┘ + ┌─────────────┴─────────────┐ + │ PARQUET_SCAN │ + │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ + │ l_returnflag │ + │ l_linestatus │ + │ l_quantity │ + │ l_extendedprice │ + │ l_discount │ + │ l_tax │ + │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ + │ Filters: l_shipdate<='1998│ + │-09-02'::DATE AND l_sh... │ + │ IS NOT NULL │ + │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ + │ EC: 1200243 │ + └───────────────────────────┘ */ void write_parquet(cudf::table_view input, std::string filepath) { From b02cd032f24cdbce5841969e3c7d7f135f4735eb Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 11:28:25 -0700 Subject: [PATCH 010/124] Add helper functions --- cpp/examples/tpch/CMakeLists.txt | 4 + cpp/examples/tpch/q1.cpp | 23 ++--- cpp/examples/tpch/q6.cpp | 148 +++++++++++++++++++++++++++++++ cpp/examples/tpch/utils.hpp | 32 +++++++ 4 files changed, 193 insertions(+), 14 deletions(-) create mode 100644 cpp/examples/tpch/q6.cpp create mode 100644 cpp/examples/tpch/utils.hpp diff --git a/cpp/examples/tpch/CMakeLists.txt b/cpp/examples/tpch/CMakeLists.txt index 7c2cd74970e..9e17f8abf79 100644 --- a/cpp/examples/tpch/CMakeLists.txt +++ b/cpp/examples/tpch/CMakeLists.txt @@ -20,3 +20,7 @@ include(../fetch_dependencies.cmake) add_executable(tpch_q1 q1.cpp) target_link_libraries(tpch_q1 PRIVATE cudf::cudf) target_compile_features(tpch_q1 PRIVATE cxx_std_17) + +add_executable(tpch_q6 q6.cpp) +target_link_libraries(tpch_q6 PRIVATE cudf::cudf) +target_compile_features(tpch_q6 PRIVATE cxx_std_17) \ No newline at end of file diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index e446c29d8d1..a21e0fec772 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -32,6 +32,8 @@ #include #include +#include "utils.hpp" + /* Query: @@ -129,9 +131,7 @@ └───────────────────────────┘ */ -void write_parquet(cudf::table_view input, std::string filepath) { - auto sink_info = cudf::io::sink_info(filepath); - +cudf::io::table_metadata create_table_metadata() { cudf::io::table_metadata metadata; std::vector column_names; column_names.push_back(cudf::io::column_name_info("l_returnflag")); @@ -144,14 +144,8 @@ void write_parquet(cudf::table_view input, std::string filepath) { column_names.push_back(cudf::io::column_name_info("sum_disc_price")); column_names.push_back(cudf::io::column_name_info("sum_charge")); column_names.push_back(cudf::io::column_name_info("count_order")); - metadata.schema_info = column_names; - auto table_metadata = cudf::io::table_input_metadata{metadata}; - - auto builder = cudf::io::parquet_writer_options::builder(sink_info, input); - builder.metadata(table_metadata); - auto options = builder.build(); - cudf::io::write_parquet(options); + return metadata; } std::unique_ptr append_col_to_table( @@ -182,8 +176,7 @@ std::unique_ptr scan_filter_project() { auto l_shipdate = cudf::ast::column_reference(5); - // TODO: Fix the date representation for '1998-09-02' - auto date_scalar = cudf::timestamp_scalar(10471, true); + auto date_scalar = cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); auto date = cudf::ast::literal(date_scalar); auto filter_expr = cudf::ast::operation( cudf::ast::ast_operator::LESS_EQUAL, @@ -296,7 +289,9 @@ int main() { auto t2 = append_col_to_table(std::move(t1), std::move(disc_price_col)); auto t3 = append_col_to_table(std::move(t2), std::move(charge_col)); auto t4 = calc_group_by(t3); - auto t5 = sort(t4); - write_parquet(t5->view(), "q1.parquet"); + auto result_table = sort(t4); + auto result_table_metadata = create_table_metadata(); + std::string result_filename = "q1.parquet"; + write_parquet(result_table, result_table_metadata, result_filename); return 0; } diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp new file mode 100644 index 00000000000..4a212f8c57a --- /dev/null +++ b/cpp/examples/tpch/q6.cpp @@ -0,0 +1,148 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "utils.hpp" + +/* +select + sum(l_extendedprice * l_discount) as revenue +from + lineitem +where + l_shipdate >= date '1994-01-01' + and l_shipdate < date '1995-01-01' + and l_discount >= 0.05 + and l_discount <= 0.07 + and l_quantity < 24; +*/ + +std::unique_ptr scan_filter_project() { + std::string lineitem = "/home/jayjeetc/tpch_sf1/lineitem/part-0.parquet"; + auto source = cudf::io::source_info(lineitem); + auto builder = cudf::io::parquet_reader_options_builder(source); + + std::vector projection_cols = { + "l_extendedprice", + "l_discount", + "l_shipdate", + "l_quantity" + }; + + auto l_extendedprice = cudf::ast::column_reference(0); + auto l_discount = cudf::ast::column_reference(1); + auto l_shipdate = cudf::ast::column_reference(2); + auto l_quantity = cudf::ast::column_reference(3); + + auto date_scalar_a = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); + auto date_literal_a = cudf::ast::literal(date_scalar_a); + + auto date_scalar_b = cudf::timestamp_scalar(days_since_epoch(1995, 1, 1), true); + auto date_literal_b = cudf::ast::literal(date_scalar_b); + + auto pred_a = cudf::ast::operation( + cudf::ast::ast_operator::GREATER_EQUAL, + l_shipdate, + date_literal_a + ); + + auto pred_b = cudf::ast::operation( + cudf::ast::ast_operator::LESS, + l_shipdate, + date_literal_b + ); + + auto scalar_a = cudf::numeric_scalar(0.05); + auto literal_a = cudf::ast::literal(scalar_a); + auto scalar_b = cudf::numeric_scalar(0.07); + auto literal_b = cudf::ast::literal(scalar_b); + auto scalar_c = cudf::numeric_scalar(24.0); + auto literal_c = cudf::ast::literal(scalar_c); + + auto pred_c = cudf::ast::operation( + cudf::ast::ast_operator::GREATER_EQUAL, + l_discount, + literal_a + ); + + auto pred_d = cudf::ast::operation( + cudf::ast::ast_operator::LESS_EQUAL, + l_discount, + literal_b + ); + + auto pred_e = cudf::ast::operation( + cudf::ast::ast_operator::LESS, + l_quantity, + literal_c + ); + + auto pred_ab = cudf::ast::operation( + cudf::ast::ast_operator::LOGICAL_AND, + pred_a, + pred_b + ); + + auto pred_cd = cudf::ast::operation( + cudf::ast::ast_operator::LOGICAL_AND, + pred_c, + pred_d + ); + + auto pred_abcd = cudf::ast::operation( + cudf::ast::ast_operator::LOGICAL_AND, + pred_ab, + pred_cd + ); + + builder.columns(projection_cols); + // builder.filter(pred_ab); + + auto options = builder.build(); + auto result = cudf::io::read_parquet(options); + return std::move(result.tbl); +} + +void compute_final_cols(std::unique_ptr& table) { + auto l_extendedprice = table->view().column(0); + auto l_discount = table->view().column(1); + + auto extendedprice_mul_discount = cudf::binary_operation( + l_extendedprice, + l_discount, + cudf::binary_operator::MUL, + cudf::data_type{cudf::type_id::DECIMAL64} + ); + + auto const sum_agg = cudf::make_sum_aggregation(); + auto sum = cudf::reduce(extendedprice_mul_discount->view(), *sum_agg, extendedprice_mul_discount->type()); + + auto decimal_sum = static_cast*>(sum.get()); + double decimal_sum_value = decimal_sum->value(); +} + +int main() { + auto t1 = scan_filter_project(); + compute_final_cols(t1); + return 0; +} diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp new file mode 100644 index 00000000000..fbc8edc1b9a --- /dev/null +++ b/cpp/examples/tpch/utils.hpp @@ -0,0 +1,32 @@ +#include +#include +#include + +#include +#include + +std::tm make_tm(int year, int month, int day) { + std::tm tm = {0}; + tm.tm_year = year - 1900; + tm.tm_mon = month - 1; + tm.tm_mday = day; + return tm; +} + +int32_t days_since_epoch(int year, int month, int day) { + std::tm tm = make_tm(year, month, day); + std::tm epoch = make_tm(1970, 1, 1); + std::time_t time = std::mktime(&tm); + std::time_t epoch_time = std::mktime(&epoch); + double diff = std::difftime(time, epoch_time) / (60*60*24); + return static_cast(diff); +} + +void write_parquet(std::unique_ptr& table, cudf::io::table_metadata& metadata, std::string& filepath) { + auto sink_info = cudf::io::sink_info(filepath); + auto table_input_metadata = cudf::io::table_input_metadata{metadata}; + auto builder = cudf::io::parquet_writer_options::builder(sink_info, table->view()); + builder.metadata(table_input_metadata); + auto options = builder.build(); + cudf::io::write_parquet(options); +} From 2fb3198ef375c26b2c92d6ce02ebd0523298c7e0 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 12:39:00 -0700 Subject: [PATCH 011/124] Convert scalar to device buffer --- cpp/examples/tpch/q6.cpp | 28 ++++++++++++++++++++++++++-- cpp/examples/tpch/utils.hpp | 25 +++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 4a212f8c57a..5968581d859 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -123,7 +123,7 @@ std::unique_ptr scan_filter_project() { return std::move(result.tbl); } -void compute_final_cols(std::unique_ptr& table) { +std::unique_ptr compute_result_table(std::unique_ptr& table) { auto l_extendedprice = table->view().column(0); auto l_discount = table->view().column(1); @@ -139,10 +139,34 @@ void compute_final_cols(std::unique_ptr& table) { auto decimal_sum = static_cast*>(sum.get()); double decimal_sum_value = decimal_sum->value(); + + std::cout << "Sum: " << decimal_sum_value << std::endl; + + cudf::data_type type = cudf::data_type{cudf::type_id::DECIMAL64}; + cudf::size_type len = 1; + rmm::device_buffer data_buffer = get_device_buffer_from_value(decimal_sum_value); + rmm::device_buffer null_mask_buffer = get_empty_device_buffer(); + auto col = std::make_unique( + type, len, std::move(data_buffer), std::move(null_mask_buffer), 0); + std::vector> columns; + columns.push_back(std::move(col)); + auto result_table = std::make_unique(std::move(columns)); + return result_table; +} + +cudf::io::table_metadata create_table_metadata() { + cudf::io::table_metadata metadata; + std::vector column_names; + column_names.push_back(cudf::io::column_name_info("revenue")); + metadata.schema_info = column_names; + return metadata; } int main() { auto t1 = scan_filter_project(); - compute_final_cols(t1); + auto result_table = compute_result_table(t1); + auto result_metadata = create_table_metadata(); + std::string result_filename = "q6.parquet"; + write_parquet(result_table, result_metadata, result_filename); return 0; } diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index fbc8edc1b9a..fc1fa079e39 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -5,6 +5,12 @@ #include #include +#include +#include +#include +#include + + std::tm make_tm(int year, int month, int day) { std::tm tm = {0}; tm.tm_year = year - 1900; @@ -30,3 +36,22 @@ void write_parquet(std::unique_ptr& table, cudf::io::table_metadata auto options = builder.build(); cudf::io::write_parquet(options); } + +template +rmm::device_buffer get_device_buffer_from_value(T value) { + auto stream = cudf::get_default_stream(); + rmm::cuda_stream_view stream_view(stream); + + rmm::device_scalar scalar(stream_view); + scalar.set_value_async(value, stream_view); + + rmm::device_buffer buffer(scalar.data(), scalar.size(), stream_view); + return buffer; +} + +rmm::device_buffer get_empty_device_buffer() { + auto stream = cudf::get_default_stream(); + rmm::cuda_stream_view stream_view(stream); + rmm::device_buffer buffer(0, stream_view); + return buffer; +} From 08e6e7eda620ce6e2a1f484a3b6719006999bc0c Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 13:25:33 -0700 Subject: [PATCH 012/124] Extract create metadata into utils --- cpp/examples/tpch/q1.cpp | 30 ++++++++++++------------------ cpp/examples/tpch/q6.cpp | 10 +--------- cpp/examples/tpch/utils.hpp | 10 ++++++++++ 3 files changed, 23 insertions(+), 27 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index a21e0fec772..308d011a077 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -131,23 +131,6 @@ └───────────────────────────┘ */ -cudf::io::table_metadata create_table_metadata() { - cudf::io::table_metadata metadata; - std::vector column_names; - column_names.push_back(cudf::io::column_name_info("l_returnflag")); - column_names.push_back(cudf::io::column_name_info("l_linestatus")); - column_names.push_back(cudf::io::column_name_info("sum_qty")); - column_names.push_back(cudf::io::column_name_info("avg_qty")); - column_names.push_back(cudf::io::column_name_info("sum_base_price")); - column_names.push_back(cudf::io::column_name_info("avg_price")); - column_names.push_back(cudf::io::column_name_info("avg_disc")); - column_names.push_back(cudf::io::column_name_info("sum_disc_price")); - column_names.push_back(cudf::io::column_name_info("sum_charge")); - column_names.push_back(cudf::io::column_name_info("count_order")); - metadata.schema_info = column_names; - return metadata; -} - std::unique_ptr append_col_to_table( std::unique_ptr table, std::unique_ptr col) { std::vector> columns; @@ -290,7 +273,18 @@ int main() { auto t3 = append_col_to_table(std::move(t2), std::move(charge_col)); auto t4 = calc_group_by(t3); auto result_table = sort(t4); - auto result_table_metadata = create_table_metadata(); + auto result_table_metadata = create_table_metadata({ + "l_returnflag", + "l_linestatus", + "sum_qty", + "avg_qty", + "sum_base_price", + "avg_price", + "avg_disc", + "sum_disc_price", + "sum_charge", + "count_order" + }); std::string result_filename = "q1.parquet"; write_parquet(result_table, result_table_metadata, result_filename); return 0; diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 5968581d859..93730b24e37 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -154,18 +154,10 @@ std::unique_ptr compute_result_table(std::unique_ptr& return result_table; } -cudf::io::table_metadata create_table_metadata() { - cudf::io::table_metadata metadata; - std::vector column_names; - column_names.push_back(cudf::io::column_name_info("revenue")); - metadata.schema_info = column_names; - return metadata; -} - int main() { auto t1 = scan_filter_project(); auto result_table = compute_result_table(t1); - auto result_metadata = create_table_metadata(); + auto result_metadata = create_table_metadata({"revenue"}); std::string result_filename = "q6.parquet"; write_parquet(result_table, result_metadata, result_filename); return 0; diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index fc1fa079e39..d61150dd087 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -28,6 +28,16 @@ int32_t days_since_epoch(int year, int month, int day) { return static_cast(diff); } +cudf::io::table_metadata create_table_metadata(std::vector column_names) { + cudf::io::table_metadata metadata; + std::vector column_name_infos; + for (auto &col_name : column_names) { + column_name_infos.push_back(cudf::io::column_name_info(col_name)); + } + metadata.schema_info = column_name_infos; + return metadata; +} + void write_parquet(std::unique_ptr& table, cudf::io::table_metadata& metadata, std::string& filepath) { auto sink_info = cudf::io::sink_info(filepath); auto table_input_metadata = cudf::io::table_input_metadata{metadata}; From 22bda7c14215f996d8ecec047152f869a6b3090f Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 13:31:45 -0700 Subject: [PATCH 013/124] Add copyright to tpch q6 --- cpp/examples/tpch/CMakeLists.txt | 2 -- cpp/examples/tpch/q6.cpp | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/cpp/examples/tpch/CMakeLists.txt b/cpp/examples/tpch/CMakeLists.txt index 9e17f8abf79..afadef9e560 100644 --- a/cpp/examples/tpch/CMakeLists.txt +++ b/cpp/examples/tpch/CMakeLists.txt @@ -4,7 +4,6 @@ cmake_minimum_required(VERSION 3.26.4) include(../set_cuda_architecture.cmake) -# initialize cuda architecture rapids_cuda_init_architectures(tpch_example) rapids_cuda_set_architectures(RAPIDS) @@ -16,7 +15,6 @@ project( include(../fetch_dependencies.cmake) -# Configure your project here add_executable(tpch_q1 q1.cpp) target_link_libraries(tpch_q1 PRIVATE cudf::cudf) target_compile_features(tpch_q1 PRIVATE cxx_std_17) diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 93730b24e37..e8bcb1c568f 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -1,3 +1,18 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include #include From e68fcaa166bcb2ed2ccc8d5e0c4c9184b34a066c Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 13:41:36 -0700 Subject: [PATCH 014/124] Add initial README --- cpp/examples/tpch/README.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 cpp/examples/tpch/README.md diff --git a/cpp/examples/tpch/README.md b/cpp/examples/tpch/README.md new file mode 100644 index 00000000000..5362a45e4e8 --- /dev/null +++ b/cpp/examples/tpch/README.md @@ -0,0 +1,7 @@ +# TPC-H Inspired Examples + +Implementing TPC-H queries using `libcudf`. + +## Implementation Status + +- [x] Q1 From 143d32c3f3c97684cbfcf55b88b7b46f5911d81d Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 13:54:19 -0700 Subject: [PATCH 015/124] Extract order by into utils --- cpp/examples/tpch/q1.cpp | 11 +---------- cpp/examples/tpch/utils.hpp | 13 +++++++++++++ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 308d011a077..92f84c46bd3 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -256,15 +256,6 @@ std::unique_ptr calc_group_by(std::unique_ptr& table) return std::make_unique(std::move(columns)); } -std::unique_ptr sort( - std::unique_ptr& table) { - auto tbl_view = table->view(); - return cudf::sort_by_key( - tbl_view, - cudf::table_view{{tbl_view.column(0), tbl_view.column(1)}} - ); -} - int main() { auto t1 = scan_filter_project(); auto disc_price_col = calc_disc_price(t1); @@ -272,7 +263,7 @@ int main() { auto t2 = append_col_to_table(std::move(t1), std::move(disc_price_col)); auto t3 = append_col_to_table(std::move(t2), std::move(charge_col)); auto t4 = calc_group_by(t3); - auto result_table = sort(t4); + auto result_table = order_by(t4, {0, 1}); auto result_table_metadata = create_table_metadata({ "l_returnflag", "l_linestatus", diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index d61150dd087..c9638feac16 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -38,6 +38,19 @@ cudf::io::table_metadata create_table_metadata(std::vector column_n return metadata; } +std::unique_ptr order_by( + std::unique_ptr& table, std::vector keys) { + auto table_view = table->view(); + std::vector column_views; + for (auto& key : keys) { + column_views.push_back(table_view.column(key)); + } + return cudf::sort_by_key( + table_view, + cudf::table_view{column_views} + ); +} + void write_parquet(std::unique_ptr& table, cudf::io::table_metadata& metadata, std::string& filepath) { auto sink_info = cudf::io::sink_info(filepath); auto table_input_metadata = cudf::io::table_input_metadata{metadata}; From 7dd2cdbe98a15b60615149ba4d8896a9d4388e88 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 14:24:04 -0700 Subject: [PATCH 016/124] Use make_column_from_scalar factory function --- cpp/examples/tpch/q6.cpp | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index e8bcb1c568f..fbcbffdd9ef 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -131,7 +132,7 @@ std::unique_ptr scan_filter_project() { ); builder.columns(projection_cols); - // builder.filter(pred_ab); + // builder.filter(pred_abcd); auto options = builder.build(); auto result = cudf::io::read_parquet(options); @@ -152,17 +153,9 @@ std::unique_ptr compute_result_table(std::unique_ptr& auto const sum_agg = cudf::make_sum_aggregation(); auto sum = cudf::reduce(extendedprice_mul_discount->view(), *sum_agg, extendedprice_mul_discount->type()); - auto decimal_sum = static_cast*>(sum.get()); - double decimal_sum_value = decimal_sum->value(); - - std::cout << "Sum: " << decimal_sum_value << std::endl; - - cudf::data_type type = cudf::data_type{cudf::type_id::DECIMAL64}; cudf::size_type len = 1; - rmm::device_buffer data_buffer = get_device_buffer_from_value(decimal_sum_value); - rmm::device_buffer null_mask_buffer = get_empty_device_buffer(); - auto col = std::make_unique( - type, len, std::move(data_buffer), std::move(null_mask_buffer), 0); + auto col = cudf::make_column_from_scalar(*sum, len); + std::vector> columns; columns.push_back(std::move(col)); auto result_table = std::make_unique(std::move(columns)); From 06f02c130168fa2f182773959bfaada153b62667 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 14:39:39 -0700 Subject: [PATCH 017/124] Add data gen instructions in README --- cpp/examples/tpch/README.md | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/cpp/examples/tpch/README.md b/cpp/examples/tpch/README.md index 5362a45e4e8..6a5b8c91594 100644 --- a/cpp/examples/tpch/README.md +++ b/cpp/examples/tpch/README.md @@ -1,6 +1,30 @@ # TPC-H Inspired Examples -Implementing TPC-H queries using `libcudf`. +Implementing the TPC-H queries using `libcudf`. + +## Data Generation + +We leverage `Datafusion`s data generator (wrapper around official TPC-H datagen) for generating data in the form of Parquet files. + +### Requirements + +- Rust + +### Steps + +1. Clone the datafusion repository +```bash +git clone git@github.com:apache/datafusion.git +``` + +2. Run the data generator. The data will be placed in a `data/` subdirectory. +```bash +cd benchmarks/ +./bench.sh data tpch + +# for scale factor 10, +./bench.sh data tpch10 +``` ## Implementation Status From 62f496054719cc4c1180bcc09955a6236175d569 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 14:41:11 -0700 Subject: [PATCH 018/124] Fixes --- cpp/examples/tpch/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/examples/tpch/README.md b/cpp/examples/tpch/README.md index 6a5b8c91594..de0024e5db8 100644 --- a/cpp/examples/tpch/README.md +++ b/cpp/examples/tpch/README.md @@ -28,4 +28,5 @@ cd benchmarks/ ## Implementation Status -- [x] Q1 +- [x] Q1 + From eb03a3b8c5a8a857c907f5e2dedcf9a362a26eda Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 14:45:04 -0700 Subject: [PATCH 019/124] Fixes --- cpp/examples/tpch/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/examples/tpch/README.md b/cpp/examples/tpch/README.md index de0024e5db8..88150c82646 100644 --- a/cpp/examples/tpch/README.md +++ b/cpp/examples/tpch/README.md @@ -4,7 +4,7 @@ Implementing the TPC-H queries using `libcudf`. ## Data Generation -We leverage `Datafusion`s data generator (wrapper around official TPC-H datagen) for generating data in the form of Parquet files. +We leverage the data generator (wrapper around official TPC-H datagen) from [Apache Datafusion](https://github.com/apache/datafusion) for generating data in the form of Parquet files. ### Requirements @@ -12,7 +12,7 @@ We leverage `Datafusion`s data generator (wrapper around official TPC-H datagen) ### Steps -1. Clone the datafusion repository +1. Clone the Datafusion repository. ```bash git clone git@github.com:apache/datafusion.git ``` From d06ac1c8721b78391c6e749742ee4ef7281fc2f2 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 15:28:59 -0700 Subject: [PATCH 020/124] Move append_col_to_table to utils --- cpp/examples/tpch/q1.cpp | 10 ---------- cpp/examples/tpch/utils.hpp | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 92f84c46bd3..347840edc43 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -131,16 +131,6 @@ └───────────────────────────┘ */ -std::unique_ptr append_col_to_table( - std::unique_ptr table, std::unique_ptr col) { - std::vector> columns; - for (size_t i = 0; i < table->num_columns(); i++) { - columns.push_back(std::make_unique(table->get_column(i))); - } - columns.push_back(std::move(col)); - return std::make_unique(std::move(columns)); -} - std::unique_ptr scan_filter_project() { std::string lineitem = "/home/jayjeetc/tpch_sf1/lineitem/part-0.parquet"; auto source = cudf::io::source_info(lineitem); diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index c9638feac16..9f9ba60b1e7 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -38,6 +38,16 @@ cudf::io::table_metadata create_table_metadata(std::vector column_n return metadata; } +std::unique_ptr append_col_to_table( + std::unique_ptr table, std::unique_ptr col) { + std::vector> columns; + for (size_t i = 0; i < table->num_columns(); i++) { + columns.push_back(std::make_unique(table->get_column(i))); + } + columns.push_back(std::move(col)); + return std::make_unique(std::move(columns)); +} + std::unique_ptr order_by( std::unique_ptr& table, std::vector keys) { auto table_view = table->view(); From c3631d8f4fa4db5a501217ed2c9f0dc4177c36c2 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 15:57:37 -0700 Subject: [PATCH 021/124] Fix scale errors in q1 --- cpp/examples/tpch/q1.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 347840edc43..7ef25e77575 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -166,23 +166,30 @@ std::unique_ptr scan_filter_project() { } std::unique_ptr calc_disc_price(std::unique_ptr& table) { - auto one = cudf::fixed_point_scalar(1); + auto one = cudf::fixed_point_scalar(1, -2); auto disc = table->get_column(4).view(); auto one_minus_disc = cudf::binary_operation(one, disc, cudf::binary_operator::SUB, disc.type()); auto extended_price = table->get_column(3).view(); - auto disc_price = cudf::binary_operation(extended_price, one_minus_disc->view(), cudf::binary_operator::MUL, extended_price.type()); + + auto disc_price_type = cudf::data_type{cudf::type_id::DECIMAL64, -4}; + auto disc_price = cudf::binary_operation(extended_price, one_minus_disc->view(), cudf::binary_operator::MUL, disc_price_type); return disc_price; } std::unique_ptr calc_charge(std::unique_ptr& table) { - auto one = cudf::fixed_point_scalar(1); + auto one = cudf::fixed_point_scalar(1, -2); auto disc = table->get_column(4).view(); auto one_minus_disc = cudf::binary_operation(one, disc, cudf::binary_operator::SUB, disc.type()); auto extended_price = table->get_column(3).view(); - auto disc_price = cudf::binary_operation(extended_price, one_minus_disc->view(), cudf::binary_operator::MUL, extended_price.type()); + + auto disc_price_type = cudf::data_type{cudf::type_id::DECIMAL64, -4}; + auto disc_price = cudf::binary_operation(extended_price, one_minus_disc->view(), cudf::binary_operator::MUL, disc_price_type); + auto tax = table->get_column(7).view(); auto one_plus_tax = cudf::binary_operation(one, tax, cudf::binary_operator::ADD, tax.type()); - auto charge = cudf::binary_operation(disc_price->view(), one_plus_tax->view(), cudf::binary_operator::MUL, disc_price->type()); + + auto charge_type = cudf::data_type{cudf::type_id::DECIMAL64, -6}; + auto charge = cudf::binary_operation(disc_price->view(), one_plus_tax->view(), cudf::binary_operator::MUL, charge_type); return charge; } From 60e8fef4ac0c0d7b5adf18b659ca99ff075c6437 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 15:58:53 -0700 Subject: [PATCH 022/124] misc fixes --- cpp/examples/tpch/q1.cpp | 115 +++++++-------------------------------- 1 file changed, 21 insertions(+), 94 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 7ef25e77575..b3797829932 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -35,100 +35,27 @@ #include "utils.hpp" /* - -Query: - - select - l_returnflag, - l_linestatus, - sum(l_quantity) as sum_qty, - sum(l_extendedprice) as sum_base_price, - sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, - sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, - avg(l_quantity) as avg_qty, - avg(l_extendedprice) as avg_price, - avg(l_discount) as avg_disc, - count(*) as count_order - from - '~/tpch_sf1/lineitem/part-0.parquet' - where - l_shipdate <= date '1998-09-02' - group by - l_returnflag, - l_linestatus - order by - l_returnflag, - l_linestatus; - -Plan: - - ┌─────────────────────────────┐ - │┌───────────────────────────┐│ - ││ Physical Plan ││ - │└───────────────────────────┘│ - └─────────────────────────────┘ - ┌───────────────────────────┐ - │ ORDER_BY │ - │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ - │ ORDERS: │ - │ "part-0".l_returnflag ASC │ - │ "part-0".l_linestatus ASC │ - └─────────────┬─────────────┘ - ┌─────────────┴─────────────┐ - │ HASH_GROUP_BY │ - │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ - │ #0 │ - │ #1 │ - │ sum(#2) │ - │ sum(#3) │ - │ sum(#4) │ - │ sum(#5) │ - │ avg(#6) │ - │ avg(#7) │ - │ avg(#8) │ - │ count_star() │ - └─────────────┬─────────────┘ - ┌─────────────┴─────────────┐ - │ PROJECTION │ - │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ - │ l_returnflag │ - │ l_linestatus │ - │ l_quantity │ - │ l_extendedprice │ - │ #4 │ - │ (#4 * (1.00 + l_tax)) │ - │ l_quantity │ - │ l_extendedprice │ - │ l_discount │ - └─────────────┬─────────────┘ - ┌─────────────┴─────────────┐ - │ PROJECTION │ - │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ - │ l_returnflag │ - │ l_linestatus │ - │ l_quantity │ - │ l_extendedprice │ - │ (l_extendedprice * (1.00 -│ - │ l_discount)) │ - │ l_tax │ - │ l_discount │ - └─────────────┬─────────────┘ - ┌─────────────┴─────────────┐ - │ PARQUET_SCAN │ - │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ - │ l_returnflag │ - │ l_linestatus │ - │ l_quantity │ - │ l_extendedprice │ - │ l_discount │ - │ l_tax │ - │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ - │ Filters: l_shipdate<='1998│ - │-09-02'::DATE AND l_sh... │ - │ IS NOT NULL │ - │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ - │ EC: 1200243 │ - └───────────────────────────┘ +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from + '~/tpch_sf1/lineitem/part-0.parquet' +where + l_shipdate <= date '1998-09-02' +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus; */ std::unique_ptr scan_filter_project() { From 5b5237ed1a043b5ea4352d797dabf7dcc345aec5 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 16:42:44 -0700 Subject: [PATCH 023/124] Remove useless headers --- cpp/examples/tpch/q6.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index fbcbffdd9ef..9fc5f3126c1 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -34,10 +34,6 @@ #include #include -#include -#include -#include - #include "utils.hpp" /* From 512b0ece72b0110fdc8cf438f9ee84490d1ecf4e Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 18:28:21 -0700 Subject: [PATCH 024/124] Finish q6 --- cpp/examples/tpch/q1.cpp | 34 +++++----- cpp/examples/tpch/q6.cpp | 126 ++++++++++++++++++++---------------- cpp/examples/tpch/utils.hpp | 10 ++- 3 files changed, 97 insertions(+), 73 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index b3797829932..6bcf1e6b40c 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -124,11 +124,11 @@ std::unique_ptr calc_group_by(std::unique_ptr& table) auto tbl_view = table->view(); auto keys = cudf::table_view{{tbl_view.column(0), tbl_view.column(1)}}; - auto l_quantity = tbl_view.column(2); - auto l_extendedprice = tbl_view.column(3); - auto l_discount = tbl_view.column(4); - auto l_discprice = tbl_view.column(8); - auto l_charge = tbl_view.column(9); + auto quantity = tbl_view.column(2); + auto extendedprice = tbl_view.column(3); + auto discount = tbl_view.column(4); + auto discprice = tbl_view.column(8); + auto charge = tbl_view.column(9); cudf::groupby::groupby groupby_obj(keys); std::vector requests; @@ -136,38 +136,38 @@ std::unique_ptr calc_group_by(std::unique_ptr& table) requests.emplace_back(cudf::groupby::aggregation_request()); requests[0].aggregations.push_back(cudf::make_sum_aggregation()); requests[0].aggregations.push_back(cudf::make_mean_aggregation()); - requests[0].values = l_quantity; + requests[0].values = quantity; requests.emplace_back(cudf::groupby::aggregation_request()); requests[1].aggregations.push_back(cudf::make_sum_aggregation()); requests[1].aggregations.push_back(cudf::make_mean_aggregation()); - requests[1].values = l_extendedprice; + requests[1].values = extendedprice; requests.emplace_back(cudf::groupby::aggregation_request()); requests[2].aggregations.push_back(cudf::make_mean_aggregation()); - requests[2].values = l_discount; + requests[2].values = discount; requests.emplace_back(cudf::groupby::aggregation_request()); requests[3].aggregations.push_back(cudf::make_sum_aggregation()); - requests[3].values = l_discprice; + requests[3].values = discprice; requests.emplace_back(cudf::groupby::aggregation_request()); requests[4].aggregations.push_back(cudf::make_sum_aggregation()); - requests[4].values = l_charge; + requests[4].values = charge; requests.emplace_back(cudf::groupby::aggregation_request()); requests[5].aggregations.push_back(cudf::make_count_aggregation()); - requests[5].values = l_charge; + requests[5].values = charge; auto agg_results = groupby_obj.aggregate(requests); auto result_key = std::move(agg_results.first); - auto l_returnflag = std::make_unique(result_key->get_column(0)); - auto l_linestatus = std::make_unique(result_key->get_column(1)); + auto returnflag = std::make_unique(result_key->get_column(0)); + auto linestatus = std::make_unique(result_key->get_column(1)); std::vector> columns; - columns.push_back(std::move(l_returnflag)); - columns.push_back(std::move(l_linestatus)); + columns.push_back(std::move(returnflag)); + columns.push_back(std::move(linestatus)); columns.push_back(std::move(agg_results.second[0].results[0])); columns.push_back(std::move(agg_results.second[0].results[1])); columns.push_back(std::move(agg_results.second[1].results[0])); @@ -184,8 +184,8 @@ int main() { auto t1 = scan_filter_project(); auto disc_price_col = calc_disc_price(t1); auto charge_col = calc_charge(t1); - auto t2 = append_col_to_table(std::move(t1), std::move(disc_price_col)); - auto t3 = append_col_to_table(std::move(t2), std::move(charge_col)); + auto t2 = append_col_to_table(t1, disc_price_col); + auto t3 = append_col_to_table(t2, charge_col); auto t4 = calc_group_by(t3); auto result_table = order_by(t4, {0, 1}); auto result_table_metadata = create_table_metadata({ diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 9fc5f3126c1..6efc31be55d 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include "utils.hpp" @@ -40,7 +42,7 @@ select sum(l_extendedprice * l_discount) as revenue from - lineitem + '~/tpch_sf1/lineitem/part-0.parquet' where l_shipdate >= date '1994-01-01' and l_shipdate < date '1995-01-01' @@ -61,10 +63,10 @@ std::unique_ptr scan_filter_project() { "l_quantity" }; - auto l_extendedprice = cudf::ast::column_reference(0); - auto l_discount = cudf::ast::column_reference(1); - auto l_shipdate = cudf::ast::column_reference(2); - auto l_quantity = cudf::ast::column_reference(3); + auto extendedprice = cudf::ast::column_reference(0); + auto discount = cudf::ast::column_reference(1); + auto shipdate = cudf::ast::column_reference(2); + auto quantity = cudf::ast::column_reference(3); auto date_scalar_a = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); auto date_literal_a = cudf::ast::literal(date_scalar_a); @@ -74,76 +76,87 @@ std::unique_ptr scan_filter_project() { auto pred_a = cudf::ast::operation( cudf::ast::ast_operator::GREATER_EQUAL, - l_shipdate, + shipdate, date_literal_a ); auto pred_b = cudf::ast::operation( cudf::ast::ast_operator::LESS, - l_shipdate, + shipdate, date_literal_b ); - auto scalar_a = cudf::numeric_scalar(0.05); - auto literal_a = cudf::ast::literal(scalar_a); - auto scalar_b = cudf::numeric_scalar(0.07); - auto literal_b = cudf::ast::literal(scalar_b); - auto scalar_c = cudf::numeric_scalar(24.0); - auto literal_c = cudf::ast::literal(scalar_c); - - auto pred_c = cudf::ast::operation( - cudf::ast::ast_operator::GREATER_EQUAL, - l_discount, - literal_a - ); - - auto pred_d = cudf::ast::operation( - cudf::ast::ast_operator::LESS_EQUAL, - l_discount, - literal_b - ); - - auto pred_e = cudf::ast::operation( - cudf::ast::ast_operator::LESS, - l_quantity, - literal_c - ); - auto pred_ab = cudf::ast::operation( cudf::ast::ast_operator::LOGICAL_AND, pred_a, pred_b ); - auto pred_cd = cudf::ast::operation( - cudf::ast::ast_operator::LOGICAL_AND, - pred_c, - pred_d - ); - - auto pred_abcd = cudf::ast::operation( - cudf::ast::ast_operator::LOGICAL_AND, - pred_ab, - pred_cd - ); - builder.columns(projection_cols); - // builder.filter(pred_abcd); + + // FIXME: since, ast does not support `fixed_point_scalar` yet, + // we just push down the date filters while scanning the parquet file. + builder.filter(pred_ab); auto options = builder.build(); auto result = cudf::io::read_parquet(options); return std::move(result.tbl); } -std::unique_ptr compute_result_table(std::unique_ptr& table) { - auto l_extendedprice = table->view().column(0); - auto l_discount = table->view().column(1); +std::unique_ptr apply_filters(std::unique_ptr& table) { + // NOTE: apply the remaining filters based on the float32 casted columns + auto l_discount = cudf::ast::column_reference(4); + auto l_quantity = cudf::ast::column_reference(5); + + auto l_discount_lower = cudf::numeric_scalar(0.05); + auto l_discount_lower_literal = cudf::ast::literal(l_discount_lower); + auto l_discount_upper = cudf::numeric_scalar(0.07); + auto l_discount_upper_literal = cudf::ast::literal(l_discount_upper); + auto l_quantity_upper = cudf::numeric_scalar(24); + auto l_quantity_upper_literal = cudf::ast::literal(l_quantity_upper); + + auto l_discount_pred_a = cudf::ast::operation( + cudf::ast::ast_operator::GREATER_EQUAL, + l_discount, + l_discount_lower_literal + ); + + auto l_discount_pred_b = cudf::ast::operation( + cudf::ast::ast_operator::LESS_EQUAL, + l_discount, + l_discount_upper_literal + ); + + auto l_discount_pred = cudf::ast::operation( + cudf::ast::ast_operator::LOGICAL_AND, l_discount_pred_a, l_discount_pred_b + ); + + auto l_quantity_pred = cudf::ast::operation( + cudf::ast::ast_operator::LESS, + l_quantity, + l_quantity_upper_literal + ); + + auto pred = cudf::ast::operation( + cudf::ast::ast_operator::LOGICAL_AND, + l_discount_pred, + l_quantity_pred + ); + + auto boolean_mask = cudf::compute_column(table->view(), pred); + return cudf::apply_boolean_mask(table->view(), boolean_mask->view()); +} + +std::unique_ptr apply_reduction(std::unique_ptr& table) { + auto extendedprice = table->view().column(0); + auto discount = table->view().column(1); + auto extendedprice_mul_discount_type = cudf::data_type{cudf::type_id::DECIMAL64, -4}; auto extendedprice_mul_discount = cudf::binary_operation( - l_extendedprice, - l_discount, + extendedprice, + discount, cudf::binary_operator::MUL, - cudf::data_type{cudf::type_id::DECIMAL64} + extendedprice_mul_discount_type ); auto const sum_agg = cudf::make_sum_aggregation(); @@ -160,9 +173,12 @@ std::unique_ptr compute_result_table(std::unique_ptr& int main() { auto t1 = scan_filter_project(); - auto result_table = compute_result_table(t1); - auto result_metadata = create_table_metadata({"revenue"}); - std::string result_filename = "q6.parquet"; - write_parquet(result_table, result_metadata, result_filename); + auto discout_float = cudf::cast(t1->view().column(1), cudf::data_type{cudf::type_id::FLOAT32}); + auto quantity_float = cudf::cast(t1->view().column(3), cudf::data_type{cudf::type_id::FLOAT32}); + auto t2 = append_col_to_table(t1, discout_float); + auto t3 = append_col_to_table(t2, quantity_float); + auto t4 = apply_filters(t3); + auto t5 = apply_reduction(t4); + debug_table(std::move(t5), "q6.parquet"); return 0; } diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 9f9ba60b1e7..e8d904c4194 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -39,7 +39,7 @@ cudf::io::table_metadata create_table_metadata(std::vector column_n } std::unique_ptr append_col_to_table( - std::unique_ptr table, std::unique_ptr col) { + std::unique_ptr& table, std::unique_ptr& col) { std::vector> columns; for (size_t i = 0; i < table->num_columns(); i++) { columns.push_back(std::make_unique(table->get_column(i))); @@ -70,6 +70,14 @@ void write_parquet(std::unique_ptr& table, cudf::io::table_metadata cudf::io::write_parquet(options); } +void debug_table(std::unique_ptr table, std::string filepath) { + auto sink_info = cudf::io::sink_info(filepath); + auto builder = cudf::io::parquet_writer_options::builder(sink_info, table->view()); + auto options = builder.build(); + cudf::io::write_parquet(options); +} + + template rmm::device_buffer get_device_buffer_from_value(T value) { auto stream = cudf::get_default_stream(); From 210b3d867510c6b6a24b4d178ee30ac0703173e6 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 18:35:50 -0700 Subject: [PATCH 025/124] Update README --- cpp/examples/tpch/README.md | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/cpp/examples/tpch/README.md b/cpp/examples/tpch/README.md index 88150c82646..b4ffdf8e608 100644 --- a/cpp/examples/tpch/README.md +++ b/cpp/examples/tpch/README.md @@ -12,21 +12,51 @@ We leverage the data generator (wrapper around official TPC-H datagen) from [Apa ### Steps -1. Clone the Datafusion repository. +1. Clone the datafusion repository. ```bash git clone git@github.com:apache/datafusion.git ``` 2. Run the data generator. The data will be placed in a `data/` subdirectory. ```bash -cd benchmarks/ +cd datafusion/benchmarks/ ./bench.sh data tpch # for scale factor 10, ./bench.sh data tpch10 ``` +## Executing Queries + +### Steps + +1. Clone the cudf repository. +```bash +git clone git@github.com:JayjeetAtGithub/cudf.git +git checkout tpch-bench +``` + +2. Build `libcudf`. +```bash +cd cudf/ +./build.sh libcudf +``` + +3. Build the examples. +```bash +cd cpp/examples +./build.sh +``` +The TPC-H query binaries would be built inside `examples/tpch/build`. + +4. Execute the queries. +```bash +./tpch/build/tpch_q1 +``` +A parquet file named `q1.parquet` would be generated holding the results of the query. + ## Implementation Status - [x] Q1 +- [x] Q6 From f420cf24178e743d6ab89b5c27a268e0eae74514 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 18:58:14 -0700 Subject: [PATCH 026/124] Cleanup q1/q6 --- cpp/examples/tpch/q1.cpp | 52 ++++++++++++++++--------------- cpp/examples/tpch/q6.cpp | 62 ++++++++++++++++++------------------- cpp/examples/tpch/utils.hpp | 10 +----- 3 files changed, 59 insertions(+), 65 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 6bcf1e6b40c..4c2bd2c1610 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -63,7 +63,7 @@ std::unique_ptr scan_filter_project() { auto source = cudf::io::source_info(lineitem); auto builder = cudf::io::parquet_reader_options_builder(source); - std::vector projection_cols = { + std::vector cols = { "l_returnflag", "l_linestatus", "l_quantity", @@ -74,18 +74,18 @@ std::unique_ptr scan_filter_project() { "l_tax" }; - auto l_shipdate = cudf::ast::column_reference(5); + auto shipdate = cudf::ast::column_reference(5); - auto date_scalar = cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); - auto date = cudf::ast::literal(date_scalar); - auto filter_expr = cudf::ast::operation( + auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); + auto shipdate_upper_literal = cudf::ast::literal(shipdate_upper); + auto pred = cudf::ast::operation( cudf::ast::ast_operator::LESS_EQUAL, - l_shipdate, - date + shipdate, + shipdate_upper_literal ); - builder.columns(projection_cols); - builder.filter(filter_expr); + builder.columns(cols); + builder.filter(pred); auto options = builder.build(); auto result = cudf::io::read_parquet(options); @@ -120,7 +120,7 @@ std::unique_ptr calc_charge(std::unique_ptr& table) { return charge; } -std::unique_ptr calc_group_by(std::unique_ptr& table) { +std::unique_ptr perform_group_by(std::unique_ptr& table) { auto tbl_view = table->view(); auto keys = cudf::table_view{{tbl_view.column(0), tbl_view.column(1)}}; @@ -186,21 +186,23 @@ int main() { auto charge_col = calc_charge(t1); auto t2 = append_col_to_table(t1, disc_price_col); auto t3 = append_col_to_table(t2, charge_col); - auto t4 = calc_group_by(t3); + auto t4 = perform_group_by(t3); auto result_table = order_by(t4, {0, 1}); - auto result_table_metadata = create_table_metadata({ - "l_returnflag", - "l_linestatus", - "sum_qty", - "avg_qty", - "sum_base_price", - "avg_price", - "avg_disc", - "sum_disc_price", - "sum_charge", - "count_order" - }); - std::string result_filename = "q1.parquet"; - write_parquet(result_table, result_table_metadata, result_filename); + write_parquet( + std::move(result_table), + create_table_metadata({ + "l_returnflag", + "l_linestatus", + "sum_qty", + "avg_qty", + "sum_base_price", + "avg_price", + "avg_disc", + "sum_disc_price", + "sum_charge", + "count_order" + }), + "q1.parquet" + ); return 0; } diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 6efc31be55d..cbe6165f44a 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -56,7 +56,7 @@ std::unique_ptr scan_filter_project() { auto source = cudf::io::source_info(lineitem); auto builder = cudf::io::parquet_reader_options_builder(source); - std::vector projection_cols = { + std::vector cols = { "l_extendedprice", "l_discount", "l_shipdate", @@ -68,22 +68,22 @@ std::unique_ptr scan_filter_project() { auto shipdate = cudf::ast::column_reference(2); auto quantity = cudf::ast::column_reference(3); - auto date_scalar_a = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); - auto date_literal_a = cudf::ast::literal(date_scalar_a); + auto shipdate_lower = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); + auto shipdate_lower_literal = cudf::ast::literal(shipdate_lower); - auto date_scalar_b = cudf::timestamp_scalar(days_since_epoch(1995, 1, 1), true); - auto date_literal_b = cudf::ast::literal(date_scalar_b); + auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1995, 1, 1), true); + auto shipdate_upper_literal = cudf::ast::literal(shipdate_upper); auto pred_a = cudf::ast::operation( cudf::ast::ast_operator::GREATER_EQUAL, shipdate, - date_literal_a + shipdate_lower_literal ); auto pred_b = cudf::ast::operation( cudf::ast::ast_operator::LESS, shipdate, - date_literal_b + shipdate_upper_literal ); auto pred_ab = cudf::ast::operation( @@ -92,7 +92,7 @@ std::unique_ptr scan_filter_project() { pred_b ); - builder.columns(projection_cols); + builder.columns(cols); // FIXME: since, ast does not support `fixed_point_scalar` yet, // we just push down the date filters while scanning the parquet file. @@ -105,42 +105,42 @@ std::unique_ptr scan_filter_project() { std::unique_ptr apply_filters(std::unique_ptr& table) { // NOTE: apply the remaining filters based on the float32 casted columns - auto l_discount = cudf::ast::column_reference(4); - auto l_quantity = cudf::ast::column_reference(5); + auto discount = cudf::ast::column_reference(4); + auto quantity = cudf::ast::column_reference(5); - auto l_discount_lower = cudf::numeric_scalar(0.05); - auto l_discount_lower_literal = cudf::ast::literal(l_discount_lower); - auto l_discount_upper = cudf::numeric_scalar(0.07); - auto l_discount_upper_literal = cudf::ast::literal(l_discount_upper); - auto l_quantity_upper = cudf::numeric_scalar(24); - auto l_quantity_upper_literal = cudf::ast::literal(l_quantity_upper); + auto discount_lower = cudf::numeric_scalar(0.05); + auto discount_lower_literal = cudf::ast::literal(discount_lower); + auto discount_upper = cudf::numeric_scalar(0.07); + auto discount_upper_literal = cudf::ast::literal(discount_upper); + auto quantity_upper = cudf::numeric_scalar(24); + auto quantity_upper_literal = cudf::ast::literal(quantity_upper); - auto l_discount_pred_a = cudf::ast::operation( + auto discount_pred_a = cudf::ast::operation( cudf::ast::ast_operator::GREATER_EQUAL, - l_discount, - l_discount_lower_literal + discount, + discount_lower_literal ); - auto l_discount_pred_b = cudf::ast::operation( + auto discount_pred_b = cudf::ast::operation( cudf::ast::ast_operator::LESS_EQUAL, - l_discount, - l_discount_upper_literal + discount, + discount_upper_literal ); - auto l_discount_pred = cudf::ast::operation( - cudf::ast::ast_operator::LOGICAL_AND, l_discount_pred_a, l_discount_pred_b + auto discount_pred = cudf::ast::operation( + cudf::ast::ast_operator::LOGICAL_AND, discount_pred_a, discount_pred_b ); - auto l_quantity_pred = cudf::ast::operation( + auto quantity_pred = cudf::ast::operation( cudf::ast::ast_operator::LESS, - l_quantity, - l_quantity_upper_literal + quantity, + quantity_upper_literal ); auto pred = cudf::ast::operation( cudf::ast::ast_operator::LOGICAL_AND, - l_discount_pred, - l_quantity_pred + discount_pred, + quantity_pred ); auto boolean_mask = cudf::compute_column(table->view(), pred); @@ -178,7 +178,7 @@ int main() { auto t2 = append_col_to_table(t1, discout_float); auto t3 = append_col_to_table(t2, quantity_float); auto t4 = apply_filters(t3); - auto t5 = apply_reduction(t4); - debug_table(std::move(t5), "q6.parquet"); + auto result_table = apply_reduction(t4); + write_parquet(std::move(result_table), create_table_metadata({"revenue"}), "q6.parquet"); return 0; } diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index e8d904c4194..93c6c979d37 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -61,7 +61,7 @@ std::unique_ptr order_by( ); } -void write_parquet(std::unique_ptr& table, cudf::io::table_metadata& metadata, std::string& filepath) { +void write_parquet(std::unique_ptr table, cudf::io::table_metadata metadata, std::string filepath) { auto sink_info = cudf::io::sink_info(filepath); auto table_input_metadata = cudf::io::table_input_metadata{metadata}; auto builder = cudf::io::parquet_writer_options::builder(sink_info, table->view()); @@ -70,14 +70,6 @@ void write_parquet(std::unique_ptr& table, cudf::io::table_metadata cudf::io::write_parquet(options); } -void debug_table(std::unique_ptr table, std::string filepath) { - auto sink_info = cudf::io::sink_info(filepath); - auto builder = cudf::io::parquet_writer_options::builder(sink_info, table->view()); - auto options = builder.build(); - cudf::io::write_parquet(options); -} - - template rmm::device_buffer get_device_buffer_from_value(T value) { auto stream = cudf::get_default_stream(); From 182d9c27bb12a5c093cb5fa48b897ca98226b3f3 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 26 Jun 2024 19:04:03 -0700 Subject: [PATCH 027/124] measure query exec time --- cpp/examples/tpch/q1.cpp | 7 +++++++ cpp/examples/tpch/q6.cpp | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 4c2bd2c1610..6e6ac8e82c6 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -181,6 +182,8 @@ std::unique_ptr perform_group_by(std::unique_ptr& tabl } int main() { + auto s = std::chrono::high_resolution_clock::now(); + auto t1 = scan_filter_project(); auto disc_price_col = calc_disc_price(t1); auto charge_col = calc_charge(t1); @@ -188,6 +191,10 @@ int main() { auto t3 = append_col_to_table(t2, charge_col); auto t4 = perform_group_by(t3); auto result_table = order_by(t4, {0, 1}); + + auto e = std::chrono::high_resolution_clock::now(); + std::cout << "q1: " << std::chrono::duration_cast(e - s).count() << "ms" << std::endl; + write_parquet( std::move(result_table), create_table_metadata({ diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index cbe6165f44a..841a65d4fe4 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -172,6 +173,8 @@ std::unique_ptr apply_reduction(std::unique_ptr& table } int main() { + auto s = std::chrono::high_resolution_clock::now(); + auto t1 = scan_filter_project(); auto discout_float = cudf::cast(t1->view().column(1), cudf::data_type{cudf::type_id::FLOAT32}); auto quantity_float = cudf::cast(t1->view().column(3), cudf::data_type{cudf::type_id::FLOAT32}); @@ -179,6 +182,10 @@ int main() { auto t3 = append_col_to_table(t2, quantity_float); auto t4 = apply_filters(t3); auto result_table = apply_reduction(t4); + + auto e = std::chrono::high_resolution_clock::now(); + std::cout << "q6: " << std::chrono::duration_cast(e - s).count() << "ms" << std::endl; + write_parquet(std::move(result_table), create_table_metadata({"revenue"}), "q6.parquet"); return 0; } From b6b5985d0a8d5a7789d5c229d0c0ba11955265ce Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Thu, 27 Jun 2024 08:29:58 -0700 Subject: [PATCH 028/124] start working on q5 --- cpp/examples/tpch/CMakeLists.txt | 6 +++- cpp/examples/tpch/README.md | 1 + cpp/examples/tpch/q5.cpp | 57 ++++++++++++++++++++++++++++++++ cpp/examples/tpch/utils.hpp | 9 +++++ 4 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 cpp/examples/tpch/q5.cpp diff --git a/cpp/examples/tpch/CMakeLists.txt b/cpp/examples/tpch/CMakeLists.txt index afadef9e560..2eeac1385c5 100644 --- a/cpp/examples/tpch/CMakeLists.txt +++ b/cpp/examples/tpch/CMakeLists.txt @@ -19,6 +19,10 @@ add_executable(tpch_q1 q1.cpp) target_link_libraries(tpch_q1 PRIVATE cudf::cudf) target_compile_features(tpch_q1 PRIVATE cxx_std_17) +add_executable(tpch_q5 q5.cpp) +target_link_libraries(tpch_q5 PRIVATE cudf::cudf) +target_compile_features(tpch_q5 PRIVATE cxx_std_17) + add_executable(tpch_q6 q6.cpp) target_link_libraries(tpch_q6 PRIVATE cudf::cudf) -target_compile_features(tpch_q6 PRIVATE cxx_std_17) \ No newline at end of file +target_compile_features(tpch_q6 PRIVATE cxx_std_17) diff --git a/cpp/examples/tpch/README.md b/cpp/examples/tpch/README.md index b4ffdf8e608..ee11945b8e4 100644 --- a/cpp/examples/tpch/README.md +++ b/cpp/examples/tpch/README.md @@ -58,5 +58,6 @@ A parquet file named `q1.parquet` would be generated holding the results of the ## Implementation Status - [x] Q1 +- [ ] Q5 - [x] Q6 diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp new file mode 100644 index 00000000000..e3e99c924d6 --- /dev/null +++ b/cpp/examples/tpch/q5.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.hpp" + + +int main() { + std::string lineitem_path = "~/tpch_sf1/lineitem/part-0.parquet"; + std::string orders_path = "~/tpch_sf1/orders/part-0.parquet"; + std::string customer_path = "~/tpch_sf1/customer/part-0.parquet"; + std::string supplier_path = "~/tpch_sf1/supplier/part-0.parquet"; + std::string nation_path = "~/tpch_sf1/nation/part-0.parquet"; + std::string region_path = "~/tpch_sf1/region/part-0.parquet"; + + auto lineitem = read_parquet(lineitem_path); + auto orders = read_parquet(orders_path); + auto customer = read_parquet(customer_path); + auto supplier = read_parquet(supplier_path); + auto nation = read_parquet(nation_path); + auto region = read_parquet(region_path); +} \ No newline at end of file diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 93c6c979d37..2c3cd975013 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -11,6 +11,15 @@ #include + +std::unique_ptr read_parquet(std::string filename) { + auto source = cudf::io::source_info(filename); + auto builder = cudf::io::parquet_reader_options_builder(source); + auto options = builder.build(); + auto table_with_metadata = cudf::io::read_parquet(options); + return std::move(table_with_metadata.tbl); +} + std::tm make_tm(int year, int month, int day) { std::tm tm = {0}; tm.tm_year = year - 1900; From 79cd0eae9b6d7f6c23915ce6a55d5dac919f5383 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Thu, 27 Jun 2024 14:35:07 -0700 Subject: [PATCH 029/124] Add q5 --- cpp/examples/tpch/q1.cpp | 6 +- cpp/examples/tpch/q5.cpp | 163 ++++++++++++++++++++++++++++++++++-- cpp/examples/tpch/q6.cpp | 6 +- cpp/examples/tpch/utils.hpp | 66 ++++++++++++++- 4 files changed, 222 insertions(+), 19 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 6e6ac8e82c6..acb09a25709 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -189,14 +189,14 @@ int main() { auto charge_col = calc_charge(t1); auto t2 = append_col_to_table(t1, disc_price_col); auto t3 = append_col_to_table(t2, charge_col); - auto t4 = perform_group_by(t3); - auto result_table = order_by(t4, {0, 1}); + auto result_table = perform_group_by(t3); + // auto result_table = order_by(t4, {0, 1}); auto e = std::chrono::high_resolution_clock::now(); std::cout << "q1: " << std::chrono::duration_cast(e - s).count() << "ms" << std::endl; write_parquet( - std::move(result_table), + result_table, create_table_metadata({ "l_returnflag", "l_linestatus", diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index e3e99c924d6..aacc433aff8 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -39,19 +39,164 @@ #include "utils.hpp" +/* +create view lineitem as select * from '~/tpch_sf1/lineitem/part-0.parquet'; +create view orders as select * from '~/tpch_sf1/orders/part-0.parquet'; +create view customer as select * from '~/tpch_sf1/customer/part-0.parquet'; +create view supplier as select * from '~/tpch_sf1/supplier/part-0.parquet'; +create view nation as select * from '~/tpch_sf1/nation/part-0.parquet'; +create view region as select * from '~/tpch_sf1/region/part-0.parquet'; + +select + n_name, + sum(l_extendedprice * (1 - l_discount)) as revenue +from + customer, + orders, + lineitem, + supplier, + nation, + region +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + and o_orderdate >= date '1994-01-01' + and o_orderdate < date '1995-01-01' +group by + n_name +order by + revenue desc; +*/ + +std::unique_ptr calc_disc_price(std::unique_ptr& table) { + auto one = cudf::fixed_point_scalar(1, -2); + auto disc = table->get_column(37).view(); + auto one_minus_disc = cudf::binary_operation(one, disc, cudf::binary_operator::SUB, disc.type()); + auto extended_price = table->get_column(36).view(); + + auto disc_price_type = cudf::data_type{cudf::type_id::DECIMAL64, -4}; + auto disc_price = cudf::binary_operation(extended_price, one_minus_disc->view(), cudf::binary_operator::MUL, disc_price_type); + return disc_price; +} int main() { - std::string lineitem_path = "~/tpch_sf1/lineitem/part-0.parquet"; - std::string orders_path = "~/tpch_sf1/orders/part-0.parquet"; - std::string customer_path = "~/tpch_sf1/customer/part-0.parquet"; - std::string supplier_path = "~/tpch_sf1/supplier/part-0.parquet"; - std::string nation_path = "~/tpch_sf1/nation/part-0.parquet"; - std::string region_path = "~/tpch_sf1/region/part-0.parquet"; + std::string customer_path = "/home/jayjeetc/tpch_sf1/customer/part-0.parquet"; + std::string orders_path = "/home/jayjeetc/tpch_sf1/orders/part-0.parquet"; + std::string lineitem_path = "/home/jayjeetc/tpch_sf1/lineitem/part-0.parquet"; + std::string supplier_path = "/home/jayjeetc/tpch_sf1/supplier/part-0.parquet"; + std::string nation_path = "/home/jayjeetc/tpch_sf1/nation/part-0.parquet"; + std::string region_path = "/home/jayjeetc/tpch_sf1/region/part-0.parquet"; - auto lineitem = read_parquet(lineitem_path); - auto orders = read_parquet(orders_path); + // read out the tables along with their column names auto customer = read_parquet(customer_path); + auto orders = read_parquet(orders_path); + auto lineitem = read_parquet(lineitem_path); auto supplier = read_parquet(supplier_path); auto nation = read_parquet(nation_path); auto region = read_parquet(region_path); -} \ No newline at end of file + + // move the tables out of the pair + auto customer_table = std::move(customer.first); + auto orders_table = std::move(orders.first); + auto lineitem_table = std::move(lineitem.first); + auto supplier_table = std::move(supplier.first); + auto nation_table = std::move(nation.first); + auto region_table = std::move(region.first); + + // join_a: region with nation on r_regionkey = n_regionkey + auto join_a = inner_join(region_table->view(), nation_table->view(), {0}, {2}); + auto join_a_column_names = concat(region.second, nation.second); + + // join_b: join_a with customer on n_nationkey = c_nationkey + auto join_b = inner_join(join_a->view(), customer_table->view(), {3}, {3}); + auto join_b_column_names = concat(join_a_column_names, customer.second); + + // join_c: join_b with orders on c_custkey = o_custkey + auto join_c = inner_join(join_b->view(), orders_table->view(), {7}, {1}); + auto join_c_column_names = concat(join_b_column_names, orders.second); + + // join_d: join_c with lineitem on o_orderkey = l_orderkey + auto join_d = inner_join(join_c->view(), lineitem_table->view(), {15}, {0}); + auto join_d_column_names = concat(join_c_column_names, lineitem.second); + + // join_e: join_d with supplier on l_suppkey = s_suppkey + auto join_e = inner_join(supplier_table->view(), join_d->view(), {0, 3}, {26, 3}); + auto join_e_column_names = concat(supplier.second, join_d_column_names); + + // define the filter expression + auto o_orderdate = cudf::ast::column_reference(26); + + auto o_orderdate_lower = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); + auto o_orderdate_lower_limit = cudf::ast::literal(o_orderdate_lower); + auto pred_a = cudf::ast::operation( + cudf::ast::ast_operator::GREATER_EQUAL, + o_orderdate, + o_orderdate_lower_limit + ); + + auto o_orderdate_upper = cudf::timestamp_scalar(days_since_epoch(1995, 1, 1), true); + auto o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper); + auto pred_b = cudf::ast::operation( + cudf::ast::ast_operator::LESS, + o_orderdate, + o_orderdate_upper_limit + ); + + auto r_name = cudf::ast::column_reference(8); + + auto r_name_value = cudf::string_scalar("ASIA"); + auto r_name_literal = cudf::ast::literal(r_name_value); + auto pred_c = cudf::ast::operation( + cudf::ast::ast_operator::EQUAL, + r_name, + r_name_literal + ); + + auto pred_ab = cudf::ast::operation( + cudf::ast::ast_operator::LOGICAL_AND, + pred_a, + pred_b + ); + + auto pred_abc = cudf::ast::operation( + cudf::ast::ast_operator::LOGICAL_AND, + pred_ab, + pred_c + ); + + auto boolean_mask = cudf::compute_column(join_e->view(), pred_abc); + auto filtered_table = cudf::apply_boolean_mask(join_e->view(), boolean_mask->view()); + + // calcute revenue column + std::cout << join_e_column_names.size() << std::endl; + auto revenue_col = calc_disc_price(filtered_table); + auto new_table = append_col_to_table(filtered_table, revenue_col); + + // perform group by + auto groupby_keys = cudf::table_view{{new_table->get_column(11)}}; + + cudf::groupby::groupby groupby_obj(groupby_keys); + std::vector requests; + + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[0].aggregations.push_back(cudf::make_sum_aggregation()); + auto new_rev_col = new_table->get_column(47); + requests[0].values = new_rev_col.view(); + + auto agg_results = groupby_obj.aggregate(requests); + auto result_key = std::move(agg_results.first); + + std::vector> result_cols; + auto col0 = std::make_unique(result_key->get_column(0)); + result_cols.push_back(std::move(col0)); + result_cols.push_back(std::move(agg_results.second[0].results[0])); + auto final_final_tbl = std::make_unique(std::move(result_cols)); + auto fff_tbl = order_by(final_final_tbl, {1}); + write_parquet(fff_tbl, create_table_metadata({"n_name", "revenue"}), "q5.parquet"); + +} diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 841a65d4fe4..8f13a66dcfe 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -138,13 +138,13 @@ std::unique_ptr apply_filters(std::unique_ptr& table) quantity_upper_literal ); - auto pred = cudf::ast::operation( + auto discount_quantity_pred = cudf::ast::operation( cudf::ast::ast_operator::LOGICAL_AND, discount_pred, quantity_pred ); - auto boolean_mask = cudf::compute_column(table->view(), pred); + auto boolean_mask = cudf::compute_column(table->view(), discount_quantity_pred); return cudf::apply_boolean_mask(table->view(), boolean_mask->view()); } @@ -186,6 +186,6 @@ int main() { auto e = std::chrono::high_resolution_clock::now(); std::cout << "q6: " << std::chrono::duration_cast(e - s).count() << "ms" << std::endl; - write_parquet(std::move(result_table), create_table_metadata({"revenue"}), "q6.parquet"); + write_parquet(result_table, create_table_metadata({"revenue"}), "q6.parquet"); return 0; } diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 2c3cd975013..dd968248fe3 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -4,6 +4,8 @@ #include #include +#include +#include #include #include @@ -11,13 +13,68 @@ #include +std::unique_ptr join_and_gather( + cudf::table_view left_input, + cudf::table_view right_input, + std::vector left_on, + std::vector right_on, + cudf::null_equality compare_nulls, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { -std::unique_ptr read_parquet(std::string filename) { + auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK; + auto left_selected = left_input.select(left_on); + auto right_selected = right_input.select(right_on); + auto const [left_join_indices, right_join_indices] = + cudf::inner_join(left_selected, right_selected, compare_nulls, mr); + + auto left_indices_span = cudf::device_span{*left_join_indices}; + auto right_indices_span = cudf::device_span{*right_join_indices}; + + auto left_indices_col = cudf::column_view{left_indices_span}; + auto right_indices_col = cudf::column_view{right_indices_span}; + + auto left_result = cudf::gather(left_input, left_indices_col, oob_policy); + auto right_result = cudf::gather(right_input, right_indices_col, oob_policy); + + auto joined_cols = left_result->release(); + auto right_cols = right_result->release(); + joined_cols.insert(joined_cols.end(), + std::make_move_iterator(right_cols.begin()), + std::make_move_iterator(right_cols.end())); + return std::make_unique(std::move(joined_cols)); +} + +std::unique_ptr inner_join( + cudf::table_view left_input, + cudf::table_view right_input, + std::vector left_on, + std::vector right_on, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) +{ + return join_and_gather( + left_input, right_input, left_on, right_on, compare_nulls); +} + +template +std::vector concat(const std::vector& lhs, const std::vector& rhs) { + std::vector result; + result.reserve(lhs.size() + rhs.size()); + std::copy(lhs.begin(), lhs.end(), std::back_inserter(result)); + std::copy(rhs.begin(), rhs.end(), std::back_inserter(result)); + return result; +} + +std::pair, std::vector> read_parquet(std::string filename) { auto source = cudf::io::source_info(filename); auto builder = cudf::io::parquet_reader_options_builder(source); auto options = builder.build(); auto table_with_metadata = cudf::io::read_parquet(options); - return std::move(table_with_metadata.tbl); + auto schema_info = table_with_metadata.metadata.schema_info; + std::vector column_names; + for (auto &col_info : schema_info) { + column_names.push_back(col_info.name); + } + return std::make_pair(std::move(table_with_metadata.tbl), column_names); } std::tm make_tm(int year, int month, int day) { @@ -66,11 +123,12 @@ std::unique_ptr order_by( } return cudf::sort_by_key( table_view, - cudf::table_view{column_views} + cudf::table_view{column_views}, + {cudf::order::DESCENDING} ); } -void write_parquet(std::unique_ptr table, cudf::io::table_metadata metadata, std::string filepath) { +void write_parquet(std::unique_ptr& table, cudf::io::table_metadata metadata, std::string filepath) { auto sink_info = cudf::io::sink_info(filepath); auto table_input_metadata = cudf::io::table_input_metadata{metadata}; auto builder = cudf::io::parquet_writer_options::builder(sink_info, table->view()); From bcee9d8f161364a3fdb48247b2f2118e7878c6cc Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Thu, 27 Jun 2024 15:30:01 -0700 Subject: [PATCH 030/124] Update README --- cpp/examples/tpch/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/examples/tpch/README.md b/cpp/examples/tpch/README.md index ee11945b8e4..0340c8bfdad 100644 --- a/cpp/examples/tpch/README.md +++ b/cpp/examples/tpch/README.md @@ -58,6 +58,6 @@ A parquet file named `q1.parquet` would be generated holding the results of the ## Implementation Status - [x] Q1 -- [ ] Q5 +- [x] Q5 - [x] Q6 From 3c95cc24584331a73ef9cf497abb06b30df3b733 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 28 Jun 2024 03:20:34 -0700 Subject: [PATCH 031/124] Implement fixed point scalar --- cpp/include/cudf/ast/expressions.hpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/cpp/include/cudf/ast/expressions.hpp b/cpp/include/cudf/ast/expressions.hpp index 918271e3e4f..3472efa6938 100644 --- a/cpp/include/cudf/ast/expressions.hpp +++ b/cpp/include/cudf/ast/expressions.hpp @@ -225,6 +225,16 @@ class generic_scalar_device_view : public cudf::detail::scalar_device_view_base { } + /** @brief Construct a new generic scalar device view object from a fixed point scalar + * + * @param s The fixed point scalar to construct from + */ + template + generic_scalar_device_view(fixed_point_scalar& s) + : generic_scalar_device_view(s.type(), s.data(), s.validity_data()) + { + } + protected: void const* _data{}; ///< Pointer to device memory containing the value size_type const _size{}; ///< Size of the string in bytes for string scalar @@ -272,6 +282,17 @@ class literal : public expression { { } + /** + * @brief Construct a new literal object. + * + * @tparam T Fixed point scalar template type + * @param value A fixed point scalar value + */ + template + literal(cudf::fixed_point_scalar& value) : scalar(value), value(value) + { + } + /** * @brief Construct a new literal object. * From e85cf41dcfeb1eaa0bde8543d9b5ecf67b821a19 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 28 Jun 2024 11:06:37 -0700 Subject: [PATCH 032/124] Fix fixed_point_scalar init in q1 --- cpp/examples/tpch/q1.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index acb09a25709..5c9b70fd6b3 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -94,7 +94,7 @@ std::unique_ptr scan_filter_project() { } std::unique_ptr calc_disc_price(std::unique_ptr& table) { - auto one = cudf::fixed_point_scalar(1, -2); + auto one = cudf::fixed_point_scalar(1); auto disc = table->get_column(4).view(); auto one_minus_disc = cudf::binary_operation(one, disc, cudf::binary_operator::SUB, disc.type()); auto extended_price = table->get_column(3).view(); @@ -105,7 +105,7 @@ std::unique_ptr calc_disc_price(std::unique_ptr& tabl } std::unique_ptr calc_charge(std::unique_ptr& table) { - auto one = cudf::fixed_point_scalar(1, -2); + auto one = cudf::fixed_point_scalar(1); auto disc = table->get_column(4).view(); auto one_minus_disc = cudf::binary_operation(one, disc, cudf::binary_operator::SUB, disc.type()); auto extended_price = table->get_column(3).view(); From 3d50688119d805190e39617bfe104199deb96224 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 28 Jun 2024 14:00:07 -0700 Subject: [PATCH 033/124] Remove fixed point scalar --- cpp/include/cudf/ast/expressions.hpp | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/cpp/include/cudf/ast/expressions.hpp b/cpp/include/cudf/ast/expressions.hpp index 3472efa6938..918271e3e4f 100644 --- a/cpp/include/cudf/ast/expressions.hpp +++ b/cpp/include/cudf/ast/expressions.hpp @@ -225,16 +225,6 @@ class generic_scalar_device_view : public cudf::detail::scalar_device_view_base { } - /** @brief Construct a new generic scalar device view object from a fixed point scalar - * - * @param s The fixed point scalar to construct from - */ - template - generic_scalar_device_view(fixed_point_scalar& s) - : generic_scalar_device_view(s.type(), s.data(), s.validity_data()) - { - } - protected: void const* _data{}; ///< Pointer to device memory containing the value size_type const _size{}; ///< Size of the string in bytes for string scalar @@ -282,17 +272,6 @@ class literal : public expression { { } - /** - * @brief Construct a new literal object. - * - * @tparam T Fixed point scalar template type - * @param value A fixed point scalar value - */ - template - literal(cudf::fixed_point_scalar& value) : scalar(value), value(value) - { - } - /** * @brief Construct a new literal object. * From 46387c43c378bbd88e19aa0612c1aba7f4fdfa86 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 28 Jun 2024 15:02:10 -0700 Subject: [PATCH 034/124] Extract groupby into utils --- cpp/examples/tpch/q5.cpp | 33 ++++++----------------- cpp/examples/tpch/q9.cpp | 0 cpp/examples/tpch/test.cpp | 15 +++++++++++ cpp/examples/tpch/utils.hpp | 53 ++++++++++++++++++++++++++++++++++++- 4 files changed, 75 insertions(+), 26 deletions(-) create mode 100644 cpp/examples/tpch/q9.cpp create mode 100644 cpp/examples/tpch/test.cpp diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index aacc433aff8..c25617f0c16 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -128,7 +128,7 @@ int main() { auto join_e = inner_join(supplier_table->view(), join_d->view(), {0, 3}, {26, 3}); auto join_e_column_names = concat(supplier.second, join_d_column_names); - // define the filter expression + // apply filter predicates auto o_orderdate = cudf::ast::column_reference(26); auto o_orderdate_lower = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); @@ -169,34 +169,17 @@ int main() { pred_c ); - auto boolean_mask = cudf::compute_column(join_e->view(), pred_abc); - auto filtered_table = cudf::apply_boolean_mask(join_e->view(), boolean_mask->view()); + auto filtered_table = apply_filter(join_e, pred_abc); // calcute revenue column - std::cout << join_e_column_names.size() << std::endl; auto revenue_col = calc_disc_price(filtered_table); auto new_table = append_col_to_table(filtered_table, revenue_col); // perform group by - auto groupby_keys = cudf::table_view{{new_table->get_column(11)}}; - - cudf::groupby::groupby groupby_obj(groupby_keys); - std::vector requests; - - requests.emplace_back(cudf::groupby::aggregation_request()); - requests[0].aggregations.push_back(cudf::make_sum_aggregation()); - auto new_rev_col = new_table->get_column(47); - requests[0].values = new_rev_col.view(); - - auto agg_results = groupby_obj.aggregate(requests); - auto result_key = std::move(agg_results.first); - - std::vector> result_cols; - auto col0 = std::make_unique(result_key->get_column(0)); - result_cols.push_back(std::move(col0)); - result_cols.push_back(std::move(agg_results.second[0].results[0])); - auto final_final_tbl = std::make_unique(std::move(result_cols)); - auto fff_tbl = order_by(final_final_tbl, {1}); - write_parquet(fff_tbl, create_table_metadata({"n_name", "revenue"}), "q5.parquet"); - + groupby_context ctx{{11}, {{ + 47, {cudf::aggregation::Kind::SUM} + }}}; + auto groupedby_table = apply_groupby(new_table, ctx); + auto orderedby_table = apply_orderby(groupedby_table, {1}); + write_parquet(orderedby_table, create_table_metadata({"n_name", "revenue"}), "q5.parquet"); } diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp new file mode 100644 index 00000000000..e69de29bb2d diff --git a/cpp/examples/tpch/test.cpp b/cpp/examples/tpch/test.cpp new file mode 100644 index 00000000000..6853836d0c2 --- /dev/null +++ b/cpp/examples/tpch/test.cpp @@ -0,0 +1,15 @@ +#include +#include +#include +#include +#include + +int main() { + std::vector v = {26, 26}; + if (std::adjacent_find(v.cbegin(), v.cend(), std::not_equal_to<>()) != + v.cend()) { + std::cout << "Failed" << std::endl; + } else { + std::cout << "Passed" << std::endl; + } +} \ No newline at end of file diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index dd968248fe3..c457df9806e 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -6,6 +6,12 @@ #include #include #include +#include +#include +#include +#include +#include +#include #include #include @@ -77,6 +83,51 @@ std::pair, std::vector> read_parquet(s return std::make_pair(std::move(table_with_metadata.tbl), column_names); } +std::unique_ptr apply_filter( + std::unique_ptr& table, cudf::ast::operation& predicate) { + auto boolean_mask = cudf::compute_column(table->view(), predicate); + return cudf::apply_boolean_mask(table->view(), boolean_mask->view()); +} + +struct groupby_context { + std::vector keys; + std::unordered_map> values; +}; + +std::unique_ptr apply_groupby( + std::unique_ptr& table, groupby_context ctx) { + auto keys = table->select(ctx.keys); + cudf::groupby::groupby groupby_obj(keys); + std::vector requests; + for (auto& [value_index, aggregations] : ctx.values) { + requests.emplace_back(cudf::groupby::aggregation_request()); + for (auto& agg : aggregations) { + if (agg == cudf::aggregation::Kind::SUM) { + requests.back().aggregations.push_back(cudf::make_sum_aggregation()); + } else if (agg == cudf::aggregation::Kind::MEAN) { + requests.back().aggregations.push_back(cudf::make_mean_aggregation()); + } else if (agg == cudf::aggregation::Kind::COUNT_ALL) { + requests.back().aggregations.push_back(cudf::make_count_aggregation()); + } else { + throw std::runtime_error("Unsupported aggregation"); + } + } + requests.back().values = table->get_column(value_index).view(); + } + auto agg_results = groupby_obj.aggregate(requests); + std::vector> result_columns; + for (size_t i = 0; i < agg_results.first->num_columns(); i++) { + auto col = std::make_unique(agg_results.first->get_column(i)); + result_columns.push_back(std::move(col)); + } + for (size_t i = 0; i < agg_results.second.size(); i++) { + for (size_t j = 0; j < agg_results.second[i].results.size(); j++) { + result_columns.push_back(std::move(agg_results.second[i].results[j])); + } + } + return std::make_unique(std::move(result_columns)); +} + std::tm make_tm(int year, int month, int day) { std::tm tm = {0}; tm.tm_year = year - 1900; @@ -114,7 +165,7 @@ std::unique_ptr append_col_to_table( return std::make_unique(std::move(columns)); } -std::unique_ptr order_by( +std::unique_ptr apply_orderby( std::unique_ptr& table, std::vector keys) { auto table_view = table->view(); std::vector column_views; From b0ac3ee647dc21ef136cd2c6301ae6941d38bc90 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 28 Jun 2024 15:27:50 -0700 Subject: [PATCH 035/124] Clean up Q5 --- cpp/examples/tpch/q5.cpp | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index c25617f0c16..dd3f17559f3 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -85,20 +85,13 @@ std::unique_ptr calc_disc_price(std::unique_ptr& tabl } int main() { - std::string customer_path = "/home/jayjeetc/tpch_sf1/customer/part-0.parquet"; - std::string orders_path = "/home/jayjeetc/tpch_sf1/orders/part-0.parquet"; - std::string lineitem_path = "/home/jayjeetc/tpch_sf1/lineitem/part-0.parquet"; - std::string supplier_path = "/home/jayjeetc/tpch_sf1/supplier/part-0.parquet"; - std::string nation_path = "/home/jayjeetc/tpch_sf1/nation/part-0.parquet"; - std::string region_path = "/home/jayjeetc/tpch_sf1/region/part-0.parquet"; - - // read out the tables along with their column names - auto customer = read_parquet(customer_path); - auto orders = read_parquet(orders_path); - auto lineitem = read_parquet(lineitem_path); - auto supplier = read_parquet(supplier_path); - auto nation = read_parquet(nation_path); - auto region = read_parquet(region_path); + std::string dataset_dir = "/home/jayjeetc/tpch_sf1/"; + auto customer = read_parquet(dataset_dir + "customer/part-0.parquet"); + auto orders = read_parquet(dataset_dir + "orders/part-0.parquet"); + auto lineitem = read_parquet(dataset_dir + "lineitem/part-0.parquet"); + auto supplier = read_parquet(dataset_dir + "supplier/part-0.parquet"); + auto nation = read_parquet(dataset_dir + "nation/part-0.parquet"); + auto region = read_parquet(dataset_dir + "region/part-0.parquet"); // move the tables out of the pair auto customer_table = std::move(customer.first); @@ -175,7 +168,6 @@ int main() { auto revenue_col = calc_disc_price(filtered_table); auto new_table = append_col_to_table(filtered_table, revenue_col); - // perform group by groupby_context ctx{{11}, {{ 47, {cudf::aggregation::Kind::SUM} }}}; From 4ff629368be9be1d27e1da52ee4905f7340ebedb Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 28 Jun 2024 19:45:04 -0700 Subject: [PATCH 036/124] Remove test.cp p --- cpp/examples/tpch/test.cpp | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 cpp/examples/tpch/test.cpp diff --git a/cpp/examples/tpch/test.cpp b/cpp/examples/tpch/test.cpp deleted file mode 100644 index 6853836d0c2..00000000000 --- a/cpp/examples/tpch/test.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include -#include -#include -#include -#include - -int main() { - std::vector v = {26, 26}; - if (std::adjacent_find(v.cbegin(), v.cend(), std::not_equal_to<>()) != - v.cend()) { - std::cout << "Failed" << std::endl; - } else { - std::cout << "Passed" << std::endl; - } -} \ No newline at end of file From d20bac0919cba0ea4d31f99c9732d4aea59eb3e7 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 28 Jun 2024 19:49:42 -0700 Subject: [PATCH 037/124] Add copyright notice --- cpp/examples/tpch/utils.hpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index c457df9806e..f1bb1c504b7 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -1,3 +1,19 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + #include #include #include From b297e9869f5db9abf7c93eba6427198d6dfbda14 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sat, 29 Jun 2024 11:56:43 -0700 Subject: [PATCH 038/124] Clean up Q5 --- cpp/examples/tpch/CMakeLists.txt | 16 ++- cpp/examples/tpch/q5.cpp | 142 +++++++++++----------- cpp/examples/tpch/q9.cpp | 143 ++++++++++++++++++++++ cpp/examples/tpch/utils.hpp | 198 ++++++++++++++++++------------- 4 files changed, 340 insertions(+), 159 deletions(-) diff --git a/cpp/examples/tpch/CMakeLists.txt b/cpp/examples/tpch/CMakeLists.txt index 2eeac1385c5..ecf06aa9078 100644 --- a/cpp/examples/tpch/CMakeLists.txt +++ b/cpp/examples/tpch/CMakeLists.txt @@ -15,14 +15,18 @@ project( include(../fetch_dependencies.cmake) -add_executable(tpch_q1 q1.cpp) -target_link_libraries(tpch_q1 PRIVATE cudf::cudf) -target_compile_features(tpch_q1 PRIVATE cxx_std_17) +# add_executable(tpch_q1 q1.cpp) +# target_link_libraries(tpch_q1 PRIVATE cudf::cudf) +# target_compile_features(tpch_q1 PRIVATE cxx_std_17) add_executable(tpch_q5 q5.cpp) target_link_libraries(tpch_q5 PRIVATE cudf::cudf) target_compile_features(tpch_q5 PRIVATE cxx_std_17) -add_executable(tpch_q6 q6.cpp) -target_link_libraries(tpch_q6 PRIVATE cudf::cudf) -target_compile_features(tpch_q6 PRIVATE cxx_std_17) +# add_executable(tpch_q6 q6.cpp) +# target_link_libraries(tpch_q6 PRIVATE cudf::cudf) +# target_compile_features(tpch_q6 PRIVATE cxx_std_17) + +add_executable(tpch_q9 q9.cpp) +target_link_libraries(tpch_q9 PRIVATE cudf::cudf) +target_compile_features(tpch_q9 PRIVATE cxx_std_17) diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index dd3f17559f3..10d3d4f92a1 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -18,24 +18,10 @@ #include #include #include -#include + #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include "utils.hpp" @@ -73,11 +59,11 @@ order by revenue desc; */ -std::unique_ptr calc_disc_price(std::unique_ptr& table) { +std::unique_ptr calc_revenue(std::unique_ptr& table) { auto one = cudf::fixed_point_scalar(1, -2); - auto disc = table->get_column(37).view(); + auto disc = table->column("l_discount"); auto one_minus_disc = cudf::binary_operation(one, disc, cudf::binary_operator::SUB, disc.type()); - auto extended_price = table->get_column(36).view(); + auto extended_price = table->column("l_extendedprice"); auto disc_price_type = cudf::data_type{cudf::type_id::DECIMAL64, -4}; auto disc_price = cudf::binary_operation(extended_price, one_minus_disc->view(), cudf::binary_operator::MUL, disc_price_type); @@ -86,6 +72,8 @@ std::unique_ptr calc_disc_price(std::unique_ptr& tabl int main() { std::string dataset_dir = "/home/jayjeetc/tpch_sf1/"; + + // 1. Read out the tables from parquet files auto customer = read_parquet(dataset_dir + "customer/part-0.parquet"); auto orders = read_parquet(dataset_dir + "orders/part-0.parquet"); auto lineitem = read_parquet(dataset_dir + "lineitem/part-0.parquet"); @@ -93,40 +81,44 @@ int main() { auto nation = read_parquet(dataset_dir + "nation/part-0.parquet"); auto region = read_parquet(dataset_dir + "region/part-0.parquet"); - // move the tables out of the pair - auto customer_table = std::move(customer.first); - auto orders_table = std::move(orders.first); - auto lineitem_table = std::move(lineitem.first); - auto supplier_table = std::move(supplier.first); - auto nation_table = std::move(nation.first); - auto region_table = std::move(region.first); - - // join_a: region with nation on r_regionkey = n_regionkey - auto join_a = inner_join(region_table->view(), nation_table->view(), {0}, {2}); - auto join_a_column_names = concat(region.second, nation.second); - - // join_b: join_a with customer on n_nationkey = c_nationkey - auto join_b = inner_join(join_a->view(), customer_table->view(), {3}, {3}); - auto join_b_column_names = concat(join_a_column_names, customer.second); - - // join_c: join_b with orders on c_custkey = o_custkey - auto join_c = inner_join(join_b->view(), orders_table->view(), {7}, {1}); - auto join_c_column_names = concat(join_b_column_names, orders.second); - - // join_d: join_c with lineitem on o_orderkey = l_orderkey - auto join_d = inner_join(join_c->view(), lineitem_table->view(), {15}, {0}); - auto join_d_column_names = concat(join_c_column_names, lineitem.second); - - // join_e: join_d with supplier on l_suppkey = s_suppkey - auto join_e = inner_join(supplier_table->view(), join_d->view(), {0, 3}, {26, 3}); - auto join_e_column_names = concat(supplier.second, join_d_column_names); + // 2. Perform the joins + auto join_a = apply_inner_join( + region, + nation, + {"r_regionkey"}, + {"n_regionkey"} + ); + auto join_b = apply_inner_join( + join_a, + customer, + {"n_nationkey"}, + {"c_nationkey"} + ); + auto join_c = apply_inner_join( + join_b, + orders, + {"c_custkey"}, + {"o_custkey"} + ); + auto join_d = apply_inner_join( + join_c, + lineitem, + {"o_orderkey"}, + {"l_orderkey"} + ); + auto joined_table = apply_inner_join( + supplier, + join_d, + {"s_suppkey", "s_nationkey"}, + {"l_suppkey", "n_nationkey"} + ); - // apply filter predicates - auto o_orderdate = cudf::ast::column_reference(26); + // 3. Apply the filter predicates + auto o_orderdate = cudf::ast::column_reference(joined_table->col_id("o_orderdate")); auto o_orderdate_lower = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); auto o_orderdate_lower_limit = cudf::ast::literal(o_orderdate_lower); - auto pred_a = cudf::ast::operation( + auto orderdate_pred_a = cudf::ast::operation( cudf::ast::ast_operator::GREATER_EQUAL, o_orderdate, o_orderdate_lower_limit @@ -134,44 +126,50 @@ int main() { auto o_orderdate_upper = cudf::timestamp_scalar(days_since_epoch(1995, 1, 1), true); auto o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper); - auto pred_b = cudf::ast::operation( + auto orderdate_pred_b = cudf::ast::operation( cudf::ast::ast_operator::LESS, o_orderdate, o_orderdate_upper_limit ); - - auto r_name = cudf::ast::column_reference(8); - + + auto r_name = cudf::ast::column_reference(joined_table->col_id("r_name")); auto r_name_value = cudf::string_scalar("ASIA"); auto r_name_literal = cudf::ast::literal(r_name_value); - auto pred_c = cudf::ast::operation( + auto r_name_pred = cudf::ast::operation( cudf::ast::ast_operator::EQUAL, r_name, r_name_literal ); - auto pred_ab = cudf::ast::operation( + auto orderdate_pred = cudf::ast::operation( cudf::ast::ast_operator::LOGICAL_AND, - pred_a, - pred_b + orderdate_pred_a, + orderdate_pred_b ); - auto pred_abc = cudf::ast::operation( + auto final_pred = cudf::ast::operation( cudf::ast::ast_operator::LOGICAL_AND, - pred_ab, - pred_c + orderdate_pred, + r_name_pred ); - - auto filtered_table = apply_filter(join_e, pred_abc); - - // calcute revenue column - auto revenue_col = calc_disc_price(filtered_table); - auto new_table = append_col_to_table(filtered_table, revenue_col); - - groupby_context ctx{{11}, {{ - 47, {cudf::aggregation::Kind::SUM} - }}}; - auto groupedby_table = apply_groupby(new_table, ctx); - auto orderedby_table = apply_orderby(groupedby_table, {1}); - write_parquet(orderedby_table, create_table_metadata({"n_name", "revenue"}), "q5.parquet"); + auto filtered_table = apply_filter(joined_table, final_pred); + + // 4. Calcute and append the `revenue` column + auto revenue = calc_revenue(filtered_table); + auto appended_table = filtered_table->append(revenue, "revenue"); + + // 5. Perform groupby and orderby operations + groupby_context ctx{ + {"n_name"}, + { + {"revenue", {cudf::aggregation::Kind::SUM}}, + }, + {"n_name", "revenue"} + }; + auto groupedby_table = apply_groupby(appended_table, ctx); + auto orderedby_table = apply_orderby( + groupedby_table, {"revenue"}, {cudf::order::DESCENDING}); + + // 6. Write query result to a parquet file + orderedby_table->to_parquet("q5.parquet"); } diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index e69de29bb2d..78ef7d73cf5 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +#include "utils.hpp" + +/* +select + nation, + o_year, + sum(amount) as sum_profit +from + ( + select + n_name as nation, + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount + from + part, + supplier, + lineitem, + partsupp, + orders, + nation + where + s_suppkey = l_suppkey + and ps_suppkey = l_suppkey + and ps_partkey = l_partkey + and p_partkey = l_partkey + and o_orderkey = l_orderkey + and s_nationkey = n_nationkey + and p_name like '%green%' + ) as profit +group by + nation, + o_year +order by + nation, + o_year desc; +*/ + +int main() { + std::string dataset_dir = "/home/jayjeetc/tpch_sf1/"; + + // 1. Read out the table from parquet files + auto lineitem = read_parquet(dataset_dir + "lineitem/part-0.parquet"); + auto nation = read_parquet(dataset_dir + "nation/part-0.parquet"); + auto orders = read_parquet(dataset_dir + "orders/part-0.parquet"); + auto part = read_parquet(dataset_dir + "part/part-0.parquet"); + auto partsupp = read_parquet(dataset_dir + "partsupp/part-0.parquet"); + auto supplier = read_parquet(dataset_dir + "supplier/part-0.parquet"); + + // 2. Filter the part table using `p_name like '%green%'` + auto p_name = part->table().column(1); + auto mask = cudf::strings::like( + cudf::strings_column_view(p_name), cudf::string_scalar("%green%")); + auto part_filtered = apply_mask(part, mask); + + // 3. Join the tables + /* + + supplier and lineitem on s_suppkey and l_suppkey // done + + partsupp and lineitem on ps_suppkey and l_suppkey // done + partsupp and lineitem on ps_partkey and l_partkey // done + + part and lineitem on p_partkey and l_partkey + + orders and lineitem on o_orderkey and l_orderkey + + nation and supplier on n_nationkey and s_nationkey // done + + */ + std::cout << "Joining tables" << std::endl; + auto join_a = apply_inner_join( + lineitem, + supplier, + {"l_suppkey"}, + {"s_suppkey"} + ); + auto join_b = apply_inner_join( + join_a, + partsupp, + {"l_suppkey", "l_partkey"}, + {"ps_suppkey", "ps_partkey"} + ); + auto join_c = apply_inner_join( + join_b, + part_filtered, + {"l_partkey"}, + {"p_partkey"} + ); + auto join_d = apply_inner_join( + join_c, + orders, + {"l_orderkey"}, + {"o_orderkey"} + ); + auto join_e = apply_inner_join( + join_d, + nation, + {"s_nationkey"}, + {"n_nationkey"} + ); +} diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index f1bb1c504b7..19cb0dd8563 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -66,17 +67,6 @@ std::unique_ptr join_and_gather( return std::make_unique(std::move(joined_cols)); } -std::unique_ptr inner_join( - cudf::table_view left_input, - cudf::table_view right_input, - std::vector left_on, - std::vector right_on, - cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) -{ - return join_and_gather( - left_input, right_input, left_on, right_on, compare_nulls); -} - template std::vector concat(const std::vector& lhs, const std::vector& rhs) { std::vector result; @@ -86,7 +76,88 @@ std::vector concat(const std::vector& lhs, const std::vector& rhs) { return result; } -std::pair, std::vector> read_parquet(std::string filename) { +class table_with_cols { + public: + table_with_cols( + std::unique_ptr tbl, std::vector col_names) + : tbl(std::move(tbl)), col_names(col_names) {} + cudf::table_view table() { + return tbl->view(); + } + cudf::column_view column(std::string col_name) { + return tbl->view().column(col_id(col_name)); + } + std::vector columns() { + return col_names; + } + cudf::size_type col_id(std::string col_name) { + auto it = std::find(col_names.begin(), col_names.end(), col_name); + if (it == col_names.end()) { + throw std::runtime_error("Column not found"); + } + return std::distance(col_names.begin(), it); + } + std::unique_ptr append(std::unique_ptr& col, std::string col_name) { + std::vector> updated_cols; + std::vector updated_col_names; + for (size_t i = 0; i < tbl->num_columns(); i++) { + updated_cols.push_back(std::make_unique(tbl->get_column(i))); + updated_col_names.push_back(col_names[i]); + } + updated_cols.push_back(std::move(col)); + updated_col_names.push_back(col_name); + auto updated_table = std::make_unique(std::move(updated_cols)); + return std::make_unique(std::move(updated_table), updated_col_names); + } + cudf::table_view select(std::vector col_names) { + std::vector col_indices; + for (auto &col_name : col_names) { + col_indices.push_back(col_id(col_name)); + } + return tbl->select(col_indices); + } + void to_parquet(std::string filepath) { + auto sink_info = cudf::io::sink_info(filepath); + cudf::io::table_metadata metadata; + std::vector col_name_infos; + for (auto &col_name : col_names) { + col_name_infos.push_back(cudf::io::column_name_info(col_name)); + } + metadata.schema_info = col_name_infos; + auto table_input_metadata = cudf::io::table_input_metadata{metadata}; + auto builder = cudf::io::parquet_writer_options::builder(sink_info, tbl->view()); + builder.metadata(table_input_metadata); + auto options = builder.build(); + cudf::io::write_parquet(options); + } + private: + std::unique_ptr tbl; + std::vector col_names; +}; + +std::unique_ptr apply_inner_join( + std::unique_ptr& left_input, + std::unique_ptr& right_input, + std::vector left_on, + std::vector right_on, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) { + std::vector left_on_indices; + std::vector right_on_indices; + for (auto &col_name : left_on) { + left_on_indices.push_back(left_input->col_id(col_name)); + } + for (auto &col_name : right_on) { + right_on_indices.push_back(right_input->col_id(col_name)); + } + auto table = join_and_gather( + left_input->table(), right_input->table(), + left_on_indices, right_on_indices, compare_nulls + ); + return std::make_unique(std::move(table), + concat(left_input->columns(), right_input->columns())); +} + +std::unique_ptr read_parquet(std::string filename) { auto source = cudf::io::source_info(filename); auto builder = cudf::io::parquet_reader_options_builder(source); auto options = builder.build(); @@ -96,26 +167,35 @@ std::pair, std::vector> read_parquet(s for (auto &col_info : schema_info) { column_names.push_back(col_info.name); } - return std::make_pair(std::move(table_with_metadata.tbl), column_names); + return std::make_unique( + std::move(table_with_metadata.tbl), column_names); +} + +std::unique_ptr apply_filter( + std::unique_ptr& table, cudf::ast::operation& predicate) { + auto boolean_mask = cudf::compute_column(table->table(), predicate); + auto result_table = cudf::apply_boolean_mask(table->table(), boolean_mask->view()); + return std::make_unique(std::move(result_table), table->columns()); } -std::unique_ptr apply_filter( - std::unique_ptr& table, cudf::ast::operation& predicate) { - auto boolean_mask = cudf::compute_column(table->view(), predicate); - return cudf::apply_boolean_mask(table->view(), boolean_mask->view()); +std::unique_ptr apply_mask( + std::unique_ptr& table, std::unique_ptr& mask) { + auto result_table = cudf::apply_boolean_mask(table->table(), mask->view()); + return std::make_unique(std::move(result_table), table->columns()); } struct groupby_context { - std::vector keys; - std::unordered_map> values; + std::vector keys; + std::unordered_map> values; + std::vector schema; }; -std::unique_ptr apply_groupby( - std::unique_ptr& table, groupby_context ctx) { +std::unique_ptr apply_groupby( + std::unique_ptr& table, groupby_context ctx) { auto keys = table->select(ctx.keys); cudf::groupby::groupby groupby_obj(keys); std::vector requests; - for (auto& [value_index, aggregations] : ctx.values) { + for (auto& [value_col, aggregations] : ctx.values) { requests.emplace_back(cudf::groupby::aggregation_request()); for (auto& agg : aggregations) { if (agg == cudf::aggregation::Kind::SUM) { @@ -128,7 +208,7 @@ std::unique_ptr apply_groupby( throw std::runtime_error("Unsupported aggregation"); } } - requests.back().values = table->get_column(value_index).view(); + requests.back().values = table->column(value_col); } auto agg_results = groupby_obj.aggregate(requests); std::vector> result_columns; @@ -141,7 +221,8 @@ std::unique_ptr apply_groupby( result_columns.push_back(std::move(agg_results.second[i].results[j])); } } - return std::make_unique(std::move(result_columns)); + auto result_table = std::make_unique(std::move(result_columns)); + return std::make_unique(std::move(result_table), ctx.schema); } std::tm make_tm(int year, int month, int day) { @@ -161,64 +242,19 @@ int32_t days_since_epoch(int year, int month, int day) { return static_cast(diff); } -cudf::io::table_metadata create_table_metadata(std::vector column_names) { - cudf::io::table_metadata metadata; - std::vector column_name_infos; - for (auto &col_name : column_names) { - column_name_infos.push_back(cudf::io::column_name_info(col_name)); - } - metadata.schema_info = column_name_infos; - return metadata; -} - -std::unique_ptr append_col_to_table( - std::unique_ptr& table, std::unique_ptr& col) { - std::vector> columns; - for (size_t i = 0; i < table->num_columns(); i++) { - columns.push_back(std::make_unique(table->get_column(i))); - } - columns.push_back(std::move(col)); - return std::make_unique(std::move(columns)); -} - -std::unique_ptr apply_orderby( - std::unique_ptr& table, std::vector keys) { - auto table_view = table->view(); +std::unique_ptr apply_orderby( + std::unique_ptr& table, + std::vector sort_keys, + std::vector sort_key_orders) { std::vector column_views; - for (auto& key : keys) { - column_views.push_back(table_view.column(key)); - } - return cudf::sort_by_key( - table_view, + for (auto& key : sort_keys) { + column_views.push_back(table->column(key)); + } + auto result_table = cudf::sort_by_key( + table->table(), cudf::table_view{column_views}, - {cudf::order::DESCENDING} + sort_key_orders ); -} - -void write_parquet(std::unique_ptr& table, cudf::io::table_metadata metadata, std::string filepath) { - auto sink_info = cudf::io::sink_info(filepath); - auto table_input_metadata = cudf::io::table_input_metadata{metadata}; - auto builder = cudf::io::parquet_writer_options::builder(sink_info, table->view()); - builder.metadata(table_input_metadata); - auto options = builder.build(); - cudf::io::write_parquet(options); -} - -template -rmm::device_buffer get_device_buffer_from_value(T value) { - auto stream = cudf::get_default_stream(); - rmm::cuda_stream_view stream_view(stream); - - rmm::device_scalar scalar(stream_view); - scalar.set_value_async(value, stream_view); - - rmm::device_buffer buffer(scalar.data(), scalar.size(), stream_view); - return buffer; -} - -rmm::device_buffer get_empty_device_buffer() { - auto stream = cudf::get_default_stream(); - rmm::cuda_stream_view stream_view(stream); - rmm::device_buffer buffer(0, stream_view); - return buffer; + return std::make_unique( + std::move(result_table), table->columns()); } From 1cd06f86e11e31657f778c9cf04127b5ede97e52 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sat, 29 Jun 2024 11:59:57 -0700 Subject: [PATCH 039/124] Fix naming of variables --- cpp/examples/tpch/q5.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 10d3d4f92a1..405c61d33c7 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -118,7 +118,7 @@ int main() { auto o_orderdate_lower = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); auto o_orderdate_lower_limit = cudf::ast::literal(o_orderdate_lower); - auto orderdate_pred_a = cudf::ast::operation( + auto o_orderdate_pred_a = cudf::ast::operation( cudf::ast::ast_operator::GREATER_EQUAL, o_orderdate, o_orderdate_lower_limit @@ -126,7 +126,7 @@ int main() { auto o_orderdate_upper = cudf::timestamp_scalar(days_since_epoch(1995, 1, 1), true); auto o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper); - auto orderdate_pred_b = cudf::ast::operation( + auto o_orderdate_pred_b = cudf::ast::operation( cudf::ast::ast_operator::LESS, o_orderdate, o_orderdate_upper_limit @@ -141,15 +141,15 @@ int main() { r_name_literal ); - auto orderdate_pred = cudf::ast::operation( + auto o_orderdate_pred = cudf::ast::operation( cudf::ast::ast_operator::LOGICAL_AND, - orderdate_pred_a, - orderdate_pred_b + o_orderdate_pred_a, + o_orderdate_pred_b ); auto final_pred = cudf::ast::operation( cudf::ast::ast_operator::LOGICAL_AND, - orderdate_pred, + o_orderdate_pred, r_name_pred ); auto filtered_table = apply_filter(joined_table, final_pred); From 6d786c4da3acd8db1320504a07e90642f7662de7 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sat, 29 Jun 2024 12:21:21 -0700 Subject: [PATCH 040/124] Remove more headers from Q5 --- cpp/examples/tpch/q5.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 405c61d33c7..6e97c6a7134 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -14,11 +14,6 @@ * limitations under the License. */ -#include -#include -#include -#include - #include #include #include From 38997ac36a5ae82dab5744c2dcf1664f58306f83 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sat, 29 Jun 2024 13:33:53 -0700 Subject: [PATCH 041/124] Refactor Q1 --- cpp/examples/tpch/CMakeLists.txt | 6 +- cpp/examples/tpch/q1.cpp | 198 ++++++++----------------------- cpp/examples/tpch/q5.cpp | 1 + cpp/examples/tpch/utils.hpp | 11 +- 4 files changed, 60 insertions(+), 156 deletions(-) diff --git a/cpp/examples/tpch/CMakeLists.txt b/cpp/examples/tpch/CMakeLists.txt index ecf06aa9078..e2edf056677 100644 --- a/cpp/examples/tpch/CMakeLists.txt +++ b/cpp/examples/tpch/CMakeLists.txt @@ -15,9 +15,9 @@ project( include(../fetch_dependencies.cmake) -# add_executable(tpch_q1 q1.cpp) -# target_link_libraries(tpch_q1 PRIVATE cudf::cudf) -# target_compile_features(tpch_q1 PRIVATE cxx_std_17) +add_executable(tpch_q1 q1.cpp) +target_link_libraries(tpch_q1 PRIVATE cudf::cudf) +target_compile_features(tpch_q1 PRIVATE cxx_std_17) add_executable(tpch_q5 q5.cpp) target_link_libraries(tpch_q5 PRIVATE cudf::cudf) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 5c9b70fd6b3..8f68d890599 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -14,24 +14,9 @@ * limitations under the License. */ -#include -#include -#include -#include -#include #include #include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include #include "utils.hpp" @@ -59,157 +44,68 @@ order by l_linestatus; */ -std::unique_ptr scan_filter_project() { - std::string lineitem = "/home/jayjeetc/tpch_sf1/lineitem/part-0.parquet"; - auto source = cudf::io::source_info(lineitem); - auto builder = cudf::io::parquet_reader_options_builder(source); - - std::vector cols = { - "l_returnflag", - "l_linestatus", - "l_quantity", - "l_extendedprice", - "l_discount", - "l_shipdate", - "l_orderkey", - "l_tax" - }; - - auto shipdate = cudf::ast::column_reference(5); - - auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); - auto shipdate_upper_literal = cudf::ast::literal(shipdate_upper); - auto pred = cudf::ast::operation( - cudf::ast::ast_operator::LESS_EQUAL, - shipdate, - shipdate_upper_literal - ); - - builder.columns(cols); - builder.filter(pred); - - auto options = builder.build(); - auto result = cudf::io::read_parquet(options); - return std::move(result.tbl); -} - -std::unique_ptr calc_disc_price(std::unique_ptr& table) { +std::unique_ptr calc_disc_price(std::unique_ptr& table) { auto one = cudf::fixed_point_scalar(1); - auto disc = table->get_column(4).view(); - auto one_minus_disc = cudf::binary_operation(one, disc, cudf::binary_operator::SUB, disc.type()); - auto extended_price = table->get_column(3).view(); - + auto discount = table->column("l_discount"); + auto one_minus_discount = cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type()); + auto extended_price = table->column("l_extendedprice"); auto disc_price_type = cudf::data_type{cudf::type_id::DECIMAL64, -4}; - auto disc_price = cudf::binary_operation(extended_price, one_minus_disc->view(), cudf::binary_operator::MUL, disc_price_type); + auto disc_price = cudf::binary_operation(extended_price, one_minus_discount->view(), cudf::binary_operator::MUL, disc_price_type); return disc_price; } -std::unique_ptr calc_charge(std::unique_ptr& table) { +std::unique_ptr calc_charge(std::unique_ptr& table, std::unique_ptr& disc_price) { auto one = cudf::fixed_point_scalar(1); - auto disc = table->get_column(4).view(); - auto one_minus_disc = cudf::binary_operation(one, disc, cudf::binary_operator::SUB, disc.type()); - auto extended_price = table->get_column(3).view(); - - auto disc_price_type = cudf::data_type{cudf::type_id::DECIMAL64, -4}; - auto disc_price = cudf::binary_operation(extended_price, one_minus_disc->view(), cudf::binary_operator::MUL, disc_price_type); - - auto tax = table->get_column(7).view(); + auto tax = table->column("l_tax"); auto one_plus_tax = cudf::binary_operation(one, tax, cudf::binary_operator::ADD, tax.type()); - auto charge_type = cudf::data_type{cudf::type_id::DECIMAL64, -6}; auto charge = cudf::binary_operation(disc_price->view(), one_plus_tax->view(), cudf::binary_operator::MUL, charge_type); return charge; } -std::unique_ptr perform_group_by(std::unique_ptr& table) { - auto tbl_view = table->view(); - auto keys = cudf::table_view{{tbl_view.column(0), tbl_view.column(1)}}; - - auto quantity = tbl_view.column(2); - auto extendedprice = tbl_view.column(3); - auto discount = tbl_view.column(4); - auto discprice = tbl_view.column(8); - auto charge = tbl_view.column(9); - - cudf::groupby::groupby groupby_obj(keys); - std::vector requests; - - requests.emplace_back(cudf::groupby::aggregation_request()); - requests[0].aggregations.push_back(cudf::make_sum_aggregation()); - requests[0].aggregations.push_back(cudf::make_mean_aggregation()); - requests[0].values = quantity; - - requests.emplace_back(cudf::groupby::aggregation_request()); - requests[1].aggregations.push_back(cudf::make_sum_aggregation()); - requests[1].aggregations.push_back(cudf::make_mean_aggregation()); - requests[1].values = extendedprice; - - requests.emplace_back(cudf::groupby::aggregation_request()); - requests[2].aggregations.push_back(cudf::make_mean_aggregation()); - requests[2].values = discount; - - requests.emplace_back(cudf::groupby::aggregation_request()); - requests[3].aggregations.push_back(cudf::make_sum_aggregation()); - requests[3].values = discprice; - - requests.emplace_back(cudf::groupby::aggregation_request()); - requests[4].aggregations.push_back(cudf::make_sum_aggregation()); - requests[4].values = charge; - - requests.emplace_back(cudf::groupby::aggregation_request()); - requests[5].aggregations.push_back(cudf::make_count_aggregation()); - requests[5].values = charge; - - auto agg_results = groupby_obj.aggregate(requests); - auto result_key = std::move(agg_results.first); - - auto returnflag = std::make_unique(result_key->get_column(0)); - auto linestatus = std::make_unique(result_key->get_column(1)); - - std::vector> columns; - columns.push_back(std::move(returnflag)); - columns.push_back(std::move(linestatus)); - columns.push_back(std::move(agg_results.second[0].results[0])); - columns.push_back(std::move(agg_results.second[0].results[1])); - columns.push_back(std::move(agg_results.second[1].results[0])); - columns.push_back(std::move(agg_results.second[1].results[1])); - columns.push_back(std::move(agg_results.second[2].results[0])); - columns.push_back(std::move(agg_results.second[3].results[0])); - columns.push_back(std::move(agg_results.second[4].results[0])); - columns.push_back(std::move(agg_results.second[5].results[0])); - - return std::make_unique(std::move(columns)); -} - int main() { - auto s = std::chrono::high_resolution_clock::now(); - - auto t1 = scan_filter_project(); - auto disc_price_col = calc_disc_price(t1); - auto charge_col = calc_charge(t1); - auto t2 = append_col_to_table(t1, disc_price_col); - auto t3 = append_col_to_table(t2, charge_col); - auto result_table = perform_group_by(t3); - // auto result_table = order_by(t4, {0, 1}); + std::string dataset_dir = "/home/jayjeetc/tpch_sf1/"; - auto e = std::chrono::high_resolution_clock::now(); - std::cout << "q1: " << std::chrono::duration_cast(e - s).count() << "ms" << std::endl; + // 1. Read out table from parquet file + auto shipdate = cudf::ast::column_reference(5); + auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); + auto shipdate_upper_literal = cudf::ast::literal(shipdate_upper); + auto shipdate_pred = std::make_unique( + cudf::ast::ast_operator::LESS_EQUAL, + shipdate, + shipdate_upper_literal + ); + auto lineitem = read_parquet( + dataset_dir + "lineitem/part-0.parquet", + {"l_returnflag", "l_linestatus", "l_quantity", "l_extendedprice", "l_discount", "l_shipdate", "l_orderkey", "l_tax"}, + std::move(shipdate_pred) + ); - write_parquet( - result_table, - create_table_metadata({ - "l_returnflag", - "l_linestatus", - "sum_qty", - "avg_qty", - "sum_base_price", - "avg_price", - "avg_disc", - "sum_disc_price", - "sum_charge", - "count_order" - }), - "q1.parquet" + // 2. Calculate the discount price and charge columns and append to lineitem table + auto disc_price = calc_disc_price(lineitem); + auto charge = calc_charge(lineitem, disc_price); + auto appended_table = lineitem->append(disc_price, "disc_price")->append(charge, "charge"); + + // 3. Perform the group by operation + auto groupedby_table = apply_groupby( + appended_table, + groupby_context{ + {"l_returnflag", "l_linestatus"}, + { + {"l_extendedprice", {cudf::aggregation::Kind::SUM, cudf::aggregation::Kind::MEAN}}, + {"l_quantity", {cudf::aggregation::Kind::SUM, cudf::aggregation::Kind::MEAN}}, + {"l_discount", {cudf::aggregation::Kind::MEAN}}, + {"disc_price", {cudf::aggregation::Kind::SUM}}, + {"charge", {cudf::aggregation::Kind::SUM, cudf::aggregation::Kind::COUNT_ALL}}, + }, + {"l_returnflag", "l_linestatus", "sum_base_price", "avg_price", "sum_qty", "avg_qty", "avg_disc", "sum_disc_price", "sum_charge", "count_order"} + } ); + + // 4. Perform the order by operation + auto orderedby_table = apply_orderby(groupedby_table, {"l_returnflag", "l_linestatus"}, {cudf::order::ASCENDING, cudf::order::ASCENDING}); + + // 5. Write query result to a parquet file + orderedby_table->to_parquet("q1.parquet"); return 0; } diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 6e97c6a7134..484861432c1 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -167,4 +167,5 @@ int main() { // 6. Write query result to a parquet file orderedby_table->to_parquet("q5.parquet"); + return 0; } diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 19cb0dd8563..c4a07678b50 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -157,9 +157,16 @@ std::unique_ptr apply_inner_join( concat(left_input->columns(), right_input->columns())); } -std::unique_ptr read_parquet(std::string filename) { +std::unique_ptr read_parquet( + std::string filename, std::vector columns = {}, std::unique_ptr predicate = nullptr) { auto source = cudf::io::source_info(filename); - auto builder = cudf::io::parquet_reader_options_builder(source); + auto builder = cudf::io::parquet_reader_options_builder(source); + if (columns.size()) { + builder.columns(columns); + } + if (predicate) { + builder.filter(*predicate); + } auto options = builder.build(); auto table_with_metadata = cudf::io::read_parquet(options); auto schema_info = table_with_metadata.metadata.schema_info; From dc625c4b77fe919f34a4865500100848aa140eef Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sat, 29 Jun 2024 15:26:53 -0700 Subject: [PATCH 042/124] Add finished q9 --- cpp/examples/tpch/CMakeLists.txt | 6 +- cpp/examples/tpch/q5.cpp | 5 +- cpp/examples/tpch/q9.cpp | 104 +++++++++++++++++++------------ cpp/examples/tpch/utils.hpp | 16 +++-- 4 files changed, 80 insertions(+), 51 deletions(-) diff --git a/cpp/examples/tpch/CMakeLists.txt b/cpp/examples/tpch/CMakeLists.txt index e2edf056677..ecf06aa9078 100644 --- a/cpp/examples/tpch/CMakeLists.txt +++ b/cpp/examples/tpch/CMakeLists.txt @@ -15,9 +15,9 @@ project( include(../fetch_dependencies.cmake) -add_executable(tpch_q1 q1.cpp) -target_link_libraries(tpch_q1 PRIVATE cudf::cudf) -target_compile_features(tpch_q1 PRIVATE cxx_std_17) +# add_executable(tpch_q1 q1.cpp) +# target_link_libraries(tpch_q1 PRIVATE cudf::cudf) +# target_compile_features(tpch_q1 PRIVATE cxx_std_17) add_executable(tpch_q5 q5.cpp) target_link_libraries(tpch_q5 PRIVATE cudf::cudf) diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 484861432c1..5eb5ebb2f9f 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -157,9 +157,8 @@ int main() { groupby_context ctx{ {"n_name"}, { - {"revenue", {cudf::aggregation::Kind::SUM}}, - }, - {"n_name", "revenue"} + {"revenue", {{cudf::aggregation::Kind::SUM, "revenue"}}}, + } }; auto groupedby_table = apply_groupby(appended_table, ctx); auto orderedby_table = apply_orderby( diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index 78ef7d73cf5..b76e7997f43 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -14,35 +14,22 @@ * limitations under the License. */ -#include -#include -#include -#include -#include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - +#include #include #include - #include "utils.hpp" /* +create view part as select * from '~/tpch_sf1/part/part-0.parquet'; +create view supplier as select * from '~/tpch_sf1/supplier/part-0.parquet'; +create view lineitem as select * from '~/tpch_sf1/lineitem/part-0.parquet'; +create view partsupp as select * from '~/tpch_sf1/partsupp/part-0.parquet'; +create view orders as select * from '~/tpch_sf1/orders/part-0.parquet'; +create view nation as select * from '~/tpch_sf1/nation/part-0.parquet'; + select nation, o_year, @@ -77,6 +64,21 @@ order by o_year desc; */ +std::unique_ptr calc_amount(std::unique_ptr& table) { + auto one = cudf::fixed_point_scalar(1); + auto discount = table->column("l_discount"); + auto one_minus_discount = cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type()); + auto extended_price = table->column("l_extendedprice"); + auto extended_price_discounted_type = cudf::data_type{cudf::type_id::DECIMAL64, -4}; + auto extended_price_discounted = cudf::binary_operation(extended_price, one_minus_discount->view(), cudf::binary_operator::MUL, extended_price_discounted_type); + auto supply_cost = table->column("ps_supplycost"); + auto quantity = table->column("l_quantity"); + auto supply_cost_quantity_type = cudf::data_type{cudf::type_id::DECIMAL64, -4}; + auto supply_cost_quantity = cudf::binary_operation(supply_cost, quantity, cudf::binary_operator::MUL, supply_cost_quantity_type); + auto amount = cudf::binary_operation(extended_price_discounted->view(), supply_cost_quantity->view(), cudf::binary_operator::SUB, extended_price_discounted->type()); + return amount; +} + int main() { std::string dataset_dir = "/home/jayjeetc/tpch_sf1/"; @@ -88,28 +90,14 @@ int main() { auto partsupp = read_parquet(dataset_dir + "partsupp/part-0.parquet"); auto supplier = read_parquet(dataset_dir + "supplier/part-0.parquet"); - // 2. Filter the part table using `p_name like '%green%'` + // 2. Generating the `profit` table + // 2.1 Filter the part table using `p_name like '%green%'` auto p_name = part->table().column(1); auto mask = cudf::strings::like( cudf::strings_column_view(p_name), cudf::string_scalar("%green%")); auto part_filtered = apply_mask(part, mask); - // 3. Join the tables - /* - - supplier and lineitem on s_suppkey and l_suppkey // done - - partsupp and lineitem on ps_suppkey and l_suppkey // done - partsupp and lineitem on ps_partkey and l_partkey // done - - part and lineitem on p_partkey and l_partkey - - orders and lineitem on o_orderkey and l_orderkey - - nation and supplier on n_nationkey and s_nationkey // done - - */ - std::cout << "Joining tables" << std::endl; + // 2.2 Perform the joins auto join_a = apply_inner_join( lineitem, supplier, @@ -134,10 +122,48 @@ int main() { {"l_orderkey"}, {"o_orderkey"} ); - auto join_e = apply_inner_join( + auto joined_table = apply_inner_join( join_d, nation, {"s_nationkey"}, {"n_nationkey"} ); + + // 2.3 Calculate the `nation`, `o_year`, and `amount` columns + auto n_name = std::make_unique(joined_table->column("n_name")); + auto o_year = cudf::datetime::extract_year(joined_table->column("o_orderdate")); + auto amount = calc_amount(joined_table); + + // 2.4 Put together the `profit` table + std::vector> profit_columns; + profit_columns.push_back(std::move(n_name)); + profit_columns.push_back(std::move(o_year)); + profit_columns.push_back(std::move(amount)); + + auto profit_table = std::make_unique(std::move(profit_columns)); + auto profit = std::make_unique( + std::move(profit_table), + std::vector{"nation", "o_year", "amount"} + ); + + // 3. Perform the group by operation + auto groupedby_table = apply_groupby( + profit, + groupby_context{ + {"nation", "o_year"}, + { + {"amount", {{cudf::groupby_aggregation::SUM, "sum_profit"}}} + } + } + ); + + // 4. Perform the orderby operation + auto orderedby_table = apply_orderby( + groupedby_table, + {"nation", "o_year"}, + {cudf::order::ASCENDING, cudf::order::DESCENDING} + ); + + // 5. Write query result to a parquet file + orderedby_table->to_parquet("q9.parquet"); } diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index c4a07678b50..b253e5419ea 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -193,27 +193,30 @@ std::unique_ptr apply_mask( struct groupby_context { std::vector keys; - std::unordered_map> values; - std::vector schema; + std::unordered_map>> values; }; std::unique_ptr apply_groupby( std::unique_ptr& table, groupby_context ctx) { auto keys = table->select(ctx.keys); cudf::groupby::groupby groupby_obj(keys); + std::vector result_column_names; + result_column_names.insert( + result_column_names.end(), ctx.keys.begin(), ctx.keys.end()); std::vector requests; for (auto& [value_col, aggregations] : ctx.values) { requests.emplace_back(cudf::groupby::aggregation_request()); for (auto& agg : aggregations) { - if (agg == cudf::aggregation::Kind::SUM) { + if (agg.first == cudf::aggregation::Kind::SUM) { requests.back().aggregations.push_back(cudf::make_sum_aggregation()); - } else if (agg == cudf::aggregation::Kind::MEAN) { + } else if (agg.first == cudf::aggregation::Kind::MEAN) { requests.back().aggregations.push_back(cudf::make_mean_aggregation()); - } else if (agg == cudf::aggregation::Kind::COUNT_ALL) { + } else if (agg.first == cudf::aggregation::Kind::COUNT_ALL) { requests.back().aggregations.push_back(cudf::make_count_aggregation()); } else { throw std::runtime_error("Unsupported aggregation"); } + result_column_names.push_back(agg.second); } requests.back().values = table->column(value_col); } @@ -229,7 +232,8 @@ std::unique_ptr apply_groupby( } } auto result_table = std::make_unique(std::move(result_columns)); - return std::make_unique(std::move(result_table), ctx.schema); + return std::make_unique( + std::move(result_table), result_column_names); } std::tm make_tm(int year, int month, int day) { From e398abc4c91ebbb44db9fd7e7fe68f249bb662f4 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sat, 29 Jun 2024 15:45:08 -0700 Subject: [PATCH 043/124] Add finished q9 --- cpp/examples/tpch/CMakeLists.txt | 6 ++--- cpp/examples/tpch/q1.cpp | 41 ++++++++++++++++++++++++++------ 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/cpp/examples/tpch/CMakeLists.txt b/cpp/examples/tpch/CMakeLists.txt index ecf06aa9078..e2edf056677 100644 --- a/cpp/examples/tpch/CMakeLists.txt +++ b/cpp/examples/tpch/CMakeLists.txt @@ -15,9 +15,9 @@ project( include(../fetch_dependencies.cmake) -# add_executable(tpch_q1 q1.cpp) -# target_link_libraries(tpch_q1 PRIVATE cudf::cudf) -# target_compile_features(tpch_q1 PRIVATE cxx_std_17) +add_executable(tpch_q1 q1.cpp) +target_link_libraries(tpch_q1 PRIVATE cudf::cudf) +target_compile_features(tpch_q1 PRIVATE cxx_std_17) add_executable(tpch_q5 q5.cpp) target_link_libraries(tpch_q5 PRIVATE cudf::cudf) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 8f68d890599..690ca4eaff2 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -92,13 +92,40 @@ int main() { groupby_context{ {"l_returnflag", "l_linestatus"}, { - {"l_extendedprice", {cudf::aggregation::Kind::SUM, cudf::aggregation::Kind::MEAN}}, - {"l_quantity", {cudf::aggregation::Kind::SUM, cudf::aggregation::Kind::MEAN}}, - {"l_discount", {cudf::aggregation::Kind::MEAN}}, - {"disc_price", {cudf::aggregation::Kind::SUM}}, - {"charge", {cudf::aggregation::Kind::SUM, cudf::aggregation::Kind::COUNT_ALL}}, - }, - {"l_returnflag", "l_linestatus", "sum_base_price", "avg_price", "sum_qty", "avg_qty", "avg_disc", "sum_disc_price", "sum_charge", "count_order"} + { + "l_extendedprice", + { + {cudf::aggregation::Kind::SUM, "sum_base_price"}, + {cudf::aggregation::Kind::MEAN, "avg_price"} + } + }, + { + "l_quantity", + { + {cudf::aggregation::Kind::SUM, "sum_qty"}, + {cudf::aggregation::Kind::MEAN, "avg_qty"} + } + }, + { + "l_discount", + { + {cudf::aggregation::Kind::MEAN, "avg_disc"}, + } + }, + { + "disc_price", + { + {cudf::aggregation::Kind::SUM, "sum_disc_price"}, + } + }, + { + "charge", + { + {cudf::aggregation::Kind::SUM, "sum_charge"}, + {cudf::aggregation::Kind::COUNT_ALL, "count_order"} + } + }, + } } ); From 0f4516cb56640afd7e44d0d9abadec983ea5f02b Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sat, 29 Jun 2024 15:49:41 -0700 Subject: [PATCH 044/124] Update README --- cpp/examples/tpch/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/examples/tpch/README.md b/cpp/examples/tpch/README.md index 0340c8bfdad..54fd0b2ba08 100644 --- a/cpp/examples/tpch/README.md +++ b/cpp/examples/tpch/README.md @@ -60,4 +60,4 @@ A parquet file named `q1.parquet` would be generated holding the results of the - [x] Q1 - [x] Q5 - [x] Q6 - +- [x] Q9 From 5ca3420e8f6fe49ae95a80597a84833e77d1f3e7 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sat, 29 Jun 2024 15:52:08 -0700 Subject: [PATCH 045/124] Fix sql query in Q5 --- cpp/examples/tpch/q5.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 5eb5ebb2f9f..89c26b3e2db 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -21,9 +21,9 @@ #include "utils.hpp" /* -create view lineitem as select * from '~/tpch_sf1/lineitem/part-0.parquet'; -create view orders as select * from '~/tpch_sf1/orders/part-0.parquet'; create view customer as select * from '~/tpch_sf1/customer/part-0.parquet'; +create view orders as select * from '~/tpch_sf1/orders/part-0.parquet'; +create view lineitem as select * from '~/tpch_sf1/lineitem/part-0.parquet'; create view supplier as select * from '~/tpch_sf1/supplier/part-0.parquet'; create view nation as select * from '~/tpch_sf1/nation/part-0.parquet'; create view region as select * from '~/tpch_sf1/region/part-0.parquet'; From dd3a1d5ccf7b070d526a61a3e4aeed07c322a9e3 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sat, 29 Jun 2024 16:15:38 -0700 Subject: [PATCH 046/124] Fix comments --- cpp/examples/tpch/q1.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 690ca4eaff2..a7f10b8f4bf 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -66,7 +66,7 @@ std::unique_ptr calc_charge(std::unique_ptr& tabl int main() { std::string dataset_dir = "/home/jayjeetc/tpch_sf1/"; - // 1. Read out table from parquet file + // 1. Read out the `lineitem` table from parquet file auto shipdate = cudf::ast::column_reference(5); auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); auto shipdate_upper_literal = cudf::ast::literal(shipdate_upper); From 6e87bc9c39aca54ee7d3920f92824a49a6bb9417 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sat, 29 Jun 2024 16:41:02 -0700 Subject: [PATCH 047/124] Refactor Q6 --- cpp/examples/tpch/CMakeLists.txt | 6 +- cpp/examples/tpch/q1.cpp | 4 +- cpp/examples/tpch/q5.cpp | 10 +- cpp/examples/tpch/q6.cpp | 181 ++++++++++++------------------- cpp/examples/tpch/utils.hpp | 18 ++- 5 files changed, 94 insertions(+), 125 deletions(-) diff --git a/cpp/examples/tpch/CMakeLists.txt b/cpp/examples/tpch/CMakeLists.txt index e2edf056677..f03b5d8a940 100644 --- a/cpp/examples/tpch/CMakeLists.txt +++ b/cpp/examples/tpch/CMakeLists.txt @@ -23,9 +23,9 @@ add_executable(tpch_q5 q5.cpp) target_link_libraries(tpch_q5 PRIVATE cudf::cudf) target_compile_features(tpch_q5 PRIVATE cxx_std_17) -# add_executable(tpch_q6 q6.cpp) -# target_link_libraries(tpch_q6 PRIVATE cudf::cudf) -# target_compile_features(tpch_q6 PRIVATE cxx_std_17) +add_executable(tpch_q6 q6.cpp) +target_link_libraries(tpch_q6 PRIVATE cudf::cudf) +target_compile_features(tpch_q6 PRIVATE cxx_std_17) add_executable(tpch_q9 q9.cpp) target_link_libraries(tpch_q9 PRIVATE cudf::cudf) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index a7f10b8f4bf..3f2079ec66d 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -67,12 +67,12 @@ int main() { std::string dataset_dir = "/home/jayjeetc/tpch_sf1/"; // 1. Read out the `lineitem` table from parquet file - auto shipdate = cudf::ast::column_reference(5); + auto shipdate_ref = cudf::ast::column_reference(5); auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); auto shipdate_upper_literal = cudf::ast::literal(shipdate_upper); auto shipdate_pred = std::make_unique( cudf::ast::ast_operator::LESS_EQUAL, - shipdate, + shipdate_ref, shipdate_upper_literal ); auto lineitem = read_parquet( diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 89c26b3e2db..690a5fe2138 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -109,13 +109,13 @@ int main() { ); // 3. Apply the filter predicates - auto o_orderdate = cudf::ast::column_reference(joined_table->col_id("o_orderdate")); + auto o_orderdate_ref = cudf::ast::column_reference(joined_table->col_id("o_orderdate")); auto o_orderdate_lower = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); auto o_orderdate_lower_limit = cudf::ast::literal(o_orderdate_lower); auto o_orderdate_pred_a = cudf::ast::operation( cudf::ast::ast_operator::GREATER_EQUAL, - o_orderdate, + o_orderdate_ref, o_orderdate_lower_limit ); @@ -123,16 +123,16 @@ int main() { auto o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper); auto o_orderdate_pred_b = cudf::ast::operation( cudf::ast::ast_operator::LESS, - o_orderdate, + o_orderdate_ref, o_orderdate_upper_limit ); - auto r_name = cudf::ast::column_reference(joined_table->col_id("r_name")); + auto r_name_ref = cudf::ast::column_reference(joined_table->col_id("r_name")); auto r_name_value = cudf::string_scalar("ASIA"); auto r_name_literal = cudf::ast::literal(r_name_value); auto r_name_pred = cudf::ast::operation( cudf::ast::ast_operator::EQUAL, - r_name, + r_name_ref, r_name_literal ); diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 8f13a66dcfe..e6af6567c30 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -14,28 +14,9 @@ * limitations under the License. */ -#include -#include -#include -#include -#include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include "utils.hpp" @@ -52,62 +33,66 @@ where and l_quantity < 24; */ -std::unique_ptr scan_filter_project() { - std::string lineitem = "/home/jayjeetc/tpch_sf1/lineitem/part-0.parquet"; - auto source = cudf::io::source_info(lineitem); - auto builder = cudf::io::parquet_reader_options_builder(source); - - std::vector cols = { - "l_extendedprice", - "l_discount", - "l_shipdate", - "l_quantity" - }; +std::unique_ptr calc_revenue(std::unique_ptr& table) { + auto extendedprice = table->column("l_extendedprice"); + auto discount = table->column("l_discount"); + auto extendedprice_mul_discount_type = cudf::data_type{cudf::type_id::DECIMAL64, -4}; + auto extendedprice_mul_discount = cudf::binary_operation( + extendedprice, + discount, + cudf::binary_operator::MUL, + extendedprice_mul_discount_type + ); + return extendedprice_mul_discount; +} - auto extendedprice = cudf::ast::column_reference(0); - auto discount = cudf::ast::column_reference(1); - auto shipdate = cudf::ast::column_reference(2); - auto quantity = cudf::ast::column_reference(3); +int main() { + std::string dataset_dir = "/home/jayjeetc/tpch_sf1/"; - auto shipdate_lower = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); + // 1. Read out the `lineitem` table from parquet file + auto shipdate_ref = cudf::ast::column_reference(2); + auto shipdate_lower = cudf::timestamp_scalar( + days_since_epoch(1994, 1, 1), true); auto shipdate_lower_literal = cudf::ast::literal(shipdate_lower); - - auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1995, 1, 1), true); + auto shipdate_upper = cudf::timestamp_scalar( + days_since_epoch(1995, 1, 1), true); auto shipdate_upper_literal = cudf::ast::literal(shipdate_upper); - - auto pred_a = cudf::ast::operation( + auto shipdate_pred_a = cudf::ast::operation( cudf::ast::ast_operator::GREATER_EQUAL, - shipdate, + shipdate_ref, shipdate_lower_literal ); - - auto pred_b = cudf::ast::operation( + auto shipdate_pred_b = cudf::ast::operation( cudf::ast::ast_operator::LESS, - shipdate, + shipdate_ref, shipdate_upper_literal ); - - auto pred_ab = cudf::ast::operation( + auto shipdate_pred = std::make_unique( cudf::ast::ast_operator::LOGICAL_AND, - pred_a, - pred_b + shipdate_pred_a, + shipdate_pred_b + ); + auto lineitem = read_parquet( + dataset_dir + "lineitem/part-0.parquet", + {"l_extendedprice", "l_discount", "l_shipdate", "l_quantity"}, + std::move(shipdate_pred) ); - builder.columns(cols); - - // FIXME: since, ast does not support `fixed_point_scalar` yet, - // we just push down the date filters while scanning the parquet file. - builder.filter(pred_ab); - - auto options = builder.build(); - auto result = cudf::io::read_parquet(options); - return std::move(result.tbl); -} - -std::unique_ptr apply_filters(std::unique_ptr& table) { - // NOTE: apply the remaining filters based on the float32 casted columns - auto discount = cudf::ast::column_reference(4); - auto quantity = cudf::ast::column_reference(5); + // 2. Cast the discount and quantity columns to float32 and append to lineitem table + auto discout_float = cudf::cast( + lineitem->column("l_discount"), cudf::data_type{cudf::type_id::FLOAT32}); + auto quantity_float = cudf::cast( + lineitem->column("l_quantity"), cudf::data_type{cudf::type_id::FLOAT32}); + auto appended_table = lineitem + ->append(discout_float, "l_discount_float") + ->append(quantity_float, "l_quantity_float"); + + // 3. Apply the filters + auto discount_ref = cudf::ast::column_reference( + appended_table->col_id("l_discount_float")); + auto quantity_ref = cudf::ast::column_reference( + appended_table->col_id("l_quantity_float") + ); auto discount_lower = cudf::numeric_scalar(0.05); auto discount_lower_literal = cudf::ast::literal(discount_lower); @@ -117,75 +102,43 @@ std::unique_ptr apply_filters(std::unique_ptr& table) auto quantity_upper_literal = cudf::ast::literal(quantity_upper); auto discount_pred_a = cudf::ast::operation( - cudf::ast::ast_operator::GREATER_EQUAL, - discount, - discount_lower_literal - ); + cudf::ast::ast_operator::GREATER_EQUAL, + discount_ref, + discount_lower_literal + ); auto discount_pred_b = cudf::ast::operation( - cudf::ast::ast_operator::LESS_EQUAL, - discount, - discount_upper_literal - ); - + cudf::ast::ast_operator::LESS_EQUAL, + discount_ref, + discount_upper_literal + ); auto discount_pred = cudf::ast::operation( cudf::ast::ast_operator::LOGICAL_AND, discount_pred_a, discount_pred_b ); - auto quantity_pred = cudf::ast::operation( cudf::ast::ast_operator::LESS, - quantity, + quantity_ref, quantity_upper_literal ); - auto discount_quantity_pred = cudf::ast::operation( cudf::ast::ast_operator::LOGICAL_AND, discount_pred, quantity_pred ); + auto filtered_table = apply_filter(appended_table, discount_quantity_pred); - auto boolean_mask = cudf::compute_column(table->view(), discount_quantity_pred); - return cudf::apply_boolean_mask(table->view(), boolean_mask->view()); -} + // 4. Calculate the `revenue` column + auto revenue = calc_revenue(filtered_table); -std::unique_ptr apply_reduction(std::unique_ptr& table) { - auto extendedprice = table->view().column(0); - auto discount = table->view().column(1); - - auto extendedprice_mul_discount_type = cudf::data_type{cudf::type_id::DECIMAL64, -4}; - auto extendedprice_mul_discount = cudf::binary_operation( - extendedprice, - discount, - cudf::binary_operator::MUL, - extendedprice_mul_discount_type + // 5. Sum the `revenue` column + auto revenue_view = revenue->view(); + auto result_table = apply_reduction( + revenue_view, + cudf::aggregation::Kind::SUM, + "revenue" ); - - auto const sum_agg = cudf::make_sum_aggregation(); - auto sum = cudf::reduce(extendedprice_mul_discount->view(), *sum_agg, extendedprice_mul_discount->type()); - - cudf::size_type len = 1; - auto col = cudf::make_column_from_scalar(*sum, len); - - std::vector> columns; - columns.push_back(std::move(col)); - auto result_table = std::make_unique(std::move(columns)); - return result_table; -} -int main() { - auto s = std::chrono::high_resolution_clock::now(); - - auto t1 = scan_filter_project(); - auto discout_float = cudf::cast(t1->view().column(1), cudf::data_type{cudf::type_id::FLOAT32}); - auto quantity_float = cudf::cast(t1->view().column(3), cudf::data_type{cudf::type_id::FLOAT32}); - auto t2 = append_col_to_table(t1, discout_float); - auto t3 = append_col_to_table(t2, quantity_float); - auto t4 = apply_filters(t3); - auto result_table = apply_reduction(t4); - - auto e = std::chrono::high_resolution_clock::now(); - std::cout << "q6: " << std::chrono::duration_cast(e - s).count() << "ms" << std::endl; - - write_parquet(result_table, create_table_metadata({"revenue"}), "q6.parquet"); + // 6. Write query result to a parquet file + result_table->to_parquet("q6.parquet"); return 0; } diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index b253e5419ea..3a8dfb521df 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -29,7 +29,8 @@ #include #include #include - +#include +#include #include #include #include @@ -269,3 +270,18 @@ std::unique_ptr apply_orderby( return std::make_unique( std::move(result_table), table->columns()); } + +std::unique_ptr apply_reduction( + cudf::column_view& column, cudf::aggregation::Kind agg_kind, std::string col_name) { + auto agg = cudf::make_sum_aggregation(); + auto result = cudf::reduce(column, *agg, column.type()); + cudf::size_type len = 1; + auto col = cudf::make_column_from_scalar(*result, len); + std::vector> columns; + columns.push_back(std::move(col)); + auto result_table = std::make_unique(std::move(columns)); + std::vector col_names = {col_name}; + return std::make_unique( + std::move(result_table), col_names + ); +} From 12ddb7857992d7078056f8a0557215af7fd586a3 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sat, 29 Jun 2024 18:04:36 -0700 Subject: [PATCH 048/124] Extract base dataset dir into utils --- cpp/examples/tpch/q1.cpp | 2 +- cpp/examples/tpch/q5.cpp | 2 +- cpp/examples/tpch/q6.cpp | 2 +- cpp/examples/tpch/q9.cpp | 2 +- cpp/examples/tpch/utils.hpp | 1 + 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 3f2079ec66d..527a19b544e 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -64,7 +64,7 @@ std::unique_ptr calc_charge(std::unique_ptr& tabl } int main() { - std::string dataset_dir = "/home/jayjeetc/tpch_sf1/"; + std::string dataset_dir = BASE_DATASET_DIR; // 1. Read out the `lineitem` table from parquet file auto shipdate_ref = cudf::ast::column_reference(5); diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 690a5fe2138..12ee2f50d04 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -66,7 +66,7 @@ std::unique_ptr calc_revenue(std::unique_ptr& tab } int main() { - std::string dataset_dir = "/home/jayjeetc/tpch_sf1/"; + std::string dataset_dir = BASE_DATASET_DIR; // 1. Read out the tables from parquet files auto customer = read_parquet(dataset_dir + "customer/part-0.parquet"); diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index e6af6567c30..2f2bdb445b8 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -47,7 +47,7 @@ std::unique_ptr calc_revenue(std::unique_ptr& tab } int main() { - std::string dataset_dir = "/home/jayjeetc/tpch_sf1/"; + std::string dataset_dir = BASE_DATASET_DIR; // 1. Read out the `lineitem` table from parquet file auto shipdate_ref = cudf::ast::column_reference(2); diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index b76e7997f43..1890eccd3d8 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -80,7 +80,7 @@ std::unique_ptr calc_amount(std::unique_ptr& tabl } int main() { - std::string dataset_dir = "/home/jayjeetc/tpch_sf1/"; + std::string dataset_dir = BASE_DATASET_DIR; // 1. Read out the table from parquet files auto lineitem = read_parquet(dataset_dir + "lineitem/part-0.parquet"); diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 3a8dfb521df..30fcd49c659 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -36,6 +36,7 @@ #include #include +const std::string BASE_DATASET_DIR = "/home/jayjeetc/tpch_sf1/"; std::unique_ptr join_and_gather( cudf::table_view left_input, From c45577fcf77e8bc15f6ceecb8a7813da19a0b917 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sat, 29 Jun 2024 18:56:21 -0700 Subject: [PATCH 049/124] Remove unnecessary rmm imports --- cpp/examples/tpch/utils.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 30fcd49c659..a52102a3321 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -31,10 +31,6 @@ #include #include #include -#include -#include -#include -#include const std::string BASE_DATASET_DIR = "/home/jayjeetc/tpch_sf1/"; From 07cfd3687d361871fe13514b29322cb3222014b7 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Mon, 1 Jul 2024 09:05:24 -0700 Subject: [PATCH 050/124] Rearrange utils.hpp --- cpp/examples/tpch/utils.hpp | 156 ++++++++++++++++++------------------ 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index a52102a3321..267254fe198 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -34,46 +34,6 @@ const std::string BASE_DATASET_DIR = "/home/jayjeetc/tpch_sf1/"; -std::unique_ptr join_and_gather( - cudf::table_view left_input, - cudf::table_view right_input, - std::vector left_on, - std::vector right_on, - cudf::null_equality compare_nulls, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { - - auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK; - auto left_selected = left_input.select(left_on); - auto right_selected = right_input.select(right_on); - auto const [left_join_indices, right_join_indices] = - cudf::inner_join(left_selected, right_selected, compare_nulls, mr); - - auto left_indices_span = cudf::device_span{*left_join_indices}; - auto right_indices_span = cudf::device_span{*right_join_indices}; - - auto left_indices_col = cudf::column_view{left_indices_span}; - auto right_indices_col = cudf::column_view{right_indices_span}; - - auto left_result = cudf::gather(left_input, left_indices_col, oob_policy); - auto right_result = cudf::gather(right_input, right_indices_col, oob_policy); - - auto joined_cols = left_result->release(); - auto right_cols = right_result->release(); - joined_cols.insert(joined_cols.end(), - std::make_move_iterator(right_cols.begin()), - std::make_move_iterator(right_cols.end())); - return std::make_unique(std::move(joined_cols)); -} - -template -std::vector concat(const std::vector& lhs, const std::vector& rhs) { - std::vector result; - result.reserve(lhs.size() + rhs.size()); - std::copy(lhs.begin(), lhs.end(), std::back_inserter(result)); - std::copy(rhs.begin(), rhs.end(), std::back_inserter(result)); - return result; -} - class table_with_cols { public: table_with_cols( @@ -133,6 +93,46 @@ class table_with_cols { std::vector col_names; }; +template +std::vector concat(const std::vector& lhs, const std::vector& rhs) { + std::vector result; + result.reserve(lhs.size() + rhs.size()); + std::copy(lhs.begin(), lhs.end(), std::back_inserter(result)); + std::copy(rhs.begin(), rhs.end(), std::back_inserter(result)); + return result; +} + +std::unique_ptr join_and_gather( + cudf::table_view left_input, + cudf::table_view right_input, + std::vector left_on, + std::vector right_on, + cudf::null_equality compare_nulls, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { + + auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK; + auto left_selected = left_input.select(left_on); + auto right_selected = right_input.select(right_on); + auto const [left_join_indices, right_join_indices] = + cudf::inner_join(left_selected, right_selected, compare_nulls, mr); + + auto left_indices_span = cudf::device_span{*left_join_indices}; + auto right_indices_span = cudf::device_span{*right_join_indices}; + + auto left_indices_col = cudf::column_view{left_indices_span}; + auto right_indices_col = cudf::column_view{right_indices_span}; + + auto left_result = cudf::gather(left_input, left_indices_col, oob_policy); + auto right_result = cudf::gather(right_input, right_indices_col, oob_policy); + + auto joined_cols = left_result->release(); + auto right_cols = right_result->release(); + joined_cols.insert(joined_cols.end(), + std::make_move_iterator(right_cols.begin()), + std::make_move_iterator(right_cols.end())); + return std::make_unique(std::move(joined_cols)); +} + std::unique_ptr apply_inner_join( std::unique_ptr& left_input, std::unique_ptr& right_input, @@ -155,27 +155,6 @@ std::unique_ptr apply_inner_join( concat(left_input->columns(), right_input->columns())); } -std::unique_ptr read_parquet( - std::string filename, std::vector columns = {}, std::unique_ptr predicate = nullptr) { - auto source = cudf::io::source_info(filename); - auto builder = cudf::io::parquet_reader_options_builder(source); - if (columns.size()) { - builder.columns(columns); - } - if (predicate) { - builder.filter(*predicate); - } - auto options = builder.build(); - auto table_with_metadata = cudf::io::read_parquet(options); - auto schema_info = table_with_metadata.metadata.schema_info; - std::vector column_names; - for (auto &col_info : schema_info) { - column_names.push_back(col_info.name); - } - return std::make_unique( - std::move(table_with_metadata.tbl), column_names); -} - std::unique_ptr apply_filter( std::unique_ptr& table, cudf::ast::operation& predicate) { auto boolean_mask = cudf::compute_column(table->table(), predicate); @@ -234,23 +213,6 @@ std::unique_ptr apply_groupby( std::move(result_table), result_column_names); } -std::tm make_tm(int year, int month, int day) { - std::tm tm = {0}; - tm.tm_year = year - 1900; - tm.tm_mon = month - 1; - tm.tm_mday = day; - return tm; -} - -int32_t days_since_epoch(int year, int month, int day) { - std::tm tm = make_tm(year, month, day); - std::tm epoch = make_tm(1970, 1, 1); - std::time_t time = std::mktime(&tm); - std::time_t epoch_time = std::mktime(&epoch); - double diff = std::difftime(time, epoch_time) / (60*60*24); - return static_cast(diff); -} - std::unique_ptr apply_orderby( std::unique_ptr& table, std::vector sort_keys, @@ -282,3 +244,41 @@ std::unique_ptr apply_reduction( std::move(result_table), col_names ); } + +std::unique_ptr read_parquet( + std::string filename, std::vector columns = {}, std::unique_ptr predicate = nullptr) { + auto source = cudf::io::source_info(filename); + auto builder = cudf::io::parquet_reader_options_builder(source); + if (columns.size()) { + builder.columns(columns); + } + if (predicate) { + builder.filter(*predicate); + } + auto options = builder.build(); + auto table_with_metadata = cudf::io::read_parquet(options); + auto schema_info = table_with_metadata.metadata.schema_info; + std::vector column_names; + for (auto &col_info : schema_info) { + column_names.push_back(col_info.name); + } + return std::make_unique( + std::move(table_with_metadata.tbl), column_names); +} + +std::tm make_tm(int year, int month, int day) { + std::tm tm = {0}; + tm.tm_year = year - 1900; + tm.tm_mon = month - 1; + tm.tm_mday = day; + return tm; +} + +int32_t days_since_epoch(int year, int month, int day) { + std::tm tm = make_tm(year, month, day); + std::tm epoch = make_tm(1970, 1, 1); + std::time_t time = std::mktime(&tm); + std::time_t epoch_time = std::mktime(&epoch); + double diff = std::difftime(time, epoch_time) / (60*60*24); + return static_cast(diff); +} From da87ff47f325d15519a74985ddf6349de6b0bb0d Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Mon, 1 Jul 2024 20:08:05 -0700 Subject: [PATCH 051/124] Push down projections into read parquet --- cpp/examples/tpch/q5.cpp | 12 ++++++------ cpp/examples/tpch/q9.cpp | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 12ee2f50d04..95d39ca5bad 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -69,12 +69,12 @@ int main() { std::string dataset_dir = BASE_DATASET_DIR; // 1. Read out the tables from parquet files - auto customer = read_parquet(dataset_dir + "customer/part-0.parquet"); - auto orders = read_parquet(dataset_dir + "orders/part-0.parquet"); - auto lineitem = read_parquet(dataset_dir + "lineitem/part-0.parquet"); - auto supplier = read_parquet(dataset_dir + "supplier/part-0.parquet"); - auto nation = read_parquet(dataset_dir + "nation/part-0.parquet"); - auto region = read_parquet(dataset_dir + "region/part-0.parquet"); + auto customer = read_parquet(dataset_dir + "customer/part-0.parquet", {"c_custkey", "c_nationkey"}); + auto orders = read_parquet(dataset_dir + "orders/part-0.parquet", {"o_custkey", "o_orderkey", "o_orderdate"}); + auto lineitem = read_parquet(dataset_dir + "lineitem/part-0.parquet", {"l_orderkey", "l_suppkey", "l_extendedprice", "l_discount"}); + auto supplier = read_parquet(dataset_dir + "supplier/part-0.parquet", {"s_suppkey", "s_nationkey"}); + auto nation = read_parquet(dataset_dir + "nation/part-0.parquet", {"n_nationkey", "n_regionkey", "n_name"}); + auto region = read_parquet(dataset_dir + "region/part-0.parquet", {"r_regionkey", "r_name"}); // 2. Perform the joins auto join_a = apply_inner_join( diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index 1890eccd3d8..3f45d2cd271 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -83,12 +83,12 @@ int main() { std::string dataset_dir = BASE_DATASET_DIR; // 1. Read out the table from parquet files - auto lineitem = read_parquet(dataset_dir + "lineitem/part-0.parquet"); - auto nation = read_parquet(dataset_dir + "nation/part-0.parquet"); - auto orders = read_parquet(dataset_dir + "orders/part-0.parquet"); - auto part = read_parquet(dataset_dir + "part/part-0.parquet"); - auto partsupp = read_parquet(dataset_dir + "partsupp/part-0.parquet"); - auto supplier = read_parquet(dataset_dir + "supplier/part-0.parquet"); + auto lineitem = read_parquet(dataset_dir + "lineitem/part-0.parquet", {"l_suppkey", "l_partkey", "l_orderkey", "l_extendedprice", "l_discount", "l_quantity"}); + auto nation = read_parquet(dataset_dir + "nation/part-0.parquet", {"n_nationkey", "n_name"}); + auto orders = read_parquet(dataset_dir + "orders/part-0.parquet", {"o_orderkey", "o_orderdate"}); + auto part = read_parquet(dataset_dir + "part/part-0.parquet", {"p_partkey", "p_name"}); + auto partsupp = read_parquet(dataset_dir + "partsupp/part-0.parquet", {"ps_suppkey", "ps_partkey", "ps_supplycost"}); + auto supplier = read_parquet(dataset_dir + "supplier/part-0.parquet", {"s_suppkey", "s_nationkey"}); // 2. Generating the `profit` table // 2.1 Filter the part table using `p_name like '%green%'` From a423ea2b433772cc62a69cc247a6b0faccf42cb6 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Mon, 1 Jul 2024 20:25:58 -0700 Subject: [PATCH 052/124] Add comments to utils.hpp --- cpp/examples/tpch/utils.hpp | 82 +++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 267254fe198..3250542cb01 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -32,8 +32,13 @@ #include #include + +// The base directory for the TPC-H dataset const std::string BASE_DATASET_DIR = "/home/jayjeetc/tpch_sf1/"; +/** + * @brief A class to represent a table with column names attached + */ class table_with_cols { public: table_with_cols( @@ -42,12 +47,25 @@ class table_with_cols { cudf::table_view table() { return tbl->view(); } + /** + * @brief Return the column view for a given column name + * + * @param col_name The name of the column + */ cudf::column_view column(std::string col_name) { return tbl->view().column(col_id(col_name)); } + /** + * @param Return the column names of the table + */ std::vector columns() { return col_names; } + /** + * @brief Translate a column name to a column index + * + * @param col_name The name of the column + */ cudf::size_type col_id(std::string col_name) { auto it = std::find(col_names.begin(), col_names.end(), col_name); if (it == col_names.end()) { @@ -55,6 +73,12 @@ class table_with_cols { } return std::distance(col_names.begin(), it); } + /** + * @brief Append a column to the table + * + * @param col The column to append + * @param col_name The name of the appended column + */ std::unique_ptr append(std::unique_ptr& col, std::string col_name) { std::vector> updated_cols; std::vector updated_col_names; @@ -67,6 +91,11 @@ class table_with_cols { auto updated_table = std::make_unique(std::move(updated_cols)); return std::make_unique(std::move(updated_table), updated_col_names); } + /** + * @brief Select a subset of columns from the table + * + * @param col_names The names of the columns to select + */ cudf::table_view select(std::vector col_names) { std::vector col_indices; for (auto &col_name : col_names) { @@ -74,6 +103,11 @@ class table_with_cols { } return tbl->select(col_indices); } + /** + * @brief Write the table to a parquet file + * + * @param filepath The path to the parquet file + */ void to_parquet(std::string filepath) { auto sink_info = cudf::io::sink_info(filepath); cudf::io::table_metadata metadata; @@ -133,6 +167,15 @@ std::unique_ptr join_and_gather( return std::make_unique(std::move(joined_cols)); } +/** + * @brief Apply an inner join operation to two tables + * + * @param left_input The left input table + * @param right_input The right input table + * @param left_on The columns to join on in the left table + * @param right_on The columns to join on in the right table + * @param compare_nulls The null equality policy + */ std::unique_ptr apply_inner_join( std::unique_ptr& left_input, std::unique_ptr& right_input, @@ -155,6 +198,12 @@ std::unique_ptr apply_inner_join( concat(left_input->columns(), right_input->columns())); } +/** + * @brief Apply a filter predicated to a table + * + * @param table The input table + * @param predicate The filter predicate + */ std::unique_ptr apply_filter( std::unique_ptr& table, cudf::ast::operation& predicate) { auto boolean_mask = cudf::compute_column(table->table(), predicate); @@ -162,6 +211,12 @@ std::unique_ptr apply_filter( return std::make_unique(std::move(result_table), table->columns()); } +/** + * @brief Apply a boolean mask to a table + * + * @param table The input table + * @param mask The boolean mask + */ std::unique_ptr apply_mask( std::unique_ptr& table, std::unique_ptr& mask) { auto result_table = cudf::apply_boolean_mask(table->table(), mask->view()); @@ -173,6 +228,12 @@ struct groupby_context { std::unordered_map>> values; }; +/** + * @brief Apply a groupby operation to a table + * + * @param table The input table + * @param ctx The groupby context + */ std::unique_ptr apply_groupby( std::unique_ptr& table, groupby_context ctx) { auto keys = table->select(ctx.keys); @@ -213,6 +274,13 @@ std::unique_ptr apply_groupby( std::move(result_table), result_column_names); } +/** + * @brief Apply an order by operation to a table + * + * @param table The input table + * @param sort_keys The sort keys + * @param sort_key_orders The sort key orders + */ std::unique_ptr apply_orderby( std::unique_ptr& table, std::vector sort_keys, @@ -230,6 +298,13 @@ std::unique_ptr apply_orderby( std::move(result_table), table->columns()); } +/** + * @brief Apply a reduction operation to a column + * + * @param column The input column + * @param agg_kind The aggregation kind + * @param col_name The name of the output column + */ std::unique_ptr apply_reduction( cudf::column_view& column, cudf::aggregation::Kind agg_kind, std::string col_name) { auto agg = cudf::make_sum_aggregation(); @@ -245,6 +320,13 @@ std::unique_ptr apply_reduction( ); } +/** + * @brief Read a parquet file into a table + * + * @param filename The path to the parquet file + * @param columns The columns to read + * @param predicate The filter predicate to pushdown + */ std::unique_ptr read_parquet( std::string filename, std::vector columns = {}, std::unique_ptr predicate = nullptr) { auto source = cudf::io::source_info(filename); From 5312f345c992e6406bf494557d29e541369be83e Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Mon, 1 Jul 2024 16:46:52 -0700 Subject: [PATCH 053/124] Remove implementation status from README --- cpp/examples/tpch/README.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cpp/examples/tpch/README.md b/cpp/examples/tpch/README.md index 54fd0b2ba08..1aa3f6bd86c 100644 --- a/cpp/examples/tpch/README.md +++ b/cpp/examples/tpch/README.md @@ -54,10 +54,3 @@ The TPC-H query binaries would be built inside `examples/tpch/build`. ./tpch/build/tpch_q1 ``` A parquet file named `q1.parquet` would be generated holding the results of the query. - -## Implementation Status - -- [x] Q1 -- [x] Q5 -- [x] Q6 -- [x] Q9 From 6ef9d120e0fc58664881d7e1ee6e254238547059 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 2 Jul 2024 12:53:51 -0700 Subject: [PATCH 054/124] Add nvtx ranges to helper functions --- cpp/examples/tpch/utils.hpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 3250542cb01..8008781afe7 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -32,6 +32,8 @@ #include #include +#include + // The base directory for the TPC-H dataset const std::string BASE_DATASET_DIR = "/home/jayjeetc/tpch_sf1/"; @@ -143,7 +145,7 @@ std::unique_ptr join_and_gather( std::vector right_on, cudf::null_equality compare_nulls, rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { - + CUDF_FUNC_RANGE(); auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK; auto left_selected = left_input.select(left_on); auto right_selected = right_input.select(right_on); @@ -182,6 +184,7 @@ std::unique_ptr apply_inner_join( std::vector left_on, std::vector right_on, cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) { + CUDF_FUNC_RANGE(); std::vector left_on_indices; std::vector right_on_indices; for (auto &col_name : left_on) { @@ -206,6 +209,7 @@ std::unique_ptr apply_inner_join( */ std::unique_ptr apply_filter( std::unique_ptr& table, cudf::ast::operation& predicate) { + CUDF_FUNC_RANGE(); auto boolean_mask = cudf::compute_column(table->table(), predicate); auto result_table = cudf::apply_boolean_mask(table->table(), boolean_mask->view()); return std::make_unique(std::move(result_table), table->columns()); @@ -219,6 +223,7 @@ std::unique_ptr apply_filter( */ std::unique_ptr apply_mask( std::unique_ptr& table, std::unique_ptr& mask) { + CUDF_FUNC_RANGE(); auto result_table = cudf::apply_boolean_mask(table->table(), mask->view()); return std::make_unique(std::move(result_table), table->columns()); } @@ -236,6 +241,7 @@ struct groupby_context { */ std::unique_ptr apply_groupby( std::unique_ptr& table, groupby_context ctx) { + CUDF_FUNC_RANGE(); auto keys = table->select(ctx.keys); cudf::groupby::groupby groupby_obj(keys); std::vector result_column_names; @@ -285,6 +291,7 @@ std::unique_ptr apply_orderby( std::unique_ptr& table, std::vector sort_keys, std::vector sort_key_orders) { + CUDF_FUNC_RANGE(); std::vector column_views; for (auto& key : sort_keys) { column_views.push_back(table->column(key)); @@ -307,6 +314,7 @@ std::unique_ptr apply_orderby( */ std::unique_ptr apply_reduction( cudf::column_view& column, cudf::aggregation::Kind agg_kind, std::string col_name) { + CUDF_FUNC_RANGE(); auto agg = cudf::make_sum_aggregation(); auto result = cudf::reduce(column, *agg, column.type()); cudf::size_type len = 1; @@ -329,6 +337,7 @@ std::unique_ptr apply_reduction( */ std::unique_ptr read_parquet( std::string filename, std::vector columns = {}, std::unique_ptr predicate = nullptr) { + CUDF_FUNC_RANGE(); auto source = cudf::io::source_info(filename); auto builder = cudf::io::parquet_reader_options_builder(source); if (columns.size()) { From 903cb67a37a8241b4ed704f391c2a7f5459a678d Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 2 Jul 2024 13:35:30 -0700 Subject: [PATCH 055/124] Add more nvtx ranges --- cpp/examples/tpch/utils.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 8008781afe7..94e5efc5156 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -69,6 +69,7 @@ class table_with_cols { * @param col_name The name of the column */ cudf::size_type col_id(std::string col_name) { + CUDF_FUNC_RANGE(); auto it = std::find(col_names.begin(), col_names.end(), col_name); if (it == col_names.end()) { throw std::runtime_error("Column not found"); @@ -82,6 +83,7 @@ class table_with_cols { * @param col_name The name of the appended column */ std::unique_ptr append(std::unique_ptr& col, std::string col_name) { + CUDF_FUNC_RANGE(); std::vector> updated_cols; std::vector updated_col_names; for (size_t i = 0; i < tbl->num_columns(); i++) { @@ -99,6 +101,7 @@ class table_with_cols { * @param col_names The names of the columns to select */ cudf::table_view select(std::vector col_names) { + CUDF_FUNC_RANGE(); std::vector col_indices; for (auto &col_name : col_names) { col_indices.push_back(col_id(col_name)); @@ -111,6 +114,7 @@ class table_with_cols { * @param filepath The path to the parquet file */ void to_parquet(std::string filepath) { + CUDF_FUNC_RANGE(); auto sink_info = cudf::io::sink_info(filepath); cudf::io::table_metadata metadata; std::vector col_name_infos; From c0be3198c67ba26b96b1a60343ebc85899e6f50e Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 2 Jul 2024 13:53:34 -0700 Subject: [PATCH 056/124] Add script to run the benchmarks --- cpp/examples/tpch/run.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100755 cpp/examples/tpch/run.sh diff --git a/cpp/examples/tpch/run.sh b/cpp/examples/tpch/run.sh new file mode 100755 index 00000000000..b9c4dae9f8a --- /dev/null +++ b/cpp/examples/tpch/run.sh @@ -0,0 +1,15 @@ +#!/bin/bash +set -e + +query_no=$1 + +if [ -z "$query_no" ]; then + echo "Usage: $0 " + exit 1 +fi + +# Set up environment +export KVIKIO_COMPAT_MODE="on" +export LIBCUDF_CUFILE_POLICY="KVIKIO" + +./tpch/build/tpch_q${query_no} From 9ed2a09fe5dba1f68bfe8fbf92f42a07678e95a5 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 2 Jul 2024 14:06:34 -0700 Subject: [PATCH 057/124] Fix README --- cpp/examples/tpch/README.md | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/cpp/examples/tpch/README.md b/cpp/examples/tpch/README.md index 1aa3f6bd86c..0bfb1941a9b 100644 --- a/cpp/examples/tpch/README.md +++ b/cpp/examples/tpch/README.md @@ -1,16 +1,12 @@ # TPC-H Inspired Examples -Implementing the TPC-H queries using `libcudf`. +Implementing the TPC-H queries using `libcudf`. We leverage the data generator (wrapper around official TPC-H datagen) from [Apache Datafusion](https://github.com/apache/datafusion) for generating data in the form of Parquet files. -## Data Generation - -We leverage the data generator (wrapper around official TPC-H datagen) from [Apache Datafusion](https://github.com/apache/datafusion) for generating data in the form of Parquet files. - -### Requirements +## Requirements - Rust -### Steps +## Generating the Dataset 1. Clone the datafusion repository. ```bash @@ -26,30 +22,16 @@ cd datafusion/benchmarks/ ./bench.sh data tpch10 ``` -## Executing Queries - -### Steps - -1. Clone the cudf repository. -```bash -git clone git@github.com:JayjeetAtGithub/cudf.git -git checkout tpch-bench -``` - -2. Build `libcudf`. -```bash -cd cudf/ -./build.sh libcudf -``` +## Running Queries -3. Build the examples. +1. Build the examples. ```bash cd cpp/examples ./build.sh ``` The TPC-H query binaries would be built inside `examples/tpch/build`. -4. Execute the queries. +2. Execute the queries. ```bash ./tpch/build/tpch_q1 ``` From 7145fe88f051da0fd1ec69055962853d6b351e7e Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 2 Jul 2024 14:45:52 -0700 Subject: [PATCH 058/124] Pass the base dir as a cli arg --- cpp/examples/tpch/q1.cpp | 5 +++-- cpp/examples/tpch/q5.cpp | 7 ++++--- cpp/examples/tpch/q6.cpp | 5 +++-- cpp/examples/tpch/q9.cpp | 5 +++-- cpp/examples/tpch/utils.hpp | 11 +++++++---- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 527a19b544e..183e477cb3e 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -63,8 +63,9 @@ std::unique_ptr calc_charge(std::unique_ptr& tabl return charge; } -int main() { - std::string dataset_dir = BASE_DATASET_DIR; +int main(int argc, char const** argv) { + check_args(argc, argv); + std::string dataset_dir = argv[1]; // 1. Read out the `lineitem` table from parquet file auto shipdate_ref = cudf::ast::column_reference(5); diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 95d39ca5bad..53b403675b1 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -65,9 +65,10 @@ std::unique_ptr calc_revenue(std::unique_ptr& tab return disc_price; } -int main() { - std::string dataset_dir = BASE_DATASET_DIR; - +int main(int argc, char const** argv) { + check_args(argc, argv); + std::string dataset_dir = argv[1]; + // 1. Read out the tables from parquet files auto customer = read_parquet(dataset_dir + "customer/part-0.parquet", {"c_custkey", "c_nationkey"}); auto orders = read_parquet(dataset_dir + "orders/part-0.parquet", {"o_custkey", "o_orderkey", "o_orderdate"}); diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 2f2bdb445b8..fc174e53ced 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -46,8 +46,9 @@ std::unique_ptr calc_revenue(std::unique_ptr& tab return extendedprice_mul_discount; } -int main() { - std::string dataset_dir = BASE_DATASET_DIR; +int main(int argc, char const** argv) { + check_args(argc, argv); + std::string dataset_dir = argv[1]; // 1. Read out the `lineitem` table from parquet file auto shipdate_ref = cudf::ast::column_reference(2); diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index 3f45d2cd271..cd88da17a61 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -79,8 +79,9 @@ std::unique_ptr calc_amount(std::unique_ptr& tabl return amount; } -int main() { - std::string dataset_dir = BASE_DATASET_DIR; +int main(int argc, char const** argv) { + check_args(argc, argv); + std::string dataset_dir = argv[1]; // 1. Read out the table from parquet files auto lineitem = read_parquet(dataset_dir + "lineitem/part-0.parquet", {"l_suppkey", "l_partkey", "l_orderkey", "l_extendedprice", "l_discount", "l_quantity"}); diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 94e5efc5156..62a394f9d84 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -31,13 +31,9 @@ #include #include #include - #include -// The base directory for the TPC-H dataset -const std::string BASE_DATASET_DIR = "/home/jayjeetc/tpch_sf1/"; - /** * @brief A class to represent a table with column names attached */ @@ -377,3 +373,10 @@ int32_t days_since_epoch(int year, int month, int day) { double diff = std::difftime(time, epoch_time) / (60*60*24); return static_cast(diff); } + +void check_args(int argc, char const** argv) { + if (argc < 2) { + std::cerr << "Usage: " << argv[0] << " " << std::endl; + exit(1); + } +} From bbf40e247bbf4e40d37728f79e883ee11db6e04e Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 2 Jul 2024 14:53:33 -0700 Subject: [PATCH 059/124] Pass dataset path as an argument to tpch/run.sh --- cpp/examples/tpch/run.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/examples/tpch/run.sh b/cpp/examples/tpch/run.sh index b9c4dae9f8a..30cdff62729 100755 --- a/cpp/examples/tpch/run.sh +++ b/cpp/examples/tpch/run.sh @@ -2,6 +2,7 @@ set -e query_no=$1 +dataset_path=$2 if [ -z "$query_no" ]; then echo "Usage: $0 " @@ -12,4 +13,4 @@ fi export KVIKIO_COMPAT_MODE="on" export LIBCUDF_CUFILE_POLICY="KVIKIO" -./tpch/build/tpch_q${query_no} +./tpch/build/tpch_q${query_no} ${dataset_path} From a8b8255743f6b6000264471a3796a8d6e35a48ba Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 2 Jul 2024 15:12:32 -0700 Subject: [PATCH 060/124] Use memory resource for q1 --- cpp/examples/tpch/q1.cpp | 4 +++- cpp/examples/tpch/utils.hpp | 11 +++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 183e477cb3e..ed925b5df76 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -65,8 +65,10 @@ std::unique_ptr calc_charge(std::unique_ptr& tabl int main(int argc, char const** argv) { check_args(argc, argv); + use_memory_pool(); + std::string dataset_dir = argv[1]; - + // 1. Read out the `lineitem` table from parquet file auto shipdate_ref = cudf::ast::column_reference(5); auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 62a394f9d84..02e8d9fe029 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -33,6 +33,17 @@ #include #include +#include +#include +#include +#include + + +void use_memory_pool() { + rmm::mr::cuda_memory_resource cuda_mr{}; + rmm::mr::pool_memory_resource mr{&cuda_mr, rmm::percent_of_free_device_memory(100)}; + rmm::mr::set_current_device_resource(&mr); +} /** * @brief A class to represent a table with column names attached From 8f66bfb619cfd079bed0317b1b40f249af953085 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 2 Jul 2024 15:35:49 -0700 Subject: [PATCH 061/124] Use a memory pool --- cpp/examples/tpch/q1.cpp | 5 +++-- cpp/examples/tpch/q5.cpp | 3 +++ cpp/examples/tpch/q6.cpp | 3 +++ cpp/examples/tpch/q9.cpp | 3 +++ cpp/examples/tpch/utils.hpp | 19 +++++++++++++++---- 5 files changed, 27 insertions(+), 6 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index ed925b5df76..1e77da42616 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -65,10 +65,11 @@ std::unique_ptr calc_charge(std::unique_ptr& tabl int main(int argc, char const** argv) { check_args(argc, argv); - use_memory_pool(); - std::string dataset_dir = argv[1]; + auto resource = create_memory_resource(true); + rmm::mr::set_current_device_resource(resource.get()); + // 1. Read out the `lineitem` table from parquet file auto shipdate_ref = cudf::ast::column_reference(5); auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 53b403675b1..2bc7f9733ab 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -69,6 +69,9 @@ int main(int argc, char const** argv) { check_args(argc, argv); std::string dataset_dir = argv[1]; + auto resource = create_memory_resource(true); + rmm::mr::set_current_device_resource(resource.get()); + // 1. Read out the tables from parquet files auto customer = read_parquet(dataset_dir + "customer/part-0.parquet", {"c_custkey", "c_nationkey"}); auto orders = read_parquet(dataset_dir + "orders/part-0.parquet", {"o_custkey", "o_orderkey", "o_orderdate"}); diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index fc174e53ced..823adcd9fa7 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -50,6 +50,9 @@ int main(int argc, char const** argv) { check_args(argc, argv); std::string dataset_dir = argv[1]; + auto resource = create_memory_resource(true); + rmm::mr::set_current_device_resource(resource.get()); + // 1. Read out the `lineitem` table from parquet file auto shipdate_ref = cudf::ast::column_reference(2); auto shipdate_lower = cudf::timestamp_scalar( diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index cd88da17a61..46a959572e3 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -83,6 +83,9 @@ int main(int argc, char const** argv) { check_args(argc, argv); std::string dataset_dir = argv[1]; + auto resource = create_memory_resource(true); + rmm::mr::set_current_device_resource(resource.get()); + // 1. Read out the table from parquet files auto lineitem = read_parquet(dataset_dir + "lineitem/part-0.parquet", {"l_suppkey", "l_partkey", "l_orderkey", "l_extendedprice", "l_discount", "l_quantity"}); auto nation = read_parquet(dataset_dir + "nation/part-0.parquet", {"n_nationkey", "n_name"}); diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 02e8d9fe029..25db2a62296 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -37,12 +37,23 @@ #include #include #include +#include -void use_memory_pool() { - rmm::mr::cuda_memory_resource cuda_mr{}; - rmm::mr::pool_memory_resource mr{&cuda_mr, rmm::percent_of_free_device_memory(100)}; - rmm::mr::set_current_device_resource(&mr); +/** + * @brief Create memory resource for libcudf functions + * + * @param pool Whether to use a pool memory resource. + * @return Memory resource instance + */ +std::shared_ptr create_memory_resource(bool pool) +{ + auto cuda_mr = std::make_shared(); + if (pool) { + return rmm::mr::make_owning_wrapper( + cuda_mr, rmm::percent_of_free_device_memory(50)); + } + return cuda_mr; } /** From 1335ff97cc8be3fcfe88911b09e5a9e881db826f Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 2 Jul 2024 17:23:10 -0700 Subject: [PATCH 062/124] Add info to check args --- cpp/examples/tpch/utils.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 25db2a62296..0b67619b065 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -399,6 +399,9 @@ int32_t days_since_epoch(int year, int month, int day) { void check_args(int argc, char const** argv) { if (argc < 2) { std::cerr << "Usage: " << argv[0] << " " << std::endl; + std::cerr << std::endl; + std::cerr << "The query result will be saved to a parquet file named " << + "q{query_no}.parquet in the current working directory." << std::endl; exit(1); } } From e105141b2c22dc428dd32229d445183ccdaafe98 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 2 Jul 2024 18:12:54 -0700 Subject: [PATCH 063/124] Measure the query execution time using the Timer implementation from parquet_io --- cpp/examples/tpch/q1.cpp | 5 +++++ cpp/examples/tpch/q5.cpp | 7 ++++++- cpp/examples/tpch/q6.cpp | 5 +++++ cpp/examples/tpch/q9.cpp | 5 +++++ cpp/examples/tpch/utils.hpp | 31 +++++++++++++++++++++++++++++++ 5 files changed, 52 insertions(+), 1 deletion(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 1e77da42616..b0b89adbba4 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -67,9 +67,12 @@ int main(int argc, char const** argv) { check_args(argc, argv); std::string dataset_dir = argv[1]; + // Use a memory pool auto resource = create_memory_resource(true); rmm::mr::set_current_device_resource(resource.get()); + Timer timer; + // 1. Read out the `lineitem` table from parquet file auto shipdate_ref = cudf::ast::column_reference(5); auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); @@ -135,6 +138,8 @@ int main(int argc, char const** argv) { // 4. Perform the order by operation auto orderedby_table = apply_orderby(groupedby_table, {"l_returnflag", "l_linestatus"}, {cudf::order::ASCENDING, cudf::order::ASCENDING}); + + timer.print_elapsed_millis(); // 5. Write query result to a parquet file orderedby_table->to_parquet("q1.parquet"); diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 2bc7f9733ab..eda2c47f302 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -69,9 +69,12 @@ int main(int argc, char const** argv) { check_args(argc, argv); std::string dataset_dir = argv[1]; + // Use a memory pool auto resource = create_memory_resource(true); rmm::mr::set_current_device_resource(resource.get()); + Timer timer; + // 1. Read out the tables from parquet files auto customer = read_parquet(dataset_dir + "customer/part-0.parquet", {"c_custkey", "c_nationkey"}); auto orders = read_parquet(dataset_dir + "orders/part-0.parquet", {"o_custkey", "o_orderkey", "o_orderdate"}); @@ -167,7 +170,9 @@ int main(int argc, char const** argv) { auto groupedby_table = apply_groupby(appended_table, ctx); auto orderedby_table = apply_orderby( groupedby_table, {"revenue"}, {cudf::order::DESCENDING}); - + + timer.print_elapsed_millis(); + // 6. Write query result to a parquet file orderedby_table->to_parquet("q5.parquet"); return 0; diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 823adcd9fa7..c8b87a3980d 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -50,9 +50,12 @@ int main(int argc, char const** argv) { check_args(argc, argv); std::string dataset_dir = argv[1]; + // Use a memory pool auto resource = create_memory_resource(true); rmm::mr::set_current_device_resource(resource.get()); + Timer timer; + // 1. Read out the `lineitem` table from parquet file auto shipdate_ref = cudf::ast::column_reference(2); auto shipdate_lower = cudf::timestamp_scalar( @@ -142,6 +145,8 @@ int main(int argc, char const** argv) { "revenue" ); + timer.print_elapsed_millis(); + // 6. Write query result to a parquet file result_table->to_parquet("q6.parquet"); return 0; diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index 46a959572e3..bc9dbe490f6 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -83,9 +83,12 @@ int main(int argc, char const** argv) { check_args(argc, argv); std::string dataset_dir = argv[1]; + // Use a memory pool auto resource = create_memory_resource(true); rmm::mr::set_current_device_resource(resource.get()); + Timer timer; + // 1. Read out the table from parquet files auto lineitem = read_parquet(dataset_dir + "lineitem/part-0.parquet", {"l_suppkey", "l_partkey", "l_orderkey", "l_extendedprice", "l_discount", "l_quantity"}); auto nation = read_parquet(dataset_dir + "nation/part-0.parquet", {"n_nationkey", "n_name"}); @@ -168,6 +171,8 @@ int main(int argc, char const** argv) { {cudf::order::ASCENDING, cudf::order::DESCENDING} ); + timer.print_elapsed_millis(); + // 5. Write query result to a parquet file orderedby_table->to_parquet("q9.parquet"); } diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 0b67619b065..00919e6f809 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -405,3 +405,34 @@ void check_args(int argc, char const** argv) { exit(1); } } + +/** + * @brief Light-weight timer for parquet reader and writer instrumentation + * + * Timer object constructed from std::chrono, instrumenting at microseconds + * precision. Can display elapsed durations at milli and micro second + * scales. Timer starts at object construction. + */ +class Timer { + public: + using micros = std::chrono::microseconds; + using millis = std::chrono::milliseconds; + + Timer() { reset(); } + void reset() { start_time = std::chrono::high_resolution_clock::now(); } + auto elapsed() { return (std::chrono::high_resolution_clock::now() - start_time); } + void print_elapsed_micros() + { + std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() + << "us\n\n"; + } + void print_elapsed_millis() + { + std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() + << "ms\n\n"; + } + + private: + using time_point_t = std::chrono::time_point; + time_point_t start_time; +}; From 33b90bae1bb5bf1c5f36bb0fe894dcca685f4cf6 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 2 Jul 2024 22:09:16 -0700 Subject: [PATCH 064/124] Turn on/off memory pol usage --- cpp/examples/tpch/q1.cpp | 3 ++- cpp/examples/tpch/q5.cpp | 3 ++- cpp/examples/tpch/q6.cpp | 3 ++- cpp/examples/tpch/q9.cpp | 3 ++- cpp/examples/tpch/run.sh | 7 ++++--- cpp/examples/tpch/utils.hpp | 4 ++-- 6 files changed, 14 insertions(+), 9 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index b0b89adbba4..44413c9da19 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -66,9 +66,10 @@ std::unique_ptr calc_charge(std::unique_ptr& tabl int main(int argc, char const** argv) { check_args(argc, argv); std::string dataset_dir = argv[1]; + bool use_memory_pool = std::stoi(argv[2]); // Use a memory pool - auto resource = create_memory_resource(true); + auto resource = create_memory_resource(use_memory_pool); rmm::mr::set_current_device_resource(resource.get()); Timer timer; diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index eda2c47f302..bde96ab39a0 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -68,9 +68,10 @@ std::unique_ptr calc_revenue(std::unique_ptr& tab int main(int argc, char const** argv) { check_args(argc, argv); std::string dataset_dir = argv[1]; + bool use_memory_pool = std::stoi(argv[2]); // Use a memory pool - auto resource = create_memory_resource(true); + auto resource = create_memory_resource(use_memory_pool); rmm::mr::set_current_device_resource(resource.get()); Timer timer; diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index c8b87a3980d..5032906140c 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -49,9 +49,10 @@ std::unique_ptr calc_revenue(std::unique_ptr& tab int main(int argc, char const** argv) { check_args(argc, argv); std::string dataset_dir = argv[1]; + bool use_memory_pool = std::stoi(argv[2]); // Use a memory pool - auto resource = create_memory_resource(true); + auto resource = create_memory_resource(use_memory_pool); rmm::mr::set_current_device_resource(resource.get()); Timer timer; diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index bc9dbe490f6..3de11a508fa 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -82,9 +82,10 @@ std::unique_ptr calc_amount(std::unique_ptr& tabl int main(int argc, char const** argv) { check_args(argc, argv); std::string dataset_dir = argv[1]; + bool use_memory_pool = std::stoi(argv[2]); // Use a memory pool - auto resource = create_memory_resource(true); + auto resource = create_memory_resource(use_memory_pool); rmm::mr::set_current_device_resource(resource.get()); Timer timer; diff --git a/cpp/examples/tpch/run.sh b/cpp/examples/tpch/run.sh index 30cdff62729..23989e40787 100755 --- a/cpp/examples/tpch/run.sh +++ b/cpp/examples/tpch/run.sh @@ -2,10 +2,11 @@ set -e query_no=$1 -dataset_path=$2 +dataset_dir=$2 +use_memory_pool=$3 if [ -z "$query_no" ]; then - echo "Usage: $0 " + echo "Usage: $0 " exit 1 fi @@ -13,4 +14,4 @@ fi export KVIKIO_COMPAT_MODE="on" export LIBCUDF_CUFILE_POLICY="KVIKIO" -./tpch/build/tpch_q${query_no} ${dataset_path} +./tpch/build/tpch_q${query_no} ${dataset_dir} ${use_memory_pool} diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 00919e6f809..66d6ed6fca8 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -397,8 +397,8 @@ int32_t days_since_epoch(int year, int month, int day) { } void check_args(int argc, char const** argv) { - if (argc < 2) { - std::cerr << "Usage: " << argv[0] << " " << std::endl; + if (argc < 3) { + std::cerr << "Usage: " << argv[0] << " " << std::endl; std::cerr << std::endl; std::cerr << "The query result will be saved to a parquet file named " << "q{query_no}.parquet in the current working directory." << std::endl; From 5dda3173205a5f6de404f240d9507613ad2af258 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 2 Jul 2024 22:16:32 -0700 Subject: [PATCH 065/124] Add view creation sql to q1/q6 --- cpp/examples/tpch/q1.cpp | 4 +++- cpp/examples/tpch/q6.cpp | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 44413c9da19..d2fbbdf71a8 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -21,6 +21,8 @@ #include "utils.hpp" /* +create view lineitem as select * from '~/tpch_sf1/lineitem/part-0.parquet'; + select l_returnflag, l_linestatus, @@ -33,7 +35,7 @@ select avg(l_discount) as avg_disc, count(*) as count_order from - '~/tpch_sf1/lineitem/part-0.parquet' + lineitem where l_shipdate <= date '1998-09-02' group by diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 5032906140c..5643237e9f4 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -21,10 +21,12 @@ #include "utils.hpp" /* +create view lineitem as select * from '~/tpch_sf1/lineitem/part-0.parquet'; + select sum(l_extendedprice * l_discount) as revenue from - '~/tpch_sf1/lineitem/part-0.parquet' + lineitem where l_shipdate >= date '1994-01-01' and l_shipdate < date '1995-01-01' From 684e5e2949424fc6b64ad3804fa6d06add9ab7c0 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 3 Jul 2024 11:50:39 -0700 Subject: [PATCH 066/124] Push down filters in Q5 --- cpp/examples/tpch/q5.cpp | 111 +++++++++++++++++++++------------------ 1 file changed, 60 insertions(+), 51 deletions(-) diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index bde96ab39a0..87fdb79e884 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -77,12 +77,62 @@ int main(int argc, char const** argv) { Timer timer; // 1. Read out the tables from parquet files - auto customer = read_parquet(dataset_dir + "customer/part-0.parquet", {"c_custkey", "c_nationkey"}); - auto orders = read_parquet(dataset_dir + "orders/part-0.parquet", {"o_custkey", "o_orderkey", "o_orderdate"}); - auto lineitem = read_parquet(dataset_dir + "lineitem/part-0.parquet", {"l_orderkey", "l_suppkey", "l_extendedprice", "l_discount"}); - auto supplier = read_parquet(dataset_dir + "supplier/part-0.parquet", {"s_suppkey", "s_nationkey"}); - auto nation = read_parquet(dataset_dir + "nation/part-0.parquet", {"n_nationkey", "n_regionkey", "n_name"}); - auto region = read_parquet(dataset_dir + "region/part-0.parquet", {"r_regionkey", "r_name"}); + // while pushing down column projections and filter predicates + auto o_orderdate_ref = cudf::ast::column_reference(2); + + auto o_orderdate_lower = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); + auto o_orderdate_lower_limit = cudf::ast::literal(o_orderdate_lower); + auto o_orderdate_pred_a = cudf::ast::operation( + cudf::ast::ast_operator::GREATER_EQUAL, + o_orderdate_ref, + o_orderdate_lower_limit + ); + + auto o_orderdate_upper = cudf::timestamp_scalar(days_since_epoch(1995, 1, 1), true); + auto o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper); + auto o_orderdate_pred_b = cudf::ast::operation( + cudf::ast::ast_operator::LESS, + o_orderdate_ref, + o_orderdate_upper_limit + ); + + auto o_orderdate_pred = std::make_unique( + cudf::ast::ast_operator::LOGICAL_AND, + o_orderdate_pred_a, + o_orderdate_pred_b + ); + + auto r_name_ref = cudf::ast::column_reference(1); + auto r_name_value = cudf::string_scalar("ASIA"); + auto r_name_literal = cudf::ast::literal(r_name_value); + auto r_name_pred = std::make_unique( + cudf::ast::ast_operator::EQUAL, + r_name_ref, + r_name_literal + ); + + auto customer = read_parquet( + dataset_dir + "customer/part-0.parquet", {"c_custkey", "c_nationkey"}); + auto orders = read_parquet( + dataset_dir + "orders/part-0.parquet", + {"o_custkey", "o_orderkey", "o_orderdate"}, + std::move(o_orderdate_pred) + ); + auto lineitem = read_parquet( + dataset_dir + "lineitem/part-0.parquet", + {"l_orderkey", "l_suppkey", "l_extendedprice", "l_discount"} + ); + auto supplier = read_parquet( + dataset_dir + "supplier/part-0.parquet", {"s_suppkey", "s_nationkey"} + ); + auto nation = read_parquet( + dataset_dir + "nation/part-0.parquet", {"n_nationkey", "n_regionkey", "n_name"} + ); + auto region = read_parquet( + dataset_dir + "region/part-0.parquet", + {"r_regionkey", "r_name"}, + std::move(r_name_pred) + ); // 2. Perform the joins auto join_a = apply_inner_join( @@ -116,52 +166,11 @@ int main(int argc, char const** argv) { {"l_suppkey", "n_nationkey"} ); - // 3. Apply the filter predicates - auto o_orderdate_ref = cudf::ast::column_reference(joined_table->col_id("o_orderdate")); - - auto o_orderdate_lower = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); - auto o_orderdate_lower_limit = cudf::ast::literal(o_orderdate_lower); - auto o_orderdate_pred_a = cudf::ast::operation( - cudf::ast::ast_operator::GREATER_EQUAL, - o_orderdate_ref, - o_orderdate_lower_limit - ); - - auto o_orderdate_upper = cudf::timestamp_scalar(days_since_epoch(1995, 1, 1), true); - auto o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper); - auto o_orderdate_pred_b = cudf::ast::operation( - cudf::ast::ast_operator::LESS, - o_orderdate_ref, - o_orderdate_upper_limit - ); - - auto r_name_ref = cudf::ast::column_reference(joined_table->col_id("r_name")); - auto r_name_value = cudf::string_scalar("ASIA"); - auto r_name_literal = cudf::ast::literal(r_name_value); - auto r_name_pred = cudf::ast::operation( - cudf::ast::ast_operator::EQUAL, - r_name_ref, - r_name_literal - ); - - auto o_orderdate_pred = cudf::ast::operation( - cudf::ast::ast_operator::LOGICAL_AND, - o_orderdate_pred_a, - o_orderdate_pred_b - ); - - auto final_pred = cudf::ast::operation( - cudf::ast::ast_operator::LOGICAL_AND, - o_orderdate_pred, - r_name_pred - ); - auto filtered_table = apply_filter(joined_table, final_pred); - - // 4. Calcute and append the `revenue` column - auto revenue = calc_revenue(filtered_table); - auto appended_table = filtered_table->append(revenue, "revenue"); + // 3. Calcute and append the `revenue` column + auto revenue = calc_revenue(joined_table); + auto appended_table = joined_table->append(revenue, "revenue"); - // 5. Perform groupby and orderby operations + // 4. Perform groupby and orderby operations groupby_context ctx{ {"n_name"}, { From 45a4b629883a78370ad7a73406bce77f9d6ca415 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 3 Jul 2024 12:46:55 -0700 Subject: [PATCH 067/124] remove run.sh --- cpp/examples/tpch/run.sh | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100755 cpp/examples/tpch/run.sh diff --git a/cpp/examples/tpch/run.sh b/cpp/examples/tpch/run.sh deleted file mode 100755 index 23989e40787..00000000000 --- a/cpp/examples/tpch/run.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -set -e - -query_no=$1 -dataset_dir=$2 -use_memory_pool=$3 - -if [ -z "$query_no" ]; then - echo "Usage: $0 " - exit 1 -fi - -# Set up environment -export KVIKIO_COMPAT_MODE="on" -export LIBCUDF_CUFILE_POLICY="KVIKIO" - -./tpch/build/tpch_q${query_no} ${dataset_dir} ${use_memory_pool} From 68377ce85a62091304808d2bd9ff62e045dee72d Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 3 Jul 2024 14:10:41 -0700 Subject: [PATCH 068/124] Cleanup parsing cli arguments --- cpp/examples/tpch/q1.cpp | 8 +++----- cpp/examples/tpch/q5.cpp | 18 ++++++++---------- cpp/examples/tpch/q6.cpp | 8 +++----- cpp/examples/tpch/q9.cpp | 21 +++++++++++---------- cpp/examples/tpch/utils.hpp | 18 +++++++++++++++++- 5 files changed, 42 insertions(+), 31 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index d2fbbdf71a8..a134978f57e 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -66,12 +66,10 @@ std::unique_ptr calc_charge(std::unique_ptr& tabl } int main(int argc, char const** argv) { - check_args(argc, argv); - std::string dataset_dir = argv[1]; - bool use_memory_pool = std::stoi(argv[2]); + auto args = parse_args(argc, argv); // Use a memory pool - auto resource = create_memory_resource(use_memory_pool); + auto resource = create_memory_resource(args.use_memory_pool); rmm::mr::set_current_device_resource(resource.get()); Timer timer; @@ -86,7 +84,7 @@ int main(int argc, char const** argv) { shipdate_upper_literal ); auto lineitem = read_parquet( - dataset_dir + "lineitem/part-0.parquet", + args.dataset_dir + "lineitem/part-0.parquet", {"l_returnflag", "l_linestatus", "l_quantity", "l_extendedprice", "l_discount", "l_shipdate", "l_orderkey", "l_tax"}, std::move(shipdate_pred) ); diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 87fdb79e884..dab300e2da4 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -66,12 +66,10 @@ std::unique_ptr calc_revenue(std::unique_ptr& tab } int main(int argc, char const** argv) { - check_args(argc, argv); - std::string dataset_dir = argv[1]; - bool use_memory_pool = std::stoi(argv[2]); + auto args = parse_args(argc, argv); // Use a memory pool - auto resource = create_memory_resource(use_memory_pool); + auto resource = create_memory_resource(args.use_memory_pool); rmm::mr::set_current_device_resource(resource.get()); Timer timer; @@ -112,24 +110,24 @@ int main(int argc, char const** argv) { ); auto customer = read_parquet( - dataset_dir + "customer/part-0.parquet", {"c_custkey", "c_nationkey"}); + args.dataset_dir + "customer/part-0.parquet", {"c_custkey", "c_nationkey"}); auto orders = read_parquet( - dataset_dir + "orders/part-0.parquet", + args.dataset_dir + "orders/part-0.parquet", {"o_custkey", "o_orderkey", "o_orderdate"}, std::move(o_orderdate_pred) ); auto lineitem = read_parquet( - dataset_dir + "lineitem/part-0.parquet", + args.dataset_dir + "lineitem/part-0.parquet", {"l_orderkey", "l_suppkey", "l_extendedprice", "l_discount"} ); auto supplier = read_parquet( - dataset_dir + "supplier/part-0.parquet", {"s_suppkey", "s_nationkey"} + args.dataset_dir + "supplier/part-0.parquet", {"s_suppkey", "s_nationkey"} ); auto nation = read_parquet( - dataset_dir + "nation/part-0.parquet", {"n_nationkey", "n_regionkey", "n_name"} + args.dataset_dir + "nation/part-0.parquet", {"n_nationkey", "n_regionkey", "n_name"} ); auto region = read_parquet( - dataset_dir + "region/part-0.parquet", + args.dataset_dir + "region/part-0.parquet", {"r_regionkey", "r_name"}, std::move(r_name_pred) ); diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 5643237e9f4..88473eb2aa6 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -49,12 +49,10 @@ std::unique_ptr calc_revenue(std::unique_ptr& tab } int main(int argc, char const** argv) { - check_args(argc, argv); - std::string dataset_dir = argv[1]; - bool use_memory_pool = std::stoi(argv[2]); + auto args = parse_args(argc, argv); // Use a memory pool - auto resource = create_memory_resource(use_memory_pool); + auto resource = create_memory_resource(args.use_memory_pool); rmm::mr::set_current_device_resource(resource.get()); Timer timer; @@ -83,7 +81,7 @@ int main(int argc, char const** argv) { shipdate_pred_b ); auto lineitem = read_parquet( - dataset_dir + "lineitem/part-0.parquet", + args.dataset_dir + "lineitem/part-0.parquet", {"l_extendedprice", "l_discount", "l_shipdate", "l_quantity"}, std::move(shipdate_pred) ); diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index 3de11a508fa..bf2dc08e975 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -80,23 +80,24 @@ std::unique_ptr calc_amount(std::unique_ptr& tabl } int main(int argc, char const** argv) { - check_args(argc, argv); - std::string dataset_dir = argv[1]; - bool use_memory_pool = std::stoi(argv[2]); + auto args = parse_args(argc, argv); // Use a memory pool - auto resource = create_memory_resource(use_memory_pool); + auto resource = create_memory_resource(args.use_memory_pool); rmm::mr::set_current_device_resource(resource.get()); Timer timer; // 1. Read out the table from parquet files - auto lineitem = read_parquet(dataset_dir + "lineitem/part-0.parquet", {"l_suppkey", "l_partkey", "l_orderkey", "l_extendedprice", "l_discount", "l_quantity"}); - auto nation = read_parquet(dataset_dir + "nation/part-0.parquet", {"n_nationkey", "n_name"}); - auto orders = read_parquet(dataset_dir + "orders/part-0.parquet", {"o_orderkey", "o_orderdate"}); - auto part = read_parquet(dataset_dir + "part/part-0.parquet", {"p_partkey", "p_name"}); - auto partsupp = read_parquet(dataset_dir + "partsupp/part-0.parquet", {"ps_suppkey", "ps_partkey", "ps_supplycost"}); - auto supplier = read_parquet(dataset_dir + "supplier/part-0.parquet", {"s_suppkey", "s_nationkey"}); + auto lineitem = read_parquet( + args.dataset_dir + "lineitem/part-0.parquet", + {"l_suppkey", "l_partkey", "l_orderkey", "l_extendedprice", "l_discount", "l_quantity"}); + auto nation = read_parquet(args.dataset_dir + "nation/part-0.parquet", {"n_nationkey", "n_name"}); + auto orders = read_parquet(args.dataset_dir + "orders/part-0.parquet", {"o_orderkey", "o_orderdate"}); + auto part = read_parquet(args.dataset_dir + "part/part-0.parquet", {"p_partkey", "p_name"}); + auto partsupp = read_parquet(args.dataset_dir + "partsupp/part-0.parquet", + {"ps_suppkey", "ps_partkey", "ps_supplycost"}); + auto supplier = read_parquet(args.dataset_dir + "supplier/part-0.parquet", {"s_suppkey", "s_nationkey"}); // 2. Generating the `profit` table // 2.1 Filter the part table using `p_name like '%green%'` diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 66d6ed6fca8..10fc464d78f 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -396,7 +396,18 @@ int32_t days_since_epoch(int year, int month, int day) { return static_cast(diff); } -void check_args(int argc, char const** argv) { +struct tpch_args_t { + std::string dataset_dir; + bool use_memory_pool; +}; + +/** + * @brief Parse command line arguments into a struct + * + * @param argc The number of command line arguments + * @param argv The command line arguments + */ +tpch_args_t parse_args(int argc, char const **argv) { if (argc < 3) { std::cerr << "Usage: " << argv[0] << " " << std::endl; std::cerr << std::endl; @@ -404,6 +415,11 @@ void check_args(int argc, char const** argv) { "q{query_no}.parquet in the current working directory." << std::endl; exit(1); } + tpch_args_t args; + args.dataset_dir = argv[1]; + args.use_memory_pool = std::stoi(argv[2]); + return args; + } /** From dec52aea9d4e1e27e8f54e66c4c3c5ed67f10814 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 3 Jul 2024 14:14:53 -0700 Subject: [PATCH 069/124] Rename groupby_context to groupby_context_t --- cpp/examples/tpch/q1.cpp | 2 +- cpp/examples/tpch/q5.cpp | 19 +++++++++++-------- cpp/examples/tpch/q9.cpp | 4 ++-- cpp/examples/tpch/utils.hpp | 4 ++-- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index a134978f57e..666c3b66c9b 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -97,7 +97,7 @@ int main(int argc, char const** argv) { // 3. Perform the group by operation auto groupedby_table = apply_groupby( appended_table, - groupby_context{ + groupby_context_t { {"l_returnflag", "l_linestatus"}, { { diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index dab300e2da4..04a986e1bd4 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -168,14 +168,17 @@ int main(int argc, char const** argv) { auto revenue = calc_revenue(joined_table); auto appended_table = joined_table->append(revenue, "revenue"); - // 4. Perform groupby and orderby operations - groupby_context ctx{ - {"n_name"}, - { - {"revenue", {{cudf::aggregation::Kind::SUM, "revenue"}}}, - } - }; - auto groupedby_table = apply_groupby(appended_table, ctx); + // 4. Perform the groupby operation + auto groupedby_table = apply_groupby( + appended_table, + groupby_context_t { + {"n_name"}, + { + {"revenue", {{cudf::aggregation::Kind::SUM, "revenue"}}}, + } + }); + + // 5. Perform the order by operation auto orderedby_table = apply_orderby( groupedby_table, {"revenue"}, {cudf::order::DESCENDING}); diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index bf2dc08e975..d8a1164b78f 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -155,10 +155,10 @@ int main(int argc, char const** argv) { std::vector{"nation", "o_year", "amount"} ); - // 3. Perform the group by operation + // 3. Perform the groupby operation auto groupedby_table = apply_groupby( profit, - groupby_context{ + groupby_context_t { {"nation", "o_year"}, { {"amount", {{cudf::groupby_aggregation::SUM, "sum_profit"}}} diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 10fc464d78f..b82f6b5f9ce 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -250,7 +250,7 @@ std::unique_ptr apply_mask( return std::make_unique(std::move(result_table), table->columns()); } -struct groupby_context { +struct groupby_context_t { std::vector keys; std::unordered_map>> values; }; @@ -262,7 +262,7 @@ struct groupby_context { * @param ctx The groupby context */ std::unique_ptr apply_groupby( - std::unique_ptr& table, groupby_context ctx) { + std::unique_ptr& table, groupby_context_t ctx) { CUDF_FUNC_RANGE(); auto keys = table->select(ctx.keys); cudf::groupby::groupby groupby_obj(keys); From f7ec78dc75f7d2f07fb235af6d03d2c61418fa0c Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 3 Jul 2024 15:11:55 -0700 Subject: [PATCH 070/124] Add support for managed memory --- cpp/examples/tpch/q1.cpp | 2 +- cpp/examples/tpch/q5.cpp | 2 +- cpp/examples/tpch/q6.cpp | 2 +- cpp/examples/tpch/q9.cpp | 2 +- cpp/examples/tpch/utils.hpp | 29 +++++++++++++++++++---------- 5 files changed, 23 insertions(+), 14 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 666c3b66c9b..f9abe6a1a16 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -69,7 +69,7 @@ int main(int argc, char const** argv) { auto args = parse_args(argc, argv); // Use a memory pool - auto resource = create_memory_resource(args.use_memory_pool); + auto resource = create_memory_resource(args.use_memory_pool, args.use_managed_memory); rmm::mr::set_current_device_resource(resource.get()); Timer timer; diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 04a986e1bd4..2237801a660 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -69,7 +69,7 @@ int main(int argc, char const** argv) { auto args = parse_args(argc, argv); // Use a memory pool - auto resource = create_memory_resource(args.use_memory_pool); + auto resource = create_memory_resource(args.use_memory_pool, args.use_managed_memory); rmm::mr::set_current_device_resource(resource.get()); Timer timer; diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 88473eb2aa6..a7ab95005fe 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -52,7 +52,7 @@ int main(int argc, char const** argv) { auto args = parse_args(argc, argv); // Use a memory pool - auto resource = create_memory_resource(args.use_memory_pool); + auto resource = create_memory_resource(args.use_memory_pool, args.use_managed_memory); rmm::mr::set_current_device_resource(resource.get()); Timer timer; diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index d8a1164b78f..811d0fc1707 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -83,7 +83,7 @@ int main(int argc, char const** argv) { auto args = parse_args(argc, argv); // Use a memory pool - auto resource = create_memory_resource(args.use_memory_pool); + auto resource = create_memory_resource(args.use_memory_pool, args.use_managed_memory); rmm::mr::set_current_device_resource(resource.get()); Timer timer; diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index b82f6b5f9ce..ab256fba811 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -46,14 +47,20 @@ * @param pool Whether to use a pool memory resource. * @return Memory resource instance */ -std::shared_ptr create_memory_resource(bool pool) -{ - auto cuda_mr = std::make_shared(); - if (pool) { - return rmm::mr::make_owning_wrapper( - cuda_mr, rmm::percent_of_free_device_memory(50)); - } - return cuda_mr; +std::shared_ptr create_memory_resource( + bool pool, bool use_managed) { + std::shared_ptr cuda_mr; + if (use_managed) { + cuda_mr = std::make_shared(); + } else { + cuda_mr = std::make_shared(); + } + + if (pool) { + return rmm::mr::make_owning_wrapper( + cuda_mr, rmm::percent_of_free_device_memory(50)); + } + return cuda_mr; } /** @@ -399,6 +406,7 @@ int32_t days_since_epoch(int year, int month, int day) { struct tpch_args_t { std::string dataset_dir; bool use_memory_pool; + bool use_managed_memory; }; /** @@ -408,8 +416,8 @@ struct tpch_args_t { * @param argv The command line arguments */ tpch_args_t parse_args(int argc, char const **argv) { - if (argc < 3) { - std::cerr << "Usage: " << argv[0] << " " << std::endl; + if (argc < 4) { + std::cerr << "Usage: " << argv[0] << " " << std::endl; std::cerr << std::endl; std::cerr << "The query result will be saved to a parquet file named " << "q{query_no}.parquet in the current working directory." << std::endl; @@ -418,6 +426,7 @@ tpch_args_t parse_args(int argc, char const **argv) { tpch_args_t args; args.dataset_dir = argv[1]; args.use_memory_pool = std::stoi(argv[2]); + args.use_managed_memory = std::stoi(argv[3]); return args; } From 4b9972eca98477dbaaabafe644f03d5527aaced7 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 3 Jul 2024 15:50:16 -0700 Subject: [PATCH 071/124] Refactor mem management code --- cpp/examples/tpch/q1.cpp | 2 +- cpp/examples/tpch/q5.cpp | 2 +- cpp/examples/tpch/q6.cpp | 2 +- cpp/examples/tpch/q9.cpp | 2 +- cpp/examples/tpch/utils.hpp | 55 +++++++++++++++++++------------------ 5 files changed, 33 insertions(+), 30 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index f9abe6a1a16..14f7c088d9a 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -69,7 +69,7 @@ int main(int argc, char const** argv) { auto args = parse_args(argc, argv); // Use a memory pool - auto resource = create_memory_resource(args.use_memory_pool, args.use_managed_memory); + auto resource = create_memory_resource(args.memory_resource_type); rmm::mr::set_current_device_resource(resource.get()); Timer timer; diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 2237801a660..fc46a95fe77 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -69,7 +69,7 @@ int main(int argc, char const** argv) { auto args = parse_args(argc, argv); // Use a memory pool - auto resource = create_memory_resource(args.use_memory_pool, args.use_managed_memory); + auto resource = create_memory_resource(args.memory_resource_type); rmm::mr::set_current_device_resource(resource.get()); Timer timer; diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index a7ab95005fe..7cc8b845514 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -52,7 +52,7 @@ int main(int argc, char const** argv) { auto args = parse_args(argc, argv); // Use a memory pool - auto resource = create_memory_resource(args.use_memory_pool, args.use_managed_memory); + auto resource = create_memory_resource(args.memory_resource_type); rmm::mr::set_current_device_resource(resource.get()); Timer timer; diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index 811d0fc1707..1fb8f612253 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -83,7 +83,7 @@ int main(int argc, char const** argv) { auto args = parse_args(argc, argv); // Use a memory pool - auto resource = create_memory_resource(args.use_memory_pool, args.use_managed_memory); + auto resource = create_memory_resource(args.memory_resource_type); rmm::mr::set_current_device_resource(resource.get()); Timer timer; diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index ab256fba811..705f85af5b3 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -41,26 +41,32 @@ #include -/** - * @brief Create memory resource for libcudf functions - * - * @param pool Whether to use a pool memory resource. - * @return Memory resource instance - */ -std::shared_ptr create_memory_resource( - bool pool, bool use_managed) { - std::shared_ptr cuda_mr; - if (use_managed) { - cuda_mr = std::make_shared(); - } else { - cuda_mr = std::make_shared(); - } +inline auto make_unmanaged() { + return std::make_shared(); +} - if (pool) { - return rmm::mr::make_owning_wrapper( - cuda_mr, rmm::percent_of_free_device_memory(50)); - } - return cuda_mr; +inline auto make_unmanaged_pool() { + return rmm::mr::make_owning_wrapper( + make_unmanaged(), rmm::percent_of_free_device_memory(50)); +} + +inline auto make_managed() { + return std::make_shared(); +} + +inline auto make_managed_pool() { + return rmm::mr::make_owning_wrapper( + make_managed(), rmm::percent_of_free_device_memory(50)); +} + +inline std::shared_ptr create_memory_resource( + std::string const& mode) { + if (mode == "unmanaged") return make_unmanaged(); + if (mode == "unmanaged_pool") return make_unmanaged_pool(); + if (mode == "managed") return make_managed(); + if (mode == "managed_pool") return make_managed_pool(); + CUDF_FAIL("Unknown rmm_mode parameter: " + mode + + "\nExpecting: 'unmanaged', 'unmanaged_pool', 'managed', 'managed_pool'"); } /** @@ -405,8 +411,7 @@ int32_t days_since_epoch(int year, int month, int day) { struct tpch_args_t { std::string dataset_dir; - bool use_memory_pool; - bool use_managed_memory; + std::string memory_resource_type; }; /** @@ -416,8 +421,8 @@ struct tpch_args_t { * @param argv The command line arguments */ tpch_args_t parse_args(int argc, char const **argv) { - if (argc < 4) { - std::cerr << "Usage: " << argv[0] << " " << std::endl; + if (argc < 3) { + std::cerr << "Usage: " << argv[0] << " " << std::endl; std::cerr << std::endl; std::cerr << "The query result will be saved to a parquet file named " << "q{query_no}.parquet in the current working directory." << std::endl; @@ -425,10 +430,8 @@ tpch_args_t parse_args(int argc, char const **argv) { } tpch_args_t args; args.dataset_dir = argv[1]; - args.use_memory_pool = std::stoi(argv[2]); - args.use_managed_memory = std::stoi(argv[3]); + args.memory_resource_type = argv[2]; return args; - } /** From e69c34b573dd9f087f72d783cb87163e46d737e6 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 3 Jul 2024 16:47:15 -0700 Subject: [PATCH 072/124] Fix indentation of queries --- cpp/examples/tpch/q1.cpp | 2 +- cpp/examples/tpch/q5.cpp | 18 +++++++++--------- cpp/examples/tpch/q6.cpp | 2 +- cpp/examples/tpch/q9.cpp | 14 +++++++------- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 14f7c088d9a..3c2e1913874 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -37,7 +37,7 @@ select from lineitem where - l_shipdate <= date '1998-09-02' + l_shipdate <= date '1998-09-02' group by l_returnflag, l_linestatus diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index fc46a95fe77..09841e670ee 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -39,15 +39,15 @@ from nation, region where - c_custkey = o_custkey - and l_orderkey = o_orderkey - and l_suppkey = s_suppkey - and c_nationkey = s_nationkey - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'ASIA' - and o_orderdate >= date '1994-01-01' - and o_orderdate < date '1995-01-01' + c_custkey = o_custkey + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + and o_orderdate >= date '1994-01-01' + and o_orderdate < date '1995-01-01' group by n_name order by diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 7cc8b845514..4ba830d38b9 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -28,7 +28,7 @@ select from lineitem where - l_shipdate >= date '1994-01-01' + l_shipdate >= date '1994-01-01' and l_shipdate < date '1995-01-01' and l_discount >= 0.05 and l_discount <= 0.07 diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index 1fb8f612253..e0232660df8 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -48,13 +48,13 @@ from orders, nation where - s_suppkey = l_suppkey - and ps_suppkey = l_suppkey - and ps_partkey = l_partkey - and p_partkey = l_partkey - and o_orderkey = l_orderkey - and s_nationkey = n_nationkey - and p_name like '%green%' + s_suppkey = l_suppkey + and ps_suppkey = l_suppkey + and ps_partkey = l_partkey + and p_partkey = l_partkey + and o_orderkey = l_orderkey + and s_nationkey = n_nationkey + and p_name like '%green%' ) as profit group by nation, From 47199d58285427a53cfb18cfc8f17d95f31f1b03 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 3 Jul 2024 16:52:12 -0700 Subject: [PATCH 073/124] Rename predicates --- cpp/examples/tpch/q5.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 09841e670ee..8a68b936d51 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -80,7 +80,7 @@ int main(int argc, char const** argv) { auto o_orderdate_lower = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); auto o_orderdate_lower_limit = cudf::ast::literal(o_orderdate_lower); - auto o_orderdate_pred_a = cudf::ast::operation( + auto o_orderdate_pred_lower = cudf::ast::operation( cudf::ast::ast_operator::GREATER_EQUAL, o_orderdate_ref, o_orderdate_lower_limit @@ -88,7 +88,7 @@ int main(int argc, char const** argv) { auto o_orderdate_upper = cudf::timestamp_scalar(days_since_epoch(1995, 1, 1), true); auto o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper); - auto o_orderdate_pred_b = cudf::ast::operation( + auto o_orderdate_pred_upper = cudf::ast::operation( cudf::ast::ast_operator::LESS, o_orderdate_ref, o_orderdate_upper_limit @@ -96,8 +96,8 @@ int main(int argc, char const** argv) { auto o_orderdate_pred = std::make_unique( cudf::ast::ast_operator::LOGICAL_AND, - o_orderdate_pred_a, - o_orderdate_pred_b + o_orderdate_pred_lower, + o_orderdate_pred_upper ); auto r_name_ref = cudf::ast::column_reference(1); @@ -164,7 +164,7 @@ int main(int argc, char const** argv) { {"l_suppkey", "n_nationkey"} ); - // 3. Calcute and append the `revenue` column + // 3. Calculate and append the `revenue` column auto revenue = calc_revenue(joined_table); auto appended_table = joined_table->append(revenue, "revenue"); From b7a57a74827d70193d3ad402b14eab9c5e7020f5 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 3 Jul 2024 16:58:57 -0700 Subject: [PATCH 074/124] Fix comments --- cpp/examples/tpch/q1.cpp | 10 +++++----- cpp/examples/tpch/q5.cpp | 14 +++++++------- cpp/examples/tpch/q6.cpp | 12 ++++++------ cpp/examples/tpch/q9.cpp | 18 +++++++++--------- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 3c2e1913874..d960f8535ad 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -74,7 +74,7 @@ int main(int argc, char const** argv) { Timer timer; - // 1. Read out the `lineitem` table from parquet file + // Read out the `lineitem` table from parquet file auto shipdate_ref = cudf::ast::column_reference(5); auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); auto shipdate_upper_literal = cudf::ast::literal(shipdate_upper); @@ -89,12 +89,12 @@ int main(int argc, char const** argv) { std::move(shipdate_pred) ); - // 2. Calculate the discount price and charge columns and append to lineitem table + // Calculate the discount price and charge columns and append to lineitem table auto disc_price = calc_disc_price(lineitem); auto charge = calc_charge(lineitem, disc_price); auto appended_table = lineitem->append(disc_price, "disc_price")->append(charge, "charge"); - // 3. Perform the group by operation + // Perform the group by operation auto groupedby_table = apply_groupby( appended_table, groupby_context_t { @@ -137,12 +137,12 @@ int main(int argc, char const** argv) { } ); - // 4. Perform the order by operation + // Perform the order by operation auto orderedby_table = apply_orderby(groupedby_table, {"l_returnflag", "l_linestatus"}, {cudf::order::ASCENDING, cudf::order::ASCENDING}); timer.print_elapsed_millis(); - // 5. Write query result to a parquet file + // Write query result to a parquet file orderedby_table->to_parquet("q1.parquet"); return 0; } diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 8a68b936d51..a8cc42b5d92 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -74,8 +74,8 @@ int main(int argc, char const** argv) { Timer timer; - // 1. Read out the tables from parquet files - // while pushing down column projections and filter predicates + // Read out the tables from parquet files + // while pushing down column projections and filter predicates auto o_orderdate_ref = cudf::ast::column_reference(2); auto o_orderdate_lower = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); @@ -132,7 +132,7 @@ int main(int argc, char const** argv) { std::move(r_name_pred) ); - // 2. Perform the joins + // Perform the joins auto join_a = apply_inner_join( region, nation, @@ -164,11 +164,11 @@ int main(int argc, char const** argv) { {"l_suppkey", "n_nationkey"} ); - // 3. Calculate and append the `revenue` column + // Calculate and append the `revenue` column auto revenue = calc_revenue(joined_table); auto appended_table = joined_table->append(revenue, "revenue"); - // 4. Perform the groupby operation + // Perform the groupby operation auto groupedby_table = apply_groupby( appended_table, groupby_context_t { @@ -178,13 +178,13 @@ int main(int argc, char const** argv) { } }); - // 5. Perform the order by operation + // Perform the order by operation auto orderedby_table = apply_orderby( groupedby_table, {"revenue"}, {cudf::order::DESCENDING}); timer.print_elapsed_millis(); - // 6. Write query result to a parquet file + // Write query result to a parquet file orderedby_table->to_parquet("q5.parquet"); return 0; } diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 4ba830d38b9..99ba6fb3c13 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -57,7 +57,7 @@ int main(int argc, char const** argv) { Timer timer; - // 1. Read out the `lineitem` table from parquet file + // Read out the `lineitem` table from parquet file auto shipdate_ref = cudf::ast::column_reference(2); auto shipdate_lower = cudf::timestamp_scalar( days_since_epoch(1994, 1, 1), true); @@ -86,7 +86,7 @@ int main(int argc, char const** argv) { std::move(shipdate_pred) ); - // 2. Cast the discount and quantity columns to float32 and append to lineitem table + // Cast the discount and quantity columns to float32 and append to lineitem table auto discout_float = cudf::cast( lineitem->column("l_discount"), cudf::data_type{cudf::type_id::FLOAT32}); auto quantity_float = cudf::cast( @@ -95,7 +95,7 @@ int main(int argc, char const** argv) { ->append(discout_float, "l_discount_float") ->append(quantity_float, "l_quantity_float"); - // 3. Apply the filters + // Apply the filters auto discount_ref = cudf::ast::column_reference( appended_table->col_id("l_discount_float")); auto quantity_ref = cudf::ast::column_reference( @@ -135,10 +135,10 @@ int main(int argc, char const** argv) { ); auto filtered_table = apply_filter(appended_table, discount_quantity_pred); - // 4. Calculate the `revenue` column + // Calculate the `revenue` column auto revenue = calc_revenue(filtered_table); - // 5. Sum the `revenue` column + // Sum the `revenue` column auto revenue_view = revenue->view(); auto result_table = apply_reduction( revenue_view, @@ -148,7 +148,7 @@ int main(int argc, char const** argv) { timer.print_elapsed_millis(); - // 6. Write query result to a parquet file + // Write query result to a parquet file result_table->to_parquet("q6.parquet"); return 0; } diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index e0232660df8..4db36663678 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -88,7 +88,7 @@ int main(int argc, char const** argv) { Timer timer; - // 1. Read out the table from parquet files + // Read out the table from parquet files auto lineitem = read_parquet( args.dataset_dir + "lineitem/part-0.parquet", {"l_suppkey", "l_partkey", "l_orderkey", "l_extendedprice", "l_discount", "l_quantity"}); @@ -99,14 +99,14 @@ int main(int argc, char const** argv) { {"ps_suppkey", "ps_partkey", "ps_supplycost"}); auto supplier = read_parquet(args.dataset_dir + "supplier/part-0.parquet", {"s_suppkey", "s_nationkey"}); - // 2. Generating the `profit` table - // 2.1 Filter the part table using `p_name like '%green%'` + // Generating the `profit` table + // Filter the part table using `p_name like '%green%'` auto p_name = part->table().column(1); auto mask = cudf::strings::like( cudf::strings_column_view(p_name), cudf::string_scalar("%green%")); auto part_filtered = apply_mask(part, mask); - // 2.2 Perform the joins + // Perform the joins auto join_a = apply_inner_join( lineitem, supplier, @@ -138,12 +138,12 @@ int main(int argc, char const** argv) { {"n_nationkey"} ); - // 2.3 Calculate the `nation`, `o_year`, and `amount` columns + // Calculate the `nation`, `o_year`, and `amount` columns auto n_name = std::make_unique(joined_table->column("n_name")); auto o_year = cudf::datetime::extract_year(joined_table->column("o_orderdate")); auto amount = calc_amount(joined_table); - // 2.4 Put together the `profit` table + // Put together the `profit` table std::vector> profit_columns; profit_columns.push_back(std::move(n_name)); profit_columns.push_back(std::move(o_year)); @@ -155,7 +155,7 @@ int main(int argc, char const** argv) { std::vector{"nation", "o_year", "amount"} ); - // 3. Perform the groupby operation + // Perform the groupby operation auto groupedby_table = apply_groupby( profit, groupby_context_t { @@ -166,7 +166,7 @@ int main(int argc, char const** argv) { } ); - // 4. Perform the orderby operation + // Perform the orderby operation auto orderedby_table = apply_orderby( groupedby_table, {"nation", "o_year"}, @@ -175,6 +175,6 @@ int main(int argc, char const** argv) { timer.print_elapsed_millis(); - // 5. Write query result to a parquet file + // Write query result to a parquet file orderedby_table->to_parquet("q9.parquet"); } From 2cb21ed7707a3f40f7542ce8b207ad63cab9b1f4 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 3 Jul 2024 18:18:58 -0700 Subject: [PATCH 075/124] Dynamically determine scale of bin op --- cpp/examples/tpch/q1.cpp | 14 ++++++++++++-- cpp/examples/tpch/q5.cpp | 8 ++++++-- cpp/examples/tpch/q6.cpp | 14 ++++++++++---- cpp/examples/tpch/q9.cpp | 14 ++++++++++++-- cpp/examples/tpch/utils.hpp | 8 ++++++++ 5 files changed, 48 insertions(+), 10 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index d960f8535ad..0b62fc9709d 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -51,7 +51,12 @@ std::unique_ptr calc_disc_price(std::unique_ptr& auto discount = table->column("l_discount"); auto one_minus_discount = cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type()); auto extended_price = table->column("l_extendedprice"); - auto disc_price_type = cudf::data_type{cudf::type_id::DECIMAL64, -4}; + auto disc_price_scale = cudf::binary_operation_fixed_point_scale( + cudf::binary_operator::MUL, + table->column_type("l_extendedprice").scale(), + one_minus_discount->type().scale() + ); + auto disc_price_type = cudf::data_type{cudf::type_id::DECIMAL64, disc_price_scale}; auto disc_price = cudf::binary_operation(extended_price, one_minus_discount->view(), cudf::binary_operator::MUL, disc_price_type); return disc_price; } @@ -60,7 +65,12 @@ std::unique_ptr calc_charge(std::unique_ptr& tabl auto one = cudf::fixed_point_scalar(1); auto tax = table->column("l_tax"); auto one_plus_tax = cudf::binary_operation(one, tax, cudf::binary_operator::ADD, tax.type()); - auto charge_type = cudf::data_type{cudf::type_id::DECIMAL64, -6}; + auto charge_scale = cudf::binary_operation_fixed_point_scale( + cudf::binary_operator::MUL, + disc_price->type().scale(), + one_plus_tax->type().scale() + ); + auto charge_type = cudf::data_type{cudf::type_id::DECIMAL64, charge_scale}; auto charge = cudf::binary_operation(disc_price->view(), one_plus_tax->view(), cudf::binary_operator::MUL, charge_type); return charge; } diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index a8cc42b5d92..a348abef23d 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -59,8 +59,12 @@ std::unique_ptr calc_revenue(std::unique_ptr& tab auto disc = table->column("l_discount"); auto one_minus_disc = cudf::binary_operation(one, disc, cudf::binary_operator::SUB, disc.type()); auto extended_price = table->column("l_extendedprice"); - - auto disc_price_type = cudf::data_type{cudf::type_id::DECIMAL64, -4}; + auto disc_price_scale = cudf::binary_operation_fixed_point_scale( + cudf::binary_operator::MUL, + table->column_type("l_extendedprice").scale(), + one_minus_disc->type().scale() + ); + auto disc_price_type = cudf::data_type{cudf::type_id::DECIMAL64, disc_price_scale}; auto disc_price = cudf::binary_operation(extended_price, one_minus_disc->view(), cudf::binary_operator::MUL, disc_price_type); return disc_price; } diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 99ba6fb3c13..cb86f428ddd 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -38,14 +38,20 @@ where std::unique_ptr calc_revenue(std::unique_ptr& table) { auto extendedprice = table->column("l_extendedprice"); auto discount = table->column("l_discount"); - auto extendedprice_mul_discount_type = cudf::data_type{cudf::type_id::DECIMAL64, -4}; - auto extendedprice_mul_discount = cudf::binary_operation( + auto revenue_scale = cudf::binary_operation_fixed_point_scale( + cudf::binary_operator::MUL, + table->column_type("l_extendedprice").scale(), + table->column_type("l_discount").scale() + ); + auto revenue_type = cudf::data_type{ + cudf::type_id::DECIMAL64, revenue_scale}; + auto revenue = cudf::binary_operation( extendedprice, discount, cudf::binary_operator::MUL, - extendedprice_mul_discount_type + revenue_type ); - return extendedprice_mul_discount; + return revenue; } int main(int argc, char const** argv) { diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index 4db36663678..766038a67d4 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -69,11 +69,21 @@ std::unique_ptr calc_amount(std::unique_ptr& tabl auto discount = table->column("l_discount"); auto one_minus_discount = cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type()); auto extended_price = table->column("l_extendedprice"); - auto extended_price_discounted_type = cudf::data_type{cudf::type_id::DECIMAL64, -4}; + auto extended_price_discounted_scale = cudf::binary_operation_fixed_point_scale( + cudf::binary_operator::MUL, + table->column_type("l_extendedprice").scale(), + one_minus_discount->type().scale() + ); + auto extended_price_discounted_type = cudf::data_type{cudf::type_id::DECIMAL64, extended_price_discounted_scale}; auto extended_price_discounted = cudf::binary_operation(extended_price, one_minus_discount->view(), cudf::binary_operator::MUL, extended_price_discounted_type); auto supply_cost = table->column("ps_supplycost"); auto quantity = table->column("l_quantity"); - auto supply_cost_quantity_type = cudf::data_type{cudf::type_id::DECIMAL64, -4}; + auto supply_cost_quantity_scale = cudf::binary_operation_fixed_point_scale( + cudf::binary_operator::MUL, + table->column_type("ps_supplycost").scale(), + table->column_type("l_quantity").scale() + ); + auto supply_cost_quantity_type = cudf::data_type{cudf::type_id::DECIMAL64, supply_cost_quantity_scale}; auto supply_cost_quantity = cudf::binary_operation(supply_cost, quantity, cudf::binary_operator::MUL, supply_cost_quantity_type); auto amount = cudf::binary_operation(extended_price_discounted->view(), supply_cost_quantity->view(), cudf::binary_operator::SUB, extended_price_discounted->type()); return amount; diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 705f85af5b3..5c7b62edff3 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -88,6 +88,14 @@ class table_with_cols { cudf::column_view column(std::string col_name) { return tbl->view().column(col_id(col_name)); } + /** + * @brief Return the data type of a column + * + * @param col_name The name of the column + */ + cudf::data_type column_type(std::string col_name) { + return tbl->view().column(col_id(col_name)).type(); + } /** * @param Return the column names of the table */ From 057f54ab67e36a06e78382ef6f65e0b0c6e97584 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Thu, 4 Jul 2024 10:53:53 -0700 Subject: [PATCH 076/124] use east const --- cpp/examples/tpch/plot.png | Bin 0 -> 32118 bytes cpp/examples/tpch/utils.hpp | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 cpp/examples/tpch/plot.png diff --git a/cpp/examples/tpch/plot.png b/cpp/examples/tpch/plot.png new file mode 100644 index 0000000000000000000000000000000000000000..2c1f39461494fee638ff449efbc17e2fc8fe9e4a GIT binary patch literal 32118 zcmc%x2{e{ryFQHHhzyk|LnSgaJ&6VsnG%r(b0l-Akc@@Q85@vFBoSrK7$UQZQkfcz z850dcD6{Xldf&bG`(Jy%>%V{Bx7PQys#TBYxu5&KhVwkm<2cUivgQ%hrL3!2DT-Qp zKuuYjqUbXzif){l39ro3XOSOBJF6Ht>)4%gb}@Ifqz;=q+n=>_K5J#M?t-PGla<}M zonq2r+eFu$adx(Mk`WiT{m(au**TsTmsq4$fQzu$s~vNqD0Xx5hb~1S*@~h(`VJ`X z)pd;-`s8Au)4Z^F#D$(^&B`kY-IQoy#nyyi_nr#te1p9i?)qD3hN}5TQ_pLjX3Wvk z;>aj4*jo27qx@;U;iF4z4(6+BnMJ1N_7<5oQMvo7W+r51Z@6pJ&3(Qy`u0`PfYJfw z)nk2O3bcDMF}HS|>v;L(ZC_1T#E~OMY6ZM!wOXXyf1kB{mvlHHj9*SWluuf__2GdP zmcd{uRMhv{}ckE|Zn7#kwU2<5iLtnM}u3fwOUxo_@ocQv=d)dZ~8~dXr zZFCFfMr#CW=@^(ct=V<%(T}FYtB%c?#Bn*-Q*SkE7cmat{!4`MGhO_)i0I-l3i| zzboIpTOr^zDTvp_jGy{{Z_ax5;zgob&9%_b>J+U9W>!|0H|r#=+p>iXV-ye(5z$cH z)WnI~t#Rq{40``w*W-8VX^K)+t!Z-WadRCXr<39#wrlxu@zSY4T!VUNLGUQo*U_E#4+?nRg zwV3z!Z8guIr!aP~%TH3izP^-CvPK;DbiQqS!BV_QCGF3Pt2`8CV`G!=`SIgNwL^#e z4{h+%rES$NS5{8+t&C8+x^%6K?xcwbr}Ne4dwGy6RGgI>o zEu5FHIExk;h3-6EcOO%ybcQikRa;J0*8bP$SH0!eg}${uPOk0y)8#FIo7lEvN7dug z^^_uSyl5)cK}B=3zU>T_&GOZ&r|SmF7v^Uszb40Jn?GZkou3&lbQ)MzVB5}g;>3wJ zcXlrJTg-mc%Ff2->iUDXQ_iLz&4?Z;&Ci!C!>5!!VA|)D@?USoBvHSXBr!2DQ9k42 z&P@73P0aXf>w501TIol(M%e!T{hRVBEZj}C3dHNDA3Z$DTq=SOhv3ccQ*~5r<4Q|C zilbgzOavWl7UYnbl zX?D-wsAWmsap#b&>5&vk2?^21eJeP){3|0;ZoIbptS1&3>9jteIoI-_fuZ4-qI%rV zhcvxsxS(-LS<<09mlkHq7gq2O%o~*Def|2ivbA+pU0vOow!F=OOV>#v8^mp+%y53htGDsP>8wx`X+5`g)jjH?)}H;)=5cmEB>x| zOKBEgu#J&+V!{@g3Vmv5U_enfZr;=@Q1G1A?axU}T)$-L($SHgefjc^p~tnHzrANc zXsG^>uHX3R*kk(kx>&j6{rTk1MEi|qZ|zLZ%kJ&#d+i)*Gx6<%Id=Q;C+AX!iXHm) z=OhU%=ZN4Yb5wAmW?4@{dd$@%5zfo@4jO-ce#*g1kB&b%(%<_i=C@EdGv*-0FrP+Z z2#4@~<{6it9jQ$w8XD~U{QSEw{NRn1_oTgXD)nBFuZxk^3CzkJ+#kjtf}t?$EOBdj zdVWi%`>?}@1u!IZez3bjz=_q$yx&K(b17oiDIj`xj z8=u$IJlG$yn(`SNsPC)vTTD|BR#xs0!8FlNB0@V%k6YyrJquj=`j0f$-EPlmiBt8l zOZD~jW8^(oV^Qxxq!9PG)|qs8jjOx+iJ9L+u9G8zZBOjj1C)Fb#UQW`huIDWW4+MW%7_uMQK%G@6@SNXF3XVYE=hsZa7?poZv7yVm>?lJ!@oq z{5h`i;kIR(X}P@8zI{1oTX$exF-b{DMeVR;H7WBr7Jq5W_0rXfCr^st=MJ-z4nNxR z*|bwvVh1oYGiw${mp{6;X4kUv*p{7mYxx24Q zb8_HU#Lk=~wsk`IMO%Uej9*Qx7!3S+$i-`PIXF02ZOgVo^49R9X7=mJ8!{@o4PU9K zs_GX`>>_XYRkdcht|LOgyYkJS;3Mj0HKlxk`BJzx-$&kcj1T zNl8gAd3kwecJ?6TJ)a*O2+MqbZ=BLjm8iF(s;jHFZQtIgsG>Y>TC)4<|NaTJ5+7=A zqUkCqkDoJIhR@N~=NMc&yzxnoRnwU4pVO8n>8RWMh76v5B@4xmthCnl$t>COc=cs9 zE;4+2h8j0x`^>9u%Kq7W(vhm@Yj#+EaH96Hs8H?VWL7dC|2l8~X81B`iWmaL&HDJw z8pd^pH9~<-@!`fxYGEhjm7}Zv4p8@34VQSe*V^s;5wJvUTt#@xQBxQod*r>>5Xxz=~Mo9QfadADQ|x`AJrmIB*N=*%OwQ z0T?>RG%|kbEBv;6y@ow){{K7tISu=T!q241VlmPLne>}hX{YMiuJ*ya02WE;l0_A< zL2SS3|3|CszxK(0jCX$2@1gd^2wVACTw9K&$f>u{<^Zd%qrCx!#ar|7LnT+a}Rb?80v9 zG0M1P$&%$OSNd;1QQci5;O$@L_1m=ZSrGen>-U@TY?_y$1lZ^7EOGRa0iB$j+_lw` zX&olwWKAV)E4;0Vi~`hqXP^vl{=Um_XOxufYU=*syHbVjf5bj!nG$G8RUn}{e0j6V zKQOTB*)s;p%+8KoTwI(_+HsMIi3#P?(4eWCVaNq6bV-*MP+Yu+AerH^nPpyHUf0zV zEf*|EjFAY|VXS?L8fCP*l41PYhl7^Ik&!%gaSH5r4lz+cxhvniecPC6yeB@y&yQiX z#2Fu*I|q{Pr%HeST^{ z;QgaxD*(}|2OA&6CnpC*Me$Ot50ZR>hc_EpiJAs&8%9X+WVBtSl6>+kt-X-n|4ebBh}IJbWlZ`4klu z)s8vx5B$(La^&b8;k(bOs_37b`$T`;bM&Z*=Yd3R-c_quJh>vZQ{+8PHS5w| z@)})DMZI&TtE_aS1ycqJ^rwx;|_{>=Iz_J zqd!`A=9k^Lal>CqS~yo)I7|BEj#W)fP5u{NtPfYy%rYtEoY0D$-;@W;@Qbg#PZ zs}WCswLRMlZ->WBrEK56o#v!-!op%5-VA&k{|N_zQw^xQ=kw=9>t-Irua>lC zb~15#qD70ab~3%HF2bqzEi6ubtWQZOEoM?)NmsrwO>wh~^WgIF@$rUi^L_8r^w`dS z|EQv?d&_W*i;Ii-c}hb=LuCE>9&9b8yDcpS9c7+tVq;^gr0_bQf)^h)AkFj6-MbHt zJw9bQTT23%M3`{q3khtd!lELAZ7F+Xd2CxVK#{8VKByNfy1E2_e**8`U9)Vhj6bSm zl5Pf*agC9Yk&2EYr|qc4d3kxwfI96jT-XmpsTb*u6sAwkEoOI;g6_kg! z+if=B3!6l+e4}^SnZ&m(T`j@v$F!7|9fFGVwwUpi#DkICEtzg(UoaS(od1Tbr~Xd= z4j)KoKHPcBk@42AAB+`$vyfBpxbpI^qkkmvHMY;Y)n)qrnT1-pk%jtmV#DRFmpw9# zwZtyc+KfWrp4Q{?o7;M$1;S0fc76%Rx7M;Fhb#$?i7hXUWcc|+f>Cc>$f*hEqmO@`#cCT3GpQxOr7l`Yk^wXC?cexxKl zXOa*fa+sT&R|VTxThlx|JeKqGFT)y*4^b9s)OAzVrP~gq6~13BQ9J)X@r0&|vToyO zuP8>~e+A9{myhs2VY`GK$(ozC0Ej%iH!f*Rad3Q<&oS_yD6SLwsiI9H*iMSND#h?%H%8X|i~|z+yo0KEFsU3o@v#3GH-{C+5w5=w z6Ug!Z!R=uH{{00!5Ay)SMBLO4XH3)%pDUHsnbZkS4^a~dRC{E+p!e$J4ph6`Ex-+m zw20d}D#}my6?ZPhDmY#8G=K4xHxeIgce$j?av!{AKJ-l|)-odMzatVg7lMSt_m?!g zUjMts)MifQ1q1{D_MoV%s;ODr)6=s}N{Z>aT(7#aO7d8xq_p%Z{VbCgrS5-5>J=6g zML6TFKcv|^J1ZlzVkN|X<-82M7j7URbeKnr36M1Q9d#`v#N5In!S_nAbI=hceXKH5 zuPMfvKf^WsuOs6%KhLcxH4xnowrIOq1%=%lG!{bEGi}>FwoFiTRj9m1@n!doi!Vg& zJiQE&&)+cLx)Rw!J(2Ukfdj6;KC_(r^fbU}pw7IfDu9`lwd(oh6+L|qjGa>dUWg4B zx+`T{g^FUkeYaxNojd)lNw`GH(TvsD=b`4$0(-#cw1CCAN8@^9zh9#ADb?UVMlO8Y`v#pZP!_{S6!Xe`hAMv zS$Cv500hCCuNoexvKKymYMre_);w*=y)_PVS&VGl-Kh4s_{-hKm;^k>Ii${Y1P_-l z@T07G0Uqd}M+|t4DsXhDo(kQ@l4{H3+<)I!4db_WaM%lGWu)D%JXKC%{dz_`x61dA z8L0Ai9=a#PC2mrpY2(sg%jGfNsA_-yyh`$sd&~LwmLzH)>zyXpgC=v2Jrjh(emq`4 zjl#n0PLmQ>-;ojf*{+2-0a6Tt{^S%mjq>^W>wfEYc7EAlq@V|wcoJkW)7#yCoir-G za5(W3@Rn8Fj|~AvHjE=-jkM=EVlge9>H1<_E)yGttNv%((^3m)Z5j_ zoQcmTR$#>RoMi-z7JyQ8vrLR#J$j$By2PJ#8rODR&!cx32FmynFi zYOrG*Z2^IS30SKJ`PN(=pFVB3c+Px{PnxsK^UvCoJC0p8VZa?}ktP12qk|7Anx1-? zoZR=T|24R~d)cO-Gf=9om#Mz7{$O`sAA`HQd&Cd`R5jpL0{}t95WwGV@A;DDQX2PU z7@eG)BA)W`@evL%M&`VF%Y|?6H#>iAQt98%f;U!ustgWJ@i@kNXYZZ*!{@G~kTcGvMX$x^q~VV{H8$2uZMht;fW`)aOVW}Cl1W}Nl5rKNCE zh4U+gMScF;`@OolD}Q#Cv*QL@T3cCWfV;MN&wISjGA%dowCgD3IC}Ib?{0fDtB7IGI85scs8#Zoa z@X7AN+NzJ0)7i6MTl+@ChSSPY0|x&E4h8=K4lNYQOXghERSvYaw>v$LNLixy-1~8& zv5r;G42M=FO~sy(@W#EPeKl%Fj?|4Ej)yj&@@@Fl&5bYLym{V|bK=AArE8>?9FCT# ztc}=0ce4KcTe)45SQDI=7lD?izb>aS0mz3Tr`aU|=-N9v(qEq&)eKoJ=`WpC)V?V~ zJ-;YRMUe9{ZK6gaamqoEBSOx2epmYItdJCmK@WS+>G929>h5jnIJqvXfd;mDR?7__ zDciPPD#8}1xH8xBb@rJ?p~Aw#`$sbjQ;t6o1M8)`{{s73nMMDJJTcmI1NCvL;ArVi zE^canVs~PEu(4`PR!)vkM{i=JS8!fd9(;`}!9v;x%flwZfW|=HSB^wdmjcCPS%39KKR^D@Z-PWy3 z1qB5`AO@C}%3;+fODU~JaccScwt3|x#+S9Vmx6*8gH&5QXLK+bA6pzBAJ4pG37JE} z$ENBeTYP>Q-anTMtSN9ePVU9=fa!ZH{o>@f%64&mH#b_^{!8?#+D1%Ceol_oaiz3J zfaQWRGKRJ@_xJj31A9pyXLIUQAV90d*QP|q0-MefX`~=^-Hk|C_Zp=n_4XT$CDb$? zO|WR3+H*Pt0g-_0$UP_|9plA0D-}h>My|eF6nsT>K-I|8!((0K>&8-d7fcibc)x~s z?{utCB5p%4eY;YPOi(}qK*9ih64FbW(ej8je;F~Y7l^Cp@yKci4pjEOxO(us9^Kzm zXo?Il+)?C**lsq`T^TJ#sFGlH#R2iU+{cgKIkS*U>at>R3Fiv=McKonBy(^{@w_qj6%&{jO^@J7gCs%m{|GpWlBff@z$6B4QB!Kes>ltyzrxi z0~I~5tn0Qkz4W6g1G3)pe1JZTvg;WU_Uo~o-7Z1>b>)nq42-s@9?}$ z2~L$CUcY7oRsaUtm~Gc3@A2o83nGKRnuxYlvMvo#rUASHp>FT(aS1edv$;HAeT4d0 z43pW}vrAU2Sg}->jQZ0f1DCc?!7nVFhuXM+yaEnwc$RVe3D^KXC@qv?1Wm8IyW0oY z%xT#MVwlCu&=kL*RON;HHG`T6`S`S76=ZX3#_ygjV|f&>o2M)P&OhC~^4+hit2cCQrYAT3P!3Bj zbk$BKe39$emymy5a&}#10U*#Zdx4#$?$4^LnJ!+uNV8QI;s8R7ze7*HCPfpU_nMn% zuKEK|>{ft64quZtiC6g*yo}zhA6yim#FAs6YyOwBsZ|~DOGlbWI@rKsk;DAm<(^8% z)rW*CJw;L6wzBk|m-oino=rby)^z>CKPZ;3Qb>2wVE!R?c&|eOQoL~A_Cp#Mu(qVp zo!`Ok;r@#q7Cskg2u9LT{4X{tJ=(Yx+$L`AM#{>{^Uym0la;Kk(`uUxHf-1sCGRQY z;pr)9@Lyva=QYh@>N&v$A^Qpda%E*D9YtmzGDTl{R@tS0fV8YuB|LD7cA;ZGD+H#i zP;bUUV^toGeJ}I^J~)G0!cL(BTZIfPF!8-P2s9UX{01m!9(Y5v*iGCN?Q*()mc^&1 z=aJDqI&U__c-WjhD~fHMT5w?tHTd;w{OS7G;E<3Hik4)0mg2!Bjp^qOV%)I(V`bfL z49{=Wh^=`4K96q;hPr7j(X1ao-U&e31JnZkhVXcJ=1X9cJUnDK9E$Qmxtk-cn#zo==zKdOiLQt`jyVh7hV`_ zpu?)%?K*P5tqj$MIm&>+_o+9TXo#3iYmx4PppIW#wUbbHXMvcY3%=MG=Wz*Kb~ZtZRS;r2p@J&=6n@!MPt5={@G7YxlktP?5RfT8JTD{ec-(?A0_Ly z1IR24zy-9++LkiUJ@WIDCqTlol9Ce4kX~#t;?<$MQcEpzc;h^OuVP~7zxF^i%;M#B zL2H55)0URz2=UpuxmADwBo9Hl6NWzlgs`^NmYc8ap;b8#cdlh(auZ*C2e#;+J*xfkhzg>;e(FfnYplM{9WZTo9d~pRrpa zAWupG&bC=d?UREK!t593Juxs$baZqmi>nch?2!sxJ|1rzoPBX@ety32!qDp3xj7D~ zR0t6C6uCSej%=Qtr`|N24ae#=to!hR3k!;gA_dxDN6}7t28Pkr({biMT5>2K0Ehcv zD~XEPn5MUdh-3qG_nv`oZ^WL7&my`#Ngq(b8W3j9-LX*+YV?72=_pZA(QLCydNON3 z*hucv(u-8-n=>V$RLLvY9t^=(DH4^^UnBUwf6)GcvT?51oR=@jsIxX z*(xAm^Kl85A}yWco~#=wvWXkwy&KW~R!f}?CIvjU4wnfVh4-((f7c5M3uC8=pAEZq z%^$+WQ@B!^nlX9ypI>sI!dpvqfBkxSuOE{jbWf)*Z|EpesKIB#DRBP-*Lh z_Wb;r$vF8H?LorYJs14J#Ure_0H(^ijwsru5$Z)alQJ_i^Fe$)F*(u` zEv9mM6W1|K8rOomX#L9hH?6?of}HwS1hxw$sAoZLp@qGD^9G-|FSQ#)Q6S0L6%axk zu!3Q{8E8GNq;#pa4_r!$Y#mEFz?^jrweAp|E3iFP(b;J-#EXF{^q9)6mGboT%$KLD z0IE0`DW(E-mx#x=ZmpdC-EPOb-C~h)LQ~N_w}tsRhrtFxqo>X+8HV`;wu~TP$f`Pj zf1moGAOjORi9kSiGVCSXPaZQvPbTs4FECyY1BPF4 zo-in$zIFN-;=Q0n&U8?rTgD)RBJW|{O)IN5-gld_bA6Ll?y@#Hg|JLnHK2n?1oCBA zybbIGCVTIOoBi&4^&DJXUAs&Fbne5AgYyY^`BKf``0)oY-KhLL82^%AeYLcs2!#K* z(|6nQZ6Z@TeRk*+T#j zG+%tO5BcFzaj|=9h{2cjrzLhvNgcIy-bMb-D_n<#61Cew=v+r3?r+(BtFV!WfroB%RUT7i{?h1NnLSD#=c-EC4bXV`gLXi;d+6DP;WlB9`+a%AoY3WJyU0)RTr(osB)c>5z1) zUcEX{=(;s|&ig~A@fxBsTo>H8?&d3UIo(ukM#L%ZHc_L(<-|lb{d?#tEF}+6ySh$% zg2$Bx{W6E-*M7@HTSVKwX?9E=*Sr30@}JvZ|Qp*r0$^BRo?%%9IjKmWS6 zmR8R}kB60o_zm3CM`A2kOp?#4UcO|$aNz>fi`QcoX}znrnNhIraBy;7f|y#-(!z@X z@dD3ne(s#f!!lUXIJmi)P~Yf6RD79F00@{v6<62YN8;p{LJfh8nEq&W_s~#?UD;Gp zg=GQQGj@C}rbS3lkPbz1l!AAe`e7(8&%zYugCPT%W4jWA8N?3~&tV1OD!OnORguiu zmTgFsMz)Qee8fM8bZ3wJL!K=S!gT~kGuVyE$w@#TBUhbpH4#Lb;1s1huz-L4211ZWvl_;jSoq7{RA9m9z=urnMoL?P3zkInpZ~;&X<`^#5nAvZ2TL)G= znax!(KNPc@W5vcJZ(Pcih2UqqIIvxQ>hlT@kWGvQse0+$W%JYDEj}D;BqT&LSHO-t zqFw16sg5_)6Wxw5AR`CD8pT9I8FWPiAh3Q7{Vi@3Qy{AoP@<_kwSTk&MauN}SDoiC zUa*8gV)Ji*-G4hK<{HS)c}%hsk{*hlPTWn-6i96b2iWAPj8 z0tYaaMkTHsSbQ(AwA3>$7{}kNJdaFd@Z{WD)B*TiHuG~2sk^L)ZmF+_07vMMrMd^A zPnn-&N>rlL;Qf;j3OfC7T1dxzevkj00QDN#=f3V;EeizniiFM&8-G@ zfLba|+>KCwZ>fKJs=U$92B~lamb`tbx)yDDR#;6sMT>V_FAaqrbAS9B2)E zmx{0Lql}EPPmx_QpLz@4smQD{TDx{_&)^_4WdPR@f@#FC-7aOj*lJ{8BdM`f|ELL!* zdDziDJ}X>ZKtvf1twbVFpcDdcz0b&q4+47ljT`iAY-|W+ieixu!2Vk6GjdmhDI9RjQ%p)_o?aNbf>)mbADwZ*^e$p#Wcj1S4;aX_j4!1r616XK8ON$sm$4WUAZ21d z$%Ee2ZTnR2EpYI*XeG#JXdLtiY=EyJF7`Cb2e#H}#6FV6r+S>tX`{H0%NW+=C&GkG z{pv?mxfK5;pbU0O3+zk`I3xZU2$-=>LL%?w9LlmPAVrW2DcXsw1dl#4F6Q=LnA=rw zww0bLgg=fXVtAI9LjiaJ+wsWMK-{zE&ljQiUpZ}H>RKO+%vwu4MIrlN^*{`K7}Qo8H_jp*r7!`f`p#wGGrspb(K1 zAm?No(;_T@ehEKMlu@t&LK8Dx$Fm{p@5PViXQznTpQfJ`8nR{=d6K`Fml!w*Zw@oH zfA=($@*kgyc?h0Gd}OAGwn5|yetxlz10txdu=id7Noyvmb5TA!=>m>%Oy%3MQ4?M% zr42WGz(LW3(-nUYeIW%23P*|p*}xE>6yKBy`yH|C!fCPoEd19iAr=IjpjuyhZC=R= zLwAAhx8y4aXc1LFRFkzV>C!I0YNvW7=!6n#zayPO(dPaYkO|hw@WKA2WMHZLSy^FN zJ$=9kRr6SMl@%5DTjpRlKY8+GH!{L=@WzcrPNxCjdm108f~wKIn;iZ4kp7`V*G7^f zBDetR-?#PWy|Vh#Rfa`vZfl#-6pE@@P5AE~Z^~^^WBnGH9Yc<7pYvwyOer%6cukqRZ>d}Q zK1q39bZnp1frAJAey!zE;Ch(%ee>bF2vZfnuY%lH8&Q@aza_WKEnd7> z7&Iv&?lVwH#Nk6&fy7DHS`(OfA@ScF_WBQQK9pz8nl1iWy_<$Es_PsuJ#G$#hJi|W z@WA}c8H1!6xE}xYjy^>k5al+X%X}Q{u9ds@T0(BdDc=M8+PJ3Nu%k%zmUQ^8KcTDl zA{FKU2mI60fGUn`1Y~_ec4A)M#3`6E4+G3RBXuNvl8;PESA*XS3<+7vEoKxxiCxX5F@gp~}+hY?7=D9(#B-H@~&^}E6ZqyEN zE)f?xaY0NkbRNjr=1JGuoJTMT(lDXhMqX`->)+7PJu>0VnougxJ?YM7DV#)lL&6^L{M(@=+ z03EoqWo(mkSrfK5Cy+?$1al4WhXIcs?x5+WJ$4lmGdg z4ilGE9X$9A2`X%hn3#HjxD3iXq#r$c1ZpI!>D=!hts|qOm4tYJL+ntj>?#3|5jxy~ zAAt61pwAj?D7az#QNGhtP%!DB$&C-S^B`^#m-@#{nE6to`WbIMFJKz%wtA;U|zcP1vn7BDqC%BPC`)du32*e!ED>koumlg z?lEakyk3xzC?B8-bF_aD|0_XJB(}y)ev`SkEE7`#bS+$cZIvJPbmY`1WK}S)BZG~? z&ObX=L5hLRhd7d;0?Av~dFWvylw6!p-|92(awOf)&#%qsBXVDf$CS41PaCbv$_b83 z6}d+Hikh_fv$hT>NMchk>8IVA#zYWW`aQHr-gD9-j_duKnrLCp^C%xl2jWoYkLnEuBlR4A0JQTSX5fGW9>h0U5sC|xFIiZ|{ zoLN;@w~XrU>7fIn6;=&pwuQ6G9tJ!BCf0VM7X9pyHh<1dL1S8n(2{A~sE`JQAnj#a zfo=M)7L@!%agIWW5UCI2?+6W0EZJI^Yw^`fJcu+L6nnTk6m`1izuSd zVl)RoX56iHBHcL`&N+u42`i*;^$y2_RfDlLN)uRm7)u(NF%T>zG46v>69kn9wSTZJ ze+y++<8r5DeiZOdc9;>C{P zXq%dxG()(GlCa`LObtRMcmjw6Kk8Xc96L_)?t62AuMce?gF-(LV@cOF(Sgc=$^G|_ zN^n{vh`=M!gRIh0=pejf#|}L8jnfM+hQ5O?0o|gTK3CpO{DtrUEQM^SZF3y02~-7q z^K^>5;crLb=ueeX;K?oIxoIjvnvzh4VgnR?h_b7V^UQ)}C z`Y5nRrYCIBAmMTswmvWdVHyf^x*4`d3gA#@qCfeoCRT0@qP1A|J;?N4lRZHRw+=woiYK!O z*0XA%oT{DMFN5tsb6Ioibw8L{vpW<0y}g$p1Cf%Pu;^4bh<9Qzs2#M2WC|?Gg3c?C zEOW@Wm*V3EL;2;>a(C?ixjE?RAtYZ&uz=e@)N|>uj1Xr_+3c@_M9b07G}0>OfA|P( zN8Fh3y?7$cd~3ZQ;34a+m=ixlqNpbpn^ikcbw93`_e0p1vg;(mP=wVr;r-VjkS^~2 z{{1RuG#+^bT$&`=zCbu9P?Qs>N{nEfob>zl?K9W&#kWTc#++fpfFXYSCLM%vil$TW zLA^lK!j1nOq88eelno4ce-C%D!+1!dFurBeCwexb+mrl9*hO?tjZ`icAUn0{8(re3 zNQglwz6jYVYLHDq;31m@Y`>oTDCj5Jh2hQ+EFZ#lq9z1otb~3BU9swDXuFV>$P)n~ zA#j-^L`O;59DQ}<8u#+euP-(pNNaG+)OL1oa68>z)WuJ0l<_@pn{M$n+2n=Sy!X~? zvrOzOu6S#Fky`e4d)YJlm^%xMHxMDUwthMPt)lO>xcYll^HQk)bJG)`oF3LPE>h+) zR&i&iqt(r*9q8QWIv_eE0#i`2iyfq?8*$5ceCxsrxxZu@z}*UhZEKJc$dpq+!SUS+uMIq(+Z2iLb18q)%phoa$yyT^eM$QXelASGwN5tr7y`u7LBy!=&_ zbsffsy97XLG&`>{)PNM@h3*2VLk`28avoFtkr1>$Iv-G`auqHfFaIvuYx21?)BNZu zCd#a_TDT#S_j7A?K{Vrrr9Epu0Hi+AZR?JH;59criQFM+%oKj5kmtaZ$@7Hkr^O%t z+9c0mA!p@wdp+7?tl5GWB2heZzkv&rwlqS%Fm{hiD9n{tUj7fw49XF{Kr^5+`itaX zg7TgnT?~+GoU+G^qj7;q{lb~4si}y1dtF@%Ywwvjb{zIYL3nL#P{e^+xRY(}kCSta~z7llFSFh^TGm^z*|D^XzDV z^Lz4SS4>O{gxhO?IwbE9H<#B`KM$f+_snp4v-2xuuG>oSK83gDPte|$1v~2gJ|CcB zvE9O(Wv`)K+l`D{bLWpJ1vWUEd%-ND}!-GvKwm0vzVzx5re-3m5F7 zWb!=UbL~?2MVP5(u8(?qF8olu%Dt7eQ=z-U4EO*s@=|$uxq5Nygfnz00iLfN3-*3^ zD3$ED9x??`|})`IYeNLxJt_h?>yQ(7z52czxSgx|%bN^C(`}^yV#eyV22wyA1!Yfpk`37X_## z1bze2ETiC)tVBHZLt&pLTLbxv_ywUEQ{CtTg|BRdl1<%TMN|xY`J<=59^4crb>B8L z4eK69bq#tV(g75&vhK)c`k0#st-Og4&$FhA%{R5|VU zoix?L1zqy^o{^Y1K+hG8)R&jEr^@q#BYqv~sqfgb@~W~$@N=0Njp$%!e~>MREsGPi zH!s<74>X*S4a~8cMP?dek$v5jewH62VZuN=H1WgS2SB27o9`<8H@Ms!Pa|n*d#|$!H@`zm|BA)2ncOTv>Hh6foNAt zqYvZf@|2bUoqovbNs&cPeE@6Rjd%mEcO}A&7K(%Tp9oy$ z@FEiMfqIa%*kKLcy13!ptL~?01U?+8sB7J6BddnKQU#&#BPKQnM6$Q-{CqF^m-$J( zeKw)U>X&h-fRL!@5*Yu9a7d~`^o!p*=gUYi5Mk0#-*TeAYZ6*PL&ou4J+)I!KM&%t z13Vm2AkjER!C1Wn=2qBNiF3Ch6P$iasrxQaJA@1)G#lhdSYNePjmG>ts2^^P>VZTx zato->3BiCfA@%~X2S_0?5Xl!o&gIupq2 zJv6&|f79&3)yLU(OODsmS!$nl)1j?gUF@gl;2lm*?m&ZlM($P_8JYTFr)#|1mDYDG zP~4fP6|s|?$2 zffBs`isMYE7f&ZrNWA7Iv=@eyqz9-3KyQ3-mPWv@rEhERS*w#-xw%>S>D~c_U8yj-5DZ8vPMFI; zm9cAUYfBhIw~FAr9LpOx&FJkAO@*IpnDTx_%;b=xw%dFZMGhh4BT*!XnG5tn>L>9# zq$Z(lxEDvGPKqy!d2H`+=+F}QZyZLt_Y&q7)$rhlbRJA&5a<${r|;DdY`9rE#={In zJyqB62p0mFmWuA?OE%|V@*rKVc>F6_Wz($C-_a0WgJ~n`9q~UwEBB4MN{d|cKbl$f zlp|=QSEv^qrhrJ#!4J_n3KX00ua>IKy$Bilz>Ni zbvCVBeYZ2|)d`*chhvrb7;Rz}gMn%tXQ{hjId0$qRNsEhRT#fl|5g4(es{1j)U7F8 z_N;ayN?B&D5oirK?0oa`@{|IO0_oU}OMI2)-ylnxOu3gY_W_Gx4sC;|sOXYmhT8}X zI62#d2w`BXi2J)&P>u!wkb)7lsJ*or=P|6D?tEe=@a4@NKdk=~ zs2hkRNYXV>bpj+@rQ)h_MhZ++6#_f4uDu7ef_071>;$19j>LaLrXVF%2OJ#)`v4Qe z+RwIS%a-`Ja3to~_bA0Ec=P||SFTl-r#QQx7@-Eq{Oib@KB3)6+6*Yt$p^ z;!;69_F&ydi}~Sp9S^9hD}j{i^j`3!S_Ma6ijy0aoueL`6r_Q-~z~ zUQUG21RGopirQbp6=z8i>tYPNcjR%<31AN|+AL~Vdl#3*6r?KjG%N$zrk)z}u2vCb z1!=5B0k~UQy2p=QzS~X`xRXl}g^Yg#D}@pV$i=q(Xv-=1%Rv&!y8mACx5y>s4Y3!3 zx`knm?j*bRTx3vC&`2pPeAXWyZ64QYDLdC$!h-r-FppjlSwZj8DMQ26nMOsIP)_U> zi%c(x1r+E;pdqGNIGo7n;=MjFz=46kirvNY^I`bU#m%T((FsoIIwDu zML1!t(cZ^}Gdc9+vCwE?P9MKZ?cAx;Di)y*4&ch(J zpr{de)Y9Jm4-+^o3EX@y@;gC9Fa*%ff{he^Tm}mSK~?ZIb%J1Lf(s0e$<*-(Vkba* zPt@4A@5U~nB6^5i&|fnD5=JTh07+Cz=Yj1g=;8l->!h0}G3^MA4l8W*%iA^PPb0OIHpac1O%qR;XG-zJMO}BOnUIPL8qERsqcgt>1qmZCwltY*!Ef!1)+h&Wn~FPh`%7E3p#qbr|hcA&y8;RR6F6 z2!XZ@!w()nFDG7ZwUGZh66124?afrjR4Xmxfw0XH>cnXvYEr@rWBF!cl*F8NR|T*Y zqFB8%u9smbi7jdN>c;vgne#M45+SJB6{?gmpC+yBj}SwPddb)tJ@!JMxjdV+W;h_ zT_kYJ2T$%(fts|H%SV6vC{&0PYDIF_il?zjk>7MeyaEW074Yrx;>_wGS$^6pA4o={^GVhe!99Okm6#FznD+|1TC1Pj&XX`VU}@=+xa)f49)urJJx zg|*FtJMAyJ#uRbyAH3eJfwFN?mEUpVan!V1x!`I*z+<+`Tp_x&w@&|gh>0eLR{xDV zcP_)>){UHlB3T(+7=bwRGw+roL8C%&%e;GA_;ec@wK*ES85b>Dw{D%{JIjW0XRm8r zknuqAs+P@!fmTK3cKZaWtd({7Fjm;Oq5qVUuukbHbg;>KOfHA8@Ek(#-|i%CC@#v% z!w<)F7~!WIg$0aQP|*)SPhI1eTMY#Yc297J^d$f`NqWw5YJHVrcuxa+3-}#dQPtdeNQ?pV9|fr zq>lz+lxhI>Ox@wv$c@fTa^)Xa6!&-B?W|tSTufQ4}%z zqZ9-o#R`Wr<%8Zh*fohw9OO40g`=ZL$w>enj*bE+MieIEr^BIsqnQBL$`8k!=X)Ok z-9CVwe0Y|YR&u+=y5as`Zvbd;!pfsxRuMr!w+)vh=UYHspvVa-mFOk}5?s-SURYF4 zOgL!eR`w9A@5Gs{t~i0dVI1QDR?zVC-&X0o5NI<;q9qR4E=w^IQ-!MxtUeQr2Kmy- zFW}>{B-Se6Ul@$wzG-~d!MAp8I95448qcs|q48%wd7_`*2>(C+P2~>XS|K374y`{Q zy=csM2{h7{h-O9QXq}Gs)3?u?D(hY-oBp;1G>}pM#{#Zb0My&z$Rsi-3ZZB*)jxjU zajMF%Pb8fY2@~Cw=PrjieQjFLC*#cgm&C-G9&{9pLV9ievURO-a04MD=g)(@As~Wk z<+1Dk5Q(#CkXsw{9T1rxCMA6-HWlF{O>fFIDWd3^y{}8VIyc)S-^`W4fB&=O*R&X? z(yyPTWDj4AwJUyf?MJ(ixLh97G?QC-KyzM7sKeSoy(p)*f38=#H+LPYv-NU9LA%^) zFmmN#{=2k|*xrn(XtswkNLt=tkb(7wgk#cd0v#Ru`REm{+hOnkVD)e8nga_^a$uj} zi|T5jty?o%>cxkgj2pkvshix)td(y5=Lu0B?f-H@6zh-22{AThK`T3p=j#mf&cqiS z7UAw|t3K>Hmgiw-dd~1MQ}hvD{mdGXwD*-pn4ui z!!U?bauRU@BW=iO7{I3nqxydOu(~>D+vjU9zg#Q%g5`wcVOF$pL9QYK>aS1r{ROP` zot9HbBGW(`PzgFgSm^=D z3-}B$Xo2Gk$XRuwTep&i5EdR94pJN@&=|&7arnh5u&7Yc`X(nQH~fKQvF&sn-;b_x z0m6m@P-r5H62OakfTxWsNqpVRiVvv2x=|66E?j&y>4T9t(?E`&R4N8wCeA~`X_7-? za!#m{yF%k+s}}gNqZ2~S~W{`^ZomoV3rPm z|3wJSLQ+$rqy~~rYU)D2IBCy+{``3b9Qp`5)u-b;nHU)Sgqn}?)6U!D6|n-lE|VXr z#DT)Q>l`2IV#1F{gv~m6x#DmlU9ao-8+LC0i4$x2`0VFr-4Ng-cKt!=2)Il4a{f#T zzwC5nQE1H@T z@1}2gxdP;6(vh}=UzQiqElS#H10eyAA3shSuSi<~j*fuS!T}Z-(sqnJr&s)us8x96 zYGP3e4rV3|DrmGqSxIT8l#wG9a0(T%`jbvO1byP-mBtfP(E*U4~iR-x=5doN(WqPWZ@ur zGawrGC#0+&qk0A|n(ZCkL*KCLAPARbf(d_>wsJDc$!$N^%rW^NZRT7ax0 zs|L`YZQY*hoj6m7nCoK)Kmfw0egfqxs8l6W;LkHi6XD-CqUj97A_B>_2NKxg>uWi2 zK;S{rW(CO}KG(v+ZmqxBZU+xp> z0HBl`PL0Kd#l;L1%2Is6V)O|lstt(2P!C8|RdE~-+W7di#t&$dI7e^tdsaEgVZ^_H z|5CY5%0@;p>h{L)gp(FZv}>{2Vwn-g0#fR;*484m^AU>?8KDQ&?JUG`o}^hGZ4NHs z-e!=}6I3Q~e8qCG1uzv6KDMe1VNwa?9jX5R{82?7>p{(lMbrbE54>9|+N4lz;?zxY z>KZxj5*h@l?}()cZA}g5&^$++hOp!NLV+VmnV8)Wjf(;aJ9ZBh4NE+i&+;lqCI_D% zU!Bn`S9R+tyiVqb61!b~@}bb*d?+1NLb&0Aw9NyC);J@~p^l}siTU(Xx3LWvDN=4i z=+u3+6wD3bx8V_CEHcXcb&+6x7^xC)!zAKB@56gfYA~K{FW5?y`@);QNpqWN|GQoQ zrTY+_NpBTWhJQo|=>+{%f+IG~TXQXOQbYi3d+1*Fzgp`=6mztelML*azTtDMw{|od zQSaIUCSuLt1h$tSodoy}BxV%qAEt5K2RRye^ook53aZ&q03UMZ1nR)8 zzl?5_th`W~4}3#BSBwSQVO@k|m4gFF=-VaHZ;lujkcq>j)bt5XCbmJKDZQlbL?~9r zOp*D3x$@h_oa_J`{NS{~eCbZt5?DsH3cqoih`4@T^YirB#Un~e^e_mpuba{7NA8Yb zE2Sgge)(l(xFuI)W=z=~Ml?aTTwgX){Ah5z*M{)F(7!Rk{$SrLVE=*Lkn}(j!zzvd zJ{(Do5eT@BvcKC~__5;eg|Qb!?kCM>NbyW8?xc7I0rM$K`zYfBaG*sZCMHv5Z-hNI6BQtGSjrTpg5@Lxxm_ zVv|D(Q4XWcB}`E(ib^Ia=i`ujOeAL=s4Qu6EaLZkXYSpP-Mf#!-{be!>=D=1^}W8| z&*%Mqzh1BB2dmeFd$}5J6>i>BSa8{?_gYs3;FuDHT5Ng*fz7BzK&}pDl;@tlWut9? z2`k>$!n$GIKJF4DgBl4JmzFkSHX)=7TAZb9U!tA2ZJTPnC@TYTyaz670Rbt5f0IBM ztQ7&Hum?A8@ha`!`uvwubj!e)jY!ipj{%jyV2`n%iED{KDNKyjq#E7M)TE(q6d>71 zEsu@m1coEYq3-Q1b49vt(b}at6|d2%RiDqy4`aq&y(q@=vkNZCuQoc4)KNW|U8}8S zp5Ut%jl*zo$nAc^SjAYrFWI6<;u$f<&ZW_=f~o8sEAkzYU-kwGe12Ei!|8Z-!JtscvP zl}jCNMF%$7M<}m_n6HobPtUQs^1Af985uNmvPPhfQ@_aLi~XcqCfJOohwGk=O&x!v zJv$2HZucJ8^CoUgBZar5LP+Qg5+?A?)TFnP5`s~tHW%>(HaXBwzf#3%X2k0#bnC^0 zAkFk)Bkf0r1b(q~c}Tl}7zsCe4f%GTx8=j#kN0O>V+w<$=zk-4ysOwFsf~6#xUkT* zfr4)^tVi{R8_YyftFy%#9#Zsg&l5sg_nq01rgTG9D4ski@#q*WGq&&r3s0)%;f1Oy zC)e55+2gWjHb5qxS7zo+UBy+2H%uNnT34SE5Z3w(5xFP z`q+s|Hd4{-epJU|H`4@bX`TLlK!C4gUG46>K_TJ$URkbokIsx85f?7D0|vA$7t$g(I&GU<~zr*z0O&KEt3+@^-iBV|UQR6ijaCW0<2Bjn= zpEMI*o$Cmm3){PPuuQV`6haU0rW}5MI5F z(ATRMk0hytIZkd)P#)fCKe5wiFg8bv^u5{lC-2ptDxY*WAJ|hlZ&Ui>$XrJ!pT#@O zFP<&*+BI&V4Do0#ex$sZ2t=JlcxP%!?cKA@TQOs2y?8rg7baoCS0vZ;6jTR8`G&Ap zE*4pUdaU^~mV5&#L+Q>(9q1k7$=^25Yt+AucLOOXG^32SA`OK1)YsH)xz52h= z4VQY(#!Z{7GX#b_t!MXFe~y=paP9>fCAkL-OZJ?mfii8?IG;=5MRJ9>v2NwxgXI{Y z3t8@-k%>u{Kd|IXuH#V{uEAxPA~M@ObB%NUwgzt(U!^0;l={7Ko9jnA{|l^gn`X_T zeWhA{$h9JgF!J{Y-m;U=?%Va| znXC7|M0#{ihkJ7yps=d(m<_E?Ky>FO>uLxDl|B zb@jU1XIWZVm`hqEZ~noc1-gnC-R<9-VH~u#_s;Hbw)}c1#q<3V|Euf$4H(J9!e5;2 z>E$pWFK&=g`$aV`J0*Ue+~`!pLE}cfTkW(J`j*pkGY^9uGV`!#8u*Wh!Vqi?vxihX6$L?Vq`=796x-WO) z4i$)^^~;C8&LP`=-EQvDCR(K*ytAyi4Xzv2-i_oPCB3I`$7 zwIztdgH0?A)u&EQ)UJP?{vl<&@D!`OChYgQ_&ZX!TRlNj6ZR+1Pjb1uvqktH61Q!+ zuh1OiN7iWI3C^$|OZyo}WVlP0E~d%YPRr-cLR02}ZrG^6NSOBGnjq05COKl}XZPG< zE?G|`gw6$7U7C!`*J;2eAV~V%+HXs7vW!iaeZHlCrN}&M>}z$UNTc#$UQO{itZuKo zgS(A>dgouM*h^S}lU=vJ$L#BIU@#gqiKz}uMI6||rS)N%mhhWJCEe*VmOJ{|!$;xc zH*3>oB`q@L_b|W5oLe9y5Nc1^Mx^dR;h8{vHMzz*N_EZ?R9?!cb-Pwhgo&)3q;h^ARxLkX9?_LCws$-XL8z@jqgb@gm zdW2sI7^OxAriFA4$FrDG#$y7vXdnlK2#=A5gK8T340tBpujUPyn~AjH1xt+#bCC_*-;tBcE=kgMH>2S9*J7 z&U5~@`;n{_r9Nf-r~c%Vkh|iT2FWIh@xt8$cZj}2r|WilMWzRJCZmrPbqnGuj_>p< z+n%|J(xQE%0_)EwPHfO|e&*%t*S)UBZQh*v!#j-L{Lpt8 zy{UI?m-B#md2RYRy)SNc^xv4O*KbBl-1>^FPt4rUSLkg|h^<&LdgQdcdj+E}u6yF$ z{C8D}kTZhxZIv0ddi5#h|3&xje-!wb^A)I}+#mJ{CH91l^_IFUS%65Jm7UT%uSL(U zXNBO6kuJzAG3$$4rnKlX=s|B+qfDR+7x7NgUc*>g8p^Z$=de2@3?IKfXoP~75wAC4 zxS1dncnW-Jn7vPEBK6iKxjDpDbn@Hi^61E*H-}D#k<(rnnmSTx+-gZc6ET=WA#lI8 z{_$M$D9o`Px7aU~Uy^y1PUFUcU7^%+ZDVEtk5xKd1|M)ikKrCAw6i2MJ>nQ-kR{;K z@jm!cHWMs5$urP1={BGQq6G{MtGvst$8v3zcPB`f!k(wusScF_0QqN>L#8W zT05kS^4qV^{u3_OPZJgu-vWl{?juV(TAHyk{;d|d&!=9CY&&Yo93#W~TC!`otXYFifnt|ruy{>Mub<6X{KFikko; zeJ8DqOwN!Cmpi!d{)eiwYj*~Yomag(cT%wSz`s;@9*~Qf$Eye z4$8!j>;F}la-{z4pQ&bN&k7O~)c?m(9sdV6Mt*&iSjNokpb6L##liEr^~P;{T%LyD z+D+ZV^sN3_rr*X#>{aTGzBBX9ffPzFAn}NLvo_vtF+$>Mh-X%?$#N~XWwQRP9bqQ@ zev7{|(qji(>=1txDcIISM&#;#6L={Rus9~*nz%|};4>ojO9sDzv)jxC%FpnYLn1E0 zz%8q zZnZso*;r~q6HAj^-yfCz*t6CCdFkPKc6SC8RQ(uS_l0rg!o0|Sn-R~L zt0ub*eYmow=*=rPvpU#U;(BIap22Aa$P6WHrunnS^xv6L_L8>}8VHeq zRd{m>G2@#ik3Mjy!h7nJDdJE{9U+ZsMyD^lJtw^v)+3Ja zal04NZ%x&mBjZC)8L8uE@@Z5OETE#6lbh<(CkVjd6k~qb*03F@w zRfte?{4p=qNU_;&B$xFhr<`#(9l?s`R7bAYZW9(aa8JCUH~+BsIM9+L0L}|IN&CQs zrw>>+HODKhN10drD20Lyvxw1*dbW2MJ%{?P=Czp#B*!&*I3LT{nNppAu1*6ZadIo^)IS*93-di*3z`<;RRY zq;%`M*lX)96A969m|G4b9QYeQqLq24LSFT%M)gPQfET&=0((p}*t)3VFP z#*gY~VW9^~-kwZ`%GAYpmu1UYq1lo=n!IOd=hF-6C&GcsKQ=GkE?7|dU*h=NLnD|VSS4AdQgQra}vLjh%|DsUa zn;QgJs5t5#0fsE*zY@rp`QU2L;W3`RXb7j!^lf|^0bJU)>%LZ`>Hw)0U6fY`G>r@n zWkNI4dx=)LjAz*wy%$|T$&YgPW}(BiYTa6s-GP-`BoPMXD3f+dbM;4VbfZqEIE2h1 znqj3dVTd*+Ir}wKU{@8}AWlcrO5?1?Iyzg~Nc|V4!SO)a^uMtPmz_TO2!<3gz!0=A zw0n(2IYib)r;gw^NkHN7B=jM|D5egeh&!J0nN=W+N@91ZyO~UaxPYbaN9YxIuGu5= z@-9x?07V!w5hI0|R<(dK%6Jv~tn*!V!6j*FmXI(umc3@s$}?=jxmRO%*WLk|Abxd3 zE*xwMp3xJ?`65u`$lRGZ93%!Jyc`W6vv}4=F_|-S<_Y^g?-{B@_RJzJX2BlG{RKsb zkHVJ=6>@&gPtkF4B4>~eOCW7PY~4hZAD%SEA0~A=%?zW%TpLCvU!`UL6@L)-Ytqp# zxki0w73&cB&~|mbr{nbi;P- z01U=k|6+@mth;dYpwlEQE5h#f;l1K+vspm_b-V=Y|)C&?5+l|If!*L0zJ^f+$+<=k`= zC=vW$lBbpWMvBU->*1|1Ga)z9iZ#XJ*(_$Ec5-J$iSkZH!dC1JRA|K*KbE5Zb1?+k zBxY!2MT?e0LNE+qXcUVQ&}NC+QfwS!T)}79{&PB2xg=t6#YUk=3o(PysvI#Rd46u4 zNvM?(J1%j&Fu%r7PMhE&Uu5I=Ju-!&KsZ+<)zvqiUmV(DB1)qSC*>BxOC+uo=o`nd zxaA}$4Ao80NYqhJVd*oblc}eOPTYxITj z-FRm#s#0^X#DNv*AVw=i7;a*wtubZ8_4SHDLdt2(bZ@zj&C?U=>mtsXLG?=s>| zt}q9(gw>Hd z<&)dLc_q0%PTZum?8$YBJ|Fz$m&*ttU=5WGzP#e`33M -std::vector concat(const std::vector& lhs, const std::vector& rhs) { +std::vector concat(std::vector const& lhs, std::vector const& rhs) { std::vector result; result.reserve(lhs.size() + rhs.size()); std::copy(lhs.begin(), lhs.end(), std::back_inserter(result)); From 87648c5b49c07a9510bb0fe1451a3d8f8eced8fd Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Thu, 4 Jul 2024 18:15:56 -0700 Subject: [PATCH 077/124] Rename rmm utilties --- cpp/examples/tpch/utils.hpp | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 366b26e585e..785862d1a7c 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -41,32 +41,25 @@ #include -inline auto make_unmanaged() { - return std::make_shared(); -} - -inline auto make_unmanaged_pool() { +// RMM memory resource creation utilities +inline auto make_cuda() { return std::make_shared(); } +inline auto make_pool() { return rmm::mr::make_owning_wrapper( - make_unmanaged(), rmm::percent_of_free_device_memory(50)); -} - -inline auto make_managed() { - return std::make_shared(); + make_cuda(), rmm::percent_of_free_device_memory(50)); } - +inline auto make_managed() { return std::make_shared(); } inline auto make_managed_pool() { return rmm::mr::make_owning_wrapper( make_managed(), rmm::percent_of_free_device_memory(50)); } - inline std::shared_ptr create_memory_resource( std::string const& mode) { - if (mode == "unmanaged") return make_unmanaged(); - if (mode == "unmanaged_pool") return make_unmanaged_pool(); + if (mode == "cuda") return make_cuda(); + if (mode == "pool") return make_pool(); if (mode == "managed") return make_managed(); if (mode == "managed_pool") return make_managed_pool(); CUDF_FAIL("Unknown rmm_mode parameter: " + mode + - "\nExpecting: 'unmanaged', 'unmanaged_pool', 'managed', 'managed_pool'"); + "\nExpecting: cuda, pool, managed, or managed_pool"); } /** From 479852783d155b62360a3a36c261712a95e9a9ac Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Thu, 4 Jul 2024 18:25:32 -0700 Subject: [PATCH 078/124] Fix append function --- cpp/examples/tpch/utils.hpp | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 785862d1a7c..b8ed7068669 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -114,18 +114,14 @@ class table_with_cols { * @param col The column to append * @param col_name The name of the appended column */ - std::unique_ptr append(std::unique_ptr& col, std::string col_name) { + std::unique_ptr append( + std::unique_ptr& col, std::string col_name) { CUDF_FUNC_RANGE(); - std::vector> updated_cols; - std::vector updated_col_names; - for (size_t i = 0; i < tbl->num_columns(); i++) { - updated_cols.push_back(std::make_unique(tbl->get_column(i))); - updated_col_names.push_back(col_names[i]); - } - updated_cols.push_back(std::move(col)); - updated_col_names.push_back(col_name); - auto updated_table = std::make_unique(std::move(updated_cols)); - return std::make_unique(std::move(updated_table), updated_col_names); + auto cols = tbl->release(); + cols.push_back(std::move(col)); + col_names.push_back(col_name); + auto appended_table = std::make_unique(std::move(cols)); + return std::make_unique(std::move(appended_table), col_names); } /** * @brief Select a subset of columns from the table From 303257291610f42c42232c1e378a4d2e35538ed1 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Thu, 4 Jul 2024 20:01:09 -0700 Subject: [PATCH 079/124] Address col id by name --- cpp/examples/tpch/q1.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 0b62fc9709d..2681bf53dbd 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -85,18 +85,21 @@ int main(int argc, char const** argv) { Timer timer; // Read out the `lineitem` table from parquet file - auto shipdate_ref = cudf::ast::column_reference(5); + std::vector lineitem_cols = { + "l_returnflag", "l_linestatus", "l_quantity", "l_extendedprice", "l_discount", "l_shipdate", "l_orderkey", "l_tax"}; + auto shipdate_ref = cudf::ast::column_reference( + std::distance(lineitem_cols.begin(), std::find(lineitem_cols.begin(), lineitem_cols.end(), "l_shipdate"))); auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); auto shipdate_upper_literal = cudf::ast::literal(shipdate_upper); - auto shipdate_pred = std::make_unique( + auto lineitem_pred = std::make_unique( cudf::ast::ast_operator::LESS_EQUAL, shipdate_ref, shipdate_upper_literal ); auto lineitem = read_parquet( args.dataset_dir + "lineitem/part-0.parquet", - {"l_returnflag", "l_linestatus", "l_quantity", "l_extendedprice", "l_discount", "l_shipdate", "l_orderkey", "l_tax"}, - std::move(shipdate_pred) + lineitem_cols, + std::move(lineitem_pred) ); // Calculate the discount price and charge columns and append to lineitem table From 2f1defa0d5c78ffede3a41cad09bd8319f04648e Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Thu, 4 Jul 2024 20:04:27 -0700 Subject: [PATCH 080/124] Fix col id addressing --- cpp/examples/tpch/q6.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index cb86f428ddd..b47a9772203 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -64,7 +64,10 @@ int main(int argc, char const** argv) { Timer timer; // Read out the `lineitem` table from parquet file - auto shipdate_ref = cudf::ast::column_reference(2); + std::vector lineitem_cols = {"l_extendedprice", "l_discount", "l_shipdate", "l_quantity"}; + auto shipdate_ref = cudf::ast::column_reference( + std::distance(lineitem_cols.begin(), std::find(lineitem_cols.begin(), lineitem_cols.end(), "l_shipdate")) + ); auto shipdate_lower = cudf::timestamp_scalar( days_since_epoch(1994, 1, 1), true); auto shipdate_lower_literal = cudf::ast::literal(shipdate_lower); @@ -81,15 +84,15 @@ int main(int argc, char const** argv) { shipdate_ref, shipdate_upper_literal ); - auto shipdate_pred = std::make_unique( + auto lineitem_pred = std::make_unique( cudf::ast::ast_operator::LOGICAL_AND, shipdate_pred_a, shipdate_pred_b ); auto lineitem = read_parquet( args.dataset_dir + "lineitem/part-0.parquet", - {"l_extendedprice", "l_discount", "l_shipdate", "l_quantity"}, - std::move(shipdate_pred) + lineitem_cols, + std::move(lineitem_pred) ); // Cast the discount and quantity columns to float32 and append to lineitem table From adde65b06ce6822ec227832bb4594c47dee2eeb8 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 5 Jul 2024 09:20:52 -0700 Subject: [PATCH 081/124] Add name to col_id addressing --- cpp/examples/tpch/q5.cpp | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index a348abef23d..4e0edb8bd4a 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -80,8 +80,10 @@ int main(int argc, char const** argv) { // Read out the tables from parquet files // while pushing down column projections and filter predicates - auto o_orderdate_ref = cudf::ast::column_reference(2); - + std::vector orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; + auto o_orderdate_ref = cudf::ast::column_reference( + std::distance(orders_cols.begin(), std::find(orders_cols.begin(), orders_cols.end(), "o_orderdate")) + ); auto o_orderdate_lower = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); auto o_orderdate_lower_limit = cudf::ast::literal(o_orderdate_lower); auto o_orderdate_pred_lower = cudf::ast::operation( @@ -89,7 +91,6 @@ int main(int argc, char const** argv) { o_orderdate_ref, o_orderdate_lower_limit ); - auto o_orderdate_upper = cudf::timestamp_scalar(days_since_epoch(1995, 1, 1), true); auto o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper); auto o_orderdate_pred_upper = cudf::ast::operation( @@ -97,17 +98,18 @@ int main(int argc, char const** argv) { o_orderdate_ref, o_orderdate_upper_limit ); - - auto o_orderdate_pred = std::make_unique( + auto orders_pred = std::make_unique( cudf::ast::ast_operator::LOGICAL_AND, o_orderdate_pred_lower, o_orderdate_pred_upper ); - auto r_name_ref = cudf::ast::column_reference(1); + std::vector region_cols = {"r_regionkey", "r_name"}; + auto r_name_ref = cudf::ast::column_reference( + std::distance(region_cols.begin(), std::find(region_cols.begin(), region_cols.end(), "r_name"))); auto r_name_value = cudf::string_scalar("ASIA"); auto r_name_literal = cudf::ast::literal(r_name_value); - auto r_name_pred = std::make_unique( + auto region_pred = std::make_unique( cudf::ast::ast_operator::EQUAL, r_name_ref, r_name_literal @@ -117,8 +119,8 @@ int main(int argc, char const** argv) { args.dataset_dir + "customer/part-0.parquet", {"c_custkey", "c_nationkey"}); auto orders = read_parquet( args.dataset_dir + "orders/part-0.parquet", - {"o_custkey", "o_orderkey", "o_orderdate"}, - std::move(o_orderdate_pred) + orders_cols, + std::move(orders_pred) ); auto lineitem = read_parquet( args.dataset_dir + "lineitem/part-0.parquet", @@ -132,8 +134,8 @@ int main(int argc, char const** argv) { ); auto region = read_parquet( args.dataset_dir + "region/part-0.parquet", - {"r_regionkey", "r_name"}, - std::move(r_name_pred) + region_cols, + std::move(region_pred) ); // Perform the joins From b805c413d312298f1df7b4af6aa254165812c9e7 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 5 Jul 2024 10:16:10 -0700 Subject: [PATCH 082/124] Add comments --- cpp/examples/tpch/q1.cpp | 4 +++- cpp/examples/tpch/q5.cpp | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 2681bf53dbd..4034caaade3 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -84,7 +84,7 @@ int main(int argc, char const** argv) { Timer timer; - // Read out the `lineitem` table from parquet file + // Define the column projections and filter predicate for `lineitem` table std::vector lineitem_cols = { "l_returnflag", "l_linestatus", "l_quantity", "l_extendedprice", "l_discount", "l_shipdate", "l_orderkey", "l_tax"}; auto shipdate_ref = cudf::ast::column_reference( @@ -96,6 +96,8 @@ int main(int argc, char const** argv) { shipdate_ref, shipdate_upper_literal ); + + // Read out the `lineitem` table from parquet file auto lineitem = read_parquet( args.dataset_dir + "lineitem/part-0.parquet", lineitem_cols, diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 4e0edb8bd4a..d8166a65f37 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -78,8 +78,7 @@ int main(int argc, char const** argv) { Timer timer; - // Read out the tables from parquet files - // while pushing down column projections and filter predicates + // Define the column projection and filter predicate for the `orders` table std::vector orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; auto o_orderdate_ref = cudf::ast::column_reference( std::distance(orders_cols.begin(), std::find(orders_cols.begin(), orders_cols.end(), "o_orderdate")) @@ -104,6 +103,7 @@ int main(int argc, char const** argv) { o_orderdate_pred_upper ); + // Define the column projection and filter predicate for the `region` table std::vector region_cols = {"r_regionkey", "r_name"}; auto r_name_ref = cudf::ast::column_reference( std::distance(region_cols.begin(), std::find(region_cols.begin(), region_cols.end(), "r_name"))); @@ -115,6 +115,8 @@ int main(int argc, char const** argv) { r_name_literal ); + // Read out the tables from parquet files + // while pushing down the column projections and filter predicates auto customer = read_parquet( args.dataset_dir + "customer/part-0.parquet", {"c_custkey", "c_nationkey"}); auto orders = read_parquet( From b7e25c992a2e5663d823c694c95c7a4a094125e9 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 5 Jul 2024 10:21:05 -0700 Subject: [PATCH 083/124] Remove plot.png --- cpp/examples/tpch/plot.png | Bin 32118 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 cpp/examples/tpch/plot.png diff --git a/cpp/examples/tpch/plot.png b/cpp/examples/tpch/plot.png deleted file mode 100644 index 2c1f39461494fee638ff449efbc17e2fc8fe9e4a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 32118 zcmc%x2{e{ryFQHHhzyk|LnSgaJ&6VsnG%r(b0l-Akc@@Q85@vFBoSrK7$UQZQkfcz z850dcD6{Xldf&bG`(Jy%>%V{Bx7PQys#TBYxu5&KhVwkm<2cUivgQ%hrL3!2DT-Qp zKuuYjqUbXzif){l39ro3XOSOBJF6Ht>)4%gb}@Ifqz;=q+n=>_K5J#M?t-PGla<}M zonq2r+eFu$adx(Mk`WiT{m(au**TsTmsq4$fQzu$s~vNqD0Xx5hb~1S*@~h(`VJ`X z)pd;-`s8Au)4Z^F#D$(^&B`kY-IQoy#nyyi_nr#te1p9i?)qD3hN}5TQ_pLjX3Wvk z;>aj4*jo27qx@;U;iF4z4(6+BnMJ1N_7<5oQMvo7W+r51Z@6pJ&3(Qy`u0`PfYJfw z)nk2O3bcDMF}HS|>v;L(ZC_1T#E~OMY6ZM!wOXXyf1kB{mvlHHj9*SWluuf__2GdP zmcd{uRMhv{}ckE|Zn7#kwU2<5iLtnM}u3fwOUxo_@ocQv=d)dZ~8~dXr zZFCFfMr#CW=@^(ct=V<%(T}FYtB%c?#Bn*-Q*SkE7cmat{!4`MGhO_)i0I-l3i| zzboIpTOr^zDTvp_jGy{{Z_ax5;zgob&9%_b>J+U9W>!|0H|r#=+p>iXV-ye(5z$cH z)WnI~t#Rq{40``w*W-8VX^K)+t!Z-WadRCXr<39#wrlxu@zSY4T!VUNLGUQo*U_E#4+?nRg zwV3z!Z8guIr!aP~%TH3izP^-CvPK;DbiQqS!BV_QCGF3Pt2`8CV`G!=`SIgNwL^#e z4{h+%rES$NS5{8+t&C8+x^%6K?xcwbr}Ne4dwGy6RGgI>o zEu5FHIExk;h3-6EcOO%ybcQikRa;J0*8bP$SH0!eg}${uPOk0y)8#FIo7lEvN7dug z^^_uSyl5)cK}B=3zU>T_&GOZ&r|SmF7v^Uszb40Jn?GZkou3&lbQ)MzVB5}g;>3wJ zcXlrJTg-mc%Ff2->iUDXQ_iLz&4?Z;&Ci!C!>5!!VA|)D@?USoBvHSXBr!2DQ9k42 z&P@73P0aXf>w501TIol(M%e!T{hRVBEZj}C3dHNDA3Z$DTq=SOhv3ccQ*~5r<4Q|C zilbgzOavWl7UYnbl zX?D-wsAWmsap#b&>5&vk2?^21eJeP){3|0;ZoIbptS1&3>9jteIoI-_fuZ4-qI%rV zhcvxsxS(-LS<<09mlkHq7gq2O%o~*Def|2ivbA+pU0vOow!F=OOV>#v8^mp+%y53htGDsP>8wx`X+5`g)jjH?)}H;)=5cmEB>x| zOKBEgu#J&+V!{@g3Vmv5U_enfZr;=@Q1G1A?axU}T)$-L($SHgefjc^p~tnHzrANc zXsG^>uHX3R*kk(kx>&j6{rTk1MEi|qZ|zLZ%kJ&#d+i)*Gx6<%Id=Q;C+AX!iXHm) z=OhU%=ZN4Yb5wAmW?4@{dd$@%5zfo@4jO-ce#*g1kB&b%(%<_i=C@EdGv*-0FrP+Z z2#4@~<{6it9jQ$w8XD~U{QSEw{NRn1_oTgXD)nBFuZxk^3CzkJ+#kjtf}t?$EOBdj zdVWi%`>?}@1u!IZez3bjz=_q$yx&K(b17oiDIj`xj z8=u$IJlG$yn(`SNsPC)vTTD|BR#xs0!8FlNB0@V%k6YyrJquj=`j0f$-EPlmiBt8l zOZD~jW8^(oV^Qxxq!9PG)|qs8jjOx+iJ9L+u9G8zZBOjj1C)Fb#UQW`huIDWW4+MW%7_uMQK%G@6@SNXF3XVYE=hsZa7?poZv7yVm>?lJ!@oq z{5h`i;kIR(X}P@8zI{1oTX$exF-b{DMeVR;H7WBr7Jq5W_0rXfCr^st=MJ-z4nNxR z*|bwvVh1oYGiw${mp{6;X4kUv*p{7mYxx24Q zb8_HU#Lk=~wsk`IMO%Uej9*Qx7!3S+$i-`PIXF02ZOgVo^49R9X7=mJ8!{@o4PU9K zs_GX`>>_XYRkdcht|LOgyYkJS;3Mj0HKlxk`BJzx-$&kcj1T zNl8gAd3kwecJ?6TJ)a*O2+MqbZ=BLjm8iF(s;jHFZQtIgsG>Y>TC)4<|NaTJ5+7=A zqUkCqkDoJIhR@N~=NMc&yzxnoRnwU4pVO8n>8RWMh76v5B@4xmthCnl$t>COc=cs9 zE;4+2h8j0x`^>9u%Kq7W(vhm@Yj#+EaH96Hs8H?VWL7dC|2l8~X81B`iWmaL&HDJw z8pd^pH9~<-@!`fxYGEhjm7}Zv4p8@34VQSe*V^s;5wJvUTt#@xQBxQod*r>>5Xxz=~Mo9QfadADQ|x`AJrmIB*N=*%OwQ z0T?>RG%|kbEBv;6y@ow){{K7tISu=T!q241VlmPLne>}hX{YMiuJ*ya02WE;l0_A< zL2SS3|3|CszxK(0jCX$2@1gd^2wVACTw9K&$f>u{<^Zd%qrCx!#ar|7LnT+a}Rb?80v9 zG0M1P$&%$OSNd;1QQci5;O$@L_1m=ZSrGen>-U@TY?_y$1lZ^7EOGRa0iB$j+_lw` zX&olwWKAV)E4;0Vi~`hqXP^vl{=Um_XOxufYU=*syHbVjf5bj!nG$G8RUn}{e0j6V zKQOTB*)s;p%+8KoTwI(_+HsMIi3#P?(4eWCVaNq6bV-*MP+Yu+AerH^nPpyHUf0zV zEf*|EjFAY|VXS?L8fCP*l41PYhl7^Ik&!%gaSH5r4lz+cxhvniecPC6yeB@y&yQiX z#2Fu*I|q{Pr%HeST^{ z;QgaxD*(}|2OA&6CnpC*Me$Ot50ZR>hc_EpiJAs&8%9X+WVBtSl6>+kt-X-n|4ebBh}IJbWlZ`4klu z)s8vx5B$(La^&b8;k(bOs_37b`$T`;bM&Z*=Yd3R-c_quJh>vZQ{+8PHS5w| z@)})DMZI&TtE_aS1ycqJ^rwx;|_{>=Iz_J zqd!`A=9k^Lal>CqS~yo)I7|BEj#W)fP5u{NtPfYy%rYtEoY0D$-;@W;@Qbg#PZ zs}WCswLRMlZ->WBrEK56o#v!-!op%5-VA&k{|N_zQw^xQ=kw=9>t-Irua>lC zb~15#qD70ab~3%HF2bqzEi6ubtWQZOEoM?)NmsrwO>wh~^WgIF@$rUi^L_8r^w`dS z|EQv?d&_W*i;Ii-c}hb=LuCE>9&9b8yDcpS9c7+tVq;^gr0_bQf)^h)AkFj6-MbHt zJw9bQTT23%M3`{q3khtd!lELAZ7F+Xd2CxVK#{8VKByNfy1E2_e**8`U9)Vhj6bSm zl5Pf*agC9Yk&2EYr|qc4d3kxwfI96jT-XmpsTb*u6sAwkEoOI;g6_kg! z+if=B3!6l+e4}^SnZ&m(T`j@v$F!7|9fFGVwwUpi#DkICEtzg(UoaS(od1Tbr~Xd= z4j)KoKHPcBk@42AAB+`$vyfBpxbpI^qkkmvHMY;Y)n)qrnT1-pk%jtmV#DRFmpw9# zwZtyc+KfWrp4Q{?o7;M$1;S0fc76%Rx7M;Fhb#$?i7hXUWcc|+f>Cc>$f*hEqmO@`#cCT3GpQxOr7l`Yk^wXC?cexxKl zXOa*fa+sT&R|VTxThlx|JeKqGFT)y*4^b9s)OAzVrP~gq6~13BQ9J)X@r0&|vToyO zuP8>~e+A9{myhs2VY`GK$(ozC0Ej%iH!f*Rad3Q<&oS_yD6SLwsiI9H*iMSND#h?%H%8X|i~|z+yo0KEFsU3o@v#3GH-{C+5w5=w z6Ug!Z!R=uH{{00!5Ay)SMBLO4XH3)%pDUHsnbZkS4^a~dRC{E+p!e$J4ph6`Ex-+m zw20d}D#}my6?ZPhDmY#8G=K4xHxeIgce$j?av!{AKJ-l|)-odMzatVg7lMSt_m?!g zUjMts)MifQ1q1{D_MoV%s;ODr)6=s}N{Z>aT(7#aO7d8xq_p%Z{VbCgrS5-5>J=6g zML6TFKcv|^J1ZlzVkN|X<-82M7j7URbeKnr36M1Q9d#`v#N5In!S_nAbI=hceXKH5 zuPMfvKf^WsuOs6%KhLcxH4xnowrIOq1%=%lG!{bEGi}>FwoFiTRj9m1@n!doi!Vg& zJiQE&&)+cLx)Rw!J(2Ukfdj6;KC_(r^fbU}pw7IfDu9`lwd(oh6+L|qjGa>dUWg4B zx+`T{g^FUkeYaxNojd)lNw`GH(TvsD=b`4$0(-#cw1CCAN8@^9zh9#ADb?UVMlO8Y`v#pZP!_{S6!Xe`hAMv zS$Cv500hCCuNoexvKKymYMre_);w*=y)_PVS&VGl-Kh4s_{-hKm;^k>Ii${Y1P_-l z@T07G0Uqd}M+|t4DsXhDo(kQ@l4{H3+<)I!4db_WaM%lGWu)D%JXKC%{dz_`x61dA z8L0Ai9=a#PC2mrpY2(sg%jGfNsA_-yyh`$sd&~LwmLzH)>zyXpgC=v2Jrjh(emq`4 zjl#n0PLmQ>-;ojf*{+2-0a6Tt{^S%mjq>^W>wfEYc7EAlq@V|wcoJkW)7#yCoir-G za5(W3@Rn8Fj|~AvHjE=-jkM=EVlge9>H1<_E)yGttNv%((^3m)Z5j_ zoQcmTR$#>RoMi-z7JyQ8vrLR#J$j$By2PJ#8rODR&!cx32FmynFi zYOrG*Z2^IS30SKJ`PN(=pFVB3c+Px{PnxsK^UvCoJC0p8VZa?}ktP12qk|7Anx1-? zoZR=T|24R~d)cO-Gf=9om#Mz7{$O`sAA`HQd&Cd`R5jpL0{}t95WwGV@A;DDQX2PU z7@eG)BA)W`@evL%M&`VF%Y|?6H#>iAQt98%f;U!ustgWJ@i@kNXYZZ*!{@G~kTcGvMX$x^q~VV{H8$2uZMht;fW`)aOVW}Cl1W}Nl5rKNCE zh4U+gMScF;`@OolD}Q#Cv*QL@T3cCWfV;MN&wISjGA%dowCgD3IC}Ib?{0fDtB7IGI85scs8#Zoa z@X7AN+NzJ0)7i6MTl+@ChSSPY0|x&E4h8=K4lNYQOXghERSvYaw>v$LNLixy-1~8& zv5r;G42M=FO~sy(@W#EPeKl%Fj?|4Ej)yj&@@@Fl&5bYLym{V|bK=AArE8>?9FCT# ztc}=0ce4KcTe)45SQDI=7lD?izb>aS0mz3Tr`aU|=-N9v(qEq&)eKoJ=`WpC)V?V~ zJ-;YRMUe9{ZK6gaamqoEBSOx2epmYItdJCmK@WS+>G929>h5jnIJqvXfd;mDR?7__ zDciPPD#8}1xH8xBb@rJ?p~Aw#`$sbjQ;t6o1M8)`{{s73nMMDJJTcmI1NCvL;ArVi zE^canVs~PEu(4`PR!)vkM{i=JS8!fd9(;`}!9v;x%flwZfW|=HSB^wdmjcCPS%39KKR^D@Z-PWy3 z1qB5`AO@C}%3;+fODU~JaccScwt3|x#+S9Vmx6*8gH&5QXLK+bA6pzBAJ4pG37JE} z$ENBeTYP>Q-anTMtSN9ePVU9=fa!ZH{o>@f%64&mH#b_^{!8?#+D1%Ceol_oaiz3J zfaQWRGKRJ@_xJj31A9pyXLIUQAV90d*QP|q0-MefX`~=^-Hk|C_Zp=n_4XT$CDb$? zO|WR3+H*Pt0g-_0$UP_|9plA0D-}h>My|eF6nsT>K-I|8!((0K>&8-d7fcibc)x~s z?{utCB5p%4eY;YPOi(}qK*9ih64FbW(ej8je;F~Y7l^Cp@yKci4pjEOxO(us9^Kzm zXo?Il+)?C**lsq`T^TJ#sFGlH#R2iU+{cgKIkS*U>at>R3Fiv=McKonBy(^{@w_qj6%&{jO^@J7gCs%m{|GpWlBff@z$6B4QB!Kes>ltyzrxi z0~I~5tn0Qkz4W6g1G3)pe1JZTvg;WU_Uo~o-7Z1>b>)nq42-s@9?}$ z2~L$CUcY7oRsaUtm~Gc3@A2o83nGKRnuxYlvMvo#rUASHp>FT(aS1edv$;HAeT4d0 z43pW}vrAU2Sg}->jQZ0f1DCc?!7nVFhuXM+yaEnwc$RVe3D^KXC@qv?1Wm8IyW0oY z%xT#MVwlCu&=kL*RON;HHG`T6`S`S76=ZX3#_ygjV|f&>o2M)P&OhC~^4+hit2cCQrYAT3P!3Bj zbk$BKe39$emymy5a&}#10U*#Zdx4#$?$4^LnJ!+uNV8QI;s8R7ze7*HCPfpU_nMn% zuKEK|>{ft64quZtiC6g*yo}zhA6yim#FAs6YyOwBsZ|~DOGlbWI@rKsk;DAm<(^8% z)rW*CJw;L6wzBk|m-oino=rby)^z>CKPZ;3Qb>2wVE!R?c&|eOQoL~A_Cp#Mu(qVp zo!`Ok;r@#q7Cskg2u9LT{4X{tJ=(Yx+$L`AM#{>{^Uym0la;Kk(`uUxHf-1sCGRQY z;pr)9@Lyva=QYh@>N&v$A^Qpda%E*D9YtmzGDTl{R@tS0fV8YuB|LD7cA;ZGD+H#i zP;bUUV^toGeJ}I^J~)G0!cL(BTZIfPF!8-P2s9UX{01m!9(Y5v*iGCN?Q*()mc^&1 z=aJDqI&U__c-WjhD~fHMT5w?tHTd;w{OS7G;E<3Hik4)0mg2!Bjp^qOV%)I(V`bfL z49{=Wh^=`4K96q;hPr7j(X1ao-U&e31JnZkhVXcJ=1X9cJUnDK9E$Qmxtk-cn#zo==zKdOiLQt`jyVh7hV`_ zpu?)%?K*P5tqj$MIm&>+_o+9TXo#3iYmx4PppIW#wUbbHXMvcY3%=MG=Wz*Kb~ZtZRS;r2p@J&=6n@!MPt5={@G7YxlktP?5RfT8JTD{ec-(?A0_Ly z1IR24zy-9++LkiUJ@WIDCqTlol9Ce4kX~#t;?<$MQcEpzc;h^OuVP~7zxF^i%;M#B zL2H55)0URz2=UpuxmADwBo9Hl6NWzlgs`^NmYc8ap;b8#cdlh(auZ*C2e#;+J*xfkhzg>;e(FfnYplM{9WZTo9d~pRrpa zAWupG&bC=d?UREK!t593Juxs$baZqmi>nch?2!sxJ|1rzoPBX@ety32!qDp3xj7D~ zR0t6C6uCSej%=Qtr`|N24ae#=to!hR3k!;gA_dxDN6}7t28Pkr({biMT5>2K0Ehcv zD~XEPn5MUdh-3qG_nv`oZ^WL7&my`#Ngq(b8W3j9-LX*+YV?72=_pZA(QLCydNON3 z*hucv(u-8-n=>V$RLLvY9t^=(DH4^^UnBUwf6)GcvT?51oR=@jsIxX z*(xAm^Kl85A}yWco~#=wvWXkwy&KW~R!f}?CIvjU4wnfVh4-((f7c5M3uC8=pAEZq z%^$+WQ@B!^nlX9ypI>sI!dpvqfBkxSuOE{jbWf)*Z|EpesKIB#DRBP-*Lh z_Wb;r$vF8H?LorYJs14J#Ure_0H(^ijwsru5$Z)alQJ_i^Fe$)F*(u` zEv9mM6W1|K8rOomX#L9hH?6?of}HwS1hxw$sAoZLp@qGD^9G-|FSQ#)Q6S0L6%axk zu!3Q{8E8GNq;#pa4_r!$Y#mEFz?^jrweAp|E3iFP(b;J-#EXF{^q9)6mGboT%$KLD z0IE0`DW(E-mx#x=ZmpdC-EPOb-C~h)LQ~N_w}tsRhrtFxqo>X+8HV`;wu~TP$f`Pj zf1moGAOjORi9kSiGVCSXPaZQvPbTs4FECyY1BPF4 zo-in$zIFN-;=Q0n&U8?rTgD)RBJW|{O)IN5-gld_bA6Ll?y@#Hg|JLnHK2n?1oCBA zybbIGCVTIOoBi&4^&DJXUAs&Fbne5AgYyY^`BKf``0)oY-KhLL82^%AeYLcs2!#K* z(|6nQZ6Z@TeRk*+T#j zG+%tO5BcFzaj|=9h{2cjrzLhvNgcIy-bMb-D_n<#61Cew=v+r3?r+(BtFV!WfroB%RUT7i{?h1NnLSD#=c-EC4bXV`gLXi;d+6DP;WlB9`+a%AoY3WJyU0)RTr(osB)c>5z1) zUcEX{=(;s|&ig~A@fxBsTo>H8?&d3UIo(ukM#L%ZHc_L(<-|lb{d?#tEF}+6ySh$% zg2$Bx{W6E-*M7@HTSVKwX?9E=*Sr30@}JvZ|Qp*r0$^BRo?%%9IjKmWS6 zmR8R}kB60o_zm3CM`A2kOp?#4UcO|$aNz>fi`QcoX}znrnNhIraBy;7f|y#-(!z@X z@dD3ne(s#f!!lUXIJmi)P~Yf6RD79F00@{v6<62YN8;p{LJfh8nEq&W_s~#?UD;Gp zg=GQQGj@C}rbS3lkPbz1l!AAe`e7(8&%zYugCPT%W4jWA8N?3~&tV1OD!OnORguiu zmTgFsMz)Qee8fM8bZ3wJL!K=S!gT~kGuVyE$w@#TBUhbpH4#Lb;1s1huz-L4211ZWvl_;jSoq7{RA9m9z=urnMoL?P3zkInpZ~;&X<`^#5nAvZ2TL)G= znax!(KNPc@W5vcJZ(Pcih2UqqIIvxQ>hlT@kWGvQse0+$W%JYDEj}D;BqT&LSHO-t zqFw16sg5_)6Wxw5AR`CD8pT9I8FWPiAh3Q7{Vi@3Qy{AoP@<_kwSTk&MauN}SDoiC zUa*8gV)Ji*-G4hK<{HS)c}%hsk{*hlPTWn-6i96b2iWAPj8 z0tYaaMkTHsSbQ(AwA3>$7{}kNJdaFd@Z{WD)B*TiHuG~2sk^L)ZmF+_07vMMrMd^A zPnn-&N>rlL;Qf;j3OfC7T1dxzevkj00QDN#=f3V;EeizniiFM&8-G@ zfLba|+>KCwZ>fKJs=U$92B~lamb`tbx)yDDR#;6sMT>V_FAaqrbAS9B2)E zmx{0Lql}EPPmx_QpLz@4smQD{TDx{_&)^_4WdPR@f@#FC-7aOj*lJ{8BdM`f|ELL!* zdDziDJ}X>ZKtvf1twbVFpcDdcz0b&q4+47ljT`iAY-|W+ieixu!2Vk6GjdmhDI9RjQ%p)_o?aNbf>)mbADwZ*^e$p#Wcj1S4;aX_j4!1r616XK8ON$sm$4WUAZ21d z$%Ee2ZTnR2EpYI*XeG#JXdLtiY=EyJF7`Cb2e#H}#6FV6r+S>tX`{H0%NW+=C&GkG z{pv?mxfK5;pbU0O3+zk`I3xZU2$-=>LL%?w9LlmPAVrW2DcXsw1dl#4F6Q=LnA=rw zww0bLgg=fXVtAI9LjiaJ+wsWMK-{zE&ljQiUpZ}H>RKO+%vwu4MIrlN^*{`K7}Qo8H_jp*r7!`f`p#wGGrspb(K1 zAm?No(;_T@ehEKMlu@t&LK8Dx$Fm{p@5PViXQznTpQfJ`8nR{=d6K`Fml!w*Zw@oH zfA=($@*kgyc?h0Gd}OAGwn5|yetxlz10txdu=id7Noyvmb5TA!=>m>%Oy%3MQ4?M% zr42WGz(LW3(-nUYeIW%23P*|p*}xE>6yKBy`yH|C!fCPoEd19iAr=IjpjuyhZC=R= zLwAAhx8y4aXc1LFRFkzV>C!I0YNvW7=!6n#zayPO(dPaYkO|hw@WKA2WMHZLSy^FN zJ$=9kRr6SMl@%5DTjpRlKY8+GH!{L=@WzcrPNxCjdm108f~wKIn;iZ4kp7`V*G7^f zBDetR-?#PWy|Vh#Rfa`vZfl#-6pE@@P5AE~Z^~^^WBnGH9Yc<7pYvwyOer%6cukqRZ>d}Q zK1q39bZnp1frAJAey!zE;Ch(%ee>bF2vZfnuY%lH8&Q@aza_WKEnd7> z7&Iv&?lVwH#Nk6&fy7DHS`(OfA@ScF_WBQQK9pz8nl1iWy_<$Es_PsuJ#G$#hJi|W z@WA}c8H1!6xE}xYjy^>k5al+X%X}Q{u9ds@T0(BdDc=M8+PJ3Nu%k%zmUQ^8KcTDl zA{FKU2mI60fGUn`1Y~_ec4A)M#3`6E4+G3RBXuNvl8;PESA*XS3<+7vEoKxxiCxX5F@gp~}+hY?7=D9(#B-H@~&^}E6ZqyEN zE)f?xaY0NkbRNjr=1JGuoJTMT(lDXhMqX`->)+7PJu>0VnougxJ?YM7DV#)lL&6^L{M(@=+ z03EoqWo(mkSrfK5Cy+?$1al4WhXIcs?x5+WJ$4lmGdg z4ilGE9X$9A2`X%hn3#HjxD3iXq#r$c1ZpI!>D=!hts|qOm4tYJL+ntj>?#3|5jxy~ zAAt61pwAj?D7az#QNGhtP%!DB$&C-S^B`^#m-@#{nE6to`WbIMFJKz%wtA;U|zcP1vn7BDqC%BPC`)du32*e!ED>koumlg z?lEakyk3xzC?B8-bF_aD|0_XJB(}y)ev`SkEE7`#bS+$cZIvJPbmY`1WK}S)BZG~? z&ObX=L5hLRhd7d;0?Av~dFWvylw6!p-|92(awOf)&#%qsBXVDf$CS41PaCbv$_b83 z6}d+Hikh_fv$hT>NMchk>8IVA#zYWW`aQHr-gD9-j_duKnrLCp^C%xl2jWoYkLnEuBlR4A0JQTSX5fGW9>h0U5sC|xFIiZ|{ zoLN;@w~XrU>7fIn6;=&pwuQ6G9tJ!BCf0VM7X9pyHh<1dL1S8n(2{A~sE`JQAnj#a zfo=M)7L@!%agIWW5UCI2?+6W0EZJI^Yw^`fJcu+L6nnTk6m`1izuSd zVl)RoX56iHBHcL`&N+u42`i*;^$y2_RfDlLN)uRm7)u(NF%T>zG46v>69kn9wSTZJ ze+y++<8r5DeiZOdc9;>C{P zXq%dxG()(GlCa`LObtRMcmjw6Kk8Xc96L_)?t62AuMce?gF-(LV@cOF(Sgc=$^G|_ zN^n{vh`=M!gRIh0=pejf#|}L8jnfM+hQ5O?0o|gTK3CpO{DtrUEQM^SZF3y02~-7q z^K^>5;crLb=ueeX;K?oIxoIjvnvzh4VgnR?h_b7V^UQ)}C z`Y5nRrYCIBAmMTswmvWdVHyf^x*4`d3gA#@qCfeoCRT0@qP1A|J;?N4lRZHRw+=woiYK!O z*0XA%oT{DMFN5tsb6Ioibw8L{vpW<0y}g$p1Cf%Pu;^4bh<9Qzs2#M2WC|?Gg3c?C zEOW@Wm*V3EL;2;>a(C?ixjE?RAtYZ&uz=e@)N|>uj1Xr_+3c@_M9b07G}0>OfA|P( zN8Fh3y?7$cd~3ZQ;34a+m=ixlqNpbpn^ikcbw93`_e0p1vg;(mP=wVr;r-VjkS^~2 z{{1RuG#+^bT$&`=zCbu9P?Qs>N{nEfob>zl?K9W&#kWTc#++fpfFXYSCLM%vil$TW zLA^lK!j1nOq88eelno4ce-C%D!+1!dFurBeCwexb+mrl9*hO?tjZ`icAUn0{8(re3 zNQglwz6jYVYLHDq;31m@Y`>oTDCj5Jh2hQ+EFZ#lq9z1otb~3BU9swDXuFV>$P)n~ zA#j-^L`O;59DQ}<8u#+euP-(pNNaG+)OL1oa68>z)WuJ0l<_@pn{M$n+2n=Sy!X~? zvrOzOu6S#Fky`e4d)YJlm^%xMHxMDUwthMPt)lO>xcYll^HQk)bJG)`oF3LPE>h+) zR&i&iqt(r*9q8QWIv_eE0#i`2iyfq?8*$5ceCxsrxxZu@z}*UhZEKJc$dpq+!SUS+uMIq(+Z2iLb18q)%phoa$yyT^eM$QXelASGwN5tr7y`u7LBy!=&_ zbsffsy97XLG&`>{)PNM@h3*2VLk`28avoFtkr1>$Iv-G`auqHfFaIvuYx21?)BNZu zCd#a_TDT#S_j7A?K{Vrrr9Epu0Hi+AZR?JH;59criQFM+%oKj5kmtaZ$@7Hkr^O%t z+9c0mA!p@wdp+7?tl5GWB2heZzkv&rwlqS%Fm{hiD9n{tUj7fw49XF{Kr^5+`itaX zg7TgnT?~+GoU+G^qj7;q{lb~4si}y1dtF@%Ywwvjb{zIYL3nL#P{e^+xRY(}kCSta~z7llFSFh^TGm^z*|D^XzDV z^Lz4SS4>O{gxhO?IwbE9H<#B`KM$f+_snp4v-2xuuG>oSK83gDPte|$1v~2gJ|CcB zvE9O(Wv`)K+l`D{bLWpJ1vWUEd%-ND}!-GvKwm0vzVzx5re-3m5F7 zWb!=UbL~?2MVP5(u8(?qF8olu%Dt7eQ=z-U4EO*s@=|$uxq5Nygfnz00iLfN3-*3^ zD3$ED9x??`|})`IYeNLxJt_h?>yQ(7z52czxSgx|%bN^C(`}^yV#eyV22wyA1!Yfpk`37X_## z1bze2ETiC)tVBHZLt&pLTLbxv_ywUEQ{CtTg|BRdl1<%TMN|xY`J<=59^4crb>B8L z4eK69bq#tV(g75&vhK)c`k0#st-Og4&$FhA%{R5|VU zoix?L1zqy^o{^Y1K+hG8)R&jEr^@q#BYqv~sqfgb@~W~$@N=0Njp$%!e~>MREsGPi zH!s<74>X*S4a~8cMP?dek$v5jewH62VZuN=H1WgS2SB27o9`<8H@Ms!Pa|n*d#|$!H@`zm|BA)2ncOTv>Hh6foNAt zqYvZf@|2bUoqovbNs&cPeE@6Rjd%mEcO}A&7K(%Tp9oy$ z@FEiMfqIa%*kKLcy13!ptL~?01U?+8sB7J6BddnKQU#&#BPKQnM6$Q-{CqF^m-$J( zeKw)U>X&h-fRL!@5*Yu9a7d~`^o!p*=gUYi5Mk0#-*TeAYZ6*PL&ou4J+)I!KM&%t z13Vm2AkjER!C1Wn=2qBNiF3Ch6P$iasrxQaJA@1)G#lhdSYNePjmG>ts2^^P>VZTx zato->3BiCfA@%~X2S_0?5Xl!o&gIupq2 zJv6&|f79&3)yLU(OODsmS!$nl)1j?gUF@gl;2lm*?m&ZlM($P_8JYTFr)#|1mDYDG zP~4fP6|s|?$2 zffBs`isMYE7f&ZrNWA7Iv=@eyqz9-3KyQ3-mPWv@rEhERS*w#-xw%>S>D~c_U8yj-5DZ8vPMFI; zm9cAUYfBhIw~FAr9LpOx&FJkAO@*IpnDTx_%;b=xw%dFZMGhh4BT*!XnG5tn>L>9# zq$Z(lxEDvGPKqy!d2H`+=+F}QZyZLt_Y&q7)$rhlbRJA&5a<${r|;DdY`9rE#={In zJyqB62p0mFmWuA?OE%|V@*rKVc>F6_Wz($C-_a0WgJ~n`9q~UwEBB4MN{d|cKbl$f zlp|=QSEv^qrhrJ#!4J_n3KX00ua>IKy$Bilz>Ni zbvCVBeYZ2|)d`*chhvrb7;Rz}gMn%tXQ{hjId0$qRNsEhRT#fl|5g4(es{1j)U7F8 z_N;ayN?B&D5oirK?0oa`@{|IO0_oU}OMI2)-ylnxOu3gY_W_Gx4sC;|sOXYmhT8}X zI62#d2w`BXi2J)&P>u!wkb)7lsJ*or=P|6D?tEe=@a4@NKdk=~ zs2hkRNYXV>bpj+@rQ)h_MhZ++6#_f4uDu7ef_071>;$19j>LaLrXVF%2OJ#)`v4Qe z+RwIS%a-`Ja3to~_bA0Ec=P||SFTl-r#QQx7@-Eq{Oib@KB3)6+6*Yt$p^ z;!;69_F&ydi}~Sp9S^9hD}j{i^j`3!S_Ma6ijy0aoueL`6r_Q-~z~ zUQUG21RGopirQbp6=z8i>tYPNcjR%<31AN|+AL~Vdl#3*6r?KjG%N$zrk)z}u2vCb z1!=5B0k~UQy2p=QzS~X`xRXl}g^Yg#D}@pV$i=q(Xv-=1%Rv&!y8mACx5y>s4Y3!3 zx`knm?j*bRTx3vC&`2pPeAXWyZ64QYDLdC$!h-r-FppjlSwZj8DMQ26nMOsIP)_U> zi%c(x1r+E;pdqGNIGo7n;=MjFz=46kirvNY^I`bU#m%T((FsoIIwDu zML1!t(cZ^}Gdc9+vCwE?P9MKZ?cAx;Di)y*4&ch(J zpr{de)Y9Jm4-+^o3EX@y@;gC9Fa*%ff{he^Tm}mSK~?ZIb%J1Lf(s0e$<*-(Vkba* zPt@4A@5U~nB6^5i&|fnD5=JTh07+Cz=Yj1g=;8l->!h0}G3^MA4l8W*%iA^PPb0OIHpac1O%qR;XG-zJMO}BOnUIPL8qERsqcgt>1qmZCwltY*!Ef!1)+h&Wn~FPh`%7E3p#qbr|hcA&y8;RR6F6 z2!XZ@!w()nFDG7ZwUGZh66124?afrjR4Xmxfw0XH>cnXvYEr@rWBF!cl*F8NR|T*Y zqFB8%u9smbi7jdN>c;vgne#M45+SJB6{?gmpC+yBj}SwPddb)tJ@!JMxjdV+W;h_ zT_kYJ2T$%(fts|H%SV6vC{&0PYDIF_il?zjk>7MeyaEW074Yrx;>_wGS$^6pA4o={^GVhe!99Okm6#FznD+|1TC1Pj&XX`VU}@=+xa)f49)urJJx zg|*FtJMAyJ#uRbyAH3eJfwFN?mEUpVan!V1x!`I*z+<+`Tp_x&w@&|gh>0eLR{xDV zcP_)>){UHlB3T(+7=bwRGw+roL8C%&%e;GA_;ec@wK*ES85b>Dw{D%{JIjW0XRm8r zknuqAs+P@!fmTK3cKZaWtd({7Fjm;Oq5qVUuukbHbg;>KOfHA8@Ek(#-|i%CC@#v% z!w<)F7~!WIg$0aQP|*)SPhI1eTMY#Yc297J^d$f`NqWw5YJHVrcuxa+3-}#dQPtdeNQ?pV9|fr zq>lz+lxhI>Ox@wv$c@fTa^)Xa6!&-B?W|tSTufQ4}%z zqZ9-o#R`Wr<%8Zh*fohw9OO40g`=ZL$w>enj*bE+MieIEr^BIsqnQBL$`8k!=X)Ok z-9CVwe0Y|YR&u+=y5as`Zvbd;!pfsxRuMr!w+)vh=UYHspvVa-mFOk}5?s-SURYF4 zOgL!eR`w9A@5Gs{t~i0dVI1QDR?zVC-&X0o5NI<;q9qR4E=w^IQ-!MxtUeQr2Kmy- zFW}>{B-Se6Ul@$wzG-~d!MAp8I95448qcs|q48%wd7_`*2>(C+P2~>XS|K374y`{Q zy=csM2{h7{h-O9QXq}Gs)3?u?D(hY-oBp;1G>}pM#{#Zb0My&z$Rsi-3ZZB*)jxjU zajMF%Pb8fY2@~Cw=PrjieQjFLC*#cgm&C-G9&{9pLV9ievURO-a04MD=g)(@As~Wk z<+1Dk5Q(#CkXsw{9T1rxCMA6-HWlF{O>fFIDWd3^y{}8VIyc)S-^`W4fB&=O*R&X? z(yyPTWDj4AwJUyf?MJ(ixLh97G?QC-KyzM7sKeSoy(p)*f38=#H+LPYv-NU9LA%^) zFmmN#{=2k|*xrn(XtswkNLt=tkb(7wgk#cd0v#Ru`REm{+hOnkVD)e8nga_^a$uj} zi|T5jty?o%>cxkgj2pkvshix)td(y5=Lu0B?f-H@6zh-22{AThK`T3p=j#mf&cqiS z7UAw|t3K>Hmgiw-dd~1MQ}hvD{mdGXwD*-pn4ui z!!U?bauRU@BW=iO7{I3nqxydOu(~>D+vjU9zg#Q%g5`wcVOF$pL9QYK>aS1r{ROP` zot9HbBGW(`PzgFgSm^=D z3-}B$Xo2Gk$XRuwTep&i5EdR94pJN@&=|&7arnh5u&7Yc`X(nQH~fKQvF&sn-;b_x z0m6m@P-r5H62OakfTxWsNqpVRiVvv2x=|66E?j&y>4T9t(?E`&R4N8wCeA~`X_7-? za!#m{yF%k+s}}gNqZ2~S~W{`^ZomoV3rPm z|3wJSLQ+$rqy~~rYU)D2IBCy+{``3b9Qp`5)u-b;nHU)Sgqn}?)6U!D6|n-lE|VXr z#DT)Q>l`2IV#1F{gv~m6x#DmlU9ao-8+LC0i4$x2`0VFr-4Ng-cKt!=2)Il4a{f#T zzwC5nQE1H@T z@1}2gxdP;6(vh}=UzQiqElS#H10eyAA3shSuSi<~j*fuS!T}Z-(sqnJr&s)us8x96 zYGP3e4rV3|DrmGqSxIT8l#wG9a0(T%`jbvO1byP-mBtfP(E*U4~iR-x=5doN(WqPWZ@ur zGawrGC#0+&qk0A|n(ZCkL*KCLAPARbf(d_>wsJDc$!$N^%rW^NZRT7ax0 zs|L`YZQY*hoj6m7nCoK)Kmfw0egfqxs8l6W;LkHi6XD-CqUj97A_B>_2NKxg>uWi2 zK;S{rW(CO}KG(v+ZmqxBZU+xp> z0HBl`PL0Kd#l;L1%2Is6V)O|lstt(2P!C8|RdE~-+W7di#t&$dI7e^tdsaEgVZ^_H z|5CY5%0@;p>h{L)gp(FZv}>{2Vwn-g0#fR;*484m^AU>?8KDQ&?JUG`o}^hGZ4NHs z-e!=}6I3Q~e8qCG1uzv6KDMe1VNwa?9jX5R{82?7>p{(lMbrbE54>9|+N4lz;?zxY z>KZxj5*h@l?}()cZA}g5&^$++hOp!NLV+VmnV8)Wjf(;aJ9ZBh4NE+i&+;lqCI_D% zU!Bn`S9R+tyiVqb61!b~@}bb*d?+1NLb&0Aw9NyC);J@~p^l}siTU(Xx3LWvDN=4i z=+u3+6wD3bx8V_CEHcXcb&+6x7^xC)!zAKB@56gfYA~K{FW5?y`@);QNpqWN|GQoQ zrTY+_NpBTWhJQo|=>+{%f+IG~TXQXOQbYi3d+1*Fzgp`=6mztelML*azTtDMw{|od zQSaIUCSuLt1h$tSodoy}BxV%qAEt5K2RRye^ook53aZ&q03UMZ1nR)8 zzl?5_th`W~4}3#BSBwSQVO@k|m4gFF=-VaHZ;lujkcq>j)bt5XCbmJKDZQlbL?~9r zOp*D3x$@h_oa_J`{NS{~eCbZt5?DsH3cqoih`4@T^YirB#Un~e^e_mpuba{7NA8Yb zE2Sgge)(l(xFuI)W=z=~Ml?aTTwgX){Ah5z*M{)F(7!Rk{$SrLVE=*Lkn}(j!zzvd zJ{(Do5eT@BvcKC~__5;eg|Qb!?kCM>NbyW8?xc7I0rM$K`zYfBaG*sZCMHv5Z-hNI6BQtGSjrTpg5@Lxxm_ zVv|D(Q4XWcB}`E(ib^Ia=i`ujOeAL=s4Qu6EaLZkXYSpP-Mf#!-{be!>=D=1^}W8| z&*%Mqzh1BB2dmeFd$}5J6>i>BSa8{?_gYs3;FuDHT5Ng*fz7BzK&}pDl;@tlWut9? z2`k>$!n$GIKJF4DgBl4JmzFkSHX)=7TAZb9U!tA2ZJTPnC@TYTyaz670Rbt5f0IBM ztQ7&Hum?A8@ha`!`uvwubj!e)jY!ipj{%jyV2`n%iED{KDNKyjq#E7M)TE(q6d>71 zEsu@m1coEYq3-Q1b49vt(b}at6|d2%RiDqy4`aq&y(q@=vkNZCuQoc4)KNW|U8}8S zp5Ut%jl*zo$nAc^SjAYrFWI6<;u$f<&ZW_=f~o8sEAkzYU-kwGe12Ei!|8Z-!JtscvP zl}jCNMF%$7M<}m_n6HobPtUQs^1Af985uNmvPPhfQ@_aLi~XcqCfJOohwGk=O&x!v zJv$2HZucJ8^CoUgBZar5LP+Qg5+?A?)TFnP5`s~tHW%>(HaXBwzf#3%X2k0#bnC^0 zAkFk)Bkf0r1b(q~c}Tl}7zsCe4f%GTx8=j#kN0O>V+w<$=zk-4ysOwFsf~6#xUkT* zfr4)^tVi{R8_YyftFy%#9#Zsg&l5sg_nq01rgTG9D4ski@#q*WGq&&r3s0)%;f1Oy zC)e55+2gWjHb5qxS7zo+UBy+2H%uNnT34SE5Z3w(5xFP z`q+s|Hd4{-epJU|H`4@bX`TLlK!C4gUG46>K_TJ$URkbokIsx85f?7D0|vA$7t$g(I&GU<~zr*z0O&KEt3+@^-iBV|UQR6ijaCW0<2Bjn= zpEMI*o$Cmm3){PPuuQV`6haU0rW}5MI5F z(ATRMk0hytIZkd)P#)fCKe5wiFg8bv^u5{lC-2ptDxY*WAJ|hlZ&Ui>$XrJ!pT#@O zFP<&*+BI&V4Do0#ex$sZ2t=JlcxP%!?cKA@TQOs2y?8rg7baoCS0vZ;6jTR8`G&Ap zE*4pUdaU^~mV5&#L+Q>(9q1k7$=^25Yt+AucLOOXG^32SA`OK1)YsH)xz52h= z4VQY(#!Z{7GX#b_t!MXFe~y=paP9>fCAkL-OZJ?mfii8?IG;=5MRJ9>v2NwxgXI{Y z3t8@-k%>u{Kd|IXuH#V{uEAxPA~M@ObB%NUwgzt(U!^0;l={7Ko9jnA{|l^gn`X_T zeWhA{$h9JgF!J{Y-m;U=?%Va| znXC7|M0#{ihkJ7yps=d(m<_E?Ky>FO>uLxDl|B zb@jU1XIWZVm`hqEZ~noc1-gnC-R<9-VH~u#_s;Hbw)}c1#q<3V|Euf$4H(J9!e5;2 z>E$pWFK&=g`$aV`J0*Ue+~`!pLE}cfTkW(J`j*pkGY^9uGV`!#8u*Wh!Vqi?vxihX6$L?Vq`=796x-WO) z4i$)^^~;C8&LP`=-EQvDCR(K*ytAyi4Xzv2-i_oPCB3I`$7 zwIztdgH0?A)u&EQ)UJP?{vl<&@D!`OChYgQ_&ZX!TRlNj6ZR+1Pjb1uvqktH61Q!+ zuh1OiN7iWI3C^$|OZyo}WVlP0E~d%YPRr-cLR02}ZrG^6NSOBGnjq05COKl}XZPG< zE?G|`gw6$7U7C!`*J;2eAV~V%+HXs7vW!iaeZHlCrN}&M>}z$UNTc#$UQO{itZuKo zgS(A>dgouM*h^S}lU=vJ$L#BIU@#gqiKz}uMI6||rS)N%mhhWJCEe*VmOJ{|!$;xc zH*3>oB`q@L_b|W5oLe9y5Nc1^Mx^dR;h8{vHMzz*N_EZ?R9?!cb-Pwhgo&)3q;h^ARxLkX9?_LCws$-XL8z@jqgb@gm zdW2sI7^OxAriFA4$FrDG#$y7vXdnlK2#=A5gK8T340tBpujUPyn~AjH1xt+#bCC_*-;tBcE=kgMH>2S9*J7 z&U5~@`;n{_r9Nf-r~c%Vkh|iT2FWIh@xt8$cZj}2r|WilMWzRJCZmrPbqnGuj_>p< z+n%|J(xQE%0_)EwPHfO|e&*%t*S)UBZQh*v!#j-L{Lpt8 zy{UI?m-B#md2RYRy)SNc^xv4O*KbBl-1>^FPt4rUSLkg|h^<&LdgQdcdj+E}u6yF$ z{C8D}kTZhxZIv0ddi5#h|3&xje-!wb^A)I}+#mJ{CH91l^_IFUS%65Jm7UT%uSL(U zXNBO6kuJzAG3$$4rnKlX=s|B+qfDR+7x7NgUc*>g8p^Z$=de2@3?IKfXoP~75wAC4 zxS1dncnW-Jn7vPEBK6iKxjDpDbn@Hi^61E*H-}D#k<(rnnmSTx+-gZc6ET=WA#lI8 z{_$M$D9o`Px7aU~Uy^y1PUFUcU7^%+ZDVEtk5xKd1|M)ikKrCAw6i2MJ>nQ-kR{;K z@jm!cHWMs5$urP1={BGQq6G{MtGvst$8v3zcPB`f!k(wusScF_0QqN>L#8W zT05kS^4qV^{u3_OPZJgu-vWl{?juV(TAHyk{;d|d&!=9CY&&Yo93#W~TC!`otXYFifnt|ruy{>Mub<6X{KFikko; zeJ8DqOwN!Cmpi!d{)eiwYj*~Yomag(cT%wSz`s;@9*~Qf$Eye z4$8!j>;F}la-{z4pQ&bN&k7O~)c?m(9sdV6Mt*&iSjNokpb6L##liEr^~P;{T%LyD z+D+ZV^sN3_rr*X#>{aTGzBBX9ffPzFAn}NLvo_vtF+$>Mh-X%?$#N~XWwQRP9bqQ@ zev7{|(qji(>=1txDcIISM&#;#6L={Rus9~*nz%|};4>ojO9sDzv)jxC%FpnYLn1E0 zz%8q zZnZso*;r~q6HAj^-yfCz*t6CCdFkPKc6SC8RQ(uS_l0rg!o0|Sn-R~L zt0ub*eYmow=*=rPvpU#U;(BIap22Aa$P6WHrunnS^xv6L_L8>}8VHeq zRd{m>G2@#ik3Mjy!h7nJDdJE{9U+ZsMyD^lJtw^v)+3Ja zal04NZ%x&mBjZC)8L8uE@@Z5OETE#6lbh<(CkVjd6k~qb*03F@w zRfte?{4p=qNU_;&B$xFhr<`#(9l?s`R7bAYZW9(aa8JCUH~+BsIM9+L0L}|IN&CQs zrw>>+HODKhN10drD20Lyvxw1*dbW2MJ%{?P=Czp#B*!&*I3LT{nNppAu1*6ZadIo^)IS*93-di*3z`<;RRY zq;%`M*lX)96A969m|G4b9QYeQqLq24LSFT%M)gPQfET&=0((p}*t)3VFP z#*gY~VW9^~-kwZ`%GAYpmu1UYq1lo=n!IOd=hF-6C&GcsKQ=GkE?7|dU*h=NLnD|VSS4AdQgQra}vLjh%|DsUa zn;QgJs5t5#0fsE*zY@rp`QU2L;W3`RXb7j!^lf|^0bJU)>%LZ`>Hw)0U6fY`G>r@n zWkNI4dx=)LjAz*wy%$|T$&YgPW}(BiYTa6s-GP-`BoPMXD3f+dbM;4VbfZqEIE2h1 znqj3dVTd*+Ir}wKU{@8}AWlcrO5?1?Iyzg~Nc|V4!SO)a^uMtPmz_TO2!<3gz!0=A zw0n(2IYib)r;gw^NkHN7B=jM|D5egeh&!J0nN=W+N@91ZyO~UaxPYbaN9YxIuGu5= z@-9x?07V!w5hI0|R<(dK%6Jv~tn*!V!6j*FmXI(umc3@s$}?=jxmRO%*WLk|Abxd3 zE*xwMp3xJ?`65u`$lRGZ93%!Jyc`W6vv}4=F_|-S<_Y^g?-{B@_RJzJX2BlG{RKsb zkHVJ=6>@&gPtkF4B4>~eOCW7PY~4hZAD%SEA0~A=%?zW%TpLCvU!`UL6@L)-Ytqp# zxki0w73&cB&~|mbr{nbi;P- z01U=k|6+@mth;dYpwlEQE5h#f;l1K+vspm_b-V=Y|)C&?5+l|If!*L0zJ^f+$+<=k`= zC=vW$lBbpWMvBU->*1|1Ga)z9iZ#XJ*(_$Ec5-J$iSkZH!dC1JRA|K*KbE5Zb1?+k zBxY!2MT?e0LNE+qXcUVQ&}NC+QfwS!T)}79{&PB2xg=t6#YUk=3o(PysvI#Rd46u4 zNvM?(J1%j&Fu%r7PMhE&Uu5I=Ju-!&KsZ+<)zvqiUmV(DB1)qSC*>BxOC+uo=o`nd zxaA}$4Ao80NYqhJVd*oblc}eOPTYxITj z-FRm#s#0^X#DNv*AVw=i7;a*wtubZ8_4SHDLdt2(bZ@zj&C?U=>mtsXLG?=s>| zt}q9(gw>Hd z<&)dLc_q0%PTZum?8$YBJ|Fz$m&*ttU=5WGzP#e`33M Date: Fri, 5 Jul 2024 12:03:27 -0700 Subject: [PATCH 084/124] Fix the calc functions --- cpp/examples/tpch/q1.cpp | 27 +++++++++++++----------- cpp/examples/tpch/q5.cpp | 24 +++++++++++---------- cpp/examples/tpch/q6.cpp | 13 ++++++------ cpp/examples/tpch/q9.cpp | 42 ++++++++++++++++++++++--------------- cpp/examples/tpch/utils.hpp | 11 +++------- 5 files changed, 63 insertions(+), 54 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 4034caaade3..451f92488da 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -46,32 +46,33 @@ order by l_linestatus; */ -std::unique_ptr calc_disc_price(std::unique_ptr& table) { +std::unique_ptr calc_disc_price( + cudf::column_view discount, cudf::column_view extendedprice) { auto one = cudf::fixed_point_scalar(1); - auto discount = table->column("l_discount"); - auto one_minus_discount = cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type()); - auto extended_price = table->column("l_extendedprice"); + auto one_minus_discount = cudf::binary_operation( + one, discount, cudf::binary_operator::SUB, discount.type()); auto disc_price_scale = cudf::binary_operation_fixed_point_scale( cudf::binary_operator::MUL, - table->column_type("l_extendedprice").scale(), + extendedprice.type().scale(), one_minus_discount->type().scale() ); auto disc_price_type = cudf::data_type{cudf::type_id::DECIMAL64, disc_price_scale}; - auto disc_price = cudf::binary_operation(extended_price, one_minus_discount->view(), cudf::binary_operator::MUL, disc_price_type); + auto disc_price = cudf::binary_operation( + extendedprice, one_minus_discount->view(), cudf::binary_operator::MUL, disc_price_type); return disc_price; } -std::unique_ptr calc_charge(std::unique_ptr& table, std::unique_ptr& disc_price) { +std::unique_ptr calc_charge(cudf::column_view tax, cudf::column_view disc_price) { auto one = cudf::fixed_point_scalar(1); - auto tax = table->column("l_tax"); auto one_plus_tax = cudf::binary_operation(one, tax, cudf::binary_operator::ADD, tax.type()); auto charge_scale = cudf::binary_operation_fixed_point_scale( cudf::binary_operator::MUL, - disc_price->type().scale(), + disc_price.type().scale(), one_plus_tax->type().scale() ); auto charge_type = cudf::data_type{cudf::type_id::DECIMAL64, charge_scale}; - auto charge = cudf::binary_operation(disc_price->view(), one_plus_tax->view(), cudf::binary_operator::MUL, charge_type); + auto charge = cudf::binary_operation( + disc_price, one_plus_tax->view(), cudf::binary_operator::MUL, charge_type); return charge; } @@ -105,8 +106,10 @@ int main(int argc, char const** argv) { ); // Calculate the discount price and charge columns and append to lineitem table - auto disc_price = calc_disc_price(lineitem); - auto charge = calc_charge(lineitem, disc_price); + auto disc_price = calc_disc_price( + lineitem->column("l_discount"), lineitem->column("l_extendedprice")); + auto charge = calc_charge( + lineitem->column("l_tax"), disc_price->view()); auto appended_table = lineitem->append(disc_price, "disc_price")->append(charge, "charge"); // Perform the group by operation diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index d8166a65f37..047f877f5e9 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -54,19 +54,20 @@ order by revenue desc; */ -std::unique_ptr calc_revenue(std::unique_ptr& table) { +std::unique_ptr calc_revenue( + cudf::column_view extendedprice, cudf::column_view discount) { auto one = cudf::fixed_point_scalar(1, -2); - auto disc = table->column("l_discount"); - auto one_minus_disc = cudf::binary_operation(one, disc, cudf::binary_operator::SUB, disc.type()); - auto extended_price = table->column("l_extendedprice"); - auto disc_price_scale = cudf::binary_operation_fixed_point_scale( + auto one_minus_discount = cudf::binary_operation( + one, discount, cudf::binary_operator::SUB, discount.type()); + auto revenue_scale = cudf::binary_operation_fixed_point_scale( cudf::binary_operator::MUL, - table->column_type("l_extendedprice").scale(), - one_minus_disc->type().scale() + extendedprice.type().scale(), + one_minus_discount->type().scale() ); - auto disc_price_type = cudf::data_type{cudf::type_id::DECIMAL64, disc_price_scale}; - auto disc_price = cudf::binary_operation(extended_price, one_minus_disc->view(), cudf::binary_operator::MUL, disc_price_type); - return disc_price; + auto revenue_type = cudf::data_type{cudf::type_id::DECIMAL64, revenue_scale}; + auto revenue = cudf::binary_operation( + extendedprice, one_minus_discount->view(), cudf::binary_operator::MUL, revenue_type); + return revenue; } int main(int argc, char const** argv) { @@ -173,7 +174,8 @@ int main(int argc, char const** argv) { ); // Calculate and append the `revenue` column - auto revenue = calc_revenue(joined_table); + auto revenue = calc_revenue( + joined_table->column("l_extendedprice"), joined_table->column("l_discount")); auto appended_table = joined_table->append(revenue, "revenue"); // Perform the groupby operation diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index b47a9772203..43fdf2aeb6f 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -35,13 +35,12 @@ where and l_quantity < 24; */ -std::unique_ptr calc_revenue(std::unique_ptr& table) { - auto extendedprice = table->column("l_extendedprice"); - auto discount = table->column("l_discount"); +std::unique_ptr calc_revenue( + cudf::column_view extendedprice, cudf::column_view discount) { auto revenue_scale = cudf::binary_operation_fixed_point_scale( cudf::binary_operator::MUL, - table->column_type("l_extendedprice").scale(), - table->column_type("l_discount").scale() + extendedprice.type().scale(), + discount.type().scale() ); auto revenue_type = cudf::data_type{ cudf::type_id::DECIMAL64, revenue_scale}; @@ -145,7 +144,9 @@ int main(int argc, char const** argv) { auto filtered_table = apply_filter(appended_table, discount_quantity_pred); // Calculate the `revenue` column - auto revenue = calc_revenue(filtered_table); + auto revenue = calc_revenue( + filtered_table->column("l_extendedprice"), + filtered_table->column("l_discount")); // Sum the `revenue` column auto revenue_view = revenue->view(); diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index 766038a67d4..1351e5e808b 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -64,28 +64,31 @@ order by o_year desc; */ -std::unique_ptr calc_amount(std::unique_ptr& table) { +std::unique_ptr calc_amount( + cudf::column_view discount, cudf::column_view extendedprice, + cudf::column_view supplycost, cudf::column_view quantity) { auto one = cudf::fixed_point_scalar(1); - auto discount = table->column("l_discount"); - auto one_minus_discount = cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type()); - auto extended_price = table->column("l_extendedprice"); - auto extended_price_discounted_scale = cudf::binary_operation_fixed_point_scale( + auto one_minus_discount = cudf::binary_operation( + one, discount, cudf::binary_operator::SUB, discount.type()); + auto extendedprice_discounted_scale = cudf::binary_operation_fixed_point_scale( cudf::binary_operator::MUL, - table->column_type("l_extendedprice").scale(), + extendedprice.type().scale(), one_minus_discount->type().scale() ); - auto extended_price_discounted_type = cudf::data_type{cudf::type_id::DECIMAL64, extended_price_discounted_scale}; - auto extended_price_discounted = cudf::binary_operation(extended_price, one_minus_discount->view(), cudf::binary_operator::MUL, extended_price_discounted_type); - auto supply_cost = table->column("ps_supplycost"); - auto quantity = table->column("l_quantity"); - auto supply_cost_quantity_scale = cudf::binary_operation_fixed_point_scale( + auto extendedprice_discounted_type = cudf::data_type{ + cudf::type_id::DECIMAL64, extendedprice_discounted_scale}; + auto extendedprice_discounted = cudf::binary_operation( + extendedprice, one_minus_discount->view(), cudf::binary_operator::MUL, extendedprice_discounted_type); + auto supplycost_quantity_scale = cudf::binary_operation_fixed_point_scale( cudf::binary_operator::MUL, - table->column_type("ps_supplycost").scale(), - table->column_type("l_quantity").scale() + supplycost.type().scale(), + quantity.type().scale() ); - auto supply_cost_quantity_type = cudf::data_type{cudf::type_id::DECIMAL64, supply_cost_quantity_scale}; - auto supply_cost_quantity = cudf::binary_operation(supply_cost, quantity, cudf::binary_operator::MUL, supply_cost_quantity_type); - auto amount = cudf::binary_operation(extended_price_discounted->view(), supply_cost_quantity->view(), cudf::binary_operator::SUB, extended_price_discounted->type()); + auto supplycost_quantity_type = cudf::data_type{cudf::type_id::DECIMAL64, supplycost_quantity_scale}; + auto supplycost_quantity = cudf::binary_operation( + supplycost, quantity, cudf::binary_operator::MUL, supplycost_quantity_type); + auto amount = cudf::binary_operation( + extendedprice_discounted->view(), supplycost_quantity->view(), cudf::binary_operator::SUB, extendedprice_discounted->type()); return amount; } @@ -151,7 +154,12 @@ int main(int argc, char const** argv) { // Calculate the `nation`, `o_year`, and `amount` columns auto n_name = std::make_unique(joined_table->column("n_name")); auto o_year = cudf::datetime::extract_year(joined_table->column("o_orderdate")); - auto amount = calc_amount(joined_table); + auto amount = calc_amount( + joined_table->column("l_discount"), + joined_table->column("l_extendedprice"), + joined_table->column("ps_supplycost"), + joined_table->column("l_quantity") + ); // Put together the `profit` table std::vector> profit_columns; diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index b8ed7068669..a0b538c3358 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -70,6 +70,9 @@ class table_with_cols { table_with_cols( std::unique_ptr tbl, std::vector col_names) : tbl(std::move(tbl)), col_names(col_names) {} + /** + * @brief Return the table view + */ cudf::table_view table() { return tbl->view(); } @@ -81,14 +84,6 @@ class table_with_cols { cudf::column_view column(std::string col_name) { return tbl->view().column(col_id(col_name)); } - /** - * @brief Return the data type of a column - * - * @param col_name The name of the column - */ - cudf::data_type column_type(std::string col_name) { - return tbl->view().column(col_id(col_name)).type(); - } /** * @param Return the column names of the table */ From 4ae4538209e718b8d3eead0a6d7b6407c88dbbc8 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 5 Jul 2024 14:16:56 -0700 Subject: [PATCH 085/124] Run clang-format --- cpp/examples/tpch/q1.cpp | 210 ++++++------- cpp/examples/tpch/q5.cpp | 237 ++++++--------- cpp/examples/tpch/q6.cpp | 214 ++++++------- cpp/examples/tpch/q9.cpp | 234 +++++++-------- cpp/examples/tpch/utils.hpp | 579 ++++++++++++++++++------------------ 5 files changed, 667 insertions(+), 807 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 451f92488da..12aaa7c437b 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -14,12 +14,12 @@ * limitations under the License. */ +#include "utils.hpp" + #include #include #include -#include "utils.hpp" - /* create view lineitem as select * from '~/tpch_sf1/lineitem/part-0.parquet'; @@ -46,121 +46,101 @@ order by l_linestatus; */ -std::unique_ptr calc_disc_price( - cudf::column_view discount, cudf::column_view extendedprice) { - auto one = cudf::fixed_point_scalar(1); - auto one_minus_discount = cudf::binary_operation( - one, discount, cudf::binary_operator::SUB, discount.type()); - auto disc_price_scale = cudf::binary_operation_fixed_point_scale( - cudf::binary_operator::MUL, - extendedprice.type().scale(), - one_minus_discount->type().scale() - ); - auto disc_price_type = cudf::data_type{cudf::type_id::DECIMAL64, disc_price_scale}; - auto disc_price = cudf::binary_operation( - extendedprice, one_minus_discount->view(), cudf::binary_operator::MUL, disc_price_type); - return disc_price; +std::unique_ptr calc_disc_price(cudf::column_view discount, + cudf::column_view extendedprice) +{ + auto one = cudf::fixed_point_scalar(1); + auto one_minus_discount = + cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type()); + auto disc_price_scale = cudf::binary_operation_fixed_point_scale( + cudf::binary_operator::MUL, extendedprice.type().scale(), one_minus_discount->type().scale()); + auto disc_price_type = cudf::data_type{cudf::type_id::DECIMAL64, disc_price_scale}; + auto disc_price = cudf::binary_operation( + extendedprice, one_minus_discount->view(), cudf::binary_operator::MUL, disc_price_type); + return disc_price; } -std::unique_ptr calc_charge(cudf::column_view tax, cudf::column_view disc_price) { - auto one = cudf::fixed_point_scalar(1); - auto one_plus_tax = cudf::binary_operation(one, tax, cudf::binary_operator::ADD, tax.type()); - auto charge_scale = cudf::binary_operation_fixed_point_scale( - cudf::binary_operator::MUL, - disc_price.type().scale(), - one_plus_tax->type().scale() - ); - auto charge_type = cudf::data_type{cudf::type_id::DECIMAL64, charge_scale}; - auto charge = cudf::binary_operation( - disc_price, one_plus_tax->view(), cudf::binary_operator::MUL, charge_type); - return charge; +std::unique_ptr calc_charge(cudf::column_view tax, cudf::column_view disc_price) +{ + auto one = cudf::fixed_point_scalar(1); + auto one_plus_tax = cudf::binary_operation(one, tax, cudf::binary_operator::ADD, tax.type()); + auto charge_scale = cudf::binary_operation_fixed_point_scale( + cudf::binary_operator::MUL, disc_price.type().scale(), one_plus_tax->type().scale()); + auto charge_type = cudf::data_type{cudf::type_id::DECIMAL64, charge_scale}; + auto charge = cudf::binary_operation( + disc_price, one_plus_tax->view(), cudf::binary_operator::MUL, charge_type); + return charge; } -int main(int argc, char const** argv) { - auto args = parse_args(argc, argv); - - // Use a memory pool - auto resource = create_memory_resource(args.memory_resource_type); - rmm::mr::set_current_device_resource(resource.get()); - - Timer timer; - - // Define the column projections and filter predicate for `lineitem` table - std::vector lineitem_cols = { - "l_returnflag", "l_linestatus", "l_quantity", "l_extendedprice", "l_discount", "l_shipdate", "l_orderkey", "l_tax"}; - auto shipdate_ref = cudf::ast::column_reference( - std::distance(lineitem_cols.begin(), std::find(lineitem_cols.begin(), lineitem_cols.end(), "l_shipdate"))); - auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); - auto shipdate_upper_literal = cudf::ast::literal(shipdate_upper); - auto lineitem_pred = std::make_unique( - cudf::ast::ast_operator::LESS_EQUAL, - shipdate_ref, - shipdate_upper_literal - ); - - // Read out the `lineitem` table from parquet file - auto lineitem = read_parquet( - args.dataset_dir + "lineitem/part-0.parquet", - lineitem_cols, - std::move(lineitem_pred) - ); - - // Calculate the discount price and charge columns and append to lineitem table - auto disc_price = calc_disc_price( - lineitem->column("l_discount"), lineitem->column("l_extendedprice")); - auto charge = calc_charge( - lineitem->column("l_tax"), disc_price->view()); - auto appended_table = lineitem->append(disc_price, "disc_price")->append(charge, "charge"); - - // Perform the group by operation - auto groupedby_table = apply_groupby( - appended_table, - groupby_context_t { - {"l_returnflag", "l_linestatus"}, - { - { - "l_extendedprice", - { - {cudf::aggregation::Kind::SUM, "sum_base_price"}, - {cudf::aggregation::Kind::MEAN, "avg_price"} - } - }, - { - "l_quantity", - { - {cudf::aggregation::Kind::SUM, "sum_qty"}, - {cudf::aggregation::Kind::MEAN, "avg_qty"} - } - }, - { - "l_discount", - { - {cudf::aggregation::Kind::MEAN, "avg_disc"}, - } - }, - { - "disc_price", - { - {cudf::aggregation::Kind::SUM, "sum_disc_price"}, - } - }, - { - "charge", - { - {cudf::aggregation::Kind::SUM, "sum_charge"}, - {cudf::aggregation::Kind::COUNT_ALL, "count_order"} - } - }, - } - } - ); - - // Perform the order by operation - auto orderedby_table = apply_orderby(groupedby_table, {"l_returnflag", "l_linestatus"}, {cudf::order::ASCENDING, cudf::order::ASCENDING}); - - timer.print_elapsed_millis(); - - // Write query result to a parquet file - orderedby_table->to_parquet("q1.parquet"); - return 0; +int main(int argc, char const** argv) +{ + auto args = parse_args(argc, argv); + + // Use a memory pool + auto resource = create_memory_resource(args.memory_resource_type); + rmm::mr::set_current_device_resource(resource.get()); + + Timer timer; + + // Define the column projections and filter predicate for `lineitem` table + std::vector lineitem_cols = {"l_returnflag", + "l_linestatus", + "l_quantity", + "l_extendedprice", + "l_discount", + "l_shipdate", + "l_orderkey", + "l_tax"}; + auto shipdate_ref = cudf::ast::column_reference(std::distance( + lineitem_cols.begin(), std::find(lineitem_cols.begin(), lineitem_cols.end(), "l_shipdate"))); + auto shipdate_upper = + cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); + auto shipdate_upper_literal = cudf::ast::literal(shipdate_upper); + auto lineitem_pred = std::make_unique( + cudf::ast::ast_operator::LESS_EQUAL, shipdate_ref, shipdate_upper_literal); + + // Read out the `lineitem` table from parquet file + auto lineitem = read_parquet( + args.dataset_dir + "lineitem/part-0.parquet", lineitem_cols, std::move(lineitem_pred)); + + // Calculate the discount price and charge columns and append to lineitem table + auto disc_price = + calc_disc_price(lineitem->column("l_discount"), lineitem->column("l_extendedprice")); + auto charge = calc_charge(lineitem->column("l_tax"), disc_price->view()); + auto appended_table = lineitem->append(disc_price, "disc_price")->append(charge, "charge"); + + // Perform the group by operation + auto groupedby_table = apply_groupby( + appended_table, + groupby_context_t{ + {"l_returnflag", "l_linestatus"}, + { + {"l_extendedprice", + {{cudf::aggregation::Kind::SUM, "sum_base_price"}, + {cudf::aggregation::Kind::MEAN, "avg_price"}}}, + {"l_quantity", + {{cudf::aggregation::Kind::SUM, "sum_qty"}, {cudf::aggregation::Kind::MEAN, "avg_qty"}}}, + {"l_discount", + { + {cudf::aggregation::Kind::MEAN, "avg_disc"}, + }}, + {"disc_price", + { + {cudf::aggregation::Kind::SUM, "sum_disc_price"}, + }}, + {"charge", + {{cudf::aggregation::Kind::SUM, "sum_charge"}, + {cudf::aggregation::Kind::COUNT_ALL, "count_order"}}}, + }}); + + // Perform the order by operation + auto orderedby_table = apply_orderby(groupedby_table, + {"l_returnflag", "l_linestatus"}, + {cudf::order::ASCENDING, cudf::order::ASCENDING}); + + timer.print_elapsed_millis(); + + // Write query result to a parquet file + orderedby_table->to_parquet("q1.parquet"); + return 0; } diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 047f877f5e9..a6ee2663ad3 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -14,12 +14,12 @@ * limitations under the License. */ +#include "utils.hpp" + #include #include #include -#include "utils.hpp" - /* create view customer as select * from '~/tpch_sf1/customer/part-0.parquet'; create view orders as select * from '~/tpch_sf1/orders/part-0.parquet'; @@ -54,147 +54,98 @@ order by revenue desc; */ -std::unique_ptr calc_revenue( - cudf::column_view extendedprice, cudf::column_view discount) { - auto one = cudf::fixed_point_scalar(1, -2); - auto one_minus_discount = cudf::binary_operation( - one, discount, cudf::binary_operator::SUB, discount.type()); - auto revenue_scale = cudf::binary_operation_fixed_point_scale( - cudf::binary_operator::MUL, - extendedprice.type().scale(), - one_minus_discount->type().scale() - ); - auto revenue_type = cudf::data_type{cudf::type_id::DECIMAL64, revenue_scale}; - auto revenue = cudf::binary_operation( - extendedprice, one_minus_discount->view(), cudf::binary_operator::MUL, revenue_type); - return revenue; +std::unique_ptr calc_revenue(cudf::column_view extendedprice, + cudf::column_view discount) +{ + auto one = cudf::fixed_point_scalar(1, -2); + auto one_minus_discount = + cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type()); + auto revenue_scale = cudf::binary_operation_fixed_point_scale( + cudf::binary_operator::MUL, extendedprice.type().scale(), one_minus_discount->type().scale()); + auto revenue_type = cudf::data_type{cudf::type_id::DECIMAL64, revenue_scale}; + auto revenue = cudf::binary_operation( + extendedprice, one_minus_discount->view(), cudf::binary_operator::MUL, revenue_type); + return revenue; } -int main(int argc, char const** argv) { - auto args = parse_args(argc, argv); - - // Use a memory pool - auto resource = create_memory_resource(args.memory_resource_type); - rmm::mr::set_current_device_resource(resource.get()); - - Timer timer; - - // Define the column projection and filter predicate for the `orders` table - std::vector orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; - auto o_orderdate_ref = cudf::ast::column_reference( - std::distance(orders_cols.begin(), std::find(orders_cols.begin(), orders_cols.end(), "o_orderdate")) - ); - auto o_orderdate_lower = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); - auto o_orderdate_lower_limit = cudf::ast::literal(o_orderdate_lower); - auto o_orderdate_pred_lower = cudf::ast::operation( - cudf::ast::ast_operator::GREATER_EQUAL, - o_orderdate_ref, - o_orderdate_lower_limit - ); - auto o_orderdate_upper = cudf::timestamp_scalar(days_since_epoch(1995, 1, 1), true); - auto o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper); - auto o_orderdate_pred_upper = cudf::ast::operation( - cudf::ast::ast_operator::LESS, - o_orderdate_ref, - o_orderdate_upper_limit - ); - auto orders_pred = std::make_unique( - cudf::ast::ast_operator::LOGICAL_AND, - o_orderdate_pred_lower, - o_orderdate_pred_upper - ); - - // Define the column projection and filter predicate for the `region` table - std::vector region_cols = {"r_regionkey", "r_name"}; - auto r_name_ref = cudf::ast::column_reference( - std::distance(region_cols.begin(), std::find(region_cols.begin(), region_cols.end(), "r_name"))); - auto r_name_value = cudf::string_scalar("ASIA"); - auto r_name_literal = cudf::ast::literal(r_name_value); - auto region_pred = std::make_unique( - cudf::ast::ast_operator::EQUAL, - r_name_ref, - r_name_literal - ); - - // Read out the tables from parquet files - // while pushing down the column projections and filter predicates - auto customer = read_parquet( - args.dataset_dir + "customer/part-0.parquet", {"c_custkey", "c_nationkey"}); - auto orders = read_parquet( - args.dataset_dir + "orders/part-0.parquet", - orders_cols, - std::move(orders_pred) - ); - auto lineitem = read_parquet( - args.dataset_dir + "lineitem/part-0.parquet", - {"l_orderkey", "l_suppkey", "l_extendedprice", "l_discount"} - ); - auto supplier = read_parquet( - args.dataset_dir + "supplier/part-0.parquet", {"s_suppkey", "s_nationkey"} - ); - auto nation = read_parquet( - args.dataset_dir + "nation/part-0.parquet", {"n_nationkey", "n_regionkey", "n_name"} - ); - auto region = read_parquet( - args.dataset_dir + "region/part-0.parquet", - region_cols, - std::move(region_pred) - ); - - // Perform the joins - auto join_a = apply_inner_join( - region, - nation, - {"r_regionkey"}, - {"n_regionkey"} - ); - auto join_b = apply_inner_join( - join_a, - customer, - {"n_nationkey"}, - {"c_nationkey"} - ); - auto join_c = apply_inner_join( - join_b, - orders, - {"c_custkey"}, - {"o_custkey"} - ); - auto join_d = apply_inner_join( - join_c, - lineitem, - {"o_orderkey"}, - {"l_orderkey"} - ); - auto joined_table = apply_inner_join( - supplier, - join_d, - {"s_suppkey", "s_nationkey"}, - {"l_suppkey", "n_nationkey"} - ); - - // Calculate and append the `revenue` column - auto revenue = calc_revenue( - joined_table->column("l_extendedprice"), joined_table->column("l_discount")); - auto appended_table = joined_table->append(revenue, "revenue"); - - // Perform the groupby operation - auto groupedby_table = apply_groupby( - appended_table, - groupby_context_t { - {"n_name"}, - { - {"revenue", {{cudf::aggregation::Kind::SUM, "revenue"}}}, - } - }); - - // Perform the order by operation - auto orderedby_table = apply_orderby( - groupedby_table, {"revenue"}, {cudf::order::DESCENDING}); - - timer.print_elapsed_millis(); - - // Write query result to a parquet file - orderedby_table->to_parquet("q5.parquet"); - return 0; +int main(int argc, char const** argv) +{ + auto args = parse_args(argc, argv); + + // Use a memory pool + auto resource = create_memory_resource(args.memory_resource_type); + rmm::mr::set_current_device_resource(resource.get()); + + Timer timer; + + // Define the column projection and filter predicate for the `orders` table + std::vector orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; + auto o_orderdate_ref = cudf::ast::column_reference(std::distance( + orders_cols.begin(), std::find(orders_cols.begin(), orders_cols.end(), "o_orderdate"))); + auto o_orderdate_lower = + cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); + auto o_orderdate_lower_limit = cudf::ast::literal(o_orderdate_lower); + auto o_orderdate_pred_lower = cudf::ast::operation( + cudf::ast::ast_operator::GREATER_EQUAL, o_orderdate_ref, o_orderdate_lower_limit); + auto o_orderdate_upper = + cudf::timestamp_scalar(days_since_epoch(1995, 1, 1), true); + auto o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper); + auto o_orderdate_pred_upper = + cudf::ast::operation(cudf::ast::ast_operator::LESS, o_orderdate_ref, o_orderdate_upper_limit); + auto orders_pred = std::make_unique( + cudf::ast::ast_operator::LOGICAL_AND, o_orderdate_pred_lower, o_orderdate_pred_upper); + + // Define the column projection and filter predicate for the `region` table + std::vector region_cols = {"r_regionkey", "r_name"}; + auto r_name_ref = cudf::ast::column_reference(std::distance( + region_cols.begin(), std::find(region_cols.begin(), region_cols.end(), "r_name"))); + auto r_name_value = cudf::string_scalar("ASIA"); + auto r_name_literal = cudf::ast::literal(r_name_value); + auto region_pred = std::make_unique( + cudf::ast::ast_operator::EQUAL, r_name_ref, r_name_literal); + + // Read out the tables from parquet files + // while pushing down the column projections and filter predicates + auto customer = + read_parquet(args.dataset_dir + "customer/part-0.parquet", {"c_custkey", "c_nationkey"}); + auto orders = + read_parquet(args.dataset_dir + "orders/part-0.parquet", orders_cols, std::move(orders_pred)); + auto lineitem = read_parquet(args.dataset_dir + "lineitem/part-0.parquet", + {"l_orderkey", "l_suppkey", "l_extendedprice", "l_discount"}); + auto supplier = + read_parquet(args.dataset_dir + "supplier/part-0.parquet", {"s_suppkey", "s_nationkey"}); + auto nation = read_parquet(args.dataset_dir + "nation/part-0.parquet", + {"n_nationkey", "n_regionkey", "n_name"}); + auto region = + read_parquet(args.dataset_dir + "region/part-0.parquet", region_cols, std::move(region_pred)); + + // Perform the joins + auto join_a = apply_inner_join(region, nation, {"r_regionkey"}, {"n_regionkey"}); + auto join_b = apply_inner_join(join_a, customer, {"n_nationkey"}, {"c_nationkey"}); + auto join_c = apply_inner_join(join_b, orders, {"c_custkey"}, {"o_custkey"}); + auto join_d = apply_inner_join(join_c, lineitem, {"o_orderkey"}, {"l_orderkey"}); + auto joined_table = + apply_inner_join(supplier, join_d, {"s_suppkey", "s_nationkey"}, {"l_suppkey", "n_nationkey"}); + + // Calculate and append the `revenue` column + auto revenue = + calc_revenue(joined_table->column("l_extendedprice"), joined_table->column("l_discount")); + auto appended_table = joined_table->append(revenue, "revenue"); + + // Perform the groupby operation + auto groupedby_table = + apply_groupby(appended_table, + groupby_context_t{{"n_name"}, + { + {"revenue", {{cudf::aggregation::Kind::SUM, "revenue"}}}, + }}); + + // Perform the order by operation + auto orderedby_table = apply_orderby(groupedby_table, {"revenue"}, {cudf::order::DESCENDING}); + + timer.print_elapsed_millis(); + + // Write query result to a parquet file + orderedby_table->to_parquet("q5.parquet"); + return 0; } diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 43fdf2aeb6f..d44bcf1b8d7 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -14,12 +14,12 @@ * limitations under the License. */ +#include "utils.hpp" + #include #include #include -#include "utils.hpp" - /* create view lineitem as select * from '~/tpch_sf1/lineitem/part-0.parquet'; @@ -30,135 +30,95 @@ from where l_shipdate >= date '1994-01-01' and l_shipdate < date '1995-01-01' - and l_discount >= 0.05 + and l_discount >= 0.05 and l_discount <= 0.07 and l_quantity < 24; */ -std::unique_ptr calc_revenue( - cudf::column_view extendedprice, cudf::column_view discount) { - auto revenue_scale = cudf::binary_operation_fixed_point_scale( - cudf::binary_operator::MUL, - extendedprice.type().scale(), - discount.type().scale() - ); - auto revenue_type = cudf::data_type{ - cudf::type_id::DECIMAL64, revenue_scale}; - auto revenue = cudf::binary_operation( - extendedprice, - discount, - cudf::binary_operator::MUL, - revenue_type - ); - return revenue; +std::unique_ptr calc_revenue(cudf::column_view extendedprice, + cudf::column_view discount) +{ + auto revenue_scale = cudf::binary_operation_fixed_point_scale( + cudf::binary_operator::MUL, extendedprice.type().scale(), discount.type().scale()); + auto revenue_type = cudf::data_type{cudf::type_id::DECIMAL64, revenue_scale}; + auto revenue = + cudf::binary_operation(extendedprice, discount, cudf::binary_operator::MUL, revenue_type); + return revenue; } -int main(int argc, char const** argv) { - auto args = parse_args(argc, argv); - - // Use a memory pool - auto resource = create_memory_resource(args.memory_resource_type); - rmm::mr::set_current_device_resource(resource.get()); - - Timer timer; - - // Read out the `lineitem` table from parquet file - std::vector lineitem_cols = {"l_extendedprice", "l_discount", "l_shipdate", "l_quantity"}; - auto shipdate_ref = cudf::ast::column_reference( - std::distance(lineitem_cols.begin(), std::find(lineitem_cols.begin(), lineitem_cols.end(), "l_shipdate")) - ); - auto shipdate_lower = cudf::timestamp_scalar( - days_since_epoch(1994, 1, 1), true); - auto shipdate_lower_literal = cudf::ast::literal(shipdate_lower); - auto shipdate_upper = cudf::timestamp_scalar( - days_since_epoch(1995, 1, 1), true); - auto shipdate_upper_literal = cudf::ast::literal(shipdate_upper); - auto shipdate_pred_a = cudf::ast::operation( - cudf::ast::ast_operator::GREATER_EQUAL, - shipdate_ref, - shipdate_lower_literal - ); - auto shipdate_pred_b = cudf::ast::operation( - cudf::ast::ast_operator::LESS, - shipdate_ref, - shipdate_upper_literal - ); - auto lineitem_pred = std::make_unique( - cudf::ast::ast_operator::LOGICAL_AND, - shipdate_pred_a, - shipdate_pred_b - ); - auto lineitem = read_parquet( - args.dataset_dir + "lineitem/part-0.parquet", - lineitem_cols, - std::move(lineitem_pred) - ); - - // Cast the discount and quantity columns to float32 and append to lineitem table - auto discout_float = cudf::cast( - lineitem->column("l_discount"), cudf::data_type{cudf::type_id::FLOAT32}); - auto quantity_float = cudf::cast( - lineitem->column("l_quantity"), cudf::data_type{cudf::type_id::FLOAT32}); - auto appended_table = lineitem - ->append(discout_float, "l_discount_float") - ->append(quantity_float, "l_quantity_float"); - - // Apply the filters - auto discount_ref = cudf::ast::column_reference( - appended_table->col_id("l_discount_float")); - auto quantity_ref = cudf::ast::column_reference( - appended_table->col_id("l_quantity_float") - ); - - auto discount_lower = cudf::numeric_scalar(0.05); - auto discount_lower_literal = cudf::ast::literal(discount_lower); - auto discount_upper = cudf::numeric_scalar(0.07); - auto discount_upper_literal = cudf::ast::literal(discount_upper); - auto quantity_upper = cudf::numeric_scalar(24); - auto quantity_upper_literal = cudf::ast::literal(quantity_upper); - - auto discount_pred_a = cudf::ast::operation( - cudf::ast::ast_operator::GREATER_EQUAL, - discount_ref, - discount_lower_literal - ); - - auto discount_pred_b = cudf::ast::operation( - cudf::ast::ast_operator::LESS_EQUAL, - discount_ref, - discount_upper_literal - ); - auto discount_pred = cudf::ast::operation( - cudf::ast::ast_operator::LOGICAL_AND, discount_pred_a, discount_pred_b - ); - auto quantity_pred = cudf::ast::operation( - cudf::ast::ast_operator::LESS, - quantity_ref, - quantity_upper_literal - ); - auto discount_quantity_pred = cudf::ast::operation( - cudf::ast::ast_operator::LOGICAL_AND, - discount_pred, - quantity_pred - ); - auto filtered_table = apply_filter(appended_table, discount_quantity_pred); - - // Calculate the `revenue` column - auto revenue = calc_revenue( - filtered_table->column("l_extendedprice"), - filtered_table->column("l_discount")); - - // Sum the `revenue` column - auto revenue_view = revenue->view(); - auto result_table = apply_reduction( - revenue_view, - cudf::aggregation::Kind::SUM, - "revenue" - ); - - timer.print_elapsed_millis(); - - // Write query result to a parquet file - result_table->to_parquet("q6.parquet"); - return 0; +int main(int argc, char const** argv) +{ + auto args = parse_args(argc, argv); + + // Use a memory pool + auto resource = create_memory_resource(args.memory_resource_type); + rmm::mr::set_current_device_resource(resource.get()); + + Timer timer; + + // Read out the `lineitem` table from parquet file + std::vector lineitem_cols = { + "l_extendedprice", "l_discount", "l_shipdate", "l_quantity"}; + auto shipdate_ref = cudf::ast::column_reference(std::distance( + lineitem_cols.begin(), std::find(lineitem_cols.begin(), lineitem_cols.end(), "l_shipdate"))); + auto shipdate_lower = + cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); + auto shipdate_lower_literal = cudf::ast::literal(shipdate_lower); + auto shipdate_upper = + cudf::timestamp_scalar(days_since_epoch(1995, 1, 1), true); + auto shipdate_upper_literal = cudf::ast::literal(shipdate_upper); + auto shipdate_pred_a = cudf::ast::operation( + cudf::ast::ast_operator::GREATER_EQUAL, shipdate_ref, shipdate_lower_literal); + auto shipdate_pred_b = + cudf::ast::operation(cudf::ast::ast_operator::LESS, shipdate_ref, shipdate_upper_literal); + auto lineitem_pred = std::make_unique( + cudf::ast::ast_operator::LOGICAL_AND, shipdate_pred_a, shipdate_pred_b); + auto lineitem = read_parquet( + args.dataset_dir + "lineitem/part-0.parquet", lineitem_cols, std::move(lineitem_pred)); + + // Cast the discount and quantity columns to float32 and append to lineitem table + auto discout_float = + cudf::cast(lineitem->column("l_discount"), cudf::data_type{cudf::type_id::FLOAT32}); + auto quantity_float = + cudf::cast(lineitem->column("l_quantity"), cudf::data_type{cudf::type_id::FLOAT32}); + auto appended_table = + lineitem->append(discout_float, "l_discount_float")->append(quantity_float, "l_quantity_float"); + + // Apply the filters + auto discount_ref = cudf::ast::column_reference(appended_table->col_id("l_discount_float")); + auto quantity_ref = cudf::ast::column_reference(appended_table->col_id("l_quantity_float")); + + auto discount_lower = cudf::numeric_scalar(0.05); + auto discount_lower_literal = cudf::ast::literal(discount_lower); + auto discount_upper = cudf::numeric_scalar(0.07); + auto discount_upper_literal = cudf::ast::literal(discount_upper); + auto quantity_upper = cudf::numeric_scalar(24); + auto quantity_upper_literal = cudf::ast::literal(quantity_upper); + + auto discount_pred_a = cudf::ast::operation( + cudf::ast::ast_operator::GREATER_EQUAL, discount_ref, discount_lower_literal); + + auto discount_pred_b = + cudf::ast::operation(cudf::ast::ast_operator::LESS_EQUAL, discount_ref, discount_upper_literal); + auto discount_pred = + cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, discount_pred_a, discount_pred_b); + auto quantity_pred = + cudf::ast::operation(cudf::ast::ast_operator::LESS, quantity_ref, quantity_upper_literal); + auto discount_quantity_pred = + cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, discount_pred, quantity_pred); + auto filtered_table = apply_filter(appended_table, discount_quantity_pred); + + // Calculate the `revenue` column + auto revenue = + calc_revenue(filtered_table->column("l_extendedprice"), filtered_table->column("l_discount")); + + // Sum the `revenue` column + auto revenue_view = revenue->view(); + auto result_table = apply_reduction(revenue_view, cudf::aggregation::Kind::SUM, "revenue"); + + timer.print_elapsed_millis(); + + // Write query result to a parquet file + result_table->to_parquet("q6.parquet"); + return 0; } diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index 1351e5e808b..a103eed52e3 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -14,13 +14,13 @@ * limitations under the License. */ +#include "utils.hpp" + #include -#include #include -#include +#include #include - -#include "utils.hpp" +#include /* create view part as select * from '~/tpch_sf1/part/part-0.parquet'; @@ -64,135 +64,103 @@ order by o_year desc; */ -std::unique_ptr calc_amount( - cudf::column_view discount, cudf::column_view extendedprice, - cudf::column_view supplycost, cudf::column_view quantity) { - auto one = cudf::fixed_point_scalar(1); - auto one_minus_discount = cudf::binary_operation( - one, discount, cudf::binary_operator::SUB, discount.type()); - auto extendedprice_discounted_scale = cudf::binary_operation_fixed_point_scale( - cudf::binary_operator::MUL, - extendedprice.type().scale(), - one_minus_discount->type().scale() - ); - auto extendedprice_discounted_type = cudf::data_type{ - cudf::type_id::DECIMAL64, extendedprice_discounted_scale}; - auto extendedprice_discounted = cudf::binary_operation( - extendedprice, one_minus_discount->view(), cudf::binary_operator::MUL, extendedprice_discounted_type); - auto supplycost_quantity_scale = cudf::binary_operation_fixed_point_scale( - cudf::binary_operator::MUL, - supplycost.type().scale(), - quantity.type().scale() - ); - auto supplycost_quantity_type = cudf::data_type{cudf::type_id::DECIMAL64, supplycost_quantity_scale}; - auto supplycost_quantity = cudf::binary_operation( - supplycost, quantity, cudf::binary_operator::MUL, supplycost_quantity_type); - auto amount = cudf::binary_operation( - extendedprice_discounted->view(), supplycost_quantity->view(), cudf::binary_operator::SUB, extendedprice_discounted->type()); - return amount; +std::unique_ptr calc_amount(cudf::column_view discount, + cudf::column_view extendedprice, + cudf::column_view supplycost, + cudf::column_view quantity) +{ + auto one = cudf::fixed_point_scalar(1); + auto one_minus_discount = + cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type()); + auto extendedprice_discounted_scale = cudf::binary_operation_fixed_point_scale( + cudf::binary_operator::MUL, extendedprice.type().scale(), one_minus_discount->type().scale()); + auto extendedprice_discounted_type = + cudf::data_type{cudf::type_id::DECIMAL64, extendedprice_discounted_scale}; + auto extendedprice_discounted = cudf::binary_operation(extendedprice, + one_minus_discount->view(), + cudf::binary_operator::MUL, + extendedprice_discounted_type); + auto supplycost_quantity_scale = cudf::binary_operation_fixed_point_scale( + cudf::binary_operator::MUL, supplycost.type().scale(), quantity.type().scale()); + auto supplycost_quantity_type = + cudf::data_type{cudf::type_id::DECIMAL64, supplycost_quantity_scale}; + auto supplycost_quantity = cudf::binary_operation( + supplycost, quantity, cudf::binary_operator::MUL, supplycost_quantity_type); + auto amount = cudf::binary_operation(extendedprice_discounted->view(), + supplycost_quantity->view(), + cudf::binary_operator::SUB, + extendedprice_discounted->type()); + return amount; } -int main(int argc, char const** argv) { - auto args = parse_args(argc, argv); - - // Use a memory pool - auto resource = create_memory_resource(args.memory_resource_type); - rmm::mr::set_current_device_resource(resource.get()); - - Timer timer; - - // Read out the table from parquet files - auto lineitem = read_parquet( - args.dataset_dir + "lineitem/part-0.parquet", - {"l_suppkey", "l_partkey", "l_orderkey", "l_extendedprice", "l_discount", "l_quantity"}); - auto nation = read_parquet(args.dataset_dir + "nation/part-0.parquet", {"n_nationkey", "n_name"}); - auto orders = read_parquet(args.dataset_dir + "orders/part-0.parquet", {"o_orderkey", "o_orderdate"}); - auto part = read_parquet(args.dataset_dir + "part/part-0.parquet", {"p_partkey", "p_name"}); - auto partsupp = read_parquet(args.dataset_dir + "partsupp/part-0.parquet", - {"ps_suppkey", "ps_partkey", "ps_supplycost"}); - auto supplier = read_parquet(args.dataset_dir + "supplier/part-0.parquet", {"s_suppkey", "s_nationkey"}); - - // Generating the `profit` table - // Filter the part table using `p_name like '%green%'` - auto p_name = part->table().column(1); - auto mask = cudf::strings::like( - cudf::strings_column_view(p_name), cudf::string_scalar("%green%")); - auto part_filtered = apply_mask(part, mask); - - // Perform the joins - auto join_a = apply_inner_join( - lineitem, - supplier, - {"l_suppkey"}, - {"s_suppkey"} - ); - auto join_b = apply_inner_join( - join_a, - partsupp, - {"l_suppkey", "l_partkey"}, - {"ps_suppkey", "ps_partkey"} - ); - auto join_c = apply_inner_join( - join_b, - part_filtered, - {"l_partkey"}, - {"p_partkey"} - ); - auto join_d = apply_inner_join( - join_c, - orders, - {"l_orderkey"}, - {"o_orderkey"} - ); - auto joined_table = apply_inner_join( - join_d, - nation, - {"s_nationkey"}, - {"n_nationkey"} - ); - - // Calculate the `nation`, `o_year`, and `amount` columns - auto n_name = std::make_unique(joined_table->column("n_name")); - auto o_year = cudf::datetime::extract_year(joined_table->column("o_orderdate")); - auto amount = calc_amount( - joined_table->column("l_discount"), - joined_table->column("l_extendedprice"), - joined_table->column("ps_supplycost"), - joined_table->column("l_quantity") - ); - - // Put together the `profit` table - std::vector> profit_columns; - profit_columns.push_back(std::move(n_name)); - profit_columns.push_back(std::move(o_year)); - profit_columns.push_back(std::move(amount)); - - auto profit_table = std::make_unique(std::move(profit_columns)); - auto profit = std::make_unique( - std::move(profit_table), - std::vector{"nation", "o_year", "amount"} - ); - - // Perform the groupby operation - auto groupedby_table = apply_groupby( - profit, - groupby_context_t { - {"nation", "o_year"}, - { - {"amount", {{cudf::groupby_aggregation::SUM, "sum_profit"}}} - } - } - ); - - // Perform the orderby operation - auto orderedby_table = apply_orderby( - groupedby_table, - {"nation", "o_year"}, - {cudf::order::ASCENDING, cudf::order::DESCENDING} - ); - - timer.print_elapsed_millis(); - - // Write query result to a parquet file - orderedby_table->to_parquet("q9.parquet"); +int main(int argc, char const** argv) +{ + auto args = parse_args(argc, argv); + + // Use a memory pool + auto resource = create_memory_resource(args.memory_resource_type); + rmm::mr::set_current_device_resource(resource.get()); + + Timer timer; + + // Read out the table from parquet files + auto lineitem = read_parquet( + args.dataset_dir + "lineitem/part-0.parquet", + {"l_suppkey", "l_partkey", "l_orderkey", "l_extendedprice", "l_discount", "l_quantity"}); + auto nation = read_parquet(args.dataset_dir + "nation/part-0.parquet", {"n_nationkey", "n_name"}); + auto orders = + read_parquet(args.dataset_dir + "orders/part-0.parquet", {"o_orderkey", "o_orderdate"}); + auto part = read_parquet(args.dataset_dir + "part/part-0.parquet", {"p_partkey", "p_name"}); + auto partsupp = read_parquet(args.dataset_dir + "partsupp/part-0.parquet", + {"ps_suppkey", "ps_partkey", "ps_supplycost"}); + auto supplier = + read_parquet(args.dataset_dir + "supplier/part-0.parquet", {"s_suppkey", "s_nationkey"}); + + // Generating the `profit` table + // Filter the part table using `p_name like '%green%'` + auto p_name = part->table().column(1); + auto mask = + cudf::strings::like(cudf::strings_column_view(p_name), cudf::string_scalar("%green%")); + auto part_filtered = apply_mask(part, mask); + + // Perform the joins + auto join_a = apply_inner_join(lineitem, supplier, {"l_suppkey"}, {"s_suppkey"}); + auto join_b = + apply_inner_join(join_a, partsupp, {"l_suppkey", "l_partkey"}, {"ps_suppkey", "ps_partkey"}); + auto join_c = apply_inner_join(join_b, part_filtered, {"l_partkey"}, {"p_partkey"}); + auto join_d = apply_inner_join(join_c, orders, {"l_orderkey"}, {"o_orderkey"}); + auto joined_table = apply_inner_join(join_d, nation, {"s_nationkey"}, {"n_nationkey"}); + + // Calculate the `nation`, `o_year`, and `amount` columns + auto n_name = std::make_unique(joined_table->column("n_name")); + auto o_year = cudf::datetime::extract_year(joined_table->column("o_orderdate")); + auto amount = calc_amount(joined_table->column("l_discount"), + joined_table->column("l_extendedprice"), + joined_table->column("ps_supplycost"), + joined_table->column("l_quantity")); + + // Put together the `profit` table + std::vector> profit_columns; + profit_columns.push_back(std::move(n_name)); + profit_columns.push_back(std::move(o_year)); + profit_columns.push_back(std::move(amount)); + + auto profit_table = std::make_unique(std::move(profit_columns)); + auto profit = std::make_unique( + std::move(profit_table), std::vector{"nation", "o_year", "amount"}); + + // Perform the groupby operation + auto groupedby_table = apply_groupby( + profit, + groupby_context_t{{"nation", "o_year"}, + {{"amount", {{cudf::groupby_aggregation::SUM, "sum_profit"}}}}}); + + // Perform the orderby operation + auto orderedby_table = apply_orderby( + groupedby_table, {"nation", "o_year"}, {cudf::order::ASCENDING, cudf::order::DESCENDING}); + + timer.print_elapsed_millis(); + + // Write query result to a parquet file + orderedby_table->to_parquet("q9.parquet"); } diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index a0b538c3358..54b9a1cd7b6 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -14,186 +14,186 @@ * limitations under the License. */ -#include -#include -#include - -#include -#include -#include +#include +#include #include +#include #include -#include +#include +#include +#include #include +#include +#include #include -#include #include -#include -#include -#include -#include #include #include -#include #include -#include +#include #include +#include +#include +#include +#include // RMM memory resource creation utilities inline auto make_cuda() { return std::make_shared(); } -inline auto make_pool() { - return rmm::mr::make_owning_wrapper( - make_cuda(), rmm::percent_of_free_device_memory(50)); +inline auto make_pool() +{ + return rmm::mr::make_owning_wrapper( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_managed() { return std::make_shared(); } -inline auto make_managed_pool() { - return rmm::mr::make_owning_wrapper( - make_managed(), rmm::percent_of_free_device_memory(50)); +inline auto make_managed_pool() +{ + return rmm::mr::make_owning_wrapper( + make_managed(), rmm::percent_of_free_device_memory(50)); } inline std::shared_ptr create_memory_resource( - std::string const& mode) { - if (mode == "cuda") return make_cuda(); - if (mode == "pool") return make_pool(); - if (mode == "managed") return make_managed(); - if (mode == "managed_pool") return make_managed_pool(); - CUDF_FAIL("Unknown rmm_mode parameter: " + mode + - "\nExpecting: cuda, pool, managed, or managed_pool"); + std::string const& mode) +{ + if (mode == "cuda") return make_cuda(); + if (mode == "pool") return make_pool(); + if (mode == "managed") return make_managed(); + if (mode == "managed_pool") return make_managed_pool(); + CUDF_FAIL("Unknown rmm_mode parameter: " + mode + + "\nExpecting: cuda, pool, managed, or managed_pool"); } /** * @brief A class to represent a table with column names attached */ class table_with_cols { - public: - table_with_cols( - std::unique_ptr tbl, std::vector col_names) - : tbl(std::move(tbl)), col_names(col_names) {} - /** - * @brief Return the table view - */ - cudf::table_view table() { - return tbl->view(); - } - /** - * @brief Return the column view for a given column name - * - * @param col_name The name of the column - */ - cudf::column_view column(std::string col_name) { - return tbl->view().column(col_id(col_name)); - } - /** - * @param Return the column names of the table - */ - std::vector columns() { - return col_names; - } - /** - * @brief Translate a column name to a column index - * - * @param col_name The name of the column - */ - cudf::size_type col_id(std::string col_name) { - CUDF_FUNC_RANGE(); - auto it = std::find(col_names.begin(), col_names.end(), col_name); - if (it == col_names.end()) { - throw std::runtime_error("Column not found"); - } - return std::distance(col_names.begin(), it); - } - /** - * @brief Append a column to the table - * - * @param col The column to append - * @param col_name The name of the appended column - */ - std::unique_ptr append( - std::unique_ptr& col, std::string col_name) { - CUDF_FUNC_RANGE(); - auto cols = tbl->release(); - cols.push_back(std::move(col)); - col_names.push_back(col_name); - auto appended_table = std::make_unique(std::move(cols)); - return std::make_unique(std::move(appended_table), col_names); - } - /** - * @brief Select a subset of columns from the table - * - * @param col_names The names of the columns to select - */ - cudf::table_view select(std::vector col_names) { - CUDF_FUNC_RANGE(); - std::vector col_indices; - for (auto &col_name : col_names) { - col_indices.push_back(col_id(col_name)); - } - return tbl->select(col_indices); - } - /** - * @brief Write the table to a parquet file - * - * @param filepath The path to the parquet file - */ - void to_parquet(std::string filepath) { - CUDF_FUNC_RANGE(); - auto sink_info = cudf::io::sink_info(filepath); - cudf::io::table_metadata metadata; - std::vector col_name_infos; - for (auto &col_name : col_names) { - col_name_infos.push_back(cudf::io::column_name_info(col_name)); - } - metadata.schema_info = col_name_infos; - auto table_input_metadata = cudf::io::table_input_metadata{metadata}; - auto builder = cudf::io::parquet_writer_options::builder(sink_info, tbl->view()); - builder.metadata(table_input_metadata); - auto options = builder.build(); - cudf::io::write_parquet(options); - } - private: - std::unique_ptr tbl; - std::vector col_names; + public: + table_with_cols(std::unique_ptr tbl, std::vector col_names) + : tbl(std::move(tbl)), col_names(col_names) + { + } + /** + * @brief Return the table view + */ + cudf::table_view table() { return tbl->view(); } + /** + * @brief Return the column view for a given column name + * + * @param col_name The name of the column + */ + cudf::column_view column(std::string col_name) { return tbl->view().column(col_id(col_name)); } + /** + * @param Return the column names of the table + */ + std::vector columns() { return col_names; } + /** + * @brief Translate a column name to a column index + * + * @param col_name The name of the column + */ + cudf::size_type col_id(std::string col_name) + { + CUDF_FUNC_RANGE(); + auto it = std::find(col_names.begin(), col_names.end(), col_name); + if (it == col_names.end()) { throw std::runtime_error("Column not found"); } + return std::distance(col_names.begin(), it); + } + /** + * @brief Append a column to the table + * + * @param col The column to append + * @param col_name The name of the appended column + */ + std::unique_ptr append(std::unique_ptr& col, std::string col_name) + { + CUDF_FUNC_RANGE(); + auto cols = tbl->release(); + cols.push_back(std::move(col)); + col_names.push_back(col_name); + auto appended_table = std::make_unique(std::move(cols)); + return std::make_unique(std::move(appended_table), col_names); + } + /** + * @brief Select a subset of columns from the table + * + * @param col_names The names of the columns to select + */ + cudf::table_view select(std::vector col_names) + { + CUDF_FUNC_RANGE(); + std::vector col_indices; + for (auto& col_name : col_names) { + col_indices.push_back(col_id(col_name)); + } + return tbl->select(col_indices); + } + /** + * @brief Write the table to a parquet file + * + * @param filepath The path to the parquet file + */ + void to_parquet(std::string filepath) + { + CUDF_FUNC_RANGE(); + auto sink_info = cudf::io::sink_info(filepath); + cudf::io::table_metadata metadata; + std::vector col_name_infos; + for (auto& col_name : col_names) { + col_name_infos.push_back(cudf::io::column_name_info(col_name)); + } + metadata.schema_info = col_name_infos; + auto table_input_metadata = cudf::io::table_input_metadata{metadata}; + auto builder = cudf::io::parquet_writer_options::builder(sink_info, tbl->view()); + builder.metadata(table_input_metadata); + auto options = builder.build(); + cudf::io::write_parquet(options); + } + + private: + std::unique_ptr tbl; + std::vector col_names; }; template -std::vector concat(std::vector const& lhs, std::vector const& rhs) { - std::vector result; - result.reserve(lhs.size() + rhs.size()); - std::copy(lhs.begin(), lhs.end(), std::back_inserter(result)); - std::copy(rhs.begin(), rhs.end(), std::back_inserter(result)); - return result; +std::vector concat(std::vector const& lhs, std::vector const& rhs) +{ + std::vector result; + result.reserve(lhs.size() + rhs.size()); + std::copy(lhs.begin(), lhs.end(), std::back_inserter(result)); + std::copy(rhs.begin(), rhs.end(), std::back_inserter(result)); + return result; } std::unique_ptr join_and_gather( - cudf::table_view left_input, - cudf::table_view right_input, - std::vector left_on, - std::vector right_on, - cudf::null_equality compare_nulls, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { - CUDF_FUNC_RANGE(); - auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK; - auto left_selected = left_input.select(left_on); - auto right_selected = right_input.select(right_on); - auto const [left_join_indices, right_join_indices] = - cudf::inner_join(left_selected, right_selected, compare_nulls, mr); + cudf::table_view left_input, + cudf::table_view right_input, + std::vector left_on, + std::vector right_on, + cudf::null_equality compare_nulls, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) +{ + CUDF_FUNC_RANGE(); + auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK; + auto left_selected = left_input.select(left_on); + auto right_selected = right_input.select(right_on); + auto const [left_join_indices, right_join_indices] = + cudf::inner_join(left_selected, right_selected, compare_nulls, mr); - auto left_indices_span = cudf::device_span{*left_join_indices}; - auto right_indices_span = cudf::device_span{*right_join_indices}; + auto left_indices_span = cudf::device_span{*left_join_indices}; + auto right_indices_span = cudf::device_span{*right_join_indices}; - auto left_indices_col = cudf::column_view{left_indices_span}; - auto right_indices_col = cudf::column_view{right_indices_span}; + auto left_indices_col = cudf::column_view{left_indices_span}; + auto right_indices_col = cudf::column_view{right_indices_span}; - auto left_result = cudf::gather(left_input, left_indices_col, oob_policy); - auto right_result = cudf::gather(right_input, right_indices_col, oob_policy); + auto left_result = cudf::gather(left_input, left_indices_col, oob_policy); + auto right_result = cudf::gather(right_input, right_indices_col, oob_policy); - auto joined_cols = left_result->release(); - auto right_cols = right_result->release(); - joined_cols.insert(joined_cols.end(), - std::make_move_iterator(right_cols.begin()), - std::make_move_iterator(right_cols.end())); - return std::make_unique(std::move(joined_cols)); + auto joined_cols = left_result->release(); + auto right_cols = right_result->release(); + joined_cols.insert(joined_cols.end(), + std::make_move_iterator(right_cols.begin()), + std::make_move_iterator(right_cols.end())); + return std::make_unique(std::move(joined_cols)); } /** @@ -210,22 +210,21 @@ std::unique_ptr apply_inner_join( std::unique_ptr& right_input, std::vector left_on, std::vector right_on, - cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) { - CUDF_FUNC_RANGE(); - std::vector left_on_indices; - std::vector right_on_indices; - for (auto &col_name : left_on) { - left_on_indices.push_back(left_input->col_id(col_name)); - } - for (auto &col_name : right_on) { - right_on_indices.push_back(right_input->col_id(col_name)); - } - auto table = join_and_gather( - left_input->table(), right_input->table(), - left_on_indices, right_on_indices, compare_nulls - ); - return std::make_unique(std::move(table), - concat(left_input->columns(), right_input->columns())); + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) +{ + CUDF_FUNC_RANGE(); + std::vector left_on_indices; + std::vector right_on_indices; + for (auto& col_name : left_on) { + left_on_indices.push_back(left_input->col_id(col_name)); + } + for (auto& col_name : right_on) { + right_on_indices.push_back(right_input->col_id(col_name)); + } + auto table = join_and_gather( + left_input->table(), right_input->table(), left_on_indices, right_on_indices, compare_nulls); + return std::make_unique(std::move(table), + concat(left_input->columns(), right_input->columns())); } /** @@ -234,12 +233,13 @@ std::unique_ptr apply_inner_join( * @param table The input table * @param predicate The filter predicate */ -std::unique_ptr apply_filter( - std::unique_ptr& table, cudf::ast::operation& predicate) { - CUDF_FUNC_RANGE(); - auto boolean_mask = cudf::compute_column(table->table(), predicate); - auto result_table = cudf::apply_boolean_mask(table->table(), boolean_mask->view()); - return std::make_unique(std::move(result_table), table->columns()); +std::unique_ptr apply_filter(std::unique_ptr& table, + cudf::ast::operation& predicate) +{ + CUDF_FUNC_RANGE(); + auto boolean_mask = cudf::compute_column(table->table(), predicate); + auto result_table = cudf::apply_boolean_mask(table->table(), boolean_mask->view()); + return std::make_unique(std::move(result_table), table->columns()); } /** @@ -248,16 +248,18 @@ std::unique_ptr apply_filter( * @param table The input table * @param mask The boolean mask */ -std::unique_ptr apply_mask( - std::unique_ptr& table, std::unique_ptr& mask) { - CUDF_FUNC_RANGE(); - auto result_table = cudf::apply_boolean_mask(table->table(), mask->view()); - return std::make_unique(std::move(result_table), table->columns()); +std::unique_ptr apply_mask(std::unique_ptr& table, + std::unique_ptr& mask) +{ + CUDF_FUNC_RANGE(); + auto result_table = cudf::apply_boolean_mask(table->table(), mask->view()); + return std::make_unique(std::move(result_table), table->columns()); } struct groupby_context_t { - std::vector keys; - std::unordered_map>> values; + std::vector keys; + std::unordered_map>> + values; }; /** @@ -266,45 +268,47 @@ struct groupby_context_t { * @param table The input table * @param ctx The groupby context */ -std::unique_ptr apply_groupby( - std::unique_ptr& table, groupby_context_t ctx) { - CUDF_FUNC_RANGE(); - auto keys = table->select(ctx.keys); - cudf::groupby::groupby groupby_obj(keys); - std::vector result_column_names; - result_column_names.insert( - result_column_names.end(), ctx.keys.begin(), ctx.keys.end()); - std::vector requests; - for (auto& [value_col, aggregations] : ctx.values) { - requests.emplace_back(cudf::groupby::aggregation_request()); - for (auto& agg : aggregations) { - if (agg.first == cudf::aggregation::Kind::SUM) { - requests.back().aggregations.push_back(cudf::make_sum_aggregation()); - } else if (agg.first == cudf::aggregation::Kind::MEAN) { - requests.back().aggregations.push_back(cudf::make_mean_aggregation()); - } else if (agg.first == cudf::aggregation::Kind::COUNT_ALL) { - requests.back().aggregations.push_back(cudf::make_count_aggregation()); - } else { - throw std::runtime_error("Unsupported aggregation"); - } - result_column_names.push_back(agg.second); - } - requests.back().values = table->column(value_col); - } - auto agg_results = groupby_obj.aggregate(requests); - std::vector> result_columns; - for (size_t i = 0; i < agg_results.first->num_columns(); i++) { - auto col = std::make_unique(agg_results.first->get_column(i)); - result_columns.push_back(std::move(col)); +std::unique_ptr apply_groupby(std::unique_ptr& table, + groupby_context_t ctx) +{ + CUDF_FUNC_RANGE(); + auto keys = table->select(ctx.keys); + cudf::groupby::groupby groupby_obj(keys); + std::vector result_column_names; + result_column_names.insert(result_column_names.end(), ctx.keys.begin(), ctx.keys.end()); + std::vector requests; + for (auto& [value_col, aggregations] : ctx.values) { + requests.emplace_back(cudf::groupby::aggregation_request()); + for (auto& agg : aggregations) { + if (agg.first == cudf::aggregation::Kind::SUM) { + requests.back().aggregations.push_back( + cudf::make_sum_aggregation()); + } else if (agg.first == cudf::aggregation::Kind::MEAN) { + requests.back().aggregations.push_back( + cudf::make_mean_aggregation()); + } else if (agg.first == cudf::aggregation::Kind::COUNT_ALL) { + requests.back().aggregations.push_back( + cudf::make_count_aggregation()); + } else { + throw std::runtime_error("Unsupported aggregation"); + } + result_column_names.push_back(agg.second); } - for (size_t i = 0; i < agg_results.second.size(); i++) { - for (size_t j = 0; j < agg_results.second[i].results.size(); j++) { - result_columns.push_back(std::move(agg_results.second[i].results[j])); - } + requests.back().values = table->column(value_col); + } + auto agg_results = groupby_obj.aggregate(requests); + std::vector> result_columns; + for (size_t i = 0; i < agg_results.first->num_columns(); i++) { + auto col = std::make_unique(agg_results.first->get_column(i)); + result_columns.push_back(std::move(col)); + } + for (size_t i = 0; i < agg_results.second.size(); i++) { + for (size_t j = 0; j < agg_results.second[i].results.size(); j++) { + result_columns.push_back(std::move(agg_results.second[i].results[j])); } - auto result_table = std::make_unique(std::move(result_columns)); - return std::make_unique( - std::move(result_table), result_column_names); + } + auto result_table = std::make_unique(std::move(result_columns)); + return std::make_unique(std::move(result_table), result_column_names); } /** @@ -314,22 +318,18 @@ std::unique_ptr apply_groupby( * @param sort_keys The sort keys * @param sort_key_orders The sort key orders */ -std::unique_ptr apply_orderby( - std::unique_ptr& table, - std::vector sort_keys, - std::vector sort_key_orders) { - CUDF_FUNC_RANGE(); - std::vector column_views; - for (auto& key : sort_keys) { - column_views.push_back(table->column(key)); - } - auto result_table = cudf::sort_by_key( - table->table(), - cudf::table_view{column_views}, - sort_key_orders - ); - return std::make_unique( - std::move(result_table), table->columns()); +std::unique_ptr apply_orderby(std::unique_ptr& table, + std::vector sort_keys, + std::vector sort_key_orders) +{ + CUDF_FUNC_RANGE(); + std::vector column_views; + for (auto& key : sort_keys) { + column_views.push_back(table->column(key)); + } + auto result_table = + cudf::sort_by_key(table->table(), cudf::table_view{column_views}, sort_key_orders); + return std::make_unique(std::move(result_table), table->columns()); } /** @@ -339,20 +339,20 @@ std::unique_ptr apply_orderby( * @param agg_kind The aggregation kind * @param col_name The name of the output column */ -std::unique_ptr apply_reduction( - cudf::column_view& column, cudf::aggregation::Kind agg_kind, std::string col_name) { - CUDF_FUNC_RANGE(); - auto agg = cudf::make_sum_aggregation(); - auto result = cudf::reduce(column, *agg, column.type()); - cudf::size_type len = 1; - auto col = cudf::make_column_from_scalar(*result, len); - std::vector> columns; - columns.push_back(std::move(col)); - auto result_table = std::make_unique(std::move(columns)); - std::vector col_names = {col_name}; - return std::make_unique( - std::move(result_table), col_names - ); +std::unique_ptr apply_reduction(cudf::column_view& column, + cudf::aggregation::Kind agg_kind, + std::string col_name) +{ + CUDF_FUNC_RANGE(); + auto agg = cudf::make_sum_aggregation(); + auto result = cudf::reduce(column, *agg, column.type()); + cudf::size_type len = 1; + auto col = cudf::make_column_from_scalar(*result, len); + std::vector> columns; + columns.push_back(std::move(col)); + auto result_table = std::make_unique(std::move(columns)); + std::vector col_names = {col_name}; + return std::make_unique(std::move(result_table), col_names); } /** @@ -363,67 +363,68 @@ std::unique_ptr apply_reduction( * @param predicate The filter predicate to pushdown */ std::unique_ptr read_parquet( - std::string filename, std::vector columns = {}, std::unique_ptr predicate = nullptr) { - CUDF_FUNC_RANGE(); - auto source = cudf::io::source_info(filename); - auto builder = cudf::io::parquet_reader_options_builder(source); - if (columns.size()) { - builder.columns(columns); - } - if (predicate) { - builder.filter(*predicate); - } - auto options = builder.build(); - auto table_with_metadata = cudf::io::read_parquet(options); - auto schema_info = table_with_metadata.metadata.schema_info; - std::vector column_names; - for (auto &col_info : schema_info) { - column_names.push_back(col_info.name); - } - return std::make_unique( - std::move(table_with_metadata.tbl), column_names); + std::string filename, + std::vector columns = {}, + std::unique_ptr predicate = nullptr) +{ + CUDF_FUNC_RANGE(); + auto source = cudf::io::source_info(filename); + auto builder = cudf::io::parquet_reader_options_builder(source); + if (columns.size()) { builder.columns(columns); } + if (predicate) { builder.filter(*predicate); } + auto options = builder.build(); + auto table_with_metadata = cudf::io::read_parquet(options); + auto schema_info = table_with_metadata.metadata.schema_info; + std::vector column_names; + for (auto& col_info : schema_info) { + column_names.push_back(col_info.name); + } + return std::make_unique(std::move(table_with_metadata.tbl), column_names); } -std::tm make_tm(int year, int month, int day) { - std::tm tm = {0}; - tm.tm_year = year - 1900; - tm.tm_mon = month - 1; - tm.tm_mday = day; - return tm; +std::tm make_tm(int year, int month, int day) +{ + std::tm tm = {0}; + tm.tm_year = year - 1900; + tm.tm_mon = month - 1; + tm.tm_mday = day; + return tm; } -int32_t days_since_epoch(int year, int month, int day) { - std::tm tm = make_tm(year, month, day); - std::tm epoch = make_tm(1970, 1, 1); - std::time_t time = std::mktime(&tm); - std::time_t epoch_time = std::mktime(&epoch); - double diff = std::difftime(time, epoch_time) / (60*60*24); - return static_cast(diff); +int32_t days_since_epoch(int year, int month, int day) +{ + std::tm tm = make_tm(year, month, day); + std::tm epoch = make_tm(1970, 1, 1); + std::time_t time = std::mktime(&tm); + std::time_t epoch_time = std::mktime(&epoch); + double diff = std::difftime(time, epoch_time) / (60 * 60 * 24); + return static_cast(diff); } struct tpch_args_t { - std::string dataset_dir; - std::string memory_resource_type; + std::string dataset_dir; + std::string memory_resource_type; }; /** * @brief Parse command line arguments into a struct - * + * * @param argc The number of command line arguments * @param argv The command line arguments */ -tpch_args_t parse_args(int argc, char const **argv) { - if (argc < 3) { - std::cerr << "Usage: " << argv[0] << " " << std::endl; - std::cerr << std::endl; - std::cerr << "The query result will be saved to a parquet file named " << - "q{query_no}.parquet in the current working directory." << std::endl; - exit(1); - } - tpch_args_t args; - args.dataset_dir = argv[1]; - args.memory_resource_type = argv[2]; - return args; +tpch_args_t parse_args(int argc, char const** argv) +{ + if (argc < 3) { + std::cerr << "Usage: " << argv[0] << " " << std::endl; + std::cerr << std::endl; + std::cerr << "The query result will be saved to a parquet file named " + << "q{query_no}.parquet in the current working directory." << std::endl; + exit(1); + } + tpch_args_t args; + args.dataset_dir = argv[1]; + args.memory_resource_type = argv[2]; + return args; } /** From f0d6f6a9c55068320227f821eac75392eedff9cb Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 5 Jul 2024 14:48:36 -0700 Subject: [PATCH 086/124] Change q5 for benchmarks in dt04 --- cpp/examples/tpch/q5.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index a6ee2663ad3..43357138013 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -57,12 +57,10 @@ order by std::unique_ptr calc_revenue(cudf::column_view extendedprice, cudf::column_view discount) { - auto one = cudf::fixed_point_scalar(1, -2); + auto one = cudf::numeric_scalar(1); auto one_minus_discount = cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type()); - auto revenue_scale = cudf::binary_operation_fixed_point_scale( - cudf::binary_operator::MUL, extendedprice.type().scale(), one_minus_discount->type().scale()); - auto revenue_type = cudf::data_type{cudf::type_id::DECIMAL64, revenue_scale}; + auto revenue_type = cudf::data_type{cudf::type_id::FLOAT64}; auto revenue = cudf::binary_operation( extendedprice, one_minus_discount->view(), cudf::binary_operator::MUL, revenue_type); return revenue; @@ -107,17 +105,17 @@ int main(int argc, char const** argv) // Read out the tables from parquet files // while pushing down the column projections and filter predicates auto customer = - read_parquet(args.dataset_dir + "customer/part-0.parquet", {"c_custkey", "c_nationkey"}); + read_parquet(args.dataset_dir + "customer.parquet", {"c_custkey", "c_nationkey"}); auto orders = - read_parquet(args.dataset_dir + "orders/part-0.parquet", orders_cols, std::move(orders_pred)); - auto lineitem = read_parquet(args.dataset_dir + "lineitem/part-0.parquet", + read_parquet(args.dataset_dir + "orders.parquet", orders_cols, std::move(orders_pred)); + auto lineitem = read_parquet(args.dataset_dir + "lineitem.parquet", {"l_orderkey", "l_suppkey", "l_extendedprice", "l_discount"}); auto supplier = - read_parquet(args.dataset_dir + "supplier/part-0.parquet", {"s_suppkey", "s_nationkey"}); + read_parquet(args.dataset_dir + "supplier.parquet", {"s_suppkey", "s_nationkey"}); auto nation = read_parquet(args.dataset_dir + "nation/part-0.parquet", {"n_nationkey", "n_regionkey", "n_name"}); auto region = - read_parquet(args.dataset_dir + "region/part-0.parquet", region_cols, std::move(region_pred)); + read_parquet(args.dataset_dir + "region.parquet", region_cols, std::move(region_pred)); // Perform the joins auto join_a = apply_inner_join(region, nation, {"r_regionkey"}, {"n_regionkey"}); From 91ac5035201d18271191a64250389f2896af059b Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 5 Jul 2024 15:19:11 -0700 Subject: [PATCH 087/124] Fix q5 --- cpp/examples/tpch/q5.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 43357138013..9aeb7e73bec 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -112,7 +112,7 @@ int main(int argc, char const** argv) {"l_orderkey", "l_suppkey", "l_extendedprice", "l_discount"}); auto supplier = read_parquet(args.dataset_dir + "supplier.parquet", {"s_suppkey", "s_nationkey"}); - auto nation = read_parquet(args.dataset_dir + "nation/part-0.parquet", + auto nation = read_parquet(args.dataset_dir + "nation.parquet", {"n_nationkey", "n_regionkey", "n_name"}); auto region = read_parquet(args.dataset_dir + "region.parquet", region_cols, std::move(region_pred)); From cb0550c0e913b1148cf4d446050a1efd5f98f4a1 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 5 Jul 2024 15:49:35 -0700 Subject: [PATCH 088/124] Use float64 instead of decimal64 --- cpp/examples/tpch/q1.cpp | 18 +++++++----------- cpp/examples/tpch/q5.cpp | 14 +++++++------- cpp/examples/tpch/q6.cpp | 8 +++----- cpp/examples/tpch/q9.cpp | 29 +++++++++++------------------ 4 files changed, 28 insertions(+), 41 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 12aaa7c437b..b6804840346 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -49,12 +49,10 @@ order by std::unique_ptr calc_disc_price(cudf::column_view discount, cudf::column_view extendedprice) { - auto one = cudf::fixed_point_scalar(1); + auto one = cudf::numeric_scalar(1); auto one_minus_discount = cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type()); - auto disc_price_scale = cudf::binary_operation_fixed_point_scale( - cudf::binary_operator::MUL, extendedprice.type().scale(), one_minus_discount->type().scale()); - auto disc_price_type = cudf::data_type{cudf::type_id::DECIMAL64, disc_price_scale}; + auto disc_price_type = cudf::data_type{cudf::type_id::FLOAT64}; auto disc_price = cudf::binary_operation( extendedprice, one_minus_discount->view(), cudf::binary_operator::MUL, disc_price_type); return disc_price; @@ -62,12 +60,10 @@ std::unique_ptr calc_disc_price(cudf::column_view discount, std::unique_ptr calc_charge(cudf::column_view tax, cudf::column_view disc_price) { - auto one = cudf::fixed_point_scalar(1); + auto one = cudf::numeric_scalar(1); auto one_plus_tax = cudf::binary_operation(one, tax, cudf::binary_operator::ADD, tax.type()); - auto charge_scale = cudf::binary_operation_fixed_point_scale( - cudf::binary_operator::MUL, disc_price.type().scale(), one_plus_tax->type().scale()); - auto charge_type = cudf::data_type{cudf::type_id::DECIMAL64, charge_scale}; - auto charge = cudf::binary_operation( + auto charge_type = cudf::data_type{cudf::type_id::FLOAT64}; + auto charge = cudf::binary_operation( disc_price, one_plus_tax->view(), cudf::binary_operator::MUL, charge_type); return charge; } @@ -100,8 +96,8 @@ int main(int argc, char const** argv) cudf::ast::ast_operator::LESS_EQUAL, shipdate_ref, shipdate_upper_literal); // Read out the `lineitem` table from parquet file - auto lineitem = read_parquet( - args.dataset_dir + "lineitem/part-0.parquet", lineitem_cols, std::move(lineitem_pred)); + auto lineitem = + read_parquet(args.dataset_dir + "/lineitem.parquet", lineitem_cols, std::move(lineitem_pred)); // Calculate the discount price and charge columns and append to lineitem table auto disc_price = diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 9aeb7e73bec..b6a12983818 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -105,17 +105,17 @@ int main(int argc, char const** argv) // Read out the tables from parquet files // while pushing down the column projections and filter predicates auto customer = - read_parquet(args.dataset_dir + "customer.parquet", {"c_custkey", "c_nationkey"}); + read_parquet(args.dataset_dir + "/customer.parquet", {"c_custkey", "c_nationkey"}); auto orders = - read_parquet(args.dataset_dir + "orders.parquet", orders_cols, std::move(orders_pred)); - auto lineitem = read_parquet(args.dataset_dir + "lineitem.parquet", + read_parquet(args.dataset_dir + "/orders.parquet", orders_cols, std::move(orders_pred)); + auto lineitem = read_parquet(args.dataset_dir + "/lineitem.parquet", {"l_orderkey", "l_suppkey", "l_extendedprice", "l_discount"}); auto supplier = - read_parquet(args.dataset_dir + "supplier.parquet", {"s_suppkey", "s_nationkey"}); - auto nation = read_parquet(args.dataset_dir + "nation.parquet", - {"n_nationkey", "n_regionkey", "n_name"}); + read_parquet(args.dataset_dir + "/supplier.parquet", {"s_suppkey", "s_nationkey"}); + auto nation = + read_parquet(args.dataset_dir + "/nation.parquet", {"n_nationkey", "n_regionkey", "n_name"}); auto region = - read_parquet(args.dataset_dir + "region.parquet", region_cols, std::move(region_pred)); + read_parquet(args.dataset_dir + "/region.parquet", region_cols, std::move(region_pred)); // Perform the joins auto join_a = apply_inner_join(region, nation, {"r_regionkey"}, {"n_regionkey"}); diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index d44bcf1b8d7..f92ab52ece7 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -38,9 +38,7 @@ where std::unique_ptr calc_revenue(cudf::column_view extendedprice, cudf::column_view discount) { - auto revenue_scale = cudf::binary_operation_fixed_point_scale( - cudf::binary_operator::MUL, extendedprice.type().scale(), discount.type().scale()); - auto revenue_type = cudf::data_type{cudf::type_id::DECIMAL64, revenue_scale}; + auto revenue_type = cudf::data_type{cudf::type_id::FLOAT64}; auto revenue = cudf::binary_operation(extendedprice, discount, cudf::binary_operator::MUL, revenue_type); return revenue; @@ -73,8 +71,8 @@ int main(int argc, char const** argv) cudf::ast::operation(cudf::ast::ast_operator::LESS, shipdate_ref, shipdate_upper_literal); auto lineitem_pred = std::make_unique( cudf::ast::ast_operator::LOGICAL_AND, shipdate_pred_a, shipdate_pred_b); - auto lineitem = read_parquet( - args.dataset_dir + "lineitem/part-0.parquet", lineitem_cols, std::move(lineitem_pred)); + auto lineitem = + read_parquet(args.dataset_dir + "/lineitem.parquet", lineitem_cols, std::move(lineitem_pred)); // Cast the discount and quantity columns to float32 and append to lineitem table auto discout_float = diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index a103eed52e3..2ca08a389a8 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -69,22 +69,16 @@ std::unique_ptr calc_amount(cudf::column_view discount, cudf::column_view supplycost, cudf::column_view quantity) { - auto one = cudf::fixed_point_scalar(1); + auto one = cudf::numeric_scalar(1); auto one_minus_discount = cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type()); - auto extendedprice_discounted_scale = cudf::binary_operation_fixed_point_scale( - cudf::binary_operator::MUL, extendedprice.type().scale(), one_minus_discount->type().scale()); - auto extendedprice_discounted_type = - cudf::data_type{cudf::type_id::DECIMAL64, extendedprice_discounted_scale}; - auto extendedprice_discounted = cudf::binary_operation(extendedprice, + auto extendedprice_discounted_type = cudf::data_type{cudf::type_id::FLOAT64}; + auto extendedprice_discounted = cudf::binary_operation(extendedprice, one_minus_discount->view(), cudf::binary_operator::MUL, extendedprice_discounted_type); - auto supplycost_quantity_scale = cudf::binary_operation_fixed_point_scale( - cudf::binary_operator::MUL, supplycost.type().scale(), quantity.type().scale()); - auto supplycost_quantity_type = - cudf::data_type{cudf::type_id::DECIMAL64, supplycost_quantity_scale}; - auto supplycost_quantity = cudf::binary_operation( + auto supplycost_quantity_type = cudf::data_type{cudf::type_id::FLOAT64}; + auto supplycost_quantity = cudf::binary_operation( supplycost, quantity, cudf::binary_operator::MUL, supplycost_quantity_type); auto amount = cudf::binary_operation(extendedprice_discounted->view(), supplycost_quantity->view(), @@ -105,16 +99,15 @@ int main(int argc, char const** argv) // Read out the table from parquet files auto lineitem = read_parquet( - args.dataset_dir + "lineitem/part-0.parquet", + args.dataset_dir + "/lineitem.parquet", {"l_suppkey", "l_partkey", "l_orderkey", "l_extendedprice", "l_discount", "l_quantity"}); - auto nation = read_parquet(args.dataset_dir + "nation/part-0.parquet", {"n_nationkey", "n_name"}); - auto orders = - read_parquet(args.dataset_dir + "orders/part-0.parquet", {"o_orderkey", "o_orderdate"}); - auto part = read_parquet(args.dataset_dir + "part/part-0.parquet", {"p_partkey", "p_name"}); - auto partsupp = read_parquet(args.dataset_dir + "partsupp/part-0.parquet", + auto nation = read_parquet(args.dataset_dir + "/nation.parquet", {"n_nationkey", "n_name"}); + auto orders = read_parquet(args.dataset_dir + "/orders.parquet", {"o_orderkey", "o_orderdate"}); + auto part = read_parquet(args.dataset_dir + "/part.parquet", {"p_partkey", "p_name"}); + auto partsupp = read_parquet(args.dataset_dir + "/partsupp.parquet", {"ps_suppkey", "ps_partkey", "ps_supplycost"}); auto supplier = - read_parquet(args.dataset_dir + "supplier/part-0.parquet", {"s_suppkey", "s_nationkey"}); + read_parquet(args.dataset_dir + "/supplier.parquet", {"s_suppkey", "s_nationkey"}); // Generating the `profit` table // Filter the part table using `p_name like '%green%'` From 545cfb90e5a3c216fc5df0bb708dbec136e92d97 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sat, 6 Jul 2024 10:33:26 -0700 Subject: [PATCH 089/124] Add stream/mr params to new col calc functions --- cpp/examples/tpch/q1.cpp | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index b6804840346..7ad407da4e0 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -46,25 +46,37 @@ order by l_linestatus; */ -std::unique_ptr calc_disc_price(cudf::column_view discount, - cudf::column_view extendedprice) +std::unique_ptr calc_disc_price( + cudf::column_view discount, + cudf::column_view extendedprice, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { auto one = cudf::numeric_scalar(1); auto one_minus_discount = - cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type()); + cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type(), stream, mr); auto disc_price_type = cudf::data_type{cudf::type_id::FLOAT64}; - auto disc_price = cudf::binary_operation( - extendedprice, one_minus_discount->view(), cudf::binary_operator::MUL, disc_price_type); + auto disc_price = cudf::binary_operation(extendedprice, + one_minus_discount->view(), + cudf::binary_operator::MUL, + disc_price_type, + stream, + mr); return disc_price; } -std::unique_ptr calc_charge(cudf::column_view tax, cudf::column_view disc_price) +std::unique_ptr calc_charge( + cudf::column_view tax, + cudf::column_view disc_price, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { - auto one = cudf::numeric_scalar(1); - auto one_plus_tax = cudf::binary_operation(one, tax, cudf::binary_operator::ADD, tax.type()); - auto charge_type = cudf::data_type{cudf::type_id::FLOAT64}; - auto charge = cudf::binary_operation( - disc_price, one_plus_tax->view(), cudf::binary_operator::MUL, charge_type); + auto one = cudf::numeric_scalar(1); + auto one_plus_tax = + cudf::binary_operation(one, tax, cudf::binary_operator::ADD, tax.type(), stream, mr); + auto charge_type = cudf::data_type{cudf::type_id::FLOAT64}; + auto charge = cudf::binary_operation( + disc_price, one_plus_tax->view(), cudf::binary_operator::MUL, charge_type, stream, mr); return charge; } From cadc1951f5ff4f5e1603f59b3aa3814148cc133f Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sat, 6 Jul 2024 10:39:02 -0700 Subject: [PATCH 090/124] Add stream / mr params --- cpp/examples/tpch/q5.cpp | 17 ++++++++++++----- cpp/examples/tpch/q6.cpp | 11 +++++++---- cpp/examples/tpch/q9.cpp | 19 +++++++++++++------ 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index b6a12983818..9fa87429344 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -54,15 +54,22 @@ order by revenue desc; */ -std::unique_ptr calc_revenue(cudf::column_view extendedprice, - cudf::column_view discount) +std::unique_ptr calc_revenue( + cudf::column_view extendedprice, + cudf::column_view discount, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { auto one = cudf::numeric_scalar(1); auto one_minus_discount = - cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type()); + cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type(), stream, mr); auto revenue_type = cudf::data_type{cudf::type_id::FLOAT64}; - auto revenue = cudf::binary_operation( - extendedprice, one_minus_discount->view(), cudf::binary_operator::MUL, revenue_type); + auto revenue = cudf::binary_operation(extendedprice, + one_minus_discount->view(), + cudf::binary_operator::MUL, + revenue_type, + stream, + mr); return revenue; } diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index f92ab52ece7..b7265dafad6 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -35,12 +35,15 @@ where and l_quantity < 24; */ -std::unique_ptr calc_revenue(cudf::column_view extendedprice, - cudf::column_view discount) +std::unique_ptr calc_revenue( + cudf::column_view extendedprice, + cudf::column_view discount, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { auto revenue_type = cudf::data_type{cudf::type_id::FLOAT64}; - auto revenue = - cudf::binary_operation(extendedprice, discount, cudf::binary_operator::MUL, revenue_type); + auto revenue = cudf::binary_operation( + extendedprice, discount, cudf::binary_operator::MUL, revenue_type, stream, mr); return revenue; } diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index 2ca08a389a8..b5bd92c6196 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -64,10 +64,13 @@ order by o_year desc; */ -std::unique_ptr calc_amount(cudf::column_view discount, - cudf::column_view extendedprice, - cudf::column_view supplycost, - cudf::column_view quantity) +std::unique_ptr calc_amount( + cudf::column_view discount, + cudf::column_view extendedprice, + cudf::column_view supplycost, + cudf::column_view quantity, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { auto one = cudf::numeric_scalar(1); auto one_minus_discount = @@ -76,14 +79,18 @@ std::unique_ptr calc_amount(cudf::column_view discount, auto extendedprice_discounted = cudf::binary_operation(extendedprice, one_minus_discount->view(), cudf::binary_operator::MUL, - extendedprice_discounted_type); + extendedprice_discounted_type, + stream, + mr); auto supplycost_quantity_type = cudf::data_type{cudf::type_id::FLOAT64}; auto supplycost_quantity = cudf::binary_operation( supplycost, quantity, cudf::binary_operator::MUL, supplycost_quantity_type); auto amount = cudf::binary_operation(extendedprice_discounted->view(), supplycost_quantity->view(), cudf::binary_operator::SUB, - extendedprice_discounted->type()); + extendedprice_discounted->type(), + stream, + mr); return amount; } From c55251007c6ab3c1d5e8401b8ab2764c21091c86 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sat, 6 Jul 2024 10:46:45 -0700 Subject: [PATCH 091/124] Fix the SQL queries --- cpp/examples/tpch/q1.cpp | 2 +- cpp/examples/tpch/q5.cpp | 12 ++++++------ cpp/examples/tpch/q6.cpp | 2 +- cpp/examples/tpch/q9.cpp | 12 ++++++------ 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 7ad407da4e0..c579c65d86e 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -21,7 +21,7 @@ #include /* -create view lineitem as select * from '~/tpch_sf1/lineitem/part-0.parquet'; +create view lineitem as select * from '/tables/scale-1/lineitem.parquet'; select l_returnflag, diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 9fa87429344..1247393f8c2 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -21,12 +21,12 @@ #include /* -create view customer as select * from '~/tpch_sf1/customer/part-0.parquet'; -create view orders as select * from '~/tpch_sf1/orders/part-0.parquet'; -create view lineitem as select * from '~/tpch_sf1/lineitem/part-0.parquet'; -create view supplier as select * from '~/tpch_sf1/supplier/part-0.parquet'; -create view nation as select * from '~/tpch_sf1/nation/part-0.parquet'; -create view region as select * from '~/tpch_sf1/region/part-0.parquet'; +create view customer as select * from '/tables/scale-1/customer.parquet'; +create view orders as select * from '/tables/scale-1/orders.parquet'; +create view lineitem as select * from '/tables/scale-1/lineitem.parquet'; +create view supplier as select * from '/tables/scale-1/supplier.parquet'; +create view nation as select * from '/tables/scale-1/nation.parquet'; +create view region as select * from '/tables/scale-1/region.parquet'; select n_name, diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index b7265dafad6..3e935871b4a 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -21,7 +21,7 @@ #include /* -create view lineitem as select * from '~/tpch_sf1/lineitem/part-0.parquet'; +create view lineitem as select * from '/tables/scale-1/lineitem.parquet'; select sum(l_extendedprice * l_discount) as revenue diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index b5bd92c6196..ef3e73d40d3 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -23,12 +23,12 @@ #include /* -create view part as select * from '~/tpch_sf1/part/part-0.parquet'; -create view supplier as select * from '~/tpch_sf1/supplier/part-0.parquet'; -create view lineitem as select * from '~/tpch_sf1/lineitem/part-0.parquet'; -create view partsupp as select * from '~/tpch_sf1/partsupp/part-0.parquet'; -create view orders as select * from '~/tpch_sf1/orders/part-0.parquet'; -create view nation as select * from '~/tpch_sf1/nation/part-0.parquet'; +create view part as select * from '/tables/scale-1/part.parquet'; +create view supplier as select * from '/tables/scale-1/supplier.parquet'; +create view lineitem as select * from '/tables/scale-1/lineitem.parquet'; +create view partsupp as select * from '/tables/scale-1/partsupp.parquet'; +create view orders as select * from '/tables/scale-1/orders.parquet'; +create view nation as select * from '/tables/scale-1/nation.parquet'; select nation, From adf9456cf5f9aae001311df47244788cc461edb1 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sat, 6 Jul 2024 11:29:23 -0700 Subject: [PATCH 092/124] Fix the join order for Q9 --- cpp/examples/tpch/q9.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index ef3e73d40d3..eeadc2f109a 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -124,12 +124,12 @@ int main(int argc, char const** argv) auto part_filtered = apply_mask(part, mask); // Perform the joins - auto join_a = apply_inner_join(lineitem, supplier, {"l_suppkey"}, {"s_suppkey"}); - auto join_b = - apply_inner_join(join_a, partsupp, {"l_suppkey", "l_partkey"}, {"ps_suppkey", "ps_partkey"}); - auto join_c = apply_inner_join(join_b, part_filtered, {"l_partkey"}, {"p_partkey"}); - auto join_d = apply_inner_join(join_c, orders, {"l_orderkey"}, {"o_orderkey"}); - auto joined_table = apply_inner_join(join_d, nation, {"s_nationkey"}, {"n_nationkey"}); + auto join_a = apply_inner_join(supplier, nation, {"s_nationkey"}, {"n_nationkey"}); + auto join_b = apply_inner_join(partsupp, join_a, {"ps_suppkey"}, {"s_suppkey"}); + auto join_c = apply_inner_join(lineitem, part_filtered, {"l_partkey"}, {"p_partkey"}); + auto join_d = apply_inner_join(orders, join_c, {"o_orderkey"}, {"l_orderkey"}); + auto joined_table = + apply_inner_join(join_d, join_b, {"l_suppkey", "l_partkey"}, {"s_suppkey", "ps_partkey"}); // Calculate the `nation`, `o_year`, and `amount` columns auto n_name = std::make_unique(joined_table->column("n_name")); From 48d108a108e2eeb52ab1bb8631bcc3fad9fc76cf Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 10 Jul 2024 14:40:02 -0700 Subject: [PATCH 093/124] Fix trailing whitespace --- cpp/examples/tpch/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/examples/tpch/README.md b/cpp/examples/tpch/README.md index 0bfb1941a9b..ee457bb1f32 100644 --- a/cpp/examples/tpch/README.md +++ b/cpp/examples/tpch/README.md @@ -1,8 +1,8 @@ # TPC-H Inspired Examples -Implementing the TPC-H queries using `libcudf`. We leverage the data generator (wrapper around official TPC-H datagen) from [Apache Datafusion](https://github.com/apache/datafusion) for generating data in the form of Parquet files. +Implementing the TPC-H queries using `libcudf`. We leverage the data generator (wrapper around official TPC-H datagen) from [Apache Datafusion](https://github.com/apache/datafusion) for generating data in the form of Parquet files. -## Requirements +## Requirements - Rust From 1bb2793b6dd1dab7d02b91048995c7f4f1468ce1 Mon Sep 17 00:00:00 2001 From: Jayjeet Chakraborty Date: Wed, 10 Jul 2024 18:12:57 -0700 Subject: [PATCH 094/124] Update cpp/examples/tpch/CMakeLists.txt Co-authored-by: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> --- cpp/examples/tpch/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/examples/tpch/CMakeLists.txt b/cpp/examples/tpch/CMakeLists.txt index f03b5d8a940..1b91d07e148 100644 --- a/cpp/examples/tpch/CMakeLists.txt +++ b/cpp/examples/tpch/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. cmake_minimum_required(VERSION 3.26.4) From 174f998998489dbb0f2c361e15f4fd41612790c7 Mon Sep 17 00:00:00 2001 From: Jayjeet Chakraborty Date: Wed, 10 Jul 2024 18:13:21 -0700 Subject: [PATCH 095/124] Update cpp/examples/tpch/README.md Co-authored-by: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> --- cpp/examples/tpch/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/examples/tpch/README.md b/cpp/examples/tpch/README.md index ee457bb1f32..1ea71ae9824 100644 --- a/cpp/examples/tpch/README.md +++ b/cpp/examples/tpch/README.md @@ -1,6 +1,6 @@ # TPC-H Inspired Examples -Implementing the TPC-H queries using `libcudf`. We leverage the data generator (wrapper around official TPC-H datagen) from [Apache Datafusion](https://github.com/apache/datafusion) for generating data in the form of Parquet files. +Implements TPC-H queries using `libcudf`. We leverage the data generator (wrapper around official TPC-H datagen) from [Apache Datafusion](https://github.com/apache/datafusion) for generating data in Parquet format. ## Requirements From 94e3b4ef9f0652dfc458e6cb0a6698f7df4be4c0 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 10 Jul 2024 18:17:56 -0700 Subject: [PATCH 096/124] Add docstring to remaining functions in utils --- cpp/examples/tpch/utils.hpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 54b9a1cd7b6..f8de4ee89ca 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -154,6 +154,12 @@ class table_with_cols { std::vector col_names; }; +/** + * @brief Concatenate two vectors + * + * @param lhs The left vector + * @param rhs The right vector + */ template std::vector concat(std::vector const& lhs, std::vector const& rhs) { @@ -382,6 +388,13 @@ std::unique_ptr read_parquet( return std::make_unique(std::move(table_with_metadata.tbl), column_names); } +/** + * @brief Generate the `std::tm` structure from year, month, and day + * + * @param year The year + * @param month The month + * @param day The day + */ std::tm make_tm(int year, int month, int day) { std::tm tm = {0}; @@ -391,6 +404,13 @@ std::tm make_tm(int year, int month, int day) return tm; } +/** + * @brief Calculate the number of days since the UNIX epoch + * + * @param year The year + * @param month The month + * @param day The day + */ int32_t days_since_epoch(int year, int month, int day) { std::tm tm = make_tm(year, month, day); From e8c8abbf2b0b3d54f034277dc681036b1b8f9821 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 10 Jul 2024 18:22:40 -0700 Subject: [PATCH 097/124] Make the one scalars const --- cpp/examples/tpch/q1.cpp | 4 ++-- cpp/examples/tpch/q5.cpp | 2 +- cpp/examples/tpch/q9.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index c579c65d86e..60f436caad7 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -52,7 +52,7 @@ std::unique_ptr calc_disc_price( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { - auto one = cudf::numeric_scalar(1); + auto const one = cudf::numeric_scalar(1); auto one_minus_discount = cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type(), stream, mr); auto disc_price_type = cudf::data_type{cudf::type_id::FLOAT64}; @@ -71,7 +71,7 @@ std::unique_ptr calc_charge( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { - auto one = cudf::numeric_scalar(1); + auto const one = cudf::numeric_scalar(1); auto one_plus_tax = cudf::binary_operation(one, tax, cudf::binary_operator::ADD, tax.type(), stream, mr); auto charge_type = cudf::data_type{cudf::type_id::FLOAT64}; diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 1247393f8c2..fb53d16674d 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -60,7 +60,7 @@ std::unique_ptr calc_revenue( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { - auto one = cudf::numeric_scalar(1); + auto const one = cudf::numeric_scalar(1); auto one_minus_discount = cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type(), stream, mr); auto revenue_type = cudf::data_type{cudf::type_id::FLOAT64}; diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index eeadc2f109a..57b888a6c5a 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -72,7 +72,7 @@ std::unique_ptr calc_amount( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { - auto one = cudf::numeric_scalar(1); + auto const one = cudf::numeric_scalar(1); auto one_minus_discount = cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type()); auto extendedprice_discounted_type = cudf::data_type{cudf::type_id::FLOAT64}; From d8306800c2aad1f2da2b62b060a29be77e71b8ec Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 10 Jul 2024 18:26:03 -0700 Subject: [PATCH 098/124] Add docstrings to column calculation functions --- cpp/examples/tpch/q1.cpp | 16 ++++++++++++++++ cpp/examples/tpch/q5.cpp | 8 ++++++++ cpp/examples/tpch/q6.cpp | 8 ++++++++ cpp/examples/tpch/q9.cpp | 10 ++++++++++ 4 files changed, 42 insertions(+) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 60f436caad7..ea5bbf17a9d 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -46,6 +46,14 @@ order by l_linestatus; */ +/** + * @brief Calculate the discount price column + * + * @param discount The discount column + * @param extendedprice The extended price column + * @param stream The CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + */ std::unique_ptr calc_disc_price( cudf::column_view discount, cudf::column_view extendedprice, @@ -65,6 +73,14 @@ std::unique_ptr calc_disc_price( return disc_price; } +/** + * @brief Calculate the charge column + * + * @param tax The tax column + * @param disc_price The discount price column + * @param stream The CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + */ std::unique_ptr calc_charge( cudf::column_view tax, cudf::column_view disc_price, diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index fb53d16674d..ab54c3ea955 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -54,6 +54,14 @@ order by revenue desc; */ +/** + * @brief Calculate the revenue column + * + * @param extendedprice The extended price column + * @param discount The discount column + * @param stream The CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + */ std::unique_ptr calc_revenue( cudf::column_view extendedprice, cudf::column_view discount, diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 3e935871b4a..65dd0d856eb 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -35,6 +35,14 @@ where and l_quantity < 24; */ +/** + * @brief Calculate the revenue column + * + * @param extendedprice The extended price column + * @param discount The discount column + * @param stream The CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + */ std::unique_ptr calc_revenue( cudf::column_view extendedprice, cudf::column_view discount, diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index 57b888a6c5a..fd8489dabcf 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -64,6 +64,16 @@ order by o_year desc; */ +/** + * @brief Calculate the amount column + * + * @param discount The discount column + * @param extendedprice The extended price column + * @param supplycost The supply cost column + * @param quantity The quantity column + * @param stream The CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + */ std::unique_ptr calc_amount( cudf::column_view discount, cudf::column_view extendedprice, From e297deb6f1bee7b5875a94d1c6bd4b05e4e2a4d9 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 10 Jul 2024 18:53:18 -0700 Subject: [PATCH 099/124] Add file-level docstring to Q1 --- cpp/examples/tpch/q1.cpp | 53 +++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index ea5bbf17a9d..73a75be8d8d 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -20,31 +20,34 @@ #include #include -/* -create view lineitem as select * from '/tables/scale-1/lineitem.parquet'; - -select - l_returnflag, - l_linestatus, - sum(l_quantity) as sum_qty, - sum(l_extendedprice) as sum_base_price, - sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, - sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, - avg(l_quantity) as avg_qty, - avg(l_extendedprice) as avg_price, - avg(l_discount) as avg_disc, - count(*) as count_order -from - lineitem -where - l_shipdate <= date '1998-09-02' -group by - l_returnflag, - l_linestatus -order by - l_returnflag, - l_linestatus; -*/ +/** + * @file q1.cpp + * @brief Implement query 1 of the TPC-H benchmark. + * + * create view lineitem as select * from '/tables/scale-1/lineitem.parquet'; + * + * select + * l_returnflag, + * l_linestatus, + * sum(l_quantity) as sum_qty, + * sum(l_extendedprice) as sum_base_price, + * sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + * sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + * avg(l_quantity) as avg_qty, + * avg(l_extendedprice) as avg_price, + * avg(l_discount) as avg_disc, + * count(*) as count_order + * from + * lineitem + * where + * l_shipdate <= date '1998-09-02' + * group by + * l_returnflag, + * l_linestatus + * order by + * l_returnflag, + * l_linestatus; + */ /** * @brief Calculate the discount price column From 146d45b4fac604771a92fd433ba9d1abfe2da188 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 10 Jul 2024 18:56:36 -0700 Subject: [PATCH 100/124] Add file-level docstring to Q5 --- cpp/examples/tpch/q5.cpp | 69 +++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index ab54c3ea955..200e6dbba9c 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -20,39 +20,42 @@ #include #include -/* -create view customer as select * from '/tables/scale-1/customer.parquet'; -create view orders as select * from '/tables/scale-1/orders.parquet'; -create view lineitem as select * from '/tables/scale-1/lineitem.parquet'; -create view supplier as select * from '/tables/scale-1/supplier.parquet'; -create view nation as select * from '/tables/scale-1/nation.parquet'; -create view region as select * from '/tables/scale-1/region.parquet'; - -select - n_name, - sum(l_extendedprice * (1 - l_discount)) as revenue -from - customer, - orders, - lineitem, - supplier, - nation, - region -where - c_custkey = o_custkey - and l_orderkey = o_orderkey - and l_suppkey = s_suppkey - and c_nationkey = s_nationkey - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'ASIA' - and o_orderdate >= date '1994-01-01' - and o_orderdate < date '1995-01-01' -group by - n_name -order by - revenue desc; -*/ +/** + * @file q5.cpp + * @brief Implement query 5 of the TPC-H benchmark. + * + * create view customer as select * from '/tables/scale-1/customer.parquet'; + * create view orders as select * from '/tables/scale-1/orders.parquet'; + * create view lineitem as select * from '/tables/scale-1/lineitem.parquet'; + * create view supplier as select * from '/tables/scale-1/supplier.parquet'; + * create view nation as select * from '/tables/scale-1/nation.parquet'; + * create view region as select * from '/tables/scale-1/region.parquet'; + * + * select + * n_name, + * sum(l_extendedprice * (1 - l_discount)) as revenue + * from + * customer, + * orders, + * lineitem, + * supplier, + * nation, + * region + * where + * c_custkey = o_custkey + * and l_orderkey = o_orderkey + * and l_suppkey = s_suppkey + * and c_nationkey = s_nationkey + * and s_nationkey = n_nationkey + * and n_regionkey = r_regionkey + * and r_name = 'ASIA' + * and o_orderdate >= date '1994-01-01' + * and o_orderdate < date '1995-01-01' + * group by + * n_name + * order by + * revenue desc; + */ /** * @brief Calculate the revenue column From 90903f2fcbf62ddeac2be2ee8a3c411a839bb25f Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 10 Jul 2024 18:59:31 -0700 Subject: [PATCH 101/124] Add file-level docstring to Q6 --- cpp/examples/tpch/q6.cpp | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 65dd0d856eb..b03f6eaea54 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -20,20 +20,23 @@ #include #include -/* -create view lineitem as select * from '/tables/scale-1/lineitem.parquet'; - -select - sum(l_extendedprice * l_discount) as revenue -from - lineitem -where - l_shipdate >= date '1994-01-01' - and l_shipdate < date '1995-01-01' - and l_discount >= 0.05 - and l_discount <= 0.07 - and l_quantity < 24; -*/ +/** + * @file q6.cpp + * @brief Implement query 6 of the TPC-H benchmark. + * + * create view lineitem as select * from '/tables/scale-1/lineitem.parquet'; + * + * select + * sum(l_extendedprice * l_discount) as revenue + * from + * lineitem + * where + * l_shipdate >= date '1994-01-01' + * and l_shipdate < date '1995-01-01' + * and l_discount >= 0.05 + * and l_discount <= 0.07 + * and l_quantity < 24; + */ /** * @brief Calculate the revenue column From 0c78691fdb302480b41c5211ad1c04f7975b896b Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 10 Jul 2024 19:05:12 -0700 Subject: [PATCH 102/124] Add file-level docstring to Q9 --- cpp/examples/tpch/q9.cpp | 85 +++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index fd8489dabcf..c9da44326f4 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -22,47 +22,50 @@ #include #include -/* -create view part as select * from '/tables/scale-1/part.parquet'; -create view supplier as select * from '/tables/scale-1/supplier.parquet'; -create view lineitem as select * from '/tables/scale-1/lineitem.parquet'; -create view partsupp as select * from '/tables/scale-1/partsupp.parquet'; -create view orders as select * from '/tables/scale-1/orders.parquet'; -create view nation as select * from '/tables/scale-1/nation.parquet'; - -select - nation, - o_year, - sum(amount) as sum_profit -from - ( - select - n_name as nation, - extract(year from o_orderdate) as o_year, - l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount - from - part, - supplier, - lineitem, - partsupp, - orders, - nation - where - s_suppkey = l_suppkey - and ps_suppkey = l_suppkey - and ps_partkey = l_partkey - and p_partkey = l_partkey - and o_orderkey = l_orderkey - and s_nationkey = n_nationkey - and p_name like '%green%' - ) as profit -group by - nation, - o_year -order by - nation, - o_year desc; -*/ +/** + * @file q9.cpp + * @brief Implement query 9 of the TPC-H benchmark. + * + * create view part as select * from '/tables/scale-1/part.parquet'; + * create view supplier as select * from '/tables/scale-1/supplier.parquet'; + * create view lineitem as select * from '/tables/scale-1/lineitem.parquet'; + * create view partsupp as select * from '/tables/scale-1/partsupp.parquet'; + * create view orders as select * from '/tables/scale-1/orders.parquet'; + * create view nation as select * from '/tables/scale-1/nation.parquet'; + * + * select + * nation, + * o_year, + * sum(amount) as sum_profit + * from + * ( + * select + * n_name as nation, + * extract(year from o_orderdate) as o_year, + * l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount + * from + * part, + * supplier, + * lineitem, + * partsupp, + * orders, + * nation + * where + * s_suppkey = l_suppkey + * and ps_suppkey = l_suppkey + * and ps_partkey = l_partkey + * and p_partkey = l_partkey + * and o_orderkey = l_orderkey + * and s_nationkey = n_nationkey + * and p_name like '%green%' + * ) as profit + * group by + * nation, + * o_year + * order by + * nation, + * o_year desc; + */ /** * @brief Calculate the amount column From 194f08f8f9456c31acf795021a122aed60d1412c Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 10 Jul 2024 19:12:25 -0700 Subject: [PATCH 103/124] Add docstring to join_and_gather function --- cpp/examples/tpch/utils.hpp | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index f8de4ee89ca..a6ab030bc74 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -170,20 +170,27 @@ std::vector concat(std::vector const& lhs, std::vector const& rhs) return result; } -std::unique_ptr join_and_gather( - cudf::table_view left_input, - cudf::table_view right_input, - std::vector left_on, - std::vector right_on, - cudf::null_equality compare_nulls, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) +/** + * @brief Inner join two tables and gather the result + * + * @param left_input The left input table + * @param right_input The right input table + * @param left_on The columns to join on in the left table + * @param right_on The columns to join on in the right table + * @param compare_nulls The null equality policy + */ +std::unique_ptr join_and_gather(cudf::table_view left_input, + cudf::table_view right_input, + std::vector left_on, + std::vector right_on, + cudf::null_equality compare_nulls) { CUDF_FUNC_RANGE(); - auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK; - auto left_selected = left_input.select(left_on); - auto right_selected = right_input.select(right_on); - auto const [left_join_indices, right_join_indices] = - cudf::inner_join(left_selected, right_selected, compare_nulls, mr); + auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK; + auto left_selected = left_input.select(left_on); + auto right_selected = right_input.select(right_on); + auto const [left_join_indices, right_join_indices] = cudf::inner_join( + left_selected, right_selected, compare_nulls, rmm::mr::get_current_device_resource()); auto left_indices_span = cudf::device_span{*left_join_indices}; auto right_indices_span = cudf::device_span{*right_join_indices}; From 6b12762faaae5c80e3f4e62acf75a0df8d3a6fdf Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 10 Jul 2024 21:42:38 -0700 Subject: [PATCH 104/124] Add consts to join_and_gather --- cpp/examples/tpch/utils.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index a6ab030bc74..d674fd1bac4 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -187,19 +187,19 @@ std::unique_ptr join_and_gather(cudf::table_view left_input, { CUDF_FUNC_RANGE(); auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK; - auto left_selected = left_input.select(left_on); - auto right_selected = right_input.select(right_on); + auto const left_selected = left_input.select(left_on); + auto const right_selected = right_input.select(right_on); auto const [left_join_indices, right_join_indices] = cudf::inner_join( left_selected, right_selected, compare_nulls, rmm::mr::get_current_device_resource()); - auto left_indices_span = cudf::device_span{*left_join_indices}; - auto right_indices_span = cudf::device_span{*right_join_indices}; + auto const left_indices_span = cudf::device_span{*left_join_indices}; + auto const right_indices_span = cudf::device_span{*right_join_indices}; - auto left_indices_col = cudf::column_view{left_indices_span}; - auto right_indices_col = cudf::column_view{right_indices_span}; + auto const left_indices_col = cudf::column_view{left_indices_span}; + auto const right_indices_col = cudf::column_view{right_indices_span}; - auto left_result = cudf::gather(left_input, left_indices_col, oob_policy); - auto right_result = cudf::gather(right_input, right_indices_col, oob_policy); + auto const left_result = cudf::gather(left_input, left_indices_col, oob_policy); + auto const right_result = cudf::gather(right_input, right_indices_col, oob_policy); auto joined_cols = left_result->release(); auto right_cols = right_result->release(); From 9dd31a7f1c96f33224cfff828ecd7aefb19eb1dc Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 10 Jul 2024 22:41:18 -0700 Subject: [PATCH 105/124] Add more const literals --- cpp/examples/tpch/utils.hpp | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index d674fd1bac4..277d73fc238 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -122,7 +122,7 @@ class table_with_cols { { CUDF_FUNC_RANGE(); std::vector col_indices; - for (auto& col_name : col_names) { + for (auto const& col_name : col_names) { col_indices.push_back(col_id(col_name)); } return tbl->select(col_indices); @@ -135,17 +135,17 @@ class table_with_cols { void to_parquet(std::string filepath) { CUDF_FUNC_RANGE(); - auto sink_info = cudf::io::sink_info(filepath); + auto const sink_info = cudf::io::sink_info(filepath); cudf::io::table_metadata metadata; std::vector col_name_infos; - for (auto& col_name : col_names) { + for (auto const& col_name : col_names) { col_name_infos.push_back(cudf::io::column_name_info(col_name)); } - metadata.schema_info = col_name_infos; - auto table_input_metadata = cudf::io::table_input_metadata{metadata}; - auto builder = cudf::io::parquet_writer_options::builder(sink_info, tbl->view()); + metadata.schema_info = col_name_infos; + auto const table_input_metadata = cudf::io::table_input_metadata{metadata}; + auto builder = cudf::io::parquet_writer_options::builder(sink_info, tbl->view()); builder.metadata(table_input_metadata); - auto options = builder.build(); + auto const options = builder.build(); cudf::io::write_parquet(options); } @@ -250,8 +250,8 @@ std::unique_ptr apply_filter(std::unique_ptr& cudf::ast::operation& predicate) { CUDF_FUNC_RANGE(); - auto boolean_mask = cudf::compute_column(table->table(), predicate); - auto result_table = cudf::apply_boolean_mask(table->table(), boolean_mask->view()); + auto const boolean_mask = cudf::compute_column(table->table(), predicate); + auto result_table = cudf::apply_boolean_mask(table->table(), boolean_mask->view()); return std::make_unique(std::move(result_table), table->columns()); } @@ -285,7 +285,7 @@ std::unique_ptr apply_groupby(std::unique_ptr& groupby_context_t ctx) { CUDF_FUNC_RANGE(); - auto keys = table->select(ctx.keys); + auto const keys = table->select(ctx.keys); cudf::groupby::groupby groupby_obj(keys); std::vector result_column_names; result_column_names.insert(result_column_names.end(), ctx.keys.begin(), ctx.keys.end()); @@ -357,8 +357,8 @@ std::unique_ptr apply_reduction(cudf::column_view& column, std::string col_name) { CUDF_FUNC_RANGE(); - auto agg = cudf::make_sum_aggregation(); - auto result = cudf::reduce(column, *agg, column.type()); + auto const agg = cudf::make_sum_aggregation(); + auto const result = cudf::reduce(column, *agg, column.type()); cudf::size_type len = 1; auto col = cudf::make_column_from_scalar(*result, len); std::vector> columns; @@ -381,15 +381,15 @@ std::unique_ptr read_parquet( std::unique_ptr predicate = nullptr) { CUDF_FUNC_RANGE(); - auto source = cudf::io::source_info(filename); - auto builder = cudf::io::parquet_reader_options_builder(source); + auto const source = cudf::io::source_info(filename); + auto builder = cudf::io::parquet_reader_options_builder(source); if (columns.size()) { builder.columns(columns); } if (predicate) { builder.filter(*predicate); } - auto options = builder.build(); + auto const options = builder.build(); auto table_with_metadata = cudf::io::read_parquet(options); - auto schema_info = table_with_metadata.metadata.schema_info; + auto const schema_info = table_with_metadata.metadata.schema_info; std::vector column_names; - for (auto& col_info : schema_info) { + for (auto const& col_info : schema_info) { column_names.push_back(col_info.name); } return std::make_unique(std::move(table_with_metadata.tbl), column_names); From 0e989949a7859d58b25c5b2ddcd7ec512acf071a Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 10 Jul 2024 22:45:28 -0700 Subject: [PATCH 106/124] Add more const literals --- cpp/examples/tpch/utils.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 277d73fc238..56e94d42316 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -357,10 +357,10 @@ std::unique_ptr apply_reduction(cudf::column_view& column, std::string col_name) { CUDF_FUNC_RANGE(); - auto const agg = cudf::make_sum_aggregation(); - auto const result = cudf::reduce(column, *agg, column.type()); - cudf::size_type len = 1; - auto col = cudf::make_column_from_scalar(*result, len); + auto const agg = cudf::make_sum_aggregation(); + auto const result = cudf::reduce(column, *agg, column.type()); + cudf::size_type const len = 1; + auto col = cudf::make_column_from_scalar(*result, len); std::vector> columns; columns.push_back(std::move(col)); auto result_table = std::make_unique(std::move(columns)); From 792f33e896c87a1034cc31769c57f896a1456f2d Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Thu, 11 Jul 2024 15:26:44 -0700 Subject: [PATCH 107/124] Add consts, nodiscards, and other qualifiers --- cpp/examples/tpch/q1.cpp | 8 ++-- cpp/examples/tpch/q5.cpp | 4 +- cpp/examples/tpch/q6.cpp | 4 +- cpp/examples/tpch/q9.cpp | 10 ++--- cpp/examples/tpch/utils.hpp | 84 +++++++++++++++++++------------------ 5 files changed, 57 insertions(+), 53 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 73a75be8d8d..b36b9efec99 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -58,8 +58,8 @@ * @param mr Device memory resource used to allocate the returned column's device memory. */ std::unique_ptr calc_disc_price( - cudf::column_view discount, - cudf::column_view extendedprice, + cudf::column_view const& discount, + cudf::column_view const& extendedprice, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { @@ -85,8 +85,8 @@ std::unique_ptr calc_disc_price( * @param mr Device memory resource used to allocate the returned column's device memory. */ std::unique_ptr calc_charge( - cudf::column_view tax, - cudf::column_view disc_price, + cudf::column_view const& tax, + cudf::column_view const& disc_price, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 200e6dbba9c..6dd09fd082d 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -66,8 +66,8 @@ * @param mr Device memory resource used to allocate the returned column's device memory. */ std::unique_ptr calc_revenue( - cudf::column_view extendedprice, - cudf::column_view discount, + cudf::column_view const& extendedprice, + cudf::column_view const& discount, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index b03f6eaea54..206b1732d91 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -47,8 +47,8 @@ * @param mr Device memory resource used to allocate the returned column's device memory. */ std::unique_ptr calc_revenue( - cudf::column_view extendedprice, - cudf::column_view discount, + cudf::column_view const& extendedprice, + cudf::column_view const& discount, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index c9da44326f4..f5b7210f31f 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -78,10 +78,10 @@ * @param mr Device memory resource used to allocate the returned column's device memory. */ std::unique_ptr calc_amount( - cudf::column_view discount, - cudf::column_view extendedprice, - cudf::column_view supplycost, - cudf::column_view quantity, + cudf::column_view const& discount, + cudf::column_view const& extendedprice, + cudf::column_view const& supplycost, + cudf::column_view const& quantity, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { @@ -159,7 +159,7 @@ int main(int argc, char const** argv) profit_columns.push_back(std::move(amount)); auto profit_table = std::make_unique(std::move(profit_columns)); - auto profit = std::make_unique( + auto profit = std::make_unique( std::move(profit_table), std::vector{"nation", "o_year", "amount"}); // Perform the groupby operation diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 56e94d42316..412d0efe847 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -66,32 +66,35 @@ inline std::shared_ptr create_memory_resource( /** * @brief A class to represent a table with column names attached */ -class table_with_cols { +class table_with_names { public: - table_with_cols(std::unique_ptr tbl, std::vector col_names) + table_with_names(std::unique_ptr tbl, std::vector col_names) : tbl(std::move(tbl)), col_names(col_names) { } /** * @brief Return the table view */ - cudf::table_view table() { return tbl->view(); } + [[nodiscard]] cudf::table_view const table() { return tbl->view(); } /** * @brief Return the column view for a given column name * * @param col_name The name of the column */ - cudf::column_view column(std::string col_name) { return tbl->view().column(col_id(col_name)); } + [[nodiscard]] cudf::column_view const column(std::string const& col_name) + { + return tbl->view().column(col_id(col_name)); + } /** * @param Return the column names of the table */ - std::vector columns() { return col_names; } + [[nodiscard]] std::vector const column_names() { return col_names; } /** * @brief Translate a column name to a column index * * @param col_name The name of the column */ - cudf::size_type col_id(std::string col_name) + [[nodiscard]] cudf::size_type const col_id(std::string const& col_name) { CUDF_FUNC_RANGE(); auto it = std::find(col_names.begin(), col_names.end(), col_name); @@ -104,21 +107,22 @@ class table_with_cols { * @param col The column to append * @param col_name The name of the appended column */ - std::unique_ptr append(std::unique_ptr& col, std::string col_name) + std::unique_ptr append(std::unique_ptr& col, + std::string const& col_name) { CUDF_FUNC_RANGE(); auto cols = tbl->release(); cols.push_back(std::move(col)); col_names.push_back(col_name); auto appended_table = std::make_unique(std::move(cols)); - return std::make_unique(std::move(appended_table), col_names); + return std::make_unique(std::move(appended_table), col_names); } /** * @brief Select a subset of columns from the table * * @param col_names The names of the columns to select */ - cudf::table_view select(std::vector col_names) + [[nodiscard]] cudf::table_view const select(std::vector const& col_names) { CUDF_FUNC_RANGE(); std::vector col_indices; @@ -132,7 +136,7 @@ class table_with_cols { * * @param filepath The path to the parquet file */ - void to_parquet(std::string filepath) + void to_parquet(std::string const& filepath) { CUDF_FUNC_RANGE(); auto const sink_info = cudf::io::sink_info(filepath); @@ -179,14 +183,14 @@ std::vector concat(std::vector const& lhs, std::vector const& rhs) * @param right_on The columns to join on in the right table * @param compare_nulls The null equality policy */ -std::unique_ptr join_and_gather(cudf::table_view left_input, - cudf::table_view right_input, - std::vector left_on, - std::vector right_on, +std::unique_ptr join_and_gather(cudf::table_view const& left_input, + cudf::table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, cudf::null_equality compare_nulls) { CUDF_FUNC_RANGE(); - auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK; + constexpr auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK; auto const left_selected = left_input.select(left_on); auto const right_selected = right_input.select(right_on); auto const [left_join_indices, right_join_indices] = cudf::inner_join( @@ -218,9 +222,9 @@ std::unique_ptr join_and_gather(cudf::table_view left_input, * @param right_on The columns to join on in the right table * @param compare_nulls The null equality policy */ -std::unique_ptr apply_inner_join( - std::unique_ptr& left_input, - std::unique_ptr& right_input, +std::unique_ptr apply_inner_join( + std::unique_ptr& left_input, + std::unique_ptr& right_input, std::vector left_on, std::vector right_on, cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) @@ -236,8 +240,8 @@ std::unique_ptr apply_inner_join( } auto table = join_and_gather( left_input->table(), right_input->table(), left_on_indices, right_on_indices, compare_nulls); - return std::make_unique(std::move(table), - concat(left_input->columns(), right_input->columns())); + return std::make_unique( + std::move(table), concat(left_input->column_names(), right_input->column_names())); } /** @@ -246,13 +250,13 @@ std::unique_ptr apply_inner_join( * @param table The input table * @param predicate The filter predicate */ -std::unique_ptr apply_filter(std::unique_ptr& table, - cudf::ast::operation& predicate) +std::unique_ptr apply_filter(std::unique_ptr& table, + cudf::ast::operation& predicate) { CUDF_FUNC_RANGE(); auto const boolean_mask = cudf::compute_column(table->table(), predicate); auto result_table = cudf::apply_boolean_mask(table->table(), boolean_mask->view()); - return std::make_unique(std::move(result_table), table->columns()); + return std::make_unique(std::move(result_table), table->column_names()); } /** @@ -261,12 +265,12 @@ std::unique_ptr apply_filter(std::unique_ptr& * @param table The input table * @param mask The boolean mask */ -std::unique_ptr apply_mask(std::unique_ptr& table, - std::unique_ptr& mask) +std::unique_ptr apply_mask(std::unique_ptr& table, + std::unique_ptr& mask) { CUDF_FUNC_RANGE(); auto result_table = cudf::apply_boolean_mask(table->table(), mask->view()); - return std::make_unique(std::move(result_table), table->columns()); + return std::make_unique(std::move(result_table), table->column_names()); } struct groupby_context_t { @@ -281,8 +285,8 @@ struct groupby_context_t { * @param table The input table * @param ctx The groupby context */ -std::unique_ptr apply_groupby(std::unique_ptr& table, - groupby_context_t ctx) +std::unique_ptr apply_groupby(std::unique_ptr& table, + groupby_context_t ctx) { CUDF_FUNC_RANGE(); auto const keys = table->select(ctx.keys); @@ -321,7 +325,7 @@ std::unique_ptr apply_groupby(std::unique_ptr& } } auto result_table = std::make_unique(std::move(result_columns)); - return std::make_unique(std::move(result_table), result_column_names); + return std::make_unique(std::move(result_table), result_column_names); } /** @@ -331,9 +335,9 @@ std::unique_ptr apply_groupby(std::unique_ptr& * @param sort_keys The sort keys * @param sort_key_orders The sort key orders */ -std::unique_ptr apply_orderby(std::unique_ptr& table, - std::vector sort_keys, - std::vector sort_key_orders) +std::unique_ptr apply_orderby(std::unique_ptr& table, + std::vector sort_keys, + std::vector sort_key_orders) { CUDF_FUNC_RANGE(); std::vector column_views; @@ -342,7 +346,7 @@ std::unique_ptr apply_orderby(std::unique_ptr& } auto result_table = cudf::sort_by_key(table->table(), cudf::table_view{column_views}, sort_key_orders); - return std::make_unique(std::move(result_table), table->columns()); + return std::make_unique(std::move(result_table), table->column_names()); } /** @@ -352,9 +356,9 @@ std::unique_ptr apply_orderby(std::unique_ptr& * @param agg_kind The aggregation kind * @param col_name The name of the output column */ -std::unique_ptr apply_reduction(cudf::column_view& column, - cudf::aggregation::Kind agg_kind, - std::string col_name) +std::unique_ptr apply_reduction(cudf::column_view& column, + cudf::aggregation::Kind agg_kind, + std::string col_name) { CUDF_FUNC_RANGE(); auto const agg = cudf::make_sum_aggregation(); @@ -365,7 +369,7 @@ std::unique_ptr apply_reduction(cudf::column_view& column, columns.push_back(std::move(col)); auto result_table = std::make_unique(std::move(columns)); std::vector col_names = {col_name}; - return std::make_unique(std::move(result_table), col_names); + return std::make_unique(std::move(result_table), col_names); } /** @@ -375,7 +379,7 @@ std::unique_ptr apply_reduction(cudf::column_view& column, * @param columns The columns to read * @param predicate The filter predicate to pushdown */ -std::unique_ptr read_parquet( +std::unique_ptr read_parquet( std::string filename, std::vector columns = {}, std::unique_ptr predicate = nullptr) @@ -383,7 +387,7 @@ std::unique_ptr read_parquet( CUDF_FUNC_RANGE(); auto const source = cudf::io::source_info(filename); auto builder = cudf::io::parquet_reader_options_builder(source); - if (columns.size()) { builder.columns(columns); } + if (!columns.empty()) { builder.columns(columns); } if (predicate) { builder.filter(*predicate); } auto const options = builder.build(); auto table_with_metadata = cudf::io::read_parquet(options); @@ -392,7 +396,7 @@ std::unique_ptr read_parquet( for (auto const& col_info : schema_info) { column_names.push_back(col_info.name); } - return std::make_unique(std::move(table_with_metadata.tbl), column_names); + return std::make_unique(std::move(table_with_metadata.tbl), column_names); } /** From 5e103b978f939072af6daf17a22f43033d892e0c Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Thu, 11 Jul 2024 15:48:43 -0700 Subject: [PATCH 108/124] Add more const references --- cpp/examples/tpch/utils.hpp | 46 ++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 412d0efe847..8a37c003d33 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -223,10 +223,10 @@ std::unique_ptr join_and_gather(cudf::table_view const& left_input, * @param compare_nulls The null equality policy */ std::unique_ptr apply_inner_join( - std::unique_ptr& left_input, - std::unique_ptr& right_input, - std::vector left_on, - std::vector right_on, + std::unique_ptr const& left_input, + std::unique_ptr const& right_input, + std::vector const& left_on, + std::vector const& right_on, cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) { CUDF_FUNC_RANGE(); @@ -250,8 +250,8 @@ std::unique_ptr apply_inner_join( * @param table The input table * @param predicate The filter predicate */ -std::unique_ptr apply_filter(std::unique_ptr& table, - cudf::ast::operation& predicate) +std::unique_ptr apply_filter(std::unique_ptr const& table, + cudf::ast::operation const& predicate) { CUDF_FUNC_RANGE(); auto const boolean_mask = cudf::compute_column(table->table(), predicate); @@ -265,8 +265,8 @@ std::unique_ptr apply_filter(std::unique_ptr * @param table The input table * @param mask The boolean mask */ -std::unique_ptr apply_mask(std::unique_ptr& table, - std::unique_ptr& mask) +std::unique_ptr apply_mask(std::unique_ptr const& table, + std::unique_ptr const& mask) { CUDF_FUNC_RANGE(); auto result_table = cudf::apply_boolean_mask(table->table(), mask->view()); @@ -285,8 +285,8 @@ struct groupby_context_t { * @param table The input table * @param ctx The groupby context */ -std::unique_ptr apply_groupby(std::unique_ptr& table, - groupby_context_t ctx) +std::unique_ptr apply_groupby(std::unique_ptr const& table, + groupby_context_t const& ctx) { CUDF_FUNC_RANGE(); auto const keys = table->select(ctx.keys); @@ -335,9 +335,9 @@ std::unique_ptr apply_groupby(std::unique_ptr apply_orderby(std::unique_ptr& table, - std::vector sort_keys, - std::vector sort_key_orders) +std::unique_ptr apply_orderby(std::unique_ptr const& table, + std::vector const& sort_keys, + std::vector const& sort_key_orders) { CUDF_FUNC_RANGE(); std::vector column_views; @@ -356,9 +356,9 @@ std::unique_ptr apply_orderby(std::unique_ptr apply_reduction(cudf::column_view& column, - cudf::aggregation::Kind agg_kind, - std::string col_name) +std::unique_ptr apply_reduction(cudf::column_view const& column, + cudf::aggregation::Kind const& agg_kind, + std::string const& col_name) { CUDF_FUNC_RANGE(); auto const agg = cudf::make_sum_aggregation(); @@ -380,9 +380,9 @@ std::unique_ptr apply_reduction(cudf::column_view& column, * @param predicate The filter predicate to pushdown */ std::unique_ptr read_parquet( - std::string filename, - std::vector columns = {}, - std::unique_ptr predicate = nullptr) + std::string const& filename, + std::vector const& columns = {}, + std::unique_ptr const& predicate = nullptr) { CUDF_FUNC_RANGE(); auto const source = cudf::io::source_info(filename); @@ -408,7 +408,7 @@ std::unique_ptr read_parquet( */ std::tm make_tm(int year, int month, int day) { - std::tm tm = {0}; + std::tm tm{}; tm.tm_year = year - 1900; tm.tm_mon = month - 1; tm.tm_mday = day; @@ -472,13 +472,13 @@ class Timer { Timer() { reset(); } void reset() { start_time = std::chrono::high_resolution_clock::now(); } - auto elapsed() { return (std::chrono::high_resolution_clock::now() - start_time); } - void print_elapsed_micros() + auto const elapsed() { return (std::chrono::high_resolution_clock::now() - start_time); } + void const print_elapsed_micros() { std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() << "us\n\n"; } - void print_elapsed_millis() + void const print_elapsed_millis() { std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() << "ms\n\n"; From a32e8990ba73a3c8c6049beaeb2766a45c29b144 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Thu, 11 Jul 2024 15:56:33 -0700 Subject: [PATCH 109/124] More improvements --- cpp/examples/tpch/utils.hpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 8a37c003d33..0ba5fe4a18f 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -141,11 +141,8 @@ class table_with_names { CUDF_FUNC_RANGE(); auto const sink_info = cudf::io::sink_info(filepath); cudf::io::table_metadata metadata; - std::vector col_name_infos; - for (auto const& col_name : col_names) { - col_name_infos.push_back(cudf::io::column_name_info(col_name)); - } - metadata.schema_info = col_name_infos; + metadata.schema_info = + std::vector(col_names.begin(), col_names.end()); auto const table_input_metadata = cudf::io::table_input_metadata{metadata}; auto builder = cudf::io::parquet_writer_options::builder(sink_info, tbl->view()); builder.metadata(table_input_metadata); @@ -391,9 +388,8 @@ std::unique_ptr read_parquet( if (predicate) { builder.filter(*predicate); } auto const options = builder.build(); auto table_with_metadata = cudf::io::read_parquet(options); - auto const schema_info = table_with_metadata.metadata.schema_info; std::vector column_names; - for (auto const& col_info : schema_info) { + for (auto const& col_info : table_with_metadata.metadata.schema_info) { column_names.push_back(col_info.name); } return std::make_unique(std::move(table_with_metadata.tbl), column_names); From 781a46033b22940f5d877fcc28ceb2af7a6562a0 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Thu, 11 Jul 2024 17:14:39 -0700 Subject: [PATCH 110/124] Add [[nodiscard]] to append --- cpp/examples/tpch/utils.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 0ba5fe4a18f..e941ff9797e 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -107,8 +107,8 @@ class table_with_names { * @param col The column to append * @param col_name The name of the appended column */ - std::unique_ptr append(std::unique_ptr& col, - std::string const& col_name) + [[nodiscard]] std::unique_ptr append(std::unique_ptr& col, + std::string const& col_name) { CUDF_FUNC_RANGE(); auto cols = tbl->release(); From 4578a4b1dcc343c1cb3ad03f343b732065127e45 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Thu, 11 Jul 2024 17:18:49 -0700 Subject: [PATCH 111/124] Use std transform --- cpp/examples/tpch/utils.hpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index e941ff9797e..3c076973fa9 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -229,12 +229,14 @@ std::unique_ptr apply_inner_join( CUDF_FUNC_RANGE(); std::vector left_on_indices; std::vector right_on_indices; - for (auto& col_name : left_on) { - left_on_indices.push_back(left_input->col_id(col_name)); - } - for (auto& col_name : right_on) { - right_on_indices.push_back(right_input->col_id(col_name)); - } + std::transform( + left_on.begin(), left_on.end(), std::back_inserter(left_on_indices), [&](auto const& col_name) { + return left_input->col_id(col_name); + }); + std::transform(right_on.begin(), + right_on.end(), + std::back_inserter(right_on_indices), + [&](auto const& col_name) { return right_input->col_id(col_name); }); auto table = join_and_gather( left_input->table(), right_input->table(), left_on_indices, right_on_indices, compare_nulls); return std::make_unique( From fcdde79138f6a8732ad1f6d6046764efb917f671 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Sun, 14 Jul 2024 20:44:34 -0700 Subject: [PATCH 112/124] Change to append in place --- cpp/examples/tpch/q1.cpp | 7 ++++--- cpp/examples/tpch/q5.cpp | 4 ++-- cpp/examples/tpch/q6.cpp | 11 ++++++----- cpp/examples/tpch/utils.hpp | 6 ++---- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index b36b9efec99..36489dec823 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -133,12 +133,13 @@ int main(int argc, char const** argv) // Calculate the discount price and charge columns and append to lineitem table auto disc_price = calc_disc_price(lineitem->column("l_discount"), lineitem->column("l_extendedprice")); - auto charge = calc_charge(lineitem->column("l_tax"), disc_price->view()); - auto appended_table = lineitem->append(disc_price, "disc_price")->append(charge, "charge"); + auto charge = calc_charge(lineitem->column("l_tax"), disc_price->view()); + lineitem->append(disc_price, "disc_price"); + lineitem->append(charge, "charge"); // Perform the group by operation auto groupedby_table = apply_groupby( - appended_table, + lineitem, groupby_context_t{ {"l_returnflag", "l_linestatus"}, { diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 6dd09fd082d..abd4d8bb45b 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -146,11 +146,11 @@ int main(int argc, char const** argv) // Calculate and append the `revenue` column auto revenue = calc_revenue(joined_table->column("l_extendedprice"), joined_table->column("l_discount")); - auto appended_table = joined_table->append(revenue, "revenue"); + joined_table->append(revenue, "revenue"); // Perform the groupby operation auto groupedby_table = - apply_groupby(appended_table, + apply_groupby(joined_table, groupby_context_t{{"n_name"}, { {"revenue", {{cudf::aggregation::Kind::SUM, "revenue"}}}, diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 206b1732d91..449058b2fed 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -93,12 +93,13 @@ int main(int argc, char const** argv) cudf::cast(lineitem->column("l_discount"), cudf::data_type{cudf::type_id::FLOAT32}); auto quantity_float = cudf::cast(lineitem->column("l_quantity"), cudf::data_type{cudf::type_id::FLOAT32}); - auto appended_table = - lineitem->append(discout_float, "l_discount_float")->append(quantity_float, "l_quantity_float"); + + lineitem->append(discout_float, "l_discount_float"); + lineitem->append(quantity_float, "l_quantity_float"); // Apply the filters - auto discount_ref = cudf::ast::column_reference(appended_table->col_id("l_discount_float")); - auto quantity_ref = cudf::ast::column_reference(appended_table->col_id("l_quantity_float")); + auto discount_ref = cudf::ast::column_reference(lineitem->col_id("l_discount_float")); + auto quantity_ref = cudf::ast::column_reference(lineitem->col_id("l_quantity_float")); auto discount_lower = cudf::numeric_scalar(0.05); auto discount_lower_literal = cudf::ast::literal(discount_lower); @@ -118,7 +119,7 @@ int main(int argc, char const** argv) cudf::ast::operation(cudf::ast::ast_operator::LESS, quantity_ref, quantity_upper_literal); auto discount_quantity_pred = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, discount_pred, quantity_pred); - auto filtered_table = apply_filter(appended_table, discount_quantity_pred); + auto filtered_table = apply_filter(lineitem, discount_quantity_pred); // Calculate the `revenue` column auto revenue = diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 3c076973fa9..d9068c7173e 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -107,15 +107,13 @@ class table_with_names { * @param col The column to append * @param col_name The name of the appended column */ - [[nodiscard]] std::unique_ptr append(std::unique_ptr& col, - std::string const& col_name) + [[nodiscard]] void append(std::unique_ptr& col, std::string const& col_name) { CUDF_FUNC_RANGE(); auto cols = tbl->release(); cols.push_back(std::move(col)); + tbl = std::make_unique(std::move(cols)); col_names.push_back(col_name); - auto appended_table = std::make_unique(std::move(cols)); - return std::make_unique(std::move(appended_table), col_names); } /** * @brief Select a subset of columns from the table From b0e764fa82c75fa71b5cd942368c2a9e157bd9ea Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Mon, 15 Jul 2024 17:33:30 -0700 Subject: [PATCH 113/124] Address reviews --- cpp/examples/tpch/q1.cpp | 4 +-- cpp/examples/tpch/q5.cpp | 2 +- cpp/examples/tpch/q6.cpp | 2 +- cpp/examples/tpch/q9.cpp | 3 +- cpp/examples/tpch/utils.hpp | 55 ++++++++++++++++++++----------------- 5 files changed, 36 insertions(+), 30 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 36489dec823..00cc5ebaded 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -57,7 +57,7 @@ * @param stream The CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. */ -std::unique_ptr calc_disc_price( +[[nodiscard]] std::unique_ptr calc_disc_price( cudf::column_view const& discount, cudf::column_view const& extendedprice, rmm::cuda_stream_view stream = cudf::get_default_stream(), @@ -84,7 +84,7 @@ std::unique_ptr calc_disc_price( * @param stream The CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. */ -std::unique_ptr calc_charge( +[[nodiscard]] std::unique_ptr calc_charge( cudf::column_view const& tax, cudf::column_view const& disc_price, rmm::cuda_stream_view stream = cudf::get_default_stream(), diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index abd4d8bb45b..8fa565d8772 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -65,7 +65,7 @@ * @param stream The CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. */ -std::unique_ptr calc_revenue( +[[nodiscard]] std::unique_ptr calc_revenue( cudf::column_view const& extendedprice, cudf::column_view const& discount, rmm::cuda_stream_view stream = cudf::get_default_stream(), diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 449058b2fed..b5cd6db255a 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -46,7 +46,7 @@ * @param stream The CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. */ -std::unique_ptr calc_revenue( +[[nodiscard]] std::unique_ptr calc_revenue( cudf::column_view const& extendedprice, cudf::column_view const& discount, rmm::cuda_stream_view stream = cudf::get_default_stream(), diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index f5b7210f31f..96a621f9a7e 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -77,7 +77,7 @@ * @param stream The CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. */ -std::unique_ptr calc_amount( +[[nodiscard]] std::unique_ptr calc_amount( cudf::column_view const& discount, cudf::column_view const& extendedprice, cudf::column_view const& supplycost, @@ -176,4 +176,5 @@ int main(int argc, char const** argv) // Write query result to a parquet file orderedby_table->to_parquet("q9.parquet"); + return 0; } diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index d9068c7173e..d5cfbadafdf 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -107,7 +107,7 @@ class table_with_names { * @param col The column to append * @param col_name The name of the appended column */ - [[nodiscard]] void append(std::unique_ptr& col, std::string const& col_name) + void append(std::unique_ptr& col, std::string const& col_name) { CUDF_FUNC_RANGE(); auto cols = tbl->release(); @@ -178,11 +178,12 @@ std::vector concat(std::vector const& lhs, std::vector const& rhs) * @param right_on The columns to join on in the right table * @param compare_nulls The null equality policy */ -std::unique_ptr join_and_gather(cudf::table_view const& left_input, - cudf::table_view const& right_input, - std::vector const& left_on, - std::vector const& right_on, - cudf::null_equality compare_nulls) +[[nodiscard]] std::unique_ptr join_and_gather( + cudf::table_view const& left_input, + cudf::table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, + cudf::null_equality compare_nulls) { CUDF_FUNC_RANGE(); constexpr auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK; @@ -217,7 +218,7 @@ std::unique_ptr join_and_gather(cudf::table_view const& left_input, * @param right_on The columns to join on in the right table * @param compare_nulls The null equality policy */ -std::unique_ptr apply_inner_join( +[[nodiscard]] std::unique_ptr apply_inner_join( std::unique_ptr const& left_input, std::unique_ptr const& right_input, std::vector const& left_on, @@ -247,8 +248,8 @@ std::unique_ptr apply_inner_join( * @param table The input table * @param predicate The filter predicate */ -std::unique_ptr apply_filter(std::unique_ptr const& table, - cudf::ast::operation const& predicate) +[[nodiscard]] std::unique_ptr apply_filter( + std::unique_ptr const& table, cudf::ast::operation const& predicate) { CUDF_FUNC_RANGE(); auto const boolean_mask = cudf::compute_column(table->table(), predicate); @@ -262,8 +263,8 @@ std::unique_ptr apply_filter(std::unique_ptr * @param table The input table * @param mask The boolean mask */ -std::unique_ptr apply_mask(std::unique_ptr const& table, - std::unique_ptr const& mask) +[[nodiscard]] std::unique_ptr apply_mask( + std::unique_ptr const& table, std::unique_ptr const& mask) { CUDF_FUNC_RANGE(); auto result_table = cudf::apply_boolean_mask(table->table(), mask->view()); @@ -282,8 +283,8 @@ struct groupby_context_t { * @param table The input table * @param ctx The groupby context */ -std::unique_ptr apply_groupby(std::unique_ptr const& table, - groupby_context_t const& ctx) +[[nodiscard]] std::unique_ptr apply_groupby( + std::unique_ptr const& table, groupby_context_t const& ctx) { CUDF_FUNC_RANGE(); auto const keys = table->select(ctx.keys); @@ -332,9 +333,10 @@ std::unique_ptr apply_groupby(std::unique_ptr apply_orderby(std::unique_ptr const& table, - std::vector const& sort_keys, - std::vector const& sort_key_orders) +[[nodiscard]] std::unique_ptr apply_orderby( + std::unique_ptr const& table, + std::vector const& sort_keys, + std::vector const& sort_key_orders) { CUDF_FUNC_RANGE(); std::vector column_views; @@ -353,9 +355,10 @@ std::unique_ptr apply_orderby(std::unique_ptr apply_reduction(cudf::column_view const& column, - cudf::aggregation::Kind const& agg_kind, - std::string const& col_name) +[[nodiscard]] std::unique_ptr apply_reduction( + cudf::column_view const& column, + cudf::aggregation::Kind const& agg_kind, + std::string const& col_name) { CUDF_FUNC_RANGE(); auto const agg = cudf::make_sum_aggregation(); @@ -376,7 +379,7 @@ std::unique_ptr apply_reduction(cudf::column_view const& colum * @param columns The columns to read * @param predicate The filter predicate to pushdown */ -std::unique_ptr read_parquet( +[[nodiscard]] std::unique_ptr read_parquet( std::string const& filename, std::vector const& columns = {}, std::unique_ptr const& predicate = nullptr) @@ -442,11 +445,13 @@ struct tpch_args_t { tpch_args_t parse_args(int argc, char const** argv) { if (argc < 3) { - std::cerr << "Usage: " << argv[0] << " " << std::endl; - std::cerr << std::endl; - std::cerr << "The query result will be saved to a parquet file named " - << "q{query_no}.parquet in the current working directory." << std::endl; - exit(1); + std::string usage_message = R"( + Usage: + + The query result will be saved to a parquet file named q{query_no}.parquet in the current + working directory. + )"; + throw std::runtime_error(usage_message); } tpch_args_t args; args.dataset_dir = argv[1]; From 243517963bdc7f8bf225f4371119703f29698a32 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Mon, 15 Jul 2024 18:05:29 -0700 Subject: [PATCH 114/124] Add consts --- cpp/examples/tpch/q1.cpp | 2 +- cpp/examples/tpch/q5.cpp | 2 +- cpp/examples/tpch/q6.cpp | 2 +- cpp/examples/tpch/q9.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 00cc5ebaded..e3afeba26a8 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -101,7 +101,7 @@ int main(int argc, char const** argv) { - auto args = parse_args(argc, argv); + auto const args = parse_args(argc, argv); // Use a memory pool auto resource = create_memory_resource(args.memory_resource_type); diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 8fa565d8772..998832f5543 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -86,7 +86,7 @@ int main(int argc, char const** argv) { - auto args = parse_args(argc, argv); + auto const args = parse_args(argc, argv); // Use a memory pool auto resource = create_memory_resource(args.memory_resource_type); diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index b5cd6db255a..c5cd2753e97 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -60,7 +60,7 @@ int main(int argc, char const** argv) { - auto args = parse_args(argc, argv); + auto const args = parse_args(argc, argv); // Use a memory pool auto resource = create_memory_resource(args.memory_resource_type); diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index 96a621f9a7e..13ed0a4e692 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -109,7 +109,7 @@ int main(int argc, char const** argv) { - auto args = parse_args(argc, argv); + auto const args = parse_args(argc, argv); // Use a memory pool auto resource = create_memory_resource(args.memory_resource_type); From d02cf08ef3e122943c5679e7f0e868e7683d37c9 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Mon, 15 Jul 2024 18:30:44 -0700 Subject: [PATCH 115/124] Add consts --- cpp/examples/tpch/q1.cpp | 42 ++++++++++++++--------------- cpp/examples/tpch/q5.cpp | 55 +++++++++++++++++++------------------- cpp/examples/tpch/q6.cpp | 48 ++++++++++++++++----------------- cpp/examples/tpch/q9.cpp | 57 ++++++++++++++++++++-------------------- 4 files changed, 102 insertions(+), 100 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index e3afeba26a8..94abde85c86 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -64,10 +64,10 @@ rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { auto const one = cudf::numeric_scalar(1); - auto one_minus_discount = + auto const one_minus_discount = cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type(), stream, mr); - auto disc_price_type = cudf::data_type{cudf::type_id::FLOAT64}; - auto disc_price = cudf::binary_operation(extendedprice, + auto const disc_price_type = cudf::data_type{cudf::type_id::FLOAT64}; + auto disc_price = cudf::binary_operation(extendedprice, one_minus_discount->view(), cudf::binary_operator::MUL, disc_price_type, @@ -91,10 +91,10 @@ rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { auto const one = cudf::numeric_scalar(1); - auto one_plus_tax = + auto const one_plus_tax = cudf::binary_operation(one, tax, cudf::binary_operator::ADD, tax.type(), stream, mr); - auto charge_type = cudf::data_type{cudf::type_id::FLOAT64}; - auto charge = cudf::binary_operation( + auto const charge_type = cudf::data_type{cudf::type_id::FLOAT64}; + auto charge = cudf::binary_operation( disc_price, one_plus_tax->view(), cudf::binary_operator::MUL, charge_type, stream, mr); return charge; } @@ -110,20 +110,20 @@ int main(int argc, char const** argv) Timer timer; // Define the column projections and filter predicate for `lineitem` table - std::vector lineitem_cols = {"l_returnflag", - "l_linestatus", - "l_quantity", - "l_extendedprice", - "l_discount", - "l_shipdate", - "l_orderkey", - "l_tax"}; - auto shipdate_ref = cudf::ast::column_reference(std::distance( + std::vector const lineitem_cols = {"l_returnflag", + "l_linestatus", + "l_quantity", + "l_extendedprice", + "l_discount", + "l_shipdate", + "l_orderkey", + "l_tax"}; + auto const shipdate_ref = cudf::ast::column_reference(std::distance( lineitem_cols.begin(), std::find(lineitem_cols.begin(), lineitem_cols.end(), "l_shipdate"))); auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); - auto shipdate_upper_literal = cudf::ast::literal(shipdate_upper); - auto lineitem_pred = std::make_unique( + auto const shipdate_upper_literal = cudf::ast::literal(shipdate_upper); + auto lineitem_pred = std::make_unique( cudf::ast::ast_operator::LESS_EQUAL, shipdate_ref, shipdate_upper_literal); // Read out the `lineitem` table from parquet file @@ -138,7 +138,7 @@ int main(int argc, char const** argv) lineitem->append(charge, "charge"); // Perform the group by operation - auto groupedby_table = apply_groupby( + auto const groupedby_table = apply_groupby( lineitem, groupby_context_t{ {"l_returnflag", "l_linestatus"}, @@ -162,9 +162,9 @@ int main(int argc, char const** argv) }}); // Perform the order by operation - auto orderedby_table = apply_orderby(groupedby_table, - {"l_returnflag", "l_linestatus"}, - {cudf::order::ASCENDING, cudf::order::ASCENDING}); + auto const orderedby_table = apply_orderby(groupedby_table, + {"l_returnflag", "l_linestatus"}, + {cudf::order::ASCENDING, cudf::order::ASCENDING}); timer.print_elapsed_millis(); diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 998832f5543..1f3daf40b4c 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -72,10 +72,10 @@ rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { auto const one = cudf::numeric_scalar(1); - auto one_minus_discount = + auto const one_minus_discount = cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type(), stream, mr); - auto revenue_type = cudf::data_type{cudf::type_id::FLOAT64}; - auto revenue = cudf::binary_operation(extendedprice, + auto const revenue_type = cudf::data_type{cudf::type_id::FLOAT64}; + auto revenue = cudf::binary_operation(extendedprice, one_minus_discount->view(), cudf::binary_operator::MUL, revenue_type, @@ -95,51 +95,51 @@ int main(int argc, char const** argv) Timer timer; // Define the column projection and filter predicate for the `orders` table - std::vector orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; - auto o_orderdate_ref = cudf::ast::column_reference(std::distance( + std::vector const orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; + auto const o_orderdate_ref = cudf::ast::column_reference(std::distance( orders_cols.begin(), std::find(orders_cols.begin(), orders_cols.end(), "o_orderdate"))); auto o_orderdate_lower = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); - auto o_orderdate_lower_limit = cudf::ast::literal(o_orderdate_lower); - auto o_orderdate_pred_lower = cudf::ast::operation( + auto const o_orderdate_lower_limit = cudf::ast::literal(o_orderdate_lower); + auto const o_orderdate_pred_lower = cudf::ast::operation( cudf::ast::ast_operator::GREATER_EQUAL, o_orderdate_ref, o_orderdate_lower_limit); auto o_orderdate_upper = cudf::timestamp_scalar(days_since_epoch(1995, 1, 1), true); - auto o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper); - auto o_orderdate_pred_upper = + auto const o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper); + auto const o_orderdate_pred_upper = cudf::ast::operation(cudf::ast::ast_operator::LESS, o_orderdate_ref, o_orderdate_upper_limit); auto orders_pred = std::make_unique( cudf::ast::ast_operator::LOGICAL_AND, o_orderdate_pred_lower, o_orderdate_pred_upper); // Define the column projection and filter predicate for the `region` table - std::vector region_cols = {"r_regionkey", "r_name"}; - auto r_name_ref = cudf::ast::column_reference(std::distance( + std::vector const region_cols = {"r_regionkey", "r_name"}; + auto const r_name_ref = cudf::ast::column_reference(std::distance( region_cols.begin(), std::find(region_cols.begin(), region_cols.end(), "r_name"))); - auto r_name_value = cudf::string_scalar("ASIA"); - auto r_name_literal = cudf::ast::literal(r_name_value); - auto region_pred = std::make_unique( + auto r_name_value = cudf::string_scalar("ASIA"); + auto const r_name_literal = cudf::ast::literal(r_name_value); + auto region_pred = std::make_unique( cudf::ast::ast_operator::EQUAL, r_name_ref, r_name_literal); // Read out the tables from parquet files // while pushing down the column projections and filter predicates - auto customer = + auto const customer = read_parquet(args.dataset_dir + "/customer.parquet", {"c_custkey", "c_nationkey"}); - auto orders = + auto const orders = read_parquet(args.dataset_dir + "/orders.parquet", orders_cols, std::move(orders_pred)); - auto lineitem = read_parquet(args.dataset_dir + "/lineitem.parquet", - {"l_orderkey", "l_suppkey", "l_extendedprice", "l_discount"}); - auto supplier = + auto const lineitem = read_parquet(args.dataset_dir + "/lineitem.parquet", + {"l_orderkey", "l_suppkey", "l_extendedprice", "l_discount"}); + auto const supplier = read_parquet(args.dataset_dir + "/supplier.parquet", {"s_suppkey", "s_nationkey"}); - auto nation = + auto const nation = read_parquet(args.dataset_dir + "/nation.parquet", {"n_nationkey", "n_regionkey", "n_name"}); - auto region = + auto const region = read_parquet(args.dataset_dir + "/region.parquet", region_cols, std::move(region_pred)); // Perform the joins - auto join_a = apply_inner_join(region, nation, {"r_regionkey"}, {"n_regionkey"}); - auto join_b = apply_inner_join(join_a, customer, {"n_nationkey"}, {"c_nationkey"}); - auto join_c = apply_inner_join(join_b, orders, {"c_custkey"}, {"o_custkey"}); - auto join_d = apply_inner_join(join_c, lineitem, {"o_orderkey"}, {"l_orderkey"}); + auto const join_a = apply_inner_join(region, nation, {"r_regionkey"}, {"n_regionkey"}); + auto const join_b = apply_inner_join(join_a, customer, {"n_nationkey"}, {"c_nationkey"}); + auto const join_c = apply_inner_join(join_b, orders, {"c_custkey"}, {"o_custkey"}); + auto const join_d = apply_inner_join(join_c, lineitem, {"o_orderkey"}, {"l_orderkey"}); auto joined_table = apply_inner_join(supplier, join_d, {"s_suppkey", "s_nationkey"}, {"l_suppkey", "n_nationkey"}); @@ -149,7 +149,7 @@ int main(int argc, char const** argv) joined_table->append(revenue, "revenue"); // Perform the groupby operation - auto groupedby_table = + auto const groupedby_table = apply_groupby(joined_table, groupby_context_t{{"n_name"}, { @@ -157,7 +157,8 @@ int main(int argc, char const** argv) }}); // Perform the order by operation - auto orderedby_table = apply_orderby(groupedby_table, {"revenue"}, {cudf::order::DESCENDING}); + auto const orderedby_table = + apply_orderby(groupedby_table, {"revenue"}, {cudf::order::DESCENDING}); timer.print_elapsed_millis(); diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index c5cd2753e97..b38b9399433 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -52,8 +52,8 @@ rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { - auto revenue_type = cudf::data_type{cudf::type_id::FLOAT64}; - auto revenue = cudf::binary_operation( + auto const revenue_type = cudf::data_type{cudf::type_id::FLOAT64}; + auto revenue = cudf::binary_operation( extendedprice, discount, cudf::binary_operator::MUL, revenue_type, stream, mr); return revenue; } @@ -69,19 +69,19 @@ int main(int argc, char const** argv) Timer timer; // Read out the `lineitem` table from parquet file - std::vector lineitem_cols = { + std::vector const lineitem_cols = { "l_extendedprice", "l_discount", "l_shipdate", "l_quantity"}; - auto shipdate_ref = cudf::ast::column_reference(std::distance( + auto const shipdate_ref = cudf::ast::column_reference(std::distance( lineitem_cols.begin(), std::find(lineitem_cols.begin(), lineitem_cols.end(), "l_shipdate"))); auto shipdate_lower = cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); - auto shipdate_lower_literal = cudf::ast::literal(shipdate_lower); + auto const shipdate_lower_literal = cudf::ast::literal(shipdate_lower); auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1995, 1, 1), true); - auto shipdate_upper_literal = cudf::ast::literal(shipdate_upper); - auto shipdate_pred_a = cudf::ast::operation( + auto const shipdate_upper_literal = cudf::ast::literal(shipdate_upper); + auto const shipdate_pred_a = cudf::ast::operation( cudf::ast::ast_operator::GREATER_EQUAL, shipdate_ref, shipdate_lower_literal); - auto shipdate_pred_b = + auto const shipdate_pred_b = cudf::ast::operation(cudf::ast::ast_operator::LESS, shipdate_ref, shipdate_upper_literal); auto lineitem_pred = std::make_unique( cudf::ast::ast_operator::LOGICAL_AND, shipdate_pred_a, shipdate_pred_b); @@ -98,36 +98,36 @@ int main(int argc, char const** argv) lineitem->append(quantity_float, "l_quantity_float"); // Apply the filters - auto discount_ref = cudf::ast::column_reference(lineitem->col_id("l_discount_float")); - auto quantity_ref = cudf::ast::column_reference(lineitem->col_id("l_quantity_float")); + auto const discount_ref = cudf::ast::column_reference(lineitem->col_id("l_discount_float")); + auto const quantity_ref = cudf::ast::column_reference(lineitem->col_id("l_quantity_float")); - auto discount_lower = cudf::numeric_scalar(0.05); - auto discount_lower_literal = cudf::ast::literal(discount_lower); - auto discount_upper = cudf::numeric_scalar(0.07); - auto discount_upper_literal = cudf::ast::literal(discount_upper); - auto quantity_upper = cudf::numeric_scalar(24); - auto quantity_upper_literal = cudf::ast::literal(quantity_upper); + auto discount_lower = cudf::numeric_scalar(0.05); + auto const discount_lower_literal = cudf::ast::literal(discount_lower); + auto discount_upper = cudf::numeric_scalar(0.07); + auto const discount_upper_literal = cudf::ast::literal(discount_upper); + auto quantity_upper = cudf::numeric_scalar(24); + auto const quantity_upper_literal = cudf::ast::literal(quantity_upper); - auto discount_pred_a = cudf::ast::operation( + auto const discount_pred_a = cudf::ast::operation( cudf::ast::ast_operator::GREATER_EQUAL, discount_ref, discount_lower_literal); - auto discount_pred_b = + auto const discount_pred_b = cudf::ast::operation(cudf::ast::ast_operator::LESS_EQUAL, discount_ref, discount_upper_literal); - auto discount_pred = + auto const discount_pred = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, discount_pred_a, discount_pred_b); - auto quantity_pred = + auto const quantity_pred = cudf::ast::operation(cudf::ast::ast_operator::LESS, quantity_ref, quantity_upper_literal); - auto discount_quantity_pred = + auto const discount_quantity_pred = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, discount_pred, quantity_pred); - auto filtered_table = apply_filter(lineitem, discount_quantity_pred); + auto const filtered_table = apply_filter(lineitem, discount_quantity_pred); // Calculate the `revenue` column auto revenue = calc_revenue(filtered_table->column("l_extendedprice"), filtered_table->column("l_discount")); // Sum the `revenue` column - auto revenue_view = revenue->view(); - auto result_table = apply_reduction(revenue_view, cudf::aggregation::Kind::SUM, "revenue"); + auto const revenue_view = revenue->view(); + auto const result_table = apply_reduction(revenue_view, cudf::aggregation::Kind::SUM, "revenue"); timer.print_elapsed_millis(); diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index 13ed0a4e692..27d0415c231 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -86,17 +86,17 @@ rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { auto const one = cudf::numeric_scalar(1); - auto one_minus_discount = + auto const one_minus_discount = cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type()); - auto extendedprice_discounted_type = cudf::data_type{cudf::type_id::FLOAT64}; - auto extendedprice_discounted = cudf::binary_operation(extendedprice, - one_minus_discount->view(), - cudf::binary_operator::MUL, - extendedprice_discounted_type, - stream, - mr); - auto supplycost_quantity_type = cudf::data_type{cudf::type_id::FLOAT64}; - auto supplycost_quantity = cudf::binary_operation( + auto const extendedprice_discounted_type = cudf::data_type{cudf::type_id::FLOAT64}; + auto const extendedprice_discounted = cudf::binary_operation(extendedprice, + one_minus_discount->view(), + cudf::binary_operator::MUL, + extendedprice_discounted_type, + stream, + mr); + auto const supplycost_quantity_type = cudf::data_type{cudf::type_id::FLOAT64}; + auto const supplycost_quantity = cudf::binary_operation( supplycost, quantity, cudf::binary_operator::MUL, supplycost_quantity_type); auto amount = cudf::binary_operation(extendedprice_discounted->view(), supplycost_quantity->view(), @@ -118,30 +118,31 @@ int main(int argc, char const** argv) Timer timer; // Read out the table from parquet files - auto lineitem = read_parquet( + auto const lineitem = read_parquet( args.dataset_dir + "/lineitem.parquet", {"l_suppkey", "l_partkey", "l_orderkey", "l_extendedprice", "l_discount", "l_quantity"}); - auto nation = read_parquet(args.dataset_dir + "/nation.parquet", {"n_nationkey", "n_name"}); - auto orders = read_parquet(args.dataset_dir + "/orders.parquet", {"o_orderkey", "o_orderdate"}); - auto part = read_parquet(args.dataset_dir + "/part.parquet", {"p_partkey", "p_name"}); - auto partsupp = read_parquet(args.dataset_dir + "/partsupp.parquet", - {"ps_suppkey", "ps_partkey", "ps_supplycost"}); - auto supplier = + auto const nation = read_parquet(args.dataset_dir + "/nation.parquet", {"n_nationkey", "n_name"}); + auto const orders = + read_parquet(args.dataset_dir + "/orders.parquet", {"o_orderkey", "o_orderdate"}); + auto const part = read_parquet(args.dataset_dir + "/part.parquet", {"p_partkey", "p_name"}); + auto const partsupp = read_parquet(args.dataset_dir + "/partsupp.parquet", + {"ps_suppkey", "ps_partkey", "ps_supplycost"}); + auto const supplier = read_parquet(args.dataset_dir + "/supplier.parquet", {"s_suppkey", "s_nationkey"}); // Generating the `profit` table // Filter the part table using `p_name like '%green%'` - auto p_name = part->table().column(1); - auto mask = + auto const p_name = part->table().column(1); + auto const mask = cudf::strings::like(cudf::strings_column_view(p_name), cudf::string_scalar("%green%")); - auto part_filtered = apply_mask(part, mask); + auto const part_filtered = apply_mask(part, mask); // Perform the joins - auto join_a = apply_inner_join(supplier, nation, {"s_nationkey"}, {"n_nationkey"}); - auto join_b = apply_inner_join(partsupp, join_a, {"ps_suppkey"}, {"s_suppkey"}); - auto join_c = apply_inner_join(lineitem, part_filtered, {"l_partkey"}, {"p_partkey"}); - auto join_d = apply_inner_join(orders, join_c, {"o_orderkey"}, {"l_orderkey"}); - auto joined_table = + auto const join_a = apply_inner_join(supplier, nation, {"s_nationkey"}, {"n_nationkey"}); + auto const join_b = apply_inner_join(partsupp, join_a, {"ps_suppkey"}, {"s_suppkey"}); + auto const join_c = apply_inner_join(lineitem, part_filtered, {"l_partkey"}, {"p_partkey"}); + auto const join_d = apply_inner_join(orders, join_c, {"o_orderkey"}, {"l_orderkey"}); + auto const joined_table = apply_inner_join(join_d, join_b, {"l_suppkey", "l_partkey"}, {"s_suppkey", "ps_partkey"}); // Calculate the `nation`, `o_year`, and `amount` columns @@ -159,17 +160,17 @@ int main(int argc, char const** argv) profit_columns.push_back(std::move(amount)); auto profit_table = std::make_unique(std::move(profit_columns)); - auto profit = std::make_unique( + auto const profit = std::make_unique( std::move(profit_table), std::vector{"nation", "o_year", "amount"}); // Perform the groupby operation - auto groupedby_table = apply_groupby( + auto const groupedby_table = apply_groupby( profit, groupby_context_t{{"nation", "o_year"}, {{"amount", {{cudf::groupby_aggregation::SUM, "sum_profit"}}}}}); // Perform the orderby operation - auto orderedby_table = apply_orderby( + auto const orderedby_table = apply_orderby( groupedby_table, {"nation", "o_year"}, {cudf::order::ASCENDING, cudf::order::DESCENDING}); timer.print_elapsed_millis(); From 750d6a914a766ff77e3f805c29598ceff70902f5 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Mon, 15 Jul 2024 18:39:11 -0700 Subject: [PATCH 116/124] Misc fixes --- cpp/examples/tpch/utils.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index d5cfbadafdf..5ae61ccccc5 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -431,7 +431,7 @@ int32_t days_since_epoch(int year, int month, int day) return static_cast(diff); } -struct tpch_args_t { +struct tpch_example_args { std::string dataset_dir; std::string memory_resource_type; }; @@ -442,7 +442,7 @@ struct tpch_args_t { * @param argc The number of command line arguments * @param argv The command line arguments */ -tpch_args_t parse_args(int argc, char const** argv) +tpch_example_args parse_args(int argc, char const** argv) { if (argc < 3) { std::string usage_message = R"( @@ -453,7 +453,7 @@ tpch_args_t parse_args(int argc, char const** argv) )"; throw std::runtime_error(usage_message); } - tpch_args_t args; + tpch_example_args args; args.dataset_dir = argv[1]; args.memory_resource_type = argv[2]; return args; @@ -473,13 +473,13 @@ class Timer { Timer() { reset(); } void reset() { start_time = std::chrono::high_resolution_clock::now(); } - auto const elapsed() { return (std::chrono::high_resolution_clock::now() - start_time); } - void const print_elapsed_micros() + auto elapsed() const { return (std::chrono::high_resolution_clock::now() - start_time); } + void print_elapsed_micros() const { std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() << "us\n\n"; } - void const print_elapsed_millis() + void print_elapsed_millis() const { std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() << "ms\n\n"; From 0f984f07049e0f5aceb226b30891ce7311489d5d Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Mon, 15 Jul 2024 18:49:33 -0700 Subject: [PATCH 117/124] Extract timer into cudf/utilities/timer.hpp --- cpp/examples/parquet_io/parquet_io.cpp | 4 +++- cpp/examples/parquet_io/parquet_io.hpp | 31 ------------------------ cpp/examples/tpch/q1.cpp | 3 ++- cpp/examples/tpch/q5.cpp | 3 ++- cpp/examples/tpch/q6.cpp | 3 ++- cpp/examples/tpch/q9.cpp | 3 ++- cpp/examples/tpch/utils.hpp | 33 -------------------------- 7 files changed, 11 insertions(+), 69 deletions(-) diff --git a/cpp/examples/parquet_io/parquet_io.cpp b/cpp/examples/parquet_io/parquet_io.cpp index 8be17db3781..d0a607d34ca 100644 --- a/cpp/examples/parquet_io/parquet_io.cpp +++ b/cpp/examples/parquet_io/parquet_io.cpp @@ -16,6 +16,8 @@ #include "parquet_io.hpp" +#include + /** * @file parquet_io.cpp * @brief Demonstrates usage of the libcudf APIs to read and write @@ -140,7 +142,7 @@ int main(int argc, char const** argv) << page_stat_string << ".." << std::endl; // `timer` is automatically started here - Timer timer; + cudf::timer timer; write_parquet(input->view(), metadata, output_filepath, encoding, compression, page_stats); timer.print_elapsed_millis(); diff --git a/cpp/examples/parquet_io/parquet_io.hpp b/cpp/examples/parquet_io/parquet_io.hpp index d2fc359a2fe..e27cbec4fce 100644 --- a/cpp/examples/parquet_io/parquet_io.hpp +++ b/cpp/examples/parquet_io/parquet_io.hpp @@ -124,34 +124,3 @@ std::shared_ptr create_memory_resource(bool is_ return std::nullopt; } - -/** - * @brief Light-weight timer for parquet reader and writer instrumentation - * - * Timer object constructed from std::chrono, instrumenting at microseconds - * precision. Can display elapsed durations at milli and micro second - * scales. Timer starts at object construction. - */ -class Timer { - public: - using micros = std::chrono::microseconds; - using millis = std::chrono::milliseconds; - - Timer() { reset(); } - void reset() { start_time = std::chrono::high_resolution_clock::now(); } - auto elapsed() { return (std::chrono::high_resolution_clock::now() - start_time); } - void print_elapsed_micros() - { - std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() - << "us\n\n"; - } - void print_elapsed_millis() - { - std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() - << "ms\n\n"; - } - - private: - using time_point_t = std::chrono::time_point; - time_point_t start_time; -}; diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 94abde85c86..96801c4c9e9 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -19,6 +19,7 @@ #include #include #include +#include /** * @file q1.cpp @@ -107,7 +108,7 @@ int main(int argc, char const** argv) auto resource = create_memory_resource(args.memory_resource_type); rmm::mr::set_current_device_resource(resource.get()); - Timer timer; + cudf::timer timer; // Define the column projections and filter predicate for `lineitem` table std::vector const lineitem_cols = {"l_returnflag", diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 1f3daf40b4c..2b2e9bd2388 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -19,6 +19,7 @@ #include #include #include +#include /** * @file q5.cpp @@ -92,7 +93,7 @@ int main(int argc, char const** argv) auto resource = create_memory_resource(args.memory_resource_type); rmm::mr::set_current_device_resource(resource.get()); - Timer timer; + cudf::timer timer; // Define the column projection and filter predicate for the `orders` table std::vector const orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index b38b9399433..86760ed780a 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -19,6 +19,7 @@ #include #include #include +#include /** * @file q6.cpp @@ -66,7 +67,7 @@ int main(int argc, char const** argv) auto resource = create_memory_resource(args.memory_resource_type); rmm::mr::set_current_device_resource(resource.get()); - Timer timer; + cudf::timer timer; // Read out the `lineitem` table from parquet file std::vector const lineitem_cols = { diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index 27d0415c231..71962191fa1 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -21,6 +21,7 @@ #include #include #include +#include /** * @file q9.cpp @@ -115,7 +116,7 @@ int main(int argc, char const** argv) auto resource = create_memory_resource(args.memory_resource_type); rmm::mr::set_current_device_resource(resource.get()); - Timer timer; + cudf::timer timer; // Read out the table from parquet files auto const lineitem = read_parquet( diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 5ae61ccccc5..1df72d262ab 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -35,9 +35,7 @@ #include #include -#include #include -#include // RMM memory resource creation utilities inline auto make_cuda() { return std::make_shared(); } @@ -458,34 +456,3 @@ tpch_example_args parse_args(int argc, char const** argv) args.memory_resource_type = argv[2]; return args; } - -/** - * @brief Light-weight timer for parquet reader and writer instrumentation - * - * Timer object constructed from std::chrono, instrumenting at microseconds - * precision. Can display elapsed durations at milli and micro second - * scales. Timer starts at object construction. - */ -class Timer { - public: - using micros = std::chrono::microseconds; - using millis = std::chrono::milliseconds; - - Timer() { reset(); } - void reset() { start_time = std::chrono::high_resolution_clock::now(); } - auto elapsed() const { return (std::chrono::high_resolution_clock::now() - start_time); } - void print_elapsed_micros() const - { - std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() - << "us\n\n"; - } - void print_elapsed_millis() const - { - std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() - << "ms\n\n"; - } - - private: - using time_point_t = std::chrono::time_point; - time_point_t start_time; -}; From 14be02215c4204d01f267877cdd07375dd99dbce Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Mon, 15 Jul 2024 18:50:18 -0700 Subject: [PATCH 118/124] Add a cudf::timer --- cpp/include/cudf/utilities/timer.hpp | 52 ++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 cpp/include/cudf/utilities/timer.hpp diff --git a/cpp/include/cudf/utilities/timer.hpp b/cpp/include/cudf/utilities/timer.hpp new file mode 100644 index 00000000000..713eb6bb47a --- /dev/null +++ b/cpp/include/cudf/utilities/timer.hpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +namespace cudf { +/** + * @brief Light-weight timer for measuring elapsed time. + * + * A timer object constructed from std::chrono, instrumenting at microseconds + * precision. Can display elapsed durations at milli and micro second + * scales. The timer starts at object construction. + */ +class timer { + public: + using micros = std::chrono::microseconds; + using millis = std::chrono::milliseconds; + + timer() { reset(); } + void reset() { start_time = std::chrono::high_resolution_clock::now(); } + auto elapsed() const { return (std::chrono::high_resolution_clock::now() - start_time); } + void print_elapsed_micros() const + { + std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() + << "us\n\n"; + } + void print_elapsed_millis() const + { + std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() + << "ms\n\n"; + } + + private: + using time_point_t = std::chrono::time_point; + time_point_t start_time; +}; + +}; // namespace cudf From 0ae2863a4bbd0febd869120a7fb7b8f38d68dd43 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Mon, 15 Jul 2024 18:54:00 -0700 Subject: [PATCH 119/124] Fix the invalid cli args message --- cpp/examples/tpch/utils.hpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 1df72d262ab..8b09aff7f30 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -443,12 +443,10 @@ struct tpch_example_args { tpch_example_args parse_args(int argc, char const** argv) { if (argc < 3) { - std::string usage_message = R"( - Usage: - - The query result will be saved to a parquet file named q{query_no}.parquet in the current - working directory. - )"; + std::string usage_message = "Usage: " + std::string(argv[0]) + + " \n The query result will be " + "saved to a parquet file named q{query_no}.parquet in the current " + "working directory "; throw std::runtime_error(usage_message); } tpch_example_args args; From 2ce182da6071903bcb2d35aecc00d1c4812db5b5 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Mon, 15 Jul 2024 18:59:32 -0700 Subject: [PATCH 120/124] Allow chaining of appends --- cpp/examples/tpch/q1.cpp | 3 +-- cpp/examples/tpch/q5.cpp | 2 +- cpp/examples/tpch/q6.cpp | 3 +-- cpp/examples/tpch/utils.hpp | 3 ++- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 96801c4c9e9..32fa3a74fd2 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -135,8 +135,7 @@ int main(int argc, char const** argv) auto disc_price = calc_disc_price(lineitem->column("l_discount"), lineitem->column("l_extendedprice")); auto charge = calc_charge(lineitem->column("l_tax"), disc_price->view()); - lineitem->append(disc_price, "disc_price"); - lineitem->append(charge, "charge"); + (*lineitem).append(disc_price, "disc_price").append(charge, "charge"); // Perform the group by operation auto const groupedby_table = apply_groupby( diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index 2b2e9bd2388..f69dc0b1aa5 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -147,7 +147,7 @@ int main(int argc, char const** argv) // Calculate and append the `revenue` column auto revenue = calc_revenue(joined_table->column("l_extendedprice"), joined_table->column("l_discount")); - joined_table->append(revenue, "revenue"); + (*joined_table).append(revenue, "revenue"); // Perform the groupby operation auto const groupedby_table = diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 86760ed780a..e0fa0389c1f 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -95,8 +95,7 @@ int main(int argc, char const** argv) auto quantity_float = cudf::cast(lineitem->column("l_quantity"), cudf::data_type{cudf::type_id::FLOAT32}); - lineitem->append(discout_float, "l_discount_float"); - lineitem->append(quantity_float, "l_quantity_float"); + (*lineitem).append(discout_float, "l_discount_float").append(quantity_float, "l_quantity_float"); // Apply the filters auto const discount_ref = cudf::ast::column_reference(lineitem->col_id("l_discount_float")); diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 8b09aff7f30..6b1ee892d08 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -105,13 +105,14 @@ class table_with_names { * @param col The column to append * @param col_name The name of the appended column */ - void append(std::unique_ptr& col, std::string const& col_name) + table_with_names& append(std::unique_ptr& col, std::string const& col_name) { CUDF_FUNC_RANGE(); auto cols = tbl->release(); cols.push_back(std::move(col)); tbl = std::make_unique(std::move(cols)); col_names.push_back(col_name); + return (*this); } /** * @brief Select a subset of columns from the table From 38f33eda772befcd5db6d38fa75d3338d1e0e0d0 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 16 Jul 2024 10:18:16 -0700 Subject: [PATCH 121/124] Move timer to a utilities dir in examples --- cpp/examples/tpch/q1.cpp | 2 +- cpp/examples/tpch/q5.cpp | 2 +- cpp/examples/tpch/q6.cpp | 2 +- cpp/examples/tpch/q9.cpp | 2 +- cpp/include/cudf/utilities/timer.hpp | 52 ---------------------------- 5 files changed, 4 insertions(+), 56 deletions(-) delete mode 100644 cpp/include/cudf/utilities/timer.hpp diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index 32fa3a74fd2..a4aa665d839 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -14,12 +14,12 @@ * limitations under the License. */ +#include "../utilities/timer.hpp" #include "utils.hpp" #include #include #include -#include /** * @file q1.cpp diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index f69dc0b1aa5..c2fa5d3cac6 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -14,12 +14,12 @@ * limitations under the License. */ +#include "../utilities/timer.hpp" #include "utils.hpp" #include #include #include -#include /** * @file q5.cpp diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index e0fa0389c1f..8eef5973d93 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -14,12 +14,12 @@ * limitations under the License. */ +#include "../utilities/timer.hpp" #include "utils.hpp" #include #include #include -#include /** * @file q6.cpp diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index 71962191fa1..e56db197042 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "../utilities/timer.hpp" #include "utils.hpp" #include @@ -21,7 +22,6 @@ #include #include #include -#include /** * @file q9.cpp diff --git a/cpp/include/cudf/utilities/timer.hpp b/cpp/include/cudf/utilities/timer.hpp deleted file mode 100644 index 713eb6bb47a..00000000000 --- a/cpp/include/cudf/utilities/timer.hpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -namespace cudf { -/** - * @brief Light-weight timer for measuring elapsed time. - * - * A timer object constructed from std::chrono, instrumenting at microseconds - * precision. Can display elapsed durations at milli and micro second - * scales. The timer starts at object construction. - */ -class timer { - public: - using micros = std::chrono::microseconds; - using millis = std::chrono::milliseconds; - - timer() { reset(); } - void reset() { start_time = std::chrono::high_resolution_clock::now(); } - auto elapsed() const { return (std::chrono::high_resolution_clock::now() - start_time); } - void print_elapsed_micros() const - { - std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() - << "us\n\n"; - } - void print_elapsed_millis() const - { - std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() - << "ms\n\n"; - } - - private: - using time_point_t = std::chrono::time_point; - time_point_t start_time; -}; - -}; // namespace cudf From c36895318743d79b82a19d0a6b1cd59f36d3f8a0 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 16 Jul 2024 10:20:31 -0700 Subject: [PATCH 122/124] Fix parquet_io and add the utilities dir --- cpp/examples/parquet_io/parquet_io.cpp | 2 +- cpp/examples/utilities/timer.hpp | 52 ++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 cpp/examples/utilities/timer.hpp diff --git a/cpp/examples/parquet_io/parquet_io.cpp b/cpp/examples/parquet_io/parquet_io.cpp index d0a607d34ca..b31b7f869bd 100644 --- a/cpp/examples/parquet_io/parquet_io.cpp +++ b/cpp/examples/parquet_io/parquet_io.cpp @@ -16,7 +16,7 @@ #include "parquet_io.hpp" -#include +#include "../utilities/timer.hpp" /** * @file parquet_io.cpp diff --git a/cpp/examples/utilities/timer.hpp b/cpp/examples/utilities/timer.hpp new file mode 100644 index 00000000000..713eb6bb47a --- /dev/null +++ b/cpp/examples/utilities/timer.hpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +namespace cudf { +/** + * @brief Light-weight timer for measuring elapsed time. + * + * A timer object constructed from std::chrono, instrumenting at microseconds + * precision. Can display elapsed durations at milli and micro second + * scales. The timer starts at object construction. + */ +class timer { + public: + using micros = std::chrono::microseconds; + using millis = std::chrono::milliseconds; + + timer() { reset(); } + void reset() { start_time = std::chrono::high_resolution_clock::now(); } + auto elapsed() const { return (std::chrono::high_resolution_clock::now() - start_time); } + void print_elapsed_micros() const + { + std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() + << "us\n\n"; + } + void print_elapsed_millis() const + { + std::cout << "Elapsed Time: " << std::chrono::duration_cast(elapsed()).count() + << "ms\n\n"; + } + + private: + using time_point_t = std::chrono::time_point; + time_point_t start_time; +}; + +}; // namespace cudf From 0fdf2f3e454d475b896b85ffeedbb4aba7b60d29 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 16 Jul 2024 15:57:36 -0700 Subject: [PATCH 123/124] Move timer into cudf::example namespacE --- cpp/examples/parquet_io/parquet_io.cpp | 2 +- cpp/examples/tpch/q1.cpp | 2 +- cpp/examples/tpch/q5.cpp | 2 +- cpp/examples/tpch/q6.cpp | 2 +- cpp/examples/tpch/q9.cpp | 2 +- cpp/examples/utilities/timer.hpp | 2 ++ 6 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cpp/examples/parquet_io/parquet_io.cpp b/cpp/examples/parquet_io/parquet_io.cpp index b31b7f869bd..274a2599189 100644 --- a/cpp/examples/parquet_io/parquet_io.cpp +++ b/cpp/examples/parquet_io/parquet_io.cpp @@ -142,7 +142,7 @@ int main(int argc, char const** argv) << page_stat_string << ".." << std::endl; // `timer` is automatically started here - cudf::timer timer; + cudf::examples::timer timer; write_parquet(input->view(), metadata, output_filepath, encoding, compression, page_stats); timer.print_elapsed_millis(); diff --git a/cpp/examples/tpch/q1.cpp b/cpp/examples/tpch/q1.cpp index a4aa665d839..1bdf039da4a 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/examples/tpch/q1.cpp @@ -108,7 +108,7 @@ int main(int argc, char const** argv) auto resource = create_memory_resource(args.memory_resource_type); rmm::mr::set_current_device_resource(resource.get()); - cudf::timer timer; + cudf::examples::timer timer; // Define the column projections and filter predicate for `lineitem` table std::vector const lineitem_cols = {"l_returnflag", diff --git a/cpp/examples/tpch/q5.cpp b/cpp/examples/tpch/q5.cpp index c2fa5d3cac6..e56850b94d6 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/examples/tpch/q5.cpp @@ -93,7 +93,7 @@ int main(int argc, char const** argv) auto resource = create_memory_resource(args.memory_resource_type); rmm::mr::set_current_device_resource(resource.get()); - cudf::timer timer; + cudf::examples::timer timer; // Define the column projection and filter predicate for the `orders` table std::vector const orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; diff --git a/cpp/examples/tpch/q6.cpp b/cpp/examples/tpch/q6.cpp index 8eef5973d93..f11b3d6ab3b 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/examples/tpch/q6.cpp @@ -67,7 +67,7 @@ int main(int argc, char const** argv) auto resource = create_memory_resource(args.memory_resource_type); rmm::mr::set_current_device_resource(resource.get()); - cudf::timer timer; + cudf::examples::timer timer; // Read out the `lineitem` table from parquet file std::vector const lineitem_cols = { diff --git a/cpp/examples/tpch/q9.cpp b/cpp/examples/tpch/q9.cpp index e56db197042..d3c218253f9 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/examples/tpch/q9.cpp @@ -116,7 +116,7 @@ int main(int argc, char const** argv) auto resource = create_memory_resource(args.memory_resource_type); rmm::mr::set_current_device_resource(resource.get()); - cudf::timer timer; + cudf::examples::timer timer; // Read out the table from parquet files auto const lineitem = read_parquet( diff --git a/cpp/examples/utilities/timer.hpp b/cpp/examples/utilities/timer.hpp index 713eb6bb47a..65fa92e74cf 100644 --- a/cpp/examples/utilities/timer.hpp +++ b/cpp/examples/utilities/timer.hpp @@ -18,6 +18,7 @@ #include namespace cudf { +namespace examples { /** * @brief Light-weight timer for measuring elapsed time. * @@ -49,4 +50,5 @@ class timer { time_point_t start_time; }; +} // namespace examples }; // namespace cudf From c153636725599ad659c6bd1b4890ce2e9766928f Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 17 Jul 2024 11:18:58 -0700 Subject: [PATCH 124/124] Fix the consts for table with names fn --- cpp/examples/tpch/utils.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp index 6b1ee892d08..e586da2c802 100644 --- a/cpp/examples/tpch/utils.hpp +++ b/cpp/examples/tpch/utils.hpp @@ -73,26 +73,26 @@ class table_with_names { /** * @brief Return the table view */ - [[nodiscard]] cudf::table_view const table() { return tbl->view(); } + [[nodiscard]] cudf::table_view table() const { return tbl->view(); } /** * @brief Return the column view for a given column name * * @param col_name The name of the column */ - [[nodiscard]] cudf::column_view const column(std::string const& col_name) + [[nodiscard]] cudf::column_view column(std::string const& col_name) const { return tbl->view().column(col_id(col_name)); } /** * @param Return the column names of the table */ - [[nodiscard]] std::vector const column_names() { return col_names; } + [[nodiscard]] std::vector column_names() const { return col_names; } /** * @brief Translate a column name to a column index * * @param col_name The name of the column */ - [[nodiscard]] cudf::size_type const col_id(std::string const& col_name) + [[nodiscard]] cudf::size_type col_id(std::string const& col_name) const { CUDF_FUNC_RANGE(); auto it = std::find(col_names.begin(), col_names.end(), col_name); @@ -119,7 +119,7 @@ class table_with_names { * * @param col_names The names of the columns to select */ - [[nodiscard]] cudf::table_view const select(std::vector const& col_names) + [[nodiscard]] cudf::table_view select(std::vector const& col_names) const { CUDF_FUNC_RANGE(); std::vector col_indices; @@ -133,7 +133,7 @@ class table_with_names { * * @param filepath The path to the parquet file */ - void to_parquet(std::string const& filepath) + void to_parquet(std::string const& filepath) const { CUDF_FUNC_RANGE(); auto const sink_info = cudf::io::sink_info(filepath);