Skip to content

Commit

Permalink
Add query 10 to the TPC-H suite (rapidsai#16392)
Browse files Browse the repository at this point in the history
Adds Q10 to the TPC-H benchmark suite

Authors:
  - Jayjeet Chakraborty (https://github.com/JayjeetAtGithub)

Approvers:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Yunsong Wang (https://github.com/PointKernel)

URL: rapidsai#16392
  • Loading branch information
JayjeetAtGithub authored and rjzamora committed Jul 30, 2024
1 parent 8a2c81c commit 4893218
Show file tree
Hide file tree
Showing 5 changed files with 182 additions and 12 deletions.
4 changes: 4 additions & 0 deletions cpp/examples/tpch/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,7 @@ target_compile_features(tpch_q6 PRIVATE cxx_std_17)
add_executable(tpch_q9 q9.cpp)
target_link_libraries(tpch_q9 PRIVATE cudf::cudf)
target_compile_features(tpch_q9 PRIVATE cxx_std_17)

add_executable(tpch_q10 q10.cpp)
target_link_libraries(tpch_q10 PRIVATE cudf::cudf)
target_compile_features(tpch_q10 PRIVATE cxx_std_17)
2 changes: 1 addition & 1 deletion cpp/examples/tpch/q1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ int main(int argc, char const** argv)
auto shipdate_upper =
cudf::timestamp_scalar<cudf::timestamp_D>(days_since_epoch(1998, 9, 2), true);
auto const shipdate_upper_literal = cudf::ast::literal(shipdate_upper);
auto lineitem_pred = std::make_unique<cudf::ast::operation>(
auto const lineitem_pred = std::make_unique<cudf::ast::operation>(
cudf::ast::ast_operator::LESS_EQUAL, shipdate_ref, shipdate_upper_literal);

// Read out the `lineitem` table from parquet file
Expand Down
166 changes: 166 additions & 0 deletions cpp/examples/tpch/q10.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "../utilities/timer.hpp"
#include "utils.hpp"

#include <cudf/ast/expressions.hpp>
#include <cudf/column/column.hpp>
#include <cudf/scalar/scalar.hpp>

/**
* @file q10.cpp
* @brief Implement query 10 of the TPC-H benchmark.
*
* create view customer as select * from '/tables/scale-1/customer.parquet';
* create view orders as select * from '/tables/scale-1/orders.parquet';
* create view lineitem as select * from '/tables/scale-1/lineitem.parquet';
* create view nation as select * from '/tables/scale-1/nation.parquet';
*
* select
* c_custkey,
* c_name,
* sum(l_extendedprice * (1 - l_discount)) as revenue,
* c_acctbal,
* n_name,
* c_address,
* c_phone,
* c_comment
* from
* customer,
* orders,
* lineitem,
* nation
* where
* c_custkey = o_custkey
* and l_orderkey = o_orderkey
* and o_orderdate >= date '1993-10-01'
* and o_orderdate < date '1994-01-01'
* and l_returnflag = 'R'
* and c_nationkey = n_nationkey
* group by
* c_custkey,
* c_name,
* c_acctbal,
* c_phone,
* n_name,
* c_address,
* c_comment
* order by
* revenue desc;
*/

/**
 * @brief Compute the revenue column as `extendedprice * (1 - discount)`
 *
 * @param extendedprice The extended price column
 * @param discount The discount column
 * @param stream The CUDA stream used for device memory operations and kernel launches.
 * @param mr Device memory resource used to allocate the returned column's device memory.
 * @return The revenue column, requested with a FLOAT64 output type
 */
[[nodiscard]] std::unique_ptr<cudf::column> calc_revenue(
  cudf::column_view const& extendedprice,
  cudf::column_view const& discount,
  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())
{
  // (1 - discount): the intermediate column is requested with the discount column's own type
  auto const unit                = cudf::numeric_scalar<double>(1);
  auto const discount_complement = cudf::binary_operation(
    unit, discount, cudf::binary_operator::SUB, discount.type(), stream, mr);

  // extendedprice * (1 - discount)
  return cudf::binary_operation(extendedprice,
                                discount_complement->view(),
                                cudf::binary_operator::MUL,
                                cudf::data_type{cudf::type_id::FLOAT64},
                                stream,
                                mr);
}
/**
 * @brief Entry point: run query 10 of the TPC-H benchmark.
 *
 * Reads the `customer`, `orders`, `lineitem` and `nation` parquet files from the dataset
 * directory given on the command line, joins them, computes and aggregates the `revenue`
 * column, orders the result by `revenue` descending, prints the elapsed time and writes the
 * result to `q10.parquet`.
 *
 * @param argc Number of command line arguments
 * @param argv Command line arguments, forwarded to `parse_args`
 * @return 0 on success
 */
int main(int argc, char const** argv)
{
  auto const args = parse_args(argc, argv);

  // Install the memory resource selected on the command line as the current device resource
  auto resource = create_memory_resource(args.memory_resource_type);
  rmm::mr::set_current_device_resource(resource.get());

  cudf::examples::timer timer;

  // Define the column projection and filter predicate for the `orders` table:
  // o_orderdate >= date '1993-10-01' and o_orderdate < date '1994-01-01'
  std::vector<std::string> const orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"};
  auto const o_orderdate_ref                 = cudf::ast::column_reference(std::distance(
    orders_cols.begin(), std::find(orders_cols.begin(), orders_cols.end(), "o_orderdate")));
  auto o_orderdate_lower =
    cudf::timestamp_scalar<cudf::timestamp_D>(days_since_epoch(1993, 10, 1), true);
  auto const o_orderdate_lower_limit = cudf::ast::literal(o_orderdate_lower);
  auto const o_orderdate_pred_lower  = cudf::ast::operation(
    cudf::ast::ast_operator::GREATER_EQUAL, o_orderdate_ref, o_orderdate_lower_limit);
  auto o_orderdate_upper =
    cudf::timestamp_scalar<cudf::timestamp_D>(days_since_epoch(1994, 1, 1), true);
  auto const o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper);
  auto const o_orderdate_pred_upper =
    cudf::ast::operation(cudf::ast::ast_operator::LESS, o_orderdate_ref, o_orderdate_upper_limit);
  auto const orders_pred = std::make_unique<cudf::ast::operation>(
    cudf::ast::ast_operator::LOGICAL_AND, o_orderdate_pred_lower, o_orderdate_pred_upper);

  // Define the column projection and filter predicate for the `lineitem` table:
  // l_returnflag = 'R'
  // The column index is looked up in the projection list rather than hard-coded, so the
  // predicate keeps pointing at `l_returnflag` if the projection is ever reordered.
  std::vector<std::string> const lineitem_cols = {
    "l_extendedprice", "l_discount", "l_orderkey", "l_returnflag"};
  auto const l_returnflag_ref = cudf::ast::column_reference(std::distance(
    lineitem_cols.begin(), std::find(lineitem_cols.begin(), lineitem_cols.end(), "l_returnflag")));
  auto r_scalar            = cudf::string_scalar("R");
  auto const r_literal     = cudf::ast::literal(r_scalar);
  auto const lineitem_pred = std::make_unique<cudf::ast::operation>(
    cudf::ast::ast_operator::EQUAL, l_returnflag_ref, r_literal);

  // Read out the tables from parquet files
  // while pushing down the column projections and filter predicates.
  // The predicates are `const` and therefore passed as-is: wrapping a const object in
  // `std::move` cannot transfer ownership and would only obscure that they are borrowed.
  auto const customer = read_parquet(
    args.dataset_dir + "/customer.parquet",
    {"c_custkey", "c_name", "c_nationkey", "c_acctbal", "c_address", "c_phone", "c_comment"});
  auto const orders = read_parquet(args.dataset_dir + "/orders.parquet", orders_cols, orders_pred);
  auto const lineitem =
    read_parquet(args.dataset_dir + "/lineitem.parquet", lineitem_cols, lineitem_pred);
  auto const nation = read_parquet(args.dataset_dir + "/nation.parquet", {"n_name", "n_nationkey"});

  // Perform the joins
  auto const join_a       = apply_inner_join(customer, nation, {"c_nationkey"}, {"n_nationkey"});
  auto const join_b       = apply_inner_join(lineitem, orders, {"l_orderkey"}, {"o_orderkey"});
  auto const joined_table = apply_inner_join(join_a, join_b, {"c_custkey"}, {"o_custkey"});

  // Calculate and append the `revenue` column
  auto revenue =
    calc_revenue(joined_table->column("l_extendedprice"), joined_table->column("l_discount"));
  joined_table->append(revenue, "revenue");

  // Perform the groupby operation
  auto const groupedby_table = apply_groupby(
    joined_table,
    groupby_context_t{
      {"c_custkey", "c_name", "c_acctbal", "c_phone", "n_name", "c_address", "c_comment"},
      {
        {"revenue", {{cudf::aggregation::Kind::SUM, "revenue"}}},
      }});

  // Perform the order by operation
  auto const orderedby_table =
    apply_orderby(groupedby_table, {"revenue"}, {cudf::order::DESCENDING});

  timer.print_elapsed_millis();

  // Write query result to a parquet file
  orderedby_table->to_parquet("q10.parquet");
  return 0;
}
20 changes: 10 additions & 10 deletions cpp/examples/tpch/q5.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@
* region
* where
* c_custkey = o_custkey
* and l_orderkey = o_orderkey
* and l_suppkey = s_suppkey
* and c_nationkey = s_nationkey
* and s_nationkey = n_nationkey
* and n_regionkey = r_regionkey
* and r_name = 'ASIA'
* and o_orderdate >= date '1994-01-01'
* and o_orderdate < date '1995-01-01'
* and l_orderkey = o_orderkey
* and l_suppkey = s_suppkey
* and c_nationkey = s_nationkey
* and s_nationkey = n_nationkey
* and n_regionkey = r_regionkey
* and r_name = 'ASIA'
* and o_orderdate >= date '1994-01-01'
* and o_orderdate < date '1995-01-01'
* group by
* n_name
* order by
Expand Down Expand Up @@ -109,7 +109,7 @@ int main(int argc, char const** argv)
auto const o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper);
auto const o_orderdate_pred_upper =
cudf::ast::operation(cudf::ast::ast_operator::LESS, o_orderdate_ref, o_orderdate_upper_limit);
auto orders_pred = std::make_unique<cudf::ast::operation>(
auto const orders_pred = std::make_unique<cudf::ast::operation>(
cudf::ast::ast_operator::LOGICAL_AND, o_orderdate_pred_lower, o_orderdate_pred_upper);

// Define the column projection and filter predicate for the `region` table
Expand All @@ -118,7 +118,7 @@ int main(int argc, char const** argv)
region_cols.begin(), std::find(region_cols.begin(), region_cols.end(), "r_name")));
auto r_name_value = cudf::string_scalar("ASIA");
auto const r_name_literal = cudf::ast::literal(r_name_value);
auto region_pred = std::make_unique<cudf::ast::operation>(
auto const region_pred = std::make_unique<cudf::ast::operation>(
cudf::ast::ast_operator::EQUAL, r_name_ref, r_name_literal);

// Read out the tables from parquet files
Expand Down
2 changes: 1 addition & 1 deletion cpp/examples/tpch/q6.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ int main(int argc, char const** argv)
cudf::ast::ast_operator::GREATER_EQUAL, shipdate_ref, shipdate_lower_literal);
auto const shipdate_pred_b =
cudf::ast::operation(cudf::ast::ast_operator::LESS, shipdate_ref, shipdate_upper_literal);
auto lineitem_pred = std::make_unique<cudf::ast::operation>(
auto const lineitem_pred = std::make_unique<cudf::ast::operation>(
cudf::ast::ast_operator::LOGICAL_AND, shipdate_pred_a, shipdate_pred_b);
auto lineitem =
read_parquet(args.dataset_dir + "/lineitem.parquet", lineitem_cols, std::move(lineitem_pred));
Expand Down

0 comments on commit 4893218

Please sign in to comment.