Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

[NSE-856] Optimize of string/binary split #918

Merged
merged 21 commits into from
May 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions native-sql-engine/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
cmake_minimum_required(VERSION 3.16)
project(spark_columnar_plugin)

#add_definitions(-DSKIPWRITE -DSKIPCOMPRESS -DPROCESSROW)
add_definitions(-DPROCESSROW)
#add_definitions(-DSKIPWRITE -DSKIPCOMPRESS )
#add_definitions(-DAVX512SUPPORT )

#add_compile_options(-g)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
Expand Down
34 changes: 18 additions & 16 deletions native-sql-engine/cpp/src/benchmarks/shuffle_split_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void print_trace(void) {
namespace sparkcolumnarplugin {
namespace shuffle {

#define ALIGNMENT 2048 * 1024
#define ALIGNMENT 2 * 1024 * 1024

const int batch_buffer_size = 32768;
const int split_buffer_size = 8192;
Expand Down Expand Up @@ -127,7 +127,7 @@ class LargePageMemoryPool : public MemoryPool {
return pool_->Reallocate(old_size, new_size, ptr);
} else {
Status st = pool_->AlignReallocate(old_size, new_size, ptr, ALIGNMENT);
// madvise(*ptr, new_size, /*MADV_HUGEPAGE */ 14);
madvise(*ptr, new_size, /*MADV_HUGEPAGE */ 14);
return st;
}
#else
Expand Down Expand Up @@ -320,28 +320,30 @@ class BenchmarkShuffleSplit_CacheScan_Benchmark : public BenchmarkShuffleSplit {
int64_t& num_batches, int64_t& num_rows, int64_t& split_time,
const int num_partitions, SplitOptions options, benchmark::State& state) {
std::vector<int> local_column_indices;
local_column_indices.push_back(0);
// local_column_indices.push_back(0);
/* local_column_indices.push_back(0);
local_column_indices.push_back(1);
local_column_indices.push_back(2);
local_column_indices.push_back(4);
local_column_indices.push_back(5);
local_column_indices.push_back(6);
local_column_indices.push_back(7);*/
local_column_indices.push_back(7);
*/
local_column_indices.push_back(8);
local_column_indices.push_back(9);
local_column_indices.push_back(13);
local_column_indices.push_back(14);
local_column_indices.push_back(15);

std::shared_ptr<arrow::Schema> local_schema;
local_schema = std::make_shared<arrow::Schema>(*schema.get());

/* ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(15));
ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(14));
ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(13));
ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(12));
ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(11));
ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(10));
ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(9));
ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(8));
ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(3));
*/
arrow::FieldVector fields;
fields.push_back(schema->field(8));
fields.push_back(schema->field(9));
fields.push_back(schema->field(13));
fields.push_back(schema->field(14));
fields.push_back(schema->field(15));
local_schema = std::make_shared<arrow::Schema>(fields);

if (state.thread_index() == 0) std::cout << local_schema->ToString() << std::endl;

ARROW_ASSIGN_OR_THROW(splitter,
Expand Down
Loading