Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
[NSE-856] Optimize of string/binary split (#918)
Browse files Browse the repository at this point in the history
Manually split binary columns
  • Loading branch information
FelixYBW authored May 23, 2022
1 parent 4b2a9df commit c81f8af
Show file tree
Hide file tree
Showing 5 changed files with 429 additions and 666 deletions.
4 changes: 2 additions & 2 deletions native-sql-engine/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
cmake_minimum_required(VERSION 3.16)
project(spark_columnar_plugin)

#add_definitions(-DSKIPWRITE -DSKIPCOMPRESS -DPROCESSROW)
add_definitions(-DPROCESSROW)
#add_definitions(-DSKIPWRITE -DSKIPCOMPRESS )
#add_definitions(-DAVX512SUPPORT )

#add_compile_options(-g)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
Expand Down
34 changes: 18 additions & 16 deletions native-sql-engine/cpp/src/benchmarks/shuffle_split_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void print_trace(void) {
namespace sparkcolumnarplugin {
namespace shuffle {

#define ALIGNMENT 2048 * 1024
#define ALIGNMENT 2 * 1024 * 1024

const int batch_buffer_size = 32768;
const int split_buffer_size = 8192;
Expand Down Expand Up @@ -127,7 +127,7 @@ class LargePageMemoryPool : public MemoryPool {
return pool_->Reallocate(old_size, new_size, ptr);
} else {
Status st = pool_->AlignReallocate(old_size, new_size, ptr, ALIGNMENT);
// madvise(*ptr, new_size, /*MADV_HUGEPAGE */ 14);
madvise(*ptr, new_size, /*MADV_HUGEPAGE */ 14);
return st;
}
#else
Expand Down Expand Up @@ -320,28 +320,30 @@ class BenchmarkShuffleSplit_CacheScan_Benchmark : public BenchmarkShuffleSplit {
int64_t& num_batches, int64_t& num_rows, int64_t& split_time,
const int num_partitions, SplitOptions options, benchmark::State& state) {
std::vector<int> local_column_indices;
local_column_indices.push_back(0);
// local_column_indices.push_back(0);
/* local_column_indices.push_back(0);
local_column_indices.push_back(1);
local_column_indices.push_back(2);
local_column_indices.push_back(4);
local_column_indices.push_back(5);
local_column_indices.push_back(6);
local_column_indices.push_back(7);*/
local_column_indices.push_back(7);
*/
local_column_indices.push_back(8);
local_column_indices.push_back(9);
local_column_indices.push_back(13);
local_column_indices.push_back(14);
local_column_indices.push_back(15);

std::shared_ptr<arrow::Schema> local_schema;
local_schema = std::make_shared<arrow::Schema>(*schema.get());

/* ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(15));
ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(14));
ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(13));
ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(12));
ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(11));
ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(10));
ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(9));
ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(8));
ARROW_ASSIGN_OR_THROW(local_schema, local_schema->RemoveField(3));
*/
arrow::FieldVector fields;
fields.push_back(schema->field(8));
fields.push_back(schema->field(9));
fields.push_back(schema->field(13));
fields.push_back(schema->field(14));
fields.push_back(schema->field(15));
local_schema = std::make_shared<arrow::Schema>(fields);

if (state.thread_index() == 0) std::cout << local_schema->ToString() << std::endl;

ARROW_ASSIGN_OR_THROW(splitter,
Expand Down
Loading

0 comments on commit c81f8af

Please sign in to comment.