Skip to content

Commit

Permalink
support to_bitmap (apache#45)
Browse files Browse the repository at this point in the history
support to_bitmap
  • Loading branch information
stdpain authored and HappenLee committed Jul 13, 2021
1 parent 1392807 commit 8d9a4fa
Show file tree
Hide file tree
Showing 15 changed files with 292 additions and 54 deletions.
4 changes: 3 additions & 1 deletion be/src/runtime/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "olap/hll.h"
#include "runtime/primitive_type.h"
#include "thrift/protocol/TDebugProtocol.h"
#include "vec/data_types/data_type_bitmap.h"
#include "vec/data_types/data_type_date.h"
#include "vec/data_types/data_type_date_time.h"
#include "vec/data_types/data_type_decimal.h"
Expand Down Expand Up @@ -293,8 +294,9 @@ struct TypeDescriptor {
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_OBJECT:
return std::make_shared<vectorized::DataTypeString>();
case TYPE_OBJECT:
return std::make_shared<vectorized::DataTypeBitMap>();

case TYPE_DECIMALV2:
case TYPE_DECIMAL:
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ set(VEC_FILES
exprs/vslot_ref.cpp
exprs/vcast_expr.cpp
functions/abs.cpp
functions/function_bitmap.cpp
functions/comparison.cpp
functions/comparison_less.cpp
functions/comparison_equels.cpp
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/columns/column_complex.h
Original file line number Diff line number Diff line change
Expand Up @@ -260,5 +260,5 @@ ColumnPtr ColumnComplexType<T>::replicate(const IColumn::Offsets& offsets) const
return res;
}

using ColumnBitMap = ColumnComplexType<BitmapValue>;
using ColumnBitmap = ColumnComplexType<BitmapValue>;
} // namespace doris::vectorized
6 changes: 6 additions & 0 deletions be/src/vec/core/block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "vec/common/exception.h"
#include "vec/common/field_visitors.h"
#include "vec/common/typeid_cast.h"
#include "vec/data_types/data_type_bitmap.h"
#include "vec/data_types/data_type_date.h"
#include "vec/data_types/data_type_date_time.h"
#include "vec/data_types/data_type_decimal.h"
Expand Down Expand Up @@ -110,6 +111,9 @@ inline DataTypePtr get_data_type(const PColumn& pcolumn) {
return std::make_shared<DataTypeDecimal<Decimal128>>(pcolumn.decimal_param().precision(),
pcolumn.decimal_param().scale());
}
case PColumn::BITMAP: {
return std::make_shared<DataTypeBitMap>();
}
default: {
throw Exception("Unknown data type: " + std::to_string(pcolumn.type()) +
", data type name: " + PColumn::DataType_Name(pcolumn.type()),
Expand Down Expand Up @@ -157,6 +161,8 @@ PColumn::DataType get_pdata_type(DataTypePtr data_type) {
return PColumn::DATE;
case TypeIndex::DateTime:
return PColumn::DATETIME;
case TypeIndex::BitMap:
return PColumn::BITMAP;
default:
return PColumn::UNKNOWN;
}
Expand Down
21 changes: 21 additions & 0 deletions be/src/vec/core/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -408,4 +408,25 @@ struct hash<doris::vectorized::Decimal128> {
x.value & std::numeric_limits<doris::vectorized::UInt64>::max());
}
};

constexpr bool is_integer(doris::vectorized::TypeIndex index) {
using TypeIndex = doris::vectorized::TypeIndex;
switch (index) {
case TypeIndex::UInt8:
case TypeIndex::UInt16:
case TypeIndex::UInt32:
case TypeIndex::UInt64:
case TypeIndex::UInt128:
case TypeIndex::Int8:
case TypeIndex::Int16:
case TypeIndex::Int32:
case TypeIndex::Int64:
case TypeIndex::Int128: {
return true;
}
default: {
return false;
}
}
}
} // namespace std
37 changes: 34 additions & 3 deletions be/src/vec/data_types/data_type_bitmap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,48 @@
#include "vec/data_types/data_type_bitmap.h"

#include "vec/columns/column_complex.h"
#include "vec/common/assert_cast.h"
#include "vec/io/io_helper.h"

namespace doris::vectorized {
void DataTypeBitMap::serialize(const IColumn& column, PColumn* pcolumn) const {
throw Exception("DataTypeBitMap serialize not implemented", ErrorCodes::NOT_IMPLEMENTED);
std::ostringstream buf;
auto& data_column = assert_cast<const ColumnBitmap&>(*column.convertToFullColumnIfConst());
// TODO: remove std::string as memory buffer to avoid memory copy
std::string memory_buffer;
for (size_t i = 0; i < column.size(); ++i) {
auto& bitmap = const_cast<BitmapValue&>(data_column.getElement(i));
int bytesize = bitmap.getSizeInBytes();
writeIntBinary(bytesize, buf);
memory_buffer.resize(bytesize);
bitmap.write(const_cast<char*>(memory_buffer.data()));
writeBinary(memory_buffer, buf);
memory_buffer.clear();
}
write_binary(buf, pcolumn);
}

void DataTypeBitMap::deserialize(const PColumn& pcolumn, IColumn* column) const {
throw Exception("DataTypeBitMap deserialize not implemented", ErrorCodes::NOT_IMPLEMENTED);
auto& data_column = assert_cast<ColumnBitmap&>(*column);
auto& data = data_column.getData();

std::string uncompressed;
read_binary(pcolumn, &uncompressed);

std::istringstream istr(uncompressed);
std::string memory_buffer;
while (istr.peek() != EOF) {
int bytesize = 0;
readIntBinary(bytesize, istr);
readBinary(memory_buffer, istr);

data.emplace_back();
data.back().deserialize(memory_buffer.data());
memory_buffer.clear();
}
}

MutableColumnPtr DataTypeBitMap::createColumn() const {
return ColumnBitMap::create();
return ColumnBitmap::create();
}
} // namespace doris::vectorized
8 changes: 7 additions & 1 deletion be/src/vec/data_types/data_type_bitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ class DataTypeBitMap : public IDataType {
DataTypeBitMap() = default;
~DataTypeBitMap() = default;

std::string doGetName() const override { return "BitMap"; }
std::string doGetName() const override { return getFamilyName(); }
const char* getFamilyName() const override { return "BitMap"; }

TypeIndex getTypeId() const override { return TypeIndex::BitMap; }

Expand Down Expand Up @@ -59,5 +60,10 @@ class DataTypeBitMap : public IDataType {
bool canBeInsideLowCardinality() const override { return false; }

std::string to_string(const IColumn& column, size_t row_num) const { return "BitMap()"; }

virtual Field getDefault() const {
throw Exception("Method getDefault() is not implemented for data type " + getName(),
ErrorCodes::NOT_IMPLEMENTED);
}
};
} // namespace doris::vectorized
39 changes: 32 additions & 7 deletions be/src/vec/exec/volap_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

#include "vec/exec/volap_scanner.h"

#include "vec/columns/column_complex.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_vector.h"
#include "vec/common/assert_cast.h"
#include "vec/core/block.h"
Expand Down Expand Up @@ -115,20 +117,43 @@ void VOlapScanner::_convert_row_to_block(std::vector<vectorized::MutableColumnPt
(*columns)[i]->insertData(slice->data, strnlen(slice->data, slice->size));
break;
}
case TYPE_VARCHAR:
case TYPE_OBJECT: {
case TYPE_VARCHAR: {
Slice* slice = reinterpret_cast<Slice*>(ptr);
(*columns)[i]->insertData(slice->data, slice->size);
break;
}
case TYPE_HLL:{
case TYPE_OBJECT: {
Slice* slice = reinterpret_cast<Slice*>(ptr);
// insertDefault()
auto& target_column = (*columns)[i];
target_column->insertDefault();
BitmapValue* pvalue = nullptr;
int pos = target_column->size() - 1;
if (target_column->isNullable()) {
auto& nullable_column = assert_cast<ColumnNullable&>(*target_column);
auto& bitmap_column = assert_cast<ColumnBitmap&>(nullable_column.getNestedColumn());
pvalue = &bitmap_column.getElement(pos);
} else {
auto& bitmap_column = assert_cast<ColumnBitmap&>(*target_column);
pvalue = &bitmap_column.getElement(pos);
}
if (slice->size != 0) {
BitmapValue value;
value.deserialize(slice->data);
*pvalue = std::move(value);
} else {
*pvalue = std::move(*reinterpret_cast<BitmapValue*>(slice->data));
}
break;
}
case TYPE_HLL: {
Slice* slice = reinterpret_cast<Slice*>(ptr);
if (slice->size != 0) {
(*columns)[i]->insertData(slice->data, slice->size);
// TODO: in vector exec engine, it is diffcult to set hll size = 0
// so we have to serialize here. which will cause two problem
// 1. some unnecessary mem malloc and delay mem release
// 2. some unnecessary CPU cost in serialize
// TODO: in vector exec engine, it is diffcult to set hll size = 0
// so we have to serialize here. which will cause two problem
// 1. some unnecessary mem malloc and delay mem release
// 2. some unnecessary CPU cost in serialize
} else {
auto* dst_hll = reinterpret_cast<HyperLogLog*>(slice->data);
std::string result(dst_hll->max_serialized_size(), '0');
Expand Down
75 changes: 43 additions & 32 deletions be/src/vec/functions/function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,11 +202,10 @@ bool allArgumentsAreConstants(const Block& block, const ColumnNumbers& args) {
}
} // namespace

bool PreparedFunctionImpl::defaultImplementationForConstantArguments(Block& block,
const ColumnNumbers& args,
size_t result,
size_t input_rows_count,
bool dry_run) {
Status PreparedFunctionImpl::defaultImplementationForConstantArguments(
Block& block, const ColumnNumbers& args, size_t result, size_t input_rows_count,
bool dry_run, bool* executed) {
*executed = false;
ColumnNumbers arguments_to_remain_constants = getArgumentsThatAreAlwaysConstant();

/// Check that these arguments are really constant.
Expand All @@ -218,7 +217,7 @@ bool PreparedFunctionImpl::defaultImplementationForConstantArguments(Block& bloc

if (args.empty() || !useDefaultImplementationForConstants() ||
!allArgumentsAreConstants(block, args))
return false;
return Status::OK();

Block temporary_block;
bool have_converted_columns = false;
Expand Down Expand Up @@ -252,8 +251,9 @@ bool PreparedFunctionImpl::defaultImplementationForConstantArguments(Block& bloc
ColumnNumbers temporary_argument_numbers(arguments_size);
for (size_t i = 0; i < arguments_size; ++i) temporary_argument_numbers[i] = i;

executeWithoutLowCardinalityColumns(temporary_block, temporary_argument_numbers, arguments_size,
temporary_block.rows(), dry_run);
RETURN_IF_ERROR(executeWithoutLowCardinalityColumns(temporary_block, temporary_argument_numbers,
arguments_size, temporary_block.rows(),
dry_run));

ColumnPtr result_column;
/// extremely rare case, when we have function with completely const arguments
Expand All @@ -264,49 +264,60 @@ bool PreparedFunctionImpl::defaultImplementationForConstantArguments(Block& bloc
result_column = temporary_block.getByPosition(arguments_size).column;

block.getByPosition(result).column = ColumnConst::create(result_column, input_rows_count);
return true;
*executed = true;
return Status::OK();
}

bool PreparedFunctionImpl::defaultImplementationForNulls(Block& block, const ColumnNumbers& args,
size_t result, size_t input_rows_count,
bool dry_run) {
if (args.empty() || !useDefaultImplementationForNulls()) return false;
Status PreparedFunctionImpl::defaultImplementationForNulls(Block& block, const ColumnNumbers& args,
size_t result, size_t input_rows_count,
bool dry_run, bool* executed) {
*executed = false;
if (args.empty() || !useDefaultImplementationForNulls()) return Status::OK();

NullPresence null_presence = getNullPresense(block, args);

if (null_presence.has_null_constant) {
block.getByPosition(result).column =
block.getByPosition(result).type->createColumnConst(input_rows_count, Null());
return true;
*executed = true;
return Status::OK();
}

if (null_presence.has_nullable) {
Block temporary_block = createBlockWithNestedColumns(block, args, result);
executeWithoutLowCardinalityColumns(temporary_block, args, result, temporary_block.rows(),
dry_run);
RETURN_IF_ERROR(executeWithoutLowCardinalityColumns(temporary_block, args, result,
temporary_block.rows(), dry_run));
block.getByPosition(result).column =
wrapInNullable(temporary_block.getByPosition(result).column, block, args, result,
input_rows_count);
return true;
*executed = true;
return Status::OK();
}

return false;
*executed = false;
return Status::OK();
}

void PreparedFunctionImpl::executeWithoutLowCardinalityColumns(Block& block,
const ColumnNumbers& args,
size_t result,
size_t input_rows_count,
bool dry_run) {
if (defaultImplementationForConstantArguments(block, args, result, input_rows_count, dry_run))
return;

if (defaultImplementationForNulls(block, args, result, input_rows_count, dry_run)) return;
Status PreparedFunctionImpl::executeWithoutLowCardinalityColumns(Block& block,
const ColumnNumbers& args,
size_t result,
size_t input_rows_count,
bool dry_run) {
bool executed = false;
RETURN_IF_ERROR(defaultImplementationForConstantArguments(block, args, result, input_rows_count,
dry_run, &executed));
if (executed) {
return Status::OK();
}
RETURN_IF_ERROR(defaultImplementationForNulls(block, args, result, input_rows_count, dry_run,
&executed));
if (executed) {
return Status::OK();
}

if (dry_run)
executeImplDryRun(block, args, result, input_rows_count);
return executeImplDryRun(block, args, result, input_rows_count);
else
executeImpl(block, args, result, input_rows_count);
return executeImpl(block, args, result, input_rows_count);
}

//static const ColumnLowCardinality * findLowCardinalityArgument(const Block & block, const ColumnNumbers & args)
Expand Down Expand Up @@ -465,8 +476,8 @@ Status PreparedFunctionImpl::execute(Block& block, const ColumnNumbers& args, si
// else
{
// convertLowCardinalityColumnsToFull(block_without_low_cardinality, args);
executeWithoutLowCardinalityColumns(block_without_low_cardinality, args, result,
input_rows_count, dry_run);
RETURN_IF_ERROR(executeWithoutLowCardinalityColumns(block_without_low_cardinality, args,
result, input_rows_count, dry_run));
res.column = block_without_low_cardinality.safeGetByPosition(result).column;
}
} else
Expand Down
15 changes: 8 additions & 7 deletions be/src/vec/functions/function.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,14 @@ class PreparedFunctionImpl : public IPreparedFunction {
virtual bool canBeExecutedOnDefaultArguments() const { return true; }

private:
bool defaultImplementationForNulls(Block& block, const ColumnNumbers& args, size_t result,
size_t input_rows_count, bool dry_run);
bool defaultImplementationForConstantArguments(Block& block, const ColumnNumbers& args,
size_t result, size_t input_rows_count,
bool dry_run);
void executeWithoutLowCardinalityColumns(Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count, bool dry_run);
Status defaultImplementationForNulls(Block& block, const ColumnNumbers& args, size_t result,
size_t input_rows_count, bool dry_run, bool* executed);
Status defaultImplementationForConstantArguments(Block& block, const ColumnNumbers& args,
size_t result, size_t input_rows_count,
bool dry_run, bool* executed);
Status executeWithoutLowCardinalityColumns(Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count,
bool dry_run);

/// Cache is created by function createLowCardinalityResultCache()
PreparedFunctionLowCardinalityResultCachePtr low_cardinality_result_cache;
Expand Down
Loading

0 comments on commit 8d9a4fa

Please sign in to comment.