Skip to content

Commit

Permalink
Fix core when serialize and deserialize nullable column (apache#7)
Browse files Browse the repository at this point in the history
* support nullable

Change-Id: I7b222d0e6ad2d3ac3e763534a74a696dcd2d8a5e

* fix core when serde nullable column

Change-Id: I6253da1c51ff8e07bc2008e7df3e47dc604fe358

* format

Change-Id: I49ad7c83e66f7a157de46efef829b80ba72be0f4

Co-authored-by: yangzhengguo01 <[email protected]>
  • Loading branch information
2 people authored and HappenLee committed Jul 1, 2021
1 parent f3ce0d4 commit a576431
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 42 deletions.
31 changes: 16 additions & 15 deletions be/src/vec/core/block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,23 @@

#include "vec/core/block.h"

#include "vec/common/exception.h"
#include "vec/common/field_visitors.h"

// #include <IO/WriteBufferFromString.h>
// #include <IO/Operators.h>

#include <iterator>
#include <memory>

#include "vec/columns/column_vector.h"
#include "gen_cpp/data.pb.h"
#include "vec/columns/column_const.h"

#include "vec/columns/columns_common.h"

#include "vec/columns/column_nullable.h"
#include "vec/columns/column_vector.h"
#include "vec/columns/columns_common.h"
#include "vec/columns/columns_number.h"
#include "vec/common/assert_cast.h"
#include "vec/common/exception.h"
#include "vec/common/field_visitors.h"
#include "vec/common/typeid_cast.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_string.h"
#include "vec/data_types/data_types_decimal.h"
#include "vec/data_types/data_types_number.h"
#include "gen_cpp/data.pb.h"

namespace doris::vectorized {

Expand Down Expand Up @@ -151,11 +146,11 @@ PColumn::DataType get_pdata_type(DataTypePtr data_type) {
}
}

Block::Block(std::initializer_list<ColumnWithTypeAndName> il) : data {il} {
Block::Block(std::initializer_list<ColumnWithTypeAndName> il) : data{il} {
initializeIndexByName();
}

Block::Block(const ColumnsWithTypeAndName& data_) : data {data_} {
Block::Block(const ColumnsWithTypeAndName& data_) : data{data_} {
initializeIndexByName();
}

Expand All @@ -166,6 +161,7 @@ Block::Block(const PBlock& pblock) {
if (pcolumn.is_null_size() > 0) {
data_column =
ColumnNullable::create(std::move(type->createColumn()), ColumnUInt8::create());
type = makeNullable(type);
} else {
data_column = type->createColumn();
}
Expand Down Expand Up @@ -353,7 +349,7 @@ size_t Block::rows() const {

void Block::set_num_rows(int length) {
if (rows() > length) {
for (auto &elem : data) {
for (auto& elem : data) {
if (elem.column) {
elem.column = elem.column->cut(0, length);
}
Expand Down Expand Up @@ -690,7 +686,12 @@ void Block::serialize(PBlock* pblock) const {
for (auto c = cbegin(); c != cend(); ++c) {
PColumn* pc = pblock->add_columns();
pc->set_name(c->name);
pc->set_type(get_pdata_type(c->type));
if (c->type->isNullable()) {
pc->set_type(get_pdata_type(
std::dynamic_pointer_cast<const DataTypeNullable>(c->type)->getNestedType()));
} else {
pc->set_type(get_pdata_type(c->type));
}
c->type->serialize(*(c->column), pc);
}
}
Expand Down
39 changes: 16 additions & 23 deletions be/src/vec/data_types/data_type_nullable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,13 @@

#include "vec/data_types/data_type_nullable.h"

#include "vec/data_types/data_type_nothing.h"
#include "vec/data_types/data_types_number.h"
// #include <vec/DataTypes/DataTypeFactory.h>
#include "gen_cpp/data.pb.h"
#include "vec/columns/column_nullable.h"
#include "vec/core/field.h"
// #include <vec/IO/ReadBuffer.h>
// #include <vec/IO/ReadBufferFromMemory.h>
// #include <vec/IO/ReadHelpers.h>
// #include <vec/IO/WriteBuffer.h>
// #include <vec/IO/WriteHelpers.h>
// #include <vec/IO/ConcatReadBuffer.h>
// #include <vec/Parsers/IAST.h>
#include "vec/common/assert_cast.h"
#include "gen_cpp/data.pb.h"

#include "vec/common/typeid_cast.h"
#include "vec/core/field.h"
#include "vec/data_types/data_type_nothing.h"
#include "vec/data_types/data_types_number.h"

namespace doris::vectorized {

Expand All @@ -42,7 +33,7 @@ extern const int ILLEGAL_TYPE_OF_ARGUMENT;
} // namespace ErrorCodes

DataTypeNullable::DataTypeNullable(const DataTypePtr& nested_data_type_)
: nested_data_type {nested_data_type_} {
: nested_data_type{nested_data_type_} {
if (!nested_data_type->canBeInsideNullable())
throw Exception(
"Nested type " + nested_data_type->getName() + " cannot be inside Nullable type",
Expand Down Expand Up @@ -486,27 +477,29 @@ bool DataTypeNullable::onlyNull() const {
// }

void DataTypeNullable::serialize(const IColumn& column, size_t row_num, PColumn* pcolumn) const {
const ColumnNullable& col = assert_cast<const ColumnNullable& >(column);
const ColumnNullable& col = assert_cast<const ColumnNullable&>(column);
pcolumn->set_is_null(row_num, col.isNullAt(row_num));
nested_data_type->serialize(column, row_num, pcolumn);
}
void DataTypeNullable::serialize(const IColumn& column, PColumn* pcolumn) const {
const ColumnNullable& col = assert_cast<const ColumnNullable& >(column);
const ColumnNullable& col = assert_cast<const ColumnNullable&>(column);
for (size_t i = 0; i < column.size(); ++i) {
bool is_null = col.isNullAt(i);
pcolumn->add_is_null(is_null);
nested_data_type->serialize(column, i, pcolumn);
if (!is_null) {
nested_data_type->serialize(column, i, pcolumn);
}
nested_data_type->serialize(col.getNestedColumn(), i, pcolumn);
}
}
void DataTypeNullable::deserialize(const PColumn& pcolumn, IColumn* column) const {
ColumnNullable * col = assert_cast<ColumnNullable *>(column);
ColumnNullable* col = assert_cast<ColumnNullable*>(column);
for (size_t i = 0; i < pcolumn.is_null_size(); ++i) {
col->getNullMapData().push_back(pcolumn.is_null(i));
if (pcolumn.is_null(i)) {
col->getNullMapData().push_back(1);
} else {
col->getNullMapData().push_back(0);
}
}
nested_data_type->deserialize(pcolumn, column);
IColumn& nested = col->getNestedColumn();
nested_data_type->deserialize(pcolumn, &nested);
}
MutableColumnPtr DataTypeNullable::createColumn() const {
return ColumnNullable::create(nested_data_type->createColumn(), ColumnUInt8::create());
Expand Down
9 changes: 7 additions & 2 deletions be/src/vec/io/io_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,13 @@ inline void write_binary(const std::ostringstream& buf, PColumn* pcolumn) {
std::string uncompressed = buf.str();
std::string compressed;
snappy::Compress(uncompressed.data(), uncompressed.size(), &compressed);
pcolumn->set_compressed(true);
pcolumn->mutable_binary()->append(compressed);
if (static_cast<double>(compressed.size()) / uncompressed.size() > 0.7) {
pcolumn->set_compressed(false);
pcolumn->mutable_binary()->append(uncompressed);
} else {
pcolumn->set_compressed(true);
pcolumn->mutable_binary()->append(compressed);
}
}

/// Read POD-type in native format
Expand Down
28 changes: 26 additions & 2 deletions be/test/vec/core/block_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@
#include "runtime/row_batch.h"
#include "runtime/tuple_row.h"
#include "vec/columns/column_decimal.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/columns/column_vector.h"
#include "vec/common/exception.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_nullable.h"

namespace doris {

Expand Down Expand Up @@ -188,7 +190,9 @@ TEST(BlockTest, SerializeAndDeserializeBlock) {
{
vectorized::DataTypePtr decimal_data_type(doris::vectorized::createDecimal(27, 9));
auto decimal_column = decimal_data_type->createColumn();
auto& data = ((vectorized::ColumnDecimal<vectorized::Decimal<vectorized::Int128>>*)decimal_column.get())->getData();
auto& data = ((vectorized::ColumnDecimal<vectorized::Decimal<vectorized::Int128>>*)
decimal_column.get())
->getData();
for (int i = 0; i < 1024; ++i) {
__int128_t value = i;
for (int j = 0; j < 9; ++j) {
Expand All @@ -197,7 +201,27 @@ TEST(BlockTest, SerializeAndDeserializeBlock) {
data.push_back(value);
}
vectorized::ColumnWithTypeAndName type_and_name(decimal_column->getPtr(), decimal_data_type,
"test_decimal");
"test_decimal");
vectorized::Block block({type_and_name});
PBlock pblock;
block.serialize(&pblock);
std::string s1 = pblock.DebugString();
PBlock pblock2;
vectorized::Block block2(pblock);
block2.serialize(&pblock2);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(s1, s2);
}
{
auto column_vector_int32 = vectorized::ColumnVector<Int32>::create();
auto column_nullable_vector = makeNullable(std::move(column_vector_int32));
auto mutable_nullable_vector = std::move(*column_nullable_vector).mutate();
for (int i = 0; i < 4096; i++) {
mutable_nullable_vector->insert(vectorized::castToNearestFieldType(i));
}
auto data_type = makeNullable(std::make_shared<vectorized::DataTypeInt32>());
vectorized::ColumnWithTypeAndName type_and_name(mutable_nullable_vector->getPtr(),
data_type, "test_nullable_int32");
vectorized::Block block({type_and_name});
PBlock pblock;
block.serialize(&pblock);
Expand Down

0 comments on commit a576431

Please sign in to comment.