From b7b801dd249a831a40d30eb5e3c1c7010a138970 Mon Sep 17 00:00:00 2001 From: Zhengguo Yang Date: Wed, 21 Apr 2021 14:09:21 +0800 Subject: [PATCH] Fix core when serialize and deserialize nullable column (#7) * support nullable Change-Id: I7b222d0e6ad2d3ac3e763534a74a696dcd2d8a5e * fix core when serde nullable column Change-Id: I6253da1c51ff8e07bc2008e7df3e47dc604fe358 * format Change-Id: I49ad7c83e66f7a157de46efef829b80ba72be0f4 Co-authored-by: yangzhengguo01 --- be/src/vec/core/block.cpp | 31 ++++++++-------- be/src/vec/data_types/data_type_nullable.cpp | 39 ++++++++------------ be/src/vec/io/io_helper.h | 9 ++++- be/test/vec/core/block_test.cpp | 28 +++++++++++++- 4 files changed, 65 insertions(+), 42 deletions(-) diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index a89b1a04f366b8..d548be1a5b40c6 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -17,28 +17,23 @@ #include "vec/core/block.h" -#include "vec/common/exception.h" -#include "vec/common/field_visitors.h" - -// #include -// #include - #include #include -#include "vec/columns/column_vector.h" +#include "gen_cpp/data.pb.h" #include "vec/columns/column_const.h" - -#include "vec/columns/columns_common.h" - #include "vec/columns/column_nullable.h" +#include "vec/columns/column_vector.h" +#include "vec/columns/columns_common.h" #include "vec/columns/columns_number.h" #include "vec/common/assert_cast.h" +#include "vec/common/exception.h" +#include "vec/common/field_visitors.h" #include "vec/common/typeid_cast.h" +#include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_string.h" #include "vec/data_types/data_types_decimal.h" #include "vec/data_types/data_types_number.h" -#include "gen_cpp/data.pb.h" namespace doris::vectorized { @@ -151,11 +146,11 @@ PColumn::DataType get_pdata_type(DataTypePtr data_type) { } } -Block::Block(std::initializer_list il) : data {il} { +Block::Block(std::initializer_list il) : data{il} { initializeIndexByName(); } -Block::Block(const ColumnsWithTypeAndName& data_) : data {data_} { +Block::Block(const ColumnsWithTypeAndName& data_) : data{data_} { initializeIndexByName(); } @@ -166,6 +161,7 @@ Block::Block(const PBlock& pblock) { if (pcolumn.is_null_size() > 0) { data_column = ColumnNullable::create(std::move(type->createColumn()), ColumnUInt8::create()); + type = makeNullable(type); } else { data_column = type->createColumn(); } @@ -353,7 +349,7 @@ size_t Block::rows() const { void Block::set_num_rows(int length) { if (rows() > length) { - for (auto &elem : data) { + for (auto& elem : data) { if (elem.column) { elem.column = elem.column->cut(0, length); } @@ -690,7 +686,12 @@ void Block::serialize(PBlock* pblock) const { for (auto c = cbegin(); c != cend(); ++c) { PColumn* pc = pblock->add_columns(); pc->set_name(c->name); - pc->set_type(get_pdata_type(c->type)); + if (c->type->isNullable()) { + pc->set_type(get_pdata_type( + std::dynamic_pointer_cast(c->type)->getNestedType())); + } else { + pc->set_type(get_pdata_type(c->type)); + } c->type->serialize(*(c->column), pc); } } diff --git a/be/src/vec/data_types/data_type_nullable.cpp b/be/src/vec/data_types/data_type_nullable.cpp index c08b286adf884d..5131d198538937 100644 --- a/be/src/vec/data_types/data_type_nullable.cpp +++ b/be/src/vec/data_types/data_type_nullable.cpp @@ -17,22 +17,13 @@ #include "vec/data_types/data_type_nullable.h" -#include "vec/data_types/data_type_nothing.h" -#include "vec/data_types/data_types_number.h" -// #include +#include "gen_cpp/data.pb.h" #include "vec/columns/column_nullable.h" -#include "vec/core/field.h" -// #include -// #include -// #include -// #include -// #include -// #include -// #include #include "vec/common/assert_cast.h" -#include "gen_cpp/data.pb.h" - #include "vec/common/typeid_cast.h" +#include "vec/core/field.h" +#include "vec/data_types/data_type_nothing.h" +#include "vec/data_types/data_types_number.h" namespace doris::vectorized { @@ -42,7 +33,7 @@ extern const int ILLEGAL_TYPE_OF_ARGUMENT; } // namespace ErrorCodes DataTypeNullable::DataTypeNullable(const DataTypePtr& nested_data_type_) - : nested_data_type {nested_data_type_} { + : nested_data_type{nested_data_type_} { if (!nested_data_type->canBeInsideNullable()) throw Exception( "Nested type " + nested_data_type->getName() + " cannot be inside Nullable type", @@ -486,27 +477,29 @@ bool DataTypeNullable::onlyNull() const { // } void DataTypeNullable::serialize(const IColumn& column, size_t row_num, PColumn* pcolumn) const { - const ColumnNullable& col = assert_cast(column); + const ColumnNullable& col = assert_cast(column); pcolumn->set_is_null(row_num, col.isNullAt(row_num)); nested_data_type->serialize(column, row_num, pcolumn); } void DataTypeNullable::serialize(const IColumn& column, PColumn* pcolumn) const { - const ColumnNullable& col = assert_cast(column); + const ColumnNullable& col = assert_cast(column); for (size_t i = 0; i < column.size(); ++i) { bool is_null = col.isNullAt(i); pcolumn->add_is_null(is_null); - nested_data_type->serialize(column, i, pcolumn); - if (!is_null) { - nested_data_type->serialize(column, i, pcolumn); - } + nested_data_type->serialize(col.getNestedColumn(), i, pcolumn); } } void DataTypeNullable::deserialize(const PColumn& pcolumn, IColumn* column) const { - ColumnNullable * col = assert_cast(column); + ColumnNullable* col = assert_cast(column); for (size_t i = 0; i < pcolumn.is_null_size(); ++i) { - col->getNullMapData().push_back(pcolumn.is_null(i)); + if (pcolumn.is_null(i)) { + col->getNullMapData().push_back(1); + } else { + col->getNullMapData().push_back(0); + } } - nested_data_type->deserialize(pcolumn, column); + IColumn& nested = col->getNestedColumn(); + nested_data_type->deserialize(pcolumn, &nested); } MutableColumnPtr DataTypeNullable::createColumn() const { return ColumnNullable::create(nested_data_type->createColumn(), ColumnUInt8::create()); diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h index a3ed3ee5d94d7c..18cde16f561fd3 100644 --- a/be/src/vec/io/io_helper.h +++ b/be/src/vec/io/io_helper.h @@ -100,8 +100,13 @@ inline void write_binary(const std::ostringstream& buf, PColumn* pcolumn) { std::string uncompressed = buf.str(); std::string compressed; snappy::Compress(uncompressed.data(), uncompressed.size(), &compressed); - pcolumn->set_compressed(true); - pcolumn->mutable_binary()->append(compressed); + if (static_cast(compressed.size()) / uncompressed.size() > 0.7) { + pcolumn->set_compressed(false); + pcolumn->mutable_binary()->append(uncompressed); + } else { + pcolumn->set_compressed(true); + pcolumn->mutable_binary()->append(compressed); + } } /// Read POD-type in native format diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 0f8048ba4ec574..5bc3334931cc00 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -27,10 +27,12 @@ #include "runtime/row_batch.h" #include "runtime/tuple_row.h" #include "vec/columns/column_decimal.h" +#include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" #include "vec/common/exception.h" #include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_nullable.h" namespace doris { @@ -188,7 +190,9 @@ TEST(BlockTest, SerializeAndDeserializeBlock) { { vectorized::DataTypePtr decimal_data_type(doris::vectorized::createDecimal(27, 9)); auto decimal_column = decimal_data_type->createColumn(); - auto& data = ((vectorized::ColumnDecimal>*)decimal_column.get())->getData(); + auto& data = ((vectorized::ColumnDecimal>*) + decimal_column.get()) + ->getData(); for (int i = 0; i < 1024; ++i) { __int128_t value = i; for (int j = 0; j < 9; ++j) { @@ -197,7 +201,27 @@ TEST(BlockTest, SerializeAndDeserializeBlock) { data.push_back(value); } vectorized::ColumnWithTypeAndName type_and_name(decimal_column->getPtr(), decimal_data_type, - "test_decimal"); + "test_decimal"); + vectorized::Block block({type_and_name}); + PBlock pblock; + block.serialize(&pblock); + std::string s1 = pblock.DebugString(); + PBlock pblock2; + vectorized::Block block2(pblock); + block2.serialize(&pblock2); + std::string s2 = pblock2.DebugString(); + EXPECT_EQ(s1, s2); + } + { + auto column_vector_int32 = vectorized::ColumnVector::create(); + auto column_nullable_vector = makeNullable(std::move(column_vector_int32)); + auto mutable_nullable_vector = std::move(*column_nullable_vector).mutate(); + for (int i = 0; i < 4096; i++) { + mutable_nullable_vector->insert(vectorized::castToNearestFieldType(i)); + } + auto data_type = makeNullable(std::make_shared()); + vectorized::ColumnWithTypeAndName type_and_name(mutable_nullable_vector->getPtr(), + data_type, "test_nullable_int32"); vectorized::Block block({type_and_name}); PBlock pblock; block.serialize(&pblock);