From 9ba1627acee30e4ae2dfe958dfc906b0e860cf2e Mon Sep 17 00:00:00 2001 From: JaySon Date: Wed, 28 Aug 2019 14:37:16 +0800 Subject: [PATCH] [FLASH-386] DeltaMerge DDL support (#190) * cast DataType while reading from PageStorage * add isLossyCast function * alter for StroageDeltaMerge, WIP * add Alter for StorageDeltaMerge * add Alter for StorageDeltaMerge * add TableInfo in StroageDeltaMerge * rename table for StorageDeltaMerge * fix bug: the TableInfo from TiDB * add comments for DeltaMerge flush && cache * more faster(?) way to cast mismatch datatype * support cast for numeric type null/not null * support for other data type just change null / not null * isLossyCast -> isSupportedDataTypeCast * isSupportedDataTypeCast add decimal detect * rename function * fix compile errors in gtests * small fix * fix broken tests * support new column with non-zero default value * remove unused code * fix compile error in CI * fix bug in table rename * small fix * minor fix * refine cast function in chunk * update DeltaMergeStore's segments within lock * [WIP]Add test cases for default value ddl. 
Still has bugs * fix bugs after rebasing to latest master * add some TODO marks * fix compile error in gtests && remove unused comments * fix broken gtests * flush cached chunks in delta instead of doing DeltaMerge when ddl-changes apply * use TypeIndex instead of typeid_cast * clean up data after tests * address comment --- dbms/CMakeLists.txt | 4 +- dbms/src/Columns/ColumnVector.h | 2 +- dbms/src/Core/Block.cpp | 4 +- dbms/src/Core/ColumnWithTypeAndName.cpp | 2 + dbms/src/Core/ColumnWithTypeAndName.h | 8 +- dbms/src/Core/NamesAndTypes.h | 2 + dbms/src/DataTypes/DataTypeDecimal.cpp | 2 +- .../src/DataTypes/isSupportedDataTypeCast.cpp | 134 +++++ dbms/src/DataTypes/isSupportedDataTypeCast.h | 11 + dbms/src/DataTypes/tests/CMakeLists.txt | 2 +- ...pp => gtest_data_type_get_common_type.cpp} | 211 ++++++-- .../tests/gtest_funtions_decimal_arith.cpp | 33 +- dbms/src/Storages/AlterCommands.cpp | 26 +- dbms/src/Storages/AlterCommands.h | 4 + dbms/src/Storages/DeltaMerge/Chunk.cpp | 467 +++++++++++++++++- dbms/src/Storages/DeltaMerge/Chunk.h | 37 +- .../DeltaMerge/ChunkBlockInputStream.h | 16 +- dbms/src/Storages/DeltaMerge/DMContext.h | 7 +- .../Storages/DeltaMerge/DMDecoratorStreams.h | 2 +- .../DeltaMerge/DMSegmentThreadInputStream.h | 2 +- .../Storages/DeltaMerge/DeltaMergeDefines.h | 18 +- .../Storages/DeltaMerge/DeltaMergeHelpers.h | 61 +-- .../Storages/DeltaMerge/DeltaMergeStore.cpp | 142 ++++++ .../src/Storages/DeltaMerge/DeltaMergeStore.h | 34 +- .../Storages/DeltaMerge/DiskValueSpace.cpp | 53 +- dbms/src/Storages/DeltaMerge/Segment.cpp | 6 + dbms/src/Storages/DeltaMerge/Segment.h | 3 + .../DeltaMerge/registerStorageDeltaMerge.cpp | 84 ++++ .../Storages/DeltaMerge/tests/CMakeLists.txt | 6 +- .../DeltaMerge/tests/dm_basic_include.h | 21 +- .../DeltaMerge/tests/gtest_dm_chunk.cpp | 228 +++++++-- .../tests/gtest_dm_delta_merge_store.cpp | 234 +++++++-- .../tests/gtest_dm_disk_value_space.cpp | 5 +- .../DeltaMerge/tests/gtest_dm_segment.cpp | 275 +++++++++-- 
.../tests/gtest_dm_storage_delta_merge.cpp | 2 +- dbms/src/Storages/StorageDeltaMerge.cpp | 246 +++++++-- dbms/src/Storages/StorageDeltaMerge.h | 33 +- dbms/src/Storages/Transaction/TiDB.h | 2 +- dbms/src/Storages/Transaction/TypeMapping.cpp | 144 +++++- dbms/src/Storages/Transaction/TypeMapping.h | 2 + .../Storages/Transaction/tests/CMakeLists.txt | 3 + .../Transaction/tests/gtest_type_mapping.cpp | 82 +++ dbms/src/test_utils/TiflashTestBasic.h | 3 +- tests/_env.sh | 4 +- tests/delta_merge/ddl/alter.test | 93 ++++ .../delta_merge/ddl/alter_default_value.test | 46 ++ .../ddl/alter_joint_primary_key.test | 54 ++ tests/delta_merge/ddl/alter_nullable.test | 101 ++++ tests/docker/run.sh | 2 +- tests/run-test.py | 4 + 50 files changed, 2610 insertions(+), 357 deletions(-) create mode 100644 dbms/src/DataTypes/isSupportedDataTypeCast.cpp create mode 100644 dbms/src/DataTypes/isSupportedDataTypeCast.h rename dbms/src/DataTypes/tests/{data_type_get_common_type.cpp => gtest_data_type_get_common_type.cpp} (50%) create mode 100644 dbms/src/Storages/DeltaMerge/registerStorageDeltaMerge.cpp create mode 100644 dbms/src/Storages/Transaction/tests/gtest_type_mapping.cpp create mode 100644 tests/delta_merge/ddl/alter.test create mode 100644 tests/delta_merge/ddl/alter_default_value.test create mode 100644 tests/delta_merge/ddl/alter_joint_primary_key.test create mode 100644 tests/delta_merge/ddl/alter_nullable.test diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index eb7328f4b20..fe2d618596b 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -290,8 +290,8 @@ if (TEST_COVERAGE AND CMAKE_BUILD_TYPE STREQUAL "Debug") ) SETUP_TARGET_FOR_COVERAGE_LCOV( NAME tiflash_lcov_coverage - DEPENDENCIES unit_tests_dbms - EXECUTABLE unit_tests_dbms + DEPENDENCIES gtests_dbms + EXECUTABLE gtests_dbms ) set(COVERAGE_GCOVR_EXCLUDES 'contrib' diff --git a/dbms/src/Columns/ColumnVector.h b/dbms/src/Columns/ColumnVector.h index 7e578a5dbfe..743c323d2df 100644 --- 
a/dbms/src/Columns/ColumnVector.h +++ b/dbms/src/Columns/ColumnVector.h @@ -137,7 +137,7 @@ class ColumnVector final : public COWPtrHelperisColumnConst() && expected.column->isColumnConst()) { Field actual_value = static_cast(*actual.column).getField(); diff --git a/dbms/src/Core/ColumnWithTypeAndName.cpp b/dbms/src/Core/ColumnWithTypeAndName.cpp index 9acc2d56408..188bfc09b95 100644 --- a/dbms/src/Core/ColumnWithTypeAndName.cpp +++ b/dbms/src/Core/ColumnWithTypeAndName.cpp @@ -13,6 +13,7 @@ ColumnWithTypeAndName ColumnWithTypeAndName::cloneEmpty() const res.name = name; res.type = type; + res.column_id = column_id; if (column) res.column = column->cloneEmpty(); @@ -22,6 +23,7 @@ ColumnWithTypeAndName ColumnWithTypeAndName::cloneEmpty() const bool ColumnWithTypeAndName::operator==(const ColumnWithTypeAndName & other) const { + // TODO should we check column_id here? return name == other.name && ((!type && !other.type) || (type && other.type && type->equals(*other.type))) && ((!column && !other.column) || (column && other.column && column->getName() == other.column->getName())); diff --git a/dbms/src/Core/ColumnWithTypeAndName.h b/dbms/src/Core/ColumnWithTypeAndName.h index 47f9ba3dd78..8bc059ddf43 100644 --- a/dbms/src/Core/ColumnWithTypeAndName.h +++ b/dbms/src/Core/ColumnWithTypeAndName.h @@ -1,3 +1,5 @@ +#include + #pragma once #include @@ -24,9 +26,9 @@ struct ColumnWithTypeAndName /// TODO Handle column_id properly after we support DDL. 
Int64 column_id; - ColumnWithTypeAndName() {} - ColumnWithTypeAndName(const ColumnPtr & column_, const DataTypePtr & type_, const String & name_) - : column(column_), type(type_), name(name_) {} + ColumnWithTypeAndName(): ColumnWithTypeAndName(nullptr, nullptr, "") {} + ColumnWithTypeAndName(ColumnPtr column_, const DataTypePtr & type_, const String & name_, Int64 column_id_ = 0) + : column(std::move(column_)), type(type_), name(name_), column_id(column_id_) {} /// Uses type->createColumn() to create column ColumnWithTypeAndName(const DataTypePtr & type_, const String & name_) diff --git a/dbms/src/Core/NamesAndTypes.h b/dbms/src/Core/NamesAndTypes.h index 849141e43f7..7da98ad04da 100644 --- a/dbms/src/Core/NamesAndTypes.h +++ b/dbms/src/Core/NamesAndTypes.h @@ -37,6 +37,8 @@ using NamesAndTypes = std::vector; class NamesAndTypesList : public std::list { public: + using Iterator = std::list::iterator; + NamesAndTypesList() {} NamesAndTypesList(std::initializer_list init) : std::list(init) {} diff --git a/dbms/src/DataTypes/DataTypeDecimal.cpp b/dbms/src/DataTypes/DataTypeDecimal.cpp index 271be023297..e6860f84a4e 100644 --- a/dbms/src/DataTypes/DataTypeDecimal.cpp +++ b/dbms/src/DataTypes/DataTypeDecimal.cpp @@ -169,7 +169,7 @@ bool DataTypeDecimal::equals(const IDataType & rhs) const // make sure rhs has same underlying type with this type. 
if (auto ptr = checkDecimal(rhs)) { - return ptr->getScale() == scale; + return ptr->getScale() == scale && ptr->getPrec() == precision; } return false; } diff --git a/dbms/src/DataTypes/isSupportedDataTypeCast.cpp b/dbms/src/DataTypes/isSupportedDataTypeCast.cpp new file mode 100644 index 00000000000..69314791496 --- /dev/null +++ b/dbms/src/DataTypes/isSupportedDataTypeCast.cpp @@ -0,0 +1,134 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool isSupportedDataTypeCast(const DataTypePtr &from, const DataTypePtr &to) +{ + assert(from != nullptr && to != nullptr); + /// `to` is equal to `from` + if (to->equals(*from)) + { + return true; + } + + /// For Nullable, unwrap DataTypeNullable + { + bool has_nullable = false; + DataTypePtr from_not_null; + if (const DataTypeNullable * type_nullable = typeid_cast(from.get())) + { + has_nullable = true; + from_not_null = type_nullable->getNestedType(); + } + else + { + from_not_null = from; + } + + DataTypePtr to_not_null; + if (const DataTypeNullable * type_nullable = typeid_cast(to.get())) + { + has_nullable = true; + to_not_null = type_nullable->getNestedType(); + } + else + { + to_not_null = to; + } + + if (has_nullable) + return isSupportedDataTypeCast(from_not_null, to_not_null); + } + + /// For numeric types (integer, floats) + if (from->isNumber() && to->isNumber()) + { + /// int <-> float, or float32 <-> float64, is not supported + if (!from->isInteger() || !to->isInteger()) + { + return false; + } + /// Change from signed to unsigned, or vice versa, is not supported + // use xor(^) + if ((from->isUnsignedInteger()) ^ (to->isUnsignedInteger())) + { + return false; + } + + /// Both signed or unsigned, compare the sizeof(Type) + size_t from_sz = from->getSizeOfValueInMemory(); + size_t to_sz = to->getSizeOfValueInMemory(); + return from_sz <= to_sz; + } + + /// For String / FixedString + if (from->isStringOrFixedString() && 
to->isStringOrFixedString()) + { + size_t from_sz = std::numeric_limits::max(); + if (const DataTypeFixedString * type_fixed_str = typeid_cast(from.get())) + from_sz = type_fixed_str->getN(); + size_t to_sz = std::numeric_limits::max(); + if (const DataTypeFixedString * type_fixed_str = typeid_cast(to.get())) + to_sz = type_fixed_str->getN(); + return from_sz <= to_sz; + } + + /// For Date and DateTime, not supported + if (from->isDateOrDateTime() || to->isDateOrDateTime()) + { + return false; + } + + { + bool from_is_decimal = IsDecimalDataType(from); + bool to_is_decimal = IsDecimalDataType(to); + if (from_is_decimal || to_is_decimal) + { + if (from_is_decimal && to_is_decimal) + { + // not support change Decimal to other type, neither other type to Decimal + return false; + } + + return from->equals(*to); + } + } + + // TODO enums, set? + + /// some DataTypes that support in ClickHouse but not in TiDB + + // Cast to Nothing / from Nothing is lossy + if (typeid_cast(from.get()) || typeid_cast(to.get())) + { + return true; + } + + // Cast to Array / from Array is not supported + if (typeid_cast(from.get()) || typeid_cast(to.get())) + { + return false; + } + + // Cast to Tuple / from Tuple is not supported + if (typeid_cast(from.get()) || typeid_cast(to.get())) + { + return false; + } + + return false; +} + +} // namespace DB diff --git a/dbms/src/DataTypes/isSupportedDataTypeCast.h b/dbms/src/DataTypes/isSupportedDataTypeCast.h new file mode 100644 index 00000000000..d002eb7fe8f --- /dev/null +++ b/dbms/src/DataTypes/isSupportedDataTypeCast.h @@ -0,0 +1,11 @@ +#pragma once + +#include + +namespace DB +{ + +/// Is TiDB / TiFlash support casting DataType `from` to `to` in DDL +bool isSupportedDataTypeCast(const DataTypePtr &from, const DataTypePtr &to); + +} // namespace DB diff --git a/dbms/src/DataTypes/tests/CMakeLists.txt b/dbms/src/DataTypes/tests/CMakeLists.txt index 6a20692bca0..b59d8c4d76c 100644 --- a/dbms/src/DataTypes/tests/CMakeLists.txt +++ 
b/dbms/src/DataTypes/tests/CMakeLists.txt @@ -8,5 +8,5 @@ target_link_libraries (data_types_number_fixed dbms) add_executable (data_type_string data_type_string.cpp ${SRCS}) target_link_libraries (data_type_string dbms) -add_executable (data_type_get_common_type data_type_get_common_type.cpp ${SRCS}) +add_executable (data_type_get_common_type gtest_data_type_get_common_type.cpp ${SRCS}) target_link_libraries (data_type_get_common_type dbms gtest_main) diff --git a/dbms/src/DataTypes/tests/data_type_get_common_type.cpp b/dbms/src/DataTypes/tests/gtest_data_type_get_common_type.cpp similarity index 50% rename from dbms/src/DataTypes/tests/data_type_get_common_type.cpp rename to dbms/src/DataTypes/tests/gtest_data_type_get_common_type.cpp index b3c8b9fcb3e..148b9c49d9d 100644 --- a/dbms/src/DataTypes/tests/data_type_get_common_type.cpp +++ b/dbms/src/DataTypes/tests/gtest_data_type_get_common_type.cpp @@ -1,39 +1,44 @@ #include #include #include +#include #include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsign-compare" #include - #pragma GCC diagnostic pop -using namespace DB; - +namespace DB +{ +namespace tests +{ -TEST(data_type, data_type_get_common_type_Test) +namespace { - try - { - auto & data_type_factory = DataTypeFactory::instance(); - auto typeFromString = [& data_type_factory](const std::string & str) - { - return data_type_factory.get(str); - }; +DataTypePtr typeFromString(const String & str) +{ + auto & data_type_factory = DataTypeFactory::instance(); + return data_type_factory.get(str); +} - auto typesFromString = [& typeFromString](const std::string & str) - { - std::istringstream data_types_stream(str); - DataTypes data_types; - std::string data_type; - while (data_types_stream >> data_type) - data_types.push_back(typeFromString(data_type)); +DataTypes typesFromString(const String & str) +{ + DataTypes data_types; + std::istringstream data_types_stream(str); + std::string data_type; + while (data_types_stream >> data_type) + 
data_types.push_back(typeFromString(data_type)); - return data_types; - }; + return data_types; +} +} // namespace +TEST(DataType_test, getLeastSuperType) +{ + try + { ASSERT_TRUE(getLeastSupertype(typesFromString(""))->equals(*typeFromString("Nothing"))); ASSERT_TRUE(getLeastSupertype(typesFromString("Nothing"))->equals(*typeFromString("Nothing"))); @@ -58,9 +63,12 @@ TEST(data_type, data_type_get_common_type_Test) ASSERT_TRUE(getLeastSupertype(typesFromString("Array(UInt8) Array(UInt8)"))->equals(*typeFromString("Array(UInt8)"))); ASSERT_TRUE(getLeastSupertype(typesFromString("Array(UInt8) Array(Int8)"))->equals(*typeFromString("Array(Int16)"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Array(Float32) Array(Int16) Array(UInt32)"))->equals(*typeFromString("Array(Float64)"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Array(Array(UInt8)) Array(Array(UInt8))"))->equals(*typeFromString("Array(Array(UInt8))"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Array(Array(UInt8)) Array(Array(Int8))"))->equals(*typeFromString("Array(Array(Int16))"))); + ASSERT_TRUE( + getLeastSupertype(typesFromString("Array(Float32) Array(Int16) Array(UInt32)"))->equals(*typeFromString("Array(Float64)"))); + ASSERT_TRUE( + getLeastSupertype(typesFromString("Array(Array(UInt8)) Array(Array(UInt8))"))->equals(*typeFromString("Array(Array(UInt8))"))); + ASSERT_TRUE( + getLeastSupertype(typesFromString("Array(Array(UInt8)) Array(Array(Int8))"))->equals(*typeFromString("Array(Array(Int16))"))); ASSERT_TRUE(getLeastSupertype(typesFromString("Array(Date) Array(DateTime)"))->equals(*typeFromString("Array(DateTime)"))); ASSERT_TRUE(getLeastSupertype(typesFromString("Array(String) Array(FixedString(32))"))->equals(*typeFromString("Array(String)"))); @@ -68,8 +76,10 @@ TEST(data_type, data_type_get_common_type_Test) ASSERT_TRUE(getLeastSupertype(typesFromString("Nullable(UInt8) Int8"))->equals(*typeFromString("Nullable(Int16)"))); 
ASSERT_TRUE(getLeastSupertype(typesFromString("Nullable(Nothing) UInt8 Int8"))->equals(*typeFromString("Nullable(Int16)"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Tuple(Int8,UInt8) Tuple(UInt8,Int8)"))->equals(*typeFromString("Tuple(Int16,Int16)"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Tuple(Nullable(Nothing)) Tuple(Nullable(UInt8))"))->equals(*typeFromString("Tuple(Nullable(UInt8))"))); + ASSERT_TRUE( + getLeastSupertype(typesFromString("Tuple(Int8,UInt8) Tuple(UInt8,Int8)"))->equals(*typeFromString("Tuple(Int16,Int16)"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Tuple(Nullable(Nothing)) Tuple(Nullable(UInt8))")) + ->equals(*typeFromString("Tuple(Nullable(UInt8))"))); EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Int8 String"))); EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Int64 UInt64"))); @@ -78,8 +88,47 @@ TEST(data_type, data_type_get_common_type_Test) EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Tuple(Int64) Tuple(UInt64)"))); EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Tuple(Int64, Int8) Tuple(UInt64)"))); EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Array(Int64) Array(String)"))); + } + catch (const Exception & e) + { + std::string text = e.displayText(); + + bool print_stack_trace = true; + + auto embedded_stack_trace_pos = text.find("Stack trace"); + if (std::string::npos != embedded_stack_trace_pos && !print_stack_trace) + text.resize(embedded_stack_trace_pos); + + std::cerr << "Code: " << e.code() << ". " << text << std::endl << std::endl; + if (print_stack_trace && std::string::npos == embedded_stack_trace_pos) + { + std::cerr << "Stack trace:" << std::endl << e.getStackTrace().toString(); + } + throw; + } + catch (const Poco::Exception & e) + { + std::cerr << "Poco::Exception: " << e.displayText() << std::endl; + throw; + } + catch (const std::exception & e) + { + std::cerr << "std::exception: " << e.what() << std::endl; + throw; + } + catch (...) 
+ { + std::cerr << "Unknown exception" << std::endl; + throw; + } +} + +TEST(DataType_test, getMostSubtype) +{ + try + { ASSERT_TRUE(getMostSubtype(typesFromString(""))->equals(*typeFromString("Nothing"))); ASSERT_TRUE(getMostSubtype(typesFromString("Nothing"))->equals(*typeFromString("Nothing"))); @@ -106,11 +155,15 @@ TEST(data_type, data_type_get_common_type_Test) ASSERT_TRUE(getMostSubtype(typesFromString("Array(UInt8) Array(UInt8)"))->equals(*typeFromString("Array(UInt8)"))); ASSERT_TRUE(getMostSubtype(typesFromString("Array(UInt8) Array(Int8)"))->equals(*typeFromString("Array(UInt8)"))); ASSERT_TRUE(getMostSubtype(typesFromString("Array(Float32) Array(Int16) Array(UInt32)"))->equals(*typeFromString("Array(Int16)"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Array(Array(UInt8)) Array(Array(UInt8))"))->equals(*typeFromString("Array(Array(UInt8))"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Array(Array(UInt8)) Array(Array(Int8))"))->equals(*typeFromString("Array(Array(UInt8))"))); + ASSERT_TRUE( + getMostSubtype(typesFromString("Array(Array(UInt8)) Array(Array(UInt8))"))->equals(*typeFromString("Array(Array(UInt8))"))); + ASSERT_TRUE( + getMostSubtype(typesFromString("Array(Array(UInt8)) Array(Array(Int8))"))->equals(*typeFromString("Array(Array(UInt8))"))); ASSERT_TRUE(getMostSubtype(typesFromString("Array(Date) Array(DateTime)"))->equals(*typeFromString("Array(Date)"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Array(String) Array(FixedString(32))"))->equals(*typeFromString("Array(FixedString(32))"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Array(String) Array(FixedString(32))"))->equals(*typeFromString("Array(FixedString(32))"))); + ASSERT_TRUE( + getMostSubtype(typesFromString("Array(String) Array(FixedString(32))"))->equals(*typeFromString("Array(FixedString(32))"))); + ASSERT_TRUE( + getMostSubtype(typesFromString("Array(String) Array(FixedString(32))"))->equals(*typeFromString("Array(FixedString(32))"))); 
ASSERT_TRUE(getMostSubtype(typesFromString("Nullable(Nothing) Nothing"))->equals(*typeFromString("Nothing"))); ASSERT_TRUE(getMostSubtype(typesFromString("Nullable(UInt8) Int8"))->equals(*typeFromString("UInt8"))); @@ -119,12 +172,104 @@ TEST(data_type, data_type_get_common_type_Test) ASSERT_TRUE(getMostSubtype(typesFromString("Nullable(Nothing) Nullable(Int8)"))->equals(*typeFromString("Nullable(Nothing)"))); ASSERT_TRUE(getMostSubtype(typesFromString("Tuple(Int8,UInt8) Tuple(UInt8,Int8)"))->equals(*typeFromString("Tuple(UInt8,UInt8)"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Tuple(Nullable(Nothing)) Tuple(Nullable(UInt8))"))->equals(*typeFromString("Tuple(Nullable(Nothing))"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Tuple(Nullable(Nothing)) Tuple(Nullable(UInt8))")) + ->equals(*typeFromString("Tuple(Nullable(Nothing))"))); EXPECT_ANY_THROW(getMostSubtype(typesFromString("Int8 String"), true)); EXPECT_ANY_THROW(getMostSubtype(typesFromString("Nothing"), true)); EXPECT_ANY_THROW(getMostSubtype(typesFromString("FixedString(16) FixedString(8) String"), true)); + } + catch (const Exception & e) + { + std::string text = e.displayText(); + + bool print_stack_trace = true; + auto embedded_stack_trace_pos = text.find("Stack trace"); + if (std::string::npos != embedded_stack_trace_pos && !print_stack_trace) + text.resize(embedded_stack_trace_pos); + + std::cerr << "Code: " << e.code() << ". " << text << std::endl << std::endl; + + if (print_stack_trace && std::string::npos == embedded_stack_trace_pos) + { + std::cerr << "Stack trace:" << std::endl << e.getStackTrace().toString(); + } + + throw; + } + catch (const Poco::Exception & e) + { + std::cerr << "Poco::Exception: " << e.displayText() << std::endl; + throw; + } + catch (const std::exception & e) + { + std::cerr << "std::exception: " << e.what() << std::endl; + throw; + } + catch (...) 
+ { + std::cerr << "Unknown exception" << std::endl; + throw; + } +} + +TEST(DataType_test, isSupportedDataTypeCast) +{ + try + { + // same type is not lossy + ASSERT_TRUE(isSupportedDataTypeCast(typeFromString("Int8"), typeFromString("Int8"))); + ASSERT_TRUE(isSupportedDataTypeCast(typeFromString("Int16"), typeFromString("Int16"))); + ASSERT_TRUE(isSupportedDataTypeCast(typeFromString("Int32"), typeFromString("Int32"))); + ASSERT_TRUE(isSupportedDataTypeCast(typeFromString("Int64"), typeFromString("Int64"))); + ASSERT_TRUE(isSupportedDataTypeCast(typeFromString("DateTime"), typeFromString("DateTime"))); + ASSERT_TRUE(isSupportedDataTypeCast(typeFromString("Date"), typeFromString("Date"))); + ASSERT_TRUE(isSupportedDataTypeCast(typeFromString("Decimal(10, 4)"), typeFromString("Decimal(10, 4)"))); + ASSERT_TRUE(isSupportedDataTypeCast(typeFromString("String"), typeFromString("String"))); + ASSERT_TRUE(isSupportedDataTypeCast(typeFromString("FixedString(16)"), typeFromString("FixedString(16)"))); + + // signed -> unsigned is lossy + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("Int8"), typeFromString("UInt8"))); + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("Int8"), typeFromString("UInt16"))); + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("Int8"), typeFromString("UInt32"))); + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("Int8"), typeFromString("UInt64"))); + + // unsigned -> signed is lossy + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("UInt8"), typeFromString("Int8"))); + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("UInt8"), typeFromString("Int16"))); + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("UInt8"), typeFromString("Int32"))); + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("UInt8"), typeFromString("Int64"))); + + // nullable -> not null is ok + ASSERT_TRUE(isSupportedDataTypeCast(typeFromString("Nullable(UInt32)"), typeFromString("UInt32"))); + 
ASSERT_TRUE(isSupportedDataTypeCast(typeFromString("Nullable(UInt16)"), typeFromString("UInt32"))); + ASSERT_TRUE(isSupportedDataTypeCast(typeFromString("Nullable(Int32)"), typeFromString("Int64"))); + + // not null -> nullable is ok + ASSERT_TRUE(isSupportedDataTypeCast(typeFromString("UInt32"), typeFromString("Nullable(UInt32)"))); + ASSERT_TRUE(isSupportedDataTypeCast(typeFromString("UInt16"), typeFromString("Nullable(UInt32)"))); + + // float32 -> float64 is lossy + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("Float32"), typeFromString("Float64"))); + // float64 -> float32 is lossy + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("Float64"), typeFromString("Float32"))); + + // not support datetime <-> date + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("DateTime"), typeFromString("Date"))); + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("Date"), typeFromString("DateTime"))); + + // strings + ASSERT_TRUE(isSupportedDataTypeCast(typeFromString("FixedString(16)"), typeFromString("FixedString(100)"))); + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("String"), typeFromString("FixedString(1024)"))); + ASSERT_TRUE(isSupportedDataTypeCast(typeFromString("FixedString(16)"), typeFromString("String"))); + + // Decimal + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("Decimal(10, 4)"), typeFromString("Decimal(10, 2)"))); + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("Decimal(10, 2)"), typeFromString("Decimal(10, 4)"))); + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("Decimal(10, 4)"), typeFromString("Decimal(16, 4)"))); + ASSERT_FALSE(isSupportedDataTypeCast(typeFromString("Decimal(16, 4)"), typeFromString("Decimal(10, 4)"))); } catch (const Exception & e) { @@ -140,8 +285,7 @@ TEST(data_type, data_type_get_common_type_Test) if (print_stack_trace && std::string::npos == embedded_stack_trace_pos) { - std::cerr << "Stack trace:" << std::endl - << e.getStackTrace().toString(); + std::cerr << "Stack trace:" << std::endl << 
e.getStackTrace().toString(); } throw; @@ -162,3 +306,6 @@ TEST(data_type, data_type_get_common_type_Test) throw; } } + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Functions/tests/gtest_funtions_decimal_arith.cpp b/dbms/src/Functions/tests/gtest_funtions_decimal_arith.cpp index 30f254ac591..6d5f6d2f6d3 100644 --- a/dbms/src/Functions/tests/gtest_funtions_decimal_arith.cpp +++ b/dbms/src/Functions/tests/gtest_funtions_decimal_arith.cpp @@ -9,6 +9,28 @@ namespace DB namespace tests { +void ASSERT_DecimalDataTypeScaleEq(const DataTypePtr &actual_, ScaleType expected_scale) +{ + if (auto actual = checkDecimal(*actual_)) + ASSERT_EQ(actual->getScale(), expected_scale); + else if (auto actual = checkDecimal(*actual_)) + ASSERT_EQ(actual->getScale(), expected_scale); + else if (auto actual = checkDecimal(*actual_)) + ASSERT_EQ(actual->getScale(), expected_scale); + else if (auto actual = checkDecimal(*actual_)) + ASSERT_EQ(actual->getScale(), expected_scale); + else + ASSERT_TRUE(false) << "type: " + actual_->getName() + " is not decimal!"; +} + +// 1) If the declared type of both operands of a dyadic arithmetic operator is exact numeric, then the declared +// type of the result is an implementation-defined exact numeric type, with precision and scale determined as +// follows: +// a) Let S1 and S2 be the scale of the first and second operands respectively. +// b) The precision of the result of addition and subtraction is implementation-defined, and the scale is the +// maximum of S1 and S2. +// c) The precision of the result of multiplication is implementation-defined, and the scale is S1 + S2. +// d) The precision and scale of the result of division are implementation-defined. 
TEST(DataTypeDecimal_test, A) { DataTypePtr lhs = createDecimal(10, 4); @@ -16,23 +38,24 @@ TEST(DataTypeDecimal_test, A) const ScaleType scale_max = std::max(typeid_cast(lhs.get())->getScale(), (typeid_cast(rhs.get()))->getScale()); const ScaleType scale_sum = typeid_cast(lhs.get())->getScale() + (typeid_cast(rhs.get()))->getScale(); - const DataTypePtr expect_add = createDecimal(10, scale_max); - const DataTypePtr expect_mul = createDecimal(20, scale_sum); Context context = TiFlashTestEnv::getContext(); DataTypes args{lhs, rhs}; + // Decimal(10, 4) + Decimal(10, 6) FunctionPtr func = FunctionPlus::create(context); DataTypePtr return_type = func->getReturnTypeImpl(args); - ASSERT_TRUE(return_type->equals(*expect_add)); + ASSERT_DecimalDataTypeScaleEq(return_type, scale_max); + // Decimal(10, 4) - Decimal(10, 6) func = FunctionMinus::create(context); return_type = func->getReturnTypeImpl(args); - ASSERT_TRUE(return_type->equals(*expect_add)); + ASSERT_DecimalDataTypeScaleEq(return_type, scale_max); + // Decimal(10, 4) * Decimal(10, 6) func = FunctionMultiply::create(context); return_type = func->getReturnTypeImpl(args); - ASSERT_TRUE(return_type->equals(*expect_mul)); + ASSERT_DecimalDataTypeScaleEq(return_type, scale_sum); } diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index c47952d1e90..a5d6c788736 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -20,6 +20,16 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +NamesAndTypesList::Iterator AlterCommand::findColumn(NamesAndTypesList &columns) const +{ + const auto it = std::find_if(columns.begin(), columns.end(), + std::bind(namesEqual, std::cref(column_name), std::placeholders::_1) ); + if (it == columns.end()) + throw Exception("Wrong column name. 
Cannot find column " + column_name + " to modify", + ErrorCodes::ILLEGAL_COLUMN); + + return it; +} void AlterCommand::apply(ColumnsDescription & columns_description) const { @@ -111,18 +121,6 @@ void AlterCommand::apply(ColumnsDescription & columns_description) const : default_kind == ColumnDefaultKind::Materialized ? columns_description.materialized : columns_description.aliases; - /// find column or throw exception - const auto find_column = [this] (NamesAndTypesList & columns) - { - const auto it = std::find_if(columns.begin(), columns.end(), - std::bind(namesEqual, std::cref(column_name), std::placeholders::_1) ); - if (it == columns.end()) - throw Exception("Wrong column name. Cannot find column " + column_name + " to modify", - ErrorCodes::ILLEGAL_COLUMN); - - return it; - }; - /// if default types differ, remove column from the old list, then add to the new list if (default_kind != old_default_kind) { @@ -132,7 +130,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description) const : old_default_kind == ColumnDefaultKind::Materialized ? 
columns_description.materialized : columns_description.aliases; - const auto old_column_it = find_column(old_columns); + const auto old_column_it = findColumn(old_columns); new_columns.emplace_back(*old_column_it); old_columns.erase(old_column_it); @@ -142,7 +140,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description) const } /// find column in one of three column lists - const auto column_it = find_column(new_columns); + const auto column_it = findColumn(new_columns); column_it->type = data_type; if (!default_expression && had_default_expr) diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index 3ea4898b011..c65a02e8f8d 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -48,6 +48,10 @@ struct AlterCommand return (name_with_dot == name_type.name.substr(0, name_without_dot.length() + 1) || name_without_dot == name_type.name); } + /// For MODIFY_COLUMN + /// find column from `columns` or throw exception + NamesAndTypesList::Iterator findColumn(NamesAndTypesList &columns) const; + void apply(ColumnsDescription & columns_description) const; AlterCommand() = default; diff --git a/dbms/src/Storages/DeltaMerge/Chunk.cpp b/dbms/src/Storages/DeltaMerge/Chunk.cpp index 43eea3105c9..31da65744d8 100644 --- a/dbms/src/Storages/DeltaMerge/Chunk.cpp +++ b/dbms/src/Storages/DeltaMerge/Chunk.cpp @@ -1,5 +1,7 @@ #include +#include +#include #include #include @@ -141,40 +143,96 @@ void deserializeColumn(IColumn & column, const ColumnMeta & meta, const Page & p } void readChunkData(MutableColumns & columns, - const Chunk & chunk, const ColumnDefines & column_defines, + const Chunk & chunk, const PageReader & page_reader, size_t rows_offset, size_t rows_limit) { + assert(!chunk.isDeleteRange()); + std::unordered_map page_to_index; PageIds page_ids; page_ids.reserve(column_defines.size()); for (size_t index = 0; index < column_defines.size(); ++index) { - auto & define = column_defines[index]; - auto 
page_id = chunk.getColumn(define.id).page_id; - page_ids.push_back(page_id); - page_to_index[page_id] = index; + const auto & define = column_defines[index]; + if (chunk.hasColumn(define.id)) + { + // Read chunk's data from PageStorage later + auto page_id = chunk.getColumn(define.id).page_id; + page_ids.push_back(page_id); + page_to_index[page_id] = index; + } + else + { + // New column after ddl is not exist in chunk's meta, fill with default value + IColumn & col = *columns[index]; + + if (define.default_value.empty()) + { + ColumnPtr tmp_col = define.type->createColumnConstWithDefaultValue(rows_limit)->convertToFullColumnIfConst(); + col.insertRangeFrom(*tmp_col, 0, rows_limit); + } + else + { + // Read default value from `define.default_value` + MutableColumnPtr tmp_col = define.type->createColumn(); + ReadBufferFromMemory buff(define.default_value.c_str(), define.default_value.size()); + define.type->deserializeTextEscaped(*tmp_col, buff); + ColumnPtr tmp_full_col = tmp_col->replicate(IColumn::Offsets(1, rows_limit)); + col.insertRangeFrom(*tmp_full_col, 0, rows_limit); + } + } } PageHandler page_handler = [&](PageId page_id, const Page & page) { - size_t index = page_to_index[page_id]; + size_t index = page_to_index[page_id]; + IColumn & col = *columns[index]; + const ColumnDefine & read_define = column_defines[index]; + const ColumnMeta & disk_meta = chunk.getColumn(read_define.id); - ColumnDefine define = column_defines[index]; - ReadBufferFromMemory buf(page.data.begin(), page.data.size()); - const ColumnMeta & meta = chunk.getColumn(define.id); - IColumn & col = *columns[index]; + // define.type is current type at memory + // meta.type is the type at disk (maybe different from define.type) - if (rows_offset == 0) + if (read_define.type->equals(*disk_meta.type)) { - deserializeColumn(col, meta, page, rows_limit); + if (rows_offset == 0) + { + deserializeColumn(col, disk_meta, page, rows_limit); + } + else + { + MutableColumnPtr tmp_col = 
read_define.type->createColumn(); + deserializeColumn(*tmp_col, disk_meta, page, rows_offset + rows_limit); + col.insertRangeFrom(*tmp_col, rows_offset, rows_limit); + } } else { - auto tmp_col = define.type->createColumn(); - deserializeColumn(*tmp_col, meta, page, rows_offset + rows_limit); - col.insertRangeFrom(*tmp_col, rows_offset, rows_limit); +#ifndef NDEBUG + const auto && [first, last] = chunk.getHandleFirstLast(); + const String disk_col_str = "col{name:" + DB::toString(read_define.name) + ",id:" + DB::toString(disk_meta.col_id) + + ",type:" + disk_meta.type->getName() + "]"; + LOG_TRACE(&Poco::Logger::get("Chunk"), + "Reading chunk[" + DB::toString(first) + "-" + DB::toString(last) + "] " + disk_col_str + " as type " + + read_define.type->getName()); +#endif + + // sanity check + if (unlikely(!isSupportedDataTypeCast(disk_meta.type, read_define.type))) + { + throw Exception("Reading mismatch data type chunk. Cast from " + disk_meta.type->getName() + " to " + + read_define.type->getName() + " is NOT supported!", + ErrorCodes::NOT_IMPLEMENTED); + } + + // Read from disk according as chunk meta + MutableColumnPtr disk_col = disk_meta.type->createColumn(); + deserializeColumn(*disk_col, disk_meta, page, rows_offset + rows_limit); + + // Cast column's data from DataType in disk to what we need now + castColumnAccordingToColumnDefine(disk_meta.type, disk_col->getPtr(), read_define, col.getPtr(), rows_offset, rows_limit); } }; page_reader.read(page_ids, page_handler); @@ -196,23 +254,382 @@ Block readChunk(const Chunk & chunk, const ColumnDefines & read_column_defines, if (chunk.getRows()) { // Read from storage - readChunkData(columns, chunk, read_column_defines, page_reader, 0, chunk.getRows()); + readChunkData(columns, read_column_defines, chunk, page_reader, 0, chunk.getRows()); } Block res; for (size_t index = 0; index < read_column_defines.size(); ++index) { - ColumnDefine define = read_column_defines[index]; - ColumnWithTypeAndName col; - col.type = 
define.type; - col.name = define.name; - col.column_id = define.id; - col.column = std::move(columns[index]); - - res.insert(col); + const ColumnDefine & define = read_column_defines[index]; + ColumnWithTypeAndName col(std::move(columns[index]), define.type, define.name, define.id); + res.insert(std::move(col)); } return res; } +//========================================================================================== +// Functions for casting column data when disk data type mismatch with read data type. +//========================================================================================== + +namespace +{ + +/// some helper functions for casting column data type + +bool castNonNullNumericColumn(const DataTypePtr & disk_type_not_null_, + const ColumnPtr & disk_col_not_null, + const ColumnDefine & read_define, + const ColumnPtr & null_map, + MutableColumnPtr & memory_col_not_null, + size_t rows_offset, + size_t rows_limit); + + +template +void insertRangeFromWithNumericTypeCast(const ColumnPtr & from_col, // + const ColumnPtr & null_map, + const ColumnDefine & read_define, + MutableColumnPtr & to_col, + size_t rows_offset, + size_t rows_limit); + +} // namespace + +void castColumnAccordingToColumnDefine(const DataTypePtr & disk_type, + const ColumnPtr & disk_col, + const ColumnDefine & read_define, + MutableColumnPtr memory_col, + size_t rows_offset, + size_t rows_limit) +{ +#if 0 + // A simple but awful version using Field. 
+ for (size_t i = 0; i < disk_col->size(); ++i) + { + Field f = (*disk_col)[i]; + if (f.getType() == Field::Types::Null) + memory_col->insertDefault(); + else + memory_col->insert(std::move(f)); + } +#else + const DataTypePtr & read_type = read_define.type; + + // Unwrap nullable(what) + ColumnPtr disk_col_not_null; + MutableColumnPtr memory_col_not_null; + ColumnPtr null_map; + DataTypePtr disk_type_not_null = disk_type; + DataTypePtr read_type_not_null = read_type; + if (disk_type->isNullable() && read_type->isNullable()) + { + // nullable -> nullable, copy null map + const auto & disk_nullable_col = typeid_cast(*disk_col); + const auto & disk_null_map = disk_nullable_col.getNullMapData(); + auto & memory_nullable_col = typeid_cast(*memory_col); + auto & memory_null_map = memory_nullable_col.getNullMapData(); + memory_null_map.insert(disk_null_map.begin(), disk_null_map.end()); + + disk_col_not_null = disk_nullable_col.getNestedColumnPtr(); + memory_col_not_null = memory_nullable_col.getNestedColumn().getPtr(); + + const auto * type_nullable = typeid_cast(disk_type.get()); + disk_type_not_null = type_nullable->getNestedType(); + type_nullable = typeid_cast(read_type.get()); + read_type_not_null = type_nullable->getNestedType(); + } + else if (!disk_type->isNullable() && read_type->isNullable()) + { + // not null -> nullable, set null map to all not null + auto & memory_nullable_col = typeid_cast(*memory_col); + auto & nullmap_data = memory_nullable_col.getNullMapData(); + nullmap_data.resize_fill(rows_offset + rows_limit, 0); + + disk_col_not_null = disk_col; + memory_col_not_null = memory_nullable_col.getNestedColumn().getPtr(); + + const auto * type_nullable = typeid_cast(read_type.get()); + read_type_not_null = type_nullable->getNestedType(); + } + else if (disk_type->isNullable() && !read_type->isNullable()) + { + // nullable -> not null, fill "NULL" values with default value later + const auto & disk_nullable_col = typeid_cast(*disk_col); + null_map = 
disk_nullable_col.getNullMapColumnPtr(); + disk_col_not_null = disk_nullable_col.getNestedColumnPtr(); + memory_col_not_null = std::move(memory_col); + + const auto * type_nullable = typeid_cast(disk_type.get()); + disk_type_not_null = type_nullable->getNestedType(); + } + else + { + // not null -> not null + disk_col_not_null = disk_col; + memory_col_not_null = std::move(memory_col); + } + + assert(memory_col_not_null != nullptr); + assert(disk_col_not_null != nullptr); + assert(read_type_not_null != nullptr); + assert(disk_type_not_null != nullptr); + + ColumnDefine read_define_not_null(read_define); + read_define_not_null.type = read_type_not_null; + if (disk_type_not_null->equals(*read_type_not_null)) + { + // just change from nullable -> not null / not null -> nullable + memory_col_not_null->insertRangeFrom(*disk_col_not_null, rows_offset, rows_limit); + + if (null_map) + { + /// We are applying cast from nullable to not null, scan to fill "NULL" with default value + + for (size_t i = 0; i < rows_limit; ++i) + { + if (unlikely(null_map->getInt(i) != 0)) + { + // `from_col[i]` is "NULL", fill `to_col[rows_offset + i]` with default value + // TiDB/MySQL don't support this, should not call here. + throw Exception("Reading mismatch data type chunk. Cast from " + disk_type->getName() + " to " + read_type->getName() + + " with \"NULL\" value is NOT supported!", + ErrorCodes::NOT_IMPLEMENTED); + } + } + } + } + else if (!castNonNullNumericColumn( + disk_type_not_null, disk_col_not_null, read_define_not_null, null_map, memory_col_not_null, rows_offset, rows_limit)) + { + throw Exception("Reading mismatch data type chunk. 
Cast and assign from " + disk_type->getName() + " to " + read_type->getName() + + " is NOT supported!", + ErrorCodes::NOT_IMPLEMENTED); + } +#endif +} + +namespace +{ +bool castNonNullNumericColumn(const DataTypePtr & disk_type_not_null_, + const ColumnPtr & disk_col_not_null, + const ColumnDefine & read_define, + const ColumnPtr & null_map, + MutableColumnPtr & memory_col_not_null, + size_t rows_offset, + size_t rows_limit) +{ + /// Caller should ensure that type is not nullable + assert(disk_type_not_null_ != nullptr); + assert(disk_col_not_null != nullptr); + assert(read_define.type != nullptr); + assert(memory_col_not_null != nullptr); + + const IDataType * disk_type_not_null = disk_type_not_null_.get(); + const IDataType * read_type_not_null = read_define.type.get(); + + /// Caller should ensure nullable is unwrapped + assert(!disk_type_not_null->isNullable()); + assert(!read_type_not_null->isNullable()); + + if (checkDataType(disk_type_not_null)) + { + using FromType = UInt32; + if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + else if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + } + else if (checkDataType(disk_type_not_null)) + { + using FromType = Int32; + if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + else if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + } + else if (checkDataType(disk_type_not_null)) + { + using FromType = UInt16; + if (checkDataType(read_type_not_null)) + { + 
insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + else if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + else if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + } + else if (checkDataType(disk_type_not_null)) + { + using FromType = Int16; + if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + else if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + else if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + } + else if (checkDataType(disk_type_not_null)) + { + using FromType = UInt8; + if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + else if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + else if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + else if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, 
read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + } + else if (checkDataType(disk_type_not_null)) + { + using FromType = Int8; + if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + else if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + else if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + else if (checkDataType(read_type_not_null)) + { + insertRangeFromWithNumericTypeCast( + disk_col_not_null, null_map, read_define, memory_col_not_null, rows_offset, rows_limit); + return true; + } + } + + // else is not support + return false; +} + +template +void insertRangeFromWithNumericTypeCast(const ColumnPtr & from_col, // + const ColumnPtr & null_map, + const ColumnDefine & read_define, + MutableColumnPtr & to_col, + size_t rows_offset, + size_t rows_limit) +{ + // Caller should ensure that both from_col / to_col + // * is numeric + // * no nullable wrapper + // * both signed or unsigned + static_assert(std::is_integral_v); + static_assert(std::is_integral_v); + constexpr bool is_both_signed_or_unsigned = !(std::is_unsigned_v ^ std::is_unsigned_v); + static_assert(is_both_signed_or_unsigned); + assert(from_col != nullptr); + assert(to_col != nullptr); + assert(from_col->isNumeric()); + assert(to_col->isNumeric()); + assert(!from_col->isColumnNullable()); + assert(!to_col->isColumnNullable()); + assert(!from_col->isColumnConst()); + assert(!to_col->isColumnConst()); + + // Something like `insertRangeFrom(from_col, rows_offset, rows_limit)` with static_cast + const PaddedPODArray & from_array = toColumnVectorData(from_col); + 
PaddedPODArray * to_array_ptr = toMutableColumnVectorDataPtr(to_col); + to_array_ptr->reserve(rows_limit); + for (size_t i = 0; i < rows_limit; ++i) + { + (*to_array_ptr).emplace_back(static_cast(from_array[rows_offset + i])); + } + + if (unlikely(null_map)) + { + /// We are applying cast from nullable to not null, scan to fill "NULL" with default value + + TypeTo default_value = 0; // if read_define.default_value is empty, fill with 0 + if (!read_define.default_value.empty()) + { + // parse from text + ReadBufferFromMemory buff(read_define.default_value.c_str(), read_define.default_value.size()); + readIntTextUnsafe(default_value, buff); + } + + const size_t to_offset_before_inserted = to_array_ptr->size() - rows_limit; + + for (size_t i = 0; i < rows_limit; ++i) + { + const size_t to_offset = to_offset_before_inserted + i; + if (null_map->getInt(rows_offset + i) != 0) + { + // `from_col[rows_offset + i]` is "NULL", fill `to_col[x]` with default value + (*to_array_ptr)[to_offset] = static_cast(default_value); + } + } + } +} + + +} // namespace + } // namespace DM -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/Chunk.h b/dbms/src/Storages/DeltaMerge/Chunk.h index 0e0b70d963c..395210161a8 100644 --- a/dbms/src/Storages/DeltaMerge/Chunk.h +++ b/dbms/src/Storages/DeltaMerge/Chunk.h @@ -21,7 +21,6 @@ namespace DM static constexpr size_t CHUNK_SERIALIZE_BUFFER_SIZE = 65536; -// TODO: version des/ser struct ColumnMeta { ColId col_id; @@ -69,6 +68,8 @@ class Chunk return bytes; } + bool hasColumn(ColId col_id) const { return columns.count(col_id) > 0; } + const ColumnMeta & getColumn(ColId col_id) const { auto it = columns.find(col_id); @@ -114,9 +115,23 @@ Chunks deserializeChunks(ReadBuffer & buf); Chunk prepareChunkDataWrite(const DMContext & dm_context, const GenPageId & gen_data_page_id, WriteBatch & wb, const Block & block); +/** + * Read `chunk`'s columns from `storage` and append the `chunk`'s data range 
+ * [`rows_offset`, `rows_offset`+`rows_limit`) to `columns`. + * + * Note that after ddl, the data type between `chunk.columns` and `column_defines` maybe different, + * we do a cast according to `column_defines` before append to `columns`. + * + * @param columns The columns to append data. + * @param column_defines The DataType, column-id of `columns`. + * @param chunk Info about chunk to read. e.g. PageId in `storage`, DataType for reading. + * @param page_reader Where the serialized data stored in. + * @param rows_offset + * @param rows_limit + */ void readChunkData(MutableColumns & columns, - const Chunk & chunk, const ColumnDefines & column_defines, + const Chunk & chunk, const PageReader & page_reader, size_t rows_offset, size_t rows_limit); @@ -124,6 +139,24 @@ void readChunkData(MutableColumns & columns, Block readChunk(const Chunk & chunk, const ColumnDefines & read_column_defines, const PageReader & page_reader); +/** + * Cast `disk_col` from `disk_type` according to `read_define`, and append data + * [`rows_offset`, `rows_offset`+`rows_limit`) to `memory_col` + * + * @param disk_type + * @param disk_col + * @param read_define + * @param memory_col + * @param rows_offset + * @param rows_limit + */ +void castColumnAccordingToColumnDefine(const DataTypePtr & disk_type, + const ColumnPtr & disk_col, + const ColumnDefine & read_define, + MutableColumnPtr memory_col, + size_t rows_offset, + size_t rows_limit); + } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/ChunkBlockInputStream.h b/dbms/src/Storages/DeltaMerge/ChunkBlockInputStream.h index f83c9310eb7..13bc1aedc3e 100644 --- a/dbms/src/Storages/DeltaMerge/ChunkBlockInputStream.h +++ b/dbms/src/Storages/DeltaMerge/ChunkBlockInputStream.h @@ -8,6 +8,7 @@ namespace DB { namespace DM { +/// Read `chunks` as blocks according to `read_columns` class ChunkBlockInputStream final : public IBlockInputStream { public: @@ -17,20 +18,7 @@ class ChunkBlockInputStream final : public 
IBlockInputStream } String getName() const override { return "Chunk"; } - Block getHeader() const override - { - Block res; - for (const auto & c : read_columns) - { - ColumnWithTypeAndName col; - col.column = c.type->createColumn(); - col.type = c.type; - col.name = c.name; - col.column_id = c.id; - res.insert(col); - } - return res; - } + Block getHeader() const override { return toEmptyBlock(read_columns); } Block read() override { diff --git a/dbms/src/Storages/DeltaMerge/DMContext.h b/dbms/src/Storages/DeltaMerge/DMContext.h index e9a8289d6f2..e4065d07501 100644 --- a/dbms/src/Storages/DeltaMerge/DMContext.h +++ b/dbms/src/Storages/DeltaMerge/DMContext.h @@ -21,9 +21,10 @@ struct DMContext const Context & db_context; StoragePool & storage_pool; - const String & table_name; - const ColumnDefines & table_columns; - const ColumnDefine & table_handle_define; + // The schema snapshot + // We need a consistent snapshot of columns, copy ColumnsDefines + const ColumnDefines table_columns; + const ColumnDefine table_handle_define; const UInt64 min_version; diff --git a/dbms/src/Storages/DeltaMerge/DMDecoratorStreams.h b/dbms/src/Storages/DeltaMerge/DMDecoratorStreams.h index 3bafc1c5803..11a7268ab97 100644 --- a/dbms/src/Storages/DeltaMerge/DMDecoratorStreams.h +++ b/dbms/src/Storages/DeltaMerge/DMDecoratorStreams.h @@ -14,7 +14,7 @@ class DMColumnFilterBlockInputStream : public IProfilingBlockInputStream { public: DMColumnFilterBlockInputStream(const BlockInputStreamPtr & input, const ColumnDefines & columns_to_read_) - : columns_to_read(columns_to_read_), header(createHeader(columns_to_read)) + : columns_to_read(columns_to_read_), header(toEmptyBlock(columns_to_read)) { children.emplace_back(input); } diff --git a/dbms/src/Storages/DeltaMerge/DMSegmentThreadInputStream.h b/dbms/src/Storages/DeltaMerge/DMSegmentThreadInputStream.h index 3643f37f28b..169b372e792 100644 --- a/dbms/src/Storages/DeltaMerge/DMSegmentThreadInputStream.h +++ 
b/dbms/src/Storages/DeltaMerge/DMSegmentThreadInputStream.h @@ -20,7 +20,7 @@ class DMSegmentThreadInputStream : public IProfilingBlockInputStream : task_pool(task_pool_), stream_creator(stream_creator_), columns_to_read(columns_to_read_), - header(createHeader(columns_to_read)), + header(toEmptyBlock(columns_to_read)), handle_name(handle_name_), handle_real_type(handle_real_type_), context(context_), diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeDefines.h b/dbms/src/Storages/DeltaMerge/DeltaMergeDefines.h index d3619f73f6b..eab2f157cb6 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeDefines.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeDefines.h @@ -13,6 +13,11 @@ #include #include +namespace TiDB +{ +struct TableInfo; +} // namespace TiDB + namespace DB { namespace DM @@ -58,11 +63,14 @@ using ColId = Int64; using ColIds = std::vector; using HandlePair = std::pair; +using OptionTableInfoConstRef = std::optional>; + struct ColumnDefine { ColId id; String name; DataTypePtr type; + String default_value; explicit ColumnDefine(ColId id_ = 0, String name_ = "", DataTypePtr type_ = nullptr): id(id_), name(std::move(name_)), type(std::move(type_)) {} }; @@ -83,12 +91,12 @@ static const ColId EXTRA_HANDLE_COLUMN_ID = -1; static const ColId VERSION_COLUMN_ID = -1024; // Prevent conflict with TiDB. 
static const ColId TAG_COLUMN_ID = -1025; -static DataTypePtr EXTRA_HANDLE_COLUMN_TYPE = DataTypeFactory::instance().get("Int64"); -static DataTypePtr VERSION_COLUMN_TYPE = DataTypeFactory::instance().get("UInt64"); -static DataTypePtr TAG_COLUMN_TYPE = DataTypeFactory::instance().get("UInt8"); +static const DataTypePtr EXTRA_HANDLE_COLUMN_TYPE = DataTypeFactory::instance().get("Int64"); +static const DataTypePtr VERSION_COLUMN_TYPE = DataTypeFactory::instance().get("UInt64"); +static const DataTypePtr TAG_COLUMN_TYPE = DataTypeFactory::instance().get("UInt8"); -static ColumnDefine VERSION_COLUMN_DEFINE{VERSION_COLUMN_ID, VERSION_COLUMN_NAME, VERSION_COLUMN_TYPE}; -static ColumnDefine TAG_COLUMN_DEFINE{TAG_COLUMN_ID, TAG_COLUMN_NAME, TAG_COLUMN_TYPE}; +static const ColumnDefine VERSION_COLUMN_DEFINE{VERSION_COLUMN_ID, VERSION_COLUMN_NAME, VERSION_COLUMN_TYPE}; +static const ColumnDefine TAG_COLUMN_DEFINE{TAG_COLUMN_ID, TAG_COLUMN_NAME, TAG_COLUMN_TYPE}; static constexpr UInt64 MIN_UINT64 = std::numeric_limits::min(); static constexpr UInt64 MAX_UINT64 = std::numeric_limits::max(); diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h index 249296b54e9..7ec652df85d 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h @@ -1,3 +1,5 @@ +#include + #pragma once #include @@ -11,6 +13,7 @@ #include #include #include +#include namespace DB { @@ -62,16 +65,6 @@ inline const ColumnWithTypeAndName & getByColumnId(const Block & block, ColId co throw Exception("Column with column id " + DB::toString(col_id) + " not found"); } -inline ColumnWithTypeAndName createColumnWithTypeAndName(const ColumnPtr & column, const DataTypePtr & type, const String & name, ColId id) -{ - ColumnWithTypeAndName c; - c.column = column; - c.type = type; - c.name = name; - c.column_id = id; - return c; -} - inline SortDescription getPkSort(const ColumnDefine & handle) { 
SortDescription sort; @@ -105,6 +98,13 @@ inline PaddedPODArray const * toColumnVectorDataPtr(const ColumnPtr & column) return &c.getData(); } +template +inline PaddedPODArray * toMutableColumnVectorDataPtr(const MutableColumnPtr & column) +{ + ColumnVector & c = typeid_cast &>(*(column)); + return &c.getData(); +} + template inline const PaddedPODArray & toColumnVectorData(const ColumnPtr & column) { @@ -124,27 +124,23 @@ inline PaddedPODArray const * getColumnVectorDataPtr(const Block & block, siz return toColumnVectorDataPtr(block.getByPosition(pos).column); } -inline void addColumn(Block & block, ColId col_id, String col_name, const DataTypePtr & col_type, const ColumnPtr & col) +inline void addColumnToBlock(Block & block, ColId col_id, const String &col_name, const DataTypePtr & col_type, const ColumnPtr & col) { - ColumnWithTypeAndName column; - column.column_id = col_id; - column.name = col_name; - column.type = col_type; - column.column = col; - block.insert(column); + ColumnWithTypeAndName column(col, col_type, col_name, col_id); + block.insert(std::move(column)); } inline Block toEmptyBlock(const ColumnDefines & columns) { Block block; for (auto & c : columns) - addColumn(block, c.id, c.name, c.type, c.type->createColumn()); + addColumnToBlock(block, c.id, c.name, c.type, c.type->createColumn()); return block; } inline void convertColumn(Block & block, size_t pos, const DataTypePtr & to_type, const Context & context) { - auto * to_type_ptr = &(*to_type); + const IDataType * to_type_ptr = to_type.get(); if (checkDataType(to_type_ptr)) FunctionToUInt8::create(context)->execute(block, {pos}, pos); @@ -250,21 +246,6 @@ inline size_t blockBytes(const Block & block) return bytes; } -inline Block createHeader(const ColumnDefines & col_defines) -{ - Block header; - for (auto & d : col_defines) - { - ColumnWithTypeAndName col; - col.name = d.name; - col.type = d.type; - col.column_id = d.id; - col.column = d.type->createColumn(); - header.insert(std::move(col)); - 
} - return header; -} - template inline String rangeToString(T start, T end) { @@ -285,5 +266,17 @@ inline String rangeToString(const Range & range) return rangeToString(range.start, range.end); } +/// find column from `table_info.columns` or throw exception +inline std::vector::const_iterator findColumnInfoInTableInfo(const TiDB::TableInfo & table_info, const String & column_name) +{ + auto iter = std::find_if(table_info.columns.begin(), table_info.columns.end(), [&](const TiDB::ColumnInfo & column_info) { + return column_info.name == column_name; + }); + if (iter == table_info.columns.end()) + throw Exception("Invalid column name. Cannot find column " + column_name + " in `table_info`", + ErrorCodes::ILLEGAL_COLUMN); + return iter; +} + } // namespace DM } // namespace DB \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index 8ce3f06a612..5b92b5c8be0 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -3,6 +3,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -60,6 +63,8 @@ DeltaMergeStore::DeltaMergeStore(Context & db_context, if (col.name != table_handle_define.name && col.name != VERSION_COLUMN_NAME && col.name != TAG_COLUMN_NAME) table_columns.emplace_back(col); } + // update block header + header = genHeaderBlock(table_columns, table_handle_define, table_handle_real_type); DMContext dm_context = newDMContext(db_context, db_context.getSettingsRef()); if (!storage_pool.maxMetaPageId()) @@ -526,5 +531,142 @@ void DeltaMergeStore::check(const Context & db_context, const DB::Settings & db_ } } +Block DeltaMergeStore::genHeaderBlock(const ColumnDefines & raw_columns, + const ColumnDefine & handle_define, + const DataTypePtr & handle_real_type) +{ + ColumnDefines real_cols = raw_columns; + for (auto && col : real_cols) + { + if (col.id == handle_define.id) + { + if 
(handle_real_type) + col.type = handle_real_type; + } + } + return toEmptyBlock(real_cols); +} + +void DeltaMergeStore::applyAlters(const AlterCommands & commands, + const OptionTableInfoConstRef table_info, + ColumnID & max_column_id_used, + const Context & context) +{ + /// Force flush on store, so that no chunks with different data type in memory + // TODO maybe some ddl do not need to flush cache? eg. just change default value + this->flushCache(context); + + for (const auto & command : commands) + { + applyAlter(command, table_info, max_column_id_used); + } + + // Don't forget to update header + header = genHeaderBlock(table_columns, table_handle_define, table_handle_real_type); +} + +namespace +{ +inline void setColumnDefineDefaultValue(const AlterCommand & command, ColumnDefine & define) +{ + if (command.default_expression) + { + // a cast function + // change column_define.default_value + + if (auto default_literal = typeid_cast(command.default_expression.get()); + default_literal && default_literal->value.getType() == Field::Types::String) + { + const auto default_val = safeGet(default_literal->value); + define.default_value = default_val; + } + else if (auto default_cast_expr = typeid_cast(command.default_expression.get()); + default_cast_expr && default_cast_expr->name == "CAST" /* ParserCastExpression::name */) + { + // eg. 
CAST('1.234' AS Float32); CAST(999 AS Int32) + if (default_cast_expr->arguments->children.size() != 2) + { + throw Exception("Unknown CAST expression in default expr", ErrorCodes::NOT_IMPLEMENTED); + } + + auto default_literal_in_cast = typeid_cast(default_cast_expr->arguments->children[0].get()); + if (default_literal_in_cast && default_literal_in_cast->value.getType() == Field::Types::String) + { + const auto default_value = safeGet(default_literal_in_cast->value); + define.default_value = default_value; + } + else + { + throw Exception("First argument in CAST expression must be a string", ErrorCodes::NOT_IMPLEMENTED); + } + } + else + { + throw Exception("Default value must be a string or CAST('...' AS WhatType)", ErrorCodes::BAD_ARGUMENTS); + } + } +} +} // namespace + +void DeltaMergeStore::applyAlter(const AlterCommand & command, const OptionTableInfoConstRef table_info, ColumnID & max_column_id_used) +{ + if (command.type == AlterCommand::MODIFY_COLUMN) + { + // find column define and then apply modify + bool exist_column = false; + for (auto && column_define : table_columns) + { + if (column_define.name == command.column_name) + { + exist_column = true; + column_define.type = command.data_type; + setColumnDefineDefaultValue(command, column_define); + break; + } + } + if (!exist_column) + { + throw Exception(String("Alter column: ") + command.column_name + " is not exists.", ErrorCodes::LOGICAL_ERROR); + } + } + else if (command.type == AlterCommand::ADD_COLUMN) + { + // we don't care about `after_column` in `table_columns` + + /// If TableInfo from TiDB is not empty, we get column id from TiDB + ColumnDefine define(0, command.column_name, command.data_type); + if (table_info) + { + auto tidb_col_iter = findColumnInfoInTableInfo(table_info->get(), command.column_name); + define.id = tidb_col_iter->id; + } + else + { + define.id = max_column_id_used++; + } + assert(define.id != 0); + setColumnDefineDefaultValue(command, define); + 
table_columns.emplace_back(std::move(define)); + } + else if (command.type == AlterCommand::DROP_COLUMN) + { + // identify column by name in `AlterCommand`. TODO we may change to identify column by column-id later + table_columns.erase(std::remove_if(table_columns.begin(), + table_columns.end(), + [&](const ColumnDefine & c) { return c.name == command.column_name; }), + table_columns.end()); + } +} + +void DeltaMergeStore::flushCache(const Context & db_context) +{ + DMContext dm_context = newDMContext(db_context, db_context.getSettingsRef()); + for (auto && [_handle, segment] : segments) + { + (void)_handle; + segment->flushCache(dm_context); + } +} + } // namespace DM } // namespace DB \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h index 865168e18ca..78d3f57cf57 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h @@ -2,11 +2,13 @@ #include #include +#include #include #include #include #include #include +#include namespace DB { @@ -61,10 +63,23 @@ class DeltaMergeStore UInt64 max_version, size_t expected_block_size); + /// Force flush all data to disk. + /// Now is called by `StorageDeltaMerge`'s `alter` / `rename` + /// and no other threads is able to read / write at the same time. 
+ void flushCache(const Context & context); + + /// Apply `commands` on `table_columns` + void applyAlters(const AlterCommands & commands, // + const OptionTableInfoConstRef table_info, + ColumnID & max_column_id_used, + const Context & context); + void setMinDataVersion(UInt64 version) { min_version = version; } - const ColumnDefines & getTableColumns() { return table_columns; } - const ColumnDefine & getHandle() { return table_handle_define; } + const ColumnDefines & getTableColumns() const { return table_columns; } + const ColumnDefine & getHandle() const { return table_handle_define; } + const Block & getHeader() const { return header; } + const Settings & getSettings() const { return settings; } void check(const Context & db_context, const DB::Settings & db_settings); @@ -73,7 +88,6 @@ class DeltaMergeStore { return DMContext{.db_context = db_context, .storage_pool = storage_pool, - .table_name = table_name, .table_columns = table_columns, .table_handle_define = table_handle_define, .min_version = min_version, @@ -91,15 +105,25 @@ class DeltaMergeStore void split(DMContext & dm_context, const SegmentPtr & segment); void merge(DMContext & dm_context, const SegmentPtr & left, const SegmentPtr & right); + void applyAlter(const AlterCommand & command, // + const OptionTableInfoConstRef table_info, + ColumnID & max_column_id_used); + + static Block genHeaderBlock(const ColumnDefines & raw_columns, // + const ColumnDefine & handle_define, + const DataTypePtr & handle_real_type); + private: using SegmentSortedMap = std::map; - String path; - StoragePool storage_pool; + String path; + StoragePool storage_pool; + String table_name; ColumnDefines table_columns; ColumnDefine table_handle_define; DataTypePtr table_handle_real_type; + Block header; // an empty block header BackgroundProcessingPool & background_pool; BackgroundProcessingPool::TaskHandle gc_handle; diff --git a/dbms/src/Storages/DeltaMerge/DiskValueSpace.cpp b/dbms/src/Storages/DeltaMerge/DiskValueSpace.cpp 
index 29109d133ef..1edc9862469 100644 --- a/dbms/src/Storages/DeltaMerge/DiskValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/DiskValueSpace.cpp @@ -181,13 +181,8 @@ AppendTaskPtr DiskValueSpace::createAppendTask(const OpContext & context, Append if (!is_delete) new_col->insertRangeFrom(*append_block.getByName(col_define.name).column, 0, append_rows); - ColumnWithTypeAndName col; - col.column = std::move(new_col); - col.name = col_define.name; - col.type = col_define.type; - col.column_id = col_define.id; - - compacted_block.insert(col); + ColumnWithTypeAndName col(std::move(new_col), col_define.type, col_define.name, col_define.id); + compacted_block.insert(std::move(col)); } } @@ -380,7 +375,7 @@ Block DiskValueSpace::read(const ColumnDefines & read_column_defines, if (rows_end_in_chunk > rows_start_in_chunk) { readChunkData( - columns, cur_chunk, read_column_defines, page_reader, rows_start_in_chunk, rows_end_in_chunk - rows_start_in_chunk); + columns, read_column_defines, cur_chunk, page_reader, rows_start_in_chunk, rows_end_in_chunk - rows_start_in_chunk); already_read_rows += rows_end_in_chunk - rows_start_in_chunk; } @@ -391,6 +386,8 @@ Block DiskValueSpace::read(const ColumnDefines & read_column_defines, if (already_read_rows < rows_limit) { + // TODO We flush the cache each time in `StorageDeltaMerge::alterImpl`, so that only data with the newest schema is in cache. For now we handle neither newly inserted cols nor col type changes in cache. + // chunk_index could be larger than chunk_cache_start. 
size_t cache_rows_offset = 0; for (size_t i = chunk_cache_start; i < chunk_index; ++i) @@ -398,11 +395,12 @@ Block DiskValueSpace::read(const ColumnDefines & read_column_defines, for (size_t index = 0; index < read_column_defines.size(); ++index) { - ColumnDefine define = read_column_defines[index]; - auto & cache_col = cache.at(define.id); + const ColumnDefine & define = read_column_defines[index]; + auto & cache_col = cache.at(define.id); // TODO a newly inserted col's id doesn't exist in cache. size_t rows_offset_in_chunk = chunk_index == start_chunk_index ? rows_start_in_start_chunk : 0; + // TODO columns[index].type may be inconsistent with cache_col after ddl. columns[index]->insertRangeFrom(*cache_col, cache_rows_offset + rows_offset_in_chunk, rows_limit - already_read_rows); } } @@ -411,13 +409,8 @@ Block DiskValueSpace::read(const ColumnDefines & read_column_defines, for (size_t index = 0; index < read_column_defines.size(); ++index) { const ColumnDefine & define = read_column_defines[index]; - ColumnWithTypeAndName col; - col.type = define.type; - col.name = define.name; - col.column_id = define.id; - col.column = std::move(columns[index]); - - res.insert(col); + ColumnWithTypeAndName col(std::move(columns[index]), define.type, define.name, define.id); + res.insert(std::move(col)); } return res; } @@ -442,11 +435,13 @@ Block DiskValueSpace::read(const ColumnDefines & read_column_defines, const Page if (chunk_index < chunk_cache_start) { // Read from storage - readChunkData(columns, chunk, read_column_defines, page_reader, 0, chunk.getRows()); + readChunkData(columns, read_column_defines, chunk, page_reader, 0, chunk.getRows()); } else { // Read from cache + + // TODO We flush the cache each time in `StorageDeltaMerge::alterImpl`, so that only data with the newest schema is in cache. For now we handle neither newly inserted cols nor col type changes in cache. 
size_t cache_rows_offset = 0; for (size_t i = chunk_cache_start; i < chunk_index; ++i) cache_rows_offset += chunks[i].getRows(); @@ -464,13 +459,8 @@ Block DiskValueSpace::read(const ColumnDefines & read_column_defines, const Page for (size_t index = 0; index < read_column_defines.size(); ++index) { const ColumnDefine & define = read_column_defines[index]; - ColumnWithTypeAndName col; - col.type = define.type; - col.name = define.name; - col.column_id = define.id; - col.column = std::move(columns[index]); - - res.insert(col); + ColumnWithTypeAndName col(std::move(columns[index]), define.type, define.name, define.id); + res.insert(std::move(col)); } return res; } @@ -515,14 +505,15 @@ BlockOrDeletes DiskValueSpace::getMergeBlocks(const ColumnDefine & handle, bool DiskValueSpace::tryFlushCache(const OpContext & context, bool force) { - if (!cache_chunks) + if (cache_chunks == 0) return false; - const size_t cache_rows = cacheRows(); - // A chunk can only contains one delete range. + // If last chunk is a delete range, we should flush cache. HandleRange delete_range = chunks.back().isDeleteRange() ? chunks.back().getDeleteRange() : HandleRange::newNone(); if (!delete_range.none()) force = true; + + const size_t cache_rows = cacheRows(); if (!force && cache_rows < context.dm_context.delta_cache_limit_rows && cacheBytes() < context.dm_context.delta_cache_limit_bytes) return false; @@ -581,11 +572,7 @@ bool DiskValueSpace::doFlushCache(const OpContext & context) // Use the cache. 
for (const auto & col_define : context.dm_context.table_columns) { - ColumnWithTypeAndName col; - col.column = cache.at(col_define.id)->cloneResized(cache_rows); - col.name = col_define.name; - col.type = col_define.type; - col.column_id = col_define.id; + ColumnWithTypeAndName col(cache.at(col_define.id)->cloneResized(cache_rows), col_define.type, col_define.name, col_define.id); compacted.insert(col); if (unlikely(col.column->size() != cache_rows)) diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 4f4d7fb05a0..869d9aca42b 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -369,6 +369,12 @@ SegmentPtr Segment::flush(DMContext & dm_context) return new_me; } +void Segment::flushCache(DMContext &dm_context) +{ + std::unique_lock lock(read_write_mutex); + delta->tryFlushCache(OpContext::createForLogStorage(dm_context), /* force= */ true); +} + Segment::Segment(UInt64 epoch_, const HandleRange & range_, PageId segment_id_, PageId next_segment_id_, PageId delta_id, PageId stable_id) : epoch(epoch_), range(range_), diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index abe29be53d2..f44091ff7f5 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -118,6 +118,9 @@ class Segment : private boost::noncopyable /// Flush delta into stable. i.e. delta merge. SegmentPtr flush(DMContext & dm_context); + /// Flush delta's cached chunks. 
+ void flushCache(DMContext & dm_context); + size_t getEstimatedRows(); size_t getEstimatedBytes(); diff --git a/dbms/src/Storages/DeltaMerge/registerStorageDeltaMerge.cpp b/dbms/src/Storages/DeltaMerge/registerStorageDeltaMerge.cpp new file mode 100644 index 00000000000..9e5da382533 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/registerStorageDeltaMerge.cpp @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +static ASTPtr extractKeyExpressionList(IAST & node) +{ + const ASTFunction * expr_func = typeid_cast(&node); + + if (expr_func && expr_func->name == "tuple") + { + /// Primary key is specified in tuple. + return expr_func->children.at(0); + } + else + { + /// Primary key consists of one column. + auto res = std::make_shared(); + res->children.push_back(node.ptr()); + return res; + } +} + +static String getDeltaMergeVerboseHelp() +{ + String help = R"( + +DeltaMerge requires: +- primary key +- an extra table info parameter in JSON format +- in most cases, it should be created implicitly through raft rather than explicitly + +Examples of creating a DeltaMerge table: +- Create Table ... engine = DeltaMerge((CounterID, EventDate)) # JSON format table info is set to empty string +- Create Table ... 
engine = DeltaMerge((CounterID, EventDate), '{JSON format table info}') +)"; + return help; +} + +void registerStorageDeltaMerge(StorageFactory & factory) +{ + factory.registerStorage("DeltaMerge", [](const StorageFactory::Arguments & args) { + if (args.engine_args.size() > 2 || args.engine_args.empty()) + throw Exception(getDeltaMergeVerboseHelp(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + ASTPtr primary_expr_list = extractKeyExpressionList(*args.engine_args[0]); + + TiDB::TableInfo info; + // Note: if `table_info_json` is not empty, `table_info` store a ref to `info` + std::optional> table_info = std::nullopt; + if (args.engine_args.size() == 2) + { + auto ast = typeid_cast(args.engine_args[1].get()); + if (ast && ast->value.getType() == Field::Types::String) + { + const auto table_info_json = safeGet(ast->value); + if (!table_info_json.empty()) + { + info.deserialize(table_info_json); + if (unlikely(info.columns.empty())) + throw Exception("Engine DeltaMerge table info is invalid. 
# of columns = 0", ErrorCodes::BAD_ARGUMENTS); + table_info = info; + } + } + else + throw Exception("Engine DeltaMerge table info must be a string" + getDeltaMergeVerboseHelp(), ErrorCodes::BAD_ARGUMENTS); + } + return StorageDeltaMerge::create(args.data_path, args.table_name, table_info, args.columns, primary_expr_list, args.context); + }); +} + +} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/CMakeLists.txt b/dbms/src/Storages/DeltaMerge/tests/CMakeLists.txt index ed325652e15..fc30bf70fd3 100644 --- a/dbms/src/Storages/DeltaMerge/tests/CMakeLists.txt +++ b/dbms/src/Storages/DeltaMerge/tests/CMakeLists.txt @@ -7,9 +7,9 @@ macro(grep_gtest_sources BASE_DIR DST_VAR) endmacro() # attach all dm gtest sources grep_gtest_sources(${ClickHouse_SOURCE_DIR}/dbms/src/Storages/DeltaMerge/tests dm_gtest_sources) -add_executable(unit_tests_dm EXCLUDE_FROM_ALL ${dm_gtest_sources} dm_basic_include.h) -target_link_libraries(unit_tests_dm gtest_main dbms clickhouse_functions) -add_check(unit_tests_dm) +add_executable(gtests_dm EXCLUDE_FROM_ALL ${dm_gtest_sources} dm_basic_include.h) +target_link_libraries(gtests_dm gtest_main dbms clickhouse_functions) +add_check(gtests_dm) # add unit test by hand add_executable(dm_test_storage_delta_merge EXCLUDE_FROM_ALL gtest_dm_storage_delta_merge.cpp) diff --git a/dbms/src/Storages/DeltaMerge/tests/dm_basic_include.h b/dbms/src/Storages/DeltaMerge/tests/dm_basic_include.h index 2386b516361..ca54c60e4f1 100644 --- a/dbms/src/Storages/DeltaMerge/tests/dm_basic_include.h +++ b/dbms/src/Storages/DeltaMerge/tests/dm_basic_include.h @@ -19,11 +19,28 @@ namespace tests class DMTestEnv { public: - static Context getContext() + static Context getContext(const ::DB::Settings &settings = DB::Settings()) { - return ::DB::tests::TiFlashTestEnv::getContext(); + return ::DB::tests::TiFlashTestEnv::getContext(settings); } + static ColumnDefines getDefaultColumns() + { + ColumnDefines columns; + columns.emplace_back(ColumnDefine(1, "pk", 
std::make_shared())); + columns.emplace_back(VERSION_COLUMN_DEFINE); + columns.emplace_back(TAG_COLUMN_DEFINE); + return columns; + } + + /** + * Create a simple block with 3 columns: + * * `pk` - Int64 / `version` / `tag` + * @param beg `pk`'s value begin + * @param end `pk`'s value end (not included) + * @param reversed increasing/decreasing insert `pk`'s value + * @return + */ static Block prepareSimpleWriteBlock(size_t beg, size_t end, bool reversed) { Block block; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_chunk.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_chunk.cpp index 1df78e14076..221c96b2811 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_chunk.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_chunk.cpp @@ -1,8 +1,8 @@ #include -#include -#include #include +#include +#include namespace DB { @@ -14,16 +14,13 @@ namespace tests TEST(Chunk_test, Insert) { const UInt32 num_rows = 1024; - Chunk chunk; + Chunk chunk; - ColumnMeta u64_meta{ - 1, 2, num_rows, 4, - std::make_shared() - }; + ColumnMeta u64_meta{1, 2, num_rows, 4, std::make_shared()}; chunk.insert(u64_meta); { - const ColumnMeta &got = chunk.getColumn(u64_meta.col_id); + const ColumnMeta & got = chunk.getColumn(u64_meta.col_id); EXPECT_EQ(got.col_id, u64_meta.col_id); EXPECT_EQ(got.rows, u64_meta.rows); EXPECT_EQ(got.bytes, u64_meta.bytes); @@ -31,14 +28,11 @@ TEST(Chunk_test, Insert) EXPECT_TRUE(got.type->equals(*u64_meta.type)); } - ColumnMeta string_meta{ - 2, 3, num_rows, 5, - std::make_shared() - }; + ColumnMeta string_meta{2, 3, num_rows, 5, std::make_shared()}; chunk.insert(string_meta); { - const ColumnMeta &got = chunk.getColumn(string_meta.col_id); + const ColumnMeta & got = chunk.getColumn(string_meta.col_id); EXPECT_EQ(got.col_id, string_meta.col_id); EXPECT_EQ(got.rows, string_meta.rows); EXPECT_EQ(got.bytes, string_meta.bytes); @@ -50,31 +44,25 @@ TEST(Chunk_test, Insert) TEST(Chunk_test, Seri) { const UInt32 num_rows = 1024; - Chunk chunk; + Chunk 
chunk; - ColumnMeta u64_meta{ - 1, 2, num_rows, 4, - std::make_shared() - }; + ColumnMeta u64_meta{1, 2, num_rows, 4, std::make_shared()}; chunk.insert(u64_meta); - ColumnMeta string_meta{ - 2, 3, num_rows, 5, - std::make_shared() - }; + ColumnMeta string_meta{2, 3, num_rows, 5, std::make_shared()}; chunk.insert(string_meta); EXPECT_FALSE(chunk.isDeleteRange()); MemoryWriteBuffer wbuf(0, 1024); chunk.serialize(wbuf); - auto buf = wbuf.buffer(); + auto buf = wbuf.buffer(); ReadBufferFromMemory rbuf(buf.begin(), buf.size()); - Chunk deseri_chunk = Chunk::deserialize(rbuf); + Chunk deseri_chunk = Chunk::deserialize(rbuf); EXPECT_EQ(deseri_chunk.getRows(), chunk.getRows()); EXPECT_FALSE(deseri_chunk.isDeleteRange()); // check ColumnMeta in deseri_chunk { - const ColumnMeta &got = deseri_chunk.getColumn(u64_meta.col_id); + const ColumnMeta & got = deseri_chunk.getColumn(u64_meta.col_id); EXPECT_EQ(got.col_id, u64_meta.col_id); EXPECT_EQ(got.rows, u64_meta.rows); EXPECT_EQ(got.bytes, u64_meta.bytes); @@ -82,7 +70,7 @@ TEST(Chunk_test, Seri) EXPECT_TRUE(got.type->equals(*u64_meta.type)); } { - const ColumnMeta &got = deseri_chunk.getColumn(string_meta.col_id); + const ColumnMeta & got = deseri_chunk.getColumn(string_meta.col_id); EXPECT_EQ(got.col_id, string_meta.col_id); EXPECT_EQ(got.rows, string_meta.rows); EXPECT_EQ(got.bytes, string_meta.bytes); @@ -94,14 +82,14 @@ TEST(Chunk_test, Seri) TEST(DeleteRange_test, Seri) { HandleRange range{20, 999}; - Chunk chunk(range); + Chunk chunk(range); EXPECT_TRUE(chunk.isDeleteRange()); MemoryWriteBuffer wbuf(0, 1024); chunk.serialize(wbuf); - auto buf = wbuf.buffer(); + auto buf = wbuf.buffer(); ReadBufferFromMemory rbuf(buf.begin(), buf.size()); - Chunk deseri_chunk = Chunk::deserialize(rbuf); + Chunk deseri_chunk = Chunk::deserialize(rbuf); EXPECT_TRUE(deseri_chunk.isDeleteRange()); const HandleRange deseri_range = deseri_chunk.getDeleteRange(); @@ -109,6 +97,188 @@ TEST(DeleteRange_test, Seri) EXPECT_EQ(deseri_range.end, 
range.end); } +namespace +{ +DataTypePtr typeFromString(const String & str) +{ + auto & data_type_factory = DataTypeFactory::instance(); + return data_type_factory.get(str); +} +} // namespace + +TEST(ChunkColumnCast_test, CastNumeric) +{ + { + const Strings to_types = {"UInt16", "UInt32", "UInt64"}; + + DataTypePtr disk_data_type = typeFromString("UInt8"); + MutableColumnPtr disk_col = disk_data_type->createColumn(); + disk_col->insert(Field(UInt64(15))); + disk_col->insert(Field(UInt64(255))); + + for (const String & to_type : to_types) + { + DataTypePtr read_data_type = typeFromString(to_type); + ColumnDefine read_define(0, "c", read_data_type); + MutableColumnPtr memory_column = read_data_type->createColumn(); + memory_column->reserve(2); + + castColumnAccordingToColumnDefine(disk_data_type, disk_col->getPtr(), read_define, memory_column->getPtr(), 0, 2); + + UInt64 val1 = memory_column->getUInt(0); + ASSERT_EQ(val1, 15UL); + UInt64 val2 = memory_column->getUInt(1); + ASSERT_EQ(val2, 255UL); + } + } + + { + const Strings to_types = {"Int16", "Int32", "Int64"}; + + DataTypePtr disk_data_type = typeFromString("Int8"); + MutableColumnPtr disk_col = disk_data_type->createColumn(); + disk_col->insert(Field(Int64(127))); + disk_col->insert(Field(Int64(-1))); + + for (const String & to_type : to_types) + { + DataTypePtr read_data_type = typeFromString(to_type); + ColumnDefine read_define(0, "c", read_data_type); + MutableColumnPtr memory_column = read_data_type->createColumn(); + memory_column->reserve(2); + + castColumnAccordingToColumnDefine(disk_data_type, disk_col->getPtr(), read_define, memory_column->getPtr(), 0, 2); + + Int64 val1 = memory_column->getInt(0); + ASSERT_EQ(val1, 127L); + Int64 val2 = memory_column->getInt(1); + ASSERT_EQ(val2, -1L); + } + } +} + +TEST(ChunkColumnCast_test, CastNullableToNotNull) +{ + const Strings to_types = {"Int16", "Int32", "Int64"}; + + DataTypePtr disk_data_type = typeFromString("Nullable(Int8)"); + MutableColumnPtr disk_col 
= disk_data_type->createColumn(); + disk_col->insert(Field()); // a "NULL" value + disk_col->insert(Field(Int64(127))); + disk_col->insert(Field(Int64(-1))); + + for (const String & to_type : to_types) + { + DataTypePtr read_data_type = typeFromString(to_type); + ColumnDefine read_define(0, "c", read_data_type); + MutableColumnPtr memory_column = read_data_type->createColumn(); + memory_column->reserve(3); + + castColumnAccordingToColumnDefine(disk_data_type, disk_col->getPtr(), read_define, memory_column->getPtr(), 0, 3); + + Int64 val1 = memory_column->getInt(0); + ASSERT_EQ(val1, 0); // "NULL" value is cast to 0 + Int64 val2 = memory_column->getInt(1); + ASSERT_EQ(val2, 127L); + Int64 val3 = memory_column->getUInt(2); + ASSERT_EQ(val3, -1L); + } +} + +TEST(ChunkColumnCast_test, DISABLED_CastNullableToNotNullWithNonZeroDefaultValue) +{ + const Strings to_types = {"Int16", "Int32", "Int64"}; + + DataTypePtr disk_data_type = typeFromString("Nullable(Int8)"); + MutableColumnPtr disk_col = disk_data_type->createColumn(); + disk_col->insert(Field()); // a "NULL" value + disk_col->insert(Field(Int64(127))); + disk_col->insert(Field(Int64(-1))); + + for (const String & to_type : to_types) + { + DataTypePtr read_data_type = typeFromString(to_type); + ColumnDefine read_define(0, "c", read_data_type); + read_define.default_value = "5"; + MutableColumnPtr memory_column = read_data_type->createColumn(); + memory_column->reserve(3); + + castColumnAccordingToColumnDefine(disk_data_type, disk_col->getPtr(), read_define, memory_column->getPtr(), 0, 3); + + Int64 val1 = memory_column->getInt(0); + ASSERT_EQ(val1, 5); // "NULL" value is cast to default value (5) + Int64 val2 = memory_column->getInt(1); + ASSERT_EQ(val2, 127L); + Int64 val3 = memory_column->getUInt(2); + ASSERT_EQ(val3, -1L); + } +} + +TEST(ChunkColumnCast_test, CastNullableToNullable) +{ + const Strings to_types = {"Nullable(Int16)", "Nullable(Int32)", "Nullable(Int64)"}; + + DataTypePtr disk_data_type = 
typeFromString("Nullable(Int8)"); + MutableColumnPtr disk_col = disk_data_type->createColumn(); + disk_col->insert(Field()); // a "NULL" value + disk_col->insert(Field(Int64(127))); + disk_col->insert(Field(Int64(-1))); + + for (const String & to_type : to_types) + { + DataTypePtr read_data_type = typeFromString(to_type); + ColumnDefine read_define(0, "c", read_data_type); + MutableColumnPtr memory_column = read_data_type->createColumn(); + memory_column->reserve(3); + + castColumnAccordingToColumnDefine(disk_data_type, disk_col->getPtr(), read_define, memory_column->getPtr(), 0, 3); + + ASSERT_TRUE(memory_column->isNullAt(0)); + Field f = (*memory_column)[0]; + ASSERT_TRUE(f.isNull()); + + ASSERT_FALSE(memory_column->isNullAt(1)); + f = (*memory_column)[1]; + ASSERT_EQ(f.getType(), Field::Types::Int64); + ASSERT_EQ(f.get(), 127L); + + ASSERT_FALSE(memory_column->isNullAt(2)); + f = (*memory_column)[2]; + ASSERT_EQ(f.getType(), Field::Types::Int64); + ASSERT_EQ(f.get(), -1L); + } +} + +TEST(ChunkColumnCast_test, CastNotNullToNullable) +{ + const Strings to_types = {"Nullable(Int16)", "Nullable(Int32)", "Nullable(Int64)"}; + + DataTypePtr disk_data_type = typeFromString("Int8"); + MutableColumnPtr disk_col = disk_data_type->createColumn(); + disk_col->insert(Field(Int64(127))); + disk_col->insert(Field(Int64(-1))); + + for (const String & to_type : to_types) + { + DataTypePtr read_data_type = typeFromString(to_type); + ColumnDefine read_define(0, "c", read_data_type); + MutableColumnPtr memory_column = read_data_type->createColumn(); + memory_column->reserve(2); + + castColumnAccordingToColumnDefine(disk_data_type, disk_col->getPtr(), read_define, memory_column->getPtr(), 0, 2); + + ASSERT_FALSE(memory_column->isNullAt(0)); + Field f = (*memory_column)[0]; + ASSERT_EQ(f.getType(), Field::Types::Int64); + ASSERT_EQ(f.get(), 127L); + + ASSERT_FALSE(memory_column->isNullAt(1)); + f = (*memory_column)[1]; + ASSERT_EQ(f.getType(), Field::Types::Int64); + 
ASSERT_EQ(f.get(), -1L); + } +} + } // namespace tests } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index f8d9a797feb..165f479553d 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -24,64 +24,111 @@ class DeltaMergeStore_test : public ::testing::Test Poco::File file(path); if (file.exists()) file.remove(true); + + context = std::make_unique(DMTestEnv::getContext()); + store = reload(); } -protected: + DeltaMergeStorePtr reload(const ColumnDefines & pre_define_columns = {}) + { + ColumnDefines cols = pre_define_columns.empty() ? DMTestEnv::getDefaultColumns() : pre_define_columns; + ColumnDefine handle_column_define = cols[0]; + + DeltaMergeStorePtr s + = std::make_shared(*context, path, name, cols, handle_column_define, DeltaMergeStore::Settings()); + return s; + } + +private: // the table name String name; // the path to the dir of table String path; + +protected: + std::unique_ptr context; + DeltaMergeStorePtr store; }; -TEST_F(DeltaMergeStore_test, Case1) +TEST_F(DeltaMergeStore_test, Create) { // create table - Context context = DMTestEnv::getContext(); - ColumnDefine handle_column_define(1, "pk", std::make_shared()); - ColumnDefines table_column_defines; - { - table_column_defines.emplace_back(handle_column_define); - ColumnDefine cd(2, "col2", std::make_shared()); - table_column_defines.emplace_back(cd); - } + ASSERT_NE(store, nullptr); - DeltaMergeStorePtr store - = std::make_shared(context, path, name, table_column_defines, handle_column_define, DeltaMergeStore::Settings()); { // check handle column of store auto & h = store->getHandle(); - ASSERT_EQ(h.name, handle_column_define.name); - ASSERT_EQ(h.id, handle_column_define.id); - ASSERT_TRUE(h.type->equals(*handle_column_define.type)); + ASSERT_EQ(h.name, "pk"); + 
ASSERT_EQ(h.id, 1); + ASSERT_TRUE(h.type->equals(*DataTypeFactory::instance().get("Int64"))); } { // check column structure of store auto & cols = store->getTableColumns(); // version & tag column added - ASSERT_EQ(cols.size(), table_column_defines.size() + 2); - // TODO check other cols name/type + ASSERT_EQ(cols.size(), 3UL); + } +} + +TEST_F(DeltaMergeStore_test, SimpleWriteRead) +{ + const ColumnDefine col_str_define(2, "col2", std::make_shared()); + const ColumnDefine col_i8_define(3, "i8", std::make_shared()); + { + ColumnDefines table_column_defines = DMTestEnv::getDefaultColumns(); + table_column_defines.emplace_back(col_str_define); + table_column_defines.emplace_back(col_i8_define); + store = reload(table_column_defines); } - const size_t num_rows_write = 500; + { + // check column structure + const auto & cols = store->getTableColumns(); + ASSERT_EQ(cols.size(), 5UL); + const auto & str_col = cols[3]; + ASSERT_EQ(str_col.name, col_str_define.name); + ASSERT_EQ(str_col.id, col_str_define.id); + ASSERT_TRUE(str_col.type->equals(*col_str_define.type)); + const auto & i8_col = cols[4]; + ASSERT_EQ(i8_col.name, col_i8_define.name); + ASSERT_EQ(i8_col.id, col_i8_define.id); + ASSERT_TRUE(i8_col.type->equals(*col_i8_define.type)); + } + + const size_t num_rows_write = 128; { // write to store Block block; { - block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, true); - - ColumnWithTypeAndName col2(std::make_shared(), "col2"); + block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); + // Add a column of col2:String for test + ColumnWithTypeAndName col2(col_str_define.type, col_str_define.name); { IColumn::MutablePtr m_col2 = col2.type->createColumn(); for (size_t i = 0; i < num_rows_write; i++) { - Field field("a", 1); + String s = DB::toString(i); + Field field(s.c_str(), s.size()); m_col2->insert(field); } col2.column = std::move(m_col2); } - block.insert(col2); + block.insert(std::move(col2)); + + // Add a column of i8:Int8 for test + 
ColumnWithTypeAndName i8(col_i8_define.type, col_i8_define.name); + { + IColumn::MutablePtr m_i8 = i8.type->createColumn(); + for (size_t i = 0; i < num_rows_write; i++) + { + Int64 num = i * (i % 2 == 0 ? -1 : 1); + m_i8->insert(Field(num)); + } + i8.column = std::move(m_i8); + } + block.insert(std::move(i8)); } - store->write(context, context.getSettingsRef(), block); + store->write(*context, context->getSettingsRef(), block); } { @@ -89,10 +136,12 @@ TEST_F(DeltaMergeStore_test, Case1) // TODO read data from mutli streams // TODO read partial columns from store // TODO read data of max_version + // read all columns from store - BlockInputStreamPtr in = store->read(context, - context.getSettingsRef(), - table_column_defines, + const auto & columns = store->getTableColumns(); + BlockInputStreamPtr in = store->read(*context, + context->getSettingsRef(), + columns, {HandleRange::newAll()}, /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), @@ -113,10 +162,126 @@ TEST_F(DeltaMergeStore_test, Case1) //printf("pk:%lld\n", c->getInt(i)); EXPECT_EQ(c->getInt(i), i); } - else if (iter.name == "col2") + else if (iter.name == col_str_define.name) + { + //printf("%s:%s\n", col_str_define.name.c_str(), c->getDataAt(i).data); + EXPECT_EQ(c->getDataAt(i), DB::toString(i)); + } + else if (iter.name == col_i8_define.name) + { + //printf("%s:%lld\n", col_i8_define.name.c_str(), c->getInt(i)); + Int64 num = i * (i % 2 == 0 ? 
-1 : 1); + EXPECT_EQ(c->getInt(i), num); + } + } + } + } + in->readSuffix(); + ASSERT_EQ(num_rows_read, num_rows_write); + } +} + +TEST_F(DeltaMergeStore_test, DDLChanegInt8ToInt32) +try +{ + const String col_name_ddl = "i8"; + const ColId col_id_ddl = 2; + const DataTypePtr col_type_before_ddl = DataTypeFactory::instance().get("Int8"); + const DataTypePtr col_type_after_ddl = DataTypeFactory::instance().get("Int32"); + { + ColumnDefines table_column_defines = DMTestEnv::getDefaultColumns(); + ColumnDefine cd(col_id_ddl, col_name_ddl, col_type_before_ddl); + table_column_defines.emplace_back(cd); + store = reload(table_column_defines); + } + + { + // check column structure + const auto & cols = store->getTableColumns(); + ASSERT_EQ(cols.size(), 4UL); + const auto & str_col = cols[3]; + ASSERT_EQ(str_col.name, col_name_ddl); + ASSERT_EQ(str_col.id, col_id_ddl); + ASSERT_TRUE(str_col.type->equals(*col_type_before_ddl)); + } + + const size_t num_rows_write = 128; + { + // write to store + Block block; + { + block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); + // Add the column named by `col_name_ddl` ("i8") for test + ColumnWithTypeAndName col2(std::make_shared(), col_name_ddl); + { + IColumn::MutablePtr m_col2 = col2.type->createColumn(); + for (size_t i = 0; i < num_rows_write; i++) + { + Int64 num = i * (i % 2 == 0 ? 
-1 : 1); + m_col2->insert(Field(num)); + } + col2.column = std::move(m_col2); + } + block.insert(col2); + } + store->write(*context, context->getSettingsRef(), block); + } + + { + // DDL change col from i8 -> i32 + AlterCommands commands; + { + AlterCommand com; + com.type = AlterCommand::MODIFY_COLUMN; + com.data_type = col_type_after_ddl; + com.column_name = col_name_ddl; + commands.emplace_back(std::move(com)); + } + ColumnID _ignored = 0; + store->applyAlters(commands, std::nullopt, _ignored, *context); + } + + { + // read all columns from store + const auto & columns = store->getTableColumns(); + BlockInputStreams ins = store->read(*context, + context->getSettingsRef(), + columns, + {HandleRange::newAll()}, + /* num_streams= */ 1, + /* max_version= */ std::numeric_limits::max(), + /* expected_block_size= */ 1024); + ASSERT_EQ(ins.size(), 1UL); + BlockInputStreamPtr & in = ins[0]; + { + // check col type + const Block head = in->getHeader(); + const auto & col = head.getByName(col_name_ddl); + ASSERT_EQ(col.name, col_name_ddl); + ASSERT_EQ(col.column_id, col_id_ddl); + ASSERT_TRUE(col.type->equals(*col_type_after_ddl)); + } + + size_t num_rows_read = 0; + in->readPrefix(); + while (Block block = in->read()) + { + num_rows_read += block.rows(); + for (auto && iter : block) + { + auto c = iter.column; + for (Int64 i = 0; i < Int64(c->size()); ++i) + { + if (iter.name == "pk") + { + //printf("pk:%lld\n", c->getInt(i)); + EXPECT_EQ(c->getInt(i), i); + } + else if (iter.name == col_name_ddl) { //printf("col2:%s\n", c->getDataAt(i).data); - EXPECT_EQ(c->getDataAt(i), "a"); + Int64 num = i * (i % 2 == 0 ? -1 : 1); + EXPECT_EQ(c->getInt(i), num); } } } @@ -125,6 +290,17 @@ TEST_F(DeltaMergeStore_test, Case1) ASSERT_EQ(num_rows_read, num_rows_write); } } +catch (const Exception & e) +{ + std::string text = e.displayText(); + + auto embedded_stack_trace_pos = text.find("Stack trace"); + std::cerr << "Code: " << e.code() << ". 
" << text << std::endl << std::endl; + if (std::string::npos == embedded_stack_trace_pos) + std::cerr << "Stack trace:" << std::endl << e.getStackTrace().toString() << std::endl; + + throw; +} } // namespace tests } // namespace DM diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_disk_value_space.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_disk_value_space.cpp index 3e9456b69b3..461bb600087 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_disk_value_space.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_disk_value_space.cpp @@ -40,10 +40,12 @@ class DiskValueSpace_test : public ::testing::Test table_columns.emplace_back(VERSION_COLUMN_DEFINE); table_columns.emplace_back(TAG_COLUMN_DEFINE); + // TODO fill columns + // table_info.columns.emplace_back(); + dm_context = std::make_unique( DMContext{.db_context = context, .storage_pool = *storage_pool, - .table_name = name, .table_columns = table_columns, .table_handle_define = table_handle_define, .min_version = 0, @@ -62,6 +64,7 @@ class DiskValueSpace_test : public ::testing::Test String path; /// all these var lives as ref in dm_context std::unique_ptr storage_pool; + TiDB::TableInfo table_info; ColumnDefine table_handle_define; ColumnDefines table_columns; DM::DeltaMergeStore::Settings settings; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp index 5b8f304ba9f..b289e3291b9 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp @@ -2,7 +2,10 @@ #include "dm_basic_include.h" +#include #include +#include +#include #include #include @@ -30,49 +33,72 @@ class Segment_test : public ::testing::Test } protected: + static void SetUpTestCase() + { + Poco::AutoPtr channel = new Poco::ConsoleChannel(std::cerr); + Poco::AutoPtr formatter(new Poco::PatternFormatter); + formatter->setProperty("pattern", "%L%Y-%m-%d %H:%M:%S.%i <%p> %s: %t"); + 
Poco::AutoPtr formatting_channel(new Poco::FormattingChannel(formatter, channel)); + Logger::root().setChannel(formatting_channel); + Logger::root().setLevel("trace"); + } + void SetUp() override { + db_context = std::make_unique(DMTestEnv::getContext(DB::Settings())); dropDataInDisk(); - storage_pool = std::make_unique(path); - Context context = DMTestEnv::getContext(); - table_handle_define = ColumnDefine(1, "pk", std::make_shared()); - table_columns.clear(); - table_columns.emplace_back(table_handle_define); - table_columns.emplace_back(VERSION_COLUMN_DEFINE); - table_columns.emplace_back(TAG_COLUMN_DEFINE); - - dm_context = std::make_unique( - DMContext{.db_context = context, + segment = reload(); + ASSERT_EQ(segment->segmentId(), DELTA_MERGE_FIRST_SEGMENT_ID); + } + + SegmentPtr reload(ColumnDefines && pre_define_columns = {}, DB::Settings && db_settings = DB::Settings()) + { + storage_pool = std::make_unique(path); + *db_context = DMTestEnv::getContext(db_settings); + ColumnDefines cols = pre_define_columns.empty() ? 
DMTestEnv::getDefaultColumns() : pre_define_columns; + setColumns(cols); + + auto segment_id = storage_pool->newMetaPageId(); + return Segment::newSegment(*dm_context_, HandleRange::newAll(), segment_id, 0); + } + + // setColumns should update dm_context at the same time + void setColumns(const ColumnDefines & columns) + { + table_columns_ = columns; + + dm_context_ = std::make_unique( + DMContext{.db_context = *db_context, .storage_pool = *storage_pool, - .table_name = name, - .table_columns = table_columns, - .table_handle_define = table_handle_define, + .table_columns = table_columns_, + .table_handle_define = table_columns_.at(0), .min_version = 0, .not_compress = settings.not_compress_columns, - .delta_limit_rows = context.getSettingsRef().dm_segment_delta_limit_rows, - .delta_limit_bytes = context.getSettingsRef().dm_segment_delta_limit_bytes, - .delta_cache_limit_rows = context.getSettingsRef().dm_segment_delta_cache_limit_rows, - .delta_cache_limit_bytes = context.getSettingsRef().dm_segment_delta_cache_limit_bytes}); - - auto segment_id = storage_pool->newMetaPageId(); - ASSERT_EQ(segment_id, DELTA_MERGE_FIRST_SEGMENT_ID); - segment = Segment::newSegment(*dm_context, HandleRange::newAll(), segment_id, 0); + .delta_limit_rows = db_context->getSettingsRef().dm_segment_delta_limit_rows, + .delta_limit_bytes = db_context->getSettingsRef().dm_segment_delta_limit_bytes, + .delta_cache_limit_rows = db_context->getSettingsRef().dm_segment_delta_cache_limit_rows, + .delta_cache_limit_bytes = db_context->getSettingsRef().dm_segment_delta_cache_limit_bytes}); } -protected: + const ColumnDefines & tableColumns() const { return table_columns_; } + + DMContext & dmContext() { return *dm_context_; } + +private: + std::unique_ptr db_context; // the table name String name; // the path to the dir of table String path; /// all these var lives as ref in dm_context std::unique_ptr storage_pool; - ColumnDefine table_handle_define; - ColumnDefines table_columns; + ColumnDefines 
table_columns_; DM::DeltaMergeStore::Settings settings; /// dm_context - std::unique_ptr dm_context; + std::unique_ptr dm_context_; +protected: // the segment we are going to test SegmentPtr segment; }; @@ -83,15 +109,15 @@ TEST_F(Segment_test, WriteRead) { // write to segment Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); - segment->write(*dm_context, std::move(block)); + segment->write(dmContext(), std::move(block)); } { // read written data - auto in = segment->getInputStream(/* dm_context= */ *dm_context, + auto in = segment->getInputStream(/* dm_context= */ dmContext(), /* segment_snap= */ segment->getReadSnapshot(), - /* storage_snap= */ {dm_context->storage_pool}, - /* columns_to_read= */ table_columns, + /* storage_snap= */ {dmContext().storage_pool}, + /* columns_to_read= */ tableColumns(), /* read_ranges= */ {HandleRange::newAll()}, /* max_version= */ std::numeric_limits::max(), /* expected_block_size= */ 1024); @@ -108,17 +134,17 @@ TEST_F(Segment_test, WriteRead) { // test delete range [1,99) HandleRange remove(1, 99); - segment->write(*dm_context, {remove}); + segment->write(dmContext(), {remove}); // TODO test delete range partial overlap with segment // TODO test delete range not included by segment } { // read after delete range - auto in = segment->getInputStream(/* dm_context= */ *dm_context, + auto in = segment->getInputStream(/* dm_context= */ dmContext(), /* segment_snap= */ segment->getReadSnapshot(), - /* storage_snap= */ {dm_context->storage_pool}, - /* columns_to_read= */ table_columns, + /* storage_snap= */ {dmContext().storage_pool}, + /* columns_to_read= */ tableColumns(), /* read_ranges= */ {HandleRange::newAll()}, /* max_version= */ std::numeric_limits::max(), /* expected_block_size= */ 1024); @@ -146,18 +172,19 @@ TEST_F(Segment_test, Split) { // write to segment Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); - segment->write(*dm_context, std::move(block)); + 
segment->write(dmContext(), std::move(block)); } { // read written data - auto in = segment->getInputStream(/* dm_context= */ *dm_context, + auto in = segment->getInputStream(/* dm_context= */ dmContext(), /* segment_snap= */ segment->getReadSnapshot(), - /* storage_snap= */ {dm_context->storage_pool}, - /*columns_to_read= */ table_columns, + /* storage_snap= */ {dmContext().storage_pool}, + /* columns_to_read= */ tableColumns(), /* read_ranges= */ {HandleRange::newAll()}, /* max_version= */ std::numeric_limits::max(), /* expected_block_size= */ 1024); + size_t num_rows_read = 0; in->readPrefix(); while (Block block = in->read()) @@ -172,7 +199,7 @@ TEST_F(Segment_test, Split) SegmentPtr new_segment; // test split segment - std::tie(segment, new_segment) = segment->split(*dm_context); + std::tie(segment, new_segment) = segment->split(dmContext()); // check segment range const auto s1_range = segment->getRange(); @@ -186,10 +213,10 @@ TEST_F(Segment_test, Split) size_t num_rows_seg2 = 0; { { - auto in = segment->getInputStream(/* dm_context= */ *dm_context, + auto in = segment->getInputStream(/* dm_context= */ dmContext(), /* segment_snap= */ segment->getReadSnapshot(), - /* storage_snap= */ {dm_context->storage_pool}, - /* columns_to_read= */ table_columns, + /* storage_snap= */ {dmContext().storage_pool}, + /* columns_to_read= */ tableColumns(), /* read_ranges= */ {HandleRange::newAll()}, /* max_version= */ std::numeric_limits::max(), /* expected_block_size= */ 1024); @@ -201,13 +228,13 @@ TEST_F(Segment_test, Split) in->readSuffix(); } { - auto in = new_segment->getInputStream(/* dm_context= */ *dm_context, - /* segment_snap= */ segment->getReadSnapshot(), - /* storage_snap= */ {dm_context->storage_pool}, - /*columns_to_read= */ table_columns, - /* read_ranges= */ {HandleRange::newAll()}, - /* max_version= */ std::numeric_limits::max(), - /* expected_block_size= */ 1024); + auto in = segment->getInputStream(/* dm_context= */ dmContext(), + /* segment_snap= */ 
segment->getReadSnapshot(), + /* storage_snap= */ {dmContext().storage_pool}, + /* columns_to_read= */ tableColumns(), + /* read_ranges= */ {HandleRange::newAll()}, + /* max_version= */ std::numeric_limits::max(), + /* expected_block_size= */ 1024); in->readPrefix(); while (Block block = in->read()) { @@ -220,7 +247,7 @@ TEST_F(Segment_test, Split) // merge segments { - segment = Segment::merge(*dm_context, segment, new_segment); + segment = Segment::merge(dmContext(), segment, new_segment); { // check merged segment range const auto & merged_range = segment->getRange(); @@ -230,10 +257,10 @@ TEST_F(Segment_test, Split) } { size_t num_rows_read = 0; - auto in = segment->getInputStream(/* dm_context= */ *dm_context, + auto in = segment->getInputStream(/* dm_context= */ dmContext(), /* segment_snap= */ segment->getReadSnapshot(), - /* storage_snap= */ {dm_context->storage_pool}, - /* columns_to_read= */ table_columns, + /* storage_snap= */ {dmContext().storage_pool}, + /* columns_to_read= */ tableColumns(), /* read_ranges= */ {HandleRange::newAll()}, /* max_version= */ std::numeric_limits::max(), /* expected_block_size= */ 1024); @@ -248,7 +275,149 @@ TEST_F(Segment_test, Split) } } -TEST_F(Segment_test, Restore) {} +/// Mock a col from i8 -> i32 +TEST_F(Segment_test, DDLAlterInt8ToInt32) +{ + const String column_name_i8_to_i32 = "i8_to_i32"; + const ColumnID column_id_i8_to_i32 = 4; + const ColumnDefine column_i8_before_ddl(column_id_i8_to_i32, column_name_i8_to_i32, DataTypeFactory::instance().get("Int8")); + const ColumnDefine column_i32_after_ddl(column_id_i8_to_i32, column_name_i8_to_i32, DataTypeFactory::instance().get("Int32")); + + { + ColumnDefines columns_before_ddl = DMTestEnv::getDefaultColumns(); + columns_before_ddl.emplace_back(column_i8_before_ddl); + // Not cache any rows + DB::Settings db_settings; + db_settings.dm_segment_delta_cache_limit_rows = 0; + + segment = reload(std::move(columns_before_ddl), std::move(db_settings)); + } + + const size_t 
num_rows_write = 100; + { + // write to segment + Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); + + // add int8_col and later read it as int32 + // (mock ddl change int8 -> int32) + const size_t num_rows = block.rows(); + ColumnWithTypeAndName int8_col(column_i8_before_ddl.type, column_i8_before_ddl.name); + { + IColumn::MutablePtr m_col = int8_col.type->createColumn(); + auto & column_data = typeid_cast &>(*m_col).getData(); + column_data.resize(num_rows); + for (size_t i = 0; i < num_rows; ++i) + { + column_data[i] = static_cast(-1 * (i % 2 ? 1 : -1) * i); + } + int8_col.column = std::move(m_col); + } + block.insert(int8_col); + + segment->write(dmContext(), std::move(block)); + } + + { + ColumnDefines columns_to_read{ + column_i32_after_ddl, + }; + + // read written data + auto in = segment->getInputStream(/* dm_context= */ dmContext(), + /* segment_snap= */ segment->getReadSnapshot(), + /* storage_snap= */ {dmContext().storage_pool}, + /* columns_to_read= */ columns_to_read, + /* read_ranges= */ {HandleRange::newAll()}, + /* max_version= */ std::numeric_limits::max(), + /* expected_block_size= */ 1024); + + // check that we can read correct values + size_t num_rows_read = 0; + in->readPrefix(); + while (Block block = in->read()) + { + num_rows_read += block.rows(); + const ColumnWithTypeAndName & col = block.getByName(column_name_i8_to_i32); + ASSERT_TRUE(col.type->equals(*column_i32_after_ddl.type)) << "col.type: " + col.type->getName() + " expect type: " + column_i32_after_ddl.type->getName(); + ASSERT_EQ(col.name, column_i32_after_ddl.name); + ASSERT_EQ(col.column_id, column_i32_after_ddl.id); + for (size_t i = 0; i < block.rows(); ++i) + { + auto value = col.column->getInt(i); + const auto expected = static_cast(-1 * (i % 2 ? 
1 : -1) * i); + ASSERT_EQ(value, expected); + } + } + in->readSuffix(); + ASSERT_EQ(num_rows_read, num_rows_write); + } +} + +TEST_F(Segment_test, DDLAddColumnWithDefaultValue) +{ + const String new_column_name = "i8"; + const ColumnID new_column_id = 4; + ColumnDefine new_column_define(new_column_id, new_column_name, DataTypeFactory::instance().get("Int8")); + const Int8 new_column_default_value_int = 16; + new_column_define.default_value = DB::toString(new_column_default_value_int); + + { + ColumnDefines columns_before_ddl = DMTestEnv::getDefaultColumns(); + // Not cache any rows + DB::Settings db_settings; + db_settings.dm_segment_delta_cache_limit_rows = 0; + + segment = reload(std::move(columns_before_ddl), std::move(db_settings)); + } + + const size_t num_rows_write = 100; + { + // write to segment + Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); + segment->write(dmContext(), std::move(block)); + } + + { + // DDL add new column with default value + ColumnDefines columns_after_ddl = DMTestEnv::getDefaultColumns(); + columns_after_ddl.emplace_back(new_column_define); + setColumns(columns_after_ddl); + } + + { + ColumnDefines columns_to_read{ + new_column_define, + }; + + // read written data + auto in = segment->getInputStream(/* dm_context= */ dmContext(), + /* segment_snap= */ segment->getReadSnapshot(), + /* storage_snap= */ {dmContext().storage_pool}, + /* columns_to_read= */ columns_to_read, + /* read_ranges= */ {HandleRange::newAll()}, + /* max_version= */ std::numeric_limits::max(), + /* expected_block_size= */ 1024); + + // check that we can read correct values + size_t num_rows_read = 0; + in->readPrefix(); + while (Block block = in->read()) + { + num_rows_read += block.rows(); + const ColumnWithTypeAndName & col = block.getByName(new_column_define.name); + ASSERT_TRUE(col.type->equals(*new_column_define.type)); + ASSERT_EQ(col.name, new_column_define.name); + ASSERT_EQ(col.column_id, new_column_define.id); + for (size_t i 
= 0; i < block.rows(); ++i) + { + auto value = col.column->getInt(i); + ASSERT_EQ(value, new_column_default_value_int) << "at row:" << i; + } + } + in->readSuffix(); + ASSERT_EQ(num_rows_read, num_rows_write); + } +} } // namespace tests } // namespace DM diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp index 8c9ffb4d9c9..d8cb9a494e0 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp @@ -185,7 +185,7 @@ TEST(StorageDeltaMerge_test, ReadWriteCase1) astptr->children.emplace_back(new ASTIdentifier("col1")); Context context = DMTestEnv::getContext(); - storage = StorageDeltaMerge::create(".", "t", ColumnsDescription{names_and_types_list}, astptr, context); + storage = StorageDeltaMerge::create(".", "t", std::nullopt, ColumnsDescription{names_and_types_list}, astptr, context); storage->startup(); } diff --git a/dbms/src/Storages/StorageDeltaMerge.cpp b/dbms/src/Storages/StorageDeltaMerge.cpp index dc6301dd8ec..1223cf46b8f 100644 --- a/dbms/src/Storages/StorageDeltaMerge.cpp +++ b/dbms/src/Storages/StorageDeltaMerge.cpp @@ -3,41 +3,56 @@ #include #include +#include +#include #include #include #include #include #include +#include #include #include #include #include #include #include +#include #include - +#include namespace DB { +namespace ErrorCodes +{ +extern const int DIRECTORY_ALREADY_EXISTS; +} + using namespace DM; constexpr bool TEST_SPLIT = false; StorageDeltaMerge::StorageDeltaMerge(const std::string & path_, const std::string & name_, + const OptionTableInfoConstRef table_info_, const ColumnsDescription & columns_, const ASTPtr & primary_expr_ast_, Context & global_context_) : IManageableStorage{columns_}, path(path_ + "/" + name_), name(name_), + max_column_id_used(0), global_context(global_context_), log(&Logger::get("StorageDeltaMerge")) { if 
(primary_expr_ast_->children.empty()) throw Exception("No primary key"); + // save schema from TiDB + if (table_info_) + tidb_table_info = table_info_->get(); + std::unordered_set pks; for (size_t i = 0; i < primary_expr_ast_->children.size(); ++i) { @@ -50,13 +65,22 @@ StorageDeltaMerge::StorageDeltaMerge(const std::string & path_, size_t pks_combined_bytes = 0; auto all_columns = getColumns().getAllPhysical(); - size_t index = 0; + ColumnDefines table_column_defines; // column defines used in DeltaMergeStore + ColumnDefine handle_column_define; for (auto & col : all_columns) { - ColumnDefine column_define; - column_define.name = col.name; - column_define.type = col.type; - column_define.id = index++; + ColumnDefine column_define(0, col.name, col.type); + if (table_info_) + { + /// If TableInfo from TiDB is not empty, we get column id from TiDB + auto col_iter = findColumnInfoInTableInfo(table_info_->get(), column_define.name); + column_define.id = col_iter->id; + } + else + { + // in test cases, we allocate column_id here + column_define.id = max_column_id_used++; + } if (pks.count(col.name)) { @@ -72,7 +96,6 @@ StorageDeltaMerge::StorageDeltaMerge(const std::string & path_, } table_column_defines.push_back(column_define); - addColumn(header, column_define.id, col.name, col.type, col.type->createColumn()); } hidden_columns.emplace_back(VERSION_COLUMN_NAME); @@ -92,8 +115,10 @@ StorageDeltaMerge::StorageDeltaMerge(const std::string & path_, setColumns(new_columns); + assert(!handle_column_define.name.empty()); + assert(!table_column_defines.empty()); store = std::make_shared( - global_context, path, name, table_column_defines, handle_column_define, DeltaMergeStore::Settings()); + global_context, path, name, std::move(table_column_defines), std::move(handle_column_define), DeltaMergeStore::Settings()); } void StorageDeltaMerge::drop() @@ -118,11 +143,11 @@ Block StorageDeltaMerge::buildInsertBlock(bool is_import, const Block & old_bloc } const size_t rows = 
block.rows(); - if (!block.has(handle_column_define.name)) + if (!block.has(store->getHandle().name)) { // put handle column. - auto handle_column = handle_column_define.type->createColumn(); + auto handle_column = store->getHandle().type->createColumn(); auto & handle_data = typeid_cast &>(*handle_column).getData(); handle_data.resize(rows); @@ -141,7 +166,7 @@ Block StorageDeltaMerge::buildInsertBlock(bool is_import, const Block & old_bloc appendIntoHandleColumn(handle_data, pk_column_types[c], pk_columns[c]); } - addColumn(block, EXTRA_HANDLE_COLUMN_ID, EXTRA_HANDLE_COLUMN_NAME, EXTRA_HANDLE_COLUMN_TYPE, std::move(handle_column)); + addColumnToBlock(block, EXTRA_HANDLE_COLUMN_ID, EXTRA_HANDLE_COLUMN_NAME, EXTRA_HANDLE_COLUMN_TYPE, std::move(handle_column)); } // add version column @@ -155,7 +180,7 @@ Block StorageDeltaMerge::buildInsertBlock(bool is_import, const Block & old_bloc column_data[i] = next_version++; } - addColumn(block, VERSION_COLUMN_ID, VERSION_COLUMN_NAME, VERSION_COLUMN_TYPE, std::move(column)); + addColumnToBlock(block, VERSION_COLUMN_ID, VERSION_COLUMN_NAME, VERSION_COLUMN_TYPE, std::move(column)); } // add tag column (upsert / delete) @@ -169,10 +194,11 @@ Block StorageDeltaMerge::buildInsertBlock(bool is_import, const Block & old_bloc column_data[i] = 0; } - addColumn(block, TAG_COLUMN_ID, TAG_COLUMN_NAME, TAG_COLUMN_TYPE, std::move(column)); + addColumnToBlock(block, TAG_COLUMN_ID, TAG_COLUMN_NAME, TAG_COLUMN_TYPE, std::move(column)); } // Set the real column id. 
+ const Block & header = store->getHeader(); for (auto & col : block) { if (col.name != VERSION_COLUMN_NAME && col.name != TAG_COLUMN_NAME && col.name != EXTRA_HANDLE_COLUMN_NAME) @@ -186,12 +212,9 @@ using BlockDecorator = std::function; class DMBlockOutputStream : public IBlockOutputStream { public: - DMBlockOutputStream(const DeltaMergeStorePtr & store_, - const Block & header_, - const BlockDecorator & decorator_, - const Context & db_context_, - const Settings & db_settings_) - : store(store_), header(header_), decorator(decorator_), db_context(db_context_), db_settings(db_settings_) + DMBlockOutputStream( + const DeltaMergeStorePtr & store_, const BlockDecorator & decorator_, const Context & db_context_, const Settings & db_settings_) + : store(store_), header(store->getHeader()), decorator(decorator_), db_context(db_context_), db_settings(db_settings_) {} Block getHeader() const override { return header; } @@ -210,7 +233,7 @@ BlockOutputStreamPtr StorageDeltaMerge::write(const ASTPtr & query, const Settin { auto & insert_query = typeid_cast(*query); BlockDecorator decorator = std::bind(&StorageDeltaMerge::buildInsertBlock, this, insert_query.is_import, std::placeholders::_1); - return std::make_shared(store, header, decorator, global_context, settings); + return std::make_shared(store, decorator, global_context, settings); } @@ -223,11 +246,12 @@ BlockInputStreams StorageDeltaMerge::read( // unsigned num_streams) { ColumnDefines to_read; + const Block & header = store->getHeader(); for (auto & n : column_names) { ColumnDefine col_define; if (n == EXTRA_HANDLE_COLUMN_NAME) - col_define = handle_column_define; + col_define = store->getHandle(); else if (n == VERSION_COLUMN_NAME) col_define = VERSION_COLUMN_DEFINE; else if (n == TAG_COLUMN_NAME) @@ -238,6 +262,8 @@ BlockInputStreams StorageDeltaMerge::read( // col_define.name = column.name; col_define.id = column.column_id; col_define.type = column.type; + // FIXME set non-empty default value so that we can 
fill missing value with the right default value + // col_define.default_value = ""; } to_read.push_back(col_define); } @@ -305,40 +331,176 @@ BlockInputStreams StorageDeltaMerge::read( // void StorageDeltaMerge::check(const Context & context) { store->check(context, context.getSettingsRef()); } -namespace ErrorCodes +//========================================================================================== +// DDL methods. +//========================================================================================== +void StorageDeltaMerge::alterFromTiDB( + const AlterCommands & params, const TiDB::TableInfo & table_info, const String & database_name, const Context & context) { -extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + tidb_table_info = table_info; + alterImpl(params, database_name, table_info.name, std::optional>(table_info), context); } -static ASTPtr extractKeyExpressionList(IAST & node) +void StorageDeltaMerge::alter( + const AlterCommands & commands, const String & database_name, const String & table_name, const Context & context) { - const ASTFunction * expr_func = typeid_cast(&node); + alterImpl(commands, database_name, table_name, std::nullopt, context); +} - if (expr_func && expr_func->name == "tuple") +/// If any ddl statement change StorageDeltaMerge's schema, +/// we need to update the create statement in metadata, so that we can restore table structure next time +static void updateDeltaMergeTableCreateStatement( // + const String & database_name, const String & table_name, // + const ColumnsDescription & columns, + const OrderedNameSet & hidden_columns, // + const OptionTableInfoConstRef table_info_from_tidb, const ColumnDefines & store_table_columns, // + const Context & context); + +void StorageDeltaMerge::alterImpl(const AlterCommands & commands, + const String & database_name, + const String & table_name, + const OptionTableInfoConstRef table_info, + const Context & context) +{ + std::unordered_set cols_drop_forbidden; + for (const 
auto & n : pk_column_names) + cols_drop_forbidden.insert(n); + cols_drop_forbidden.insert(EXTRA_HANDLE_COLUMN_NAME); + cols_drop_forbidden.insert(VERSION_COLUMN_NAME); + cols_drop_forbidden.insert(TAG_COLUMN_NAME); + + for (const auto & command : commands) { - /// Primary key is specified in tuple. - return expr_func->children.at(0); + if (command.type == AlterCommand::MODIFY_PRIMARY_KEY) + { + // check that add primary key is forbidden + throw Exception("Storage engine " + getName() + " doesn't support modify primary key.", ErrorCodes::BAD_ARGUMENTS); + } + else if (command.type == AlterCommand::DROP_COLUMN) + { + // check that drop primary key is forbidden + // check that drop hidden columns is forbidden + if (cols_drop_forbidden.count(command.column_name) > 0) + throw Exception("Storage engine " + getName() + " doesn't support drop primary key / hidden column: " + command.column_name, + ErrorCodes::BAD_ARGUMENTS); + } } - else + + auto table_soft_lock = lockDataForAlter(__PRETTY_FUNCTION__); + auto table_hard_lock = lockStructureForAlter(__PRETTY_FUNCTION__); + + // update the metadata in database, so that we can read the new schema using TiFlash's client + ColumnsDescription new_columns = getColumns(); + + for (const auto & command : commands) { - /// Primary key consists of one column. 
- auto res = std::make_shared(); - res->children.push_back(node.ptr()); - return res; + if (command.type == AlterCommand::MODIFY_COLUMN) + { + // find the column we are going to modify + auto col_iter = command.findColumn(new_columns.ordinary); // just find in ordinary columns + if (!isSupportedDataTypeCast(col_iter->type, command.data_type)) + { + // check that lossy changes is forbidden + // check that changing the UNSIGNED attribute is forbidden + throw Exception("Storage engine " + getName() + "doesn't support lossy data type modify from " + col_iter->type->getName() + + " to " + command.data_type->getName(), + ErrorCodes::NOT_IMPLEMENTED); + } + } } + + commands.apply(new_columns); // apply AlterCommands to `new_columns` + // apply alter to store's table column in DeltaMergeStore + store->applyAlters(commands, table_info, max_column_id_used, context); + // after update `new_columns` and store's table columns, we need to update create table statement, + // so that we can restore table next time. 
+ updateDeltaMergeTableCreateStatement( + database_name, table_name, new_columns, hidden_columns, table_info, store->getTableColumns(), context); + setColumns(std::move(new_columns)); +} + +void StorageDeltaMerge::rename(const String & new_path_to_db, const String & /*new_database_name*/, const String & new_table_name) +{ + const String new_path = new_path_to_db + "/" + new_table_name; + + if (Poco::File{new_path}.exists()) + throw Exception{"Target path already exists: " + new_path, + /// @todo existing target can also be a file, not directory + ErrorCodes::DIRECTORY_ALREADY_EXISTS}; + + // flush store and then reset store to new path + store->flushCache(global_context); + ColumnDefines table_column_defines = store->getTableColumns(); + ColumnDefine handle_column_define = store->getHandle(); + DeltaMergeStore::Settings settings = store->getSettings(); + + store = {}; + + // rename path and generate a new store + Poco::File(path).renameTo(new_path); + store = std::make_shared( + global_context, new_path, new_table_name, std::move(table_column_defines), std::move(handle_column_define), settings); + + path = new_path; + name = new_table_name; } -void registerStorageDeltaMerge(StorageFactory & factory) +void updateDeltaMergeTableCreateStatement( // + const String & database_name, const String & table_name, // + const ColumnsDescription & columns, + const OrderedNameSet & hidden_columns, // + const OptionTableInfoConstRef table_info_from_tidb, const ColumnDefines & store_table_columns, // + const Context & context) { - factory.registerStorage("DeltaMerge", [](const StorageFactory::Arguments & args) { - if (args.engine_args.size() > 1) - throw Exception("Engine DeltaMerge expects only one parameter. e.g. 
engine = DeltaMerge((a, b))"); - if (args.engine_args.size() < 1) + /// Filter out hidden columns in the `create table statement` + ColumnsDescription columns_without_hidden; + columns_without_hidden.ordinary = columns.ordinary; + for (const auto & col : columns.materialized) + if (!hidden_columns.has(col.name)) + columns_without_hidden.materialized.emplace_back(col); + columns_without_hidden.aliases = columns.aliases; + columns_without_hidden.defaults = columns.defaults; + + /// If TableInfo from TiDB is empty, for example, create DM table for test, + /// we refine TableInfo from store's table column, so that we can restore column id next time + TiDB::TableInfo table_info_from_store; + if (!table_info_from_tidb) + { + table_info_from_store.schema_version = DEFAULT_UNSPECIFIED_SCHEMA_VERSION; + table_info_from_store.name = table_name; + for (const auto & column_define : store_table_columns) + { + if (hidden_columns.has(column_define.name)) + continue; + TiDB::ColumnInfo column_info = getColumnInfoByDataType(column_define.type); + column_info.id = column_define.id; + column_info.name = column_define.name; + column_info.origin_default_value = column_define.default_value; + table_info_from_store.columns.emplace_back(std::move(column_info)); + } + } + + // We need to update the JSON field in table ast + // engine = DeltaMerge((CounterID, EventDate), '{JSON format table info}') + IDatabase::ASTModifier storage_modifier = [&](IAST & ast) { + std::shared_ptr literal; + if (table_info_from_tidb) + literal = std::make_shared(Field(table_info_from_tidb->get().serialize(true))); + else + literal = std::make_shared(Field(table_info_from_store.serialize(true))); + auto & storage_ast = typeid_cast(ast); + auto & args = typeid_cast(*storage_ast.engine->arguments); + if (args.children.size() == 1) + args.children.emplace_back(literal); + else if (args.children.size() == 2) + args.children.back() = literal; + else throw Exception( - "Engine DeltaMerge needs primary key. e.g. 
engine = DeltaMerge((a, b))", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - ASTPtr primary_expr_list = extractKeyExpressionList(*args.engine_args[0]); - return StorageDeltaMerge::create(args.data_path, args.table_name, args.columns, primary_expr_list, args.context); - }); + "Wrong arguments num:" + DB::toString(args.children.size()) + " in table: " + table_name + " with engine=DeltaMerge", + ErrorCodes::BAD_ARGUMENTS); + }; + + context.getDatabase(database_name)->alterTable(context, table_name, columns_without_hidden, storage_modifier); } } // namespace DB \ No newline at end of file diff --git a/dbms/src/Storages/StorageDeltaMerge.h b/dbms/src/Storages/StorageDeltaMerge.h index 94d75ba0fb6..2350d79c3b8 100644 --- a/dbms/src/Storages/StorageDeltaMerge.h +++ b/dbms/src/Storages/StorageDeltaMerge.h @@ -4,15 +4,15 @@ #include #include +#include #include #include +#include #include #include #include -#include - namespace DB { class StorageDeltaMerge : public ext::shared_ptr_helper, public IManageableStorage @@ -34,21 +34,40 @@ class StorageDeltaMerge : public ext::shared_ptr_helper, publ BlockOutputStreamPtr write(const ASTPtr & query, const Settings & settings) override; + void rename(const String & /*new_path_to_db*/, const String & /*new_database_name*/, const String & /*new_table_name*/) override; + + void alter(const AlterCommands & commands, const String & database_name, const String & table_name, const Context & context) override; + + // Apply AlterCommands synced from TiDB should use `alterFromTiDB` instead of `alter(...)` + void alterFromTiDB( + const AlterCommands & commands, const TiDB::TableInfo & table_info, const String & database_name, const Context & context); + + inline const TiDB::TableInfo & getTableInfo() const { return tidb_table_info; } + const OrderedNameSet & getHiddenColumnsImpl() const override { return hidden_columns; } BlockInputStreamPtr status() override { throw Exception("Unimplemented"); } + void check(const Context & context) 
override; protected: StorageDeltaMerge(const std::string & path_, const std::string & name_, + const DM::OptionTableInfoConstRef table_info_, const ColumnsDescription & columns_, const ASTPtr & primary_expr_ast_, Context & global_context_); Block buildInsertBlock(bool is_import, const Block & block); +private: + void alterImpl(const AlterCommands & commands, + const String & database_name, + const String & table_name, + const DB::DM::OptionTableInfoConstRef table_info_, + const Context & context); + private: using ColumnIdMap = std::unordered_map; @@ -57,16 +76,18 @@ class StorageDeltaMerge : public ext::shared_ptr_helper, publ DM::DeltaMergeStorePtr store; - DM::ColumnDefines table_column_defines; - DM::ColumnDefine handle_column_define; Strings pk_column_names; - OrderedNameSet hidden_columns; + // The table schema synced from TiDB + TiDB::TableInfo tidb_table_info; + + // Used to allocate new column-id when this table is NOT synced from TiDB + ColumnID max_column_id_used; + std::atomic next_version = 1; //TODO: remove this!!! 
Context & global_context; - Block header; Logger * log; }; diff --git a/dbms/src/Storages/Transaction/TiDB.h b/dbms/src/Storages/Transaction/TiDB.h index 4e81039ac5f..bfc303cd2b3 100644 --- a/dbms/src/Storages/Transaction/TiDB.h +++ b/dbms/src/Storages/Transaction/TiDB.h @@ -272,7 +272,7 @@ struct TableInfo bool is_partition_table = false; TableID belonging_table_id = -1; PartitionInfo partition; - Int64 schema_version = -1; + Int64 schema_version = DEFAULT_UNSPECIFIED_SCHEMA_VERSION; ColumnID getColumnID(const String & name) const; String getColumnName(const ColumnID id) const; diff --git a/dbms/src/Storages/Transaction/TypeMapping.cpp b/dbms/src/Storages/Transaction/TypeMapping.cpp index d8ab1284b7b..5623abde3a3 100644 --- a/dbms/src/Storages/Transaction/TypeMapping.cpp +++ b/dbms/src/Storages/Transaction/TypeMapping.cpp @@ -2,6 +2,9 @@ #include #include +#include +#include +#include #include #include #include @@ -13,6 +16,7 @@ #include #include #include +#include namespace DB { @@ -219,7 +223,7 @@ ColumnInfo reverseGetColumnInfo(const NameAndTypePair & column, ColumnID id, con ColumnInfo column_info; column_info.id = id; column_info.name = column.name; - const IDataType * nested_type = column.type.get(); + const IDataType *nested_type = column.type.get(); // Fill not null. if (!column.type->isNullable()) @@ -289,7 +293,8 @@ ColumnInfo reverseGetColumnInfo(const NameAndTypePair & column, ColumnID id, con column_info.tp = TiDB::TypeEnum; break; default: - throw DB::Exception("Unable reverse map TiFlash type " + nested_type->getName() + " to TiDB type", ErrorCodes::LOGICAL_ERROR); + throw DB::Exception("Unable reverse map TiFlash type " + nested_type->getName() + " to TiDB type", + ErrorCodes::LOGICAL_ERROR); } // Fill unsigned flag. 
@@ -314,7 +319,7 @@ ColumnInfo reverseGetColumnInfo(const NameAndTypePair & column, ColumnID id, con if (checkDataType<DataTypeEnum16>(nested_type)) { auto enum16_type = checkAndGetDataType<DataTypeEnum16>(nested_type); - for (auto & element : enum16_type->getValues()) + for (auto &element : enum16_type->getValues()) { column_info.elems.emplace_back(element.first, element.second); } @@ -330,4 +335,137 @@ ColumnInfo reverseGetColumnInfo(const NameAndTypePair & column, ColumnID id, con return column_info; } +namespace +{ +// If `type` is DataTypeDecimal<T>, fill flen / decimal / tp of `column_info` and return true; otherwise return false and leave `column_info` untouched. +template <typename T> +bool getDecimalInfo(const IDataType * type, ColumnInfo & column_info) +{ + using TypeDec = DataTypeDecimal<T>; + if (auto decimal_type = checkAndGetDataType<TypeDec>(type); decimal_type != nullptr) + { + column_info.flen = decimal_type->getPrec(); + column_info.decimal = decimal_type->getScale(); + column_info.tp = TiDB::TypeNewDecimal; + return true; + } + return false; +} + +} // namespace +// Build a ColumnInfo (tp, not-null / unsigned flags, decimal precision and scale) from a TiFlash data type; throws NOT_IMPLEMENTED for the types listed as unknown at the end of the switch. +ColumnInfo getColumnInfoByDataType(const DataTypePtr & type) +{ + ColumnInfo col; + DataTypePtr not_null_type; + if (const auto * type_nullable = typeid_cast<const DataTypeNullable *>(type.get())) + { + not_null_type = type_nullable->getNestedType(); + } + else + { + col.setNotNullFlag(); + not_null_type = type; + } + + // Use enum TypeIndex + switch (not_null_type->getTypeId()) + { + case TypeIndex::Nothing: + col.tp = TiDB::TypeNull; + break; + + // UnSigned + case TypeIndex::UInt8: + col.setUnsignedFlag(); + col.tp = TiDB::TypeTiny; + break; + case TypeIndex::UInt16: + col.setUnsignedFlag(); + col.tp = TiDB::TypeShort; + break; + case TypeIndex::UInt32: + col.setUnsignedFlag(); + col.tp = TiDB::TypeLong; + break; + case TypeIndex::UInt64: + col.setUnsignedFlag(); + col.tp = TiDB::TypeLongLong; + break; + + // Signed + case TypeIndex::Int8: + col.tp = TiDB::TypeTiny; + break; + case TypeIndex::Int16: + col.tp = TiDB::TypeShort; + break; + case TypeIndex::Int32: + col.tp = TiDB::TypeLong; + break; + case TypeIndex::Int64: + col.tp = TiDB::TypeLongLong; + break; + + // Floating point types + case TypeIndex::Float32:
+ col.tp = TiDB::TypeFloat; + break; + case TypeIndex::Float64: + col.tp = TiDB::TypeDouble; + break; + + case TypeIndex::Date: + col.tp = TiDB::TypeDate; + break; + case TypeIndex::DateTime: + col.tp = TiDB::TypeDatetime; + break; + + case TypeIndex::String: + col.tp = TiDB::TypeString; + break; + case TypeIndex::FixedString: + col.tp = TiDB::TypeString; + break; + + // Decimal: inspect the nested type, because `type` itself may be Nullable(Decimal) and would then fail the cast in getDecimalInfo + case TypeIndex::Decimal32: + getDecimalInfo<Decimal32>(not_null_type.get(), col); + break; + case TypeIndex::Decimal64: + getDecimalInfo<Decimal64>(not_null_type.get(), col); + break; + case TypeIndex::Decimal128: + getDecimalInfo<Decimal128>(not_null_type.get(), col); + break; + case TypeIndex::Decimal256: + getDecimalInfo<Decimal256>(not_null_type.get(), col); + break; + + // Unknown numeric in TiDB + case TypeIndex::UInt128: + break; + case TypeIndex::Int128: + break; + case TypeIndex::Int256: + break; + + // Unknown + case TypeIndex::Enum8: + case TypeIndex::Enum16: + case TypeIndex::UUID: + case TypeIndex::Array: + case TypeIndex::Tuple: + case TypeIndex::Set: + case TypeIndex::Interval: + case TypeIndex::Nullable: + case TypeIndex::Function: + case TypeIndex::AggregateFunction: + case TypeIndex::LowCardinality: + throw Exception("Unknown TiDB type from " + type->getName(), ErrorCodes::NOT_IMPLEMENTED); + } + return col; +} + } // namespace DB diff --git a/dbms/src/Storages/Transaction/TypeMapping.h b/dbms/src/Storages/Transaction/TypeMapping.h index e0ee9a0198b..d864061b515 100644 --- a/dbms/src/Storages/Transaction/TypeMapping.h +++ b/dbms/src/Storages/Transaction/TypeMapping.h @@ -23,5 +23,7 @@ TiDB::CodecFlag getCodecFlagByFieldType(const tipb::FieldType & field_type); // Note that not every TiFlash type has a corresponding TiDB type, // caller should make sure the source type is valid, otherwise exception will be thrown.
ColumnInfo reverseGetColumnInfo(const NameAndTypePair & column, ColumnID id, const Field & default_value); +ColumnInfo getColumnInfoByDataType(const DataTypePtr &type); } // namespace DB + diff --git a/dbms/src/Storages/Transaction/tests/CMakeLists.txt b/dbms/src/Storages/Transaction/tests/CMakeLists.txt index 93dd50b1b4a..22e5ce98326 100644 --- a/dbms/src/Storages/Transaction/tests/CMakeLists.txt +++ b/dbms/src/Storages/Transaction/tests/CMakeLists.txt @@ -15,3 +15,6 @@ target_link_libraries (region_test dbms) add_executable (leveldb_test leveldb_test.cpp) target_link_libraries (leveldb_test dbms) +add_executable(type_mapping gtest_type_mapping.cpp) +target_link_libraries(type_mapping dbms gtest_main) + diff --git a/dbms/src/Storages/Transaction/tests/gtest_type_mapping.cpp b/dbms/src/Storages/Transaction/tests/gtest_type_mapping.cpp new file mode 100644 index 00000000000..4212a230c01 --- /dev/null +++ b/dbms/src/Storages/Transaction/tests/gtest_type_mapping.cpp @@ -0,0 +1,82 @@ +#include +#include +#include +#include + +namespace DB +{ +namespace tests +{ + +namespace +{ +DataTypePtr typeFromString(const String & str) +{ + auto & data_type_factory = DataTypeFactory::instance(); + return data_type_factory.get(str); +} +} // namespace + + +TEST(TypeMapping_test, ColumnInfoToDataType) +{ + // TODO fill this test +} + +TEST(TypeMapping_test, DataTypeToColumnInfo) +try +{ + TiDB::ColumnInfo column_info; + const Strings numeric_types = {"Int8", "Int16", "Int32", "Int64"}; + for (const auto & numeric_type : numeric_types) + { + for (bool sign : {false, true}) + { + for (bool nullable : {false, true}) + { + String actual_test_type = numeric_type; + if (!sign) + actual_test_type = "U" + actual_test_type; + if (nullable) + actual_test_type = "Nullable(" + actual_test_type + ")"; + + column_info = getColumnInfoByDataType(typeFromString(actual_test_type)); + ASSERT_EQ(!sign, column_info.hasUnsignedFlag()) << actual_test_type; + ASSERT_EQ(!nullable, 
column_info.hasNotNullFlag()) << actual_test_type; + + if (numeric_type == numeric_types[0]) + { + ASSERT_EQ(column_info.tp, TiDB::TypeTiny) << actual_test_type; + } + else if (numeric_type == numeric_types[1]) + { + ASSERT_EQ(column_info.tp, TiDB::TypeShort) << actual_test_type; + } + else if (numeric_type == numeric_types[2]) + { + ASSERT_EQ(column_info.tp, TiDB::TypeLong) << actual_test_type; + } + else if (numeric_type == numeric_types[3]) + { + ASSERT_EQ(column_info.tp, TiDB::TypeLongLong) << actual_test_type; + } + } + } + } + + column_info = getColumnInfoByDataType(typeFromString("String")); + ASSERT_EQ(column_info.tp, TiDB::TypeString); +} +catch (const Exception & e) +{ + std::string text = e.displayText(); + auto embedded_stack_trace_pos = text.find("Stack trace"); + std::cerr << "Code: " << e.code() << ". " << text << std::endl << std::endl; + if (std::string::npos == embedded_stack_trace_pos) + std::cerr << "Stack trace:" << std::endl << e.getStackTrace().toString(); + + throw; +} + +} // namespace tests +} // namespace DB diff --git a/dbms/src/test_utils/TiflashTestBasic.h b/dbms/src/test_utils/TiflashTestBasic.h index f676ecd88f1..74947571fb3 100644 --- a/dbms/src/test_utils/TiflashTestBasic.h +++ b/dbms/src/test_utils/TiflashTestBasic.h @@ -11,9 +11,10 @@ namespace tests class TiFlashTestEnv { public: - static Context getContext() + static Context getContext(const DB::Settings &settings = DB::Settings()) { static Context context = DB::Context::createGlobal(); + context.getSettingsRef() = settings; return context; } }; diff --git a/tests/_env.sh b/tests/_env.sh index 6a15a30fab6..2faba1db422 100644 --- a/tests/_env.sh +++ b/tests/_env.sh @@ -3,9 +3,9 @@ # Executable path if [ `uname` == "Darwin" ]; then - export build_dir="../../build_clang" + export build_dir="../../build_clang" else - export build_dir="../../build" + export build_dir="../../build" fi export storage_bin="$build_dir/dbms/src/Server/theflash" diff --git 
a/tests/delta_merge/ddl/alter.test b/tests/delta_merge/ddl/alter.test new file mode 100644 index 00000000000..c17f2b412f5 --- /dev/null +++ b/tests/delta_merge/ddl/alter.test @@ -0,0 +1,93 @@ +>> drop table if exists dm_test + +## Prepare table +>> create table dm_test ( + a Int8, + b Int32 + ) engine = DeltaMerge((a)) + +>> insert into table dm_test values(1, 2) +>> select * from dm_test +┌─a─┬─b─┐ +│ 1 │ 2 │ +└───┴───┘ + +# drop pk is forbidden +>> alter table dm_test drop column a +Received exception from server (version 1.1.54381): +Code: 36. DB::Exception: Received from 127.0.0.1:9000. DB::Exception: Storage engine DeltaMerge doesn't support drop primary key / hidden column: a. +#>> show create table dm_test +#┌─statement───────────────────────────────────────────────────────────────┐ +#│ CREATE TABLE default.dm_test ( a Int8, b Int32) ENGINE = DeltaMerge(a) │ +#└─────────────────────────────────────────────────────────────────────────┘ + +>> alter table dm_test drop column b +#>> show create table dm_test +#┌─statement─────────────────────────────────────────────────────┐ +#│ CREATE TABLE default.dm_test ( a Int8) ENGINE = DeltaMerge(a) │ +#└───────────────────────────────────────────────────────────────┘ +>> select * from dm_test +┌─a─┐ +│ 1 │ +└───┘ + +# add a column which name is the same as before +>> alter table dm_test add column b Int32 +>> select * from dm_test +┌─a─┬─b─┐ +│ 1 │ 0 │ +└───┴───┘ + +# add another column +>> alter table dm_test add column c Int32 +>> select * from dm_test +┌─a─┬─b─┬─c─┐ +│ 1 │ 0 │ 0 │ +└───┴───┴───┘ + +## add a nullable column +>> alter table dm_test add column d Nullable(Int32) +# the old row of d is null now +>> select * from dm_test +┌─a─┬─b─┬─c─┬──d─┐ +│ 1 │ 0 │ 0 │ \N │ +└───┴───┴───┴────┘ + +>> insert into table dm_test values(2, 1024, 65535, 4096) +>> insert into table dm_test(a,b,c) values(3, 2048, 65536) +>> select * from dm_test +┌─a─┬────b─┬─────c─┬────d─┐ +│ 1 │ 0 │ 0 │ \N │ +│ 2 │ 1024 │ 65535 │ 4096 │ +│ 3 │ 
2048 │ 65536 │ \N │ +└───┴──────┴───────┴──────┘ +# modify column 'c' data type from Int32 -> Int64 +>> alter table dm_test modify column c Int64 +>> select * from dm_test +┌─a─┬────b─┬─────c─┬────d─┐ +│ 1 │ 0 │ 0 │ \N │ +│ 2 │ 1024 │ 65535 │ 4096 │ +│ 3 │ 2048 │ 65536 │ \N │ +└───┴──────┴───────┴──────┘ + +# TODO rename column is not support in CH + + +## rename table +>> drop table if exists dm_test_renamed +>> rename table dm_test to dm_test_renamed +>> select * from dm_test +Received exception from server (version 1.1.54381): +Code: 60. DB::Exception: Received from 127.0.0.1:9000. DB::Exception: Table default.dm_test doesn't exist.. + +>> select * from dm_test_renamed +┌─a─┬────b─┬─────c─┬────d─┐ +│ 1 │ 0 │ 0 │ \N │ +│ 2 │ 1024 │ 65535 │ 4096 │ +│ 3 │ 2048 │ 65536 │ \N │ +└───┴──────┴───────┴──────┘ + + +## Clean up +>> drop table if exists dm_test +>> drop table if exists dm_test_renamed diff --git a/tests/delta_merge/ddl/alter_default_value.test b/tests/delta_merge/ddl/alter_default_value.test new file mode 100644 index 00000000000..46d87c37980 --- /dev/null +++ b/tests/delta_merge/ddl/alter_default_value.test @@ -0,0 +1,46 @@ +# modify column default value + +>> drop table if exists dm_test +## Prepare table +>> create table dm_test ( + pk Int8 + ) engine = DeltaMerge(pk) + +>> insert into table dm_test values(1) + +## create new column without default value. the row which pk==1, will filled with 'zero' value +>> alter table dm_test add column i0 Int32 +>> alter table dm_test add column f32_0 Float32 +>> alter table dm_test add column f64_0 Float64 +>> alter table dm_test add column dec0 Decimal(10,4) +>> alter table dm_test add column s0 String +>> alter table dm_test add column fs0 FixedString(4) +>> alter table dm_test add column dt0 DateTime + +## create new column with default value. 
the row which pk==1, will filled with those default value +## See FLASH-453 +#>> alter table dm_test add column s2 String default 'non-empty' + +## These alter command will throw exception now +## See FLASH-453 +#>> alter table dm_test add column i1 Int32 default 999 +#>> alter table dm_test add column f32_1 Float32 default 1.234 +#>> alter table dm_test add column f64_1 Float64 default 1.23456 +#>> alter table dm_test add column dec1 Decimal(10,4) default 3.1415 +#>> alter table dm_test add column fs1 FixedString(4) default 'aaa' +#>> alter table dm_test add column dt1 DateTime default '1999-09-09 12:34:56' + +>> select * from dm_test where pk = 1 +┌─pk─┬─i0─┬─f32_0─┬─f64_0─┬─dec0───┬─s0─┬─fs0──────┬─────────────────dt0─┐ +│ 1 │ 0 │ 0 │ 0 │ 0.0000 │ │ \0\0\0\0 │ 0000-00-00 00:00:00 │ +└────┴────┴───────┴───────┴────────┴────┴──────────┴─────────────────────┘ + +## insert a row, missing fields will be filled with default value +>> insert into table dm_test(pk) values(3) +>> select * from dm_test where pk = 3 +┌─pk─┬─i0─┬─f32_0─┬─f64_0─┬─dec0───┬─s0─┬─fs0──────┬─────────────────dt0─┐ +│ 3 │ 0 │ 0 │ 0 │ 0.0000 │ │ \0\0\0\0 │ 0000-00-00 00:00:00 │ +└────┴────┴───────┴───────┴────────┴────┴──────────┴─────────────────────┘ + +## clean up +>> drop table if exists dm_test diff --git a/tests/delta_merge/ddl/alter_joint_primary_key.test b/tests/delta_merge/ddl/alter_joint_primary_key.test new file mode 100644 index 00000000000..b13d64603a5 --- /dev/null +++ b/tests/delta_merge/ddl/alter_joint_primary_key.test @@ -0,0 +1,54 @@ +>> drop table if exists dm_test + +## tests for joint primary key +>> create table dm_test ( + a Int32, + b Int32, + c String, + d FixedString(20) + ) engine = DeltaMerge((a, b)) + +>> insert into table dm_test values(1, 2, 'hello TiFlash', 'hello world') + +# drop a part of pk is forbidden +>> alter table dm_test drop column a +Received exception from server (version 1.1.54381): +Code: 36. DB::Exception: Received from 127.0.0.1:9000. 
DB::Exception: Storage engine DeltaMerge doesn't support drop primary key / hidden column: a. + +>> alter table dm_test drop column b +Received exception from server (version 1.1.54381): +Code: 36. DB::Exception: Received from 127.0.0.1:9000. DB::Exception: Storage engine DeltaMerge doesn't support drop primary key / hidden column: b. + +>> select * from dm_test +┌─a─┬─b─┬─c─────────────┬─d─────────────────────────────┐ +│ 1 │ 2 │ hello TiFlash │ hello world\0\0\0\0\0\0\0\0\0 │ +└───┴───┴───────────────┴───────────────────────────────┘ +>> show create table dm_test +┌─statement─────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ CREATE TABLE default.dm_test ( a Int32, b Int32, c String, d FixedString(20)) ENGINE = DeltaMerge((a, b)) │ +└───────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ + + +>> alter table dm_test drop column c +#>> show create table dm_test +#┌─statement──────────────────────────────────────────────────────────────────────────────────────────┐ +#│ CREATE TABLE default.dm_test ( a Int32, b Int32, d FixedString(200)) ENGINE = DeltaMerge((a, b)) │ +#└────────────────────────────────────────────────────────────────────────────────────────────────────┘ +>> select * from dm_test +┌─a─┬─b─┬─d─────────────────────────────┐ +│ 1 │ 2 │ hello world\0\0\0\0\0\0\0\0\0 │ +└───┴───┴───────────────────────────────┘ + + +>> alter table dm_test drop column d +>> select * from dm_test +┌─a─┬─b─┐ +│ 1 │ 2 │ +└───┴───┘ +#>> show create table dm_test +#┌─statement─────────────────────────────────────────────────────────────────────┐ +#│ CREATE TABLE default.dm_test ( a Int32, b Int32) ENGINE = DeltaMerge((a, b)) │ +#└───────────────────────────────────────────────────────────────────────────────┘ + +## Clean up +>> drop table if exists dm_test diff --git a/tests/delta_merge/ddl/alter_nullable.test b/tests/delta_merge/ddl/alter_nullable.test new file mode 
100644 index 00000000000..24a62086a49 --- /dev/null +++ b/tests/delta_merge/ddl/alter_nullable.test @@ -0,0 +1,101 @@ +>> drop table if exists dm_test + +## nullable -> nullable +>> drop table if exists dm_test +>> create table dm_test ( + a Int16, + b Nullable(Int8), + c Nullable(Int16), + d Nullable(String), + e Nullable(DateTime), + f Nullable(Decimal(10, 4)) + ) engine = DeltaMerge(a) + +>> insert into table dm_test values(2, 127, 2048, 'Hello TiFlash', '1999-09-09 00:00:01', 3.1415) +# -- keep some null values +>> insert into table dm_test(a) values(3) +>> insert into table dm_test values(4, -128, -4096, 'Hello TiSpark', '1999-09-09 11:11:11', 3.0) +>> select * from dm_test order by a +┌─a─┬────b─┬─────c─┬─d─────────────┬───────────────────e─┬─f──────┐ +│ 2 │ 127 │ 2048 │ Hello TiFlash │ 1999-09-09 00:00:01 │ 3.1415 │ +│ 3 │ \N │ \N │ \N │ \N │ \N │ +│ 4 │ -128 │ -4096 │ Hello TiSpark │ 1999-09-09 11:11:11 │ 3.0000 │ +└───┴──────┴───────┴───────────────┴─────────────────────┴────────┘ + +# -- Nullable(Int8) -> Nullable(Int32), Nullable(Int16 -> Int64) +>> alter table dm_test MODIFY COLUMN b Nullable(Int32) +>> alter table dm_test MODIFY COLUMN c Nullable(Int64) +>> alter table dm_test MODIFY COLUMN d Nullable(String) +>> alter table dm_test MODIFY COLUMN e Nullable(DateTime) +>> alter table dm_test MODIFY COLUMN f Nullable(Decimal(10, 4)) +>> select * from dm_test order by a +┌─a─┬────b─┬─────c─┬─d─────────────┬───────────────────e─┬─f──────┐ +│ 2 │ 127 │ 2048 │ Hello TiFlash │ 1999-09-09 00:00:01 │ 3.1415 │ +│ 3 │ \N │ \N │ \N │ \N │ \N │ +│ 4 │ -128 │ -4096 │ Hello TiSpark │ 1999-09-09 11:11:11 │ 3.0000 │ +└───┴──────┴───────┴───────────────┴─────────────────────┴────────┘ + + +## not null -> nullable +>> drop table if exists dm_test +>> create table dm_test ( + a Int16, + b Int8, + c Int16, + d String, + e DateTime, + f Decimal(10, 4) + ) engine = DeltaMerge(a) + +>> insert into table dm_test values(2, 127, 2048, 'Hello TiFlash', '1999-09-09 00:00:01', 
3.1415) +>> insert into table dm_test values(4, -128, -4096, 'Hello TiSpark', '1999-09-09 11:11:11', 3.0) +>> select * from dm_test order by a +┌─a─┬────b─┬─────c─┬─d─────────────┬───────────────────e─┬─f──────┐ +│ 2 │ 127 │ 2048 │ Hello TiFlash │ 1999-09-09 00:00:01 │ 3.1415 │ +│ 4 │ -128 │ -4096 │ Hello TiSpark │ 1999-09-09 11:11:11 │ 3.0000 │ +└───┴──────┴───────┴───────────────┴─────────────────────┴────────┘ + +# -- Int8 -> Int32, Int16 -> Int64 +>> alter table dm_test MODIFY COLUMN b Nullable(Int32) +>> alter table dm_test MODIFY COLUMN c Nullable(Int64) +>> alter table dm_test MODIFY COLUMN d Nullable(String) +>> alter table dm_test MODIFY COLUMN e Nullable(DateTime) +>> alter table dm_test MODIFY COLUMN f Nullable(Decimal(10, 4)) +# -- insert some null fields +>> insert into table dm_test(a) values(3) +>> select * from dm_test order by a +┌─a─┬────b─┬─────c─┬─d─────────────┬───────────────────e─┬─f──────┐ +│ 2 │ 127 │ 2048 │ Hello TiFlash │ 1999-09-09 00:00:01 │ 3.1415 │ +│ 3 │ \N │ \N │ \N │ \N │ \N │ +│ 4 │ -128 │ -4096 │ Hello TiSpark │ 1999-09-09 11:11:11 │ 3.0000 │ +└───┴──────┴───────┴───────────────┴─────────────────────┴────────┘ + + +## nullable -> not null +>> drop table if exists dm_test +>> create table dm_test ( + a Int16, + b Nullable(Int32), + c Nullable(Int32), + d Nullable(String), + e Nullable(DateTime) + ) engine = DeltaMerge(a) + +>> insert into table dm_test values(20000, 2048, 65535, 'Hello TiFlash', '1999-09-09 00:00:00') +>> select * from dm_test +┌─────a─┬────b─┬─────c─┬─d─────────────┬───────────────────e─┐ +│ 20000 │ 2048 │ 65535 │ Hello TiFlash │ 1999-09-09 00:00:00 │ +└───────┴──────┴───────┴───────────────┴─────────────────────┘ + +>> alter table dm_test MODIFY COLUMN b Int32 +>> alter table dm_test MODIFY COLUMN c Int64 +>> alter table dm_test MODIFY COLUMN d String +>> alter table dm_test MODIFY COLUMN e DateTime +>> select * from dm_test +┌─────a─┬────b─┬─────c─┬─d─────────────┬───────────────────e─┐ +│ 20000 │ 2048 │ 65535 
│ Hello TiFlash │ 1999-09-09 00:00:00 │ +└───────┴──────┴───────┴───────────────┴─────────────────────┘ + + +## Clean up +>> drop table if exists dm_test diff --git a/tests/docker/run.sh b/tests/docker/run.sh index 9408e8bb0e6..34c768c7136 100755 --- a/tests/docker/run.sh +++ b/tests/docker/run.sh @@ -23,7 +23,7 @@ rm -rf ./data ./log # (only tics0 up) docker-compose up -d --scale tics-gtest=0 --scale tiflash0=0 --scale tikv-learner0=0 --scale tikv0=0 --scale tidb0=0 --scale pd0=0 -docker-compose exec -T tics0 bash -c 'cd /tests ; ./run-test.sh mutable-test' +docker-compose exec -T tics0 bash -c 'cd /tests ; ./run-test.sh mutable-test delta_merge' docker-compose down # run gtest cases. (only tics-gtest up) diff --git a/tests/run-test.py b/tests/run-test.py index ea8afdb219e..b7ebb3e5eb7 100644 --- a/tests/run-test.py +++ b/tests/run-test.py @@ -15,6 +15,8 @@ UNFINISHED_2_PREFIX = ' ' WORD_PH = '{#WORD}' +verbose = False + class Executor: def __init__(self, dbc): self.dbc = dbc @@ -114,6 +116,7 @@ def on_line(self, line): elif line.startswith(CMD_PREFIX_TIDB): self.executor_tidb.exe(line[len(CMD_PREFIX_TIDB):]) elif line.startswith(CMD_PREFIX) or line.startswith(CMD_PREFIX_ALTER): + if verbose: print 'running', line if self.outputs != None and not matched(self.outputs, self.matches, self.fuzz): return False self.query = line[len(CMD_PREFIX):] @@ -165,6 +168,7 @@ def run(): path = sys.argv[2] fuzz = (sys.argv[3] == 'true') mysql_client = sys.argv[4] + if verbose: print 'parsing `{}`'.format(path) matched, matcher, todos = parse_exe_match(path, Executor(dbc), Executor(mysql_client), fuzz)