From 9bdb5584e777b6402648c2f9f5b4fcf351f4fbae Mon Sep 17 00:00:00 2001 From: JaySon Date: Fri, 16 Sep 2022 15:21:00 +0800 Subject: [PATCH] This is an automated cherry-pick of #5899 Signed-off-by: ti-chi-bot --- dbms/src/Storages/Transaction/RowCodec.cpp | 21 +++- .../Transaction/tests/RowCodecTestUtils.h | 45 ++++--- .../tests/gtest_region_block_reader.cpp | 116 +++++++++++++----- dbms/src/TestUtils/TiFlashTestBasic.cpp | 14 +++ dbms/src/TestUtils/TiFlashTestBasic.h | 9 ++ 5 files changed, 149 insertions(+), 56 deletions(-) diff --git a/dbms/src/Storages/Transaction/RowCodec.cpp b/dbms/src/Storages/Transaction/RowCodec.cpp index 427544a0467..54c90920da6 100644 --- a/dbms/src/Storages/Transaction/RowCodec.cpp +++ b/dbms/src/Storages/Transaction/RowCodec.cpp @@ -194,10 +194,18 @@ struct RowEncoderV2 /// Cache encoded individual columns. for (size_t i_col = 0, i_val = 0; i_col < table_info.columns.size(); i_col++) { + if (i_val == fields.size()) + break; + const auto & column_info = table_info.columns[i_col]; const auto & field = fields[i_val]; if ((table_info.pk_is_handle || table_info.is_common_handle) && column_info.hasPriKeyFlag()) + { + // for common handle/pk is handle table, + // the field with primary key flag is usually encoded to key instead of value continue; + } + if (column_info.id > std::numeric_limits::ColumnIDType>::max()) is_big = true; if (!field.isNull()) @@ -213,9 +221,6 @@ struct RowEncoderV2 null_column_ids.emplace(column_info.id); } i_val++; - - if (i_val == fields.size()) - break; } is_big = is_big || value_length > std::numeric_limits::ValueOffsetType>::max(); @@ -376,8 +381,14 @@ bool appendRowV2ToBlockImpl( else is_null = id_null < null_column_ids.size(); +<<<<<<< HEAD auto next_datum_column_id = is_null ? null_column_ids[id_null] : not_null_column_ids[id_not_null]; if (column_ids_iter->first > next_datum_column_id) +======= + auto next_datum_column_id = is_null ? null_column_ids[idx_null] : not_null_column_ids[idx_not_null]; + const auto next_column_id = column_ids_iter->first; + if (next_column_id > next_datum_column_id) +>>>>>>> aae88b120d (tests: Fix RegionBlockReaderTest helper functions (#5899)) { // extra column if (!force_decode) @@ -387,7 +398,7 @@ bool appendRowV2ToBlockImpl( else id_not_null++; } - else if (column_ids_iter->first < next_datum_column_id) + else if (next_column_id < next_datum_column_id) { const auto & column_info = column_infos[column_ids_iter->second]; if (!addDefaultValueToColumnIfPossible(column_info, block, block_column_pos, force_decode)) @@ -399,7 +410,7 @@ bool appendRowV2ToBlockImpl( { // if pk_handle_id is a valid column id, then it means the table's pk_is_handle is true // we can just ignore the pk value encoded in value part - if (unlikely(column_ids_iter->first == pk_handle_id)) + if (unlikely(next_column_id == pk_handle_id)) { column_ids_iter++; block_column_pos++; diff --git a/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h b/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h index 34e0d3d4104..0cc5bfe6bff 100644 --- a/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h +++ b/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h @@ -15,7 +15,9 @@ #pragma once #include #include +#include #include +#include namespace DB::tests { @@ -146,7 +148,7 @@ struct ColumnIDValue { static constexpr bool value_is_null = true; using ValueType = std::decay_t; - ColumnIDValue(ColumnID id_) + explicit ColumnIDValue(ColumnID id_) : id(id_) {} ColumnID id; @@ -211,46 +213,55 @@ void getTableInfoFieldsInternal(OrderedColumnInfoFields & column_info_fields, Ty } template -std::pair> getTableInfoAndFields(ColumnIDs handle_ids, bool is_common_handle, Types &&... column_value_ids) +std::pair> getTableInfoAndFields(ColumnIDs pk_col_ids, bool is_common_handle, Types &&... column_value_ids) { OrderedColumnInfoFields column_info_fields; getTableInfoFieldsInternal(column_info_fields, std::forward(column_value_ids)...); TableInfo table_info; std::vector fields; + bool pk_is_handle = pk_col_ids.size() == 1 && pk_col_ids[0] != ::DB::TiDBPkColumnID; + for (auto & column_info_field : column_info_fields) { auto & column = std::get<0>(column_info_field.second); auto & field = std::get<1>(column_info_field.second); - if (std::find(handle_ids.begin(), handle_ids.end(), column.id) != handle_ids.end()) + if (std::find(pk_col_ids.begin(), pk_col_ids.end(), column.id) != pk_col_ids.end()) { column.setPriKeyFlag(); + if (column.tp != TiDB::TypeLong && column.tp != TiDB::TypeTiny && column.tp != TiDB::TypeLongLong && column.tp != TiDB::TypeShort && column.tp != TiDB::TypeInt24) + { + pk_is_handle = false; + } } table_info.columns.emplace_back(std::move(column)); fields.emplace_back(std::move(field)); } - if (!is_common_handle) - { - if (handle_ids[0] != EXTRA_HANDLE_COLUMN_ID) - table_info.pk_is_handle = true; - } - else + + table_info.pk_is_handle = pk_is_handle; + table_info.is_common_handle = is_common_handle; + if (is_common_handle) { table_info.is_common_handle = true; - TiDB::IndexInfo index_info; - for (auto handle_id : handle_ids) + // TiFlash maintains the column name of primary key + // for common handle table + TiDB::IndexInfo pk_index_info; + pk_index_info.is_primary = true; + pk_index_info.idx_name = "PRIMARY"; + pk_index_info.is_unique = true; + for (auto pk_col_id : pk_col_ids) { TiDB::IndexColumnInfo index_column_info; for (auto & column : table_info.columns) { - if (column.id == handle_id) + if (column.id == pk_col_id) { index_column_info.name = column.name; break; } } - index_info.idx_cols.emplace_back(index_column_info); + pk_index_info.idx_cols.emplace_back(index_column_info); } - table_info.index_infos.emplace_back(index_info); + table_info.index_infos.emplace_back(pk_index_info); } return std::make_pair(std::move(table_info), std::move(fields)); @@ -272,7 +283,7 @@ inline DecodingStorageSchemaSnapshotConstPtr getDecodingStorageSchemaSnapshot(co store_columns.emplace_back(VERSION_COLUMN_ID, VERSION_COLUMN_NAME, VERSION_COLUMN_TYPE); store_columns.emplace_back(TAG_COLUMN_ID, TAG_COLUMN_NAME, TAG_COLUMN_TYPE); ColumnID handle_id = EXTRA_HANDLE_COLUMN_ID; - for (auto & column_info : table_info.columns) + for (const auto & column_info : table_info.columns) { if (table_info.pk_is_handle) { @@ -301,7 +312,7 @@ size_t valueStartPos(const TableInfo & table_info) inline Block decodeRowToBlock(const String & row_value, DecodingStorageSchemaSnapshotConstPtr decoding_schema) { - auto & sorted_column_id_with_pos = decoding_schema->sorted_column_id_with_pos; + const auto & sorted_column_id_with_pos = decoding_schema->sorted_column_id_with_pos; auto iter = sorted_column_id_with_pos.begin(); const size_t value_column_num = 3; // skip first three column which is EXTRA_HANDLE_COLUMN, VERSION_COLUMN, TAG_COLUMN @@ -347,4 +358,4 @@ T getValueByRowV1(const T & v) return static_cast(std::move((*block.getByPosition(0).column)[0].template safeGet())); } -} // namespace DB::tests \ No newline at end of file +} // namespace DB::tests diff --git a/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp index d08b4dd3738..dd58f166dac 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp @@ -12,19 +12,28 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include +#include +#include #include -#include - -#include "RowCodecTestUtils.h" +#include +#include +#include +#include using TableInfo = TiDB::TableInfo; namespace DB::tests { using ColumnIDs = std::vector; -class RegionBlockReaderTestFixture : public ::testing::Test +class RegionBlockReaderTest : public ::testing::Test { +public: + RegionBlockReaderTest() + : logger(Logger::get("RegionBlockReaderTest")) + {} + protected: Int64 handle_value = 100; UInt8 del_mark_value = 0; @@ -34,6 +43,8 @@ class RegionBlockReaderTestFixture : public ::testing::Test RegionDataReadInfoList data_list_read; std::unordered_map fields_map; + LoggerPtr logger; + enum RowEncodeVersion { RowV1, @@ -49,7 +60,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test void TearDown() override {} - void encodeColumns(TableInfo & table_info, std::vector & fields, RowEncodeVersion row_version) + void encodeColumns(const TableInfo & table_info, const std::vector & fields, RowEncodeVersion row_version) { // for later check std::unordered_map column_name_columns_index_map; @@ -59,17 +70,24 @@ class RegionBlockReaderTestFixture : public ::testing::Test column_name_columns_index_map.emplace(table_info.columns[i].name, i); } - std::vector value_fields; - std::vector pk_fields; + std::vector value_encode_fields; + std::vector key_encode_fields; for (size_t i = 0; i < table_info.columns.size(); i++) { - if (!table_info.columns[i].hasPriKeyFlag()) - value_fields.emplace_back(fields[i]); + if (table_info.is_common_handle || table_info.pk_is_handle) + { + if (!table_info.columns[i].hasPriKeyFlag()) + value_encode_fields.emplace_back(fields[i]); + else + key_encode_fields.emplace_back(fields[i]); + } else - pk_fields.emplace_back(fields[i]); + { + value_encode_fields.emplace_back(fields[i]); + } } - // create PK + // create the RawTiDBPK section of encoded key WriteBufferFromOwnString pk_buf; if (table_info.is_common_handle) { @@ -77,7 +95,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++) { auto idx = column_name_columns_index_map[primary_index_info.idx_cols[i].name]; - EncodeDatum(pk_fields[i], table_info.columns[idx].getCodecFlag(), pk_buf); + EncodeDatum(key_encode_fields[i], table_info.columns[idx].getCodecFlag(), pk_buf); } } else @@ -85,21 +103,22 @@ class RegionBlockReaderTestFixture : public ::testing::Test DB::EncodeInt64(handle_value, pk_buf); } RawTiDBPK pk{std::make_shared(pk_buf.releaseStr())}; - // create value + + // create encoded value WriteBufferFromOwnString value_buf; if (row_version == RowEncodeVersion::RowV1) { - encodeRowV1(table_info, value_fields, value_buf); + encodeRowV1(table_info, value_encode_fields, value_buf); } else if (row_version == RowEncodeVersion::RowV2) { - encodeRowV2(table_info, value_fields, value_buf); + encodeRowV2(table_info, value_encode_fields, value_buf); } else { throw Exception("Unknown row format " + std::to_string(row_version), ErrorCodes::LOGICAL_ERROR); } - auto row_value = std::make_shared(std::move(value_buf.str())); + auto row_value = std::make_shared(value_buf.releaseStr()); for (size_t i = 0; i < rows; i++) data_list_read.emplace_back(pk, del_mark_value, version_value, row_value); } @@ -112,6 +131,14 @@ class RegionBlockReaderTestFixture : public ::testing::Test for (size_t pos = 0; pos < block.columns(); pos++) { const auto & column_element = block.getByPosition(pos); + auto gen_error_log = [&]() { + return fmt::format( + " when checking column\n id={}, name={}, nrow={}\n decoded block is:\n{}\n", + column_element.column_id, + column_element.name, + row, + getColumnsContent(block.getColumnsWithTypeAndName())); + }; if (row == 0) { ASSERT_EQ(column_element.column->size(), rows); @@ -120,24 +147,24 @@ class RegionBlockReaderTestFixture : public ::testing::Test { if (decoding_schema->is_common_handle) { - ASSERT_EQ((*column_element.column)[row], Field(*std::get<0>(data_list_read[row]))); + ASSERT_FIELD_EQ((*column_element.column)[row], Field(*std::get<0>(data_list_read[row]))) << gen_error_log(); } else { - ASSERT_EQ((*column_element.column)[row], Field(handle_value)); + ASSERT_FIELD_EQ((*column_element.column)[row], Field(handle_value)) << gen_error_log(); } } else if (column_element.name == VERSION_COLUMN_NAME) { - ASSERT_EQ((*column_element.column)[row], Field(version_value)); + ASSERT_FIELD_EQ((*column_element.column)[row], Field(version_value)) << gen_error_log(); } else if (column_element.name == TAG_COLUMN_NAME) { - ASSERT_EQ((*column_element.column)[row], Field(NearestFieldType::Type(del_mark_value))); + ASSERT_FIELD_EQ((*column_element.column)[row], Field(NearestFieldType::Type(del_mark_value))) << gen_error_log(); } else { - ASSERT_EQ((*column_element.column)[row], fields_map.at(column_element.column_id)); + ASSERT_FIELD_EQ((*column_element.column)[row], fields_map.at(column_element.column_id)) << gen_error_log(); } } } @@ -154,10 +181,10 @@ class RegionBlockReaderTestFixture : public ::testing::Test return true; } - std::pair> getNormalTableInfoFields(const ColumnIDs & handle_ids, bool is_common_handle) const + std::pair> getNormalTableInfoFields(const ColumnIDs & pk_col_ids, bool is_common_handle) const { return getTableInfoAndFields( - handle_ids, + pk_col_ids, is_common_handle, ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), @@ -241,31 +268,45 @@ class RegionBlockReaderTestFixture : public ::testing::Test } }; -TEST_F(RegionBlockReaderTestFixture, PKIsNotHandle) +TEST_F(RegionBlockReaderTest, PKIsNotHandle) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); + ASSERT_EQ(table_info.is_common_handle, false); + ASSERT_EQ(table_info.pk_is_handle, false); + ASSERT_FALSE(table_info.getColumnInfo(2).hasPriKeyFlag()); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2); auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); ASSERT_TRUE(decodeAndCheckColumns(decoding_schema, true)); } -TEST_F(RegionBlockReaderTestFixture, PKIsHandle) +TEST_F(RegionBlockReaderTest, PKIsHandle) { auto [table_info, fields] = getNormalTableInfoFields({2}, false); + ASSERT_EQ(table_info.is_common_handle, false); + ASSERT_EQ(table_info.pk_is_handle, true); + ASSERT_TRUE(table_info.getColumnInfo(2).hasPriKeyFlag()); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2); auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); ASSERT_TRUE(decodeAndCheckColumns(decoding_schema, true)); } -TEST_F(RegionBlockReaderTestFixture, CommonHandle) +TEST_F(RegionBlockReaderTest, CommonHandle) { auto [table_info, fields] = getNormalTableInfoFields({2, 3, 4}, true); + ASSERT_EQ(table_info.is_common_handle, true); + ASSERT_EQ(table_info.pk_is_handle, false); + ASSERT_TRUE(table_info.getColumnInfo(2).hasPriKeyFlag()); + ASSERT_TRUE(table_info.getColumnInfo(3).hasPriKeyFlag()); + ASSERT_TRUE(table_info.getColumnInfo(4).hasPriKeyFlag()); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2); auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); ASSERT_TRUE(decodeAndCheckColumns(decoding_schema, true)); } -TEST_F(RegionBlockReaderTestFixture, MissingColumnRowV2) +TEST_F(RegionBlockReaderTest, MissingColumnRowV2) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV2); @@ -274,7 +315,7 @@ TEST_F(RegionBlockReaderTestFixture, MissingColumnRowV2) ASSERT_TRUE(decodeAndCheckColumns(new_decoding_schema, false)); } -TEST_F(RegionBlockReaderTestFixture, MissingColumnRowV1) +TEST_F(RegionBlockReaderTest, MissingColumnRowV1) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV1); @@ -283,7 +324,7 @@ TEST_F(RegionBlockReaderTestFixture, MissingColumnRowV1) ASSERT_TRUE(decodeAndCheckColumns(new_decoding_schema, false)); } -TEST_F(RegionBlockReaderTestFixture, ExtraColumnRowV2) +TEST_F(RegionBlockReaderTest, ExtraColumnRowV2) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV2); @@ -293,7 +334,7 @@ TEST_F(RegionBlockReaderTestFixture, ExtraColumnRowV2) ASSERT_TRUE(decodeAndCheckColumns(new_decoding_schema, true)); } -TEST_F(RegionBlockReaderTestFixture, ExtraColumnRowV1) +TEST_F(RegionBlockReaderTest, ExtraColumnRowV1) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV1); @@ -303,7 +344,7 @@ TEST_F(RegionBlockReaderTestFixture, ExtraColumnRowV1) ASSERT_TRUE(decodeAndCheckColumns(new_decoding_schema, true)); } -TEST_F(RegionBlockReaderTestFixture, OverflowColumnRowV2) +TEST_F(RegionBlockReaderTest, OverflowColumnRowV2) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV2); @@ -316,7 +357,7 @@ TEST_F(RegionBlockReaderTestFixture, OverflowColumnRowV2) ASSERT_TRUE(decodeAndCheckColumns(decoding_schema, true)); } -TEST_F(RegionBlockReaderTestFixture, OverflowColumnRowV1) +TEST_F(RegionBlockReaderTest, OverflowColumnRowV1) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV1); @@ -329,17 +370,24 @@ TEST_F(RegionBlockReaderTestFixture, OverflowColumnRowV1) ASSERT_TRUE(decodeAndCheckColumns(decoding_schema, true)); } -TEST_F(RegionBlockReaderTestFixture, InvalidNULLRowV2) +TEST_F(RegionBlockReaderTest, InvalidNULLRowV2) +try { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); + ASSERT_FALSE(table_info.getColumnInfo(11).hasNotNullFlag()); // col 11 is nullable + encodeColumns(table_info, fields, RowEncodeVersion::RowV2); + auto new_table_info = getTableInfoFieldsForInvalidNULLTest({EXTRA_HANDLE_COLUMN_ID}, false); + ASSERT_TRUE(new_table_info.getColumnInfo(11).hasNotNullFlag()); // col 11 is not null + auto new_decoding_schema = getDecodingStorageSchemaSnapshot(new_table_info); ASSERT_FALSE(decodeAndCheckColumns(new_decoding_schema, false)); ASSERT_ANY_THROW(decodeAndCheckColumns(new_decoding_schema, true)); } +CATCH -TEST_F(RegionBlockReaderTestFixture, InvalidNULLRowV1) +TEST_F(RegionBlockReaderTest, InvalidNULLRowV1) { auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV1); diff --git a/dbms/src/TestUtils/TiFlashTestBasic.cpp b/dbms/src/TestUtils/TiFlashTestBasic.cpp index 18c166f453e..47dc82f983a 100644 --- a/dbms/src/TestUtils/TiFlashTestBasic.cpp +++ b/dbms/src/TestUtils/TiFlashTestBasic.cpp @@ -13,6 +13,8 @@ // limitations under the License. #include +#include +#include namespace DB::tests { @@ -27,4 +29,16 @@ ::testing::AssertionResult DataTypeCompare( else return ::testing::internal::EqFailure(lhs_expr, rhs_expr, lhs->getName(), rhs->getName(), false); } + +::testing::AssertionResult fieldCompare( + const char * lhs_expr, + const char * rhs_expr, + const Field & lhs, + const Field & rhs) +{ + if (lhs == rhs) + return ::testing::AssertionSuccess(); + return ::testing::internal::EqFailure(lhs_expr, rhs_expr, lhs.toString(), rhs.toString(), false); +} + } // namespace DB::tests diff --git a/dbms/src/TestUtils/TiFlashTestBasic.h b/dbms/src/TestUtils/TiFlashTestBasic.h index 91c2cc1d061..8a9202929dc 100644 --- a/dbms/src/TestUtils/TiFlashTestBasic.h +++ b/dbms/src/TestUtils/TiFlashTestBasic.h @@ -83,6 +83,15 @@ ::testing::AssertionResult DataTypeCompare( #define ASSERT_DATATYPE_EQ(val1, val2) ASSERT_PRED_FORMAT2(::DB::tests::DataTypeCompare, val1, val2) #define EXPECT_DATATYPE_EQ(val1, val2) EXPECT_PRED_FORMAT2(::DB::tests::DataTypeCompare, val1, val2) +::testing::AssertionResult fieldCompare( + const char * lhs_expr, + const char * rhs_expr, + const Field & lhs, + const Field & rhs); + +#define ASSERT_FIELD_EQ(val1, val2) ASSERT_PRED_FORMAT2(::DB::tests::fieldCompare, val1, val2) +#define EXPECT_FIELD_EQ(val1, val2) EXPECT_PRED_FORMAT2(::DB::tests::fieldCompare, val1, val2) + // A simple helper for getting DataType from type name inline DataTypePtr typeFromString(const String & str) {