Skip to content

Commit

Permalink
Storages: Fix comparing null in MinMaxIndex
Browse files Browse the repository at this point in the history
  • Loading branch information
JinheLin committed Aug 27, 2024
1 parent 81cd947 commit 322c3cd
Show file tree
Hide file tree
Showing 4 changed files with 229 additions and 13 deletions.
17 changes: 7 additions & 10 deletions dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,6 @@ RSResults MinMaxIndex::checkNullableIn(
{
const auto & column_nullable = static_cast<const ColumnNullable &>(*minmaxes);
const auto & null_map = column_nullable.getNullMapColumn();

RSResults results(pack_count, RSResult::SomeNull);
const auto * raw_type = type.get();

#define DISPATCH(TYPE) \
Expand All @@ -284,6 +282,7 @@ RSResults MinMaxIndex::checkNullableIn(
const auto * string_column = checkAndGetColumn<ColumnString>(column_nullable.getNestedColumnPtr().get());
const auto & chars = string_column->getChars();
const auto & offsets = string_column->getOffsets();
RSResults results(pack_count, RSResult::SomeNull);
for (size_t i = start_pack; i < start_pack + pack_count; ++i)
{
if (details::minIsNull(null_map, i))
Expand Down Expand Up @@ -321,7 +320,7 @@ RSResults MinMaxIndex::checkNullableIn(
values,
type);
}
return results;
return RSResults(pack_count, RSResult::SomeNull);
}

template <typename T>
Expand Down Expand Up @@ -351,8 +350,6 @@ RSResults MinMaxIndex::checkIn(
const std::vector<Field> & values,
const DataTypePtr & type)
{
RSResults results(pack_count, RSResult::None);

const auto * raw_type = type.get();
if (typeid_cast<const DataTypeNullable *>(raw_type))
{
Expand All @@ -371,6 +368,7 @@ RSResults MinMaxIndex::checkIn(
}
if (typeid_cast<const DataTypeString *>(raw_type))
{
RSResults results(pack_count, RSResult::None);
const auto * string_column = checkAndGetColumn<ColumnString>(minmaxes.get());
const auto & chars = string_column->getChars();
const auto & offsets = string_column->getOffsets();
Expand Down Expand Up @@ -421,9 +419,8 @@ RSResults MinMaxIndex::checkCmpImpl(size_t start_pack, size_t pack_count, const
template <typename Op>
RSResults MinMaxIndex::checkCmp(size_t start_pack, size_t pack_count, const Field & value, const DataTypePtr & type)
{
RSResults results(pack_count, RSResult::None);
if (value.isNull())
return results;
return RSResults(pack_count, RSResult::NoneNull);

const auto * raw_type = type.get();
if (typeid_cast<const DataTypeNullable *>(raw_type))
Expand All @@ -445,6 +442,7 @@ RSResults MinMaxIndex::checkCmp(size_t start_pack, size_t pack_count, const Fiel
const auto * string_column = checkAndGetColumn<ColumnString>(minmaxes.get());
const auto & chars = string_column->getChars();
const auto & offsets = string_column->getOffsets();
RSResults results(pack_count, RSResult::None);
for (size_t i = start_pack; i < start_pack + pack_count; ++i)
{
if (!has_value_marks[i])
Expand Down Expand Up @@ -520,8 +518,6 @@ RSResults MinMaxIndex::checkNullableCmp(
{
const auto & column_nullable = static_cast<const ColumnNullable &>(*minmaxes);
const auto & null_map = column_nullable.getNullMapColumn();

RSResults results(pack_count, RSResult::SomeNull);
const auto * raw_type = type.get();

#define DISPATCH(TYPE) \
Expand All @@ -546,6 +542,7 @@ RSResults MinMaxIndex::checkNullableCmp(
const auto * string_column = checkAndGetColumn<ColumnString>(column_nullable.getNestedColumnPtr().get());
const auto & chars = string_column->getChars();
const auto & offsets = string_column->getOffsets();
RSResults results(pack_count, RSResult::SomeNull);
for (size_t i = start_pack; i < start_pack + pack_count; ++i)
{
if (details::minIsNull(null_map, i))
Expand Down Expand Up @@ -583,7 +580,7 @@ RSResults MinMaxIndex::checkNullableCmp(
value,
type);
}
return results;
return RSResults(pack_count, RSResult::SomeNull);
}

// If a pack only contains null marks and delete marks, checkIsNull will return RSResult::All.
Expand Down
7 changes: 4 additions & 3 deletions dbms/src/Storages/DeltaMerge/Index/RoughCheck.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,11 @@ struct CheckIn
{
if (result == RSResult::All)
break;
// skip null value

if (v.isNull())
continue;
result = result || CheckEqual::check<T>(v, type, min, max);
result = result || RSResult::NoneNull;
else
result = result || CheckEqual::check<T>(v, type, min, max);
}
return result;
}
Expand Down
78 changes: 78 additions & 0 deletions dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2662,4 +2662,82 @@ try
}
}
CATCH

TEST_F(MinMaxIndexTest, CheckCmpNullValue)
try
{
auto col_type = DataTypeFactory::instance().get("Int64");
auto minmax_index = createMinMaxIndex(*col_type, min_max_check_test_data);
auto rs_index = RSIndex(col_type, minmax_index);
RSCheckParam param;
param.indexes.emplace(DEFAULT_COL_ID, rs_index);

Field null_value;
ASSERT_TRUE(null_value.isNull());

auto check = [&](RSOperatorPtr rs, RSResult expected_res) {
auto results = rs->roughCheck(0, min_max_check_test_data.size(), param);
for (auto actual_res : results)
ASSERT_EQ(actual_res, expected_res) << fmt::format("actual: {}, expected: {}", actual_res, expected_res);
};

check(createGreater(attr("Int64"), null_value), RSResult::NoneNull);
check(createGreaterEqual(attr("Int64"), null_value), RSResult::NoneNull);
check(createLess(attr("Int64"), null_value), RSResult::AllNull);
check(createLessEqual(attr("Int64"), null_value), RSResult::AllNull);
check(createEqual(attr("Int64"), null_value), RSResult::NoneNull);
check(createNotEqual(attr("Int64"), null_value), RSResult::AllNull);
}
CATCH

TEST_F(MinMaxIndexTest, CheckInNullValue)
try
{
auto col_type = DataTypeFactory::instance().get("Nullable(Int64)");
auto minmax_index = createMinMaxIndex(*col_type, min_max_check_test_data);
auto rs_index = RSIndex(col_type, minmax_index);
RSCheckParam param;
param.indexes.emplace(DEFAULT_COL_ID, rs_index);

Field null_value;
ASSERT_TRUE(null_value.isNull());

{
auto rs = createIn(attr("Nullable(Int64)"), {null_value});
auto results = rs->roughCheck(0, min_max_check_test_data.size(), param);
auto excepted_results = {
RSResult::NoneNull,
RSResult::NoneNull,
RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check
RSResult::NoneNull,
RSResult::NoneNull,
RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check
RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check
RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check
RSResult::NoneNull,
RSResult::NoneNull,
};
ASSERT_TRUE(std::equal(results.cbegin(), results.cend(), excepted_results.begin()));
}

{
auto rs = createIn(attr("Nullable(Int64)"), {Field{static_cast<Int64>(1)}, null_value});
auto results = rs->roughCheck(0, min_max_check_test_data.size(), param);
auto excepted_results = {
RSResult::SomeNull,
RSResult::NoneNull,
RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check
RSResult::SomeNull,
RSResult::NoneNull,
RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check
RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check
RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check
RSResult::All,
RSResult::AllNull,
};
ASSERT_TRUE(std::equal(results.cbegin(), results.cend(), excepted_results.begin()));
}
}
CATCH

} // namespace DB::DM::tests
140 changes: 140 additions & 0 deletions tests/fullstack-test-dt/expr/compare_null.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Copyright 2023 PingCAP, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Must enable DT rough set filter and open debug level log to run this test, otherwise disable this test
mysql> drop table if exists test.t;

mysql> create table test.t (a datetime);

mysql> insert into test.t values("2024-08-26"),("2024-08-25"),("2024-08-24"),("2024-08-23");

mysql> alter table test.t set tiflash replica 1;

func> wait_table test t

mysql> alter table test.t compact tiflash replica;

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a between NULL and '2024-08-25';

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where not (a between NULL and '2024-08-25');
+---------------------+
| a |
+---------------------+
| 2024-08-26 00:00:00 |
+---------------------+

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a in (NULL);

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a not in (NULL);

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a in (NULL, "2024-08-26");
+---------------------+
| a |
+---------------------+
| 2024-08-26 00:00:00 |
+---------------------+

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a not in (NULL, "2024-08-26");

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a in (NULL, "2024-09-01");

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a not in (NULL, "2024-09-01");

mysql> drop table if exists test.t;

mysql> create table test.t (a datetime);

mysql> insert into test.t values("2024-08-26"),("2024-08-26"),("2024-08-26"),("2024-08-26");

mysql> alter table test.t set tiflash replica 1;

func> wait_table test t

mysql> alter table test.t compact tiflash replica;

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a in (NULL, "2024-08-26");
+---------------------+
| a |
+---------------------+
| 2024-08-26 00:00:00 |
| 2024-08-26 00:00:00 |
| 2024-08-26 00:00:00 |
| 2024-08-26 00:00:00 |
+---------------------+

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a not in (NULL, "2024-08-26");

# not null

mysql> drop table if exists test.t;

mysql> create table test.t (a datetime not null);

mysql> insert into test.t values("2024-08-26"),("2024-08-25"),("2024-08-24"),("2024-08-23");

mysql> alter table test.t set tiflash replica 1;

func> wait_table test t

mysql> alter table test.t compact tiflash replica;

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a between NULL and '2024-08-25';

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where not (a between NULL and '2024-08-25');
+---------------------+
| a |
+---------------------+
| 2024-08-26 00:00:00 |
+---------------------+

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a in (NULL);

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a not in (NULL);

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a in (NULL, "2024-08-26");
+---------------------+
| a |
+---------------------+
| 2024-08-26 00:00:00 |
+---------------------+

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a not in (NULL, "2024-08-26");

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a in (NULL, "2024-09-01");

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a not in (NULL, "2024-09-01");

mysql> drop table if exists test.t;

mysql> create table test.t (a datetime not null);

mysql> insert into test.t values("2024-08-26"),("2024-08-26"),("2024-08-26"),("2024-08-26");

mysql> alter table test.t set tiflash replica 1;

func> wait_table test t

mysql> alter table test.t compact tiflash replica;

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a in (NULL, "2024-08-26");
+---------------------+
| a |
+---------------------+
| 2024-08-26 00:00:00 |
| 2024-08-26 00:00:00 |
| 2024-08-26 00:00:00 |
| 2024-08-26 00:00:00 |
+---------------------+

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a not in (NULL, "2024-08-26");

0 comments on commit 322c3cd

Please sign in to comment.