Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Storages: Fix comparing null in MinMaxIndex #9373

Merged
merged 10 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 7 additions & 10 deletions dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,6 @@ RSResults MinMaxIndex::checkNullableIn(
{
const auto & column_nullable = static_cast<const ColumnNullable &>(*minmaxes);
const auto & null_map = column_nullable.getNullMapColumn();

RSResults results(pack_count, RSResult::SomeNull);
Copy link
Contributor Author

@JinheLin JinheLin Aug 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just move some results to a smaller scope.

const auto * raw_type = type.get();

#define DISPATCH(TYPE) \
Expand All @@ -284,6 +282,7 @@ RSResults MinMaxIndex::checkNullableIn(
const auto * string_column = checkAndGetColumn<ColumnString>(column_nullable.getNestedColumnPtr().get());
const auto & chars = string_column->getChars();
const auto & offsets = string_column->getOffsets();
RSResults results(pack_count, RSResult::SomeNull);
for (size_t i = start_pack; i < start_pack + pack_count; ++i)
{
if (details::minIsNull(null_map, i))
Expand Down Expand Up @@ -321,7 +320,7 @@ RSResults MinMaxIndex::checkNullableIn(
values,
type);
}
return results;
return RSResults(pack_count, RSResult::SomeNull);
}

template <typename T>
Expand Down Expand Up @@ -351,8 +350,6 @@ RSResults MinMaxIndex::checkIn(
const std::vector<Field> & values,
const DataTypePtr & type)
{
RSResults results(pack_count, RSResult::None);

const auto * raw_type = type.get();
if (typeid_cast<const DataTypeNullable *>(raw_type))
{
Expand All @@ -371,6 +368,7 @@ RSResults MinMaxIndex::checkIn(
}
if (typeid_cast<const DataTypeString *>(raw_type))
{
RSResults results(pack_count, RSResult::None);
const auto * string_column = checkAndGetColumn<ColumnString>(minmaxes.get());
const auto & chars = string_column->getChars();
const auto & offsets = string_column->getOffsets();
Expand Down Expand Up @@ -421,9 +419,8 @@ RSResults MinMaxIndex::checkCmpImpl(size_t start_pack, size_t pack_count, const
template <typename Op>
RSResults MinMaxIndex::checkCmp(size_t start_pack, size_t pack_count, const Field & value, const DataTypePtr & type)
{
RSResults results(pack_count, RSResult::None);
if (value.isNull())
return results;
return RSResults(pack_count, RSResult::NoneNull);

const auto * raw_type = type.get();
if (typeid_cast<const DataTypeNullable *>(raw_type))
Expand All @@ -445,6 +442,7 @@ RSResults MinMaxIndex::checkCmp(size_t start_pack, size_t pack_count, const Fiel
const auto * string_column = checkAndGetColumn<ColumnString>(minmaxes.get());
const auto & chars = string_column->getChars();
const auto & offsets = string_column->getOffsets();
RSResults results(pack_count, RSResult::None);
for (size_t i = start_pack; i < start_pack + pack_count; ++i)
{
if (!has_value_marks[i])
Expand Down Expand Up @@ -520,8 +518,6 @@ RSResults MinMaxIndex::checkNullableCmp(
{
const auto & column_nullable = static_cast<const ColumnNullable &>(*minmaxes);
const auto & null_map = column_nullable.getNullMapColumn();

RSResults results(pack_count, RSResult::SomeNull);
const auto * raw_type = type.get();

#define DISPATCH(TYPE) \
Expand All @@ -546,6 +542,7 @@ RSResults MinMaxIndex::checkNullableCmp(
const auto * string_column = checkAndGetColumn<ColumnString>(column_nullable.getNestedColumnPtr().get());
const auto & chars = string_column->getChars();
const auto & offsets = string_column->getOffsets();
RSResults results(pack_count, RSResult::SomeNull);
for (size_t i = start_pack; i < start_pack + pack_count; ++i)
{
if (details::minIsNull(null_map, i))
Expand Down Expand Up @@ -583,7 +580,7 @@ RSResults MinMaxIndex::checkNullableCmp(
value,
type);
}
return results;
return RSResults(pack_count, RSResult::SomeNull);
}

// If a pack only contains null marks and delete marks, checkIsNull will return RSResult::All.
Expand Down
7 changes: 4 additions & 3 deletions dbms/src/Storages/DeltaMerge/Index/RoughCheck.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,11 @@ struct CheckIn
{
if (result == RSResult::All)
break;
// skip null value

if (v.isNull())
continue;
result = result || CheckEqual::check<T>(v, type, min, max);
result = result || RSResult::NoneNull;
else
result = result || CheckEqual::check<T>(v, type, min, max);
}
return result;
}
Expand Down
78 changes: 78 additions & 0 deletions dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2662,4 +2662,82 @@ try
}
}
CATCH

TEST_F(MinMaxIndexTest, CheckCmpNullValue)
try
{
auto col_type = DataTypeFactory::instance().get("Int64");
auto minmax_index = createMinMaxIndex(*col_type, min_max_check_test_data);
auto rs_index = RSIndex(col_type, minmax_index);
RSCheckParam param;
param.indexes.emplace(DEFAULT_COL_ID, rs_index);

Field null_value;
ASSERT_TRUE(null_value.isNull());

auto check = [&](RSOperatorPtr rs, RSResult expected_res) {
auto results = rs->roughCheck(0, min_max_check_test_data.size(), param);
for (auto actual_res : results)
ASSERT_EQ(actual_res, expected_res) << fmt::format("actual: {}, expected: {}", actual_res, expected_res);
};

check(createGreater(attr("Int64"), null_value), RSResult::NoneNull);
check(createGreaterEqual(attr("Int64"), null_value), RSResult::NoneNull);
check(createLess(attr("Int64"), null_value), RSResult::AllNull);
check(createLessEqual(attr("Int64"), null_value), RSResult::AllNull);
check(createEqual(attr("Int64"), null_value), RSResult::NoneNull);
check(createNotEqual(attr("Int64"), null_value), RSResult::AllNull);
Comment on lines +2684 to +2689
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why "less"/"lessequal"/"noteuqal" return "AllNull" but others return "NoneNull"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because Less is transformed to not GreaterEqual in implementation...

GreaterEqual(NULL) will return RSResult::NoneNull as expected.

RSResults roughCheck(size_t start_pack, size_t pack_count, const RSCheckParam & param) override
{
auto results = minMaxCheckCmp<RoughCheck::CheckGreaterEqual>(start_pack, pack_count, param, attr, value);
std::transform(results.begin(), results.end(), results.begin(), [](RSResult result) { return !result; });
return results;
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LessEqual is transformed to not Greater.
NotEqual is not Equal.

}
CATCH

TEST_F(MinMaxIndexTest, CheckInNullValue)
try
{
auto col_type = DataTypeFactory::instance().get("Nullable(Int64)");
auto minmax_index = createMinMaxIndex(*col_type, min_max_check_test_data);
auto rs_index = RSIndex(col_type, minmax_index);
RSCheckParam param;
param.indexes.emplace(DEFAULT_COL_ID, rs_index);

Field null_value;
ASSERT_TRUE(null_value.isNull());

{
auto rs = createIn(attr("Nullable(Int64)"), {null_value});
auto results = rs->roughCheck(0, min_max_check_test_data.size(), param);
auto excepted_results = {
RSResult::NoneNull,
RSResult::NoneNull,
RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check
RSResult::NoneNull,
RSResult::NoneNull,
RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check
RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check
RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check
RSResult::NoneNull,
RSResult::NoneNull,
};
ASSERT_TRUE(std::equal(results.cbegin(), results.cend(), excepted_results.begin()));
}

{
auto rs = createIn(attr("Nullable(Int64)"), {Field{static_cast<Int64>(1)}, null_value});
auto results = rs->roughCheck(0, min_max_check_test_data.size(), param);
auto excepted_results = {
RSResult::SomeNull,
RSResult::NoneNull,
RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check
RSResult::SomeNull,
RSResult::NoneNull,
RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check
RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check
RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check
RSResult::All,
RSResult::AllNull,
};
ASSERT_TRUE(std::equal(results.cbegin(), results.cend(), excepted_results.begin()));
}
}
CATCH

} // namespace DB::DM::tests
140 changes: 140 additions & 0 deletions tests/fullstack-test-dt/expr/compare_null.test
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better add some test case about this: https://dev.mysql.com/doc/refman/8.4/en/working-with-null.html

Two NULL values are regarded as equal in a GROUP BY.
When doing an ORDER BY, NULL values are presented first if you do ORDER BY ... ASC and last if you do ORDER BY ... DESC.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added

Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Copyright 2024 PingCAP, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Must enable DT rough set filter and open debug level log to run this test, otherwise disable this test
mysql> drop table if exists test.t;

mysql> create table test.t (a date);

mysql> insert into test.t values('2024-08-26'),('2024-08-25'),('2024-08-24'),('2024-08-23');

mysql> alter table test.t set tiflash replica 1;

func> wait_table test t

mysql> alter table test.t compact tiflash replica;

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a between NULL and '2024-08-25';

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where not (a between NULL and '2024-08-25');
+------------+
| a |
+------------+
| 2024-08-26 |
+------------+

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a in (NULL);

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a not in (NULL);

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a in (NULL, '2024-08-26');
+------------+
| a |
+------------+
| 2024-08-26 |
+------------+

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a not in (NULL, '2024-08-26');

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a in (NULL, '2024-09-01');

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a not in (NULL, '2024-09-01');

mysql> drop table if exists test.t;

mysql> create table test.t (a date);

mysql> insert into test.t values('2024-08-26'),('2024-08-26'),('2024-08-26'),('2024-08-26');

mysql> alter table test.t set tiflash replica 1;

func> wait_table test t

mysql> alter table test.t compact tiflash replica;

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a in (NULL, '2024-08-26');
+------------+
| a |
+------------+
| 2024-08-26 |
| 2024-08-26 |
| 2024-08-26 |
| 2024-08-26 |
+------------+

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a not in (NULL, '2024-08-26');

# not null

mysql> drop table if exists test.t;

mysql> create table test.t (a date not null);

mysql> insert into test.t values('2024-08-26'),('2024-08-25'),('2024-08-24'),('2024-08-23');

mysql> alter table test.t set tiflash replica 1;

func> wait_table test t

mysql> alter table test.t compact tiflash replica;

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a between NULL and '2024-08-25';

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where not (a between NULL and '2024-08-25');
+------------+
| a |
+------------+
| 2024-08-26 |
+------------+

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a in (NULL);

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a not in (NULL);

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a in (NULL, '2024-08-26');
+------------+
| a |
+------------+
| 2024-08-26 |
+------------+

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a not in (NULL, '2024-08-26');

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a in (NULL, '2024-09-01');

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a not in (NULL, '2024-09-01');

mysql> drop table if exists test.t;

mysql> create table test.t (a date not null);

mysql> insert into test.t values('2024-08-26'),('2024-08-26'),('2024-08-26'),('2024-08-26');

mysql> alter table test.t set tiflash replica 1;

func> wait_table test t

mysql> alter table test.t compact tiflash replica;

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a in (NULL, '2024-08-26');
+------------+
| a |
+------------+
| 2024-08-26 |
| 2024-08-26 |
| 2024-08-26 |
| 2024-08-26 |
+------------+

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t where a not in (NULL, '2024-08-26');