Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: regard NULL as point when accessing composite index #30244

Merged
merged 10 commits into from
Dec 1, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions distsql/request_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,8 @@ func encodeIndexKey(sc *stmtctx.StatementContext, ran *ranger.Range) ([]byte, []
}
}

// NOTE: this is a hard-coded operation to avoid wrong results when accessing a unique index with NULL;
// Please see https://github.com/pingcap/tidb/issues/29650 for more details
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will we remove the hard-code operation in the following PRs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, I plan to remove this hard-coded operation, together with the switch tidb_regard_null_as_point, a few months later if no new bugs appear.

if hasNull {
// Append 0 to make unique-key range [null, null] to be a scan rather than point-get.
high = kv.Key(high).Next()
Expand Down
3 changes: 2 additions & 1 deletion planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -843,7 +843,8 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty, planCounter
if canConvertPointGet {
allRangeIsPoint := true
for _, ran := range path.Ranges {
if !ran.IsPoint(ds.ctx) {
if !ran.IsPointNonNullable(ds.ctx) {
// unique indexes can have duplicated NULL rows so we cannot use PointGet if there is NULL
allRangeIsPoint = false
break
}
Expand Down
44 changes: 44 additions & 0 deletions planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4835,6 +4835,50 @@ func (s *testIntegrationSerialSuite) TestRejectSortForMPP(c *C) {
}
}

func (s *testIntegrationSerialSuite) TestRegardNULLAsPoint(c *C) {
tk := testkit.NewTestKit(c, s.store)
tk.MustExec("use test")

tk.MustExec("drop table if exists tpk")
tk.MustExec(`create table tuk (a int, b int, c int, unique key (a, b, c))`)
tk.MustExec(`create table tik (a int, b int, c int, key (a, b, c))`)
for _, va := range []string{"NULL", "1"} {
for _, vb := range []string{"NULL", "1"} {
for _, vc := range []string{"NULL", "1"} {
tk.MustExec(fmt.Sprintf(`insert into tuk values (%v, %v, %v)`, va, vb, vc))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it is better to insert each value twice expect (1,1,1). In this way we can check whether it returns multiple rows (expected) or just a single row(wrong).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

tk.MustExec(fmt.Sprintf(`insert into tik values (%v, %v, %v)`, va, vb, vc))
}
}
}

var input []string
var output []struct {
SQL string
PlanEnabled []string
PlanDisabled []string
Result []string
}
s.testData.GetTestCases(c, &input, &output)
for i, tt := range input {
s.testData.OnRecord(func() {
output[i].SQL = tt
tk.MustExec(`set @@session.tidb_regard_null_as_point=true`)
output[i].PlanEnabled = s.testData.ConvertRowsToStrings(tk.MustQuery("explain " + tt).Rows())
output[i].Result = s.testData.ConvertRowsToStrings(tk.MustQuery(tt).Rows())

tk.MustExec(`set @@session.tidb_regard_null_as_point=false`)
output[i].PlanDisabled = s.testData.ConvertRowsToStrings(tk.MustQuery("explain " + tt).Rows())
})
tk.MustExec(`set @@session.tidb_regard_null_as_point=true`)
tk.MustQuery("explain " + tt).Check(testkit.Rows(output[i].PlanEnabled...))
tk.MustQuery(tt).Check(testkit.Rows(output[i].Result...))

tk.MustExec(`set @@session.tidb_regard_null_as_point=false`)
tk.MustQuery("explain " + tt).Check(testkit.Rows(output[i].PlanDisabled...))
tk.MustQuery(tt).Check(testkit.Rows(output[i].Result...))
}
}

func (s *testIntegrationSuite) TestIssues29711(c *C) {
tk := testkit.NewTestKit(c, s.store)
tk.MustExec("use test")
Expand Down
3 changes: 3 additions & 0 deletions planner/core/partition_pruner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ func (s *testPartitionPruneSuit) TestListPartitionPruner(c *C) {
tk.MustExec("use test_partition")
tk.Se.GetSessionVars().EnableClusteredIndex = variable.ClusteredIndexDefModeIntOnly
tk.MustExec("set @@session.tidb_enable_list_partition = ON")
tk.MustExec(`set @@session.tidb_regard_null_as_point=false`)
tk.MustExec("create table t1 (id int, a int, b int ) partition by list ( a ) (partition p0 values in (1,2,3,4,5), partition p1 values in (6,7,8,9,10,null));")
tk.MustExec("create table t2 (a int, id int, b int) partition by list (a*3 + b - 2*a - b) (partition p0 values in (1,2,3,4,5), partition p1 values in (6,7,8,9,10,null));")
tk.MustExec("create table t3 (b int, id int, a int) partition by list columns (a) (partition p0 values in (1,2,3,4,5), partition p1 values in (6,7,8,9,10,null));")
Expand Down Expand Up @@ -169,6 +170,7 @@ func (s *testPartitionPruneSuit) TestListColumnsPartitionPruner(c *C) {
// tk1 use to test partition table with index.
tk1 := testkit.NewTestKit(c, s.store)
tk1.MustExec("drop database if exists test_partition_1;")
tk1.MustExec(`set @@session.tidb_regard_null_as_point=false`)
tk1.MustExec("create database test_partition_1")
tk1.MustExec("use test_partition_1")
tk1.MustExec("set @@session.tidb_enable_list_partition = ON")
Expand All @@ -180,6 +182,7 @@ func (s *testPartitionPruneSuit) TestListColumnsPartitionPruner(c *C) {
// tk2 use to compare the result with normal table.
tk2 := testkit.NewTestKit(c, s.store)
tk2.MustExec("drop database if exists test_partition_2;")
tk2.MustExec(`set @@session.tidb_regard_null_as_point=false`)
tk2.MustExec("create database test_partition_2")
tk2.MustExec("use test_partition_2")
tk2.MustExec("create table t1 (id int, a int, b int)")
Expand Down
16 changes: 16 additions & 0 deletions planner/core/plan_to_pb.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,22 @@ func checkCoverIndex(idx *model.IndexInfo, ranges []*ranger.Range) bool {
if len(rg.LowVal) != len(idx.Columns) {
return false
}
if !rg.LowExclude {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to check whether null exists if rg.LowExclude or rg.HighExclude?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

for _, v := range rg.LowVal {
if v.IsNull() {
// a unique index may have duplicated rows with NULLs, so we cannot set the unique attribute to true when the range has NULL
// please see https://github.com/pingcap/tidb/issues/29650 for more details
return false
}
}
}
if !rg.HighExclude {
for _, v := range rg.HighVal {
if v.IsNull() {
return false
}
}
}
}
return true
}
Expand Down
15 changes: 15 additions & 0 deletions planner/core/testdata/integration_serial_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,21 @@
]

},
{
"name": "TestRegardNULLAsPoint",
"cases": [
"select * from tuk where a<=>null and b=1",
"select * from tik where a<=>null and b=1",
"select * from tuk where a<=>null and b=1 and c=1",
"select * from tik where a<=>null and b=1 and c=1",
"select * from tuk where a=1 and b<=>null and c=1",
"select * from tik where a=1 and b<=>null and c=1",
"select * from tuk where a<=>null and b<=>null and c=1",
"select * from tik where a<=>null and b<=>null and c=1",
"select * from tuk where a<=>null and b<=>null and c<=>null",
"select * from tik where a<=>null and b<=>null and c<=>null"
]
},
{
"name": "TestPushDownToTiFlashWithKeepOrder",
"cases": [
Expand Down
157 changes: 157 additions & 0 deletions planner/core/testdata/integration_serial_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,163 @@
}
]
},
{
"Name": "TestRegardNULLAsPoint",
"Cases": [
{
"SQL": "select * from tuk where a<=>null and b=1",
"PlanEnabled": [
"IndexReader_6 0.10 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 0.10 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL 1,NULL 1], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.01 root index:Selection_6",
"└─Selection_6 0.01 cop[tikv] eq(test.tuk.b, 1)",
" └─IndexRangeScan_5 10.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
],
"Result": [
"<nil> 1 <nil>",
"<nil> 1 1"
]
},
{
"SQL": "select * from tik where a<=>null and b=1",
"PlanEnabled": [
"IndexReader_6 0.10 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 0.10 cop[tikv] table:tik, index:a(a, b, c) range:[NULL 1,NULL 1], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.01 root index:Selection_6",
"└─Selection_6 0.01 cop[tikv] eq(test.tik.b, 1)",
" └─IndexRangeScan_5 10.00 cop[tikv] table:tik, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
],
"Result": [
"<nil> 1 <nil>",
"<nil> 1 1"
]
},
{
"SQL": "select * from tuk where a<=>null and b=1 and c=1",
"PlanEnabled": [
"IndexReader_6 1.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 1.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL 1 1,NULL 1 1], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.00 root index:Selection_6",
"└─Selection_6 0.00 cop[tikv] eq(test.tuk.b, 1), eq(test.tuk.c, 1)",
" └─IndexRangeScan_5 10.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
],
"Result": [
"<nil> 1 1"
]
},
{
"SQL": "select * from tik where a<=>null and b=1 and c=1",
"PlanEnabled": [
"IndexReader_6 0.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 0.00 cop[tikv] table:tik, index:a(a, b, c) range:[NULL 1 1,NULL 1 1], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.00 root index:Selection_6",
"└─Selection_6 0.00 cop[tikv] eq(test.tik.b, 1), eq(test.tik.c, 1)",
" └─IndexRangeScan_5 10.00 cop[tikv] table:tik, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
],
"Result": [
"<nil> 1 1"
]
},
{
"SQL": "select * from tuk where a=1 and b<=>null and c=1",
"PlanEnabled": [
"IndexReader_6 1.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 1.00 cop[tikv] table:tuk, index:a(a, b, c) range:[1 NULL 1,1 NULL 1], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.00 root index:Selection_6",
"└─Selection_6 0.00 cop[tikv] eq(test.tuk.c, 1)",
" └─IndexRangeScan_5 0.10 cop[tikv] table:tuk, index:a(a, b, c) range:[1 NULL,1 NULL], keep order:false, stats:pseudo"
],
"Result": [
"1 <nil> 1"
]
},
{
"SQL": "select * from tik where a=1 and b<=>null and c=1",
"PlanEnabled": [
"IndexReader_6 0.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 0.00 cop[tikv] table:tik, index:a(a, b, c) range:[1 NULL 1,1 NULL 1], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.00 root index:Selection_6",
"└─Selection_6 0.00 cop[tikv] eq(test.tik.c, 1)",
" └─IndexRangeScan_5 0.10 cop[tikv] table:tik, index:a(a, b, c) range:[1 NULL,1 NULL], keep order:false, stats:pseudo"
],
"Result": [
"1 <nil> 1"
]
},
{
"SQL": "select * from tuk where a<=>null and b<=>null and c=1",
"PlanEnabled": [
"IndexReader_6 1.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 1.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL NULL 1,NULL NULL 1], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.00 root index:Selection_6",
"└─Selection_6 0.00 cop[tikv] eq(test.tuk.c, 1), nulleq(test.tuk.b, NULL)",
" └─IndexRangeScan_5 10.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
],
"Result": [
"<nil> <nil> 1"
]
},
{
"SQL": "select * from tik where a<=>null and b<=>null and c=1",
"PlanEnabled": [
"IndexReader_6 0.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 0.00 cop[tikv] table:tik, index:a(a, b, c) range:[NULL NULL 1,NULL NULL 1], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.00 root index:Selection_6",
"└─Selection_6 0.00 cop[tikv] eq(test.tik.c, 1), nulleq(test.tik.b, NULL)",
" └─IndexRangeScan_5 10.00 cop[tikv] table:tik, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
],
"Result": [
"<nil> <nil> 1"
]
},
{
"SQL": "select * from tuk where a<=>null and b<=>null and c<=>null",
"PlanEnabled": [
"IndexReader_6 1.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 1.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL NULL NULL,NULL NULL NULL], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.00 root index:Selection_6",
"└─Selection_6 0.00 cop[tikv] nulleq(test.tuk.b, NULL), nulleq(test.tuk.c, NULL)",
" └─IndexRangeScan_5 10.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
],
"Result": [
"<nil> <nil> <nil>"
]
},
{
"SQL": "select * from tik where a<=>null and b<=>null and c<=>null",
"PlanEnabled": [
"IndexReader_6 0.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 0.00 cop[tikv] table:tik, index:a(a, b, c) range:[NULL NULL NULL,NULL NULL NULL], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.00 root index:Selection_6",
"└─Selection_6 0.00 cop[tikv] nulleq(test.tik.b, NULL), nulleq(test.tik.c, NULL)",
" └─IndexRangeScan_5 10.00 cop[tikv] table:tik, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
],
"Result": [
"<nil> <nil> <nil>"
]
}
]
},
{
"Name": "TestPushDownToTiFlashWithKeepOrder",
"Cases": [
Expand Down
3 changes: 3 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,9 @@ type SessionVars struct {
// EnablePseudoForOutdatedStats if using pseudo for outdated stats
EnablePseudoForOutdatedStats bool

// RegardNULLAsPoint if regard NULL as Point
RegardNULLAsPoint bool

// LocalTemporaryTables is *infoschema.LocalTemporaryTables, use interface to avoid circle dependency.
// It's nil if there is no local temporary table.
LocalTemporaryTables interface{}
Expand Down
4 changes: 4 additions & 0 deletions sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -1857,6 +1857,10 @@ var defaultSysVars = []*SysVar{
s.EnablePseudoForOutdatedStats = TiDBOptOn(val)
return nil
}},
{Scope: ScopeGlobal | ScopeSession, Name: TiDBRegardNULLAsPoint, Value: BoolToOnOff(DefTiDBRegardNULLAsPoint), Type: TypeBool, SetSession: func(s *SessionVars, val string) error {
s.RegardNULLAsPoint = TiDBOptOn(val)
return nil
}},

{Scope: ScopeNone, Name: "version_compile_os", Value: runtime.GOOS},
{Scope: ScopeNone, Name: "version_compile_machine", Value: runtime.GOARCH},
Expand Down
4 changes: 4 additions & 0 deletions sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,9 @@ const (
// TiDBEnablePseudoForOutdatedStats indicates whether use pseudo for outdated stats
TiDBEnablePseudoForOutdatedStats = "tidb_enable_pseudo_for_outdated_stats"

// TiDBRegardNULLAsPoint indicates whether regard NULL as point when optimizing
TiDBRegardNULLAsPoint = "tidb_regard_null_as_point"

// TiDBTmpTableMaxSize indicates the max memory size of temporary tables.
TiDBTmpTableMaxSize = "tidb_tmp_table_max_size"
)
Expand Down Expand Up @@ -768,6 +771,7 @@ const (
DefTiDBEnableTSOFollowerProxy = false
DefTiDBEnableOrderedResultMode = false
DefTiDBEnablePseudoForOutdatedStats = true
DefTiDBRegardNULLAsPoint = true
DefEnablePlacementCheck = true
DefTimestamp = "0"
)
Expand Down
10 changes: 5 additions & 5 deletions util/ranger/detacher.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func detachColumnDNFConditions(sctx sessionctx.Context, conditions []expression.
// in function which is `column in (constant list)`.
// If so, it will return the offset of this column in the slice, otherwise return -1 for not found.
// Since combining `x >= 2` and `x <= 2` can lead to an eq condition `x = 2`, we take le/ge/lt/gt into consideration.
func getPotentialEqOrInColOffset(expr expression.Expression, cols []*expression.Column) int {
func getPotentialEqOrInColOffset(sctx sessionctx.Context, expr expression.Expression, cols []*expression.Column) int {
f, ok := expr.(*expression.ScalarFunction)
if !ok {
return -1
Expand All @@ -109,7 +109,7 @@ func getPotentialEqOrInColOffset(expr expression.Expression, cols []*expression.
dnfItems := expression.FlattenDNFConditions(f)
offset := int(-1)
for _, dnfItem := range dnfItems {
curOffset := getPotentialEqOrInColOffset(dnfItem, cols)
curOffset := getPotentialEqOrInColOffset(sctx, dnfItem, cols)
if curOffset == -1 {
return -1
}
Expand All @@ -129,7 +129,7 @@ func getPotentialEqOrInColOffset(expr expression.Expression, cols []*expression.
}
if constVal, ok := f.GetArgs()[1].(*expression.Constant); ok {
val, err := constVal.Eval(chunk.Row{})
if err != nil || val.IsNull() {
if err != nil || (!sctx.GetSessionVars().RegardNULLAsPoint && val.IsNull()) {
// treat col<=>null as range scan instead of point get to avoid incorrect results
// when nullable unique index has multiple matches for filter x is null
return -1
Expand All @@ -151,7 +151,7 @@ func getPotentialEqOrInColOffset(expr expression.Expression, cols []*expression.
}
if constVal, ok := f.GetArgs()[0].(*expression.Constant); ok {
val, err := constVal.Eval(chunk.Row{})
if err != nil || val.IsNull() {
if err != nil || (!sctx.GetSessionVars().RegardNULLAsPoint && val.IsNull()) {
return -1
}
for i, col := range cols {
Expand Down Expand Up @@ -517,7 +517,7 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
columnValues := make([]*valueInfo, len(cols))
offsets := make([]int, len(conditions))
for i, cond := range conditions {
offset := getPotentialEqOrInColOffset(cond, cols)
offset := getPotentialEqOrInColOffset(sctx, cond, cols)
offsets[i] = offset
if offset == -1 {
continue
Expand Down
1 change: 1 addition & 0 deletions util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -961,6 +961,7 @@ func TestCompIndexDNFMatch(t *testing.T) {
require.NoError(t, err)
testKit := testkit.NewTestKit(t, store)
testKit.MustExec("use test")
testKit.MustExec(`set @@session.tidb_regard_null_as_point=false`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would happen if we don't add this line.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then the default value true is used and some plans in this test cases will change.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't affect the correctness, right?

testKit.MustExec("drop table if exists t")
testKit.MustExec("create table t(a int, b int, c int, key(a,b,c));")
testKit.MustExec("insert into t values(1,2,2)")
Expand Down
Loading