Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: regard NULL as point when accessing composite index #30244

Merged
merged 10 commits into from
Dec 1, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions distsql/request_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,8 @@ func encodeIndexKey(sc *stmtctx.StatementContext, ran *ranger.Range) ([]byte, []
}
}

// NOTE: this is a hard-coded operation to avoid wrong results when accessing a unique index with NULL;
// Please see https://github.com/pingcap/tidb/issues/29650 for more details
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will we remove the hard-code operation in the following PRs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, I plan to remove this hard-coded operation, together with the switch tidb_regard_null_as_point, a few months later if no new bugs appear.

if hasNull {
// Append 0 to make unique-key range [null, null] to be a scan rather than point-get.
high = kv.Key(high).Next()
Expand Down
3 changes: 2 additions & 1 deletion planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -843,7 +843,8 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty, planCounter
if canConvertPointGet {
allRangeIsPoint := true
for _, ran := range path.Ranges {
if !ran.IsPoint(ds.ctx) {
if !ran.IsPointNonNullable(ds.ctx) {
// unique indexes can have duplicated NULL rows so we cannot use PointGet if there is NULL
allRangeIsPoint = false
break
}
Expand Down
44 changes: 44 additions & 0 deletions planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4835,6 +4835,50 @@ func (s *testIntegrationSerialSuite) TestRejectSortForMPP(c *C) {
}
}

func (s *testIntegrationSerialSuite) TestRegardNULLAsPoint(c *C) {
tk := testkit.NewTestKit(c, s.store)
tk.MustExec("use test")

tk.MustExec("drop table if exists tpk")
tk.MustExec(`create table tuk (a int, b int, c int, unique key (a, b, c))`)
tk.MustExec(`create table tik (a int, b int, c int, key (a, b, c))`)
for _, va := range []string{"NULL", "1"} {
for _, vb := range []string{"NULL", "1"} {
for _, vc := range []string{"NULL", "1"} {
tk.MustExec(fmt.Sprintf(`insert into tuk values (%v, %v, %v)`, va, vb, vc))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it is better to insert each value twice expect (1,1,1). In this way we can check whether it returns multiple rows (expected) or just a single row(wrong).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

tk.MustExec(fmt.Sprintf(`insert into tik values (%v, %v, %v)`, va, vb, vc))
}
}
}

var input []string
var output []struct {
SQL string
PlanEnabled []string
PlanDisabled []string
Result []string
}
s.testData.GetTestCases(c, &input, &output)
for i, tt := range input {
s.testData.OnRecord(func() {
output[i].SQL = tt
tk.MustExec(`set @@session.tidb_regard_null_as_point=true`)
output[i].PlanEnabled = s.testData.ConvertRowsToStrings(tk.MustQuery("explain " + tt).Rows())
output[i].Result = s.testData.ConvertRowsToStrings(tk.MustQuery(tt).Rows())

tk.MustExec(`set @@session.tidb_regard_null_as_point=false`)
output[i].PlanDisabled = s.testData.ConvertRowsToStrings(tk.MustQuery("explain " + tt).Rows())
})
tk.MustExec(`set @@session.tidb_regard_null_as_point=true`)
tk.MustQuery("explain " + tt).Check(testkit.Rows(output[i].PlanEnabled...))
tk.MustQuery(tt).Check(testkit.Rows(output[i].Result...))

tk.MustExec(`set @@session.tidb_regard_null_as_point=false`)
tk.MustQuery("explain " + tt).Check(testkit.Rows(output[i].PlanDisabled...))
tk.MustQuery(tt).Check(testkit.Rows(output[i].Result...))
}
}

func (s *testIntegrationSuite) TestIssues29711(c *C) {
tk := testkit.NewTestKit(c, s.store)
tk.MustExec("use test")
Expand Down
3 changes: 3 additions & 0 deletions planner/core/partition_pruner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ func (s *testPartitionPruneSuit) TestListPartitionPruner(c *C) {
tk.MustExec("use test_partition")
tk.Se.GetSessionVars().EnableClusteredIndex = variable.ClusteredIndexDefModeIntOnly
tk.MustExec("set @@session.tidb_enable_list_partition = ON")
tk.MustExec(`set @@session.tidb_regard_null_as_point=false`)
tk.MustExec("create table t1 (id int, a int, b int ) partition by list ( a ) (partition p0 values in (1,2,3,4,5), partition p1 values in (6,7,8,9,10,null));")
tk.MustExec("create table t2 (a int, id int, b int) partition by list (a*3 + b - 2*a - b) (partition p0 values in (1,2,3,4,5), partition p1 values in (6,7,8,9,10,null));")
tk.MustExec("create table t3 (b int, id int, a int) partition by list columns (a) (partition p0 values in (1,2,3,4,5), partition p1 values in (6,7,8,9,10,null));")
Expand Down Expand Up @@ -169,6 +170,7 @@ func (s *testPartitionPruneSuit) TestListColumnsPartitionPruner(c *C) {
// tk1 use to test partition table with index.
tk1 := testkit.NewTestKit(c, s.store)
tk1.MustExec("drop database if exists test_partition_1;")
tk1.MustExec(`set @@session.tidb_regard_null_as_point=false`)
tk1.MustExec("create database test_partition_1")
tk1.MustExec("use test_partition_1")
tk1.MustExec("set @@session.tidb_enable_list_partition = ON")
Expand All @@ -180,6 +182,7 @@ func (s *testPartitionPruneSuit) TestListColumnsPartitionPruner(c *C) {
// tk2 use to compare the result with normal table.
tk2 := testkit.NewTestKit(c, s.store)
tk2.MustExec("drop database if exists test_partition_2;")
tk2.MustExec(`set @@session.tidb_regard_null_as_point=false`)
tk2.MustExec("create database test_partition_2")
tk2.MustExec("use test_partition_2")
tk2.MustExec("create table t1 (id int, a int, b int)")
Expand Down
16 changes: 16 additions & 0 deletions planner/core/plan_to_pb.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,22 @@ func checkCoverIndex(idx *model.IndexInfo, ranges []*ranger.Range) bool {
if len(rg.LowVal) != len(idx.Columns) {
return false
}
if !rg.LowExclude {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to check whether null exists if rg.LowExclude or rg.HighExclude?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

for _, v := range rg.LowVal {
if v.IsNull() {
// a unique index may have duplicated rows with NULLs, so we cannot set the unique attribute to true when the range has NULL
// please see https://github.com/pingcap/tidb/issues/29650 for more details
return false
}
}
}
if !rg.HighExclude {
for _, v := range rg.HighVal {
if v.IsNull() {
return false
}
}
}
}
return true
}
Expand Down
15 changes: 15 additions & 0 deletions planner/core/testdata/integration_serial_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,21 @@
]

},
{
"name": "TestRegardNULLAsPoint",
"cases": [
"select * from tuk where a<=>null and b=1",
"select * from tik where a<=>null and b=1",
"select * from tuk where a<=>null and b=1 and c=1",
"select * from tik where a<=>null and b=1 and c=1",
"select * from tuk where a=1 and b<=>null and c=1",
"select * from tik where a=1 and b<=>null and c=1",
"select * from tuk where a<=>null and b<=>null and c=1",
"select * from tik where a<=>null and b<=>null and c=1",
"select * from tuk where a<=>null and b<=>null and c<=>null",
"select * from tik where a<=>null and b<=>null and c<=>null"
]
},
{
"name": "TestPushDownToTiFlashWithKeepOrder",
"cases": [
Expand Down
157 changes: 157 additions & 0 deletions planner/core/testdata/integration_serial_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,163 @@
}
]
},
{
"Name": "TestRegardNULLAsPoint",
"Cases": [
{
"SQL": "select * from tuk where a<=>null and b=1",
"PlanEnabled": [
"IndexReader_6 0.10 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 0.10 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL 1,NULL 1], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.01 root index:Selection_6",
"└─Selection_6 0.01 cop[tikv] eq(test.tuk.b, 1)",
" └─IndexRangeScan_5 10.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
],
"Result": [
"<nil> 1 <nil>",
"<nil> 1 1"
]
},
{
"SQL": "select * from tik where a<=>null and b=1",
"PlanEnabled": [
"IndexReader_6 0.10 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 0.10 cop[tikv] table:tik, index:a(a, b, c) range:[NULL 1,NULL 1], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.01 root index:Selection_6",
"└─Selection_6 0.01 cop[tikv] eq(test.tik.b, 1)",
" └─IndexRangeScan_5 10.00 cop[tikv] table:tik, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
],
"Result": [
"<nil> 1 <nil>",
"<nil> 1 1"
]
},
{
"SQL": "select * from tuk where a<=>null and b=1 and c=1",
"PlanEnabled": [
"IndexReader_6 1.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 1.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL 1 1,NULL 1 1], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.00 root index:Selection_6",
"└─Selection_6 0.00 cop[tikv] eq(test.tuk.b, 1), eq(test.tuk.c, 1)",
" └─IndexRangeScan_5 10.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
],
"Result": [
"<nil> 1 1"
]
},
{
"SQL": "select * from tik where a<=>null and b=1 and c=1",
"PlanEnabled": [
"IndexReader_6 0.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 0.00 cop[tikv] table:tik, index:a(a, b, c) range:[NULL 1 1,NULL 1 1], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.00 root index:Selection_6",
"└─Selection_6 0.00 cop[tikv] eq(test.tik.b, 1), eq(test.tik.c, 1)",
" └─IndexRangeScan_5 10.00 cop[tikv] table:tik, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
],
"Result": [
"<nil> 1 1"
]
},
{
"SQL": "select * from tuk where a=1 and b<=>null and c=1",
"PlanEnabled": [
"IndexReader_6 1.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 1.00 cop[tikv] table:tuk, index:a(a, b, c) range:[1 NULL 1,1 NULL 1], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.00 root index:Selection_6",
"└─Selection_6 0.00 cop[tikv] eq(test.tuk.c, 1)",
" └─IndexRangeScan_5 0.10 cop[tikv] table:tuk, index:a(a, b, c) range:[1 NULL,1 NULL], keep order:false, stats:pseudo"
],
"Result": [
"1 <nil> 1"
]
},
{
"SQL": "select * from tik where a=1 and b<=>null and c=1",
"PlanEnabled": [
"IndexReader_6 0.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 0.00 cop[tikv] table:tik, index:a(a, b, c) range:[1 NULL 1,1 NULL 1], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.00 root index:Selection_6",
"└─Selection_6 0.00 cop[tikv] eq(test.tik.c, 1)",
" └─IndexRangeScan_5 0.10 cop[tikv] table:tik, index:a(a, b, c) range:[1 NULL,1 NULL], keep order:false, stats:pseudo"
],
"Result": [
"1 <nil> 1"
]
},
{
"SQL": "select * from tuk where a<=>null and b<=>null and c=1",
"PlanEnabled": [
"IndexReader_6 1.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 1.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL NULL 1,NULL NULL 1], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.00 root index:Selection_6",
"└─Selection_6 0.00 cop[tikv] eq(test.tuk.c, 1), nulleq(test.tuk.b, NULL)",
" └─IndexRangeScan_5 10.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
],
"Result": [
"<nil> <nil> 1"
]
},
{
"SQL": "select * from tik where a<=>null and b<=>null and c=1",
"PlanEnabled": [
"IndexReader_6 0.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 0.00 cop[tikv] table:tik, index:a(a, b, c) range:[NULL NULL 1,NULL NULL 1], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.00 root index:Selection_6",
"└─Selection_6 0.00 cop[tikv] eq(test.tik.c, 1), nulleq(test.tik.b, NULL)",
" └─IndexRangeScan_5 10.00 cop[tikv] table:tik, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
],
"Result": [
"<nil> <nil> 1"
]
},
{
"SQL": "select * from tuk where a<=>null and b<=>null and c<=>null",
"PlanEnabled": [
"IndexReader_6 1.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 1.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL NULL NULL,NULL NULL NULL], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.00 root index:Selection_6",
"└─Selection_6 0.00 cop[tikv] nulleq(test.tuk.b, NULL), nulleq(test.tuk.c, NULL)",
" └─IndexRangeScan_5 10.00 cop[tikv] table:tuk, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
],
"Result": [
"<nil> <nil> <nil>"
]
},
{
"SQL": "select * from tik where a<=>null and b<=>null and c<=>null",
"PlanEnabled": [
"IndexReader_6 0.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 0.00 cop[tikv] table:tik, index:a(a, b, c) range:[NULL NULL NULL,NULL NULL NULL], keep order:false, stats:pseudo"
],
"PlanDisabled": [
"IndexReader_7 0.00 root index:Selection_6",
"└─Selection_6 0.00 cop[tikv] nulleq(test.tik.b, NULL), nulleq(test.tik.c, NULL)",
" └─IndexRangeScan_5 10.00 cop[tikv] table:tik, index:a(a, b, c) range:[NULL,NULL], keep order:false, stats:pseudo"
],
"Result": [
"<nil> <nil> <nil>"
]
}
]
},
{
"Name": "TestPushDownToTiFlashWithKeepOrder",
"Cases": [
Expand Down
3 changes: 3 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,9 @@ type SessionVars struct {
// EnablePseudoForOutdatedStats if using pseudo for outdated stats
EnablePseudoForOutdatedStats bool

// RegardNULLAsPoint if regard NULL as Point
RegardNULLAsPoint bool

// LocalTemporaryTables is *infoschema.LocalTemporaryTables, use interface to avoid circle dependency.
// It's nil if there is no local temporary table.
LocalTemporaryTables interface{}
Expand Down
4 changes: 4 additions & 0 deletions sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -1857,6 +1857,10 @@ var defaultSysVars = []*SysVar{
s.EnablePseudoForOutdatedStats = TiDBOptOn(val)
return nil
}},
{Scope: ScopeGlobal | ScopeSession, Name: TiDBRegardNULLAsPoint, Value: BoolToOnOff(DefTiDBRegardNULLAsPoint), Type: TypeBool, SetSession: func(s *SessionVars, val string) error {
s.RegardNULLAsPoint = TiDBOptOn(val)
return nil
}},

{Scope: ScopeNone, Name: "version_compile_os", Value: runtime.GOOS},
{Scope: ScopeNone, Name: "version_compile_machine", Value: runtime.GOARCH},
Expand Down
4 changes: 4 additions & 0 deletions sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,9 @@ const (
// TiDBEnablePseudoForOutdatedStats indicates whether use pseudo for outdated stats
TiDBEnablePseudoForOutdatedStats = "tidb_enable_pseudo_for_outdated_stats"

// TiDBRegardNULLAsPoint indicates whether regard NULL as point when optimizing
TiDBRegardNULLAsPoint = "tidb_regard_null_as_point"

// TiDBTmpTableMaxSize indicates the max memory size of temporary tables.
TiDBTmpTableMaxSize = "tidb_tmp_table_max_size"
)
Expand Down Expand Up @@ -768,6 +771,7 @@ const (
DefTiDBEnableTSOFollowerProxy = false
DefTiDBEnableOrderedResultMode = false
DefTiDBEnablePseudoForOutdatedStats = true
DefTiDBRegardNULLAsPoint = true
DefEnablePlacementCheck = true
DefTimestamp = "0"
)
Expand Down
10 changes: 5 additions & 5 deletions util/ranger/detacher.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func detachColumnDNFConditions(sctx sessionctx.Context, conditions []expression.
// in function which is `column in (constant list)`.
// If so, it will return the offset of this column in the slice, otherwise return -1 for not found.
// Since combining `x >= 2` and `x <= 2` can lead to an eq condition `x = 2`, we take le/ge/lt/gt into consideration.
func getPotentialEqOrInColOffset(expr expression.Expression, cols []*expression.Column) int {
func getPotentialEqOrInColOffset(sctx sessionctx.Context, expr expression.Expression, cols []*expression.Column) int {
f, ok := expr.(*expression.ScalarFunction)
if !ok {
return -1
Expand All @@ -109,7 +109,7 @@ func getPotentialEqOrInColOffset(expr expression.Expression, cols []*expression.
dnfItems := expression.FlattenDNFConditions(f)
offset := int(-1)
for _, dnfItem := range dnfItems {
curOffset := getPotentialEqOrInColOffset(dnfItem, cols)
curOffset := getPotentialEqOrInColOffset(sctx, dnfItem, cols)
if curOffset == -1 {
return -1
}
Expand All @@ -129,7 +129,7 @@ func getPotentialEqOrInColOffset(expr expression.Expression, cols []*expression.
}
if constVal, ok := f.GetArgs()[1].(*expression.Constant); ok {
val, err := constVal.Eval(chunk.Row{})
if err != nil || val.IsNull() {
if err != nil || (!sctx.GetSessionVars().RegardNULLAsPoint && val.IsNull()) {
// treat col<=>null as range scan instead of point get to avoid incorrect results
// when nullable unique index has multiple matches for filter x is null
return -1
Expand All @@ -151,7 +151,7 @@ func getPotentialEqOrInColOffset(expr expression.Expression, cols []*expression.
}
if constVal, ok := f.GetArgs()[0].(*expression.Constant); ok {
val, err := constVal.Eval(chunk.Row{})
if err != nil || val.IsNull() {
if err != nil || (!sctx.GetSessionVars().RegardNULLAsPoint && val.IsNull()) {
return -1
}
for i, col := range cols {
Expand Down Expand Up @@ -517,7 +517,7 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex
columnValues := make([]*valueInfo, len(cols))
offsets := make([]int, len(conditions))
for i, cond := range conditions {
offset := getPotentialEqOrInColOffset(cond, cols)
offset := getPotentialEqOrInColOffset(sctx, cond, cols)
offsets[i] = offset
if offset == -1 {
continue
Expand Down
1 change: 1 addition & 0 deletions util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -961,6 +961,7 @@ func TestCompIndexDNFMatch(t *testing.T) {
require.NoError(t, err)
testKit := testkit.NewTestKit(t, store)
testKit.MustExec("use test")
testKit.MustExec(`set @@session.tidb_regard_null_as_point=false`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would happen if we don't add this line.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then the default value true is used and some plans in this test cases will change.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't affect the correctness, right?

testKit.MustExec("drop table if exists t")
testKit.MustExec("create table t(a int, b int, c int, key(a,b,c));")
testKit.MustExec("insert into t values(1,2,2)")
Expand Down
Loading