From 6075250df6d6315753aed60a2663e2a7565a0d4f Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Thu, 27 Dec 2018 20:34:57 +0800 Subject: [PATCH 1/5] util/ranger: fix its behavior about prefix-column --- util/ranger/ranger.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/util/ranger/ranger.go b/util/ranger/ranger.go index 0402209e25bb7..96c25ef84b06d 100644 --- a/util/ranger/ranger.go +++ b/util/ranger/ranger.go @@ -415,18 +415,24 @@ func hasPrefix(lengths []int) bool { func fixPrefixColRange(ranges []*Range, lengths []int, tp []*types.FieldType) { for _, ran := range ranges { + lowCut := false for i := 0; i < len(ran.LowVal); i++ { - fixRangeDatum(&ran.LowVal[i], lengths[i], tp[i]) + lowCut = lowCut || fixRangeDatum(&ran.LowVal[i], lengths[i], tp[i]) } - ran.LowExclude = false + if lowCut { + ran.LowExclude = false + } + highCut := false for i := 0; i < len(ran.HighVal); i++ { - fixRangeDatum(&ran.HighVal[i], lengths[i], tp[i]) + highCut = highCut || fixRangeDatum(&ran.HighVal[i], lengths[i], tp[i]) + } + if highCut { + ran.HighExclude = false } - ran.HighExclude = false } } -func fixRangeDatum(v *types.Datum, length int, tp *types.FieldType) { +func fixRangeDatum(v *types.Datum, length int, tp *types.FieldType) bool { // If this column is prefix and the prefix length is smaller than the range, cut it. // In case of UTF8, prefix should be cut by characters rather than bytes if v.Kind() == types.KindString || v.Kind() == types.KindBytes { @@ -439,12 +445,15 @@ func fixRangeDatum(v *types.Datum, length int, tp *types.FieldType) { truncateStr := string(rs[:length]) // truncate value and limit its length v.SetString(truncateStr) + return true } } else if length != types.UnspecifiedLength && len(colValue) > length { // truncate value and limit its length v.SetBytes(colValue[:length]) + return true } } + return false } // We cannot use the FieldType of column directly. e.g. the column a is int32 and we have a > 1111111111111111111. 
From 9f6ee37bb6dc30b15dbac8552ad27bab778090cf Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Thu, 27 Dec 2018 21:01:47 +0800 Subject: [PATCH 2/5] a full fix --- util/ranger/ranger.go | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/util/ranger/ranger.go b/util/ranger/ranger.go index 96c25ef84b06d..16caf57a1ff9d 100644 --- a/util/ranger/ranger.go +++ b/util/ranger/ranger.go @@ -330,7 +330,12 @@ func buildCNFIndexRange(sc *stmtctx.StatementContext, cols []*expression.Column, // Take prefix index into consideration. if hasPrefix(lengths) { - fixPrefixColRange(ranges, lengths, newTp) + if fixPrefixColRange(ranges, lengths, newTp) { + ranges, err = unionRanges(sc, ranges) + if err != nil { + return nil, errors.Trace(err) + } + } } if len(ranges) > 0 && len(ranges[0].LowVal) < len(cols) { @@ -413,23 +418,30 @@ func hasPrefix(lengths []int) bool { return false } -func fixPrefixColRange(ranges []*Range, lengths []int, tp []*types.FieldType) { +func fixPrefixColRange(ranges []*Range, lengths []int, tp []*types.FieldType) bool { + hasCut := false for _, ran := range ranges { - lowCut := false - for i := 0; i < len(ran.LowVal); i++ { - lowCut = lowCut || fixRangeDatum(&ran.LowVal[i], lengths[i], tp[i]) + lowTail := len(ran.LowVal) - 1 + for i := 0; i < lowTail; i++ { + fixRangeDatum(&ran.LowVal[i], lengths[i], tp[i]) } + lowCut := false + lowCut = fixRangeDatum(&ran.LowVal[lowTail], lengths[lowTail], tp[lowTail]) if lowCut { ran.LowExclude = false } - highCut := false - for i := 0; i < len(ran.HighVal); i++ { - highCut = highCut || fixRangeDatum(&ran.HighVal[i], lengths[i], tp[i]) + highTail := len(ran.HighVal) - 1 + for i := 0; i < highTail; i++ { + fixRangeDatum(&ran.HighVal[i], lengths[i], tp[i]) } + highCut := false + highCut = fixRangeDatum(&ran.HighVal[highTail], lengths[highTail], tp[highTail]) if highCut { ran.HighExclude = false } + hasCut = lowCut || highCut } + return hasCut } func fixRangeDatum(v *types.Datum, length 
int, tp *types.FieldType) bool { From 03b551593cd5306254d7237641f8ec0389803b4a Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Fri, 28 Dec 2018 16:33:38 +0800 Subject: [PATCH 3/5] add tests --- util/ranger/ranger_test.go | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go index 03be414c29e3f..3b453d8fd8aa7 100644 --- a/util/ranger/ranger_test.go +++ b/util/ranger/ranger_test.go @@ -544,6 +544,34 @@ func (s *testRangerSuite) TestIndexRange(c *C) { filterConds: "[eq(test.t.e, 你好啊)]", resultStr: "[[\"[228 189]\",\"[228 189]\"]]", }, + { + indexPos: 2, + exprStr: `d in ("你好啊")`, + accessConds: "[in(test.t.d, 你好啊)]", + filterConds: "[in(test.t.d, 你好啊)]", + resultStr: "[[\"你好\",\"你好\"]]", + }, + { + indexPos: 2, + exprStr: `d not in ("你好啊")`, + accessConds: "[not(in(test.t.d, 你好啊))]", + filterConds: "[not(in(test.t.d, 你好啊))]", + resultStr: "[(NULL,+inf]]", + }, + { + indexPos: 2, + exprStr: `d < "你好" || d > "你好"`, + accessConds: "[or(lt(test.t.d, 你好), gt(test.t.d, 你好))]", + filterConds: "[or(lt(test.t.d, 你好), gt(test.t.d, 你好))]", + resultStr: "[[-inf,\"你好\") (\"你好\",+inf]]", + }, + { + indexPos: 2, + exprStr: `not(d < "你好" || d > "你好")`, + accessConds: "[and(ge(test.t.d, 你好), le(test.t.d, 你好))]", + filterConds: "[and(ge(test.t.d, 你好), le(test.t.d, 你好))]", + resultStr: "[[\"你好\",\"你好\"]]", + }, } for _, tt := range tests { From 34c11954dc203e0fd7a37f8bb5e00c26beeb1b22 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Sat, 29 Dec 2018 14:06:46 +0800 Subject: [PATCH 4/5] add comment --- util/ranger/ranger.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/util/ranger/ranger.go b/util/ranger/ranger.go index 16caf57a1ff9d..e8f4115b1c70f 100644 --- a/util/ranger/ranger.go +++ b/util/ranger/ranger.go @@ -418,6 +418,13 @@ func hasPrefix(lengths []int) bool { return false } +// fixPrefixColRange checks whether the range of one column exceeds the length and needs to be cut. 
+// It specially handles the last column of each range point. If the last one needs to be cut, it will +// change the exclude status of that point and return `true` to tell +// that we need to do a range merge since the intervals may intersect. +// e.g. if the interval is (-inf -inf, a xxxxx), (a xxxxx, +inf +inf) and the length of the last column is 3, +// then we'll change it to (-inf -inf, a xxx], [a xxx, +inf +inf). You can see that these two intervals intersect, +// so we need a merge operation. func fixPrefixColRange(ranges []*Range, lengths []int, tp []*types.FieldType) bool { hasCut := false for _, ran := range ranges { From 900b11b794041f94390f0d060d94204547f0e995 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Sat, 29 Dec 2018 15:12:20 +0800 Subject: [PATCH 5/5] add more comment --- util/ranger/ranger.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/util/ranger/ranger.go b/util/ranger/ranger.go index e8f4115b1c70f..a38a9c75f5313 100644 --- a/util/ranger/ranger.go +++ b/util/ranger/ranger.go @@ -425,6 +425,12 @@ func hasPrefix(lengths []int) bool { // e.g. if the interval is (-inf -inf, a xxxxx), (a xxxxx, +inf +inf) and the length of the last column is 3, // then we'll change it to (-inf -inf, a xxx], [a xxx, +inf +inf). You can see that these two intervals intersect, // so we need a merge operation. +// Q: Is checking only the last column enough to decide whether the endpoint's exclude status needs to be reset? +// A: Yes, suppose that the interval is (-inf -inf, a xxxxx b) and only the second column needs to be cut. +// The result would be (-inf -inf, a xxx b) if its prefix length is 3. Obviously we only need to care about the data +// whose first two keys are `a` and `xxx`. It reads all data whose index value begins with `a` and `xxx` and whose third +// value is less than `b`, which fully covers the values that begin with `a` and `xxxxx` and whose third value is less than `b`. +// So in this case we don't need to reset its exclude status.
The right endpoint case can be proved in the same way. func fixPrefixColRange(ranges []*Range, lengths []int, tp []*types.FieldType) bool { hasCut := false for _, ran := range ranges {