Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gen4: Count Distinct support #8543

Merged
merged 13 commits into from
Jul 28, 2021
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions go/sqltypes/value.go
Original file line number Diff line number Diff line change
@@ -372,6 +372,18 @@ func (v Value) IsDateTime() bool {
return int(v.typ)&dt == dt
}

// IsComparable reports whether the Value can be compared null-safely
// without collation information.
//
// It returns true when the value's type is Null, when IsNumber or IsBinary
// accepts the type, or when the type is one of Timestamp, Date, Time,
// Datetime, Enum, Set, TypeJSON or Bit. Every other type yields false.
//
// The method uses a value receiver, like the other Value methods around it
// (IsDateTime, MarshalJSON), so it can be called on both Value and *Value.
func (v Value) IsComparable() bool {
	if v.typ == Null || IsNumber(v.typ) || IsBinary(v.typ) {
		return true
	}
	switch v.typ {
	case Timestamp, Date, Time, Datetime, Enum, Set, TypeJSON, Bit:
		return true
	}
	return false
}

// MarshalJSON should only be used for testing.
// It's not a complete implementation.
func (v Value) MarshalJSON() ([]byte, error) {
47 changes: 30 additions & 17 deletions go/vt/vtgate/engine/ordered_aggregate.go
Original file line number Diff line number Diff line change
@@ -45,7 +45,7 @@ type OrderedAggregate struct {
PreProcess bool `json:",omitempty"`
// Aggregates specifies the aggregation parameters for each
// aggregation function: function opcode and input column number.
Aggregates []AggregateParams
Aggregates []*AggregateParams

// GroupByKeys specifies the input values that must be used for
// the aggregation key.
@@ -78,27 +78,33 @@ func (gbp GroupByParams) String() string {
// AggregateParams specifies the parameters for each aggregation.
// It contains the opcode and input column number.
type AggregateParams struct {
Opcode AggregateOpcode
Col int
Opcode AggregateOpcode
Col int
KeyCol int
WCol int
WAssigned bool
// Alias is set only for distinct opcodes.
Alias string `json:",omitempty"`
Expr sqlparser.Expr
}

func (ap AggregateParams) isDistinct() bool {
func (ap *AggregateParams) isDistinct() bool {
return ap.Opcode == AggregateCountDistinct || ap.Opcode == AggregateSumDistinct
}

func (ap AggregateParams) preProcess() bool {
func (ap *AggregateParams) preProcess() bool {
return ap.Opcode == AggregateCountDistinct || ap.Opcode == AggregateSumDistinct || ap.Opcode == AggregateGtid
}

func (ap AggregateParams) String() string {
func (ap *AggregateParams) String() string {
keyCol := strconv.Itoa(ap.Col)
if ap.Opcode == AggregateCountDistinct && ap.WAssigned {
keyCol = fmt.Sprintf("%s|%d", keyCol, ap.WCol)
}
if ap.Alias != "" {
return fmt.Sprintf("%s(%d) AS %s", ap.Opcode.String(), ap.Col, ap.Alias)
return fmt.Sprintf("%s(%s) AS %s", ap.Opcode.String(), keyCol, ap.Alias)
}

return fmt.Sprintf("%s(%d)", ap.Opcode.String(), ap.Col)
return fmt.Sprintf("%s(%s)", ap.Opcode.String(), keyCol)
}

// AggregateOpcode is the aggregation Opcode.
@@ -306,6 +312,9 @@ func (oa *OrderedAggregate) convertFields(fields []*querypb.Field) []*querypb.Fi
Name: aggr.Alias,
Type: opcodeType[aggr.Opcode],
}
if aggr.isDistinct() {
aggr.KeyCol = aggr.Col
}
}
return fields
}
@@ -318,17 +327,21 @@ func (oa *OrderedAggregate) convertRow(row []sqltypes.Value) (newRow []sqltypes.
for _, aggr := range oa.Aggregates {
switch aggr.Opcode {
case AggregateCountDistinct:
curDistinct = row[aggr.Col]
curDistinct = row[aggr.KeyCol]
if aggr.WAssigned && !curDistinct.IsComparable() {
aggr.KeyCol = aggr.WCol
curDistinct = row[aggr.KeyCol]
}
// Type is int64. Ok to call MakeTrusted.
if row[aggr.Col].IsNull() {
if row[aggr.KeyCol].IsNull() {
newRow[aggr.Col] = countZero
} else {
newRow[aggr.Col] = countOne
}
case AggregateSumDistinct:
curDistinct = row[aggr.Col]
curDistinct = row[aggr.KeyCol]
var err error
newRow[aggr.Col], err = evalengine.Cast(row[aggr.Col], opcodeType[aggr.Opcode])
newRow[aggr.Col], err = evalengine.Cast(row[aggr.KeyCol], opcodeType[aggr.Opcode])
if err != nil {
newRow[aggr.Col] = sumZero
}
@@ -392,17 +405,17 @@ func (oa *OrderedAggregate) merge(fields []*querypb.Field, row1, row2 []sqltypes
result := sqltypes.CopyRow(row1)
for _, aggr := range oa.Aggregates {
if aggr.isDistinct() {
if row2[aggr.Col].IsNull() {
if row2[aggr.KeyCol].IsNull() {
continue
}
cmp, err := evalengine.NullsafeCompare(curDistinct, row2[aggr.Col])
cmp, err := evalengine.NullsafeCompare(curDistinct, row2[aggr.KeyCol])
if err != nil {
return nil, sqltypes.NULL, err
}
if cmp == 0 {
continue
}
curDistinct = row2[aggr.Col]
curDistinct = row2[aggr.KeyCol]
}
var err error
switch aggr.Opcode {
@@ -473,7 +486,7 @@ func createEmptyValueFor(opcode AggregateOpcode) (sqltypes.Value, error) {
}

func aggregateParamsToString(in interface{}) string {
return in.(AggregateParams).String()
return in.(*AggregateParams).String()
}

func groupByParamsToString(i interface{}) string {
144 changes: 131 additions & 13 deletions go/vt/vtgate/engine/ordered_aggregate_test.go
Original file line number Diff line number Diff line change
@@ -49,7 +49,7 @@ func TestOrderedAggregateExecute(t *testing.T) {
}

oa := &OrderedAggregate{
Aggregates: []AggregateParams{{
Aggregates: []*AggregateParams{{
Opcode: AggregateCount,
Col: 1,
}},
@@ -86,7 +86,7 @@ func TestOrderedAggregateExecuteTruncate(t *testing.T) {
}

oa := &OrderedAggregate{
Aggregates: []AggregateParams{{
Aggregates: []*AggregateParams{{
Opcode: AggregateCount,
Col: 1,
}},
@@ -128,7 +128,7 @@ func TestOrderedAggregateStreamExecute(t *testing.T) {
}

oa := &OrderedAggregate{
Aggregates: []AggregateParams{{
Aggregates: []*AggregateParams{{
Opcode: AggregateCount,
Col: 1,
}},
@@ -171,7 +171,7 @@ func TestOrderedAggregateStreamExecuteTruncate(t *testing.T) {
}

oa := &OrderedAggregate{
Aggregates: []AggregateParams{{
Aggregates: []*AggregateParams{{
Opcode: AggregateCount,
Col: 1,
}},
@@ -307,7 +307,7 @@ func TestOrderedAggregateExecuteCountDistinct(t *testing.T) {

oa := &OrderedAggregate{
PreProcess: true,
Aggregates: []AggregateParams{{
Aggregates: []*AggregateParams{{
Opcode: AggregateCountDistinct,
Col: 1,
Alias: "count(distinct col2)",
@@ -383,7 +383,7 @@ func TestOrderedAggregateStreamCountDistinct(t *testing.T) {

oa := &OrderedAggregate{
PreProcess: true,
Aggregates: []AggregateParams{{
Aggregates: []*AggregateParams{{
Opcode: AggregateCountDistinct,
Col: 1,
Alias: "count(distinct col2)",
@@ -471,7 +471,7 @@ func TestOrderedAggregateSumDistinctGood(t *testing.T) {

oa := &OrderedAggregate{
PreProcess: true,
Aggregates: []AggregateParams{{
Aggregates: []*AggregateParams{{
Opcode: AggregateSumDistinct,
Col: 1,
Alias: "sum(distinct col2)",
@@ -520,7 +520,7 @@ func TestOrderedAggregateSumDistinctTolerateError(t *testing.T) {

oa := &OrderedAggregate{
PreProcess: true,
Aggregates: []AggregateParams{{
Aggregates: []*AggregateParams{{
Opcode: AggregateSumDistinct,
Col: 1,
Alias: "sum(distinct col2)",
@@ -556,7 +556,7 @@ func TestOrderedAggregateKeysFail(t *testing.T) {
}

oa := &OrderedAggregate{
Aggregates: []AggregateParams{{
Aggregates: []*AggregateParams{{
Opcode: AggregateCount,
Col: 1,
}},
@@ -589,7 +589,7 @@ func TestOrderedAggregateMergeFail(t *testing.T) {
}

oa := &OrderedAggregate{
Aggregates: []AggregateParams{{
Aggregates: []*AggregateParams{{
Opcode: AggregateCount,
Col: 1,
}},
@@ -629,7 +629,7 @@ func TestOrderedAggregateMergeFail(t *testing.T) {
func TestMerge(t *testing.T) {
assert := assert.New(t)
oa := &OrderedAggregate{
Aggregates: []AggregateParams{{
Aggregates: []*AggregateParams{{
Opcode: AggregateCount,
Col: 1,
}, {
@@ -716,7 +716,7 @@ func TestNoInputAndNoGroupingKeys(outer *testing.T) {

oa := &OrderedAggregate{
PreProcess: true,
Aggregates: []AggregateParams{{
Aggregates: []*AggregateParams{{
Opcode: test.opcode,
Col: 0,
Alias: test.name,
@@ -769,7 +769,7 @@ func TestOrderedAggregateExecuteGtid(t *testing.T) {

oa := &OrderedAggregate{
PreProcess: true,
Aggregates: []AggregateParams{{
Aggregates: []*AggregateParams{{
Opcode: AggregateGtid,
Col: 1,
Alias: "vgtid",
@@ -790,3 +790,121 @@ func TestOrderedAggregateExecuteGtid(t *testing.T) {
)
assert.Equal(t, wantResult, result)
}

systay marked this conversation as resolved.
Show resolved Hide resolved
// TestCountDistinctOnVarchar runs an AggregateCountDistinct aggregation over
// a varchar column (Col 1) that is accompanied by a weight_string column
// (WCol 2, WAssigned set), grouped by column 0. It checks that Execute and
// StreamExecute both return the same result, truncated to two columns.
func TestCountDistinctOnVarchar(t *testing.T) {
	inputFields := sqltypes.MakeTestFields(
		"c1|c2|weight_string(c2)",
		"int64|varchar|varbinary",
	)
	inputResult := sqltypes.MakeTestResult(
		inputFields,
		"1|a|0x41",
		"1|a|0x41",
		"1|b|0x42",
		"2|b|0x42",
	)
	source := &fakePrimitive{
		results: []*sqltypes.Result{inputResult},
	}

	countDistinct := &AggregateParams{
		Opcode:    AggregateCountDistinct,
		Col:       1,
		WCol:      2,
		WAssigned: true,
		Alias:     "count(distinct c2)",
	}
	oa := &OrderedAggregate{
		PreProcess:          true,
		Aggregates:          []*AggregateParams{countDistinct},
		GroupByKeys:         []GroupByParams{{KeyCol: 0}},
		Input:               source,
		TruncateColumnCount: 2,
	}

	expectedFields := sqltypes.MakeTestFields(
		"c1|count(distinct c2)",
		"int64|int64",
	)
	expected := sqltypes.MakeTestResult(expectedFields, `1|2`, `2|1`)

	// Non-streaming path.
	got, err := oa.Execute(nil, nil, false)
	require.NoError(t, err)
	assert.Equal(t, expected, got)

	// Streaming path over the same input: rewind the fake primitive, then
	// stitch the streamed chunks back into a single Result for comparison.
	source.rewind()
	streamed := &sqltypes.Result{}
	collect := func(chunk *sqltypes.Result) error {
		if chunk.Fields != nil {
			streamed.Fields = chunk.Fields
		}
		streamed.Rows = append(streamed.Rows, chunk.Rows...)
		return nil
	}
	err = oa.StreamExecute(nil, nil, false, collect)
	require.NoError(t, err)
	assert.Equal(t, expected, streamed)
}

// TestCountDistinctOnVarcharWithNulls runs the same AggregateCountDistinct
// setup as the varchar test (Col 1, WCol 2, grouped by column 0) against
// input that contains null values in the grouping column, in the counted
// column, or in both. It checks that Execute and StreamExecute both return
// the expected per-group counts, including 0 for the group whose counted
// values are all null.
func TestCountDistinctOnVarcharWithNulls(t *testing.T) {
	inputFields := sqltypes.MakeTestFields(
		"c1|c2|weight_string(c2)",
		"int64|varchar|varbinary",
	)
	inputResult := sqltypes.MakeTestResult(
		inputFields,
		"null|a|0x41",
		"null|b|0x42",
		"null|null|null",
		"1|null|null",
		"1|null|null",
		"1|a|0x41",
		"1|a|0x41",
		"1|b|0x42",
		"2|null|null",
		"2|b|0x42",
		"3|null|null",
		"3|null|null",
		"3|null|null",
		"3|null|null",
	)
	source := &fakePrimitive{
		results: []*sqltypes.Result{inputResult},
	}

	countDistinct := &AggregateParams{
		Opcode:    AggregateCountDistinct,
		Col:       1,
		WCol:      2,
		WAssigned: true,
		Alias:     "count(distinct c2)",
	}
	oa := &OrderedAggregate{
		PreProcess:          true,
		Aggregates:          []*AggregateParams{countDistinct},
		GroupByKeys:         []GroupByParams{{KeyCol: 0}},
		Input:               source,
		TruncateColumnCount: 2,
	}

	expectedFields := sqltypes.MakeTestFields(
		"c1|count(distinct c2)",
		"int64|int64",
	)
	expected := sqltypes.MakeTestResult(
		expectedFields,
		`null|2`, `1|2`, `2|1`, `3|0`,
	)

	// Non-streaming path.
	got, err := oa.Execute(nil, nil, false)
	require.NoError(t, err)
	assert.Equal(t, expected, got)

	// Streaming path over the same input: rewind the fake primitive, then
	// stitch the streamed chunks back into a single Result for comparison.
	source.rewind()
	streamed := &sqltypes.Result{}
	collect := func(chunk *sqltypes.Result) error {
		if chunk.Fields != nil {
			streamed.Fields = chunk.Fields
		}
		streamed.Rows = append(streamed.Rows, chunk.Rows...)
		return nil
	}
	err = oa.StreamExecute(nil, nil, false, collect)
	require.NoError(t, err)
	assert.Equal(t, expected, streamed)
}
Loading