From e4510af612485cda70a4a25ce3e7f92fb48e3dc0 Mon Sep 17 00:00:00 2001
From: Yahor Yuzefovich
Date: Thu, 8 Sep 2022 17:29:52 -0700
Subject: [PATCH] colbuilder: fix aggregation with no aggregate funcs

Previously, if there were no aggregate functions to compute, we would
plan a special "num fixed tuples" operator that would always return a
single tuple in scalar context and no tuples in non-scalar context.
This was incorrect: for non-empty input we should return the same
number of tuples as there are groups. The previous setup was correct
only when the input was empty. This is now fixed by teaching the
ordered aggregator to handle this special case instead.

The impact of the bug seems minor since the optimizer should only be
creating such plans when some of its rules are disabled.

Release note: None (this shouldn't be a user-visible bug)
---
 pkg/sql/colexec/colbuilder/execplan.go  | 25 -------------------
 pkg/sql/colexec/colexecbase/distinct.go | 15 +++++++++--
 pkg/sql/colexec/ordered_aggregator.go   | 13 +++++++++-
 .../testdata/logic_test/vectorize_agg   | 16 ++++++++++++
 4 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/pkg/sql/colexec/colbuilder/execplan.go b/pkg/sql/colexec/colbuilder/execplan.go
index 92bcfc7d61ab..e970ae5d12a5 100644
--- a/pkg/sql/colexec/colbuilder/execplan.go
+++ b/pkg/sql/colexec/colbuilder/execplan.go
@@ -804,31 +804,6 @@ func NewColOperator(
 			return r, err
 		}
 		aggSpec := core.Aggregator
-		if len(aggSpec.Aggregations) == 0 {
-			// We can get an aggregator when no aggregate functions are
-			// present if HAVING clause is present, for example, with a
-			// query as follows: SELECT 1 FROM t HAVING true. In this case,
-			// we plan a special operator that outputs a batch of length 1
-			// or 0 (depending on whether the aggregate is in scalar context
-			// or not) without actual columns once and then zero-length
-			// batches. The actual "data" will be added by projections
-			// below.
-			// TODO(solon): The distsql plan for this case includes a
-			// TableReader, so we end up creating an orphaned colBatchScan.
-			// We should avoid that. Ideally the optimizer would not plan a
-			// scan in this unusual case.
-			numTuples := 0
-			if aggSpec.IsScalar() {
-				numTuples = 1
-			}
-			result.Root, err = colexecutils.NewFixedNumTuplesNoInputOp(
-				getStreamingAllocator(ctx, args), numTuples, inputs[0].Root,
-			), nil
-			// We make ColumnTypes non-nil so that sanity check doesn't
-			// panic.
-			result.ColumnTypes = []*types.T{}
-			break
-		}
 		if aggSpec.IsRowCount() {
 			result.Root, err = colexec.NewCountOp(getStreamingAllocator(ctx, args), inputs[0].Root), nil
 			result.ColumnTypes = []*types.T{types.Int}
diff --git a/pkg/sql/colexec/colexecbase/distinct.go b/pkg/sql/colexec/colexecbase/distinct.go
index 998aadb59f1b..ba0510f4cba6 100644
--- a/pkg/sql/colexec/colexecbase/distinct.go
+++ b/pkg/sql/colexec/colexecbase/distinct.go
@@ -35,8 +35,19 @@ func OrderedDistinctColsToOperators(
 		OneInputHelper: colexecop.MakeOneInputHelper(input),
 		fn:             func() { copy(distinctCol, colexecutils.ZeroBoolColumn) },
 	}
-	for i := range distinctCols {
-		input = newSingleDistinct(input, int(distinctCols[i]), distinctCol, typs[distinctCols[i]], nullsAreDistinct)
+	if len(distinctCols) > 0 {
+		for i := range distinctCols {
+			input = newSingleDistinct(input, int(distinctCols[i]), distinctCol, typs[distinctCols[i]], nullsAreDistinct)
+		}
+	} else {
+		// If there are no distinct columns, we have to mark the very first
+		// tuple as distinct ourselves.
+ firstTuple := true + input.(*fnOp).fn = func() { + copy(distinctCol, colexecutils.ZeroBoolColumn) + distinctCol[0] = firstTuple + firstTuple = false + } } r, ok := input.(colexecop.ResettableOperator) if !ok { diff --git a/pkg/sql/colexec/ordered_aggregator.go b/pkg/sql/colexec/ordered_aggregator.go index 76bef5d09007..83f81dce9e1c 100644 --- a/pkg/sql/colexec/ordered_aggregator.go +++ b/pkg/sql/colexec/ordered_aggregator.go @@ -255,7 +255,18 @@ func (a *orderedAggregator) Next() coldata.Batch { } }) } - a.scratch.resumeIdx = a.bucket.fns[0].CurrentOutputIndex() + if len(a.bucket.fns) > 0 { + a.scratch.resumeIdx = a.bucket.fns[0].CurrentOutputIndex() + } else { + // When there are no aggregate functions to compute, we + // simply need to output the same number of empty rows as + // the number of groups. + for _, newGroup := range a.groupCol[:batchLength] { + if newGroup { + a.scratch.resumeIdx++ + } + } + } } if batchLength == 0 { a.state = orderedAggregatorOutputting diff --git a/pkg/sql/logictest/testdata/logic_test/vectorize_agg b/pkg/sql/logictest/testdata/logic_test/vectorize_agg index 21bd5c7b126e..2da332fe4820 100644 --- a/pkg/sql/logictest/testdata/logic_test/vectorize_agg +++ b/pkg/sql/logictest/testdata/logic_test/vectorize_agg @@ -55,3 +55,19 @@ NULL NULL 1 1 22 22 33 33 + +# Regression test for incorrect handling of non-scalar GROUP BY with no +# aggregate functions (#87619). +statement ok +CREATE TABLE t87619 (b) AS SELECT true; +SET testing_optimizer_random_seed = 3164997759865821235; +SET testing_optimizer_disable_rule_probability = 0.500000; + +query B +SELECT true FROM t87619 GROUP BY b HAVING b +---- +true + +statement ok +RESET testing_optimizer_random_seed; +RESET testing_optimizer_disable_rule_probability;
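
Note (outside the diff): the following is a minimal standalone Go sketch of
the idea behind the ordered_aggregator.go hunk above, not CockroachDB code.
The name countGroups is illustrative only; the actual change counts group
boundaries in a.groupCol to advance a.scratch.resumeIdx.

	package main

	import "fmt"

	// countGroups returns the number of group boundaries marked in groupCol.
	// With no aggregate functions to compute, the ordered aggregator needs to
	// emit exactly this many (empty) output rows for the batch.
	func countGroups(groupCol []bool) int {
		numGroups := 0
		for _, newGroup := range groupCol {
			if newGroup {
				numGroups++
			}
		}
		return numGroups
	}

	func main() {
		// Five tuples spanning two groups -> two output rows.
		fmt.Println(countGroups([]bool{true, false, false, true, false})) // 2
	}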