From 9944c05330f441ec0d439b9107063bb9b4a9b3fd Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Tue, 23 Aug 2022 15:33:15 -0700 Subject: [PATCH] colexec: fix a couple of issues with the new range stats operator This commit fixes a couple of issues with the new range stats operator: - it adds the null check for each value before using it as a key - it performs a copy of each non-null value before appending it to the slice of the `roachpb.Key`s. This is the limitation of the KV API where keys cannot be mutated on the client side once the BatchRequest is issued, and we will mutate the incoming `Bytes` vector since it is reset and reused. In theory, this copy should not be necessary, but the race gRPC transport complains if the copy is not made. Release justification: fixes of new functionality. Release note: None --- pkg/sql/colexec/range_stats.go | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/pkg/sql/colexec/range_stats.go b/pkg/sql/colexec/range_stats.go index 7c98466d7b91..f0677e932caa 100644 --- a/pkg/sql/colexec/range_stats.go +++ b/pkg/sql/colexec/range_stats.go @@ -57,6 +57,18 @@ func newRangeStatsOperator( // range statistics also call that function; optimizing one without the other // is pointless, and they are very similar. +// appendKey appends the ith value from the inBytes vector to the given keys +// and returns the updated slice. A copy of the ith value is made since these +// keys then go into the KV layer, and we are not allowed to modify them after +// (which we will do with the inBytes vector since it is reset and reused). We +// can avoid making this copy once #75452 is resolved. +func appendKey(keys []roachpb.Key, inBytes *coldata.Bytes, i int) []roachpb.Key { + origKey := inBytes.Get(i) + keyCopy := make([]byte, len(origKey)) + copy(keyCopy, origKey) + return append(keys, keyCopy) +} + func (r *rangeStatsOperator) Next() coldata.Batch { // Naively take the input batch and use it to define the batch size. // @@ -71,6 +83,7 @@ func (r *rangeStatsOperator) Next() coldata.Batch { inSel := batch.Selection() inVec := batch.ColVec(r.argumentCol) inBytes := inVec.Bytes() + inNulls := inVec.Nulls() output := batch.ColVec(r.outputIdx) jsonOutput := output.JSON() @@ -80,13 +93,24 @@ func (r *rangeStatsOperator) Next() coldata.Batch { keys := make([]roachpb.Key, 0, batch.Length()) if inSel == nil { for i := 0; i < batch.Length(); i++ { - keys = append(keys, inBytes.Get(i)) + if inNulls.MaybeHasNulls() && inNulls.NullAt(i) { + // Skip all NULL keys. + continue + } + keys = appendKey(keys, inBytes, i) } } else { for _, idx := range inSel { - keys = append(keys, inBytes.Get(idx)) + if inNulls.MaybeHasNulls() && inNulls.NullAt(idx) { + // Skip all NULL keys. + continue + } + keys = appendKey(keys, inBytes, idx) } } + if inNulls.MaybeHasNulls() { + output.Nulls().Copy(inNulls) + } // TODO(ajwerner): Reserve memory for the responses. We know they'll // at least, on average, contain keys so it'll be 2x the size of the // keys plus some constant multiple.