From 11b9353ebcb655ab3615eaf4046f081139f400c2 Mon Sep 17 00:00:00 2001 From: Michael Erickson Date: Mon, 4 Jul 2022 23:31:54 -0700 Subject: [PATCH] sql/stats: convert between histograms and quantile functions To predict histograms in statistics forecasts, we will use linear regression over quantile functions. (Quantile functions are another representation of histogram data, in a form more amenable to statistical manipulation.) This commit defines quantile functions and adds methods to convert between histograms and quantile functions. This code was originally part of #77070 but has been pulled out to simplify that PR. A few changes have been made: - Common code has been factored into closures. - More checks have been added for positive values. - In `makeQuantile` we now trim leading empty buckets as well as trailing empty buckets. - The logic in `quantile.toHistogram` to steal from `NumRange` if `NumEq` is zero now checks that `NumRange` will still be >= 1. - More tests have been added. Assists: #79872 Release note: None --- pkg/sql/stats/quantile.go | 310 +++++++++++++++++++++++++++- pkg/sql/stats/quantile_test.go | 360 ++++++++++++++++++++++++++++++++- 2 files changed, 654 insertions(+), 16 deletions(-) diff --git a/pkg/sql/stats/quantile.go b/pkg/sql/stats/quantile.go index b6e94a391650..71ae4d53cdc5 100644 --- a/pkg/sql/stats/quantile.go +++ b/pkg/sql/stats/quantile.go @@ -14,6 +14,8 @@ import ( "math" "time" + "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" + "github.com/cockroachdb/cockroach/pkg/sql/sem/eval" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/timeutil" @@ -21,6 +23,84 @@ import ( "github.com/cockroachdb/errors" ) +// quantile is a piecewise quantile function with float64 values. +// +// A quantile function is a way of representing a probability distribution. 
It +// is a function from p to v, over (p=0, p=1], where p is the probability that +// an item in the distribution will have value <= v. The quantile function for a +// probability distribution is the inverse of the cumulative distribution +// function for the same probability distribution. See +// https://en.wikipedia.org/wiki/Quantile_function for more background. +// +// We use quantile functions within our modeling for a few reasons: +// * Unlike histograms, quantile functions are independent of the absolute +// counts. They are a "shape" not a "size". +// * Unlike cumulative distribution functions or probability density functions, +// we can always take the definite integral of a quantile function from p=0 to +// p=1. We use this when performing linear regression over quantiles. +// +// Type quantile represents a piecewise quantile function with float64 values as +// a series of quantilePoints from p=0 (exclusive) to p=1 (inclusive). A +// well-formed quantile is non-decreasing in both p and v. A quantile must have +// at least two points. The first point must have p=0, and the last point must +// have p=1. The pieces of the quantile function are line segments between +// consecutive points (exclusive and inclusive, respectively). +// +// Consecutive points may have the same p (a vertical line, or discontinuity), +// meaning the probability of finding a value > v₁ and <= v₂ is zero. Consecutive +// points may have the same v (a horizontal line), meaning the probability of +// finding exactly that v is p₂ - p₁. To put it in terms of our histograms: +// NumRange = 0 becomes a vertical line, NumRange > 0 becomes a slanted line +// with positive slope, NumEq = 0 adds no segment, and NumEq > 0 becomes a +// horizontal line.
+// +// For example, given this population of 10 values: +// +// {200, 200, 210, 210, 210, 211, 212, 221, 222, 230} +// +// One possible histogram might be: +// +// {{UpperBound: 200, NumRange: 0, NumEq: 2}, +// {UpperBound: 210, NumRange: 0, NumEq: 3}, +// {UpperBound: 220, NumRange: 2, NumEq: 0}, +// {UpperBound: 230, NumRange: 2, NumEq: 1}} +// +// And the corresponding quantile function would be: +// +// {{0, 200}, {0.2, 200}, {0.2, 210}, {0.5, 210}, {0.7, 220}, {0.9, 230}, {1, 230}} +// +// 230 | *-* +// | / +// 220 | * +// | / +// 210 | o-----* +// | +// 200 o---* +// | +// 190 + - - - - - - - - - - +// 0 .2 .4 .6 .8 1 +// +type quantile []quantilePoint + +// quantilePoint is an endpoint of a piece (line segment) in a piecewise +// quantile function. +type quantilePoint struct { + p, v float64 +} + +// quantileIndex is the ordinal position of a quantilePoint within a +// quantile. +type quantileIndex = int + +// zeroQuantile is what we use for empty tables. Technically it says nothing +// about the number of rows in the table / items in the probability +// distribution, only that they all equal the zero value. +var zeroQuantile = quantile{{p: 0, v: 0}, {p: 1, v: 0}} + +// makeQuantile and quantile.toHistogram might need to change if we introduce a +// new histogram version. +const _ uint = 1 - uint(histVersion) + // CanMakeQuantile returns true if a quantile function can be created for a // histogram of the given type. // TODO(michae2): Add support for DECIMAL, TIME, TIMETZ, and INTERVAL. @@ -34,20 +114,238 @@ func CanMakeQuantile(colType *types.T) bool { types.DateFamily, types.TimestampFamily, types.TimestampTZFamily: + // TODO(michae2): Check that there are no constraints making this a de facto + // ENUM. (Could also check histogram for sum(NumRange) > 0.) 
return true default: return false } } -// ToQuantileValue converts from a datum to a float suitable for use in a quantile +// makeQuantile converts a histogram to a quantile function, or returns an error +// if it cannot. The histogram must not contain a bucket for NULL values, and +// the row count must not include NULL values. The first bucket of the histogram +// must have NumRange == 0. +func makeQuantile(hist histogram, rowCount float64) (quantile, error) { + if !isPositive(rowCount) { + return nil, errors.AssertionFailedf("invalid rowCount: %v", rowCount) + } + + // Empty table cases. + if len(hist.buckets) == 0 || rowCount < 1 { + return zeroQuantile, nil + } + + // To produce a quantile with first point at p=0 and at least two points, we + // need the first bucket to have NumRange == 0. + if hist.buckets[0].NumRange != 0 { + return nil, errors.AssertionFailedf( + "histogram with non-zero NumRange in first bucket: %v", hist.buckets[0].NumRange, + ) + } + + var ( + // qfTrimLo and qfTrimHi are indexes to slice the quantile to when trimming + // zero-row buckets from the beginning and end of the histogram. + qfTrimLo, qfTrimHi quantileIndex + qf quantile + prevV = math.Inf(-1) + p float64 + ) + + addPoint := func(num, v float64) error { + if !isPositive(num) { + return errors.AssertionFailedf("invalid histogram num: %v", num) + } + // Advance p by the proportion of rows counted by num. + p += num / rowCount + // Fix any floating point errors or histogram errors (e.g. sum of bucket row + // counts > total row count) causing p to go above 1. + if p > 1 { + p = 1 + } + qf = append(qf, quantilePoint{p: p, v: v}) + if p == 0 { + qfTrimLo = len(qf) - 1 + } + if num > 0 { + qfTrimHi = len(qf) + } + return nil + } + + // For each histogram bucket, add two points to the quantile: (1) an endpoint + // for NumRange and (2) an endpoint for NumEq. If NumEq == 0 we can skip the + // second point, but we must always add the first point even if NumRange == 0. 
+ for i := range hist.buckets { + if hist.buckets[i].NumRange < 0 || hist.buckets[i].NumEq < 0 { + return nil, errors.AssertionFailedf("histogram bucket with negative row count") + } + v, err := toQuantileValue(hist.buckets[i].UpperBound) + if err != nil { + return nil, err + } + if v <= prevV { + return nil, errors.AssertionFailedf("non-increasing quantile values") + } + prevV = v + + if err := addPoint(hist.buckets[i].NumRange, v); err != nil { + return nil, err + } + if hist.buckets[i].NumEq == 0 { + // Small optimization: skip adding a duplicate point to the quantile. + continue + } + if err := addPoint(hist.buckets[i].NumEq, v); err != nil { + return nil, err + } + } + + if qfTrimHi <= qfTrimLo { + // In the unlikely case that every bucket had zero rows we simply return the + // zeroQuantile. + qf = zeroQuantile + } else { + // Trim any zero-row buckets from the beginning and end. + qf = qf[qfTrimLo:qfTrimHi] + // Fix any floating point errors or histogram errors (e.g. sum of bucket row + // counts < total row count) causing p to be below 1 at the end. + qf[len(qf)-1].p = 1 + } + return qf, nil +} + +// toHistogram converts a quantile into a histogram, using the provided type and +// row count. It returns an error if the conversion fails. +func (qf quantile) toHistogram(colType *types.T, rowCount float64) (histogram, error) { + if len(qf) < 2 || qf[0].p != 0 || qf[len(qf)-1].p != 1 { + return histogram{}, errors.AssertionFailedf("invalid quantile: %v", qf) + } + + var hist histogram + + // Empty table case. + if rowCount < 1 { + return hist, nil + } + + // None of our supported types need information from eval.Context, but we do + // need UnwrapDatum for CompareError to work correctly, so use a typed nil. + var nilEvalContext *eval.Context + + var i int + // Skip any leading p=0 points instead of emitting zero-row buckets. + for qf[i].p == 0 { + i++ + } + + // Create the first bucket of the histogram. The first bucket must always have + // NumRange == 0. 
Sometimes we will emit a zero-row bucket to make this true. + currentUpperBound, err := fromQuantileValue(colType, qf[i-1].v) + if err != nil { + return histogram{}, err + } + currentBucket := cat.HistogramBucket{ + NumEq: 0, + NumRange: 0, + DistinctRange: 0, + UpperBound: currentUpperBound, + } + + var pEq float64 + + closeCurrentBucket := func() error { + numEq := pEq * rowCount + if !isPositive(numEq) { + return errors.AssertionFailedf("invalid histogram NumEq: %v", numEq) + } + if numEq < 1 && currentBucket.NumRange+numEq >= 2 { + // Steal from NumRange so that NumEq is at least 1, if it wouldn't make + // NumRange 0. This makes the histogram look more like something + // EquiDepthHistogram would produce. + currentBucket.NumRange -= 1 - numEq + numEq = 1 + } + currentBucket.NumEq = numEq + + // Calculate DistinctRange for this bucket now that NumRange is final. + lowerBound := getNextLowerBound(nilEvalContext, currentUpperBound) + distinctRange := estimatedDistinctValuesInRange( + nilEvalContext, currentBucket.NumRange, lowerBound, currentUpperBound, + ) + if !isPositive(distinctRange) { + return errors.AssertionFailedf("invalid histogram DistinctRange: %v", distinctRange) + } + currentBucket.DistinctRange = distinctRange + + hist.buckets = append(hist.buckets, currentBucket) + pEq = 0 + return nil + } + + // For each point in the quantile, if its value is equal to the current + // upperBound then add to NumEq of the current bucket. Otherwise close the + // current bucket and add to NumRange of a new current bucket. 
+ for ; i < len(qf); i++ { + upperBound, err := fromQuantileValue(colType, qf[i].v) + if err != nil { + return histogram{}, err + } + cmp, err := upperBound.CompareError(nilEvalContext, currentUpperBound) + if err != nil { + return histogram{}, err + } + if cmp < 0 { + return histogram{}, errors.AssertionFailedf("decreasing histogram values") + } + if cmp == 0 { + pEq += qf[i].p - qf[i-1].p + } else { + if err := closeCurrentBucket(); err != nil { + return histogram{}, err + } + + // Start a new current bucket. + pRange := qf[i].p - qf[i-1].p + numRange := pRange * rowCount + if !isPositive(numRange) { + return histogram{}, errors.AssertionFailedf("invalid histogram NumRange: %v", numRange) + } + currentUpperBound = upperBound + currentBucket = cat.HistogramBucket{ + NumEq: 0, + NumRange: numRange, + DistinctRange: 0, + UpperBound: currentUpperBound, + } + } + // Skip any trailing p=1 points instead of emitting zero-row buckets. + if qf[i].p == 1 { + break + } + } + + // Close the last bucket. + if err := closeCurrentBucket(); err != nil { + return histogram{}, err + } + + return hist, nil +} + +func isPositive(x float64) bool { + return !math.IsNaN(x) && !math.IsInf(x, 0) && x >= 0 +} + +// toQuantileValue converts from a datum to a float suitable for use in a quantile // function. It differs from eval.PerformCast in a few ways: // 1. It supports conversions that are not legal casts (e.g. DATE to FLOAT). // 2. It errors on NaN and infinite values because they will break our model. -// FromQuantileValue is the inverse of this function, and together they should +// fromQuantileValue is the inverse of this function, and together they should // support round-trip conversions. // TODO(michae2): Add support for DECIMAL, TIME, TIMETZ, and INTERVAL. 
-func ToQuantileValue(d tree.Datum) (float64, error) { +func toQuantileValue(d tree.Datum) (float64, error) { switch v := d.(type) { case *tree.DInt: return float64(*v), nil @@ -89,8 +387,8 @@ var ( quantileMaxTimestampSec = float64(quantileMaxTimestamp.Unix()) ) -// FromQuantileValue converts from a quantile value back to a datum suitable for -// use in a histogram. It is the inverse of ToQuantileValue. It differs from +// fromQuantileValue converts from a quantile value back to a datum suitable for +// use in a histogram. It is the inverse of toQuantileValue. It differs from // eval.PerformCast in a few ways: // 1. It supports conversions that are not legal casts (e.g. FLOAT to DATE). // 2. It errors on NaN and infinite values because they indicate a problem with @@ -98,7 +396,7 @@ var ( // 3. On overflow or underflow it clamps to maximum or minimum finite values // rather than failing the conversion (and thus the entire histogram). // TODO(michae2): Add support for DECIMAL, TIME, TIMETZ, and INTERVAL. 
-func FromQuantileValue(colType *types.T, val float64) (tree.Datum, error) { +func fromQuantileValue(colType *types.T, val float64) (tree.Datum, error) { if math.IsNaN(val) || math.IsInf(val, 0) { return nil, tree.ErrFloatOutOfRange } diff --git a/pkg/sql/stats/quantile_test.go b/pkg/sql/stats/quantile_test.go index fbf7845f7ce8..cbc9d3be18de 100644 --- a/pkg/sql/stats/quantile_test.go +++ b/pkg/sql/stats/quantile_test.go @@ -12,16 +12,356 @@ package stats import ( "math" + "reflect" "strconv" "testing" "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" "github.com/cockroachdb/cockroach/pkg/sql/sem/eval" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate" ) +type testHistogram []testBucket + +type testBucket struct { + NumEq, NumRange, DistinctRange, UpperBound float64 +} + +func (th testHistogram) toHistogram() histogram { + if th == nil { + return histogram{} + } + h := histogram{buckets: make([]cat.HistogramBucket, len(th))} + for i := range th { + h.buckets[i].NumEq = th[i].NumEq + h.buckets[i].NumRange = th[i].NumRange + h.buckets[i].DistinctRange = th[i].DistinctRange + h.buckets[i].UpperBound = tree.NewDFloat(tree.DFloat(th[i].UpperBound)) + } + return h +} + +// Test conversions from histogram to quantile. +func TestMakeQuantile(t *testing.T) { + // We use all floats here. TestToQuantileValue and TestFromQuantileValue test + // conversions to other datatypes. 
+ testCases := []struct { + hist testHistogram + rows float64 + qfun quantile + err bool + }{ + { + hist: nil, + rows: 0, + qfun: zeroQuantile, + }, + { + hist: testHistogram{}, + rows: 0, + qfun: zeroQuantile, + }, + { + hist: testHistogram{{0, 0, 0, 0}}, + rows: 0, + qfun: zeroQuantile, + }, + { + hist: testHistogram{{0, 0, 0, 100}, {0, 0, 0, 200}}, + rows: 10, + qfun: zeroQuantile, + }, + { + hist: testHistogram{{1, 0, 0, 0}}, + rows: 1, + qfun: zeroQuantile, + }, + { + hist: testHistogram{{2, 0, 0, 0}}, + rows: 2, + qfun: zeroQuantile, + }, + { + hist: testHistogram{{1, 0, 0, 100}}, + rows: 1, + qfun: quantile{{0, 100}, {1, 100}}, + }, + { + hist: testHistogram{{1, 0, 0, 100}, {0, 1, 1, 200}}, + rows: 2, + qfun: quantile{{0, 100}, {0.5, 100}, {1, 200}}, + }, + { + hist: testHistogram{{1, 0, 0, 100}, {2, 1, 1, 200}}, + rows: 4, + qfun: quantile{{0, 100}, {0.25, 100}, {0.5, 200}, {1, 200}}, + }, + { + hist: testHistogram{{0, 0, 0, 100}, {6, 2, 2, 200}}, + rows: 8, + qfun: quantile{{0, 100}, {0.25, 200}, {1, 200}}, + }, + { + hist: testHistogram{{0, 0, 0, 100}, {6, 2, 2, 200}}, + rows: 8, + qfun: quantile{{0, 100}, {0.25, 200}, {1, 200}}, + }, + { + hist: testHistogram{{2, 0, 0, 100}, {6, 2, 2, 200}, {2, 0, 0, 300}, {0, 4, 4, 400}}, + rows: 16, + qfun: quantile{{0, 100}, {0.125, 100}, {0.25, 200}, {0.625, 200}, {0.625, 300}, {0.75, 300}, {1, 400}}, + }, + // Cases where we trim leading and trailing zero buckets. + { + hist: testHistogram{{0, 0, 0, 0}, {0, 0, 0, 100}, {2, 2, 2, 200}}, + rows: 4, + qfun: quantile{{0, 100}, {0.5, 200}, {1, 200}}, + }, + { + hist: testHistogram{{0, 0, 0, 100}, {2, 6, 6, 200}, {0, 0, 0, 300}}, + rows: 8, + qfun: quantile{{0, 100}, {0.75, 200}, {1, 200}}, + }, + { + hist: testHistogram{{0, 0, 0, 0}, {4, 0, 0, 100}, {1, 3, 3, 200}, {0, 0, 0, 300}}, + rows: 8, + qfun: quantile{{0, 100}, {0.5, 100}, {0.875, 200}, {1, 200}}, + }, + // Cases where we clamp p to 1 to fix histogram errors. 
+ { + hist: testHistogram{{2, 0, 0, 100}}, + rows: 1, + qfun: quantile{{0, 100}, {1, 100}}, + }, + { + hist: testHistogram{{1, 0, 0, 100}, {0, 1, 1, 200}}, + rows: 1, + qfun: quantile{{0, 100}, {1, 100}, {1, 200}}, + }, + // Error cases. + { + hist: testHistogram{}, + rows: math.Inf(1), + err: true, + }, + { + hist: testHistogram{}, + rows: math.NaN(), + err: true, + }, + { + hist: testHistogram{}, + rows: -1, + err: true, + }, + { + hist: testHistogram{{0, 1, 1, 100}}, + rows: 1, + err: true, + }, + { + hist: testHistogram{{-1, 0, 0, 100}}, + rows: 1, + err: true, + }, + { + hist: testHistogram{{math.Inf(1), 0, 0, 100}}, + rows: 1, + err: true, + }, + { + hist: testHistogram{{1, 0, 0, 100}, {1, 0, 0, 99}}, + rows: 2, + err: true, + }, + { + hist: testHistogram{{1, 0, 0, 100}, {0, 1, 1, 100}}, + rows: 2, + err: true, + }, + } + for i, tc := range testCases { + t.Run(strconv.Itoa(i), func(t *testing.T) { + q, err := makeQuantile(tc.hist.toHistogram(), tc.rows) + if err != nil { + if !tc.err { + t.Errorf("test case %d unexpected makeQuantile err: %v", i, err) + } + return + } + if tc.err { + t.Errorf("test case %d expected makeQuantile err", i) + return + } + if !reflect.DeepEqual(q, tc.qfun) { + t.Errorf("test case %d incorrect quantile %v expected %v", i, q, tc.qfun) + } + }) + } +} + +// Test conversions from quantile to histogram. +func TestQuantileToHistogram(t *testing.T) { + // We use all floats here. TestToQuantileValue and TestFromQuantileValue test + // conversions to other datatypes. 
+ testCases := []struct { + qfun quantile + rows float64 + hist testHistogram + err bool + }{ + { + qfun: zeroQuantile, + rows: 0, + hist: nil, + }, + { + qfun: zeroQuantile, + rows: 1, + hist: testHistogram{{1, 0, 0, 0}}, + }, + { + qfun: zeroQuantile, + rows: 2, + hist: testHistogram{{2, 0, 0, 0}}, + }, + { + qfun: quantile{{0, 100}, {1, 100}}, + rows: 1, + hist: testHistogram{{1, 0, 0, 100}}, + }, + { + qfun: quantile{{0, 0}, {0, 100}, {1, 100}}, + rows: 1, + hist: testHistogram{{1, 0, 0, 100}}, + }, + { + qfun: quantile{{0, 100}, {1, 100}, {1, 100}}, + rows: 1, + hist: testHistogram{{1, 0, 0, 100}}, + }, + { + qfun: quantile{{0, 100}, {1, 100}, {1, 200}}, + rows: 1, + hist: testHistogram{{1, 0, 0, 100}}, + }, + { + qfun: quantile{{0, 0}, {1, 100}}, + rows: 1, + hist: testHistogram{{0, 0, 0, 0}, {0, 1, 1, 100}}, + }, + { + qfun: quantile{{0, 0}, {0.5, 100}, {1, 100}}, + rows: 2, + hist: testHistogram{{0, 0, 0, 0}, {1, 1, 1, 100}}, + }, + { + qfun: quantile{{0, 0}, {0.9, 100}, {1, 100}}, + rows: 10, + hist: testHistogram{{0, 0, 0, 0}, {1, 9, 9, 100}}, + }, + { + qfun: quantile{{0, 100}, {0.25, 100}, {0.75, 200}, {1, 200}}, + rows: 16, + hist: testHistogram{{4, 0, 0, 100}, {4, 8, 8, 200}}, + }, + { + qfun: quantile{{0, 100}, {0.25, 100}, {0.5, 200}, {0.75, 200}, {0.75, 300}, {1, 300}}, + rows: 16, + hist: testHistogram{{4, 0, 0, 100}, {4, 4, 4, 200}, {4, 0, 0, 300}}, + }, + { + qfun: quantile{{0, 500}, {0.125, 500}, {0.25, 600}, {0.5, 600}, {0.75, 800}, {1, 800}}, + rows: 16, + hist: testHistogram{{2, 0, 0, 500}, {4, 2, 2, 600}, {4, 4, 4, 800}}, + }, + { + qfun: quantile{{0, 300}, {0, 310}, {0.125, 310}, {0.125, 320}, {0.25, 320}, {0.25, 330}, {0.5, 330}, {0.5, 340}, {0.625, 340}, {0.625, 350}, {0.75, 350}, {0.75, 360}, {0.875, 360}, {0.875, 370}, {1, 370}}, + rows: 32, + hist: testHistogram{{4, 0, 0, 310}, {4, 0, 0, 320}, {8, 0, 0, 330}, {4, 0, 0, 340}, {4, 0, 0, 350}, {4, 0, 0, 360}, {4, 0, 0, 370}}, + }, + // Cases where we steal a row from NumRange to give to 
NumEq. + { + qfun: quantile{{0, 0}, {1, 100}}, + rows: 2, + hist: testHistogram{{0, 0, 0, 0}, {1, 1, 1, 100}}, + }, + { + qfun: quantile{{0, 100}, {0.5, 100}, {1, 200}, {1, 300}}, + rows: 4, + hist: testHistogram{{2, 0, 0, 100}, {1, 1, 1, 200}}, + }, + { + qfun: quantile{{0, 0}, {0.875, 87.5}, {1, 100}}, + rows: 8, + hist: testHistogram{{0, 0, 0, 0}, {1, 6, 6, 87.5}, {0, 1, 1, 100}}, + }, + { + qfun: quantile{{0, 400}, {0.5, 600}, {0.75, 700}, {1, 800}}, + rows: 16, + hist: testHistogram{{0, 0, 0, 400}, {1, 7, 7, 600}, {1, 3, 3, 700}, {1, 3, 3, 800}}, + }, + // Error cases. + { + qfun: quantile{}, + rows: 1, + err: true, + }, + { + qfun: quantile{{0, 0}}, + rows: 1, + err: true, + }, + { + qfun: quantile{{1, 0}, {0, 0}}, + rows: 1, + err: true, + }, + { + qfun: quantile{{0, 100}, {0, 200}}, + rows: 1, + err: true, + }, + { + qfun: quantile{{0, 100}, {math.NaN(), 100}, {1, 100}}, + rows: 1, + err: true, + }, + { + qfun: quantile{{0, 0}, {0.75, 25}, {0.25, 75}, {1, 100}}, + rows: 1, + err: true, + }, + { + qfun: quantile{{0, 100}, {1, 99}}, + rows: 1, + err: true, + }, + } + for i, tc := range testCases { + t.Run(strconv.Itoa(i), func(t *testing.T) { + h, err := tc.qfun.toHistogram(types.Float, tc.rows) + if err != nil { + if !tc.err { + t.Errorf("test case %d unexpected quantile.toHistogram err: %v", i, err) + } + return + } + if tc.err { + t.Errorf("test case %d expected quantile.toHistogram err", i) + return + } + h2 := tc.hist.toHistogram() + if !reflect.DeepEqual(h, h2) { + t.Errorf("test case %d incorrect histogram %v expected %v", i, h, h2) + } + }) + } +} + // Test conversions from datum to quantile value and back. 
func TestToQuantileValue(t *testing.T) { testCases := []struct { @@ -258,15 +598,15 @@ func TestToQuantileValue(t *testing.T) { evalCtx := eval.NewTestingEvalContext(cluster.MakeTestingClusterSettings()) for i, tc := range testCases { t.Run(strconv.Itoa(i), func(t *testing.T) { - val, err := ToQuantileValue(tc.dat) + val, err := toQuantileValue(tc.dat) if err != nil { if !tc.err { - t.Errorf("test case %d (%v) unexpected ToQuantileValue err: %v", i, tc.typ.Name(), err) + t.Errorf("test case %d (%v) unexpected toQuantileValue err: %v", i, tc.typ.Name(), err) } return } if tc.err { - t.Errorf("test case %d (%v) expected ToQuantileValue err", i, tc.typ.Name()) + t.Errorf("test case %d (%v) expected toQuantileValue err", i, tc.typ.Name()) return } if val != tc.val { @@ -274,9 +614,9 @@ func TestToQuantileValue(t *testing.T) { return } // Check that we can make the round trip. - res, err := FromQuantileValue(tc.typ, val) + res, err := fromQuantileValue(tc.typ, val) if err != nil { - t.Errorf("test case %d (%v) unexpected FromQuantileValue err: %v", i, tc.typ.Name(), err) + t.Errorf("test case %d (%v) unexpected fromQuantileValue err: %v", i, tc.typ.Name(), err) return } cmp, err := res.CompareError(evalCtx, tc.dat) @@ -535,15 +875,15 @@ func TestFromQuantileValue(t *testing.T) { evalCtx := eval.NewTestingEvalContext(cluster.MakeTestingClusterSettings()) for i, tc := range testCases { t.Run(strconv.Itoa(i), func(t *testing.T) { - d, err := FromQuantileValue(tc.typ, tc.val) + d, err := fromQuantileValue(tc.typ, tc.val) if err != nil { if !tc.err { - t.Errorf("test case %d (%v) unexpected FromQuantileValue err: %v", i, tc.typ.Name(), err) + t.Errorf("test case %d (%v) unexpected fromQuantileValue err: %v", i, tc.typ.Name(), err) } return } if tc.err { - t.Errorf("test case %d (%v) expected FromQuantileValue err", i, tc.typ.Name()) + t.Errorf("test case %d (%v) expected fromQuantileValue err", i, tc.typ.Name()) return } cmp, err := d.CompareError(evalCtx, tc.dat) @@ -556,9 
+896,9 @@ func TestFromQuantileValue(t *testing.T) { return } // Check that we can make the round trip with the clamped value. - res, err := ToQuantileValue(d) + res, err := toQuantileValue(d) if err != nil { - t.Errorf("test case %d (%v) unexpected ToQuantileValue err: %v", i, tc.typ.Name(), err) + t.Errorf("test case %d (%v) unexpected toQuantileValue err: %v", i, tc.typ.Name(), err) return } if res != tc.res {