Skip to content

Commit

Permalink
API usability improvements in sampling package (#31940)
Browse files Browse the repository at this point in the history
**Description:**

Several usability issues were ironed out while working on #31894. This
PR is the pkg/sampling changes from that PR.

Highlights:
- Adds `NeverSampleThreshold` value, which is the exclusive
upper-boundary of threshold values. This makes negative sampling
decisions easier to manage, as shown in #31894.
- Adds `AllProbabilitiesRandomness` value, which is the inclusive
upper-boundary of Randomness values. This makes error handling more
natural as shown in #31894. All thresholds except `NeverSampleThreshold`
will be sampled at `AllProbabilitiesRandomness`.
- Adds `UnsignedToThreshold` constructor for explicit threshold
construction. This is useful in the case of #31894 because it constructs
a 14-bit threshold value.
- Adds `UnsignedToRandomness` constructor for explicit randomness
construction. This is useful in the case of #31894 because it constructs
randomness values from log records w/o use of TraceState.
- Removes a parameter from `UpdateTValueWithSampling` to avoid the
potential for an inconsistent update through mis-use (as identified in
#31894, there is less optimization potential because sampling.threshold
modifies thresholds in all modes).
- Eliminates the `ErrPrecisionUnderflow` error condition and
automatically corrects the problem by extending precision near 0.0 and
1.0 where there are obligatory leading `f` or `0` digits.

**Link to tracking Issue:** #31918 

**Testing:** New tests added for coverage.

**Documentation:** New comments to explain.

---------

Co-authored-by: Sean Marciniak <[email protected]>
  • Loading branch information
jmacd and MovieStoreGuy authored Apr 9, 2024
1 parent 78d1718 commit beef35e
Show file tree
Hide file tree
Showing 11 changed files with 258 additions and 115 deletions.
27 changes: 27 additions & 0 deletions .chloggen/sampling-improvements.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/sampling

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Usability improvements in the sampling API.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [31918]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [api]
93 changes: 49 additions & 44 deletions pkg/sampling/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,53 +37,58 @@
// conditioned on how much sampling has already taken place, use the
// following pseudocode.
//
// func Setup() {
// // Get a fixed probability value from the configuration, in
// // the range (0, 1].
// probability := *FLAG_probability
// func Setup() {
// // Get a fixed probability value from the configuration, in
// // the range (0, 1].
// probability := *FLAG_probability
//
// // Calculate the sampling threshold from probability using 3
// // hex digits of precision.
// fixedThreshold, err = ProbabilityToThresholdWithPrecision(probability, 3)
// if err != nil {
// // error case: Probability is not valid.
// }
// }
// // Calculate the sampling threshold from probability using 3
// // hex digits of precision.
// fixedThreshold, err = ProbabilityToThresholdWithPrecision(probability, 3)
// if err != nil {
// // error case: Probability is not valid.
// }
// }
//
// func MakeDecision(tracestate string, tid TraceID) bool {
// // Parse the incoming tracestate
// ts, err := NewW3CTraceState(tracestate)
// if err != nil {
// // error case: Tracestate is ill-formed.
// }
// // For an absolute probability sample, we check the incoming
// // tracestate to see whether it was already sampled enough.
// if len(ts.OTelValue().TValue()) != 0 {
// // If the incoming tracestate was already sampled at
// // least as much as our threshold implies, then its
// // (rejection) threshold is higher. If so, then no
// // further sampling is called for.
// if ThresholdGreater(ts.OTelValue().TValueThreshold(), fixedThreshold) {
// return true
// }
// }
// var rnd Randomness
// // If the R-value is present, use it. If not, rely on TraceID
// // randomness. Note that OTLP v1.1.0 introduces a new Span flag
// // to convey trace randomness correctly, and if the context has
// // neither the randomness bit set or the R-value set, we need a
// // fallback, which can be to synthesize an R-value or to assume
// // the TraceID has sufficient randomness. This detail is left
// // out of scope.
// if rval, hasRval := ts.OTelValue().RValueRandomness(); hasRv {
// rnd = rval
// } else {
// rnd = TraceIDToRandomness(tid)
// }
//
// return fixedThreshold.ShouldSample(rnd)
// }
// func MakeDecision(tracestate string, tid TraceID) bool {
// // Parse the incoming tracestate
// ts, err := NewW3CTraceState(tracestate)
// if err != nil {
// // error case: Tracestate is ill-formed.
// }
// // For an absolute probability sample, we check the incoming
// // tracestate to see whether it was already sampled enough.
// if threshold, hasThreshold := ts.OTelValue().TValueThreshold(); hasThreshold {
// // If the incoming tracestate was already sampled at
// // least as much as our threshold implies, then its
// // (rejection) threshold is higher. If so, then no
// // further sampling is called for.
// if ThresholdGreater(threshold, fixedThreshold) {
// // Do not update.
// return true
// }
// // The error below is ignored because we've tested
// // the equivalent condition above. This lowers the sampling
// // probability expressed in the tracestate T-value.
// _ = ts.OTelValue().UpdateThresholdWithSampling(fixedThreshold)
// }
// var rnd Randomness
// // If the R-value is present, use it. If not, rely on TraceID
// // randomness. Note that OTLP v1.1.0 introduces a new Span flag
// // to convey trace randomness correctly, and if the context has
// // neither the randomness bit set or the R-value set, we need a
// // fallback, which can be to synthesize an R-value or to assume
// // the TraceID has sufficient randomness. This detail is left
// // out of scope.
// if rv, hasRand := ts.OTelValue().RValueRandomness(); hasRand {
// rnd = rv
// } else {
// rnd = TraceIDToRandomness(tid)
// }
// return fixedThreshold.ShouldSample(rnd)
// }
//
// [W3C tracecontext specification]: https://www.w3.org/TR/trace-context/#tracestate-header
// [Tracestate handling]: https://opentelemetry.io/docs/specs/otel/trace/tracestate-handling/

package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling"
47 changes: 33 additions & 14 deletions pkg/sampling/oteltracestate.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,17 @@ func NewOpenTelemetryTraceState(input string) (OpenTelemetryTraceState, error) {
if otts.rnd, err = RValueToRandomness(value); err == nil {
otts.rvalue = value
} else {
// The zero-value for randomness implies always-sample;
// the threshold test is R < T, but T is not meaningful
// at zero, and this value implies zero adjusted count.
// RValueRandomness() will return false, the error
// accumulates and is returned below.
otts.rvalue = ""
otts.rnd = Randomness{}
}
case tValueFieldName:
if otts.threshold, err = TValueToThreshold(value); err == nil {
otts.tvalue = value
} else {
// TValueThreshold() will return false, the error
// accumulates and is returned below.
otts.tvalue = ""
otts.threshold = AlwaysSampleThreshold
}
Expand Down Expand Up @@ -148,29 +149,47 @@ func (otts *OpenTelemetryTraceState) TValueThreshold() (Threshold, bool) {
}

// UpdateTValueWithSampling modifies the TValue of this object, which
// changes its adjusted count. If the change of TValue leads to
// inconsistency (i.e., raising sampling probability), an error is
// returned.
func (otts *OpenTelemetryTraceState) UpdateTValueWithSampling(sampledThreshold Threshold, encodedTValue string) error {
// changes its adjusted count. It is not logical to modify a sampling
// probability in the direction of larger probability. This prevents
// accidental loss of adjusted count.
//
// If the change of TValue leads to inconsistency, an error is returned.
func (otts *OpenTelemetryTraceState) UpdateTValueWithSampling(sampledThreshold Threshold) error {
// Note: there was once a code path here that optimized for
// cases where a static threshold is used, in which case the
// call to TValue() causes an unnecessary allocation per data
// item (w/ a constant result). We have eliminated that
// parameter, due to the significant potential for mis-use.
// Therefore, this method always recomputes TValue() of the
// sampledThreshold (on success). A future method such as
// UpdateTValueWithSamplingFixedTValue() could extend this
// API to address this allocation, although it is probably
// not significant.
if len(otts.TValue()) != 0 && ThresholdGreater(otts.threshold, sampledThreshold) {
return ErrInconsistentSampling
}
// Note NeverSampleThreshold is the (exclusive) upper boundary
// of valid thresholds, so the test above permits never-
// sampled updates, in which case the TValue() here is empty.
otts.threshold = sampledThreshold
otts.tvalue = encodedTValue
otts.tvalue = sampledThreshold.TValue()
return nil
}

// AdjustedCount returns the adjusted count implied by this TValue.
// This term is defined here:
// https://opentelemetry.io/docs/specs/otel/trace/tracestate-probability-sampling/
// AdjustedCount returns the adjusted count for this item. If the
// TValue string is empty, this returns 0, otherwise returns
// Threshold.AdjustedCount().
func (otts *OpenTelemetryTraceState) AdjustedCount() float64 {
if len(otts.TValue()) == 0 {
if len(otts.tvalue) == 0 {
// Note: this case covers the zero state, where
// len(tvalue) == 0 and threshold == AlwaysSampleThreshold.
// We return 0 to indicate that no information is available.
return 0
}
return 1.0 / otts.threshold.Probability()
return otts.threshold.AdjustedCount()
}

// ClearTValue is used to unset TValue, in cases where it is
// ClearTValue is used to unset TValue, for use in cases where it is
// inconsistent on arrival.
func (otts *OpenTelemetryTraceState) ClearTValue() {
otts.tvalue = ""
Expand Down
6 changes: 3 additions & 3 deletions pkg/sampling/oteltracestate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func TestOpenTelemetryTraceStateTValueUpdate(t *testing.T) {
require.NotEqual(t, "", otts.RValue())

th, _ := TValueToThreshold("3")
require.NoError(t, otts.UpdateTValueWithSampling(th, "3"))
require.NoError(t, otts.UpdateTValueWithSampling(th))

require.Equal(t, "3", otts.TValue())
tv, hasTv := otts.TValueThreshold()
Expand All @@ -126,7 +126,7 @@ func TestOpenTelemetryTraceStateRTUpdate(t *testing.T) {
require.True(t, otts.HasAnyValue())

th, _ := TValueToThreshold("3")
require.NoError(t, otts.UpdateTValueWithSampling(th, "3"))
require.NoError(t, otts.UpdateTValueWithSampling(th))
otts.SetRValue(must(RValueToRandomness("00000000000003")))

const updated = "rv:00000000000003;th:3;a:b"
Expand Down Expand Up @@ -329,7 +329,7 @@ func TestUpdateTValueWithSampling(t *testing.T) {
newTh, err := ProbabilityToThreshold(test.prob)
require.NoError(t, err)

upErr := otts.UpdateTValueWithSampling(newTh, newTh.TValue())
upErr := otts.UpdateTValueWithSampling(newTh)

require.Equal(t, test.updateErr, upErr)

Expand Down
84 changes: 37 additions & 47 deletions pkg/sampling/probability.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,9 @@ import (
// ErrProbabilityRange is returned when a value should be in the range [1/MaxAdjustedCount, 1].
var ErrProbabilityRange = errors.New("sampling probability out of the range [1/MaxAdjustedCount, 1]")

// ErrPrecisionUnderflow is returned when a precision is too great for the range.
var ErrPrecisionUnderflow = errors.New("sampling precision is too great for the range")

// MinSamplingProbability is the smallest representable probability
// and is the inverse of MaxAdjustedCount.
const MinSamplingProbability = 1.0 / MaxAdjustedCount
const MinSamplingProbability = 1.0 / float64(MaxAdjustedCount)

// probabilityInRange tests MinSamplingProb <= prob <= 1.
func probabilityInRange(prob float64) bool {
Expand All @@ -26,67 +23,60 @@ func probabilityInRange(prob float64) bool {
// ProbabilityToThreshold converts a probability to a Threshold. It
// returns an error when the probability is out-of-range.
func ProbabilityToThreshold(prob float64) (Threshold, error) {
// Probability cases
if !probabilityInRange(prob) {
return AlwaysSampleThreshold, ErrProbabilityRange
}

scaled := uint64(math.Round(prob * MaxAdjustedCount))

return Threshold{
unsigned: MaxAdjustedCount - scaled,
}, nil
return ProbabilityToThresholdWithPrecision(prob, NumHexDigits)
}

// ProbabilityToThresholdWithPrecision is like ProbabilityToThreshold
// with support for reduced precision. The `prec` argument determines
// with support for reduced precision. The `precision` argument determines
// how many significant hex digits will be used to encode the exact
// probability.
func ProbabilityToThresholdWithPrecision(prob float64, prec uint8) (Threshold, error) {
func ProbabilityToThresholdWithPrecision(fraction float64, precision int) (Threshold, error) {
// Assume full precision at 0.
if prec == 0 {
return ProbabilityToThreshold(prob)
if precision == 0 {
precision = NumHexDigits
}
if !probabilityInRange(prob) {
if !probabilityInRange(fraction) {
return AlwaysSampleThreshold, ErrProbabilityRange
}
// Special case for prob == 1. The logic for revising precision
// that follows requires 0 < 1 - prob < 1.
if prob == 1 {
// Special case for prob == 1.
if fraction == 1 {
return AlwaysSampleThreshold, nil
}

// Adjust precision considering the significance of leading
// zeros. If we can multiply the rejection probability by 16
// and still be less than 1, then there is a leading zero of
// obligatory precision.
for reject := 1 - prob; reject*16 < 1; {
reject *= 16
prec++
}

// Check if leading zeros plus precision is above the maximum.
// This is called underflow because the requested precision
// leads to complete no significant figures.
if prec > NumHexDigits {
return AlwaysSampleThreshold, ErrPrecisionUnderflow
// Calculate the amount of precision needed to encode the
// threshold with reasonable precision. Here, we count the
// number of leading `0` or `f` characters and automatically
// add precision to preserve relative error near the extremes.
//
// Frexp() normalizes both the fraction and one-minus the
// fraction, because more digits of precision are needed if
// either value is near zero. Frexp returns an exponent <= 0.
//
// If `exp <= -4`, there will be a leading hex `0` or `f`.
// For every multiple of -4, another leading `0` or `f`
// appears, so this raises precision accordingly.
_, expF := math.Frexp(fraction)
_, expR := math.Frexp(1 - fraction)
precision = min(NumHexDigits, max(precision+expF/-hexBits, precision+expR/-hexBits))

// Compute the threshold
scaled := uint64(math.Round(fraction * float64(MaxAdjustedCount)))
threshold := MaxAdjustedCount - scaled

// Round to the specified precision, if less than the maximum.
if shift := hexBits * (NumHexDigits - precision); shift != 0 {
half := uint64(1) << (shift - 1)
threshold += half
threshold >>= shift
threshold <<= shift
}

scaled := uint64(math.Round(prob * MaxAdjustedCount))
rscaled := MaxAdjustedCount - scaled
shift := 4 * (14 - prec)
half := uint64(1) << (shift - 1)

rscaled += half
rscaled >>= shift
rscaled <<= shift

return Threshold{
unsigned: rscaled,
unsigned: threshold,
}, nil
}

// Probability is the sampling ratio in the range [MinSamplingProb, 1].
func (t Threshold) Probability() float64 {
return float64(MaxAdjustedCount-t.unsigned) / MaxAdjustedCount
return float64(MaxAdjustedCount-t.unsigned) / float64(MaxAdjustedCount)
}
7 changes: 5 additions & 2 deletions pkg/sampling/probability_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,10 @@ func ExampleProbabilityToThreshold_verysmall() {
0x8p-56, // Skip 8 out of 2**56
0x10p-56, // Skip 0x10 out of 2**56
} {
tval, _ := ProbabilityToThreshold(prob)
// Note that precision is automatically raised for
// such small probabilities, because leading 'f' and
// '0' digits are discounted.
tval, _ := ProbabilityToThresholdWithPrecision(prob, 3)
fmt.Println(tval.TValue())
}

Expand Down Expand Up @@ -279,7 +282,7 @@ func TestProbabilityToThresholdWithPrecision(t *testing.T) {
for len(strip) > 0 && strip[0] == '0' {
strip = strip[1:]
}
rth, err := ProbabilityToThresholdWithPrecision(test.prob, uint8(len(strip)))
rth, err := ProbabilityToThresholdWithPrecision(test.prob, len(strip))
require.NoError(t, err)
require.Equal(t, round, rth.TValue())
})
Expand Down
Loading

0 comments on commit beef35e

Please sign in to comment.