vitessio · rohit-nayak-ps · Apr 26, 2021 · Apr 21, 2021 · Apr 22, 2021 · Apr 22, 2021
diff --git a/go/mathstats/beta.go b/go/mathstats/beta.go
@@ -0,0 +1,87 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mathstats
+
+import "math"
+
+func lgamma(x float64) float64 {
+	y, _ := math.Lgamma(x)
+	return y
+}
+
+// mathBetaInc returns the value of the regularized incomplete beta
+// function Iₓ(a, b).
+//
+// This is not to be confused with the "incomplete beta function",
+// which can be computed as BetaInc(x, a, b)*Beta(a, b).
+//
+// If x < 0 or x > 1, returns NaN.
+func mathBetaInc(x, a, b float64) float64 {
+	// Based on Numerical Recipes in C, section 6.4. This uses the
+	// continued fraction definition of I:
+	//
+	//  (xᵃ*(1-x)ᵇ)/(a*B(a,b)) * (1/(1+(d₁/(1+(d₂/(1+...))))))
+	//
+	// where B(a,b) is the beta function and
+	//
+	//  d_{2m+1} = -(a+m)(a+b+m)x/((a+2m)(a+2m+1))
+	//  d_{2m}   = m(b-m)x/((a+2m-1)(a+2m))
+	if x < 0 || x > 1 {
+		return math.NaN()
+	}
+	bt := 0.0
+	if 0 < x && x < 1 {
+		// Compute the coefficient before the continued
+		// fraction.
+		bt = math.Exp(lgamma(a+b) - lgamma(a) - lgamma(b) +
+			a*math.Log(x) + b*math.Log(1-x))
+	}
+	if x < (a+1)/(a+b+2) {
+		// Compute continued fraction directly.
+		return bt * betacf(x, a, b) / a
+	} else {
+		// Compute continued fraction after symmetry transform.
+		return 1 - bt*betacf(1-x, b, a)/b
+	}
+}
+
+// betacf is the continued fraction component of the regularized
+// incomplete beta function Iₓ(a, b).
+func betacf(x, a, b float64) float64 {
+	const maxIterations = 200
+	const epsilon = 3e-14
+
+	raiseZero := func(z float64) float64 {
+		if math.Abs(z) < math.SmallestNonzeroFloat64 {
+			return math.SmallestNonzeroFloat64
+		}
+		return z
+	}
+
+	c := 1.0
+	d := 1 / raiseZero(1-(a+b)*x/(a+1))
+	h := d
+	for m := 1; m <= maxIterations; m++ {
+		mf := float64(m)
+
+		// Even step of the recurrence.
+		numer := mf * (b - mf) * x / ((a + 2*mf - 1) * (a + 2*mf))
+		d = 1 / raiseZero(1+numer*d)
+		c = raiseZero(1 + numer/c)
+		h *= d * c
+
+		// Odd step of the recurrence.
+		numer = -(a + mf) * (a + b + mf) * x / ((a + 2*mf) * (a + 2*mf + 1))
+		d = 1 / raiseZero(1+numer*d)
+		c = raiseZero(1 + numer/c)
+		hfac := d * c
+		h *= hfac
+
+		if math.Abs(hfac-1) < epsilon {
+			return h
+		}
+	}
+	panic("betainc: a or b too big; failed to converge")
+}
diff --git a/go/mathstats/beta_test.go b/go/mathstats/beta_test.go
@@ -0,0 +1,28 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mathstats
+
+import (
+	"testing"
+)
+
+func TestBetaInc(t *testing.T) {
+	// Example values from MATLAB betainc documentation.
+	testFunc(t, "I_0.5(%v, 3)",
+		func(a float64) float64 { return mathBetaInc(0.5, a, 3) },
+		map[float64]float64{
+			0:  1.00000000000000,
+			1:  0.87500000000000,
+			2:  0.68750000000000,
+			3:  0.50000000000000,
+			4:  0.34375000000000,
+			5:  0.22656250000000,
+			6:  0.14453125000000,
+			7:  0.08984375000000,
+			8:  0.05468750000000,
+			9:  0.03271484375000,
+			10: 0.01928710937500,
+		})
+}
diff --git a/go/mathstats/sample.go b/go/mathstats/sample.go
@@ -0,0 +1,235 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mathstats
+
+import (
+	"math"
+	"sort"
+)
+
+// Sample is a collection of possibly weighted data points.
+type Sample struct {
+	// Xs is the slice of sample values.
+	Xs []float64
+
+	// Sorted indicates that Xs is sorted in ascending order.
+	Sorted bool
+}
+
+// Bounds returns the minimum and maximum values of xs.
+func Bounds(xs []float64) (min float64, max float64) {
+	if len(xs) == 0 {
+		return math.NaN(), math.NaN()
+	}
+	min, max = xs[0], xs[0]
+	for _, x := range xs {
+		if x < min {
+			min = x
+		}
+		if x > max {
+			max = x
+		}
+	}
+	return
+}
+
+// Bounds returns the minimum and maximum values of the Sample.
+//
+// If the Sample is weighted, this ignores samples with zero weight.
+//
+// This is constant time if s.Sorted and there are no zero-weighted
+// values.
+func (s Sample) Bounds() (min float64, max float64) {
+	if len(s.Xs) == 0 || !s.Sorted {
+		return Bounds(s.Xs)
+	}
+	return s.Xs[0], s.Xs[len(s.Xs)-1]
+}
+
+// vecSum returns the sum of xs.
+func vecSum(xs []float64) float64 {
+	sum := 0.0
+	for _, x := range xs {
+		sum += x
+	}
+	return sum
+}
+
+// Sum returns the (possibly weighted) sum of the Sample.
+func (s Sample) Sum() float64 {
+	return vecSum(s.Xs)
+}
+
+// Weight returns the total weight of the Sasmple.
+func (s Sample) Weight() float64 {
+	return float64(len(s.Xs))
+}
+
+// Mean returns the arithmetic mean of xs.
+func Mean(xs []float64) float64 {
+	if len(xs) == 0 {
+		return math.NaN()
+	}
+	m := 0.0
+	for i, x := range xs {
+		m += (x - m) / float64(i+1)
+	}
+	return m
+}
+
+// Mean returns the arithmetic mean of the Sample.
+func (s Sample) Mean() float64 {
+	return Mean(s.Xs)
+}
+
+// GeoMean returns the geometric mean of xs. xs must be positive.
+func GeoMean(xs []float64) float64 {
+	if len(xs) == 0 {
+		return math.NaN()
+	}
+	m := 0.0
+	for i, x := range xs {
+		if x <= 0 {
+			return math.NaN()
+		}
+		lx := math.Log(x)
+		m += (lx - m) / float64(i+1)
+	}
+	return math.Exp(m)
+}
+
+// GeoMean returns the geometric mean of the Sample. All samples
+// values must be positive.
+func (s Sample) GeoMean() float64 {
+	return GeoMean(s.Xs)
+}
+
+// Variance returns the sample variance of xs.
+func Variance(xs []float64) float64 {
+	if len(xs) == 0 {
+		return math.NaN()
+	} else if len(xs) <= 1 {
+		return 0
+	}
+
+	// Based on Wikipedia's presentation of Welford 1962
+	// (http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm).
+	// This is more numerically stable than the standard two-pass
+	// formula and not prone to massive cancellation.
+	mean, M2 := 0.0, 0.0
+	for n, x := range xs {
+		delta := x - mean
+		mean += delta / float64(n+1)
+		M2 += delta * (x - mean)
+	}
+	return M2 / float64(len(xs)-1)
+}
+
+// Variance returns the variance of xs
+func (s Sample) Variance() float64 {
+	return Variance(s.Xs)
+}
+
+// StdDev returns the sample standard deviation of xs.
+func StdDev(xs []float64) float64 {
+	return math.Sqrt(Variance(xs))
+}
+
+// StdDev returns the sample standard deviation of the Sample.
+func (s Sample) StdDev() float64 {
+	return StdDev(s.Xs)
+}
+
+// Percentile returns the pctileth value from the Sample. This uses
+// interpolation method R8 from Hyndman and Fan (1996).
+//
+// pctile will be capped to the range [0, 1]. If len(xs) == 0 or all
+// weights are 0, returns NaN.
+//
+// Percentile(0.5) is the median. Percentile(0.25) and
+// Percentile(0.75) are the first and third quartiles, respectively.
+//
+// This is constant time if s.Sorted and s.Weights == nil.
+func (s *Sample) Percentile(pctile float64) float64 {
+	if len(s.Xs) == 0 {
+		return math.NaN()
+	} else if pctile <= 0 {
+		min, _ := s.Bounds()
+		return min
+	} else if pctile >= 1 {
+		_, max := s.Bounds()
+		return max
+	}
+
+	if !s.Sorted {
+		s.Sort()
+	}
+
+	N := float64(len(s.Xs))
+	//n := pctile * (N + 1) // R6
+	n := 1/3.0 + pctile*(N+1/3.0) // R8
+	kf, frac := math.Modf(n)
+	k := int(kf)
+	if k <= 0 {
+		return s.Xs[0]
+	} else if k >= len(s.Xs) {
+		return s.Xs[len(s.Xs)-1]
+	}
+	return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1])
+}
+
+// IQR returns the interquartile range of the Sample.
+//
+// This is constant time if s.Sorted and s.Weights == nil.
+func (s Sample) IQR() float64 {
+	if !s.Sorted {
+		s = *s.Copy().Sort()
+	}
+	return s.Percentile(0.75) - s.Percentile(0.25)
+}
+
+// Sort sorts the samples in place in s and returns s.
+//
+// A sorted sample improves the performance of some algorithms.
+func (s *Sample) Sort() *Sample {
+	if s.Sorted || sort.Float64sAreSorted(s.Xs) {
+		// All set
+	} else {
+		sort.Float64s(s.Xs)
+	}
+	s.Sorted = true
+	return s
+}
+
+// Copy returns a copy of the Sample.
+//
+// The returned Sample shares no data with the original, so they can
+// be modified (for example, sorted) independently.
+func (s Sample) Copy() *Sample {
+	xs := make([]float64, len(s.Xs))
+	copy(xs, s.Xs)
+	return &Sample{xs, s.Sorted}
+}
+
+// FilterOutliers updates this sample in-place by removing all the values that are outliers
+func (s *Sample) FilterOutliers() {
+	// Discard outliers.
+	q1, q3 := s.Percentile(0.25), s.Percentile(0.75)
+	lo, hi := q1-1.5*(q3-q1), q3+1.5*(q3-q1)
+	nn := 0
+	for _, value := range s.Xs {
+		if lo <= value && value <= hi {
+			s.Xs[nn] = value
+			nn++
+		}
+	}
+	s.Xs = s.Xs[:nn]
+}
+
+// Clear resets this sample so it contains 0 values
+func (s *Sample) Clear() {
+	s.Xs = s.Xs[:0]
+	s.Sorted = false
+}
diff --git a/go/mathstats/sample_test.go b/go/mathstats/sample_test.go
@@ -0,0 +1,21 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mathstats
+
+import "testing"
+
+func TestSamplePercentile(t *testing.T) {
+	s := Sample{Xs: []float64{15, 20, 35, 40, 50}}
+	testFunc(t, "Percentile", s.Percentile, map[float64]float64{
+		-1:  15,
+		0:   15,
+		.05: 15,
+		.30: 19.666666666666666,
+		.40: 27,
+		.95: 50,
+		1:   50,
+		2:   50,
+	})
+}