Skip to content

Commit 542afb5

Browse files
committed
x/exp/stats: implement basic stats API
Updates golang/go#69264 Change-Id: Id57190f3d284a6e36433c85f48f22390b152b702 Reviewed-on: https://go-review.googlesource.com/c/exp/+/687255 Reviewed-by: Jonathan Amsterdam <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Alan Donovan <[email protected]>
1 parent 6ae5c78 commit 542afb5

File tree

2 files changed

+423
-0
lines changed

2 files changed

+423
-0
lines changed

stats/stats.go

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
// Copyright 2025 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// Package stats provides basic descriptive statistics.
6+
//
7+
// This is intended not as a comprehensive statistics package, but
8+
// to provide common, everyday statistical functions.
9+
//
10+
// As a rule of thumb, a statistical function belongs in this package
11+
// if it would be explained in a typical high school.
12+
//
13+
// These functions aim to balance performance and accuracy, but some
14+
// amount of error is inevitable in floating-point computations.
15+
// The underlying implementations may change, resulting in small
16+
// changes in their results from version to version. If the caller
17+
// needs particular guarantees on accuracy and overflow behavior or
18+
// version stability, they should use a more specialized
19+
// implementation.
20+
package stats
21+
22+
// References:
23+
//
24+
// Hyndman, Rob J.; Fan, Yanan (November 1996).
25+
// "Sample Quantiles in Statistical Packages".
26+
// American Statistician. 50 (4).
27+
// American Statistical Association: 361–365.
28+
// doi:10.2307/2684934. JSTOR 2684934.
29+
30+
import (
31+
"math"
32+
"slices"
33+
)
34+
35+
// Mean returns the arithmetic mean of the values in values.
36+
//
37+
// Mean does not modify the array.
38+
//
39+
// Mean panics if values is an empty slice.
40+
//
41+
// If values contains NaN or both Inf and -Inf, it returns NaN.
42+
// If values contains Inf, it returns Inf. If values contains -Inf, it returns -Inf.
43+
func Mean(values []float64) float64 {
44+
mean, infs := meanInf(values)
45+
switch infs {
46+
case negInf:
47+
return math.Inf(-1)
48+
case posInf:
49+
return math.Inf(1)
50+
case negInf | posInf:
51+
return math.NaN()
52+
default: // passthrough mean or NaN
53+
}
54+
return mean
55+
}
56+
57+
// MeanAndStdDev returns the arithmetic mean and
58+
// sample standard deviation of values; the standard
59+
// deviation is only defined for len(values) > 1.
60+
//
61+
// MeanAndStdDev does not modify the array.
62+
//
63+
// MeanAndStdDev panics if values is an empty slice.
64+
//
65+
// If values contains NaN, it returns NaN, NaN.
66+
// If values contains both Inf and -Inf, it returns NaN, Inf.
67+
// If values contains Inf, it returns Inf, Inf.
68+
// If values contains -Inf, it returns -Inf, Inf.
69+
func MeanAndStdDev(values []float64) (float64, float64) {
70+
mean, infs := meanInf(values)
71+
switch infs {
72+
case 0:
73+
if math.IsNaN(mean) {
74+
return mean, math.NaN()
75+
}
76+
case negInf, posInf, negInf | posInf:
77+
return mean, math.Inf(1)
78+
}
79+
if len(values) == 1 {
80+
return mean, 0
81+
}
82+
squaredDiffs := 0.0
83+
for _, v := range values {
84+
diff := v - mean
85+
squaredDiffs += diff * diff
86+
}
87+
return mean, math.Sqrt(squaredDiffs / float64(len(values)-1))
88+
}
89+
90+
// meanInf calculates a naive mean value
91+
// and reports the infinities status.
92+
func meanInf(values []float64) (float64, infinities) {
93+
if len(values) == 0 {
94+
panic("mean: empty slice")
95+
}
96+
sum, infs := 0.0, infinities(0)
97+
for _, v := range values {
98+
switch {
99+
case math.IsInf(v, 1):
100+
infs |= posInf
101+
case math.IsInf(v, -1):
102+
infs |= negInf
103+
}
104+
sum += v
105+
}
106+
return sum / float64(len(values)), infs
107+
}
108+
109+
// infinities is a bitset that records the presence of ±Inf in the input
110+
type infinities uint8
111+
112+
const (
113+
negInf infinities = 1 << iota
114+
posInf
115+
)
116+
117+
// Median returns the median of the values in values.
118+
//
119+
// Median does not modify the array.
120+
//
121+
// Median may perform asymptotically faster and allocate
122+
// asymptotically less if the slice is already sorted.
123+
//
124+
// If values is an empty slice, it panics.
125+
// If values contains NaN, it returns NaN.
126+
// -Inf is treated as smaller than all other values,
127+
// Inf is treated as larger than all other values, and
128+
// -0.0 is treated as smaller than 0.0.
129+
func Median(values []float64) float64 { return Quantiles(values, 0.5)[0] }
130+
131+
// Quantiles returns a sequence of quantiles of values.
132+
//
133+
// The returned slice has the same length as the quantiles slice,
134+
// and the elements are one-to-one with the input quantiles.
135+
// A quantile of 0 corresponds to the minimum value in values and
136+
// a quantile of 1 corresponds to the maximum value in values.
137+
// A quantile of 0.5 is the same as the value returned by [Median].
138+
//
139+
// Quantiles does not modify the array.
140+
//
141+
// Quantiles may perform asymptotically faster and allocate
142+
// asymptotically less if the slice is already sorted.
143+
//
144+
// Quantiles panics if values is an empty slice or any
145+
// quantile is not contained in the interval [0, 1].
146+
//
147+
// If values contains NaN, it returns [NaN, ..., NaN].
148+
// -Inf is treated as smaller than all other values,
149+
// Inf is treated as larger than all other values, and
150+
// -0.0 is treated as smaller than 0.0.
151+
func Quantiles(values []float64, quantiles ...float64) []float64 {
152+
if len(values) == 0 {
153+
panic("quantiles: empty slice")
154+
}
155+
if !slices.IsSorted(values) {
156+
values = slices.Clone(values)
157+
slices.Sort(values)
158+
}
159+
res := make([]float64, len(quantiles))
160+
if math.IsNaN(values[0]) {
161+
for i := range res {
162+
res[i] = math.NaN()
163+
}
164+
return res
165+
}
166+
for i, q := range quantiles {
167+
if !(0 <= q && q <= 1) {
168+
panic("quantile must be contained in the interval [0, 1]")
169+
}
170+
// There are many methods for computing quantiles. Quantiles uses the
171+
// "inclusive" method, also known as Q7 in Hyndman and Fan, or the
172+
// "linear" or "R-7" method. This assumes that the data is either a
173+
// population or a sample that includes the most extreme values of the
174+
// underlying population.
175+
res[i] = hyndmanFanR7(values, q)
176+
}
177+
return res
178+
}
179+
180+
// hyndmanFanR7 implements the Hyndman and Fan "R-7"
181+
// method of computing interpolated quantile values
182+
// over a sorted slice of vals.
183+
//
184+
// hyndmanFanR7 does not modify the array.
185+
func hyndmanFanR7(values []float64, q float64) float64 {
186+
h := float64(len(values)-1)*q + 1
187+
// the h-th smallest of len(vals) values is at fn(h)-1.
188+
return values[floor(h-1)] + (h-math.Floor(h))*(values[ceil(h-1)]-values[floor(h-1)])
189+
}
190+
191+
// ceil returns the integer value of [math.Ceil].
192+
func ceil(n float64) int { return int(math.Ceil(n)) }
193+
194+
// floor returns the integer value of [math.Floor].
195+
func floor(n float64) int { return int(math.Floor(n)) }

0 commit comments

Comments
 (0)