From db1b7c2211c064ae387c259258a2e66d726c449a Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Wed, 17 May 2017 10:21:59 -0700 Subject: [PATCH] math/rand: shard locks for global locked source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DO NOT REVIEW [needs careful docs, not sure we want to do it] demo for #20387 name old time/op new time/op delta Int63Threadsafe 23.4ns ± 5% 35.9ns ± 9% +52.92% (p=0.000 n=10+10) Int63Threadsafe-2 22.2ns ±13% 34.9ns ± 3% +57.08% (p=0.000 n=10+10) Int63Threadsafe-4 20.9ns ±16% 34.9ns ± 2% +67.26% (p=0.000 n=10+10) Int63Threadsafe-8 20.1ns ± 2% 34.9ns ± 1% +74.10% (p=0.000 n=8+8) Int63Threadsafe-16 21.2ns ±18% 34.9ns ± 2% +64.15% (p=0.000 n=10+10) Int63Threadsafe-32 20.9ns ± 2% 34.8ns ± 1% +66.45% (p=0.000 n=9+9) Int63Threadsafe-64 22.1ns ±15% 34.6ns ± 1% +56.38% (p=0.000 n=10+9) Int63ThreadsafeParallel 21.2ns ± 3% 35.2ns ± 1% +65.65% (p=0.000 n=10+8) Int63ThreadsafeParallel-2 28.1ns ± 2% 66.3ns ± 4% +135.54% (p=0.000 n=10+10) Int63ThreadsafeParallel-4 45.9ns ± 1% 43.9ns ± 1% -4.31% (p=0.000 n=9+10) Int63ThreadsafeParallel-8 60.1ns ± 2% 34.1ns ± 4% -43.23% (p=0.000 n=9+10) Int63ThreadsafeParallel-16 70.4ns ± 2% 33.9ns ± 3% -51.75% (p=0.000 n=9+10) Int63ThreadsafeParallel-32 78.3ns ±17% 33.5ns ± 3% -57.18% (p=0.000 n=10+10) Int63ThreadsafeParallel-64 105ns ± 5% 33ns ± 1% -68.63% (p=0.000 n=10+9) Change-Id: I02f036c4c80e41df3065446be36840992b1c978e --- src/math/rand/rand.go | 96 ++++++++++++++++++++++++++++++-------- src/math/rand/rand_test.go | 8 ++++ 2 files changed, 85 insertions(+), 19 deletions(-) diff --git a/src/math/rand/rand.go b/src/math/rand/rand.go index 147c92f9e257e3..977131ae58d2bf 100644 --- a/src/math/rand/rand.go +++ b/src/math/rand/rand.go @@ -15,7 +15,11 @@ // package. package rand -import "sync" +import ( + "sync" + "sync/atomic" + "unsafe" +) // A Source represents a source of uniformly-distributed // pseudo-random int64 values in the range [0, 1<<63). @@ -282,7 +286,7 @@ func read(p []byte, int63 func() int64, readVal *int64, readPos *int8) (n int, e * Top-level convenience functions */ -var globalRand = New(&lockedSource{src: NewSource(1).(Source64)}) +var globalRand = New(newLockedSource()) // Seed uses the provided seed value to initialize the default Source to a // deterministic state. If Seed is not called, the generator behaves as @@ -368,43 +372,97 @@ func NormFloat64() float64 { return globalRand.NormFloat64() } // func ExpFloat64() float64 { return globalRand.ExpFloat64() } +func newLockedSource() *lockedSource { + ls := new(lockedSource) + for i := range ls.srcs { + // TODO: What are good initial values? + ls.srcs[i].Source64 = NewSource(1 + int64(i)*104729).(Source64) + } + return ls +} + +const nLockedSources = 64 + type lockedSource struct { - lk sync.Mutex - src Source64 + n uint32 + _ [128 - 32]byte + srcs [nLockedSources]locksource +} + +type locksource struct { + sync.Mutex + Source64 + _ [128 - unsafe.Sizeof(struct { + sync.Mutex + Source64 + }{})]byte +} + +func (r *lockedSource) enter() (*locksource, bool) { + idx := atomic.AddUint32(&r.n, 1) - 1 + return &r.srcs[idx%nLockedSources], idx == 0 +} + +func (r *lockedSource) exit() { + // possibly still serial; attempt to detect concurrency. + // use load-then-store instead of compare-and-swap + // because it is more performant. + // If there is a logical race, it will result + // in unnecessarily setting r.n to zero, + // i.e. a false positive for being serial, which is ok. + if atomic.LoadUint32(&r.n) == 1 { + atomic.StoreUint32(&r.n, 0) + } } func (r *lockedSource) Int63() (n int64) { - r.lk.Lock() - n = r.src.Int63() - r.lk.Unlock() + ls, ser := r.enter() + ls.Lock() + n = ls.Int63() + ls.Unlock() + if ser { + r.exit() + } return } func (r *lockedSource) Uint64() (n uint64) { - r.lk.Lock() - n = r.src.Uint64() - r.lk.Unlock() + ls, ser := r.enter() + ls.Lock() + n = ls.Uint64() + ls.Unlock() + if ser { + r.exit() + } return } func (r *lockedSource) Seed(seed int64) { - r.lk.Lock() - r.src.Seed(seed) - r.lk.Unlock() + atomic.StoreUint32(&r.n, 0) + ls, ser := r.enter() + ls.Lock() + ls.Seed(seed) + ls.Unlock() + if ser { + r.exit() + } } // seedPos implements Seed for a lockedSource without a race condition. func (r *lockedSource) seedPos(seed int64, readPos *int8) { - r.lk.Lock() - r.src.Seed(seed) + atomic.StoreUint32(&r.n, 0) + ls := &r.srcs[0] + ls.Lock() + ls.Seed(seed) *readPos = 0 - r.lk.Unlock() + ls.Unlock() } // read implements Read for a lockedSource without a race condition. func (r *lockedSource) read(p []byte, readVal *int64, readPos *int8) (n int, err error) { - r.lk.Lock() - n, err = read(p, r.src.Int63, readVal, readPos) - r.lk.Unlock() + ls := &r.srcs[0] + ls.Lock() + n, err = read(p, ls.Int63, readVal, readPos) + ls.Unlock() return } diff --git a/src/math/rand/rand_test.go b/src/math/rand/rand_test.go index e663b84f9fc3da..ff06f803abb136 100644 --- a/src/math/rand/rand_test.go +++ b/src/math/rand/rand_test.go @@ -565,6 +565,14 @@ func BenchmarkInt63Threadsafe(b *testing.B) { } } +func BenchmarkInt63ThreadsafeParallel(b *testing.B) { + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + Int63() + } + }) +} + func BenchmarkInt63Unthreadsafe(b *testing.B) { r := New(NewSource(1)) for n := b.N; n > 0; n-- {