From edea2de0232841b566dbc159a07416e4aa6c5c53 Mon Sep 17 00:00:00 2001 From: chethega Date: Sun, 23 Feb 2020 21:18:00 +0100 Subject: [PATCH 1/2] Introduce task-local and free-standing xoshiro RNG Co-Authored-By: Jeff Bezanson Co-Authored-by: Rafael Fourquet - try to use high bits instead of low when we take a subset of them - use shift and multiply instead of mask and subtract for generating floats --- src/jltypes.c | 16 +- src/julia.h | 4 + src/task.c | 60 ++++++ stdlib/Random/src/RNGs.jl | 5 +- stdlib/Random/src/Random.jl | 8 +- stdlib/Random/src/Xoshiro.jl | 227 +++++++++++++++++++++++ stdlib/Random/src/XoshiroSimd.jl | 308 +++++++++++++++++++++++++++++++ stdlib/Random/src/normal.jl | 19 +- test/threads_exec.jl | 25 +++ 9 files changed, 661 insertions(+), 11 deletions(-) create mode 100644 stdlib/Random/src/Xoshiro.jl create mode 100644 stdlib/Random/src/XoshiroSimd.jl diff --git a/src/jltypes.c b/src/jltypes.c index 12a20dfe88206..de1b7c9408294 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -2524,7 +2524,7 @@ void jl_init_types(void) JL_GC_DISABLED NULL, jl_any_type, jl_emptysvec, - jl_perm_symsvec(10, + jl_perm_symsvec(14, "next", "queue", "storage", @@ -2534,8 +2534,12 @@ void jl_init_types(void) JL_GC_DISABLED "code", "_state", "sticky", - "_isexception"), - jl_svec(10, + "_isexception", + "rngState0", + "rngState1", + "rngState2", + "rngState3"), + jl_svec(14, jl_any_type, jl_any_type, jl_any_type, @@ -2545,7 +2549,11 @@ void jl_init_types(void) JL_GC_DISABLED jl_any_type, jl_uint8_type, jl_bool_type, - jl_bool_type), + jl_bool_type, + jl_uint64_type, + jl_uint64_type, + jl_uint64_type, + jl_uint64_type), 0, 1, 6); jl_value_t *listt = jl_new_struct(jl_uniontype_type, jl_task_type, jl_nothing_type); jl_svecset(jl_task_type->types, 0, listt); diff --git a/src/julia.h b/src/julia.h index c8ab34605c514..69b771bfca073 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1805,6 +1805,10 @@ typedef struct _jl_task_t { uint8_t _state; uint8_t sticky; // record whether this Task can be migrated to a new thread uint8_t _isexception; // set if `result` is an exception to throw or that we exited with + uint64_t rngState0; // really rngState[4], but more convenient to split + uint64_t rngState1; + uint64_t rngState2; + uint64_t rngState3; // hidden state: // saved gc stack top for context switches diff --git a/src/task.c b/src/task.c index bd77de0898f01..be08deca63fc1 100644 --- a/src/task.c +++ b/src/task.c @@ -35,6 +35,7 @@ #include "julia_internal.h" #include "threading.h" #include "julia_assert.h" +#include "support/hashing.h" #ifdef __cplusplus extern "C" { @@ -648,6 +649,63 @@ JL_DLLEXPORT void jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED) throw_internal(ct, NULL); } +/* This is xoshiro256++ 1.0, used for tasklocal random number generation in julia. + This implementation is intended for embedders and internal use by the runtime, and is + based on the reference implementation on http://prng.di.unimi.it + + Credits go to Sebastiano Vigna for coming up with this PRNG. + + There is a pure julia implementation in stdlib that tends to be faster when used from + within julia, due to inlining and more agressive architecture-specific optimizations. +*/ +JL_DLLEXPORT uint64_t jl_tasklocal_genrandom(jl_task_t *task) JL_NOTSAFEPOINT +{ + uint64_t s0 = task->rngState0; + uint64_t s1 = task->rngState1; + uint64_t s2 = task->rngState2; + uint64_t s3 = task->rngState3; + + uint64_t t = s0 << 17; + uint64_t tmp = s0 + s3; + uint64_t res = ((tmp << 23) | (tmp >> 41)) + s0; + s2 ^= s0; + s3 ^= s1; + s1 ^= s2; + s0 ^= s3; + s2 ^= t; + s3 = (s3 << 45) | (s3 >> 19); + + task->rngState0 = s0; + task->rngState1 = s1; + task->rngState2 = s2; + task->rngState3 = s3; + return res; +} + +void rng_split(jl_task_t *from, jl_task_t *to) JL_NOTSAFEPOINT +{ + /* TODO: consider a less ad-hoc construction + Ideally we could just use the output of the random stream to seed the initial + state of the child. Out of an overabundance of caution we multiply with + effectively random coefficients, to break possible self-interactions. + + It is not the goal to mix bits -- we work under the assumption that the + source is well-seeded, and its output looks effectively random. + However, xoshiro has never been studied in the mode where we seed the + initial state with the output of another xoshiro instance. + + Constants have nothing up their sleeve: + 0x02011ce34bce797f == hash(UInt(1))|0x01 + 0x5a94851fb48a6e05 == hash(UInt(2))|0x01 + 0x3688cf5d48899fa7 == hash(UInt(3))|0x01 + 0x867b4bb4c42e5661 == hash(UInt(4))|0x01 + */ + to->rngState0 = 0x02011ce34bce797f * jl_tasklocal_genrandom(from); + to->rngState1 = 0x5a94851fb48a6e05 * jl_tasklocal_genrandom(from); + to->rngState2 = 0x3688cf5d48899fa7 * jl_tasklocal_genrandom(from); + to->rngState3 = 0x867b4bb4c42e5661 * jl_tasklocal_genrandom(from); +} + JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion_future, size_t ssize) { jl_task_t *ct = jl_current_task; @@ -683,6 +741,8 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->_isexception = 0; // Inherit logger state from parent task t->logstate = ct->logstate; + // Fork task-local random state from parent + rng_split(ct, t); // there is no active exception handler available on this stack yet t->eh = NULL; t->sticky = 1; diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl index 2bdecd92e6a08..fedad41d54cd3 100644 --- a/stdlib/Random/src/RNGs.jl +++ b/stdlib/Random/src/RNGs.jl @@ -2,10 +2,6 @@ ## RandomDevice -# SamplerUnion(X, Y, ...}) == Union{SamplerType{X}, SamplerType{Y}, ...} -SamplerUnion(U...) = Union{Any[SamplerType{T} for T in U]...} -const SamplerBoolBitInteger = SamplerUnion(Bool, BitInteger_types...) - if Sys.iswindows() struct RandomDevice <: AbstractRNG buffer::Vector{UInt128} @@ -382,6 +378,7 @@ end function __init__() resize!(empty!(THREAD_RNGs), Threads.nthreads()) # ensures that we didn't save a bad object + seed!(TaskLocalRNG()) end diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl index 2cdffd6067252..53dbe38238bbc 100644 --- a/stdlib/Random/src/Random.jl +++ b/stdlib/Random/src/Random.jl @@ -27,7 +27,7 @@ export rand!, randn!, shuffle, shuffle!, randperm, randperm!, randcycle, randcycle!, - AbstractRNG, MersenneTwister, RandomDevice + AbstractRNG, MersenneTwister, RandomDevice, TaskLocalRNG, Xoshiro ## general definitions @@ -291,11 +291,17 @@ rand( ::Type{X}, dims::Dims) where {X} = rand(default_rng(), X, d rand(r::AbstractRNG, ::Type{X}, d::Integer, dims::Integer...) where {X} = rand(r, X, Dims((d, dims...))) rand( ::Type{X}, d::Integer, dims::Integer...) where {X} = rand(X, Dims((d, dims...))) +# SamplerUnion(X, Y, ...}) == Union{SamplerType{X}, SamplerType{Y}, ...} +SamplerUnion(U...) = Union{Any[SamplerType{T} for T in U]...} +const SamplerBoolBitInteger = SamplerUnion(Bool, BitInteger_types...) + +include("Xoshiro.jl") include("RNGs.jl") include("generation.jl") include("normal.jl") include("misc.jl") +include("XoshiroSimd.jl") ## rand & rand! & seed! docstrings diff --git a/stdlib/Random/src/Xoshiro.jl b/stdlib/Random/src/Xoshiro.jl new file mode 100644 index 0000000000000..a55c9688f9dda --- /dev/null +++ b/stdlib/Random/src/Xoshiro.jl @@ -0,0 +1,227 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +## Xoshiro RNG +# Lots of implementation is shared with TaskLocalRNG + +""" + Xoshiro + +Xoshiro256++ is a fast pseudorandom number generator originally developed by Sebastian Vigna. +Reference implementation is available at http://prng.di.unimi.it + +Apart from the high speed, Xoshiro has a small memory footprint, making it suitable for +applications where many different random states need to be held for long time. + +Julia's Xoshiro implementation has a bulk-generation mode; this seeds new virtual PRNGs +from the parent, and uses SIMD to generate in parallel (i.e. the bulk stream consists of +multiple interleaved xoshiro instances). +The virtual PRNGs are discarded once the bulk request has been serviced (and should cause +no heap allocations). +""" +mutable struct Xoshiro <: AbstractRNG + s0::UInt64 + s1::UInt64 + s2::UInt64 + s3::UInt64 +end + +Xoshiro(::Nothing) = Xoshiro() + +function Xoshiro() + parent = RandomDevice() + # Constants have nothing up their sleeve, see task.c + # 0x02011ce34bce797f == hash(UInt(1))|0x01 + # 0x5a94851fb48a6e05 == hash(UInt(2))|0x01 + # 0x3688cf5d48899fa7 == hash(UInt(3))|0x01 + # 0x867b4bb4c42e5661 == hash(UInt(4))|0x01 + + Xoshiro(0x02011ce34bce797f * rand(parent, UInt64), + 0x5a94851fb48a6e05 * rand(parent, UInt64), + 0x3688cf5d48899fa7 * rand(parent, UInt64), + 0x867b4bb4c42e5661 * rand(parent, UInt64)) +end + +copy(rng::Xoshiro) = Xoshiro(rng.s0, rng.s1, rng.s2, rng.s3) + +function copy!(dst::Xoshiro, src::Xoshiro) + dst.s0, dst.s1, dst.s2, dst.s3 = src.s0, src.s1, src.s2, src.s3 + dst +end + +function ==(a::Xoshiro, b::Xoshiro) + a.s0 == b.s0 && a.s1 == b.s1 && a.s2 == b.s2 && a.s3 == b.s3 +end + +rng_native_52(::Xoshiro) = UInt64 + +function seed!(rng::Xoshiro, seed::NTuple{4,UInt64}) + s = Base.hash_64_64(seed[1]) + rng.s0 = s + s += Base.hash_64_64(seed[2]) + rng.s1 = s + s += Base.hash_64_64(seed[3]) + rng.s2 = s + s += Base.hash_64_64(seed[4]) + rng.s3 = s + rng +end + +@inline function rand(rng::Xoshiro, ::SamplerType{UInt64}) + s0, s1, s2, s3 = rng.s0, rng.s1, rng.s2, rng.s3 + tmp = s0 + s3 + res = tmp << 23 | tmp >> 41 + t = s1 << 17 + s2 = xor(s2, s0) + s3 = xor(s3, s1) + s1 = xor(s1, s2) + s0 = xor(s0, s3) + s2 = xor(s2, t) + s3 = s3 << 45 | s3 >> 19 + rng.s0, rng.s1, rng.s2, rng.s3 = s0, s1, s2, s3 + res +end + + +## Task local RNG + +""" + TaskLocalRNG + +The `TaskLocalRNG` has state that is local to its task, not its thread. +It is seeded upon task creation, from the state of its parent task. +Therefore, task creation is an event that changes the parent's RNG state. + +As an upside, the `TaskLocalRNG` is pretty fast, and permits reproducible +multithreaded simulations (barring race conditions), independent of scheduler +decisions. As long as the number of threads is not used to make decisions on +task creation, simulation results are also independent of the number of available +threads / CPUs. The random stream should not depend on hardware specifics, up to +endianness and possibly word size. + +Using or seeding the RNG of any other task than the one returned by `current_task()` +is undefined behavior: it will work most of the time, and may sometimes fail silently. +""" +struct TaskLocalRNG <: AbstractRNG end +TaskLocalRNG(::Nothing) = TaskLocalRNG() +rng_native_52(::TaskLocalRNG) = UInt64 + +function seed!(rng::TaskLocalRNG, seed::NTuple{4,UInt64}) + # TODO: Consider a less ad-hoc construction + # We can afford burning a handful of cycles here, and we don't want any + # surprises with respect to bad seeds / bad interactions. + t = current_task() + s = Base.hash_64_64(seed[1]) + t.rngState0 = s + s += Base.hash_64_64(seed[2]) + t.rngState1 = s + s += Base.hash_64_64(seed[3]) + t.rngState2 = s + s += Base.hash_64_64(seed[4]) + t.rngState3 = s + rand(rng, UInt64) + rand(rng, UInt64) + rand(rng, UInt64) + rand(rng, UInt64) + rng +end + +@inline function rand(::TaskLocalRNG, ::SamplerType{UInt64}) + task = current_task() + s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3 + tmp = s0 + s3 + res = tmp << 23 | tmp >> 41 + t = s1 << 17 + s2 = xor(s2, s0) + s3 = xor(s3, s1) + s1 = xor(s1, s2) + s0 = xor(s0, s3) + s2 = xor(s2, t) + s3 = s3 << 45 | s3 >> 19 + task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3 + res +end + +# Shared implementation between Xoshiro and TaskLocalRNG -- seeding +function seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::UInt128) + seed0 = seed % UInt64 + seed1 = (seed>>>64) % UInt64 + seed!(rng, (seed0, seed1, zero(UInt64), zero(UInt64))) +end +seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::Integer) = seed!(rng, UInt128(seed)) + +seed!(rng::Union{TaskLocalRNG, Xoshiro}) = + seed!(rng, (rand(RandomDevice(), UInt64), rand(RandomDevice(), UInt64), + rand(RandomDevice(), UInt64), rand(RandomDevice(), UInt64))) + +function seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::AbstractVector{UInt64}) + if length(seed) > 4 + throw(ArgumentError("seed should have no more than 256 bits")) + end + seed0 = length(seed)>0 ? seed[1] : UInt64(0) + seed1 = length(seed)>1 ? seed[2] : UInt64(0) + seed2 = length(seed)>2 ? seed[3] : UInt64(0) + seed3 = length(seed)>3 ? seed[4] : UInt64(0) + seed!(rng, (seed0, seed1, seed2, seed3)) +end + +function seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::AbstractVector{UInt32}) + if iseven(length(seed)) + seed!(rng, reinterpret(UInt64, seed)) + else + seed!(rng, UInt64[reinterpret(UInt64, @view(seed[begin:end-1])); seed[end] % UInt64]) + end +end + +@inline function rand(rng::Union{TaskLocalRNG, Xoshiro}, ::SamplerType{UInt128}) + first = rand(rng, UInt64) + second = rand(rng,UInt64) + second + UInt128(first)<<64 +end + +@inline rand(rng::Union{TaskLocalRNG, Xoshiro}, ::SamplerType{Int128}) = rand(rng, UInt128) % Int128 + +@inline function rand(rng::Union{TaskLocalRNG, Xoshiro}, + T::SamplerUnion(Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64)) + S = T[] + # use upper bits + (rand(rng, UInt64) >>> (64 - 8*sizeof(S))) % S +end + +function copy(rng::TaskLocalRNG) + t = current_task() + Xoshiro(t.rngState0, t.rngState1, t.rngState2, t.rngState3) +end + +function copy!(dst::TaskLocalRNG, src::Xoshiro) + t = current_task() + t.rngState0, t.rngState1, t.rngState2, t.rngState3 = src.s0, src.s1, src.s2, src.s3 + dst +end + +function copy!(dst::Xoshiro, src::TaskLocalRNG) + t = current_task() + dst.s0, dst.s1, dst.s2, dst.s3 = t.rngState0, t.rngState1, t.rngState2, t.rngState3 + dst +end + +function ==(a::Xoshiro, b::TaskLocalRNG) + t = current_task() + a.s0 == t.rngState0 && a.s1 == t.rngState1 && a.s2 == t.rngState2 && a.s3 == t.rngState3 +end + +==(a::TaskLocalRNG, b::Xoshiro) = b == a + +# for partial words, use upper bits from Xoshiro + +rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt52Raw{UInt64}}) = rand(r, UInt64) >>> 12 +rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt52{UInt64}}) = rand(r, UInt64) >>> 12 +rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt104{UInt128}}) = rand(r, UInt104Raw()) + +rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01{Float16}}) = + Float16(Float32(rand(r, UInt16) >>> 5) * Float32(0x1.0p-11)) + +rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01{Float32}}) = + Float32(rand(r, UInt32) >>> 8) * Float32(0x1.0p-24) + +rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01_64}) = + Float64(rand(r, UInt64) >>> 11) * 0x1.0p-53 diff --git a/stdlib/Random/src/XoshiroSimd.jl b/stdlib/Random/src/XoshiroSimd.jl new file mode 100644 index 0000000000000..e115533bb6fef --- /dev/null +++ b/stdlib/Random/src/XoshiroSimd.jl @@ -0,0 +1,308 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module XoshiroSimd +# Getting the xoroshiro RNG to reliably vectorize is somewhat of a hassle without Simd.jl. +import ..Random: TaskLocalRNG, rand, rand!, Xoshiro, CloseOpen01, UnsafeView, + SamplerType, SamplerTrivial +using Base: BitInteger_types +using Core.Intrinsics: llvmcall + +# Vector-width. Influences random stream. +xoshiroWidth() = Val(8) +# Simd threshold. Influences random stream. +simdThreshold(::Type{T}) where T = 64 +simdThreshold(::Type{Bool}) = 640 + +@inline _rotl45(x::UInt64) = (x<<45)|(x>>19) +@inline _shl17(x::UInt64) = x<<17 +@inline _rotl23(x::UInt64) = (x<<23)|(x>>41) +@inline _plus(x::UInt64,y::UInt64) = x+y +@inline _xor(x::UInt64,y::UInt64) = xor(x,y) +@inline _and(x::UInt64, y::UInt64) = x & y +@inline _or(x::UInt64, y::UInt64) = x | y +@inline _lshr(x, y::Int32) = _lshr(x, y % Int64) +@inline _lshr(x::UInt64, y::Int64) = llvmcall(""" + %res = lshr i64 %0, %1 + ret i64 %res + """, + UInt64, + Tuple{UInt64, Int64}, + x, y) + +@inline _bits2float(x::UInt64, ::Type{Float64}) = reinterpret(UInt64, Float64(x >>> 11) * 0x1.0p-53) +@inline function _bits2float(x::UInt64, ::Type{Float32}) + #= + # this implementation uses more high bits, but is harder to vectorize + x = x >>> 16 # discard low 16 bits + u = Float32(x >>> 24) * Float32(0x1.0p-24) + l = Float32(x & 0x00ffffff) * Float32(0x1.0p-24) + =# + ui = (x>>>32) % UInt32 + li = x % UInt32 + u = Float32(ui >>> 8) * Float32(0x1.0p-24) + l = Float32(li >>> 8) * Float32(0x1.0p-24) + (UInt64(reinterpret(UInt32, u)) << 32) | UInt64(reinterpret(UInt32, l)) +end + +# required operations. These could be written more concisely with `ntuple`, but the compiler +# sometimes refuses to properly vectorize. +for N in [4,8,16] + let code, s, fshl = "llvm.fshl.v$(N)i64", + VT = :(NTuple{$N, VecElement{UInt64}}) + + s = ntuple(_->VecElement(UInt64(45)), N) + @eval @inline _rotl45(x::$VT) = ccall($fshl, llvmcall, $VT, ($VT, $VT, $VT), x, x, $s) + + s = ntuple(_->VecElement(UInt64(23)), N) + @eval @inline _rotl23(x::$VT) = ccall($fshl, llvmcall, $VT, ($VT, $VT, $VT), x, x, $s) + + code = """ + %lshiftOp = shufflevector <1 x i64> , <1 x i64> undef, <$N x i32> zeroinitializer + %res = shl <$N x i64> %0, %lshiftOp + ret <$N x i64> %res + """ + @eval @inline _shl17(x::$VT) = llvmcall($code, $VT, Tuple{$VT}, x) + + code = """ + %res = add <$N x i64> %1, %0 + ret <$N x i64> %res + """ + @eval @inline _plus(x::$VT, y::$VT) = llvmcall($code, $VT, Tuple{$VT, $VT}, x, y) + + code = """ + %res = xor <$N x i64> %1, %0 + ret <$N x i64> %res + """ + @eval @inline _xor(x::$VT, y::$VT) = llvmcall($code, $VT, Tuple{$VT, $VT}, x, y) + + code = """ + %res = and <$N x i64> %1, %0 + ret <$N x i64> %res + """ + @eval @inline _and(x::$VT, y::$VT) = llvmcall($code, $VT, Tuple{$VT, $VT}, x, y) + + code = """ + %res = or <$N x i64> %1, %0 + ret <$N x i64> %res + """ + @eval @inline _or(x::$VT, y::$VT) = llvmcall($code, $VT, Tuple{$VT, $VT}, x, y) + + code = """ + %tmp = insertelement <1 x i64> undef, i64 %1, i32 0 + %shift = shufflevector <1 x i64> %tmp, <1 x i64> %tmp, <$N x i32> zeroinitializer + %res = lshr <$N x i64> %0, %shift + ret <$N x i64> %res + """ + @eval @inline _lshr(x::$VT, y::Int64) = llvmcall($code, $VT, Tuple{$VT, Int64}, x, y) + + code = """ + %shiftamt = shufflevector <1 x i64> , <1 x i64> undef, <$N x i32> zeroinitializer + %sh = lshr <$N x i64> %0, %shiftamt + %f = uitofp <$N x i64> %sh to <$N x double> + %scale = shufflevector <1 x double> , <1 x double> undef, <$N x i32> zeroinitializer + %m = fmul <$N x double> %f, %scale + %i = bitcast <$N x double> %m to <$N x i64> + ret <$N x i64> %i + """ + @eval @inline _bits2float(x::$VT, ::Type{Float64}) = llvmcall($code, $VT, Tuple{$VT}, x) + + code = """ + %as32 = bitcast <$N x i64> %0 to <$(2N) x i32> + %shiftamt = shufflevector <1 x i32> , <1 x i32> undef, <$(2N) x i32> zeroinitializer + %sh = lshr <$(2N) x i32> %as32, %shiftamt + %f = uitofp <$(2N) x i32> %sh to <$(2N) x float> + %scale = shufflevector <1 x float> , <1 x float> undef, <$(2N) x i32> zeroinitializer + %m = fmul <$(2N) x float> %f, %scale + %i = bitcast <$(2N) x float> %m to <$N x i64> + ret <$N x i64> %i + """ + @eval @inline _bits2float(x::$VT, ::Type{Float32}) = llvmcall($code, $VT, Tuple{$VT}, x) + end +end + + +function forkRand(rng::Union{TaskLocalRNG, Xoshiro}, ::Val{N}) where N + # constants have nothing up their sleeve. For more discussion, cf rng_split in task.c + # 0x02011ce34bce797f == hash(UInt(1))|0x01 + # 0x5a94851fb48a6e05 == hash(UInt(2))|0x01 + # 0x3688cf5d48899fa7 == hash(UInt(3))|0x01 + # 0x867b4bb4c42e5661 == hash(UInt(4))|0x01 + s0 = ntuple(i->VecElement(0x02011ce34bce797f * rand(rng, UInt64)), Val(N)) + s1 = ntuple(i->VecElement(0x5a94851fb48a6e05 * rand(rng, UInt64)), Val(N)) + s2 = ntuple(i->VecElement(0x3688cf5d48899fa7 * rand(rng, UInt64)), Val(N)) + s3 = ntuple(i->VecElement(0x867b4bb4c42e5661 * rand(rng, UInt64)), Val(N)) + (s0, s1, s2, s3) +end + +_id(x, T) = x + +@inline function xoshiro_bulk(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, T::Union{Type{UInt8}, Type{Bool}, Type{Float32}, Type{Float64}}, ::Val{N}, f::F = _id) where {N, F} + if len >= simdThreshold(T) + written = xoshiro_bulk_simd(rng, dst, len, T, Val(N), f) + len -= written + dst += written + end + if len != 0 + xoshiro_bulk_nosimd(rng, dst, len, T, f) + end + nothing +end + +@noinline function xoshiro_bulk_nosimd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{T}, f::F) where {T, F} + if rng isa TaskLocalRNG + task = current_task() + s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3 + else + (; s0, s1, s2, s3) = rng::Xoshiro + end + + i = 0 + while i+8 <= len + res = _rotl23(_plus(s0,s3)) + unsafe_store!(reinterpret(Ptr{UInt64}, dst + i), f(res, T)) + t = _shl17(s1) + s2 = _xor(s2, s0) + s3 = _xor(s3, s1) + s1 = _xor(s1, s2) + s0 = _xor(s0, s3) + s2 = _xor(s2, t) + s3 = _rotl45(s3) + i += 8 + end + if i < len + res = _rotl23(_plus(s0,s3)) + t = _shl17(s1) + s2 = _xor(s2, s0) + s3 = _xor(s3, s1) + s1 = _xor(s1, s2) + s0 = _xor(s0, s3) + s2 = _xor(s2, t) + s3 = _rotl45(s3) + ref = Ref(f(res, T)) + # TODO: This may make the random-stream dependent on system endianness + ccall(:memcpy, Ptr{Cvoid}, (Ptr{UInt8}, Ptr{UInt64}, Csize_t), dst+i, ref, len-i) + end + if rng isa TaskLocalRNG + task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3 + else + rng.s0, rng.s1, rng.s2, rng.s3 = s0, s1, s2, s3 + end + nothing +end + +@noinline function xoshiro_bulk_nosimd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{Bool}, f) + if rng isa TaskLocalRNG + task = current_task() + s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3 + else + (; s0, s1, s2, s3) = rng::Xoshiro + end + + i = 0 + while i+8 <= len + res = _rotl23(_plus(s0,s3)) + shift = 0 + while i+8 <= len && shift < 8 + resLoc = _and(_lshr(res, shift), 0x0101010101010101) + unsafe_store!(reinterpret(Ptr{UInt64}, dst + i), resLoc) + i += 8 + shift += 1 + end + + t = _shl17(s1) + s2 = _xor(s2, s0) + s3 = _xor(s3, s1) + s1 = _xor(s1, s2) + s0 = _xor(s0, s3) + s2 = _xor(s2, t) + s3 = _rotl45(s3) + end + if i < len + # we may overgenerate some bytes here, if len mod 64 <= 56 and len mod 8 != 0 + res = _rotl23(_plus(s0,s3)) + resLoc = _and(res, 0x0101010101010101) + ref = Ref(resLoc) + ccall(:memcpy, Ptr{Cvoid}, (Ptr{UInt8}, Ptr{UInt64}, Csize_t), dst+i, ref, len-i) + t = _shl17(s1) + s2 = _xor(s2, s0) + s3 = _xor(s3, s1) + s1 = _xor(s1, s2) + s0 = _xor(s0, s3) + s2 = _xor(s2, t) + s3 = _rotl45(s3) + end + if rng isa TaskLocalRNG + task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3 + else + rng.s0, rng.s1, rng.s2, rng.s3 = s0, s1, s2, s3 + end + nothing +end + + +@noinline function xoshiro_bulk_simd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{T}, ::Val{N}, f::F) where {T,N,F} + s0, s1, s2, s3 = forkRand(rng, Val(N)) + + i = 0 + while i + 8*N <= len + res = _rotl23(_plus(s0,s3)) + t = _shl17(s1) + s2 = _xor(s2, s0) + s3 = _xor(s3, s1) + s1 = _xor(s1, s2) + s0 = _xor(s0, s3) + s2 = _xor(s2, t) + s3 = _rotl45(s3) + unsafe_store!(reinterpret(Ptr{NTuple{N,VecElement{UInt64}}}, dst + i), f(res, T)) + i += 8*N + end + return i +end + +@noinline function xoshiro_bulk_simd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{Bool}, ::Val{N}, f) where {N} + s0, s1, s2, s3 = forkRand(rng, Val(N)) + msk = ntuple(i->VecElement(0x0101010101010101), Val(N)) + i = 0 + while i + 64*N <= len + res = _rotl23(_plus(s0,s3)) + t = _shl17(s1) + s2 = _xor(s2, s0) + s3 = _xor(s3, s1) + s1 = _xor(s1, s2) + s0 = _xor(s0, s3) + s2 = _xor(s2, t) + s3 = _rotl45(s3) + for k=0:7 + tmp = _lshr(res, k) + toWrite = _and(tmp, msk) + unsafe_store!(reinterpret(Ptr{NTuple{N,VecElement{UInt64}}}, dst + i + k*N*8), toWrite) + end + i += 64*N + end + return i +end + + +function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{Float32}, ::SamplerTrivial{CloseOpen01{Float32}}) + GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*4, Float32, xoshiroWidth(), _bits2float) + dst +end + +function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{Float64}, ::SamplerTrivial{CloseOpen01{Float64}}) + GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*8, Float64, xoshiroWidth(), _bits2float) + dst +end + +for T in BitInteger_types + @eval function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Union{Array{$T}, UnsafeView{$T}}, ::SamplerType{$T}) + GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*sizeof($T), UInt8, xoshiroWidth()) + dst + end +end + +function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{Bool}, ::SamplerType{Bool}) + GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst), Bool, xoshiroWidth()) + dst +end + +end # module diff --git a/stdlib/Random/src/normal.jl b/stdlib/Random/src/normal.jl index 8638d3d62c624..6bb4cd2c36ce8 100644 --- a/stdlib/Random/src/normal.jl +++ b/stdlib/Random/src/normal.jl @@ -44,10 +44,9 @@ julia> randn(rng, ComplexF32, (2, 3)) inside the following function. =# @inbounds begin - r = rand(rng, UInt52Raw()) + r = rand(rng, UInt52()) # the following code is identical to the one in `_randn(rng::AbstractRNG, r::UInt64)` - r &= 0x000fffffffffffff rabs = Int64(r>>1) # One bit for the sign idx = rabs & 0xFF x = ifelse(r % Bool, -rabs, rabs)*wi[idx+1] @@ -214,6 +213,22 @@ for randfun in [:randn, :randexp] A end + # optimization for Xoshiro, which randomizes natively Array{UInt64} + function $randfun!(rng::Union{Xoshiro, TaskLocalRNG}, A::Array{Float64}) + if length(A) < 7 + for i in eachindex(A) + @inbounds A[i] = $randfun(rng, Float64) + end + else + GC.@preserve A rand!(rng, UnsafeView{UInt64}(pointer(A), length(A))) + + for i in eachindex(A) + @inbounds A[i] = $_randfun(rng, reinterpret(UInt64, A[i]) >>> 12) + end + end + A + end + $randfun!(A::AbstractArray) = $randfun!(default_rng(), A) # generating arrays diff --git a/test/threads_exec.jl b/test/threads_exec.jl index 860f0e03e2f5e..b7e4c37631d83 100644 --- a/test/threads_exec.jl +++ b/test/threads_exec.jl @@ -874,3 +874,28 @@ end end @test sort!(collect(ys)) == 1:3 end + +# reproducible multi-threaded rand() + +using Random + +function reproducible_rand(r, i) + if i == 0 + return UInt64(0) + end + r1 = rand(r, UInt64)*hash(i) + t1 = Threads.@spawn reproducible_rand(r, i-1) + t2 = Threads.@spawn reproducible_rand(r, i-1) + r2 = rand(r, UInt64) + return r1 + r2 + fetch(t1) + fetch(t2) +end + +@testset "Task-local random" begin + r = Random.TaskLocalRNG() + Random.seed!(r, 23) + val = reproducible_rand(r, 10) + for i = 1:4 + Random.seed!(r, 23) + @test reproducible_rand(r, 10) == val + end +end From cfd940e9ebec94f3146644b1698a96ac887b29a3 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Tue, 20 Apr 2021 16:35:44 -0400 Subject: [PATCH 2/2] make TaskLocal the default RNG --- NEWS.md | 10 ++++ doc/src/devdocs/subarrays.md | 10 ++-- doc/src/manual/performance-tips.md | 16 +++---- stdlib/LinearAlgebra/test/bunchkaufman.jl | 2 +- stdlib/LinearAlgebra/test/cholesky.jl | 2 +- stdlib/LinearAlgebra/test/dense.jl | 10 ++-- stdlib/LinearAlgebra/test/eigen.jl | 2 +- stdlib/LinearAlgebra/test/lu.jl | 4 +- stdlib/LinearAlgebra/test/qr.jl | 2 +- stdlib/LinearAlgebra/test/uniformscaling.jl | 2 +- stdlib/Random/docs/src/index.md | 12 ++--- stdlib/Random/src/RNGs.jl | 53 +++++++++------------ stdlib/Random/src/misc.jl | 4 +- stdlib/Random/test/runtests.jl | 38 +++++++-------- stdlib/SparseArrays/src/sparsematrix.jl | 15 +++--- stdlib/SparseArrays/test/sparse.jl | 2 +- stdlib/Test/src/Test.jl | 6 +-- stdlib/Test/test/runtests.jl | 2 +- test/error.jl | 2 +- 19 files changed, 98 insertions(+), 96 deletions(-) diff --git a/NEWS.md b/NEWS.md index 80768cc5e43a8..fe085376609ab 100644 --- a/NEWS.md +++ b/NEWS.md @@ -26,6 +26,8 @@ Language changes * Destructuring will no longer mutate values on the left hand side while iterating through values on the right hand side. In the example of an array `x`, `x[2], x[1] = x` will now swap the first and second entry of `x`, whereas it used to fill both entries with `x[1]` because `x[2]` was mutated during the iteration of `x`. ([#40737]) +* The default random number generator has changed, so all random numbers will be different (even with the + same seed) unless an explicit RNG object is used. See the section on the `Random` standard library below. Compiler/Runtime improvements ----------------------------- @@ -39,7 +41,12 @@ Command-line option changes Multi-threading changes ----------------------- + * If the `JULIA_NUM_THREADS` environment variable is set to `auto`, then the number of threads will be set to the number of CPU threads ([#38952]) +* Every `Task` object has a local random number generator state, providing reproducible (schedule-independent) execution + of parallel simulation code by default. The default generator is also significantly faster in parallel than in + previous versions. + Build system changes -------------------- @@ -130,6 +137,9 @@ Standard library changes #### Random +* The default random number generator has been changed from Mersenne Twister to [Xoshiro256++](https://prng.di.unimi.it/). + The new generator has smaller state, better performance, and superior statistical properties. + This generator is the one used for reproducible Task-local randomness. #### REPL diff --git a/doc/src/devdocs/subarrays.md b/doc/src/devdocs/subarrays.md index 8ebc773812131..dee9547fb1efd 100644 --- a/doc/src/devdocs/subarrays.md +++ b/doc/src/devdocs/subarrays.md @@ -19,14 +19,14 @@ julia> A = rand(2,3,4); julia> S1 = view(A, :, 1, 2:3) 2×2 view(::Array{Float64, 3}, :, 1, 2:3) with eltype Float64: - 0.200586 0.066423 - 0.298614 0.956753 + 0.166507 0.97397 + 0.754392 0.831383 julia> S2 = view(A, 1, :, 2:3) 3×2 view(::Array{Float64, 3}, 1, :, 2:3) with eltype Float64: - 0.200586 0.066423 - 0.246837 0.646691 - 0.648882 0.276021 + 0.166507 0.97397 + 0.518957 0.0705793 + 0.503714 0.825124 ``` ```@meta DocTestSetup = nothing diff --git a/doc/src/manual/performance-tips.md b/doc/src/manual/performance-tips.md index 86005171e8f74..da9bf8fd1bf46 100644 --- a/doc/src/manual/performance-tips.md +++ b/doc/src/manual/performance-tips.md @@ -77,12 +77,12 @@ julia> function sum_global() end; julia> @time sum_global() - 0.009639 seconds (7.36 k allocations: 300.310 KiB, 98.32% compilation time) -496.84883432553846 + 0.026328 seconds (9.30 k allocations: 416.747 KiB, 36.50% gc time, 99.48% compilation time) +508.39048990953665 julia> @time sum_global() - 0.000140 seconds (3.49 k allocations: 70.313 KiB) -496.84883432553846 + 0.000075 seconds (3.49 k allocations: 70.156 KiB) +508.39048990953665 ``` On the first call (`@time sum_global()`) the function gets compiled. (If you've not yet used [`@time`](@ref) @@ -113,12 +113,12 @@ julia> function sum_arg(x) end; julia> @time sum_arg(x) - 0.006202 seconds (4.18 k allocations: 217.860 KiB, 99.72% compilation time) -496.84883432553846 + 0.010298 seconds (4.23 k allocations: 226.021 KiB, 99.81% compilation time) +508.39048990953665 julia> @time sum_arg(x) 0.000005 seconds (1 allocation: 16 bytes) -496.84883432553846 +508.39048990953665 ``` The 1 allocation seen is from running the `@time` macro itself in global scope. If we instead run @@ -129,7 +129,7 @@ julia> time_sum(x) = @time sum_arg(x); julia> time_sum(x) 0.000001 seconds -496.84883432553846 +508.39048990953665 ``` In some situations, your function may need to allocate memory as part of its operation, and this diff --git a/stdlib/LinearAlgebra/test/bunchkaufman.jl b/stdlib/LinearAlgebra/test/bunchkaufman.jl index e05592cdc1c5c..f1da22d8733e2 100644 --- a/stdlib/LinearAlgebra/test/bunchkaufman.jl +++ b/stdlib/LinearAlgebra/test/bunchkaufman.jl @@ -12,7 +12,7 @@ n = 10 n1 = div(n, 2) n2 = 2*n1 -Random.seed!(12343210) +Random.seed!(12343212) areal = randn(n,n)/2 aimg = randn(n,n)/2 diff --git a/stdlib/LinearAlgebra/test/cholesky.jl b/stdlib/LinearAlgebra/test/cholesky.jl index 170af59eef8c3..83aed94de9e35 100644 --- a/stdlib/LinearAlgebra/test/cholesky.jl +++ b/stdlib/LinearAlgebra/test/cholesky.jl @@ -39,7 +39,7 @@ end n1 = div(n, 2) n2 = 2*n1 - Random.seed!(12343) + Random.seed!(12344) areal = randn(n,n)/2 aimg = randn(n,n)/2 diff --git a/stdlib/LinearAlgebra/test/dense.jl b/stdlib/LinearAlgebra/test/dense.jl index f78279cfc365d..57cb06786e994 100644 --- a/stdlib/LinearAlgebra/test/dense.jl +++ b/stdlib/LinearAlgebra/test/dense.jl @@ -15,17 +15,17 @@ n = 10 n1 = div(n, 2) n2 = 2*n1 -Random.seed!(1234321) +Random.seed!(1234323) @testset "Matrix condition number" begin ainit = rand(n,n) @testset "for $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64) ainit = convert(Matrix{elty}, ainit) for a in (copy(ainit), view(ainit, 1:n, 1:n)) - @test cond(a,1) ≈ 4.837320054554436e+02 atol=0.01 - @test cond(a,2) ≈ 1.960057871514615e+02 atol=0.01 - @test cond(a,Inf) ≈ 3.757017682707787e+02 atol=0.01 - @test cond(a[:,1:5]) ≈ 10.233059337453463 atol=0.01 + @test cond(a,1) ≈ 50.60863783272028 atol=0.5 + @test cond(a,2) ≈ 23.059634761613314 atol=0.5 + @test cond(a,Inf) ≈ 45.12503933120795 atol=0.4 + @test cond(a[:,1:5]) ≈ 5.719500544258695 atol=0.01 @test_throws ArgumentError cond(a,3) end end diff --git a/stdlib/LinearAlgebra/test/eigen.jl b/stdlib/LinearAlgebra/test/eigen.jl index fd9f7dfba92ee..487c72d2e01f4 100644 --- a/stdlib/LinearAlgebra/test/eigen.jl +++ b/stdlib/LinearAlgebra/test/eigen.jl @@ -11,7 +11,7 @@ n = 10 n1 = div(n, 2) n2 = 2*n1 -Random.seed!(1234321) +Random.seed!(12343219) areal = randn(n,n)/2 aimg = randn(n,n)/2 diff --git a/stdlib/LinearAlgebra/test/lu.jl b/stdlib/LinearAlgebra/test/lu.jl index 6a1c34e511c2e..e537658e4a0c8 100644 --- a/stdlib/LinearAlgebra/test/lu.jl +++ b/stdlib/LinearAlgebra/test/lu.jl @@ -11,7 +11,7 @@ n = 10 n1 = div(n, 2) n2 = 2*n1 -Random.seed!(1234321) +Random.seed!(1234324) areal = randn(n,n)/2 aimg = randn(n,n)/2 @@ -241,7 +241,7 @@ end end @testset "conversion" begin - Random.seed!(3) + Random.seed!(4) a = Tridiagonal(rand(9),rand(10),rand(9)) fa = Array(a) falu = lu(fa) diff --git a/stdlib/LinearAlgebra/test/qr.jl b/stdlib/LinearAlgebra/test/qr.jl index dd5a0db40dd95..f20be93572c43 100644 --- a/stdlib/LinearAlgebra/test/qr.jl +++ b/stdlib/LinearAlgebra/test/qr.jl @@ -11,7 +11,7 @@ n = 10 n1 = div(n, 2) n2 = 2*n1 -Random.seed!(1234321) +Random.seed!(1234325) areal = randn(n,n)/2 aimg = randn(n,n)/2 diff --git a/stdlib/LinearAlgebra/test/uniformscaling.jl b/stdlib/LinearAlgebra/test/uniformscaling.jl index 8c69308d06ce8..04d4d7d7e0c9e 100644 --- a/stdlib/LinearAlgebra/test/uniformscaling.jl +++ b/stdlib/LinearAlgebra/test/uniformscaling.jl @@ -10,7 +10,7 @@ using .Main.Quaternions isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl")) using .Main.OffsetArrays -Random.seed!(123) +Random.seed!(1234543) @testset "basic functions" begin @test I === I' # transpose diff --git a/stdlib/Random/docs/src/index.md b/stdlib/Random/docs/src/index.md index ca86de44ecce4..facb18bbdd8d5 100644 --- a/stdlib/Random/docs/src/index.md +++ b/stdlib/Random/docs/src/index.md @@ -147,20 +147,20 @@ Scalar and array methods for `Die` now work as expected: ```jldoctest Die; setup = :(Random.seed!(1)) julia> rand(Die) -Die(15) +Die(6) julia> rand(MersenneTwister(0), Die) Die(11) julia> rand(Die, 3) 3-element Vector{Die}: - Die(18) - Die(5) + Die(15) + Die(19) Die(4) julia> a = Vector{Die}(undef, 3); rand!(a) 3-element Vector{Die}: - Die(5) + Die(17) Die(20) Die(15) ``` @@ -175,13 +175,13 @@ In order to define random generation out of objects of type `S`, the following m julia> Random.rand(rng::AbstractRNG, d::Random.SamplerTrivial{Die}) = rand(rng, 1:d[].nsides); julia> rand(Die(4)) -3 +1 julia> rand(Die(4), 3) 3-element Vector{Any}: + 3 4 1 - 1 ``` Given a collection type `S`, it's currently assumed that if `rand(::S)` is defined, an object of type `eltype(S)` will be produced. In the last example, a `Vector{Any}` is produced; the reason is that `eltype(Die) == Any`. The remedy is to define `Base.eltype(::Type{Die}) = Int`. diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl index fedad41d54cd3..45d0934dc4039 100644 --- a/stdlib/Random/src/RNGs.jl +++ b/stdlib/Random/src/RNGs.jl @@ -355,48 +355,37 @@ function seed!(r::MersenneTwister, seed::Vector{UInt32}) return r end -seed!(r::MersenneTwister=default_rng()) = seed!(r, make_seed()) +seed!(r::MersenneTwister) = seed!(r, make_seed()) seed!(r::MersenneTwister, n::Integer) = seed!(r, make_seed(n)) -seed!(seed::Union{Integer,Vector{UInt32}}) = seed!(default_rng(), seed) ### Global RNG -const THREAD_RNGs = MersenneTwister[] -@inline default_rng() = default_rng(Threads.threadid()) -@noinline function default_rng(tid::Int) - 0 < tid <= length(THREAD_RNGs) || _rng_length_assert() - if @inbounds isassigned(THREAD_RNGs, tid) - @inbounds MT = THREAD_RNGs[tid] - else - MT = MersenneTwister() - @inbounds THREAD_RNGs[tid] = MT - end - return MT -end -@noinline _rng_length_assert() = @assert false "0 < tid <= length(THREAD_RNGs)" - -function __init__() - resize!(empty!(THREAD_RNGs), Threads.nthreads()) # ensures that we didn't save a bad object - seed!(TaskLocalRNG()) -end - - struct _GLOBAL_RNG <: AbstractRNG global const GLOBAL_RNG = _GLOBAL_RNG.instance end -# GLOBAL_RNG currently represents a MersenneTwister -typeof_rng(::_GLOBAL_RNG) = MersenneTwister +# GLOBAL_RNG currently uses TaskLocalRNG +typeof_rng(::_GLOBAL_RNG) = TaskLocalRNG -copy!(dst::MersenneTwister, ::_GLOBAL_RNG) = copy!(dst, default_rng()) -copy!(::_GLOBAL_RNG, src::MersenneTwister) = copy!(default_rng(), src) +@inline default_rng() = TaskLocalRNG() +@inline default_rng(tid::Int) = TaskLocalRNG() + +copy!(dst::Xoshiro, ::_GLOBAL_RNG) = copy!(dst, default_rng()) +copy!(::_GLOBAL_RNG, src::Xoshiro) = copy!(default_rng(), src) copy(::_GLOBAL_RNG) = copy(default_rng()) -seed!(::_GLOBAL_RNG, seed::Vector{UInt32}) = seed!(default_rng(), seed) -seed!(::_GLOBAL_RNG, n::Integer) = seed!(default_rng(), n) -seed!(::_GLOBAL_RNG, ::Nothing) = seed!(default_rng(), nothing) -seed!(::_GLOBAL_RNG) = seed!(default_rng(), nothing) +GLOBAL_SEED = 0 + +seed!(::_GLOBAL_RNG, seed) = (global GLOBAL_SEED = seed; seed!(default_rng(), seed)) + +function seed!(rng::_GLOBAL_RNG) + seed!(rng, (rand(RandomDevice(), UInt64), rand(RandomDevice(), UInt64), + rand(RandomDevice(), UInt64), rand(RandomDevice(), UInt64))) +end +seed!() = seed!(GLOBAL_RNG) +seed!(rng::_GLOBAL_RNG, ::Nothing) = seed!(rng) # to resolve ambiguity +seed!(seed::Union{Integer,Vector{UInt32},Vector{UInt64},NTuple{4,UInt64}}) = seed!(GLOBAL_RNG, seed) rng_native_52(::_GLOBAL_RNG) = rng_native_52(default_rng()) rand(::_GLOBAL_RNG, sp::SamplerBoolBitInteger) = rand(default_rng(), sp) @@ -420,6 +409,10 @@ for T in BitInteger_types @eval rand!(::_GLOBAL_RNG, A::Array{$T}, I::SamplerType{$T}) = rand!(default_rng(), A, I) end +function __init__() + seed!(GLOBAL_RNG) +end + ### generation diff --git a/stdlib/Random/src/misc.jl b/stdlib/Random/src/misc.jl index 7c26d36f0a0d4..674c1d3bfe571 100644 --- a/stdlib/Random/src/misc.jl +++ b/stdlib/Random/src/misc.jl @@ -53,13 +53,13 @@ number generator, see [Random Numbers](@ref). # Examples ```jldoctest julia> Random.seed!(3); randstring() -"Y7m62wOj" +"vZmAMp3z" julia> randstring(MersenneTwister(3), 'a':'z', 6) "ocucay" julia> randstring("ACGT") -"ATTTGCGT" +"CAAACACC" ``` !!! note diff --git a/stdlib/Random/test/runtests.jl b/stdlib/Random/test/runtests.jl index 639a2bbed60c2..1d591ce7493b9 100644 --- a/stdlib/Random/test/runtests.jl +++ b/stdlib/Random/test/runtests.jl @@ -628,7 +628,7 @@ guardseed() do m = MersenneTwister(0) @test Random.seed!() === g @test Random.seed!(rand(UInt)) === g - @test Random.seed!(rand(UInt32, rand(1:10))) === g + @test Random.seed!(rand(UInt32, rand(1:8))) === g @test Random.seed!(m) === m @test Random.seed!(m, rand(UInt)) === m @test Random.seed!(m, rand(UInt32, rand(1:10))) === m @@ -752,28 +752,26 @@ end @test Random.seed!(GLOBAL_RNG, 0) === LOCAL_RNG @test Random.seed!(GLOBAL_RNG) === LOCAL_RNG - mt = MersenneTwister(1) - @test copy!(mt, GLOBAL_RNG) === mt - @test mt == LOCAL_RNG - Random.seed!(mt, 2) - @test mt != LOCAL_RNG - @test copy!(GLOBAL_RNG, mt) === LOCAL_RNG - @test mt == LOCAL_RNG - mt2 = copy(GLOBAL_RNG) - @test mt2 isa typeof(LOCAL_RNG) - @test mt2 !== LOCAL_RNG - @test mt2 == LOCAL_RNG + xo = Xoshiro() + @test copy!(xo, GLOBAL_RNG) === xo + @test xo == LOCAL_RNG + Random.seed!(xo, 2) + @test xo != LOCAL_RNG + @test copy!(GLOBAL_RNG, xo) === LOCAL_RNG + @test xo == LOCAL_RNG + xo2 = copy(GLOBAL_RNG) + @test xo2 !== LOCAL_RNG + @test xo2 == LOCAL_RNG for T in (Random.UInt52Raw{UInt64}, - Random.UInt2x52Raw{UInt128}, Random.UInt104Raw{UInt128}, Random.CloseOpen12_64) x = Random.SamplerTrivial(T()) - @test rand(GLOBAL_RNG, x) === rand(mt, x) + @test rand(GLOBAL_RNG, x) === rand(xo, x) end for T in (Int64, UInt64, Int128, UInt128, Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32) x = Random.SamplerType{T}() - @test rand(GLOBAL_RNG, x) === rand(mt, x) + @test rand(GLOBAL_RNG, x) === rand(xo, x) end A = fill(0.0, 100, 100) @@ -782,21 +780,21 @@ end vB = view(B, :, :) I1 = Random.SamplerTrivial(Random.CloseOpen01{Float64}()) I2 = Random.SamplerTrivial(Random.CloseOpen12{Float64}()) - @test rand!(GLOBAL_RNG, A, I1) === A == rand!(mt, B, I1) === B + @test rand!(GLOBAL_RNG, A, I1) === A == rand!(xo, B, I1) === B B = fill!(B, 1.0) @test rand!(GLOBAL_RNG, vA, I1) === vA - rand!(mt, vB, I1) + rand!(xo, vB, I1) @test A == B for T in (Float16, Float32) B = fill!(B, 1.0) - @test rand!(GLOBAL_RNG, A, I2) === A == rand!(mt, B, I2) === B + @test rand!(GLOBAL_RNG, A, I2) === A == rand!(xo, B, I2) === B B = fill!(B, 1.0) - @test rand!(GLOBAL_RNG, A, I1) === A == rand!(mt, B, I1) === B + @test rand!(GLOBAL_RNG, A, I1) === A == rand!(xo, B, I1) === B end for T in Base.BitInteger_types x = Random.SamplerType{T}() B = fill!(B, 1.0) - @test rand!(GLOBAL_RNG, A, x) === A == rand!(mt, B, x) === B + @test rand!(GLOBAL_RNG, A, x) === A == rand!(xo, B, x) === B end # issue #33170 @test Sampler(GLOBAL_RNG, 2:4, Val(1)) isa SamplerRangeNDL diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl index d4b1cc4ed8e6f..9fd4bf62492c3 100644 --- a/stdlib/SparseArrays/src/sparsematrix.jl +++ b/stdlib/SparseArrays/src/sparsematrix.jl @@ -1609,13 +1609,14 @@ argument specifies a random number generator, see [Random Numbers](@ref). # Examples ```jldoctest; setup = :(using Random; Random.seed!(1234)) julia> sprand(Bool, 2, 2, 0.5) -2×2 SparseMatrixCSC{Bool, Int64} with 1 stored entry: +2×2 SparseMatrixCSC{Bool, Int64} with 2 stored entries: ⋅ ⋅ - ⋅ 1 + 1 1 julia> sprand(Float64, 3, 0.75) -3-element SparseVector{Float64, Int64} with 1 stored entry: - [3] = 0.298614 +3-element SparseVector{Float64, Int64} with 2 stored entries: + [1] = 0.523355 + [2] = 0.0890391 ``` """ function sprand(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat, rfn::Function, ::Type{T}=eltype(rfn(r, 1))) where T @@ -1656,9 +1657,9 @@ argument specifies a random number generator, see [Random Numbers](@ref). # Examples ```jldoctest; setup = :(using Random; Random.seed!(0)) julia> sprandn(2, 2, 0.75) -2×2 SparseMatrixCSC{Float64, Int64} with 2 stored entries: - ⋅ 0.586617 - ⋅ 0.297336 +2×2 SparseMatrixCSC{Float64, Int64} with 3 stored entries: + -1.92631 -0.858041 + ⋅ 0.0213808 ``` """ sprandn(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat) = diff --git a/stdlib/SparseArrays/test/sparse.jl b/stdlib/SparseArrays/test/sparse.jl index e1d4c8c23c48a..a5e4f48fd4e2d 100644 --- a/stdlib/SparseArrays/test/sparse.jl +++ b/stdlib/SparseArrays/test/sparse.jl @@ -1742,7 +1742,7 @@ end local A = guardseed(1234321) do triu(sprand(10, 10, 0.2)) end - @test getcolptr(SparseArrays.droptol!(A, 0.01)) == [1, 2, 2, 3, 4, 5, 5, 6, 8, 10, 13] + @test getcolptr(SparseArrays.droptol!(A, 0.01)) == [1, 1, 1, 1, 3, 3, 5, 6, 8, 11, 12] @test isequal(SparseArrays.droptol!(sparse([1], [1], [1]), 1), SparseMatrixCSC(1, 1, Int[1, 1], Int[], Int[])) end diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl index 62d084bdcad58..a858418f3496e 100644 --- a/stdlib/Test/src/Test.jl +++ b/stdlib/Test/src/Test.jl @@ -1277,7 +1277,7 @@ function testset_beginend(args, tests, source) local oldrng = copy(RNG) try # RNG is re-seeded with its own seed to ease reproduce a failed test - Random.seed!(RNG.seed) + Random.seed!(Random.GLOBAL_SEED) let $(esc(tests)) end @@ -1368,7 +1368,7 @@ function testset_forloop(args, testloop, source) local ts local RNG = default_rng() local oldrng = copy(RNG) - Random.seed!(RNG.seed) + Random.seed!(Random.GLOBAL_SEED) local tmprng = copy(RNG) try let @@ -1839,7 +1839,7 @@ end "`guardseed(f, seed)` is equivalent to running `Random.seed!(seed); f()` and then restoring the state of the global RNG as it was before." -guardseed(f::Function, seed::Union{Vector{UInt32},Integer}) = guardseed() do +guardseed(f::Function, seed::Union{Vector{UInt64},Vector{UInt32},Integer,NTuple{4,UInt64}}) = guardseed() do Random.seed!(seed) f() end diff --git a/stdlib/Test/test/runtests.jl b/stdlib/Test/test/runtests.jl index ff8ba13841aaa..3c56ad8feba51 100644 --- a/stdlib/Test/test/runtests.jl +++ b/stdlib/Test/test/runtests.jl @@ -853,7 +853,7 @@ let code = quote end @testset "@testset preserves GLOBAL_RNG's state, and re-seeds it" begin - # i.e. it behaves as if it was wrapped in a `guardseed(GLOBAL_RNG.seed)` block + # i.e. it behaves as if it was wrapped in a `guardseed(GLOBAL_SEED)` block seed = rand(UInt128) Random.seed!(seed) a = rand() diff --git a/test/error.jl b/test/error.jl index bb97a0e66ed0b..38ea378664241 100644 --- a/test/error.jl +++ b/test/error.jl @@ -9,7 +9,7 @@ Test.guardseed(12345) do x = ratio(collect(ExponentialBackOff(n=100, max_delay=Inf, factor=1, jitter=0.1))) xm = sum(x) / length(x) - @test (xm - 1.0) < 1e-4 + @test abs(xm - 1.0) < 0.01 end end @testset "retrying after errors" begin