Skip to content

Commit

Permalink
Don't use integer division for cong (#50427)
Browse files Browse the repository at this point in the history
  • Loading branch information
gbaraldi authored Jul 24, 2023
1 parent c57d33a commit 6f6439e
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 29 deletions.
17 changes: 5 additions & 12 deletions base/partr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,9 @@ end
# NOTE(review): presumably the arity `d` of the task heaps — confirm against the
# sift routines, which are not fully visible here.
const heap_d = UInt32(8)
# Two (initially empty) vectors of `taskheap`; other code in this file indexes
# this array with `tp` and replaces an entry when the heaps are resized.
const heaps = [Vector{taskheap}(undef, 0), Vector{taskheap}(undef, 0)]
# Two SpinLocks — presumably one guarding each entry of `heaps`; verify at the
# lock/unlock sites.
const heaps_lock = [SpinLock(), SpinLock()]
# Per-`tp` value handed to `cong` as its `unbias` argument. Starts at
# typemax(UInt32) and is overwritten with `unbias_cong(heap_p)` on resize.
const cong_unbias = [typemax(UInt32), typemax(UInt32)]


# Two-argument form: ask the runtime (`jl_rand_ptls`) for a pseudo-random UInt32
# derived from `max` and `unbias`, then add one. Callers in this file use the
# result to index `tpheaps`, so the `+ UInt32(1)` shifts it for 1-based indexing.
# `unbias` is forwarded to the C side unchanged.
cong(max::UInt32, unbias::UInt32) =
ccall(:jl_rand_ptls, UInt32, (UInt32, UInt32), max, unbias) + UInt32(1)

# Compute typemax(UInt32) - ((typemax(UInt32) % max) + 1) for the given `max`.
# The result is stored per thread pool in `cong_unbias` and forwarded to `cong`
# as its `unbias` argument. `max` must be non-zero (the remainder would throw).
function unbias_cong(max::UInt32)
    top = typemax(UInt32)
    # remainder of the full UInt32 range modulo `max`, plus one
    leftover = rem(top, max) + one(UInt32)
    return top - leftover
end
# One-argument form: ask the runtime (`jl_rand_ptls`) for a pseudo-random UInt32
# bounded by `max`, then add one. Callers in this file use the result to index
# `tpheaps`, so the `+ UInt32(1)` shifts it for 1-based indexing.
cong(max::UInt32) = ccall(:jl_rand_ptls, UInt32, (UInt32,), max) + UInt32(1)


function multiq_sift_up(heap::taskheap, idx::Int32)
Expand Down Expand Up @@ -86,7 +80,6 @@ function multiq_size(tpid::Int8)
newheaps[i] = taskheap()
end
heaps[tp] = newheaps
cong_unbias[tp] = unbias_cong(heap_p)
end

return heap_p
Expand All @@ -100,10 +93,10 @@ function multiq_insert(task::Task, priority::UInt16)

task.priority = priority

rn = cong(heap_p, cong_unbias[tp])
rn = cong(heap_p)
tpheaps = heaps[tp]
while !trylock(tpheaps[rn].lock)
rn = cong(heap_p, cong_unbias[tp])
rn = cong(heap_p)
end

heap = tpheaps[rn]
Expand Down Expand Up @@ -140,8 +133,8 @@ function multiq_deletemin()
if i == heap_p
return nothing
end
rn1 = cong(heap_p, cong_unbias[tp])
rn2 = cong(heap_p, cong_unbias[tp])
rn1 = cong(heap_p)
rn2 = cong(heap_p)
prio1 = tpheaps[rn1].priority
prio2 = tpheaps[rn2].priority
if prio1 > prio2
Expand Down
4 changes: 2 additions & 2 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2768,7 +2768,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
steal : {
// Try to steal chunk from random GC thread
for (int i = 0; i < 4 * jl_n_markthreads; i++) {
uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_markthreads;
uint32_t v = gc_first_tid + cong(jl_n_markthreads, &ptls->rngseed);
jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue;
c = gc_chunkqueue_steal_from(mq2);
if (c.cid != GC_empty_chunk) {
Expand All @@ -2795,7 +2795,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
}
// Try to steal pointer from random GC thread
for (int i = 0; i < 4 * jl_n_markthreads; i++) {
uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_markthreads;
uint32_t v = gc_first_tid + cong(jl_n_markthreads, &ptls->rngseed);
jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue;
new_obj = gc_ptr_queue_steal_from(mq2);
if (new_obj != NULL)
Expand Down
20 changes: 12 additions & 8 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "support/strtod.h"
#include "gc-alloc-profiler.h"
#include "support/rle.h"
#include <stdint.h>
#include <uv.h>
#include <llvm-c/Types.h>
#include <llvm-c/Orc.h>
Expand Down Expand Up @@ -1216,15 +1217,18 @@ void jl_push_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_AR
//--------------------------------------------------
// congruential random number generator
// for a small amount of thread-local randomness
// Store UINT64_MAX - ((UINT64_MAX % max) + 1) into *unbias.
// `max` must be non-zero (it is used as a modulus); `unbias` must be a valid pointer.
STATIC_INLINE void unbias_cong(uint64_t max, uint64_t *unbias) JL_NOTSAFEPOINT
{
    uint64_t leftover = UINT64_MAX % max;
    *unbias = UINT64_MAX - (leftover + 1);
}
// Linear congruential generator step(s) on *seed, returning a value bounded by `max`.
// NOTE(review): this span shows two signature lines and two bodies back to back
// (a three-argument form using `unbias` and a two-argument form using a bit mask);
// it looks like a diff rendering of the old and new definitions — confirm which
// rows are live before compiling.
STATIC_INLINE uint64_t cong(uint64_t max, uint64_t unbias, uint64_t *seed) JL_NOTSAFEPOINT

STATIC_INLINE uint64_t cong(uint64_t max, uint64_t *seed) JL_NOTSAFEPOINT
{
// `unbias` form: advance the seed until it is <= unbias, then reduce modulo max.
while ((*seed = 69069 * (*seed) + 362437) > unbias)
;
return *seed % max;
// mask form: start from all ones and shift right by the leading-zero count of
// (max - 1) | 1, leaving the smallest all-ones mask that covers max - 1.
uint64_t mask = ~(uint64_t)0;
--max;
// `| 1` keeps the argument non-zero when max - 1 == 0.
mask >>= __builtin_clzll(max|1);
uint64_t x;
// Advance the seed and keep only the masked bits; retry while the candidate
// exceeds the (already decremented) max, so no division is involved.
do {
*seed = 69069 * (*seed) + 362437;
x = *seed & mask;
} while (x > max);
return x;
}
JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT;
JL_DLLEXPORT void jl_srand(uint64_t) JL_NOTSAFEPOINT;
Expand Down
5 changes: 2 additions & 3 deletions src/partr.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,10 @@ extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache,
// parallel task runtime
// ---

// Exported wrapper around `cong` that uses the seed stored in the current task's
// ptls (`ptls->rngseed`); the 64-bit result of `cong` is returned as uint32_t.
// NOTE(review): two signature lines and two return statements appear here (with
// and without `unbias`); this looks like a diff rendering of the old and new
// definitions — confirm which rows are live.
JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max, uint32_t unbias)
JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max)
{
jl_ptls_t ptls = jl_current_task->ptls;
// one-extend unbias back to 64-bits
return cong(max, -(uint64_t)-unbias, &ptls->rngseed);
return cong(max, &ptls->rngseed);
}

// initialize the threading infrastructure
Expand Down
3 changes: 1 addition & 2 deletions src/signal-handling.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,7 @@ static void jl_shuffle_int_array_inplace(int *carray, int size, uint64_t *seed)
// The "modern Fisher–Yates shuffle" - O(n) algorithm
// https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm
for (int i = size; i-- > 1; ) {
uint64_t unbias = UINT64_MAX; // slightly biased, but i is very small
size_t j = cong(i, unbias, seed);
size_t j = cong(i, seed);
uint64_t tmp = carray[j];
carray[j] = carray[i];
carray[i] = tmp;
Expand Down
3 changes: 1 addition & 2 deletions src/sys.c
Original file line number Diff line number Diff line change
Expand Up @@ -776,13 +776,12 @@ static _Atomic(uint64_t) g_rngseed;
// Return a pseudo-random 64-bit value drawn from the global seed `g_rngseed`.
// The seed is advanced on a local copy and published with a relaxed
// compare-and-swap; if the swap fails the step is redone from the reloaded value.
// NOTE(review): both a `cong(max, unbias, ...)` and a `cong(max, ...)` call appear
// (along with the `unbias` local); this looks like a diff rendering of the old
// and new bodies — confirm which rows are live.
JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT
{
uint64_t max = UINT64_MAX;
uint64_t unbias = UINT64_MAX;
uint64_t rngseed0 = jl_atomic_load_relaxed(&g_rngseed);
uint64_t rngseed;
uint64_t rnd;
do {
// work on a private copy so a failed swap leaves the global seed untouched
rngseed = rngseed0;
rnd = cong(max, unbias, &rngseed);
rnd = cong(max, &rngseed);
// presumably the cmpswap refreshes rngseed0 with the current global value on
// failure — verify against the jl_atomic_cmpswap_relaxed definition
} while (!jl_atomic_cmpswap_relaxed(&g_rngseed, &rngseed0, rngseed));
return rnd;
}
Expand Down

0 comments on commit 6f6439e

Please sign in to comment.