Skip to content

Commit

Permalink
remove nanosleep (#7962)
Browse files Browse the repository at this point in the history
See #7951

Authors:
- Elias Stehle (https://github.com/elstehle)
  • Loading branch information
elstehle authored Apr 15, 2021
1 parent e0eed20 commit a600005
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 30 deletions.
8 changes: 2 additions & 6 deletions cpp/src/io/comp/debrotli.cu
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,6 @@ static __device__ uint8_t *ext_heap_alloc(uint32_t bytes,
first_free_block = atomicExch((unsigned int *)heap_ptr, first_free_block);
if (first_free_block == ~0 || first_free_block >= ext_heap_size) {
// Some other block is holding the heap or there are no free blocks: try again later
// Wait a bit in an attempt to make the spin less resource-hungry
nanosleep(100);
continue;
}
if (first_free_block == 0) {
Expand Down Expand Up @@ -408,8 +406,7 @@ static __device__ uint8_t *ext_heap_alloc(uint32_t bytes,
}
} while (blk_next != 0 && blk_next < ext_heap_size);
first_free_block = atomicExch((unsigned int *)heap_ptr, first_free_block);
// Wait a while since reaching here means the heap is full
nanosleep(10000);
// Reaching here means the heap is full
// Just in case we're trying to allocate more than the entire heap
if (len > ext_heap_size - 4 * sizeof(uint32_t)) { break; }
}
Expand All @@ -429,8 +426,7 @@ static __device__ void ext_heap_free(void *ptr,
for (;;) {
first_free_block = atomicExch((unsigned int *)heap_ptr, first_free_block);
if (first_free_block != ~0) { break; }
// Some other block is holding the heap: wait
nanosleep(50);
// Some other block is holding the heap
}
if (first_free_block >= ext_heap_size) {
// Heap is currently empty
Expand Down
13 changes: 4 additions & 9 deletions cpp/src/io/comp/gpuinflate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -512,13 +512,10 @@ __device__ void decode_symbols(inflate_state_s *s)
#if ENABLE_PREFETCH
// Wait for prefetcher to fetch a worst-case of 48 bits per symbol
while ((*(volatile int32_t *)&s->pref.cur_p - (int32_t)(size_t)cur < batch_size * 6) ||
(s->x.batch_len[batch] != 0))
(s->x.batch_len[batch] != 0)) {}
#else
while (s->x.batch_len[batch] != 0)
while (s->x.batch_len[batch] != 0) {}
#endif
{
nanosleep(100);
}
batch_len = 0;
#if ENABLE_PREFETCH
if (cur + (bitpos >> 3) >= end) {
Expand Down Expand Up @@ -662,7 +659,7 @@ __device__ void decode_symbols(inflate_state_s *s)
if (batch_len != 0) batch = (batch + 1) & (batch_count - 1);
} while (sym != 256);

while (s->x.batch_len[batch] != 0) { nanosleep(150); }
while (s->x.batch_len[batch] != 0) {}
s->x.batch_len[batch] = -1;
s->bitbuf = bitbuf;
s->bitpos = bitpos;
Expand Down Expand Up @@ -779,7 +776,7 @@ __device__ void process_symbols(inflate_state_s *s, int t)
uint32_t lit_mask;

if (t == 0) {
while ((batch_len = s->x.batch_len[batch]) == 0) { nanosleep(100); }
while ((batch_len = s->x.batch_len[batch]) == 0) {}
} else {
batch_len = 0;
}
Expand Down Expand Up @@ -962,8 +959,6 @@ __device__ void prefetch_warp(volatile inflate_state_s *s, int t)
s->pref.cur_p = cur_p;
__threadfence_block();
}
} else if (t == 0) {
nanosleep(150);
}
}
}
Expand Down
9 changes: 4 additions & 5 deletions cpp/src/io/comp/unsnap.cu
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ __device__ void snappy_prefetch_bytestream(unsnap_state_s *s, int t)
blen = 0;
break;
}
nanosleep(100);
}
}
blen = shuffle(blen);
Expand Down Expand Up @@ -281,7 +280,7 @@ __device__ void snappy_decode_symbols(unsnap_state_s *s, uint32_t t)
if (t == 0) {
s->q.prefetch_rdpos = cur;
#pragma unroll(1) // We don't want unrolling here
while (s->q.prefetch_wrpos < min(cur + 5 * batch_size, end)) { nanosleep(50); }
while (s->q.prefetch_wrpos < min(cur + 5 * batch_size, end)) {}
b = &s->q.batch[batch * batch_size];
}
// Process small symbols in parallel: for data that does not get good compression,
Expand Down Expand Up @@ -441,7 +440,7 @@ __device__ void snappy_decode_symbols(unsnap_state_s *s, uint32_t t)
// Wait for prefetcher
s->q.prefetch_rdpos = cur;
#pragma unroll(1) // We don't want unrolling here
while (s->q.prefetch_wrpos < min(cur + 5 * batch_size, end)) { nanosleep(50); }
while (s->q.prefetch_wrpos < min(cur + 5 * batch_size, end)) {}
dst_pos += blen;
if (bytes_left < blen) break;
bytes_left -= blen;
Expand All @@ -457,7 +456,7 @@ __device__ void snappy_decode_symbols(unsnap_state_s *s, uint32_t t)
}
batch_len = shuffle(batch_len);
if (t == 0) {
while (s->q.batch_len[batch] != 0) { nanosleep(100); }
while (s->q.batch_len[batch] != 0) {}
}
if (batch_len != batch_size) { break; }
}
Expand Down Expand Up @@ -490,7 +489,7 @@ __device__ void snappy_process_symbols(unsnap_state_s *s, int t, Storage &temp_s
int32_t batch_len, blen_t, dist_t;

if (t == 0) {
while ((batch_len = s->q.batch_len[batch]) == 0) { nanosleep(100); }
while ((batch_len = s->q.batch_len[batch]) == 0) {}
} else {
batch_len = 0;
}
Expand Down
10 changes: 0 additions & 10 deletions cpp/src/io/utilities/block_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,6 @@ inline __device__ void syncwarp(void) { __syncwarp(); }

inline __device__ uint32_t ballot(int pred) { return __ballot_sync(~0, pred); }

template <typename T>
inline __device__ void nanosleep(T d)
{
#if (__CUDA_ARCH__ >= 700)
__nanosleep(d);
#else
clock();
#endif
}

// Warp reduction helpers
template <typename T>
inline __device__ T WarpReduceSum2(T acc)
Expand Down

0 comments on commit a600005

Please sign in to comment.