-
Notifications
You must be signed in to change notification settings - Fork 919
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Resolve racecheck errors in ORC kernels #9916
Changes from 4 commits
cac1f58
c9db3c7
eba4778
eefe523
48273db
6116a12
188f7b2
76cc2b0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -650,13 +650,11 @@ static __device__ uint32_t Integer_RLEv2(orc_bytestream_s* bs, | |
int t, | ||
bool has_buffered_values = false) | ||
{ | ||
uint32_t numvals, numruns; | ||
int r, tr; | ||
|
||
if (t == 0) { | ||
uint32_t maxpos = min(bs->len, bs->pos + (bytestream_buffer_size - 8u)); | ||
uint32_t lastpos = bs->pos; | ||
numvals = numruns = 0; | ||
auto numvals = 0; | ||
auto numruns = 0; | ||
// Find the length and start location of each run | ||
while (numvals < maxvals) { | ||
uint32_t pos = lastpos; | ||
|
@@ -713,9 +711,9 @@ static __device__ uint32_t Integer_RLEv2(orc_bytestream_s* bs, | |
} | ||
__syncthreads(); | ||
// Process the runs, 1 warp per run | ||
numruns = rle->num_runs; | ||
r = t >> 5; | ||
tr = t & 0x1f; | ||
auto const numruns = rle->num_runs; | ||
auto const r = t >> 5; | ||
auto const tr = t & 0x1f; | ||
for (uint32_t run = r; run < numruns; run += num_warps) { | ||
uint32_t base, pos, w, n; | ||
int mode; | ||
|
@@ -731,7 +729,7 @@ static __device__ uint32_t Integer_RLEv2(orc_bytestream_s* bs, | |
w = 8 + (byte0 & 0x38); // 8 to 64 bits | ||
n = 3 + (byte0 & 7); // 3 to 10 values | ||
bytestream_readbe(bs, pos * 8, w, baseval); | ||
if (sizeof(T) <= 4) { | ||
if constexpr (sizeof(T) <= 4) { | ||
rle->baseval.u32[r] = baseval; | ||
} else { | ||
rle->baseval.u64[r] = baseval; | ||
|
@@ -766,7 +764,7 @@ static __device__ uint32_t Integer_RLEv2(orc_bytestream_s* bs, | |
int64_t delta; | ||
// Delta | ||
pos = decode_varint(bs, pos, baseval); | ||
if (sizeof(T) <= 4) { | ||
if constexpr (sizeof(T) <= 4) { | ||
rle->baseval.u32[r] = baseval; | ||
} else { | ||
rle->baseval.u64[r] = baseval; | ||
|
@@ -782,6 +780,7 @@ static __device__ uint32_t Integer_RLEv2(orc_bytestream_s* bs, | |
pos = shuffle(pos); | ||
n = shuffle(n); | ||
w = shuffle(w); | ||
__syncwarp(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure this one is needed here, as our … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'll add a comment. I really want to move towards error-free memcheck/racecheck reports. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This one resolves the following:
|
||
for (uint32_t i = tr; i < n; i += 32) { | ||
if (sizeof(T) <= 4) { | ||
if (mode == 0) { | ||
|
@@ -860,14 +859,15 @@ static __device__ uint32_t Integer_RLEv2(orc_bytestream_s* bs, | |
if (j & i) vals[base + j] += vals[base + ((j & ~i) | (i - 1))]; | ||
} | ||
} | ||
if (sizeof(T) <= 4) | ||
if constexpr (sizeof(T) <= 4) | ||
baseval = rle->baseval.u32[r]; | ||
else | ||
baseval = rle->baseval.u64[r]; | ||
for (uint32_t j = tr; j < n; j += 32) { | ||
vals[base + j] += baseval; | ||
} | ||
} | ||
__syncwarp(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This one fixes the following warning:
|
||
} | ||
__syncthreads(); | ||
return rle->num_vals; | ||
|
@@ -1679,11 +1679,12 @@ __global__ void __launch_bounds__(block_size) | |
} | ||
} | ||
} | ||
if (t == 0 && numvals + vals_skipped > 0 && numvals < s->top.data.max_vals) { | ||
if (s->chunk.type_kind == TIMESTAMP) { | ||
s->top.data.buffered_count = s->top.data.max_vals - numvals; | ||
if (t == 0 && numvals + vals_skipped > 0) { | ||
auto const max_vals = s->top.data.max_vals; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Workaround for a presumed false positive:
|
||
if (max_vals > numvals) { | ||
if (s->chunk.type_kind == TIMESTAMP) { s->top.data.buffered_count = max_vals - numvals; } | ||
s->top.data.max_vals = numvals; | ||
vuule marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
s->top.data.max_vals = numvals; | ||
} | ||
__syncthreads(); | ||
// Use the valid bits to compute non-null row positions until we get a full batch of values to | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -705,10 +705,7 @@ static __device__ void encode_null_mask(orcenc_state_s* s, | |
} | ||
|
||
// reset shared state | ||
if (t == 0) { | ||
s->nnz = 0; | ||
s->numvals = 0; | ||
} | ||
if (t == 0) { s->nnz = 0; } | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixes the error:
Resetting |
||
} | ||
|
||
/** | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can't spot the fix in this file. Is this code cleanup only?