-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Faster findall for bitarrays #29888
Faster findall for bitarrays #29888
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1510,37 +1510,70 @@ function findprev(testf::Function, B::BitArray, start::Integer) | |
end | ||
#findlast(testf::Function, B::BitArray) = findprev(testf, B, 1) ## defined in array.jl | ||
|
||
# findall helper functions | ||
# Generic case (>2 dimensions) | ||
function allindices!(I, B::BitArray) | ||
ind = first(keys(B)) | ||
for k = 1:length(B) | ||
I[k] = ind | ||
ind = nextind(B, ind) | ||
end | ||
end | ||
|
||
# Optimized case for vector | ||
function allindices!(I, B::BitVector) | ||
I[:] .= 1:length(B) | ||
end | ||
|
||
# Optimized case for matrix | ||
function allindices!(I, B::BitMatrix) | ||
k = 1 | ||
for c = 1:size(B,2), r = 1:size(B,1) | ||
I[k] = CartesianIndex(r, c) | ||
k += 1 | ||
end | ||
end | ||
|
||
@inline overflowind(i1, irest::Tuple{}, size) = (i1, irest) | ||
@inline function overflowind(i1, irest, size) | ||
i2 = irest[1] | ||
while i1 > size[1] | ||
i1 -= size[1] | ||
i2 += 1 | ||
end | ||
i2, irest = overflowind(i2, tail(irest), tail(size)) | ||
return (i1, (i2, irest...)) | ||
end | ||
|
||
@inline toind(i1, irest::Tuple{}) = i1 | ||
@inline toind(i1, irest) = CartesianIndex(i1, irest...) | ||
|
||
function findall(B::BitArray) | ||
l = length(B) | ||
nnzB = count(B) | ||
ind = first(keys(B)) | ||
I = Vector{typeof(ind)}(undef, nnzB) | ||
I = Vector{eltype(keys(B))}(undef, nnzB) | ||
nnzB == 0 && return I | ||
nnzB == length(B) && (allindices!(I, B); return I) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It annoyed me too that I needed almost as many lines of code for the for B in [trues(100000), trues(200, 200), trues(50, 50, 50), trues(16, 16, 16, 16)]
print(size(B)); @btime findall_optimized($B)
print(size(B)); @btime vec(collect(keys($B)))
end With results:
In fact, for matrices, it would be better then to turn off this special case optimization. Timings for
While I think some performance can be sacrificed for simpler code, IMO the degradation for matrices is a bit much. Can you think of a performant solution that works for arrays of all dimensions? If not, two alternatives are: 1) keep There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the thorough testing here. I think what you have makes sense and is just fine. |
||
Bc = B.chunks | ||
Icount = 1 | ||
for i = 1:length(Bc)-1 | ||
u = UInt64(1) | ||
c = Bc[i] | ||
for j = 1:64 | ||
if c & u != 0 | ||
I[Icount] = ind | ||
Icount += 1 | ||
end | ||
ind = nextind(B, ind) | ||
u <<= 1 | ||
end | ||
end | ||
u = UInt64(1) | ||
c = Bc[end] | ||
for j = 0:_mod64(l-1) | ||
if c & u != 0 | ||
I[Icount] = ind | ||
Icount += 1 | ||
Bs = size(B) | ||
Bi = i1 = i = 1 | ||
irest = ntuple(one, length(B.dims) - 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dang, constant propagation is amazing — I had to check that this was type stable. I would slightly prefer There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, great, wasn't aware of |
||
c = Bc[1] | ||
@inbounds while true | ||
while c == 0 | ||
Bi == length(Bc) && return I | ||
i1 += 64 | ||
Bi += 1 | ||
c = Bc[Bi] | ||
end | ||
ind = nextind(B, ind) | ||
u <<= 1 | ||
|
||
tz = trailing_zeros(c) | ||
c = _blsr(c) | ||
|
||
i1, irest = overflowind(i1 + tz, irest, Bs) | ||
I[i] = toind(i1, irest) | ||
i += 1 | ||
i1 -= tz | ||
end | ||
return I | ||
end | ||
|
||
# For performance | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -1159,9 +1159,29 @@ timesofar("datamove") | |||||
@test findnextnot((.~(b1 >> i)) .⊻ submask, j) == i+1 | ||||||
end | ||||||
|
||||||
# Do a few more thorough tests for findall | ||||||
b1 = bitrand(n1, n2) | ||||||
@check_bit_operation findall(b1) Vector{CartesianIndex{2}} | ||||||
@check_bit_operation findall(!iszero, b1) Vector{CartesianIndex{2}} | ||||||
|
||||||
# tall-and-skinny (test index overflow logic in findall) | ||||||
@check_bit_operation findall(bitrand(1, 1, 1, 250)) Vector{CartesianIndex{4}} | ||||||
|
||||||
# empty dimensions | ||||||
@check_bit_operation findall(bitrand(0, 0, 10)) Vector{CartesianIndex{3}} | ||||||
|
||||||
# sparse (test empty 64-bit chunks in findall) | ||||||
b1 = falses(8, 8, 8) | ||||||
b1[3,3,3] = b1[6,6,6] = true | ||||||
@check_bit_operation findall(b1) Vector{CartesianIndex{3}} | ||||||
|
||||||
# BitArrays of various dimensions | ||||||
for dims = 2:8 | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Let's also add tests for 0-dimensional arrays — they work due to the early exits, but would fail the general algorithm if that wasn't the case. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea. Had to update the code slightly to work for the 1-dimensional case. |
||||||
t = Tuple(fill(2, dims)) | ||||||
@check_bit_operation findall(trues(t)) Vector{CartesianIndex{dims}} | ||||||
@check_bit_operation findall(falses(t)) Vector{CartesianIndex{dims}} | ||||||
@check_bit_operation findall(bitrand(t)) Vector{CartesianIndex{dims}} | ||||||
end | ||||||
end | ||||||
|
||||||
timesofar("find") | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd prefer to name this
_overflowind
(andtoind
below to_toind
) — they're helper functions that are only relevant to this one method, but those are fairly common names and likely to be mistaken forto_indices
.