From d7cd0235259128a6eef8aa8087ce43a0aa2dc86e Mon Sep 17 00:00:00 2001
From: Carlo Baldassi
Date: Thu, 12 Nov 2015 00:09:30 -0500
Subject: [PATCH] Speed up BitArray packing

Changes in convert(::BitArray, ::AbstractArray) and dumpbitcache():

* avoid branches in for loops
* pack 8 Bools at a time
* use inbounds
---
 base/bitarray.jl  |  8 ++------
 base/broadcast.jl | 28 +++++++++++++++++++++++-----
 2 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/base/bitarray.jl b/base/bitarray.jl
index 845b040ce7498..83194d470dcef 100644
--- a/base/bitarray.jl
+++ b/base/bitarray.jl
@@ -310,21 +310,17 @@ function convert{T,N}(::Type{BitArray{N}}, A::AbstractArray{T,N})
     ind = 1
     @inbounds begin
         for i = 1:length(Bc)-1
-            u = UInt64(1)
             c = UInt64(0)
             for j = 0:63
-                A[ind]!=0 && (c |= u)
+                c |= (UInt64(A[ind] != 0) << j)
                 ind += 1
-                u <<= 1
             end
             Bc[i] = c
         end
-        u = UInt64(1)
         c = UInt64(0)
         for j = 0:_mod64(l-1)
-            A[ind]!=0 && (c |= u)
+            c |= (UInt64(A[ind] != 0) << j)
             ind += 1
-            u <<= 1
         end
         Bc[end] = c
     end
diff --git a/base/broadcast.jl b/base/broadcast.jl
index 32666ed6a32c8..efb0eb1173c5b 100644
--- a/base/broadcast.jl
+++ b/base/broadcast.jl
@@ -103,16 +103,34 @@ end
 const bitcache_chunks = 64 # this can be changed
 const bitcache_size = 64 * bitcache_chunks # do not change this
 
+function bpack(z::UInt64)
+    z |= z >>> 7
+    z |= z >>> 14
+    z |= z >>> 28
+    z &= 0xFF
+    return z
+end
+
 function dumpbitcache(Bc::Vector{UInt64}, bind::Int, C::Vector{Bool})
     ind = 1
     nc = min(bitcache_chunks, length(Bc)-bind+1)
-    for i = 1:nc
-        u = UInt64(1)
+    C8 = reinterpret(UInt64, C)
+    nc8 = (nc >>> 3) << 3
+    @inbounds for i = 1:nc8
+        c = UInt64(0)
+        for j = 0:8:63
+            c |= (bpack(C8[ind]) << j)
+            ind += 1
+        end
+        Bc[bind] = c
+        bind += 1
+    end
+    ind = (ind-1) << 3 + 1
+    @inbounds for i = (nc8+1):nc
         c = UInt64(0)
-        for j = 1:64
-            C[ind] && (c |= u)
+        for j = 0:63
+            c |= (UInt64(C[ind]) << j)
             ind += 1
-            u <<= 1
         end
         Bc[bind] = c
         bind += 1