From 2ae88306ee62e074412bcdf352fdb0d7a934124f Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Sat, 17 Sep 2022 23:43:23 -0600 Subject: [PATCH 1/3] Define nextind/prevind for ChainedVectorIndex The `BitVector(::AbstractVector{Bool})` method uses the `Base.nextind` method to increment the inds from `eachindex` that I didn't know existed. I'm not exactly sure why they use that instead of just iterating `eachindex` but it turns out we can define `nextind` to be pretty efficient for `ChainedVectorIndex` since we basically already have that logic in `eachindex(::ChainedVector)`. Fixes the issue reported in https://github.com/JuliaData/SentinelArrays.jl/issues/74. --- src/chainedvector.jl | 39 +++++++++++++++++++++++++++++++++++++-- test/chainedvector.jl | 16 ++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/src/chainedvector.jl b/src/chainedvector.jl index 4fd3ec4..efa5f9c 100644 --- a/src/chainedvector.jl +++ b/src/chainedvector.jl @@ -196,6 +196,7 @@ end # custom index type used in eachindex struct ChainedVectorIndex{A} <: Integer + arrays_i::Int array::A array_i::Int i::Int @@ -230,6 +231,40 @@ function Base.getindex(A::ChainedVector{T}, inds::AbstractVector{<:ChainedVector return x end +function Base.nextind(A::ChainedVector, x::ChainedVectorIndex) + chunkidx = x.arrays_i + chunk = x.array + chunk_i = x.array_i + i = x.i + if chunk_i < length(chunk) + chunk_i += 1 + i += 1 + elseif chunkidx < length(A.arrays) + chunkidx += 1 + @inbounds chunk = A.arrays[chunkidx] + chunk_i = 1 + i += 1 + end + return ChainedVectorIndex(chunkidx, chunk, chunk_i, i) +end + +function Base.prevind(A::ChainedVector, x::ChainedVectorIndex) + chunkidx = x.arrays_i + chunk = x.array + chunk_i = x.array_i + i = x.i + if chunk_i > 1 + chunk_i -= 1 + i -= 1 + elseif chunkidx > 1 + chunkidx -= 1 + @inbounds chunk = A.arrays[chunkidx] + chunk_i = length(chunk) + i -= 1 + end + return ChainedVectorIndex(chunkidx, chunk, chunk_i, i) +end + # efficient iteration via eachindex struct IndexIterator{A} arrays::Vector{A} @@ -251,7 +286,7 @@ end chunkidx = chunk_i = 1 @inbounds chunk = arrays[chunkidx] # we already ran cleanup! so chunks are guaranteed non-empty - return ChainedVectorIndex(chunk, chunk_i, 1), (arrays, chunkidx, chunk, length(chunk), chunk_i + 1, 2) + return ChainedVectorIndex(chunkidx, chunk, chunk_i, 1), (arrays, chunkidx, chunk, length(chunk), chunk_i + 1, 2) end @inline function Base.iterate(x::IndexIterator, (arrays, chunkidx, chunk, chunklen, chunk_i, i)) @@ -262,7 +297,7 @@ end chunklen = length(chunk) chunk_i = 1 end - return ChainedVectorIndex(chunk, chunk_i, i), (arrays, chunkidx, chunk, chunklen, chunk_i + 1, i + 1) + return ChainedVectorIndex(chunkidx, chunk, chunk_i, i), (arrays, chunkidx, chunk, chunklen, chunk_i + 1, i + 1) end @inline function Base.iterate(A::ChainedVector) diff --git a/test/chainedvector.jl b/test/chainedvector.jl index dde897a..b4f62ee 100644 --- a/test/chainedvector.jl +++ b/test/chainedvector.jl @@ -510,3 +510,19 @@ end append!(x, [2]) @test x[end] == 2 end + +@testset "prevind/nextind ChainedVector" begin + x = ChainedVector([collect(1:i) for i = 10:100]) + ind = first(eachindex(x)) + for i = 1:length(x) + @test x[ind] == x[i] + ind = nextind(x, ind) + end + for i = length(x):-1:1 + @test x[ind] == x[i] + ind = prevind(x, ind) + end + # https://github.com/JuliaData/SentinelArrays.jl/issues/74 + x = ChainedVector([[true], [false], [true]]) + @test BitVector(x) == [true, false, true] +end From 37c72bdf6de34f1fcc425cec2750622d6be443f0 Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Sat, 17 Sep 2022 23:57:51 -0600 Subject: [PATCH 2/3] fix nightly test --- test/chainedvector.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/chainedvector.jl b/test/chainedvector.jl index b4f62ee..b2318ec 100644 --- a/test/chainedvector.jl +++ b/test/chainedvector.jl @@ -194,7 +194,7 @@ # https://github.com/JuliaData/SentinelArrays.jl/issues/57 cx1 = ChainedVector([[1, 2], [3]]) cx2 = ChainedVector([[1.1, 2.2], [3.3]]) - @test ChainedVector([cx1, cx2]) isa ChainedVector{Float64, <:ChainedVector{Float64}} + @test ChainedVector([cx1, cx2]) isa ChainedVector{Float64} x = ChainedVector([[1], [2], [3]]) y = map(v -> v == 1 ? missing : v, x) From 037ef0c51467e00f52bc5bc333601b1313ff9997 Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Mon, 19 Sep 2022 12:11:53 -0600 Subject: [PATCH 3/3] fix out of bounds cases --- src/chainedvector.jl | 12 ++++++++++-- test/chainedvector.jl | 5 ++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/chainedvector.jl b/src/chainedvector.jl index efa5f9c..27f1382 100644 --- a/src/chainedvector.jl +++ b/src/chainedvector.jl @@ -213,11 +213,15 @@ Base.hash(x::ChainedVectorIndex, h::UInt) = hash(x.i, h) @inline Base.getindex(x::ChainedVectorIndex) = @inbounds x.array[x.array_i] -@inline function Base.getindex(A::ChainedVector, x::ChainedVectorIndex) +Base.checkbounds(::Type{Bool}, A::ChainedVector, ind::ChainedVectorIndex) = 1 <= ind.array_i <= length(ind.array) + +Base.@propagate_inbounds function Base.getindex(A::ChainedVector, x::ChainedVectorIndex) + @boundscheck checkbounds(A, x) return @inbounds x.array[x.array_i] end -@inline function Base.setindex!(A::ChainedVector, v, x::ChainedVectorIndex) +Base.@propagate_inbounds function Base.setindex!(A::ChainedVector, v, x::ChainedVectorIndex) + @boundscheck checkbounds(A, x) @inbounds x.array[x.array_i] = v return v end @@ -244,6 +248,8 @@ function Base.nextind(A::ChainedVector, x::ChainedVectorIndex) @inbounds chunk = A.arrays[chunkidx] chunk_i = 1 i += 1 + else + chunk_i += 1 # make sure this goes out of bounds end return ChainedVectorIndex(chunkidx, chunk, chunk_i, i) end @@ -261,6 +267,8 @@ function Base.prevind(A::ChainedVector, x::ChainedVectorIndex) @inbounds chunk = A.arrays[chunkidx] chunk_i = length(chunk) i -= 1 + else + chunk_i -= 1 # make sure this goes out of bounds end return ChainedVectorIndex(chunkidx, chunk, chunk_i, i) end diff --git a/test/chainedvector.jl b/test/chainedvector.jl index b2318ec..b39ce7e 100644 --- a/test/chainedvector.jl +++ b/test/chainedvector.jl @@ -518,10 +518,13 @@ end @test x[ind] == x[i] ind = nextind(x, ind) end + @test_throws BoundsError x[ind] for i = length(x):-1:1 - @test x[ind] == x[i] ind = prevind(x, ind) + @test x[ind] == x[i] end + ind = prevind(x, ind) + @test_throws BoundsError x[ind] # https://github.com/JuliaData/SentinelArrays.jl/issues/74 x = ChainedVector([[true], [false], [true]]) @test BitVector(x) == [true, false, true]