From 4d6e516844d969d2761be2638bf4a55808bc9e0f Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 22 May 2018 19:17:43 -0400 Subject: [PATCH] Don't allow reinterprets that would expose padding In #25908 it was noted that reinterpreting structures with paddings exposes undef LLVM values to user code. This is problematic, because an LLVM undef value is quite dangerous (it can have a different value at every use, e.g. for `a::Bool` undef, we can have `a || !a == true`. There are proposal in LLVM to create values that are merely arbitrary (but the same at every use), but that capability does not currently exist in LLVM. As such, we should try hard to prevent `undef` showing up in a user-visible way. There are several ways to fix this: 1. Wait until LLVM comes up with a safer `undef` and have the value merely be arbitrary, but not dangerous. 2. Always guarantee that padding bytes will be 0. 3. For contiguous-memory arrays, guarantee that we end up with the underlying bytes from that array. However, for now, I think don't think we should make a choice here. Issues like #21912, may play into the consideration, and I think we should be able to reserve making a choice until that point. So what this PR does is only allow reinterprets when they would not expose padding. This should hopefully cover the most common use cases of reinterpret: - Reinterpreting a vector or matrix of values to StaticVectors of the same element type. These should generally always have compatiable padding (if not, reinterpret was likely the wrong API to use). - Reinterpreting from a Vector{UInt8} to a vector of structs (that may have padding). This PR allows this for reading (but not for writing). Both cases are generally better served by the IO APIs, but hopefully this should still allow the common cases. Fixes #25908 --- base/atomics.jl | 6 +- base/iterators.jl | 3 + base/reinterpretarray.jl | 122 ++++++++++++++++++++++++++++++++++++++- base/sysimg.jl | 3 +- test/reinterpretarray.jl | 19 ++++++ 5 files changed, 147 insertions(+), 6 deletions(-) diff --git a/base/atomics.jl b/base/atomics.jl index cb7b2259168b8..99a98cecfdbdc 100644 --- a/base/atomics.jl +++ b/base/atomics.jl @@ -337,7 +337,7 @@ inttype(::Type{Float32}) = Int32 inttype(::Type{Float64}) = Int64 -alignment(::Type{T}) where {T} = ccall(:jl_alignment, Cint, (Csize_t,), sizeof(T)) +gc_alignment(::Type{T}) where {T} = ccall(:jl_alignment, Cint, (Csize_t,), sizeof(T)) # All atomic operations have acquire and/or release semantics, depending on # whether the load or store values. Most of the time, this is what one wants @@ -350,13 +350,13 @@ for typ in atomictypes @eval getindex(x::Atomic{$typ}) = llvmcall($""" %ptr = inttoptr i$WORD_SIZE %0 to $lt* - %rv = load atomic $rt %ptr acquire, align $(alignment(typ)) + %rv = load atomic $rt %ptr acquire, align $(gc_alignment(typ)) ret $lt %rv """, $typ, Tuple{Ptr{$typ}}, unsafe_convert(Ptr{$typ}, x)) @eval setindex!(x::Atomic{$typ}, v::$typ) = llvmcall($""" %ptr = inttoptr i$WORD_SIZE %0 to $lt* - store atomic $lt %1, $lt* %ptr release, align $(alignment(typ)) + store atomic $lt %1, $lt* %ptr release, align $(gc_alignment(typ)) ret void """, Cvoid, Tuple{Ptr{$typ}, $typ}, unsafe_convert(Ptr{$typ}, x), v) diff --git a/base/iterators.jl b/base/iterators.jl index 5f0c5a8721a72..67bb9e03f6761 100644 --- a/base/iterators.jl +++ b/base/iterators.jl @@ -1030,6 +1030,9 @@ mutable struct Stateful{T, VS} # A bit awkward right now, but adapted to the new iteration protocol nextvalstate::Union{VS, Nothing} taken::Int + @inline function Stateful{<:Any, Any}(itr::T) where {T} + new{T, Any}(itr, iterate(itr), 0) + end @inline function Stateful(itr::T) where {T} VS = approx_iter_type(T) new{T, VS}(itr, iterate(itr)::VS, 0) diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl index 126cbac9eee6b..651afda4260cd 100644 --- a/base/reinterpretarray.jl +++ b/base/reinterpretarray.jl @@ -7,6 +7,8 @@ the first dimension. """ struct ReinterpretArray{T,N,S,A<:AbstractArray{S, N}} <: AbstractArray{T, N} parent::A + readable::Bool + writable::Bool global reinterpret function reinterpret(::Type{T}, a::A) where {T,N,S,A<:AbstractArray{S, N}} function throwbits(::Type{S}, ::Type{T}, ::Type{U}) where {S,T,U} @@ -31,10 +33,32 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S, N}} <: AbstractArray{T, N} dim = size(a)[1] rem(dim*sizeof(S),sizeof(T)) == 0 || thrownonint(S, T, dim) end - new{T, N, S, A}(a) + readable = array_subpadding(T, S) + writable = array_subpadding(S, T) + new{T, N, S, A}(a, readable, writable) end end +function check_readable(a::ReinterpretArray{T, N, S} where N) where {T,S} + # See comment in check_writable + if !a.readable && !array_subpadding(T, S) + throw(PaddingError(T, S)) + end +end + +function check_writable(a::ReinterpretArray{T, N, S} where N) where {T,S} + # `array_subpadding` is relatively expensive (compared to a simple arrayref), + # so it is cached in the array. However, it is computable at compile time if, + # inference has the types available. By using this form of the check, we can + # get the best of both worlds for the success case. If the types were not + # available to inference, we simply need to check the field (relatively cheap) + # and if they were we should be able to fold this check away entirely. + if !a.writable && !array_subpadding(S, T) + throw(PaddingError(T, S)) + end +end + + parent(a::ReinterpretArray) = a.parent dataids(a::ReinterpretArray) = dataids(a.parent) @@ -51,6 +75,7 @@ unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = @inline @propagate_inbounds getindex(a::ReinterpretArray) = a[1] @inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, inds::Vararg{Int, N}) where {T,N,S} + check_readable(a) # Make sure to match the scalar reinterpret if that is applicable if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0 return reinterpret(T, a.parent[inds...]) @@ -85,6 +110,7 @@ end @inline @propagate_inbounds setindex!(a::ReinterpretArray, v) = (a[1] = v) @inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, inds::Vararg{Int, N}) where {T,N,S} + check_writable(a) v = convert(T, v)::T # Make sure to match the scalar reinterpret if that is applicable if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0 @@ -136,3 +162,97 @@ end end return a end + +# Padding +struct Padding + offset::Int + size::Int +end +function intersect(p1::Padding, p2::Padding) + start = max(p1.offset, p2.offset) + stop = min(p1.offset + p1.size, p2.offset + p2.size) + Padding(start, max(0, stop-start)) +end + +struct PaddingError + S::Type + T::Type +end + +function showerror(io::IO, p::PaddingError) + print(io, "Padding of type $(p.S) is not compatible with type $(p.T).") +end + +""" + CyclePadding(padding, total_size) + +Cylces an iterator of `Padding` structs, restarting the padding at `total_size`. +E.g. if `padding` is all the padding in a struct and `total_size` is the total +aligned size of that array, `CyclePadding` will correspond to the padding in an +infinite vector of such structs. +""" +struct CyclePadding{P} + padding::P + total_size::Int +end +eltype(::Type{<:CyclePadding}) = Padding +IteratorSize(::Type{<:CyclePadding}) = IsInfinite() +isempty(cp::CyclePadding) = isempty(cp.padding) +function iterate(cp::CyclePadding) + y = iterate(cp.padding) + y === nothing && return nothing + y[1], (0, y[2]) +end +function iterate(cp::CyclePadding, state::Tuple) + y = iterate(cp.padding, tail(state)...) + y === nothing && return iterate(cp, (state[1]+cp.total_size,)) + Padding(y[1].offset+state[1], y[1].size), (state[1], tail(y)...) +end + +""" + Compute the location of padding in a type. +""" +function padding(T) + padding = Padding[] + last_end::Int = 0 + for i = 1:fieldcount(T) + offset = fieldoffset(T, i) + fT = fieldtype(T, i) + if offset != last_end + push!(padding, Padding(offset, offset-last_end)) + end + last_end = offset + sizeof(fT) + end + padding +end + +function CyclePadding(T::DataType) + a, s = datatype_alignment(T), sizeof(T) + as = s + (a - (s % a)) % a + pad = padding(T) + s != as && push!(pad, Padding(s, as - s)) + CyclePadding(pad, as) +end + +using .Iterators: Stateful +@pure function array_subpadding(S, T) + checked_size = 0 + lcm_size = lcm(sizeof(S), sizeof(T)) + s, t = Stateful{<:Any, Any}(CyclePadding(S)), + Stateful{<:Any, Any}(CyclePadding(T)) + isempty(t) && return true + isempty(s) && return false + while checked_size < lcm_size + # Take padding in T + pad = popfirst!(t) + # See if there's corresponding padding in S + while true + ps = peek(s) + ps.offset > pad.offset && return false + intersect(ps, pad) == pad && break + popfirst!(s) + end + checked_size = pad.offset + pad.size + end + return true +end diff --git a/base/sysimg.jl b/base/sysimg.jl index cb82306742dc7..ecf97f5b3f0e7 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -137,8 +137,6 @@ include("array.jl") include("abstractarray.jl") include("subarray.jl") include("views.jl") -include("reinterpretarray.jl") - # ## dims-type-converting Array constructors for convenience # type and dimensionality specified, accepting dims as series of Integers @@ -205,6 +203,7 @@ include("reduce.jl") ## core structures include("reshapedarray.jl") +include("reinterpretarray.jl") include("bitarray.jl") include("bitset.jl") diff --git a/test/reinterpretarray.jl b/test/reinterpretarray.jl index fcec17f6b2dff..d802434cb2759 100644 --- a/test/reinterpretarray.jl +++ b/test/reinterpretarray.jl @@ -49,3 +49,22 @@ let A = collect(reshape(1:20, 5, 4)) @test view(R, :, :) isa StridedArray @test reshape(R, :) isa StridedArray end + +# Error on reinterprets that would expose padding +struct S1 + a::Int8 + b::Int64 +end + +struct S2 + a::Int16 + b::Int64 +end + +A1 = S1[S1(0, 0)] +A2 = S2[S2(0, 0)] +@test reinterpret(S1, A2)[1] == S1(0, 0) +@test_throws Base.PaddingError (reinterpret(S1, A2)[1] = S2(1, 2)) +@test_throws Base.PaddingError reinterpret(S2, A1)[1] +reinterpret(S2, A1)[1] = S2(1, 2) +@test A1[1] == S1(1, 2)