From 9b5d953d9b273b302aaf1466f87b9b044177b5ba Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@alumni.harvard.edu>
Date: Tue, 22 May 2018 19:17:43 -0400
Subject: [PATCH] Make reinterprets that would expose padding an error

In #25908 it was noted that reinterpreting structures with paddings
exposes undef LLVM values to user code. This is problematic, because
an LLVM undef value is quite dangerous (it can have a different value
at every use, e.g. for `a::Bool` undef, we can have `a || !a == true`.
There are proposal in LLVM to create values that are merely arbitrary
(but the same at every use), but that capability does not currently
exist in LLVM. As such, we should try hard to prevent `undef` showing
up in a user-visible way. There are several ways to fix this:
 1. Wait until LLVM comes up with a safer `undef` and have the value merely
    be arbitrary, but not dangerous.
 2. Always guarantee that padding bytes will be 0.
 3. For contiguous-memory arrays, guarantee that we end up with the underlying
    bytes from that array.

However, for now, I think don't think we should make a choice here. Issues
like #21912, may play into the consideration, and I think we should be
able to reserve making a choice until that point. So what this PR does
is only allow reinterprets when they would not expose padding. This should
hopefully cover the most common use cases of reinterpret:
 - Reinterpreting a vector or matrix of values to StaticVectors of the same
   element type. These should generally always have compatiable padding (if
   not, reinterpret was likely the wrong API to use).
 - Reinterpreting from a Vector{UInt8} to a vector of structs (that may have padding).
   This PR allows this for reading (but not for writing). Both cases are generally
   better served by the IO APIs, but hopefully this should still allow the
   common cases.

Fixes #25908
---
 base/atomics.jl          |   6 +-
 base/iterators.jl        |   3 +
 base/reinterpretarray.jl | 122 ++++++++++++++++++++++++++++++++++++++-
 base/sysimg.jl           |   3 +-
 test/reinterpretarray.jl |  19 ++++++
 5 files changed, 147 insertions(+), 6 deletions(-)

diff --git a/base/atomics.jl b/base/atomics.jl
index cb7b2259168b8..99a98cecfdbdc 100644
--- a/base/atomics.jl
+++ b/base/atomics.jl
@@ -337,7 +337,7 @@ inttype(::Type{Float32}) = Int32
 inttype(::Type{Float64}) = Int64
 
 
-alignment(::Type{T}) where {T} = ccall(:jl_alignment, Cint, (Csize_t,), sizeof(T))
+gc_alignment(::Type{T}) where {T} = ccall(:jl_alignment, Cint, (Csize_t,), sizeof(T))
 
 # All atomic operations have acquire and/or release semantics, depending on
 # whether the load or store values. Most of the time, this is what one wants
@@ -350,13 +350,13 @@ for typ in atomictypes
     @eval getindex(x::Atomic{$typ}) =
         llvmcall($"""
                  %ptr = inttoptr i$WORD_SIZE %0 to $lt*
-                 %rv = load atomic $rt %ptr acquire, align $(alignment(typ))
+                 %rv = load atomic $rt %ptr acquire, align $(gc_alignment(typ))
                  ret $lt %rv
                  """, $typ, Tuple{Ptr{$typ}}, unsafe_convert(Ptr{$typ}, x))
     @eval setindex!(x::Atomic{$typ}, v::$typ) =
         llvmcall($"""
                  %ptr = inttoptr i$WORD_SIZE %0 to $lt*
-                 store atomic $lt %1, $lt* %ptr release, align $(alignment(typ))
+                 store atomic $lt %1, $lt* %ptr release, align $(gc_alignment(typ))
                  ret void
                  """, Cvoid, Tuple{Ptr{$typ}, $typ}, unsafe_convert(Ptr{$typ}, x), v)
 
diff --git a/base/iterators.jl b/base/iterators.jl
index 0401d114d8932..4223323ac351b 100644
--- a/base/iterators.jl
+++ b/base/iterators.jl
@@ -1031,6 +1031,9 @@ mutable struct Stateful{T, VS}
     # A bit awkward right now, but adapted to the new iteration protocol
     nextvalstate::Union{VS, Nothing}
     taken::Int
+    @inline function Stateful{<:Any, Any}(itr::T) where {T}
+        new{T, Any}(itr, iterate(itr), 0)
+    end
     @inline function Stateful(itr::T) where {T}
         VS = approx_iter_type(T)
         new{T, VS}(itr, iterate(itr)::VS, 0)
diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl
index 126cbac9eee6b..651afda4260cd 100644
--- a/base/reinterpretarray.jl
+++ b/base/reinterpretarray.jl
@@ -7,6 +7,8 @@ the first dimension.
 """
 struct ReinterpretArray{T,N,S,A<:AbstractArray{S, N}} <: AbstractArray{T, N}
     parent::A
+    readable::Bool
+    writable::Bool
     global reinterpret
     function reinterpret(::Type{T}, a::A) where {T,N,S,A<:AbstractArray{S, N}}
         function throwbits(::Type{S}, ::Type{T}, ::Type{U}) where {S,T,U}
@@ -31,10 +33,32 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S, N}} <: AbstractArray{T, N}
             dim = size(a)[1]
             rem(dim*sizeof(S),sizeof(T)) == 0 || thrownonint(S, T, dim)
         end
-        new{T, N, S, A}(a)
+        readable = array_subpadding(T, S)
+        writable = array_subpadding(S, T)
+        new{T, N, S, A}(a, readable, writable)
     end
 end
 
+function check_readable(a::ReinterpretArray{T, N, S} where N) where {T,S}
+    # See comment in check_writable
+    if !a.readable && !array_subpadding(T, S)
+        throw(PaddingError(T, S))
+    end
+end
+
+function check_writable(a::ReinterpretArray{T, N, S} where N) where {T,S}
+    # `array_subpadding` is relatively expensive (compared to a simple arrayref),
+    # so it is cached in the array. However, it is computable at compile time if,
+    # inference has the types available. By using this form of the check, we can
+    # get the best of both worlds for the success case. If the types were not
+    # available to inference, we simply need to check the field (relatively cheap)
+    # and if they were we should be able to fold this check away entirely.
+    if !a.writable && !array_subpadding(S, T)
+        throw(PaddingError(T, S))
+    end
+end
+
+
 parent(a::ReinterpretArray) = a.parent
 dataids(a::ReinterpretArray) = dataids(a.parent)
 
@@ -51,6 +75,7 @@ unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} =
 @inline @propagate_inbounds getindex(a::ReinterpretArray) = a[1]
 
 @inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, inds::Vararg{Int, N}) where {T,N,S}
+    check_readable(a)
     # Make sure to match the scalar reinterpret if that is applicable
     if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0
         return reinterpret(T, a.parent[inds...])
@@ -85,6 +110,7 @@ end
 @inline @propagate_inbounds setindex!(a::ReinterpretArray, v) = (a[1] = v)
 
 @inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, inds::Vararg{Int, N}) where {T,N,S}
+    check_writable(a)
     v = convert(T, v)::T
     # Make sure to match the scalar reinterpret if that is applicable
     if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0
@@ -136,3 +162,97 @@ end
     end
     return a
 end
+
+# Padding
+struct Padding
+    offset::Int
+    size::Int
+end
+function intersect(p1::Padding, p2::Padding)
+    start = max(p1.offset, p2.offset)
+    stop = min(p1.offset + p1.size, p2.offset + p2.size)
+    Padding(start, max(0, stop-start))
+end
+
+struct PaddingError
+    S::Type
+    T::Type
+end
+
+function showerror(io::IO, p::PaddingError)
+    print(io, "Padding of type $(p.S) is not compatible with type $(p.T).")
+end
+
+"""
+    CyclePadding(padding, total_size)
+
+Cylces an iterator of `Padding` structs, restarting the padding at `total_size`.
+E.g. if `padding` is all the padding in a struct and `total_size` is the total
+aligned size of that array, `CyclePadding` will correspond to the padding in an
+infinite vector of such structs.
+"""
+struct CyclePadding{P}
+    padding::P
+    total_size::Int
+end
+eltype(::Type{<:CyclePadding}) = Padding
+IteratorSize(::Type{<:CyclePadding}) = IsInfinite()
+isempty(cp::CyclePadding) = isempty(cp.padding)
+function iterate(cp::CyclePadding)
+    y = iterate(cp.padding)
+    y === nothing && return nothing
+    y[1], (0, y[2])
+end
+function iterate(cp::CyclePadding, state::Tuple)
+    y = iterate(cp.padding, tail(state)...)
+    y === nothing && return iterate(cp, (state[1]+cp.total_size,))
+    Padding(y[1].offset+state[1], y[1].size), (state[1], tail(y)...)
+end
+
+"""
+    Compute the location of padding in a type.
+"""
+function padding(T)
+    padding = Padding[]
+    last_end::Int = 0
+    for i = 1:fieldcount(T)
+        offset = fieldoffset(T, i)
+        fT = fieldtype(T, i)
+        if offset != last_end
+            push!(padding, Padding(offset, offset-last_end))
+        end
+        last_end = offset + sizeof(fT)
+    end
+    padding
+end
+
+function CyclePadding(T::DataType)
+    a, s = datatype_alignment(T), sizeof(T)
+    as = s + (a - (s % a)) % a
+    pad = padding(T)
+    s != as && push!(pad, Padding(s, as - s))
+    CyclePadding(pad, as)
+end
+
+using .Iterators: Stateful
+@pure function array_subpadding(S, T)
+    checked_size = 0
+    lcm_size = lcm(sizeof(S), sizeof(T))
+    s, t = Stateful{<:Any, Any}(CyclePadding(S)),
+           Stateful{<:Any, Any}(CyclePadding(T))
+    isempty(t) && return true
+    isempty(s) && return false
+    while checked_size < lcm_size
+        # Take padding in T
+        pad = popfirst!(t)
+        # See if there's corresponding padding in S
+        while true
+            ps = peek(s)
+            ps.offset > pad.offset && return false
+            intersect(ps, pad) == pad && break
+            popfirst!(s)
+        end
+        checked_size = pad.offset + pad.size
+    end
+    return true
+end
diff --git a/base/sysimg.jl b/base/sysimg.jl
index a707be20977c2..0c84a43857202 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -137,8 +137,6 @@ include("array.jl")
 include("abstractarray.jl")
 include("subarray.jl")
 include("views.jl")
-include("reinterpretarray.jl")
-
 
 # ## dims-type-converting Array constructors for convenience
 # type and dimensionality specified, accepting dims as series of Integers
@@ -205,6 +203,7 @@ include("reduce.jl")
 
 ## core structures
 include("reshapedarray.jl")
+include("reinterpretarray.jl")
 include("bitarray.jl")
 include("bitset.jl")
 
diff --git a/test/reinterpretarray.jl b/test/reinterpretarray.jl
index fcec17f6b2dff..d802434cb2759 100644
--- a/test/reinterpretarray.jl
+++ b/test/reinterpretarray.jl
@@ -49,3 +49,22 @@ let A = collect(reshape(1:20, 5, 4))
     @test view(R, :, :) isa StridedArray
     @test reshape(R, :) isa StridedArray
 end
+
+# Error on reinterprets that would expose padding
+struct S1
+    a::Int8
+    b::Int64
+end
+
+struct S2
+    a::Int16
+    b::Int64
+end
+
+A1 = S1[S1(0, 0)]
+A2 = S2[S2(0, 0)]
+@test reinterpret(S1, A2)[1] == S1(0, 0)
+@test_throws Base.PaddingError (reinterpret(S1, A2)[1] = S2(1, 2))
+@test_throws Base.PaddingError reinterpret(S2, A1)[1]
+reinterpret(S2, A1)[1] = S2(1, 2)
+@test A1[1] == S1(1, 2)