Skip to content

Commit

Permalink
Don't allow reinterprets that would expose padding
Browse files Browse the repository at this point in the history
In #25908 it was noted that reinterpreting structures with paddings
exposes undef LLVM values to user code. This is problematic, because
an LLVM undef value is quite dangerous (it can have a different value
at every use, e.g. for `a::Bool` undef, we can have `a || !a == true`.
There are proposal in LLVM to create values that are merely arbitrary
(but the same at every use), but that capability does not currently
exist in LLVM. As such, we should try hard to prevent `undef` showing
up in a user-visible way. There are several ways to fix this:
 1. Wait until LLVM comes up with a safer `undef` and have the value merely
    be arbitrary, but not dangerous.
 2. Always guarantee that padding bytes will be 0.
 3. For contiguous-memory arrays, guarantee that we end up with the underlying
    bytes from that array.

However, for now, I think don't think we should make a choice here. Issues
like #21912, may play into the consideration, and I think we should be
able to reserve making a choice until that point. So what this PR does
is only allow reinterprets when they would not expose padding. This should
hopefully cover the most common use cases of reinterpret:
 - Reinterpreting a vector or matrix of values to StaticVectors of the same
   element type. These should generally always have compatiable padding (if
   not, reinterpret was likely the wrong API to use).
 - Reinterpreting from a Vector{UInt8} to a vector of structs (that may have padding).
   This PR allows this for reading (but not for writing). Both cases are generally
   better served by the IO APIs, but hopefully this should still allow the
   common cases.

Fixes #25908
  • Loading branch information
Keno committed Jun 30, 2018
1 parent c670739 commit 4d6e516
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 6 deletions.
6 changes: 3 additions & 3 deletions base/atomics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ inttype(::Type{Float32}) = Int32
inttype(::Type{Float64}) = Int64


alignment(::Type{T}) where {T} = ccall(:jl_alignment, Cint, (Csize_t,), sizeof(T))
gc_alignment(::Type{T}) where {T} = ccall(:jl_alignment, Cint, (Csize_t,), sizeof(T))

# All atomic operations have acquire and/or release semantics, depending on
# whether the load or store values. Most of the time, this is what one wants
Expand All @@ -350,13 +350,13 @@ for typ in atomictypes
@eval getindex(x::Atomic{$typ}) =
llvmcall($"""
%ptr = inttoptr i$WORD_SIZE %0 to $lt*
%rv = load atomic $rt %ptr acquire, align $(alignment(typ))
%rv = load atomic $rt %ptr acquire, align $(gc_alignment(typ))
ret $lt %rv
""", $typ, Tuple{Ptr{$typ}}, unsafe_convert(Ptr{$typ}, x))
@eval setindex!(x::Atomic{$typ}, v::$typ) =
llvmcall($"""
%ptr = inttoptr i$WORD_SIZE %0 to $lt*
store atomic $lt %1, $lt* %ptr release, align $(alignment(typ))
store atomic $lt %1, $lt* %ptr release, align $(gc_alignment(typ))
ret void
""", Cvoid, Tuple{Ptr{$typ}, $typ}, unsafe_convert(Ptr{$typ}, x), v)

Expand Down
3 changes: 3 additions & 0 deletions base/iterators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1030,6 +1030,9 @@ mutable struct Stateful{T, VS}
# A bit awkward right now, but adapted to the new iteration protocol
nextvalstate::Union{VS, Nothing}
taken::Int
@inline function Stateful{<:Any, Any}(itr::T) where {T}
new{T, Any}(itr, iterate(itr), 0)
end
@inline function Stateful(itr::T) where {T}
VS = approx_iter_type(T)
new{T, VS}(itr, iterate(itr)::VS, 0)
Expand Down
122 changes: 121 additions & 1 deletion base/reinterpretarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ the first dimension.
"""
struct ReinterpretArray{T,N,S,A<:AbstractArray{S, N}} <: AbstractArray{T, N}
parent::A
readable::Bool
writable::Bool
global reinterpret
function reinterpret(::Type{T}, a::A) where {T,N,S,A<:AbstractArray{S, N}}
function throwbits(::Type{S}, ::Type{T}, ::Type{U}) where {S,T,U}
Expand All @@ -31,10 +33,32 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S, N}} <: AbstractArray{T, N}
dim = size(a)[1]
rem(dim*sizeof(S),sizeof(T)) == 0 || thrownonint(S, T, dim)
end
new{T, N, S, A}(a)
readable = array_subpadding(T, S)
writable = array_subpadding(S, T)
new{T, N, S, A}(a, readable, writable)
end
end

function check_readable(a::ReinterpretArray{T, N, S} where N) where {T,S}
# See comment in check_writable
if !a.readable && !array_subpadding(T, S)
throw(PaddingError(T, S))
end
end

function check_writable(a::ReinterpretArray{T, N, S} where N) where {T,S}
# `array_subpadding` is relatively expensive (compared to a simple arrayref),
# so it is cached in the array. However, it is computable at compile time if,
# inference has the types available. By using this form of the check, we can
# get the best of both worlds for the success case. If the types were not
# available to inference, we simply need to check the field (relatively cheap)
# and if they were we should be able to fold this check away entirely.
if !a.writable && !array_subpadding(S, T)
throw(PaddingError(T, S))
end
end


parent(a::ReinterpretArray) = a.parent
dataids(a::ReinterpretArray) = dataids(a.parent)

Expand All @@ -51,6 +75,7 @@ unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} =
@inline @propagate_inbounds getindex(a::ReinterpretArray) = a[1]

@inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, inds::Vararg{Int, N}) where {T,N,S}
check_readable(a)
# Make sure to match the scalar reinterpret if that is applicable
if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0
return reinterpret(T, a.parent[inds...])
Expand Down Expand Up @@ -85,6 +110,7 @@ end
@inline @propagate_inbounds setindex!(a::ReinterpretArray, v) = (a[1] = v)

@inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, inds::Vararg{Int, N}) where {T,N,S}
check_writable(a)
v = convert(T, v)::T
# Make sure to match the scalar reinterpret if that is applicable
if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0
Expand Down Expand Up @@ -136,3 +162,97 @@ end
end
return a
end

# Padding
struct Padding
offset::Int
size::Int
end
function intersect(p1::Padding, p2::Padding)
start = max(p1.offset, p2.offset)
stop = min(p1.offset + p1.size, p2.offset + p2.size)
Padding(start, max(0, stop-start))
end

struct PaddingError
S::Type
T::Type
end

function showerror(io::IO, p::PaddingError)
print(io, "Padding of type $(p.S) is not compatible with type $(p.T).")
end

"""
CyclePadding(padding, total_size)
Cylces an iterator of `Padding` structs, restarting the padding at `total_size`.
E.g. if `padding` is all the padding in a struct and `total_size` is the total
aligned size of that array, `CyclePadding` will correspond to the padding in an
infinite vector of such structs.
"""
struct CyclePadding{P}
padding::P
total_size::Int
end
eltype(::Type{<:CyclePadding}) = Padding
IteratorSize(::Type{<:CyclePadding}) = IsInfinite()
isempty(cp::CyclePadding) = isempty(cp.padding)
function iterate(cp::CyclePadding)
y = iterate(cp.padding)
y === nothing && return nothing
y[1], (0, y[2])
end
function iterate(cp::CyclePadding, state::Tuple)
y = iterate(cp.padding, tail(state)...)
y === nothing && return iterate(cp, (state[1]+cp.total_size,))
Padding(y[1].offset+state[1], y[1].size), (state[1], tail(y)...)
end

"""
Compute the location of padding in a type.
"""
function padding(T)
padding = Padding[]
last_end::Int = 0
for i = 1:fieldcount(T)
offset = fieldoffset(T, i)
fT = fieldtype(T, i)
if offset != last_end
push!(padding, Padding(offset, offset-last_end))
end
last_end = offset + sizeof(fT)
end
padding
end

function CyclePadding(T::DataType)
a, s = datatype_alignment(T), sizeof(T)
as = s + (a - (s % a)) % a
pad = padding(T)
s != as && push!(pad, Padding(s, as - s))
CyclePadding(pad, as)
end

using .Iterators: Stateful
@pure function array_subpadding(S, T)
checked_size = 0
lcm_size = lcm(sizeof(S), sizeof(T))
s, t = Stateful{<:Any, Any}(CyclePadding(S)),
Stateful{<:Any, Any}(CyclePadding(T))
isempty(t) && return true
isempty(s) && return false
while checked_size < lcm_size
# Take padding in T
pad = popfirst!(t)
# See if there's corresponding padding in S
while true
ps = peek(s)
ps.offset > pad.offset && return false
intersect(ps, pad) == pad && break
popfirst!(s)
end
checked_size = pad.offset + pad.size
end
return true
end
3 changes: 1 addition & 2 deletions base/sysimg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,6 @@ include("array.jl")
include("abstractarray.jl")
include("subarray.jl")
include("views.jl")
include("reinterpretarray.jl")


# ## dims-type-converting Array constructors for convenience
# type and dimensionality specified, accepting dims as series of Integers
Expand Down Expand Up @@ -205,6 +203,7 @@ include("reduce.jl")

## core structures
include("reshapedarray.jl")
include("reinterpretarray.jl")
include("bitarray.jl")
include("bitset.jl")

Expand Down
19 changes: 19 additions & 0 deletions test/reinterpretarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,22 @@ let A = collect(reshape(1:20, 5, 4))
@test view(R, :, :) isa StridedArray
@test reshape(R, :) isa StridedArray
end

# Error on reinterprets that would expose padding
struct S1
a::Int8
b::Int64
end

struct S2
a::Int16
b::Int64
end

A1 = S1[S1(0, 0)]
A2 = S2[S2(0, 0)]
@test reinterpret(S1, A2)[1] == S1(0, 0)
@test_throws Base.PaddingError (reinterpret(S1, A2)[1] = S2(1, 2))
@test_throws Base.PaddingError reinterpret(S2, A1)[1]
reinterpret(S2, A1)[1] = S2(1, 2)
@test A1[1] == S1(1, 2)

0 comments on commit 4d6e516

Please sign in to comment.