From 8fd5d61dd260ea770e8b117178afa29658090807 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@alumni.harvard.edu>
Date: Sat, 9 Sep 2017 19:30:55 -0400
Subject: [PATCH] Implement ReinterpretArray
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This redoes `reinterpret` in julia rather than punning the memory
of the actual array. The motivation for this is to avoid the API
limitations of the current reinterpret implementation (Array only,
preventing strong TBAA, alignment problems). The surface API
essentially unchanged, though the shape argument to reinterpret
is removed, since those concepts are now orthogonal. The return
type from `reinterpret` is now `ReinterpretArray`, which implements
the AbstractArray interface and does the reinterpreting lazily on
demand. The compiler is able to fold away the abstraction and
generate very tight IR:

```
julia> ar = reinterpret(Complex{Int64}, rand(Int64, 1000));

julia> typeof(ar)
Base.ReinterpretArray{Complex{Int64},Int64,1,Array{Int64,1}}

julia> f(ar) = @inbounds return ar[1]
f (generic function with 1 method)

julia> @code_llvm f(ar)

; Function f
; Location: REPL[2]
define void @julia_f_63575({ i64, i64 } addrspace(11)* noalias nocapture sret, %jl_value_t addrspace(10)* dereferenceable(8)) #0 {
top:
; Location: REPL[2]:1
; Function getindex; {
; Location: reinterpretarray.jl:31
  %2 = addrspacecast %jl_value_t addrspace(10)* %1 to %jl_value_t addrspace(11)*
  %3 = bitcast %jl_value_t addrspace(11)* %2 to %jl_value_t addrspace(10)* addrspace(11)*
  %4 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %3, align 8
  %5 = addrspacecast %jl_value_t addrspace(10)* %4 to %jl_value_t addrspace(11)*
  %6 = bitcast %jl_value_t addrspace(11)* %5 to i64* addrspace(11)*
  %7 = load i64*, i64* addrspace(11)* %6, align 8
  %8 = load i64, i64* %7, align 8
  %9 = getelementptr i64, i64* %7, i64 1
  %10 = load i64, i64* %9, align 8
  %.sroa.0.0..sroa_idx = getelementptr inbounds { i64, i64 }, { i64, i64 } addrspace(11)* %0, i64 0, i32 0
  store i64 %8, i64 addrspace(11)* %.sroa.0.0..sroa_idx, align 8
  %.sroa.3.0..sroa_idx13 = getelementptr inbounds { i64, i64 }, { i64, i64 } addrspace(11)* %0, i64 0, i32 1
  store i64 %10, i64 addrspace(11)* %.sroa.3.0..sroa_idx13, align 8
;}
  ret void
}

julia> g(a) = @inbounds return reinterpret(Complex{Int64}, a)[1]
g (generic function with 1 method)

julia> @code_llvm g(randn(1000))

; Function g
; Location: REPL[4]
define void @julia_g_63642({ i64, i64 } addrspace(11)* noalias nocapture sret, %jl_value_t addrspace(10)* dereferenceable(40)) #0 {
top:
; Location: REPL[4]:1
; Function getindex; {
; Location: reinterpretarray.jl:31
  %2 = addrspacecast %jl_value_t addrspace(10)* %1 to %jl_value_t addrspace(11)*
  %3 = bitcast %jl_value_t addrspace(11)* %2 to double* addrspace(11)*
  %4 = load double*, double* addrspace(11)* %3, align 8
  %5 = bitcast double* %4 to i64*
  %6 = load i64, i64* %5, align 8
  %7 = getelementptr double, double* %4, i64 1
  %8 = bitcast double* %7 to i64*
  %9 = load i64, i64* %8, align 8
  %.sroa.0.0..sroa_idx = getelementptr inbounds { i64, i64 }, { i64, i64 } addrspace(11)* %0, i64 0, i32 0
  store i64 %6, i64 addrspace(11)* %.sroa.0.0..sroa_idx, align 8
  %.sroa.3.0..sroa_idx13 = getelementptr inbounds { i64, i64 }, { i64, i64 } addrspace(11)* %0, i64 0, i32 1
  store i64 %9, i64 addrspace(11)* %.sroa.3.0..sroa_idx13, align 8
;}
  ret void
}
```

In addition, the new `reinterpret` implementation is able to handle any AbstractArray
(whether useful or not is a separate decision):

```
invoke(reinterpret, Tuple{Type{Complex{Float64}}, AbstractArray}, Complex{Float64}, speye(10))
5×10 Base.ReinterpretArray{Complex{Float64},Float64,2,SparseMatrixCSC{Float64,Int64}}:
 1.0+0.0im  0.0+1.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im
 0.0+0.0im  0.0+0.0im  1.0+0.0im  0.0+1.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im
 0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  1.0+0.0im  0.0+1.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im
 0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  1.0+0.0im  0.0+1.0im  0.0+0.0im  0.0+0.0im
 0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  0.0+0.0im  1.0+0.0im  0.0+1.0im
```

The remaining todo is to audit the uses of reinterpret in base. I've fixed up the uses themselves, but there's
code deeper in the array code that needs to be broadened to allow ReinterpretArray.

Fixes #22849
Fixes #19238
---
 base/array.jl                 |  27 --------
 base/essentials.jl            |  10 +--
 base/io.jl                    |   7 +-
 base/linalg/factorization.jl  |   4 +-
 base/linalg/lq.jl             |   6 +-
 base/linalg/matmul.jl         |  10 +--
 base/linalg/qr.jl             |   4 +-
 base/random/dSFMT.jl          |  11 +--
 base/reinterpretarray.jl      | 127 ++++++++++++++++++++++++++++++++++
 base/show.jl                  |   6 ++
 base/sparse/abstractsparse.jl |  18 ++++-
 base/sparse/sparsematrix.jl   |  30 --------
 base/sparse/sparsevector.jl   |   8 +--
 base/sparse/spqr.jl           |   4 +-
 base/sysimg.jl                |   8 ++-
 src/array.c                   |   1 +
 test/arrayops.jl              |  10 +--
 test/choosetests.jl           |   3 +-
 test/core.jl                  |  20 ------
 test/inference.jl             |   2 +-
 test/reinterpretarray.jl      |  24 +++++++
 test/sparse/sparse.jl         |   4 +-
 test/sparse/sparsevector.jl   |   2 +-
 23 files changed, 214 insertions(+), 132 deletions(-)
 create mode 100644 base/reinterpretarray.jl
 create mode 100644 test/reinterpretarray.jl

diff --git a/base/array.jl b/base/array.jl
index 91e9f800f8cb4..b8ebcb909ec91 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -218,33 +218,6 @@ original.
 """
 copy(a::T) where {T<:Array} = ccall(:jl_array_copy, Ref{T}, (Any,), a)
 
-function reinterpret(::Type{T}, a::Array{S,1}) where T where S
-    nel = Int(div(length(a) * sizeof(S), sizeof(T)))
-    # TODO: maybe check that remainder is zero?
-    return reinterpret(T, a, (nel,))
-end
-
-function reinterpret(::Type{T}, a::Array{S}) where T where S
-    if sizeof(S) != sizeof(T)
-        throw(ArgumentError("result shape not specified"))
-    end
-    reinterpret(T, a, size(a))
-end
-
-function reinterpret(::Type{T}, a::Array{S}, dims::NTuple{N,Int}) where T where S where N
-    function throwbits(::Type{S}, ::Type{T}, ::Type{U}) where {S,T,U}
-        @_noinline_meta
-        throw(ArgumentError("cannot reinterpret Array{$(S)} to ::Type{Array{$(T)}}, type $(U) is not a bits type"))
-    end
-    isbits(T) || throwbits(S, T, T)
-    isbits(S) || throwbits(S, T, S)
-    nel = div(length(a) * sizeof(S), sizeof(T))
-    if prod(dims) != nel
-        _throw_dmrsa(dims, nel)
-    end
-    ccall(:jl_reshape_array, Array{T,N}, (Any, Any, Any), Array{T,N}, a, dims)
-end
-
 # reshaping to same # of dimensions
 function reshape(a::Array{T,N}, dims::NTuple{N,Int}) where T where N
     if prod(dims) != length(a)
diff --git a/base/essentials.jl b/base/essentials.jl
index 450058d81322c..d58bdf5a9c87c 100644
--- a/base/essentials.jl
+++ b/base/essentials.jl
@@ -321,20 +321,12 @@ unsafe_convert(::Type{P}, x::Ptr) where {P<:Ptr} = convert(P, x)
     reinterpret(type, A)
 
 Change the type-interpretation of a block of memory.
-For arrays, this constructs an array with the same binary data as the given
+For arrays, this constructs a view of the array with the same binary data as the given
 array, but with the specified element type.
 For example,
 `reinterpret(Float32, UInt32(7))` interprets the 4 bytes corresponding to `UInt32(7)` as a
 [`Float32`](@ref).
 
-!!! warning
-
-    It is not allowed to `reinterpret` an array to an element type with a larger alignment then
-    the alignment of the array. For a normal `Array`, this is the alignment of its element type.
-    For a reinterpreted array, this is the alignment of the `Array` it was reinterpreted from.
-    For example, `reinterpret(UInt32, UInt8[0, 0, 0, 0])` is not allowed but
-    `reinterpret(UInt32, reinterpret(UInt8, Float32[1.0]))` is allowed.
-
 # Examples
 ```jldoctest
 julia> reinterpret(Float32, UInt32(7))
diff --git a/base/io.jl b/base/io.jl
index 4b3a74e30e264..cfb65ff25fcb6 100644
--- a/base/io.jl
+++ b/base/io.jl
@@ -267,15 +267,16 @@ readlines(s=STDIN; chomp::Bool=true) = collect(eachline(s, chomp=chomp))
 
 ## byte-order mark, ntoh & hton ##
 
-let endian_boms = reinterpret(UInt8, UInt32[0x01020304])
+a = UInt32[0x01020304]
+let endian_bom = unsafe_load(convert(Ptr{UInt8}, pointer(a)))
     global ntoh, hton, ltoh, htol
-    if endian_boms == UInt8[1:4;]
+    if endian_bom == 0x01
         ntoh(x) = x
         hton(x) = x
         ltoh(x) = bswap(x)
         htol(x) = bswap(x)
         const global ENDIAN_BOM = 0x01020304
-    elseif endian_boms == UInt8[4:-1:1;]
+    elseif endian_bom == 0x04
         ntoh(x) = bswap(x)
         hton(x) = bswap(x)
         ltoh(x) = x
diff --git a/base/linalg/factorization.jl b/base/linalg/factorization.jl
index 9acaef101ccaa..6ab24cfc42fa9 100644
--- a/base/linalg/factorization.jl
+++ b/base/linalg/factorization.jl
@@ -56,9 +56,9 @@ Base.isequal(F::T, G::T) where {T<:Factorization} = all(f -> isequal(getfield(F,
 # With a real lhs and complex rhs with the same precision, we can reinterpret
 # the complex rhs as a real rhs with twice the number of columns
 function (\)(F::Factorization{T}, B::VecOrMat{Complex{T}}) where T<:BlasReal
-    c2r = reshape(transpose(reinterpret(T, B, (2, length(B)))), size(B, 1), 2*size(B, 2))
+    c2r = reshape(transpose(reinterpret(T, reshape(B, (1, length(B))))), size(B, 1), 2*size(B, 2))
     x = A_ldiv_B!(F, c2r)
-    return reinterpret(Complex{T}, transpose(reshape(x, div(length(x), 2), 2)), _ret_size(F, B))
+    return reshape(collect(reinterpret(Complex{T}, transpose(reshape(x, div(length(x), 2), 2)))), _ret_size(F, B))
 end
 
 for (f1, f2) in ((:\, :A_ldiv_B!),
diff --git a/base/linalg/lq.jl b/base/linalg/lq.jl
index b878468617cf8..356b3e99b9228 100644
--- a/base/linalg/lq.jl
+++ b/base/linalg/lq.jl
@@ -267,10 +267,10 @@ end
 # With a real lhs and complex rhs with the same precision, we can reinterpret
 # the complex rhs as a real rhs with twice the number of columns
 function (\)(F::LQ{T}, B::VecOrMat{Complex{T}}) where T<:BlasReal
-    c2r = reshape(transpose(reinterpret(T, B, (2, length(B)))), size(B, 1), 2*size(B, 2))
+    c2r = reshape(transpose(reinterpret(T, reshape(B, (1, length(B))))), size(B, 1), 2*size(B, 2))
     x = A_ldiv_B!(F, c2r)
-    return reinterpret(Complex{T}, transpose(reshape(x, div(length(x), 2), 2)),
-        isa(B, AbstractVector) ? (size(F,2),) : (size(F,2), size(B,2)))
+    return reshape(collect(reinterpret(Complex{T}, transpose(reshape(x, div(length(x), 2), 2)))),
+                           isa(B, AbstractVector) ? (size(F,2),) : (size(F,2), size(B,2)))
 end
 
 
diff --git a/base/linalg/matmul.jl b/base/linalg/matmul.jl
index 0ca518a58fb41..88885be32dcf5 100644
--- a/base/linalg/matmul.jl
+++ b/base/linalg/matmul.jl
@@ -90,7 +90,7 @@ A_mul_B!(y::StridedVector{T}, A::StridedVecOrMat{T}, x::StridedVector{T}) where
 for elty in (Float32,Float64)
     @eval begin
         function A_mul_B!(y::StridedVector{Complex{$elty}}, A::StridedVecOrMat{Complex{$elty}}, x::StridedVector{$elty})
-            Afl = reinterpret($elty,A,(2size(A,1),size(A,2)))
+            Afl = reinterpret($elty,A)
             yfl = reinterpret($elty,y)
             gemv!(yfl,'N',Afl,x)
             return y
@@ -148,8 +148,8 @@ A_mul_B!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, B::StridedVecOrMat{T}) wher
 for elty in (Float32,Float64)
     @eval begin
         function A_mul_B!(C::StridedMatrix{Complex{$elty}}, A::StridedVecOrMat{Complex{$elty}}, B::StridedVecOrMat{$elty})
-            Afl = reinterpret($elty, A, (2size(A,1), size(A,2)))
-            Cfl = reinterpret($elty, C, (2size(C,1), size(C,2)))
+            Afl = reinterpret($elty, A)
+            Cfl = reinterpret($elty, C)
             gemm_wrapper!(Cfl, 'N', 'N', Afl, B)
             return C
         end
@@ -190,8 +190,8 @@ A_mul_Bt!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, B::StridedVecOrMat{T}) whe
 for elty in (Float32,Float64)
     @eval begin
         function A_mul_Bt!(C::StridedMatrix{Complex{$elty}}, A::StridedVecOrMat{Complex{$elty}}, B::StridedVecOrMat{$elty})
-            Afl = reinterpret($elty, A, (2size(A,1), size(A,2)))
-            Cfl = reinterpret($elty, C, (2size(C,1), size(C,2)))
+            Afl = reinterpret($elty, A)
+            Cfl = reinterpret($elty, C)
             gemm_wrapper!(Cfl, 'N', 'T', Afl, B)
             return C
         end
diff --git a/base/linalg/qr.jl b/base/linalg/qr.jl
index 859ccd659bed9..0090880dbe027 100644
--- a/base/linalg/qr.jl
+++ b/base/linalg/qr.jl
@@ -923,7 +923,7 @@ function (\)(A::Union{QR{T},QRCompactWY{T},QRPivoted{T}}, BIn::VecOrMat{Complex{
 # |z2|z4|      ->       |y1|y2|y3|y4|     ->      |x2|y2|     ->    |x2|y2|x4|y4|
 #                                                 |x3|y3|
 #                                                 |x4|y4|
-    B = reshape(transpose(reinterpret(T, BIn, (2, length(BIn)))), size(BIn, 1), 2*size(BIn, 2))
+    B = reshape(transpose(reinterpret(T, reshape(BIn, (1, length(BIn))))), size(BIn, 1), 2*size(BIn, 2))
 
     X = A_ldiv_B!(A, _append_zeros(B, T, n))
 
@@ -931,7 +931,7 @@ function (\)(A::Union{QR{T},QRCompactWY{T},QRPivoted{T}}, BIn::VecOrMat{Complex{
 # |z2|z4|      <-       |y1|y2|y3|y4|     <-      |x2|y2|     <-    |x2|y2|x4|y4|
 #                                                 |x3|y3|
 #                                                 |x4|y4|
-    XX = reinterpret(Complex{T}, transpose(reshape(X, div(length(X), 2), 2)), _ret_size(A, BIn))
+    XX = reshape(collect(reinterpret(Complex{T}, transpose(reshape(X, div(length(X), 2), 2)))), _ret_size(A, BIn))
     return _cut_B(XX, 1:n)
 end
 
diff --git a/base/random/dSFMT.jl b/base/random/dSFMT.jl
index 2061cc54f9741..d4ae974dbe9fc 100644
--- a/base/random/dSFMT.jl
+++ b/base/random/dSFMT.jl
@@ -104,7 +104,8 @@ function dsfmt_jump(s::DSFMT_state, jp::AbstractString)
     val = s.val
     nval = length(val)
     index = val[nval - 1]
-    work = zeros(UInt64, JN32 >> 1)
+    work = zeros(Int32, JN32)
+    rwork = reinterpret(UInt64, work)
     dsfmt = Vector{UInt64}(nval >> 1)
     ccall(:memcpy, Ptr{Void}, (Ptr{UInt64}, Ptr{Int32}, Csize_t),
           dsfmt, val, (nval - 1) * sizeof(Int32))
@@ -113,17 +114,17 @@ function dsfmt_jump(s::DSFMT_state, jp::AbstractString)
     for c in jp
         bits = parse(UInt8,c,16)
         for j in 1:4
-            (bits & 0x01) != 0x00 && dsfmt_jump_add!(work, dsfmt)
+            (bits & 0x01) != 0x00 && dsfmt_jump_add!(rwork, dsfmt)
             bits = bits >> 0x01
             dsfmt_jump_next_state!(dsfmt)
         end
     end
 
-    work[end] = index
-    return DSFMT_state(reinterpret(Int32, work))
+    rwork[end] = index
+    return DSFMT_state(work)
 end
 
-function dsfmt_jump_add!(dest::Vector{UInt64}, src::Vector{UInt64})
+function dsfmt_jump_add!(dest::AbstractVector{UInt64}, src::Vector{UInt64})
     dp = dest[end] >> 1
     sp = src[end] >> 1
     diff = ((sp - dp + N) % N)
diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl
new file mode 100644
index 0000000000000..5a5a524f1e810
--- /dev/null
+++ b/base/reinterpretarray.jl
@@ -0,0 +1,127 @@
+"""
+Gives a reinterpreted view (of element type T) of the underlying array (of element type S).
+If the size of `T` differs from the size of `S`, the array will be compressed/expanded in
+the first dimension.
+"""
+struct ReinterpretArray{T,N,S,A<:AbstractArray{S, N}} <: AbstractArray{T, N}
+    parent::A
+    function reinterpret(::Type{T}, a::A) where {T,N,S,A<:AbstractArray{S, N}}
+        function throwbits(::Type{S}, ::Type{T}, ::Type{U}) where {S,T,U}
+            @_noinline_meta
+            throw(ArgumentError("cannot reinterpret `$(S)` `$(T)`, type `$(U)` is not a bits type"))
+        end
+        function throwsize0(::Type{S}, ::Type{T})
+            @_noinline_meta
+            throw(ArgumentError("cannot reinterpret a zero-dimensional `$(S)` array to `$(T)` which is of a different size"))
+        end
+        function thrownonint(::Type{S}, ::Type{T}, dim)
+            @_noinline_meta
+            throw(ArgumentError("""
+                cannot reinterpret an `$(S)` array to `$(T)` whose first dimension has size `$(dim)`.
+                The resulting array would have non-integral first dimension.
+            """))
+        end
+        isbits(T) || throwbits(S, T, T)
+        isbits(S) || throwbits(S, T, S)
+        (N != 0 || sizeof(T) == sizeof(S)) || throwsize0(S, T)
+        if N != 0 && sizeof(S) != sizeof(T)
+            dim = size(a)[1]
+            rem(dim*sizeof(S),sizeof(T)) == 0 || thrownonint(S, T, dim)
+        end
+        new{T, N, S, A}(a)
+    end
+end
+
+parent(a::ReinterpretArray) = a.parent
+
+eltype(a::ReinterpretArray{T}) where {T} = T
+function size(a::ReinterpretArray{T,N,S} where {N}) where {T,S}
+    psize = size(a.parent)
+    size1 = div(psize[1]*sizeof(S), sizeof(T))
+    tuple(size1, tail(psize)...)
+end
+
+unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = Ptr{T}(unsafe_convert(Ptr{S},a.parent))
+
+@inline @propagate_inbounds getindex(a::ReinterpretArray{T,0}) where {T} = reinterpret(T, a.parent[])
+@inline @propagate_inbounds getindex(a::ReinterpretArray) = a[1]
+
+@inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, inds::Vararg{Int, N}) where {T,N,S}
+    if sizeof(T) == sizeof(S)
+        return reinterpret(T, a.parent[inds...])
+    else
+        ind_start, sidx = divrem((inds[1]-1)*sizeof(T), sizeof(S))
+        t = Ref{T}()
+        s = Ref{S}()
+        @gc_preserve t s begin
+            tptr = Ptr{UInt8}(unsafe_convert(Ref{T}, t))
+            sptr = Ptr{UInt8}(unsafe_convert(Ref{S}, s))
+            i = 1
+            nbytes_copied = 0
+            # This is a bit complicated to deal with partial elements
+            # at both the start and the end. LLVM will fold as appropriate,
+            # once it knows the data layout
+            while nbytes_copied < sizeof(T)
+                s[] = a.parent[ind_start + i, tail(inds)...]
+                while nbytes_copied < sizeof(T) && sidx < sizeof(S)
+                    unsafe_store!(tptr, unsafe_load(sptr, sidx + 1), nbytes_copied + 1)
+                    sidx += 1
+                    nbytes_copied += 1
+                end
+                sidx = 0
+                i += 1
+            end
+        end
+        return t[]
+    end
+end
+
+@inline @propagate_inbounds setindex!(a::ReinterpretArray{T,0,S} where T, v) where {S} = (a.parent[] = reinterpret(S, v))
+@inline @propagate_inbounds setindex!(a::ReinterpretArray, v) = (a[1] = v)
+
+@inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, inds::Vararg{Int, N}) where {T,N,S}
+    v = convert(T, v)::T
+    if sizeof(T) == sizeof(S)
+        return setindex!(a.parent, reinterpret(S, v), inds...)
+    else
+        ind_start, sidx = divrem((inds[1]-1)*sizeof(T), sizeof(S))
+        t = Ref{T}(v)
+        s = Ref{S}()
+        @gc_preserve t s begin
+            tptr = Ptr{UInt8}(unsafe_convert(Ref{T}, t))
+            sptr = Ptr{UInt8}(unsafe_convert(Ref{S}, s))
+            nbytes_copied = 0
+            i = 1
+            @inline function copy_element()
+                while nbytes_copied < sizeof(T) && sidx < sizeof(S)
+                    unsafe_store!(sptr, unsafe_load(tptr, nbytes_copied + 1), sidx + 1)
+                    sidx += 1
+                    nbytes_copied += 1
+                end
+            end
+            # Deal with any partial elements at the start. We'll have to copy in the
+            # element from the original array and overwrite the relevant parts
+            if sidx != 0
+                s[] = a.parent[ind_start + i, tail(inds)...]
+                copy_element()
+                a.parent[ind_start + i, tail(inds)...] = s[]
+                i += 1
+                sidx = 0
+            end
+            # Deal with the main body of elements
+            while nbytes_copied < sizeof(T) && (sizeof(T) - nbytes_copied) > sizeof(S)
+                copy_element()
+                a.parent[ind_start + i, tail(inds)...] = s[]
+                i += 1
+                sidx = 0
+            end
+            # Deal with trailing partial elements
+            if nbytes_copied < sizeof(T)
+                s[] = a.parent[ind_start + i, tail(inds)...]
+                copy_element()
+                a.parent[ind_start + i, tail(inds)...] = s[]
+            end
+        end
+    end
+    return a
+end
diff --git a/base/show.jl b/base/show.jl
index ce9f3eb17dff4..65c91412ebd7b 100644
--- a/base/show.jl
+++ b/base/show.jl
@@ -1888,6 +1888,12 @@ function showarg(io::IO, r::ReshapedArray, toplevel)
     toplevel && print(io, " with eltype ", eltype(r))
 end
 
+function showarg(io::IO, r::ReinterpretArray{T}, toplevel) where {T}
+    print(io, "reinterpret($T, ")
+    showarg(io, parent(r), false)
+    print(io, ')')
+end
+
 # n-dimensional arrays
 function show_nd(io::IO, a::AbstractArray, print_matrix, label_slices)
     limit::Bool = get(io, :limit, false)
diff --git a/base/sparse/abstractsparse.jl b/base/sparse/abstractsparse.jl
index e17d3be97dbcb..ca73c3b3166b2 100644
--- a/base/sparse/abstractsparse.jl
+++ b/base/sparse/abstractsparse.jl
@@ -2,7 +2,7 @@
 
 abstract type AbstractSparseArray{Tv,Ti,N} <: AbstractArray{Tv,N} end
 
-const AbstractSparseVector{Tv,Ti} = AbstractSparseArray{Tv,Ti,1}
+const AbstractSparseVector{Tv,Ti} = Union{AbstractSparseArray{Tv,Ti,1}, Base.ReinterpretArray{Tv,1,T,<:AbstractSparseArray{T,Ti,1}} where T}
 const AbstractSparseMatrix{Tv,Ti} = AbstractSparseArray{Tv,Ti,2}
 
 """
@@ -19,5 +19,21 @@ issparse(S::LowerTriangular{<:Any,<:AbstractSparseMatrix}) = true
 issparse(S::LinAlg.UnitLowerTriangular{<:Any,<:AbstractSparseMatrix}) = true
 issparse(S::UpperTriangular{<:Any,<:AbstractSparseMatrix}) = true
 issparse(S::LinAlg.UnitUpperTriangular{<:Any,<:AbstractSparseMatrix}) = true
+issparse(S::Base.ReinterpretArray) = issparse(S.parent)
 
 indtype(S::AbstractSparseArray{<:Any,Ti}) where {Ti} = Ti
+
+nonzeros(A::Base.ReinterpretArray{T}) where {T} = reinterpret(T, nonzeros(A.parent))
+function nonzeroinds(A::Base.ReinterpretArray{T,N,S} where {N}) where {T,S}
+    if sizeof(T) == sizeof(S)
+        return nonzeroinds(A.parent)
+    elseif sizeof(T) > sizeof(S)
+        unique(map(nonzeroinds(A.parent)) do ind
+            div(ind, div(sizeof(T), sizeof(S)))
+        end)
+    else
+        map(nonzeroinds(A.parent)) do ind
+            ind * div(sizeof(S), sizeof(T))
+        end
+    end
+end
\ No newline at end of file
diff --git a/base/sparse/sparsematrix.jl b/base/sparse/sparsematrix.jl
index 09255dacdaefa..098081eb15dbf 100644
--- a/base/sparse/sparsematrix.jl
+++ b/base/sparse/sparsematrix.jl
@@ -212,17 +212,6 @@ end
 
 ## Reinterpret and Reshape
 
-function reinterpret(::Type{T}, a::SparseMatrixCSC{Tv}) where {T,Tv}
-    if sizeof(T) != sizeof(Tv)
-        throw(ArgumentError("SparseMatrixCSC reinterpret is only supported for element types of the same size"))
-    end
-    mA, nA = size(a)
-    colptr = copy(a.colptr)
-    rowval = copy(a.rowval)
-    nzval  = reinterpret(T, a.nzval)
-    return SparseMatrixCSC(mA, nA, colptr, rowval, nzval)
-end
-
 function sparse_compute_reshaped_colptr_and_rowval(colptrS::Vector{Ti}, rowvalS::Vector{Ti},
                                                    mS::Int, nS::Int, colptrA::Vector{Ti},
                                                    rowvalA::Vector{Ti}, mA::Int, nA::Int) where Ti
@@ -257,25 +246,6 @@ function sparse_compute_reshaped_colptr_and_rowval(colptrS::Vector{Ti}, rowvalS:
     end
 end
 
-function reinterpret(::Type{T}, a::SparseMatrixCSC{Tv,Ti}, dims::NTuple{N,Int}) where {T,Tv,Ti,N}
-    if sizeof(T) != sizeof(Tv)
-        throw(ArgumentError("SparseMatrixCSC reinterpret is only supported for element types of the same size"))
-    end
-    if prod(dims) != length(a)
-        throw(DimensionMismatch("new dimensions $(dims) must be consistent with array size $(length(a))"))
-    end
-    mS,nS = dims
-    mA,nA = size(a)
-    numnz = nnz(a)
-    colptr = Vector{Ti}(nS+1)
-    rowval = similar(a.rowval)
-    nzval = reinterpret(T, a.nzval)
-
-    sparse_compute_reshaped_colptr_and_rowval(colptr, rowval, mS, nS, a.colptr, a.rowval, mA, nA)
-
-    return SparseMatrixCSC(mS, nS, colptr, rowval, nzval)
-end
-
 function copy(ra::ReshapedArray{<:Any,2,<:SparseMatrixCSC})
     mS,nS = size(ra)
     a = parent(ra)
diff --git a/base/sparse/sparsevector.jl b/base/sparse/sparsevector.jl
index ddaf2ef9ebf54..a8b77b8fd7a2e 100644
--- a/base/sparse/sparsevector.jl
+++ b/base/sparse/sparsevector.jl
@@ -14,7 +14,7 @@ import Base.LinAlg: promote_to_array_type, promote_to_arrays_
 
 Vector type for storing sparse vectors.
 """
-struct SparseVector{Tv,Ti<:Integer} <: AbstractSparseVector{Tv,Ti}
+struct SparseVector{Tv,Ti<:Integer} <: AbstractSparseArray{Tv,Ti,1}
     n::Int              # Length of the sparse vector
     nzind::Vector{Ti}   # Indices of stored values
     nzval::Vector{Tv}   # Stored values, typically nonzeros
@@ -856,12 +856,6 @@ vec(x::AbstractSparseVector) = x
 copy(x::AbstractSparseVector) =
     SparseVector(length(x), copy(nonzeroinds(x)), copy(nonzeros(x)))
 
-function reinterpret(::Type{T}, x::AbstractSparseVector{Tv}) where {T,Tv}
-    sizeof(T) == sizeof(Tv) ||
-        throw(ArgumentError("reinterpret of sparse vectors only supports element types of the same size."))
-    SparseVector(length(x), copy(nonzeroinds(x)), reinterpret(T, nonzeros(x)))
-end
-
 float(x::AbstractSparseVector{<:AbstractFloat}) = x
 float(x::AbstractSparseVector) =
     SparseVector(length(x), copy(nonzeroinds(x)), float(nonzeros(x)))
diff --git a/base/sparse/spqr.jl b/base/sparse/spqr.jl
index d752712027760..e1bf8119ec133 100644
--- a/base/sparse/spqr.jl
+++ b/base/sparse/spqr.jl
@@ -341,14 +341,14 @@ function (\)(F::QRSparse{Float64}, B::VecOrMat{Complex{Float64}})
 # |z2|z4|      ->       |y1|y2|y3|y4|     ->      |x2|y2|     ->    |x2|y2|x4|y4|
 #                                                 |x3|y3|
 #                                                 |x4|y4|
-    c2r = reshape(transpose(reinterpret(Float64, B, (2, length(B)))), size(B, 1), 2*size(B, 2))
+    c2r = reshape(transpose(reinterpret(Float64, reshape(B, (1, length(B))))), size(B, 1), 2*size(B, 2))
     x = F\c2r
 
 # |z1|z3|  reinterpret  |x1|x2|x3|x4|  transpose  |x1|y1|  reshape  |x1|y1|x3|y3|
 # |z2|z4|      <-       |y1|y2|y3|y4|     <-      |x2|y2|     <-    |x2|y2|x4|y4|
 #                                                 |x3|y3|
 #                                                 |x4|y4|
-    return reinterpret(Complex{Float64}, transpose(reshape(x, (length(x) >> 1), 2)), _ret_size(F, B))
+    return collect(reshape(reinterpret(Complex{Float64}, transpose(reshape(x, (length(x) >> 1), 2))), _ret_size(F, B)))
 end
 
 function _ldiv_basic(F::QRSparse, B::StridedVecOrMat)
diff --git a/base/sysimg.jl b/base/sysimg.jl
index 7609ab098fad2..15307bee62fff 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -121,6 +121,7 @@ include("indices.jl")
 include("array.jl")
 include("abstractarray.jl")
 include("subarray.jl")
+include("reinterpretarray.jl")
 
 # Array convenience converting constructors
 Array{T}(m::Integer) where {T} = Array{T,1}(Int(m))
@@ -182,15 +183,16 @@ using .Iterators: Flatten, product  # for generators
 
 # Definition of StridedArray
 StridedReshapedArray{T,N,A<:Union{DenseArray,FastContiguousSubArray}} = ReshapedArray{T,N,A}
+StridedReinterpretArray{T,N,A<:Union{DenseArray,FastContiguousSubArray}} = ReinterpretArray{T,N,S,A} where S
 StridedArray{T,N,A<:Union{DenseArray,StridedReshapedArray},
     I<:Tuple{Vararg{Union{RangeIndex, AbstractCartesianIndex}}}} =
-    Union{DenseArray{T,N}, SubArray{T,N,A,I}, StridedReshapedArray{T,N}}
+    Union{DenseArray{T,N}, SubArray{T,N,A,I}, StridedReshapedArray{T,N}, StridedReinterpretArray{T,N,A}}
 StridedVector{T,A<:Union{DenseArray,StridedReshapedArray},
     I<:Tuple{Vararg{Union{RangeIndex, AbstractCartesianIndex}}}} =
-    Union{DenseArray{T,1}, SubArray{T,1,A,I}, StridedReshapedArray{T,1}}
+    Union{DenseArray{T,1}, SubArray{T,1,A,I}, StridedReshapedArray{T,1}, StridedReinterpretArray{T,1,A}}
 StridedMatrix{T,A<:Union{DenseArray,StridedReshapedArray},
     I<:Tuple{Vararg{Union{RangeIndex, AbstractCartesianIndex}}}} =
-    Union{DenseArray{T,2}, SubArray{T,2,A,I}, StridedReshapedArray{T,2}}
+    Union{DenseArray{T,2}, SubArray{T,2,A,I}, StridedReshapedArray{T,2}, StridedReinterpretArray{T,2,A}}
 StridedVecOrMat{T} = Union{StridedVector{T}, StridedMatrix{T}}
 
 # For OS specific stuff
diff --git a/src/array.c b/src/array.c
index 0810ab9348958..e519415a6cca0 100644
--- a/src/array.c
+++ b/src/array.c
@@ -180,6 +180,7 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
     size_t ndims = jl_nfields(_dims);
     assert(is_ntuple_long(_dims));
     size_t *dims = (size_t*)_dims;
+    assert(jl_types_equal(jl_tparam0(jl_typeof(data)), jl_tparam0(atype)));
 
     int ndimwords = jl_array_ndimwords(ndims);
     int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT);
diff --git a/test/arrayops.jl b/test/arrayops.jl
index 79dd9f788a62b..be0a3ecbb141e 100644
--- a/test/arrayops.jl
+++ b/test/arrayops.jl
@@ -68,19 +68,15 @@ using Main.TestHelpers.OAs
     @test a[1,2,1,1,2] == 20
     @test a[1,1,2,2,1] == 30
 
-    @test_throws ArgumentError reinterpret(Int8, a)
-
     b = reshape(a, (32,))
     @test b[1]  == 10
     @test b[19] == 20
     @test b[13] == 30
     @test_throws DimensionMismatch reshape(b,(5,7))
     @test_throws DimensionMismatch reshape(b,(35,))
-    @test_throws DimensionMismatch reinterpret(Int, b, (35,))
-    @test_throws ArgumentError reinterpret(Any, b, (32,))
-    @test_throws DimensionMismatch reinterpret(Complex128, b, (32,))
+    @test_throws ArgumentError reinterpret(Any, b)
     c = ["hello", "world"]
-    @test_throws ArgumentError reinterpret(Float32, c, (2,))
+    @test_throws ArgumentError reinterpret(Float32, c)
     a = Vector(ones(5))
     @test_throws ArgumentError resize!(a, -2)
 
@@ -209,7 +205,7 @@ end
     @test b[5] == -4
     @test b[6] == -3
     @test b[7] == -2
-    b = reinterpret(Int, a, (3,4))
+    b = reinterpret(Int, a)
     b[1] = -1
     @test vec(b) == vec(a)
 
diff --git a/test/choosetests.jl b/test/choosetests.jl
index c769fe9d44dca..aa42294befb4b 100644
--- a/test/choosetests.jl
+++ b/test/choosetests.jl
@@ -41,7 +41,8 @@ function choosetests(choices = [])
         "enums", "cmdlineargs", "i18n", "workspace", "libdl", "int",
         "checked", "intset", "floatfuncs", "compile", "distributed", "inline",
         "boundscheck", "error", "ambiguous", "cartesian", "asmvariant", "osutils",
-        "channels", "iostream", "specificity", "codegen", "codevalidation"
+        "channels", "iostream", "specificity", "codegen", "codevalidation",
+        "reinterpretarray"
     ]
     profile_skipped = false
     if startswith(string(Sys.ARCH), "arm")
diff --git a/test/core.jl b/test/core.jl
index d5701879c43ae..bb2c2a9ddcff7 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -3953,9 +3953,6 @@ f = unsafe_wrap(Array, pointer(d), length(d))
 @test !check_nul(f)
 f = unsafe_wrap(Array, ccall(:malloc, Ptr{UInt8}, (Csize_t,), 10), 10, true)
 @test !check_nul(f)
-g = reinterpret(UInt8, UInt16[0x1, 0x2])
-@test !check_nul(g)
-@test check_nul(copy(g))
 end
 
 # Copy of `#undef`
@@ -5007,23 +5004,6 @@ end
 g21719(f, goal; tol = 1e-6) = T21719(f, tol, goal)
 @test isa(g21719(identity, 1.0; tol=0.1), T21719)
 
-# reinterpret alignment requirement
-let arr8 = zeros(UInt8, 16),
-    arr64 = zeros(UInt64, 2),
-    arr64_8 = reinterpret(UInt8, arr64),
-    arr64_i
-
-    # Not allowed to reinterpret arrays allocated as UInt8 array to a Int32 array
-    res = @test_throws ArgumentError reinterpret(Int32, arr8)
-    @test res.value.msg == "reinterpret from alignment 1 bytes to alignment 4 bytes not allowed"
-    # OK to reinterpret arrays allocated as UInt64 array to a Int64 array even though
-    # it is passed as a UInt8 array
-    arr64_i = reinterpret(Int64, arr64_8)
-    @test arr8 == arr64_8
-    arr64_i[2] = 1234
-    @test arr64[2] == 1234
-end
-
 # Alignment of perm boxes
 for i in 1:10
     # Int64 box should be 16bytes aligned even on 32bits
diff --git a/test/inference.jl b/test/inference.jl
index 81910152cf37e..1642878136df1 100644
--- a/test/inference.jl
+++ b/test/inference.jl
@@ -830,7 +830,7 @@ f2_17003(::Any) = f2_17003(NArray_17003(gl_17003))
 
 # issue #20847
 function segfaultfunction_20847(A::Vector{NTuple{N, T}}) where {N, T}
-    B = reinterpret(T, A, (N, length(A)))
+    B = reshape(reinterpret(T, A), (N, length(A)))
     return nothing
 end
 
diff --git a/test/reinterpretarray.jl b/test/reinterpretarray.jl
new file mode 100644
index 0000000000000..31d431afbf0ca
--- /dev/null
+++ b/test/reinterpretarray.jl
@@ -0,0 +1,24 @@
+using Base.Test
+
+A = Int64[1, 2, 3, 4]
+B = Complex{Int64}[5+6im, 7+8im, 9+10im]
+# getindex
+@test reinterpret(Complex{Int64}, A) == [1 + 2im, 3 + 4im]
+@test reinterpret(Float64, A) == reinterpret.(Float64, A)
+
+@test reinterpret(NTuple{3, Int64}, B) == [(5,6,7),(8,9,10)]
+
+# setindex
+let Ac = copy(A), Bc = copy(B)
+    reinterpret(Complex{Int64}, Ac)[2] = -1 - 2im
+    @test Ac == [1, 2, -1, -2]
+    reinterpret(NTuple{3, Int64}, Bc)[2] = (4,5,6)
+    @test Bc == Complex{Int64}[5+6im, 7+4im, 5+6im]
+    reinterpret(NTuple{3, Int64}, Bc)[1] = (1,2,3)
+    @test Bc == Complex{Int64}[1+2im, 3+4im, 5+6im]
+
+    A1 = reinterpret(Float64, A)
+    A2 = reinterpret(Complex{Float64}, A)
+    A1[1] = 1.0
+    @test real(A2[1]) == 1.0
+end
diff --git a/test/sparse/sparse.jl b/test/sparse/sparse.jl
index dd22123c9187e..cb448b85daedb 100644
--- a/test/sparse/sparse.jl
+++ b/test/sparse/sparse.jl
@@ -964,7 +964,7 @@ end
     ACPY = copy(A)
     B = reshape(A,25,1)
     @test A == ACPY
-    C = reinterpret(Int64, A, (25, 1))
+    C = reshape(reinterpret(Int64, A), (25, 1))
     @test A == ACPY
     D = reinterpret(Int64, copy(B))
     @test C == D
@@ -1319,8 +1319,6 @@ end
 @testset "error conditions for reinterpret, reshape, and squeeze" begin
     local A = sprand(Bool, 5, 5, 0.2)
     @test_throws ArgumentError reinterpret(Complex128, A)
-    @test_throws ArgumentError reinterpret(Complex128, A,(5, 5))
-    @test_throws DimensionMismatch reinterpret(Int8, A,(20,))
     @test_throws DimensionMismatch reshape(A,(20, 2))
     @test_throws ArgumentError squeeze(A,(1, 1))
 end
diff --git a/test/sparse/sparsevector.jl b/test/sparse/sparsevector.jl
index fcc0baac1447b..d422bdf2aeb58 100644
--- a/test/sparse/sparsevector.jl
+++ b/test/sparse/sparsevector.jl
@@ -283,7 +283,7 @@ let a = SparseVector(8, [2, 5, 6], Int32[12, 35, 72])
 
     # reinterpret
     au = reinterpret(UInt32, a)
-    @test isa(au, SparseVector{UInt32,Int})
+    @test isa(au, AbstractSparseVector{UInt32,Int})
     @test exact_equal(au, SparseVector(8, [2, 5, 6], UInt32[12, 35, 72]))
 
     # float