-
Notifications
You must be signed in to change notification settings - Fork 78
/
abstractarray.jl
244 lines (191 loc) · 8.23 KB
/
abstractarray.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# core definition of the AbstractGPUArray type

export AbstractGPUArray

"""
    AbstractGPUArray{T, N} <: DenseArray{T, N}

Supertype for `N`-dimensional GPU arrays (or array-like types) with elements of type `T`.
Instances of this type are expected to live on the host, see [`AbstractDeviceArray`](@ref)
for device-side objects.
"""
abstract type AbstractGPUArray{T, N} <: DenseArray{T, N} end

# aliases for the one-dimensional case, the two-dimensional case, and the union of both
const AbstractGPUVector{T} = AbstractGPUArray{T, 1}
const AbstractGPUMatrix{T} = AbstractGPUArray{T, 2}
const AbstractGPUVecOrMat{T} = Union{AbstractGPUArray{T, 1}, AbstractGPUArray{T, 2}}
# convenience aliases for working with wrapped arrays
#
# `WrappedGPUArray` is expressed through `WrappedArray`, which is not defined in this file
# (presumably the wrapper union provided by Adapt.jl -- confirm). `AnyGPUArray` matches
# either a plain `AbstractGPUArray` or such a wrapped array, and is what most methods
# below dispatch on.
const WrappedGPUArray{T,N} = WrappedArray{T,N,AbstractGPUArray,AbstractGPUArray{T,N}}
const AnyGPUArray{T,N} = Union{AbstractGPUArray{T,N}, WrappedGPUArray{T,N}}
# input/output
## serialization
using Serialization: AbstractSerializer, serialize_type
# Serialize a GPU array: first emit its type `T` through `serialize_type`, then the
# contents converted to an `Array`.
function Serialization.serialize(s::AbstractSerializer, t::T) where {T<:AbstractGPUArray}
    serialize_type(s, T)
    cpu_copy = Array(t)
    return serialize(s, cpu_copy)
end
# Counterpart of `serialize`: read the next object from the stream and hand it to the
# constructor of the requested GPU array type `T`.
function Serialization.deserialize(s::AbstractSerializer, ::Type{T}) where {T<:AbstractGPUArray}
    payload = deserialize(s)
    return T(payload)
end
## showing
# Adaptor for `adapt`: every `AbstractGPUArray` encountered is converted to an `Array`.
struct ToArray end

function Adapt.adapt_storage(::ToArray, xs::AbstractGPUArray)
    return convert(Array, xs)
end
# display: convert to an `Array` (through the `ToArray` adaptor) and defer to Base
function Base.print_array(io::IO, X::AnyGPUArray)
    return Base.print_array(io, adapt(ToArray(), X))
end

# show: the same conversion for the non-empty, empty and vector code paths
function Base._show_nonempty(io::IO, X::AnyGPUArray, prefix::String)
    return Base._show_nonempty(io, adapt(ToArray(), X), prefix)
end

function Base._show_empty(io::IO, X::AnyGPUArray)
    return Base._show_empty(io, adapt(ToArray(), X))
end

function Base.show_vector(io::IO, v::AnyGPUArray, args...)
    return Base.show_vector(io, adapt(ToArray(), v), args...)
end
## collect to CPU (discarding wrapper type)

# Run `xs` through the `ToArray` adaptor and `collect` the result.
function collect_to_cpu(xs::AbstractArray)
    converted = adapt(ToArray(), xs)
    return collect(converted)
end

function Base.collect(X::AnyGPUArray)
    return collect_to_cpu(X)
end
# memory copying
## basic linear copies of identically-typed memory

# expects the GPU array type to have linear `copyto!` methods (i.e. accepting an integer
# offset and length) from and to CPU arrays and between GPU arrays.
#
# the methods generated here only normalize their arguments and forward to that
# five-argument form, for every CPU/GPU combination that involves a GPU array.

for (D, S) in ((AnyGPUArray, Array),
               (Array, AnyGPUArray),
               (AnyGPUArray, AnyGPUArray))
    @eval begin
        # unit ranges are rewritten as one-dimensional Cartesian ranges
        function Base.copyto!(dest::$D{<:Any, N}, rdest::UnitRange,
                              src::$S{<:Any, N}, ssrc::UnitRange) where {N}
            return copyto!(dest, CartesianIndices((rdest,)),
                           src, CartesianIndices((ssrc,)))
        end

        # one-dimensional Cartesian ranges become (offset, offset, length) copies
        function Base.copyto!(dest::$D, d_range::CartesianIndices{1},
                              src::$S, s_range::CartesianIndices{1})
            len = length(d_range)
            length(s_range) == len ||
                throw(ArgumentError("Copy range needs same length. Found: dest: $len, src: $(length(s_range))"))
            iszero(len) && return dest

            dest_start = first(d_range)[1]
            src_start = first(s_range)[1]
            return copyto!(dest, dest_start, src, src_start, len)
        end

        # whole-array copy: all of `src` into the start of `dest`
        function Base.copyto!(dest::$D, src::$S)
            return copyto!(dest, 1, src, 1, length(src))
        end
    end
end
# kernel-based variant for copying between wrapped GPU arrays

# Each thread copies at most one element: the thread with (one-based) linear index `k`
# handles the element at zero-based offset `k - 1` from `sstart` / `dstart`. Threads whose
# offset is not below `n` do nothing.
function linear_copy_kernel!(ctx::AbstractKernelContext, dest, dstart, src, sstart, n)
    offset = linear_index(ctx) - 1
    offset < n || return
    @inbounds dest[dstart + offset] = src[sstart + offset]
    return
end
# Copy `n` elements from `src` (starting at linear index `sstart`) to `dest` (starting at
# linear index `dstart`) by launching `linear_copy_kernel!` with one thread per element.
#
# Returns `dest`. Throws an `ArgumentError` for negative `n` and a `BoundsError` when
# either linear range does not fit its array.
function Base.copyto!(dest::AnyGPUArray, dstart::Integer,
                      src::AnyGPUArray, sstart::Integer, n::Integer)
    n == 0 && return dest
    if n < 0
        throw(ArgumentError(string("tried to copy n=", n, " elements, but n should be nonnegative")))
    end

    # the kernel indexes with `@inbounds`, so both ends of each range are validated here
    dest_linear = LinearIndices(dest)
    src_linear = LinearIndices(src)
    dest_fits = checkbounds(Bool, dest_linear, dstart) &&
                checkbounds(Bool, dest_linear, dstart+n-1)
    dest_fits || throw(BoundsError(dest, dstart:dstart+n-1))
    src_fits = checkbounds(Bool, src_linear, sstart) &&
               checkbounds(Bool, src_linear, sstart+n-1)
    src_fits || throw(BoundsError(src, sstart:sstart+n-1))

    gpu_call(linear_copy_kernel!, dest, dstart, src, sstart, n; total_threads=n)
    return dest
end
# variants that materialize the GPU wrapper before copying from or to the CPU

# wrapped GPU array -> CPU array: gather the `n` requested elements into a buffer obtained
# from `similar(src, n)`, then copy that buffer into `dest`.
function Base.copyto!(dest::Array, dstart::Integer,
                      src::WrappedGPUArray, sstart::Integer, n::Integer)
    iszero(n) && return dest
    staging = similar(src, n)
    copyto!(staging, 1, src, sstart, n)
    copyto!(dest, dstart, staging, 1, n)
    return dest
end
# CPU array -> wrapped GPU array: copy the `n` source elements into a buffer obtained from
# `similar(dest, n)`, then copy that buffer into the wrapped destination.
function Base.copyto!(dest::WrappedGPUArray, dstart::Integer,
                      src::Array, sstart::Integer, n::Integer)
    iszero(n) && return dest
    staging = similar(dest, n)
    copyto!(staging, 1, src, sstart, n)
    copyto!(dest, dstart, staging, 1, n)
    return dest
end
# variants that convert values on the CPU when there's a type mismatch
#
# we prefer to convert on the CPU where there's typically more memory / less memory pressure
# to quickly perform these very lightweight conversions

# GPU -> CPU: fetch the elements into a CPU vector that keeps the source element type `U`,
# then let the CPU-to-CPU `copyto!` handle the conversion into `dest`.
function Base.copyto!(dest::Array{T}, dstart::Integer,
                      src::AnyGPUArray{U}, sstart::Integer,
                      n::Integer) where {T,U}
    iszero(n) && return dest
    unconverted = Vector{U}(undef, n)
    copyto!(unconverted, 1, src, sstart, n)
    copyto!(dest, dstart, unconverted, 1, n)
    return dest
end
# CPU -> GPU: copy the source elements into a CPU vector that already has the destination
# element type `T` (so the conversion happens on the CPU), then copy that vector to `dest`.
function Base.copyto!(dest::AnyGPUArray{T}, dstart::Integer,
                      src::Array{U}, sstart::Integer, n::Integer) where {T,U}
    iszero(n) && return dest
    converted = Vector{T}(undef, n)
    copyto!(converted, 1, src, sstart, n)
    copyto!(dest, dstart, converted, 1, n)
    return dest
end
## generalized blocks of heterogeneous memory

# Each thread copies at most one element of a block of size `shape`: its linear index is
# turned into a Cartesian index within `shape`, which is then shifted by `src_offsets` /
# `dest_offsets` to address the two arrays. Threads with an index above `length` do nothing.
function cartesian_copy_kernel!(ctx::AbstractKernelContext, dest, dest_offsets, src, src_offsets, shape, length)
    tid = linear_index(ctx)
    tid <= length || return
    within_block = CartesianIndices(shape)[tid]
    @inbounds dest[within_block + dest_offsets] = src[within_block + src_offsets]
    return
end
# Copy the block of `src` selected by `srccrange` into the block of `dest` selected by
# `destcrange`, using a single launch of `cartesian_copy_kernel!` with one thread per
# element.
#
# Returns `dest`. Throws an `ArgumentError` when the two ranges differ in size, and a
# `BoundsError` when a (non-empty) range does not fit inside its array.
function Base.copyto!(dest::AnyGPUArray{<:Any, N}, destcrange::CartesianIndices{N},
                      src::AnyGPUArray{<:Any, N}, srccrange::CartesianIndices{N}) where {N}
    shape = size(destcrange)
    if shape != size(srccrange)
        throw(ArgumentError("Ranges don't match their size. Found: $shape, $(size(srccrange))"))
    end

    len = length(destcrange)
    len == 0 && return dest

    # the kernel indexes with `@inbounds`, so the ranges have to be validated on the host
    # before launching (the linear `copyto!` does the same). for a rectangular range it
    # is sufficient to check its two extreme corners.
    checkbounds(dest, first(destcrange))
    checkbounds(dest, last(destcrange))
    checkbounds(src, first(srccrange))
    checkbounds(src, last(srccrange))

    # the kernel enumerates `CartesianIndices(shape)`, which starts at (1, ..., 1), so
    # each range is described by the offset of its first index from that origin.
    # NOTE(review): this addressing looks like it assumes unit-step ranges -- confirm
    # whether stepped `CartesianIndices` need to be supported here.
    origin = oneunit(CartesianIndex{N})
    dest_offsets = first(destcrange) - origin
    src_offsets = first(srccrange) - origin

    gpu_call(cartesian_copy_kernel!,
             dest, dest_offsets, src, src_offsets, shape, len;
             total_threads=len)
    return dest
end
# Cartesian copies between a CPU `Array` and an `AbstractGPUArray` (generated for both
# directions). The block is decomposed into runs that are contiguous in memory on both
# sides, and each run is transferred with the linear five-argument `copyto!`.
#
# Returns `dst`. Throws an `ArgumentError` when the two ranges differ in size, and a
# `BoundsError` when a (non-empty) range does not fit inside its array.
for (dstTyp, srcTyp) in (AbstractGPUArray=>Array, Array=>AbstractGPUArray)
    @eval function Base.copyto!(dst::$dstTyp{T,N}, dstrange::CartesianIndices{N},
                                src::$srcTyp{T,N}, srcrange::CartesianIndices{N}) where {T,N}
        isempty(dstrange) && return dst
        if size(dstrange) != size(srcrange)
            throw(ArgumentError("source and destination must have same size (got $(size(srcrange)) and $(size(dstrange)))"))
        end

        # validate both ranges up front (checking the two extreme corners is sufficient
        # for a rectangular range). the contiguity test below compares extents only, so
        # without this a range as long as the array dimension but shifted out of bounds
        # would be considered contiguous and a wrong linear span would be copied silently.
        checkbounds(dst, first(dstrange))
        checkbounds(dst, last(dstrange))
        checkbounds(src, first(srcrange))
        checkbounds(src, last(srcrange))

        # figure out how many dimensions of the Cartesian ranges map onto contiguous memory
        # in both source and destination. we will copy these one by one as linear ranges.
        # NOTE(review): this looks like it assumes unit-step ranges -- confirm whether
        # stepped `CartesianIndices` need to be supported here.
        contiguous_dims = 1
        for dim in 2:N
            # a slice is broken up if the previous dimension didn't cover the entire range
            if axes(src, dim-1) == axes(srcrange, dim-1) &&
               axes(dst, dim-1) == axes(dstrange, dim-1)
                contiguous_dims = dim
            else
                break
            end
        end

        extents = size(dstrange)
        m = prod(extents[1:contiguous_dims])       # inner, contiguous length
        n = prod(extents[contiguous_dims+1:end])   # outer non-contiguous length
        @assert m*n == length(srcrange) == length(dstrange)

        # copy linear slices; `i` is the position (within the ranges) of the first element
        # of each contiguous run
        src_linear = LinearIndices(src)
        dst_linear = LinearIndices(dst)
        for i in 1:m:m*n
            srcoff = src_linear[srcrange[i]]
            dstoff = dst_linear[dstrange[i]]
            # TODO: Use asynchronous memory copies
            copyto!(dst, dstoff, src, srcoff, m)
        end

        return dst
    end
end
## other

# generic `copy` fallback for GPU arrays: unconditionally raises an error
function Base.copy(x::AbstractGPUArray)
    return error("Not implemented") # COV_EXCL_LINE
end

# `deepcopy` of a GPU array is forwarded to `copy`
function Base.deepcopy(x::AbstractGPUArray)
    return copy(x)
end
# filtering

# TODO: filter!

# revert of JuliaLang/julia#31929
#
# evaluate the predicate with `map` (the result is asserted to be a GPU array of `Bool`)
# and use it as a logical index into the input.
function Base.filter(f, As::AbstractGPUArray)
    mask = map(f, As)::AbstractGPUArray{Bool}
    return As[mask]
end