From ae0ae6e180969eb6eb156ecc45b95d734cb703e7 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Sun, 1 Dec 2013 11:11:36 -0500 Subject: [PATCH 01/13] base/array/view.jl: some very basic array view sketching. - only supports Vector{T} as the data array - multidimensional indexing via an arbitrary linear transform - linear indexing compatible with the multidimensional indexing --- base/array/view.jl | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 base/array/view.jl diff --git a/base/array/view.jl b/base/array/view.jl new file mode 100644 index 0000000000000..2b2ec18cc31cc --- /dev/null +++ b/base/array/view.jl @@ -0,0 +1,41 @@ +importall Base + +isdefined(:ArrayView) || immutable ArrayView{T,n} <: AbstractArray + data::Vector{T} + offset::Int + strides::NTuple{n,Int} + size::NTuple{n,Int} +end + +size(v::ArrayView) = v.size +size(v::ArrayView, i::Int) = v.size[i] +ndims{T,n}(v::ArrayView{T,n}) = n +show(io::IO, v::ArrayView) = invoke(show,(IO,Any),io,v) + +function view2data{T,n}(v::ArrayView{T,n}, I::NTuple{n,Int}) + d = v.offset + t = v.strides + # z = v.size + @inbounds for k = 1:n + j = I[k] + # 1 <= j <= z[k] || throw(BoundsError()) + d += t[k]*(j-1) + end + return d+1 +end + +function view2data{T,n}(v::ArrayView{T,n}, i::Int) + i -= 1 + z = v.size + I = ntuple(n) do k + j = i % z[k] + i = div(i,z[k]) + return j+1 + end + view2data(v,I) +end + +getindex(v::ArrayView, i::Int) = @inbounds return v.data[view2data(v,i)] +getindex(v::ArrayView, I::Int...) = @inbounds return v.data[view2data(v,I)] +setindex!(v::ArrayView, x, i::Int) = (@inbounds v.data[view2data(v,i)] = x) +setindex!(v::ArrayView, x, I::Int...) = (@inbounds v.data[view2data(v,I)] = x) From c5d517a199a17d60d5125a9bfb325a895de53ea6 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Sun, 1 Dec 2013 11:48:06 -0500 Subject: [PATCH 02/13] base/array/view.jl: manually inline and simplify linear view2data. --- base/array/view.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/base/array/view.jl b/base/array/view.jl index 2b2ec18cc31cc..e478bd1892d7d 100644 --- a/base/array/view.jl +++ b/base/array/view.jl @@ -26,13 +26,14 @@ end function view2data{T,n}(v::ArrayView{T,n}, i::Int) i -= 1 + d = v.offset z = v.size - I = ntuple(n) do k - j = i % z[k] + t = v.strides + @inbounds for k = 1:n + d += t[k]*rem(i,z[k]) i = div(i,z[k]) - return j+1 end - view2data(v,I) + return d+1 end getindex(v::ArrayView, i::Int) = @inbounds return v.data[view2data(v,i)] From 0a42dd34b32f693489fbe96ff00de1fdd799961a Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Sun, 1 Dec 2013 19:02:45 -0500 Subject: [PATCH 03/13] base/array/view.jl: tweaking for beauty. --- base/array/view.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/base/array/view.jl b/base/array/view.jl index e478bd1892d7d..76fc84313d1e4 100644 --- a/base/array/view.jl +++ b/base/array/view.jl @@ -14,14 +14,14 @@ show(io::IO, v::ArrayView) = invoke(show,(IO,Any),io,v) function view2data{T,n}(v::ArrayView{T,n}, I::NTuple{n,Int}) d = v.offset - t = v.strides # z = v.size + t = v.strides @inbounds for k = 1:n j = I[k] # 1 <= j <= z[k] || throw(BoundsError()) - d += t[k]*(j-1) + d += (j-1)*t[k] end - return d+1 + d + 1 end function view2data{T,n}(v::ArrayView{T,n}, i::Int) @@ -30,10 +30,10 @@ function view2data{T,n}(v::ArrayView{T,n}, i::Int) z = v.size t = v.strides @inbounds for k = 1:n - d += t[k]*rem(i,z[k]) - i = div(i,z[k]) + d += rem(i,z[k])*t[k] + i = div(i,z[k]) end - return d+1 + d + 1 end getindex(v::ArrayView, i::Int) = @inbounds return v.data[view2data(v,i)] From 5751e84c18c9f505d47888ae7e5d184c0ef55434 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Mon, 2 Dec 2013 03:00:32 +0000 Subject: [PATCH 04/13] [su]div_int intrinsics: these are really important and were slow. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm not even sure why we were generating code to check for a zero denominator – this seems to still raise an exception as expected. The other special case just isn't worth it to emit more than one instruction for div – and div/rem since x86 does both together. --- src/intrinsics.cpp | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index dc3ba31a3781a..b28b99e0c79ba 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -743,37 +743,13 @@ static Value *emit_intrinsic(intrinsic f, jl_value_t **args, size_t nargs, return t == T_void ? x : y; Value *fy; - Value *den; - Value *typemin; switch (f) { HANDLE(neg_int,1) return builder.CreateSub(ConstantInt::get(t, 0), JL_INT(x)); HANDLE(add_int,2) return builder.CreateAdd(JL_INT(x), JL_INT(y)); HANDLE(sub_int,2) return builder.CreateSub(JL_INT(x), JL_INT(y)); HANDLE(mul_int,2) return builder.CreateMul(JL_INT(x), JL_INT(y)); - HANDLE(sdiv_int,2) - den = JL_INT(y); - x = JL_INT(x); - - typemin = builder.CreateShl(ConstantInt::get(t,1), - x->getType()->getPrimitiveSizeInBits()-1); - raise_exception_unless(builder. - CreateAnd(builder. - CreateICmpNE(den, ConstantInt::get(t,0)), - builder. - CreateOr(builder. - CreateICmpNE(den, - ConstantInt::get(t,-1,true)), - builder.CreateICmpNE(x, typemin))), - jldiverr_var, ctx); - - return builder.CreateSDiv(x, den); - HANDLE(udiv_int,2) - den = JL_INT(y); - raise_exception_unless(builder.CreateICmpNE(den, - ConstantInt::get(t,0)), - jldiverr_var, ctx); - return builder.CreateUDiv(JL_INT(x), den); - + HANDLE(sdiv_int,2) return builder.CreateSDiv(JL_INT(x), JL_INT(y)); + HANDLE(udiv_int,2) return builder.CreateUDiv(JL_INT(x), JL_INT(y)); HANDLE(srem_int,2) return builder.CreateSRem(JL_INT(x), JL_INT(y)); HANDLE(urem_int,2) return builder.CreateURem(JL_INT(x), JL_INT(y)); HANDLE(smod_int,2) From 68c241f41151f0902360cf70cbe7dc6866006b03 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Mon, 2 Dec 2013 03:27:02 +0000 Subject: [PATCH 05/13] base/array/view.jl: factor out the really basic index computation. This version lets you see how tight the code gen can be without the gratuitous (now-removed) checks from the div intrinsic and the undef checks from loading the fields from the ArrayView structure: julia> code_native(view2data,(Int,(Int,Int),(Int,Int))) .section __TEXT,__text,regular,pure_instructions Filename: /Users/stefan/projects/julia/base/array/view.jl Source line: 17 push RBP mov RBP, RSP Source line: 17 mov R8, QWORD PTR [RSI + 16] mov RSI, QWORD PTR [RSI + 24] mov RAX, QWORD PTR [RDX + 16] mov RCX, QWORD PTR [RDX + 24] mov RCX, QWORD PTR [RCX + 8] dec RCX imul RCX, QWORD PTR [RSI + 8] mov RAX, QWORD PTR [RAX + 8] dec RAX imul RAX, QWORD PTR [R8 + 8] add RAX, RDI Source line: 19 lea RAX, QWORD PTR [RCX + RAX + 1] pop RBP ret julia> code_native(view2data,(Int,(Int,Int),(Int,Int),Int)) .section __TEXT,__text,regular,pure_instructions Filename: /Users/stefan/projects/julia/base/array/view.jl Source line: 23 push RBP mov RBP, RSP mov R8, RDX Source line: 23 lea RAX, QWORD PTR [RCX - 1] Source line: 25 mov RCX, QWORD PTR [RSI + 16] mov R9, QWORD PTR [RSI + 24] cqo idiv QWORD PTR [RCX + 8] mov RCX, RDX mov RDX, QWORD PTR [R8 + 16] mov RSI, QWORD PTR [R8 + 24] imul RCX, QWORD PTR [RDX + 8] add RCX, RDI cqo idiv QWORD PTR [R9 + 8] imul RDX, QWORD PTR [RSI + 8] Source line: 28 lea RAX, QWORD PTR [RDX + RCX + 1] pop RBP ret --- base/array/view.jl | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/base/array/view.jl b/base/array/view.jl index 76fc84313d1e4..aa0a8bb958d29 100644 --- a/base/array/view.jl +++ b/base/array/view.jl @@ -12,30 +12,25 @@ size(v::ArrayView, i::Int) = v.size[i] ndims{T,n}(v::ArrayView{T,n}) = n show(io::IO, v::ArrayView) = invoke(show,(IO,Any),io,v) -function view2data{T,n}(v::ArrayView{T,n}, I::NTuple{n,Int}) - d = v.offset - # z = v.size - t = v.strides +function view2data{n}(d::Int, strides::NTuple{n,Int}, I::NTuple{n,Int}) @inbounds for k = 1:n - j = I[k] - # 1 <= j <= z[k] || throw(BoundsError()) - d += (j-1)*t[k] + d += (I[k]-1)*strides[k] end d + 1 end -function view2data{T,n}(v::ArrayView{T,n}, i::Int) +function view2data{n}(d::Int, strides::NTuple{n,Int}, size::NTuple{n,Int}, i::Int) i -= 1 - d = v.offset - z = v.size - t = v.strides @inbounds for k = 1:n - d += rem(i,z[k])*t[k] - i = div(i,z[k]) + d += rem(i,strides[k])*size[k] + i = div(i,strides[k]) end d + 1 end +view2data{T,n}(v::ArrayView{T,n}, I::NTuple{n,Int}) = view2data(v.offset, v.strides, I) +view2data{T,n}(v::ArrayView{T,n}, i::Int) = view2data(v.offset, v.strides, v.size, i) + getindex(v::ArrayView, i::Int) = @inbounds return v.data[view2data(v,i)] getindex(v::ArrayView, I::Int...) = @inbounds return v.data[view2data(v,I)] setindex!(v::ArrayView, x, i::Int) = (@inbounds v.data[view2data(v,i)] = x) From a335ce18298ac1b93bde57000f98d0615d56e522 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Mon, 2 Dec 2013 05:53:02 +0000 Subject: [PATCH 06/13] base/array/view.jl: view construction with ranges a la slicing. Also support views into multidimensional arrays. This wasn't all that annoying, actually; I realized I could just add an `m` parameter. --- base/array/view.jl | 47 +++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/base/array/view.jl b/base/array/view.jl index aa0a8bb958d29..b284a4ff9f962 100644 --- a/base/array/view.jl +++ b/base/array/view.jl @@ -1,37 +1,50 @@ importall Base -isdefined(:ArrayView) || immutable ArrayView{T,n} <: AbstractArray - data::Vector{T} - offset::Int - strides::NTuple{n,Int} +isdefined(:ArrayView) || immutable ArrayView{T,n,m} <: AbstractArray + array::Array{T,m} size::NTuple{n,Int} + strides::NTuple{n,Int} + offset::Int end size(v::ArrayView) = v.size size(v::ArrayView, i::Int) = v.size[i] ndims{T,n}(v::ArrayView{T,n}) = n +eltype(v::ArrayView) = eltype(v.array) show(io::IO, v::ArrayView) = invoke(show,(IO,Any),io,v) -function view2data{n}(d::Int, strides::NTuple{n,Int}, I::NTuple{n,Int}) +function v2a{n}(strides::NTuple{n,Int}, o::Int, I::NTuple{n,Int}) @inbounds for k = 1:n - d += (I[k]-1)*strides[k] + o += (I[k]-1)*strides[k] end - d + 1 + o + 1 end -function view2data{n}(d::Int, strides::NTuple{n,Int}, size::NTuple{n,Int}, i::Int) +function v2a{n}(size::NTuple{n,Int}, strides::NTuple{n,Int}, o::Int, i::Int) i -= 1 @inbounds for k = 1:n - d += rem(i,strides[k])*size[k] - i = div(i,strides[k]) + o += rem(i,size[k])*strides[k] + i = div(i,size[k]) end - d + 1 + o + 1 end -view2data{T,n}(v::ArrayView{T,n}, I::NTuple{n,Int}) = view2data(v.offset, v.strides, I) -view2data{T,n}(v::ArrayView{T,n}, i::Int) = view2data(v.offset, v.strides, v.size, i) +v2a{T,n}(v::ArrayView{T,n}, I::NTuple{n,Int}) = v2a(v.strides, v.offset, I) +v2a{T,n}(v::ArrayView{T,n}, i::Int) = v2a(v.size, v.strides, v.offset, i) -getindex(v::ArrayView, i::Int) = @inbounds return v.data[view2data(v,i)] -getindex(v::ArrayView, I::Int...) = @inbounds return v.data[view2data(v,I)] -setindex!(v::ArrayView, x, i::Int) = (@inbounds v.data[view2data(v,i)] = x) -setindex!(v::ArrayView, x, I::Int...) = (@inbounds v.data[view2data(v,I)] = x) +getindex(v::ArrayView, i::Int) = @inbounds return v.array[v2a(v,i)] +getindex(v::ArrayView, I::Int...) = @inbounds return v.array[v2a(v,I)] +setindex!(v::ArrayView, x, i::Int) = (@inbounds v.array[v2a(v,i)] = x) +setindex!(v::ArrayView, x, I::Int...) = (@inbounds v.array[v2a(v,I)] = x) + +function ArrayView{T,n,m}(a::Array{T,m}, R::NTuple{n,Ranges}) + sz = ntuple(n,k->length(R[k])) + offset, prod = 0, 1 + strides = ntuple(n) do k + offset += prod*(first(R[k])-1) + stride = prod*step(R[k]) + prod *= size(a,k) + return stride + end + ArrayView(a,sz,strides,offset) +end From 7806513302f7fa4604588327350faca9caba7255 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Mon, 2 Dec 2013 06:06:32 +0000 Subject: [PATCH 07/13] base/array/view.jl: use 1-based origin instead of 1-based offset. --- base/array/view.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/base/array/view.jl b/base/array/view.jl index b284a4ff9f962..ec13f14eabce8 100644 --- a/base/array/view.jl +++ b/base/array/view.jl @@ -4,7 +4,7 @@ isdefined(:ArrayView) || immutable ArrayView{T,n,m} <: AbstractArray array::Array{T,m} size::NTuple{n,Int} strides::NTuple{n,Int} - offset::Int + origin::Int end size(v::ArrayView) = v.size @@ -17,7 +17,7 @@ function v2a{n}(strides::NTuple{n,Int}, o::Int, I::NTuple{n,Int}) @inbounds for k = 1:n o += (I[k]-1)*strides[k] end - o + 1 + return o end function v2a{n}(size::NTuple{n,Int}, strides::NTuple{n,Int}, o::Int, i::Int) @@ -26,11 +26,11 @@ function v2a{n}(size::NTuple{n,Int}, strides::NTuple{n,Int}, o::Int, i::Int) o += rem(i,size[k])*strides[k] i = div(i,size[k]) end - o + 1 + return o end -v2a{T,n}(v::ArrayView{T,n}, I::NTuple{n,Int}) = v2a(v.strides, v.offset, I) -v2a{T,n}(v::ArrayView{T,n}, i::Int) = v2a(v.size, v.strides, v.offset, i) +v2a{T,n}(v::ArrayView{T,n}, I::NTuple{n,Int}) = v2a(v.strides, v.origin, I) +v2a{T,n}(v::ArrayView{T,n}, i::Int) = v2a(v.size, v.strides, v.origin, i) getindex(v::ArrayView, i::Int) = @inbounds return v.array[v2a(v,i)] getindex(v::ArrayView, I::Int...) = @inbounds return v.array[v2a(v,I)] @@ -39,12 +39,12 @@ setindex!(v::ArrayView, x, I::Int...) = (@inbounds v.array[v2a(v,I)] = x) function ArrayView{T,n,m}(a::Array{T,m}, R::NTuple{n,Ranges}) sz = ntuple(n,k->length(R[k])) - offset, prod = 0, 1 + origin = prod = 1 strides = ntuple(n) do k - offset += prod*(first(R[k])-1) + origin += prod*(first(R[k])-1) stride = prod*step(R[k]) prod *= size(a,k) return stride end - ArrayView(a,sz,strides,offset) + ArrayView(a,sz,strides,origin) end From 22490cb907b35ea9535e68c0f1ee97e207883723 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Mon, 2 Dec 2013 06:22:53 +0000 Subject: [PATCH 08/13] base/array/view.jl: simpler size init; drop gratuitous type params. --- base/array/view.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/base/array/view.jl b/base/array/view.jl index ec13f14eabce8..f5ea2978d8216 100644 --- a/base/array/view.jl +++ b/base/array/view.jl @@ -37,8 +37,7 @@ getindex(v::ArrayView, I::Int...) = @inbounds return v.array[v2a(v,I)] setindex!(v::ArrayView, x, i::Int) = (@inbounds v.array[v2a(v,i)] = x) setindex!(v::ArrayView, x, I::Int...) = (@inbounds v.array[v2a(v,I)] = x) -function ArrayView{T,n,m}(a::Array{T,m}, R::NTuple{n,Ranges}) - sz = ntuple(n,k->length(R[k])) +function ArrayView{n}(a::Array, R::NTuple{n,Ranges}) origin = prod = 1 strides = ntuple(n) do k origin += prod*(first(R[k])-1) @@ -46,5 +45,5 @@ function ArrayView{T,n,m}(a::Array{T,m}, R::NTuple{n,Ranges}) prod *= size(a,k) return stride end - ArrayView(a,sz,strides,origin) + ArrayView(a,map(length,R),strides,origin) end From cf1199d457cc0f36a508f57c0a131631c030ce3b Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Mon, 2 Dec 2013 06:58:41 +0000 Subject: [PATCH 09/13] base/array/view.jl: an view of a view is a view (dims must match). --- base/array/view.jl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/base/array/view.jl b/base/array/view.jl index f5ea2978d8216..04b53d2db544f 100644 --- a/base/array/view.jl +++ b/base/array/view.jl @@ -38,7 +38,7 @@ setindex!(v::ArrayView, x, i::Int) = (@inbounds v.array[v2a(v,i)] = x) setindex!(v::ArrayView, x, I::Int...) = (@inbounds v.array[v2a(v,I)] = x) function ArrayView{n}(a::Array, R::NTuple{n,Ranges}) - origin = prod = 1 + prod = origin = 1 strides = ntuple(n) do k origin += prod*(first(R[k])-1) stride = prod*step(R[k]) @@ -47,3 +47,14 @@ function ArrayView{n}(a::Array, R::NTuple{n,Ranges}) end ArrayView(a,map(length,R),strides,origin) end + +function ArrayView{T,n}(v::ArrayView{T,n}, R::NTuple{n,Ranges}) + prod = 1 + origin = v.origin + for k = 1:n + origin += prod*(first(R[k])-1) + prod *= v.size[k] + end + strides = ntuple(n,k->step(R[k])*v.strides[k]) + ArrayView(v.array,map(length,R),strides,origin) +end From 2f0e4d62b773a44c22e8aef016b8cfbcc67cfa10 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Mon, 9 Dec 2013 16:29:07 -0500 Subject: [PATCH 10/13] base/array/view.jl: linear v2a using only div, not precomputed. --- base/array/view.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/base/array/view.jl b/base/array/view.jl index 04b53d2db544f..e231c32ee0050 100644 --- a/base/array/view.jl +++ b/base/array/view.jl @@ -22,9 +22,11 @@ end function v2a{n}(size::NTuple{n,Int}, strides::NTuple{n,Int}, o::Int, i::Int) i -= 1 - @inbounds for k = 1:n - o += rem(i,size[k])*strides[k] - i = div(i,size[k]) + p = size[1] + o += i*strides[1] + @inbounds for k = 2:n + o += div(i,p)*(strides[k]-size[k-1]*strides[k-1]) + p *= size[k] end return o end From e8c0ce4395ea0b04d6b3511d81c8c30c053410fb Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Sat, 28 Dec 2013 16:59:41 -0500 Subject: [PATCH 11/13] New fast multiply-shift linear indexing algorithm. julia> include(joinpath(JULIA_HOME,"../../base/array/view.jl")) benchmark (generic function with 4 methods) julia> results = benchmark(); julia> data = squeeze(mapslices(median,results[:,:,2:3]./results[:,:,1],2),2) 10x2 Array{Float64,2}: 6.64002 2.99833 6.69273 16.3886 8.59361 29.9632 10.4218 48.2432 12.1854 65.8061 13.2618 2357.6 16.1612 2534.79 17.3616 2367.67 17.1718 2754.53 20.8452 3254.62 julia> data./[1:size(data,1)] 10x2 Array{Float64,2}: 6.64002 2.99833 3.34636 8.19429 2.86454 9.98773 2.60545 12.0608 2.43708 13.1612 2.21029 392.934 2.30875 362.113 2.1702 295.958 1.90797 306.059 2.08452 325.462 --- base/array/view.jl | 77 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 20 deletions(-) diff --git a/base/array/view.jl b/base/array/view.jl index e231c32ee0050..499c010eac8b0 100644 --- a/base/array/view.jl +++ b/base/array/view.jl @@ -1,17 +1,18 @@ importall Base -isdefined(:ArrayView) || immutable ArrayView{T,n,m} <: AbstractArray +isdefined(:ArrayView) || immutable ArrayView{T,n,m} <: AbstractArray{T,n} array::Array{T,m} - size::NTuple{n,Int} + sizes::NTuple{n,Int} strides::NTuple{n,Int} origin::Int + magic::NTuple{n,Int} + coefs::NTuple{n,Int} end -size(v::ArrayView) = v.size -size(v::ArrayView, i::Int) = v.size[i] +size(v::ArrayView) = v.sizes +size(v::ArrayView, i::Int) = v.sizes[i] ndims{T,n}(v::ArrayView{T,n}) = n eltype(v::ArrayView) = eltype(v.array) -show(io::IO, v::ArrayView) = invoke(show,(IO,Any),io,v) function v2a{n}(strides::NTuple{n,Int}, o::Int, I::NTuple{n,Int}) @inbounds for k = 1:n @@ -20,19 +21,17 @@ function v2a{n}(strides::NTuple{n,Int}, o::Int, I::NTuple{n,Int}) return o end -function v2a{n}(size::NTuple{n,Int}, strides::NTuple{n,Int}, o::Int, i::Int) +function v2a{n}(magic::NTuple{n,Int}, coefs::NTuple{n,Int}, o::Int, i::Int) i -= 1 - p = size[1] - o += i*strides[1] + o += i*coefs[1] @inbounds for k = 2:n - o += div(i,p)*(strides[k]-size[k-1]*strides[k-1]) - p *= size[k] + o += (magic[k]*i>>>32)*coefs[k] end return o end v2a{T,n}(v::ArrayView{T,n}, I::NTuple{n,Int}) = v2a(v.strides, v.origin, I) -v2a{T,n}(v::ArrayView{T,n}, i::Int) = v2a(v.size, v.strides, v.origin, i) +v2a{T,n}(v::ArrayView{T,n}, i::Int) = v2a(v.magic, v.coefs, v.origin, i) getindex(v::ArrayView, i::Int) = @inbounds return v.array[v2a(v,i)] getindex(v::ArrayView, I::Int...) = @inbounds return v.array[v2a(v,I)] @@ -47,16 +46,54 @@ function ArrayView{n}(a::Array, R::NTuple{n,Ranges}) prod *= size(a,k) return stride end - ArrayView(a,map(length,R),strides,origin) + sizes = map(length,R) + prod = 1 + magic = ntuple(n) do k + m = div(1<<32+prod-1,prod) + prod *= sizes[k] + return m + end + coefs = ntuple(n) do k + k == 1 ? strides[k] : strides[k]-sizes[k-1]*strides[k-1] + end + ArrayView(a,sizes,strides,origin,magic,coefs) end -function ArrayView{T,n}(v::ArrayView{T,n}, R::NTuple{n,Ranges}) - prod = 1 - origin = v.origin - for k = 1:n - origin += prod*(first(R[k])-1) - prod *= v.size[k] +function repeated_linear_indexing(a,n=1000) + t = zero(eltype(a)) + for _ = 1:n, i = 1:length(a) + t += a[i] + end + return t +end + +using Distributions + +function rand_slice(z) + a = max(1,rand(Binomial(z,1/3))) + b = max(1,rand(Binomial(z,2/3))) + s = max(1,rand(NegativeBinomial(1,1/2))) + r = a:copysign(s,sign(b-a)):b + randbool() ? r : z-r+1 +end + +function benchmark(z=1000000,n=10,m=10) + results = Array(Float64,n,m,3) + for d = 1:n, _ = 1:m + S = max(1,rand(Binomial((2z)^(1/d),0.5),d)) + A = rand(S...) + R = map(rand_slice,size(A)) + # @show size(A), R + a = A[R...] + v = ArrayView(A,R) + s = sub(A,R) + @assert a == v + @assert a == s + @assert v == s + ta = @elapsed repeated_linear_indexing(a) + tv = @elapsed repeated_linear_indexing(v) + ts = @elapsed repeated_linear_indexing(s) + results[d,_,:] = [ta,tv,ts] end - strides = ntuple(n,k->step(R[k])*v.strides[k]) - ArrayView(v.array,map(length,R),strides,origin) + return results end From 091a4ca082c92ad945d1541015d550f85fd1720f Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Sat, 28 Dec 2013 18:57:14 -0500 Subject: [PATCH 12/13] make benchmark array sizes more consistent + deterministic unfortunately, this takes a damned long time to run because the multidimensional linear SubArray indexing is *soooo* slow. --- base/array/view.jl | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/base/array/view.jl b/base/array/view.jl index 499c010eac8b0..e24523f6eb3ab 100644 --- a/base/array/view.jl +++ b/base/array/view.jl @@ -59,41 +59,32 @@ function ArrayView{n}(a::Array, R::NTuple{n,Ranges}) ArrayView(a,sizes,strides,origin,magic,coefs) end -function repeated_linear_indexing(a,n=1000) - t = zero(eltype(a)) - for _ = 1:n, i = 1:length(a) - t += a[i] +function repeated_linear_indexing(a,cap=0.01) + n = 0 + time = 0.0 + total = zero(eltype(a)) + while time < cap + time += @elapsed for i = 1:length(a) + total += a[i] + end + n += 1 end - return t -end - -using Distributions - -function rand_slice(z) - a = max(1,rand(Binomial(z,1/3))) - b = max(1,rand(Binomial(z,2/3))) - s = max(1,rand(NegativeBinomial(1,1/2))) - r = a:copysign(s,sign(b-a)):b - randbool() ? r : z-r+1 + total, time/n end function benchmark(z=1000000,n=10,m=10) results = Array(Float64,n,m,3) - for d = 1:n, _ = 1:m - S = max(1,rand(Binomial((2z)^(1/d),0.5),d)) + for d = 1:n, x = 1:m + S = [ iround(3z^(1/d)+1) for _=1:d ] A = rand(S...) - R = map(rand_slice,size(A)) - # @show size(A), R + R = map(s->2:3:s-1,size(A)) a = A[R...] v = ArrayView(A,R) s = sub(A,R) - @assert a == v - @assert a == s - @assert v == s - ta = @elapsed repeated_linear_indexing(a) - tv = @elapsed repeated_linear_indexing(v) - ts = @elapsed repeated_linear_indexing(s) - results[d,_,:] = [ta,tv,ts] + @show S, size(a), length(a) + results[d,x,1] = repeated_linear_indexing(a)[2] + results[d,x,2] = repeated_linear_indexing(v)[2] + results[d,x,3] = repeated_linear_indexing(s)[2] end return results end From 8a4d07e618289b68fb9cd59515e6d4b8cbf19dfe Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Sun, 29 Dec 2013 00:06:25 -0500 Subject: [PATCH 13/13] reduce default iteration count to get it to turn over fast enough. --- base/array/view.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/array/view.jl b/base/array/view.jl index e24523f6eb3ab..b7f155c73068c 100644 --- a/base/array/view.jl +++ b/base/array/view.jl @@ -72,7 +72,7 @@ function repeated_linear_indexing(a,cap=0.01) total, time/n end -function benchmark(z=1000000,n=10,m=10) +function benchmark(z=100000,n=10,m=10) results = Array(Float64,n,m,3) for d = 1:n, x = 1:m S = [ iround(3z^(1/d)+1) for _=1:d ]