Skip to content

Commit

Permalink
add multi threading (#19)
Browse files Browse the repository at this point in the history
* add multi threading

* update tune & add threads holds

* fix

* fix

* fix multi threading macro

* setup benchmark
  • Loading branch information
Roger-luo authored Apr 24, 2020
1 parent 932c243 commit 656516e
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 49 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
docs/build/
docs/site/
docs/src/examples/*.md
benchmark/Manifest.toml
*.swp
docs/Manifest.toml

Expand Down
9 changes: 9 additions & 0 deletions benchmark/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
BitBasis = "50ba71b6-fa0f-514d-ae9a-0916efc90dcf"
LuxurySparse = "d05aeea4-b7d4-55ac-b691-9e7fabb07ba2"
PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
YaoArrayRegister = "e600142f-9330-5003-8abb-0ebd767abc51"
YaoBase = "a8f54c17-34bc-5a9d-b050-f522fe3f755f"
1 change: 1 addition & 0 deletions benchmark/tune.json

Large diffs are not rendered by default.

156 changes: 107 additions & 49 deletions src/instruct.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
# in principal we only allow one to input a tuple of address to instruct
# but most of the instruct function in this file defines a forward method
# for single qubit version with an int as input.
#
#
# In order to make multi threading work, the state vector MUST be named as state

using YaoBase, BitBasis, LuxurySparse, StaticArrays
export instruct!
Expand All @@ -19,6 +22,64 @@ A list of symbol for specialized gates/operators.
const SPECIALIZATION_LIST =
Symbol[:X, :Y, :Z, :S, :T, :Sdag, :Tdag, :H, :SWAP, :PSWAP, :CPHASE]


const THREAD_THRESHOLD = 10

# generates the threading expression
macro threads(ex)
if Threads.nthreads() == 1 # do nothing in single threading mode
return esc(:(@inbounds $ex))
else
return esc(quote
if log2dim1(state) < THREAD_THRESHOLD
@inbounds $ex
else
@inbounds Threads.@threads $ex
end
end)
end
end

# to avoid potential ambiguity, we limit them to tuple for now
# but they only has to be an iterator over integers
const Locations{T} = NTuple{N, T} where N
const BitConfigs{T} = NTuple{N, T} where N

function YaoBase.instruct!(
state::AbstractVecOrMat{T1},
operator::AbstractMatrix{T2},
locs::Tuple{},
control_locs::NTuple{C, Int}=(),
control_bits::NTuple{C, Int}=()) where {T1, T2, M, C}
return state
end

function YaoBase.instruct!(
state::AbstractVecOrMat{T1},
operator::AbstractMatrix{T2},
locs::NTuple{M, Int},
control_locs::NTuple{C, Int}=(),
control_bits::NTuple{C, Int}=()) where {T1, T2, M, C}

@warn "Element Type Mismatch: register $(T1), operator $(T2). Converting operator to match, this may cause performance issue"
return instruct!(state, copyto!(similar(operator, T1), operator), locs, control_locs, control_bits)
end

function YaoBase.instruct!(state::AbstractVecOrMat{T1}, U1::AbstractMatrix{T2}, loc::Int) where {T1, T2}
@warn "Element Type Mismatch: register $(T1), operator $(T2). Converting operator to match, this may cause performance issue"
return instruct!(state, copyto!(similar(U1, T1), U1), loc)
end

function YaoBase.instruct!(state::AbstractVecOrMat{T1}, U1::SDPermMatrix{T2}, loc::Int) where {T1, T2}
@warn "Element Type Mismatch: register $(T1), operator $(T2). Converting operator to match, this may cause performance issue"
return instruct!(state, copyto!(similar(U1, T1), U1), loc)
end

function YaoBase.instruct!(state::AbstractVecOrMat{T1}, U1::SDDiagonal{T2}, loc::Int) where {T1, T2}
@warn "Element Type Mismatch: register $(T1), operator $(T2). Converting operator to match, this may cause performance issue"
return instruct!(state, copyto!(similar(U1, T1), U1), loc)
end

function _prepare_instruct(
state,
U,
Expand Down Expand Up @@ -109,9 +170,9 @@ function YaoBase.instruct!(
end

@inline function instruct_kernel(state::AbstractVecOrMat, loc, step1, step2, a, b, c, d)
for j = 0:step2:size(state, 1)-step1
@inbounds for i = j+1:j+step1
u1rows!(state, i, i + step1, a, b, c, d)
@threads for j in 0:step2:size(state, 1)-step1
@inbounds for i in j+1:j+step1
u1rows!(state, i, i+step1, a, b, c, d)
end
end
return state
Expand All @@ -130,12 +191,14 @@ function YaoBase.instruct!(
) where {T1,T2}
U1.perm[1] == 1 && return instruct!(state, Diagonal(U1), loc)
mask = bmask(loc)
b, c = T1.(U1.vals)
step = 1 << (loc - 1)
step_2 = 1 << loc
for j = 0:step_2:size(state, 1)-step
@inbounds for i = j+1:j+step
swaprows!(state, i, i + step, c, b)
b, c = U1.vals
step = 1<<(loc-1)
step_2 = 1<<loc
if log2dim1(state) < THREAD_THRESHOLD
end
@threads for j in 0:step_2:size(state, 1)-step
@inbounds for i in j+1:j+step
swaprows!(state, i, i+step, c, b)
end
end
return state
Expand All @@ -156,8 +219,8 @@ function YaoBase.instruct!(
a, d = T1.(U1.diag)
step = 1 << (loc - 1)
step_2 = 1 << loc
for j = 0:step_2:size(state, 1)-step
@inbounds for i = j+1:j+step
@threads for j in 0:step_2:size(state, 1)-step
@inbounds for i in j+1:j+step
mulrow!(state, i, a)
mulrow!(state, i + step, d)
end
Expand All @@ -174,7 +237,7 @@ function YaoBase.instruct!(
) where {N}
mask = bmask(locs)
do_mask = bmask(first(locs))
for b in basis(state)
@threads for b in basis(state)
@inbounds if anyone(b, do_mask)
i = b + 1
i_ = flip(b, mask) + 1
Expand Down Expand Up @@ -203,7 +266,7 @@ function YaoBase.instruct!(
bit_parity = iseven(length(locs)) ? 1 : -1
factor = T(-im)^length(locs)

@inbounds for b in basis(Int, state)
@threads for b in basis(Int, state)
if anyone(b, do_mask)
i = b + 1
i_ = flip(b, mask) + 1
Expand All @@ -221,7 +284,7 @@ function YaoBase.instruct!(
locs::NTuple{N,Int},
) where {T,N}
mask = bmask(Int, locs)
@inbounds for b in basis(Int, state)
@threads for b in basis(Int, state)
if isodd(count_ones(b & mask))
mulrow!(state, b + 1, -1)
end
Expand All @@ -239,8 +302,8 @@ for (G, FACTOR) in zip(
locs::NTuple{N,Int},
) where {T,N}
mask = bmask(Int, locs)
@inbounds for b in basis(Int, state)
mulrow!(state, b + 1, $FACTOR^count_ones(b & mask))
@threads for b in basis(Int, state)
mulrow!(state, b+1, $FACTOR^count_ones(b & mask))
end
return state
end
Expand All @@ -266,10 +329,10 @@ for (G, FACTOR) in zip(
locs::Int,
) where {T}
mask = bmask(locs)
step = 1 << (locs - 1)
step_2 = 1 << locs
@inbounds for j = 0:step_2:size(state, 1)-step
for i = j+step+1:j+step_2
step = 1<<(locs-1)
step_2 = 1<<locs
@threads for j in 0:step_2:size(state, 1)-step
for i in j+step+1:j+step_2
mulrow!(state, i, $FACTOR)
end
end
Expand Down Expand Up @@ -339,7 +402,7 @@ function YaoBase.instruct!(

ctrl = controller((control_locs..., locs[1]), (control_bits..., 0))
mask2 = bmask(locs)
@inbounds for b in basis(state)
@threads for b in basis(state)
if ctrl(b)
i = b + 1
i_ = flip(b, mask2) + 1
Expand All @@ -359,8 +422,7 @@ function YaoBase.instruct!(

ctrl = controller((control_locs..., locs[1]), (control_bits..., 0))
mask2 = bmask(locs)
@inbounds for b in basis(state)
local i_::Int
@threads for b in basis(state)
if ctrl(b)
i = b + 1
i_ = flip(b, mask2) + 1
Expand All @@ -383,7 +445,7 @@ for (G, FACTOR) in zip(
) where {T,N1,N2,N3}

ctrl = controller([control_locs..., locs[1]], [control_bits..., 1])
@inbounds for b in basis(state)
@threads for b in basis(state)
if ctrl(b)
mulrow!(state, b + 1, $FACTOR)
end
Expand Down Expand Up @@ -427,8 +489,8 @@ function YaoBase.instruct!(
step = 1 << (control_locs - 1)
step_2 = 1 << control_locs
start = control_bits == 1 ? step : 0
for j = start:step_2:size(state, 1)-step+start
for b = j:j+step-1
@threads for j in start:step_2:size(state, 1)-step+start
for b in j:j+step-1
@inbounds if allone(b, mask2)
i = b + 1
i_ = flip(b, mask2) + 1
Expand All @@ -440,20 +502,16 @@ function YaoBase.instruct!(
end

function YaoBase.instruct!(
state::AbstractVecOrMat{T},
::Val{:Y},
loc::Int,
control_locs::Int,
control_bits::Int,
) where {T}

mask2 = bmask(loc)
mask = bmask(control_locs, loc)
step = 1 << (control_locs - 1)
step_2 = 1 << control_locs
start = control_bits == 1 ? step : 0
for j = start:step_2:size(state, 1)-step+start
for b = j:j+step-1
state::AbstractVecOrMat{T},
::Val{:Y}, loc::Int,
control_locs::Int, control_bits::Int) where T

mask2 = bmask(loc); mask = bmask(control_locs, loc)
step = 1<<(control_locs-1)
step_2 = 1<<control_locs
start = control_bits==1 ? step : 0
@threads for j in start:step_2:size(state, 1)-step+start
for b in j:j+step-1
@inbounds if allone(b, mask2)
i = b + 1
i_ = flip(b, mask2) + 1
Expand Down Expand Up @@ -486,9 +544,9 @@ for (G, FACTOR) in zip(
step = 1 << (control_locs - 1)
step_2 = 1 << control_locs
start = control_bits == 1 ? step : 0
@inbounds for j = start:step_2:size(state, 1)-step+start
for i = j+1:j+step
if allone(i - 1, mask2)
@threads for j in start:step_2:size(state, 1)-step+start
for i in j+1:j+step
if allone(i-1, mask2)
mulrow!(state, i, $FACTOR)
end
end
Expand All @@ -506,10 +564,10 @@ function YaoBase.instruct!(

mask1 = bmask(locs[1])
mask2 = bmask(locs[2])
mask12 = mask1 | mask2
@inbounds for b in basis(state)
if b & mask1 == 0 && b & mask2 == mask2
i = b + 1
mask12 = mask1|mask2
@threads for b in basis(state)
if b&mask1==0 && b&mask2==mask2
i = b+1
i_ = b mask12 + 1
swaprows!(state, i, i_)
end
Expand All @@ -529,7 +587,7 @@ function YaoBase.instruct!(
a = T(cos(theta / 2))
c = T(-im * sin(theta / 2))
e = T(exp(-im / 2 * theta))
@inbounds for b in basis(state)
@threads for b in basis(state)
if b & mask1 == 0
i = b + 1
i_ = b mask12 + 1
Expand Down Expand Up @@ -558,7 +616,7 @@ function YaoBase.instruct!(
a = T(cos(theta / 2))
c = T(-im * sin(theta / 2))
e = T(exp(-im / 2 * theta))
@inbounds for b in itercontrol(
@threads for b in itercontrol(
log2i(size(state, 1)),
collect(Int, control_locs),
collect(Int, control_bits),
Expand Down

0 comments on commit 656516e

Please sign in to comment.