diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 0000000..9613e05 --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1 @@ +style = "yas" \ No newline at end of file diff --git a/.github/workflows/FormatCheck.yml b/.github/workflows/FormatCheck.yml new file mode 100644 index 0000000..1fa4385 --- /dev/null +++ b/.github/workflows/FormatCheck.yml @@ -0,0 +1,42 @@ +# modified from https://github.com/SciML/DiffEqDocs.jl/tree/master/.github/workflows +name: format-check + +on: + push: + branches: + - 'main' + tags: '*' + pull_request: + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + julia-version: [1] + julia-arch: [x86] + os: [ubuntu-latest] + steps: + - uses: julia-actions/setup-julia@latest + with: + version: ${{ matrix.julia-version }} + + - uses: actions/checkout@v1 + - name: Install JuliaFormatter and format + # This will use the latest version by default but you can set the version like so: + # + # julia -e 'using Pkg; Pkg.add(PackageSpec(name="JuliaFormatter", version="0.13.0"))' + run: | + julia -e 'using Pkg; Pkg.add(PackageSpec(name="JuliaFormatter"))' + julia -e 'using JuliaFormatter; format(".", verbose=true)' + - name: Format check + run: | + julia -e ' + out = Cmd(`git diff --name-only`) |> read |> String + if out == "" + exit(0) + else + @error "Some files have not been formatted !!!" + write(stdout, out) + exit(1) + end' \ No newline at end of file diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index d5b8871..85049d8 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -23,7 +23,7 @@ K = laplace_matrix(X, Y) irange = 1:N jrange = 1:N -compressors = [PartialACA(rtol = rtol), ACA(rtol = rtol), TSVD(rtol = rtol)] +compressors = [PartialACA(; rtol=rtol), ACA(; rtol=rtol), TSVD(; rtol=rtol)] for comp in compressors SUITE["Compressors"][string(comp)] = @benchmarkable $comp($K, $irange, $jrange) @@ -53,26 +53,34 @@ comp = HMatrices.PartialACA(; rtol) step = 1.75 * π * radius / sqrt(N) k = 2 * π / (10 * step) # 10 pts per wavelength -kernels = [ - ("Laplace", laplace_matrix(X, X), true), - ("Helmholtz", helmholtz_matrix(X, X, k), true), - ("LaplaceVec", LaplaceMatrixVec(Xp,Xp), false), - ("HelmholtzVec", HelmholtzMatrixVec(Xp,Xp,k),false) -] +kernels = [("Laplace", laplace_matrix(X, X), true), + ("Helmholtz", helmholtz_matrix(X, X, k), true), + ("LaplaceVec", LaplaceMatrixVec(Xp, Xp), false), + ("HelmholtzVec", HelmholtzMatrixVec(Xp, Xp, k), false)] for (name, K, p) in kernels SUITE[name] = BenchmarkGroup([name, N]) # bench assemble - SUITE[name]["assemble cpu"] = @benchmarkable assemble_hmat($K, $Xclt, $Xclt; adm = $adm, comp = $comp, threads = false, distributed = false, global_index = $p) - SUITE[name]["assemble threads"] = @benchmarkable assemble_hmat($K, $Xclt, $Xclt; adm = $adm, comp = $comp, threads = true, distributed = false, global_index = $p) + SUITE[name]["assemble cpu"] = @benchmarkable assemble_hmat($K, $Xclt, $Xclt; adm=$adm, + comp=$comp, threads=false, + distributed=false, + global_index=$p) + SUITE[name]["assemble threads"] = @benchmarkable assemble_hmat($K, $Xclt, $Xclt; + adm=$adm, comp=$comp, + threads=true, + distributed=false, + global_index=$p) # SUITE[name]["assemble procs"] = @benchmarkable assemble_hmat($K, $Xclt, $Xclt; adm = $adm, comp = $comp, threads = false, distributed = true, global_index = $p) # bench gemv only for regular case since the vectorized case should be the same if p x = rand(eltype(K), N) y = zero(x) - H = assemble_hmat(K, Xclt, Xclt; adm, comp, threads = 
true, distributed = false, global_index = p) - SUITE[name]["gemv cpu"] = @benchmarkable mul!($y, $H, $x, $1, $0; threads = false, global_index = $p) - SUITE[name]["gemv threads"] = @benchmarkable mul!($y, $H, $x, $1, $0; threads = true, global_index = $p) - SUITE[name]["lu"] = @benchmarkable lu!($H;rank=5) + H = assemble_hmat(K, Xclt, Xclt; adm, comp, threads=true, distributed=false, + global_index=p) + SUITE[name]["gemv cpu"] = @benchmarkable mul!($y, $H, $x, $1, $0; threads=false, + global_index=$p) + SUITE[name]["gemv threads"] = @benchmarkable mul!($y, $H, $x, $1, $0; threads=true, + global_index=$p) + SUITE[name]["lu"] = @benchmarkable lu!($H; rank=5) end end diff --git a/benchmark/make.jl b/benchmark/make.jl index 3de6453..3ed8067 100644 --- a/benchmark/make.jl +++ b/benchmark/make.jl @@ -1,20 +1,19 @@ using PkgBenchmark -function postprocess(results,fname="benchs") +function postprocess(results, fname="benchs") # writeresults(joinpath(path,fname),results) dir = @__DIR__ - path = joinpath(dir,"../docs/src") - export_markdown(joinpath(path,fname*".md"),results) + path = joinpath(dir, "../docs/src") + return export_markdown(joinpath(path, fname * ".md"), results) end -env = Dict("JULIA_NUM_THREADS" =>4, - "OPEN_BLAS_NUM_THREADS" => 1 - ) +env = Dict("JULIA_NUM_THREADS" => 4, + "OPEN_BLAS_NUM_THREADS" => 1) -config = BenchmarkConfig(;juliacmd=`julia -O3`,env) +config = BenchmarkConfig(; juliacmd=`julia -O3`, env) -dir = @__DIR__ -retune = false -results = benchmarkpkg("HMatrices",config;retune) +dir = @__DIR__ +retune = false +results = benchmarkpkg("HMatrices", config; retune) postprocess(results) diff --git a/docs/make.jl b/docs/make.jl index 00b3c43..fd14f72 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -4,26 +4,21 @@ using Documenter DocMeta.setdocmeta!(HMatrices, :DocTestSetup, :(using HMatrices); recursive=true) makedocs(; - modules=[HMatrices], - authors="Luiz M. Faria and contributors", - repo="https://github.com/WaveProp/HMatrices.jl/blob/{commit}{path}#{line}", - sitename="HMatrices.jl", - format=Documenter.HTML(; - prettyurls=get(ENV, "CI", "false") == "true", - canonical="https://WaveProp.github.io/HMatrices.jl", - assets=String[], - ), - pages=[ - "Getting started" => "index.md", - "Kernel matrices" => "kernelmatrix.md", - "Distributed HMatrix" => "dhmatrix.md", - "Notebooks" => "notebooks.md", - "Benchmarks" => ["benchs.md"], - "References" => "references.md" - ], -) + modules=[HMatrices], + authors="Luiz M. 
Faria and contributors", + repo="https://github.com/WaveProp/HMatrices.jl/blob/{commit}{path}#{line}", + sitename="HMatrices.jl", + format=Documenter.HTML(; + prettyurls=get(ENV, "CI", "false") == "true", + canonical="https://WaveProp.github.io/HMatrices.jl", + assets=String[]), + pages=["Getting started" => "index.md", + "Kernel matrices" => "kernelmatrix.md", + "Distributed HMatrix" => "dhmatrix.md", + "Notebooks" => "notebooks.md", + "Benchmarks" => ["benchs.md"], + "References" => "references.md"]) deploydocs(; - repo="github.com/WaveProp/HMatrices.jl", - devbranch="main" -) + repo="github.com/WaveProp/HMatrices.jl", + devbranch="main") diff --git a/docs/notebooks/vectorization.jl b/docs/notebooks/vectorization.jl index 3ed26a8..7c55e3c 100644 --- a/docs/notebooks/vectorization.jl +++ b/docs/notebooks/vectorization.jl @@ -6,122 +6,125 @@ using InteractiveUtils # ╔═╡ 2b4aa264-4145-11ec-0d24-ab437ba0f84e begin - import Pkg + using Pkg: Pkg # activate the shared project environment Pkg.activate(Base.current_project()) - using HMatrices, Plots, PlutoUI, LinearAlgebra, StaticArrays, WavePropBase, BenchmarkTools, LoopVectorization - using WavePropBase: PlotPoints, PlotTree, root_elements + using HMatrices, Plots, PlutoUI, LinearAlgebra, StaticArrays, WavePropBase, + BenchmarkTools, LoopVectorization + using WavePropBase: PlotPoints, PlotTree, root_elements end # ╔═╡ 9efa3b28-900c-4515-8aba-617bce2ddc68 -PlutoUI.TableOfContents(;depth=2) +PlutoUI.TableOfContents(; depth=2) # ╔═╡ d1917856-64d1-4dab-9e05-40b1152217d2 begin - const Point3D = SVector{3,Float64} - m = 10_000 - X = Y = [Point3D(sin(θ)cos(ϕ),sin(θ)*sin(ϕ),cos(θ)) for (θ,ϕ) in zip(π*rand(m),2π*rand(m))] + const Point3D = SVector{3,Float64} + m = 10_000 + X = Y = [Point3D(sin(θ)cos(ϕ), sin(θ) * sin(ϕ), cos(θ)) + for (θ, ϕ) in zip(π * rand(m), 2π * rand(m))] end # ╔═╡ bdfebc7a-a7a6-4fa6-8884-b4f86dc55cb3 begin - Xclt = Yclt = ClusterTree(X) - plot(PlotTree(),Xclt,mc=:red) + Xclt = Yclt = ClusterTree(X) + plot(PlotTree(), Xclt; mc=:red) end # ╔═╡ 306d493b-a558-4e1d-bad6-5c295da79e82 begin - function G(x,y) - d = norm(x-y) + 1e-8 - inv(4π*d) - end - K = KernelMatrix(G,X,Y) + function G(x, y) + d = norm(x - y) + 1e-8 + return inv(4π * d) + end + K = KernelMatrix(G, X, Y) end # ╔═╡ 99041c6a-c12f-4a09-8bfd-cf769fd258a6 -H = assemble_hmat(K,Xclt,Yclt;threads=false) +H = assemble_hmat(K, Xclt, Yclt; threads=false) # ╔═╡ 02d89923-e478-4778-8b80-eaa3357a6a8c -b = @benchmark assemble_hmat($K,$Xclt,$Yclt;threads=false) +b = @benchmark assemble_hmat($K, $Xclt, $Yclt; threads=false) # ╔═╡ fca099bf-25c0-4ee8-b3f0-c042c4e5fde3 -begin - Xp = Yp = root_elements(Xclt) - Kp = KernelMatrix(G,Xp,Yp) - Hp = assemble_hmat(Kp,Xclt,Yclt;threads=false,global_index=false) +begin + Xp = Yp = root_elements(Xclt) + Kp = KernelMatrix(G, Xp, Yp) + Hp = assemble_hmat(Kp, Xclt, Yclt; threads=false, global_index=false) end # ╔═╡ 37ed939e-1738-4ade-85d7-526c3c4ab536 -bp = @benchmark assemble_hmat($Kp,Xclt,Yclt;threads=false,global_index=false) +bp = @benchmark assemble_hmat($Kp, Xclt, Yclt; threads=false, global_index=false) # ╔═╡ c6729646-0b6c-4ea2-b692-62e1e8f6b97c begin - struct LaplaceMatrixVec <: AbstractKernelMatrix{Float64} - X::Matrix{Float64} - Y::Matrix{Float64} - end - Base.size(K::LaplaceMatrixVec) = size(K.X,1), size(K.Y,1) - - # convenience constructor based on Vector of StaticVector - function LaplaceMatrixVec(_X::Vector{Point3D},_Y::Vector{Point3D}) - X = reshape(reinterpret(Float64,_X), 3,:) |> transpose |> collect - Y = reshape(reinterpret(Float64,_Y), 
3,:) |> transpose |> collect - LaplaceMatrixVec(X,Y) - end - - function Base.getindex(K::LaplaceMatrixVec,i::Int,j::Int) - d2 = (K.X[i,1] - K.Y[j,1])^2 + (K.X[i,2] - K.Y[j,2])^2 + (K.X[i,3] - K.Y[j,3])^2 - d = sqrt(d2) + 1e-8 - return inv(4π*d) - end - function Base.getindex(K::LaplaceMatrixVec,I::UnitRange,J::UnitRange) - T = eltype(K) - m = length(I) - n = length(J) - Xv = view(K.X,I,:) - Yv = view(K.Y,J,:) - out = Matrix{T}(undef,m,n) - @turbo for j in 1:n - for i in 1:m - d2 = (Xv[i,1] - Yv[j,1])^2 - d2 += (Xv[i,2] - Yv[j,2])^2 - d2 += (Xv[i,3] - Yv[j,3])^2 - d = sqrt(d2) + 1e-8 - out[i,j] = inv(4*π*d) - end - end - return out - end - function Base.getindex(K::LaplaceMatrixVec,I::UnitRange,j::Int) - T = eltype(K) - m = length(I) - Xv = view(K.X,I,:) - out = Vector{T}(undef,m) - @turbo for i in 1:m - d2 = (Xv[i,1] - K.Y[j,1])^2 - d2 += (Xv[i,2] - K.Y[j,2])^2 - d2 += (Xv[i,3] - K.Y[j,3])^2 - d = sqrt(d2) + 1e-8 - out[i] = inv(4*π*d) - end - return out - end - function Base.getindex(adjK::Adjoint{<:Any,<:LaplaceMatrixVec},I::UnitRange,j::Int) - K = parent(adjK) - T = eltype(K) - m = length(I) - Yv = view(K.Y,I,:) - out = Vector{T}(undef,m) - @turbo for i in 1:m - d2 = (Yv[i,1] - K.X[j,1])^2 - d2 += (Yv[i,2] - K.X[j,2])^2 - d2 += (Yv[i,3] - K.X[j,3])^2 - d = sqrt(d2) + 1e-8 - out[i] = inv(4*π*d) - end - return out - end + struct LaplaceMatrixVec <: AbstractKernelMatrix{Float64} + X::Matrix{Float64} + Y::Matrix{Float64} + end + Base.size(K::LaplaceMatrixVec) = size(K.X, 1), size(K.Y, 1) + + # convenience constructor based on Vector of StaticVector + function LaplaceMatrixVec(_X::Vector{Point3D}, _Y::Vector{Point3D}) + X = collect(transpose(reshape(reinterpret(Float64, _X), 3, :))) + Y = collect(transpose(reshape(reinterpret(Float64, _Y), 3, :))) + return LaplaceMatrixVec(X, Y) + end + + function Base.getindex(K::LaplaceMatrixVec, i::Int, j::Int) + d2 = (K.X[i, 1] - K.Y[j, 1])^2 + (K.X[i, 2] - K.Y[j, 2])^2 + + (K.X[i, 3] - K.Y[j, 3])^2 + d = sqrt(d2) + 1e-8 + return inv(4π * d) + end + function Base.getindex(K::LaplaceMatrixVec, I::UnitRange, J::UnitRange) + T = eltype(K) + m = length(I) + n = length(J) + Xv = view(K.X, I, :) + Yv = view(K.Y, J, :) + out = Matrix{T}(undef, m, n) + @turbo for j in 1:n + for i in 1:m + d2 = (Xv[i, 1] - Yv[j, 1])^2 + d2 += (Xv[i, 2] - Yv[j, 2])^2 + d2 += (Xv[i, 3] - Yv[j, 3])^2 + d = sqrt(d2) + 1e-8 + out[i, j] = inv(4 * π * d) + end + end + return out + end + function Base.getindex(K::LaplaceMatrixVec, I::UnitRange, j::Int) + T = eltype(K) + m = length(I) + Xv = view(K.X, I, :) + out = Vector{T}(undef, m) + @turbo for i in 1:m + d2 = (Xv[i, 1] - K.Y[j, 1])^2 + d2 += (Xv[i, 2] - K.Y[j, 2])^2 + d2 += (Xv[i, 3] - K.Y[j, 3])^2 + d = sqrt(d2) + 1e-8 + out[i] = inv(4 * π * d) + end + return out + end + function Base.getindex(adjK::Adjoint{<:Any,<:LaplaceMatrixVec}, I::UnitRange, j::Int) + K = parent(adjK) + T = eltype(K) + m = length(I) + Yv = view(K.Y, I, :) + out = Vector{T}(undef, m) + @turbo for i in 1:m + d2 = (Yv[i, 1] - K.X[j, 1])^2 + d2 += (Yv[i, 2] - K.X[j, 2])^2 + d2 += (Yv[i, 3] - K.X[j, 3])^2 + d = sqrt(d2) + 1e-8 + out[i] = inv(4 * π * d) + end + return out + end end # ╔═╡ 85de3038-88f8-442e-a2cd-5462f9552d12 @@ -209,9 +212,9 @@ The `LaplaceMatrix` struct above has a vectorized implementatio of `getindex` fo """ # ╔═╡ 71208d87-a9ed-410e-bad1-f770fc84d4db -begin - Kv = LaplaceMatrixVec(Xp,Yp) - Hv = assemble_hmat(Kv,Xclt,Yclt;global_index=false,threads=false) +begin + Kv = LaplaceMatrixVec(Xp, Yp) + Hv = assemble_hmat(Kv, Xclt, Yclt; global_index=false, 
threads=false) end # ╔═╡ 6eaa63e3-755d-482a-a113-98312b6f6fb1 @@ -220,7 +223,7 @@ Benchmarking the new implementation yields: """ # ╔═╡ 59fda22e-0547-4845-9207-31c7b526d2f6 -bv = @benchmark assemble_hmat($Kv,Xclt,Yclt;threads=false,global_index=false) +bv = @benchmark assemble_hmat($Kv, Xclt, Yclt; threads=false, global_index=false) # ╔═╡ 0b967aaf-271c-4153-acd6-2e5acff8dc75 md""" @@ -228,7 +231,7 @@ Comparing the times we see that the vectorized implementation is the fastest, an """ # ╔═╡ afc737f1-3523-413f-a73a-96a909580047 -minimum(b),minimum(bp),minimum(bv) +minimum(b), minimum(bp), minimum(bv) # ╔═╡ 44f08eda-b5d3-40d0-aa8b-3cae9e137854 md""" @@ -237,14 +240,14 @@ Finally, not that as expected all of these yield the same matrix, as indicated b # ╔═╡ 5fd9b390-ea5f-425c-a624-0cd4a146a897 begin - x = rand(m) - y = H*x - yv = Hv*x - yp = Hp*x - er = max(norm(y-yv)/norm(y),norm(y-yp)/norm(y)) - md""" - Maximum error: $er - """ + x = rand(m) + y = H * x + yv = Hv * x + yp = Hp * x + er = max(norm(y - yv) / norm(y), norm(y - yp) / norm(y)) + md""" + Maximum error: $er + """ end # ╔═╡ Cell order: diff --git a/src/HMatrices.jl b/src/HMatrices.jl index f877284..06a4de4 100644 --- a/src/HMatrices.jl +++ b/src/HMatrices.jl @@ -1,10 +1,10 @@ module HMatrices -const PROJECT_ROOT = pkgdir(HMatrices) +const PROJECT_ROOT = pkgdir(HMatrices) using StaticArrays using LinearAlgebra -using Statistics:median +using Statistics: median using TimerOutputs using Printf using RecipesBase @@ -12,7 +12,7 @@ using Distributed using Base.Threads using AbstractTrees: print_tree -import AbstractTrees +using AbstractTrees: AbstractTrees import LinearAlgebra: mul!, lu!, lu, LU, ldiv!, rdiv!, axpy!, rank, rmul!, lmul! import Base: Matrix, adjoint, parent @@ -55,28 +55,28 @@ include("triangular.jl") include("lu.jl") export - # types (re-exported) - CardinalitySplitter, - ClusterTree, - DyadicSplitter, - GeometricSplitter, - GeometricMinimalSplitter, - HyperRectangle, - # abstract types - AbstractKernelMatrix, - # types - HMatrix, - KernelMatrix, - StrongAdmissibilityStd, - WeakAdmissibilityStd, - PartialACA, - ACA, - TSVD, - # functions - compression_ratio, - print_tree, - assemble_hmat, - # macros - @hprofile +# types (re-exported) + CardinalitySplitter, + ClusterTree, + DyadicSplitter, + GeometricSplitter, + GeometricMinimalSplitter, + HyperRectangle, +# abstract types + AbstractKernelMatrix, +# types + HMatrix, + KernelMatrix, + StrongAdmissibilityStd, + WeakAdmissibilityStd, + PartialACA, + ACA, + TSVD, +# functions + compression_ratio, + print_tree, + assemble_hmat, +# macros + @hprofile end diff --git a/src/addition.jl b/src/addition.jl index 30d1302..2006fb3 100644 --- a/src/addition.jl +++ b/src/addition.jl @@ -15,100 +15,100 @@ simply be assigned `X`. This means that after the call `axpy(a,X,Y)`, the object `Y` is in a *dirty* state (see [`isclean`][@ref]) and usually a call to [`flush_to_leaves!`](@ref) or [`flush_to_children!`](@ref) follows. 
""" -function axpy!(a,X::Matrix,Y::RkMatrix) - axpy!(a,RkMatrix(X),Y) +function axpy!(a, X::Matrix, Y::RkMatrix) + return axpy!(a, RkMatrix(X), Y) end # 1.3 -function axpy!(a,X::Matrix,Y::HMatrix) +function axpy!(a, X::Matrix, Y::HMatrix) if hasdata(Y) - axpy!(a,X,data(Y)) + axpy!(a, X, data(Y)) else - setdata!(Y,a*X) + setdata!(Y, a * X) end return Y end # 2.1 -function axpy!(a,X::RkMatrix,Y::Matrix) - axpy!(a,Matrix(X),Y) +function axpy!(a, X::RkMatrix, Y::Matrix) + return axpy!(a, Matrix(X), Y) end #2.2 -function axpy!(a,X::RkMatrix,Y::RkMatrix) - Y.A = hcat(a*X.A,Y.A) - Y.B = hcat(X.B,Y.B) +function axpy!(a, X::RkMatrix, Y::RkMatrix) + Y.A = hcat(a * X.A, Y.A) + Y.B = hcat(X.B, Y.B) return Y end # 2.3 -function axpy!(a,X::RkMatrix,Y::HMatrix) +function axpy!(a, X::RkMatrix, Y::HMatrix) if hasdata(Y) - axpy!(a,X,data(Y)) + axpy!(a, X, data(Y)) else - setdata!(Y,a*X) + setdata!(Y, a * X) end return Y end #3.1 -function axpy!(a,X::HMatrix,Y::Matrix) +function axpy!(a, X::HMatrix, Y::Matrix) @debug "calling axpy! with `X` and HMatrix and `Y` a Matrix" shift = pivot(X) .- 1 for block in AbstractTrees.PreOrderDFS(X) irange = rowrange(block) .- shift[1] jrange = colrange(block) .- shift[2] if hasdata(block) - axpy!(a,data(block),view(Y,irange,jrange)) + axpy!(a, data(block), view(Y, irange, jrange)) end end return Y end # 3.2 -function axpy!(a,X::HMatrix,Y::RkMatrix) +function axpy!(a, X::HMatrix, Y::RkMatrix) @debug "calling axpby! with `X` and HMatrix and `Y` an RkMatrix" # FIXME: inneficient implementation due to conversion from HMatrix to # Matrix. Does it really matter? I don't think this function should be # called. - axpy!(a,Matrix(X;global_index=false),Y) + return axpy!(a, Matrix(X; global_index=false), Y) end # 3.3 -function axpy!(a,X::HMatrix,Y::HMatrix) +function axpy!(a, X::HMatrix, Y::HMatrix) # TODO: assumes X and Y have the same structure. How to reinforce this? if hasdata(X) if hasdata(Y) - axpy!(a,data(X),Y) + axpy!(a, data(X), Y) else - setdata!(Y,a*data(X)) + setdata!(Y, a * data(X)) end end @assert size(children(X)) == size(children(Y)) "adding hierarchical matrices requires identical block structure" - for (bx,by) in zip(children(X),children(Y)) - axpy!(a,bx,by) + for (bx, by) in zip(children(X), children(Y)) + axpy!(a, bx, by) end return Y end # add a unifor scaling to an HMatrix return an HMatrix -function axpy!(a,X::UniformScaling,Y::HMatrix) +function axpy!(a, X::UniformScaling, Y::HMatrix) @assert isclean(Y) if hasdata(Y) d = data(Y) @assert d isa Matrix n = min(size(d)...) - for i=1:n - d[i,i] += a*X.λ + for i in 1:n + d[i, i] += a * X.λ end else n = min(blocksize(Y)...) 
- for i=1:n - axpy!(a,X,children(Y)[i,i]) + for i in 1:n + axpy!(a, X, children(Y)[i, i]) end end return Y end -Base.:(+)(X::UniformScaling,Y::HMatrix) = axpy!(true,X,deepcopy(Y)) -Base.:(+)(X::HMatrix,Y::UniformScaling) = Y+X +Base.:(+)(X::UniformScaling, Y::HMatrix) = axpy!(true, X, deepcopy(Y)) +Base.:(+)(X::HMatrix, Y::UniformScaling) = Y + X diff --git a/src/clustertree.jl b/src/clustertree.jl index bfa3e9b..6b80d22 100644 --- a/src/clustertree.jl +++ b/src/clustertree.jl @@ -21,8 +21,8 @@ mutable struct ClusterTree{N,T} buffer::Vector{Int} children::Vector{ClusterTree{N,T}} parent::ClusterTree{N,T} - function ClusterTree(els::Vector{SVector{N,T}},container,loc_idxs, - loc2glob,buffer,children,parent) where {N,T} + function ClusterTree(els::Vector{SVector{N,T}}, container, loc_idxs, + loc2glob, buffer, children, parent) where {N,T} clt = new{N,T}(els, container, loc_idxs, loc2glob, buffer) clt.children = isnothing(children) ? Vector{typeof(clt)}() : children clt.parent = isnothing(parent) ? clt : parent @@ -108,16 +108,16 @@ strategy encoded in `splitter`. If `copy_elements` is set to false, the during the tree construction. """ function ClusterTree(elements, splitter=CardinalitySplitter(); - copy_elements=true, - threads=false) + copy_elements=true, + threads=false) copy_elements && (elements = deepcopy(elements)) bbox = bounding_box(elements) n = length(elements) irange = 1:n loc2glob = collect(irange) - buffer = collect(irange) + buffer = collect(irange) children = nothing - parent = nothing + parent = nothing #build the root, then recurse root = ClusterTree(elements, bbox, irange, loc2glob, buffer, children, parent) _build_cluster_tree!(root, splitter, threads) @@ -142,8 +142,6 @@ function _build_cluster_tree!(current_node, splitter, threads, depth=0) return current_node end - - function Base.show(io::IO, tree::ClusterTree{N,T}) where {N,T} return print(io, "ClusterTree with $(length(tree.index_range)) points.") end diff --git a/src/compressor.jl b/src/compressor.jl index 161f6cf..93cd9a2 100644 --- a/src/compressor.jl +++ b/src/compressor.jl @@ -33,24 +33,24 @@ true ``` """ -@Base.kwdef struct ACA +Base.@kwdef struct ACA atol::Float64 = 0 - rank::Int = typemax(Int) - rtol::Float64 = atol>0 || rank 0 || rank < typemax(Int) ? 0 : sqrt(eps(Float64)) end -function (aca::ACA)(K,rowtree::ClusterTree,coltree::ClusterTree) +function (aca::ACA)(K, rowtree::ClusterTree, coltree::ClusterTree) irange = index_range(rowtree) jrange = index_range(coltree) - aca(K,irange,jrange) + return aca(K, irange, jrange) end -function (aca::ACA)(K,irange,jrange) - M = K[irange,jrange] # computes the entire matrix. - _aca_full!(M,aca.atol,aca.rank,aca.rtol) +function (aca::ACA)(K, irange, jrange) + M = K[irange, jrange] # computes the entire matrix. + return _aca_full!(M, aca.atol, aca.rank, aca.rtol) end -(comp::ACA)(K::Matrix) = comp(K,1:size(K,1),1:size(K,2)) +(comp::ACA)(K::Matrix) = comp(K, 1:size(K, 1), 1:size(K, 2)) """ _aca_full!(M,atol,rmax,rtol) @@ -60,35 +60,35 @@ full pivoting. The matrix `M` is modified in place. The returned `RkMatrix` has rank at most `rmax`, and is expected to satisfy `|M - R| < max(atol,rtol*|M|)`. 
""" function _aca_full!(M, atol, rmax, rtol) - Madj = adjoint(M) - T = eltype(M) - m,n = size(M) - A = Vector{Vector{T}}() - B = Vector{Vector{T}}() + Madj = adjoint(M) + T = eltype(M) + m, n = size(M) + A = Vector{Vector{T}}() + B = Vector{Vector{T}}() er = Inf - exact_norm = norm(M,2) # exact norm + exact_norm = norm(M, 2) # exact norm r = 0 # current rank - while er > max(atol,rtol*exact_norm) && r < rmax + while er > max(atol, rtol * exact_norm) && r < rmax I = _aca_full_pivot(M) - i,j = Tuple(I) - δ = M[I] + i, j = Tuple(I) + δ = M[I] if svdvals(δ)[end] == 0 - return RkMatrix(A,B) + return RkMatrix(A, B) else iδ = inv(δ) - col = M[:,j] - adjcol = Madj[:,i] + col = M[:, j] + adjcol = Madj[:, i] for k in eachindex(adjcol) - adjcol[k] = adjcol[k]*adjoint(iδ) + adjcol[k] = adjcol[k] * adjoint(iδ) end r += 1 - push!(A,col) - push!(B,adjcol) - axpy!(-1,col*adjoint(adjcol),M) # M <-- M - col*row' - er = norm(M,2) # exact error + push!(A, col) + push!(B, adjcol) + axpy!(-1, col * adjoint(adjcol), M) # M <-- M - col*row' + er = norm(M, 2) # exact error end end - return RkMatrix(A,B) + return RkMatrix(A, B) end """ @@ -118,26 +118,26 @@ evaluated at every `i,j`. This is usually much faster than [`ACA`](@ref), but due to the pivoting strategy the algorithm may fail in special cases, even when the underlying linear operator is of low rank. """ -@Base.kwdef struct PartialACA +Base.@kwdef struct PartialACA atol::Float64 = 0 - rank::Int = typemax(Int) - rtol::Float64 = atol>0 || rank 0 || rank < typemax(Int) ? 0 : sqrt(eps(Float64)) end -function (paca::PartialACA)(K,rowtree::ClusterTree,coltree::ClusterTree) +function (paca::PartialACA)(K, rowtree::ClusterTree, coltree::ClusterTree) # find initial column pivot for partial ACA istart = _aca_partial_initial_pivot(rowtree) irange = index_range(rowtree) jrange = index_range(coltree) - _aca_partial(K,irange,jrange,paca.atol,paca.rank,paca.rtol,istart-irange.start+1) + return _aca_partial(K, irange, jrange, paca.atol, paca.rank, paca.rtol, + istart - irange.start + 1) end -function (paca::PartialACA)(K,irange::UnitRange,jrange::UnitRange) - _aca_partial(K,irange,jrange,paca.atol,paca.rank,paca.rtol) +function (paca::PartialACA)(K, irange::UnitRange, jrange::UnitRange) + return _aca_partial(K, irange, jrange, paca.atol, paca.rank, paca.rtol) end -(paca::PartialACA)(K::Matrix) = paca(K,1:size(K,1),1:size(K,2)) - +(paca::PartialACA)(K::Matrix) = paca(K, 1:size(K, 1), 1:size(K, 2)) """ _aca_partial(K,irange,jrange,atol,rmax,rtol,istart=1) @@ -148,73 +148,73 @@ partial pivoting. The returned `R::RkMatrix` provides an approximation to max(atol,rtol*|M|)`, but this inequality may fail to hold due to the various errors involved in estimating the error and |M|. """ -function _aca_partial(K,irange,jrange,atol,rmax,rtol,istart=1) +function _aca_partial(K, irange, jrange, atol, rmax, rtol, istart=1) Kadj = adjoint(K) # if irange and jrange are Colon, extract the size from `K` directly. 
This # allows for some code reuse with specializations on getindex(i,::Colon) and # getindex(::Colon,j) for when `K` is a `RkMatrix` if irange isa Colon && jrange isa Colon - m,n = size(K) - ishift,jshift = 0,0 + m, n = size(K) + ishift, jshift = 0, 0 else - m,n = length(irange), length(jrange) - ishift,jshift = irange.start-1, jrange.start-1 + m, n = length(irange), length(jrange) + ishift, jshift = irange.start - 1, jrange.start - 1 # maps global indices to local indices end - rmax = min(m,n,rmax) - T = Base.eltype(K) - A = Vector{Vector{T}}() - B = Vector{Vector{T}}() - I = BitVector(true for i = 1:m) - J = BitVector(true for i = 1:n) - i = istart # initial pivot - er = Inf + rmax = min(m, n, rmax) + T = Base.eltype(K) + A = Vector{Vector{T}}() + B = Vector{Vector{T}}() + I = BitVector(true for i in 1:m) + J = BitVector(true for i in 1:n) + i = istart # initial pivot + er = Inf est_norm = 0 # approximate norm of K[irange,jrange] - r = 0 # current rank - while er > max(atol,rtol*est_norm) && r < rmax + r = 0 # current rank + while er > max(atol, rtol * est_norm) && r < rmax # remove index i from allowed row I[i] = false # compute next row by row <-- K[i+ishift,jrange] - R[i,:] - adjcol = Kadj[jrange,i+ishift] - for k = 1:r - axpy!(-adjoint(A[k][i]),B[k],adjcol) + adjcol = Kadj[jrange, i + ishift] + for k in 1:r + axpy!(-adjoint(A[k][i]), B[k], adjcol) # for j in eachindex(row) # row[j] = row[j] - B[k][j]*adjoint(A[k][i]) # end end - j = _aca_partial_pivot(adjcol,J) - δ = adjcol[j] + j = _aca_partial_pivot(adjcol, J) + δ = adjcol[j] if svdvals(δ)[end] == 0 @debug "zero pivot found during partial aca" - i = findfirst(x->x==true,I) + i = findfirst(x -> x == true, I) else # δ != 0 iδ = inv(δ) # rdiv!(b,δ) # b <-- b/δ for k in eachindex(adjcol) - adjcol[k] = adjcol[k]*iδ + adjcol[k] = adjcol[k] * iδ end J[j] = false # compute next col by col <-- K[irange,j+jshift] - R[:,j] - col = K[irange,j+jshift] - for k = 1:r - axpy!(-adjoint(B[k][j]),A[k],col) + col = K[irange, j + jshift] + for k in 1:r + axpy!(-adjoint(B[k][j]), A[k], col) # for i in eachindex(col) # col[i] = col[i] - A[k][i]*adjoint(B[k][j]) # end end # push new cross and increase rank r += 1 - push!(A,col) - push!(B,adjcol) + push!(A, col) + push!(B, adjcol) # estimate the error by || R_{k} - R_{k-1} || = ||a|| ||b|| - er = norm(col)*norm(adjcol) + er = norm(col) * norm(adjcol) # estimate the norm by || K || ≈ || R_k || - est_norm = _update_frob_norm(est_norm,A,B) - i = _aca_partial_pivot(col,I) + est_norm = _update_frob_norm(est_norm, A, B) + i = _aca_partial_pivot(col, I) # @show r, er end end - return RkMatrix(A,B) + return RkMatrix(A, B) end """ @@ -223,14 +223,14 @@ end Given the Frobenius norm of `Rₖ = A[1:end-1]*adjoint(B[1:end-1])` in `acc`, compute the Frobenius norm of `Rₖ₊₁ = A*adjoint(B)` efficiently. """ -@inline function _update_frob_norm(cur,A,B) +@inline function _update_frob_norm(cur, A, B) @timeit_debug "Update Frobenius norm" begin k = length(A) a = A[end] b = B[end] out = norm(a)^2 * norm(b)^2 - for l=1:k-1 - out += 2*real(dot(A[l],a)*(dot(b,B[l]))) + for l in 1:(k - 1) + out += 2 * real(dot(A[l], a) * (dot(b, B[l]))) end end return sqrt(cur^2 + out) @@ -250,13 +250,13 @@ kernels; see (https://www.sciencedirect.com/science/article/pii/S0021999117306721)[https://www.sciencedirect.com/science/article/pii/S0021999117306721] for more details. 
""" -function _aca_partial_pivot(v,J) +function _aca_partial_pivot(v, J) idx = -1 val = -Inf for n in 1:length(J) J[n] || continue - x = v[n] - σ = svdvals(x)[end] + x = v[n] + σ = svdvals(x)[end] σ < val && continue idx = n val = σ @@ -276,11 +276,11 @@ When `x` is a scalar, this is simply the element with largest absolute value. """ function _aca_full_pivot(M) idxs = CartesianIndices(M) - idx = first(idxs) + idx = first(idxs) val = -Inf for I in idxs - x = M[I] - σ = svdvals(x)[end] + x = M[I] + σ = svdvals(x)[end] σ < val && continue idx = I val = σ @@ -292,15 +292,15 @@ function _aca_partial_initial_pivot(rowtree) # the method below is suggested in Bebendorf, but it does not seem to # improve the error. The idea is that the initial pivot is the closesest # point to the center of the cluster - xc = center(container(rowtree)) - d = Inf - els = root_elements(rowtree) - loc_idxs = index_range(rowtree) - istart = first(loc_idxs) + xc = center(container(rowtree)) + d = Inf + els = root_elements(rowtree) + loc_idxs = index_range(rowtree) + istart = first(loc_idxs) for i in loc_idxs - x = els[i] - if norm(x-xc) < d - d = norm(x-xc) + x = els[i] + if norm(x - xc) < d + d = norm(x - xc) istart = i end end @@ -314,24 +314,24 @@ Compression algorithm based on *a posteriori* truncation of an `SVD`. This is the optimal approximation in Frobenius norm; however, it also tends to be very expensive and thus should be used mostly for "small" matrices. """ -@Base.kwdef struct TSVD +Base.@kwdef struct TSVD atol::Float64 = 0 - rank::Int = typemax(Int) - rtol::Float64 = atol>0 || rank 0 || rank < typemax(Int) ? 0 : sqrt(eps(Float64)) end -function (tsvd::TSVD)(K,rowtree::ClusterTree,coltree::ClusterTree) +function (tsvd::TSVD)(K, rowtree::ClusterTree, coltree::ClusterTree) irange = index_range(rowtree) jrange = index_range(coltree) - tsvd(K,irange,jrange) + return tsvd(K, irange, jrange) end -function (tsvd::TSVD)(K,irange::UnitRange,jrange::UnitRange) - M = K[irange,jrange] - compress!(M,tsvd) +function (tsvd::TSVD)(K, irange::UnitRange, jrange::UnitRange) + M = K[irange, jrange] + return compress!(M, tsvd) end -(comp::TSVD)(K::Matrix) = comp(K,1:size(K,1),1:size(K,2)) +(comp::TSVD)(K::Matrix) = comp(K, 1:size(K, 1), 1:size(K, 2)) """ compress!(M::RkMatrix,tsvd::TSVD) @@ -340,26 +340,26 @@ Recompress the matrix `R` using a truncated svd of `R`. The implementation uses the `qr-svd` strategy to efficiently compute `svd(R)` when `rank(R) ≪ min(size(R))`. """ -function compress!(R::RkMatrix,tsvd::TSVD) - m,n = size(R) +function compress!(R::RkMatrix, tsvd::TSVD) + m, n = size(R) QA, RA = qr!(R.A) QB, RB = qr!(R.B) - F = svd!(RA*adjoint(RB)) # svd of an r×r problem - U = QA*F.U - Vt = F.Vt*adjoint(QB) - V = adjoint(Vt) + F = svd!(RA * adjoint(RB)) # svd of an r×r problem + U = QA * F.U + Vt = F.Vt * adjoint(QB) + V = adjoint(Vt) sp_norm = F.S[1] # spectral norm - r = findlast(x -> x>max(tsvd.atol,tsvd.rtol*sp_norm), F.S) - isnothing(r) && (r = min(rank(R),m,n)) - r = min(r,tsvd.rank) - if m x > max(tsvd.atol, tsvd.rtol * sp_norm), F.S) + isnothing(r) && (r = min(rank(R), m, n)) + r = min(r, tsvd.rank) + if m < n + A = @views U[:, 1:r] * Diagonal(F.S[1:r]) + B = V[:, 1:r] else - A = U[:,1:r] - B = @views (V[:,1:r])*Diagonal(F.S[1:r]) + A = U[:, 1:r] + B = @views (V[:, 1:r]) * Diagonal(F.S[1:r]) end - R.A,R.B = A,B + R.A, R.B = A, B return R end @@ -369,19 +369,19 @@ end Recompress the matrix `M` using a truncated svd and output an `RkMatrix`. The data in `M` is invalidated in the process. 
""" -function compress!(M::Matrix,tsvd::TSVD) - m,n = size(M) - F = svd!(M) +function compress!(M::Matrix, tsvd::TSVD) + m, n = size(M) + F = svd!(M) sp_norm = F.S[1] # spectral norm - r = findlast(x -> x>max(tsvd.atol,tsvd.rtol*sp_norm), F.S) - isnothing(r) && (r = min(m,n)) - r = min(r,tsvd.rank) - if m x > max(tsvd.atol, tsvd.rtol * sp_norm), F.S) + isnothing(r) && (r = min(m, n)) + r = min(r, tsvd.rank) + if m < n + A = @views F.U[:, 1:r] * Diagonal(F.S[1:r]) + B = F.V[:, 1:r] else - A = F.U[:,1:r] - B = @views (F.V[:,1:r])*Diagonal(F.S[1:r]) + A = F.U[:, 1:r] + B = @views (F.V[:, 1:r]) * Diagonal(F.S[1:r]) end - return RkMatrix(A,B) + return RkMatrix(A, B) end diff --git a/src/dhmatrix.jl b/src/dhmatrix.jl index d568c0e..8f3a47f 100644 --- a/src/dhmatrix.jl +++ b/src/dhmatrix.jl @@ -3,8 +3,8 @@ using Distributed function typeof_future(r::Future) if isready(r) pid = r.where - f = @spawnat pid typeof(fetch(r)) - T = fetch(f) + f = @spawnat pid typeof(fetch(r)) + T = fetch(f) return T else error("future is not ready") @@ -23,9 +23,9 @@ end Base.fetch(r::RemoteHMatrix) = fetch(r.future) -function Base.getindex(r::RemoteHMatrix,i::Int,j::Int) +function Base.getindex(r::RemoteHMatrix, i::Int, j::Int) pid = r.future.where - @fetchfrom pid getindex(fetch(r),i,j) + @fetchfrom pid getindex(fetch(r), i, j) end """ @@ -46,10 +46,10 @@ mutable struct DHMatrix{R,T} <: AbstractHMatrix{T} children::Matrix{DHMatrix{R,T}} parent::DHMatrix{R,T} # inner constructor which handles `nothing` fields. - function DHMatrix{R,T}(rowtree,coltree,data,children,parent) where {R,T} - dhmat = new{R,T}(rowtree,coltree,data) - dhmat.children = isnothing(children) ? Matrix{DHMatrix{R,T}}(undef,0,0) : children - dhmat.parent = isnothing(parent) ? dhmat : parent + function DHMatrix{R,T}(rowtree, coltree, data, children, parent) where {R,T} + dhmat = new{R,T}(rowtree, coltree, data) + dhmat.children = isnothing(children) ? Matrix{DHMatrix{R,T}}(undef, 0, 0) : children + dhmat.parent = isnothing(parent) ? dhmat : parent return dhmat end end @@ -65,22 +65,24 @@ for distributed computing. Currently, the only available options is `distribute_columns`, which will partition the columns of the underlying matrix into `floor(log2(nw))` parts, where `nw` is the number of workers available. 
""" -function DHMatrix{T}(rowtree::R, coltree::R; partition_strategy=:distribute_columns) where {R,T} +function DHMatrix{T}(rowtree::R, coltree::R; + partition_strategy=:distribute_columns) where {R,T} #build root - root = DHMatrix{R,T}(rowtree,coltree,nothing,nothing,nothing) + root = DHMatrix{R,T}(rowtree, coltree, nothing, nothing, nothing) # depending on the partition strategy, dispatch to appropriate (recursive) # method if partition_strategy == :distribute_columns - nw = nworkers() - dmax = floor(Int64,log2(nw)) - _build_block_structure_distribute_cols!(root,dmax) + nw = nworkers() + dmax = floor(Int64, log2(nw)) + _build_block_structure_distribute_cols!(root, dmax) else error("unrecognized partition strategy") end return root end -function _build_block_structure_distribute_cols!(current_node::DHMatrix{R,T},dmax) where {R,T} +function _build_block_structure_distribute_cols!(current_node::DHMatrix{R,T}, + dmax) where {R,T} if Trees.depth(current_node) == dmax return current_node else @@ -90,10 +92,11 @@ function _build_block_structure_distribute_cols!(current_node::DHMatrix{R,T},dma # do not recurse on row, only on columns row_children = [X] col_children = Y.children - children = [DHMatrix{R,T}(r,c,nothing,nothing,current_node) for r in row_children, c in col_children] + children = [DHMatrix{R,T}(r, c, nothing, nothing, current_node) + for r in row_children, c in col_children] current_node.children = children for child in children - _build_block_structure_distribute_cols!(child,dmax) + _build_block_structure_distribute_cols!(child, dmax) end return current_node end @@ -101,17 +104,18 @@ end Base.size(H::DHMatrix) = length(rowrange(H)), length(colrange(H)) -function Base.show(io::IO,::MIME"text/plain",hmat::DHMatrix) +function Base.show(io::IO, ::MIME"text/plain", hmat::DHMatrix) # isclean(hmat) || return print(io,"Dirty DHMatrix") - println(io,"Distributed HMatrix of $(eltype(hmat)) with range $(rowrange(hmat)) × $(colrange(hmat))") + println(io, + "Distributed HMatrix of $(eltype(hmat)) with range $(rowrange(hmat)) × $(colrange(hmat))") nodes = collect(AbstractTrees.PreOrderDFS(hmat)) - println(io,"\t number of nodes in tree: $(length(nodes))") + println(io, "\t number of nodes in tree: $(length(nodes))") leaves = collect(AbstractTrees.Leaves(hmat)) - @printf(io,"\t number of leaves: %i\n",length(leaves)) - for (i,leaf) in enumerate(leaves) - r = leaf.data.future + @printf(io, "\t number of leaves: %i\n", length(leaves)) + for (i, leaf) in enumerate(leaves) + r = leaf.data.future pid = r.where - irange,jrange = @fetchfrom pid rowrange(fetch(r)), colrange(fetch(r)) + irange, jrange = @fetchfrom pid rowrange(fetch(r)), colrange(fetch(r)) println("\t\t leaf $i on process $pid spanning $irange × $jrange") end end @@ -122,58 +126,61 @@ end Internal methods called **after** the `DHMatrix` structure has been initialized in order to construct the `HMatrix` on each of the leaves of the `DHMatrix`. 
""" -function _assemble_hmat_distributed(K,rtree,ctree;adm=StrongAdmissibilityStd(),comp=PartialACA(), - global_index=use_global_index(),threads=use_threads()) +function _assemble_hmat_distributed(K, rtree, ctree; adm=StrongAdmissibilityStd(), + comp=PartialACA(), + global_index=use_global_index(), threads=use_threads()) # R = typeof(rtree) T = eltype(K) - wids = workers() - root = DHMatrix{T}(rtree,ctree;partition_strategy=:distribute_columns) - leaves = AbstractTrees.Leaves(root) |> collect + wids = workers() + root = DHMatrix{T}(rtree, ctree; partition_strategy=:distribute_columns) + leaves = collect(AbstractTrees.Leaves(root)) @info "Assembling distributed HMatrix on $(length(leaves)) processes" @sync for (k, leaf) in enumerate(leaves) pid = wids[k] # id of k-th worker - r = @spawnat pid assemble_hmat(K, rowtree(leaf), coltree(leaf); adm, comp,global_index,threads,distributed=false) + r = @spawnat pid assemble_hmat(K, rowtree(leaf), coltree(leaf); adm, comp, + global_index, threads, distributed=false) leaf.data = RemoteHMatrix{R,T}(r) end return root end -function mul!(y::AbstractVector,A::DHMatrix,x::AbstractVector,a::Number,b::Number; - global_index=use_global_index(),threads=use_threads()) +function mul!(y::AbstractVector, A::DHMatrix, x::AbstractVector, a::Number, b::Number; + global_index=use_global_index(), threads=use_threads()) # since the HMatrix represents A = Pr*H*Pc, where Pr and Pc are row and column # permutations, we need first to rewrite C <-- b*C + a*(Pc*H*Pb)*B as # C <-- Pr*(b*inv(Pr)*C + a*H*(Pc*B)). Following this rewrite, the # multiplication is performed by first defining B <-- Pc*B, and C <-- # inv(Pr)*C, doing the multiplication with the permuted entries, and then # permuting the result C <-- Pr*C at the end. - ctree = A.coltree - rtree = A.rowtree + ctree = A.coltree + rtree = A.rowtree # permute input if global_index - x = x[ctree.loc2glob] - y = permute!(y,rtree.loc2glob) - rmul!(x,a) # multiply in place since this is a new copy, so does not mutate exterior x + x = x[ctree.loc2glob] + y = permute!(y, rtree.loc2glob) + rmul!(x, a) # multiply in place since this is a new copy, so does not mutate exterior x else - x = a*x # new copy of x + x = a * x # new copy of x end - rmul!(y,b) - leaves = filter_tree(x->Trees.isleaf(x),A) - nb = length(leaves) - acc = Vector{Future}(undef,nb) + rmul!(y, b) + leaves = filter_tree(x -> Trees.isleaf(x), A) + nb = length(leaves) + acc = Vector{Future}(undef, nb) @sync for i in 1:nb - r = leaves[i].data.future - pid = r.where - jrange = @fetchfrom pid colrange(fetch(r)) - T,n = eltype(y), length(y) - acc[i] = @spawnat pid mul!(zeros(T,n),fetch(r),view(x,jrange),1,0;global_index=false,threads) + r = leaves[i].data.future + pid = r.where + jrange = @fetchfrom pid colrange(fetch(r)) + T, n = eltype(y), length(y) + acc[i] = @spawnat pid mul!(zeros(T, n), fetch(r), view(x, jrange), 1, 0; + global_index=false, threads) end # reduction stage: fetch everybody to worker 1 and sum them up for yi in acc - axpy!(1,fetch(yi),y) + axpy!(1, fetch(yi), y) end # permute output - global_index && invpermute!(y,loc2glob(rtree)) + global_index && invpermute!(y, loc2glob(rtree)) return y end diff --git a/src/hmatrix.jl b/src/hmatrix.jl index 9cdfb87..f22e519 100644 --- a/src/hmatrix.jl +++ b/src/hmatrix.jl @@ -1,6 +1,6 @@ -abstract type AbstractHMatrix{T} <: AbstractMatrix{T} end +abstract type AbstractHMatrix{T} <: AbstractMatrix{T} end -function Base.getindex(H::AbstractHMatrix,i,j) +function Base.getindex(H::AbstractHMatrix, i, j) # NOTE: you may 
disable `getindex` to avoid having code that will work, but be # horribly slow because it falls back to some generic implementation in # LinearAlgebra. The downside is that the `show` method, which will usually @@ -15,25 +15,25 @@ function Base.getindex(H::AbstractHMatrix,i,j) `AbstractHMatrix` has fallen back to a generic implementation. """ if ALLOW_GETINDEX[] - shift = pivot(H) .-1 - _getindex(H,i+shift[1],j+shift[2]) + shift = pivot(H) .- 1 + _getindex(H, i + shift[1], j + shift[2]) else error(msg) end end -function _getindex(H,i,j) - (i ∈ rowrange(H)) && (j ∈ colrange(H)) || throw(BoundsError(H,(i,j))) +function _getindex(H, i, j) + (i ∈ rowrange(H)) && (j ∈ colrange(H)) || throw(BoundsError(H, (i, j))) acc = zero(eltype(H)) shift = pivot(H) .- 1 if hasdata(H) il = i - shift[1] jl = j - shift[2] - acc += data(H)[il,jl] + acc += data(H)[il, jl] end for child in children(H) if (i ∈ rowrange(child)) && (j ∈ colrange(child)) - acc += _getindex(child,i,j) + acc += _getindex(child, i, j) end end return acc @@ -53,96 +53,95 @@ mutable struct HMatrix{R,T} <: AbstractHMatrix{T} children::Matrix{HMatrix{R,T}} parent::HMatrix{R,T} # inner constructor which handles `nothing` fields. - function HMatrix{R,T}(rowtree,coltree,adm,data,children,parent) where {R,T} + function HMatrix{R,T}(rowtree, coltree, adm, data, children, parent) where {R,T} if data !== nothing - @assert (length(rowtree),length(coltree)) === size(data) "$(length(rowtree)),$(length(coltree)) != $(size(data))" + @assert (length(rowtree), length(coltree)) === size(data) "$(length(rowtree)),$(length(coltree)) != $(size(data))" end - hmat = new{R,T}(rowtree,coltree,adm,data) - hmat.children = isnothing(children) ? Matrix{HMatrix{R,T}}(undef,0,0) : children - hmat.parent = isnothing(parent) ? hmat : parent + hmat = new{R,T}(rowtree, coltree, adm, data) + hmat.children = isnothing(children) ? Matrix{HMatrix{R,T}}(undef, 0, 0) : children + hmat.parent = isnothing(parent) ? 
hmat : parent return hmat end end # setters and getters (defined for AbstractHMatrix) -isadmissible(H::AbstractHMatrix) = H.admissible -hasdata(H::AbstractHMatrix) = !isnothing(H.data) -data(H::AbstractHMatrix) = H.data -setdata!(H::AbstractHMatrix,d) = setfield!(H,:data,d) +isadmissible(H::AbstractHMatrix) = H.admissible +hasdata(H::AbstractHMatrix) = !isnothing(H.data) +data(H::AbstractHMatrix) = H.data +setdata!(H::AbstractHMatrix, d) = setfield!(H, :data, d) rowtree(H::AbstractHMatrix) = H.rowtree coltree(H::AbstractHMatrix) = H.coltree cluster_type(::HMatrix{R,T}) where {R,T} = R -Base.getindex(H::HMatrix,::Colon,j) = getcol(H,j) +Base.getindex(H::HMatrix, ::Colon, j) = getcol(H, j) # getcol for regular matrices -function getcol!(col,M::Matrix,j) - @assert length(col) == size(M,1) - copyto!(col,view(M,:,j)) +function getcol!(col, M::Matrix, j) + @assert length(col) == size(M, 1) + return copyto!(col, view(M, :, j)) end -function getcol!(col,adjM::Adjoint{<:Any,<:Matrix},j) - @assert length(col) == size(adjM,1) - copyto!(col,view(adjM,:,j)) +function getcol!(col, adjM::Adjoint{<:Any,<:Matrix}, j) + @assert length(col) == size(adjM, 1) + return copyto!(col, view(adjM, :, j)) end -getcol(M::Matrix,j) = M[:,j] -getcol(adjM::Adjoint{<:Any,<:Matrix},j) = adjM[:,j] +getcol(M::Matrix, j) = M[:, j] +getcol(adjM::Adjoint{<:Any,<:Matrix}, j) = adjM[:, j] - -function getcol(H::HMatrix,j::Int) - m,n = size(H) - T = eltype(H) - col = zeros(T,m) - getcol!(col,H,j) +function getcol(H::HMatrix, j::Int) + m, n = size(H) + T = eltype(H) + col = zeros(T, m) + return getcol!(col, H, j) end -function getcol!(col,H::HMatrix,j::Int) +function getcol!(col, H::HMatrix, j::Int) (j ∈ colrange(H)) || throw(BoundsError()) piv = pivot(H) - _getcol!(col,H,j,piv) + return _getcol!(col, H, j, piv) end -function _getcol!(col,H::HMatrix,j,piv) +function _getcol!(col, H::HMatrix, j, piv) if hasdata(H) - shift = pivot(H) .- 1 - jl = j - shift[2] - irange = rowrange(H) .- (piv[1] - 1) - getcol!(view(col,irange),data(H),jl) + shift = pivot(H) .- 1 + jl = j - shift[2] + irange = rowrange(H) .- (piv[1] - 1) + getcol!(view(col, irange), data(H), jl) end for child in children(H) if j ∈ colrange(child) - _getcol!(col,child,j,piv) + _getcol!(col, child, j, piv) end end return col end -Base.getindex(adjH::Adjoint{<:Any,<:HMatrix},::Colon,j) = getcol(adjH,j) +Base.getindex(adjH::Adjoint{<:Any,<:HMatrix}, ::Colon, j) = getcol(adjH, j) -function getcol(adjH::Adjoint{<:Any,<:HMatrix},j::Int) +function getcol(adjH::Adjoint{<:Any,<:HMatrix}, j::Int) # (j ∈ colrange(adjH)) || throw(BoundsError()) - m,n = size(adjH) - T = eltype(adjH) - col = zeros(T,m) - getcol!(col,adjH,j) + m, n = size(adjH) + T = eltype(adjH) + col = zeros(T, m) + return getcol!(col, adjH, j) end -function getcol!(col,adjH::Adjoint{<:Any,<:HMatrix},j::Int) +function getcol!(col, adjH::Adjoint{<:Any,<:HMatrix}, j::Int) piv = pivot(adjH) - _getcol!(col,adjH,j,piv) + return _getcol!(col, adjH, j, piv) end -function _getcol!(col,adjH::Adjoint{<:Any,<:HMatrix},j,piv) +function _getcol!(col, adjH::Adjoint{<:Any,<:HMatrix}, j, piv) if hasdata(adjH) - shift = pivot(adjH) .- 1 - jl = j - shift[2] - irange = rowrange(adjH) .- (piv[1] - 1) - getcol!(view(col,irange),data(adjH),jl) + shift = pivot(adjH) .- 1 + jl = j - shift[2] + irange = rowrange(adjH) .- (piv[1] - 1) + getcol!(view(col, irange), data(adjH), jl) end for child in children(adjH) if j ∈ colrange(child) - _getcol!(col,child,j,piv) + _getcol!(col, child, j, piv) end end return col @@ -150,28 +149,28 @@ end # Trees 
interface children(H::AbstractHMatrix) = H.children -children(H::AbstractHMatrix,idxs...) = H.children[idxs] -parent(H::AbstractHMatrix) = H.parent -isleaf(H::AbstractHMatrix) = isempty(children(H)) -isroot(H::AbstractHMatrix) = parent(H) === H +children(H::AbstractHMatrix, idxs...) = H.children[idxs] +parent(H::AbstractHMatrix) = H.parent +isleaf(H::AbstractHMatrix) = isempty(children(H)) +isroot(H::AbstractHMatrix) = parent(H) === H # interface to AbstractTrees. No children is determined by an empty tuple for # AbstractTrees. AbstractTrees.children(t::AbstractHMatrix) = isleaf(t) ? () : t.children AbstractTrees.nodetype(t::AbstractHMatrix) = typeof(t) -rowrange(H::AbstractHMatrix) = index_range(H.rowtree) -colrange(H::AbstractHMatrix) = index_range(H.coltree) -rowperm(H::AbstractHMatrix) = H |> rowtree |> loc2glob -colperm(H::AbstractHMatrix) = H |> coltree |> loc2glob -pivot(H::AbstractHMatrix) = (rowrange(H).start,colrange(H).start) -offset(H::AbstractHMatrix) = pivot(H) .- 1 +rowrange(H::AbstractHMatrix) = index_range(H.rowtree) +colrange(H::AbstractHMatrix) = index_range(H.coltree) +rowperm(H::AbstractHMatrix) = loc2glob(rowtree(H)) +colperm(H::AbstractHMatrix) = loc2glob(coltree(H)) +pivot(H::AbstractHMatrix) = (rowrange(H).start, colrange(H).start) +offset(H::AbstractHMatrix) = pivot(H) .- 1 # Base.axes(H::HMatrix) = rowrange(H),colrange(H) Base.size(H::AbstractHMatrix) = length(rowrange(H)), length(colrange(H)) function blocksize(H::AbstractHMatrix) - size(children(H)) + return size(children(H)) end """ @@ -184,41 +183,44 @@ function compression_ratio(H::HMatrix) nr = length(H) # represented entries for block in AbstractTrees.Leaves(H) data = block.data - ns += num_stored_elements(data) - end - return nr/ns + ns += num_stored_elements(data) + end + return nr / ns end num_stored_elements(M::Matrix) = length(M) -function Base.show(io::IO,hmat::HMatrix) - isclean(hmat) || return print(io,"Dirty HMatrix") - print(io,"HMatrix of $(eltype(hmat)) with range $(rowrange(hmat)) × $(colrange(hmat))") - _show(io,hmat) +function Base.show(io::IO, hmat::HMatrix) + isclean(hmat) || return print(io, "Dirty HMatrix") + print(io, "HMatrix of $(eltype(hmat)) with range $(rowrange(hmat)) × $(colrange(hmat))") + _show(io, hmat) return io end -Base.show(io::IO,::MIME"text/plain",hmat::HMatrix) = show(io,hmat) +Base.show(io::IO, ::MIME"text/plain", hmat::HMatrix) = show(io, hmat) -function _show(io,hmat) +function _show(io, hmat) nodes = collect(AbstractTrees.PreOrderDFS(hmat)) @printf io "\n\t number of nodes in tree: %i" length(nodes) leaves = collect(AbstractTrees.Leaves(hmat)) - sparse_leaves = filter(isadmissible,leaves) - dense_leaves = filter(!isadmissible,leaves) - @printf(io,"\n\t number of leaves: %i (%i admissible + %i full)",length(leaves),length(sparse_leaves), - length(dense_leaves)) - rmin,rmax = isempty(sparse_leaves) ? (-1,-1) : extrema(x->rank(x.data),sparse_leaves) - @printf(io,"\n\t min rank of sparse blocks : %i",rmin) - @printf(io,"\n\t max rank of sparse blocks : %i",rmax) - dense_min,dense_max = isempty(dense_leaves) ? 
(-1,-1) : extrema(x->length(x.data),dense_leaves) - @printf(io,"\n\t min length of dense blocks : %i",dense_min) - @printf(io,"\n\t max length of dense blocks : %i",dense_max) - points_per_leaf = map(length,leaves) - @printf(io,"\n\t min number of elements per leaf: %i",minimum(points_per_leaf)) - @printf(io,"\n\t max number of elements per leaf: %i",maximum(points_per_leaf)) - depth_per_leaf = map(depth,leaves) - @printf(io,"\n\t depth of tree: %i",maximum(depth_per_leaf)) - @printf(io,"\n\t compression ratio: %f\n",compression_ratio(hmat)) + sparse_leaves = filter(isadmissible, leaves) + dense_leaves = filter(!isadmissible, leaves) + @printf(io, "\n\t number of leaves: %i (%i admissible + %i full)", length(leaves), + length(sparse_leaves), + length(dense_leaves)) + rmin, rmax = isempty(sparse_leaves) ? (-1, -1) : + extrema(x -> rank(x.data), sparse_leaves) + @printf(io, "\n\t min rank of sparse blocks : %i", rmin) + @printf(io, "\n\t max rank of sparse blocks : %i", rmax) + dense_min, dense_max = isempty(dense_leaves) ? (-1, -1) : + extrema(x -> length(x.data), dense_leaves) + @printf(io, "\n\t min length of dense blocks : %i", dense_min) + @printf(io, "\n\t max length of dense blocks : %i", dense_max) + points_per_leaf = map(length, leaves) + @printf(io, "\n\t min number of elements per leaf: %i", minimum(points_per_leaf)) + @printf(io, "\n\t max number of elements per leaf: %i", maximum(points_per_leaf)) + depth_per_leaf = map(depth, leaves) + @printf(io, "\n\t depth of tree: %i", maximum(depth_per_leaf)) + @printf(io, "\n\t compression ratio: %f\n", compression_ratio(hmat)) return io end @@ -230,18 +232,18 @@ global indexing system (see [`HMatrix`](@ref) for more information); otherwise the *local* indexing system induced by the row and columns trees are used (default). """ -Matrix(hmat::HMatrix;global_index=true) = Matrix{eltype(hmat)}(hmat;global_index) -function Base.Matrix{T}(hmat::HMatrix;global_index) where {T} - M = zeros(T,size(hmat)...) +Matrix(hmat::HMatrix; global_index=true) = Matrix{eltype(hmat)}(hmat; global_index) +function Base.Matrix{T}(hmat::HMatrix; global_index) where {T} + M = zeros(T, size(hmat)...) piv = pivot(hmat) for block in AbstractTrees.PreOrderDFS(hmat) hasdata(block) || continue irange = rowrange(block) .- piv[1] .+ 1 jrange = colrange(block) .- piv[2] .+ 1 - M[irange,jrange] += Matrix(block.data) + M[irange, jrange] += Matrix(block.data) end if global_index - P = PermutedMatrix(M,invperm(rowperm(hmat)),invperm(colperm(hmat))) + P = PermutedMatrix(M, invperm(rowperm(hmat)), invperm(colperm(hmat))) return Matrix(P) else return M @@ -263,39 +265,43 @@ It is assumed that `K` supports `getindex(K,i,j)`, and that comp can be called as `comp(K,irange::UnitRange,jrange::UnitRange)` to produce a compressed version of `K[irange,jrange]` in the form of an [`RkMatrix`](@ref). """ -function assemble_hmat(K,rowtree,coltree;adm=StrongAdmissibilityStd(3),comp=PartialACA(), - global_index=use_global_index(),threads=use_threads(),distributed=false) - T = eltype(K) +function assemble_hmat(K, rowtree, coltree; adm=StrongAdmissibilityStd(3), + comp=PartialACA(), + global_index=use_global_index(), threads=use_threads(), + distributed=false) + T = eltype(K) if distributed - _assemble_hmat_distributed(K,rowtree,coltree;adm,comp,global_index,threads) + _assemble_hmat_distributed(K, rowtree, coltree; adm, comp, global_index, threads) else # create first the structure. No parellelism used as this should be light. 
@timeit_debug "initilizing block structure" begin - hmat = HMatrix{T}(rowtree,coltree,adm) + hmat = HMatrix{T}(rowtree, coltree, adm) end # if needed permute kernel entries into indexing induced by trees - global_index && (K = PermutedMatrix(K,loc2glob(rowtree),loc2glob(coltree))) + global_index && (K = PermutedMatrix(K, loc2glob(rowtree), loc2glob(coltree))) # now assemble the data in the blocks @timeit_debug "assembling hmatrix" begin if threads @info "Assembling HMatrix on $(Threads.nthreads()) threads" - _assemble_threads!(hmat,K,comp) + _assemble_threads!(hmat, K, comp) else @info "Assembling HMatrix on 1 thread" - _assemble_cpu!(hmat,K,comp) + _assemble_cpu!(hmat, K, comp) end end end end -function assemble_hmat(K::AbstractKernelMatrix;atol=0,rank=typemax(Int),rtol=atol>0 || rank 0 || rank < typemax(Int) ? 0 : sqrt(eps(Float64)), + kwargs...) + comp = PartialACA(; rtol, atol, rank) + adm = StrongAdmissibilityStd(3) + X = rowelements(K) + Y = colelements(K) Xclt = ClusterTree(X) Yclt = ClusterTree(Y) - assemble_hmat(K,Xclt,Yclt;adm,comp,kwargs...) + return assemble_hmat(K, Xclt, Yclt; adm, comp, kwargs...) end """ @@ -308,9 +314,9 @@ hierarchical matrix, but **does not compute `data`** field in the blocks. See """ function HMatrix{T}(rowtree::R, coltree::R, adm) where {R,T} #build root - root = HMatrix{R,T}(rowtree,coltree,false,nothing,nothing,nothing) + root = HMatrix{R,T}(rowtree, coltree, false, nothing, nothing, nothing) # recurse - _build_block_structure!(adm,root) + _build_block_structure!(adm, root) return root end @@ -324,16 +330,17 @@ function _build_block_structure!(adm, current_node::HMatrix{R,T}) where {R,T} Y = current_node.coltree if (isleaf(X) || isleaf(Y)) current_node.admissible = false - elseif adm(X,Y) + elseif adm(X, Y) current_node.admissible = true else current_node.admissible = false row_children = X.children col_children = Y.children - children = [HMatrix{R,T}(r,c,false,nothing,nothing,current_node) for r in row_children, c in col_children] + children = [HMatrix{R,T}(r, c, false, nothing, nothing, current_node) + for r in row_children, c in col_children] current_node.children = children for child in children - _build_block_structure!(adm,child) + _build_block_structure!(adm, child) end end return current_node @@ -347,17 +354,17 @@ using the compressor `comp`. This function assumes the structure of `hmat` has already been intialized, and therefore should not be called directly. See [`HMatrix`](@ref) information on constructors. """ -function _assemble_cpu!(hmat,K,comp) +function _assemble_cpu!(hmat, K, comp) if isleaf(hmat) # base case if isadmissible(hmat) - _assemble_sparse_block!(hmat,K,comp) + _assemble_sparse_block!(hmat, K, comp) else - _assemble_dense_block!(hmat,K) + _assemble_dense_block!(hmat, K) end else # recurse on children for child in hmat.children - _assemble_cpu!(child,K,comp) + _assemble_cpu!(child, K, comp) end end return hmat @@ -370,31 +377,31 @@ Like [`_assemble_cpu!`](@ref), but uses threads to assemble the leaves. Note that the threads are spanwned using `Threads.@spawn`, which means they are spawned on the same worker as the caller. """ -function _assemble_threads!(hmat,K,comp) +function _assemble_threads!(hmat, K, comp) # FIXME: ideally something like `omp for schedule(guided)` should be used here # to avoid spawning too many (small) tasks. 
In the absece of such scheduling # strategy in julia at the moment (v1.6), we resort to manually limiting the size # of the tasks by directly calling the serial method for blocks which are # smaller than a given length (1000^2 here). - blocks = filter_tree(hmat,true) do x - (isleaf(x) || length(x)<1000*1000) + blocks = filter_tree(hmat, true) do x + return (isleaf(x) || length(x) < 1000 * 1000) end - sort!(blocks;lt=(x,y)->length(x) length(x) < length(y), rev=true) n = length(blocks) @sync for i in 1:n - Threads.@spawn _assemble_cpu!(blocks[i],K,comp) + Threads.@spawn _assemble_cpu!(blocks[i], K, comp) end return hmat end -function _assemble_sparse_block!(hmat,K,comp) - hmat.data = comp(K,hmat.rowtree,hmat.coltree) +function _assemble_sparse_block!(hmat, K, comp) + return hmat.data = comp(K, hmat.rowtree, hmat.coltree) end -function _assemble_dense_block!(hmat,K) +function _assemble_dense_block!(hmat, K) irange = rowrange(hmat) jrange = colrange(hmat) - hmat.data = K[irange,jrange] + hmat.data = K[irange, jrange] return hmat end @@ -409,14 +416,15 @@ isleaf(adjH::Adjoint{<:Any,<:HMatrix}) = isleaf(adjH.parent) Base.size(adjH::Adjoint{<:Any,<:HMatrix}) = reverse(size(adjH.parent)) -function Base.show(io::IO,adjH::Adjoint{<:Any,<:HMatrix}) +function Base.show(io::IO, adjH::Adjoint{<:Any,<:HMatrix}) hmat = parent(adjH) - isclean(hmat) || return print(io,"Dirty HMatrix") - print(io,"Adjoint HMatrix of $(eltype(hmat)) with range $(rowrange(adjH)) × $(colrange(adjH))") - _show(io,hmat) + isclean(hmat) || return print(io, "Dirty HMatrix") + print(io, + "Adjoint HMatrix of $(eltype(hmat)) with range $(rowrange(adjH)) × $(colrange(adjH))") + _show(io, hmat) return io end -Base.show(io::IO,::MIME"text/plain",adjH::Adjoint{<:Any,<:HMatrix}) = show(io,adjH) +Base.show(io::IO, ::MIME"text/plain", adjH::Adjoint{<:Any,<:HMatrix}) = show(io, adjH) """ struct StrongAdmissibilityStd @@ -432,13 +440,13 @@ adm(Xnode,Ynode) ``` """ Base.@kwdef struct StrongAdmissibilityStd - eta::Float64=3.0 + eta::Float64 = 3.0 end function (adm::StrongAdmissibilityStd)(left_node, right_node) - diam_min = minimum(diameter,(left_node,right_node)) - dist = distance(left_node,right_node) - return diam_min < adm.eta*dist + diam_min = minimum(diameter, (left_node, right_node)) + dist = distance(left_node, right_node) + return diam_min < adm.eta * dist end """ @@ -450,7 +458,7 @@ between them is positive. 
struct WeakAdmissibilityStd end -(adm::WeakAdmissibilityStd)(left_node, right_node) = distance(left_node,right_node) > 0 +(adm::WeakAdmissibilityStd)(left_node, right_node) = distance(left_node, right_node) > 0 """ isclean(H::HMatrix) @@ -477,58 +485,57 @@ function isclean(H::HMatrix) return true end -function depth(tree::HMatrix,acc=0) +function depth(tree::HMatrix, acc=0) if isroot(tree) return acc else - depth(parent(tree),acc+1) + depth(parent(tree), acc + 1) end end function Base.zero(H::HMatrix) H0 = deepcopy(H) - rmul!(H0,0) + rmul!(H0, 0) return H0 end -function compress!(H::HMatrix,comp) +function compress!(H::HMatrix, comp) @assert isclean(H) for leaf in AbstractTrees.Leaves(H) d = data(leaf) if d isa RkMatrix - compress!(d,comp) + compress!(d, comp) end end return H end - ############################################################################################ # Recipes ############################################################################################ @recipe function f(hmat::HMatrix) legend --> false - grid --> false + grid --> false aspect_ratio --> :equal - yflip := true + yflip := true seriestype := :shape - linecolor --> :black + linecolor --> :black # all leaves for block in AbstractTrees.Leaves(hmat) @series begin if isadmissible(block) - fillcolor --> :blue - seriesalpha --> 1/compression_ratio(block.data) + fillcolor --> :blue + seriesalpha --> 1 / compression_ratio(block.data) else - fillcolor --> :red - seriesalpha --> 0.3 + fillcolor --> :red + seriesalpha --> 0.3 end pt1 = pivot(block) pt2 = pt1 .+ size(block) .- 1 - y1, y2 = pt1[1],pt2[1] - x1, x2 = pt1[2],pt2[2] + y1, y2 = pt1[1], pt2[1] + x1, x2 = pt1[2], pt2[2] # annotations := ((x1+x2)/2,(y1+y2)/2, rank(block.data)) - [x1,x2,x2,x1,x1],[y1,y1,y2,y2,y1] + [x1, x2, x2, x1, x1], [y1, y1, y2, y2, y1] end end end diff --git a/src/hyperrectangle.jl b/src/hyperrectangle.jl index 2f4f8e0..321054b 100644 --- a/src/hyperrectangle.jl +++ b/src/hyperrectangle.jl @@ -34,7 +34,7 @@ half_width(r::HyperRectangle) = width(r) / 2 Minimal Euclidean distance between a point `x ∈ Ω1` and `y ∈ Ω2`. 
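For axis-aligned rectangles this reduces, component-wise, to
`dist(Ω1,Ω2)² = ∑ₖ max(0, low₂[k] - high₁[k], low₁[k] - high₂[k])²`,
which is what the implementation accumulates below.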
""" function distance(rec1::HyperRectangle{N}, - rec2::HyperRectangle{N}) where {N} + rec2::HyperRectangle{N}) where {N} d2 = 0 rec1_low_corner = low_corner(rec1) rec1_high_corner = high_corner(rec1) @@ -63,7 +63,6 @@ function distance(pt::SVector{N}, rec::HyperRectangle{N}) where {N} end distance(rec::HyperRectangle{N}, pt::SVector{N}) where {N} = distance(pt, rec) - """ diameter(Ω) @@ -91,24 +90,24 @@ function vertices(rec::HyperRectangle{2}) lc = low_corner(rec) hc = high_corner(rec) return SVector(SVector(lc[1], lc[2]), - SVector(hc[1], lc[2]), - SVector(hc[1], hc[2]), - SVector(lc[1], hc[2])) + SVector(hc[1], lc[2]), + SVector(hc[1], hc[2]), + SVector(lc[1], hc[2])) end function vertices(rec::HyperRectangle{3}) lc = low_corner(rec) hc = high_corner(rec) return SVector( - # lower face - SVector(lc[1], lc[2], lc[3]), - SVector(hc[1], lc[2], lc[3]), - SVector(hc[1], hc[2], lc[3]), - SVector(lc[1], hc[2], lc[3]), - # upper face - SVector(lc[1], lc[2], hc[3]), - SVector(hc[1], lc[2], hc[3]), - SVector(hc[1], hc[2], hc[3]), - SVector(lc[1], hc[2], hc[3])) + # lower face + SVector(lc[1], lc[2], lc[3]), + SVector(hc[1], lc[2], lc[3]), + SVector(hc[1], hc[2], lc[3]), + SVector(lc[1], hc[2], lc[3]), + # upper face + SVector(lc[1], lc[2], hc[3]), + SVector(hc[1], lc[2], hc[3]), + SVector(hc[1], hc[2], hc[3]), + SVector(lc[1], hc[2], hc[3])) end function bounding_box(els, cube=false) @@ -133,7 +132,7 @@ function bounding_box(els, cube=false) return HyperRectangle(lb, ub) end center(x::SVector) = x -center(x::NTuple) = SVector(x) +center(x::NTuple) = SVector(x) """ split(rec::HyperRectangle,[axis]::Int,[place]) @@ -148,8 +147,8 @@ When no axis and no place is given, defaults to splitting along the largest axis function Base.split(rec::HyperRectangle{N}, axis, place) where {N} rec_low_corner = low_corner(rec) rec_high_corner = high_corner(rec) - high_corner1 = ntuple(n -> n == axis ? place : rec_high_corner[n], N) |> SVector - low_corner2 = ntuple(n -> n == axis ? place : rec_low_corner[n], N) |> SVector + high_corner1 = SVector(ntuple(n -> n == axis ? place : rec_high_corner[n], N)) + low_corner2 = SVector(ntuple(n -> n == axis ? place : rec_low_corner[n], N)) rec1 = HyperRectangle(rec_low_corner, high_corner1) rec2 = HyperRectangle(low_corner2, rec_high_corner) return (rec1, rec2) diff --git a/src/kernelmatrix.jl b/src/kernelmatrix.jl index 702341a..230c7ad 100644 --- a/src/kernelmatrix.jl +++ b/src/kernelmatrix.jl @@ -36,14 +36,14 @@ struct KernelMatrix{Tf,Tx,Ty,T} <: AbstractKernelMatrix{T} Y::Ty end -Base.size(K::KernelMatrix) = length(K.X), length(K.Y) -Base.getindex(K::KernelMatrix,i::Int,j::Int) = K.f(K.X[i],K.Y[j]) +Base.size(K::KernelMatrix) = length(K.X), length(K.Y) +Base.getindex(K::KernelMatrix, i::Int, j::Int) = K.f(K.X[i], K.Y[j]) rowelements(K::KernelMatrix) = K.X colelements(K::KernelMatrix) = K.Y kernel(K::KernelMatrix) = K.f -function KernelMatrix(f,X,Y) - T = Base.promote_op(f,eltype(X),eltype(Y)) - KernelMatrix{typeof(f),typeof(X),typeof(Y),T}(f,X,Y) +function KernelMatrix(f, X, Y) + T = Base.promote_op(f, eltype(X), eltype(Y)) + return KernelMatrix{typeof(f),typeof(X),typeof(Y),T}(f, X, Y) end diff --git a/src/lu.jl b/src/lu.jl index 0edfb14..bd686d9 100644 --- a/src/lu.jl +++ b/src/lu.jl @@ -1,13 +1,13 @@ const NOPIVOT = VERSION >= v"1.7" ? 
NoPivot : Val{false}

-function Base.getproperty(LU::LU{<:Any,<:HMatrix},s::Symbol)
-    H = getfield(LU,:factors) # the underlying hierarchical matrix
+function Base.getproperty(LU::LU{<:Any,<:HMatrix}, s::Symbol)
+    H = getfield(LU, :factors) # the underlying hierarchical matrix
    if s == :L
        return UnitLowerTriangular(H)
    elseif s == :U
        return UpperTriangular(H)
    else
-        return getfield(LU,s)
+        return getfield(LU, s)
    end
end

@@ -17,13 +17,13 @@ end
Hierarchical LU factorization of `M`, using `comp` to generate the compressed
blocks during the multiplication routines.
"""
-function lu!(M::HMatrix,compressor;threads=use_threads())
+function lu!(M::HMatrix, compressor; threads=use_threads())
    # perform the lu decomposition of M in place
    @timeit_debug "lu factorization" begin
-        _lu!(M,compressor,threads)
+        _lu!(M, compressor, threads)
    end
    # wrap the result in the LU structure
-    return LU(M,LinearAlgebra.BlasInt[],LinearAlgebra.BlasInt(0))
+    return LU(M, LinearAlgebra.BlasInt[], LinearAlgebra.BlasInt(0))
end

"""
@@ -32,9 +32,10 @@ end
Hierarchical LU factorization of `M`, using the `PartialACA(;atol,rtol,rank)`
compressor.
"""
-function lu!(M::HMatrix;atol=0,rank=typemax(Int),rtol=atol>0 || rank<typemax(Int) ? 0 : sqrt(eps(Float64)),kwargs...)
+function lu!(M::HMatrix; atol=0, rank=typemax(Int),
+             rtol=atol > 0 || rank < typemax(Int) ? 0 : sqrt(eps(Float64)), kwargs...)
+    compressor = PartialACA(atol, rank, rtol)
+    return lu!(M, compressor)
end

"""
@@ -42,43 +43,45 @@ end

Hierarchical LU factorization. See [`lu!`](@ref) for the available options.
"""
-lu(M::HMatrix,args...;kwargs...) = lu!(deepcopy(M),args...;kwargs...)
+lu(M::HMatrix, args...; kwargs...) = lu!(deepcopy(M), args...; kwargs...)

-function _lu!(M::HMatrix,compressor,threads)
+function _lu!(M::HMatrix, compressor, threads)
    if isleaf(M)
        d = data(M)
        @assert d isa Matrix
        @timeit_debug "dense lu factorization" begin
-            lu!(d,NOPIVOT())
+            lu!(d, NOPIVOT())
        end
    else
        @assert !hasdata(M)
        chdM = children(M)
-        m,n = size(chdM)
-        for i=1:m
-            _lu!(chdM[i,i],compressor,threads)
-            for j=i+1:n
+        m, n = size(chdM)
+        for i in 1:m
+            _lu!(chdM[i, i], compressor, threads)
+            for j in (i + 1):n
                @sync begin
                    @timeit_debug "ldiv! solution" begin
                        if threads
-                            Threads.@spawn ldiv!(UnitLowerTriangular(chdM[i,i]),chdM[i,j],compressor)
+                            Threads.@spawn ldiv!(UnitLowerTriangular(chdM[i, i]),
+                                                 chdM[i, j], compressor)
                        else
-                            ldiv!(UnitLowerTriangular(chdM[i,i]),chdM[i,j],compressor)
+                            ldiv!(UnitLowerTriangular(chdM[i, i]), chdM[i, j], compressor)
                        end
                    end
                    @timeit_debug "rdiv! solution" begin
                        if threads
-                            Threads.@spawn rdiv!(chdM[j,i],UpperTriangular(chdM[i,i]),compressor)
+                            Threads.@spawn rdiv!(chdM[j, i], UpperTriangular(chdM[i, i]),
+                                                 compressor)
                        else
-                            rdiv!(chdM[j,i],UpperTriangular(chdM[i,i]),compressor)
+                            rdiv!(chdM[j, i], UpperTriangular(chdM[i, i]), compressor)
                        end
                    end
                end
            end
-            for j in i+1:m
-                for k in i+1:n
+            for j in (i + 1):m
+                for k in (i + 1):n
                    @timeit_debug "hmul!"
begin
-                        hmul!(chdM[j,k],chdM[j,i],chdM[i,k],-1,1,compressor)
+                        hmul!(chdM[j, k], chdM[j, i], chdM[i, k], -1, 1, compressor)
                    end
                end
            end
@@ -87,19 +90,19 @@ function _lu!(M::HMatrix,compressor,threads)
    return M
end

-function ldiv!(A::LU{<:Any,<:HMatrix},y::AbstractVector;global_index=true)
-    p = A.factors # underlying data
-    ctree = coltree(p)
-    rtree = rowtree(p)
+function ldiv!(A::LU{<:Any,<:HMatrix}, y::AbstractVector; global_index=true)
+    p = A.factors # underlying data
+    ctree = coltree(p)
+    rtree = rowtree(p)
    # permute input
-    global_index && permute!(y,loc2glob(ctree))
-    L,U = A.L, A.U
+    global_index && permute!(y, loc2glob(ctree))
+    L, U = A.L, A.U
    # solve LUx = y through:
    # (a) L(z) = y
    # (b) Ux = z
-    ldiv!(L,y)
-    ldiv!(U,y)
-    global_index && invpermute!(y,loc2glob(rtree))
+    ldiv!(L, y)
+    ldiv!(U, y)
+    global_index && invpermute!(y, loc2glob(rtree))
    return y
end

@@ -111,19 +114,19 @@ function ldiv!(L::HUnitLowerTriangular, y::AbstractVector)
    else
        @assert !hasdata(H) "only leaves are allowed to have data when using `ldiv`!"
        shift = pivot(H) .- 1
-        chdH = children(H)
-        m, n = size(chdH)
+        chdH = children(H)
+        m, n = size(chdH)
        @assert m === n
-        for i = 1:m
-            irows = colrange(chdH[i,i]) .- shift[2]
-            bi = view(y, irows)
-            for j = 1:(i - 1) # j<i
-                jrows = colrange(chdH[i,j]) .- shift[2]
-                bj = view(y, jrows)
-                _mul131!(bi, chdH[i,j], bj, -1)
+        for i in m:-1:1
+            irows = colrange(chdH[i, i]) .- shift[2]
+            bi = view(y, irows)
+            for j in (i + 1):n # j>i
+                jrows = colrange(chdH[i, j]) .- shift[2]
+                bj = view(y, jrows)
+                _mul131!(bi, chdH[i, j], bj, -1)
            end
            # recursion stage
-            ldiv!(UpperTriangular(chdH[i,i]), bi)
+            ldiv!(UpperTriangular(chdH[i, i]), bi)
        end
    end
    return y
diff --git a/src/multiplication.jl b/src/multiplication.jl
index c873bf2..f163d88 100644
--- a/src/multiplication.jl
+++ b/src/multiplication.jl
@@ -6,21 +6,21 @@ hierarchical matrices and `compressor` is a function/functor used in the
intermediate stages of the multiplication to avoid growing the rank of
admissible blocks after addition is performed.
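For example (a sketch; `C`, `A`, `B` are conformable hierarchical matrices and
`PartialACA` bounds the rank growth):

```julia
hmul!(C, A, B, 1, 0, PartialACA(; rtol=1e-8)) # C ← 1*A*B + 0*C, recompressing blocks
```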
""" -function hmul!(C::HMatrix,A::HMatrix,B::HMatrix,a,b,compressor) - b == true || rmul!(C,b) +function hmul!(C::HMatrix, A::HMatrix, B::HMatrix, a, b, compressor) + b == true || rmul!(C, b) @timeit_debug "constructing plan" begin - plan = plan_hmul(C,A,B,a,1) + plan = plan_hmul(C, A, B, a, 1) end @timeit_debug "executing plan" begin - execute!(plan,compressor) + execute!(plan, compressor) end return C end # disable `mul` of hierarchial matrices -function mul!(C::HMatrix,A::HMatrix,B::HMatrix,a::Number,b::Number) +function mul!(C::HMatrix, A::HMatrix, B::HMatrix, a::Number, b::Number) msg = "use `hmul` to multiply hierarchical matrices" - error(msg) + return error(msg) end """ @@ -44,50 +44,50 @@ mutable struct HMulNode{T} multiplier::Float64 end -function HMulNode(C::HMatrix,a::Number) - T = typeof(C) +function HMulNode(C::HMatrix, a::Number) + T = typeof(C) chdC = children(C) - m,n = size(chdC) - HMulNode{T}(C,Matrix{HMulNode{T}}(undef,m,n),T[],a) + m, n = size(chdC) + return HMulNode{T}(C, Matrix{HMulNode{T}}(undef, m, n), T[], a) end -function build_HMulNode_structure(C::HMatrix,a) - node = HMulNode(C,a) +function build_HMulNode_structure(C::HMatrix, a) + node = HMulNode(C, a) chdC = children(C) - m,n = size(chdC) + m, n = size(chdC) for i in 1:m for j in 1:n - child = build_HMulNode_structure(chdC[i,j],a) - node.children[i,j] = child + child = build_HMulNode_structure(chdC[i, j], a) + node.children[i, j] = child end end - node + return node end -function plan_hmul(C::T,A::T,B::T,a,b) where {T<:HMatrix} +function plan_hmul(C::T, A::T, B::T, a, b) where {T<:HMatrix} @assert b == 1 # root = HMulNode(C) - root = build_HMulNode_structure(C,a) + root = build_HMulNode_structure(C, a) # recurse - _build_hmul_tree!(root,A,B) + _build_hmul_tree!(root, A, B) return root end -function _build_hmul_tree!(tree::HMulNode,A::HMatrix,B::HMatrix) +function _build_hmul_tree!(tree::HMulNode, A::HMatrix, B::HMatrix) C = tree.target if isleaf(A) || isleaf(B) || isleaf(C) - push!(tree.sources,(A,B)) + push!(tree.sources, (A, B)) else - ni,nj = blocksize(C) - _ ,nk = blocksize(A) + ni, nj = blocksize(C) + _, nk = blocksize(A) A_children = children(A) B_children = children(B) C_children = children(C) - for i=1:ni - for j=1:nj - child = tree.children[i,j] - for k=1:nk - _build_hmul_tree!(child,A_children[i,k],B_children[k,j]) + for i in 1:ni + for j in 1:nj + child = tree.children[i, j] + for k in 1:nk + _build_hmul_tree!(child, A_children[i, k], B_children[k, j]) end end end @@ -95,27 +95,31 @@ function _build_hmul_tree!(tree::HMulNode,A::HMatrix,B::HMatrix) return tree end -function Base.show(io::IO,::MIME"text/plain",tree::HMulNode) - print(io,"HMulNode with $(size(children(tree))) children and $(length(sources(tree))) pairs") +function Base.show(io::IO, ::MIME"text/plain", tree::HMulNode) + return print(io, + "HMulNode with $(size(children(tree))) children and $(length(sources(tree))) pairs") end -function Base.show(io::IO,tree::HMulNode) - print(io,"HMulNode with $(size(children(tree))) children and $(length(sources(tree))) pairs") +function Base.show(io::IO, tree::HMulNode) + return print(io, + "HMulNode with $(size(children(tree))) children and $(length(sources(tree))) pairs") end -function Base.show(io::IO,::MIME"text/plain",tree::Adjoint{<:Any,<:HMulNode}) +function Base.show(io::IO, ::MIME"text/plain", tree::Adjoint{<:Any,<:HMulNode}) p = parent(tree) - print(io,"adjoint HMulNode with $(size(children(p))) children and $(length(sources(p))) pairs") + return print(io, + "adjoint HMulNode with 
$(size(children(p))) children and $(length(sources(p))) pairs") end -function Base.show(io::IO,tree::Adjoint{<:Any,<:HMulNode}) +function Base.show(io::IO, tree::Adjoint{<:Any,<:HMulNode}) p = parent(tree) - print(io,"adjoint HMulNode with $(size(children(p))) children and $(length(sources(p))) pairs") + return print(io, + "adjoint HMulNode with $(size(children(p))) children and $(length(sources(p))) pairs") end # compress the operator L = C + ∑ a*Aᵢ*Bᵢ function (paca::PartialACA)(plan::HMulNode) - _aca_partial(plan,:,:,paca.atol,paca.rank,paca.rtol) + return _aca_partial(plan, :, :, paca.atol, paca.rank, paca.rtol) end # getters @@ -125,51 +129,51 @@ multiplier(node::HMulNode) = node.multiplier # Trees interface children(node::HMulNode) = node.children -children(node::HMulNode,idxs...) = node.children[idxs] -parent(node::HMulNode) = node.parent -isleaf(node::HMulNode) = isempty(children(node)) -isroot(node::HMulNode) = parent(node) === node +children(node::HMulNode, idxs...) = node.children[idxs] +parent(node::HMulNode) = node.parent +isleaf(node::HMulNode) = isempty(children(node)) +isroot(node::HMulNode) = parent(node) === node # AbstractMatrix interface Base.size(node::HMulNode) = size(target(node)) Base.eltype(node::HMulNode) = eltype(target(node)) -Base.getindex(node::HMulNode,::Colon,j::Int) = getcol(node,j) +Base.getindex(node::HMulNode, ::Colon, j::Int) = getcol(node, j) -function getcol(node::HMulNode,j) - m,n = size(node) - T = eltype(node) - col = zeros(T,m) - getcol!(col,node,j) +function getcol(node::HMulNode, j) + m, n = size(node) + T = eltype(node) + col = zeros(T, m) + getcol!(col, node, j) return col end -function getcol!(col,node::HMulNode,j) +function getcol!(col, node::HMulNode, j) a = multiplier(node) C = target(node) - m,n = size(C) - T = eltype(C) - ej = zeros(T,n) + m, n = size(C) + T = eltype(C) + ej = zeros(T, n) ej[j] = 1 # compute j-th column of ∑ Aᵢ Bᵢ - for (A,B) in sources(node) - m,k = size(A) - k,n = size(B) - tmp = zeros(T,k) - jg = j + offset(B)[2] # global index on hierarchila matrix B - getcol!(tmp,B,jg) - _hgemv_recursive!(col,A,tmp,offset(A)) + for (A, B) in sources(node) + m, k = size(A) + k, n = size(B) + tmp = zeros(T, k) + jg = j + offset(B)[2] # global index on hierarchila matrix B + getcol!(tmp, B, jg) + _hgemv_recursive!(col, A, tmp, offset(A)) end # multiply by a - rmul!(col,a) + rmul!(col, a) # add the j-th column of C if C has data # jg = j + offset(C)[2] # global index on hierarchila matrix B # cj = getcol(C,jg) # axpy!(1,cj,col) if hasdata(C) - d = data(C) - cj = getcol(d,j) - axpy!(1,cj,col) + d = data(C) + cj = getcol(d, j) + axpy!(1, cj, col) end return col end @@ -178,79 +182,79 @@ adjoint(node::HMulNode) = Adjoint(node) Base.size(adjnode::Adjoint{<:Any,<:HMulNode}) = reverse(size(adjnode.parent)) children(adjnode::Adjoint{<:Any,<:HMulNode}) = adjoint(children(adjnode.parent)) -Base.getindex(adjnode::Adjoint{<:Any,<:HMulNode},::Colon,j::Int) = getcol(adjnode,j) +Base.getindex(adjnode::Adjoint{<:Any,<:HMulNode}, ::Colon, j::Int) = getcol(adjnode, j) -function getcol(adjnode::Adjoint{<:Any,<:HMulNode},j) - m,n = size(adjnode) - T = eltype(adjnode) - col = zeros(T,m) - getcol!(col,adjnode,j) +function getcol(adjnode::Adjoint{<:Any,<:HMulNode}, j) + m, n = size(adjnode) + T = eltype(adjnode) + col = zeros(T, m) + getcol!(col, adjnode, j) return col end -function getcol!(col,adjnode::Adjoint{<:Any,<:HMulNode},j) - node = parent(adjnode) - a = multiplier(node) - C = target(node) - T = eltype(C) - Ct = adjoint(C) - m,n = size(Ct) - ej = 
zeros(T,n)
+function getcol!(col, adjnode::Adjoint{<:Any,<:HMulNode}, j)
+    node = parent(adjnode)
+    a = multiplier(node)
+    C = target(node)
+    T = eltype(C)
+    Ct = adjoint(C)
+    m, n = size(Ct)
+    ej = zeros(T, n)
    ej[j] = 1
    # compute j-th column of ∑ adjoint(Bᵢ)*adjoint(Aᵢ)
-    for (A,B) in sources(node)
-        At,Bt = adjoint(A), adjoint(B)
-        tmp = zeros(T,size(At,1))
+    for (A, B) in sources(node)
+        At, Bt = adjoint(A), adjoint(B)
+        tmp = zeros(T, size(At, 1))
        # _hgemv_recursive!(tmp,At,ej,offset(At))
-        jg = j + offset(At)[2] # global index on hierarchical matrix B
-        getcol!(tmp,At,jg)
-        _hgemv_recursive!(col,Bt,tmp,offset(Bt))
+        jg = j + offset(At)[2] # global index on hierarchical matrix B
+        getcol!(tmp, At, jg)
+        _hgemv_recursive!(col, Bt, tmp, offset(Bt))
    end
    # multiply by a
-    rmul!(col,conj(a))
+    rmul!(col, conj(a))
    # add the j-th column of Ct if it has data
    # jg = j + offset(Ct)[2] # global index on hierarchical matrix B
    # cj = getcol(Ct,jg)
    # axpy!(1,cj,col)
    if hasdata(Ct)
-        d = data(Ct)
-        cj = getcol(d,j)
-        axpy!(1,cj,col)
+        d = data(Ct)
+        cj = getcol(d, j)
+        axpy!(1, cj, col)
    end
    return col
end

-function execute!(node::HMulNode,compressor)
-    execute_node!(node,compressor)
+function execute!(node::HMulNode, compressor)
+    execute_node!(node, compressor)
    C = target(node)
    flush_to_children!(C)
    # FIXME: using @threads below breaks things when the `RkMatrix` has its own
    # buffer due to a race condition that I do not yet understand
    for chd in children(node)
-        execute!(chd,compressor)
+        execute!(chd, compressor)
    end
    return node
end

# non-recursive execution
-function execute_node!(node::HMulNode,compressor)
+function execute_node!(node::HMulNode, compressor)
    C = target(node)
    isempty(sources(node)) && (return node)
    a = multiplier(node)
    if isleaf(C) && !isadmissible(C)
        d = data(C)
-        for (A,B) in sources(node)
+        for (A, B) in sources(node)
            # _mul_leaf!(C,A,B,a,compressor)
-            _mul_dense!(d,A,B,a)
+            _mul_dense!(d, A, B, a)
        end
    else
        R = compressor(node)
-        setdata!(C,R)
+        setdata!(C, R)
    end
    return node
end

-function _mul_dense!(C::Matrix,A,B,a)
+function _mul_dense!(C::Matrix, A, B, a)
    Adata = isleaf(A) ? A.data : A
    Bdata = isleaf(B) ? B.data : B
    if Adata isa HMatrix
@@ -285,25 +289,25 @@ Transfer the block's `data` to its children. At the end, set `H.data` to
`nothing`
"""
function flush_to_children!(H::HMatrix)
    T = eltype(H)
-    isleaf(H) && (return H)
+    isleaf(H) && (return H)
    hasdata(H) || (return H)
-    R::RkMatrix{T} = data(H)
-    _add_to_children!(H,R)
-    setdata!(H,nothing)
+    R::RkMatrix{T} = data(H)
+    _add_to_children!(H, R)
+    setdata!(H, nothing)
    return H
end

-function _add_to_children!(H,R::RkMatrix)
+function _add_to_children!(H, R::RkMatrix)
    shift = pivot(H) .- 1
    for block in children(H)
-        irange = rowrange(block) .- shift[1]
-        jrange = colrange(block) .- shift[2]
-        bdata = data(block)
-        tmp = RkMatrix(R.A[irange,:],R.B[jrange,:])
+        irange = rowrange(block) .- shift[1]
+        jrange = colrange(block) .- shift[2]
+        bdata = data(block)
+        tmp = RkMatrix(R.A[irange, :], R.B[jrange, :])
        if bdata === nothing
-            setdata!(block,tmp)
+            setdata!(block, tmp)
        else
-            axpy!(true,tmp,bdata)
+            axpy!(true, tmp, bdata)
        end
    end
end

@@ -317,38 +321,43 @@ Transfer the block's `data` to its leaves. At the end, set `H.data` to
`nothing`.
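Each leaf receives the restriction `RkMatrix(R.A[irange, :], R.B[jrange, :])` of the
parent's low-rank data, added (via `axpy!`) to any data the leaf already holds.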
""" function flush_to_leaves!(H::HMatrix) T = eltype(H) - isleaf(H) && (return H) + isleaf(H) && (return H) hasdata(H) || (return H) - R::RkMatrix{T} = data(H) - _add_to_leaves!(H,R) - setdata!(H,nothing) + R::RkMatrix{T} = data(H) + _add_to_leaves!(H, R) + setdata!(H, nothing) return H end -function _add_to_leaves!(H,R::RkMatrix) +function _add_to_leaves!(H, R::RkMatrix) shift = pivot(H) .- 1 for block in AbstractTrees.Leaves(H) - irange = rowrange(block) .- shift[1] - jrange = colrange(block) .- shift[2] - bdata = data(block) - tmp = RkMatrix(R.A[irange,:],R.B[jrange,:]) + irange = rowrange(block) .- shift[1] + jrange = colrange(block) .- shift[2] + bdata = data(block) + tmp = RkMatrix(R.A[irange, :], R.B[jrange, :]) if bdata === nothing - setdata!(block,tmp) + setdata!(block, tmp) else - axpy!(true,tmp,bdata) + axpy!(true, tmp, bdata) end end end -_mul111!(C::Union{Matrix,SubArray,Adjoint},A::Union{Matrix,SubArray,Adjoint},B::Union{Matrix,SubArray,Adjoint},a::Number) = mul!(C,A,B,a,true) +function _mul111!(C::Union{Matrix,SubArray,Adjoint}, A::Union{Matrix,SubArray,Adjoint}, + B::Union{Matrix,SubArray,Adjoint}, a::Number) + return mul!(C, A, B, a, true) +end -function _mul112!(C::Union{Matrix,SubArray,Adjoint}, M::Union{Matrix,SubArray,Adjoint}, R::RkMatrix, a::Number) +function _mul112!(C::Union{Matrix,SubArray,Adjoint}, M::Union{Matrix,SubArray,Adjoint}, + R::RkMatrix, a::Number) buffer = M * R.A _mul111!(C, buffer, R.Bt, a) return C end -function _mul113!(C::Union{Matrix,SubArray,Adjoint}, M::Union{Matrix,SubArray,Adjoint}, H::HMatrix, a::Number) +function _mul113!(C::Union{Matrix,SubArray,Adjoint}, M::Union{Matrix,SubArray,Adjoint}, + H::HMatrix, a::Number) T = eltype(C) if hasdata(H) mat = data(H) @@ -361,19 +370,20 @@ function _mul113!(C::Union{Matrix,SubArray,Adjoint}, M::Union{Matrix,SubArray,Ad end end for child in children(H) - shift = pivot(H) .- 1 + shift = pivot(H) .- 1 irange = rowrange(child) .- shift[1] jrange = colrange(child) .- shift[2] - Cview = @views C[:, jrange] - Mview = @views M[:, irange] + Cview = @views C[:, jrange] + Mview = @views M[:, irange] _mul113!(Cview, Mview, child, a) end return C end -function _mul121!(C::Union{Matrix,SubArray,Adjoint}, R::RkMatrix, M::Union{Matrix,SubArray,Adjoint}, a::Number) +function _mul121!(C::Union{Matrix,SubArray,Adjoint}, R::RkMatrix, + M::Union{Matrix,SubArray,Adjoint}, a::Number) buffer = R.Bt * M - _mul111!(C, R.A, buffer, a) + return _mul111!(C, R.A, buffer, a) end function _mul122!(C::Union{Matrix,SubArray,Adjoint}, R::RkMatrix, S::RkMatrix, a::Number) @@ -393,7 +403,8 @@ function _mul123!(C::Union{Matrix,SubArray,Adjoint}, R::RkMatrix, H::HMatrix, a: return C end -function _mul131!(C::Union{Matrix,SubArray,Adjoint}, H::HMatrix, M::Union{Matrix,SubArray,Adjoint}, a::Number) +function _mul131!(C::Union{Matrix,SubArray,Adjoint}, H::HMatrix, + M::Union{Matrix,SubArray,Adjoint}, a::Number) if isleaf(H) mat = data(H) if mat isa Matrix @@ -405,21 +416,21 @@ function _mul131!(C::Union{Matrix,SubArray,Adjoint}, H::HMatrix, M::Union{Matrix end end for child in children(H) - shift = pivot(H) .- 1 + shift = pivot(H) .- 1 irange = rowrange(child) .- shift[1] jrange = colrange(child) .- shift[2] - Cview = view(C, irange, :) - Mview = view(M, jrange, :) + Cview = view(C, irange, :) + Mview = view(M, jrange, :) _mul131!(Cview, child, Mview, a) end return C end function _mul132!(C::Union{Matrix,SubArray,Adjoint}, H::HMatrix, R::RkMatrix, a::Number) - T = promote_type(eltype(H),eltype(R)) + T = promote_type(eltype(H), eltype(R)) 
buffer = zeros(T, size(H, 1), size(R.A, 2)) - _mul131!(buffer,H,R.A,1) - _mul111!(C, buffer, R.Bt, a,) + _mul131!(buffer, H, R.A, 1) + _mul111!(C, buffer, R.Bt, a) return C end @@ -430,18 +441,19 @@ end ############################################################################################ # 1.2.1 -function mul!(y::AbstractVector,R::RkMatrix,x::AbstractVector,a::Number,b::Number) +function mul!(y::AbstractVector, R::RkMatrix, x::AbstractVector, a::Number, b::Number) # tmp = R.Bt*x - tmp = mul!(R.buffer,adjoint(R.B),x) - mul!(y,R.A,tmp,a,b) + tmp = mul!(R.buffer, adjoint(R.B), x) + return mul!(y, R.A, tmp, a, b) end # 1.2.1 -function mul!(y::AbstractVector,adjR::Adjoint{<:Any,<:RkMatrix},x::AbstractVector,a::Number,b::Number) +function mul!(y::AbstractVector, adjR::Adjoint{<:Any,<:RkMatrix}, x::AbstractVector, + a::Number, b::Number) R = parent(adjR) # tmp = R.At*x - tmp = mul!(R.buffer,adjoint(R.A),x) - mul!(y,R.B,tmp,a,b) + tmp = mul!(R.buffer, adjoint(R.A), x) + return mul!(y, R.B, tmp, a, b) end # 1.3.1 @@ -450,25 +462,25 @@ end Perform `y <-- H*x*a + y*b` in place. """ -function mul!(y::AbstractVector,A::HMatrix,x::AbstractVector,a::Number=1,b::Number=0; - global_index=use_global_index(),threads=use_threads()) +function mul!(y::AbstractVector, A::HMatrix, x::AbstractVector, a::Number=1, b::Number=0; + global_index=use_global_index(), threads=use_threads()) # since the HMatrix represents A = inv(Pr)*H*Pc, where Pr and Pc are row and column # permutations, we need first to rewrite C <-- b*C + a*(inv(Pr)*H*Pc)*B as # C <-- inv(Pr)*(b*Pr*C + a*H*(Pc*B)). Following this rewrite, the # multiplication is performed by first defining B <-- Pc*B, and C <-- # Pr*C, doing the multiplication with the permuted entries, and then # permuting the result back C <-- inv(Pr)*C at the end. - ctree = coltree(A) - rtree = rowtree(A) + ctree = coltree(A) + rtree = rowtree(A) # permute input if global_index - x = x[colperm(A)] - y = permute!(y,rowperm(A)) - rmul!(x,a) # multiply in place since this is a new copy, so does not mutate exterior x + x = x[colperm(A)] + y = permute!(y, rowperm(A)) + rmul!(x, a) # multiply in place since this is a new copy, so does not mutate exterior x elseif a != 1 - x = a*x # new copy of x since we should not mutate the external x in mul! + x = a * x # new copy of x since we should not mutate the external x in mul! end - iszero(b) ? fill!(y,zero(eltype(y))) : rmul!(y,b) + iszero(b) ? fill!(y, zero(eltype(y))) : rmul!(y, b) # offset in case A is not indexed starting at (1,1); e.g. A is not the root # of and HMatrix offset = pivot(A) .- 1 @@ -485,22 +497,23 @@ function mul!(y::AbstractVector,A::HMatrix,x::AbstractVector,a::Number=1,b::Numb # Right now the hilbert partition is chosen by default without proper # testing. 
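    # Note: the partition computed below is cached in `CACHED_PARTITIONS` (a
    # `WeakKeyDict` keyed on object identity, see src/partitions.jl), so only the
    # first threaded product with a given matrix pays the partitioning cost.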
@timeit_debug "partitioning leaves" begin - haskey(CACHED_PARTITIONS,A) || hilbert_partition(A,Threads.nthreads(),_cost_gemv) + haskey(CACHED_PARTITIONS, A) || + hilbert_partition(A, Threads.nthreads(), _cost_gemv) # haskey(CACHED_PARTITIONS,A) || col_partition(A,Threads.nthreads(),_cost_gemv) # haskey(CACHED_PARTITIONS,A) || row_partition(A,Threads.nthreads(),_cost_gemv) end @timeit_debug "threaded multiplication" begin p = CACHED_PARTITIONS[A] - _hgemv_static_partition!(y,x,p.partition,offset) + _hgemv_static_partition!(y, x, p.partition, offset) # _hgemv_threads!(y,x,p.partition,offset) # threaded implementation end else @timeit_debug "serial multiplication" begin - _hgemv_recursive!(y,A,x,offset) # serial implementation + _hgemv_recursive!(y, A, x, offset) # serial implementation end end # permute output - global_index && invpermute!(y,rowperm(A)) + global_index && invpermute!(y, rowperm(A)) return y end @@ -513,74 +526,75 @@ rowrange(A) - offset[1]` and `J = rowrange(B) - offset[2]`. The `offset` argument is used on the caller side to signal if the original hierarchical matrix had a `pivot` other than `(1,1)`. """ -function _hgemv_recursive!(C::AbstractVector,A::Union{HMatrix,Adjoint{<:Any,<:HMatrix}},B::AbstractVector,offset) +function _hgemv_recursive!(C::AbstractVector, A::Union{HMatrix,Adjoint{<:Any,<:HMatrix}}, + B::AbstractVector, offset) T = eltype(A) if isleaf(A) irange = rowrange(A) .- offset[1] jrange = colrange(A) .- offset[2] - d = data(A) + d = data(A) if T <: SMatrix # FIXME: there is bug with gemv and static arrays, so we convert # them to matrices of n × 1 # (https://github.com/JuliaArrays/StaticArrays.jl/issues/966#issuecomment-943679214). - mul!(view(C,irange,1:1),d,view(B,jrange,1:1),1,1) + mul!(view(C, irange, 1:1), d, view(B, jrange, 1:1), 1, 1) else # C and B are the "global" vectors handled by the caller, so a view # is needed. 
- mul!(view(C,irange),d,view(B,jrange),1,1) + mul!(view(C, irange), d, view(B, jrange), 1, 1) end else for block in children(A) - _hgemv_recursive!(C,block,B,offset) + _hgemv_recursive!(C, block, B, offset) end end return C end -function _hgemv_threads!(C::AbstractVector,B::AbstractVector,partition,offset) - nt = Threads.nthreads() +function _hgemv_threads!(C::AbstractVector, B::AbstractVector, partition, offset) + nt = Threads.nthreads() # make `nt` copies of C and run in parallel - Cthreads = [zero(C) for _ in 1:nt] + Cthreads = [zero(C) for _ in 1:nt] @sync for p in partition for block in p Threads.@spawn begin id = Threads.threadid() - _hgemv_recursive!(Cthreads[id],block,B,offset) + _hgemv_recursive!(Cthreads[id], block, B, offset) end end end # reduce for Ct in Cthreads - axpy!(1,Ct,C) + axpy!(1, Ct, C) end return C end -function _hgemv_static_partition!(C::AbstractVector,B::AbstractVector,partition,offset) +function _hgemv_static_partition!(C::AbstractVector, B::AbstractVector, partition, offset) # create a lock for the reduction step T = eltype(C) mutex = ReentrantLock() - np = length(partition) - nt = Threads.nthreads() - Cthreads = [zero(C) for _ in 1:nt] + np = length(partition) + nt = Threads.nthreads() + Cthreads = [zero(C) for _ in 1:nt] @sync for n in 1:np Threads.@spawn begin - id = Threads.threadid() + id = Threads.threadid() leaves = partition[n] - Cloc = Cthreads[id] + Cloc = Cthreads[id] for leaf in leaves irange = rowrange(leaf) .- offset[1] jrange = colrange(leaf) .- offset[2] - data = leaf.data + data = leaf.data if T <: SVector - mul!(view(Cloc,irange,1:1),data,view(B,jrange,1:1),1,1) + mul!(view(Cloc, irange, 1:1), data, view(B, jrange, 1:1), 1, 1) else - mul!(view(Cloc,irange),data,view(B,jrange),1,1) + mul!(view(Cloc, irange), data, view(B, jrange), 1, 1) end end # reduction lock(mutex) do - axpy!(1,Cloc,C) + return axpy!(1, Cloc, C) end end end @@ -614,10 +628,10 @@ end A proxy for the computational cost of a matrix/vector product. """ function _cost_gemv(R::RkMatrix) - rank(R)*sum(size(R)) + return rank(R) * sum(size(R)) end function _cost_gemv(M::Matrix) - length(M) + return length(M) end function _cost_gemv(H::HMatrix) acc = 0.0 diff --git a/src/partitions.jl b/src/partitions.jl index d51c605..c581f8f 100644 --- a/src/partitions.jl +++ b/src/partitions.jl @@ -23,8 +23,8 @@ recompute the partition for each matrix/vector product. """ const CACHED_PARTITIONS = WeakKeyDict{HMatrix,Partition}() -Base.hash(A::AbstractHMatrix,h::UInt) = hash(objectid(A),h) -Base.:(==)(A::AbstractHMatrix,B::AbstractHMatrix) = A === B +Base.hash(A::AbstractHMatrix, h::UInt) = hash(objectid(A), h) +Base.:(==)(A::AbstractHMatrix, B::AbstractHMatrix) = A === B """ build_sequence_partition(seq,nq,cost,nmax) @@ -33,7 +33,7 @@ Partition the sequence `seq` into `nq` contiguous subsequences with a maximum of cost of `nmax` per set. Note that if `nmax` is too small, this may not be possible (see [`has_partition`](@ref)). """ -function build_sequence_partition(seq,np,cost,cmax) +function build_sequence_partition(seq, np, cost, cmax) acc = 0 partition = [empty(seq) for _ in 1:np] k = 1 @@ -42,11 +42,11 @@ function build_sequence_partition(seq,np,cost,cmax) acc += c if acc > cmax k += 1 - push!(partition[k],el) + push!(partition[k], el) acc = c @assert k <= np "unable to build sequence partition. Value of `cmax` may be too small." 
else
-            push!(partition[k],el)
+            push!(partition[k], el)
        end
    end
    return partition
end

@@ -59,17 +59,17 @@ Find an approximation to the cost of an optimal partitioning of `seq` into
`nq` contiguous segments. The optimal cost is the smallest number `cmax` such that
`has_partition(seq,nq,cost,cmax)` returns `true`.
"""
-function find_optimal_cost(seq,np,cost=identity,tol=1)
-    lbound = maximum(cost,seq) |> Float64
-    ubound = sum(cost,seq) |> Float64
-    guess = (lbound+ubound)/2
+function find_optimal_cost(seq, np, cost=identity, tol=1)
+    lbound = Float64(maximum(cost, seq))
+    ubound = Float64(sum(cost, seq))
+    guess = (lbound + ubound) / 2
    while ubound - lbound ≥ tol
-        if has_partition(seq,np,guess,cost)
+        if has_partition(seq, np, guess, cost)
            ubound = guess
        else
            lbound = guess
        end
-        guess = (lbound+ubound)/2
+        guess = (lbound + ubound) / 2
    end
    return ubound
end

@@ -85,9 +85,9 @@ which minimizes the maximum `cost` over all possible partitions of `seq` into

The generated partition is optimal up to a tolerance `tol`; for integer valued
`cost`, setting `tol=1` means the partition is optimal.
"""
-function find_optimal_partition(seq,np,cost=(x)->1,tol=1)
-    cmax = find_optimal_cost(seq,np,cost,tol)
-    p = build_sequence_partition(seq,np,cost,cmax)
+function find_optimal_partition(seq, np, cost=(x) -> 1, tol=1)
+    cmax = find_optimal_cost(seq, np, cost, tol)
+    p = build_sequence_partition(seq, np, cost, cmax)
    return p
end

@@ -97,9 +97,9 @@ end
Given a sequence `seq`, determine whether or not a partition into `np` segments is
possible where the `cost` of each segment does not exceed `cmax`.
"""
-function has_partition(seq,np,cmax,cost=identity)
+function has_partition(seq, np, cmax, cost=identity)
    acc = 0
-    k = 1
+    k = 1
    for el in seq
        c = cost(el)
        acc += c
@@ -119,63 +119,63 @@ Partition the leaves of `H` into `np` sequences of approximately equal cost (as
determined by the `cost` function) while also trying to maximize the locality
of each partition.
"""
-function hilbert_partition(H::HMatrix,np=Threads.nthreads(),cost=_cost_gemv)
+function hilbert_partition(H::HMatrix, np=Threads.nthreads(), cost=_cost_gemv)
    # the hilbert curve will be indexed from (0,0) × (N-1,N-1), so set N to be
    # the smallest power of two larger than max(m,n), where m,n = size(H)
-    m,n = size(H)
-    N = max(m,n)
-    N = nextpow(2,N)
+    m, n = size(H)
+    N = max(m, n)
+    N = nextpow(2, N)
    # sort the leaves by their hilbert index
-    leaves = AbstractTrees.Leaves(H) |> collect
+    leaves = collect(AbstractTrees.Leaves(H))
    hilbert_indices = map(leaves) do leaf
        # use the center of the leaf as a cartesian index
-        i,j = pivot(leaf) .- 1 .+ size(leaf) .÷ 2
-        hilbert_cartesian_to_linear(N,i,j)
+        i, j = pivot(leaf) .- 1 .+ size(leaf) .÷ 2
+        return hilbert_cartesian_to_linear(N, i, j)
    end
    p = sortperm(hilbert_indices)
-    permute!(leaves,p)
+    permute!(leaves, p)
    # now compute a quasi-optimal partition of leaves based on `cost_mv`
-    cmax = find_optimal_cost(leaves,np,cost,1)
-    _partition = build_sequence_partition(leaves,np,cost,cmax)
-    partition = Partition(_partition,:hilbert)
-    push!(CACHED_PARTITIONS,H=>partition)
+    cmax = find_optimal_cost(leaves, np, cost, 1)
+    _partition = build_sequence_partition(leaves, np, cost, cmax)
+    partition = Partition(_partition, :hilbert)
+    push!(CACHED_PARTITIONS, H => partition)
    return partition
end

# TODO: benchmark the different partitioning strategies for gemv. Is the hilbert
# partition really faster than the simpler alternatives (row partition, col partition)?
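# Worked example of the sequence partitioning above (a sketch; these are
# internal helpers): splitting the cost sequence [5, 1, 1, 1, 5] into np = 2
# contiguous chunks with `cost = identity` bisects between lbound = 5 and
# ubound = 13, settles on cmax = 7, and the greedy fill in
# `build_sequence_partition` then returns [[5, 1, 1], [1, 5]].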
-function row_partition(H::HMatrix,np=Threads.nthreads(),cost=_cost_gemv) +function row_partition(H::HMatrix, np=Threads.nthreads(), cost=_cost_gemv) # sort the leaves by their row index - leaves = filter_tree(x -> isleaf(x),H) + leaves = filter_tree(x -> isleaf(x), H) row_indices = map(leaves) do leaf # use the center of the leaf as a cartesian index - i,j = pivot(leaf) + i, j = pivot(leaf) return i end p = sortperm(row_indices) - permute!(leaves,p) + permute!(leaves, p) # now compute a quasi-optimal partition of leaves based `cost_mv` - cmax = find_optimal_cost(leaves,np,cost,1) - _partition = build_sequence_partition(leaves,np,cost,cmax) - partition = Partition(_partition,:row) - push!(CACHED_PARTITIONS,H=>partition) + cmax = find_optimal_cost(leaves, np, cost, 1) + _partition = build_sequence_partition(leaves, np, cost, cmax) + partition = Partition(_partition, :row) + push!(CACHED_PARTITIONS, H => partition) return partition end -function col_partition(H::HMatrix,np=Threads.nthreads(),cost=_cost_gemv) +function col_partition(H::HMatrix, np=Threads.nthreads(), cost=_cost_gemv) # sort the leaves by their row index - leaves = filter_tree(x -> isleaf(x),H) + leaves = filter_tree(x -> isleaf(x), H) row_indices = map(leaves) do leaf # use the center of the leaf as a cartesian index - i,j = pivot(leaf) + i, j = pivot(leaf) return j end p = sortperm(row_indices) - permute!(leaves,p) + permute!(leaves, p) # now compute a quasi-optimal partition of leaves based `cost_mv` - cmax = find_optimal_cost(leaves,np,cost,1) - _partition = build_sequence_partition(leaves,np,cost,cmax) - partition = Partition(_partition,:col) - push!(CACHED_PARTITIONS,H=>partition) + cmax = find_optimal_cost(leaves, np, cost, 1) + _partition = build_sequence_partition(leaves, np, cost, cmax) + partition = Partition(_partition, :col) + push!(CACHED_PARTITIONS, H => partition) return partition end diff --git a/src/rkmatrix.jl b/src/rkmatrix.jl index de2c889..9f64fc4 100644 --- a/src/rkmatrix.jl +++ b/src/rkmatrix.jl @@ -11,29 +11,29 @@ mutable struct RkMatrix{T} <: AbstractMatrix{T} A::Matrix{T} B::Matrix{T} buffer::Vector{T} # used for gemv routines to avoid allocations - function RkMatrix(A::Matrix{T},B::Matrix{T}) where {T} - @assert size(A,2) == size(B,2) "second dimension of `A` and `B` must match" - m,r = size(A) - n = size(B,1) - if r*(m+n) >= m*n + function RkMatrix(A::Matrix{T}, B::Matrix{T}) where {T} + @assert size(A, 2) == size(B, 2) "second dimension of `A` and `B` must match" + m, r = size(A) + n = size(B, 1) + if r * (m + n) >= m * n @debug "Inefficient RkMatrix:" size(A) size(B) # error("Inefficient RkMatrix") end - buffer = Vector{T}(undef,r) - new{T}(A,B,buffer) + buffer = Vector{T}(undef, r) + return new{T}(A, B, buffer) end end -RkMatrix(A,B) = RkMatrix(promote(A,B)...) +RkMatrix(A, B) = RkMatrix(promote(A, B)...) -function Base.setproperty!(R::RkMatrix,s::Symbol,mat::Matrix) - setfield!(R,s,mat) +function Base.setproperty!(R::RkMatrix, s::Symbol, mat::Matrix) + setfield!(R, s, mat) # resize buffer - r = size(mat,2) - resize!(R.buffer,r) + r = size(mat, 2) + resize!(R.buffer, r) return R end -function Base.getindex(rmat::RkMatrix,i::Int,j::Int) +function Base.getindex(rmat::RkMatrix, i::Int, j::Int) msg = """ method `getindex(::AbstractHMatrix,args...)` has been disabled to avoid performance pitfalls. 
Unless you made an explicit call to `getindex`, this @@ -44,7 +44,7 @@ function Base.getindex(rmat::RkMatrix,i::Int,j::Int) r = rank(rmat) acc = zero(eltype(rmat)) for k in 1:r - acc += rmat.A[i,k] * conj(rmat.B[j,k]) + acc += rmat.A[i, k] * conj(rmat.B[j, k]) end else error(msg) @@ -53,15 +53,15 @@ function Base.getindex(rmat::RkMatrix,i::Int,j::Int) end # some "fast" ways of computing a column of R and row of ajoint(R) -Base.getindex(R::RkMatrix, ::Colon, j::Int) = getcol(R,j) +Base.getindex(R::RkMatrix, ::Colon, j::Int) = getcol(R, j) """ getcol!(col,M::AbstractMatrix,j) Fill the entries of `col` with column `j` of `M`. """ -function getcol!(col,R::RkMatrix,j::Int) - mul!(col,R.A,conj(view(R.B,j,:))) +function getcol!(col, R::RkMatrix, j::Int) + return mul!(col, R.A, conj(view(R.B, j, :))) end """ @@ -69,26 +69,26 @@ end Return a vector containing the `j`-th column of `M`. """ -function getcol(R::RkMatrix,j::Int) - m = size(R,1) +function getcol(R::RkMatrix, j::Int) + m = size(R, 1) T = eltype(R) - col = zeros(T,m) - getcol!(col,R,j) + col = zeros(T, m) + return getcol!(col, R, j) end -function getcol!(col,Ra::Adjoint{<:Any,<:RkMatrix},j::Int) +function getcol!(col, Ra::Adjoint{<:Any,<:RkMatrix}, j::Int) R = parent(Ra) - mul!(col,R.B,conj(view(R.A,j,:))) + return mul!(col, R.B, conj(view(R.A, j, :))) end -function getcol(Ra::Adjoint{<:Any,<:RkMatrix},j::Int) - m = size(Ra,1) +function getcol(Ra::Adjoint{<:Any,<:RkMatrix}, j::Int) + m = size(Ra, 1) T = eltype(Ra) - col = zeros(T,m) - getcol!(col,Ra,j) + col = zeros(T, m) + return getcol!(col, Ra, j) end -Base.getindex(Ra::Adjoint{<:Any,<:RkMatrix}, ::Colon, j::Int) = getcol(Ra,j) +Base.getindex(Ra::Adjoint{<:Any,<:RkMatrix}, ::Colon, j::Int) = getcol(Ra, j) """ RkMatrix(A::Vector{<:Vector},B::Vector{<:Vector}) @@ -97,35 +97,37 @@ Construct an `RkMatrix` from a vector of vectors. Assumes that `length(A) == length(B)`, which determines the rank, and that all vectors in `A` (resp. `B`) have the same length `m` (resp. `n`). """ -function RkMatrix(_A::Vector{V},_B::Vector{V}) where {V<:AbstractVector} - T = eltype(V) +function RkMatrix(_A::Vector{V}, _B::Vector{V}) where {V<:AbstractVector} + T = eltype(V) @assert length(_A) == length(_B) - k = length(_A) - m = length(first(_A)) - n = length(first(_B)) - A = Matrix{T}(undef,m,k) - B = Matrix{T}(undef,n,k) + k = length(_A) + m = length(first(_A)) + n = length(first(_B)) + A = Matrix{T}(undef, m, k) + B = Matrix{T}(undef, n, k) for i in 1:k - copyto!(view(A,:,i),_A[i]) - copyto!(view(B,:,i),_B[i]) + copyto!(view(A, :, i), _A[i]) + copyto!(view(B, :, i), _B[i]) end - return RkMatrix(A,B) + return RkMatrix(A, B) end Base.eltype(::RkMatrix{T}) where {T} = T -Base.size(rmat::RkMatrix) = (size(rmat.A,1), size(rmat.B,1)) -Base.length(rmat::RkMatrix) = prod(size(rmat)) -Base.isapprox(rmat::RkMatrix,B::AbstractArray,args...;kwargs...) = isapprox(Matrix(rmat),B,args...;kwargs...) +Base.size(rmat::RkMatrix) = (size(rmat.A, 1), size(rmat.B, 1)) +Base.length(rmat::RkMatrix) = prod(size(rmat)) +function Base.isapprox(rmat::RkMatrix, B::AbstractArray, args...; kwargs...) + return isapprox(Matrix(rmat), B, args...; kwargs...) 
+end

-rank(M::RkMatrix) = size(M.A,2)
+rank(M::RkMatrix) = size(M.A, 2)

-function Base.getproperty(R::RkMatrix,s::Symbol)
-    if s == :Bt
+function Base.getproperty(R::RkMatrix, s::Symbol)
+    if s == :Bt
        return adjoint(R.B)
-    elseif s == :At
+    elseif s == :At
        return adjoint(R.A)
    else
-        return getfield(R,s)
+        return getfield(R, s)
    end
end

@@ -135,17 +137,18 @@ end

Concatenate `M1` and `M2` horizontally to produce a new `RkMatrix` of rank
`rank(M1)+rank(M2)`.
"""
-function Base.hcat(M1::RkMatrix{T},M2::RkMatrix{T}) where {T}
-    m,n = size(M1)
-    s,t = size(M2)
-    (m == s) || throw(ArgumentError("number of rows of each array must match: got ($m,$s)"))
-    r1 = size(M1.A,2)
-    r2 = size(M2.A,2)
-    A = hcat(M1.A,M2.A)
-    B1 = vcat(M1.B,zeros(T,t,r1))
-    B2 = vcat(zeros(T,n,r2),M2.B)
-    B = hcat(B1,B2)
-    return RkMatrix(A,B)
+function Base.hcat(M1::RkMatrix{T}, M2::RkMatrix{T}) where {T}
+    m, n = size(M1)
+    s, t = size(M2)
+    (m == s) ||
+        throw(ArgumentError("number of rows of each array must match: got ($m,$s)"))
+    r1 = size(M1.A, 2)
+    r2 = size(M2.A, 2)
+    A = hcat(M1.A, M2.A)
+    B1 = vcat(M1.B, zeros(T, t, r1))
+    B2 = vcat(zeros(T, n, r2), M2.B)
+    B = hcat(B1, B2)
+    return RkMatrix(A, B)
end

"""
@@ -154,28 +157,29 @@ end

Concatenate `M1` and `M2` vertically to produce a new `RkMatrix` of rank
`rank(M1)+rank(M2)`.
"""
-function Base.vcat(M1::RkMatrix{T},M2::RkMatrix{T}) where {T}
-    m,n = size(M1)
-    s,t = size(M2)
-    n == t || throw(ArgumentError("number of columns of each array must match (got ($n,$t))"))
-    r1 = size(M1.A,2)
-    r2 = size(M2.A,2)
-    A1 = vcat(M1.A,zeros(T,s,r1))
-    A2 = vcat(zeros(T,m,r2),M2.A)
-    A = hcat(A1,A2)
-    B = hcat(M1.B,M2.B)
-    return RkMatrix(A,B)
+function Base.vcat(M1::RkMatrix{T}, M2::RkMatrix{T}) where {T}
+    m, n = size(M1)
+    s, t = size(M2)
+    n == t ||
+        throw(ArgumentError("number of columns of each array must match (got ($n,$t))"))
+    r1 = size(M1.A, 2)
+    r2 = size(M2.A, 2)
+    A1 = vcat(M1.A, zeros(T, s, r1))
+    A2 = vcat(zeros(T, m, r2), M2.A)
+    A = hcat(A1, A2)
+    B = hcat(M1.B, M2.B)
+    return RkMatrix(A, B)
end

-Base.copy(R::RkMatrix) = RkMatrix(copy(R.A),copy(R.B))
+Base.copy(R::RkMatrix) = RkMatrix(copy(R.A), copy(R.B))

function Base.Matrix(R::RkMatrix{<:Number})
-    Matrix(R.A*R.Bt)
+    return Matrix(R.A * R.Bt)
end

function Base.Matrix(R::RkMatrix{<:SMatrix})
    # collect must be used when we have a matrix of `SMatrix` because of this issue:
    # https://github.com/JuliaArrays/StaticArrays.jl/issues/966#issuecomment-943679214
-    R.A*collect(R.Bt)
+    return R.A * collect(R.Bt)
end

"""
@@ -184,9 +188,9 @@ end

Construct an [`RkMatrix`](@ref) from an `SVD` factorization.
"""
function RkMatrix(F::SVD)
-    A = F.U*Diagonal(F.S)
-    B = copy(F.V)
-    return RkMatrix(A,B)
+    A = F.U * Diagonal(F.S)
+    B = copy(F.V)
+    return RkMatrix(A, B)
end

"""
@@ -197,7 +201,7 @@ Construct an [`RkMatrix`](@ref) from a `Matrix` by passing through the full
"""
function RkMatrix(M::Matrix)
    F = svd(M)
-    RkMatrix(F)
+    return RkMatrix(F)
end

"""
@@ -206,7 +210,7 @@ end

The number of entries stored in the representation. Note that this is *not*
`length(R)`.
"""
-num_stored_elements(R::RkMatrix) = size(R.A,2)*(sum(size(R)))
+num_stored_elements(R::RkMatrix) = size(R.A, 2) * (sum(size(R)))

"""
    compression_ratio(R::RkMatrix)

@@ -222,15 +226,16 @@ compression_ratio(R::RkMatrix) = prod(size(R)) / num_stored_elements(R)

# problem in LinearAlgebra for the generic multiplication mul!(C,A,B,a,b) when
# C and B are vectors of static matrices, and A is a matrix of static
# matrices. Should eventually be removed.
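# Worked example for `compression_ratio` above: with m = n = 10_000 and rank
# r = 10, an RkMatrix stores num_stored_elements(R) = r*(m + n) = 200_000
# entries instead of the m*n = 10^8 entries of the dense block, giving
# compression_ratio(R) = 500.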
-function mul!(C::AbstractVector,Rk::RkMatrix{T},F::AbstractVector,a::Number,b::Number) where {T<:SMatrix} - m,n = size(Rk) - r = rank(Rk) - tmp = Rk.Bt*F - rmul!(C,b) +function mul!(C::AbstractVector, Rk::RkMatrix{T}, F::AbstractVector, a::Number, + b::Number) where {T<:SMatrix} + m, n = size(Rk) + r = rank(Rk) + tmp = Rk.Bt * F + rmul!(C, b) for k in 1:r tmp[k] *= a for i in 1:m - C[i] += Rk.A[i,k]*tmp[k] + C[i] += Rk.A[i, k] * tmp[k] end end return C diff --git a/src/splitter.jl b/src/splitter.jl index 9687010..04046a1 100644 --- a/src/splitter.jl +++ b/src/splitter.jl @@ -134,7 +134,7 @@ the "left" node) or `false` (point sorted on the "right" node). At the end a minimal `HyperRectangle` containing all left/right points is created. """ function binary_split!(cluster::ClusterTree{N,T}, predicate::Function) where {N,T} - f = predicate + f = predicate rec = container(cluster) els = root_elements(cluster) irange = index_range(cluster) @@ -156,12 +156,12 @@ function binary_split!(cluster::ClusterTree{N,T}, predicate::Function) where {N, else xl_right = min.(xl_right, pt) xu_right = max.(xu_right, pt) - buff[n-npts_right] = l2g[i] + buff[n - npts_right] = l2g[i] npts_right += 1 end end # bounding boxes - left_rec = HyperRectangle(xl_left, xu_left) + left_rec = HyperRectangle(xl_left, xu_left) right_rec = HyperRectangle(xl_right, xu_right) @assert npts_left + npts_right == length(irange) "elements lost during split" # new ranges for children cluster diff --git a/src/triangular.jl b/src/triangular.jl index 902d076..3633f83 100644 --- a/src/triangular.jl +++ b/src/triangular.jl @@ -1,5 +1,5 @@ const HUnitLowerTriangular = UnitLowerTriangular{<:Any,<:HMatrix} -const HUpperTriangular = UpperTriangular{<:Any,<:HMatrix} +const HUpperTriangular = UpperTriangular{<:Any,<:HMatrix} function ldiv!(L::HUnitLowerTriangular, B::AbstractMatrix) H = parent(L) @@ -9,19 +9,19 @@ function ldiv!(L::HUnitLowerTriangular, B::AbstractMatrix) else @assert !hasdata(H) "only leaves are allowed to have data when using `ldiv`!" shift = pivot(H) .- 1 - chdH = children(H) - m, n = size(chdH) + chdH = children(H) + m, n = size(chdH) @assert m === n - for i = 1:m - irows = colrange(chdH[i,i]) .- shift[2] - bi = view(B, irows, :) - for j = 1:(i - 1)# ji - jrows = colrange(chdH[i,j]) .- shift[2] - bj = view(B, jrows, :) - _mul131!(bi, chdH[i,j], bj, -1) + for i in m:-1:1 + irows = colrange(chdH[i, i]) .- shift[2] + bi = view(B, irows, :) + for j in (i + 1):n # j>i + jrows = colrange(chdH[i, j]) .- shift[2] + bj = view(B, jrows, :) + _mul131!(bi, chdH[i, j], bj, -1) end # recursion stage - ldiv!(UpperTriangular(chdH[i,i]), bi) + ldiv!(UpperTriangular(chdH[i, i]), bi) end end return B @@ -93,20 +93,20 @@ function rdiv!(B::StridedMatrix, U::HUpperTriangular) rdiv!(B, UpperTriangular(d)) # b <-- b/L else @assert !hasdata(H) "only leaves are allowed to have data when using `rdiv`!" 
-        shift = pivot(H) .- 1 |> reverse
-        chdH = children(H)
-        m, n = size(chdH)
+        shift = reverse(pivot(H) .- 1)
+        chdH = children(H)
+        m, n = size(chdH)
        @assert m === n
-        for i = 1:m
-            icols = rowrange(chdH[i,i]) .- shift[1]
-            bi = view(B, :, icols)
-            for j = 1:(i - 1)
-                jcols = rowrange(chdH[j,i]) .- shift[1]
-                bj = view(B, :, jcols)
-                _mul113!(bi, bj, chdH[j,i], -1)
+        for i in 1:m
+            icols = rowrange(chdH[i, i]) .- shift[1]
+            bi = view(B, :, icols)
+            for j in 1:(i - 1)
+                jcols = rowrange(chdH[j, i]) .- shift[1]
+                bj = view(B, :, jcols)
+                _mul113!(bi, bj, chdH[j, i], -1)
            end
            # recursion stage
-            rdiv!(bi, UpperTriangular(chdH[i,i]))
+            rdiv!(bi, UpperTriangular(chdH[i, i]))
        end
    end
    return B
@@ -135,13 +135,13 @@ function rdiv!(X::AbstractHMatrix, U::HUpperTriangular, compressor)
        chdH = children(H)
        m, n = size(chdH)
        for k in 1:size(chdX, 1)
-            for i = 1:m
-                for j = 1:(i - 1)
+            for i in 1:m
+                for j in 1:(i - 1)
                    @timeit_debug "hmul!" begin
-                        hmul!(chdX[k,i], chdX[k,j], chdH[j,i], -1, 1, compressor)
+                        hmul!(chdX[k, i], chdX[k, j], chdH[j, i], -1, 1, compressor)
                    end
                end
-                rdiv!(chdX[k,i], UpperTriangular(chdH[i,i]), compressor)
+                rdiv!(chdX[k, i], UpperTriangular(chdH[i, i]), compressor)
            end
        end
    end
diff --git a/src/utils.jl b/src/utils.jl
index f9750b2..ad2a54c 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -30,18 +30,18 @@ struct PermutedMatrix{K,T} <: AbstractMatrix{T}
    data::K # original matrix
    rowperm::Vector{Int}
    colperm::Vector{Int}
-    function PermutedMatrix(orig,rowperm,colperm)
+    function PermutedMatrix(orig, rowperm, colperm)
        K = typeof(orig)
        T = eltype(orig)
-        new{K,T}(orig,rowperm,colperm)
+        return new{K,T}(orig, rowperm, colperm)
    end
end

Base.size(M::PermutedMatrix) = size(M.data)

-function Base.getindex(M::PermutedMatrix,i,j)
+function Base.getindex(M::PermutedMatrix, i, j)
    ip = M.rowperm[i]
    jp = M.colperm[j]
-    M.data[ip,jp]
+    return M.data[ip, jp]
end

"""
@@ -53,20 +53,20 @@ output `d` ranges from `0` to `n^2-1`.

See [https://en.wikipedia.org/wiki/Hilbert_curve](https://en.wikipedia.org/wiki/Hilbert_curve).
"""
-function hilbert_cartesian_to_linear(n::Integer,x,y)
+function hilbert_cartesian_to_linear(n::Integer, x, y)
    @assert ispow2(n)
-    @assert 0 ≤ x ≤ n-1
-    @assert 0 ≤ y ≤ n-1
+    @assert 0 ≤ x ≤ n - 1
+    @assert 0 ≤ y ≤ n - 1
    d = 0
    s = n >> 1
-    while s>0
+    while s > 0
        rx = (x & s) > 0
        ry = (y & s) > 0
-        d += s^2*((3*rx)⊻ry)
-        x,y = _rot(n,x,y,rx,ry)
+        d += s^2 * ((3 * rx) ⊻ ry)
+        x, y = _rot(n, x, y, rx, ry)
        s = s >> 1
    end
-    @assert 0 ≤ d ≤ n^2-1
+    @assert 0 ≤ d ≤ n^2 - 1
    return d
end

@@ -78,47 +78,47 @@ n-1` and `0 ≤ y ≤ n-1` on the Hilbert curve of order `n`.

See [https://en.wikipedia.org/wiki/Hilbert_curve](https://en.wikipedia.org/wiki/Hilbert_curve).
"""
-function hilbert_linear_to_cartesian(n::Integer,d)
+function hilbert_linear_to_cartesian(n::Integer, d)
    @assert ispow2(n)
-    @assert 0 ≤ d ≤ n^2-1
-    x,y = 0,0
+    @assert 0 ≤ d ≤ n^2 - 1
+    x, y = 0, 0
    s = 1
-    while s<n
+    while s < n
        rx = 1 & (d >> 1)
        ry = 1 & (d ⊻ rx)
-        x,y = _rot(s,x,y,rx,ry)
-        x += s*rx
-        y += s*ry
+        x, y = _rot(s, x, y, rx, ry)
+        x += s * rx
+        y += s * ry
        d = d >> 2
        s = s << 1
    end
-    @assert 0 ≤ x ≤ n-1
-    @assert 0 ≤ y ≤ n-1
-    return x,y
+    @assert 0 ≤ x ≤ n - 1
+    @assert 0 ≤ y ≤ n - 1
+    return x, y
end

# auxiliary function used in the hilbert curve.
Rotates the points x,y -function _rot(n,x,y,rx,ry) +function _rot(n, x, y, rx, ry) if ry == 0 if rx == 1 - x = n-1 - x - y = n-1 - y + x = n - 1 - x + y = n - 1 - y end - x,y = y,x + x, y = y, x end - return x,y + return x, y end function hilbert_points(n::Integer) @assert ispow2(n) xx = Int[] yy = Int[] - for d in 0:n^2-1 - x,y = hilbert_linear_to_cartesian(n,d) - push!(xx,x) - push!(yy,y) + for d in 0:(n^2 - 1) + x, y = hilbert_linear_to_cartesian(n, d) + push!(xx, x) + push!(yy, y) end - xx,yy + return xx, yy end """ @@ -136,7 +136,7 @@ disable_getindex() = (ALLOW_GETINDEX[] = false) The opposite of [`disable_getindex`](@ref). """ -enable_getindex() = (ALLOW_GETINDEX[] = true) +enable_getindex() = (ALLOW_GETINDEX[] = true) """ filter_tree(f,tree,isterminal=true) diff --git a/test/addition_test.jl b/test/addition_test.jl index 97c5f39..adb52d7 100644 --- a/test/addition_test.jl +++ b/test/addition_test.jl @@ -5,36 +5,36 @@ using Random using StaticArrays using HMatrices: RkMatrix -include(joinpath(HMatrices.PROJECT_ROOT,"test","testutils.jl")) +include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl")) -N,r = 500,3 -atol = 1e-6 -X = Y = rand(SVector{3,Float64},N) -splitter = CardinalitySplitter(nmax=50) -Xclt = Yclt = ClusterTree(X,splitter) -adm = StrongAdmissibilityStd(eta=3) -rtol = 1e-5 -comp = PartialACA(rtol=rtol) -K = helmholtz_matrix(X,Y,1.3) -H = assemble_hmat(K,Xclt,Yclt;adm,comp,threads=false,distributed=false) -T = eltype(H) -_H = Matrix(H;global_index=false) -R = RkMatrix(rand(T,N,r),rand(T,N,r)) -_R = Matrix(R) -M = rand(T,N,N) -_M = Matrix(M) -a = rand() +N, r = 500, 3 +atol = 1e-6 +X = Y = rand(SVector{3,Float64}, N) +splitter = CardinalitySplitter(; nmax=50) +Xclt = Yclt = ClusterTree(X, splitter) +adm = StrongAdmissibilityStd(; eta=3) +rtol = 1e-5 +comp = PartialACA(; rtol=rtol) +K = helmholtz_matrix(X, Y, 1.3) +H = assemble_hmat(K, Xclt, Yclt; adm, comp, threads=false, distributed=false) +T = eltype(H) +_H = Matrix(H; global_index=false) +R = RkMatrix(rand(T, N, r), rand(T, N, r)) +_R = Matrix(R) +M = rand(T, N, N) +_M = Matrix(M) +a = rand() @testset "axpy!" 
begin - @test axpy!(a,M,deepcopy(R)) ≈ a*_M + _R - tmp = axpy!(a,M,deepcopy(H)) - @test Matrix(tmp;global_index=false) ≈ a*_M + _H - @test axpy!(a,R,deepcopy(M)) ≈ a*_R + _M - @test axpy!(a,R,deepcopy(R)) ≈ a*_R + _R - tmp = axpy!(a,R,deepcopy(H)) - Matrix(tmp;global_index=false) ≈ a*_R + _H - @test axpy!(a,H,deepcopy(M)) ≈ a*_H + _M - @test axpy!(a,H,deepcopy(R)) ≈ a*_H + _R - tmp = axpy!(a,H,deepcopy(H)) - @test Matrix(tmp;global_index=false) ≈ a*_H + _H + @test axpy!(a, M, deepcopy(R)) ≈ a * _M + _R + tmp = axpy!(a, M, deepcopy(H)) + @test Matrix(tmp; global_index=false) ≈ a * _M + _H + @test axpy!(a, R, deepcopy(M)) ≈ a * _R + _M + @test axpy!(a, R, deepcopy(R)) ≈ a * _R + _R + tmp = axpy!(a, R, deepcopy(H)) + Matrix(tmp; global_index=false) ≈ a * _R + _H + @test axpy!(a, H, deepcopy(M)) ≈ a * _H + _M + @test axpy!(a, H, deepcopy(R)) ≈ a * _H + _R + tmp = axpy!(a, H, deepcopy(H)) + @test Matrix(tmp; global_index=false) ≈ a * _H + _H end diff --git a/test/compressor_test.jl b/test/compressor_test.jl index 7fc3c85..aafa3e7 100644 --- a/test/compressor_test.jl +++ b/test/compressor_test.jl @@ -4,142 +4,141 @@ using StaticArrays using LinearAlgebra using HMatrices: ACA, PartialACA, TSVD, RkMatrix -include(joinpath(HMatrices.PROJECT_ROOT,"test","testutils.jl")) +include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl")) using Random Random.seed!(1) @testset "Scalar" begin T = ComplexF64 - m,n = 100,100 - X = rand(SVector{3,Float64},m) - Y = map(i->SVector(10,0,0) + rand(SVector{3,Float64}),1:n) - K = helmholtz_matrix(X,Y,1.0) + m, n = 100, 100 + X = rand(SVector{3,Float64}, m) + Y = map(i -> SVector(10, 0, 0) + rand(SVector{3,Float64}), 1:n) + K = helmholtz_matrix(X, Y, 1.0) M = Matrix(K) - irange,jrange = 1:m,1:n + irange, jrange = 1:m, 1:n @testset "aca_full" begin atol = 1e-5 - aca = ACA(atol=atol) - R = aca(K,irange,jrange) + aca = ACA(; atol=atol) + R = aca(K, irange, jrange) @test norm(Matrix(R) - M) < atol rtol = 1e-5 - aca = ACA(rtol=rtol) - R = aca(M,irange,jrange) + aca = ACA(; rtol=rtol) + R = aca(M, irange, jrange) norm(Matrix(R) - M) - @test norm(Matrix(R) - M) < rtol*norm(M) + @test norm(Matrix(R) - M) < rtol * norm(M) r = 10 - aca = ACA(rank=r) - R = aca(M,irange,jrange) + aca = ACA(; rank=r) + R = aca(M, irange, jrange) @test rank(R) == r end @testset "aca_partial" begin atol = 1e-5 - aca = PartialACA(atol=atol) - R = aca(M,irange,jrange) + aca = PartialACA(; atol=atol) + R = aca(M, irange, jrange) @test norm(Matrix(R) - M) < atol rtol = 1e-5 - aca = PartialACA(rtol=rtol) - R = aca(M,irange,jrange) - @test norm(Matrix(R) - M) < rtol*norm(M) + aca = PartialACA(; rtol=rtol) + R = aca(M, irange, jrange) + @test norm(Matrix(R) - M) < rtol * norm(M) r = 10 - aca = PartialACA(rank=r) - R = aca(M,irange,jrange) + aca = PartialACA(; rank=r) + R = aca(M, irange, jrange) @test rank(R) == r # test fast update of frobenius norm - m,n = 10000,1000 + m, n = 10000, 1000 r = 10 T = ComplexF64 - A = [rand(T,m) for _ in 1:r] - B = [rand(T,n) for _ in 1:r] - R = RkMatrix(A,B) - old_norm = norm(Matrix(R),2) - push!(A,rand(T,m)) - push!(B,rand(T,n)) - Rnew = RkMatrix(A,B) - new_norm = norm(Matrix(Rnew),2) - @test new_norm ≈ HMatrices._update_frob_norm(old_norm,A,B) + A = [rand(T, m) for _ in 1:r] + B = [rand(T, n) for _ in 1:r] + R = RkMatrix(A, B) + old_norm = norm(Matrix(R), 2) + push!(A, rand(T, m)) + push!(B, rand(T, n)) + Rnew = RkMatrix(A, B) + new_norm = norm(Matrix(Rnew), 2) + @test new_norm ≈ HMatrices._update_frob_norm(old_norm, A, B) # test simple case where things are not 
compressible - A = rand(2,2) - comp = PartialACA(rtol=1e-5) - @test comp(A,1:2,1:2) ≈ A + A = rand(2, 2) + comp = PartialACA(; rtol=1e-5) + @test comp(A, 1:2, 1:2) ≈ A end @testset "truncated svd" begin atol = 1e-5 - tsvd = TSVD(atol=atol) - R = tsvd(M,irange,jrange) + tsvd = TSVD(; atol=atol) + R = tsvd(M, irange, jrange) # the inequality below is guaranteed to be true for the spectral norm # i.e. (the `opnorm` with `p=2`). @test opnorm(Matrix(R) - M) < atol rtol = 1e-5 - tsvd = TSVD(rtol=rtol) - R = tsvd(M,irange,jrange) - @test opnorm(Matrix(R) - M) < rtol*opnorm(M) + tsvd = TSVD(; rtol=rtol) + R = tsvd(M, irange, jrange) + @test opnorm(Matrix(R) - M) < rtol * opnorm(M) r = 10 - tsvd = TSVD(rank=r) - R = tsvd(M,irange,jrange) + tsvd = TSVD(; rank=r) + R = tsvd(M, irange, jrange) @test rank(R) == r end end - @testset "Tensorial" begin T = SMatrix{3,3,ComplexF64,9} # T = SMatrix{3,3,Float64,9} - m,n = 100,100 - X = rand(SVector{3,Float64},m) - Y = map(i->SVector(10,0,0) + rand(SVector{3,Float64}),1:n) - K = elastosdynamic_matrix(X,Y,1.0,2.0,1.0,1.0) + m, n = 100, 100 + X = rand(SVector{3,Float64}, m) + Y = map(i -> SVector(10, 0, 0) + rand(SVector{3,Float64}), 1:n) + K = elastosdynamic_matrix(X, Y, 1.0, 2.0, 1.0, 1.0) # K = ElastostaticMatrix(X,Y,1.0,2.0) M = Matrix(K) - irange,jrange = 1:m,1:n + irange, jrange = 1:m, 1:n @testset "aca_full" begin atol = 1e-5 - aca = ACA(atol=atol) - R = aca(K,irange,jrange); + aca = ACA(; atol=atol) + R = aca(K, irange, jrange) @test norm(Matrix(R) - M) < atol rtol = 1e-5 - aca = ACA(rtol=rtol) - R = aca(M,irange,jrange) + aca = ACA(; rtol=rtol) + R = aca(M, irange, jrange) norm(Matrix(R) - M) - @test norm(Matrix(R) - M) < rtol*norm(M) + @test norm(Matrix(R) - M) < rtol * norm(M) r = 10 - aca = ACA(rank=r) - R = aca(M,irange,jrange); + aca = ACA(; rank=r) + R = aca(M, irange, jrange) @test rank(R) == r end @testset "aca_partial" begin atol = 1e-5 - aca = PartialACA(atol=atol) - R = aca(M,irange,jrange) + aca = PartialACA(; atol=atol) + R = aca(M, irange, jrange) @test norm(Matrix(R) - M) < atol rtol = 1e-5 - aca = PartialACA(rtol=rtol) - R = aca(M,irange,jrange) - @test norm(Matrix(R) - M) < rtol*norm(M) + aca = PartialACA(; rtol=rtol) + R = aca(M, irange, jrange) + @test norm(Matrix(R) - M) < rtol * norm(M) r = 10 - aca = PartialACA(rank=r) - R = aca(M,irange,jrange); + aca = PartialACA(; rank=r) + R = aca(M, irange, jrange) @test rank(R) == r # test fast update of frobenius norm - m,n = 10000,1000 + m, n = 10000, 1000 r = 10 T = ComplexF64 - A = [rand(T,m) for _ in 1:r] - B = [rand(T,n) for _ in 1:r] - R = RkMatrix(A,B) - old_norm = norm(Matrix(R),2) - push!(A,rand(T,m)) - push!(B,rand(T,n)) - Rnew = RkMatrix(A,B) - new_norm = norm(Matrix(Rnew),2) - @test new_norm ≈ HMatrices._update_frob_norm(old_norm,A,B) + A = [rand(T, m) for _ in 1:r] + B = [rand(T, n) for _ in 1:r] + R = RkMatrix(A, B) + old_norm = norm(Matrix(R), 2) + push!(A, rand(T, m)) + push!(B, rand(T, n)) + Rnew = RkMatrix(A, B) + new_norm = norm(Matrix(Rnew), 2) + @test new_norm ≈ HMatrices._update_frob_norm(old_norm, A, B) # test simple case where things are not compressible - A = rand(2,2) - comp = PartialACA(rtol=1e-5) - @test comp(A,1:2,1:2) ≈ A + A = rand(2, 2) + comp = PartialACA(; rtol=1e-5) + @test comp(A, 1:2, 1:2) ≈ A end end diff --git a/test/dhmatrix_test.jl b/test/dhmatrix_test.jl index 48ec243..e96a658 100644 --- a/test/dhmatrix_test.jl +++ b/test/dhmatrix_test.jl @@ -4,21 +4,21 @@ addprocs(4; exeflags=`--project=$(Base.active_project())`) using Test using StaticArrays 
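+# Distributed assembly ships blocks to the worker processes, so every worker
+# must be able to construct the kernel entries; that is why `HMatrices` and
+# the test helpers are loaded with `@everywhere` below. A minimal sketch of
+# the same pattern (illustrative only: `my_kernel` is a placeholder, and
+# `KernelMatrix` is used as in `testutils.jl`):
+#
+#   using Distributed, StaticArrays
+#   addprocs(2; exeflags=`--project=$(Base.active_project())`)
+#   @everywhere using HMatrices, LinearAlgebra
+#   @everywhere my_kernel(x, y) = inv(4π * (norm(x - y) + 1e-10))
+#   X = rand(SVector{3,Float64}, 2_000)
+#   K = KernelMatrix(my_kernel, X, X)
+#   Hd = assemble_hmat(K; distributed=true, threads=false)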
@everywhere using HMatrices -@everywhere include(joinpath(HMatrices.PROJECT_ROOT,"test","testutils.jl")) +@everywhere include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl")) @testset "Assemble" begin - m,n = 10_000,10_000 - X = Y = points_on_cylinder(1,m) - splitter = CardinalitySplitter(nmax=100) - Xclt = Yclt = ClusterTree(X,splitter) - adm = StrongAdmissibilityStd(eta=3) - atol = 1e-6 - comp = PartialACA(;atol) + m, n = 10_000, 10_000 + X = Y = points_on_cylinder(1, m) + splitter = CardinalitySplitter(; nmax=100) + Xclt = Yclt = ClusterTree(X, splitter) + adm = StrongAdmissibilityStd(; eta=3) + atol = 1e-6 + comp = PartialACA(; atol) # Laplace - K = laplace_matrix(X,Y) - H = assemble_hmat(K,Xclt,Yclt;adm,comp,distributed=false,threads=true) - Hd = assemble_hmat(K,Xclt,Yclt;adm,comp,distributed=true,threads=false) + K = laplace_matrix(X, Y) + H = assemble_hmat(K, Xclt, Yclt; adm, comp, distributed=false, threads=true) + Hd = assemble_hmat(K, Xclt, Yclt; adm, comp, distributed=true, threads=false) x = rand(n) y = rand(m) - @test H*x ≈ Hd*x + @test H * x ≈ Hd * x end diff --git a/test/hmatrix_test.jl b/test/hmatrix_test.jl index fd2b709..3000d46 100644 --- a/test/hmatrix_test.jl +++ b/test/hmatrix_test.jl @@ -3,38 +3,38 @@ using StaticArrays using HMatrices using LinearAlgebra -include(joinpath(HMatrices.PROJECT_ROOT,"test","testutils.jl")) +include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl")) @testset "Assemble" begin - m,n = 1_000,1_000 - X = Y = rand(SVector{3,Float64},m) - splitter = CardinalitySplitter(nmax=20) - Xclt = Yclt = ClusterTree(X,splitter) - adm = StrongAdmissibilityStd(eta=3) - rtol = 1e-5 - comp = PartialACA(rtol=rtol) + m, n = 1_000, 1_000 + X = Y = rand(SVector{3,Float64}, m) + splitter = CardinalitySplitter(; nmax=20) + Xclt = Yclt = ClusterTree(X, splitter) + adm = StrongAdmissibilityStd(; eta=3) + rtol = 1e-5 + comp = PartialACA(; rtol=rtol) # Laplace - for threads in (true,false) - K = laplace_matrix(X,Y) - H = assemble_hmat(K,Xclt,Yclt;adm,comp,threads) - @test norm(Matrix(K) - Matrix(H;global_index=true)) < rtol*norm(Matrix(K)) - H = assemble_hmat(K;threads,distributed=false) - @test norm(Matrix(K) - Matrix(H;global_index=true)) < rtol*norm(Matrix(K)) - adjH = adjoint(H) - H_full = Matrix(H;global_index=false) + for threads in (true, false) + K = laplace_matrix(X, Y) + H = assemble_hmat(K, Xclt, Yclt; adm, comp, threads) + @test norm(Matrix(K) - Matrix(H; global_index=true)) < rtol * norm(Matrix(K)) + H = assemble_hmat(K; threads, distributed=false) + @test norm(Matrix(K) - Matrix(H; global_index=true)) < rtol * norm(Matrix(K)) + adjH = adjoint(H) + H_full = Matrix(H; global_index=false) adjH_full = adjoint(H_full) @testset "getindex" begin HMatrices.enable_getindex() - @test H_full[8,64] ≈ H[8,64] + @test H_full[8, 64] ≈ H[8, 64] HMatrices.disable_getindex() - @test_throws ErrorException H_full[8,64] ≈ H[8,64] + @test_throws ErrorException H_full[8, 64] ≈ H[8, 64] HMatrices.enable_getindex() - @test H_full[:,666] ≈ H[:,666] - @test adjH_full[:,666] ≈ adjH[:,666] + @test H_full[:, 666] ≈ H[:, 666] + @test adjH_full[:, 666] ≈ adjH[:, 666] end # Elastostatic - K = elastostatic_matrix(X,Y,1.1,1.2) - H = assemble_hmat(K,Xclt,Yclt;adm,comp,threads) - @test norm(Matrix(K) - Matrix(H;global_index=true)) < rtol*norm(Matrix(K)) + K = elastostatic_matrix(X, Y, 1.1, 1.2) + H = assemble_hmat(K, Xclt, Yclt; adm, comp, threads) + @test norm(Matrix(K) - Matrix(H; global_index=true)) < rtol * norm(Matrix(K)) end end diff --git a/test/hyperrectangle_test.jl 
b/test/hyperrectangle_test.jl index 3fdc82a..ef39b67 100644 --- a/test/hyperrectangle_test.jl +++ b/test/hyperrectangle_test.jl @@ -26,7 +26,8 @@ using StaticArrays push!(pts, SVector(x, y)) end end - @test HMatrices.bounding_box(pts) == HMatrices.HyperRectangle(SVector(-1.0, -1), SVector(1, 1.0)) + @test HMatrices.bounding_box(pts) == + HMatrices.HyperRectangle(SVector(-1.0, -1), SVector(1, 1.0)) @test HMatrices.bounding_box(pts, true) == HMatrices.HyperRectangle(SVector(-1.0, -1), SVector(1, 1.0)) pts = SVector{2,Float64}[] @@ -35,7 +36,8 @@ using StaticArrays push!(pts, SVector(x, y)) end end - @test HMatrices.bounding_box(pts) == HMatrices.HyperRectangle(SVector(-1.0, -1), SVector(1, 2.0)) + @test HMatrices.bounding_box(pts) == + HMatrices.HyperRectangle(SVector(-1.0, -1), SVector(1, 2.0)) @test HMatrices.bounding_box(pts, true) == HMatrices.HyperRectangle(SVector(-1.5, -1), SVector(3 / 2, 2.0)) rec1 = HMatrices.HyperRectangle(SVector(0, 0), SVector(1, 1)) diff --git a/test/lu_test.jl b/test/lu_test.jl index 5294864..2af2c73 100644 --- a/test/lu_test.jl +++ b/test/lu_test.jl @@ -6,29 +6,29 @@ using StaticArrays using HMatrices: RkMatrix -include(joinpath(HMatrices.PROJECT_ROOT,"test","testutils.jl")) +include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl")) Random.seed!(1) m = 5000 T = Float64 -X = points_on_sphere(m) -Y = X +X = points_on_sphere(m) +Y = X -K = laplace_matrix(X,X) +K = laplace_matrix(X, X) -splitter = CardinalitySplitter(nmax=50) -Xclt = ClusterTree(X,splitter) -Yclt = ClusterTree(Y,splitter) +splitter = CardinalitySplitter(; nmax=50) +Xclt = ClusterTree(X, splitter) +Yclt = ClusterTree(Y, splitter) adm = StrongAdmissibilityStd(3) -comp = PartialACA(;atol=1e-10); -H = assemble_hmat(K,Xclt,Yclt;adm,comp,threads=false,distributed=false) -H_full = Matrix(H) +comp = PartialACA(; atol=1e-10); +H = assemble_hmat(K, Xclt, Yclt; adm, comp, threads=false, distributed=false) +H_full = Matrix(H) ## -hlu = lu(H;atol=1e-5) -y = rand(m) -M = Matrix(K) -exact = M\y -approx = hlu\y -@test norm(exact - approx,Inf) < 1e-6 +hlu = lu(H; atol=1e-5) +y = rand(m) +M = Matrix(K) +exact = M \ y +approx = hlu \ y +@test norm(exact - approx, Inf) < 1e-6 diff --git a/test/multiplication_test.jl b/test/multiplication_test.jl index 495bc40..d973b6b 100755 --- a/test/multiplication_test.jl +++ b/test/multiplication_test.jl @@ -6,59 +6,58 @@ using StaticArrays using HMatrices: RkMatrix -include(joinpath(HMatrices.PROJECT_ROOT,"test","testutils.jl")) +include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl")) Random.seed!(1) m = 2000 n = 2000 -X = rand(SVector{3,Float64},m) -Y = [rand(SVector{3,Float64}) for _ in 1:n] -splitter = CardinalitySplitter(nmax=40) -Xclt = ClusterTree(X,splitter) -Yclt = ClusterTree(Y,splitter) -adm = StrongAdmissibilityStd(eta=3) -rtol = 1e-5 -comp = PartialACA(rtol=rtol) -K = laplace_matrix(X,Y) -H = assemble_hmat(K,Xclt,Yclt;adm,comp,threads=false,distributed=false) +X = rand(SVector{3,Float64}, m) +Y = [rand(SVector{3,Float64}) for _ in 1:n] +splitter = CardinalitySplitter(; nmax=40) +Xclt = ClusterTree(X, splitter) +Yclt = ClusterTree(Y, splitter) +adm = StrongAdmissibilityStd(; eta=3) +rtol = 1e-5 +comp = PartialACA(; rtol=rtol) +K = laplace_matrix(X, Y) +H = assemble_hmat(K, Xclt, Yclt; adm, comp, threads=false, distributed=false) -H_full = Matrix(H;global_index=false) +H_full = Matrix(H; global_index=false) α = rand() - 0.5 β = rand() - 0.5 @testset "hmul!" 
begin
-    C = deepcopy(H);
-    tmp = β*H_full + α*H_full*H_full
-    HMatrices.hmul!(C,H,H,α,β,PartialACA(;atol=1e-6))
-    @test Matrix(C;global_index=false) ≈ tmp
+    C = deepcopy(H)
+    tmp = β * H_full + α * H_full * H_full
+    HMatrices.hmul!(C, H, H, α, β, PartialACA(; atol=1e-6))
+    @test Matrix(C; global_index=false) ≈ tmp
 end
 
 @testset "gemv" begin
-
-    α = rand()-0.5
-    β = rand()-0.5
+    α = rand() - 0.5
+    β = rand() - 0.5
     T = eltype(H)
-    m,n = size(H)
-    x = rand(T,n)
-    y = rand(T,m)
+    m, n = size(H)
+    x = rand(T, n)
+    y = rand(T, m)
     @testset "serial" begin
-        exact = β*y + α*H_full*x
-        approx = mul!(copy(y),H,x,α,β;threads=false,global_index=false)
+        exact = β * y + α * H_full * x
+        approx = mul!(copy(y), H, x, α, β; threads=false, global_index=false)
         @test exact ≈ approx
-        exact = β*y + α*Matrix(H;global_index=true)*x
-        approx = mul!(copy(y),H,x,α,β;threads=false,global_index=true)
+        exact = β * y + α * Matrix(H; global_index=true) * x
+        approx = mul!(copy(y), H, x, α, β; threads=false, global_index=true)
         @test exact ≈ approx
     end
     @testset "threads" begin
-        exact = β*y + α*H_full*x
-        approx = mul!(copy(y),H,x,α,β;threads=true,global_index=false)
+        exact = β * y + α * H_full * x
+        approx = mul!(copy(y), H, x, α, β; threads=true, global_index=false)
         @test exact ≈ approx
-        exact = β*y + α*Matrix(H;global_index=true)*x
-        approx = mul!(copy(y),H,x,α,β;threads=false,global_index=true)
+        exact = β * y + α * Matrix(H; global_index=true) * x
+        approx = mul!(copy(y), H, x, α, β; threads=true, global_index=true)
         @test exact ≈ approx
     end
 end
diff --git a/test/rkmatrix_test.jl b/test/rkmatrix_test.jl
index 05d63c8..f65833a 100644
--- a/test/rkmatrix_test.jl
+++ b/test/rkmatrix_test.jl
@@ -9,20 +9,20 @@ using HMatrices: RkMatrix, compression_ratio
     m = 20
     n = 30
     r = 5
-    A = rand(ComplexF64,m,r)
-    B = rand(ComplexF64,n,r)
-    R = RkMatrix(A,B);
-    Ra = adjoint(R);
-    M = A*adjoint(B)
-    Ma = adjoint(M)
+    A = rand(ComplexF64, m, r)
+    B = rand(ComplexF64, n, r)
+    R = RkMatrix(A, B)
+    Ra = adjoint(R)
+    M = A * adjoint(B)
+    Ma = adjoint(M)
     ## basic tests
-    @test size(R) == (m,n)
+    @test size(R) == (m, n)
     @test rank(R) == r
     @test Matrix(R) ≈ M
-    @test compression_ratio(R) ≈ m*n / (r*(m+n))
-    @test R[:,5] ≈ M[:,5]
-    @test Ra[:,5] ≈ Ma[:,5]
+    @test compression_ratio(R) ≈ m * n / (r * (m + n))
+    @test R[:, 5] ≈ M[:, 5]
+    @test Ra[:, 5] ≈ Ma[:, 5]
 end
 
 @testset "Matrix entries" begin
@@ -30,12 +30,12 @@ using HMatrices: RkMatrix, compression_ratio
     n = 30
     r = 10
     T = SMatrix{3,3,ComplexF64,9}
-    A = rand(T,m,r)
-    B = rand(T,n,r)
-    R = RkMatrix(A,B)
+    A = rand(T, m, r)
+    B = rand(T, n, r)
+    R = RkMatrix(A, B)
     ## basic tests
-    @test size(R) == (m,n)
+    @test size(R) == (m, n)
     @test rank(R) == r
-    @test compression_ratio(R) ≈ m*n / (r*(m+n))
+    @test compression_ratio(R) ≈ m * n / (r * (m + n))
 end
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 5d43f48..0f245e2 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,23 +1,43 @@
 using SafeTestsets
 
-@safetestset "Utils" begin include("utils_test.jl") end
+@safetestset "Utils" begin
+    include("utils_test.jl")
+end
 
-@safetestset "HyperRectangle" begin include("hyperrectangle_test.jl") end
+@safetestset "HyperRectangle" begin
+    include("hyperrectangle_test.jl")
+end
 
-@safetestset "ClusterTree" begin include("clustertree_test.jl") end
+@safetestset "ClusterTree" begin
+    include("clustertree_test.jl")
+end
 
-@safetestset "RkMatrix" begin include("rkmatrix_test.jl") end
+@safetestset "RkMatrix" begin
+    include("rkmatrix_test.jl")
+end
 
-@safetestset "Compressors" begin 
include("compressor_test.jl") end +@safetestset "Compressors" begin + include("compressor_test.jl") +end -@safetestset "HMatrix" begin include("hmatrix_test.jl") end +@safetestset "HMatrix" begin + include("hmatrix_test.jl") +end -@safetestset "Addition" begin include("addition_test.jl") end +@safetestset "Addition" begin + include("addition_test.jl") +end -@safetestset "Multiplication" begin include("multiplication_test.jl") end +@safetestset "Multiplication" begin + include("multiplication_test.jl") +end -@safetestset "Triangular" begin include("triangular_test.jl") end +@safetestset "Triangular" begin + include("triangular_test.jl") +end -@safetestset "LU" begin include("lu_test.jl") end +@safetestset "LU" begin + include("lu_test.jl") +end # @safetestset "DHMatrix" begin include("dhmatrix_test.jl") end diff --git a/test/testutils.jl b/test/testutils.jl index da4f10c..23af927 100644 --- a/test/testutils.jl +++ b/test/testutils.jl @@ -12,8 +12,8 @@ function laplace_matrix(X, Y) d = norm(x - y) + EPS inv(4π * d) end - KernelMatrix(f, X, Y) - end + return KernelMatrix(f, X, Y) +end function helmholtz_matrix(X, Y, k) f = (x, y) -> begin @@ -21,7 +21,7 @@ function helmholtz_matrix(X, Y, k) d = norm(x - y) + EPS exp(im * k * d) * inv(4π * d) end - KernelMatrix(f, X, Y) + return KernelMatrix(f, X, Y) end function elastostatic_matrix(X, Y, μ, λ) @@ -32,7 +32,7 @@ function elastostatic_matrix(X, Y, μ, λ) RRT = r * transpose(r) # r ⊗ rᵗ return 1 / (16π * μ * (1 - ν) * d) * ((3 - 4 * ν) * I + RRT / d^2) end - KernelMatrix(f, X, Y) + return KernelMatrix(f, X, Y) end function elastosdynamic_matrix(X, Y, μ, λ, ω, ρ) @@ -40,17 +40,18 @@ function elastosdynamic_matrix(X, Y, μ, λ, ω, ρ) c1 = sqrt((λ + 2μ) / ρ) c2 = sqrt(μ / ρ) r = x - y - d = norm(r) + EPS + d = norm(r) + EPS RRT = r * transpose(r) # r ⊗ rᵗ - s = -im * ω - z1 = s * d / c1 - z2 = s * d / c2 - α = 4 - ψ = exp(-z2) / d + (1 + z2) / (z2^2) * exp(-z2) / d - c2^2 / c1^2 * (1 + z1) / (z1^2) * exp(-z1) / d + s = -im * ω + z1 = s * d / c1 + z2 = s * d / c2 + α = 4 + ψ = exp(-z2) / d + (1 + z2) / (z2^2) * exp(-z2) / d - + c2^2 / c1^2 * (1 + z1) / (z1^2) * exp(-z1) / d chi = 3 * ψ - 2 * exp(-z2) / d - c2^2 / c1^2 * exp(-z1) / d return 1 / (α * π * μ) * (ψ * I - chi * RRT / d^2) + x * transpose(y) end - KernelMatrix(f, X, Y) + return KernelMatrix(f, X, Y) end """ @@ -63,22 +64,23 @@ struct LaplaceMatrixVec{T,Td} <: AbstractKernelMatrix{T} Y::Matrix{Td} function LaplaceMatrixVec{T}(X::Matrix{Td}, Y::Matrix{Td}) where {T,Td} @assert size(X, 2) == size(Y, 2) == 3 - new{T,Td}(X, Y) + return new{T,Td}(X, Y) end end Base.size(K::LaplaceMatrixVec) = size(K.X, 1), size(K.Y, 1) # constructor based on Vector of StaticVector -function LaplaceMatrixVec{T}(_X::Vector{SVector{3,Td}}, _Y::Vector{SVector{3,Td}}) where {T,Td} - X = reshape(reinterpret(Td, _X), 3, :) |> transpose |> collect - Y = reshape(reinterpret(Td, _Y), 3, :) |> transpose |> collect - LaplaceMatrixVec{T}(X, Y) +function LaplaceMatrixVec{T}(_X::Vector{SVector{3,Td}}, + _Y::Vector{SVector{3,Td}}) where {T,Td} + X = collect(transpose(reshape(reinterpret(Td, _X), 3, :))) + Y = collect(transpose(reshape(reinterpret(Td, _Y), 3, :))) + return LaplaceMatrixVec{T}(X, Y) end LaplaceMatrixVec(args...) = LaplaceMatrixVec{Float64}(args...) 
# default to Float64 function Base.getindex(K::LaplaceMatrixVec{T}, i::Int, j::Int)::T where {T} - d2 = (K.X[i,1] - K.Y[j,1])^2 + (K.X[i,2] - K.Y[j,2])^2 + (K.X[i,3] - K.Y[j,3])^2 + d2 = (K.X[i, 1] - K.Y[j, 1])^2 + (K.X[i, 2] - K.Y[j, 2])^2 + (K.X[i, 3] - K.Y[j, 3])^2 d = sqrt(d2) + EPS return inv(4π * d) end @@ -91,21 +93,21 @@ function Base.getindex(K::LaplaceMatrixVec, I::UnitRange, J::UnitRange) out = Matrix{T}(undef, m, n) @avx for j in 1:n for i in 1:m - d2 = (Xv[i,1] - Yv[j,1])^2 - d2 += (Xv[i,2] - Yv[j,2])^2 - d2 += (Xv[i,3] - Yv[j,3])^2 + d2 = (Xv[i, 1] - Yv[j, 1])^2 + d2 += (Xv[i, 2] - Yv[j, 2])^2 + d2 += (Xv[i, 3] - Yv[j, 3])^2 d = sqrt(d2) + EPS - out[i,j] = inv(4 * π * d) + out[i, j] = inv(4 * π * d) end end return out end function Base.getindex(K::LaplaceMatrixVec, I::UnitRange, j::Int) - return vec(K[I,j:j]) + return vec(K[I, j:j]) end function Base.getindex(adjK::Adjoint{<:Any,LaplaceMatrixVec}, I::UnitRange, j::Int) K = parent(adjK) - vec(K[j:j,I]) + return vec(K[j:j, I]) end """ @@ -119,25 +121,27 @@ struct HelmholtzMatrixVec{T,Td,Tk} <: AbstractKernelMatrix{T} k::Tk function HelmholtzMatrixVec{T}(X::Matrix{Td}, Y::Matrix{Td}, k::Tk) where {T,Td,Tk} @assert size(X, 2) == size(Y, 2) == 3 - new{T,Td,Tk}(X, Y, k) + return new{T,Td,Tk}(X, Y, k) end end HelmholtzMatrixVec(args...) = HelmholtzMatrixVec{ComplexF64}(args...) Base.size(K::HelmholtzMatrixVec) = size(K.X, 1), size(K.Y, 1) -function HelmholtzMatrixVec{T}(_X::Vector{SVector{3,Td}}, _Y::Vector{SVector{3,Td}}, k) where {T,Td} - X = reshape(reinterpret(Td, _X), 3, :) |> transpose |> collect - Y = reshape(reinterpret(Td, _Y), 3, :) |> transpose |> collect - HelmholtzMatrixVec{T}(X, Y, k) +function HelmholtzMatrixVec{T}(_X::Vector{SVector{3,Td}}, _Y::Vector{SVector{3,Td}}, + k) where {T,Td} + X = collect(transpose(reshape(reinterpret(Td, _X), 3, :))) + Y = collect(transpose(reshape(reinterpret(Td, _Y), 3, :))) + return HelmholtzMatrixVec{T}(X, Y, k) end function Base.getindex(K::HelmholtzMatrixVec{T}, i::Int, j::Int)::T where {T} - d2 = (K.X[i,1] - K.Y[j,1])^2 + (K.X[i,2] - K.Y[j,2])^2 + (K.X[i,3] - K.Y[j,3])^2 - d = sqrt(d2) + EPS + d2 = (K.X[i, 1] - K.Y[j, 1])^2 + (K.X[i, 2] - K.Y[j, 2])^2 + (K.X[i, 3] - K.Y[j, 3])^2 + d = sqrt(d2) + EPS return inv(4π * d) * exp(im * K.k * d) end -function Base.getindex(K::HelmholtzMatrixVec{Complex{T}}, I::UnitRange, J::UnitRange) where {T} +function Base.getindex(K::HelmholtzMatrixVec{Complex{T}}, I::UnitRange, + J::UnitRange) where {T} k = K.k m = length(I) n = length(J) @@ -146,55 +150,55 @@ function Base.getindex(K::HelmholtzMatrixVec{Complex{T}}, I::UnitRange, J::UnitR # since LoopVectorization does not (yet) support Complex{T} types, we will # reinterpret the output as a Matrix{T}, then use views. This can probably be # done better. 
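+# (Layout behind the trick: for `out::Matrix{Complex{T}}` of size m×n,
+# `reinterpret(T, out)` views the same memory as a 2m×n real matrix in which
+# row 2i-1 holds real(out[i, j]) and row 2i holds imag(out[i, j]); the
+# strided views `out_T[1:2:end, :]` and `out_T[2:2:end, :]` below therefore
+# alias exactly the real and imaginary parts, which the @avx loop can fill
+# independently.)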
-    out = Matrix{Complex{T}}(undef, m, n)
-    out_T = reinterpret(T, out)
-    out_r = @views out_T[1:2:end,:]
-    out_i = @views out_T[2:2:end,:]
+    out = Matrix{Complex{T}}(undef, m, n)
+    out_T = reinterpret(T, out)
+    out_r = @views out_T[1:2:end, :]
+    out_i = @views out_T[2:2:end, :]
     @avx for j in 1:n
         for i in 1:m
-            d2 = (Xv[i,1] - Yv[j,1])^2
-            d2 += (Xv[i,2] - Yv[j,2])^2
-            d2 += (Xv[i,3] - Yv[j,3])^2
-            d = sqrt(d2) + EPS
+            d2 = (Xv[i, 1] - Yv[j, 1])^2
+            d2 += (Xv[i, 2] - Yv[j, 2])^2
+            d2 += (Xv[i, 3] - Yv[j, 3])^2
+            d = sqrt(d2) + EPS
             s, c = sincos(k * d)
             zr = inv(4π * d) * c
             zi = inv(4π * d) * s
-            out_r[i,j] = zr
-            out_i[i,j] = zi
+            out_r[i, j] = zr
+            out_i[i, j] = zi
         end
     end
     return out
 end
 
 function Base.getindex(K::HelmholtzMatrixVec, I::UnitRange, j::Int)
-    vec(K[I,j:j])
+    return vec(K[I, j:j])
 end
 function Base.getindex(adjK::Adjoint{<:Any,<:HelmholtzMatrixVec}, I::UnitRange, j::Int)
     K = parent(adjK)
-    conj!(K[j:j,I]) |> vec
+    return vec(conj!(K[j:j, I]))
 end
 
 function points_on_sphere(npts, R=1)
     theta = π * rand(npts)
-    phi = 2 * π * rand(npts)
-    x = @. sin(theta) * cos(phi)
-    y = @. R * sin(theta) * sin(phi)
-    z = @. R * cos(theta)
-    data = vcat(x', y', z')
-    pts = reinterpret(SVector{3,Float64}, vec(data)) |> collect
+    phi = 2 * π * rand(npts)
+    x = @. R * sin(theta) * cos(phi)
+    y = @. R * sin(theta) * sin(phi)
+    z = @. R * cos(theta)
+    data = vcat(x', y', z')
+    pts = collect(reinterpret(SVector{3,Float64}, vec(data)))
     return pts
 end
 
-function points_on_cylinder(radius,n,shift=SVector(0,0,0))
-    step = 1.75*π*radius/sqrt(n)
-    result = Vector{SVector{3,Float64}}(undef,n)
-    length = 2*π*radius
-    pointsPerCircle = length/step
-    angleStep = 2*π/pointsPerCircle
-    for i=0:n-1
-        x = radius * cos(angleStep*i)
-        y = radius * sin(angleStep*i)
-        z = step*i/pointsPerCircle
-        result[i+1] = shift + SVector(x,y,z)
+function points_on_cylinder(radius, n, shift=SVector(0, 0, 0))
+    step = 1.75 * π * radius / sqrt(n)
+    result = Vector{SVector{3,Float64}}(undef, n)
+    length = 2 * π * radius
+    pointsPerCircle = length / step
+    angleStep = 2 * π / pointsPerCircle
+    for i in 0:(n - 1)
+        x = radius * cos(angleStep * i)
+        y = radius * sin(angleStep * i)
+        z = step * i / pointsPerCircle
+        result[i + 1] = shift + SVector(x, y, z)
     end
     return result
 end
diff --git a/test/triangular_test.jl b/test/triangular_test.jl
index be0756d..b7c2d38 100755
--- a/test/triangular_test.jl
+++ b/test/triangular_test.jl
@@ -6,74 +6,74 @@ using StaticArrays
 
 using HMatrices: RkMatrix
 
-include(joinpath(HMatrices.PROJECT_ROOT,"test","testutils.jl"))
+include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl"))
 
 Random.seed!(1)
 m = 1000
 T = Float64
-X = points_on_sphere(m)
-Y = X
+X = points_on_sphere(m)
+Y = X
 
 struct ExponentialKernel <: AbstractMatrix{Float64}
     X::Vector{SVector{3,Float64}}
     Y::Vector{SVector{3,Float64}}
 end
-function Base.getindex(K::ExponentialKernel,i::Int,j::Int)
-    x,y = K.X[i], K.Y[j]
-    exp(-norm(x-y))
+function Base.getindex(K::ExponentialKernel, i::Int, j::Int)
+    x, y = K.X[i], K.Y[j]
+    return exp(-norm(x - y))
 end
 Base.size(K::ExponentialKernel) = length(K.X), length(K.Y)
 
-K = ExponentialKernel(X,X)
+K = ExponentialKernel(X, X)
 
-splitter = CardinalitySplitter(nmax=50)
-Xclt = ClusterTree(X,splitter)
-Yclt = ClusterTree(Y,splitter)
-H = assemble_hmat(K,Xclt,Yclt;threads=false,distributed=false)
-H_full = Matrix(H;global_index=false)
+splitter = CardinalitySplitter(; nmax=50)
+Xclt = ClusterTree(X, splitter)
+Yclt = ClusterTree(Y, splitter)
+H = assemble_hmat(K, Xclt, Yclt; threads=false, distributed=false)
+H_full = 
Matrix(H; global_index=false) @testset "ldiv!" begin - B = rand(m,2) - R = RkMatrix(rand(m,3),rand(m,3)) + B = rand(m, 2) + R = RkMatrix(rand(m, 3), rand(m, 3)) R_full = Matrix(R) ## 3.1 - exact = ldiv!(UnitLowerTriangular(H_full),copy(B)) - approx = ldiv!(UnitLowerTriangular(H),copy(B)) + exact = ldiv!(UnitLowerTriangular(H_full), copy(B)) + approx = ldiv!(UnitLowerTriangular(H), copy(B)) @test exact ≈ approx - exact = ldiv!(UpperTriangular(H_full),copy(B)) - approx = ldiv!(UpperTriangular(H),copy(B)) + exact = ldiv!(UpperTriangular(H_full), copy(B)) + approx = ldiv!(UpperTriangular(H), copy(B)) @test exact ≈ approx ## 3.2 - exact = ldiv!(UnitLowerTriangular(H_full),copy(R_full)) - approx = ldiv!(UnitLowerTriangular(H),copy(R)) + exact = ldiv!(UnitLowerTriangular(H_full), copy(R_full)) + approx = ldiv!(UnitLowerTriangular(H), copy(R)) @test exact ≈ Matrix(approx) ## 3.3 - compressor = PartialACA(;atol=1e-8) - exact = ldiv!(UnitLowerTriangular(H_full),copy(H_full)) - approx = ldiv!(UnitLowerTriangular(H),deepcopy(H),compressor) - @test exact ≈ Matrix(approx;global_index=false) + compressor = PartialACA(; atol=1e-8) + exact = ldiv!(UnitLowerTriangular(H_full), copy(H_full)) + approx = ldiv!(UnitLowerTriangular(H), deepcopy(H), compressor) + @test exact ≈ Matrix(approx; global_index=false) end @testset "rdiv!" begin - B = rand(2,m) - R = RkMatrix(rand(m,3),rand(m,3)) + B = rand(2, m) + R = RkMatrix(rand(m, 3), rand(m, 3)) R_full = Matrix(R) # 3.1 - exact = rdiv!(copy(B),UpperTriangular(H_full)) - approx = rdiv!(copy(B),UpperTriangular(H)) + exact = rdiv!(copy(B), UpperTriangular(H_full)) + approx = rdiv!(copy(B), UpperTriangular(H)) @test exact ≈ approx ## 3.2 - exact = rdiv!(copy(R_full),UpperTriangular(H_full)) - approx = rdiv!(copy(R),UpperTriangular(H)) + exact = rdiv!(copy(R_full), UpperTriangular(H_full)) + approx = rdiv!(copy(R), UpperTriangular(H)) @test exact ≈ approx ## 3.3 - compressor = PartialACA(;atol=1e-10) - exact = rdiv!(deepcopy(H_full),UpperTriangular(H_full)) - approx = rdiv!(deepcopy(H),UpperTriangular(H),compressor) - @test exact ≈ Matrix(approx;global_index=false) + compressor = PartialACA(; atol=1e-10) + exact = rdiv!(deepcopy(H_full), UpperTriangular(H_full)) + approx = rdiv!(deepcopy(H), UpperTriangular(H), compressor) + @test exact ≈ Matrix(approx; global_index=false) end diff --git a/test/utils_test.jl b/test/utils_test.jl index 1323663..795938f 100644 --- a/test/utils_test.jl +++ b/test/utils_test.jl @@ -5,11 +5,11 @@ using HMatrices: hilbert_linear_to_cartesian, hilbert_cartesian_to_linear @testset "Hilbert points" begin # check that hilbert curves spans the lattice, and the inverse is correct for n in [2^p for p in 0:3] - lattice = Set((i,j) for i in 0:n-1,j in 0:n-1) - for d in 0:(n^2-1) - x,y = hilbert_linear_to_cartesian(n,d) - pop!(lattice,(x,y)) - @test hilbert_cartesian_to_linear(n,x,y) == d + lattice = Set((i, j) for i in 0:(n - 1), j in 0:(n - 1)) + for d in 0:(n^2 - 1) + x, y = hilbert_linear_to_cartesian(n, d) + pop!(lattice, (x, y)) + @test hilbert_cartesian_to_linear(n, x, y) == d end @test isempty(lattice) end
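# Standalone sketch of the Hilbert-curve helpers exercised in the test above:
# `hilbert_linear_to_cartesian(n, d)` maps a position d ∈ 0:n^2-1 along the
# curve to coordinates (x, y) on an n×n lattice (n a power of two), and
# `hilbert_cartesian_to_linear` is its inverse; both are internal
# (non-exported) helpers of HMatrices, imported here exactly as in
# utils_test.jl.
using HMatrices: hilbert_linear_to_cartesian, hilbert_cartesian_to_linear

n = 4 # lattice side; must be a power of two
curve = [hilbert_linear_to_cartesian(n, d) for d in 0:(n^2 - 1)]
@assert allunique(curve) # n^2 distinct points: the curve spans the lattice
for (d, (x, y)) in enumerate(curve)
    @assert hilbert_cartesian_to_linear(n, x, y) == d - 1 # round trip
end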