diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 0000000..9613e05 --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1 @@ +style = "yas" \ No newline at end of file diff --git a/.github/workflows/FormatCheck.yml b/.github/workflows/FormatCheck.yml new file mode 100644 index 0000000..1fa4385 --- /dev/null +++ b/.github/workflows/FormatCheck.yml @@ -0,0 +1,42 @@ +# modified from https://github.com/SciML/DiffEqDocs.jl/tree/master/.github/workflows +name: format-check + +on: + push: + branches: + - 'main' + tags: '*' + pull_request: + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + julia-version: [1] + julia-arch: [x86] + os: [ubuntu-latest] + steps: + - uses: julia-actions/setup-julia@latest + with: + version: ${{ matrix.julia-version }} + + - uses: actions/checkout@v1 + - name: Install JuliaFormatter and format + # This will use the latest version by default but you can set the version like so: + # + # julia -e 'using Pkg; Pkg.add(PackageSpec(name="JuliaFormatter", version="0.13.0"))' + run: | + julia -e 'using Pkg; Pkg.add(PackageSpec(name="JuliaFormatter"))' + julia -e 'using JuliaFormatter; format(".", verbose=true)' + - name: Format check + run: | + julia -e ' + out = Cmd(`git diff --name-only`) |> read |> String + if out == "" + exit(0) + else + @error "Some files have not been formatted !!!" + write(stdout, out) + exit(1) + end' \ No newline at end of file diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index d5b8871..85049d8 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -23,7 +23,7 @@ K = laplace_matrix(X, Y) irange = 1:N jrange = 1:N -compressors = [PartialACA(rtol = rtol), ACA(rtol = rtol), TSVD(rtol = rtol)] +compressors = [PartialACA(; rtol=rtol), ACA(; rtol=rtol), TSVD(; rtol=rtol)] for comp in compressors SUITE["Compressors"][string(comp)] = @benchmarkable $comp($K, $irange, $jrange) @@ -53,26 +53,34 @@ comp = HMatrices.PartialACA(; rtol) step = 1.75 * π * radius / sqrt(N) k = 2 * π / (10 * step) # 10 pts per wavelength -kernels = [ - ("Laplace", laplace_matrix(X, X), true), - ("Helmholtz", helmholtz_matrix(X, X, k), true), - ("LaplaceVec", LaplaceMatrixVec(Xp,Xp), false), - ("HelmholtzVec", HelmholtzMatrixVec(Xp,Xp,k),false) -] +kernels = [("Laplace", laplace_matrix(X, X), true), + ("Helmholtz", helmholtz_matrix(X, X, k), true), + ("LaplaceVec", LaplaceMatrixVec(Xp, Xp), false), + ("HelmholtzVec", HelmholtzMatrixVec(Xp, Xp, k), false)] for (name, K, p) in kernels SUITE[name] = BenchmarkGroup([name, N]) # bench assemble - SUITE[name]["assemble cpu"] = @benchmarkable assemble_hmat($K, $Xclt, $Xclt; adm = $adm, comp = $comp, threads = false, distributed = false, global_index = $p) - SUITE[name]["assemble threads"] = @benchmarkable assemble_hmat($K, $Xclt, $Xclt; adm = $adm, comp = $comp, threads = true, distributed = false, global_index = $p) + SUITE[name]["assemble cpu"] = @benchmarkable assemble_hmat($K, $Xclt, $Xclt; adm=$adm, + comp=$comp, threads=false, + distributed=false, + global_index=$p) + SUITE[name]["assemble threads"] = @benchmarkable assemble_hmat($K, $Xclt, $Xclt; + adm=$adm, comp=$comp, + threads=true, + distributed=false, + global_index=$p) # SUITE[name]["assemble procs"] = @benchmarkable assemble_hmat($K, $Xclt, $Xclt; adm = $adm, comp = $comp, threads = false, distributed = true, global_index = $p) # bench gemv only for regular case since the vectorized case should be the same if p x = rand(eltype(K), N) y = zero(x) - H = assemble_hmat(K, Xclt, Xclt; adm, comp, threads = 
true, distributed = false, global_index = p) - SUITE[name]["gemv cpu"] = @benchmarkable mul!($y, $H, $x, $1, $0; threads = false, global_index = $p) - SUITE[name]["gemv threads"] = @benchmarkable mul!($y, $H, $x, $1, $0; threads = true, global_index = $p) - SUITE[name]["lu"] = @benchmarkable lu!($H;rank=5) + H = assemble_hmat(K, Xclt, Xclt; adm, comp, threads=true, distributed=false, + global_index=p) + SUITE[name]["gemv cpu"] = @benchmarkable mul!($y, $H, $x, $1, $0; threads=false, + global_index=$p) + SUITE[name]["gemv threads"] = @benchmarkable mul!($y, $H, $x, $1, $0; threads=true, + global_index=$p) + SUITE[name]["lu"] = @benchmarkable lu!($H; rank=5) end end diff --git a/benchmark/make.jl b/benchmark/make.jl index 3de6453..3ed8067 100644 --- a/benchmark/make.jl +++ b/benchmark/make.jl @@ -1,20 +1,19 @@ using PkgBenchmark -function postprocess(results,fname="benchs") +function postprocess(results, fname="benchs") # writeresults(joinpath(path,fname),results) dir = @__DIR__ - path = joinpath(dir,"../docs/src") - export_markdown(joinpath(path,fname*".md"),results) + path = joinpath(dir, "../docs/src") + return export_markdown(joinpath(path, fname * ".md"), results) end -env = Dict("JULIA_NUM_THREADS" =>4, - "OPEN_BLAS_NUM_THREADS" => 1 - ) +env = Dict("JULIA_NUM_THREADS" => 4, + "OPEN_BLAS_NUM_THREADS" => 1) -config = BenchmarkConfig(;juliacmd=`julia -O3`,env) +config = BenchmarkConfig(; juliacmd=`julia -O3`, env) -dir = @__DIR__ -retune = false -results = benchmarkpkg("HMatrices",config;retune) +dir = @__DIR__ +retune = false +results = benchmarkpkg("HMatrices", config; retune) postprocess(results) diff --git a/docs/make.jl b/docs/make.jl index 00b3c43..fd14f72 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -4,26 +4,21 @@ using Documenter DocMeta.setdocmeta!(HMatrices, :DocTestSetup, :(using HMatrices); recursive=true) makedocs(; - modules=[HMatrices], - authors="Luiz M. Faria and contributors", - repo="https://github.com/WaveProp/HMatrices.jl/blob/{commit}{path}#{line}", - sitename="HMatrices.jl", - format=Documenter.HTML(; - prettyurls=get(ENV, "CI", "false") == "true", - canonical="https://WaveProp.github.io/HMatrices.jl", - assets=String[], - ), - pages=[ - "Getting started" => "index.md", - "Kernel matrices" => "kernelmatrix.md", - "Distributed HMatrix" => "dhmatrix.md", - "Notebooks" => "notebooks.md", - "Benchmarks" => ["benchs.md"], - "References" => "references.md" - ], -) + modules=[HMatrices], + authors="Luiz M. 
Faria and contributors", + repo="https://github.com/WaveProp/HMatrices.jl/blob/{commit}{path}#{line}", + sitename="HMatrices.jl", + format=Documenter.HTML(; + prettyurls=get(ENV, "CI", "false") == "true", + canonical="https://WaveProp.github.io/HMatrices.jl", + assets=String[]), + pages=["Getting started" => "index.md", + "Kernel matrices" => "kernelmatrix.md", + "Distributed HMatrix" => "dhmatrix.md", + "Notebooks" => "notebooks.md", + "Benchmarks" => ["benchs.md"], + "References" => "references.md"]) deploydocs(; - repo="github.com/WaveProp/HMatrices.jl", - devbranch="main" -) + repo="github.com/WaveProp/HMatrices.jl", + devbranch="main") diff --git a/docs/notebooks/vectorization.jl b/docs/notebooks/vectorization.jl index 3ed26a8..7c55e3c 100644 --- a/docs/notebooks/vectorization.jl +++ b/docs/notebooks/vectorization.jl @@ -6,122 +6,125 @@ using InteractiveUtils # ╔═╡ 2b4aa264-4145-11ec-0d24-ab437ba0f84e begin - import Pkg + using Pkg: Pkg # activate the shared project environment Pkg.activate(Base.current_project()) - using HMatrices, Plots, PlutoUI, LinearAlgebra, StaticArrays, WavePropBase, BenchmarkTools, LoopVectorization - using WavePropBase: PlotPoints, PlotTree, root_elements + using HMatrices, Plots, PlutoUI, LinearAlgebra, StaticArrays, WavePropBase, + BenchmarkTools, LoopVectorization + using WavePropBase: PlotPoints, PlotTree, root_elements end # ╔═╡ 9efa3b28-900c-4515-8aba-617bce2ddc68 -PlutoUI.TableOfContents(;depth=2) +PlutoUI.TableOfContents(; depth=2) # ╔═╡ d1917856-64d1-4dab-9e05-40b1152217d2 begin - const Point3D = SVector{3,Float64} - m = 10_000 - X = Y = [Point3D(sin(θ)cos(ϕ),sin(θ)*sin(ϕ),cos(θ)) for (θ,ϕ) in zip(π*rand(m),2π*rand(m))] + const Point3D = SVector{3,Float64} + m = 10_000 + X = Y = [Point3D(sin(θ)cos(ϕ), sin(θ) * sin(ϕ), cos(θ)) + for (θ, ϕ) in zip(π * rand(m), 2π * rand(m))] end # ╔═╡ bdfebc7a-a7a6-4fa6-8884-b4f86dc55cb3 begin - Xclt = Yclt = ClusterTree(X) - plot(PlotTree(),Xclt,mc=:red) + Xclt = Yclt = ClusterTree(X) + plot(PlotTree(), Xclt; mc=:red) end # ╔═╡ 306d493b-a558-4e1d-bad6-5c295da79e82 begin - function G(x,y) - d = norm(x-y) + 1e-8 - inv(4π*d) - end - K = KernelMatrix(G,X,Y) + function G(x, y) + d = norm(x - y) + 1e-8 + return inv(4π * d) + end + K = KernelMatrix(G, X, Y) end # ╔═╡ 99041c6a-c12f-4a09-8bfd-cf769fd258a6 -H = assemble_hmat(K,Xclt,Yclt;threads=false) +H = assemble_hmat(K, Xclt, Yclt; threads=false) # ╔═╡ 02d89923-e478-4778-8b80-eaa3357a6a8c -b = @benchmark assemble_hmat($K,$Xclt,$Yclt;threads=false) +b = @benchmark assemble_hmat($K, $Xclt, $Yclt; threads=false) # ╔═╡ fca099bf-25c0-4ee8-b3f0-c042c4e5fde3 -begin - Xp = Yp = root_elements(Xclt) - Kp = KernelMatrix(G,Xp,Yp) - Hp = assemble_hmat(Kp,Xclt,Yclt;threads=false,global_index=false) +begin + Xp = Yp = root_elements(Xclt) + Kp = KernelMatrix(G, Xp, Yp) + Hp = assemble_hmat(Kp, Xclt, Yclt; threads=false, global_index=false) end # ╔═╡ 37ed939e-1738-4ade-85d7-526c3c4ab536 -bp = @benchmark assemble_hmat($Kp,Xclt,Yclt;threads=false,global_index=false) +bp = @benchmark assemble_hmat($Kp, Xclt, Yclt; threads=false, global_index=false) # ╔═╡ c6729646-0b6c-4ea2-b692-62e1e8f6b97c begin - struct LaplaceMatrixVec <: AbstractKernelMatrix{Float64} - X::Matrix{Float64} - Y::Matrix{Float64} - end - Base.size(K::LaplaceMatrixVec) = size(K.X,1), size(K.Y,1) - - # convenience constructor based on Vector of StaticVector - function LaplaceMatrixVec(_X::Vector{Point3D},_Y::Vector{Point3D}) - X = reshape(reinterpret(Float64,_X), 3,:) |> transpose |> collect - Y = reshape(reinterpret(Float64,_Y), 
3,:) |> transpose |> collect - LaplaceMatrixVec(X,Y) - end - - function Base.getindex(K::LaplaceMatrixVec,i::Int,j::Int) - d2 = (K.X[i,1] - K.Y[j,1])^2 + (K.X[i,2] - K.Y[j,2])^2 + (K.X[i,3] - K.Y[j,3])^2 - d = sqrt(d2) + 1e-8 - return inv(4π*d) - end - function Base.getindex(K::LaplaceMatrixVec,I::UnitRange,J::UnitRange) - T = eltype(K) - m = length(I) - n = length(J) - Xv = view(K.X,I,:) - Yv = view(K.Y,J,:) - out = Matrix{T}(undef,m,n) - @turbo for j in 1:n - for i in 1:m - d2 = (Xv[i,1] - Yv[j,1])^2 - d2 += (Xv[i,2] - Yv[j,2])^2 - d2 += (Xv[i,3] - Yv[j,3])^2 - d = sqrt(d2) + 1e-8 - out[i,j] = inv(4*π*d) - end - end - return out - end - function Base.getindex(K::LaplaceMatrixVec,I::UnitRange,j::Int) - T = eltype(K) - m = length(I) - Xv = view(K.X,I,:) - out = Vector{T}(undef,m) - @turbo for i in 1:m - d2 = (Xv[i,1] - K.Y[j,1])^2 - d2 += (Xv[i,2] - K.Y[j,2])^2 - d2 += (Xv[i,3] - K.Y[j,3])^2 - d = sqrt(d2) + 1e-8 - out[i] = inv(4*π*d) - end - return out - end - function Base.getindex(adjK::Adjoint{<:Any,<:LaplaceMatrixVec},I::UnitRange,j::Int) - K = parent(adjK) - T = eltype(K) - m = length(I) - Yv = view(K.Y,I,:) - out = Vector{T}(undef,m) - @turbo for i in 1:m - d2 = (Yv[i,1] - K.X[j,1])^2 - d2 += (Yv[i,2] - K.X[j,2])^2 - d2 += (Yv[i,3] - K.X[j,3])^2 - d = sqrt(d2) + 1e-8 - out[i] = inv(4*π*d) - end - return out - end + struct LaplaceMatrixVec <: AbstractKernelMatrix{Float64} + X::Matrix{Float64} + Y::Matrix{Float64} + end + Base.size(K::LaplaceMatrixVec) = size(K.X, 1), size(K.Y, 1) + + # convenience constructor based on Vector of StaticVector + function LaplaceMatrixVec(_X::Vector{Point3D}, _Y::Vector{Point3D}) + X = collect(transpose(reshape(reinterpret(Float64, _X), 3, :))) + Y = collect(transpose(reshape(reinterpret(Float64, _Y), 3, :))) + return LaplaceMatrixVec(X, Y) + end + + function Base.getindex(K::LaplaceMatrixVec, i::Int, j::Int) + d2 = (K.X[i, 1] - K.Y[j, 1])^2 + (K.X[i, 2] - K.Y[j, 2])^2 + + (K.X[i, 3] - K.Y[j, 3])^2 + d = sqrt(d2) + 1e-8 + return inv(4π * d) + end + function Base.getindex(K::LaplaceMatrixVec, I::UnitRange, J::UnitRange) + T = eltype(K) + m = length(I) + n = length(J) + Xv = view(K.X, I, :) + Yv = view(K.Y, J, :) + out = Matrix{T}(undef, m, n) + @turbo for j in 1:n + for i in 1:m + d2 = (Xv[i, 1] - Yv[j, 1])^2 + d2 += (Xv[i, 2] - Yv[j, 2])^2 + d2 += (Xv[i, 3] - Yv[j, 3])^2 + d = sqrt(d2) + 1e-8 + out[i, j] = inv(4 * π * d) + end + end + return out + end + function Base.getindex(K::LaplaceMatrixVec, I::UnitRange, j::Int) + T = eltype(K) + m = length(I) + Xv = view(K.X, I, :) + out = Vector{T}(undef, m) + @turbo for i in 1:m + d2 = (Xv[i, 1] - K.Y[j, 1])^2 + d2 += (Xv[i, 2] - K.Y[j, 2])^2 + d2 += (Xv[i, 3] - K.Y[j, 3])^2 + d = sqrt(d2) + 1e-8 + out[i] = inv(4 * π * d) + end + return out + end + function Base.getindex(adjK::Adjoint{<:Any,<:LaplaceMatrixVec}, I::UnitRange, j::Int) + K = parent(adjK) + T = eltype(K) + m = length(I) + Yv = view(K.Y, I, :) + out = Vector{T}(undef, m) + @turbo for i in 1:m + d2 = (Yv[i, 1] - K.X[j, 1])^2 + d2 += (Yv[i, 2] - K.X[j, 2])^2 + d2 += (Yv[i, 3] - K.X[j, 3])^2 + d = sqrt(d2) + 1e-8 + out[i] = inv(4 * π * d) + end + return out + end end # ╔═╡ 85de3038-88f8-442e-a2cd-5462f9552d12 @@ -209,9 +212,9 @@ The `LaplaceMatrix` struct above has a vectorized implementatio of `getindex` fo """ # ╔═╡ 71208d87-a9ed-410e-bad1-f770fc84d4db -begin - Kv = LaplaceMatrixVec(Xp,Yp) - Hv = assemble_hmat(Kv,Xclt,Yclt;global_index=false,threads=false) +begin + Kv = LaplaceMatrixVec(Xp, Yp) + Hv = assemble_hmat(Kv, Xclt, Yclt; global_index=false, 
threads=false) end # ╔═╡ 6eaa63e3-755d-482a-a113-98312b6f6fb1 @@ -220,7 +223,7 @@ Benchmarking the new implementation yields: """ # ╔═╡ 59fda22e-0547-4845-9207-31c7b526d2f6 -bv = @benchmark assemble_hmat($Kv,Xclt,Yclt;threads=false,global_index=false) +bv = @benchmark assemble_hmat($Kv, Xclt, Yclt; threads=false, global_index=false) # ╔═╡ 0b967aaf-271c-4153-acd6-2e5acff8dc75 md""" @@ -228,7 +231,7 @@ Comparing the times we see that the vectorized implementation is the fastest, an """ # ╔═╡ afc737f1-3523-413f-a73a-96a909580047 -minimum(b),minimum(bp),minimum(bv) +minimum(b), minimum(bp), minimum(bv) # ╔═╡ 44f08eda-b5d3-40d0-aa8b-3cae9e137854 md""" @@ -237,14 +240,14 @@ Finally, not that as expected all of these yield the same matrix, as indicated b # ╔═╡ 5fd9b390-ea5f-425c-a624-0cd4a146a897 begin - x = rand(m) - y = H*x - yv = Hv*x - yp = Hp*x - er = max(norm(y-yv)/norm(y),norm(y-yp)/norm(y)) - md""" - Maximum error: $er - """ + x = rand(m) + y = H * x + yv = Hv * x + yp = Hp * x + er = max(norm(y - yv) / norm(y), norm(y - yp) / norm(y)) + md""" + Maximum error: $er + """ end # ╔═╡ Cell order: diff --git a/src/HMatrices.jl b/src/HMatrices.jl index f877284..06a4de4 100644 --- a/src/HMatrices.jl +++ b/src/HMatrices.jl @@ -1,10 +1,10 @@ module HMatrices -const PROJECT_ROOT = pkgdir(HMatrices) +const PROJECT_ROOT = pkgdir(HMatrices) using StaticArrays using LinearAlgebra -using Statistics:median +using Statistics: median using TimerOutputs using Printf using RecipesBase @@ -12,7 +12,7 @@ using Distributed using Base.Threads using AbstractTrees: print_tree -import AbstractTrees +using AbstractTrees: AbstractTrees import LinearAlgebra: mul!, lu!, lu, LU, ldiv!, rdiv!, axpy!, rank, rmul!, lmul! import Base: Matrix, adjoint, parent @@ -55,28 +55,28 @@ include("triangular.jl") include("lu.jl") export - # types (re-exported) - CardinalitySplitter, - ClusterTree, - DyadicSplitter, - GeometricSplitter, - GeometricMinimalSplitter, - HyperRectangle, - # abstract types - AbstractKernelMatrix, - # types - HMatrix, - KernelMatrix, - StrongAdmissibilityStd, - WeakAdmissibilityStd, - PartialACA, - ACA, - TSVD, - # functions - compression_ratio, - print_tree, - assemble_hmat, - # macros - @hprofile +# types (re-exported) + CardinalitySplitter, + ClusterTree, + DyadicSplitter, + GeometricSplitter, + GeometricMinimalSplitter, + HyperRectangle, +# abstract types + AbstractKernelMatrix, +# types + HMatrix, + KernelMatrix, + StrongAdmissibilityStd, + WeakAdmissibilityStd, + PartialACA, + ACA, + TSVD, +# functions + compression_ratio, + print_tree, + assemble_hmat, +# macros + @hprofile end diff --git a/src/addition.jl b/src/addition.jl index 30d1302..2006fb3 100644 --- a/src/addition.jl +++ b/src/addition.jl @@ -15,100 +15,100 @@ simply be assigned `X`. This means that after the call `axpy(a,X,Y)`, the object `Y` is in a *dirty* state (see [`isclean`][@ref]) and usually a call to [`flush_to_leaves!`](@ref) or [`flush_to_children!`](@ref) follows. 
""" -function axpy!(a,X::Matrix,Y::RkMatrix) - axpy!(a,RkMatrix(X),Y) +function axpy!(a, X::Matrix, Y::RkMatrix) + return axpy!(a, RkMatrix(X), Y) end # 1.3 -function axpy!(a,X::Matrix,Y::HMatrix) +function axpy!(a, X::Matrix, Y::HMatrix) if hasdata(Y) - axpy!(a,X,data(Y)) + axpy!(a, X, data(Y)) else - setdata!(Y,a*X) + setdata!(Y, a * X) end return Y end # 2.1 -function axpy!(a,X::RkMatrix,Y::Matrix) - axpy!(a,Matrix(X),Y) +function axpy!(a, X::RkMatrix, Y::Matrix) + return axpy!(a, Matrix(X), Y) end #2.2 -function axpy!(a,X::RkMatrix,Y::RkMatrix) - Y.A = hcat(a*X.A,Y.A) - Y.B = hcat(X.B,Y.B) +function axpy!(a, X::RkMatrix, Y::RkMatrix) + Y.A = hcat(a * X.A, Y.A) + Y.B = hcat(X.B, Y.B) return Y end # 2.3 -function axpy!(a,X::RkMatrix,Y::HMatrix) +function axpy!(a, X::RkMatrix, Y::HMatrix) if hasdata(Y) - axpy!(a,X,data(Y)) + axpy!(a, X, data(Y)) else - setdata!(Y,a*X) + setdata!(Y, a * X) end return Y end #3.1 -function axpy!(a,X::HMatrix,Y::Matrix) +function axpy!(a, X::HMatrix, Y::Matrix) @debug "calling axpy! with `X` and HMatrix and `Y` a Matrix" shift = pivot(X) .- 1 for block in AbstractTrees.PreOrderDFS(X) irange = rowrange(block) .- shift[1] jrange = colrange(block) .- shift[2] if hasdata(block) - axpy!(a,data(block),view(Y,irange,jrange)) + axpy!(a, data(block), view(Y, irange, jrange)) end end return Y end # 3.2 -function axpy!(a,X::HMatrix,Y::RkMatrix) +function axpy!(a, X::HMatrix, Y::RkMatrix) @debug "calling axpby! with `X` and HMatrix and `Y` an RkMatrix" # FIXME: inneficient implementation due to conversion from HMatrix to # Matrix. Does it really matter? I don't think this function should be # called. - axpy!(a,Matrix(X;global_index=false),Y) + return axpy!(a, Matrix(X; global_index=false), Y) end # 3.3 -function axpy!(a,X::HMatrix,Y::HMatrix) +function axpy!(a, X::HMatrix, Y::HMatrix) # TODO: assumes X and Y have the same structure. How to reinforce this? if hasdata(X) if hasdata(Y) - axpy!(a,data(X),Y) + axpy!(a, data(X), Y) else - setdata!(Y,a*data(X)) + setdata!(Y, a * data(X)) end end @assert size(children(X)) == size(children(Y)) "adding hierarchical matrices requires identical block structure" - for (bx,by) in zip(children(X),children(Y)) - axpy!(a,bx,by) + for (bx, by) in zip(children(X), children(Y)) + axpy!(a, bx, by) end return Y end # add a unifor scaling to an HMatrix return an HMatrix -function axpy!(a,X::UniformScaling,Y::HMatrix) +function axpy!(a, X::UniformScaling, Y::HMatrix) @assert isclean(Y) if hasdata(Y) d = data(Y) @assert d isa Matrix n = min(size(d)...) - for i=1:n - d[i,i] += a*X.λ + for i in 1:n + d[i, i] += a * X.λ end else n = min(blocksize(Y)...) 
- for i=1:n - axpy!(a,X,children(Y)[i,i]) + for i in 1:n + axpy!(a, X, children(Y)[i, i]) end end return Y end -Base.:(+)(X::UniformScaling,Y::HMatrix) = axpy!(true,X,deepcopy(Y)) -Base.:(+)(X::HMatrix,Y::UniformScaling) = Y+X +Base.:(+)(X::UniformScaling, Y::HMatrix) = axpy!(true, X, deepcopy(Y)) +Base.:(+)(X::HMatrix, Y::UniformScaling) = Y + X diff --git a/src/clustertree.jl b/src/clustertree.jl index bfa3e9b..6b80d22 100644 --- a/src/clustertree.jl +++ b/src/clustertree.jl @@ -21,8 +21,8 @@ mutable struct ClusterTree{N,T} buffer::Vector{Int} children::Vector{ClusterTree{N,T}} parent::ClusterTree{N,T} - function ClusterTree(els::Vector{SVector{N,T}},container,loc_idxs, - loc2glob,buffer,children,parent) where {N,T} + function ClusterTree(els::Vector{SVector{N,T}}, container, loc_idxs, + loc2glob, buffer, children, parent) where {N,T} clt = new{N,T}(els, container, loc_idxs, loc2glob, buffer) clt.children = isnothing(children) ? Vector{typeof(clt)}() : children clt.parent = isnothing(parent) ? clt : parent @@ -108,16 +108,16 @@ strategy encoded in `splitter`. If `copy_elements` is set to false, the during the tree construction. """ function ClusterTree(elements, splitter=CardinalitySplitter(); - copy_elements=true, - threads=false) + copy_elements=true, + threads=false) copy_elements && (elements = deepcopy(elements)) bbox = bounding_box(elements) n = length(elements) irange = 1:n loc2glob = collect(irange) - buffer = collect(irange) + buffer = collect(irange) children = nothing - parent = nothing + parent = nothing #build the root, then recurse root = ClusterTree(elements, bbox, irange, loc2glob, buffer, children, parent) _build_cluster_tree!(root, splitter, threads) @@ -142,8 +142,6 @@ function _build_cluster_tree!(current_node, splitter, threads, depth=0) return current_node end - - function Base.show(io::IO, tree::ClusterTree{N,T}) where {N,T} return print(io, "ClusterTree with $(length(tree.index_range)) points.") end diff --git a/src/compressor.jl b/src/compressor.jl index 161f6cf..93cd9a2 100644 --- a/src/compressor.jl +++ b/src/compressor.jl @@ -33,24 +33,24 @@ true ``` """ -@Base.kwdef struct ACA +Base.@kwdef struct ACA atol::Float64 = 0 - rank::Int = typemax(Int) - rtol::Float64 = atol>0 || rank 0 || rank < typemax(Int) ? 0 : sqrt(eps(Float64)) end -function (aca::ACA)(K,rowtree::ClusterTree,coltree::ClusterTree) +function (aca::ACA)(K, rowtree::ClusterTree, coltree::ClusterTree) irange = index_range(rowtree) jrange = index_range(coltree) - aca(K,irange,jrange) + return aca(K, irange, jrange) end -function (aca::ACA)(K,irange,jrange) - M = K[irange,jrange] # computes the entire matrix. - _aca_full!(M,aca.atol,aca.rank,aca.rtol) +function (aca::ACA)(K, irange, jrange) + M = K[irange, jrange] # computes the entire matrix. + return _aca_full!(M, aca.atol, aca.rank, aca.rtol) end -(comp::ACA)(K::Matrix) = comp(K,1:size(K,1),1:size(K,2)) +(comp::ACA)(K::Matrix) = comp(K, 1:size(K, 1), 1:size(K, 2)) """ _aca_full!(M,atol,rmax,rtol) @@ -60,35 +60,35 @@ full pivoting. The matrix `M` is modified in place. The returned `RkMatrix` has rank at most `rmax`, and is expected to satisfy `|M - R| < max(atol,rtol*|M|)`. 
""" function _aca_full!(M, atol, rmax, rtol) - Madj = adjoint(M) - T = eltype(M) - m,n = size(M) - A = Vector{Vector{T}}() - B = Vector{Vector{T}}() + Madj = adjoint(M) + T = eltype(M) + m, n = size(M) + A = Vector{Vector{T}}() + B = Vector{Vector{T}}() er = Inf - exact_norm = norm(M,2) # exact norm + exact_norm = norm(M, 2) # exact norm r = 0 # current rank - while er > max(atol,rtol*exact_norm) && r < rmax + while er > max(atol, rtol * exact_norm) && r < rmax I = _aca_full_pivot(M) - i,j = Tuple(I) - δ = M[I] + i, j = Tuple(I) + δ = M[I] if svdvals(δ)[end] == 0 - return RkMatrix(A,B) + return RkMatrix(A, B) else iδ = inv(δ) - col = M[:,j] - adjcol = Madj[:,i] + col = M[:, j] + adjcol = Madj[:, i] for k in eachindex(adjcol) - adjcol[k] = adjcol[k]*adjoint(iδ) + adjcol[k] = adjcol[k] * adjoint(iδ) end r += 1 - push!(A,col) - push!(B,adjcol) - axpy!(-1,col*adjoint(adjcol),M) # M <-- M - col*row' - er = norm(M,2) # exact error + push!(A, col) + push!(B, adjcol) + axpy!(-1, col * adjoint(adjcol), M) # M <-- M - col*row' + er = norm(M, 2) # exact error end end - return RkMatrix(A,B) + return RkMatrix(A, B) end """ @@ -118,26 +118,26 @@ evaluated at every `i,j`. This is usually much faster than [`ACA`](@ref), but due to the pivoting strategy the algorithm may fail in special cases, even when the underlying linear operator is of low rank. """ -@Base.kwdef struct PartialACA +Base.@kwdef struct PartialACA atol::Float64 = 0 - rank::Int = typemax(Int) - rtol::Float64 = atol>0 || rank 0 || rank < typemax(Int) ? 0 : sqrt(eps(Float64)) end -function (paca::PartialACA)(K,rowtree::ClusterTree,coltree::ClusterTree) +function (paca::PartialACA)(K, rowtree::ClusterTree, coltree::ClusterTree) # find initial column pivot for partial ACA istart = _aca_partial_initial_pivot(rowtree) irange = index_range(rowtree) jrange = index_range(coltree) - _aca_partial(K,irange,jrange,paca.atol,paca.rank,paca.rtol,istart-irange.start+1) + return _aca_partial(K, irange, jrange, paca.atol, paca.rank, paca.rtol, + istart - irange.start + 1) end -function (paca::PartialACA)(K,irange::UnitRange,jrange::UnitRange) - _aca_partial(K,irange,jrange,paca.atol,paca.rank,paca.rtol) +function (paca::PartialACA)(K, irange::UnitRange, jrange::UnitRange) + return _aca_partial(K, irange, jrange, paca.atol, paca.rank, paca.rtol) end -(paca::PartialACA)(K::Matrix) = paca(K,1:size(K,1),1:size(K,2)) - +(paca::PartialACA)(K::Matrix) = paca(K, 1:size(K, 1), 1:size(K, 2)) """ _aca_partial(K,irange,jrange,atol,rmax,rtol,istart=1) @@ -148,73 +148,73 @@ partial pivoting. The returned `R::RkMatrix` provides an approximation to max(atol,rtol*|M|)`, but this inequality may fail to hold due to the various errors involved in estimating the error and |M|. """ -function _aca_partial(K,irange,jrange,atol,rmax,rtol,istart=1) +function _aca_partial(K, irange, jrange, atol, rmax, rtol, istart=1) Kadj = adjoint(K) # if irange and jrange are Colon, extract the size from `K` directly. 
This # allows for some code reuse with specializations on getindex(i,::Colon) and # getindex(::Colon,j) for when `K` is a `RkMatrix` if irange isa Colon && jrange isa Colon - m,n = size(K) - ishift,jshift = 0,0 + m, n = size(K) + ishift, jshift = 0, 0 else - m,n = length(irange), length(jrange) - ishift,jshift = irange.start-1, jrange.start-1 + m, n = length(irange), length(jrange) + ishift, jshift = irange.start - 1, jrange.start - 1 # maps global indices to local indices end - rmax = min(m,n,rmax) - T = Base.eltype(K) - A = Vector{Vector{T}}() - B = Vector{Vector{T}}() - I = BitVector(true for i = 1:m) - J = BitVector(true for i = 1:n) - i = istart # initial pivot - er = Inf + rmax = min(m, n, rmax) + T = Base.eltype(K) + A = Vector{Vector{T}}() + B = Vector{Vector{T}}() + I = BitVector(true for i in 1:m) + J = BitVector(true for i in 1:n) + i = istart # initial pivot + er = Inf est_norm = 0 # approximate norm of K[irange,jrange] - r = 0 # current rank - while er > max(atol,rtol*est_norm) && r < rmax + r = 0 # current rank + while er > max(atol, rtol * est_norm) && r < rmax # remove index i from allowed row I[i] = false # compute next row by row <-- K[i+ishift,jrange] - R[i,:] - adjcol = Kadj[jrange,i+ishift] - for k = 1:r - axpy!(-adjoint(A[k][i]),B[k],adjcol) + adjcol = Kadj[jrange, i + ishift] + for k in 1:r + axpy!(-adjoint(A[k][i]), B[k], adjcol) # for j in eachindex(row) # row[j] = row[j] - B[k][j]*adjoint(A[k][i]) # end end - j = _aca_partial_pivot(adjcol,J) - δ = adjcol[j] + j = _aca_partial_pivot(adjcol, J) + δ = adjcol[j] if svdvals(δ)[end] == 0 @debug "zero pivot found during partial aca" - i = findfirst(x->x==true,I) + i = findfirst(x -> x == true, I) else # δ != 0 iδ = inv(δ) # rdiv!(b,δ) # b <-- b/δ for k in eachindex(adjcol) - adjcol[k] = adjcol[k]*iδ + adjcol[k] = adjcol[k] * iδ end J[j] = false # compute next col by col <-- K[irange,j+jshift] - R[:,j] - col = K[irange,j+jshift] - for k = 1:r - axpy!(-adjoint(B[k][j]),A[k],col) + col = K[irange, j + jshift] + for k in 1:r + axpy!(-adjoint(B[k][j]), A[k], col) # for i in eachindex(col) # col[i] = col[i] - A[k][i]*adjoint(B[k][j]) # end end # push new cross and increase rank r += 1 - push!(A,col) - push!(B,adjcol) + push!(A, col) + push!(B, adjcol) # estimate the error by || R_{k} - R_{k-1} || = ||a|| ||b|| - er = norm(col)*norm(adjcol) + er = norm(col) * norm(adjcol) # estimate the norm by || K || ≈ || R_k || - est_norm = _update_frob_norm(est_norm,A,B) - i = _aca_partial_pivot(col,I) + est_norm = _update_frob_norm(est_norm, A, B) + i = _aca_partial_pivot(col, I) # @show r, er end end - return RkMatrix(A,B) + return RkMatrix(A, B) end """ @@ -223,14 +223,14 @@ end Given the Frobenius norm of `Rₖ = A[1:end-1]*adjoint(B[1:end-1])` in `acc`, compute the Frobenius norm of `Rₖ₊₁ = A*adjoint(B)` efficiently. """ -@inline function _update_frob_norm(cur,A,B) +@inline function _update_frob_norm(cur, A, B) @timeit_debug "Update Frobenius norm" begin k = length(A) a = A[end] b = B[end] out = norm(a)^2 * norm(b)^2 - for l=1:k-1 - out += 2*real(dot(A[l],a)*(dot(b,B[l]))) + for l in 1:(k - 1) + out += 2 * real(dot(A[l], a) * (dot(b, B[l]))) end end return sqrt(cur^2 + out) @@ -250,13 +250,13 @@ kernels; see (https://www.sciencedirect.com/science/article/pii/S0021999117306721)[https://www.sciencedirect.com/science/article/pii/S0021999117306721] for more details. 
""" -function _aca_partial_pivot(v,J) +function _aca_partial_pivot(v, J) idx = -1 val = -Inf for n in 1:length(J) J[n] || continue - x = v[n] - σ = svdvals(x)[end] + x = v[n] + σ = svdvals(x)[end] σ < val && continue idx = n val = σ @@ -276,11 +276,11 @@ When `x` is a scalar, this is simply the element with largest absolute value. """ function _aca_full_pivot(M) idxs = CartesianIndices(M) - idx = first(idxs) + idx = first(idxs) val = -Inf for I in idxs - x = M[I] - σ = svdvals(x)[end] + x = M[I] + σ = svdvals(x)[end] σ < val && continue idx = I val = σ @@ -292,15 +292,15 @@ function _aca_partial_initial_pivot(rowtree) # the method below is suggested in Bebendorf, but it does not seem to # improve the error. The idea is that the initial pivot is the closesest # point to the center of the cluster - xc = center(container(rowtree)) - d = Inf - els = root_elements(rowtree) - loc_idxs = index_range(rowtree) - istart = first(loc_idxs) + xc = center(container(rowtree)) + d = Inf + els = root_elements(rowtree) + loc_idxs = index_range(rowtree) + istart = first(loc_idxs) for i in loc_idxs - x = els[i] - if norm(x-xc) < d - d = norm(x-xc) + x = els[i] + if norm(x - xc) < d + d = norm(x - xc) istart = i end end @@ -314,24 +314,24 @@ Compression algorithm based on *a posteriori* truncation of an `SVD`. This is the optimal approximation in Frobenius norm; however, it also tends to be very expensive and thus should be used mostly for "small" matrices. """ -@Base.kwdef struct TSVD +Base.@kwdef struct TSVD atol::Float64 = 0 - rank::Int = typemax(Int) - rtol::Float64 = atol>0 || rank 0 || rank < typemax(Int) ? 0 : sqrt(eps(Float64)) end -function (tsvd::TSVD)(K,rowtree::ClusterTree,coltree::ClusterTree) +function (tsvd::TSVD)(K, rowtree::ClusterTree, coltree::ClusterTree) irange = index_range(rowtree) jrange = index_range(coltree) - tsvd(K,irange,jrange) + return tsvd(K, irange, jrange) end -function (tsvd::TSVD)(K,irange::UnitRange,jrange::UnitRange) - M = K[irange,jrange] - compress!(M,tsvd) +function (tsvd::TSVD)(K, irange::UnitRange, jrange::UnitRange) + M = K[irange, jrange] + return compress!(M, tsvd) end -(comp::TSVD)(K::Matrix) = comp(K,1:size(K,1),1:size(K,2)) +(comp::TSVD)(K::Matrix) = comp(K, 1:size(K, 1), 1:size(K, 2)) """ compress!(M::RkMatrix,tsvd::TSVD) @@ -340,26 +340,26 @@ Recompress the matrix `R` using a truncated svd of `R`. The implementation uses the `qr-svd` strategy to efficiently compute `svd(R)` when `rank(R) ≪ min(size(R))`. """ -function compress!(R::RkMatrix,tsvd::TSVD) - m,n = size(R) +function compress!(R::RkMatrix, tsvd::TSVD) + m, n = size(R) QA, RA = qr!(R.A) QB, RB = qr!(R.B) - F = svd!(RA*adjoint(RB)) # svd of an r×r problem - U = QA*F.U - Vt = F.Vt*adjoint(QB) - V = adjoint(Vt) + F = svd!(RA * adjoint(RB)) # svd of an r×r problem + U = QA * F.U + Vt = F.Vt * adjoint(QB) + V = adjoint(Vt) sp_norm = F.S[1] # spectral norm - r = findlast(x -> x>max(tsvd.atol,tsvd.rtol*sp_norm), F.S) - isnothing(r) && (r = min(rank(R),m,n)) - r = min(r,tsvd.rank) - if m x > max(tsvd.atol, tsvd.rtol * sp_norm), F.S) + isnothing(r) && (r = min(rank(R), m, n)) + r = min(r, tsvd.rank) + if m < n + A = @views U[:, 1:r] * Diagonal(F.S[1:r]) + B = V[:, 1:r] else - A = U[:,1:r] - B = @views (V[:,1:r])*Diagonal(F.S[1:r]) + A = U[:, 1:r] + B = @views (V[:, 1:r]) * Diagonal(F.S[1:r]) end - R.A,R.B = A,B + R.A, R.B = A, B return R end @@ -369,19 +369,19 @@ end Recompress the matrix `M` using a truncated svd and output an `RkMatrix`. The data in `M` is invalidated in the process. 
""" -function compress!(M::Matrix,tsvd::TSVD) - m,n = size(M) - F = svd!(M) +function compress!(M::Matrix, tsvd::TSVD) + m, n = size(M) + F = svd!(M) sp_norm = F.S[1] # spectral norm - r = findlast(x -> x>max(tsvd.atol,tsvd.rtol*sp_norm), F.S) - isnothing(r) && (r = min(m,n)) - r = min(r,tsvd.rank) - if m x > max(tsvd.atol, tsvd.rtol * sp_norm), F.S) + isnothing(r) && (r = min(m, n)) + r = min(r, tsvd.rank) + if m < n + A = @views F.U[:, 1:r] * Diagonal(F.S[1:r]) + B = F.V[:, 1:r] else - A = F.U[:,1:r] - B = @views (F.V[:,1:r])*Diagonal(F.S[1:r]) + A = F.U[:, 1:r] + B = @views (F.V[:, 1:r]) * Diagonal(F.S[1:r]) end - return RkMatrix(A,B) + return RkMatrix(A, B) end diff --git a/src/dhmatrix.jl b/src/dhmatrix.jl index d568c0e..8f3a47f 100644 --- a/src/dhmatrix.jl +++ b/src/dhmatrix.jl @@ -3,8 +3,8 @@ using Distributed function typeof_future(r::Future) if isready(r) pid = r.where - f = @spawnat pid typeof(fetch(r)) - T = fetch(f) + f = @spawnat pid typeof(fetch(r)) + T = fetch(f) return T else error("future is not ready") @@ -23,9 +23,9 @@ end Base.fetch(r::RemoteHMatrix) = fetch(r.future) -function Base.getindex(r::RemoteHMatrix,i::Int,j::Int) +function Base.getindex(r::RemoteHMatrix, i::Int, j::Int) pid = r.future.where - @fetchfrom pid getindex(fetch(r),i,j) + @fetchfrom pid getindex(fetch(r), i, j) end """ @@ -46,10 +46,10 @@ mutable struct DHMatrix{R,T} <: AbstractHMatrix{T} children::Matrix{DHMatrix{R,T}} parent::DHMatrix{R,T} # inner constructor which handles `nothing` fields. - function DHMatrix{R,T}(rowtree,coltree,data,children,parent) where {R,T} - dhmat = new{R,T}(rowtree,coltree,data) - dhmat.children = isnothing(children) ? Matrix{DHMatrix{R,T}}(undef,0,0) : children - dhmat.parent = isnothing(parent) ? dhmat : parent + function DHMatrix{R,T}(rowtree, coltree, data, children, parent) where {R,T} + dhmat = new{R,T}(rowtree, coltree, data) + dhmat.children = isnothing(children) ? Matrix{DHMatrix{R,T}}(undef, 0, 0) : children + dhmat.parent = isnothing(parent) ? dhmat : parent return dhmat end end @@ -65,22 +65,24 @@ for distributed computing. Currently, the only available options is `distribute_columns`, which will partition the columns of the underlying matrix into `floor(log2(nw))` parts, where `nw` is the number of workers available. 
""" -function DHMatrix{T}(rowtree::R, coltree::R; partition_strategy=:distribute_columns) where {R,T} +function DHMatrix{T}(rowtree::R, coltree::R; + partition_strategy=:distribute_columns) where {R,T} #build root - root = DHMatrix{R,T}(rowtree,coltree,nothing,nothing,nothing) + root = DHMatrix{R,T}(rowtree, coltree, nothing, nothing, nothing) # depending on the partition strategy, dispatch to appropriate (recursive) # method if partition_strategy == :distribute_columns - nw = nworkers() - dmax = floor(Int64,log2(nw)) - _build_block_structure_distribute_cols!(root,dmax) + nw = nworkers() + dmax = floor(Int64, log2(nw)) + _build_block_structure_distribute_cols!(root, dmax) else error("unrecognized partition strategy") end return root end -function _build_block_structure_distribute_cols!(current_node::DHMatrix{R,T},dmax) where {R,T} +function _build_block_structure_distribute_cols!(current_node::DHMatrix{R,T}, + dmax) where {R,T} if Trees.depth(current_node) == dmax return current_node else @@ -90,10 +92,11 @@ function _build_block_structure_distribute_cols!(current_node::DHMatrix{R,T},dma # do not recurse on row, only on columns row_children = [X] col_children = Y.children - children = [DHMatrix{R,T}(r,c,nothing,nothing,current_node) for r in row_children, c in col_children] + children = [DHMatrix{R,T}(r, c, nothing, nothing, current_node) + for r in row_children, c in col_children] current_node.children = children for child in children - _build_block_structure_distribute_cols!(child,dmax) + _build_block_structure_distribute_cols!(child, dmax) end return current_node end @@ -101,17 +104,18 @@ end Base.size(H::DHMatrix) = length(rowrange(H)), length(colrange(H)) -function Base.show(io::IO,::MIME"text/plain",hmat::DHMatrix) +function Base.show(io::IO, ::MIME"text/plain", hmat::DHMatrix) # isclean(hmat) || return print(io,"Dirty DHMatrix") - println(io,"Distributed HMatrix of $(eltype(hmat)) with range $(rowrange(hmat)) × $(colrange(hmat))") + println(io, + "Distributed HMatrix of $(eltype(hmat)) with range $(rowrange(hmat)) × $(colrange(hmat))") nodes = collect(AbstractTrees.PreOrderDFS(hmat)) - println(io,"\t number of nodes in tree: $(length(nodes))") + println(io, "\t number of nodes in tree: $(length(nodes))") leaves = collect(AbstractTrees.Leaves(hmat)) - @printf(io,"\t number of leaves: %i\n",length(leaves)) - for (i,leaf) in enumerate(leaves) - r = leaf.data.future + @printf(io, "\t number of leaves: %i\n", length(leaves)) + for (i, leaf) in enumerate(leaves) + r = leaf.data.future pid = r.where - irange,jrange = @fetchfrom pid rowrange(fetch(r)), colrange(fetch(r)) + irange, jrange = @fetchfrom pid rowrange(fetch(r)), colrange(fetch(r)) println("\t\t leaf $i on process $pid spanning $irange × $jrange") end end @@ -122,58 +126,61 @@ end Internal methods called **after** the `DHMatrix` structure has been initialized in order to construct the `HMatrix` on each of the leaves of the `DHMatrix`. 
""" -function _assemble_hmat_distributed(K,rtree,ctree;adm=StrongAdmissibilityStd(),comp=PartialACA(), - global_index=use_global_index(),threads=use_threads()) +function _assemble_hmat_distributed(K, rtree, ctree; adm=StrongAdmissibilityStd(), + comp=PartialACA(), + global_index=use_global_index(), threads=use_threads()) # R = typeof(rtree) T = eltype(K) - wids = workers() - root = DHMatrix{T}(rtree,ctree;partition_strategy=:distribute_columns) - leaves = AbstractTrees.Leaves(root) |> collect + wids = workers() + root = DHMatrix{T}(rtree, ctree; partition_strategy=:distribute_columns) + leaves = collect(AbstractTrees.Leaves(root)) @info "Assembling distributed HMatrix on $(length(leaves)) processes" @sync for (k, leaf) in enumerate(leaves) pid = wids[k] # id of k-th worker - r = @spawnat pid assemble_hmat(K, rowtree(leaf), coltree(leaf); adm, comp,global_index,threads,distributed=false) + r = @spawnat pid assemble_hmat(K, rowtree(leaf), coltree(leaf); adm, comp, + global_index, threads, distributed=false) leaf.data = RemoteHMatrix{R,T}(r) end return root end -function mul!(y::AbstractVector,A::DHMatrix,x::AbstractVector,a::Number,b::Number; - global_index=use_global_index(),threads=use_threads()) +function mul!(y::AbstractVector, A::DHMatrix, x::AbstractVector, a::Number, b::Number; + global_index=use_global_index(), threads=use_threads()) # since the HMatrix represents A = Pr*H*Pc, where Pr and Pc are row and column # permutations, we need first to rewrite C <-- b*C + a*(Pc*H*Pb)*B as # C <-- Pr*(b*inv(Pr)*C + a*H*(Pc*B)). Following this rewrite, the # multiplication is performed by first defining B <-- Pc*B, and C <-- # inv(Pr)*C, doing the multiplication with the permuted entries, and then # permuting the result C <-- Pr*C at the end. - ctree = A.coltree - rtree = A.rowtree + ctree = A.coltree + rtree = A.rowtree # permute input if global_index - x = x[ctree.loc2glob] - y = permute!(y,rtree.loc2glob) - rmul!(x,a) # multiply in place since this is a new copy, so does not mutate exterior x + x = x[ctree.loc2glob] + y = permute!(y, rtree.loc2glob) + rmul!(x, a) # multiply in place since this is a new copy, so does not mutate exterior x else - x = a*x # new copy of x + x = a * x # new copy of x end - rmul!(y,b) - leaves = filter_tree(x->Trees.isleaf(x),A) - nb = length(leaves) - acc = Vector{Future}(undef,nb) + rmul!(y, b) + leaves = filter_tree(x -> Trees.isleaf(x), A) + nb = length(leaves) + acc = Vector{Future}(undef, nb) @sync for i in 1:nb - r = leaves[i].data.future - pid = r.where - jrange = @fetchfrom pid colrange(fetch(r)) - T,n = eltype(y), length(y) - acc[i] = @spawnat pid mul!(zeros(T,n),fetch(r),view(x,jrange),1,0;global_index=false,threads) + r = leaves[i].data.future + pid = r.where + jrange = @fetchfrom pid colrange(fetch(r)) + T, n = eltype(y), length(y) + acc[i] = @spawnat pid mul!(zeros(T, n), fetch(r), view(x, jrange), 1, 0; + global_index=false, threads) end # reduction stage: fetch everybody to worker 1 and sum them up for yi in acc - axpy!(1,fetch(yi),y) + axpy!(1, fetch(yi), y) end # permute output - global_index && invpermute!(y,loc2glob(rtree)) + global_index && invpermute!(y, loc2glob(rtree)) return y end diff --git a/src/hmatrix.jl b/src/hmatrix.jl index 9cdfb87..f22e519 100644 --- a/src/hmatrix.jl +++ b/src/hmatrix.jl @@ -1,6 +1,6 @@ -abstract type AbstractHMatrix{T} <: AbstractMatrix{T} end +abstract type AbstractHMatrix{T} <: AbstractMatrix{T} end -function Base.getindex(H::AbstractHMatrix,i,j) +function Base.getindex(H::AbstractHMatrix, i, j) # NOTE: you may 
disable `getindex` to avoid having code that will work, but be # horribly slow because it falls back to some generic implementation in # LinearAlgebra. The downside is that the `show` method, which will usually @@ -15,25 +15,25 @@ function Base.getindex(H::AbstractHMatrix,i,j) `AbstractHMatrix` has fallen back to a generic implementation. """ if ALLOW_GETINDEX[] - shift = pivot(H) .-1 - _getindex(H,i+shift[1],j+shift[2]) + shift = pivot(H) .- 1 + _getindex(H, i + shift[1], j + shift[2]) else error(msg) end end -function _getindex(H,i,j) - (i ∈ rowrange(H)) && (j ∈ colrange(H)) || throw(BoundsError(H,(i,j))) +function _getindex(H, i, j) + (i ∈ rowrange(H)) && (j ∈ colrange(H)) || throw(BoundsError(H, (i, j))) acc = zero(eltype(H)) shift = pivot(H) .- 1 if hasdata(H) il = i - shift[1] jl = j - shift[2] - acc += data(H)[il,jl] + acc += data(H)[il, jl] end for child in children(H) if (i ∈ rowrange(child)) && (j ∈ colrange(child)) - acc += _getindex(child,i,j) + acc += _getindex(child, i, j) end end return acc @@ -53,96 +53,95 @@ mutable struct HMatrix{R,T} <: AbstractHMatrix{T} children::Matrix{HMatrix{R,T}} parent::HMatrix{R,T} # inner constructor which handles `nothing` fields. - function HMatrix{R,T}(rowtree,coltree,adm,data,children,parent) where {R,T} + function HMatrix{R,T}(rowtree, coltree, adm, data, children, parent) where {R,T} if data !== nothing - @assert (length(rowtree),length(coltree)) === size(data) "$(length(rowtree)),$(length(coltree)) != $(size(data))" + @assert (length(rowtree), length(coltree)) === size(data) "$(length(rowtree)),$(length(coltree)) != $(size(data))" end - hmat = new{R,T}(rowtree,coltree,adm,data) - hmat.children = isnothing(children) ? Matrix{HMatrix{R,T}}(undef,0,0) : children - hmat.parent = isnothing(parent) ? hmat : parent + hmat = new{R,T}(rowtree, coltree, adm, data) + hmat.children = isnothing(children) ? Matrix{HMatrix{R,T}}(undef, 0, 0) : children + hmat.parent = isnothing(parent) ? 
hmat : parent return hmat end end # setters and getters (defined for AbstractHMatrix) -isadmissible(H::AbstractHMatrix) = H.admissible -hasdata(H::AbstractHMatrix) = !isnothing(H.data) -data(H::AbstractHMatrix) = H.data -setdata!(H::AbstractHMatrix,d) = setfield!(H,:data,d) +isadmissible(H::AbstractHMatrix) = H.admissible +hasdata(H::AbstractHMatrix) = !isnothing(H.data) +data(H::AbstractHMatrix) = H.data +setdata!(H::AbstractHMatrix, d) = setfield!(H, :data, d) rowtree(H::AbstractHMatrix) = H.rowtree coltree(H::AbstractHMatrix) = H.coltree cluster_type(::HMatrix{R,T}) where {R,T} = R -Base.getindex(H::HMatrix,::Colon,j) = getcol(H,j) +Base.getindex(H::HMatrix, ::Colon, j) = getcol(H, j) # getcol for regular matrices -function getcol!(col,M::Matrix,j) - @assert length(col) == size(M,1) - copyto!(col,view(M,:,j)) +function getcol!(col, M::Matrix, j) + @assert length(col) == size(M, 1) + return copyto!(col, view(M, :, j)) end -function getcol!(col,adjM::Adjoint{<:Any,<:Matrix},j) - @assert length(col) == size(adjM,1) - copyto!(col,view(adjM,:,j)) +function getcol!(col, adjM::Adjoint{<:Any,<:Matrix}, j) + @assert length(col) == size(adjM, 1) + return copyto!(col, view(adjM, :, j)) end -getcol(M::Matrix,j) = M[:,j] -getcol(adjM::Adjoint{<:Any,<:Matrix},j) = adjM[:,j] +getcol(M::Matrix, j) = M[:, j] +getcol(adjM::Adjoint{<:Any,<:Matrix}, j) = adjM[:, j] - -function getcol(H::HMatrix,j::Int) - m,n = size(H) - T = eltype(H) - col = zeros(T,m) - getcol!(col,H,j) +function getcol(H::HMatrix, j::Int) + m, n = size(H) + T = eltype(H) + col = zeros(T, m) + return getcol!(col, H, j) end -function getcol!(col,H::HMatrix,j::Int) +function getcol!(col, H::HMatrix, j::Int) (j ∈ colrange(H)) || throw(BoundsError()) piv = pivot(H) - _getcol!(col,H,j,piv) + return _getcol!(col, H, j, piv) end -function _getcol!(col,H::HMatrix,j,piv) +function _getcol!(col, H::HMatrix, j, piv) if hasdata(H) - shift = pivot(H) .- 1 - jl = j - shift[2] - irange = rowrange(H) .- (piv[1] - 1) - getcol!(view(col,irange),data(H),jl) + shift = pivot(H) .- 1 + jl = j - shift[2] + irange = rowrange(H) .- (piv[1] - 1) + getcol!(view(col, irange), data(H), jl) end for child in children(H) if j ∈ colrange(child) - _getcol!(col,child,j,piv) + _getcol!(col, child, j, piv) end end return col end -Base.getindex(adjH::Adjoint{<:Any,<:HMatrix},::Colon,j) = getcol(adjH,j) +Base.getindex(adjH::Adjoint{<:Any,<:HMatrix}, ::Colon, j) = getcol(adjH, j) -function getcol(adjH::Adjoint{<:Any,<:HMatrix},j::Int) +function getcol(adjH::Adjoint{<:Any,<:HMatrix}, j::Int) # (j ∈ colrange(adjH)) || throw(BoundsError()) - m,n = size(adjH) - T = eltype(adjH) - col = zeros(T,m) - getcol!(col,adjH,j) + m, n = size(adjH) + T = eltype(adjH) + col = zeros(T, m) + return getcol!(col, adjH, j) end -function getcol!(col,adjH::Adjoint{<:Any,<:HMatrix},j::Int) +function getcol!(col, adjH::Adjoint{<:Any,<:HMatrix}, j::Int) piv = pivot(adjH) - _getcol!(col,adjH,j,piv) + return _getcol!(col, adjH, j, piv) end -function _getcol!(col,adjH::Adjoint{<:Any,<:HMatrix},j,piv) +function _getcol!(col, adjH::Adjoint{<:Any,<:HMatrix}, j, piv) if hasdata(adjH) - shift = pivot(adjH) .- 1 - jl = j - shift[2] - irange = rowrange(adjH) .- (piv[1] - 1) - getcol!(view(col,irange),data(adjH),jl) + shift = pivot(adjH) .- 1 + jl = j - shift[2] + irange = rowrange(adjH) .- (piv[1] - 1) + getcol!(view(col, irange), data(adjH), jl) end for child in children(adjH) if j ∈ colrange(child) - _getcol!(col,child,j,piv) + _getcol!(col, child, j, piv) end end return col @@ -150,28 +149,28 @@ end # Trees 
interface children(H::AbstractHMatrix) = H.children -children(H::AbstractHMatrix,idxs...) = H.children[idxs] -parent(H::AbstractHMatrix) = H.parent -isleaf(H::AbstractHMatrix) = isempty(children(H)) -isroot(H::AbstractHMatrix) = parent(H) === H +children(H::AbstractHMatrix, idxs...) = H.children[idxs] +parent(H::AbstractHMatrix) = H.parent +isleaf(H::AbstractHMatrix) = isempty(children(H)) +isroot(H::AbstractHMatrix) = parent(H) === H # interface to AbstractTrees. No children is determined by an empty tuple for # AbstractTrees. AbstractTrees.children(t::AbstractHMatrix) = isleaf(t) ? () : t.children AbstractTrees.nodetype(t::AbstractHMatrix) = typeof(t) -rowrange(H::AbstractHMatrix) = index_range(H.rowtree) -colrange(H::AbstractHMatrix) = index_range(H.coltree) -rowperm(H::AbstractHMatrix) = H |> rowtree |> loc2glob -colperm(H::AbstractHMatrix) = H |> coltree |> loc2glob -pivot(H::AbstractHMatrix) = (rowrange(H).start,colrange(H).start) -offset(H::AbstractHMatrix) = pivot(H) .- 1 +rowrange(H::AbstractHMatrix) = index_range(H.rowtree) +colrange(H::AbstractHMatrix) = index_range(H.coltree) +rowperm(H::AbstractHMatrix) = loc2glob(rowtree(H)) +colperm(H::AbstractHMatrix) = loc2glob(coltree(H)) +pivot(H::AbstractHMatrix) = (rowrange(H).start, colrange(H).start) +offset(H::AbstractHMatrix) = pivot(H) .- 1 # Base.axes(H::HMatrix) = rowrange(H),colrange(H) Base.size(H::AbstractHMatrix) = length(rowrange(H)), length(colrange(H)) function blocksize(H::AbstractHMatrix) - size(children(H)) + return size(children(H)) end """ @@ -184,41 +183,44 @@ function compression_ratio(H::HMatrix) nr = length(H) # represented entries for block in AbstractTrees.Leaves(H) data = block.data - ns += num_stored_elements(data) - end - return nr/ns + ns += num_stored_elements(data) + end + return nr / ns end num_stored_elements(M::Matrix) = length(M) -function Base.show(io::IO,hmat::HMatrix) - isclean(hmat) || return print(io,"Dirty HMatrix") - print(io,"HMatrix of $(eltype(hmat)) with range $(rowrange(hmat)) × $(colrange(hmat))") - _show(io,hmat) +function Base.show(io::IO, hmat::HMatrix) + isclean(hmat) || return print(io, "Dirty HMatrix") + print(io, "HMatrix of $(eltype(hmat)) with range $(rowrange(hmat)) × $(colrange(hmat))") + _show(io, hmat) return io end -Base.show(io::IO,::MIME"text/plain",hmat::HMatrix) = show(io,hmat) +Base.show(io::IO, ::MIME"text/plain", hmat::HMatrix) = show(io, hmat) -function _show(io,hmat) +function _show(io, hmat) nodes = collect(AbstractTrees.PreOrderDFS(hmat)) @printf io "\n\t number of nodes in tree: %i" length(nodes) leaves = collect(AbstractTrees.Leaves(hmat)) - sparse_leaves = filter(isadmissible,leaves) - dense_leaves = filter(!isadmissible,leaves) - @printf(io,"\n\t number of leaves: %i (%i admissible + %i full)",length(leaves),length(sparse_leaves), - length(dense_leaves)) - rmin,rmax = isempty(sparse_leaves) ? (-1,-1) : extrema(x->rank(x.data),sparse_leaves) - @printf(io,"\n\t min rank of sparse blocks : %i",rmin) - @printf(io,"\n\t max rank of sparse blocks : %i",rmax) - dense_min,dense_max = isempty(dense_leaves) ? 
(-1,-1) : extrema(x->length(x.data),dense_leaves) - @printf(io,"\n\t min length of dense blocks : %i",dense_min) - @printf(io,"\n\t max length of dense blocks : %i",dense_max) - points_per_leaf = map(length,leaves) - @printf(io,"\n\t min number of elements per leaf: %i",minimum(points_per_leaf)) - @printf(io,"\n\t max number of elements per leaf: %i",maximum(points_per_leaf)) - depth_per_leaf = map(depth,leaves) - @printf(io,"\n\t depth of tree: %i",maximum(depth_per_leaf)) - @printf(io,"\n\t compression ratio: %f\n",compression_ratio(hmat)) + sparse_leaves = filter(isadmissible, leaves) + dense_leaves = filter(!isadmissible, leaves) + @printf(io, "\n\t number of leaves: %i (%i admissible + %i full)", length(leaves), + length(sparse_leaves), + length(dense_leaves)) + rmin, rmax = isempty(sparse_leaves) ? (-1, -1) : + extrema(x -> rank(x.data), sparse_leaves) + @printf(io, "\n\t min rank of sparse blocks : %i", rmin) + @printf(io, "\n\t max rank of sparse blocks : %i", rmax) + dense_min, dense_max = isempty(dense_leaves) ? (-1, -1) : + extrema(x -> length(x.data), dense_leaves) + @printf(io, "\n\t min length of dense blocks : %i", dense_min) + @printf(io, "\n\t max length of dense blocks : %i", dense_max) + points_per_leaf = map(length, leaves) + @printf(io, "\n\t min number of elements per leaf: %i", minimum(points_per_leaf)) + @printf(io, "\n\t max number of elements per leaf: %i", maximum(points_per_leaf)) + depth_per_leaf = map(depth, leaves) + @printf(io, "\n\t depth of tree: %i", maximum(depth_per_leaf)) + @printf(io, "\n\t compression ratio: %f\n", compression_ratio(hmat)) return io end @@ -230,18 +232,18 @@ global indexing system (see [`HMatrix`](@ref) for more information); otherwise the *local* indexing system induced by the row and columns trees are used (default). """ -Matrix(hmat::HMatrix;global_index=true) = Matrix{eltype(hmat)}(hmat;global_index) -function Base.Matrix{T}(hmat::HMatrix;global_index) where {T} - M = zeros(T,size(hmat)...) +Matrix(hmat::HMatrix; global_index=true) = Matrix{eltype(hmat)}(hmat; global_index) +function Base.Matrix{T}(hmat::HMatrix; global_index) where {T} + M = zeros(T, size(hmat)...) piv = pivot(hmat) for block in AbstractTrees.PreOrderDFS(hmat) hasdata(block) || continue irange = rowrange(block) .- piv[1] .+ 1 jrange = colrange(block) .- piv[2] .+ 1 - M[irange,jrange] += Matrix(block.data) + M[irange, jrange] += Matrix(block.data) end if global_index - P = PermutedMatrix(M,invperm(rowperm(hmat)),invperm(colperm(hmat))) + P = PermutedMatrix(M, invperm(rowperm(hmat)), invperm(colperm(hmat))) return Matrix(P) else return M @@ -263,39 +265,43 @@ It is assumed that `K` supports `getindex(K,i,j)`, and that comp can be called as `comp(K,irange::UnitRange,jrange::UnitRange)` to produce a compressed version of `K[irange,jrange]` in the form of an [`RkMatrix`](@ref). """ -function assemble_hmat(K,rowtree,coltree;adm=StrongAdmissibilityStd(3),comp=PartialACA(), - global_index=use_global_index(),threads=use_threads(),distributed=false) - T = eltype(K) +function assemble_hmat(K, rowtree, coltree; adm=StrongAdmissibilityStd(3), + comp=PartialACA(), + global_index=use_global_index(), threads=use_threads(), + distributed=false) + T = eltype(K) if distributed - _assemble_hmat_distributed(K,rowtree,coltree;adm,comp,global_index,threads) + _assemble_hmat_distributed(K, rowtree, coltree; adm, comp, global_index, threads) else # create first the structure. No parellelism used as this should be light. 
@timeit_debug "initilizing block structure" begin - hmat = HMatrix{T}(rowtree,coltree,adm) + hmat = HMatrix{T}(rowtree, coltree, adm) end # if needed permute kernel entries into indexing induced by trees - global_index && (K = PermutedMatrix(K,loc2glob(rowtree),loc2glob(coltree))) + global_index && (K = PermutedMatrix(K, loc2glob(rowtree), loc2glob(coltree))) # now assemble the data in the blocks @timeit_debug "assembling hmatrix" begin if threads @info "Assembling HMatrix on $(Threads.nthreads()) threads" - _assemble_threads!(hmat,K,comp) + _assemble_threads!(hmat, K, comp) else @info "Assembling HMatrix on 1 thread" - _assemble_cpu!(hmat,K,comp) + _assemble_cpu!(hmat, K, comp) end end end end -function assemble_hmat(K::AbstractKernelMatrix;atol=0,rank=typemax(Int),rtol=atol>0 || rank 0 || rank < typemax(Int) ? 0 : sqrt(eps(Float64)), + kwargs...) + comp = PartialACA(; rtol, atol, rank) + adm = StrongAdmissibilityStd(3) + X = rowelements(K) + Y = colelements(K) Xclt = ClusterTree(X) Yclt = ClusterTree(Y) - assemble_hmat(K,Xclt,Yclt;adm,comp,kwargs...) + return assemble_hmat(K, Xclt, Yclt; adm, comp, kwargs...) end """ @@ -308,9 +314,9 @@ hierarchical matrix, but **does not compute `data`** field in the blocks. See """ function HMatrix{T}(rowtree::R, coltree::R, adm) where {R,T} #build root - root = HMatrix{R,T}(rowtree,coltree,false,nothing,nothing,nothing) + root = HMatrix{R,T}(rowtree, coltree, false, nothing, nothing, nothing) # recurse - _build_block_structure!(adm,root) + _build_block_structure!(adm, root) return root end @@ -324,16 +330,17 @@ function _build_block_structure!(adm, current_node::HMatrix{R,T}) where {R,T} Y = current_node.coltree if (isleaf(X) || isleaf(Y)) current_node.admissible = false - elseif adm(X,Y) + elseif adm(X, Y) current_node.admissible = true else current_node.admissible = false row_children = X.children col_children = Y.children - children = [HMatrix{R,T}(r,c,false,nothing,nothing,current_node) for r in row_children, c in col_children] + children = [HMatrix{R,T}(r, c, false, nothing, nothing, current_node) + for r in row_children, c in col_children] current_node.children = children for child in children - _build_block_structure!(adm,child) + _build_block_structure!(adm, child) end end return current_node @@ -347,17 +354,17 @@ using the compressor `comp`. This function assumes the structure of `hmat` has already been intialized, and therefore should not be called directly. See [`HMatrix`](@ref) information on constructors. """ -function _assemble_cpu!(hmat,K,comp) +function _assemble_cpu!(hmat, K, comp) if isleaf(hmat) # base case if isadmissible(hmat) - _assemble_sparse_block!(hmat,K,comp) + _assemble_sparse_block!(hmat, K, comp) else - _assemble_dense_block!(hmat,K) + _assemble_dense_block!(hmat, K) end else # recurse on children for child in hmat.children - _assemble_cpu!(child,K,comp) + _assemble_cpu!(child, K, comp) end end return hmat @@ -370,31 +377,31 @@ Like [`_assemble_cpu!`](@ref), but uses threads to assemble the leaves. Note that the threads are spanwned using `Threads.@spawn`, which means they are spawned on the same worker as the caller. """ -function _assemble_threads!(hmat,K,comp) +function _assemble_threads!(hmat, K, comp) # FIXME: ideally something like `omp for schedule(guided)` should be used here # to avoid spawning too many (small) tasks. 
In the absece of such scheduling # strategy in julia at the moment (v1.6), we resort to manually limiting the size # of the tasks by directly calling the serial method for blocks which are # smaller than a given length (1000^2 here). - blocks = filter_tree(hmat,true) do x - (isleaf(x) || length(x)<1000*1000) + blocks = filter_tree(hmat, true) do x + return (isleaf(x) || length(x) < 1000 * 1000) end - sort!(blocks;lt=(x,y)->length(x) length(x) < length(y), rev=true) n = length(blocks) @sync for i in 1:n - Threads.@spawn _assemble_cpu!(blocks[i],K,comp) + Threads.@spawn _assemble_cpu!(blocks[i], K, comp) end return hmat end -function _assemble_sparse_block!(hmat,K,comp) - hmat.data = comp(K,hmat.rowtree,hmat.coltree) +function _assemble_sparse_block!(hmat, K, comp) + return hmat.data = comp(K, hmat.rowtree, hmat.coltree) end -function _assemble_dense_block!(hmat,K) +function _assemble_dense_block!(hmat, K) irange = rowrange(hmat) jrange = colrange(hmat) - hmat.data = K[irange,jrange] + hmat.data = K[irange, jrange] return hmat end @@ -409,14 +416,15 @@ isleaf(adjH::Adjoint{<:Any,<:HMatrix}) = isleaf(adjH.parent) Base.size(adjH::Adjoint{<:Any,<:HMatrix}) = reverse(size(adjH.parent)) -function Base.show(io::IO,adjH::Adjoint{<:Any,<:HMatrix}) +function Base.show(io::IO, adjH::Adjoint{<:Any,<:HMatrix}) hmat = parent(adjH) - isclean(hmat) || return print(io,"Dirty HMatrix") - print(io,"Adjoint HMatrix of $(eltype(hmat)) with range $(rowrange(adjH)) × $(colrange(adjH))") - _show(io,hmat) + isclean(hmat) || return print(io, "Dirty HMatrix") + print(io, + "Adjoint HMatrix of $(eltype(hmat)) with range $(rowrange(adjH)) × $(colrange(adjH))") + _show(io, hmat) return io end -Base.show(io::IO,::MIME"text/plain",adjH::Adjoint{<:Any,<:HMatrix}) = show(io,adjH) +Base.show(io::IO, ::MIME"text/plain", adjH::Adjoint{<:Any,<:HMatrix}) = show(io, adjH) """ struct StrongAdmissibilityStd @@ -432,13 +440,13 @@ adm(Xnode,Ynode) ``` """ Base.@kwdef struct StrongAdmissibilityStd - eta::Float64=3.0 + eta::Float64 = 3.0 end function (adm::StrongAdmissibilityStd)(left_node, right_node) - diam_min = minimum(diameter,(left_node,right_node)) - dist = distance(left_node,right_node) - return diam_min < adm.eta*dist + diam_min = minimum(diameter, (left_node, right_node)) + dist = distance(left_node, right_node) + return diam_min < adm.eta * dist end """ @@ -450,7 +458,7 @@ between them is positive. 
struct WeakAdmissibilityStd end -(adm::WeakAdmissibilityStd)(left_node, right_node) = distance(left_node,right_node) > 0 +(adm::WeakAdmissibilityStd)(left_node, right_node) = distance(left_node, right_node) > 0 """ isclean(H::HMatrix) @@ -477,58 +485,57 @@ function isclean(H::HMatrix) return true end -function depth(tree::HMatrix,acc=0) +function depth(tree::HMatrix, acc=0) if isroot(tree) return acc else - depth(parent(tree),acc+1) + depth(parent(tree), acc + 1) end end function Base.zero(H::HMatrix) H0 = deepcopy(H) - rmul!(H0,0) + rmul!(H0, 0) return H0 end -function compress!(H::HMatrix,comp) +function compress!(H::HMatrix, comp) @assert isclean(H) for leaf in AbstractTrees.Leaves(H) d = data(leaf) if d isa RkMatrix - compress!(d,comp) + compress!(d, comp) end end return H end - ############################################################################################ # Recipes ############################################################################################ @recipe function f(hmat::HMatrix) legend --> false - grid --> false + grid --> false aspect_ratio --> :equal - yflip := true + yflip := true seriestype := :shape - linecolor --> :black + linecolor --> :black # all leaves for block in AbstractTrees.Leaves(hmat) @series begin if isadmissible(block) - fillcolor --> :blue - seriesalpha --> 1/compression_ratio(block.data) + fillcolor --> :blue + seriesalpha --> 1 / compression_ratio(block.data) else - fillcolor --> :red - seriesalpha --> 0.3 + fillcolor --> :red + seriesalpha --> 0.3 end pt1 = pivot(block) pt2 = pt1 .+ size(block) .- 1 - y1, y2 = pt1[1],pt2[1] - x1, x2 = pt1[2],pt2[2] + y1, y2 = pt1[1], pt2[1] + x1, x2 = pt1[2], pt2[2] # annotations := ((x1+x2)/2,(y1+y2)/2, rank(block.data)) - [x1,x2,x2,x1,x1],[y1,y1,y2,y2,y1] + [x1, x2, x2, x1, x1], [y1, y1, y2, y2, y1] end end end diff --git a/src/hyperrectangle.jl b/src/hyperrectangle.jl index 2f4f8e0..321054b 100644 --- a/src/hyperrectangle.jl +++ b/src/hyperrectangle.jl @@ -34,7 +34,7 @@ half_width(r::HyperRectangle) = width(r) / 2 Minimal Euclidean distance between a point `x ∈ Ω1` and `y ∈ Ω2`. 
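For axis-aligned rectangles this reduces, component-wise, to
`dist(Ω1,Ω2)² = ∑ₖ max(0, low₂[k] - high₁[k], low₁[k] - high₂[k])²`,
which is what the implementation accumulates below.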
""" function distance(rec1::HyperRectangle{N}, - rec2::HyperRectangle{N}) where {N} + rec2::HyperRectangle{N}) where {N} d2 = 0 rec1_low_corner = low_corner(rec1) rec1_high_corner = high_corner(rec1) @@ -63,7 +63,6 @@ function distance(pt::SVector{N}, rec::HyperRectangle{N}) where {N} end distance(rec::HyperRectangle{N}, pt::SVector{N}) where {N} = distance(pt, rec) - """ diameter(Ω) @@ -91,24 +90,24 @@ function vertices(rec::HyperRectangle{2}) lc = low_corner(rec) hc = high_corner(rec) return SVector(SVector(lc[1], lc[2]), - SVector(hc[1], lc[2]), - SVector(hc[1], hc[2]), - SVector(lc[1], hc[2])) + SVector(hc[1], lc[2]), + SVector(hc[1], hc[2]), + SVector(lc[1], hc[2])) end function vertices(rec::HyperRectangle{3}) lc = low_corner(rec) hc = high_corner(rec) return SVector( - # lower face - SVector(lc[1], lc[2], lc[3]), - SVector(hc[1], lc[2], lc[3]), - SVector(hc[1], hc[2], lc[3]), - SVector(lc[1], hc[2], lc[3]), - # upper face - SVector(lc[1], lc[2], hc[3]), - SVector(hc[1], lc[2], hc[3]), - SVector(hc[1], hc[2], hc[3]), - SVector(lc[1], hc[2], hc[3])) + # lower face + SVector(lc[1], lc[2], lc[3]), + SVector(hc[1], lc[2], lc[3]), + SVector(hc[1], hc[2], lc[3]), + SVector(lc[1], hc[2], lc[3]), + # upper face + SVector(lc[1], lc[2], hc[3]), + SVector(hc[1], lc[2], hc[3]), + SVector(hc[1], hc[2], hc[3]), + SVector(lc[1], hc[2], hc[3])) end function bounding_box(els, cube=false) @@ -133,7 +132,7 @@ function bounding_box(els, cube=false) return HyperRectangle(lb, ub) end center(x::SVector) = x -center(x::NTuple) = SVector(x) +center(x::NTuple) = SVector(x) """ split(rec::HyperRectangle,[axis]::Int,[place]) @@ -148,8 +147,8 @@ When no axis and no place is given, defaults to splitting along the largest axis function Base.split(rec::HyperRectangle{N}, axis, place) where {N} rec_low_corner = low_corner(rec) rec_high_corner = high_corner(rec) - high_corner1 = ntuple(n -> n == axis ? place : rec_high_corner[n], N) |> SVector - low_corner2 = ntuple(n -> n == axis ? place : rec_low_corner[n], N) |> SVector + high_corner1 = SVector(ntuple(n -> n == axis ? place : rec_high_corner[n], N)) + low_corner2 = SVector(ntuple(n -> n == axis ? place : rec_low_corner[n], N)) rec1 = HyperRectangle(rec_low_corner, high_corner1) rec2 = HyperRectangle(low_corner2, rec_high_corner) return (rec1, rec2) diff --git a/src/kernelmatrix.jl b/src/kernelmatrix.jl index 702341a..230c7ad 100644 --- a/src/kernelmatrix.jl +++ b/src/kernelmatrix.jl @@ -36,14 +36,14 @@ struct KernelMatrix{Tf,Tx,Ty,T} <: AbstractKernelMatrix{T} Y::Ty end -Base.size(K::KernelMatrix) = length(K.X), length(K.Y) -Base.getindex(K::KernelMatrix,i::Int,j::Int) = K.f(K.X[i],K.Y[j]) +Base.size(K::KernelMatrix) = length(K.X), length(K.Y) +Base.getindex(K::KernelMatrix, i::Int, j::Int) = K.f(K.X[i], K.Y[j]) rowelements(K::KernelMatrix) = K.X colelements(K::KernelMatrix) = K.Y kernel(K::KernelMatrix) = K.f -function KernelMatrix(f,X,Y) - T = Base.promote_op(f,eltype(X),eltype(Y)) - KernelMatrix{typeof(f),typeof(X),typeof(Y),T}(f,X,Y) +function KernelMatrix(f, X, Y) + T = Base.promote_op(f, eltype(X), eltype(Y)) + return KernelMatrix{typeof(f),typeof(X),typeof(Y),T}(f, X, Y) end diff --git a/src/lu.jl b/src/lu.jl index 0edfb14..bd686d9 100644 --- a/src/lu.jl +++ b/src/lu.jl @@ -1,13 +1,13 @@ const NOPIVOT = VERSION >= v"1.7" ? 
NoPivot : Val{false}

-function Base.getproperty(LU::LU{<:Any,<:HMatrix},s::Symbol)
-    H = getfield(LU,:factors) # the underlying hierarchical matrix
+function Base.getproperty(LU::LU{<:Any,<:HMatrix}, s::Symbol)
+    H = getfield(LU, :factors) # the underlying hierarchical matrix
    if s == :L
        return UnitLowerTriangular(H)
    elseif s == :U
        return UpperTriangular(H)
    else
-        return getfield(LU,s)
+        return getfield(LU, s)
    end
end

@@ -17,13 +17,13 @@ end
Hierarchical LU factorization of `M`, using `comp` to generate the compressed
blocks during the multiplication routines.
"""
-function lu!(M::HMatrix,compressor;threads=use_threads())
+function lu!(M::HMatrix, compressor; threads=use_threads())
    # perform the lu decomposition of M in place
    @timeit_debug "lu factorization" begin
-        _lu!(M,compressor,threads)
+        _lu!(M, compressor, threads)
    end
    # wrap the result in the LU structure
-    return LU(M,LinearAlgebra.BlasInt[],LinearAlgebra.BlasInt(0))
+    return LU(M, LinearAlgebra.BlasInt[], LinearAlgebra.BlasInt(0))
end

"""
@@ -32,9 +32,10 @@ end
Hierarchical LU factorization of `M`, using the `PartialACA(;atol,rtol,rank)`
compressor.
"""
-function lu!(M::HMatrix;atol=0,rank=typemax(Int),rtol=atol>0 || rank<typemax(Int) ? 0 : sqrt(eps(Float64)),kwargs...)
+function lu!(M::HMatrix; atol=0, rank=typemax(Int),
+             rtol=atol > 0 || rank < typemax(Int) ? 0 : sqrt(eps(Float64)), kwargs...)
+    compressor = PartialACA(atol, rank, rtol)
+    return lu!(M, compressor)
end

"""
@@ -42,43 +43,45 @@ end

Hierarchical LU factorization. See [`lu!`](@ref) for the available options.
"""
-lu(M::HMatrix,args...;kwargs...) = lu!(deepcopy(M),args...;kwargs...)
+lu(M::HMatrix, args...; kwargs...) = lu!(deepcopy(M), args...; kwargs...)

-function _lu!(M::HMatrix,compressor,threads)
+function _lu!(M::HMatrix, compressor, threads)
    if isleaf(M)
        d = data(M)
        @assert d isa Matrix
        @timeit_debug "dense lu factorization" begin
-            lu!(d,NOPIVOT())
+            lu!(d, NOPIVOT())
        end
    else
        @assert !hasdata(M)
        chdM = children(M)
-        m,n = size(chdM)
-        for i=1:m
-            _lu!(chdM[i,i],compressor,threads)
-            for j=i+1:n
+        m, n = size(chdM)
+        for i in 1:m
+            _lu!(chdM[i, i], compressor, threads)
+            for j in (i + 1):n
                @sync begin
                    @timeit_debug "ldiv! solution" begin
                        if threads
-                            Threads.@spawn ldiv!(UnitLowerTriangular(chdM[i,i]),chdM[i,j],compressor)
+                            Threads.@spawn ldiv!(UnitLowerTriangular(chdM[i, i]),
+                                                 chdM[i, j], compressor)
                        else
-                            ldiv!(UnitLowerTriangular(chdM[i,i]),chdM[i,j],compressor)
+                            ldiv!(UnitLowerTriangular(chdM[i, i]), chdM[i, j], compressor)
                        end
                    end
                    @timeit_debug "rdiv! solution" begin
                        if threads
-                            Threads.@spawn rdiv!(chdM[j,i],UpperTriangular(chdM[i,i]),compressor)
+                            Threads.@spawn rdiv!(chdM[j, i], UpperTriangular(chdM[i, i]),
+                                                 compressor)
                        else
-                            rdiv!(chdM[j,i],UpperTriangular(chdM[i,i]),compressor)
+                            rdiv!(chdM[j, i], UpperTriangular(chdM[i, i]), compressor)
                        end
                    end
                end
            end
-            for j in i+1:m
-                for k in i+1:n
+            for j in (i + 1):m
+                for k in (i + 1):n
                    @timeit_debug "hmul!"
begin
-                        hmul!(chdM[j,k],chdM[j,i],chdM[i,k],-1,1,compressor)
+                        hmul!(chdM[j, k], chdM[j, i], chdM[i, k], -1, 1, compressor)
                    end
                end
            end
@@ -87,19 +90,19 @@ function _lu!(M::HMatrix,compressor,threads)
    return M
end

-function ldiv!(A::LU{<:Any,<:HMatrix},y::AbstractVector;global_index=true)
-    p = A.factors # underlying data
-    ctree = coltree(p)
-    rtree = rowtree(p)
+function ldiv!(A::LU{<:Any,<:HMatrix}, y::AbstractVector; global_index=true)
+    p = A.factors # underlying data
+    ctree = coltree(p)
+    rtree = rowtree(p)
    # permute input
-    global_index && permute!(y,loc2glob(ctree))
-    L,U = A.L, A.U
+    global_index && permute!(y, loc2glob(ctree))
+    L, U = A.L, A.U
    # solve LUx = y through:
    # (a) L(z) = y
    # (b) Ux = z
-    ldiv!(L,y)
-    ldiv!(U,y)
-    global_index && invpermute!(y,loc2glob(rtree))
+    ldiv!(L, y)
+    ldiv!(U, y)
+    global_index && invpermute!(y, loc2glob(rtree))
    return y
end

@@ -111,19 +114,19 @@ function ldiv!(L::HUnitLowerTriangular, y::AbstractVector)
    else
        @assert !hasdata(H) "only leaves are allowed to have data when using `ldiv`!"
        shift = pivot(H) .- 1
-        chdH = children(H)
-        m, n = size(chdH)
+        chdH = children(H)
+        m, n = size(chdH)
        @assert m === n
-        for i = 1:m
-            irows = colrange(chdH[i,i]) .- shift[2]
-            bi = view(y, irows)
-            for j = 1:(i - 1) # j<i
-                jrows = colrange(chdH[i,j]) .- shift[2]
-                bj = view(y, jrows)
-                _mul131!(bi, chdH[i,j], bj, -1)
+        for i in m:-1:1
+            irows = colrange(chdH[i, i]) .- shift[2]
+            bi = view(y, irows)
+            for j in (i + 1):n # j>i
+                jrows = colrange(chdH[i, j]) .- shift[2]
+                bj = view(y, jrows)
+                _mul131!(bi, chdH[i, j], bj, -1)
            end
            # recursion stage
-            ldiv!(UpperTriangular(chdH[i,i]), bi)
+            ldiv!(UpperTriangular(chdH[i, i]), bi)
        end
    end
    return y
diff --git a/src/multiplication.jl b/src/multiplication.jl
index c873bf2..f163d88 100644
--- a/src/multiplication.jl
+++ b/src/multiplication.jl
@@ -6,21 +6,21 @@ hierarchical matrices and `compressor` is a function/functor used in the
intermediate stages of the multiplication to avoid growing the rank of
admissible blocks after addition is performed.
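For example (a sketch; `C`, `A`, `B` are conformable hierarchical matrices and
`PartialACA` bounds the rank growth):

```julia
hmul!(C, A, B, 1, 0, PartialACA(; rtol=1e-8)) # C ← 1*A*B + 0*C, recompressing blocks
```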
""" -function hmul!(C::HMatrix,A::HMatrix,B::HMatrix,a,b,compressor) - b == true || rmul!(C,b) +function hmul!(C::HMatrix, A::HMatrix, B::HMatrix, a, b, compressor) + b == true || rmul!(C, b) @timeit_debug "constructing plan" begin - plan = plan_hmul(C,A,B,a,1) + plan = plan_hmul(C, A, B, a, 1) end @timeit_debug "executing plan" begin - execute!(plan,compressor) + execute!(plan, compressor) end return C end # disable `mul` of hierarchial matrices -function mul!(C::HMatrix,A::HMatrix,B::HMatrix,a::Number,b::Number) +function mul!(C::HMatrix, A::HMatrix, B::HMatrix, a::Number, b::Number) msg = "use `hmul` to multiply hierarchical matrices" - error(msg) + return error(msg) end """ @@ -44,50 +44,50 @@ mutable struct HMulNode{T} multiplier::Float64 end -function HMulNode(C::HMatrix,a::Number) - T = typeof(C) +function HMulNode(C::HMatrix, a::Number) + T = typeof(C) chdC = children(C) - m,n = size(chdC) - HMulNode{T}(C,Matrix{HMulNode{T}}(undef,m,n),T[],a) + m, n = size(chdC) + return HMulNode{T}(C, Matrix{HMulNode{T}}(undef, m, n), T[], a) end -function build_HMulNode_structure(C::HMatrix,a) - node = HMulNode(C,a) +function build_HMulNode_structure(C::HMatrix, a) + node = HMulNode(C, a) chdC = children(C) - m,n = size(chdC) + m, n = size(chdC) for i in 1:m for j in 1:n - child = build_HMulNode_structure(chdC[i,j],a) - node.children[i,j] = child + child = build_HMulNode_structure(chdC[i, j], a) + node.children[i, j] = child end end - node + return node end -function plan_hmul(C::T,A::T,B::T,a,b) where {T<:HMatrix} +function plan_hmul(C::T, A::T, B::T, a, b) where {T<:HMatrix} @assert b == 1 # root = HMulNode(C) - root = build_HMulNode_structure(C,a) + root = build_HMulNode_structure(C, a) # recurse - _build_hmul_tree!(root,A,B) + _build_hmul_tree!(root, A, B) return root end -function _build_hmul_tree!(tree::HMulNode,A::HMatrix,B::HMatrix) +function _build_hmul_tree!(tree::HMulNode, A::HMatrix, B::HMatrix) C = tree.target if isleaf(A) || isleaf(B) || isleaf(C) - push!(tree.sources,(A,B)) + push!(tree.sources, (A, B)) else - ni,nj = blocksize(C) - _ ,nk = blocksize(A) + ni, nj = blocksize(C) + _, nk = blocksize(A) A_children = children(A) B_children = children(B) C_children = children(C) - for i=1:ni - for j=1:nj - child = tree.children[i,j] - for k=1:nk - _build_hmul_tree!(child,A_children[i,k],B_children[k,j]) + for i in 1:ni + for j in 1:nj + child = tree.children[i, j] + for k in 1:nk + _build_hmul_tree!(child, A_children[i, k], B_children[k, j]) end end end @@ -95,27 +95,31 @@ function _build_hmul_tree!(tree::HMulNode,A::HMatrix,B::HMatrix) return tree end -function Base.show(io::IO,::MIME"text/plain",tree::HMulNode) - print(io,"HMulNode with $(size(children(tree))) children and $(length(sources(tree))) pairs") +function Base.show(io::IO, ::MIME"text/plain", tree::HMulNode) + return print(io, + "HMulNode with $(size(children(tree))) children and $(length(sources(tree))) pairs") end -function Base.show(io::IO,tree::HMulNode) - print(io,"HMulNode with $(size(children(tree))) children and $(length(sources(tree))) pairs") +function Base.show(io::IO, tree::HMulNode) + return print(io, + "HMulNode with $(size(children(tree))) children and $(length(sources(tree))) pairs") end -function Base.show(io::IO,::MIME"text/plain",tree::Adjoint{<:Any,<:HMulNode}) +function Base.show(io::IO, ::MIME"text/plain", tree::Adjoint{<:Any,<:HMulNode}) p = parent(tree) - print(io,"adjoint HMulNode with $(size(children(p))) children and $(length(sources(p))) pairs") + return print(io, + "adjoint HMulNode with 
$(size(children(p))) children and $(length(sources(p))) pairs") end -function Base.show(io::IO,tree::Adjoint{<:Any,<:HMulNode}) +function Base.show(io::IO, tree::Adjoint{<:Any,<:HMulNode}) p = parent(tree) - print(io,"adjoint HMulNode with $(size(children(p))) children and $(length(sources(p))) pairs") + return print(io, + "adjoint HMulNode with $(size(children(p))) children and $(length(sources(p))) pairs") end # compress the operator L = C + ∑ a*Aᵢ*Bᵢ function (paca::PartialACA)(plan::HMulNode) - _aca_partial(plan,:,:,paca.atol,paca.rank,paca.rtol) + return _aca_partial(plan, :, :, paca.atol, paca.rank, paca.rtol) end # getters @@ -125,51 +129,51 @@ multiplier(node::HMulNode) = node.multiplier # Trees interface children(node::HMulNode) = node.children -children(node::HMulNode,idxs...) = node.children[idxs] -parent(node::HMulNode) = node.parent -isleaf(node::HMulNode) = isempty(children(node)) -isroot(node::HMulNode) = parent(node) === node +children(node::HMulNode, idxs...) = node.children[idxs] +parent(node::HMulNode) = node.parent +isleaf(node::HMulNode) = isempty(children(node)) +isroot(node::HMulNode) = parent(node) === node # AbstractMatrix interface Base.size(node::HMulNode) = size(target(node)) Base.eltype(node::HMulNode) = eltype(target(node)) -Base.getindex(node::HMulNode,::Colon,j::Int) = getcol(node,j) +Base.getindex(node::HMulNode, ::Colon, j::Int) = getcol(node, j) -function getcol(node::HMulNode,j) - m,n = size(node) - T = eltype(node) - col = zeros(T,m) - getcol!(col,node,j) +function getcol(node::HMulNode, j) + m, n = size(node) + T = eltype(node) + col = zeros(T, m) + getcol!(col, node, j) return col end -function getcol!(col,node::HMulNode,j) +function getcol!(col, node::HMulNode, j) a = multiplier(node) C = target(node) - m,n = size(C) - T = eltype(C) - ej = zeros(T,n) + m, n = size(C) + T = eltype(C) + ej = zeros(T, n) ej[j] = 1 # compute j-th column of ∑ Aᵢ Bᵢ - for (A,B) in sources(node) - m,k = size(A) - k,n = size(B) - tmp = zeros(T,k) - jg = j + offset(B)[2] # global index on hierarchila matrix B - getcol!(tmp,B,jg) - _hgemv_recursive!(col,A,tmp,offset(A)) + for (A, B) in sources(node) + m, k = size(A) + k, n = size(B) + tmp = zeros(T, k) + jg = j + offset(B)[2] # global index on hierarchila matrix B + getcol!(tmp, B, jg) + _hgemv_recursive!(col, A, tmp, offset(A)) end # multiply by a - rmul!(col,a) + rmul!(col, a) # add the j-th column of C if C has data # jg = j + offset(C)[2] # global index on hierarchila matrix B # cj = getcol(C,jg) # axpy!(1,cj,col) if hasdata(C) - d = data(C) - cj = getcol(d,j) - axpy!(1,cj,col) + d = data(C) + cj = getcol(d, j) + axpy!(1, cj, col) end return col end @@ -178,79 +182,79 @@ adjoint(node::HMulNode) = Adjoint(node) Base.size(adjnode::Adjoint{<:Any,<:HMulNode}) = reverse(size(adjnode.parent)) children(adjnode::Adjoint{<:Any,<:HMulNode}) = adjoint(children(adjnode.parent)) -Base.getindex(adjnode::Adjoint{<:Any,<:HMulNode},::Colon,j::Int) = getcol(adjnode,j) +Base.getindex(adjnode::Adjoint{<:Any,<:HMulNode}, ::Colon, j::Int) = getcol(adjnode, j) -function getcol(adjnode::Adjoint{<:Any,<:HMulNode},j) - m,n = size(adjnode) - T = eltype(adjnode) - col = zeros(T,m) - getcol!(col,adjnode,j) +function getcol(adjnode::Adjoint{<:Any,<:HMulNode}, j) + m, n = size(adjnode) + T = eltype(adjnode) + col = zeros(T, m) + getcol!(col, adjnode, j) return col end -function getcol!(col,adjnode::Adjoint{<:Any,<:HMulNode},j) - node = parent(adjnode) - a = multiplier(node) - C = target(node) - T = eltype(C) - Ct = adjoint(C) - m,n = size(Ct) - ej = 
zeros(T,n)
+function getcol!(col, adjnode::Adjoint{<:Any,<:HMulNode}, j)
+    node = parent(adjnode)
+    a = multiplier(node)
+    C = target(node)
+    T = eltype(C)
+    Ct = adjoint(C)
+    m, n = size(Ct)
+    ej = zeros(T, n)
    ej[j] = 1
    # compute j-th column of ∑ adjoint(Bᵢ)*adjoint(Aᵢ)
-    for (A,B) in sources(node)
-        At,Bt = adjoint(A), adjoint(B)
-        tmp = zeros(T,size(At,1))
+    for (A, B) in sources(node)
+        At, Bt = adjoint(A), adjoint(B)
+        tmp = zeros(T, size(At, 1))
        # _hgemv_recursive!(tmp,At,ej,offset(At))
-        jg = j + offset(At)[2] # global index on hierarchical matrix B
-        getcol!(tmp,At,jg)
-        _hgemv_recursive!(col,Bt,tmp,offset(Bt))
+        jg = j + offset(At)[2] # global index on hierarchical matrix B
+        getcol!(tmp, At, jg)
+        _hgemv_recursive!(col, Bt, tmp, offset(Bt))
    end
    # multiply by a
-    rmul!(col,conj(a))
+    rmul!(col, conj(a))
    # add the j-th column of Ct if it has data
    # jg = j + offset(Ct)[2] # global index on hierarchical matrix B
    # cj = getcol(Ct,jg)
    # axpy!(1,cj,col)
    if hasdata(Ct)
-        d = data(Ct)
-        cj = getcol(d,j)
-        axpy!(1,cj,col)
+        d = data(Ct)
+        cj = getcol(d, j)
+        axpy!(1, cj, col)
    end
    return col
end

-function execute!(node::HMulNode,compressor)
-    execute_node!(node,compressor)
+function execute!(node::HMulNode, compressor)
+    execute_node!(node, compressor)
    C = target(node)
    flush_to_children!(C)
    # FIXME: using @threads below breaks things when the `RkMatrix` has its own
    # buffer due to a race condition that I do not yet understand
    for chd in children(node)
-        execute!(chd,compressor)
+        execute!(chd, compressor)
    end
    return node
end

# non-recursive execution
-function execute_node!(node::HMulNode,compressor)
+function execute_node!(node::HMulNode, compressor)
    C = target(node)
    isempty(sources(node)) && (return node)
    a = multiplier(node)
    if isleaf(C) && !isadmissible(C)
        d = data(C)
-        for (A,B) in sources(node)
+        for (A, B) in sources(node)
            # _mul_leaf!(C,A,B,a,compressor)
-            _mul_dense!(d,A,B,a)
+            _mul_dense!(d, A, B, a)
        end
    else
        R = compressor(node)
-        setdata!(C,R)
+        setdata!(C, R)
    end
    return node
end

-function _mul_dense!(C::Matrix,A,B,a)
+function _mul_dense!(C::Matrix, A, B, a)
    Adata = isleaf(A) ? A.data : A
    Bdata = isleaf(B) ? B.data : B
    if Adata isa HMatrix
@@ -285,25 +289,25 @@ Transfer the block's `data` to its children. At the end, set `H.data` to
`nothing`
"""
function flush_to_children!(H::HMatrix)
    T = eltype(H)
-    isleaf(H) && (return H)
+    isleaf(H) && (return H)
    hasdata(H) || (return H)
-    R::RkMatrix{T} = data(H)
-    _add_to_children!(H,R)
-    setdata!(H,nothing)
+    R::RkMatrix{T} = data(H)
+    _add_to_children!(H, R)
+    setdata!(H, nothing)
    return H
end

-function _add_to_children!(H,R::RkMatrix)
+function _add_to_children!(H, R::RkMatrix)
    shift = pivot(H) .- 1
    for block in children(H)
-        irange = rowrange(block) .- shift[1]
-        jrange = colrange(block) .- shift[2]
-        bdata = data(block)
-        tmp = RkMatrix(R.A[irange,:],R.B[jrange,:])
+        irange = rowrange(block) .- shift[1]
+        jrange = colrange(block) .- shift[2]
+        bdata = data(block)
+        tmp = RkMatrix(R.A[irange, :], R.B[jrange, :])
        if bdata === nothing
-            setdata!(block,tmp)
+            setdata!(block, tmp)
        else
-            axpy!(true,tmp,bdata)
+            axpy!(true, tmp, bdata)
        end
    end
end

@@ -317,38 +321,43 @@ Transfer the block's `data` to its leaves. At the end, set `H.data` to
`nothing`.
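Each leaf receives the restriction `RkMatrix(R.A[irange, :], R.B[jrange, :])` of the
parent's low-rank data, added (via `axpy!`) to any data the leaf already holds.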
""" function flush_to_leaves!(H::HMatrix) T = eltype(H) - isleaf(H) && (return H) + isleaf(H) && (return H) hasdata(H) || (return H) - R::RkMatrix{T} = data(H) - _add_to_leaves!(H,R) - setdata!(H,nothing) + R::RkMatrix{T} = data(H) + _add_to_leaves!(H, R) + setdata!(H, nothing) return H end -function _add_to_leaves!(H,R::RkMatrix) +function _add_to_leaves!(H, R::RkMatrix) shift = pivot(H) .- 1 for block in AbstractTrees.Leaves(H) - irange = rowrange(block) .- shift[1] - jrange = colrange(block) .- shift[2] - bdata = data(block) - tmp = RkMatrix(R.A[irange,:],R.B[jrange,:]) + irange = rowrange(block) .- shift[1] + jrange = colrange(block) .- shift[2] + bdata = data(block) + tmp = RkMatrix(R.A[irange, :], R.B[jrange, :]) if bdata === nothing - setdata!(block,tmp) + setdata!(block, tmp) else - axpy!(true,tmp,bdata) + axpy!(true, tmp, bdata) end end end -_mul111!(C::Union{Matrix,SubArray,Adjoint},A::Union{Matrix,SubArray,Adjoint},B::Union{Matrix,SubArray,Adjoint},a::Number) = mul!(C,A,B,a,true) +function _mul111!(C::Union{Matrix,SubArray,Adjoint}, A::Union{Matrix,SubArray,Adjoint}, + B::Union{Matrix,SubArray,Adjoint}, a::Number) + return mul!(C, A, B, a, true) +end -function _mul112!(C::Union{Matrix,SubArray,Adjoint}, M::Union{Matrix,SubArray,Adjoint}, R::RkMatrix, a::Number) +function _mul112!(C::Union{Matrix,SubArray,Adjoint}, M::Union{Matrix,SubArray,Adjoint}, + R::RkMatrix, a::Number) buffer = M * R.A _mul111!(C, buffer, R.Bt, a) return C end -function _mul113!(C::Union{Matrix,SubArray,Adjoint}, M::Union{Matrix,SubArray,Adjoint}, H::HMatrix, a::Number) +function _mul113!(C::Union{Matrix,SubArray,Adjoint}, M::Union{Matrix,SubArray,Adjoint}, + H::HMatrix, a::Number) T = eltype(C) if hasdata(H) mat = data(H) @@ -361,19 +370,20 @@ function _mul113!(C::Union{Matrix,SubArray,Adjoint}, M::Union{Matrix,SubArray,Ad end end for child in children(H) - shift = pivot(H) .- 1 + shift = pivot(H) .- 1 irange = rowrange(child) .- shift[1] jrange = colrange(child) .- shift[2] - Cview = @views C[:, jrange] - Mview = @views M[:, irange] + Cview = @views C[:, jrange] + Mview = @views M[:, irange] _mul113!(Cview, Mview, child, a) end return C end -function _mul121!(C::Union{Matrix,SubArray,Adjoint}, R::RkMatrix, M::Union{Matrix,SubArray,Adjoint}, a::Number) +function _mul121!(C::Union{Matrix,SubArray,Adjoint}, R::RkMatrix, + M::Union{Matrix,SubArray,Adjoint}, a::Number) buffer = R.Bt * M - _mul111!(C, R.A, buffer, a) + return _mul111!(C, R.A, buffer, a) end function _mul122!(C::Union{Matrix,SubArray,Adjoint}, R::RkMatrix, S::RkMatrix, a::Number) @@ -393,7 +403,8 @@ function _mul123!(C::Union{Matrix,SubArray,Adjoint}, R::RkMatrix, H::HMatrix, a: return C end -function _mul131!(C::Union{Matrix,SubArray,Adjoint}, H::HMatrix, M::Union{Matrix,SubArray,Adjoint}, a::Number) +function _mul131!(C::Union{Matrix,SubArray,Adjoint}, H::HMatrix, + M::Union{Matrix,SubArray,Adjoint}, a::Number) if isleaf(H) mat = data(H) if mat isa Matrix @@ -405,21 +416,21 @@ function _mul131!(C::Union{Matrix,SubArray,Adjoint}, H::HMatrix, M::Union{Matrix end end for child in children(H) - shift = pivot(H) .- 1 + shift = pivot(H) .- 1 irange = rowrange(child) .- shift[1] jrange = colrange(child) .- shift[2] - Cview = view(C, irange, :) - Mview = view(M, jrange, :) + Cview = view(C, irange, :) + Mview = view(M, jrange, :) _mul131!(Cview, child, Mview, a) end return C end function _mul132!(C::Union{Matrix,SubArray,Adjoint}, H::HMatrix, R::RkMatrix, a::Number) - T = promote_type(eltype(H),eltype(R)) + T = promote_type(eltype(H), eltype(R)) 
buffer = zeros(T, size(H, 1), size(R.A, 2)) - _mul131!(buffer,H,R.A,1) - _mul111!(C, buffer, R.Bt, a,) + _mul131!(buffer, H, R.A, 1) + _mul111!(C, buffer, R.Bt, a) return C end @@ -430,18 +441,19 @@ end ############################################################################################ # 1.2.1 -function mul!(y::AbstractVector,R::RkMatrix,x::AbstractVector,a::Number,b::Number) +function mul!(y::AbstractVector, R::RkMatrix, x::AbstractVector, a::Number, b::Number) # tmp = R.Bt*x - tmp = mul!(R.buffer,adjoint(R.B),x) - mul!(y,R.A,tmp,a,b) + tmp = mul!(R.buffer, adjoint(R.B), x) + return mul!(y, R.A, tmp, a, b) end # 1.2.1 -function mul!(y::AbstractVector,adjR::Adjoint{<:Any,<:RkMatrix},x::AbstractVector,a::Number,b::Number) +function mul!(y::AbstractVector, adjR::Adjoint{<:Any,<:RkMatrix}, x::AbstractVector, + a::Number, b::Number) R = parent(adjR) # tmp = R.At*x - tmp = mul!(R.buffer,adjoint(R.A),x) - mul!(y,R.B,tmp,a,b) + tmp = mul!(R.buffer, adjoint(R.A), x) + return mul!(y, R.B, tmp, a, b) end # 1.3.1 @@ -450,25 +462,25 @@ end Perform `y <-- H*x*a + y*b` in place. """ -function mul!(y::AbstractVector,A::HMatrix,x::AbstractVector,a::Number=1,b::Number=0; - global_index=use_global_index(),threads=use_threads()) +function mul!(y::AbstractVector, A::HMatrix, x::AbstractVector, a::Number=1, b::Number=0; + global_index=use_global_index(), threads=use_threads()) # since the HMatrix represents A = inv(Pr)*H*Pc, where Pr and Pc are row and column # permutations, we need first to rewrite C <-- b*C + a*(inv(Pr)*H*Pc)*B as # C <-- inv(Pr)*(b*Pr*C + a*H*(Pc*B)). Following this rewrite, the # multiplication is performed by first defining B <-- Pc*B, and C <-- # Pr*C, doing the multiplication with the permuted entries, and then # permuting the result back C <-- inv(Pr)*C at the end. - ctree = coltree(A) - rtree = rowtree(A) + ctree = coltree(A) + rtree = rowtree(A) # permute input if global_index - x = x[colperm(A)] - y = permute!(y,rowperm(A)) - rmul!(x,a) # multiply in place since this is a new copy, so does not mutate exterior x + x = x[colperm(A)] + y = permute!(y, rowperm(A)) + rmul!(x, a) # multiply in place since this is a new copy, so does not mutate exterior x elseif a != 1 - x = a*x # new copy of x since we should not mutate the external x in mul! + x = a * x # new copy of x since we should not mutate the external x in mul! end - iszero(b) ? fill!(y,zero(eltype(y))) : rmul!(y,b) + iszero(b) ? fill!(y, zero(eltype(y))) : rmul!(y, b) # offset in case A is not indexed starting at (1,1); e.g. A is not the root # of and HMatrix offset = pivot(A) .- 1 @@ -485,22 +497,23 @@ function mul!(y::AbstractVector,A::HMatrix,x::AbstractVector,a::Number=1,b::Numb # Right now the hilbert partition is chosen by default without proper # testing. 
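    # Note: the partition computed below is cached in `CACHED_PARTITIONS` (a
    # `WeakKeyDict` keyed on object identity, see src/partitions.jl), so only the
    # first threaded product with a given matrix pays the partitioning cost.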
@timeit_debug "partitioning leaves" begin - haskey(CACHED_PARTITIONS,A) || hilbert_partition(A,Threads.nthreads(),_cost_gemv) + haskey(CACHED_PARTITIONS, A) || + hilbert_partition(A, Threads.nthreads(), _cost_gemv) # haskey(CACHED_PARTITIONS,A) || col_partition(A,Threads.nthreads(),_cost_gemv) # haskey(CACHED_PARTITIONS,A) || row_partition(A,Threads.nthreads(),_cost_gemv) end @timeit_debug "threaded multiplication" begin p = CACHED_PARTITIONS[A] - _hgemv_static_partition!(y,x,p.partition,offset) + _hgemv_static_partition!(y, x, p.partition, offset) # _hgemv_threads!(y,x,p.partition,offset) # threaded implementation end else @timeit_debug "serial multiplication" begin - _hgemv_recursive!(y,A,x,offset) # serial implementation + _hgemv_recursive!(y, A, x, offset) # serial implementation end end # permute output - global_index && invpermute!(y,rowperm(A)) + global_index && invpermute!(y, rowperm(A)) return y end @@ -513,74 +526,75 @@ rowrange(A) - offset[1]` and `J = rowrange(B) - offset[2]`. The `offset` argument is used on the caller side to signal if the original hierarchical matrix had a `pivot` other than `(1,1)`. """ -function _hgemv_recursive!(C::AbstractVector,A::Union{HMatrix,Adjoint{<:Any,<:HMatrix}},B::AbstractVector,offset) +function _hgemv_recursive!(C::AbstractVector, A::Union{HMatrix,Adjoint{<:Any,<:HMatrix}}, + B::AbstractVector, offset) T = eltype(A) if isleaf(A) irange = rowrange(A) .- offset[1] jrange = colrange(A) .- offset[2] - d = data(A) + d = data(A) if T <: SMatrix # FIXME: there is bug with gemv and static arrays, so we convert # them to matrices of n × 1 # (https://github.com/JuliaArrays/StaticArrays.jl/issues/966#issuecomment-943679214). - mul!(view(C,irange,1:1),d,view(B,jrange,1:1),1,1) + mul!(view(C, irange, 1:1), d, view(B, jrange, 1:1), 1, 1) else # C and B are the "global" vectors handled by the caller, so a view # is needed. 
- mul!(view(C,irange),d,view(B,jrange),1,1) + mul!(view(C, irange), d, view(B, jrange), 1, 1) end else for block in children(A) - _hgemv_recursive!(C,block,B,offset) + _hgemv_recursive!(C, block, B, offset) end end return C end -function _hgemv_threads!(C::AbstractVector,B::AbstractVector,partition,offset) - nt = Threads.nthreads() +function _hgemv_threads!(C::AbstractVector, B::AbstractVector, partition, offset) + nt = Threads.nthreads() # make `nt` copies of C and run in parallel - Cthreads = [zero(C) for _ in 1:nt] + Cthreads = [zero(C) for _ in 1:nt] @sync for p in partition for block in p Threads.@spawn begin id = Threads.threadid() - _hgemv_recursive!(Cthreads[id],block,B,offset) + _hgemv_recursive!(Cthreads[id], block, B, offset) end end end # reduce for Ct in Cthreads - axpy!(1,Ct,C) + axpy!(1, Ct, C) end return C end -function _hgemv_static_partition!(C::AbstractVector,B::AbstractVector,partition,offset) +function _hgemv_static_partition!(C::AbstractVector, B::AbstractVector, partition, offset) # create a lock for the reduction step T = eltype(C) mutex = ReentrantLock() - np = length(partition) - nt = Threads.nthreads() - Cthreads = [zero(C) for _ in 1:nt] + np = length(partition) + nt = Threads.nthreads() + Cthreads = [zero(C) for _ in 1:nt] @sync for n in 1:np Threads.@spawn begin - id = Threads.threadid() + id = Threads.threadid() leaves = partition[n] - Cloc = Cthreads[id] + Cloc = Cthreads[id] for leaf in leaves irange = rowrange(leaf) .- offset[1] jrange = colrange(leaf) .- offset[2] - data = leaf.data + data = leaf.data if T <: SVector - mul!(view(Cloc,irange,1:1),data,view(B,jrange,1:1),1,1) + mul!(view(Cloc, irange, 1:1), data, view(B, jrange, 1:1), 1, 1) else - mul!(view(Cloc,irange),data,view(B,jrange),1,1) + mul!(view(Cloc, irange), data, view(B, jrange), 1, 1) end end # reduction lock(mutex) do - axpy!(1,Cloc,C) + return axpy!(1, Cloc, C) end end end @@ -614,10 +628,10 @@ end A proxy for the computational cost of a matrix/vector product. """ function _cost_gemv(R::RkMatrix) - rank(R)*sum(size(R)) + return rank(R) * sum(size(R)) end function _cost_gemv(M::Matrix) - length(M) + return length(M) end function _cost_gemv(H::HMatrix) acc = 0.0 diff --git a/src/partitions.jl b/src/partitions.jl index d51c605..c581f8f 100644 --- a/src/partitions.jl +++ b/src/partitions.jl @@ -23,8 +23,8 @@ recompute the partition for each matrix/vector product. """ const CACHED_PARTITIONS = WeakKeyDict{HMatrix,Partition}() -Base.hash(A::AbstractHMatrix,h::UInt) = hash(objectid(A),h) -Base.:(==)(A::AbstractHMatrix,B::AbstractHMatrix) = A === B +Base.hash(A::AbstractHMatrix, h::UInt) = hash(objectid(A), h) +Base.:(==)(A::AbstractHMatrix, B::AbstractHMatrix) = A === B """ build_sequence_partition(seq,nq,cost,nmax) @@ -33,7 +33,7 @@ Partition the sequence `seq` into `nq` contiguous subsequences with a maximum of cost of `nmax` per set. Note that if `nmax` is too small, this may not be possible (see [`has_partition`](@ref)). """ -function build_sequence_partition(seq,np,cost,cmax) +function build_sequence_partition(seq, np, cost, cmax) acc = 0 partition = [empty(seq) for _ in 1:np] k = 1 @@ -42,11 +42,11 @@ function build_sequence_partition(seq,np,cost,cmax) acc += c if acc > cmax k += 1 - push!(partition[k],el) + push!(partition[k], el) acc = c @assert k <= np "unable to build sequence partition. Value of `cmax` may be too small." 
else
-            push!(partition[k],el)
+            push!(partition[k], el)
        end
    end
    return partition
end

@@ -59,17 +59,17 @@ Find an approximation to the cost of an optimal partitioning of `seq` into
`nq` contiguous segments. The optimal cost is the smallest number `cmax` such that
`has_partition(seq,nq,cost,cmax)` returns `true`.
"""
-function find_optimal_cost(seq,np,cost=identity,tol=1)
-    lbound = maximum(cost,seq) |> Float64
-    ubound = sum(cost,seq) |> Float64
-    guess = (lbound+ubound)/2
+function find_optimal_cost(seq, np, cost=identity, tol=1)
+    lbound = Float64(maximum(cost, seq))
+    ubound = Float64(sum(cost, seq))
+    guess = (lbound + ubound) / 2
    while ubound - lbound ≥ tol
-        if has_partition(seq,np,guess,cost)
+        if has_partition(seq, np, guess, cost)
            ubound = guess
        else
            lbound = guess
        end
-        guess = (lbound+ubound)/2
+        guess = (lbound + ubound) / 2
    end
    return ubound
end

@@ -85,9 +85,9 @@ which minimizes the maximum `cost` over all possible partitions of `seq` into

The generated partition is optimal up to a tolerance `tol`; for integer valued
`cost`, setting `tol=1` means the partition is optimal.
"""
-function find_optimal_partition(seq,np,cost=(x)->1,tol=1)
-    cmax = find_optimal_cost(seq,np,cost,tol)
-    p = build_sequence_partition(seq,np,cost,cmax)
+function find_optimal_partition(seq, np, cost=(x) -> 1, tol=1)
+    cmax = find_optimal_cost(seq, np, cost, tol)
+    p = build_sequence_partition(seq, np, cost, cmax)
    return p
end

@@ -97,9 +97,9 @@ end
Given a sequence `seq`, determine whether or not a partition into `np` segments is
possible where the `cost` of each segment does not exceed `cmax`.
"""
-function has_partition(seq,np,cmax,cost=identity)
+function has_partition(seq, np, cmax, cost=identity)
    acc = 0
-    k = 1
+    k = 1
    for el in seq
        c = cost(el)
        acc += c
@@ -119,63 +119,63 @@ Partition the leaves of `H` into `np` sequences of approximately equal cost (as
determined by the `cost` function) while also trying to maximize the locality
of each partition.
"""
-function hilbert_partition(H::HMatrix,np=Threads.nthreads(),cost=_cost_gemv)
+function hilbert_partition(H::HMatrix, np=Threads.nthreads(), cost=_cost_gemv)
    # the hilbert curve will be indexed from (0,0) × (N-1,N-1), so set N to be
    # the smallest power of two larger than max(m,n), where m,n = size(H)
-    m,n = size(H)
-    N = max(m,n)
-    N = nextpow(2,N)
+    m, n = size(H)
+    N = max(m, n)
+    N = nextpow(2, N)
    # sort the leaves by their hilbert index
-    leaves = AbstractTrees.Leaves(H) |> collect
+    leaves = collect(AbstractTrees.Leaves(H))
    hilbert_indices = map(leaves) do leaf
        # use the center of the leaf as a cartesian index
-        i,j = pivot(leaf) .- 1 .+ size(leaf) .÷ 2
-        hilbert_cartesian_to_linear(N,i,j)
+        i, j = pivot(leaf) .- 1 .+ size(leaf) .÷ 2
+        return hilbert_cartesian_to_linear(N, i, j)
    end
    p = sortperm(hilbert_indices)
-    permute!(leaves,p)
+    permute!(leaves, p)
    # now compute a quasi-optimal partition of leaves based on `cost_mv`
-    cmax = find_optimal_cost(leaves,np,cost,1)
-    _partition = build_sequence_partition(leaves,np,cost,cmax)
-    partition = Partition(_partition,:hilbert)
-    push!(CACHED_PARTITIONS,H=>partition)
+    cmax = find_optimal_cost(leaves, np, cost, 1)
+    _partition = build_sequence_partition(leaves, np, cost, cmax)
+    partition = Partition(_partition, :hilbert)
+    push!(CACHED_PARTITIONS, H => partition)
    return partition
end

# TODO: benchmark the different partitioning strategies for gemv. Is the hilbert
# partition really faster than the simpler alternatives (row partition, col partition)?
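# Worked example of the sequence partitioning above (a sketch; these are
# internal helpers): splitting the cost sequence [5, 1, 1, 1, 5] into np = 2
# contiguous chunks with `cost = identity` bisects between lbound = 5 and
# ubound = 13, settles on cmax = 7, and the greedy fill in
# `build_sequence_partition` then returns [[5, 1, 1], [1, 5]].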
-function row_partition(H::HMatrix,np=Threads.nthreads(),cost=_cost_gemv) +function row_partition(H::HMatrix, np=Threads.nthreads(), cost=_cost_gemv) # sort the leaves by their row index - leaves = filter_tree(x -> isleaf(x),H) + leaves = filter_tree(x -> isleaf(x), H) row_indices = map(leaves) do leaf # use the center of the leaf as a cartesian index - i,j = pivot(leaf) + i, j = pivot(leaf) return i end p = sortperm(row_indices) - permute!(leaves,p) + permute!(leaves, p) # now compute a quasi-optimal partition of leaves based `cost_mv` - cmax = find_optimal_cost(leaves,np,cost,1) - _partition = build_sequence_partition(leaves,np,cost,cmax) - partition = Partition(_partition,:row) - push!(CACHED_PARTITIONS,H=>partition) + cmax = find_optimal_cost(leaves, np, cost, 1) + _partition = build_sequence_partition(leaves, np, cost, cmax) + partition = Partition(_partition, :row) + push!(CACHED_PARTITIONS, H => partition) return partition end -function col_partition(H::HMatrix,np=Threads.nthreads(),cost=_cost_gemv) +function col_partition(H::HMatrix, np=Threads.nthreads(), cost=_cost_gemv) # sort the leaves by their row index - leaves = filter_tree(x -> isleaf(x),H) + leaves = filter_tree(x -> isleaf(x), H) row_indices = map(leaves) do leaf # use the center of the leaf as a cartesian index - i,j = pivot(leaf) + i, j = pivot(leaf) return j end p = sortperm(row_indices) - permute!(leaves,p) + permute!(leaves, p) # now compute a quasi-optimal partition of leaves based `cost_mv` - cmax = find_optimal_cost(leaves,np,cost,1) - _partition = build_sequence_partition(leaves,np,cost,cmax) - partition = Partition(_partition,:col) - push!(CACHED_PARTITIONS,H=>partition) + cmax = find_optimal_cost(leaves, np, cost, 1) + _partition = build_sequence_partition(leaves, np, cost, cmax) + partition = Partition(_partition, :col) + push!(CACHED_PARTITIONS, H => partition) return partition end diff --git a/src/rkmatrix.jl b/src/rkmatrix.jl index de2c889..9f64fc4 100644 --- a/src/rkmatrix.jl +++ b/src/rkmatrix.jl @@ -11,29 +11,29 @@ mutable struct RkMatrix{T} <: AbstractMatrix{T} A::Matrix{T} B::Matrix{T} buffer::Vector{T} # used for gemv routines to avoid allocations - function RkMatrix(A::Matrix{T},B::Matrix{T}) where {T} - @assert size(A,2) == size(B,2) "second dimension of `A` and `B` must match" - m,r = size(A) - n = size(B,1) - if r*(m+n) >= m*n + function RkMatrix(A::Matrix{T}, B::Matrix{T}) where {T} + @assert size(A, 2) == size(B, 2) "second dimension of `A` and `B` must match" + m, r = size(A) + n = size(B, 1) + if r * (m + n) >= m * n @debug "Inefficient RkMatrix:" size(A) size(B) # error("Inefficient RkMatrix") end - buffer = Vector{T}(undef,r) - new{T}(A,B,buffer) + buffer = Vector{T}(undef, r) + return new{T}(A, B, buffer) end end -RkMatrix(A,B) = RkMatrix(promote(A,B)...) +RkMatrix(A, B) = RkMatrix(promote(A, B)...) -function Base.setproperty!(R::RkMatrix,s::Symbol,mat::Matrix) - setfield!(R,s,mat) +function Base.setproperty!(R::RkMatrix, s::Symbol, mat::Matrix) + setfield!(R, s, mat) # resize buffer - r = size(mat,2) - resize!(R.buffer,r) + r = size(mat, 2) + resize!(R.buffer, r) return R end -function Base.getindex(rmat::RkMatrix,i::Int,j::Int) +function Base.getindex(rmat::RkMatrix, i::Int, j::Int) msg = """ method `getindex(::AbstractHMatrix,args...)` has been disabled to avoid performance pitfalls. 
Unless you made an explicit call to `getindex`, this @@ -44,7 +44,7 @@ function Base.getindex(rmat::RkMatrix,i::Int,j::Int) r = rank(rmat) acc = zero(eltype(rmat)) for k in 1:r - acc += rmat.A[i,k] * conj(rmat.B[j,k]) + acc += rmat.A[i, k] * conj(rmat.B[j, k]) end else error(msg) @@ -53,15 +53,15 @@ function Base.getindex(rmat::RkMatrix,i::Int,j::Int) end # some "fast" ways of computing a column of R and row of ajoint(R) -Base.getindex(R::RkMatrix, ::Colon, j::Int) = getcol(R,j) +Base.getindex(R::RkMatrix, ::Colon, j::Int) = getcol(R, j) """ getcol!(col,M::AbstractMatrix,j) Fill the entries of `col` with column `j` of `M`. """ -function getcol!(col,R::RkMatrix,j::Int) - mul!(col,R.A,conj(view(R.B,j,:))) +function getcol!(col, R::RkMatrix, j::Int) + return mul!(col, R.A, conj(view(R.B, j, :))) end """ @@ -69,26 +69,26 @@ end Return a vector containing the `j`-th column of `M`. """ -function getcol(R::RkMatrix,j::Int) - m = size(R,1) +function getcol(R::RkMatrix, j::Int) + m = size(R, 1) T = eltype(R) - col = zeros(T,m) - getcol!(col,R,j) + col = zeros(T, m) + return getcol!(col, R, j) end -function getcol!(col,Ra::Adjoint{<:Any,<:RkMatrix},j::Int) +function getcol!(col, Ra::Adjoint{<:Any,<:RkMatrix}, j::Int) R = parent(Ra) - mul!(col,R.B,conj(view(R.A,j,:))) + return mul!(col, R.B, conj(view(R.A, j, :))) end -function getcol(Ra::Adjoint{<:Any,<:RkMatrix},j::Int) - m = size(Ra,1) +function getcol(Ra::Adjoint{<:Any,<:RkMatrix}, j::Int) + m = size(Ra, 1) T = eltype(Ra) - col = zeros(T,m) - getcol!(col,Ra,j) + col = zeros(T, m) + return getcol!(col, Ra, j) end -Base.getindex(Ra::Adjoint{<:Any,<:RkMatrix}, ::Colon, j::Int) = getcol(Ra,j) +Base.getindex(Ra::Adjoint{<:Any,<:RkMatrix}, ::Colon, j::Int) = getcol(Ra, j) """ RkMatrix(A::Vector{<:Vector},B::Vector{<:Vector}) @@ -97,35 +97,37 @@ Construct an `RkMatrix` from a vector of vectors. Assumes that `length(A) == length(B)`, which determines the rank, and that all vectors in `A` (resp. `B`) have the same length `m` (resp. `n`). """ -function RkMatrix(_A::Vector{V},_B::Vector{V}) where {V<:AbstractVector} - T = eltype(V) +function RkMatrix(_A::Vector{V}, _B::Vector{V}) where {V<:AbstractVector} + T = eltype(V) @assert length(_A) == length(_B) - k = length(_A) - m = length(first(_A)) - n = length(first(_B)) - A = Matrix{T}(undef,m,k) - B = Matrix{T}(undef,n,k) + k = length(_A) + m = length(first(_A)) + n = length(first(_B)) + A = Matrix{T}(undef, m, k) + B = Matrix{T}(undef, n, k) for i in 1:k - copyto!(view(A,:,i),_A[i]) - copyto!(view(B,:,i),_B[i]) + copyto!(view(A, :, i), _A[i]) + copyto!(view(B, :, i), _B[i]) end - return RkMatrix(A,B) + return RkMatrix(A, B) end Base.eltype(::RkMatrix{T}) where {T} = T -Base.size(rmat::RkMatrix) = (size(rmat.A,1), size(rmat.B,1)) -Base.length(rmat::RkMatrix) = prod(size(rmat)) -Base.isapprox(rmat::RkMatrix,B::AbstractArray,args...;kwargs...) = isapprox(Matrix(rmat),B,args...;kwargs...) +Base.size(rmat::RkMatrix) = (size(rmat.A, 1), size(rmat.B, 1)) +Base.length(rmat::RkMatrix) = prod(size(rmat)) +function Base.isapprox(rmat::RkMatrix, B::AbstractArray, args...; kwargs...) + return isapprox(Matrix(rmat), B, args...; kwargs...) 
+end

-rank(M::RkMatrix) = size(M.A,2)
+rank(M::RkMatrix) = size(M.A, 2)

-function Base.getproperty(R::RkMatrix,s::Symbol)
-    if s == :Bt
+function Base.getproperty(R::RkMatrix, s::Symbol)
+    if s == :Bt
        return adjoint(R.B)
-    elseif s == :At
+    elseif s == :At
        return adjoint(R.A)
    else
-        return getfield(R,s)
+        return getfield(R, s)
    end
end

@@ -135,17 +137,18 @@ end

Concatenate `M1` and `M2` horizontally to produce a new `RkMatrix` of rank
`rank(M1)+rank(M2)`.
"""
-function Base.hcat(M1::RkMatrix{T},M2::RkMatrix{T}) where {T}
-    m,n = size(M1)
-    s,t = size(M2)
-    (m == s) || throw(ArgumentError("number of rows of each array must match: got ($m,$s)"))
-    r1 = size(M1.A,2)
-    r2 = size(M2.A,2)
-    A = hcat(M1.A,M2.A)
-    B1 = vcat(M1.B,zeros(T,t,r1))
-    B2 = vcat(zeros(T,n,r2),M2.B)
-    B = hcat(B1,B2)
-    return RkMatrix(A,B)
+function Base.hcat(M1::RkMatrix{T}, M2::RkMatrix{T}) where {T}
+    m, n = size(M1)
+    s, t = size(M2)
+    (m == s) ||
+        throw(ArgumentError("number of rows of each array must match: got ($m,$s)"))
+    r1 = size(M1.A, 2)
+    r2 = size(M2.A, 2)
+    A = hcat(M1.A, M2.A)
+    B1 = vcat(M1.B, zeros(T, t, r1))
+    B2 = vcat(zeros(T, n, r2), M2.B)
+    B = hcat(B1, B2)
+    return RkMatrix(A, B)
end

"""
@@ -154,28 +157,29 @@ end

Concatenate `M1` and `M2` vertically to produce a new `RkMatrix` of rank
`rank(M1)+rank(M2)`.
"""
-function Base.vcat(M1::RkMatrix{T},M2::RkMatrix{T}) where {T}
-    m,n = size(M1)
-    s,t = size(M2)
-    n == t || throw(ArgumentError("number of columns of each array must match (got ($n,$t))"))
-    r1 = size(M1.A,2)
-    r2 = size(M2.A,2)
-    A1 = vcat(M1.A,zeros(T,s,r1))
-    A2 = vcat(zeros(T,m,r2),M2.A)
-    A = hcat(A1,A2)
-    B = hcat(M1.B,M2.B)
-    return RkMatrix(A,B)
+function Base.vcat(M1::RkMatrix{T}, M2::RkMatrix{T}) where {T}
+    m, n = size(M1)
+    s, t = size(M2)
+    n == t ||
+        throw(ArgumentError("number of columns of each array must match (got ($n,$t))"))
+    r1 = size(M1.A, 2)
+    r2 = size(M2.A, 2)
+    A1 = vcat(M1.A, zeros(T, s, r1))
+    A2 = vcat(zeros(T, m, r2), M2.A)
+    A = hcat(A1, A2)
+    B = hcat(M1.B, M2.B)
+    return RkMatrix(A, B)
end

-Base.copy(R::RkMatrix) = RkMatrix(copy(R.A),copy(R.B))
+Base.copy(R::RkMatrix) = RkMatrix(copy(R.A), copy(R.B))

function Base.Matrix(R::RkMatrix{<:Number})
-    Matrix(R.A*R.Bt)
+    return Matrix(R.A * R.Bt)
end

function Base.Matrix(R::RkMatrix{<:SMatrix})
    # collect must be used when we have a matrix of `SMatrix` because of this issue:
    # https://github.com/JuliaArrays/StaticArrays.jl/issues/966#issuecomment-943679214
-    R.A*collect(R.Bt)
+    return R.A * collect(R.Bt)
end

"""
@@ -184,9 +188,9 @@ end

Construct an [`RkMatrix`](@ref) from an `SVD` factorization.
"""
function RkMatrix(F::SVD)
-    A = F.U*Diagonal(F.S)
-    B = copy(F.V)
-    return RkMatrix(A,B)
+    A = F.U * Diagonal(F.S)
+    B = copy(F.V)
+    return RkMatrix(A, B)
end

"""
@@ -197,7 +201,7 @@ Construct an [`RkMatrix`](@ref) from a `Matrix` by passing through the full
"""
function RkMatrix(M::Matrix)
    F = svd(M)
-    RkMatrix(F)
+    return RkMatrix(F)
end

"""
@@ -206,7 +210,7 @@ end

The number of entries stored in the representation. Note that this is *not*
`length(R)`.
"""
-num_stored_elements(R::RkMatrix) = size(R.A,2)*(sum(size(R)))
+num_stored_elements(R::RkMatrix) = size(R.A, 2) * (sum(size(R)))

"""
    compression_ratio(R::RkMatrix)

@@ -222,15 +226,16 @@ compression_ratio(R::RkMatrix) = prod(size(R)) / num_stored_elements(R)

# problem in LinearAlgebra for the generic multiplication mul!(C,A,B,a,b) when
# C and B are vectors of static matrices, and A is a matrix of static
# matrices. Should eventually be removed.
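# Worked example for `compression_ratio` above: with m = n = 10_000 and rank
# r = 10, an RkMatrix stores num_stored_elements(R) = r*(m + n) = 200_000
# entries instead of the m*n = 10^8 entries of the dense block, giving
# compression_ratio(R) = 500.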
-function mul!(C::AbstractVector,Rk::RkMatrix{T},F::AbstractVector,a::Number,b::Number) where {T<:SMatrix} - m,n = size(Rk) - r = rank(Rk) - tmp = Rk.Bt*F - rmul!(C,b) +function mul!(C::AbstractVector, Rk::RkMatrix{T}, F::AbstractVector, a::Number, + b::Number) where {T<:SMatrix} + m, n = size(Rk) + r = rank(Rk) + tmp = Rk.Bt * F + rmul!(C, b) for k in 1:r tmp[k] *= a for i in 1:m - C[i] += Rk.A[i,k]*tmp[k] + C[i] += Rk.A[i, k] * tmp[k] end end return C diff --git a/src/splitter.jl b/src/splitter.jl index 9687010..04046a1 100644 --- a/src/splitter.jl +++ b/src/splitter.jl @@ -134,7 +134,7 @@ the "left" node) or `false` (point sorted on the "right" node). At the end a minimal `HyperRectangle` containing all left/right points is created. """ function binary_split!(cluster::ClusterTree{N,T}, predicate::Function) where {N,T} - f = predicate + f = predicate rec = container(cluster) els = root_elements(cluster) irange = index_range(cluster) @@ -156,12 +156,12 @@ function binary_split!(cluster::ClusterTree{N,T}, predicate::Function) where {N, else xl_right = min.(xl_right, pt) xu_right = max.(xu_right, pt) - buff[n-npts_right] = l2g[i] + buff[n - npts_right] = l2g[i] npts_right += 1 end end # bounding boxes - left_rec = HyperRectangle(xl_left, xu_left) + left_rec = HyperRectangle(xl_left, xu_left) right_rec = HyperRectangle(xl_right, xu_right) @assert npts_left + npts_right == length(irange) "elements lost during split" # new ranges for children cluster diff --git a/src/triangular.jl b/src/triangular.jl index 902d076..3633f83 100644 --- a/src/triangular.jl +++ b/src/triangular.jl @@ -1,5 +1,5 @@ const HUnitLowerTriangular = UnitLowerTriangular{<:Any,<:HMatrix} -const HUpperTriangular = UpperTriangular{<:Any,<:HMatrix} +const HUpperTriangular = UpperTriangular{<:Any,<:HMatrix} function ldiv!(L::HUnitLowerTriangular, B::AbstractMatrix) H = parent(L) @@ -9,19 +9,19 @@ function ldiv!(L::HUnitLowerTriangular, B::AbstractMatrix) else @assert !hasdata(H) "only leaves are allowed to have data when using `ldiv`!" shift = pivot(H) .- 1 - chdH = children(H) - m, n = size(chdH) + chdH = children(H) + m, n = size(chdH) @assert m === n - for i = 1:m - irows = colrange(chdH[i,i]) .- shift[2] - bi = view(B, irows, :) - for j = 1:(i - 1)# ji - jrows = colrange(chdH[i,j]) .- shift[2] - bj = view(B, jrows, :) - _mul131!(bi, chdH[i,j], bj, -1) + for i in m:-1:1 + irows = colrange(chdH[i, i]) .- shift[2] + bi = view(B, irows, :) + for j in (i + 1):n # j>i + jrows = colrange(chdH[i, j]) .- shift[2] + bj = view(B, jrows, :) + _mul131!(bi, chdH[i, j], bj, -1) end # recursion stage - ldiv!(UpperTriangular(chdH[i,i]), bi) + ldiv!(UpperTriangular(chdH[i, i]), bi) end end return B @@ -93,20 +93,20 @@ function rdiv!(B::StridedMatrix, U::HUpperTriangular) rdiv!(B, UpperTriangular(d)) # b <-- b/L else @assert !hasdata(H) "only leaves are allowed to have data when using `rdiv`!" 
-        shift = pivot(H) .- 1 |> reverse
-        chdH = children(H)
-        m, n = size(chdH)
+        shift = reverse(pivot(H) .- 1)
+        chdH = children(H)
+        m, n = size(chdH)
        @assert m === n
-        for i = 1:m
-            icols = rowrange(chdH[i,i]) .- shift[1]
-            bi = view(B, :, icols)
-            for j = 1:(i - 1)
-                jcols = rowrange(chdH[j,i]) .- shift[1]
-                bj = view(B, :, jcols)
-                _mul113!(bi, bj, chdH[j,i], -1)
+        for i in 1:m
+            icols = rowrange(chdH[i, i]) .- shift[1]
+            bi = view(B, :, icols)
+            for j in 1:(i - 1)
+                jcols = rowrange(chdH[j, i]) .- shift[1]
+                bj = view(B, :, jcols)
+                _mul113!(bi, bj, chdH[j, i], -1)
            end
            # recursion stage
-            rdiv!(bi, UpperTriangular(chdH[i,i]))
+            rdiv!(bi, UpperTriangular(chdH[i, i]))
        end
    end
    return B
@@ -135,13 +135,13 @@ function rdiv!(X::AbstractHMatrix, U::HUpperTriangular, compressor)
        chdH = children(H)
        m, n = size(chdH)
        for k in 1:size(chdX, 1)
-            for i = 1:m
-                for j = 1:(i - 1)
+            for i in 1:m
+                for j in 1:(i - 1)
                    @timeit_debug "hmul!" begin
-                        hmul!(chdX[k,i], chdX[k,j], chdH[j,i], -1, 1, compressor)
+                        hmul!(chdX[k, i], chdX[k, j], chdH[j, i], -1, 1, compressor)
                    end
                end
-                rdiv!(chdX[k,i], UpperTriangular(chdH[i,i]), compressor)
+                rdiv!(chdX[k, i], UpperTriangular(chdH[i, i]), compressor)
            end
        end
    end
diff --git a/src/utils.jl b/src/utils.jl
index f9750b2..ad2a54c 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -30,18 +30,18 @@ struct PermutedMatrix{K,T} <: AbstractMatrix{T}
    data::K # original matrix
    rowperm::Vector{Int}
    colperm::Vector{Int}
-    function PermutedMatrix(orig,rowperm,colperm)
+    function PermutedMatrix(orig, rowperm, colperm)
        K = typeof(orig)
        T = eltype(orig)
-        new{K,T}(orig,rowperm,colperm)
+        return new{K,T}(orig, rowperm, colperm)
    end
end

Base.size(M::PermutedMatrix) = size(M.data)

-function Base.getindex(M::PermutedMatrix,i,j)
+function Base.getindex(M::PermutedMatrix, i, j)
    ip = M.rowperm[i]
    jp = M.colperm[j]
-    M.data[ip,jp]
+    return M.data[ip, jp]
end

"""
@@ -53,20 +53,20 @@ output `d` ranges from `0` to `n^2-1`.

See [https://en.wikipedia.org/wiki/Hilbert_curve](https://en.wikipedia.org/wiki/Hilbert_curve).
"""
-function hilbert_cartesian_to_linear(n::Integer,x,y)
+function hilbert_cartesian_to_linear(n::Integer, x, y)
    @assert ispow2(n)
-    @assert 0 ≤ x ≤ n-1
-    @assert 0 ≤ y ≤ n-1
+    @assert 0 ≤ x ≤ n - 1
+    @assert 0 ≤ y ≤ n - 1
    d = 0
    s = n >> 1
-    while s>0
+    while s > 0
        rx = (x & s) > 0
        ry = (y & s) > 0
-        d += s^2*((3*rx)⊻ry)
-        x,y = _rot(n,x,y,rx,ry)
+        d += s^2 * ((3 * rx) ⊻ ry)
+        x, y = _rot(n, x, y, rx, ry)
        s = s >> 1
    end
-    @assert 0 ≤ d ≤ n^2-1
+    @assert 0 ≤ d ≤ n^2 - 1
    return d
end

@@ -78,47 +78,47 @@ n-1` and `0 ≤ y ≤ n-1` on the Hilbert curve of order `n`.

See [https://en.wikipedia.org/wiki/Hilbert_curve](https://en.wikipedia.org/wiki/Hilbert_curve).
"""
-function hilbert_linear_to_cartesian(n::Integer,d)
+function hilbert_linear_to_cartesian(n::Integer, d)
    @assert ispow2(n)
-    @assert 0 ≤ d ≤ n^2-1
-    x,y = 0,0
+    @assert 0 ≤ d ≤ n^2 - 1
+    x, y = 0, 0
    s = 1
-    while s<n
+    while s < n
        rx = 1 & (d >> 1)
        ry = 1 & (d ⊻ rx)
-        x,y = _rot(s,x,y,rx,ry)
-        x += s*rx
-        y += s*ry
+        x, y = _rot(s, x, y, rx, ry)
+        x += s * rx
+        y += s * ry
        d = d >> 2
        s = s << 1
    end
-    @assert 0 ≤ x ≤ n-1
-    @assert 0 ≤ y ≤ n-1
-    return x,y
+    @assert 0 ≤ x ≤ n - 1
+    @assert 0 ≤ y ≤ n - 1
+    return x, y
end

# auxiliary function used in the hilbert curve.
Rotates the points x,y -function _rot(n,x,y,rx,ry) +function _rot(n, x, y, rx, ry) if ry == 0 if rx == 1 - x = n-1 - x - y = n-1 - y + x = n - 1 - x + y = n - 1 - y end - x,y = y,x + x, y = y, x end - return x,y + return x, y end function hilbert_points(n::Integer) @assert ispow2(n) xx = Int[] yy = Int[] - for d in 0:n^2-1 - x,y = hilbert_linear_to_cartesian(n,d) - push!(xx,x) - push!(yy,y) + for d in 0:(n^2 - 1) + x, y = hilbert_linear_to_cartesian(n, d) + push!(xx, x) + push!(yy, y) end - xx,yy + return xx, yy end """ @@ -136,7 +136,7 @@ disable_getindex() = (ALLOW_GETINDEX[] = false) The opposite of [`disable_getindex`](@ref). """ -enable_getindex() = (ALLOW_GETINDEX[] = true) +enable_getindex() = (ALLOW_GETINDEX[] = true) """ filter_tree(f,tree,isterminal=true) diff --git a/test/addition_test.jl b/test/addition_test.jl index 97c5f39..adb52d7 100644 --- a/test/addition_test.jl +++ b/test/addition_test.jl @@ -5,36 +5,36 @@ using Random using StaticArrays using HMatrices: RkMatrix -include(joinpath(HMatrices.PROJECT_ROOT,"test","testutils.jl")) +include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl")) -N,r = 500,3 -atol = 1e-6 -X = Y = rand(SVector{3,Float64},N) -splitter = CardinalitySplitter(nmax=50) -Xclt = Yclt = ClusterTree(X,splitter) -adm = StrongAdmissibilityStd(eta=3) -rtol = 1e-5 -comp = PartialACA(rtol=rtol) -K = helmholtz_matrix(X,Y,1.3) -H = assemble_hmat(K,Xclt,Yclt;adm,comp,threads=false,distributed=false) -T = eltype(H) -_H = Matrix(H;global_index=false) -R = RkMatrix(rand(T,N,r),rand(T,N,r)) -_R = Matrix(R) -M = rand(T,N,N) -_M = Matrix(M) -a = rand() +N, r = 500, 3 +atol = 1e-6 +X = Y = rand(SVector{3,Float64}, N) +splitter = CardinalitySplitter(; nmax=50) +Xclt = Yclt = ClusterTree(X, splitter) +adm = StrongAdmissibilityStd(; eta=3) +rtol = 1e-5 +comp = PartialACA(; rtol=rtol) +K = helmholtz_matrix(X, Y, 1.3) +H = assemble_hmat(K, Xclt, Yclt; adm, comp, threads=false, distributed=false) +T = eltype(H) +_H = Matrix(H; global_index=false) +R = RkMatrix(rand(T, N, r), rand(T, N, r)) +_R = Matrix(R) +M = rand(T, N, N) +_M = Matrix(M) +a = rand() @testset "axpy!" 
begin - @test axpy!(a,M,deepcopy(R)) ≈ a*_M + _R - tmp = axpy!(a,M,deepcopy(H)) - @test Matrix(tmp;global_index=false) ≈ a*_M + _H - @test axpy!(a,R,deepcopy(M)) ≈ a*_R + _M - @test axpy!(a,R,deepcopy(R)) ≈ a*_R + _R - tmp = axpy!(a,R,deepcopy(H)) - Matrix(tmp;global_index=false) ≈ a*_R + _H - @test axpy!(a,H,deepcopy(M)) ≈ a*_H + _M - @test axpy!(a,H,deepcopy(R)) ≈ a*_H + _R - tmp = axpy!(a,H,deepcopy(H)) - @test Matrix(tmp;global_index=false) ≈ a*_H + _H + @test axpy!(a, M, deepcopy(R)) ≈ a * _M + _R + tmp = axpy!(a, M, deepcopy(H)) + @test Matrix(tmp; global_index=false) ≈ a * _M + _H + @test axpy!(a, R, deepcopy(M)) ≈ a * _R + _M + @test axpy!(a, R, deepcopy(R)) ≈ a * _R + _R + tmp = axpy!(a, R, deepcopy(H)) + Matrix(tmp; global_index=false) ≈ a * _R + _H + @test axpy!(a, H, deepcopy(M)) ≈ a * _H + _M + @test axpy!(a, H, deepcopy(R)) ≈ a * _H + _R + tmp = axpy!(a, H, deepcopy(H)) + @test Matrix(tmp; global_index=false) ≈ a * _H + _H end diff --git a/test/compressor_test.jl b/test/compressor_test.jl index 7fc3c85..aafa3e7 100644 --- a/test/compressor_test.jl +++ b/test/compressor_test.jl @@ -4,142 +4,141 @@ using StaticArrays using LinearAlgebra using HMatrices: ACA, PartialACA, TSVD, RkMatrix -include(joinpath(HMatrices.PROJECT_ROOT,"test","testutils.jl")) +include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl")) using Random Random.seed!(1) @testset "Scalar" begin T = ComplexF64 - m,n = 100,100 - X = rand(SVector{3,Float64},m) - Y = map(i->SVector(10,0,0) + rand(SVector{3,Float64}),1:n) - K = helmholtz_matrix(X,Y,1.0) + m, n = 100, 100 + X = rand(SVector{3,Float64}, m) + Y = map(i -> SVector(10, 0, 0) + rand(SVector{3,Float64}), 1:n) + K = helmholtz_matrix(X, Y, 1.0) M = Matrix(K) - irange,jrange = 1:m,1:n + irange, jrange = 1:m, 1:n @testset "aca_full" begin atol = 1e-5 - aca = ACA(atol=atol) - R = aca(K,irange,jrange) + aca = ACA(; atol=atol) + R = aca(K, irange, jrange) @test norm(Matrix(R) - M) < atol rtol = 1e-5 - aca = ACA(rtol=rtol) - R = aca(M,irange,jrange) + aca = ACA(; rtol=rtol) + R = aca(M, irange, jrange) norm(Matrix(R) - M) - @test norm(Matrix(R) - M) < rtol*norm(M) + @test norm(Matrix(R) - M) < rtol * norm(M) r = 10 - aca = ACA(rank=r) - R = aca(M,irange,jrange) + aca = ACA(; rank=r) + R = aca(M, irange, jrange) @test rank(R) == r end @testset "aca_partial" begin atol = 1e-5 - aca = PartialACA(atol=atol) - R = aca(M,irange,jrange) + aca = PartialACA(; atol=atol) + R = aca(M, irange, jrange) @test norm(Matrix(R) - M) < atol rtol = 1e-5 - aca = PartialACA(rtol=rtol) - R = aca(M,irange,jrange) - @test norm(Matrix(R) - M) < rtol*norm(M) + aca = PartialACA(; rtol=rtol) + R = aca(M, irange, jrange) + @test norm(Matrix(R) - M) < rtol * norm(M) r = 10 - aca = PartialACA(rank=r) - R = aca(M,irange,jrange) + aca = PartialACA(; rank=r) + R = aca(M, irange, jrange) @test rank(R) == r # test fast update of frobenius norm - m,n = 10000,1000 + m, n = 10000, 1000 r = 10 T = ComplexF64 - A = [rand(T,m) for _ in 1:r] - B = [rand(T,n) for _ in 1:r] - R = RkMatrix(A,B) - old_norm = norm(Matrix(R),2) - push!(A,rand(T,m)) - push!(B,rand(T,n)) - Rnew = RkMatrix(A,B) - new_norm = norm(Matrix(Rnew),2) - @test new_norm ≈ HMatrices._update_frob_norm(old_norm,A,B) + A = [rand(T, m) for _ in 1:r] + B = [rand(T, n) for _ in 1:r] + R = RkMatrix(A, B) + old_norm = norm(Matrix(R), 2) + push!(A, rand(T, m)) + push!(B, rand(T, n)) + Rnew = RkMatrix(A, B) + new_norm = norm(Matrix(Rnew), 2) + @test new_norm ≈ HMatrices._update_frob_norm(old_norm, A, B) # test simple case where things are not 
compressible - A = rand(2,2) - comp = PartialACA(rtol=1e-5) - @test comp(A,1:2,1:2) ≈ A + A = rand(2, 2) + comp = PartialACA(; rtol=1e-5) + @test comp(A, 1:2, 1:2) ≈ A end @testset "truncated svd" begin atol = 1e-5 - tsvd = TSVD(atol=atol) - R = tsvd(M,irange,jrange) + tsvd = TSVD(; atol=atol) + R = tsvd(M, irange, jrange) # the inequality below is guaranteed to be true for the spectral norm # i.e. (the `opnorm` with `p=2`). @test opnorm(Matrix(R) - M) < atol rtol = 1e-5 - tsvd = TSVD(rtol=rtol) - R = tsvd(M,irange,jrange) - @test opnorm(Matrix(R) - M) < rtol*opnorm(M) + tsvd = TSVD(; rtol=rtol) + R = tsvd(M, irange, jrange) + @test opnorm(Matrix(R) - M) < rtol * opnorm(M) r = 10 - tsvd = TSVD(rank=r) - R = tsvd(M,irange,jrange) + tsvd = TSVD(; rank=r) + R = tsvd(M, irange, jrange) @test rank(R) == r end end - @testset "Tensorial" begin T = SMatrix{3,3,ComplexF64,9} # T = SMatrix{3,3,Float64,9} - m,n = 100,100 - X = rand(SVector{3,Float64},m) - Y = map(i->SVector(10,0,0) + rand(SVector{3,Float64}),1:n) - K = elastosdynamic_matrix(X,Y,1.0,2.0,1.0,1.0) + m, n = 100, 100 + X = rand(SVector{3,Float64}, m) + Y = map(i -> SVector(10, 0, 0) + rand(SVector{3,Float64}), 1:n) + K = elastosdynamic_matrix(X, Y, 1.0, 2.0, 1.0, 1.0) # K = ElastostaticMatrix(X,Y,1.0,2.0) M = Matrix(K) - irange,jrange = 1:m,1:n + irange, jrange = 1:m, 1:n @testset "aca_full" begin atol = 1e-5 - aca = ACA(atol=atol) - R = aca(K,irange,jrange); + aca = ACA(; atol=atol) + R = aca(K, irange, jrange) @test norm(Matrix(R) - M) < atol rtol = 1e-5 - aca = ACA(rtol=rtol) - R = aca(M,irange,jrange) + aca = ACA(; rtol=rtol) + R = aca(M, irange, jrange) norm(Matrix(R) - M) - @test norm(Matrix(R) - M) < rtol*norm(M) + @test norm(Matrix(R) - M) < rtol * norm(M) r = 10 - aca = ACA(rank=r) - R = aca(M,irange,jrange); + aca = ACA(; rank=r) + R = aca(M, irange, jrange) @test rank(R) == r end @testset "aca_partial" begin atol = 1e-5 - aca = PartialACA(atol=atol) - R = aca(M,irange,jrange) + aca = PartialACA(; atol=atol) + R = aca(M, irange, jrange) @test norm(Matrix(R) - M) < atol rtol = 1e-5 - aca = PartialACA(rtol=rtol) - R = aca(M,irange,jrange) - @test norm(Matrix(R) - M) < rtol*norm(M) + aca = PartialACA(; rtol=rtol) + R = aca(M, irange, jrange) + @test norm(Matrix(R) - M) < rtol * norm(M) r = 10 - aca = PartialACA(rank=r) - R = aca(M,irange,jrange); + aca = PartialACA(; rank=r) + R = aca(M, irange, jrange) @test rank(R) == r # test fast update of frobenius norm - m,n = 10000,1000 + m, n = 10000, 1000 r = 10 T = ComplexF64 - A = [rand(T,m) for _ in 1:r] - B = [rand(T,n) for _ in 1:r] - R = RkMatrix(A,B) - old_norm = norm(Matrix(R),2) - push!(A,rand(T,m)) - push!(B,rand(T,n)) - Rnew = RkMatrix(A,B) - new_norm = norm(Matrix(Rnew),2) - @test new_norm ≈ HMatrices._update_frob_norm(old_norm,A,B) + A = [rand(T, m) for _ in 1:r] + B = [rand(T, n) for _ in 1:r] + R = RkMatrix(A, B) + old_norm = norm(Matrix(R), 2) + push!(A, rand(T, m)) + push!(B, rand(T, n)) + Rnew = RkMatrix(A, B) + new_norm = norm(Matrix(Rnew), 2) + @test new_norm ≈ HMatrices._update_frob_norm(old_norm, A, B) # test simple case where things are not compressible - A = rand(2,2) - comp = PartialACA(rtol=1e-5) - @test comp(A,1:2,1:2) ≈ A + A = rand(2, 2) + comp = PartialACA(; rtol=1e-5) + @test comp(A, 1:2, 1:2) ≈ A end end diff --git a/test/dhmatrix_test.jl b/test/dhmatrix_test.jl index 48ec243..e96a658 100644 --- a/test/dhmatrix_test.jl +++ b/test/dhmatrix_test.jl @@ -4,21 +4,21 @@ addprocs(4; exeflags=`--project=$(Base.active_project())`) using Test using StaticArrays 
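+# Distributed assembly ships blocks to the worker processes, so every worker
+# must be able to construct the kernel entries; that is why `HMatrices` and
+# the test helpers are loaded with `@everywhere` below. A minimal sketch of
+# the same pattern (illustrative only: `my_kernel` is a placeholder, and
+# `KernelMatrix` is used as in `testutils.jl`):
+#
+#   using Distributed, StaticArrays
+#   addprocs(2; exeflags=`--project=$(Base.active_project())`)
+#   @everywhere using HMatrices, LinearAlgebra
+#   @everywhere my_kernel(x, y) = inv(4π * (norm(x - y) + 1e-10))
+#   X = rand(SVector{3,Float64}, 2_000)
+#   K = KernelMatrix(my_kernel, X, X)
+#   Hd = assemble_hmat(K; distributed=true, threads=false)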
@everywhere using HMatrices -@everywhere include(joinpath(HMatrices.PROJECT_ROOT,"test","testutils.jl")) +@everywhere include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl")) @testset "Assemble" begin - m,n = 10_000,10_000 - X = Y = points_on_cylinder(1,m) - splitter = CardinalitySplitter(nmax=100) - Xclt = Yclt = ClusterTree(X,splitter) - adm = StrongAdmissibilityStd(eta=3) - atol = 1e-6 - comp = PartialACA(;atol) + m, n = 10_000, 10_000 + X = Y = points_on_cylinder(1, m) + splitter = CardinalitySplitter(; nmax=100) + Xclt = Yclt = ClusterTree(X, splitter) + adm = StrongAdmissibilityStd(; eta=3) + atol = 1e-6 + comp = PartialACA(; atol) # Laplace - K = laplace_matrix(X,Y) - H = assemble_hmat(K,Xclt,Yclt;adm,comp,distributed=false,threads=true) - Hd = assemble_hmat(K,Xclt,Yclt;adm,comp,distributed=true,threads=false) + K = laplace_matrix(X, Y) + H = assemble_hmat(K, Xclt, Yclt; adm, comp, distributed=false, threads=true) + Hd = assemble_hmat(K, Xclt, Yclt; adm, comp, distributed=true, threads=false) x = rand(n) y = rand(m) - @test H*x ≈ Hd*x + @test H * x ≈ Hd * x end diff --git a/test/hmatrix_test.jl b/test/hmatrix_test.jl index fd2b709..3000d46 100644 --- a/test/hmatrix_test.jl +++ b/test/hmatrix_test.jl @@ -3,38 +3,38 @@ using StaticArrays using HMatrices using LinearAlgebra -include(joinpath(HMatrices.PROJECT_ROOT,"test","testutils.jl")) +include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl")) @testset "Assemble" begin - m,n = 1_000,1_000 - X = Y = rand(SVector{3,Float64},m) - splitter = CardinalitySplitter(nmax=20) - Xclt = Yclt = ClusterTree(X,splitter) - adm = StrongAdmissibilityStd(eta=3) - rtol = 1e-5 - comp = PartialACA(rtol=rtol) + m, n = 1_000, 1_000 + X = Y = rand(SVector{3,Float64}, m) + splitter = CardinalitySplitter(; nmax=20) + Xclt = Yclt = ClusterTree(X, splitter) + adm = StrongAdmissibilityStd(; eta=3) + rtol = 1e-5 + comp = PartialACA(; rtol=rtol) # Laplace - for threads in (true,false) - K = laplace_matrix(X,Y) - H = assemble_hmat(K,Xclt,Yclt;adm,comp,threads) - @test norm(Matrix(K) - Matrix(H;global_index=true)) < rtol*norm(Matrix(K)) - H = assemble_hmat(K;threads,distributed=false) - @test norm(Matrix(K) - Matrix(H;global_index=true)) < rtol*norm(Matrix(K)) - adjH = adjoint(H) - H_full = Matrix(H;global_index=false) + for threads in (true, false) + K = laplace_matrix(X, Y) + H = assemble_hmat(K, Xclt, Yclt; adm, comp, threads) + @test norm(Matrix(K) - Matrix(H; global_index=true)) < rtol * norm(Matrix(K)) + H = assemble_hmat(K; threads, distributed=false) + @test norm(Matrix(K) - Matrix(H; global_index=true)) < rtol * norm(Matrix(K)) + adjH = adjoint(H) + H_full = Matrix(H; global_index=false) adjH_full = adjoint(H_full) @testset "getindex" begin HMatrices.enable_getindex() - @test H_full[8,64] ≈ H[8,64] + @test H_full[8, 64] ≈ H[8, 64] HMatrices.disable_getindex() - @test_throws ErrorException H_full[8,64] ≈ H[8,64] + @test_throws ErrorException H_full[8, 64] ≈ H[8, 64] HMatrices.enable_getindex() - @test H_full[:,666] ≈ H[:,666] - @test adjH_full[:,666] ≈ adjH[:,666] + @test H_full[:, 666] ≈ H[:, 666] + @test adjH_full[:, 666] ≈ adjH[:, 666] end # Elastostatic - K = elastostatic_matrix(X,Y,1.1,1.2) - H = assemble_hmat(K,Xclt,Yclt;adm,comp,threads) - @test norm(Matrix(K) - Matrix(H;global_index=true)) < rtol*norm(Matrix(K)) + K = elastostatic_matrix(X, Y, 1.1, 1.2) + H = assemble_hmat(K, Xclt, Yclt; adm, comp, threads) + @test norm(Matrix(K) - Matrix(H; global_index=true)) < rtol * norm(Matrix(K)) end end diff --git a/test/hyperrectangle_test.jl 
b/test/hyperrectangle_test.jl index 3fdc82a..ef39b67 100644 --- a/test/hyperrectangle_test.jl +++ b/test/hyperrectangle_test.jl @@ -26,7 +26,8 @@ using StaticArrays push!(pts, SVector(x, y)) end end - @test HMatrices.bounding_box(pts) == HMatrices.HyperRectangle(SVector(-1.0, -1), SVector(1, 1.0)) + @test HMatrices.bounding_box(pts) == + HMatrices.HyperRectangle(SVector(-1.0, -1), SVector(1, 1.0)) @test HMatrices.bounding_box(pts, true) == HMatrices.HyperRectangle(SVector(-1.0, -1), SVector(1, 1.0)) pts = SVector{2,Float64}[] @@ -35,7 +36,8 @@ using StaticArrays push!(pts, SVector(x, y)) end end - @test HMatrices.bounding_box(pts) == HMatrices.HyperRectangle(SVector(-1.0, -1), SVector(1, 2.0)) + @test HMatrices.bounding_box(pts) == + HMatrices.HyperRectangle(SVector(-1.0, -1), SVector(1, 2.0)) @test HMatrices.bounding_box(pts, true) == HMatrices.HyperRectangle(SVector(-1.5, -1), SVector(3 / 2, 2.0)) rec1 = HMatrices.HyperRectangle(SVector(0, 0), SVector(1, 1)) diff --git a/test/lu_test.jl b/test/lu_test.jl index 5294864..2af2c73 100644 --- a/test/lu_test.jl +++ b/test/lu_test.jl @@ -6,29 +6,29 @@ using StaticArrays using HMatrices: RkMatrix -include(joinpath(HMatrices.PROJECT_ROOT,"test","testutils.jl")) +include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl")) Random.seed!(1) m = 5000 T = Float64 -X = points_on_sphere(m) -Y = X +X = points_on_sphere(m) +Y = X -K = laplace_matrix(X,X) +K = laplace_matrix(X, X) -splitter = CardinalitySplitter(nmax=50) -Xclt = ClusterTree(X,splitter) -Yclt = ClusterTree(Y,splitter) +splitter = CardinalitySplitter(; nmax=50) +Xclt = ClusterTree(X, splitter) +Yclt = ClusterTree(Y, splitter) adm = StrongAdmissibilityStd(3) -comp = PartialACA(;atol=1e-10); -H = assemble_hmat(K,Xclt,Yclt;adm,comp,threads=false,distributed=false) -H_full = Matrix(H) +comp = PartialACA(; atol=1e-10); +H = assemble_hmat(K, Xclt, Yclt; adm, comp, threads=false, distributed=false) +H_full = Matrix(H) ## -hlu = lu(H;atol=1e-5) -y = rand(m) -M = Matrix(K) -exact = M\y -approx = hlu\y -@test norm(exact - approx,Inf) < 1e-6 +hlu = lu(H; atol=1e-5) +y = rand(m) +M = Matrix(K) +exact = M \ y +approx = hlu \ y +@test norm(exact - approx, Inf) < 1e-6 diff --git a/test/multiplication_test.jl b/test/multiplication_test.jl index 495bc40..d973b6b 100755 --- a/test/multiplication_test.jl +++ b/test/multiplication_test.jl @@ -6,59 +6,58 @@ using StaticArrays using HMatrices: RkMatrix -include(joinpath(HMatrices.PROJECT_ROOT,"test","testutils.jl")) +include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl")) Random.seed!(1) m = 2000 n = 2000 -X = rand(SVector{3,Float64},m) -Y = [rand(SVector{3,Float64}) for _ in 1:n] -splitter = CardinalitySplitter(nmax=40) -Xclt = ClusterTree(X,splitter) -Yclt = ClusterTree(Y,splitter) -adm = StrongAdmissibilityStd(eta=3) -rtol = 1e-5 -comp = PartialACA(rtol=rtol) -K = laplace_matrix(X,Y) -H = assemble_hmat(K,Xclt,Yclt;adm,comp,threads=false,distributed=false) +X = rand(SVector{3,Float64}, m) +Y = [rand(SVector{3,Float64}) for _ in 1:n] +splitter = CardinalitySplitter(; nmax=40) +Xclt = ClusterTree(X, splitter) +Yclt = ClusterTree(Y, splitter) +adm = StrongAdmissibilityStd(; eta=3) +rtol = 1e-5 +comp = PartialACA(; rtol=rtol) +K = laplace_matrix(X, Y) +H = assemble_hmat(K, Xclt, Yclt; adm, comp, threads=false, distributed=false) -H_full = Matrix(H;global_index=false) +H_full = Matrix(H; global_index=false) α = rand() - 0.5 β = rand() - 0.5 @testset "hmul!" 
begin
-    C = deepcopy(H);
-    tmp = β*H_full + α*H_full*H_full
-    HMatrices.hmul!(C,H,H,α,β,PartialACA(;atol=1e-6))
-    @test Matrix(C;global_index=false) ≈ tmp
+    C = deepcopy(H)
+    tmp = β * H_full + α * H_full * H_full
+    HMatrices.hmul!(C, H, H, α, β, PartialACA(; atol=1e-6))
+    @test Matrix(C; global_index=false) ≈ tmp
 end
 
 @testset "gemv" begin
-
-    α = rand()-0.5
-    β = rand()-0.5
+    α = rand() - 0.5
+    β = rand() - 0.5
     T = eltype(H)
-    m,n = size(H)
-    x = rand(T,n)
-    y = rand(T,m)
+    m, n = size(H)
+    x = rand(T, n)
+    y = rand(T, m)
     @testset "serial" begin
-        exact = β*y + α*H_full*x
-        approx = mul!(copy(y),H,x,α,β;threads=false,global_index=false)
+        exact = β * y + α * H_full * x
+        approx = mul!(copy(y), H, x, α, β; threads=false, global_index=false)
         @test exact ≈ approx
-        exact = β*y + α*Matrix(H;global_index=true)*x
-        approx = mul!(copy(y),H,x,α,β;threads=false,global_index=true)
+        exact = β * y + α * Matrix(H; global_index=true) * x
+        approx = mul!(copy(y), H, x, α, β; threads=false, global_index=true)
         @test exact ≈ approx
     end
     @testset "threads" begin
-        exact = β*y + α*H_full*x
-        approx = mul!(copy(y),H,x,α,β;threads=true,global_index=false)
+        exact = β * y + α * H_full * x
+        approx = mul!(copy(y), H, x, α, β; threads=true, global_index=false)
         @test exact ≈ approx
-        exact = β*y + α*Matrix(H;global_index=true)*x
-        approx = mul!(copy(y),H,x,α,β;threads=false,global_index=true)
+        exact = β * y + α * Matrix(H; global_index=true) * x
+        approx = mul!(copy(y), H, x, α, β; threads=true, global_index=true)
         @test exact ≈ approx
     end
 end
diff --git a/test/rkmatrix_test.jl b/test/rkmatrix_test.jl
index 05d63c8..f65833a 100644
--- a/test/rkmatrix_test.jl
+++ b/test/rkmatrix_test.jl
@@ -9,20 +9,20 @@ using HMatrices: RkMatrix, compression_ratio
     m = 20
     n = 30
     r = 5
-    A = rand(ComplexF64,m,r)
-    B = rand(ComplexF64,n,r)
-    R = RkMatrix(A,B);
-    Ra = adjoint(R);
-    M = A*adjoint(B)
-    Ma = adjoint(M)
+    A = rand(ComplexF64, m, r)
+    B = rand(ComplexF64, n, r)
+    R = RkMatrix(A, B)
+    Ra = adjoint(R)
+    M = A * adjoint(B)
+    Ma = adjoint(M)
     ## basic tests
-    @test size(R) == (m,n)
+    @test size(R) == (m, n)
     @test rank(R) == r
     @test Matrix(R) ≈ M
-    @test compression_ratio(R) ≈ m*n / (r*(m+n))
-    @test R[:,5] ≈ M[:,5]
-    @test Ra[:,5] ≈ Ma[:,5]
+    @test compression_ratio(R) ≈ m * n / (r * (m + n))
+    @test R[:, 5] ≈ M[:, 5]
+    @test Ra[:, 5] ≈ Ma[:, 5]
 end
 
 @testset "Matrix entries" begin
@@ -30,12 +30,12 @@ using HMatrices: RkMatrix, compression_ratio
     n = 30
     r = 10
     T = SMatrix{3,3,ComplexF64,9}
-    A = rand(T,m,r)
-    B = rand(T,n,r)
-    R = RkMatrix(A,B)
+    A = rand(T, m, r)
+    B = rand(T, n, r)
+    R = RkMatrix(A, B)
     ## basic tests
-    @test size(R) == (m,n)
+    @test size(R) == (m, n)
     @test rank(R) == r
-    @test compression_ratio(R) ≈ m*n / (r*(m+n))
+    @test compression_ratio(R) ≈ m * n / (r * (m + n))
 end
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 5d43f48..0f245e2 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,23 +1,43 @@
 using SafeTestsets
 
-@safetestset "Utils" begin include("utils_test.jl") end
+@safetestset "Utils" begin
+    include("utils_test.jl")
+end
 
-@safetestset "HyperRectangle" begin include("hyperrectangle_test.jl") end
+@safetestset "HyperRectangle" begin
+    include("hyperrectangle_test.jl")
+end
 
-@safetestset "ClusterTree" begin include("clustertree_test.jl") end
+@safetestset "ClusterTree" begin
+    include("clustertree_test.jl")
+end
 
-@safetestset "RkMatrix" begin include("rkmatrix_test.jl") end
+@safetestset "RkMatrix" begin
+    include("rkmatrix_test.jl")
+end
 
-@safetestset "Compressors" begin 
include("compressor_test.jl") end +@safetestset "Compressors" begin + include("compressor_test.jl") +end -@safetestset "HMatrix" begin include("hmatrix_test.jl") end +@safetestset "HMatrix" begin + include("hmatrix_test.jl") +end -@safetestset "Addition" begin include("addition_test.jl") end +@safetestset "Addition" begin + include("addition_test.jl") +end -@safetestset "Multiplication" begin include("multiplication_test.jl") end +@safetestset "Multiplication" begin + include("multiplication_test.jl") +end -@safetestset "Triangular" begin include("triangular_test.jl") end +@safetestset "Triangular" begin + include("triangular_test.jl") +end -@safetestset "LU" begin include("lu_test.jl") end +@safetestset "LU" begin + include("lu_test.jl") +end # @safetestset "DHMatrix" begin include("dhmatrix_test.jl") end diff --git a/test/testutils.jl b/test/testutils.jl index da4f10c..23af927 100644 --- a/test/testutils.jl +++ b/test/testutils.jl @@ -12,8 +12,8 @@ function laplace_matrix(X, Y) d = norm(x - y) + EPS inv(4π * d) end - KernelMatrix(f, X, Y) - end + return KernelMatrix(f, X, Y) +end function helmholtz_matrix(X, Y, k) f = (x, y) -> begin @@ -21,7 +21,7 @@ function helmholtz_matrix(X, Y, k) d = norm(x - y) + EPS exp(im * k * d) * inv(4π * d) end - KernelMatrix(f, X, Y) + return KernelMatrix(f, X, Y) end function elastostatic_matrix(X, Y, μ, λ) @@ -32,7 +32,7 @@ function elastostatic_matrix(X, Y, μ, λ) RRT = r * transpose(r) # r ⊗ rᵗ return 1 / (16π * μ * (1 - ν) * d) * ((3 - 4 * ν) * I + RRT / d^2) end - KernelMatrix(f, X, Y) + return KernelMatrix(f, X, Y) end function elastosdynamic_matrix(X, Y, μ, λ, ω, ρ) @@ -40,17 +40,18 @@ function elastosdynamic_matrix(X, Y, μ, λ, ω, ρ) c1 = sqrt((λ + 2μ) / ρ) c2 = sqrt(μ / ρ) r = x - y - d = norm(r) + EPS + d = norm(r) + EPS RRT = r * transpose(r) # r ⊗ rᵗ - s = -im * ω - z1 = s * d / c1 - z2 = s * d / c2 - α = 4 - ψ = exp(-z2) / d + (1 + z2) / (z2^2) * exp(-z2) / d - c2^2 / c1^2 * (1 + z1) / (z1^2) * exp(-z1) / d + s = -im * ω + z1 = s * d / c1 + z2 = s * d / c2 + α = 4 + ψ = exp(-z2) / d + (1 + z2) / (z2^2) * exp(-z2) / d - + c2^2 / c1^2 * (1 + z1) / (z1^2) * exp(-z1) / d chi = 3 * ψ - 2 * exp(-z2) / d - c2^2 / c1^2 * exp(-z1) / d return 1 / (α * π * μ) * (ψ * I - chi * RRT / d^2) + x * transpose(y) end - KernelMatrix(f, X, Y) + return KernelMatrix(f, X, Y) end """ @@ -63,22 +64,23 @@ struct LaplaceMatrixVec{T,Td} <: AbstractKernelMatrix{T} Y::Matrix{Td} function LaplaceMatrixVec{T}(X::Matrix{Td}, Y::Matrix{Td}) where {T,Td} @assert size(X, 2) == size(Y, 2) == 3 - new{T,Td}(X, Y) + return new{T,Td}(X, Y) end end Base.size(K::LaplaceMatrixVec) = size(K.X, 1), size(K.Y, 1) # constructor based on Vector of StaticVector -function LaplaceMatrixVec{T}(_X::Vector{SVector{3,Td}}, _Y::Vector{SVector{3,Td}}) where {T,Td} - X = reshape(reinterpret(Td, _X), 3, :) |> transpose |> collect - Y = reshape(reinterpret(Td, _Y), 3, :) |> transpose |> collect - LaplaceMatrixVec{T}(X, Y) +function LaplaceMatrixVec{T}(_X::Vector{SVector{3,Td}}, + _Y::Vector{SVector{3,Td}}) where {T,Td} + X = collect(transpose(reshape(reinterpret(Td, _X), 3, :))) + Y = collect(transpose(reshape(reinterpret(Td, _Y), 3, :))) + return LaplaceMatrixVec{T}(X, Y) end LaplaceMatrixVec(args...) = LaplaceMatrixVec{Float64}(args...) 
# default to Float64 function Base.getindex(K::LaplaceMatrixVec{T}, i::Int, j::Int)::T where {T} - d2 = (K.X[i,1] - K.Y[j,1])^2 + (K.X[i,2] - K.Y[j,2])^2 + (K.X[i,3] - K.Y[j,3])^2 + d2 = (K.X[i, 1] - K.Y[j, 1])^2 + (K.X[i, 2] - K.Y[j, 2])^2 + (K.X[i, 3] - K.Y[j, 3])^2 d = sqrt(d2) + EPS return inv(4π * d) end @@ -91,21 +93,21 @@ function Base.getindex(K::LaplaceMatrixVec, I::UnitRange, J::UnitRange) out = Matrix{T}(undef, m, n) @avx for j in 1:n for i in 1:m - d2 = (Xv[i,1] - Yv[j,1])^2 - d2 += (Xv[i,2] - Yv[j,2])^2 - d2 += (Xv[i,3] - Yv[j,3])^2 + d2 = (Xv[i, 1] - Yv[j, 1])^2 + d2 += (Xv[i, 2] - Yv[j, 2])^2 + d2 += (Xv[i, 3] - Yv[j, 3])^2 d = sqrt(d2) + EPS - out[i,j] = inv(4 * π * d) + out[i, j] = inv(4 * π * d) end end return out end function Base.getindex(K::LaplaceMatrixVec, I::UnitRange, j::Int) - return vec(K[I,j:j]) + return vec(K[I, j:j]) end function Base.getindex(adjK::Adjoint{<:Any,LaplaceMatrixVec}, I::UnitRange, j::Int) K = parent(adjK) - vec(K[j:j,I]) + return vec(K[j:j, I]) end """ @@ -119,25 +121,27 @@ struct HelmholtzMatrixVec{T,Td,Tk} <: AbstractKernelMatrix{T} k::Tk function HelmholtzMatrixVec{T}(X::Matrix{Td}, Y::Matrix{Td}, k::Tk) where {T,Td,Tk} @assert size(X, 2) == size(Y, 2) == 3 - new{T,Td,Tk}(X, Y, k) + return new{T,Td,Tk}(X, Y, k) end end HelmholtzMatrixVec(args...) = HelmholtzMatrixVec{ComplexF64}(args...) Base.size(K::HelmholtzMatrixVec) = size(K.X, 1), size(K.Y, 1) -function HelmholtzMatrixVec{T}(_X::Vector{SVector{3,Td}}, _Y::Vector{SVector{3,Td}}, k) where {T,Td} - X = reshape(reinterpret(Td, _X), 3, :) |> transpose |> collect - Y = reshape(reinterpret(Td, _Y), 3, :) |> transpose |> collect - HelmholtzMatrixVec{T}(X, Y, k) +function HelmholtzMatrixVec{T}(_X::Vector{SVector{3,Td}}, _Y::Vector{SVector{3,Td}}, + k) where {T,Td} + X = collect(transpose(reshape(reinterpret(Td, _X), 3, :))) + Y = collect(transpose(reshape(reinterpret(Td, _Y), 3, :))) + return HelmholtzMatrixVec{T}(X, Y, k) end function Base.getindex(K::HelmholtzMatrixVec{T}, i::Int, j::Int)::T where {T} - d2 = (K.X[i,1] - K.Y[j,1])^2 + (K.X[i,2] - K.Y[j,2])^2 + (K.X[i,3] - K.Y[j,3])^2 - d = sqrt(d2) + EPS + d2 = (K.X[i, 1] - K.Y[j, 1])^2 + (K.X[i, 2] - K.Y[j, 2])^2 + (K.X[i, 3] - K.Y[j, 3])^2 + d = sqrt(d2) + EPS return inv(4π * d) * exp(im * K.k * d) end -function Base.getindex(K::HelmholtzMatrixVec{Complex{T}}, I::UnitRange, J::UnitRange) where {T} +function Base.getindex(K::HelmholtzMatrixVec{Complex{T}}, I::UnitRange, + J::UnitRange) where {T} k = K.k m = length(I) n = length(J) @@ -146,55 +150,55 @@ function Base.getindex(K::HelmholtzMatrixVec{Complex{T}}, I::UnitRange, J::UnitR # since LoopVectorization does not (yet) support Complex{T} types, we will # reinterpret the output as a Matrix{T}, then use views. This can probably be # done better. 
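+# (Layout behind the trick: for `out::Matrix{Complex{T}}` of size m×n,
+# `reinterpret(T, out)` views the same memory as a 2m×n real matrix in which
+# row 2i-1 holds real(out[i, j]) and row 2i holds imag(out[i, j]); the
+# strided views `out_T[1:2:end, :]` and `out_T[2:2:end, :]` below therefore
+# alias exactly the real and imaginary parts, which the @avx loop can fill
+# independently.)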
-    out = Matrix{Complex{T}}(undef, m, n)
-    out_T = reinterpret(T, out)
-    out_r = @views out_T[1:2:end,:]
-    out_i = @views out_T[2:2:end,:]
+    out = Matrix{Complex{T}}(undef, m, n)
+    out_T = reinterpret(T, out)
+    out_r = @views out_T[1:2:end, :]
+    out_i = @views out_T[2:2:end, :]
     @avx for j in 1:n
         for i in 1:m
-            d2 = (Xv[i,1] - Yv[j,1])^2
-            d2 += (Xv[i,2] - Yv[j,2])^2
-            d2 += (Xv[i,3] - Yv[j,3])^2
-            d = sqrt(d2) + EPS
+            d2 = (Xv[i, 1] - Yv[j, 1])^2
+            d2 += (Xv[i, 2] - Yv[j, 2])^2
+            d2 += (Xv[i, 3] - Yv[j, 3])^2
+            d = sqrt(d2) + EPS
             s, c = sincos(k * d)
             zr = inv(4π * d) * c
             zi = inv(4π * d) * s
-            out_r[i,j] = zr
-            out_i[i,j] = zi
+            out_r[i, j] = zr
+            out_i[i, j] = zi
         end
     end
     return out
 end
 
 function Base.getindex(K::HelmholtzMatrixVec, I::UnitRange, j::Int)
-    vec(K[I,j:j])
+    return vec(K[I, j:j])
 end
 function Base.getindex(adjK::Adjoint{<:Any,<:HelmholtzMatrixVec}, I::UnitRange, j::Int)
     K = parent(adjK)
-    conj!(K[j:j,I]) |> vec
+    return vec(conj!(K[j:j, I]))
 end
 
 function points_on_sphere(npts, R=1)
     theta = π * rand(npts)
-    phi = 2 * π * rand(npts)
-    x = @. sin(theta) * cos(phi)
-    y = @. R * sin(theta) * sin(phi)
-    z = @. R * cos(theta)
-    data = vcat(x', y', z')
-    pts = reinterpret(SVector{3,Float64}, vec(data)) |> collect
+    phi = 2 * π * rand(npts)
+    x = @. R * sin(theta) * cos(phi)
+    y = @. R * sin(theta) * sin(phi)
+    z = @. R * cos(theta)
+    data = vcat(x', y', z')
+    pts = collect(reinterpret(SVector{3,Float64}, vec(data)))
     return pts
 end
 
-function points_on_cylinder(radius,n,shift=SVector(0,0,0))
-    step = 1.75*π*radius/sqrt(n)
-    result = Vector{SVector{3,Float64}}(undef,n)
-    length = 2*π*radius
-    pointsPerCircle = length/step
-    angleStep = 2*π/pointsPerCircle
-    for i=0:n-1
-        x = radius * cos(angleStep*i)
-        y = radius * sin(angleStep*i)
-        z = step*i/pointsPerCircle
-        result[i+1] = shift + SVector(x,y,z)
+function points_on_cylinder(radius, n, shift=SVector(0, 0, 0))
+    step = 1.75 * π * radius / sqrt(n)
+    result = Vector{SVector{3,Float64}}(undef, n)
+    length = 2 * π * radius
+    pointsPerCircle = length / step
+    angleStep = 2 * π / pointsPerCircle
+    for i in 0:(n - 1)
+        x = radius * cos(angleStep * i)
+        y = radius * sin(angleStep * i)
+        z = step * i / pointsPerCircle
+        result[i + 1] = shift + SVector(x, y, z)
     end
     return result
 end
diff --git a/test/triangular_test.jl b/test/triangular_test.jl
index be0756d..b7c2d38 100755
--- a/test/triangular_test.jl
+++ b/test/triangular_test.jl
@@ -6,74 +6,74 @@ using StaticArrays
 
 using HMatrices: RkMatrix
 
-include(joinpath(HMatrices.PROJECT_ROOT,"test","testutils.jl"))
+include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl"))
 
 Random.seed!(1)
 m = 1000
 T = Float64
-X = points_on_sphere(m)
-Y = X
+X = points_on_sphere(m)
+Y = X
 
 struct ExponentialKernel <: AbstractMatrix{Float64}
     X::Vector{SVector{3,Float64}}
     Y::Vector{SVector{3,Float64}}
 end
-function Base.getindex(K::ExponentialKernel,i::Int,j::Int)
-    x,y = K.X[i], K.Y[j]
-    exp(-norm(x-y))
+function Base.getindex(K::ExponentialKernel, i::Int, j::Int)
+    x, y = K.X[i], K.Y[j]
+    return exp(-norm(x - y))
 end
 Base.size(K::ExponentialKernel) = length(K.X), length(K.Y)
 
-K = ExponentialKernel(X,X)
+K = ExponentialKernel(X, X)
 
-splitter = CardinalitySplitter(nmax=50)
-Xclt = ClusterTree(X,splitter)
-Yclt = ClusterTree(Y,splitter)
-H = assemble_hmat(K,Xclt,Yclt;threads=false,distributed=false)
-H_full = Matrix(H;global_index=false)
+splitter = CardinalitySplitter(; nmax=50)
+Xclt = ClusterTree(X, splitter)
+Yclt = ClusterTree(Y, splitter)
+H = assemble_hmat(K, Xclt, Yclt; threads=false, distributed=false)
+H_full = 
Matrix(H; global_index=false) @testset "ldiv!" begin - B = rand(m,2) - R = RkMatrix(rand(m,3),rand(m,3)) + B = rand(m, 2) + R = RkMatrix(rand(m, 3), rand(m, 3)) R_full = Matrix(R) ## 3.1 - exact = ldiv!(UnitLowerTriangular(H_full),copy(B)) - approx = ldiv!(UnitLowerTriangular(H),copy(B)) + exact = ldiv!(UnitLowerTriangular(H_full), copy(B)) + approx = ldiv!(UnitLowerTriangular(H), copy(B)) @test exact ≈ approx - exact = ldiv!(UpperTriangular(H_full),copy(B)) - approx = ldiv!(UpperTriangular(H),copy(B)) + exact = ldiv!(UpperTriangular(H_full), copy(B)) + approx = ldiv!(UpperTriangular(H), copy(B)) @test exact ≈ approx ## 3.2 - exact = ldiv!(UnitLowerTriangular(H_full),copy(R_full)) - approx = ldiv!(UnitLowerTriangular(H),copy(R)) + exact = ldiv!(UnitLowerTriangular(H_full), copy(R_full)) + approx = ldiv!(UnitLowerTriangular(H), copy(R)) @test exact ≈ Matrix(approx) ## 3.3 - compressor = PartialACA(;atol=1e-8) - exact = ldiv!(UnitLowerTriangular(H_full),copy(H_full)) - approx = ldiv!(UnitLowerTriangular(H),deepcopy(H),compressor) - @test exact ≈ Matrix(approx;global_index=false) + compressor = PartialACA(; atol=1e-8) + exact = ldiv!(UnitLowerTriangular(H_full), copy(H_full)) + approx = ldiv!(UnitLowerTriangular(H), deepcopy(H), compressor) + @test exact ≈ Matrix(approx; global_index=false) end @testset "rdiv!" begin - B = rand(2,m) - R = RkMatrix(rand(m,3),rand(m,3)) + B = rand(2, m) + R = RkMatrix(rand(m, 3), rand(m, 3)) R_full = Matrix(R) # 3.1 - exact = rdiv!(copy(B),UpperTriangular(H_full)) - approx = rdiv!(copy(B),UpperTriangular(H)) + exact = rdiv!(copy(B), UpperTriangular(H_full)) + approx = rdiv!(copy(B), UpperTriangular(H)) @test exact ≈ approx ## 3.2 - exact = rdiv!(copy(R_full),UpperTriangular(H_full)) - approx = rdiv!(copy(R),UpperTriangular(H)) + exact = rdiv!(copy(R_full), UpperTriangular(H_full)) + approx = rdiv!(copy(R), UpperTriangular(H)) @test exact ≈ approx ## 3.3 - compressor = PartialACA(;atol=1e-10) - exact = rdiv!(deepcopy(H_full),UpperTriangular(H_full)) - approx = rdiv!(deepcopy(H),UpperTriangular(H),compressor) - @test exact ≈ Matrix(approx;global_index=false) + compressor = PartialACA(; atol=1e-10) + exact = rdiv!(deepcopy(H_full), UpperTriangular(H_full)) + approx = rdiv!(deepcopy(H), UpperTriangular(H), compressor) + @test exact ≈ Matrix(approx; global_index=false) end diff --git a/test/utils_test.jl b/test/utils_test.jl index 1323663..795938f 100644 --- a/test/utils_test.jl +++ b/test/utils_test.jl @@ -5,11 +5,11 @@ using HMatrices: hilbert_linear_to_cartesian, hilbert_cartesian_to_linear @testset "Hilbert points" begin # check that hilbert curves spans the lattice, and the inverse is correct for n in [2^p for p in 0:3] - lattice = Set((i,j) for i in 0:n-1,j in 0:n-1) - for d in 0:(n^2-1) - x,y = hilbert_linear_to_cartesian(n,d) - pop!(lattice,(x,y)) - @test hilbert_cartesian_to_linear(n,x,y) == d + lattice = Set((i, j) for i in 0:(n - 1), j in 0:(n - 1)) + for d in 0:(n^2 - 1) + x, y = hilbert_linear_to_cartesian(n, d) + pop!(lattice, (x, y)) + @test hilbert_cartesian_to_linear(n, x, y) == d end @test isempty(lattice) end
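# Standalone sketch of the Hilbert-curve helpers exercised in the test above:
# `hilbert_linear_to_cartesian(n, d)` maps a position d ∈ 0:n^2-1 along the
# curve to coordinates (x, y) on an n×n lattice (n a power of two), and
# `hilbert_cartesian_to_linear` is its inverse; both are internal
# (non-exported) helpers of HMatrices, imported here exactly as in
# utils_test.jl.
using HMatrices: hilbert_linear_to_cartesian, hilbert_cartesian_to_linear

n = 4 # lattice side; must be a power of two
curve = [hilbert_linear_to_cartesian(n, d) for d in 0:(n^2 - 1)]
@assert allunique(curve) # n^2 distinct points: the curve spans the lattice
for (d, (x, y)) in enumerate(curve)
    @assert hilbert_cartesian_to_linear(n, x, y) == d - 1 # round trip
end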