From ba72645291436fc85cf78f3ab6d7eb983b2e1975 Mon Sep 17 00:00:00 2001
From: termi-official
Date: Thu, 26 May 2022 18:55:09 +0200
Subject: [PATCH 001/124] Add initial (non-functional) version of mesh partitioning.

---
 Project.toml                      |   7 +-
 src/Dofs/DistributedDofHandler.jl | 280 ++++++++++++++++++++++++++++++
 src/Dofs/DofHandler.jl            |  39 ++---
 src/Ferrite.jl                    |   2 +
 src/Grid/DistributedGrid.jl       |  78 +++++++++
 src/deprecations.jl               |   1 +
 src/exports.jl                    |   2 +
 7 files changed, 386 insertions(+), 23 deletions(-)
 create mode 100644 src/Dofs/DistributedDofHandler.jl
 create mode 100644 src/Grid/DistributedGrid.jl

diff --git a/Project.toml b/Project.toml
index 0d62e980a2..df0a18711a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -5,6 +5,9 @@ version = "0.3.4"
 [deps]
 EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
+MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267"
+Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b"
 NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
@@ -35,6 +38,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
 
 [targets]
-test = ["BlockArrays", "Downloads", "ForwardDiff", "IterativeSolvers", "KrylovMethods",
-        "NBInclude", "Pkg", "ProgressMeter", "Random", "SHA", "StableRNGs", "Test",
-        "TimerOutputs"]
+test = ["BlockArrays", "Downloads", "ForwardDiff", "IterativeSolvers", "KrylovMethods", "NBInclude", "Pkg", "ProgressMeter", "Random", "SHA", "StableRNGs", "Test", "TimerOutputs"]
diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl
new file mode 100644
index 0000000000..f32e84200d
--- /dev/null
+++ b/src/Dofs/DistributedDofHandler.jl
@@ -0,0 +1,280 @@
+"""
+    DistributedDofHandler(grid::AbstractDistributedGrid)
+
+Construct a `DistributedDofHandler` based on `grid`.
+
+Distributed version of [`DofHandler`](@ref).
+
+Supports:
+- `Grid`s with a single concrete cell type.
+- One or several fields on the whole domain.
+"""
+struct DistributedDofHandler{dim,T,G<:AbstractDistributedGrid{dim}} <: AbstractDofHandler
+    field_names::Vector{Symbol}
+    field_dims::Vector{Int}
+    # TODO: field_interpolations can probably be better typed: We should at least require
+    # all the interpolations to have the same dimension and reference shape
+    field_interpolations::Vector{Interpolation}
+    bc_values::Vector{BCValues{T}} # TODO: BcValues is created/handled by the constrainthandler, so this can be removed
+    cell_dofs::Vector{Int}
+    cell_dofs_offset::Vector{Int}
+    closed::ScalarWrapper{Bool}
+    grid::G
+    ndofs::ScalarWrapper{Int}
+
+    vertexdicts::Array{Dict{Int,Int}}
+    edgedicts::Array{Dict{Tuple{Int,Int},Tuple{Int,Bool}}}
+    facedicts::Array{Dict{NTuple{dim,Int},Int}}
+end
+
+function DistributedDofHandler(grid::AbstractDistributedGrid)
+    isconcretetype(getcelltype(grid)) || error("Grid includes different celltypes. 
DistributedMixedDofHandler not implemented yet.") + DistributedDofHandler(Symbol[], Int[], Interpolation[], BCValues{Float64}[], Int[], Int[], ScalarWrapper(false), grid, Ferrite.ScalarWrapper(-1)) +end + +function Base.show(io::IO, ::MIME"text/plain", dh::DistributedDofHandler) + println(io, "DistributedDofHandler") + println(io, " Fields:") + for i in 1:nfields(dh) + println(io, " ", repr(dh.field_names[i]), ", interpolation: ", dh.field_interpolations[i],", dim: ", dh.field_dims[i]) + end + if !isclosed(dh) + print(io, " Not closed!") + else + println(io, " Dofs per cell: ", ndofs_per_cell(dh)) + print(io, " Total local dofs: ", ndofs(dh)) + end +end + +# Calculate the offset to the first local dof of a field +function field_offset(dh::DofHandler, field_name::Symbol) + offset = 0 + for i in 1:find_field(dh, field_name)-1 + offset += getnbasefunctions(dh.field_interpolations[i])::Int * dh.field_dims[i] + end + return offset +end + +function getfielddim(dh::DofHandler, name::Symbol) + field_pos = findfirst(i->i == name, getfieldnames(dh)) + field_pos === nothing && error("did not find field $name") + return dh.field_dims[field_pos] +end + +""" + dof_range(dh:DofHandler, field_name) + +Return the local dof range for `field_name`. Example: + +```jldoctest +julia> grid = generate_grid(Triangle, (3, 3)) +Grid{2, Triangle, Float64} with 18 Triangle cells and 16 nodes + +julia> dh = DofHandler(grid); push!(dh, :u, 3); push!(dh, :p, 1); close!(dh); + +julia> dof_range(dh, :u) +1:9 + +julia> dof_range(dh, :p) +10:12 +``` +""" +function dof_range(dh::DofHandler, field_name::Symbol) + f = find_field(dh, field_name) + offset = field_offset(dh, field_name) + n_field_dofs = getnbasefunctions(dh.field_interpolations[f])::Int * dh.field_dims[f] + return (offset+1):(offset+n_field_dofs) +end + + +function close!(dh::DistributedDofHandler{dim}) where {dim} + @assert !isclosed(dh) + + # `vertexdict` keeps track of the visited vertices. We store the global vertex + # number and the first dof we added to that vertex. + vertexdicts = [Dict{Int,Int}() for _ in 1:nfields(dh)] + + # `edgedict` keeps track of the visited edges, this will only be used for a 3D problem + # An edge is determined from two vertices, but we also need to store the direction + # of the first edge we encounter and add dofs too. When we encounter the same edge + # the next time we check if the direction is the same, otherwise we reuse the dofs + # in the reverse order + edgedicts = [Dict{Tuple{Int,Int},Tuple{Int,Bool}}() for _ in 1:nfields(dh)] + + # `facedict` keeps track of the visited faces. We only need to store the first dof we + # added to the face; if we encounter the same face again we *always* reverse the order + # In 2D a face (i.e. a line) is uniquely determined by 2 vertices, and in 3D a + # face (i.e. a surface) is uniquely determined by 3 vertices. + facedicts = [Dict{NTuple{dim,Int},Int}() for _ in 1:nfields(dh)] + + # celldofs are never shared between different cells so there is no need + # for a `celldict` to keep track of which cells we have added dofs too. 
+ + # We create the `InterpolationInfo` structs with precomputed information for each + # interpolation since that allows having the cell loop as the outermost loop, + # and the interpolation loop inside without using a function barrier + interpolation_infos = InterpolationInfo[] + for interpolation in dh.field_interpolations + # push!(dh.interpolation_info, InterpolationInfo(interpolation)) + push!(interpolation_infos, InterpolationInfo(interpolation)) + end + + # not implemented yet: more than one facedof per face in 3D + dim == 3 && @assert(!any(x->x.nfacedofs > 1, interpolation_infos)) + + nextdof = 1 # next free dof to distribute + push!(dh.cell_dofs_offset, 1) # dofs for the first cell start at 1 + + # loop over all the cells, and distribute dofs for all the fields + for (ci, cell) in enumerate(getcells(dh.grid)) + @debug println("cell #$ci") + for fi in 1:nfields(dh) + interpolation_info = interpolation_infos[fi] + @debug println(" field: $(dh.field_names[fi])") + if interpolation_info.nvertexdofs > 0 + for vertex in vertices(cell) + @debug println(" vertex#$vertex") + token = Base.ht_keyindex2!(vertexdicts[fi], vertex) + if token > 0 # haskey(vertexdicts[fi], vertex) # reuse dofs + reuse_dof = vertexdicts[fi].vals[token] # vertexdicts[fi][vertex] + for d in 1:dh.field_dims[fi] + @debug println(" reusing dof #$(reuse_dof + (d-1))") + push!(dh.cell_dofs, reuse_dof + (d-1)) + end + else # token <= 0, distribute new dofs + for vertexdof in 1:interpolation_info.nvertexdofs + Base._setindex!(vertexdicts[fi], nextdof, vertex, -token) # vertexdicts[fi][vertex] = nextdof + for d in 1:dh.field_dims[fi] + @debug println(" adding dof#$nextdof") + push!(dh.cell_dofs, nextdof) + nextdof += 1 + end + end + end + end # vertex loop + end + if dim == 3 # edges only in 3D + if interpolation_info.nedgedofs > 0 + for edge in edges(cell) + sedge, dir = sortedge(edge) + @debug println(" edge#$sedge dir: $(dir)") + token = Base.ht_keyindex2!(edgedicts[fi], sedge) + if token > 0 # haskey(edgedicts[fi], sedge), reuse dofs + startdof, olddir = edgedicts[fi].vals[token] # edgedicts[fi][sedge] # first dof for this edge (if dir == true) + for edgedof in (dir == olddir ? 
(1:interpolation_info.nedgedofs) : (interpolation_info.nedgedofs:-1:1)) + for d in 1:dh.field_dims[fi] + reuse_dof = startdof + (d-1) + (edgedof-1)*dh.field_dims[fi] + @debug println(" reusing dof#$(reuse_dof)") + push!(dh.cell_dofs, reuse_dof) + end + end + else # token <= 0, distribute new dofs + Base._setindex!(edgedicts[fi], (nextdof, dir), sedge, -token) # edgedicts[fi][sedge] = (nextdof, dir), store only the first dof for the edge + for edgedof in 1:interpolation_info.nedgedofs + for d in 1:dh.field_dims[fi] + @debug println(" adding dof#$nextdof") + push!(dh.cell_dofs, nextdof) + nextdof += 1 + end + end + end + end # edge loop + end + end + if interpolation_info.nfacedofs > 0 && (interpolation_info.dim == dim) + for face in faces(cell) + sface = sortface(face) # TODO: faces(cell) may as well just return the sorted list + @debug println(" face#$sface") + token = Base.ht_keyindex2!(facedicts[fi], sface) + if token > 0 # haskey(facedicts[fi], sface), reuse dofs + startdof = facedicts[fi].vals[token] # facedicts[fi][sface] + for facedof in interpolation_info.nfacedofs:-1:1 # always reverse (YOLO) + for d in 1:dh.field_dims[fi] + reuse_dof = startdof + (d-1) + (facedof-1)*dh.field_dims[fi] + @debug println(" reusing dof#$(reuse_dof)") + push!(dh.cell_dofs, reuse_dof) + end + end + else # distribute new dofs + Base._setindex!(facedicts[fi], nextdof, sface, -token)# facedicts[fi][sface] = nextdof, store the first dof for this face + for facedof in 1:interpolation_info.nfacedofs + for d in 1:dh.field_dims[fi] + @debug println(" adding dof#$nextdof") + push!(dh.cell_dofs, nextdof) + nextdof += 1 + end + end + end + end # face loop + end + if interpolation_info.ncelldofs > 0 # always distribute new dofs for cell + @debug println(" cell#$ci") + for celldof in 1:interpolation_info.ncelldofs + for d in 1:dh.field_dims[fi] + @debug println(" adding dof#$nextdof") + push!(dh.cell_dofs, nextdof) + nextdof += 1 + end + end # cell loop + end + end # field loop + # push! the first index of the next cell to the offset vector + push!(dh.cell_dofs_offset, length(dh.cell_dofs)+1) + end # cell loop + dh.ndofs[] = maximum(dh.cell_dofs) + dh.closed[] = true + + return dh, vertexdicts, edgedicts, facedicts + +end + +function _create_sparsity_pattern(dh::DistributedDofHandler, ch#=::Union{ConstraintHandler, Nothing}=#, sym::Bool) + ncells = getncells(dh.grid) + n = ndofs_per_cell(dh) + N = sym ? div(n*(n+1), 2) * ncells : n^2 * ncells + N += ndofs(dh) # always add the diagonal elements + I = Int[]; resize!(I, N) + J = Int[]; resize!(J, N) + global_dofs = zeros(Int, n) + cnt = 0 + for element_id in 1:ncells + celldofs!(global_dofs, dh, element_id) + @inbounds for j in 1:n, i in 1:n + dofi = global_dofs[i] + dofj = global_dofs[j] + sym && (dofi > dofj && continue) + cnt += 1 + if cnt > length(J) + resize!(I, trunc(Int, length(I) * 1.5)) + resize!(J, trunc(Int, length(J) * 1.5)) + end + I[cnt] = dofi + J[cnt] = dofj + + end + end + @inbounds for d in 1:ndofs(dh) + cnt += 1 + if cnt > length(J) + resize!(I, trunc(Int, length(I) + ndofs(dh))) + resize!(J, trunc(Int, length(J) + ndofs(dh))) + end + I[cnt] = d + J[cnt] = d + end + + resize!(I, cnt) + resize!(J, cnt) + V = zeros(length(I)) + K = sparse(I, J, V) + + # Add entries to K corresponding to condensation due the linear constraints + # Note, this requires the K matrix, which is why we can't push!() to the I,J,V + # triplet directly. 
+ if ch !== nothing + @assert isclosed(ch) + _condense_sparsity_pattern!(K, ch.acs) + end + + return K +end \ No newline at end of file diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index 50a7120ad3..f63ad26e4d 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -52,30 +52,29 @@ ndofs_per_cell(dh::AbstractDofHandler, cell::Int=1) = dh.cell_dofs_offset[cell+1 isclosed(dh::AbstractDofHandler) = dh.closed[] nfields(dh::AbstractDofHandler) = length(dh.field_names) getfieldnames(dh::AbstractDofHandler) = dh.field_names -ndim(dh::AbstractDofHandler, field_name::Symbol) = dh.field_dims[find_field(dh, field_name)] -function find_field(dh::DofHandler, field_name::Symbol) - j = findfirst(i->i == field_name, dh.field_names) +getfieldinterpolation(dh::AbstractDofHandler, field_idx::Int) = dh.field_interpolations[field_idx] +getfielddim(dh::AbstractDofHandler, field_idx::Int) = dh.field_dims[field_idx] +getbcvalue(dh::AbstractDofHandler, field_idx::Int) = dh.bc_values[field_idx] + +function find_field(dh::AbstractDofHandler, field_name::Symbol) + j = findfirst(i->i == field_name, getfieldnames(dh)) j == 0 && error("did not find field $field_name") return j end # Calculate the offset to the first local dof of a field -function field_offset(dh::DofHandler, field_name::Symbol) +function field_offset(dh::AbstractDofHandler, field_name::Symbol) offset = 0 for i in 1:find_field(dh, field_name)-1 - offset += getnbasefunctions(dh.field_interpolations[i])::Int * dh.field_dims[i] + offset += getnbasefunctions(getfieldinterpolation(dh,i))::Int * getfielddim(dh, i) end return offset end -getfieldinterpolation(dh::DofHandler, field_idx::Int) = dh.field_interpolations[field_idx] -getfielddim(dh::DofHandler, field_idx::Int) = dh.field_dims[field_idx] -getbcvalue(dh::DofHandler, field_idx::Int) = dh.bc_values[field_idx] - -function getfielddim(dh::DofHandler, name::Symbol) +function getfielddim(dh::AbstractDofHandler, name::Symbol) field_pos = findfirst(i->i == name, getfieldnames(dh)) field_pos === nothing && error("did not find field $name") - return dh.field_dims[field_pos] + return getfielddim(dh, field_pos) end """ @@ -96,10 +95,10 @@ julia> dof_range(dh, :p) 10:12 ``` """ -function dof_range(dh::DofHandler, field_name::Symbol) +function dof_range(dh::AbstractDofHandler, field_name::Symbol) f = find_field(dh, field_name) offset = field_offset(dh, field_name) - n_field_dofs = getnbasefunctions(dh.field_interpolations[f])::Int * dh.field_dims[f] + n_field_dofs = getnbasefunctions(dh.field_interpolations[f])::Int * getfielddim(dh, f) return (offset+1):(offset+n_field_dofs) end @@ -112,7 +111,7 @@ The field is added to all cells of the underlying grid. In case no interpolation the default interpolation of the grid's celltype is used. If the grid uses several celltypes, [`push!(dh::MixedDofHandler, fh::FieldHandler)`](@ref) must be used instead. """ -function Base.push!(dh::DofHandler, name::Symbol, dim::Int, ip::Interpolation=default_interpolation(getcelltype(dh.grid))) +function Base.push!(dh::AbstractDofHandler, name::Symbol, dim::Int, ip::Interpolation=default_interpolation(getcelltype(dh.grid))) @assert !isclosed(dh) @assert !in(name, dh.field_names) push!(dh.field_names, name) @@ -335,26 +334,26 @@ end # Creates a sparsity pattern from the dofs in a DofHandler. # Returns a sparse matrix with the correct storage pattern. 
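# Both the serial routine below and the `DistributedDofHandler` copy above build the
# pattern the same way: collect one (dofi, dofj) pair per coupled pair of cell dofs into
# the I/J triplet vectors, append the diagonal, and let `sparse` merge duplicates. A
# minimal usage sketch follows; the grid size, field name and zero-valued element arrays
# are illustrative choices only and not taken from this patch.
using Ferrite, SparseArrays

grid = generate_grid(Quadrilateral, (2, 2))
dh = DofHandler(grid)
push!(dh, :u, 1)                 # one scalar field on the whole domain
close!(dh)

K = create_sparsity_pattern(dh)  # SparseMatrixCSC with stored zeros for every coupled dof pair
f = zeros(ndofs(dh))
assembler = start_assemble(K, f)

n = ndofs_per_cell(dh)
for cell in CellIterator(dh)
    # element contributions would normally come from quadrature; zeros suffice to show
    # that assembly only writes into positions already stored in the pattern
    assemble!(assembler, celldofs(cell), zeros(n), zeros(n, n))
end
# Since `assemble!` only writes into entries that already exist in the pattern, building
# `K` up front fixes the storage before the element loop runs.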
""" - create_sparsity_pattern(dh::DofHandler) + create_sparsity_pattern(dh::AbstractDofHandler) Create the sparsity pattern corresponding to the degree of freedom -numbering in the [`DofHandler`](@ref). Return a `SparseMatrixCSC` +numbering in the [`AbstractDofHandler`](@ref). Return a `SparseMatrixCSC` with stored values in the correct places. See the [Sparsity Pattern](@ref) section of the manual. """ -create_sparsity_pattern(dh::DofHandler) = _create_sparsity_pattern(dh, nothing, false) +create_sparsity_pattern(dh::AbstractDofHandler) = _create_sparsity_pattern(dh, nothing, false) """ - create_symmetric_sparsity_pattern(dh::DofHandler) + create_symmetric_sparsity_pattern(dh::AbstractDofHandler) Create the symmetric sparsity pattern corresponding to the degree of freedom -numbering in the [`DofHandler`](@ref) by only considering the upper +numbering in the [`AbstractDofHandler`](@ref) by only considering the upper triangle of the matrix. Return a `Symmetric{SparseMatrixCSC}`. See the [Sparsity Pattern](@ref) section of the manual. """ -create_symmetric_sparsity_pattern(dh::DofHandler) = Symmetric(_create_sparsity_pattern(dh, nothing, true), :U) +create_symmetric_sparsity_pattern(dh::AbstractDofHandler) = Symmetric(_create_sparsity_pattern(dh, nothing, true), :U) function _create_sparsity_pattern(dh::DofHandler, ch#=::Union{ConstraintHandler, Nothing}=#, sym::Bool) ncells = getncells(dh.grid) diff --git a/src/Ferrite.jl b/src/Ferrite.jl index 5526856297..bc05a0e79e 100644 --- a/src/Ferrite.jl +++ b/src/Ferrite.jl @@ -50,11 +50,13 @@ include("FEValues/face_integrals.jl") # Grid include("Grid/grid.jl") +include("Grid/DistributedGrid.jl") include("Grid/grid_generators.jl") include("Grid/coloring.jl") # Dofs include("Dofs/DofHandler.jl") +include("Dofs/DistributedDofHandler.jl") include("Dofs/MixedDofHandler.jl") include("Dofs/ConstraintHandler.jl") diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl new file mode 100644 index 0000000000..c721772d7f --- /dev/null +++ b/src/Grid/DistributedGrid.jl @@ -0,0 +1,78 @@ +using Metis +using MPI + +""" +""" +abstract type AbstractDistributedGrid{sdim} <: AbstractGrid{sdim} end + +struct SharedVertex + local_idx::VertexIndex + remote_vertices::Dict{Int,Vector{VertexIndex}} +end + +struct SharedFace + local_idx::FaceIndex + remote_edges::Dict{Int,Vector{FaceIndex}} +end + +struct SharedEdge + local_idx::EdgeIndex + remote_edges::Dict{Int,Vector{EdgeIndex}} +end + +""" +""" +mutable struct DistributedGrid{dim,C<:AbstractCell,T<:Real} <: AbstractDistributedGrid{dim} + # Dense comminicator on the grid + grid_comm::MPI.Comm + # Sparse communicator along the shared vertex neighbors + # We only need this one because the vertices induce the edge and face neighbors. 
+ interface_comm::MPI.Comm + # Here we store the full local grid + local_grid::Grid + # Local copies of the shared entities of the form (local index, (process id in grid_comm, remote index)) + shared_vertices::Vector{SharedVertex} + shared_edges::Vector{SharedEdge} + shared_faces::Vector{SharedFace} +end + +""" +""" +function DistributedGrid(grid_to_distribute::Grid, grid_comm::MPI.Comm; partition_alg = :RECURSIVE) + grid_topology = ExclusiveTopology(grid_to_distribute) + return DistributedGrid(grid_to_distribute, grid_topology, grid_comm; partition_alg=partition_alg) +end + +""" +""" +function DistributedGrid(grid_to_distribute::Grid, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm; partition_alg = :RECURSIVE) + N = getncells(grid_to_distribute) + @assert N > 0 + + xadj = Vector{Metis.idx_t}(undef, N+1) + xadj[1] = 1 + adjncy = Vector{Metis.idx_t}(undef, 0) + @inbounds for i in 1:N + n_neighbors = 0 + for neighbor ∈ getneighborhood(grid_topology, grid_to_distribute, CellIndex(i)) + push!(adjncy, neighbor) + n_neighbors += 1 + end + xadj[i+1] = xadj[i] + n_neighbors + end + + parts = Metis.partition( + Metis.Graph( + Metis.idx_t(N), + xadj, + adjncy + ), + MPI.Comm_size(grid_comm); + alg=partition_alg + ) + + return Grid( + getcells(grid_to_distribute)[[i for i ∈ 1:N if parts[i] == (MPI.Comm_rank(grid_comm)+1)]], + grid_to_distribute.nodes + ) +end \ No newline at end of file diff --git a/src/deprecations.jl b/src/deprecations.jl index eae65e06be..ea0a96ff30 100644 --- a/src/deprecations.jl +++ b/src/deprecations.jl @@ -1,2 +1,3 @@ Base.@deprecate_binding DirichletBoundaryConditions ConstraintHandler Base.@deprecate_binding DirichletBoundaryCondition Dirichlet +@deprecate ndim(dh::AbstractDofHandler, field_name::Symbol) getfielddim(dh::AbstractDofHandler, field_name::Symbol) \ No newline at end of file diff --git a/src/exports.jl b/src/exports.jl index a08e592823..91e9fd90e3 100644 --- a/src/exports.jl +++ b/src/exports.jl @@ -40,6 +40,7 @@ export # Grid Grid, + DistributedGrid, Node, Cell, Line, @@ -97,6 +98,7 @@ export # Dofs DofHandler, + DistributedDofHandler, close!, ndofs, ndofs_per_cell, From 4d775905a784362dc842a7dfb7cdabeec199de46 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Fri, 27 May 2022 00:44:37 +0200 Subject: [PATCH 002/124] Local grid extraction functional. --- docs/src/literate/distributed_assembly.jl | 211 ++++++++++++++++++++++ src/Export/VTK.jl | 6 + src/Grid/DistributedGrid.jl | 119 +++++++++++- 3 files changed, 330 insertions(+), 6 deletions(-) create mode 100644 docs/src/literate/distributed_assembly.jl diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl new file mode 100644 index 0000000000..4f8a404d78 --- /dev/null +++ b/docs/src/literate/distributed_assembly.jl @@ -0,0 +1,211 @@ +# # Heat Equation +# +# ![](heat_square.png) +# +# *Figure 1*: Temperature field on the unit square with an internal uniform heat source +# solved with homogeneous Dirichlet boundary conditions on the boundary. +# +# +# ## Introduction +# +# The heat equation is the "Hello, world!" equation of finite elements. +# Here we solve the equation on a unit square, with a uniform internal source. +# The strong form of the (linear) heat equation is given by +# +# ```math +# -\nabla \cdot (k \nabla u) = f \quad x \in \Omega, +# ``` +# +# where $u$ is the unknown temperature field, $k$ the heat conductivity, +# $f$ the heat source and $\Omega$ the domain. For simplicity we set $f = 1$ +# and $k = 1$. 
We will consider homogeneous Dirichlet boundary conditions such that +# ```math +# u(x) = 0 \quad x \in \partial \Omega, +# ``` +# where $\partial \Omega$ denotes the boundary of $\Omega$. +# +# The resulting weak form is given by +# ```math +# \int_{\Omega} \nabla v \cdot \nabla u \ d\Omega = \int_{\Omega} v \ d\Omega, +# ``` +# where $v$ is a suitable test function. +#- +# ## Commented Program +# +# Now we solve the problem in Ferrite. What follows is a program spliced with comments. +#md # The full program, without comments, can be found in the next [section](@ref heat_equation-plain-program). +# +# First we load Ferrite, and some other packages we need +using Ferrite, SparseArrays, MPI + +# Launch MPI +MPI.Init() + +# We start generating a simple grid with 20x20 quadrilateral elements +# using `generate_grid`. The generator defaults to the unit square, +# so we don't need to specify the corners of the domain. +grid = generate_grid(Quadrilateral, (4, 4)); + +dgrid = DistributedGrid(grid, MPI.COMM_WORLD) + +vtk_grid("grid", dgrid; compress=false) do pvtk +end + +# Shutdown MPI +MPI.Finalize() + +# Early out for testing. +exit(0) + +# ### Trial and test functions +# A `CellValues` facilitates the process of evaluating values and gradients of +# test and trial functions (among other things). Since the problem +# is a scalar problem we will use a `CellScalarValues` object. To define +# this we need to specify an interpolation space for the shape functions. +# We use Lagrange functions (both for interpolating the function and the geometry) +# based on the reference "cube". We also define a quadrature rule based on the +# same reference cube. We combine the interpolation and the quadrature rule +# to a `CellScalarValues` object. +dim = 2 +ip = Lagrange{dim, RefCube, 1}() +qr = QuadratureRule{dim, RefCube}(2) +cellvalues = CellScalarValues(qr, ip); + +# ### Degrees of freedom +# Next we need to define a `DofHandler`, which will take care of numbering +# and distribution of degrees of freedom for our approximated fields. +# We create the `DofHandler` and then add a single field called `u`. +# Lastly we `close!` the `DofHandler`, it is now that the dofs are distributed +# for all the elements. +dh = DistributedDofHandler(grid) +push!(dh, :u, 1) +close!(dh); + +# Now that we have distributed all our dofs we can create our tangent matrix, +# using `create_sparsity_pattern`. This function returns a sparse matrix +# with the correct elements stored. +K = create_sparsity_pattern(dh) + +# ### Boundary conditions +# In Ferrite constraints like Dirichlet boundary conditions +# are handled by a `ConstraintHandler`. +ch = ConstraintHandler(dh); + +# Next we need to add constraints to `ch`. For this problem we define +# homogeneous Dirichlet boundary conditions on the whole boundary, i.e. +# the `union` of all the face sets on the boundary. +∂Ω = union(getfaceset.((grid, ), ["left", "right", "top", "bottom"])...); + +# Now we are set up to define our constraint. We specify which field +# the condition is for, and our combined face set `∂Ω`. The last +# argument is a function which takes the spatial coordinate $x$ and +# the current time $t$ and returns the prescribed value. In this case +# it is trivial -- no matter what $x$ and $t$ we return $0$. When we have +# specified our constraint we `add!` it to `ch`. +dbc = Dirichlet(:u, ∂Ω, (x, t) -> 0) +add!(ch, dbc); + +# We also need to `close!` and `update!` our boundary conditions. 
When we call `close!` +# the dofs which will be constrained by the boundary conditions are calculated and stored +# in our `ch` object. Since the boundary conditions are, in this case, +# independent of time we can `update!` them directly with e.g. $t = 0$. +close!(ch) +update!(ch, 0.0); + +# ### Assembling the linear system +# Now we have all the pieces needed to assemble the linear system, $K u = f$. +# We define a function, `doassemble` to do the assembly, which takes our `cellvalues`, +# the sparse matrix and our DofHandler as input arguments. The function returns the +# assembled stiffness matrix, and the force vector. +function doassemble(cellvalues::CellScalarValues{dim}, K::SparseMatrixCSC, dh::DofHandler) where {dim} + # We allocate the element stiffness matrix and element force vector + # just once before looping over all the cells instead of allocating + # them every time in the loop. + #+ + n_basefuncs = getnbasefunctions(cellvalues) + Ke = zeros(n_basefuncs, n_basefuncs) + fe = zeros(n_basefuncs) + + # Next we define the global force vector `f` and use that and + # the stiffness matrix `K` and create an assembler. The assembler + # is just a thin wrapper around `f` and `K` and some extra storage + # to make the assembling faster. + #+ + f = zeros(ndofs(dh)) + assembler = start_assemble(K, f) + + # It is now time to loop over all the cells in our grid. We do this by iterating + # over a `CellIterator`. The iterator caches some useful things for us, for example + # the nodal coordinates for the cell, and the local degrees of freedom. + #+ + for cell in CellIterator(dh) + # Always remember to reset the element stiffness matrix and + # force vector since we reuse them for all elements. + #+ + fill!(Ke, 0) + fill!(fe, 0) + + # For each cell we also need to reinitialize the cached values in `cellvalues`. + #+ + reinit!(cellvalues, cell) + + # It is now time to loop over all the quadrature points in the cell and + # assemble the contribution to `Ke` and `fe`. The integration weight + # can be queried from `cellvalues` by `getdetJdV`. + #+ + for q_point in 1:getnquadpoints(cellvalues) + dΩ = getdetJdV(cellvalues, q_point) + # For each quadrature point we loop over all the (local) shape functions. + # We need the value and gradient of the testfunction `v` and also the gradient + # of the trial function `u`. We get all of these from `cellvalues`. + #+ + for i in 1:n_basefuncs + v = shape_value(cellvalues, q_point, i) + ∇v = shape_gradient(cellvalues, q_point, i) + fe[i] += v * dΩ + for j in 1:n_basefuncs + ∇u = shape_gradient(cellvalues, q_point, j) + Ke[i, j] += (∇v ⋅ ∇u) * dΩ + end + end + end + + # The last step in the element loop is to assemble `Ke` and `fe` + # into the global `K` and `f` with `assemble!`. + #+ + assemble!(assembler, celldofs(cell), fe, Ke) + end + return K, f +end +#md nothing # hide + +# ### Solution of the system +# The last step is to solve the system. First we call `doassemble` +# to obtain the global stiffness matrix `K` and force vector `f`. +K, f = doassemble(cellvalues, K, dh); + +# To account for the boundary conditions we use the `apply!` function. +# This modifies elements in `K` and `f` respectively, such that +# we can get the correct solution vector `u` by using `\`. +apply!(K, f, ch) +u = cg(K, f); + +# ### Exporting to VTK +# To visualize the result we export the grid and our field `u` +# to a VTK-file, which can be viewed in e.g. [ParaView](https://www.paraview.org/). 
+vtk_grid("heat_equation_distributed", dh) do vtk + vtk_point_data(vtk, dh, u) +end + +## test the result #src +using Test #src +@test norm(u) ≈ 3.307743912641305 #src + +#md # ## [Plain program](@id distributed-assembly-plain-program) +#md # +#md # Here follows a version of the program without any comments. +#md # The file is also available here: [`distributed_assembly.jl`](distributed_assembly.jl). +#md # +#md # ```julia +#md # @__CODE__ +#md # ``` diff --git a/src/Export/VTK.jl b/src/Export/VTK.jl index 1977d8a692..ee749d3b3d 100644 --- a/src/Export/VTK.jl +++ b/src/Export/VTK.jl @@ -33,6 +33,12 @@ function WriteVTK.vtk_grid(filename::AbstractString, grid::Grid{dim,C,T}; compre return vtk_grid(filename, coords, cls; compress=compress) end +function WriteVTK.vtk_grid(filename::AbstractString, dgrid::DistributedGrid{dim,C,T}; compress::Bool=true) where {dim,C,T} + my_rank = MPI.Comm_rank(dgrid.grid_comm) + return vtk_grid("$filename.$my_rank", dgrid.local_grid; compress=compress) +end + + function toparaview!(v, x::Vec{D}) where D v[1:D] .= x end diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index c721772d7f..196b66fae7 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -10,12 +10,16 @@ struct SharedVertex remote_vertices::Dict{Int,Vector{VertexIndex}} end +""" +""" struct SharedFace local_idx::FaceIndex remote_edges::Dict{Int,Vector{FaceIndex}} end -struct SharedEdge +""" +""" +struct SharedEdge local_idx::EdgeIndex remote_edges::Dict{Int,Vector{EdgeIndex}} end @@ -29,7 +33,7 @@ mutable struct DistributedGrid{dim,C<:AbstractCell,T<:Real} <: AbstractDistribut # We only need this one because the vertices induce the edge and face neighbors. interface_comm::MPI.Comm # Here we store the full local grid - local_grid::Grid + local_grid::Grid{dim,C,T} # Local copies of the shared entities of the form (local index, (process id in grid_comm, remote index)) shared_vertices::Vector{SharedVertex} shared_edges::Vector{SharedEdge} @@ -45,10 +49,11 @@ end """ """ -function DistributedGrid(grid_to_distribute::Grid, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm; partition_alg = :RECURSIVE) +function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm; partition_alg = :RECURSIVE) where {dim,C,T} N = getncells(grid_to_distribute) @assert N > 0 + # Set up the element connectivity graph xadj = Vector{Metis.idx_t}(undef, N+1) xadj[1] = 1 adjncy = Vector{Metis.idx_t}(undef, 0) @@ -61,6 +66,7 @@ function DistributedGrid(grid_to_distribute::Grid, grid_topology::ExclusiveTopol xadj[i+1] = xadj[i] + n_neighbors end + # Generate a partitioning parts = Metis.partition( Metis.Graph( Metis.idx_t(N), @@ -71,8 +77,109 @@ function DistributedGrid(grid_to_distribute::Grid, grid_topology::ExclusiveTopol alg=partition_alg ) - return Grid( - getcells(grid_to_distribute)[[i for i ∈ 1:N if parts[i] == (MPI.Comm_rank(grid_comm)+1)]], - grid_to_distribute.nodes + # Start extraction of local grid + # 1. Extract local cells + local_cells = getcells(grid_to_distribute)[[i for i ∈ 1:N if parts[i] == (MPI.Comm_rank(grid_comm)+1)]] + + # 2. Find unique nodes + local_node_index_set = Set{Int}() + for cell ∈ local_cells + for global_node_idx ∈ cell.nodes # @TODO abstraction + push!(local_node_index_set, global_node_idx) + end + end + + # 3. 
Build a map for global to local node indices + next_local_node_idx = 1 + global_to_local_node_map = Dict{Int,Int}() + for global_node_idx ∈ local_node_index_set + global_to_local_node_map[global_node_idx] = next_local_node_idx + next_local_node_idx += 1 + end + + # 4. Extract local nodes + local_nodes = Vector{Node{dim,T}}(undef,length(local_node_index_set)) + global_nodes = getnodes(grid_to_distribute) + for global_node_idx ∈ local_node_index_set + local_node_idx = global_to_local_node_map[global_node_idx] + local_nodes[local_node_idx] = global_nodes[global_node_idx] + end + + # 5. Transform cell indices + for local_cell_idx ∈ 1:length(local_cells) + local_cells[local_cell_idx] = C(map(global_node_idx -> global_to_local_node_map[global_node_idx], local_cells[local_cell_idx].nodes)) + end + + # 6. Extract sets + # @TODO deduplicate the code. We should be able to merge each of these into a macro or function. + global_to_local_cell_map = Dict{Int,Int}() + next_local_cell_idx = 1 + for global_cell_idx ∈ 1:N + if parts[global_cell_idx] == (MPI.Comm_rank(grid_comm)+1) + global_to_local_cell_map[global_cell_idx] = next_local_cell_idx + next_local_cell_idx += 1 + end + end + + cellsets = Dict{String,Set{Int}}() + for key ∈ keys(grid_to_distribute.cellsets) + cellsets[key] = Set{Int}() # create empty set, so it does not crash during assembly + for global_cell_idx ∈ grid_to_distribute.cellsets[key] + if haskey(global_to_local_cell_map, global_cell_idx) + push!(cellsets[key], global_to_local_cell_map[global_cell_idx]) + end + end + end + + nodesets = Dict{String,Set{Int}}() + for key ∈ keys(grid_to_distribute.nodesets) + nodesets[key] = Set{Int}() # create empty set, so it does not crash during assembly + for global_node_idx ∈ grid_to_distribute.nodesets[key] + if haskey(global_to_local_node_map, global_node_idx) + push!(nodesets[key], global_to_local_node_map[global_node_idx]) + end + end + end + + facesets = Dict{String,Set{FaceIndex}}() + for key ∈ keys(grid_to_distribute.facesets) + facesets[key] = Set{FaceIndex}() # create empty set, so it does not crash during assembly + for (global_cell_idx, i) ∈ grid_to_distribute.facesets[key] + if haskey(global_to_local_cell_map, global_cell_idx) + push!(facesets[key], FaceIndex(global_to_local_cell_map[global_cell_idx], i)) + end + end + end + + edgesets = Dict{String,Set{EdgeIndex}}() + for key ∈ keys(grid_to_distribute.edgesets) + edgesets[key] = Set{EdgeIndex}() # create empty set, so it does not crash during assembly + for (global_cell_idx, i) ∈ grid_to_distribute.edgesets[key] + if haskey(global_to_local_cell_map, global_cell_idx) + push!(edgesets[key], EdgeIndex(global_to_local_cell_map[global_cell_idx], i)) + end + end + end + + vertexsets = Dict{String,Set{VertexIndex}}() + for key ∈ keys(grid_to_distribute.vertexsets) + vertexsets[key] = Set{VertexIndex}() # create empty set, so it does not crash during assembly + for (global_cell_idx, i) ∈ grid_to_distribute.vertexsets[key] + if haskey(global_to_local_cell_map, global_cell_idx) + push!(vertexsets[key], VertexIndex(global_to_local_cell_map[global_cell_idx], i)) + end + end + end + + local_grid = Grid( + local_cells, + local_nodes, + cellsets=cellsets, + nodesets=nodesets, + facesets=facesets, + edgesets=edgesets, + vertexsets=vertexsets ) + + return DistributedGrid(grid_comm,grid_comm,local_grid,Vector{SharedVertex}([]),Vector{SharedEdge}([]),Vector{SharedFace}([])) end \ No newline at end of file From 7ba155b99886e003b85f0e6a4c5b78a891c65343 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: 
Fri, 27 May 2022 04:01:01 +0200 Subject: [PATCH 003/124] Add debug print for shared vertices. Works. But uses too much memory. --- docs/src/literate/distributed_assembly.jl | 14 ++- src/Grid/DistributedGrid.jl | 100 ++++++++++++++++++---- src/Grid/grid.jl | 84 +++++++++--------- 3 files changed, 137 insertions(+), 61 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 4f8a404d78..e0bcae50ed 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -48,7 +48,19 @@ grid = generate_grid(Quadrilateral, (4, 4)); dgrid = DistributedGrid(grid, MPI.COMM_WORLD) -vtk_grid("grid", dgrid; compress=false) do pvtk +vtk_grid("grid", dgrid; compress=false) do vtk + u = Vector{Float64}(undef,length(dgrid.local_grid.nodes)) + for rank ∈ 1:MPI.Comm_size(MPI.COMM_WORLD) + fill!(u, 0.0) + for sv ∈ dgrid.shared_vertices + if haskey(sv.remote_vertices, rank) + (cellidx,i) = sv.local_idx + nodeidx = dgrid.local_grid.cells[cellidx].nodes[i] + u[nodeidx] = rank + end + end + vtk_point_data(vtk, u,"sv $rank") + end end # Shutdown MPI diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index 196b66fae7..80de9e1351 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -47,10 +47,8 @@ function DistributedGrid(grid_to_distribute::Grid, grid_comm::MPI.Comm; partitio return DistributedGrid(grid_to_distribute, grid_topology, grid_comm; partition_alg=partition_alg) end -""" -""" -function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm; partition_alg = :RECURSIVE) where {dim,C,T} - N = getncells(grid_to_distribute) +function create_partitioning(grid::Grid, grid_topology::ExclusiveTopology, n_partitions, partition_alg) + N = getncells(grid) @assert N > 0 # Set up the element connectivity graph @@ -59,7 +57,7 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu adjncy = Vector{Metis.idx_t}(undef, 0) @inbounds for i in 1:N n_neighbors = 0 - for neighbor ∈ getneighborhood(grid_topology, grid_to_distribute, CellIndex(i)) + for neighbor ∈ getneighborhood(grid_topology, grid, CellIndex(i)) push!(adjncy, neighbor) n_neighbors += 1 end @@ -67,19 +65,30 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu end # Generate a partitioning - parts = Metis.partition( + return Metis.partition( Metis.Graph( Metis.idx_t(N), xadj, adjncy ), - MPI.Comm_size(grid_comm); + n_partitions; alg=partition_alg ) +end + +""" +""" +function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm; partition_alg = :RECURSIVE) where {dim,C,T} + N = getncells(grid_to_distribute) + @assert N > 0 + + my_rank = MPI.Comm_rank(grid_comm)+1 + + parts = create_partitioning(grid_to_distribute, grid_topology, MPI.Comm_size(grid_comm), partition_alg) # Start extraction of local grid # 1. Extract local cells - local_cells = getcells(grid_to_distribute)[[i for i ∈ 1:N if parts[i] == (MPI.Comm_rank(grid_comm)+1)]] + local_cells = getcells(grid_to_distribute)[[i for i ∈ 1:N if parts[i] == my_rank]] # 2. Find unique nodes local_node_index_set = Set{Int}() @@ -99,10 +108,12 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu # 4. 
Extract local nodes local_nodes = Vector{Node{dim,T}}(undef,length(local_node_index_set)) - global_nodes = getnodes(grid_to_distribute) - for global_node_idx ∈ local_node_index_set - local_node_idx = global_to_local_node_map[global_node_idx] - local_nodes[local_node_idx] = global_nodes[global_node_idx] + begin + global_nodes = getnodes(grid_to_distribute) + for global_node_idx ∈ local_node_index_set + local_node_idx = global_to_local_node_map[global_node_idx] + local_nodes[local_node_idx] = global_nodes[global_node_idx] + end end # 5. Transform cell indices @@ -113,11 +124,13 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu # 6. Extract sets # @TODO deduplicate the code. We should be able to merge each of these into a macro or function. global_to_local_cell_map = Dict{Int,Int}() - next_local_cell_idx = 1 - for global_cell_idx ∈ 1:N - if parts[global_cell_idx] == (MPI.Comm_rank(grid_comm)+1) - global_to_local_cell_map[global_cell_idx] = next_local_cell_idx - next_local_cell_idx += 1 + begin + next_local_cell_idx = 1 + for global_cell_idx ∈ 1:N + if parts[global_cell_idx] == (my_rank) + global_to_local_cell_map[global_cell_idx] = next_local_cell_idx + next_local_cell_idx += 1 + end end end @@ -181,5 +194,56 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu vertexsets=vertexsets ) - return DistributedGrid(grid_comm,grid_comm,local_grid,Vector{SharedVertex}([]),Vector{SharedEdge}([]),Vector{SharedFace}([])) + shared_vertices = Vector{SharedVertex}() + shared_edges = Vector{SharedEdge}() + shared_faces = Vector{SharedFace}() + for (global_cell_idx,global_cell) ∈ enumerate(getcells(grid_to_distribute)) + if parts[global_cell_idx] == my_rank + # Vertex + for (i, global_node_idx) ∈ enumerate(vertices(global_cell)) + cell_vertex = VertexIndex(global_cell_idx, i) + remote_vertices = Dict{Int,Vector{VertexIndex}}() + for (global_cell_neighbor_idx, j) ∈ getneighborhood(grid_topology, grid_to_distribute, cell_vertex) + other_rank = parts[global_cell_neighbor_idx] + if other_rank != my_rank + # Todo remote local cell id + if !haskey(remote_vertices,other_rank) + remote_vertices[other_rank] = Vector(undef,0) + end + push!(remote_vertices[other_rank], VertexIndex(global_cell_neighbor_idx, j)) + end + end + + if length(remote_vertices) > 0 + push!(shared_vertices, SharedVertex(VertexIndex(global_to_local_cell_map[global_cell_idx], i), remote_vertices)) + end + end + + # # Edge + # if dim > 2 + # for (i, global_vertex_idx) ∈ enumerate(edges(global_cell)) + # cell_edge = EdgeIndex(global_cell_idx, i) + # for (global_cell_neighbor_idx, j) ∈ getneighborhood(grid_topology, grid_to_distribute, cell_edge) + # if parts[global_cell_neighbor_idx] != my_rank + # push!(shared_edges, cell_edge) + # end + # end + # end + # end + + # # Face + # if dim > 1 + # for (i, global_vertex_idx) ∈ enumerate(faces(global_cell)) + # cell_face = FaceIndex(global_cell_idx, i) + # for (global_cell_neighbor_idx, j) ∈ getneighborhood(grid_topology, grid_to_distribute, cell_face) + # if parts[global_cell_neighbor_idx] != my_rank + # push!(shared_faces, cell_face) + # end + # end + # end + # end + end + end + + return DistributedGrid(grid_comm,grid_comm,local_grid,shared_vertices,shared_edges,shared_faces) end \ No newline at end of file diff --git a/src/Grid/grid.jl b/src/Grid/grid.jl index 536502fccc..e5653919ec 100644 --- a/src/Grid/grid.jl +++ b/src/Grid/grid.jl @@ -144,7 +144,7 @@ The struct saves the highest dimensional neighborhood, i.e. 
if something is conn - `vertex_neighbor::SparseMatrixCSC{EntityNeighborhood,Int}`: `vertex_neighbor[cellid,local_vertex_id]` -> neighboring vertex - `edge_neighbor::SparseMatrixCSC{EntityNeighborhood,Int}`: `edge_neighbor[cellid_local_vertex_id]` -> neighboring edge - `vertex_vertex_neighbor::Dict{Int,EntityNeighborhood{VertexIndex}}`: global vertex id -> all connected vertices by edge or face -- `face_skeleton::Vector{FaceIndex}`: list of unique faces in the grid +- `face_skeleton::Vector{FaceIndex}`: list of unique faces in the grid """ struct ExclusiveTopology <: AbstractTopology # maps a global vertex id to all cells containing the vertex @@ -165,8 +165,8 @@ end function ExclusiveTopology(cells::Vector{C}) where C <: AbstractCell cell_vertices_table = vertices.(cells) #needs generic interface for <: AbstractCell - vertex_cell_table = Dict{Int,Vector{Int}}() - + vertex_cell_table = Dict{Int,Vector{Int}}() + for (cellid, cell_nodes) in enumerate(cell_vertices_table) for node in cell_nodes if haskey(vertex_cell_table, node) @@ -174,20 +174,20 @@ function ExclusiveTopology(cells::Vector{C}) where C <: AbstractCell else vertex_cell_table[node] = [cellid] end - end + end end I_face = Int[]; J_face = Int[]; V_face = EntityNeighborhood[] I_edge = Int[]; J_edge = Int[]; V_edge = EntityNeighborhood[] - I_vertex = Int[]; J_vertex = Int[]; V_vertex = EntityNeighborhood[] - cell_neighbor_table = Vector{EntityNeighborhood{CellIndex}}(undef, length(cells)) + I_vertex = Int[]; J_vertex = Int[]; V_vertex = EntityNeighborhood[] + cell_neighbor_table = Vector{EntityNeighborhood{CellIndex}}(undef, length(cells)) - for (cellid, cell) in enumerate(cells) + for (cellid, cell) in enumerate(cells) #cell neighborhood cell_neighbors = getindex.((vertex_cell_table,), cell_vertices_table[cellid]) # cell -> vertex -> cell - cell_neighbors = unique(reduce(vcat,cell_neighbors)) # non unique list initially + cell_neighbors = unique(reduce(vcat,cell_neighbors)) # non unique list initially filter!(x->x!=cellid, cell_neighbors) # get rid of self neighborhood - cell_neighbor_table[cellid] = EntityNeighborhood(CellIndex.(cell_neighbors)) + cell_neighbor_table[cellid] = EntityNeighborhood(CellIndex.(cell_neighbors)) for neighbor in cell_neighbors neighbor_local_ids = findall(x->x in cell.nodes, cells[neighbor].nodes) @@ -197,23 +197,23 @@ function ExclusiveTopology(cells::Vector{C}) where C <: AbstractCell _vertex_neighbor!(V_vertex, I_vertex, J_vertex, cellid, cell, neighbor_local_ids, neighbor, cells[neighbor]) # face neighbor elseif length(cell_local_ids) == face_npoints(cell) - _face_neighbor!(V_face, I_face, J_face, cellid, cell, neighbor_local_ids, neighbor, cells[neighbor]) + _face_neighbor!(V_face, I_face, J_face, cellid, cell, neighbor_local_ids, neighbor, cells[neighbor]) # edge neighbor elseif getdim(cell) > 2 && length(cell_local_ids) == edge_npoints(cell) _edge_neighbor!(V_edge, I_edge, J_edge, cellid, cell, neighbor_local_ids, neighbor, cells[neighbor]) end - end + end end face_neighbor = sparse(I_face,J_face,V_face) - vertex_neighbor = sparse(I_vertex,J_vertex,V_vertex) + vertex_neighbor = sparse(I_vertex,J_vertex,V_vertex) edge_neighbor = sparse(I_edge,J_edge,V_edge) vertex_vertex_table = Dict{Int,EntityNeighborhood}() vertex_vertex_global = Dict{Int,Vector{Int}}() # Vertex Connectivity for global_vertexid in keys(vertex_cell_table) - #Cellset that contains given vertex + #Cellset that contains given vertex cellset = vertex_cell_table[global_vertexid] vertex_neighbors_local = VertexIndex[] 
vertex_neighbors_global = Int[] @@ -227,7 +227,7 @@ function ExclusiveTopology(cells::Vector{C}) where C <: AbstractCell end vertex_vertex_table[global_vertexid] = EntityNeighborhood(vertex_neighbors_local) vertex_vertex_global[global_vertexid] = vertex_neighbors_global - end + end # Face Skeleton face_skeleton_global = Set{NTuple}() @@ -238,7 +238,7 @@ function ExclusiveTopology(cells::Vector{C}) where C <: AbstractCell push!(face_skeleton_global, sortface(face)) fs_length_new = length(face_skeleton_global) if fs_length != fs_length_new - push!(face_skeleton_local, FaceIndex(cellid,local_face_id)) + push!(face_skeleton_local, FaceIndex(cellid,local_face_id)) fs_length = fs_length_new end end @@ -297,7 +297,7 @@ ExclusiveTopology(grid::AbstractGrid) = ExclusiveTopology(getcells(grid)) A `Grid` is a collection of `Cells` and `Node`s which covers the computational domain, together with Sets of cells, nodes and faces. There are multiple helper structures to apply boundary conditions or define subdomains. They are gathered in the `cellsets`, `nodesets`, -`facesets`, `edgesets` and `vertexsets`. +`facesets`, `edgesets` and `vertexsets`. # Fields - `cells::Vector{C}`: stores all cells of the grid @@ -305,7 +305,7 @@ There are multiple helper structures to apply boundary conditions or define subd - `cellsets::Dict{String,Set{Int}}`: maps a `String` key to a `Set` of cell ids - `nodesets::Dict{String,Set{Int}}`: maps a `String` key to a `Set` of global node ids - `facesets::Dict{String,Set{FaceIndex}}`: maps a `String` to a `Set` of `Set{FaceIndex} (global_cell_id, local_face_id)` -- `edgesets::Dict{String,Set{EdgeIndex}}`: maps a `String` to a `Set` of `Set{EdgeIndex} (global_cell_id, local_edge_id` +- `edgesets::Dict{String,Set{EdgeIndex}}`: maps a `String` to a `Set` of `Set{EdgeIndex} (global_cell_id, local_edge_id` - `vertexsets::Dict{String,Set{VertexIndex}}`: maps a `String` key to a `Set` of local vertex ids - `boundary_matrix::SparseMatrixCSC{Bool,Int}`: optional, only needed by `onboundary` to check if a cell is on the boundary, see, e.g. Helmholtz example """ @@ -315,9 +315,9 @@ mutable struct Grid{dim,C<:AbstractCell,T<:Real} <: AbstractGrid{dim} # Sets cellsets::Dict{String,Set{Int}} nodesets::Dict{String,Set{Int}} - facesets::Dict{String,Set{FaceIndex}} - edgesets::Dict{String,Set{EdgeIndex}} - vertexsets::Dict{String,Set{VertexIndex}} + facesets::Dict{String,Set{FaceIndex}} + edgesets::Dict{String,Set{EdgeIndex}} + vertexsets::Dict{String,Set{VertexIndex}} # Boundary matrix (faces per cell × cell) boundary_matrix::SparseMatrixCSC{Bool,Int} end @@ -343,7 +343,7 @@ end getneighborhood(top::ExclusiveTopology, grid::Grid{dim,C,T}, edgeidx::EdgeIndex, include_self=false) Returns all directly connected entities of the same type, i.e. calling the function with a `VertexIndex` will return -a list of directly connected vertices (connected via face/edge). If `include_self` is true, the given `*Index` is included +a list of directly connected vertices (connected via face/edge). If `include_self` is true, the given `*Index` is included in the returned list. !!! 
warning @@ -353,13 +353,13 @@ function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, cellidx::Ce patch = getcells(top.cell_neighbor[cellidx.idx]) if include_self return [patch; cellidx.idx] - else + else return patch end end function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, faceidx::FaceIndex, include_self=false) - if include_self + if include_self return [top.face_neighbor[faceidx[1],faceidx[2]].neighbor_info; faceidx] else return top.face_neighbor[faceidx[1],faceidx[2]].neighbor_info @@ -378,7 +378,7 @@ function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, vertexidx:: end function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid{3}, edgeidx::EdgeIndex, include_self=false) - if include_self + if include_self return [top.edge_neighbor[edgeidx[1],edgeidx[2]].neighbor_info; edgeidx] else return top.edge_neighbor[edgeidx[1],edgeidx[2]].neighbor_info @@ -387,7 +387,7 @@ end """ faceskeleton(grid) -> Vector{FaceIndex} -Returns an iterateable face skeleton. The skeleton consists of `FaceIndex` that can be used to `reinit` +Returns an iterateable face skeleton. The skeleton consists of `FaceIndex` that can be used to `reinit` `FaceValues`. """ faceskeleton(top::ExclusiveTopology, grid::AbstractGrid) = top.face_skeleton @@ -397,8 +397,8 @@ toglobal(grid::Grid,vertexidx::Vector{VertexIndex}) = unique(toglobal.((grid,),v @inline getdim(::AbstractGrid{dim}) where {dim} = dim """ - getcells(grid::AbstractGrid) - getcells(grid::AbstractGrid, v::Union{Int,Vector{Int}} + getcells(grid::AbstractGrid) + getcells(grid::AbstractGrid, v::Union{Int,Vector{Int}} getcells(grid::AbstractGrid, setname::String) Returns either all `cells::Collection{C<:AbstractCell}` of a `<:AbstractGrid` or a subset based on an `Int`, `Vector{Int}` or `String`. @@ -414,7 +414,7 @@ Whereas the last option tries to call a `cellset` of the `grid`. `Collection` ca @inline getcelltype(grid::AbstractGrid, i::Int) = typeof(grid.cells[i]) """ - getnodes(grid::AbstractGrid) + getnodes(grid::AbstractGrid) getnodes(grid::AbstractGrid, v::Union{Int,Vector{Int}} getnodes(grid::AbstractGrid, setname::String) @@ -499,11 +499,11 @@ n_faces_per_cell(grid::Grid) = nfaces(eltype(grid.cells)) """ function compute_vertex_values(grid::AbstractGrid, f::Function) - function compute_vertex_values(grid::AbstractGrid, v::Vector{Int}, f::Function) + function compute_vertex_values(grid::AbstractGrid, v::Vector{Int}, f::Function) function compute_vertex_values(grid::AbstractGrid, set::String, f::Function) Given a `grid` and some function `f`, `compute_vertex_values` computes all nodal values, - i.e. values at the nodes, of the function `f`. + i.e. values at the nodes, of the function `f`. The function implements two dispatches, where only a subset of the grid's node is used. ```julia @@ -555,7 +555,7 @@ _warn_emptyset(set, name) = length(set) == 0 && @warn("no entities added to the Adds a cellset to the grid with key `name`. Cellsets are typically used to define subdomains of the problem, e.g. two materials in the computational domain. -The `MixedDofHandler` can construct different fields which live not on the whole domain, but rather on a cellset. +The `MixedDofHandler` can construct different fields which live not on the whole domain, but rather on a cellset. 
```julia addcellset!(grid, "left", Set((1,3))) #add cells with id 1 and 3 to cellset left @@ -589,7 +589,7 @@ end """ addfaceset!(grid::AbstractGrid, name::String, faceid::Union{Set{FaceIndex},Vector{FaceIndex}}) - addfaceset!(grid::AbstractGrid, name::String, f::Function; all::Bool=true) + addfaceset!(grid::AbstractGrid, name::String, f::Function; all::Bool=true) Adds a faceset to the grid with key `name`. A faceset maps a `String` key to a `Set` of tuples corresponding to `(global_cell_id, local_face_id)`. @@ -600,11 +600,11 @@ addfaceset!(gird, "right", Set(((2,2),(4,2))) #see grid manual example for refer addfaceset!(grid, "clamped", x -> norm(x[1]) ≈ 0.0) #see incompressible elasticity example for reference ``` """ -addfaceset!(grid::Grid, name::String, set::Union{Set{FaceIndex},Vector{FaceIndex}}) = +addfaceset!(grid::Grid, name::String, set::Union{Set{FaceIndex},Vector{FaceIndex}}) = _addset!(grid, name, set, grid.facesets) -addedgeset!(grid::Grid, name::String, set::Union{Set{EdgeIndex},Vector{EdgeIndex}}) = +addedgeset!(grid::Grid, name::String, set::Union{Set{EdgeIndex},Vector{EdgeIndex}}) = _addset!(grid, name, set, grid.edgesets) -addvertexset!(grid::Grid, name::String, set::Union{Set{VertexIndex},Vector{VertexIndex}}) = +addvertexset!(grid::Grid, name::String, set::Union{Set{VertexIndex},Vector{VertexIndex}}) = _addset!(grid, name, set, grid.vertexsets) function _addset!(grid::AbstractGrid, name::String, _set, dict::Dict) _check_setname(dict, name) @@ -614,11 +614,11 @@ function _addset!(grid::AbstractGrid, name::String, _set, dict::Dict) grid end -addfaceset!(grid::AbstractGrid, name::String, f::Function; all::Bool=true) = +addfaceset!(grid::AbstractGrid, name::String, f::Function; all::Bool=true) = _addset!(grid, name, f, Ferrite.faces, grid.facesets, FaceIndex; all=all) -addedgeset!(grid::AbstractGrid, name::String, f::Function; all::Bool=true) = +addedgeset!(grid::AbstractGrid, name::String, f::Function; all::Bool=true) = _addset!(grid, name, f, Ferrite.edges, grid.edgesets, EdgeIndex; all=all) -addvertexset!(grid::AbstractGrid, name::String, f::Function; all::Bool=true) = +addvertexset!(grid::AbstractGrid, name::String, f::Function; all::Bool=true) = _addset!(grid, name, f, Ferrite.vertices, grid.vertexsets, VertexIndex; all=all) function _addset!(grid::AbstractGrid, name::String, f::Function, _ftype::Function, dict::Dict, _indextype::Type; all::Bool=true) _check_setname(dict, name) @@ -640,9 +640,9 @@ end """ addnodeset!(grid::AbstractGrid, name::String, nodeid::Union{Vector{Int},Set{Int}}) - addnodeset!(grid::AbstractGrid, name::String, f::Function) + addnodeset!(grid::AbstractGrid, name::String, f::Function) -Adds a `nodeset::Dict{String, Set{Int}}` to the `grid` with key `name`. Has the same interface as `addcellset`. +Adds a `nodeset::Dict{String, Set{Int}}` to the `grid` with key `name`. Has the same interface as `addcellset`. However, instead of mapping a cell id to the `String` key, a set of node ids is returned. """ function addnodeset!(grid::AbstractGrid, name::String, nodeid::Union{Vector{Int},Set{Int}}) @@ -751,12 +751,12 @@ boundaryfunction(::Type{EdgeIndex}) = Ferrite.edges boundaryfunction(::Type{VertexIndex}) = Ferrite.vertices for INDEX in (:VertexIndex, :EdgeIndex, :FaceIndex) - @eval begin + @eval begin #Constructor ($INDEX)(a::Int, b::Int) = ($INDEX)((a,b)) Base.getindex(I::($INDEX), i::Int) = I.idx[i] - + #To be able to do a,b = faceidx Base.iterate(I::($INDEX), state::Int=1) = (state==3) ? 
nothing : (I[state], state+1) From fe76900c43803d84ffe4078d2041b93719dbbc8b Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 1 Jun 2022 03:03:08 +0200 Subject: [PATCH 004/124] Hotfix self-inclusion of vertex neighborhood. --- src/Grid/grid.jl | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/Grid/grid.jl b/src/Grid/grid.jl index e5653919ec..53cf40ce21 100644 --- a/src/Grid/grid.jl +++ b/src/Grid/grid.jl @@ -371,7 +371,21 @@ function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, vertexidx:: cell_vertices = vertices(getcells(grid,cellid)) global_vertexid = cell_vertices[local_vertexid] if include_self - return [top.vertex_vertex_neighbor[global_vertexid].neighbor_info; vertexidx] + myself = Dict{Int,Int}() + result = copy(top.vertex_vertex_neighbor[global_vertexid].neighbor_info) + push!(result, vertexidx) + for neighbor ∈ top.vertex_vertex_neighbor[global_vertexid].neighbor_info + other_cellid = neighbor[1] + for (other_local_vertexid, other_vertex) ∈ enumerate(vertices(getcells(grid,other_cellid))) + if global_vertexid==other_vertex + myself[other_cellid] = other_local_vertexid + end + end + end + for (other_cellid, other_local_vertexid) ∈ myself + push!(result, VertexIndex(other_cellid, other_local_vertexid)) + end + return result else return top.vertex_vertex_neighbor[global_vertexid].neighbor_info end From 30b3f177cba07de7a5094eabc37fc4cb7f8d9547 Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 1 Jun 2022 04:18:47 +0200 Subject: [PATCH 005/124] Distributed dof assignment works for 2 processes. >2 fails. --- docs/src/literate/distributed_assembly.jl | 198 ++++++++++++++++++++-- src/Dofs/DofHandler.jl | 54 +++--- src/Grid/DistributedGrid.jl | 84 +++++---- src/exports.jl | 2 + 4 files changed, 277 insertions(+), 61 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index e0bcae50ed..f27699654f 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -38,22 +38,27 @@ # First we load Ferrite, and some other packages we need using Ferrite, SparseArrays, MPI +macro debug(ex) + return :($(esc(ex))) +end + # Launch MPI MPI.Init() # We start generating a simple grid with 20x20 quadrilateral elements # using `generate_grid`. The generator defaults to the unit square, # so we don't need to specify the corners of the domain. -grid = generate_grid(Quadrilateral, (4, 4)); +grid = generate_grid(Quadrilateral, (3, 3)); dgrid = DistributedGrid(grid, MPI.COMM_WORLD) +# TODO refactor this into a utility function vtk_grid("grid", dgrid; compress=false) do vtk u = Vector{Float64}(undef,length(dgrid.local_grid.nodes)) for rank ∈ 1:MPI.Comm_size(MPI.COMM_WORLD) fill!(u, 0.0) - for sv ∈ dgrid.shared_vertices - if haskey(sv.remote_vertices, rank) + for sv ∈ values(dgrid.shared_vertices) + if haskey(sv.remote_vertices,rank) (cellidx,i) = sv.local_idx nodeidx = dgrid.local_grid.cells[cellidx].nodes[i] u[nodeidx] = rank @@ -63,12 +68,6 @@ vtk_grid("grid", dgrid; compress=false) do vtk end end -# Shutdown MPI -MPI.Finalize() - -# Early out for testing. -exit(0) - # ### Trial and test functions # A `CellValues` facilitates the process of evaluating values and gradients of # test and trial functions (among other things). Since the problem @@ -89,10 +88,186 @@ cellvalues = CellScalarValues(qr, ip); # We create the `DofHandler` and then add a single field called `u`. 
# Lastly we `close!` the `DofHandler`, it is now that the dofs are distributed # for all the elements. -dh = DistributedDofHandler(grid) +dh = DofHandler(dgrid.local_grid) push!(dh, :u, 1) close!(dh); +# We have to renumber the dofs to their global numbering. +function adjust_numbering!(dh) + my_rank = MPI.Comm_rank(MPI.COMM_WORLD)+1 + + local_to_global = Vector{Int}(undef,ndofs(dh)) + fill!(local_to_global,0) # 0 is the invalid index! + # Start by numbering local dofs only from 1:#local_dofs + + # # Lookup for synchronization in the form (Remote Rank,Shared Entity) + # # @TODO replace dict with vector and tie to MPI neighborhood graph of the mesh + # vertices_send = Dict{Int,Vector{Ferrite.SharedVertex}}() + # vertices_recv = Dict{Int,Vector{Ferrite.SharedVertex}}() + vertices_send = Dict{Int,Vector{VertexIndex}}() + n_vertices_recv = Dict{Int,Int}() + + next_local_idx = 1 + for (ci, cell) in enumerate(getcells(dh.grid)) + @debug println("cell #$ci (R$my_rank)") + for fi in 1:Ferrite.nfields(dh) + @debug println(" field: $(dh.field_names[fi]) (R$my_rank)") + interpolation_info = Ferrite.InterpolationInfo(dh.field_interpolations[fi]) + if interpolation_info.nvertexdofs > 0 + for (vi,vertex) in enumerate(Ferrite.vertices(cell)) + @debug println(" vertex#$vertex (R$my_rank)") + # Dof is owned if it is local or if my rank is the smallest in the neighborhood + if !haskey(dgrid.shared_vertices,VertexIndex(ci,vi)) || all(keys(dgrid.shared_vertices[VertexIndex(ci,vi)].remote_vertices) .> my_rank) + # Update dof assignment + dof_local_idx = dh.vertexdicts[fi][vertex] + if local_to_global[dof_local_idx] == 0 + @debug println(" mapping vertex dof#$dof_local_idx to $next_local_idx (R$my_rank)") + local_to_global[dof_local_idx] = next_local_idx + next_local_idx += 1 + else + @debug println(" vertex dof#$dof_local_idx already mapped to $(local_to_global[dof_local_idx]) (R$my_rank)") + end + end + + # Update shared vertex lookup table + if haskey(dgrid.shared_vertices,VertexIndex(ci,vi)) + for (remote_rank, svs) ∈ dgrid.shared_vertices[VertexIndex(ci,vi)].remote_vertices + if remote_rank > my_rank # I own the dof - we have to send information + if !haskey(vertices_send,remote_rank) + # vertices_send[remote_rank] = Vector{Ferrite.SharedVertex}() + vertices_send[remote_rank] = Vector{Ferrite.VertexIndex}() + end + @debug println(" prepare sending vertex #$(VertexIndex(ci,vi)) to $remote_rank (R$my_rank)") + push!(vertices_send[remote_rank],VertexIndex(ci,vi)) + # push!(vertices_send,Ferrite.SharedVertex) + else # dof is owned by remote - we have to receive information + if !haskey(n_vertices_recv,remote_rank) + # vertices_recv[remote_rank] = Vector{Ferrite.SharedVertex}() + n_vertices_recv[remote_rank] = 1 + else + n_vertices_recv[remote_rank] += 1 + end + @debug println(" prepare receiving vertex #$(VertexIndex(ci,vi)) from $remote_rank (R$my_rank)") + # push!(vertices_recv,svs) + end + end + end + end + end + end + end + + # + num_true_local_dofs = next_local_idx-1 + @debug println("#true local dofs $num_true_local_dofs (R$my_rank)") + + # @TODO optimize the following synchronization with MPI line graph topology + # and allgather + # Set true local indices + local_offset = 0 + if my_rank > 1 + local_offset = MPI.Recv(Int, MPI.COMM_WORLD; source=my_rank-1-1) + end + if my_rank < MPI.Comm_size(MPI.COMM_WORLD) + MPI.Send(local_offset+num_true_local_dofs, MPI.COMM_WORLD; dest=my_rank+1-1) + end + @debug println("#shifted local dof range $(local_offset+1):$(local_offset+num_true_local_dofs) (R$my_rank)") + + 
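# The blocking Send/Recv chain above computes an exclusive prefix sum of the
# number of locally owned dofs over the ranks. The same offset can be obtained
# with the matching MPI collective. This is only a sketch reusing the variables
# defined above and assuming `MPI.Exscan` is available in the installed MPI.jl
# version; note that the Exscan result is undefined on the first rank, hence
# the explicit fallback to 0 there.
local_offset_via_exscan = MPI.Exscan(num_true_local_dofs, +, MPI.COMM_WORLD)
local_offset_via_exscan = my_rank == 1 ? 0 : local_offset_via_exscan
@assert local_offset_via_exscan == local_offset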
for i ∈ 1:length(local_to_global) + if local_to_global[i] != 0 + local_to_global[i] += local_offset + end + end + + # Sync remote dofs + for sending_rank ∈ 1:MPI.Comm_size(MPI.COMM_WORLD) + if my_rank == sending_rank + for remote_rank ∈ 1:MPI.Comm_size(MPI.COMM_WORLD) + if haskey(vertices_send, remote_rank) + n_vertices = length(vertices_send[remote_rank]) + @debug println("Sending $n_vertices vertices to rank $remote_rank (R$my_rank)") + remote_cells = Array{Int64}(undef,n_vertices) + remote_cell_vis = Array{Int64}(undef,n_vertices) + next_buffer_idx = 1 + for lvi ∈ vertices_send[remote_rank] + sv = dgrid.shared_vertices[lvi] + @assert haskey(sv.remote_vertices, remote_rank) + for (cvi, llvi) ∈ sv.remote_vertices[remote_rank][1:1] # Just don't ask :) + remote_cells[next_buffer_idx] = cvi + remote_cell_vis[next_buffer_idx] = llvi + next_buffer_idx += 1 + end + end + MPI.Send(remote_cells, MPI.COMM_WORLD; dest=remote_rank-1) + MPI.Send(remote_cell_vis, MPI.COMM_WORLD; dest=remote_rank-1) + for fi ∈ 1:Ferrite.nfields(dh) + next_buffer_idx = 1 + if length(dh.vertexdicts[fi]) == 0 + @debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") + continue + end + # fill correspondence array + corresponding_global_dofs = Array{Int64}(undef,n_vertices) + for (lci,lclvi) ∈ vertices_send[remote_rank] + vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] + if haskey(dh.vertexdicts[fi], vi) + corresponding_global_dofs[next_buffer_idx] = local_to_global[dh.vertexdicts[fi][vi]] + end + next_buffer_idx += 1 + end + MPI.Send(corresponding_global_dofs, MPI.COMM_WORLD; dest=remote_rank-1) + end + end + end + else + if haskey(n_vertices_recv, sending_rank) + n_vertices = n_vertices_recv[sending_rank] + @debug println("Receiving $n_vertices vertices from rank $sending_rank (R$my_rank)") + local_cells = Array{Int64}(undef,n_vertices) + local_cell_vis = Array{Int64}(undef,n_vertices) + MPI.Recv!(local_cells, MPI.COMM_WORLD; source=sending_rank-1) + MPI.Recv!(local_cell_vis, MPI.COMM_WORLD; source=sending_rank-1) + for fi in 1:Ferrite.nfields(dh) + if length(dh.vertexdicts[fi]) == 0 + @debug println(" Skipping recv on field $(dh.field_names[fi]) (R$my_rank)") + continue + end + corresponding_global_dofs = Array{Int64}(undef,n_vertices) + MPI.Recv!(corresponding_global_dofs, MPI.COMM_WORLD; source=sending_rank-1) + for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) + vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] + if haskey(dh.vertexdicts[fi], vi) + local_to_global[dh.vertexdicts[fi][vi]] = corresponding_global_dofs[cdi] + @debug println(" Updating field $(dh.field_names[fi]) vertex $(VertexIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") + else + @debug println(" Skipping recv on field $(dh.field_names[fi]) vertex $vi (R$my_rank)") + end + end + end + end + end + end + + # Postcondition: All local dofs need a corresponding global dof! + @assert findfirst(local_to_global .== 0) == nothing + + vtk_grid("dofs", dgrid; compress=false) do vtk + u = Vector{Float64}(undef,length(dgrid.local_grid.nodes)) + fill!(u, 0.0) + for i=1:length(u) + u[i] = local_to_global[dh.vertexdicts[1][i]] + end + vtk_point_data(vtk, u,"dof") + end +end +adjust_numbering!(dh) + +# Shutdown MPI +MPI.Finalize() + +# Early out for testing. +exit(0) + # Now that we have distributed all our dofs we can create our tangent matrix, # using `create_sparsity_pattern`. This function returns a sparse matrix # with the correct elements stored. 
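# A note on running the distributed example in its current state: all of the
# code above is rank local and only becomes interesting with more than one MPI
# process. A typical invocation, assuming MPI.jl's `mpiexecjl` launcher has
# been installed via `MPI.install_mpiexecjl()` (any other MPI launcher works
# as well), would be
#
#     mpiexecjl -n 2 julia --project docs/src/literate/distributed_assembly.jl
#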
@@ -200,7 +375,8 @@ K, f = doassemble(cellvalues, K, dh); # This modifies elements in `K` and `f` respectively, such that # we can get the correct solution vector `u` by using `\`. apply!(K, f, ch) -u = cg(K, f); +#u = PartitionedArray... +cg!(u, K, f); # ### Exporting to VTK # To visualize the result we export the grid and our field `u` diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index f63ad26e4d..3133800f9f 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -21,11 +21,15 @@ struct DofHandler{dim,T,G<:AbstractGrid{dim}} <: AbstractDofHandler closed::ScalarWrapper{Bool} grid::G ndofs::ScalarWrapper{Int} + + vertexdicts::Vector{Dict{Int,Int}} + edgedicts::Vector{Dict{Tuple{Int,Int},Tuple{Int,Bool}}} + facedicts::Vector{Dict{NTuple{dim,Int},Int}} end -function DofHandler(grid::AbstractGrid) +function DofHandler(grid::AbstractGrid{dim}) where {dim} isconcretetype(getcelltype(grid)) || error("Grid includes different celltypes. Use MixedDofHandler instead of DofHandler") - DofHandler(Symbol[], Int[], Interpolation[], BCValues{Float64}[], Int[], Int[], ScalarWrapper(false), grid, Ferrite.ScalarWrapper(-1)) + DofHandler(Symbol[], Int[], Interpolation[], BCValues{Float64}[], Int[], Int[], ScalarWrapper(false), grid, Ferrite.ScalarWrapper(-1), Dict{Int,Int}[], Dict{Tuple{Int,Int},Tuple{Int,Bool}}[],Dict{NTuple{dim,Int},Int}[]) end function Base.show(io::IO, ::MIME"text/plain", dh::DofHandler) @@ -55,6 +59,7 @@ getfieldnames(dh::AbstractDofHandler) = dh.field_names getfieldinterpolation(dh::AbstractDofHandler, field_idx::Int) = dh.field_interpolations[field_idx] getfielddim(dh::AbstractDofHandler, field_idx::Int) = dh.field_dims[field_idx] getbcvalue(dh::AbstractDofHandler, field_idx::Int) = dh.bc_values[field_idx] +getgrid(dh::AbstractDofHandler) = dh.grid function find_field(dh::AbstractDofHandler, field_name::Symbol) j = findfirst(i->i == field_name, getfieldnames(dh)) @@ -148,8 +153,7 @@ function sortface(face::Tuple{Int,Int,Int,Int}) end function close!(dh::DofHandler) - dh, _, _, _ = __close!(dh) - return dh + return __close!(dh) end # close the DofHandler and distribute all the dofs @@ -158,20 +162,29 @@ function __close!(dh::DofHandler{dim}) where {dim} # `vertexdict` keeps track of the visited vertices. We store the global vertex # number and the first dof we added to that vertex. - vertexdicts = [Dict{Int,Int}() for _ in 1:nfields(dh)] + resize!(dh.vertexdicts, nfields(dh)) + for i in 1:nfields(dh) + dh.vertexdicts[i] = Dict{Tuple{Int,Int},Tuple{Int,Bool}}() + end # `edgedict` keeps track of the visited edges, this will only be used for a 3D problem # An edge is determined from two vertices, but we also need to store the direction # of the first edge we encounter and add dofs too. When we encounter the same edge # the next time we check if the direction is the same, otherwise we reuse the dofs # in the reverse order - edgedicts = [Dict{Tuple{Int,Int},Tuple{Int,Bool}}() for _ in 1:nfields(dh)] + resize!(dh.edgedicts, nfields(dh)) + for i in 1:nfields(dh) + dh.edgedicts[i] = Dict{Tuple{Int,Int},Tuple{Int,Bool}}() + end # `facedict` keeps track of the visited faces. We only need to store the first dof we # added to the face; if we encounter the same face again we *always* reverse the order # In 2D a face (i.e. a line) is uniquely determined by 2 vertices, and in 3D a # face (i.e. a surface) is uniquely determined by 3 vertices. 
- facedicts = [Dict{NTuple{dim,Int},Int}() for _ in 1:nfields(dh)] + resize!(dh.facedicts, nfields(dh)) + for i in 1:nfields(dh) + dh.facedicts[i] = Dict{NTuple{dim,Int},Int}() + end # celldofs are never shared between different cells so there is no need # for a `celldict` to keep track of which cells we have added dofs too. @@ -200,16 +213,16 @@ function __close!(dh::DofHandler{dim}) where {dim} if interpolation_info.nvertexdofs > 0 for vertex in vertices(cell) @debug println(" vertex#$vertex") - token = Base.ht_keyindex2!(vertexdicts[fi], vertex) - if token > 0 # haskey(vertexdicts[fi], vertex) # reuse dofs - reuse_dof = vertexdicts[fi].vals[token] # vertexdicts[fi][vertex] + token = Base.ht_keyindex2!(dh.vertexdicts[fi], vertex) + if token > 0 # haskey(dh.vertexdicts[fi], vertex) # reuse dofs + reuse_dof = dh.vertexdicts[fi].vals[token] # dh.vertexdicts[fi][vertex] for d in 1:dh.field_dims[fi] @debug println(" reusing dof #$(reuse_dof + (d-1))") push!(dh.cell_dofs, reuse_dof + (d-1)) end else # token <= 0, distribute new dofs for vertexdof in 1:interpolation_info.nvertexdofs - Base._setindex!(vertexdicts[fi], nextdof, vertex, -token) # vertexdicts[fi][vertex] = nextdof + Base._setindex!(dh.vertexdicts[fi], nextdof, vertex, -token) # dh.vertexdicts[fi][vertex] = nextdof for d in 1:dh.field_dims[fi] @debug println(" adding dof#$nextdof") push!(dh.cell_dofs, nextdof) @@ -224,9 +237,9 @@ function __close!(dh::DofHandler{dim}) where {dim} for edge in edges(cell) sedge, dir = sortedge(edge) @debug println(" edge#$sedge dir: $(dir)") - token = Base.ht_keyindex2!(edgedicts[fi], sedge) - if token > 0 # haskey(edgedicts[fi], sedge), reuse dofs - startdof, olddir = edgedicts[fi].vals[token] # edgedicts[fi][sedge] # first dof for this edge (if dir == true) + token = Base.ht_keyindex2!(dh.edgedicts[fi], sedge) + if token > 0 # haskey(dh.edgedicts[fi], sedge), reuse dofs + startdof, olddir = dh.edgedicts[fi].vals[token] # dh.edgedicts[fi][sedge] # first dof for this edge (if dir == true) for edgedof in (dir == olddir ? 
(1:interpolation_info.nedgedofs) : (interpolation_info.nedgedofs:-1:1)) for d in 1:dh.field_dims[fi] reuse_dof = startdof + (d-1) + (edgedof-1)*dh.field_dims[fi] @@ -235,7 +248,7 @@ function __close!(dh::DofHandler{dim}) where {dim} end end else # token <= 0, distribute new dofs - Base._setindex!(edgedicts[fi], (nextdof, dir), sedge, -token) # edgedicts[fi][sedge] = (nextdof, dir), store only the first dof for the edge + Base._setindex!(dh.edgedicts[fi], (nextdof, dir), sedge, -token) # dh.edgedicts[fi][sedge] = (nextdof, dir), store only the first dof for the edge for edgedof in 1:interpolation_info.nedgedofs for d in 1:dh.field_dims[fi] @debug println(" adding dof#$nextdof") @@ -251,9 +264,9 @@ function __close!(dh::DofHandler{dim}) where {dim} for face in faces(cell) sface = sortface(face) # TODO: faces(cell) may as well just return the sorted list @debug println(" face#$sface") - token = Base.ht_keyindex2!(facedicts[fi], sface) - if token > 0 # haskey(facedicts[fi], sface), reuse dofs - startdof = facedicts[fi].vals[token] # facedicts[fi][sface] + token = Base.ht_keyindex2!(dh.facedicts[fi], sface) + if token > 0 # haskey(dh.facedicts[fi], sface), reuse dofs + startdof = dh.facedicts[fi].vals[token] # dh.facedicts[fi][sface] for facedof in interpolation_info.nfacedofs:-1:1 # always reverse (YOLO) for d in 1:dh.field_dims[fi] reuse_dof = startdof + (d-1) + (facedof-1)*dh.field_dims[fi] @@ -262,7 +275,7 @@ function __close!(dh::DofHandler{dim}) where {dim} end end else # distribute new dofs - Base._setindex!(facedicts[fi], nextdof, sface, -token)# facedicts[fi][sface] = nextdof, store the first dof for this face + Base._setindex!(dh.facedicts[fi], nextdof, sface, -token)# dh.facedicts[fi][sface] = nextdof, store the first dof for this face for facedof in 1:interpolation_info.nfacedofs for d in 1:dh.field_dims[fi] @debug println(" adding dof#$nextdof") @@ -290,8 +303,7 @@ function __close!(dh::DofHandler{dim}) where {dim} dh.ndofs[] = maximum(dh.cell_dofs) dh.closed[] = true - return dh, vertexdicts, edgedicts, facedicts - + return dh end function celldofs!(global_dofs::Vector{Int}, dh::DofHandler, i::Int) diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index 80de9e1351..92186b85b4 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -5,6 +5,9 @@ using MPI """ abstract type AbstractDistributedGrid{sdim} <: AbstractGrid{sdim} end +# TODO the following three structs can be merged to one struct with type parameter. +""" +""" struct SharedVertex local_idx::VertexIndex remote_vertices::Dict{Int,Vector{VertexIndex}} @@ -14,7 +17,7 @@ end """ struct SharedFace local_idx::FaceIndex - remote_edges::Dict{Int,Vector{FaceIndex}} + remote_faces::Dict{Int,Vector{FaceIndex}} end """ @@ -35,21 +38,26 @@ mutable struct DistributedGrid{dim,C<:AbstractCell,T<:Real} <: AbstractDistribut # Here we store the full local grid local_grid::Grid{dim,C,T} # Local copies of the shared entities of the form (local index, (process id in grid_comm, remote index)) - shared_vertices::Vector{SharedVertex} - shared_edges::Vector{SharedEdge} - shared_faces::Vector{SharedFace} + # The entities consistently contain their *Index, because faces and edges are not materialized. 
+ shared_vertices::Dict{VertexIndex,SharedVertex} + shared_edges::Dict{EdgeIndex,SharedEdge} + shared_faces::Dict{FaceIndex,SharedFace} end """ """ -function DistributedGrid(grid_to_distribute::Grid, grid_comm::MPI.Comm; partition_alg = :RECURSIVE) +function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_comm::MPI.Comm; partition_alg = :RECURSIVE) where {dim,C,T} grid_topology = ExclusiveTopology(grid_to_distribute) return DistributedGrid(grid_to_distribute, grid_topology, grid_comm; partition_alg=partition_alg) end -function create_partitioning(grid::Grid, grid_topology::ExclusiveTopology, n_partitions, partition_alg) +function create_partitioning(grid::Grid{dim,C,T}, grid_topology::ExclusiveTopology, n_partitions, partition_alg) where {dim,C,T} N = getncells(grid) @assert N > 0 + + if n_partitions == 1 + return ones(N) + end # Set up the element connectivity graph xadj = Vector{Metis.idx_t}(undef, N+1) @@ -123,12 +131,14 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu # 6. Extract sets # @TODO deduplicate the code. We should be able to merge each of these into a macro or function. - global_to_local_cell_map = Dict{Int,Int}() - begin + # We build this map now, so we avoid the communication later. + global_to_local_cell_map = Dict{Int,Dict{Int,Int}}() + for rank ∈ 1:MPI.Comm_size(grid_comm) + global_to_local_cell_map[rank] = Dict{Int,Int}() next_local_cell_idx = 1 for global_cell_idx ∈ 1:N - if parts[global_cell_idx] == (my_rank) - global_to_local_cell_map[global_cell_idx] = next_local_cell_idx + if parts[global_cell_idx] == rank + global_to_local_cell_map[rank][global_cell_idx] = next_local_cell_idx next_local_cell_idx += 1 end end @@ -138,8 +148,8 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu for key ∈ keys(grid_to_distribute.cellsets) cellsets[key] = Set{Int}() # create empty set, so it does not crash during assembly for global_cell_idx ∈ grid_to_distribute.cellsets[key] - if haskey(global_to_local_cell_map, global_cell_idx) - push!(cellsets[key], global_to_local_cell_map[global_cell_idx]) + if haskey(global_to_local_cell_map[my_rank], global_cell_idx) + push!(cellsets[key], global_to_local_cell_map[my_rank][global_cell_idx]) end end end @@ -158,8 +168,8 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu for key ∈ keys(grid_to_distribute.facesets) facesets[key] = Set{FaceIndex}() # create empty set, so it does not crash during assembly for (global_cell_idx, i) ∈ grid_to_distribute.facesets[key] - if haskey(global_to_local_cell_map, global_cell_idx) - push!(facesets[key], FaceIndex(global_to_local_cell_map[global_cell_idx], i)) + if haskey(global_to_local_cell_map[my_rank], global_cell_idx) + push!(facesets[key], FaceIndex(global_to_local_cell_map[my_rank][global_cell_idx], i)) end end end @@ -168,8 +178,8 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu for key ∈ keys(grid_to_distribute.edgesets) edgesets[key] = Set{EdgeIndex}() # create empty set, so it does not crash during assembly for (global_cell_idx, i) ∈ grid_to_distribute.edgesets[key] - if haskey(global_to_local_cell_map, global_cell_idx) - push!(edgesets[key], EdgeIndex(global_to_local_cell_map[global_cell_idx], i)) + if haskey(global_to_local_cell_map[my_rank], global_cell_idx) + push!(edgesets[key], EdgeIndex(global_to_local_cell_map[my_rank][global_cell_idx], i)) end end end @@ -178,8 +188,8 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu 
for key ∈ keys(grid_to_distribute.vertexsets) vertexsets[key] = Set{VertexIndex}() # create empty set, so it does not crash during assembly for (global_cell_idx, i) ∈ grid_to_distribute.vertexsets[key] - if haskey(global_to_local_cell_map, global_cell_idx) - push!(vertexsets[key], VertexIndex(global_to_local_cell_map[global_cell_idx], i)) + if haskey(global_to_local_cell_map[my_rank], global_cell_idx) + push!(vertexsets[key], VertexIndex(global_to_local_cell_map[my_rank][global_cell_idx], i)) end end end @@ -194,28 +204,33 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu vertexsets=vertexsets ) - shared_vertices = Vector{SharedVertex}() - shared_edges = Vector{SharedEdge}() - shared_faces = Vector{SharedFace}() + shared_vertices = Dict{VertexIndex,SharedVertex}() + shared_edges = Dict{EdgeIndex,SharedEdge}() + shared_faces = Dict{FaceIndex,SharedFace}() for (global_cell_idx,global_cell) ∈ enumerate(getcells(grid_to_distribute)) if parts[global_cell_idx] == my_rank # Vertex - for (i, global_node_idx) ∈ enumerate(vertices(global_cell)) + for (i, _) ∈ enumerate(vertices(global_cell)) cell_vertex = VertexIndex(global_cell_idx, i) remote_vertices = Dict{Int,Vector{VertexIndex}}() - for (global_cell_neighbor_idx, j) ∈ getneighborhood(grid_topology, grid_to_distribute, cell_vertex) + for (global_cell_neighbor_idx, j) ∈ getneighborhood(grid_topology, grid_to_distribute, cell_vertex, true) other_rank = parts[global_cell_neighbor_idx] if other_rank != my_rank - # Todo remote local cell id - if !haskey(remote_vertices,other_rank) - remote_vertices[other_rank] = Vector(undef,0) + n1 = vertices(getcells(grid_to_distribute,global_cell_idx))[i] + n2 = vertices(getcells(grid_to_distribute,global_cell_neighbor_idx))[j] + if n1 == n2 + if !haskey(remote_vertices,other_rank) + remote_vertices[other_rank] = Vector(undef,0) + end + @debug println("Detected shared vertex $cell_vertex neighbor $(VertexIndex(global_cell_neighbor_idx,j)) (R$my_rank)") + push!(remote_vertices[other_rank], VertexIndex(global_to_local_cell_map[other_rank][global_cell_neighbor_idx], j)) end - push!(remote_vertices[other_rank], VertexIndex(global_cell_neighbor_idx, j)) end end if length(remote_vertices) > 0 - push!(shared_vertices, SharedVertex(VertexIndex(global_to_local_cell_map[global_cell_idx], i), remote_vertices)) + idx = VertexIndex(global_to_local_cell_map[my_rank][global_cell_idx], i) + shared_vertices[idx] = SharedVertex(idx, remote_vertices) end end @@ -246,4 +261,15 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu end return DistributedGrid(grid_comm,grid_comm,local_grid,shared_vertices,shared_edges,shared_faces) -end \ No newline at end of file +end + +@inline getlocalgrid(dgrid::AbstractDistributedGrid) = dgrid.local_grid + +@inline getcells(dgrid::AbstractDistributedGrid) = getcells(getlocalgrid(grid)) +@inline getcells(dgrid::AbstractDistributedGrid, v::Union{Int, Vector{Int}}) = getcells(getlocalgrid(grid),v) +@inline getcells(dgrid::AbstractDistributedGrid, setname::String) = getcells(getlocalgrid(grid),setname) +"Returns the number of cells in the `<:AbstractDistributedGrid`." +@inline getncells(dgrid::AbstractDistributedGrid) = length(getcells(getlocalgrid(dgrid))) +"Returns the celltype of the `<:AbstractDistributedGrid`." 
+@inline getcelltype(dgrid::AbstractDistributedGrid) = eltype(getcells(getlocalgrid(dgrid))) +@inline getcelltype(dgrid::AbstractDistributedGrid, i::Int) = typeof(getcells(getlocalgrid(dgrid),i)) diff --git a/src/exports.jl b/src/exports.jl index 91e9fd90e3..7a8577b988 100644 --- a/src/exports.jl +++ b/src/exports.jl @@ -64,6 +64,8 @@ export getneighborhood, faceskeleton, getcells, + getgrid, + getlocalgrid, getncells, getnodes, getnnodes, From bd19f31673e7de0a2644fbd3b92d99290cbdc6bc Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 1 Jun 2022 05:01:27 +0200 Subject: [PATCH 006/124] Fix dof assignment for >2 with a massive amount of redundant communication. --- docs/src/literate/distributed_assembly.jl | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index f27699654f..26f6248372 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -48,7 +48,7 @@ MPI.Init() # We start generating a simple grid with 20x20 quadrilateral elements # using `generate_grid`. The generator defaults to the unit square, # so we don't need to specify the corners of the domain. -grid = generate_grid(Quadrilateral, (3, 3)); +grid = generate_grid(Quadrilateral, (20, 20)); dgrid = DistributedGrid(grid, MPI.COMM_WORLD) @@ -131,21 +131,26 @@ function adjust_numbering!(dh) # Update shared vertex lookup table if haskey(dgrid.shared_vertices,VertexIndex(ci,vi)) + master_rank = my_rank + for master_rank_new ∈ keys(dgrid.shared_vertices[VertexIndex(ci,vi)].remote_vertices) + master_rank = min(master_rank, master_rank_new) + end for (remote_rank, svs) ∈ dgrid.shared_vertices[VertexIndex(ci,vi)].remote_vertices - if remote_rank > my_rank # I own the dof - we have to send information + if master_rank == my_rank # I own the dof - we have to send information if !haskey(vertices_send,remote_rank) # vertices_send[remote_rank] = Vector{Ferrite.SharedVertex}() vertices_send[remote_rank] = Vector{Ferrite.VertexIndex}() end @debug println(" prepare sending vertex #$(VertexIndex(ci,vi)) to $remote_rank (R$my_rank)") - push!(vertices_send[remote_rank],VertexIndex(ci,vi)) - # push!(vertices_send,Ferrite.SharedVertex) - else # dof is owned by remote - we have to receive information + for i ∈ svs + push!(vertices_send[remote_rank],VertexIndex(ci,vi)) + end + elseif master_rank == remote_rank # dof is owned by remote - we have to receive information if !haskey(n_vertices_recv,remote_rank) # vertices_recv[remote_rank] = Vector{Ferrite.SharedVertex}() - n_vertices_recv[remote_rank] = 1 + n_vertices_recv[remote_rank] = length(svs) else - n_vertices_recv[remote_rank] += 1 + n_vertices_recv[remote_rank] += length(svs) end @debug println(" prepare receiving vertex #$(VertexIndex(ci,vi)) from $remote_rank (R$my_rank)") # push!(vertices_recv,svs) From 7d7b9a97a7a599a77e12baff70ca87cc39d9f826 Mon Sep 17 00:00:00 2001 From: termi-official Date: Thu, 2 Jun 2022 15:24:48 +0200 Subject: [PATCH 007/124] Distributed assembly loop sketched - does not work because neighborhood is wrong. 
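Rows of the distributed matrix are partitioned by dof ownership, because the
image of the sparse matrix-vector product has to be process local (test
functions), while the column layout additionally has to know every locally
visible dof, including dofs owned by other ranks (trial functions). Condensed
from the diff below as a summary only, using the names introduced there
(`ldof_to_gdof` maps local to global dof ids, `ldof_to_part` stores the owner
rank of each local dof):

    ltdof_to_gdof = ldof_to_gdof[ldof_to_part .== my_rank]  # owned ("true local") dofs
    row_indices   = PartitionedArrays.IndexSet(my_rank, ltdof_to_gdof,
                        repeat(Int32[my_rank], length(ltdof_to_gdof)))
    col_indices   = PartitionedArrays.IndexSet(my_rank, ldof_to_gdof, Int32.(ldof_to_part))
    row_data      = MPIData(row_indices, comm, (np,))
    col_data      = MPIData(col_indices, comm, (np,))
    rows          = PRange(ngdofs, row_data, Exchanger(row_data, neighbors))
    cols          = PRange(ngdofs, col_data, Exchanger(col_data, neighbors))

The neighbor ranks for the exchangers are still taken from
`unique(ldof_to_part)` in this patch, which is the part that does not hold up,
see the follow-up fix.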
--- docs/src/literate/distributed_assembly.jl | 110 ++++++++++++++++++---- 1 file changed, 91 insertions(+), 19 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 26f6248372..3a50dd4434 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -36,7 +36,7 @@ #md # The full program, without comments, can be found in the next [section](@ref heat_equation-plain-program). # # First we load Ferrite, and some other packages we need -using Ferrite, SparseArrays, MPI +using Ferrite, SparseArrays, MPI, PartitionedArrays macro debug(ex) return :($(esc(ex))) @@ -48,7 +48,7 @@ MPI.Init() # We start generating a simple grid with 20x20 quadrilateral elements # using `generate_grid`. The generator defaults to the unit square, # so we don't need to specify the corners of the domain. -grid = generate_grid(Quadrilateral, (20, 20)); +grid = generate_grid(Quadrilateral, (2, 2)); dgrid = DistributedGrid(grid, MPI.COMM_WORLD) @@ -93,7 +93,7 @@ push!(dh, :u, 1) close!(dh); # We have to renumber the dofs to their global numbering. -function adjust_numbering!(dh) +function local_to_global_numbering(dh, dgrid) my_rank = MPI.Comm_rank(MPI.COMM_WORLD)+1 local_to_global = Vector{Int}(undef,ndofs(dh)) @@ -102,8 +102,6 @@ function adjust_numbering!(dh) # # Lookup for synchronization in the form (Remote Rank,Shared Entity) # # @TODO replace dict with vector and tie to MPI neighborhood graph of the mesh - # vertices_send = Dict{Int,Vector{Ferrite.SharedVertex}}() - # vertices_recv = Dict{Int,Vector{Ferrite.SharedVertex}}() vertices_send = Dict{Int,Vector{VertexIndex}}() n_vertices_recv = Dict{Int,Int}() @@ -138,7 +136,6 @@ function adjust_numbering!(dh) for (remote_rank, svs) ∈ dgrid.shared_vertices[VertexIndex(ci,vi)].remote_vertices if master_rank == my_rank # I own the dof - we have to send information if !haskey(vertices_send,remote_rank) - # vertices_send[remote_rank] = Vector{Ferrite.SharedVertex}() vertices_send[remote_rank] = Vector{Ferrite.VertexIndex}() end @debug println(" prepare sending vertex #$(VertexIndex(ci,vi)) to $remote_rank (R$my_rank)") @@ -147,13 +144,11 @@ function adjust_numbering!(dh) end elseif master_rank == remote_rank # dof is owned by remote - we have to receive information if !haskey(n_vertices_recv,remote_rank) - # vertices_recv[remote_rank] = Vector{Ferrite.SharedVertex}() n_vertices_recv[remote_rank] = length(svs) else n_vertices_recv[remote_rank] += length(svs) end @debug println(" prepare receiving vertex #$(VertexIndex(ci,vi)) from $remote_rank (R$my_rank)") - # push!(vertices_recv,svs) end end end @@ -264,19 +259,42 @@ function adjust_numbering!(dh) end vtk_point_data(vtk, u,"dof") end + + return local_to_global end -adjust_numbering!(dh) +local_to_global = local_to_global_numbering(dh, dgrid); -# Shutdown MPI -MPI.Finalize() +function compute_dof_ownership(dh, dgrid) + my_rank = MPI.Comm_rank(dgrid.grid_comm)+1 -# Early out for testing. 
-exit(0) + dof_owner = Vector{Int}(undef,ndofs(dh)) + fill!(dof_owner, my_rank) + + for ((lci, lclvi),sv) ∈ dgrid.shared_vertices + owner_rank = minimum([collect(keys(sv.remote_vertices));my_rank]) + + if owner_rank != my_rank + for fi in 1:Ferrite.nfields(dh) + vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] + if haskey(dh.vertexdicts[fi], vi) + local_dof_idx = dh.vertexdicts[fi][vi] + dof_owner[local_dof_idx] = owner_rank + end + end + end + end + + return dof_owner +end +dof_owner = compute_dof_ownership(dh, dgrid); + +nltdofs = sum(dof_owner.==(MPI.Comm_rank(MPI.COMM_WORLD)+1)) +ndofs_total = MPI.Allreduce(nltdofs, MPI.SUM, MPI.COMM_WORLD) # Now that we have distributed all our dofs we can create our tangent matrix, # using `create_sparsity_pattern`. This function returns a sparse matrix # with the correct elements stored. -K = create_sparsity_pattern(dh) +#K = create_sparsity_pattern(dh) # ### Boundary conditions # In Ferrite constraints like Dirichlet boundary conditions @@ -309,7 +327,7 @@ update!(ch, 0.0); # We define a function, `doassemble` to do the assembly, which takes our `cellvalues`, # the sparse matrix and our DofHandler as input arguments. The function returns the # assembled stiffness matrix, and the force vector. -function doassemble(cellvalues::CellScalarValues{dim}, K::SparseMatrixCSC, dh::DofHandler) where {dim} +function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_gdof, ldof_to_part, ngdofs) where {dim} # We allocate the element stiffness matrix and element force vector # just once before looping over all the cells instead of allocating # them every time in the loop. @@ -318,19 +336,55 @@ function doassemble(cellvalues::CellScalarValues{dim}, K::SparseMatrixCSC, dh::D Ke = zeros(n_basefuncs, n_basefuncs) fe = zeros(n_basefuncs) + # I have no idea why we have to convert the types 5000 times like this........ look todo below. + comm = MPI.COMM_WORLD + np = MPI.Comm_size(comm) + my_rank = MPI.Comm_rank(comm)+1 + + @debug println("starting assembly... (R$my_rank)") + + # Neighborhood - self + neighbors_unique = unique(ldof_to_part) + neighbors = MPIData(Int32.(neighbors_unique[neighbors_unique.!=my_rank]), comm, (np,)) + + @debug println("neighbors $neighbors (R$my_rank)") + + # Extract locally owned dofs + ltdof_indices = ldof_to_part.==my_rank + ltdof_to_gdof = ldof_to_gdof[ltdof_indices] + + @debug println("ltdof_to_gdof $ltdof_to_gdof (R$my_rank)") + + # Process owns rows + row_indices = PartitionedArrays.IndexSet(my_rank, ltdof_to_gdof, repeat(Int32[my_rank], sum(ltdof_indices))) + row_data = MPIData(row_indices, comm, (np,)) + row_exchanger = Exchanger(row_data,neighbors) + rows = PRange(ngdofs,row_data,row_exchanger) + + # And shares some cols + col_indices = PartitionedArrays.IndexSet(my_rank, ldof_to_gdof, Int32.(ldof_to_part)) + col_data = MPIData(col_indices, comm, (np,)) + col_exchanger = Exchanger(col_data,neighbors) + cols = PRange(ngdofs,col_data,col_exchanger) + # Next we define the global force vector `f` and use that and # the stiffness matrix `K` and create an assembler. The assembler # is just a thin wrapper around `f` and `K` and some extra storage # to make the assembling faster. 
#+ - f = zeros(ndofs(dh)) - assembler = start_assemble(K, f) + @debug println("cols and rows constructed (R$my_rank)") + f = PartitionedArrays.PVector(0.0,cols) + @debug println("f constructed (R$my_rank)") + assembler = start_assemble() + @debug println("starting assembly (R$my_rank)") # It is now time to loop over all the cells in our grid. We do this by iterating # over a `CellIterator`. The iterator caches some useful things for us, for example # the nodal coordinates for the cell, and the local degrees of freedom. #+ for cell in CellIterator(dh) + @debug println("assembling cell #$(cell.current_cellid.x) (R$my_rank)") + # Always remember to reset the element stiffness matrix and # force vector since we reuse them for all elements. #+ @@ -346,6 +400,7 @@ function doassemble(cellvalues::CellScalarValues{dim}, K::SparseMatrixCSC, dh::D # can be queried from `cellvalues` by `getdetJdV`. #+ for q_point in 1:getnquadpoints(cellvalues) + @debug println("assembling qp $q_point (R$my_rank)") dΩ = getdetJdV(cellvalues, q_point) # For each quadrature point we loop over all the (local) shape functions. # We need the value and gradient of the testfunction `v` and also the gradient @@ -365,8 +420,19 @@ function doassemble(cellvalues::CellScalarValues{dim}, K::SparseMatrixCSC, dh::D # The last step in the element loop is to assemble `Ke` and `fe` # into the global `K` and `f` with `assemble!`. #+ - assemble!(assembler, celldofs(cell), fe, Ke) + @debug println("assembling cell finished local (R$my_rank)") + Ferrite.assemble!(assembler, celldofs(cell), Ke) + @debug println("assembling cell finished global (R$my_rank)") + #Ferrite.assemble!(f, celldofs(cell), fe) end + + @debug println("done assembling (R$my_rank)") + + I_ = MPIData(assembler.I, comm, (np,)) + J_ = MPIData(assembler.J, comm, (np,)) + V_ = MPIData(assembler.V, comm, (np,)) + # println(dof_partition_prange) + K = PartitionedArrays.PSparseMatrix(I_, J_, V_, rows, cols, ids=:local) return K, f end #md nothing # hide @@ -374,7 +440,13 @@ end # ### Solution of the system # The last step is to solve the system. First we call `doassemble` # to obtain the global stiffness matrix `K` and force vector `f`. -K, f = doassemble(cellvalues, K, dh); +K, f = doassemble(cellvalues, dh, local_to_global, dof_owner, ndofs_total); + +# Shutdown MPI +MPI.Finalize() + +# Early out for testing. +exit(0) # To account for the boundary conditions we use the `apply!` function. # This modifies elements in `K` and `f` respectively, such that From 3c34b162d6592f2b7d63bc7e2d5bf487ee63a701 Mon Sep 17 00:00:00 2001 From: termi-official Date: Tue, 19 Jul 2022 15:31:20 +0200 Subject: [PATCH 008/124] Fix process neighborhood construction in assembly. 
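The exchanger neighborhood was previously derived from `unique(ldof_to_part)`.
Since ownership of a shared dof always goes to the lowest rank touching it,
the lowest rank only ever sees its own rank in that list and ends up with an
empty neighbor set, while its higher-ranked neighbors still list it, so the
neighborhood ends up asymmetric. The neighbor ranks are now collected from the
shared vertices of the distributed grid instead; the loop added below is
equivalent to

    neighbors_set = Set(rank for sv in values(dgrid.shared_vertices)
                             for rank in keys(sv.remote_vertices))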
--- docs/src/literate/distributed_assembly.jl | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 3a50dd4434..058bcc2028 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -53,7 +53,7 @@ grid = generate_grid(Quadrilateral, (2, 2)); dgrid = DistributedGrid(grid, MPI.COMM_WORLD) # TODO refactor this into a utility function -vtk_grid("grid", dgrid; compress=false) do vtk +@debug vtk_grid("grid", dgrid; compress=false) do vtk u = Vector{Float64}(undef,length(dgrid.local_grid.nodes)) for rank ∈ 1:MPI.Comm_size(MPI.COMM_WORLD) fill!(u, 0.0) @@ -251,7 +251,7 @@ function local_to_global_numbering(dh, dgrid) # Postcondition: All local dofs need a corresponding global dof! @assert findfirst(local_to_global .== 0) == nothing - vtk_grid("dofs", dgrid; compress=false) do vtk + @debug vtk_grid("dofs", dgrid; compress=false) do vtk u = Vector{Float64}(undef,length(dgrid.local_grid.nodes)) fill!(u, 0.0) for i=1:length(u) @@ -327,7 +327,7 @@ update!(ch, 0.0); # We define a function, `doassemble` to do the assembly, which takes our `cellvalues`, # the sparse matrix and our DofHandler as input arguments. The function returns the # assembled stiffness matrix, and the force vector. -function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_gdof, ldof_to_part, ngdofs) where {dim} +function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_gdof, ldof_to_part, ngdofs, dgrid) where {dim} # We allocate the element stiffness matrix and element force vector # just once before looping over all the cells instead of allocating # them every time in the loop. @@ -344,8 +344,13 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g @debug println("starting assembly... (R$my_rank)") # Neighborhood - self - neighbors_unique = unique(ldof_to_part) - neighbors = MPIData(Int32.(neighbors_unique[neighbors_unique.!=my_rank]), comm, (np,)) + neighbors_set = Set() + for (vi, sv) ∈ dgrid.shared_vertices + for (rank, vvi) ∈ sv.remote_vertices + push!(neighbors_set, rank) + end + end + neighbors = MPIData(Int32.(neighbors_set), comm, (np,)) @debug println("neighbors $neighbors (R$my_rank)") @@ -354,6 +359,8 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g ltdof_to_gdof = ldof_to_gdof[ltdof_indices] @debug println("ltdof_to_gdof $ltdof_to_gdof (R$my_rank)") + @debug println("ldof_to_gdof $ldof_to_gdof (R$my_rank)") + @debug println("ldof_to_part $ldof_to_part (R$my_rank)") # Process owns rows row_indices = PartitionedArrays.IndexSet(my_rank, ltdof_to_gdof, repeat(Int32[my_rank], sum(ltdof_indices))) @@ -361,6 +368,8 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g row_exchanger = Exchanger(row_data,neighbors) rows = PRange(ngdofs,row_data,row_exchanger) + @debug println("rows done (R$my_rank)") + # And shares some cols col_indices = PartitionedArrays.IndexSet(my_rank, ldof_to_gdof, Int32.(ldof_to_part)) col_data = MPIData(col_indices, comm, (np,)) @@ -440,7 +449,7 @@ end # ### Solution of the system # The last step is to solve the system. First we call `doassemble` # to obtain the global stiffness matrix `K` and force vector `f`. 
-K, f = doassemble(cellvalues, dh, local_to_global, dof_owner, ndofs_total); +K, f = doassemble(cellvalues, dh, local_to_global, dof_owner, ndofs_total, dgrid); # Shutdown MPI MPI.Finalize() From c45c3c9bdb000a1d724d7b00d6a3a5dbb2608db8 Mon Sep 17 00:00:00 2001 From: termi-official Date: Tue, 19 Jul 2022 16:12:10 +0200 Subject: [PATCH 009/124] Manually fixed distributed assembly for the concrete example on two processes. --- docs/src/literate/distributed_assembly.jl | 44 +++++++++++++++++++---- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 058bcc2028..74da6ea2b5 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -327,7 +327,7 @@ update!(ch, 0.0); # We define a function, `doassemble` to do the assembly, which takes our `cellvalues`, # the sparse matrix and our DofHandler as input arguments. The function returns the # assembled stiffness matrix, and the force vector. -function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_gdof, ldof_to_part, ngdofs, dgrid) where {dim} +function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_gdof, ldof_to_rank, ngdofs, dgrid) where {dim} # We allocate the element stiffness matrix and element force vector # just once before looping over all the cells instead of allocating # them every time in the loop. @@ -355,23 +355,34 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g @debug println("neighbors $neighbors (R$my_rank)") # Extract locally owned dofs - ltdof_indices = ldof_to_part.==my_rank + ltdof_indices = ldof_to_rank.==my_rank ltdof_to_gdof = ldof_to_gdof[ltdof_indices] @debug println("ltdof_to_gdof $ltdof_to_gdof (R$my_rank)") @debug println("ldof_to_gdof $ldof_to_gdof (R$my_rank)") - @debug println("ldof_to_part $ldof_to_part (R$my_rank)") + @debug println("ldof_to_rank $ldof_to_rank (R$my_rank)") # Process owns rows - row_indices = PartitionedArrays.IndexSet(my_rank, ltdof_to_gdof, repeat(Int32[my_rank], sum(ltdof_indices))) + row_indices = PartitionedArrays.IndexSet(my_rank, ldof_to_gdof, Int32.(ldof_to_rank)) row_data = MPIData(row_indices, comm, (np,)) row_exchanger = Exchanger(row_data,neighbors) rows = PRange(ngdofs,row_data,row_exchanger) @debug println("rows done (R$my_rank)") - # And shares some cols - col_indices = PartitionedArrays.IndexSet(my_rank, ldof_to_gdof, Int32.(ldof_to_part)) + # And all corresponding non-zero rows + #all_local_rows = copy(ldof_to_gdof) + #all_local_row_ranks = ... + #TODO optimize with graph topology + #for neighbor ∈ + if my_rank == 1 + all_local_cols = collect(1:9) + all_local_col_ranks = [1,1,1,1,1,1,2,2,2] + else + all_local_cols = copy(ldof_to_gdof) + all_local_col_ranks = copy(ldof_to_rank) + end + col_indices = PartitionedArrays.IndexSet(my_rank, all_local_cols, Int32.(all_local_col_ranks)) col_data = MPIData(col_indices, comm, (np,)) col_exchanger = Exchanger(col_data,neighbors) cols = PRange(ngdofs,col_data,col_exchanger) @@ -437,10 +448,29 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g @debug println("done assembling (R$my_rank)") + # Fix ghost layer - the locations for remote processes to write their data into + if my_rank == 1 + # ltdofs + append!(assembler.I, [3,3,3,4,4,6,6]) + append!(assembler.J, [7,8,9,7,8,7,9]) + append!(assembler.V, zeros(7)) + # # gdofs? 
+ # # append!(assembler.I, [7,7,7,7,7,7, 8,8,8,8, 9,9,9,9]) + # # append!(assembler.J, [3,4,6,7,6,9, 3,4,7,8, 3,6,7,9]) + # # append!(assembler.V, zeros(6+4+4)) + # append!(assembler.I, [7,7,7,7, 8,8, 9,9]) + # append!(assembler.J, [3,4,6,6, 3,4, 3,6]) + # append!(assembler.V, zeros(4+2+2)) + else + # append!(assembler.I, [1,1,2,2,2,5,5]) + # append!(assembler.J, [7,8,9,7,8,8,9]) + # append!(assembler.V, zeros(7)) + end I_ = MPIData(assembler.I, comm, (np,)) J_ = MPIData(assembler.J, comm, (np,)) V_ = MPIData(assembler.V, comm, (np,)) - # println(dof_partition_prange) + @debug println("I=$(assembler.I) (R$my_rank)") + @debug println("J=$(assembler.J) (R$my_rank)") K = PartitionedArrays.PSparseMatrix(I_, J_, V_, rows, cols, ids=:local) return K, f end From bea2003ec12a433f3bc3c5235c69ec6417cf8d21 Mon Sep 17 00:00:00 2001 From: termi-official Date: Tue, 19 Jul 2022 17:27:47 +0200 Subject: [PATCH 010/124] Small refactor to see where exactly algorithms are necessary and their input/output. --- docs/src/literate/distributed_assembly.jl | 35 +++++++++-------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 74da6ea2b5..1d0bf62e1b 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -371,18 +371,19 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g @debug println("rows done (R$my_rank)") # And all corresponding non-zero rows - #all_local_rows = copy(ldof_to_gdof) - #all_local_row_ranks = ... - #TODO optimize with graph topology - #for neighbor ∈ - if my_rank == 1 - all_local_cols = collect(1:9) - all_local_col_ranks = [1,1,1,1,1,1,2,2,2] - else - all_local_cols = copy(ldof_to_gdof) - all_local_col_ranks = copy(ldof_to_rank) + ghost_dof_to_global = Int[] + ghost_dof_rank = Int32[] + #TODO obtain ghosts algorithmic + if my_rank == 1 + append!(ghost_dof_to_global, collect(7:9)) + append!(ghost_dof_rank, [2,2,2]) end - col_indices = PartitionedArrays.IndexSet(my_rank, all_local_cols, Int32.(all_local_col_ranks)) + all_local_cols = Int[ldof_to_gdof; ghost_dof_to_global] + all_local_col_ranks = Int32[ldof_to_rank; ghost_dof_rank] + @debug println("all_local_cols $all_local_cols (R$my_rank)") + @debug println("all_local_col_ranks $all_local_col_ranks (R$my_rank)") + + col_indices = PartitionedArrays.IndexSet(my_rank, all_local_cols, all_local_col_ranks) col_data = MPIData(col_indices, comm, (np,)) col_exchanger = Exchanger(col_data,neighbors) cols = PRange(ngdofs,col_data,col_exchanger) @@ -449,22 +450,14 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g @debug println("done assembling (R$my_rank)") # Fix ghost layer - the locations for remote processes to write their data into + #TODO obtain ghost interaction algorithmic if my_rank == 1 # ltdofs append!(assembler.I, [3,3,3,4,4,6,6]) append!(assembler.J, [7,8,9,7,8,7,9]) append!(assembler.V, zeros(7)) - # # gdofs? 
- # # append!(assembler.I, [7,7,7,7,7,7, 8,8,8,8, 9,9,9,9]) - # # append!(assembler.J, [3,4,6,7,6,9, 3,4,7,8, 3,6,7,9]) - # # append!(assembler.V, zeros(6+4+4)) - # append!(assembler.I, [7,7,7,7, 8,8, 9,9]) - # append!(assembler.J, [3,4,6,6, 3,4, 3,6]) - # append!(assembler.V, zeros(4+2+2)) else - # append!(assembler.I, [1,1,2,2,2,5,5]) - # append!(assembler.J, [7,8,9,7,8,8,9]) - # append!(assembler.V, zeros(7)) + # no ghost layer end I_ = MPIData(assembler.I, comm, (np,)) J_ = MPIData(assembler.J, comm, (np,)) From 92ed5faf80efd8d5c47e35bfcad7314a747ff720 Mon Sep 17 00:00:00 2001 From: termi-official Date: Tue, 19 Jul 2022 18:24:17 +0200 Subject: [PATCH 011/124] Some documentation. --- docs/src/literate/distributed_assembly.jl | 36 ++++++++++++++++++----- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 1d0bf62e1b..c98418ffac 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -92,19 +92,31 @@ dh = DofHandler(dgrid.local_grid) push!(dh, :u, 1) close!(dh); -# We have to renumber the dofs to their global numbering. -function local_to_global_numbering(dh, dgrid) +# Renumber the dofs in local ordering to their corresponding global numbering. +# TODO: Refactor for MixedDofHandler integration +function local_to_global_numbering(dh::DofHandler, dgrid) + # MPI rank starting with 1 to match Julia's index convention my_rank = MPI.Comm_rank(MPI.COMM_WORLD)+1 local_to_global = Vector{Int}(undef,ndofs(dh)) fill!(local_to_global,0) # 0 is the invalid index! # Start by numbering local dofs only from 1:#local_dofs - # # Lookup for synchronization in the form (Remote Rank,Shared Entity) - # # @TODO replace dict with vector and tie to MPI neighborhood graph of the mesh + # Lookup for synchronization in the form (Remote Rank,Shared Entity) + # @TODO replace dict with vector and tie to MPI neighborhood graph of the mesh vertices_send = Dict{Int,Vector{VertexIndex}}() n_vertices_recv = Dict{Int,Int}() + # We start by assigning a local dof to all owned entities. + # An entity is owned if: + # 1. *All* topological neighbors are on the local process + # 2. If the rank of the local process it lower than the rank of *all* topological neighbors + # A topological neighbor in this context is hereby defined per entity: + # * vertex: All elements whose vertex is the vertex in question + # * cell: Just the cell itself + # * All other entities: All cells for which one of the corresponding entities interior intersects + # with the interior of the entity in question. + # TODO: implement for entitied with dim > 0 next_local_idx = 1 for (ci, cell) in enumerate(getcells(dh.grid)) @debug println("cell #$ci (R$my_rank)") @@ -173,13 +185,17 @@ function local_to_global_numbering(dh, dgrid) end @debug println("#shifted local dof range $(local_offset+1):$(local_offset+num_true_local_dofs) (R$my_rank)") + # Shift assigned local dofs (dofs with value >0) into the global range + # At this point in the algorithm the dofs with value 0 are the dofs owned of neighboring processes for i ∈ 1:length(local_to_global) if local_to_global[i] != 0 local_to_global[i] += local_offset end end - # Sync remote dofs + # Sync non-owned dofs with neighboring processes. 
+ # TODO: implement for entitied with dim > 0 + # TODO: Use MPI graph primitives to simplify this code for sending_rank ∈ 1:MPI.Comm_size(MPI.COMM_WORLD) if my_rank == sending_rank for remote_rank ∈ 1:MPI.Comm_size(MPI.COMM_WORLD) @@ -362,7 +378,11 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g @debug println("ldof_to_gdof $ldof_to_gdof (R$my_rank)") @debug println("ldof_to_rank $ldof_to_rank (R$my_rank)") - # Process owns rows + # Process owns rows of owned dofs. The process also may write to some remote dofs, + # which correspond to non-owned share entities. Here we construct the rows for the + # distributed matrix. + # We decide for row (i.e. test function) ownership, because it the image of + # SpMV is process local. row_indices = PartitionedArrays.IndexSet(my_rank, ldof_to_gdof, Int32.(ldof_to_rank)) row_data = MPIData(row_indices, comm, (np,)) row_exchanger = Exchanger(row_data,neighbors) @@ -370,7 +390,9 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g @debug println("rows done (R$my_rank)") - # And all corresponding non-zero rows + # For the locally visible columns we also have to take into account that remote + # processes will write their data in some of these, because their remotely + # owned trial functions overlap with the locally owned test functions. ghost_dof_to_global = Int[] ghost_dof_rank = Int32[] #TODO obtain ghosts algorithmic From 84fb508eb0af81f87194d0325ade5c173004a7ba Mon Sep 17 00:00:00 2001 From: termi-official Date: Tue, 19 Jul 2022 18:40:20 +0200 Subject: [PATCH 012/124] Abstraction to enforce correct communicator in distributed example. --- docs/src/literate/distributed_assembly.jl | 36 +++++++++++------------ src/Grid/DistributedGrid.jl | 4 ++- src/exports.jl | 1 + 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index c98418ffac..e29e365f25 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -50,12 +50,12 @@ MPI.Init() # so we don't need to specify the corners of the domain. grid = generate_grid(Quadrilateral, (2, 2)); -dgrid = DistributedGrid(grid, MPI.COMM_WORLD) +dgrid = DistributedGrid(grid) # TODO refactor this into a utility function @debug vtk_grid("grid", dgrid; compress=false) do vtk u = Vector{Float64}(undef,length(dgrid.local_grid.nodes)) - for rank ∈ 1:MPI.Comm_size(MPI.COMM_WORLD) + for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) fill!(u, 0.0) for sv ∈ values(dgrid.shared_vertices) if haskey(sv.remote_vertices,rank) @@ -96,7 +96,7 @@ close!(dh); # TODO: Refactor for MixedDofHandler integration function local_to_global_numbering(dh::DofHandler, dgrid) # MPI rank starting with 1 to match Julia's index convention - my_rank = MPI.Comm_rank(MPI.COMM_WORLD)+1 + my_rank = MPI.Comm_rank(global_comm(dgrid))+1 local_to_global = Vector{Int}(undef,ndofs(dh)) fill!(local_to_global,0) # 0 is the invalid index! 
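# The ownership convention used throughout the dof distribution can be stated
# as a small free-standing predicate. A minimal sketch only; the name
# `vertex_is_owned` is not introduced by these patches, it merely restates the
# `haskey`/`all` condition used in `local_to_global_numbering` and the
# `minimum` over ranks used in `compute_dof_ownership`:
function vertex_is_owned(dgrid, my_rank::Int, vi::VertexIndex)
    # A vertex that is not shared is always owned by the local rank.
    haskey(dgrid.shared_vertices, vi) || return true
    # Otherwise the lowest rank in the vertex neighborhood owns it.
    remote_ranks = keys(dgrid.shared_vertices[vi].remote_vertices)
    return all(r -> r > my_rank, remote_ranks)
end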
@@ -178,10 +178,10 @@ function local_to_global_numbering(dh::DofHandler, dgrid) # Set true local indices local_offset = 0 if my_rank > 1 - local_offset = MPI.Recv(Int, MPI.COMM_WORLD; source=my_rank-1-1) + local_offset = MPI.Recv(Int, global_comm(dgrid); source=my_rank-1-1) end - if my_rank < MPI.Comm_size(MPI.COMM_WORLD) - MPI.Send(local_offset+num_true_local_dofs, MPI.COMM_WORLD; dest=my_rank+1-1) + if my_rank < MPI.Comm_size(global_comm(dgrid)) + MPI.Send(local_offset+num_true_local_dofs, global_comm(dgrid); dest=my_rank+1-1) end @debug println("#shifted local dof range $(local_offset+1):$(local_offset+num_true_local_dofs) (R$my_rank)") @@ -196,9 +196,9 @@ function local_to_global_numbering(dh::DofHandler, dgrid) # Sync non-owned dofs with neighboring processes. # TODO: implement for entitied with dim > 0 # TODO: Use MPI graph primitives to simplify this code - for sending_rank ∈ 1:MPI.Comm_size(MPI.COMM_WORLD) + for sending_rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) if my_rank == sending_rank - for remote_rank ∈ 1:MPI.Comm_size(MPI.COMM_WORLD) + for remote_rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) if haskey(vertices_send, remote_rank) n_vertices = length(vertices_send[remote_rank]) @debug println("Sending $n_vertices vertices to rank $remote_rank (R$my_rank)") @@ -214,8 +214,8 @@ function local_to_global_numbering(dh::DofHandler, dgrid) next_buffer_idx += 1 end end - MPI.Send(remote_cells, MPI.COMM_WORLD; dest=remote_rank-1) - MPI.Send(remote_cell_vis, MPI.COMM_WORLD; dest=remote_rank-1) + MPI.Send(remote_cells, global_comm(dgrid); dest=remote_rank-1) + MPI.Send(remote_cell_vis, global_comm(dgrid); dest=remote_rank-1) for fi ∈ 1:Ferrite.nfields(dh) next_buffer_idx = 1 if length(dh.vertexdicts[fi]) == 0 @@ -231,7 +231,7 @@ function local_to_global_numbering(dh::DofHandler, dgrid) end next_buffer_idx += 1 end - MPI.Send(corresponding_global_dofs, MPI.COMM_WORLD; dest=remote_rank-1) + MPI.Send(corresponding_global_dofs, global_comm(dgrid); dest=remote_rank-1) end end end @@ -241,15 +241,15 @@ function local_to_global_numbering(dh::DofHandler, dgrid) @debug println("Receiving $n_vertices vertices from rank $sending_rank (R$my_rank)") local_cells = Array{Int64}(undef,n_vertices) local_cell_vis = Array{Int64}(undef,n_vertices) - MPI.Recv!(local_cells, MPI.COMM_WORLD; source=sending_rank-1) - MPI.Recv!(local_cell_vis, MPI.COMM_WORLD; source=sending_rank-1) + MPI.Recv!(local_cells, global_comm(dgrid); source=sending_rank-1) + MPI.Recv!(local_cell_vis, global_comm(dgrid); source=sending_rank-1) for fi in 1:Ferrite.nfields(dh) if length(dh.vertexdicts[fi]) == 0 @debug println(" Skipping recv on field $(dh.field_names[fi]) (R$my_rank)") continue end corresponding_global_dofs = Array{Int64}(undef,n_vertices) - MPI.Recv!(corresponding_global_dofs, MPI.COMM_WORLD; source=sending_rank-1) + MPI.Recv!(corresponding_global_dofs, global_comm(dgrid); source=sending_rank-1) for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] if haskey(dh.vertexdicts[fi], vi) @@ -281,7 +281,7 @@ end local_to_global = local_to_global_numbering(dh, dgrid); function compute_dof_ownership(dh, dgrid) - my_rank = MPI.Comm_rank(dgrid.grid_comm)+1 + my_rank = MPI.Comm_rank(global_comm(dgrid))+1 dof_owner = Vector{Int}(undef,ndofs(dh)) fill!(dof_owner, my_rank) @@ -304,8 +304,8 @@ function compute_dof_ownership(dh, dgrid) end dof_owner = compute_dof_ownership(dh, dgrid); -nltdofs = sum(dof_owner.==(MPI.Comm_rank(MPI.COMM_WORLD)+1)) -ndofs_total = 
MPI.Allreduce(nltdofs, MPI.SUM, MPI.COMM_WORLD) +nltdofs = sum(dof_owner.==(MPI.Comm_rank(global_comm(dgrid))+1)) +ndofs_total = MPI.Allreduce(nltdofs, MPI.SUM, global_comm(dgrid)) # Now that we have distributed all our dofs we can create our tangent matrix, # using `create_sparsity_pattern`. This function returns a sparse matrix @@ -353,7 +353,7 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g fe = zeros(n_basefuncs) # I have no idea why we have to convert the types 5000 times like this........ look todo below. - comm = MPI.COMM_WORLD + comm = global_comm(dgrid) np = MPI.Comm_size(comm) my_rank = MPI.Comm_rank(comm)+1 diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index 92186b85b4..127f6fb1f5 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -44,9 +44,11 @@ mutable struct DistributedGrid{dim,C<:AbstractCell,T<:Real} <: AbstractDistribut shared_faces::Dict{FaceIndex,SharedFace} end +global_comm(dgrid::DistributedGrid) = dgrid.grid_comm + """ """ -function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_comm::MPI.Comm; partition_alg = :RECURSIVE) where {dim,C,T} +function DistributedGrid(grid_to_distribute::Grid{dim,C,T}; grid_comm::MPI.Comm = MPI.COMM_WORLD, partition_alg = :RECURSIVE) where {dim,C,T} grid_topology = ExclusiveTopology(grid_to_distribute) return DistributedGrid(grid_to_distribute, grid_topology, grid_comm; partition_alg=partition_alg) end diff --git a/src/exports.jl b/src/exports.jl index 7a8577b988..bbc7f9b554 100644 --- a/src/exports.jl +++ b/src/exports.jl @@ -82,6 +82,7 @@ export getfacesets, getedgesets, getvertexsets, + global_comm, onboundary, nfaces, addnodeset!, From 9a91fba0ee4889e8a74597f92581a4e4ca27d854 Mon Sep 17 00:00:00 2001 From: termi-official Date: Tue, 19 Jul 2022 23:05:03 +0200 Subject: [PATCH 013/124] toglobal should work on general grids. --- src/Grid/grid.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Grid/grid.jl b/src/Grid/grid.jl index 53cf40ce21..28b0f857e4 100644 --- a/src/Grid/grid.jl +++ b/src/Grid/grid.jl @@ -406,8 +406,8 @@ Returns an iterateable face skeleton. The skeleton consists of `FaceIndex` that """ faceskeleton(top::ExclusiveTopology, grid::AbstractGrid) = top.face_skeleton -toglobal(grid::Grid,vertexidx::VertexIndex) = vertices(getcells(grid,vertexidx[1]))[vertexidx[2]] -toglobal(grid::Grid,vertexidx::Vector{VertexIndex}) = unique(toglobal.((grid,),vertexidx)) +toglobal(grid::AbstractGrid,vertexidx::VertexIndex) = vertices(getcells(grid,vertexidx[1]))[vertexidx[2]] +toglobal(grid::AbstractGrid,vertexidx::Vector{VertexIndex}) = unique(toglobal.((grid,),vertexidx)) @inline getdim(::AbstractGrid{dim}) where {dim} = dim """ From 8c1051b01928f0f2c832eecb5e2bbad9cc495a8d Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 20 Jul 2022 04:16:24 +0200 Subject: [PATCH 014/124] Ghost dof sync works for simple cases. --- docs/src/literate/distributed_assembly.jl | 188 +++++++++++++++++++--- 1 file changed, 167 insertions(+), 21 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index e29e365f25..e839fa8fb7 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -352,23 +352,40 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g Ke = zeros(n_basefuncs, n_basefuncs) fe = zeros(n_basefuncs) - # I have no idea why we have to convert the types 5000 times like this........ 
look todo below. + # @TODO put the code below into a "distributed assembler" struct and functions + # @TODO the code below can be massively simplified by introducing a ghost layer to the + # distributed grid, which can efficiently precompute some of the values below. comm = global_comm(dgrid) np = MPI.Comm_size(comm) my_rank = MPI.Comm_rank(comm)+1 @debug println("starting assembly... (R$my_rank)") - # Neighborhood - self - neighbors_set = Set() - for (vi, sv) ∈ dgrid.shared_vertices - for (rank, vvi) ∈ sv.remote_vertices - push!(neighbors_set, rank) - end + # Neighborhood graph + # @TODO cleanup old code below and use graph primitives instead. + (source_len, destination_len, _) = MPI.Dist_graph_neighbors_count(vertex_comm(dgrid)) + sources = Vector{Cint}(undef, source_len) + destinations = Vector{Cint}(undef, destination_len) + MPI.Dist_graph_neighbors!(vertex_comm(dgrid), sources, destinations) + + # Adjust to Julia index convention + sources .+= 1 + destinations .+= 1 + + @debug println("Neighborhood | $sources | $destinations (R$my_rank)") + + # Invert the relations to clarify the code + source_index = Dict{Cint, Int}() + for (i,remote_rank) ∈ enumerate(sources) + source_index[remote_rank] = i + end + destination_index = Dict{Int, Cint}() + for (i,remote_rank) ∈ enumerate(destinations) + destination_index[remote_rank] = i end - neighbors = MPIData(Int32.(neighbors_set), comm, (np,)) - @debug println("neighbors $neighbors (R$my_rank)") + # Note: We assume a symmetric neighborhood for now... this may not be true in general. + neighbors = MPIData(Int32.(sources), comm, (np,)) # Extract locally owned dofs ltdof_indices = ldof_to_rank.==my_rank @@ -394,12 +411,124 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g # processes will write their data in some of these, because their remotely # owned trial functions overlap with the locally owned test functions. ghost_dof_to_global = Int[] + ghost_dof_element_index = Int[] ghost_dof_rank = Int32[] - #TODO obtain ghosts algorithmic - if my_rank == 1 - append!(ghost_dof_to_global, collect(7:9)) - append!(ghost_dof_rank, [2,2,2]) + + # ------------ Ghost dof synchronization ---------- + # Prepare sending ghost dofs to neighbors + #@TODO comminication can be optimized by deduplicating entries in the following arrays + #@TODO reorder communication by field to eliminate need for `ghost_dof_field_index_to_send` + ghost_dof_to_send = [Int[] for i ∈ 1:destination_len] # global dof id + ghost_rank_to_send = [Int[] for i ∈ 1:destination_len] # rank of dof + ghost_dof_field_index_to_send = [Int[] for i ∈ 1:destination_len] + ghost_element_to_send = [Int[] for i ∈ 1:destination_len] # corresponding element + ghost_dof_owner = [Int[] for i ∈ 1:destination_len] # corresponding owner + for (pivot_vi, pivot_sv) ∈ dgrid.shared_vertices + # We start by searching shared vertices which are not owned by us + pivot_vertex_owner_rank = Ferrite.compute_owner(dgrid, pivot_sv) + pivot_cell_idx = pivot_vi[1] + + if my_rank != pivot_vertex_owner_rank + sender_slot = destination_index[pivot_vertex_owner_rank] + + @debug println("$pivot_vi may require synchronization (R$my_rank)") + # We have to send ALL dofs on the element to the remote. + # @TODO send actually ALL dofs (currently only vertex dofs for a first version...) 
+ pivot_cell = getcells(dgrid, pivot_cell_idx) + for (other_vertex_idx, other_vertex) ∈ enumerate(Ferrite.vertices(pivot_cell)) + # Skip self + other_vi = VertexIndex(pivot_cell_idx, other_vertex_idx) + if other_vi == pivot_vi + continue + end + + if is_shared_vertex(dgrid, other_vi) + #@TODO We should be able to remove more redundant communication is many cases. + other_sv = dgrid.shared_vertices[other_vi] + other_vertex_owner_rank = Ferrite.compute_owner(dgrid, other_sv) + # Also skip if the "other vertex" is already owned by the process owning the pivot vertex + if other_vertex_owner_rank == pivot_vertex_owner_rank + continue; + end + # A vertex is also not a ghost vertex if it touches the domain of the rank of the pivot + if pivot_vertex_owner_rank ∈ keys(other_sv.remote_vertices) + continue + end + else + other_vertex_owner_rank = my_rank + end + + # Now we have to sync all fields separately + @debug println(" Ghost candidate $other_vi for $pivot_vi (R$my_rank)") + for field_idx in 1:Ferrite.nfields(dh) + pivot_vertex = Ferrite.toglobal(getlocalgrid(dgrid), pivot_vi) + # If any of the two vertices is not defined on the current field, just skip. + if !haskey(dh.vertexdicts[field_idx], pivot_vertex) || !haskey(dh.vertexdicts[field_idx], other_vertex) + continue + end + @debug println(" $other_vi is ghost for $pivot_vi in field $field_idx (R$my_rank)") + + other_vertex_dof = dh.vertexdicts[field_idx][other_vertex] + + append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[other_vertex_dof]) + append!(ghost_rank_to_send[sender_slot], other_vertex_owner_rank) + append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + append!(ghost_element_to_send[sender_slot], pivot_cell_idx) + end + end + end + end + + ghost_send_buffer_lengths = Int[length(i) for i ∈ ghost_element_to_send] + ghost_recv_buffer_lengths = zeros(Int, destination_len) + MPI.Neighbor_alltoall!(UBuffer(ghost_send_buffer_lengths,1), UBuffer(ghost_recv_buffer_lengths,1), vertex_comm(dgrid)); + @debug for (i,ghost_recv_buffer_length) ∈ enumerate(ghost_recv_buffer_lengths) + println("receiving $ghost_recv_buffer_length ghosts from $(sources[i]) (R$my_rank)") end + + # Communicate ghost information + # @TODO coalesce communication + ghost_send_buffer_dofs = vcat(ghost_dof_to_send...) + ghost_recv_buffer_dofs = zeros(Int, sum(ghost_recv_buffer_lengths)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_dofs,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_dofs,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + + ghost_send_buffer_elements = vcat(ghost_element_to_send...) + ghost_recv_buffer_elements = zeros(Int, sum(ghost_recv_buffer_lengths)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_elements,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_elements,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + + ghost_send_buffer_fields = vcat(ghost_dof_field_index_to_send...) + ghost_recv_buffer_fields = zeros(Int, sum(ghost_recv_buffer_lengths)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_fields,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_fields,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + + ghost_send_buffer_ranks = vcat(ghost_rank_to_send...) 
+ ghost_recv_buffer_ranks = zeros(Int, sum(ghost_recv_buffer_lengths)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_ranks,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_ranks,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + + println("received $ghost_recv_buffer_dofs with owners $ghost_recv_buffer_ranks (R$my_rank)") + + return 0, 0 + + # #TODO obtain ghosts algorithmic + if np == 2 + if my_rank == 1 + append!(ghost_dof_to_global, collect(7:9)) + append!(ghost_dof_rank, [2,2,2]) + else + # no ghosts + end + elseif np == 3 + if my_rank == 1 + append!(ghost_dof_to_global, [5,6,7,8,9]) + append!(ghost_dof_rank, [2,2,2,3,3]) + elseif my_rank == 2 + append!(ghost_dof_to_global, [1,3,8,9]) + append!(ghost_dof_rank, [1,1,3,3]) + else + # no ghosts + end + end + + # ------------- Construct rows and cols of distributed matrix -------- all_local_cols = Int[ldof_to_gdof; ghost_dof_to_global] all_local_col_ranks = Int32[ldof_to_rank; ghost_dof_rank] @debug println("all_local_cols $all_local_cols (R$my_rank)") @@ -416,7 +545,7 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g # to make the assembling faster. #+ @debug println("cols and rows constructed (R$my_rank)") - f = PartitionedArrays.PVector(0.0,cols) + f = PartitionedArrays.PVector(0.0,rows) @debug println("f constructed (R$my_rank)") assembler = start_assemble() @debug println("starting assembly (R$my_rank)") @@ -473,13 +602,30 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g # Fix ghost layer - the locations for remote processes to write their data into #TODO obtain ghost interaction algorithmic - if my_rank == 1 - # ltdofs - append!(assembler.I, [3,3,3,4,4,6,6]) - append!(assembler.J, [7,8,9,7,8,7,9]) - append!(assembler.V, zeros(7)) - else - # no ghost layer + if np == 2 + if my_rank == 1 + # ltdofs + append!(assembler.I, [3,3,3, 4,4, 6,6]) + append!(assembler.J, [7,8,9, 7,8, 7,9]) + append!(assembler.V, zeros(7)) + else + # no ghost layer + end + elseif np == 3 + if my_rank == 1 + # ltdofs + append!(assembler.I, [3,3, 1,1, 4,4,4,4,4]) + append!(assembler.J, [9,6, 5,8, 8,5,7,6,9]) + append!(assembler.V, zeros(9)) + elseif my_rank == 2 + # all_local_cols [5, 4, 6, 7, 1, 3, 8, 9] (R2) + # ltdofs + append!(assembler.I, [1,1, 3,3]) + append!(assembler.J, [5,7, 6,8]) + append!(assembler.V, zeros(4)) + else + # no ghost layer + end end I_ = MPIData(assembler.I, comm, (np,)) J_ = MPIData(assembler.J, comm, (np,)) From 18e33d2ede97db62319f0dc98956003a8ce7adb1 Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 20 Jul 2022 04:32:01 +0200 Subject: [PATCH 015/124] Algorithmic construction of rows and cols in distributed matrix works basically. 
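Note (not part of the changed files): the ghost-dof synchronization in the previous commit and the row/column construction in this one lean on a two-step neighbor exchange, lengths first via MPI.Neighbor_alltoall!, then the variable-length payload via MPI.Neighbor_alltoallv! on a distributed-graph communicator. Below is a minimal standalone sketch of that pattern only; the variable names and the toy payload are illustrative, and it assumes it is launched under mpiexec with at least two ranks.

    using MPI
    MPI.Init()
    comm   = MPI.COMM_WORLD
    rank   = Int(MPI.Comm_rank(comm))
    nranks = Int(MPI.Comm_size(comm))

    # Symmetric toy neighborhood: every rank lists every other rank as destination.
    dests = Cint[r for r in 0:nranks-1 if r != rank]
    graph_comm = MPI.Dist_graph_create(comm, Cint[rank], Cint[length(dests)], dests)
    source_len, destination_len, _ = MPI.Dist_graph_neighbors_count(graph_comm)

    # Step 1: tell each destination how many entries it will receive (here `rank + 1`).
    send_chunks  = [fill(rank, rank + 1) for _ in 1:destination_len]
    send_lengths = Int[length(c) for c in send_chunks]
    recv_lengths = zeros(Int, source_len)
    MPI.Neighbor_alltoall!(UBuffer(send_lengths, 1), UBuffer(recv_lengths, 1), graph_comm)

    # Step 2: exchange the variable-length payload itself.
    send_buffer = reduce(vcat, send_chunks; init = Int[])
    recv_buffer = zeros(Int, sum(recv_lengths))
    MPI.Neighbor_alltoallv!(VBuffer(send_buffer, send_lengths),
                            VBuffer(recv_buffer, recv_lengths), graph_comm)
    println("rank $rank received $recv_buffer")
    MPI.Finalize()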
--- docs/src/literate/distributed_assembly.jl | 43 +++++++++-------- src/Grid/DistributedGrid.jl | 56 ++++++++++++++++++++--- 2 files changed, 73 insertions(+), 26 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index e839fa8fb7..a3d3fa84c7 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -506,26 +506,31 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g println("received $ghost_recv_buffer_dofs with owners $ghost_recv_buffer_ranks (R$my_rank)") - return 0, 0 - # #TODO obtain ghosts algorithmic - if np == 2 - if my_rank == 1 - append!(ghost_dof_to_global, collect(7:9)) - append!(ghost_dof_rank, [2,2,2]) - else - # no ghosts - end - elseif np == 3 - if my_rank == 1 - append!(ghost_dof_to_global, [5,6,7,8,9]) - append!(ghost_dof_rank, [2,2,2,3,3]) - elseif my_rank == 2 - append!(ghost_dof_to_global, [1,3,8,9]) - append!(ghost_dof_rank, [1,1,3,3]) - else - # no ghosts - end + # if np == 2 + # if my_rank == 1 + # append!(ghost_dof_to_global, collect(7:9)) + # append!(ghost_dof_rank, [2,2,2]) + # else + # # no ghosts + # end + # elseif np == 3 + # if my_rank == 1 + # append!(ghost_dof_to_global, [5,6,7,8,9]) + # append!(ghost_dof_rank, [2,2,2,3,3]) + # elseif my_rank == 2 + # append!(ghost_dof_to_global, [1,3,8,9]) + # append!(ghost_dof_rank, [1,1,3,3]) + # else + # # no ghosts + # end + # end + + unique_ghosts = sort(unique(first,zip(ghost_recv_buffer_dofs,ghost_recv_buffer_ranks))) + # unzip manually + for (dof,rank) ∈ unique_ghosts + push!(ghost_dof_to_global, dof) + push!(ghost_dof_rank, rank) end # ------------- Construct rows and cols of distributed matrix -------- diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index 127f6fb1f5..82b300a29e 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -5,29 +5,39 @@ using MPI """ abstract type AbstractDistributedGrid{sdim} <: AbstractGrid{sdim} end +abstract type SharedEntity end + # TODO the following three structs can be merged to one struct with type parameter. """ """ -struct SharedVertex +struct SharedVertex <: SharedEntity local_idx::VertexIndex remote_vertices::Dict{Int,Vector{VertexIndex}} end +remote_entities(sv::SharedVertex) = sv.remote_vertices + """ """ -struct SharedFace +struct SharedFace <: SharedEntity local_idx::FaceIndex remote_faces::Dict{Int,Vector{FaceIndex}} end +remote_entities(sf::SharedFace) = sf.remote_faces + """ """ -struct SharedEdge +struct SharedEdge <: SharedEntity local_idx::EdgeIndex remote_edges::Dict{Int,Vector{EdgeIndex}} end +remote_entities(se::SharedEdge) = se.remote_edges + """ +@TODO docs +@TODO PArrays ready constructor """ mutable struct DistributedGrid{dim,C<:AbstractCell,T<:Real} <: AbstractDistributedGrid{dim} # Dense comminicator on the grid @@ -44,7 +54,21 @@ mutable struct DistributedGrid{dim,C<:AbstractCell,T<:Real} <: AbstractDistribut shared_faces::Dict{FaceIndex,SharedFace} end -global_comm(dgrid::DistributedGrid) = dgrid.grid_comm +""" +""" +is_shared_vertex(dgrid::AbstractDistributedGrid, vi::VertexIndex) = haskey(dgrid.shared_vertices, vi) + + +""" +Global dense communicator of the distributed grid. +""" +global_comm(dgrid::AbstractDistributedGrid) = dgrid.grid_comm + +""" +Graph communicator for shared vertices. Guaranteed to be derived from the communicator +returned by @global_comm . 
+""" +vertex_comm(dgrid::AbstractDistributedGrid) = dgrid.interface_comm """ """ @@ -262,16 +286,34 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu end end - return DistributedGrid(grid_comm,grid_comm,local_grid,shared_vertices,shared_edges,shared_faces) + # Neighborhood graph + neighbors_set = Set{Cint}() + for (vi, sv) ∈ shared_vertices + for (rank, vvi) ∈ sv.remote_vertices + push!(neighbors_set, rank) + end + end + # Adjust ranks back to to C index convention + dest = collect(neighbors_set).-1 + degree = length(dest) + interface_comm = MPI.Dist_graph_create(grid_comm, Cint[my_rank-1], Cint[degree], Cint.(dest)) + + return DistributedGrid(grid_comm,interface_comm,local_grid,shared_vertices,shared_edges,shared_faces) end @inline getlocalgrid(dgrid::AbstractDistributedGrid) = dgrid.local_grid @inline getcells(dgrid::AbstractDistributedGrid) = getcells(getlocalgrid(grid)) -@inline getcells(dgrid::AbstractDistributedGrid, v::Union{Int, Vector{Int}}) = getcells(getlocalgrid(grid),v) -@inline getcells(dgrid::AbstractDistributedGrid, setname::String) = getcells(getlocalgrid(grid),setname) +@inline getcells(dgrid::AbstractDistributedGrid, v::Union{Int, Vector{Int}}) = getcells(getlocalgrid(dgrid),v) +@inline getcells(dgrid::AbstractDistributedGrid, setname::String) = getcells(getlocalgrid(dgrid),setname) "Returns the number of cells in the `<:AbstractDistributedGrid`." @inline getncells(dgrid::AbstractDistributedGrid) = length(getcells(getlocalgrid(dgrid))) "Returns the celltype of the `<:AbstractDistributedGrid`." @inline getcelltype(dgrid::AbstractDistributedGrid) = eltype(getcells(getlocalgrid(dgrid))) @inline getcelltype(dgrid::AbstractDistributedGrid, i::Int) = typeof(getcells(getlocalgrid(dgrid),i)) + +# Here we define the entity ownership by the process sharing an entity with lowest rank in the grid communicator. +function compute_owner(dgrid::AbstractDistributedGrid, shared_entity::SharedEntity)::Int32 + my_rank = MPI.Comm_rank(global_comm(dgrid))+1 # Shift rank up by 1 to match Julia's indexing convention + return minimum([my_rank; [remote_rank for (remote_rank, _) ∈ remote_entities(shared_entity)]]) +end From bc108e4c1013d29ec4da9085f538988025611eb6 Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 20 Jul 2022 05:24:18 +0200 Subject: [PATCH 016/124] COO assembly of ghost layer almost functional. 
--- docs/src/literate/distributed_assembly.jl | 86 ++++++++++++++++------- 1 file changed, 60 insertions(+), 26 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index a3d3fa84c7..147a6357d5 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -526,9 +526,9 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g # end # end - unique_ghosts = sort(unique(first,zip(ghost_recv_buffer_dofs,ghost_recv_buffer_ranks))) + unique_ghosts_dr = sort(unique(first,zip(ghost_recv_buffer_dofs,ghost_recv_buffer_ranks))) # unzip manually - for (dof,rank) ∈ unique_ghosts + for (dof,rank) ∈ unique_ghosts_dr push!(ghost_dof_to_global, dof) push!(ghost_dof_rank, rank) end @@ -606,32 +606,66 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g @debug println("done assembling (R$my_rank)") # Fix ghost layer - the locations for remote processes to write their data into - #TODO obtain ghost interaction algorithmic - if np == 2 - if my_rank == 1 - # ltdofs - append!(assembler.I, [3,3,3, 4,4, 6,6]) - append!(assembler.J, [7,8,9, 7,8, 7,9]) - append!(assembler.V, zeros(7)) - else - # no ghost layer - end - elseif np == 3 - if my_rank == 1 - # ltdofs - append!(assembler.I, [3,3, 1,1, 4,4,4,4,4]) - append!(assembler.J, [9,6, 5,8, 8,5,7,6,9]) - append!(assembler.V, zeros(9)) - elseif my_rank == 2 - # all_local_cols [5, 4, 6, 7, 1, 3, 8, 9] (R2) - # ltdofs - append!(assembler.I, [1,1, 3,3]) - append!(assembler.J, [5,7, 6,8]) - append!(assembler.V, zeros(4)) - else - # no ghost layer + unique_ghosts_dre = zip(ghost_recv_buffer_dofs,ghost_recv_buffer_ranks,ghost_recv_buffer_elements) + @debug println("unique_ghosts_dre $unique_ghosts_dre (R$my_rank)") + + # @TODO this is dead slow. optimize. 
+ IJ = [] + for (pivot_vi, pivot_sv) ∈ dgrid.shared_vertices + # Loop over owned shared vertices + if my_rank == Ferrite.compute_owner(dgrid, pivot_sv) + pivot_vertex = Ferrite.toglobal(getlocalgrid(dgrid), pivot_vi) + for (d,r,e) ∈ unique_ghosts_dre + re = Ferrite.remote_entities(pivot_sv) + d_local = findfirst(x->x==d,ghost_dof_to_global) + if haskey(re, r) + for (remote_cell_idx,_) ∈ re[r] + if remote_cell_idx == e + for field_idx in 1:Ferrite.nfields(dh) + if !haskey(dh.vertexdicts[field_idx], pivot_vertex) + continue + end + + pivot_dof = dh.vertexdicts[field_idx][pivot_vertex] + push!(IJ, (pivot_dof, d_local)) + end + end + end + end + end end end + unique!(IJ) + @debug println("IJ=$(IJ) (R$my_rank)") + + for (i,j) ∈ IJ + push!(assembler.I, i) + push!(assembler.J, j+length(ldof_to_gdof)) + push!(assembler.V, 0.0) + end + + # if np == 2 + # if my_rank == 1 + # append!(assembler.I, [3,3,3, 4,4, 6,6]) + # append!(assembler.J, [7,8,9, 7,8, 7,9]) + # append!(assembler.V, zeros(7)) + # else + # # no ghost layer + # end + # elseif np == 3 + # if my_rank == 1 + # append!(assembler.I, [3,3, 1,1, 4,4,4,4,4]) + # append!(assembler.J, [9,6, 5,8, 8,5,7,6,9]) + # append!(assembler.V, zeros(9)) + # elseif my_rank == 2 + # # all_local_cols [5, 4, 6, 7, 1, 3, 8, 9] (R2) + # append!(assembler.I, [1,1, 3,3]) + # append!(assembler.J, [5,7, 6,8]) + # append!(assembler.V, zeros(4)) + # else + # # no ghost layer + # end + # end I_ = MPIData(assembler.I, comm, (np,)) J_ = MPIData(assembler.J, comm, (np,)) V_ = MPIData(assembler.V, comm, (np,)) From 42cd5435c0e0dff7449baa934c583c1db8eebc34 Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 20 Jul 2022 15:33:44 +0200 Subject: [PATCH 017/124] 2 processes seem to work out well and 1,2,3,4 on a 2 by 2 grid. 4 by 4 grid still has the wrong ghost layer. --- docs/src/literate/distributed_assembly.jl | 35 ++++++++++++++++------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 147a6357d5..b81e776760 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -504,6 +504,12 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g ghost_recv_buffer_ranks = zeros(Int, sum(ghost_recv_buffer_lengths)) MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_ranks,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_ranks,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + # Reconstruct source ranks + ghost_recv_buffer_source_ranks = Int[] + for (source_idx, recv_len) ∈ enumerate(ghost_recv_buffer_lengths) + append!(ghost_recv_buffer_source_ranks, ones(recv_len)*sources[source_idx]) + end + println("received $ghost_recv_buffer_dofs with owners $ghost_recv_buffer_ranks (R$my_rank)") # #TODO obtain ghosts algorithmic @@ -544,6 +550,7 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g col_exchanger = Exchanger(col_data,neighbors) cols = PRange(ngdofs,col_data,col_exchanger) + # --------------------- Local assembly -------------------- # Next we define the global force vector `f` and use that and # the stiffness matrix `K` and create an assembler. 
The assembler # is just a thin wrapper around `f` and `K` and some extra storage @@ -605,6 +612,7 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g @debug println("done assembling (R$my_rank)") + # --------------------- Add ghost entries in IJ -------------------- # Fix ghost layer - the locations for remote processes to write their data into unique_ghosts_dre = zip(ghost_recv_buffer_dofs,ghost_recv_buffer_ranks,ghost_recv_buffer_elements) @debug println("unique_ghosts_dre $unique_ghosts_dre (R$my_rank)") @@ -614,20 +622,27 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g for (pivot_vi, pivot_sv) ∈ dgrid.shared_vertices # Loop over owned shared vertices if my_rank == Ferrite.compute_owner(dgrid, pivot_sv) + @debug println("Fixing ghost layer $pivot_vi (R$my_rank)") pivot_vertex = Ferrite.toglobal(getlocalgrid(dgrid), pivot_vi) - for (d,r,e) ∈ unique_ghosts_dre - re = Ferrite.remote_entities(pivot_sv) - d_local = findfirst(x->x==d,ghost_dof_to_global) - if haskey(re, r) - for (remote_cell_idx,_) ∈ re[r] - if remote_cell_idx == e + # Now compare the vertex against EVERY ghost dof... + for (i,(global_ghost_dof,ghost_owner_rank,ghost_cell_idx)) ∈ enumerate(unique_ghosts_dre) + source_rank = ghost_recv_buffer_source_ranks[i] + pivot_remotes = Ferrite.remote_entities(pivot_sv) + local_ghost_dof = findfirst(x->x==global_ghost_dof, all_local_cols) + # ...where we have to check that the ghost is on the correct rank ... + if haskey(pivot_remotes, source_rank) + @debug println(" $pivot_vi found for rank $source_rank with dof $global_ghost_dof (R$my_rank)") + for (remote_cell_idx,_) ∈ pivot_remotes[source_rank] + # ... and that it is the correct element. + if remote_cell_idx == ghost_cell_idx + @debug println(" $pivot_vi synced against remote cell $remote_cell_idx (R$my_rank)") for field_idx in 1:Ferrite.nfields(dh) if !haskey(dh.vertexdicts[field_idx], pivot_vertex) continue end - + @debug println(" handling field $field_idx (R$my_rank)") pivot_dof = dh.vertexdicts[field_idx][pivot_vertex] - push!(IJ, (pivot_dof, d_local)) + push!(IJ, (pivot_dof, local_ghost_dof)) end end end @@ -635,12 +650,12 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g end end end + # Deduplicate entries. unique!(IJ) - @debug println("IJ=$(IJ) (R$my_rank)") for (i,j) ∈ IJ push!(assembler.I, i) - push!(assembler.J, j+length(ldof_to_gdof)) + push!(assembler.J, j) push!(assembler.V, 0.0) end From 30e1d1fd79a4e9523063bec8211ef43087f23f54 Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 24 Aug 2022 03:43:01 +0200 Subject: [PATCH 018/124] Hotfix confirming that non-local interactions of local basis functions are missing. 
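The PartitionedArrays.@check added in this commit fires when a remote contribution targets a (row, column) pair that was never reserved in the local sparsity pattern. A serial illustration of that failure mode with SparseArrays; the matrix and indices are toy values unrelated to the example's dofs.

    using SparseArrays

    K = sparse([1, 2], [1, 2], [1.0, 1.0], 2, 2)   # stored entries: (1,1) and (2,2)
    I, J, _ = findnz(K)
    @show (1, 2) in zip(I, J)   # false: no slot at (1,2), so a remote add would have nowhere to go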
--- docs/src/literate/distributed_assembly.jl | 148 +++++++++++++++++++++- src/Grid/DistributedGrid.jl | 8 +- src/exports.jl | 2 + 3 files changed, 152 insertions(+), 6 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index b81e776760..e6c77d14e2 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -42,13 +42,75 @@ macro debug(ex) return :($(esc(ex))) end +# @TODO contribute diagnostics upstream +function PartitionedArrays.matrix_exchanger(values,row_exchanger,row_lids,col_lids) + part = get_part_ids(row_lids) + parts_rcv = row_exchanger.parts_rcv + parts_snd = row_exchanger.parts_snd + + function setup_rcv(part,parts_rcv,row_lids,col_lids,values) + owner_to_i = Dict(( owner=>i for (i,owner) in enumerate(parts_rcv) )) + ptrs = zeros(Int32,length(parts_rcv)+1) + for (li,lj,v) in nziterator(values) + owner = row_lids.lid_to_part[li] + if owner != part + ptrs[owner_to_i[owner]+1] +=1 + end + end + length_to_ptrs!(ptrs) + k_rcv_data = zeros(Int,ptrs[end]-1) + gi_rcv_data = zeros(Int,ptrs[end]-1) + gj_rcv_data = zeros(Int,ptrs[end]-1) + for (k,(li,lj,v)) in enumerate(nziterator(values)) + owner = row_lids.lid_to_part[li] + if owner != part + p = ptrs[owner_to_i[owner]] + k_rcv_data[p] = k + gi_rcv_data[p] = row_lids.lid_to_gid[li] + gj_rcv_data[p] = col_lids.lid_to_gid[lj] + ptrs[owner_to_i[owner]] += 1 + end + end + rewind_ptrs!(ptrs) + k_rcv = Table(k_rcv_data,ptrs) + gi_rcv = Table(gi_rcv_data,ptrs) + gj_rcv = Table(gj_rcv_data,ptrs) + k_rcv, gi_rcv, gj_rcv + end + + k_rcv, gi_rcv, gj_rcv = map_parts(setup_rcv,part,parts_rcv,row_lids,col_lids,values) + + gi_snd = exchange(gi_rcv,parts_snd,parts_rcv) + gj_snd = exchange(gj_rcv,parts_snd,parts_rcv) + + function setup_snd(part,row_lids,col_lids,gi_snd,gj_snd,values) + ptrs = gi_snd.ptrs + k_snd_data = zeros(Int,ptrs[end]-1) + for p in 1:length(gi_snd.data) + gi = gi_snd.data[p] + gj = gj_snd.data[p] + li = row_lids.gid_to_lid[gi] + lj = col_lids.gid_to_lid[gj] + k = nzindex(values,li,lj) + PartitionedArrays.@check k > 0 "The sparsity pattern of the ghost layer is inconsistent - $part | ($li, $lj) | ($gi, $gj)" + k_snd_data[p] = k + end + k_snd = Table(k_snd_data,ptrs) + k_snd + end + + k_snd = map_parts(setup_snd,part,row_lids,col_lids,gi_snd,gj_snd,values) + + Exchanger(parts_rcv,parts_snd,k_rcv,k_snd) +end + # Launch MPI MPI.Init() # We start generating a simple grid with 20x20 quadrilateral elements # using `generate_grid`. The generator defaults to the unit square, # so we don't need to specify the corners of the domain. -grid = generate_grid(Quadrilateral, (2, 2)); +grid = generate_grid(Quadrilateral, (2, 3)); dgrid = DistributedGrid(grid) @@ -265,7 +327,7 @@ function local_to_global_numbering(dh::DofHandler, dgrid) end # Postcondition: All local dofs need a corresponding global dof! 
- @assert findfirst(local_to_global .== 0) == nothing + @assert findfirst(local_to_global .== 0) === nothing @debug vtk_grid("dofs", dgrid; compress=false) do vtk u = Vector{Float64}(undef,length(dgrid.local_grid.nodes)) @@ -423,6 +485,72 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g ghost_dof_field_index_to_send = [Int[] for i ∈ 1:destination_len] ghost_element_to_send = [Int[] for i ∈ 1:destination_len] # corresponding element ghost_dof_owner = [Int[] for i ∈ 1:destination_len] # corresponding owner + # for (pivot_vi, pivot_sv) ∈ dgrid.shared_vertices + # # We start by searching shared vertices which are not owned by us + # pivot_vertex_owner_rank = Ferrite.compute_owner(dgrid, pivot_sv) + # pivot_cell_idx = pivot_vi[1] + + # # Ghost is per definition non-local + # if my_rank != pivot_vertex_owner_rank + # sender_slot = destination_index[pivot_vertex_owner_rank] + + # @debug println("$pivot_vi may require synchronization (R$my_rank)") + # # We have to send ALL dofs on the element to the remote. + # # @TODO send actually ALL dofs (currently only vertex dofs for a first version...) + # pivot_cell = getcells(dgrid, pivot_cell_idx) + # for (other_vertex_idx, other_vertex) ∈ enumerate(Ferrite.vertices(pivot_cell)) + # # Skip self + # other_vi = VertexIndex(pivot_cell_idx, other_vertex_idx) + # if other_vi == pivot_vi + # continue + # end + + # if is_shared_vertex(dgrid, other_vi) + # #@TODO We should be able to remove more redundant communication is many cases. + # other_sv = dgrid.shared_vertices[other_vi] + # other_vertex_owner_rank = Ferrite.compute_owner(dgrid, other_sv) + + # # "Other vertex" is not a ghost vertex if it touches the element itself + # if haskey(other_sv.remote_vertices, pivot_vertex_owner_rank) + # pivot_vertex_adjacent_elements = [nei for (nei, _) ∈ pivot_sv.remote_vertices[pivot_vertex_owner_rank]] + # skip_me = false + # for (nei, _) ∈ other_sv.remote_vertices[pivot_vertex_owner_rank] + # if nei ∈ pivot_vertex_adjacent_elements + # skip_me = true + # break + # end + # end + # if skip_me + # @debug println(" Skipping $other_vi for $pivot_vi (R$my_rank)") + # continue + # end + # end + # else + # # If the vertex is not a shared one, we always have to sync it + # other_vertex_owner_rank = my_rank + # end + + # # Now we have to sync all fields separately + # @debug println(" Ghost candidate $other_vi for $pivot_vi (R$my_rank)") + # for field_idx in 1:Ferrite.nfields(dh) + # pivot_vertex = Ferrite.toglobal(getlocalgrid(dgrid), pivot_vi) + # # If any of the two vertices is not defined on the current field, just skip. + # if !haskey(dh.vertexdicts[field_idx], pivot_vertex) || !haskey(dh.vertexdicts[field_idx], other_vertex) + # continue + # end + # @debug println(" $other_vi is ghost for $pivot_vi in field $field_idx (R$my_rank)") + + # other_vertex_dof = dh.vertexdicts[field_idx][other_vertex] + + # append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[other_vertex_dof]) + # append!(ghost_rank_to_send[sender_slot], other_vertex_owner_rank) + # append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + # append!(ghost_element_to_send[sender_slot], pivot_cell_idx) + # end + # end + # end + # end + for (pivot_vi, pivot_sv) ∈ dgrid.shared_vertices # We start by searching shared vertices which are not owned by us pivot_vertex_owner_rank = Ferrite.compute_owner(dgrid, pivot_sv) @@ -652,6 +780,7 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g end # Deduplicate entries. 
unique!(IJ) + @debug println("IJ $IJ (R$my_rank)") for (i,j) ∈ IJ push!(assembler.I, i) @@ -659,6 +788,21 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g push!(assembler.V, 0.0) end + # Manually check if these are the missing ones. + if my_rank == 2 + push!(assembler.I, 5) + push!(assembler.J, 2) + push!(assembler.V, 0.0) + + push!(assembler.I, 4) + push!(assembler.J, 7) + push!(assembler.V, 0.0) + + push!(assembler.I, 7) + push!(assembler.J, 4) + push!(assembler.V, 0.0) + end + # if np == 2 # if my_rank == 1 # append!(assembler.I, [3,3,3, 4,4, 6,6]) diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index 82b300a29e..a9b4d864ba 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -15,7 +15,7 @@ struct SharedVertex <: SharedEntity remote_vertices::Dict{Int,Vector{VertexIndex}} end -remote_entities(sv::SharedVertex) = sv.remote_vertices +@inline remote_entities(sv::SharedVertex) = sv.remote_vertices """ """ @@ -24,7 +24,7 @@ struct SharedFace <: SharedEntity remote_faces::Dict{Int,Vector{FaceIndex}} end -remote_entities(sf::SharedFace) = sf.remote_faces +@inline remote_entities(sf::SharedFace) = sf.remote_faces """ """ @@ -33,7 +33,7 @@ struct SharedEdge <: SharedEntity remote_edges::Dict{Int,Vector{EdgeIndex}} end -remote_entities(se::SharedEdge) = se.remote_edges +@inline remote_entities(se::SharedEdge) = se.remote_edges """ @TODO docs @@ -303,7 +303,7 @@ end @inline getlocalgrid(dgrid::AbstractDistributedGrid) = dgrid.local_grid -@inline getcells(dgrid::AbstractDistributedGrid) = getcells(getlocalgrid(grid)) +@inline getcells(dgrid::AbstractDistributedGrid) = getcells(getlocalgrid(dgrid)) @inline getcells(dgrid::AbstractDistributedGrid, v::Union{Int, Vector{Int}}) = getcells(getlocalgrid(dgrid),v) @inline getcells(dgrid::AbstractDistributedGrid, setname::String) = getcells(getlocalgrid(dgrid),setname) "Returns the number of cells in the `<:AbstractDistributedGrid`." diff --git a/src/exports.jl b/src/exports.jl index bbc7f9b554..3159633d1a 100644 --- a/src/exports.jl +++ b/src/exports.jl @@ -83,6 +83,7 @@ export getedgesets, getvertexsets, global_comm, + vertex_comm, onboundary, nfaces, addnodeset!, @@ -93,6 +94,7 @@ export transform!, generate_grid, compute_vertex_values, + is_shared_vertex, # Grid coloring create_coloring, From c51db6a64ea31f7ce369d8c8902a7da5e3923830 Mon Sep 17 00:00:00 2001 From: termi-official Date: Fri, 30 Sep 2022 23:18:14 +0200 Subject: [PATCH 019/124] Ghost layer looks correct for linear ansatz in 2D now. 
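For orientation, the hunks below stop looking up vertexdicts entry by entry and instead slice a cell's complete dof vector field by field. A serial sketch of that bookkeeping with the plain DofHandler API; the grid size and field names are arbitrary.

    using Ferrite

    grid = generate_grid(Quadrilateral, (2, 2))
    dh = DofHandler(grid)
    push!(dh, :u, 1)
    push!(dh, :p, 1)
    close!(dh)

    cdofs = zeros(Int, ndofs_per_cell(dh))
    celldofs!(cdofs, dh, 1)              # all dofs of cell 1
    u_dofs = cdofs[dof_range(dh, :u)]    # only the :u dofs of that cell
    @show u_dofs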
--- docs/src/literate/distributed_assembly.jl | 219 ++++------------------ 1 file changed, 35 insertions(+), 184 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index e6c77d14e2..2583c3f452 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -485,74 +485,9 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g ghost_dof_field_index_to_send = [Int[] for i ∈ 1:destination_len] ghost_element_to_send = [Int[] for i ∈ 1:destination_len] # corresponding element ghost_dof_owner = [Int[] for i ∈ 1:destination_len] # corresponding owner - # for (pivot_vi, pivot_sv) ∈ dgrid.shared_vertices - # # We start by searching shared vertices which are not owned by us - # pivot_vertex_owner_rank = Ferrite.compute_owner(dgrid, pivot_sv) - # pivot_cell_idx = pivot_vi[1] - - # # Ghost is per definition non-local - # if my_rank != pivot_vertex_owner_rank - # sender_slot = destination_index[pivot_vertex_owner_rank] - - # @debug println("$pivot_vi may require synchronization (R$my_rank)") - # # We have to send ALL dofs on the element to the remote. - # # @TODO send actually ALL dofs (currently only vertex dofs for a first version...) - # pivot_cell = getcells(dgrid, pivot_cell_idx) - # for (other_vertex_idx, other_vertex) ∈ enumerate(Ferrite.vertices(pivot_cell)) - # # Skip self - # other_vi = VertexIndex(pivot_cell_idx, other_vertex_idx) - # if other_vi == pivot_vi - # continue - # end - - # if is_shared_vertex(dgrid, other_vi) - # #@TODO We should be able to remove more redundant communication is many cases. - # other_sv = dgrid.shared_vertices[other_vi] - # other_vertex_owner_rank = Ferrite.compute_owner(dgrid, other_sv) - - # # "Other vertex" is not a ghost vertex if it touches the element itself - # if haskey(other_sv.remote_vertices, pivot_vertex_owner_rank) - # pivot_vertex_adjacent_elements = [nei for (nei, _) ∈ pivot_sv.remote_vertices[pivot_vertex_owner_rank]] - # skip_me = false - # for (nei, _) ∈ other_sv.remote_vertices[pivot_vertex_owner_rank] - # if nei ∈ pivot_vertex_adjacent_elements - # skip_me = true - # break - # end - # end - # if skip_me - # @debug println(" Skipping $other_vi for $pivot_vi (R$my_rank)") - # continue - # end - # end - # else - # # If the vertex is not a shared one, we always have to sync it - # other_vertex_owner_rank = my_rank - # end - - # # Now we have to sync all fields separately - # @debug println(" Ghost candidate $other_vi for $pivot_vi (R$my_rank)") - # for field_idx in 1:Ferrite.nfields(dh) - # pivot_vertex = Ferrite.toglobal(getlocalgrid(dgrid), pivot_vi) - # # If any of the two vertices is not defined on the current field, just skip. 
- # if !haskey(dh.vertexdicts[field_idx], pivot_vertex) || !haskey(dh.vertexdicts[field_idx], other_vertex) - # continue - # end - # @debug println(" $other_vi is ghost for $pivot_vi in field $field_idx (R$my_rank)") - - # other_vertex_dof = dh.vertexdicts[field_idx][other_vertex] - - # append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[other_vertex_dof]) - # append!(ghost_rank_to_send[sender_slot], other_vertex_owner_rank) - # append!(ghost_dof_field_index_to_send[sender_slot], field_idx) - # append!(ghost_element_to_send[sender_slot], pivot_cell_idx) - # end - # end - # end - # end - + ghost_dof_pivot_to_send = [Int[] for i ∈ 1:destination_len] # corresponding dof to interact with for (pivot_vi, pivot_sv) ∈ dgrid.shared_vertices - # We start by searching shared vertices which are not owned by us + # Start by searching shared vertices which are not owned pivot_vertex_owner_rank = Ferrite.compute_owner(dgrid, pivot_sv) pivot_cell_idx = pivot_vi[1] @@ -571,17 +506,8 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g end if is_shared_vertex(dgrid, other_vi) - #@TODO We should be able to remove more redundant communication is many cases. other_sv = dgrid.shared_vertices[other_vi] other_vertex_owner_rank = Ferrite.compute_owner(dgrid, other_sv) - # Also skip if the "other vertex" is already owned by the process owning the pivot vertex - if other_vertex_owner_rank == pivot_vertex_owner_rank - continue; - end - # A vertex is also not a ghost vertex if it touches the domain of the rank of the pivot - if pivot_vertex_owner_rank ∈ keys(other_sv.remote_vertices) - continue - end else other_vertex_owner_rank = my_rank end @@ -596,8 +522,10 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g end @debug println(" $other_vi is ghost for $pivot_vi in field $field_idx (R$my_rank)") + pivot_vertex_dof = dh.vertexdicts[field_idx][pivot_vertex] other_vertex_dof = dh.vertexdicts[field_idx][other_vertex] + append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_vertex_dof]) append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[other_vertex_dof]) append!(ghost_rank_to_send[sender_slot], other_vertex_owner_rank) append!(ghost_dof_field_index_to_send[sender_slot], field_idx) @@ -632,39 +560,25 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g ghost_recv_buffer_ranks = zeros(Int, sum(ghost_recv_buffer_lengths)) MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_ranks,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_ranks,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + ghost_send_buffer_dofs_piv = vcat(ghost_dof_pivot_to_send...) 
+ ghost_recv_buffer_dofs_piv = zeros(Int, sum(ghost_recv_buffer_lengths)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_dofs_piv,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_dofs_piv,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + # Reconstruct source ranks ghost_recv_buffer_source_ranks = Int[] for (source_idx, recv_len) ∈ enumerate(ghost_recv_buffer_lengths) append!(ghost_recv_buffer_source_ranks, ones(recv_len)*sources[source_idx]) end - - println("received $ghost_recv_buffer_dofs with owners $ghost_recv_buffer_ranks (R$my_rank)") - - # #TODO obtain ghosts algorithmic - # if np == 2 - # if my_rank == 1 - # append!(ghost_dof_to_global, collect(7:9)) - # append!(ghost_dof_rank, [2,2,2]) - # else - # # no ghosts - # end - # elseif np == 3 - # if my_rank == 1 - # append!(ghost_dof_to_global, [5,6,7,8,9]) - # append!(ghost_dof_rank, [2,2,2,3,3]) - # elseif my_rank == 2 - # append!(ghost_dof_to_global, [1,3,8,9]) - # append!(ghost_dof_rank, [1,1,3,3]) - # else - # # no ghosts - # end - # end + + @debug println("received $ghost_recv_buffer_dofs with owners $ghost_recv_buffer_ranks (R$my_rank)") unique_ghosts_dr = sort(unique(first,zip(ghost_recv_buffer_dofs,ghost_recv_buffer_ranks))) - # unzip manually + # unzip manually and make sure we do not add duplicate entries to our columns for (dof,rank) ∈ unique_ghosts_dr - push!(ghost_dof_to_global, dof) - push!(ghost_dof_rank, rank) + if rank != my_rank && dof ∉ ldof_to_gdof + push!(ghost_dof_to_global, dof) + push!(ghost_dof_rank, rank) + end end # ------------- Construct rows and cols of distributed matrix -------- @@ -742,95 +656,32 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g # --------------------- Add ghost entries in IJ -------------------- # Fix ghost layer - the locations for remote processes to write their data into - unique_ghosts_dre = zip(ghost_recv_buffer_dofs,ghost_recv_buffer_ranks,ghost_recv_buffer_elements) + unique_ghosts_dre = zip(ghost_recv_buffer_dofs_piv, ghost_recv_buffer_dofs, ghost_recv_buffer_ranks) @debug println("unique_ghosts_dre $unique_ghosts_dre (R$my_rank)") - - # @TODO this is dead slow. optimize. - IJ = [] - for (pivot_vi, pivot_sv) ∈ dgrid.shared_vertices - # Loop over owned shared vertices - if my_rank == Ferrite.compute_owner(dgrid, pivot_sv) - @debug println("Fixing ghost layer $pivot_vi (R$my_rank)") - pivot_vertex = Ferrite.toglobal(getlocalgrid(dgrid), pivot_vi) - # Now compare the vertex against EVERY ghost dof... - for (i,(global_ghost_dof,ghost_owner_rank,ghost_cell_idx)) ∈ enumerate(unique_ghosts_dre) - source_rank = ghost_recv_buffer_source_ranks[i] - pivot_remotes = Ferrite.remote_entities(pivot_sv) - local_ghost_dof = findfirst(x->x==global_ghost_dof, all_local_cols) - # ...where we have to check that the ghost is on the correct rank ... - if haskey(pivot_remotes, source_rank) - @debug println(" $pivot_vi found for rank $source_rank with dof $global_ghost_dof (R$my_rank)") - for (remote_cell_idx,_) ∈ pivot_remotes[source_rank] - # ... and that it is the correct element. 
- if remote_cell_idx == ghost_cell_idx - @debug println(" $pivot_vi synced against remote cell $remote_cell_idx (R$my_rank)") - for field_idx in 1:Ferrite.nfields(dh) - if !haskey(dh.vertexdicts[field_idx], pivot_vertex) - continue - end - @debug println(" handling field $field_idx (R$my_rank)") - pivot_dof = dh.vertexdicts[field_idx][pivot_vertex] - push!(IJ, (pivot_dof, local_ghost_dof)) - end - end - end - end - end - end + IJfix = [] + for (i,(pivot_dof, global_ghost_dof, ghost_owner_rank)) ∈ enumerate(unique_ghosts_dre) + push!(IJfix, (pivot_dof, global_ghost_dof)) end - # Deduplicate entries. - unique!(IJ) - @debug println("IJ $IJ (R$my_rank)") - - for (i,j) ∈ IJ - push!(assembler.I, i) - push!(assembler.J, j) - push!(assembler.V, 0.0) - end - - # Manually check if these are the missing ones. - if my_rank == 2 - push!(assembler.I, 5) - push!(assembler.J, 2) - push!(assembler.V, 0.0) + @debug println("IJfix $IJfix (R$my_rank)") - push!(assembler.I, 4) - push!(assembler.J, 7) - push!(assembler.V, 0.0) + I = map(i->ldof_to_gdof[i], assembler.I) + J = map(j->ldof_to_gdof[j], assembler.J) + V = map(v->v, assembler.V) - push!(assembler.I, 7) - push!(assembler.J, 4) - push!(assembler.V, 0.0) + for (i,j) ∈ IJfix + push!(I, i) + push!(J, j) + push!(V, 0.0) end - # if np == 2 - # if my_rank == 1 - # append!(assembler.I, [3,3,3, 4,4, 6,6]) - # append!(assembler.J, [7,8,9, 7,8, 7,9]) - # append!(assembler.V, zeros(7)) - # else - # # no ghost layer - # end - # elseif np == 3 - # if my_rank == 1 - # append!(assembler.I, [3,3, 1,1, 4,4,4,4,4]) - # append!(assembler.J, [9,6, 5,8, 8,5,7,6,9]) - # append!(assembler.V, zeros(9)) - # elseif my_rank == 2 - # # all_local_cols [5, 4, 6, 7, 1, 3, 8, 9] (R2) - # append!(assembler.I, [1,1, 3,3]) - # append!(assembler.J, [5,7, 6,8]) - # append!(assembler.V, zeros(4)) - # else - # # no ghost layer - # end - # end - I_ = MPIData(assembler.I, comm, (np,)) - J_ = MPIData(assembler.J, comm, (np,)) - V_ = MPIData(assembler.V, comm, (np,)) - @debug println("I=$(assembler.I) (R$my_rank)") - @debug println("J=$(assembler.J) (R$my_rank)") - K = PartitionedArrays.PSparseMatrix(I_, J_, V_, rows, cols, ids=:local) + @debug println("I=$(I) (R$my_rank)") + @debug println("J=$(J) (R$my_rank)") + K = PartitionedArrays.PSparseMatrix( + MPIData(I, comm, (np,)), + MPIData(J, comm, (np,)), + MPIData(V, comm, (np,)), + rows, cols, ids=:global + ) return K, f end #md nothing # hide From f5c2ae67843990290d0b91e32f14e53fea86a4c2 Mon Sep 17 00:00:00 2001 From: termi-official Date: Sat, 1 Oct 2022 17:09:07 +0200 Subject: [PATCH 020/124] Fix RHS assembly. 
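Background for the small fix below: Ferrite.assemble!(f, dofs, fe) accumulates an element vector into a global vector at the given dof positions, and the commit applies exactly that to the local part of the PVector inside map_parts. A serial sketch with toy numbers, not taken from the example.

    using Ferrite

    f    = zeros(6)
    fe   = [1.0, 2.0, 3.0]
    dofs = [2, 5, 6]
    Ferrite.assemble!(f, dofs, fe)
    @show f   # contributions landed at positions 2, 5 and 6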
--- docs/src/literate/distributed_assembly.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 2583c3f452..06b03f8e64 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -649,9 +649,10 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g @debug println("assembling cell finished local (R$my_rank)") Ferrite.assemble!(assembler, celldofs(cell), Ke) @debug println("assembling cell finished global (R$my_rank)") - #Ferrite.assemble!(f, celldofs(cell), fe) + map_parts(local_view(f, f.rows)) do f_local + Ferrite.assemble!(f_local, celldofs(cell), fe) + end end - @debug println("done assembling (R$my_rank)") # --------------------- Add ghost entries in IJ -------------------- @@ -682,6 +683,10 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g MPIData(V, comm, (np,)), rows, cols, ids=:global ) + + PartitionedArrays.assemble!(K) + PartitionedArrays.assemble!(f) + return K, f end #md nothing # hide From d188b1a7a5eac2fad6f481750cb51c1d91b5b6be Mon Sep 17 00:00:00 2001 From: termi-official Date: Sat, 1 Oct 2022 18:05:19 +0200 Subject: [PATCH 021/124] Fix Dirichlet constraint application. --- docs/src/literate/distributed_assembly.jl | 51 ++++++++++++++--------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 06b03f8e64..24ddf2b058 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -36,7 +36,7 @@ #md # The full program, without comments, can be found in the next [section](@ref heat_equation-plain-program). # # First we load Ferrite, and some other packages we need -using Ferrite, SparseArrays, MPI, PartitionedArrays +using Ferrite, SparseArrays, MPI, PartitionedArrays, IterativeSolvers macro debug(ex) return :($(esc(ex))) @@ -110,7 +110,7 @@ MPI.Init() # We start generating a simple grid with 20x20 quadrilateral elements # using `generate_grid`. The generator defaults to the unit square, # so we don't need to specify the corners of the domain. -grid = generate_grid(Quadrilateral, (2, 3)); +grid = generate_grid(Quadrilateral, (20, 20)); dgrid = DistributedGrid(grid) @@ -369,11 +369,6 @@ dof_owner = compute_dof_ownership(dh, dgrid); nltdofs = sum(dof_owner.==(MPI.Comm_rank(global_comm(dgrid))+1)) ndofs_total = MPI.Allreduce(nltdofs, MPI.SUM, global_comm(dgrid)) -# Now that we have distributed all our dofs we can create our tangent matrix, -# using `create_sparsity_pattern`. This function returns a sparse matrix -# with the correct elements stored. -#K = create_sparsity_pattern(dh) - # ### Boundary conditions # In Ferrite constraints like Dirichlet boundary conditions # are handled by a `ConstraintHandler`. @@ -382,7 +377,7 @@ ch = ConstraintHandler(dh); # Next we need to add constraints to `ch`. For this problem we define # homogeneous Dirichlet boundary conditions on the whole boundary, i.e. # the `union` of all the face sets on the boundary. -∂Ω = union(getfaceset.((grid, ), ["left", "right", "top", "bottom"])...); +∂Ω = union(getfaceset.((getlocalgrid(dgrid), ), ["left", "right", "top", "bottom"])...); # Now we are set up to define our constraint. We specify which field # the condition is for, and our combined face set `∂Ω`. 
The last @@ -691,35 +686,53 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g end #md nothing # hide +my_rank = MPI.Comm_rank(global_comm(dgrid))+1 + # ### Solution of the system # The last step is to solve the system. First we call `doassemble` # to obtain the global stiffness matrix `K` and force vector `f`. K, f = doassemble(cellvalues, dh, local_to_global, dof_owner, ndofs_total, dgrid); -# Shutdown MPI -MPI.Finalize() - -# Early out for testing. -exit(0) - # To account for the boundary conditions we use the `apply!` function. # This modifies elements in `K` and `f` respectively, such that -# we can get the correct solution vector `u` by using `\`. +# we can get the correct solution vector `u` by using a parallel +# iterative solver. +""" +Poor man's Dirichlet BC application for PartitionedArrays. :) +""" +function apply!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) + map_parts(local_view(f, f.rows)) do f_local + f_local[ch.prescribed_dofs] .= 0.0 + end + + map_parts(local_view(K, K.rows, K.cols)) do K_local + for cdof in ch.prescribed_dofs + K_local[cdof, :] .= 0.0 + K_local[:, cdof] .= 0.0 + K_local[cdof, cdof] = 1.0 + end + end +end + apply!(K, f, ch) -#u = PartitionedArray... -cg!(u, K, f); +u = cg(K, f); # ### Exporting to VTK # To visualize the result we export the grid and our field `u` # to a VTK-file, which can be viewed in e.g. [ParaView](https://www.paraview.org/). -vtk_grid("heat_equation_distributed", dh) do vtk - vtk_point_data(vtk, dh, u) +vtk_grid("heat_equation_distributed-$my_rank", dh) do vtk + map_parts(local_view(u, u.rows)) do u_local + vtk_point_data(vtk, dh, u_local) + end end ## test the result #src using Test #src @test norm(u) ≈ 3.307743912641305 #src +# Shutdown MPI +MPI.Finalize() + #md # ## [Plain program](@id distributed-assembly-plain-program) #md # #md # Here follows a version of the program without any comments. From 5d2c4f9c43576f4a0e83d65ad684aacf839efe88 Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 3 Oct 2022 22:45:21 +0200 Subject: [PATCH 022/124] Add management for non-zero valued Dirichlet boundary conditions. --- docs/src/literate/distributed_assembly.jl | 72 +++++++++++++++++++++-- 1 file changed, 66 insertions(+), 6 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 24ddf2b058..4ef4558558 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -385,7 +385,7 @@ ch = ConstraintHandler(dh); # the current time $t$ and returns the prescribed value. In this case # it is trivial -- no matter what $x$ and $t$ we return $0$. When we have # specified our constraint we `add!` it to `ch`. -dbc = Dirichlet(:u, ∂Ω, (x, t) -> 0) +dbc = Dirichlet(:u, ∂Ω, (x, t) -> 1) add!(ch, dbc); # We also need to `close!` and `update!` our boundary conditions. When we call `close!` @@ -700,16 +700,76 @@ K, f = doassemble(cellvalues, dh, local_to_global, dof_owner, ndofs_total, dgrid """ Poor man's Dirichlet BC application for PartitionedArrays. 
:) """ -function apply!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) - map_parts(local_view(f, f.rows)) do f_local +function apply_zero!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) + map_parts(local_view(f, f.rows), f.rows.partition) do f_local, partition f_local[ch.prescribed_dofs] .= 0.0 end map_parts(local_view(K, K.rows, K.cols)) do K_local for cdof in ch.prescribed_dofs - K_local[cdof, :] .= 0.0 - K_local[:, cdof] .= 0.0 - K_local[cdof, cdof] = 1.0 + K_local[cdof, :] .= 0.0 + K_local[:, cdof] .= 0.0 + K_local[cdof, cdof] = 1.0 + end + end +end + +function apply!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) + map_parts(local_view(f, f.rows), f.rows.partition) do f_local, partition + # Note: RHS only non-zero for owned RHS entries + f_local[ch.prescribed_dofs] .= ch.inhomogeneities .* map(p -> p == partition.part, partition.lid_to_part[ch.prescribed_dofs]) + end + + # Zero out locally visible rows and columns + map_parts(local_view(K, K.rows, K.cols)) do K_local + for cdof ∈ ch.prescribed_dofs + K_local[cdof, :] .= 0.0 + K_local[:, cdof] .= 0.0 + K_local[cdof, cdof] = 1.0 + end + end + + # Zero out columns associated to the ghost dofs constrained on a remote process + # TODO optimize + + # Step 1: Send out all local ghosts to all other processes... + remote_ghost_gdofs, remote_ghost_parts = map_parts(K.cols.partition) do partition + remote_ghost_ldofs = partition.hid_to_lid + remote_ghost_parts = partition.lid_to_part[remote_ghost_ldofs] + remote_ghost_gdofs = partition.lid_to_gid[remote_ghost_ldofs] + return (remote_ghost_gdofs, remote_ghost_parts) + end + + comm = remote_ghost_parts.comm + my_rank = MPI.Comm_rank(comm)+1 + buffer_sizes_send = zeros(Cint, MPI.Comm_size(comm)) + buffer_sizes_recv = Vector{Cint}(undef, MPI.Comm_size(comm)) + for part ∈ remote_ghost_parts.part + buffer_sizes_send[part] += 1 + end + MPI.Alltoall!(UBuffer(buffer_sizes_send, 1), UBuffer(buffer_sizes_recv, 1), comm) + @debug println("Got $buffer_sizes_recv (R$my_rank)") + + remote_ghosts_recv = Vector{Int}(undef, sum(buffer_sizes_recv)) + MPI.Alltoallv!(VBuffer(remote_ghost_gdofs.part, buffer_sizes_send), VBuffer(remote_ghosts_recv, buffer_sizes_recv), comm) + @debug println("Got $remote_ghosts_recv (R$my_rank)") + + # Step 2: Union with all locally constrained dofs + remote_ghosts_constrained_send = copy(remote_ghosts_recv) + for (i, remote_ghost_dof) ∈ enumerate(remote_ghosts_recv) + remote_ghosts_constrained_send[i] = remote_ghost_dof ∈ K.cols.partition.part.lid_to_gid[ch.prescribed_dofs] + end + + # Step 3: Send trash back + remote_ghosts_constrained_recv = Vector{Int}(undef, sum(buffer_sizes_send)) + MPI.Alltoallv!(VBuffer(remote_ghosts_constrained_send, buffer_sizes_recv), VBuffer(remote_ghosts_constrained_recv, buffer_sizes_send), comm) + + @debug println("$my_rank : remote constraints on $(remote_ghost_gdofs.part[remote_ghosts_constrained_recv .== 1])") + + # Step 4: Constrain remaining columns + map_parts(local_view(K, K.rows, K.cols), K.cols.partition) do K_local, partition + for cdof ∈ partition.hid_to_lid[remote_ghosts_constrained_recv .== 1] + K_local[:, cdof] .= 0.0 end end end From ebbe1e74afa5889efe4fadcb4cee16286f164c63 Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 3 Oct 2022 22:50:21 +0200 Subject: [PATCH 023/124] Hotfix test. 
:) --- docs/src/literate/distributed_assembly.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 4ef4558558..08f7821b76 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -788,7 +788,7 @@ end ## test the result #src using Test #src -@test norm(u) ≈ 3.307743912641305 #src +@test norm(u) ≈ 9.536307974872432 #src # Shutdown MPI MPI.Finalize() From fe2ac0f3ed3a2c996760cc0b6986cd49fdabcb8d Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 3 Oct 2022 23:55:51 +0200 Subject: [PATCH 024/124] Generalize ghost dof synchronization. --- docs/src/literate/distributed_assembly.jl | 78 +++++++++-------------- src/Dofs/DofHandler.jl | 8 +++ src/Grid/grid.jl | 6 ++ 3 files changed, 45 insertions(+), 47 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 08f7821b76..ff4a2e8871 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -141,8 +141,9 @@ end # to a `CellScalarValues` object. dim = 2 ip = Lagrange{dim, RefCube, 1}() -qr = QuadratureRule{dim, RefCube}(2) -cellvalues = CellScalarValues(qr, ip); +ip_geo = Lagrange{dim, RefCube, 1}() +qr = QuadratureRule{dim, RefCube}(3) +cellvalues = CellScalarValues(qr, ip, ip_geo); # ### Degrees of freedom # Next we need to define a `DofHandler`, which will take care of numbering @@ -151,7 +152,7 @@ cellvalues = CellScalarValues(qr, ip); # Lastly we `close!` the `DofHandler`, it is now that the dofs are distributed # for all the elements. dh = DofHandler(dgrid.local_grid) -push!(dh, :u, 1) +push!(dh, :u, 1, ip) close!(dh); # Renumber the dofs in local ordering to their corresponding global numbering. @@ -473,7 +474,7 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g # ------------ Ghost dof synchronization ---------- # Prepare sending ghost dofs to neighbors - #@TODO comminication can be optimized by deduplicating entries in the following arrays + #@TODO communication can be optimized by deduplicating entries in, and compressing the following arrays #@TODO reorder communication by field to eliminate need for `ghost_dof_field_index_to_send` ghost_dof_to_send = [Int[] for i ∈ 1:destination_len] # global dof id ghost_rank_to_send = [Int[] for i ∈ 1:destination_len] # rank of dof @@ -481,50 +482,33 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g ghost_element_to_send = [Int[] for i ∈ 1:destination_len] # corresponding element ghost_dof_owner = [Int[] for i ∈ 1:destination_len] # corresponding owner ghost_dof_pivot_to_send = [Int[] for i ∈ 1:destination_len] # corresponding dof to interact with - for (pivot_vi, pivot_sv) ∈ dgrid.shared_vertices - # Start by searching shared vertices which are not owned - pivot_vertex_owner_rank = Ferrite.compute_owner(dgrid, pivot_sv) - pivot_cell_idx = pivot_vi[1] - - if my_rank != pivot_vertex_owner_rank - sender_slot = destination_index[pivot_vertex_owner_rank] - - @debug println("$pivot_vi may require synchronization (R$my_rank)") - # We have to send ALL dofs on the element to the remote. - # @TODO send actually ALL dofs (currently only vertex dofs for a first version...) 
- pivot_cell = getcells(dgrid, pivot_cell_idx) - for (other_vertex_idx, other_vertex) ∈ enumerate(Ferrite.vertices(pivot_cell)) - # Skip self - other_vi = VertexIndex(pivot_cell_idx, other_vertex_idx) - if other_vi == pivot_vi - continue - end - - if is_shared_vertex(dgrid, other_vi) - other_sv = dgrid.shared_vertices[other_vi] - other_vertex_owner_rank = Ferrite.compute_owner(dgrid, other_sv) - else - other_vertex_owner_rank = my_rank - end - - # Now we have to sync all fields separately - @debug println(" Ghost candidate $other_vi for $pivot_vi (R$my_rank)") - for field_idx in 1:Ferrite.nfields(dh) - pivot_vertex = Ferrite.toglobal(getlocalgrid(dgrid), pivot_vi) - # If any of the two vertices is not defined on the current field, just skip. - if !haskey(dh.vertexdicts[field_idx], pivot_vertex) || !haskey(dh.vertexdicts[field_idx], other_vertex) - continue + for shared_entity_set ∈ [dgrid.shared_vertices, dgrid.shared_faces, dgrid.shared_edges] + for (pivot_entity, pivot_shared_entity) ∈ shared_entity_set + # Start by searching shared entities which are not owned + pivot_entity_owner_rank = Ferrite.compute_owner(dgrid, pivot_shared_entity) + pivot_cell_idx = pivot_entity[1] + + if my_rank != pivot_entity_owner_rank + sender_slot = destination_index[pivot_entity_owner_rank] + + @debug println("$pivot_entity may require synchronization (R$my_rank)") + # Note: We have to send ALL dofs on the element to the remote. + cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] + cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] + + pivot_entity_global = Ferrite.toglobal(getlocalgrid(dgrid), pivot_entity) + + for (field_idx, field_name) in zip(1:Ferrite.nfields(dh), Ferrite.getfieldnames(dh)) + pivot_entity_dof = Ferrite.entity_dofs(dh, field_idx, pivot_entity_global) + # Extract dofs belonging to the current field + cell_field_dofs = cell_dofs[Ferrite.dof_range(dh, field_name)] + for cell_field_dof ∈ cell_field_dofs + append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_entity_dof]) + append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_field_dof]) + append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_field_dof]) + append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + append!(ghost_element_to_send[sender_slot], pivot_cell_idx) end - @debug println(" $other_vi is ghost for $pivot_vi in field $field_idx (R$my_rank)") - - pivot_vertex_dof = dh.vertexdicts[field_idx][pivot_vertex] - other_vertex_dof = dh.vertexdicts[field_idx][other_vertex] - - append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_vertex_dof]) - append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[other_vertex_dof]) - append!(ghost_rank_to_send[sender_slot], other_vertex_owner_rank) - append!(ghost_dof_field_index_to_send[sender_slot], field_idx) - append!(ghost_element_to_send[sender_slot], pivot_cell_idx) end end end diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index 3133800f9f..a150d1aae3 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -46,6 +46,14 @@ function Base.show(io::IO, ::MIME"text/plain", dh::DofHandler) end end +has_entity_dof(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = haskey(dh.vertexdicts[fi], vertex) +has_entity_dof(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = haskey(dh.vertexdicts[fi], vertex) +has_entity_dof(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = 
haskey(dh.vertexdicts[fi], vertex) + +entity_dofs(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = dh.vertexdicts[field_idx][vertex] +entity_dofs(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = dh.edgedicts[field_idx][edge] +entity_dofs(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = dh.facedicts[field_idx][face] + """ ndofs(dh::AbstractDofHandler) diff --git a/src/Grid/grid.jl b/src/Grid/grid.jl index 28b0f857e4..9c1da24547 100644 --- a/src/Grid/grid.jl +++ b/src/Grid/grid.jl @@ -409,6 +409,12 @@ faceskeleton(top::ExclusiveTopology, grid::AbstractGrid) = top.face_skeleton toglobal(grid::AbstractGrid,vertexidx::VertexIndex) = vertices(getcells(grid,vertexidx[1]))[vertexidx[2]] toglobal(grid::AbstractGrid,vertexidx::Vector{VertexIndex}) = unique(toglobal.((grid,),vertexidx)) +toglobal(grid::AbstractGrid,faceidx::FaceIndex) = sortface(faces(getcells(grid,faceidx[1])[faceidx[2]])) +toglobal(grid::AbstractGrid,faceidx::Vector{FaceIndex}) = unique(toglobal.((grid,),faceidx)) + +toglobal(grid::AbstractGrid,edgeidx::EdgeIndex) = sortedge(faces(getcells(grid,edgeidx[1])[edgeidx[2]])) +toglobal(grid::AbstractGrid,edgeidx::Vector{EdgeIndex}) = unique(toglobal.((grid,),edgeidx)) + @inline getdim(::AbstractGrid{dim}) where {dim} = dim """ getcells(grid::AbstractGrid) From 69dd9a401f78c62ed9bc879d00231c0839b5cde4 Mon Sep 17 00:00:00 2001 From: termi-official Date: Sun, 9 Oct 2022 19:58:56 +0200 Subject: [PATCH 025/124] Add some FIXMES and comments for future work. --- docs/src/literate/distributed_assembly.jl | 27 +++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index ff4a2e8871..71340e5b7e 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -36,7 +36,7 @@ #md # The full program, without comments, can be found in the next [section](@ref heat_equation-plain-program). # # First we load Ferrite, and some other packages we need -using Ferrite, SparseArrays, MPI, PartitionedArrays, IterativeSolvers +using Ferrite, SparseArrays, MPI, PartitionedArrays, IterativeSolvers, HYPRE macro debug(ex) return :($(esc(ex))) @@ -459,6 +459,8 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g # We decide for row (i.e. test function) ownership, because it the image of # SpMV is process local. row_indices = PartitionedArrays.IndexSet(my_rank, ldof_to_gdof, Int32.(ldof_to_rank)) + #FIXME: This below must be fixed before we can assemble to HYPRE IJ. Problem seems to be that rows and cols must be continuously assigned. + #row_indices = PartitionedArrays.IndexRange(my_rank, length(ltdof_indices), ltdof_to_gdof[1], ldof_to_gdof[.!ltdof_indices], Int32.(ldof_to_rank[.!ltdof_indices])) row_data = MPIData(row_indices, comm, (np,)) row_exchanger = Exchanger(row_data,neighbors) rows = PRange(ngdofs,row_data,row_exchanger) @@ -567,6 +569,8 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_g @debug println("all_local_col_ranks $all_local_col_ranks (R$my_rank)") col_indices = PartitionedArrays.IndexSet(my_rank, all_local_cols, all_local_col_ranks) + #FIXME: This below must be fixed before we can assemble to HYPRE IJ. Problem seems to be that rows and cols must be continuously assigned. 
+ #col_indices = PartitionedArrays.IndexRange(my_rank, length(ltdof_indices), ltdof_to_gdof[1], all_local_cols[all_local_col_ranks .!= my_rank], Int32.(all_local_col_ranks[all_local_col_ranks .!= my_rank])) col_data = MPIData(col_indices, comm, (np,)) col_exchanger = Exchanger(col_data,neighbors) cols = PRange(ngdofs,col_data,col_exchanger) @@ -759,7 +763,26 @@ function apply!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector end apply!(K, f, ch) -u = cg(K, f); + +# Compute the solution +# Note: At the moment of writing this we have no good preconditioners for PSparseMatrix in Julia, +# partly due to unimplemented multiplication operators for the matrix data type. +u = cg(K, f) + +# Compute the solution with HYPRE (needs the hotfix in https://github.com/fredrikekre/HYPRE.jl/pull/4 to function) +# u_ = HYPRE.solve( +# HYPRE.PCG( +# global_comm(dgrid); +# Precond = HYPRE.BoomerAMG() +# ), +# HYPRE.HYPREMatrix(K), +# HYPRE.HYPREVector(f) +# ) + +# Convert back to PartitionedArrays vector +# u = PVector(0.0, K.cols) +# copy!(u, u_) +# PartitionedArrays.assemble!(u) # ### Exporting to VTK # To visualize the result we export the grid and our field `u` From c7d8b678d438a823e681d3770fee54d87f5d33df Mon Sep 17 00:00:00 2001 From: termi-official Date: Sun, 9 Oct 2022 20:08:54 +0200 Subject: [PATCH 026/124] Move grid generator into its own file. --- docs/src/literate/distributed_assembly.jl | 4 +--- src/Grid/grid_generators.jl | 8 ++++++++ src/exports.jl | 1 + 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 71340e5b7e..3fc7459fbc 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -110,9 +110,7 @@ MPI.Init() # We start generating a simple grid with 20x20 quadrilateral elements # using `generate_grid`. The generator defaults to the unit square, # so we don't need to specify the corners of the domain. -grid = generate_grid(Quadrilateral, (20, 20)); - -dgrid = DistributedGrid(grid) +dgrid = generate_distributed_grid(Quadrilateral, (20, 20)); # TODO refactor this into a utility function @debug vtk_grid("grid", dgrid; compress=false) do vtk diff --git a/src/Grid/grid_generators.jl b/src/Grid/grid_generators.jl index 7b2b0e424a..aef0b1a4ab 100644 --- a/src/Grid/grid_generators.jl +++ b/src/Grid/grid_generators.jl @@ -454,3 +454,11 @@ function generate_grid(::Type{Tetrahedron}, cells_per_dim::NTuple{3,Int}, left:: return Grid(cells, nodes, facesets=facesets, boundary_matrix=boundary_matrix) end + +""" +Helper to generate distributed grids. +It is designed to replace the call to [`generate_grid`](@ref) for use in distributed environments. +""" +function generate_distributed_grid(args...) + return DistributedGrid(generate_grid(args...)) +end diff --git a/src/exports.jl b/src/exports.jl index 3159633d1a..716ab532e8 100644 --- a/src/exports.jl +++ b/src/exports.jl @@ -93,6 +93,7 @@ export addcellset!, transform!, generate_grid, + generate_distributed_grid, compute_vertex_values, is_shared_vertex, From aaf9a307936cfb11595f1d1ebd84944843d3e16c Mon Sep 17 00:00:00 2001 From: termi-official Date: Sun, 9 Oct 2022 20:38:37 +0200 Subject: [PATCH 027/124] Move dof handler code from docs to Ferrite. 
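
A minimal usage sketch of the API after this move (assuming `cellvalues` and
`doassemble` as defined in docs/src/literate/distributed_assembly.jl;
`generate_distributed_grid` comes from the previous commit):

    using Ferrite, MPI
    MPI.Init()
    dgrid = generate_distributed_grid(Quadrilateral, (20, 20))
    dh = DistributedDofHandler(dgrid)
    push!(dh, :u, 1, Lagrange{2, RefCube, 1}())
    close!(dh)    # also fills dh.ldof_to_gdof and dh.ldof_to_rank
    # K, f = doassemble(cellvalues, dh)
    MPI.Finalize()
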
--- docs/src/literate/distributed_assembly.jl | 224 +----------- src/Dofs/DistributedDofHandler.jl | 400 +++++++++++----------- src/Dofs/DofHandler.jl | 26 +- src/exports.jl | 4 + 4 files changed, 229 insertions(+), 425 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 3fc7459fbc..02bf83bea2 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -149,225 +149,10 @@ cellvalues = CellScalarValues(qr, ip, ip_geo); # We create the `DofHandler` and then add a single field called `u`. # Lastly we `close!` the `DofHandler`, it is now that the dofs are distributed # for all the elements. -dh = DofHandler(dgrid.local_grid) +dh = DistributedDofHandler(dgrid) push!(dh, :u, 1, ip) close!(dh); -# Renumber the dofs in local ordering to their corresponding global numbering. -# TODO: Refactor for MixedDofHandler integration -function local_to_global_numbering(dh::DofHandler, dgrid) - # MPI rank starting with 1 to match Julia's index convention - my_rank = MPI.Comm_rank(global_comm(dgrid))+1 - - local_to_global = Vector{Int}(undef,ndofs(dh)) - fill!(local_to_global,0) # 0 is the invalid index! - # Start by numbering local dofs only from 1:#local_dofs - - # Lookup for synchronization in the form (Remote Rank,Shared Entity) - # @TODO replace dict with vector and tie to MPI neighborhood graph of the mesh - vertices_send = Dict{Int,Vector{VertexIndex}}() - n_vertices_recv = Dict{Int,Int}() - - # We start by assigning a local dof to all owned entities. - # An entity is owned if: - # 1. *All* topological neighbors are on the local process - # 2. If the rank of the local process it lower than the rank of *all* topological neighbors - # A topological neighbor in this context is hereby defined per entity: - # * vertex: All elements whose vertex is the vertex in question - # * cell: Just the cell itself - # * All other entities: All cells for which one of the corresponding entities interior intersects - # with the interior of the entity in question. 
- # TODO: implement for entitied with dim > 0 - next_local_idx = 1 - for (ci, cell) in enumerate(getcells(dh.grid)) - @debug println("cell #$ci (R$my_rank)") - for fi in 1:Ferrite.nfields(dh) - @debug println(" field: $(dh.field_names[fi]) (R$my_rank)") - interpolation_info = Ferrite.InterpolationInfo(dh.field_interpolations[fi]) - if interpolation_info.nvertexdofs > 0 - for (vi,vertex) in enumerate(Ferrite.vertices(cell)) - @debug println(" vertex#$vertex (R$my_rank)") - # Dof is owned if it is local or if my rank is the smallest in the neighborhood - if !haskey(dgrid.shared_vertices,VertexIndex(ci,vi)) || all(keys(dgrid.shared_vertices[VertexIndex(ci,vi)].remote_vertices) .> my_rank) - # Update dof assignment - dof_local_idx = dh.vertexdicts[fi][vertex] - if local_to_global[dof_local_idx] == 0 - @debug println(" mapping vertex dof#$dof_local_idx to $next_local_idx (R$my_rank)") - local_to_global[dof_local_idx] = next_local_idx - next_local_idx += 1 - else - @debug println(" vertex dof#$dof_local_idx already mapped to $(local_to_global[dof_local_idx]) (R$my_rank)") - end - end - - # Update shared vertex lookup table - if haskey(dgrid.shared_vertices,VertexIndex(ci,vi)) - master_rank = my_rank - for master_rank_new ∈ keys(dgrid.shared_vertices[VertexIndex(ci,vi)].remote_vertices) - master_rank = min(master_rank, master_rank_new) - end - for (remote_rank, svs) ∈ dgrid.shared_vertices[VertexIndex(ci,vi)].remote_vertices - if master_rank == my_rank # I own the dof - we have to send information - if !haskey(vertices_send,remote_rank) - vertices_send[remote_rank] = Vector{Ferrite.VertexIndex}() - end - @debug println(" prepare sending vertex #$(VertexIndex(ci,vi)) to $remote_rank (R$my_rank)") - for i ∈ svs - push!(vertices_send[remote_rank],VertexIndex(ci,vi)) - end - elseif master_rank == remote_rank # dof is owned by remote - we have to receive information - if !haskey(n_vertices_recv,remote_rank) - n_vertices_recv[remote_rank] = length(svs) - else - n_vertices_recv[remote_rank] += length(svs) - end - @debug println(" prepare receiving vertex #$(VertexIndex(ci,vi)) from $remote_rank (R$my_rank)") - end - end - end - end - end - end - end - - # - num_true_local_dofs = next_local_idx-1 - @debug println("#true local dofs $num_true_local_dofs (R$my_rank)") - - # @TODO optimize the following synchronization with MPI line graph topology - # and allgather - # Set true local indices - local_offset = 0 - if my_rank > 1 - local_offset = MPI.Recv(Int, global_comm(dgrid); source=my_rank-1-1) - end - if my_rank < MPI.Comm_size(global_comm(dgrid)) - MPI.Send(local_offset+num_true_local_dofs, global_comm(dgrid); dest=my_rank+1-1) - end - @debug println("#shifted local dof range $(local_offset+1):$(local_offset+num_true_local_dofs) (R$my_rank)") - - # Shift assigned local dofs (dofs with value >0) into the global range - # At this point in the algorithm the dofs with value 0 are the dofs owned of neighboring processes - for i ∈ 1:length(local_to_global) - if local_to_global[i] != 0 - local_to_global[i] += local_offset - end - end - - # Sync non-owned dofs with neighboring processes. 
- # TODO: implement for entitied with dim > 0 - # TODO: Use MPI graph primitives to simplify this code - for sending_rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) - if my_rank == sending_rank - for remote_rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) - if haskey(vertices_send, remote_rank) - n_vertices = length(vertices_send[remote_rank]) - @debug println("Sending $n_vertices vertices to rank $remote_rank (R$my_rank)") - remote_cells = Array{Int64}(undef,n_vertices) - remote_cell_vis = Array{Int64}(undef,n_vertices) - next_buffer_idx = 1 - for lvi ∈ vertices_send[remote_rank] - sv = dgrid.shared_vertices[lvi] - @assert haskey(sv.remote_vertices, remote_rank) - for (cvi, llvi) ∈ sv.remote_vertices[remote_rank][1:1] # Just don't ask :) - remote_cells[next_buffer_idx] = cvi - remote_cell_vis[next_buffer_idx] = llvi - next_buffer_idx += 1 - end - end - MPI.Send(remote_cells, global_comm(dgrid); dest=remote_rank-1) - MPI.Send(remote_cell_vis, global_comm(dgrid); dest=remote_rank-1) - for fi ∈ 1:Ferrite.nfields(dh) - next_buffer_idx = 1 - if length(dh.vertexdicts[fi]) == 0 - @debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") - continue - end - # fill correspondence array - corresponding_global_dofs = Array{Int64}(undef,n_vertices) - for (lci,lclvi) ∈ vertices_send[remote_rank] - vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] - if haskey(dh.vertexdicts[fi], vi) - corresponding_global_dofs[next_buffer_idx] = local_to_global[dh.vertexdicts[fi][vi]] - end - next_buffer_idx += 1 - end - MPI.Send(corresponding_global_dofs, global_comm(dgrid); dest=remote_rank-1) - end - end - end - else - if haskey(n_vertices_recv, sending_rank) - n_vertices = n_vertices_recv[sending_rank] - @debug println("Receiving $n_vertices vertices from rank $sending_rank (R$my_rank)") - local_cells = Array{Int64}(undef,n_vertices) - local_cell_vis = Array{Int64}(undef,n_vertices) - MPI.Recv!(local_cells, global_comm(dgrid); source=sending_rank-1) - MPI.Recv!(local_cell_vis, global_comm(dgrid); source=sending_rank-1) - for fi in 1:Ferrite.nfields(dh) - if length(dh.vertexdicts[fi]) == 0 - @debug println(" Skipping recv on field $(dh.field_names[fi]) (R$my_rank)") - continue - end - corresponding_global_dofs = Array{Int64}(undef,n_vertices) - MPI.Recv!(corresponding_global_dofs, global_comm(dgrid); source=sending_rank-1) - for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) - vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] - if haskey(dh.vertexdicts[fi], vi) - local_to_global[dh.vertexdicts[fi][vi]] = corresponding_global_dofs[cdi] - @debug println(" Updating field $(dh.field_names[fi]) vertex $(VertexIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") - else - @debug println(" Skipping recv on field $(dh.field_names[fi]) vertex $vi (R$my_rank)") - end - end - end - end - end - end - - # Postcondition: All local dofs need a corresponding global dof! 
- @assert findfirst(local_to_global .== 0) === nothing - - @debug vtk_grid("dofs", dgrid; compress=false) do vtk - u = Vector{Float64}(undef,length(dgrid.local_grid.nodes)) - fill!(u, 0.0) - for i=1:length(u) - u[i] = local_to_global[dh.vertexdicts[1][i]] - end - vtk_point_data(vtk, u,"dof") - end - - return local_to_global -end -local_to_global = local_to_global_numbering(dh, dgrid); - -function compute_dof_ownership(dh, dgrid) - my_rank = MPI.Comm_rank(global_comm(dgrid))+1 - - dof_owner = Vector{Int}(undef,ndofs(dh)) - fill!(dof_owner, my_rank) - - for ((lci, lclvi),sv) ∈ dgrid.shared_vertices - owner_rank = minimum([collect(keys(sv.remote_vertices));my_rank]) - - if owner_rank != my_rank - for fi in 1:Ferrite.nfields(dh) - vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] - if haskey(dh.vertexdicts[fi], vi) - local_dof_idx = dh.vertexdicts[fi][vi] - dof_owner[local_dof_idx] = owner_rank - end - end - end - end - - return dof_owner -end -dof_owner = compute_dof_ownership(dh, dgrid); - -nltdofs = sum(dof_owner.==(MPI.Comm_rank(global_comm(dgrid))+1)) -ndofs_total = MPI.Allreduce(nltdofs, MPI.SUM, global_comm(dgrid)) - # ### Boundary conditions # In Ferrite constraints like Dirichlet boundary conditions # are handled by a `ConstraintHandler`. @@ -399,7 +184,12 @@ update!(ch, 0.0); # We define a function, `doassemble` to do the assembly, which takes our `cellvalues`, # the sparse matrix and our DofHandler as input arguments. The function returns the # assembled stiffness matrix, and the force vector. -function doassemble(cellvalues::CellScalarValues{dim}, dh::DofHandler, ldof_to_gdof, ldof_to_rank, ngdofs, dgrid) where {dim} +function doassemble(cellvalues::CellScalarValues{dim}, dh::DistributedDofHandler) where {dim} + ldof_to_gdof = dh.ldof_to_gdof + ldof_to_rank = dh.ldof_to_rank + ngdofs = num_global_dofs(dh) + dgrid = getglobalgrid(dh) + # We allocate the element stiffness matrix and element force vector # just once before looping over all the cells instead of allocating # them every time in the loop. diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index f32e84200d..34bc21fb91 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -25,6 +25,9 @@ struct DistributedDofHandler{dim,T,G<:AbstractDistributedGrid{dim}} <: AbstractD vertexdicts::Array{Dict{Int,Int}} edgedicts::Array{Tuple{Int,Int},Tuple{Int,Bool}} facedicts::Array{Tuple{Int,Int},Int} + + ldof_to_gdof::Vector{Int} + ldof_to_rank::Vector{Int} end function DistributedDofHandler(grid::AbstractDistributedGrid) @@ -46,235 +49,242 @@ function Base.show(io::IO, ::MIME"text/plain", dh::DistributedDofHandler) end end -# Calculate the offset to the first local dof of a field -function field_offset(dh::DofHandler, field_name::Symbol) - offset = 0 - for i in 1:find_field(dh, field_name)-1 - offset += getnbasefunctions(dh.field_interpolations[i])::Int * dh.field_dims[i] - end - return offset -end - -function getfielddim(dh::DofHandler, name::Symbol) - field_pos = findfirst(i->i == name, getfieldnames(dh)) - field_pos === nothing && error("did not find field $name") - return dh.field_dims[field_pos] -end +getlocalgrid(dh::DistributedDofHandler) = getlocalgrid(dh.grid) +getglobalgrid(dh::DistributedDofHandler) = dh.grid -""" - dof_range(dh:DofHandler, field_name) +# Compat layer against serial code +getgrid(dh::DistributedDofHandler) = getlocalgrid(dh) -Return the local dof range for `field_name`. 
Example: +function compute_dof_ownership(dh, dgrid) + my_rank = MPI.Comm_rank(global_comm(dgrid))+1 -```jldoctest -julia> grid = generate_grid(Triangle, (3, 3)) -Grid{2, Triangle, Float64} with 18 Triangle cells and 16 nodes + dof_owner = Vector{Int}(undef,ndofs(dh)) + fill!(dof_owner, my_rank) -julia> dh = DofHandler(grid); push!(dh, :u, 3); push!(dh, :p, 1); close!(dh); + for ((lci, lclvi),sv) ∈ dgrid.shared_vertices + owner_rank = minimum([collect(keys(sv.remote_vertices));my_rank]) -julia> dof_range(dh, :u) -1:9 + if owner_rank != my_rank + for fi in 1:Ferrite.nfields(dh) + vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] + if haskey(dh.vertexdicts[fi], vi) + local_dof_idx = dh.vertexdicts[fi][vi] + dof_owner[local_dof_idx] = owner_rank + end + end + end + end -julia> dof_range(dh, :p) -10:12 -``` -""" -function dof_range(dh::DofHandler, field_name::Symbol) - f = find_field(dh, field_name) - offset = field_offset(dh, field_name) - n_field_dofs = getnbasefunctions(dh.field_interpolations[f])::Int * dh.field_dims[f] - return (offset+1):(offset+n_field_dofs) + return dof_owner end +""" +Compute the number of dofs owned by the current process. +""" +num_local_true_dofs(dh::DistributedDofHandler) = sum(dof_owner.==(MPI.Comm_rank(global_comm(dgrid))+1)) -function close!(dh::DistributedDofHandler{dim}) where {dim} - @assert !isclosed(dh) - - # `vertexdict` keeps track of the visited vertices. We store the global vertex - # number and the first dof we added to that vertex. - vertexdicts = [Dict{Int,Int}() for _ in 1:nfields(dh)] - - # `edgedict` keeps track of the visited edges, this will only be used for a 3D problem - # An edge is determined from two vertices, but we also need to store the direction - # of the first edge we encounter and add dofs too. When we encounter the same edge - # the next time we check if the direction is the same, otherwise we reuse the dofs - # in the reverse order - edgedicts = [Dict{Tuple{Int,Int},Tuple{Int,Bool}}() for _ in 1:nfields(dh)] +""" +Compute the number of dofs visible to the current process. +""" +num_local_dofs(dh::DistributedDofHandler) = length(dh.ldof_to_gdof) - # `facedict` keeps track of the visited faces. We only need to store the first dof we - # added to the face; if we encounter the same face again we *always* reverse the order - # In 2D a face (i.e. a line) is uniquely determined by 2 vertices, and in 3D a - # face (i.e. a surface) is uniquely determined by 3 vertices. - facedicts = [Dict{NTuple{dim,Int},Int}() for _ in 1:nfields(dh)] +""" +Compute the number of dofs in the global system. +""" +num_global_dofs(dh::DistributedDofHandler) = MPI.Allreduce(nltdofs(dh), MPI.SUM, global_comm(dgrid)) - # celldofs are never shared between different cells so there is no need - # for a `celldict` to keep track of which cells we have added dofs too. +""" +Renumber the dofs in local ordering to their corresponding global numbering. 
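+The numbering is computed in three steps: dofs owned by this rank are counted locally,
+shifted by an exclusive prefix offset received from the previous rank, and the global
+numbers of the remaining shared (non-owned) dofs are then received from their owning rank.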
- # We create the `InterpolationInfo` structs with precomputed information for each - # interpolation since that allows having the cell loop as the outermost loop, - # and the interpolation loop inside without using a function barrier - interpolation_infos = InterpolationInfo[] - for interpolation in dh.field_interpolations - # push!(dh.interpolation_info, InterpolationInfo(interpolation)) - push!(interpolation_infos, InterpolationInfo(interpolation)) - end +TODO: Refactor for MixedDofHandler integration +""" +function local_to_global_numbering(dh::DistributedDofHandler, dgrid::AbstractDistributedGrid) + # MPI rank starting with 1 to match Julia's index convention + my_rank = MPI.Comm_rank(global_comm(dgrid))+1 - # not implemented yet: more than one facedof per face in 3D - dim == 3 && @assert(!any(x->x.nfacedofs > 1, interpolation_infos)) + local_to_global = Vector{Int}(undef,ndofs(dh)) + fill!(local_to_global,0) # 0 is the invalid index! + # Start by numbering local dofs only from 1:#local_dofs - nextdof = 1 # next free dof to distribute - push!(dh.cell_dofs_offset, 1) # dofs for the first cell start at 1 + # Lookup for synchronization in the form (Remote Rank,Shared Entity) + # @TODO replace dict with vector and tie to MPI neighborhood graph of the mesh + vertices_send = Dict{Int,Vector{VertexIndex}}() + n_vertices_recv = Dict{Int,Int}() - # loop over all the cells, and distribute dofs for all the fields - for (ci, cell) in enumerate(getcells(dh.grid)) - @debug println("cell #$ci") - for fi in 1:nfields(dh) - interpolation_info = interpolation_infos[fi] - @debug println(" field: $(dh.field_names[fi])") + # We start by assigning a local dof to all owned entities. + # An entity is owned if: + # 1. *All* topological neighbors are on the local process + # 2. If the rank of the local process it lower than the rank of *all* topological neighbors + # A topological neighbor in this context is hereby defined per entity: + # * vertex: All elements whose vertex is the vertex in question + # * cell: Just the cell itself + # * All other entities: All cells for which one of the corresponding entities interior intersects + # with the interior of the entity in question. 
+ # TODO: implement for entitied with dim > 0 + next_local_idx = 1 + for (ci, cell) in enumerate(getcells(getgrid(dh))) + @debug println("cell #$ci (R$my_rank)") + for fi in 1:Ferrite.nfields(dh) + @debug println(" field: $(dh.field_names[fi]) (R$my_rank)") + interpolation_info = Ferrite.InterpolationInfo(dh.field_interpolations[fi]) if interpolation_info.nvertexdofs > 0 - for vertex in vertices(cell) - @debug println(" vertex#$vertex") - token = Base.ht_keyindex2!(vertexdicts[fi], vertex) - if token > 0 # haskey(vertexdicts[fi], vertex) # reuse dofs - reuse_dof = vertexdicts[fi].vals[token] # vertexdicts[fi][vertex] - for d in 1:dh.field_dims[fi] - @debug println(" reusing dof #$(reuse_dof + (d-1))") - push!(dh.cell_dofs, reuse_dof + (d-1)) - end - else # token <= 0, distribute new dofs - for vertexdof in 1:interpolation_info.nvertexdofs - Base._setindex!(vertexdicts[fi], nextdof, vertex, -token) # vertexdicts[fi][vertex] = nextdof - for d in 1:dh.field_dims[fi] - @debug println(" adding dof#$nextdof") - push!(dh.cell_dofs, nextdof) - nextdof += 1 - end + for (vi,vertex) in enumerate(Ferrite.vertices(cell)) + @debug println(" vertex#$vertex (R$my_rank)") + # Dof is owned if it is local or if my rank is the smallest in the neighborhood + if !haskey(dgrid.shared_vertices,VertexIndex(ci,vi)) || all(keys(dgrid.shared_vertices[VertexIndex(ci,vi)].remote_vertices) .> my_rank) + # Update dof assignment + dof_local_idx = dh.vertexdicts[fi][vertex] + if local_to_global[dof_local_idx] == 0 + @debug println(" mapping vertex dof#$dof_local_idx to $next_local_idx (R$my_rank)") + local_to_global[dof_local_idx] = next_local_idx + next_local_idx += 1 + else + @debug println(" vertex dof#$dof_local_idx already mapped to $(local_to_global[dof_local_idx]) (R$my_rank)") end end - end # vertex loop - end - if dim == 3 # edges only in 3D - if interpolation_info.nedgedofs > 0 - for edge in edges(cell) - sedge, dir = sortedge(edge) - @debug println(" edge#$sedge dir: $(dir)") - token = Base.ht_keyindex2!(edgedicts[fi], sedge) - if token > 0 # haskey(edgedicts[fi], sedge), reuse dofs - startdof, olddir = edgedicts[fi].vals[token] # edgedicts[fi][sedge] # first dof for this edge (if dir == true) - for edgedof in (dir == olddir ? 
(1:interpolation_info.nedgedofs) : (interpolation_info.nedgedofs:-1:1)) - for d in 1:dh.field_dims[fi] - reuse_dof = startdof + (d-1) + (edgedof-1)*dh.field_dims[fi] - @debug println(" reusing dof#$(reuse_dof)") - push!(dh.cell_dofs, reuse_dof) + + # Update shared vertex lookup table + if haskey(dgrid.shared_vertices,VertexIndex(ci,vi)) + master_rank = my_rank + for master_rank_new ∈ keys(dgrid.shared_vertices[VertexIndex(ci,vi)].remote_vertices) + master_rank = min(master_rank, master_rank_new) + end + for (remote_rank, svs) ∈ dgrid.shared_vertices[VertexIndex(ci,vi)].remote_vertices + if master_rank == my_rank # I own the dof - we have to send information + if !haskey(vertices_send,remote_rank) + vertices_send[remote_rank] = Vector{Ferrite.VertexIndex}() end - end - else # token <= 0, distribute new dofs - Base._setindex!(edgedicts[fi], (nextdof, dir), sedge, -token) # edgedicts[fi][sedge] = (nextdof, dir), store only the first dof for the edge - for edgedof in 1:interpolation_info.nedgedofs - for d in 1:dh.field_dims[fi] - @debug println(" adding dof#$nextdof") - push!(dh.cell_dofs, nextdof) - nextdof += 1 + @debug println(" prepare sending vertex #$(VertexIndex(ci,vi)) to $remote_rank (R$my_rank)") + for i ∈ svs + push!(vertices_send[remote_rank],VertexIndex(ci,vi)) + end + elseif master_rank == remote_rank # dof is owned by remote - we have to receive information + if !haskey(n_vertices_recv,remote_rank) + n_vertices_recv[remote_rank] = length(svs) + else + n_vertices_recv[remote_rank] += length(svs) end + @debug println(" prepare receiving vertex #$(VertexIndex(ci,vi)) from $remote_rank (R$my_rank)") end end - end # edge loop + end end end - if interpolation_info.nfacedofs > 0 && (interpolation_info.dim == dim) - for face in faces(cell) - sface = sortface(face) # TODO: faces(cell) may as well just return the sorted list - @debug println(" face#$sface") - token = Base.ht_keyindex2!(facedicts[fi], sface) - if token > 0 # haskey(facedicts[fi], sface), reuse dofs - startdof = facedicts[fi].vals[token] # facedicts[fi][sface] - for facedof in interpolation_info.nfacedofs:-1:1 # always reverse (YOLO) - for d in 1:dh.field_dims[fi] - reuse_dof = startdof + (d-1) + (facedof-1)*dh.field_dims[fi] - @debug println(" reusing dof#$(reuse_dof)") - push!(dh.cell_dofs, reuse_dof) - end + end + end + + # + num_true_local_dofs = next_local_idx-1 + @debug println("#true local dofs $num_true_local_dofs (R$my_rank)") + + # @TODO optimize the following synchronization with MPI line graph topology + # and allgather + # Set true local indices + local_offset = 0 + if my_rank > 1 + local_offset = MPI.Recv(Int, global_comm(dgrid); source=my_rank-1-1) + end + if my_rank < MPI.Comm_size(global_comm(dgrid)) + MPI.Send(local_offset+num_true_local_dofs, global_comm(dgrid); dest=my_rank+1-1) + end + @debug println("#shifted local dof range $(local_offset+1):$(local_offset+num_true_local_dofs) (R$my_rank)") + + # Shift assigned local dofs (dofs with value >0) into the global range + # At this point in the algorithm the dofs with value 0 are the dofs owned of neighboring processes + for i ∈ 1:length(local_to_global) + if local_to_global[i] != 0 + local_to_global[i] += local_offset + end + end + + # Sync non-owned dofs with neighboring processes. 
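+    # The exchange below sends, per neighboring rank, three flat buffers: the cell ids
+    # (`remote_cells`) and local vertex indices (`remote_cell_vis`) identifying each shared
+    # vertex, plus one `corresponding_global_dofs` buffer per field holding the global dof
+    # assigned by the owner; the receiver maps them back through its `vertexdicts`.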
+ # TODO: implement for entitied with dim > 0 + # TODO: Use MPI graph primitives to simplify this code + for sending_rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) + if my_rank == sending_rank + for remote_rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) + if haskey(vertices_send, remote_rank) + n_vertices = length(vertices_send[remote_rank]) + @debug println("Sending $n_vertices vertices to rank $remote_rank (R$my_rank)") + remote_cells = Array{Int64}(undef,n_vertices) + remote_cell_vis = Array{Int64}(undef,n_vertices) + next_buffer_idx = 1 + for lvi ∈ vertices_send[remote_rank] + sv = dgrid.shared_vertices[lvi] + @assert haskey(sv.remote_vertices, remote_rank) + for (cvi, llvi) ∈ sv.remote_vertices[remote_rank][1:1] # Just don't ask :) + remote_cells[next_buffer_idx] = cvi + remote_cell_vis[next_buffer_idx] = llvi + next_buffer_idx += 1 end - else # distribute new dofs - Base._setindex!(facedicts[fi], nextdof, sface, -token)# facedicts[fi][sface] = nextdof, store the first dof for this face - for facedof in 1:interpolation_info.nfacedofs - for d in 1:dh.field_dims[fi] - @debug println(" adding dof#$nextdof") - push!(dh.cell_dofs, nextdof) - nextdof += 1 + end + MPI.Send(remote_cells, global_comm(dgrid); dest=remote_rank-1) + MPI.Send(remote_cell_vis, global_comm(dgrid); dest=remote_rank-1) + for fi ∈ 1:Ferrite.nfields(dh) + next_buffer_idx = 1 + if length(dh.vertexdicts[fi]) == 0 + @debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") + continue + end + # fill correspondence array + corresponding_global_dofs = Array{Int64}(undef,n_vertices) + for (lci,lclvi) ∈ vertices_send[remote_rank] + vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] + if haskey(dh.vertexdicts[fi], vi) + corresponding_global_dofs[next_buffer_idx] = local_to_global[dh.vertexdicts[fi][vi]] end + next_buffer_idx += 1 end + MPI.Send(corresponding_global_dofs, global_comm(dgrid); dest=remote_rank-1) end - end # face loop + end end - if interpolation_info.ncelldofs > 0 # always distribute new dofs for cell - @debug println(" cell#$ci") - for celldof in 1:interpolation_info.ncelldofs - for d in 1:dh.field_dims[fi] - @debug println(" adding dof#$nextdof") - push!(dh.cell_dofs, nextdof) - nextdof += 1 + else + if haskey(n_vertices_recv, sending_rank) + n_vertices = n_vertices_recv[sending_rank] + @debug println("Receiving $n_vertices vertices from rank $sending_rank (R$my_rank)") + local_cells = Array{Int64}(undef,n_vertices) + local_cell_vis = Array{Int64}(undef,n_vertices) + MPI.Recv!(local_cells, global_comm(dgrid); source=sending_rank-1) + MPI.Recv!(local_cell_vis, global_comm(dgrid); source=sending_rank-1) + for fi in 1:Ferrite.nfields(dh) + if length(dh.vertexdicts[fi]) == 0 + @debug println(" Skipping recv on field $(dh.field_names[fi]) (R$my_rank)") + continue end - end # cell loop - end - end # field loop - # push! the first index of the next cell to the offset vector - push!(dh.cell_dofs_offset, length(dh.cell_dofs)+1) - end # cell loop - dh.ndofs[] = maximum(dh.cell_dofs) - dh.closed[] = true - - return dh, vertexdicts, edgedicts, facedicts - -end - -function _create_sparsity_pattern(dh::DistributedDofHandler, ch#=::Union{ConstraintHandler, Nothing}=#, sym::Bool) - ncells = getncells(dh.grid) - n = ndofs_per_cell(dh) - N = sym ? 
div(n*(n+1), 2) * ncells : n^2 * ncells - N += ndofs(dh) # always add the diagonal elements - I = Int[]; resize!(I, N) - J = Int[]; resize!(J, N) - global_dofs = zeros(Int, n) - cnt = 0 - for element_id in 1:ncells - celldofs!(global_dofs, dh, element_id) - @inbounds for j in 1:n, i in 1:n - dofi = global_dofs[i] - dofj = global_dofs[j] - sym && (dofi > dofj && continue) - cnt += 1 - if cnt > length(J) - resize!(I, trunc(Int, length(I) * 1.5)) - resize!(J, trunc(Int, length(J) * 1.5)) + corresponding_global_dofs = Array{Int64}(undef,n_vertices) + MPI.Recv!(corresponding_global_dofs, global_comm(dgrid); source=sending_rank-1) + for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) + vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] + if haskey(dh.vertexdicts[fi], vi) + local_to_global[dh.vertexdicts[fi][vi]] = corresponding_global_dofs[cdi] + @debug println(" Updating field $(dh.field_names[fi]) vertex $(VertexIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") + else + @debug println(" Skipping recv on field $(dh.field_names[fi]) vertex $vi (R$my_rank)") + end + end + end end - I[cnt] = dofi - J[cnt] = dofj - - end - end - @inbounds for d in 1:ndofs(dh) - cnt += 1 - if cnt > length(J) - resize!(I, trunc(Int, length(I) + ndofs(dh))) - resize!(J, trunc(Int, length(J) + ndofs(dh))) end - I[cnt] = d - J[cnt] = d end - resize!(I, cnt) - resize!(J, cnt) - V = zeros(length(I)) - K = sparse(I, J, V) + # Postcondition: All local dofs need a corresponding global dof! + @assert findfirst(local_to_global .== 0) === nothing - # Add entries to K corresponding to condensation due the linear constraints - # Note, this requires the K matrix, which is why we can't push!() to the I,J,V - # triplet directly. - if ch !== nothing - @assert isclosed(ch) - _condense_sparsity_pattern!(K, ch.acs) + @debug vtk_grid("dofs", dgrid; compress=false) do vtk + u = Vector{Float64}(undef,length(dgrid.local_grid.nodes)) + fill!(u, 0.0) + for i=1:length(u) + u[i] = local_to_global[dh.vertexdicts[1][i]] + end + vtk_point_data(vtk, u,"dof") end - return K -end \ No newline at end of file + return local_to_global +end + +function close!(dh::DistributedDofHandler) + __close!(dh) + dh.ldof_to_gdof = local_to_global_numbering(dh, getglobalgrid(dh)); + dh.ldof_to_rank = compute_dof_ownership(dh, dgrid); +end diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index a150d1aae3..e99b27f0a4 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -32,7 +32,7 @@ function DofHandler(grid::AbstractGrid{dim}) where {dim} DofHandler(Symbol[], Int[], Interpolation[], BCValues{Float64}[], Int[], Int[], ScalarWrapper(false), grid, Ferrite.ScalarWrapper(-1), Dict{Int,Int}[], Dict{Tuple{Int,Int},Tuple{Int,Bool}}[],Dict{NTuple{dim,Int},Int}[]) end -function Base.show(io::IO, ::MIME"text/plain", dh::DofHandler) +function Base.show(io::IO, ::MIME"text/plain", dh::Union{DofHandler, DistributedDofHandler}) println(io, "DofHandler") println(io, " Fields:") for i in 1:nfields(dh) @@ -124,13 +124,13 @@ The field is added to all cells of the underlying grid. In case no interpolation the default interpolation of the grid's celltype is used. If the grid uses several celltypes, [`push!(dh::MixedDofHandler, fh::FieldHandler)`](@ref) must be used instead. 
""" -function Base.push!(dh::AbstractDofHandler, name::Symbol, dim::Int, ip::Interpolation=default_interpolation(getcelltype(dh.grid))) +function Base.push!(dh::AbstractDofHandler, name::Symbol, dim::Int, ip::Interpolation=default_interpolation(getcelltype(getgrid(dh)))) @assert !isclosed(dh) @assert !in(name, dh.field_names) push!(dh.field_names, name) push!(dh.field_dims, dim) push!(dh.field_interpolations, ip) - push!(dh.bc_values, BCValues(ip, default_interpolation(getcelltype(dh.grid)))) + push!(dh.bc_values, BCValues(ip, default_interpolation(getcelltype(getgrid(dh))))) return dh end @@ -165,7 +165,7 @@ function close!(dh::DofHandler) end # close the DofHandler and distribute all the dofs -function __close!(dh::DofHandler{dim}) where {dim} +function __close!(dh::Union{DofHandler{dim}, DistributedDofHandler{dim}}) where {dim} @assert !isclosed(dh) # `vertexdict` keeps track of the visited vertices. We store the global vertex @@ -213,7 +213,7 @@ function __close!(dh::DofHandler{dim}) where {dim} push!(dh.cell_dofs_offset, 1) # dofs for the first cell start at 1 # loop over all the cells, and distribute dofs for all the fields - for (ci, cell) in enumerate(getcells(dh.grid)) + for (ci, cell) in enumerate(getcells(getgrid(dh))) @debug println("cell #$ci") for fi in 1:nfields(dh) interpolation_info = interpolation_infos[fi] @@ -314,7 +314,7 @@ function __close!(dh::DofHandler{dim}) where {dim} return dh end -function celldofs!(global_dofs::Vector{Int}, dh::DofHandler, i::Int) +function celldofs!(global_dofs::Vector{Int}, dh::Union{DofHandler, DistributedDofHandler}, i::Int) @assert isclosed(dh) @assert length(global_dofs) == ndofs_per_cell(dh, i) unsafe_copyto!(global_dofs, 1, dh.cell_dofs, dh.cell_dofs_offset[i], length(global_dofs)) @@ -341,9 +341,9 @@ function cellcoords!(global_coords::Vector{Vec{dim,T}}, grid::AbstractGrid{dim}, return global_coords end -cellcoords!(global_coords::Vector{<:Vec}, dh::DofHandler, i::Int) = cellcoords!(global_coords, dh.grid, i) +cellcoords!(global_coords::Vector{<:Vec}, dh::Union{DofHandler, DistributedDofHandler}, i::Int) = cellcoords!(global_coords, getgrid(dh), i) -function celldofs(dh::DofHandler, i::Int) +function celldofs(dh::Union{DofHandler, DistributedDofHandler}, i::Int) @assert isclosed(dh) n = ndofs_per_cell(dh, i) global_dofs = zeros(Int, n) @@ -376,7 +376,7 @@ See the [Sparsity Pattern](@ref) section of the manual. create_symmetric_sparsity_pattern(dh::AbstractDofHandler) = Symmetric(_create_sparsity_pattern(dh, nothing, true), :U) function _create_sparsity_pattern(dh::DofHandler, ch#=::Union{ConstraintHandler, Nothing}=#, sym::Bool) - ncells = getncells(dh.grid) + ncells = getncells(getgrid(dh)) n = ndofs_per_cell(dh) N = sym ? div(n*(n+1), 2) * ncells : n^2 * ncells N += ndofs(dh) # always add the diagonal elements @@ -447,7 +447,7 @@ function renumber!(dh::AbstractDofHandler, perm::AbstractVector{<:Integer}) end function WriteVTK.vtk_grid(filename::AbstractString, dh::AbstractDofHandler; compress::Bool=true) - vtk_grid(filename, dh.grid; compress=compress) + vtk_grid(filename, getgrid(dh); compress=compress) end """ @@ -457,7 +457,7 @@ Reshape the entries of the dof-vector `u` which correspond to the field `fieldna Return a matrix with a column for every node and a row for every dimension of the field. For superparametric fields only the entries corresponding to nodes of the grid will be returned. Do not use this function for subparametric approximations. 
""" -function reshape_to_nodes(dh::DofHandler, u::Vector{T}, fieldname::Symbol) where T +function reshape_to_nodes(dh::Union{DofHandler, DistributedDofHandler}, u::Vector{T}, fieldname::Symbol) where T # make sure the field exists fieldname ∈ Ferrite.getfieldnames(dh) || error("Field $fieldname not found.") @@ -466,14 +466,14 @@ function reshape_to_nodes(dh::DofHandler, u::Vector{T}, fieldname::Symbol) where field_dim = getfielddim(dh, field_idx) space_dim = field_dim == 2 ? 3 : field_dim - data = fill(zero(T), space_dim, getnnodes(dh.grid)) + data = fill(zero(T), space_dim, getnnodes(getgrid(dh))) reshape_field_data!(data, dh, u, offset, field_dim) return data end -function reshape_field_data!(data::Matrix{T}, dh::AbstractDofHandler, u::Vector{T}, field_offset::Int, field_dim::Int, cellset=Set{Int}(1:getncells(dh.grid))) where T +function reshape_field_data!(data::Matrix{T}, dh::AbstractDofHandler, u::Vector{T}, field_offset::Int, field_dim::Int, cellset=Set{Int}(1:getncells(getgrid(dh)))) where T _celldofs = Vector{Int}(undef, ndofs_per_cell(dh, first(cellset))) for cell in CellIterator(dh, collect(cellset)) diff --git a/src/exports.jl b/src/exports.jl index 716ab532e8..ec50702912 100644 --- a/src/exports.jl +++ b/src/exports.jl @@ -66,6 +66,7 @@ export getcells, getgrid, getlocalgrid, + getglobalgrid, getncells, getnodes, getnnodes, @@ -107,6 +108,9 @@ export DistributedDofHandler, close!, ndofs, + num_local_true_dofs, + num_local_dofs, + num_global_dofs, ndofs_per_cell, celldofs!, celldofs, From 93356d1aa2b272741f358f8143fc8165a987f4bc Mon Sep 17 00:00:00 2001 From: termi-official Date: Sun, 9 Oct 2022 20:46:23 +0200 Subject: [PATCH 028/124] Fix precompile errors. --- src/Dofs/DistributedDofHandler.jl | 199 ++++++++++++++++++++++++++++-- src/Dofs/DofHandler.jl | 30 ++--- 2 files changed, 201 insertions(+), 28 deletions(-) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index 34bc21fb91..aafa2ebb8d 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -22,23 +22,23 @@ struct DistributedDofHandler{dim,T,G<:AbstractDistributedGrid{dim}} <: AbstractD grid::G ndofs::ScalarWrapper{Int} - vertexdicts::Array{Dict{Int,Int}} - edgedicts::Array{Tuple{Int,Int},Tuple{Int,Bool}} - facedicts::Array{Tuple{Int,Int},Int} + vertexdicts::Vector{Dict{Int,Int}} + edgedicts::Vector{Dict{Tuple{Int,Int},Tuple{Int,Bool}}} + facedicts::Vector{Dict{NTuple{dim,Int},Int}} ldof_to_gdof::Vector{Int} - ldof_to_rank::Vector{Int} + ldof_to_rank::Vector{Int32} end -function DistributedDofHandler(grid::AbstractDistributedGrid) +function DistributedDofHandler(grid::AbstractDistributedGrid{dim}) where {dim} isconcretetype(getcelltype(grid)) || error("Grid includes different celltypes. 
DistributedMixedDofHandler not implemented yet.") - DistributedDofHandler(Symbol[], Int[], Interpolation[], BCValues{Float64}[], Int[], Int[], ScalarWrapper(false), grid, Ferrite.ScalarWrapper(-1)) + DistributedDofHandler(Symbol[], Int[], Interpolation[], BCValues{Float64}[], Int[], Int[], ScalarWrapper(false), grid, Ferrite.ScalarWrapper(-1), Dict{Int,Int}[], Dict{Tuple{Int,Int},Tuple{Int,Bool}}[],Dict{NTuple{dim,Int},Int}[], Int[], Int32[]) end function Base.show(io::IO, ::MIME"text/plain", dh::DistributedDofHandler) println(io, "DistributedDofHandler") println(io, " Fields:") - for i in 1:nfields(dh) + for i in 1:num_fields(dh) println(io, " ", repr(dh.field_names[i]), ", interpolation: ", dh.field_interpolations[i],", dim: ", dh.field_dims[i]) end if !isclosed(dh) @@ -55,6 +55,28 @@ getglobalgrid(dh::DistributedDofHandler) = dh.grid # Compat layer against serial code getgrid(dh::DistributedDofHandler) = getlocalgrid(dh) +# TODO this is copy pasta from DofHandler.jl +function celldofs!(global_dofs::Vector{Int}, dh::DistributedDofHandler, i::Int) + @assert isclosed(dh) + @assert length(global_dofs) == ndofs_per_cell(dh, i) + unsafe_copyto!(global_dofs, 1, dh.cell_dofs, dh.cell_dofs_offset[i], length(global_dofs)) + return global_dofs +end + +# TODO this is copy pasta from DofHandler.jl +cellcoords!(global_coords::Vector{<:Vec}, dh::DistributedDofHandler, i::Int) = cellcoords!(global_coords, getgrid(dh), i) + +# TODO this is copy pasta from DofHandler.jl +function celldofs(dh::DistributedDofHandler, i::Int) + @assert isclosed(dh) + n = ndofs_per_cell(dh, i) + global_dofs = zeros(Int, n) + unsafe_copyto!(global_dofs, 1, dh.cell_dofs, dh.cell_dofs_offset[i], n) + return global_dofs +end + +renumber!(dh::DistributedDofHandler, perm::AbstractVector{<:Integer}) = (@assert false) && "Unimplemented" + function compute_dof_ownership(dh, dgrid) my_rank = MPI.Comm_rank(global_comm(dgrid))+1 @@ -65,7 +87,7 @@ function compute_dof_ownership(dh, dgrid) owner_rank = minimum([collect(keys(sv.remote_vertices));my_rank]) if owner_rank != my_rank - for fi in 1:Ferrite.nfields(dh) + for fi in 1:Ferrite.num_fields(dh) vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] if haskey(dh.vertexdicts[fi], vi) local_dof_idx = dh.vertexdicts[fi][vi] @@ -124,7 +146,7 @@ function local_to_global_numbering(dh::DistributedDofHandler, dgrid::AbstractDis next_local_idx = 1 for (ci, cell) in enumerate(getcells(getgrid(dh))) @debug println("cell #$ci (R$my_rank)") - for fi in 1:Ferrite.nfields(dh) + for fi in 1:Ferrite.num_fields(dh) @debug println(" field: $(dh.field_names[fi]) (R$my_rank)") interpolation_info = Ferrite.InterpolationInfo(dh.field_interpolations[fi]) if interpolation_info.nvertexdofs > 0 @@ -220,7 +242,7 @@ function local_to_global_numbering(dh::DistributedDofHandler, dgrid::AbstractDis end MPI.Send(remote_cells, global_comm(dgrid); dest=remote_rank-1) MPI.Send(remote_cell_vis, global_comm(dgrid); dest=remote_rank-1) - for fi ∈ 1:Ferrite.nfields(dh) + for fi ∈ 1:Ferrite.num_fields(dh) next_buffer_idx = 1 if length(dh.vertexdicts[fi]) == 0 @debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") @@ -247,7 +269,7 @@ function local_to_global_numbering(dh::DistributedDofHandler, dgrid::AbstractDis local_cell_vis = Array{Int64}(undef,n_vertices) MPI.Recv!(local_cells, global_comm(dgrid); source=sending_rank-1) MPI.Recv!(local_cell_vis, global_comm(dgrid); source=sending_rank-1) - for fi in 1:Ferrite.nfields(dh) + for fi in 1:Ferrite.num_fields(dh) if length(dh.vertexdicts[fi]) == 0 @debug 
println(" Skipping recv on field $(dh.field_names[fi]) (R$my_rank)") continue @@ -285,6 +307,157 @@ end function close!(dh::DistributedDofHandler) __close!(dh) - dh.ldof_to_gdof = local_to_global_numbering(dh, getglobalgrid(dh)); - dh.ldof_to_rank = compute_dof_ownership(dh, dgrid); + append!(dh.ldof_to_gdof, local_to_global_numbering(dh, getglobalgrid(dh))) + append!(dh.ldof_to_rank, compute_dof_ownership(dh, getglobalgrid(dh))) +end + +# TODO this is copy pasta from DofHandler.jl +# close the DofHandler and distribute all the dofs +function __close!(dh::DistributedDofHandler{dim}) where {dim} + @assert !isclosed(dh) + + # `vertexdict` keeps track of the visited vertices. We store the global vertex + # number and the first dof we added to that vertex. + resize!(dh.vertexdicts, num_fields(dh)) + for i in 1:num_fields(dh) + dh.vertexdicts[i] = Dict{Tuple{Int,Int},Tuple{Int,Bool}}() + end + + # `edgedict` keeps track of the visited edges, this will only be used for a 3D problem + # An edge is determined from two vertices, but we also need to store the direction + # of the first edge we encounter and add dofs too. When we encounter the same edge + # the next time we check if the direction is the same, otherwise we reuse the dofs + # in the reverse order + resize!(dh.edgedicts, num_fields(dh)) + for i in 1:num_fields(dh) + dh.edgedicts[i] = Dict{Tuple{Int,Int},Tuple{Int,Bool}}() + end + + # `facedict` keeps track of the visited faces. We only need to store the first dof we + # added to the face; if we encounter the same face again we *always* reverse the order + # In 2D a face (i.e. a line) is uniquely determined by 2 vertices, and in 3D a + # face (i.e. a surface) is uniquely determined by 3 vertices. + resize!(dh.facedicts, num_fields(dh)) + for i in 1:num_fields(dh) + dh.facedicts[i] = Dict{NTuple{dim,Int},Int}() + end + + # celldofs are never shared between different cells so there is no need + # for a `celldict` to keep track of which cells we have added dofs too. 
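+    # These dicts are what `entity_dofs` and `local_to_global_numbering` consult later on,
+    # e.g. dh.vertexdicts[field_idx][global_vertex_id] returns the first dof on that vertex.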
+ + # We create the `InterpolationInfo` structs with precomputed information for each + # interpolation since that allows having the cell loop as the outermost loop, + # and the interpolation loop inside without using a function barrier + interpolation_infos = InterpolationInfo[] + for interpolation in dh.field_interpolations + # push!(dh.interpolation_info, InterpolationInfo(interpolation)) + push!(interpolation_infos, InterpolationInfo(interpolation)) + end + + # not implemented yet: more than one facedof per face in 3D + dim == 3 && @assert(!any(x->x.nfacedofs > 1, interpolation_infos)) + + nextdof = 1 # next free dof to distribute + push!(dh.cell_dofs_offset, 1) # dofs for the first cell start at 1 + + # loop over all the cells, and distribute dofs for all the fields + for (ci, cell) in enumerate(getcells(getgrid(dh))) + @debug println("cell #$ci") + for fi in 1:num_fields(dh) + interpolation_info = interpolation_infos[fi] + @debug println(" field: $(dh.field_names[fi])") + if interpolation_info.nvertexdofs > 0 + for vertex in vertices(cell) + @debug println(" vertex#$vertex") + token = Base.ht_keyindex2!(dh.vertexdicts[fi], vertex) + if token > 0 # haskey(dh.vertexdicts[fi], vertex) # reuse dofs + reuse_dof = dh.vertexdicts[fi].vals[token] # dh.vertexdicts[fi][vertex] + for d in 1:dh.field_dims[fi] + @debug println(" reusing dof #$(reuse_dof + (d-1))") + push!(dh.cell_dofs, reuse_dof + (d-1)) + end + else # token <= 0, distribute new dofs + for vertexdof in 1:interpolation_info.nvertexdofs + Base._setindex!(dh.vertexdicts[fi], nextdof, vertex, -token) # dh.vertexdicts[fi][vertex] = nextdof + for d in 1:dh.field_dims[fi] + @debug println(" adding dof#$nextdof") + push!(dh.cell_dofs, nextdof) + nextdof += 1 + end + end + end + end # vertex loop + end + if dim == 3 # edges only in 3D + if interpolation_info.nedgedofs > 0 + for edge in edges(cell) + sedge, dir = sortedge(edge) + @debug println(" edge#$sedge dir: $(dir)") + token = Base.ht_keyindex2!(dh.edgedicts[fi], sedge) + if token > 0 # haskey(dh.edgedicts[fi], sedge), reuse dofs + startdof, olddir = dh.edgedicts[fi].vals[token] # dh.edgedicts[fi][sedge] # first dof for this edge (if dir == true) + for edgedof in (dir == olddir ? 
(1:interpolation_info.nedgedofs) : (interpolation_info.nedgedofs:-1:1)) + for d in 1:dh.field_dims[fi] + reuse_dof = startdof + (d-1) + (edgedof-1)*dh.field_dims[fi] + @debug println(" reusing dof#$(reuse_dof)") + push!(dh.cell_dofs, reuse_dof) + end + end + else # token <= 0, distribute new dofs + Base._setindex!(dh.edgedicts[fi], (nextdof, dir), sedge, -token) # dh.edgedicts[fi][sedge] = (nextdof, dir), store only the first dof for the edge + for edgedof in 1:interpolation_info.nedgedofs + for d in 1:dh.field_dims[fi] + @debug println(" adding dof#$nextdof") + push!(dh.cell_dofs, nextdof) + nextdof += 1 + end + end + end + end # edge loop + end + end + if interpolation_info.nfacedofs > 0 && (interpolation_info.dim == dim) + for face in faces(cell) + sface = sortface(face) # TODO: faces(cell) may as well just return the sorted list + @debug println(" face#$sface") + token = Base.ht_keyindex2!(dh.facedicts[fi], sface) + if token > 0 # haskey(dh.facedicts[fi], sface), reuse dofs + startdof = dh.facedicts[fi].vals[token] # dh.facedicts[fi][sface] + for facedof in interpolation_info.nfacedofs:-1:1 # always reverse (YOLO) + for d in 1:dh.field_dims[fi] + reuse_dof = startdof + (d-1) + (facedof-1)*dh.field_dims[fi] + @debug println(" reusing dof#$(reuse_dof)") + push!(dh.cell_dofs, reuse_dof) + end + end + else # distribute new dofs + Base._setindex!(dh.facedicts[fi], nextdof, sface, -token)# dh.facedicts[fi][sface] = nextdof, store the first dof for this face + for facedof in 1:interpolation_info.nfacedofs + for d in 1:dh.field_dims[fi] + @debug println(" adding dof#$nextdof") + push!(dh.cell_dofs, nextdof) + nextdof += 1 + end + end + end + end # face loop + end + if interpolation_info.ncelldofs > 0 # always distribute new dofs for cell + @debug println(" cell#$ci") + for celldof in 1:interpolation_info.ncelldofs + for d in 1:dh.field_dims[fi] + @debug println(" adding dof#$nextdof") + push!(dh.cell_dofs, nextdof) + nextdof += 1 + end + end # cell loop + end + end # field loop + # push! 
the first index of the next cell to the offset vector + push!(dh.cell_dofs_offset, length(dh.cell_dofs)+1) + end # cell loop + dh.ndofs[] = maximum(dh.cell_dofs) + dh.closed[] = true + + return dh end diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index e99b27f0a4..92383057f8 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -32,10 +32,10 @@ function DofHandler(grid::AbstractGrid{dim}) where {dim} DofHandler(Symbol[], Int[], Interpolation[], BCValues{Float64}[], Int[], Int[], ScalarWrapper(false), grid, Ferrite.ScalarWrapper(-1), Dict{Int,Int}[], Dict{Tuple{Int,Int},Tuple{Int,Bool}}[],Dict{NTuple{dim,Int},Int}[]) end -function Base.show(io::IO, ::MIME"text/plain", dh::Union{DofHandler, DistributedDofHandler}) +function Base.show(io::IO, ::MIME"text/plain", dh::DofHandler) println(io, "DofHandler") println(io, " Fields:") - for i in 1:nfields(dh) + for i in 1:num_fields(dh) println(io, " ", repr(dh.field_names[i]), ", interpolation: ", dh.field_interpolations[i],", dim: ", dh.field_dims[i]) end if !isclosed(dh) @@ -62,7 +62,7 @@ Return the number of degrees of freedom in `dh` ndofs(dh::AbstractDofHandler) = dh.ndofs[] ndofs_per_cell(dh::AbstractDofHandler, cell::Int=1) = dh.cell_dofs_offset[cell+1] - dh.cell_dofs_offset[cell] isclosed(dh::AbstractDofHandler) = dh.closed[] -nfields(dh::AbstractDofHandler) = length(dh.field_names) +num_fields(dh::AbstractDofHandler) = length(dh.field_names) getfieldnames(dh::AbstractDofHandler) = dh.field_names getfieldinterpolation(dh::AbstractDofHandler, field_idx::Int) = dh.field_interpolations[field_idx] getfielddim(dh::AbstractDofHandler, field_idx::Int) = dh.field_dims[field_idx] @@ -165,13 +165,13 @@ function close!(dh::DofHandler) end # close the DofHandler and distribute all the dofs -function __close!(dh::Union{DofHandler{dim}, DistributedDofHandler{dim}}) where {dim} +function __close!(dh::DofHandler{dim}) where {dim} @assert !isclosed(dh) # `vertexdict` keeps track of the visited vertices. We store the global vertex # number and the first dof we added to that vertex. - resize!(dh.vertexdicts, nfields(dh)) - for i in 1:nfields(dh) + resize!(dh.vertexdicts, num_fields(dh)) + for i in 1:num_fields(dh) dh.vertexdicts[i] = Dict{Tuple{Int,Int},Tuple{Int,Bool}}() end @@ -180,8 +180,8 @@ function __close!(dh::Union{DofHandler{dim}, DistributedDofHandler{dim}}) where # of the first edge we encounter and add dofs too. When we encounter the same edge # the next time we check if the direction is the same, otherwise we reuse the dofs # in the reverse order - resize!(dh.edgedicts, nfields(dh)) - for i in 1:nfields(dh) + resize!(dh.edgedicts, num_fields(dh)) + for i in 1:num_fields(dh) dh.edgedicts[i] = Dict{Tuple{Int,Int},Tuple{Int,Bool}}() end @@ -189,8 +189,8 @@ function __close!(dh::Union{DofHandler{dim}, DistributedDofHandler{dim}}) where # added to the face; if we encounter the same face again we *always* reverse the order # In 2D a face (i.e. a line) is uniquely determined by 2 vertices, and in 3D a # face (i.e. a surface) is uniquely determined by 3 vertices. 
- resize!(dh.facedicts, nfields(dh)) - for i in 1:nfields(dh) + resize!(dh.facedicts, num_fields(dh)) + for i in 1:num_fields(dh) dh.facedicts[i] = Dict{NTuple{dim,Int},Int}() end @@ -215,7 +215,7 @@ function __close!(dh::Union{DofHandler{dim}, DistributedDofHandler{dim}}) where # loop over all the cells, and distribute dofs for all the fields for (ci, cell) in enumerate(getcells(getgrid(dh))) @debug println("cell #$ci") - for fi in 1:nfields(dh) + for fi in 1:num_fields(dh) interpolation_info = interpolation_infos[fi] @debug println(" field: $(dh.field_names[fi])") if interpolation_info.nvertexdofs > 0 @@ -314,7 +314,7 @@ function __close!(dh::Union{DofHandler{dim}, DistributedDofHandler{dim}}) where return dh end -function celldofs!(global_dofs::Vector{Int}, dh::Union{DofHandler, DistributedDofHandler}, i::Int) +function celldofs!(global_dofs::Vector{Int}, dh::DofHandler, i::Int) @assert isclosed(dh) @assert length(global_dofs) == ndofs_per_cell(dh, i) unsafe_copyto!(global_dofs, 1, dh.cell_dofs, dh.cell_dofs_offset[i], length(global_dofs)) @@ -341,9 +341,9 @@ function cellcoords!(global_coords::Vector{Vec{dim,T}}, grid::AbstractGrid{dim}, return global_coords end -cellcoords!(global_coords::Vector{<:Vec}, dh::Union{DofHandler, DistributedDofHandler}, i::Int) = cellcoords!(global_coords, getgrid(dh), i) +cellcoords!(global_coords::Vector{<:Vec}, dh::DofHandler, i::Int) = cellcoords!(global_coords, getgrid(dh), i) -function celldofs(dh::Union{DofHandler, DistributedDofHandler}, i::Int) +function celldofs(dh::DofHandler, i::Int) @assert isclosed(dh) n = ndofs_per_cell(dh, i) global_dofs = zeros(Int, n) @@ -457,7 +457,7 @@ Reshape the entries of the dof-vector `u` which correspond to the field `fieldna Return a matrix with a column for every node and a row for every dimension of the field. For superparametric fields only the entries corresponding to nodes of the grid will be returned. Do not use this function for subparametric approximations. """ -function reshape_to_nodes(dh::Union{DofHandler, DistributedDofHandler}, u::Vector{T}, fieldname::Symbol) where T +function reshape_to_nodes(dh::DofHandler, u::Vector{T}, fieldname::Symbol) where T # make sure the field exists fieldname ∈ Ferrite.getfieldnames(dh) || error("Field $fieldname not found.") From 827b356459578554d5c4bf41b4ea37e013085c1f Mon Sep 17 00:00:00 2001 From: termi-official Date: Sun, 9 Oct 2022 21:13:16 +0200 Subject: [PATCH 029/124] Generalize constraint handler. --- src/Dofs/ConstraintHandler.jl | 40 +++++++++++++++++------------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/Dofs/ConstraintHandler.jl b/src/Dofs/ConstraintHandler.jl index a80bc4de15..d3160dc495 100644 --- a/src/Dofs/ConstraintHandler.jl +++ b/src/Dofs/ConstraintHandler.jl @@ -209,7 +209,7 @@ Add a `Dirichlet` boundary condition to the `ConstraintHandler`. 
""" function add!(ch::ConstraintHandler, dbc::Dirichlet) dbc_check(ch, dbc) - celltype = getcelltype(ch.dh.grid) + celltype = getcelltype(getgrid(ch.dh)) @assert isconcretetype(celltype) field_idx = find_field(ch.dh, dbc.field_name) @@ -247,7 +247,7 @@ function add!(ch::ConstraintHandler, newac::AffineConstraint) return ch end -function _add!(ch::ConstraintHandler, dbc::Dirichlet, bcfaces::Set{Index}, interpolation::Interpolation, field_dim::Int, offset::Int, bcvalue::BCValues, cellset::Set{Int}=Set{Int}(1:getncells(ch.dh.grid))) where {Index<:BoundaryIndex} +function _add!(ch::ConstraintHandler, dbc::Dirichlet, bcfaces::Set{Index}, interpolation::Interpolation, field_dim::Int, offset::Int, bcvalue::BCValues, cellset::Set{Int}=Set{Int}(1:getncells(getgrid(ch.dh)))) where {Index<:BoundaryIndex} local_face_dofs, local_face_dofs_offset = _local_face_dofs_for_bc(interpolation, field_dim, dbc.components, offset, boundaryfunction(eltype(bcfaces))) copy!(dbc.local_face_dofs, local_face_dofs) @@ -295,13 +295,13 @@ function _local_face_dofs_for_bc(interpolation, field_dim, components, offset, b return local_face_dofs, local_face_dofs_offset end -function _add!(ch::ConstraintHandler, dbc::Dirichlet, bcnodes::Set{Int}, interpolation::Interpolation, field_dim::Int, offset::Int, bcvalue::BCValues, cellset::Set{Int}=Set{Int}(1:getncells(ch.dh.grid))) - if interpolation !== default_interpolation(typeof(ch.dh.grid.cells[first(cellset)])) +function _add!(ch::ConstraintHandler, dbc::Dirichlet, bcnodes::Set{Int}, interpolation::Interpolation, field_dim::Int, offset::Int, bcvalue::BCValues, cellset::Set{Int}=Set{Int}(1:getncells(getgrid(ch.dh)))) + if interpolation !== default_interpolation(typeof(getcells(getgrid(ch.dh), first(cellset)))) @warn("adding constraint to nodeset is not recommended for sub/super-parametric approximations.") end ncomps = length(dbc.components) - nnodes = getnnodes(ch.dh.grid) + nnodes = getnnodes(getgrid(ch.dh)) interpol_points = getnbasefunctions(interpolation) _celldofs = fill(0, ndofs_per_cell(ch.dh, first(cellset))) node_dofs = zeros(Int, ncomps, nnodes) @@ -361,10 +361,10 @@ function _update!(inhomogeneities::Vector{Float64}, f::Function, faces::Set{<:Bo components::Vector{Int}, dh::AbstractDofHandler, facevalues::BCValues, dofmapping::Dict{Int,Int}, time::T) where {T} - dim = getdim(dh.grid) + dim = getdim(getgrid(dh)) _tmp_cellid = first(faces)[1] - N = nnodes_per_cell(dh.grid, _tmp_cellid) + N = nnodes_per_cell(getgrid(dh), _tmp_cellid) xh = zeros(Vec{dim, T}, N) # pre-allocate _celldofs = fill(0, ndofs_per_cell(dh, _tmp_cellid)) @@ -403,7 +403,7 @@ function _update!(inhomogeneities::Vector{Float64}, f::Function, nodes::Set{Int} dofmapping::Dict{Int,Int}, time::Float64) counter = 1 for (idx, nodenumber) in enumerate(nodeidxs) - x = dh.grid.nodes[nodenumber].x + x = getgrid(dh).nodes[nodenumber].x bc_value = f(x, time) @assert length(bc_value) == length(components) for v in bc_value @@ -427,13 +427,13 @@ function WriteVTK.vtk_point_data(vtkfile, ch::ConstraintHandler) for field in unique_fields nd = ndim(ch.dh, field) - data = zeros(Float64, nd, getnnodes(ch.dh.grid)) + data = zeros(Float64, nd, getnnodes(getgrid(ch.dh))) for dbc in ch.dbcs dbc.field_name != field && continue if eltype(dbc.faces) <: BoundaryIndex functype = boundaryfunction(eltype(dbc.faces)) for (cellidx, faceidx) in dbc.faces - for facenode in functype(ch.dh.grid.cells[cellidx])[faceidx] + for facenode in functype(getcells(getgrid(ch.dh), cellidx))[faceidx] for component in dbc.components data[component, 
facenode] = 1 end @@ -783,9 +783,9 @@ end #Function for adding constraint when using multiple celltypes function add!(ch::ConstraintHandler, fh::FieldHandler, dbc::Dirichlet) - _check_cellset_dirichlet(ch.dh.grid, fh.cellset, dbc.faces) + _check_cellset_dirichlet(getgrid(ch.dh), fh.cellset, dbc.faces) - celltype = getcelltype(ch.dh.grid, first(fh.cellset)) #Assume same celltype of all cells in fh.cellset + celltype = getcelltype(getgrid(ch.dh), first(fh.cellset)) #Assume same celltype of all cells in fh.cellset field_idx = find_field(fh, dbc.field_name) # Extract stuff for the field @@ -813,7 +813,7 @@ end function _check_cellset_dirichlet(grid::AbstractGrid, cellset::Set{Int}, nodeset::Set{Int}) nodes = Set{Int}() for cellid in cellset - for nodeid in grid.cells[cellid].nodes + for nodeid in getcells(grid, cellid).nodes nodeid ∈ nodes || push!(nodes, nodeid) end end @@ -865,8 +865,8 @@ end function _add!(ch::ConstraintHandler, pdbc::PeriodicDirichlet, interpolation::Interpolation, field_dim::Int, offset::Int) - grid = ch.dh.grid - Tx = typeof(first(ch.dh.grid.nodes).x) # Vec{D,T} + grid = getgrid(ch.dh) + Tx = typeof(first(getgrid(ch.dh).nodes).x) # Vec{D,T} # TODO: This requires full periodicity for now. (Why? I don't remember, # but something in the code below assumes this... I think...) @assert length(pdbc.face_pairs) == length(Tx) @@ -891,23 +891,23 @@ function _add!(ch::ConstraintHandler, pdbc::PeriodicDirichlet, interpolation::In mirror_faceset = collect(getfaceset(grid, mirror)) mirror_mean_x = Tx[] # mean face coordinates for (c, f) in mirror_faceset - fn = faces(grid.cells[c])[f] + fn = faces(getcells(grid, c))[f] push!(mirror_mean_x, sum(grid.nodes[i].x for i in fn) / length(fn)) # Also keep track of all nodes for corner finding - union!(all_node_idxs, grid.cells[c].nodes) + union!(all_node_idxs, getcells(grid, c).nodes) end # Same dance for the image image_faceset = collect(getfaceset(grid, image)) image_mean_x = Tx[] for (c, f) in image_faceset - fn = faces(grid.cells[c])[f] + fn = faces(getcells(grid, c))[f] push!(image_mean_x, sum(grid.nodes[i].x for i in fn) / length(fn)) # Also keep track of all nodes for corner finding - union!(all_node_idxs, grid.cells[c].nodes) + union!(all_node_idxs, getcells(grid, c).nodes) end function extract_single_x_on_face(fs) # :) c, f = first(fs) - fn = faces(grid.cells[c])[f] + fn = faces(getcells(grid, c))[f] return grid.nodes[fn[1]].x end # Add a shift to help the tree search From 62a9839c51841967fcbabbc2a736dfe55488c7db Mon Sep 17 00:00:00 2001 From: termi-official Date: Sun, 9 Oct 2022 21:21:27 +0200 Subject: [PATCH 030/124] Derp. --- src/Dofs/DistributedDofHandler.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index aafa2ebb8d..2d4384b46a 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -113,7 +113,7 @@ num_local_dofs(dh::DistributedDofHandler) = length(dh.ldof_to_gdof) """ Compute the number of dofs in the global system. """ -num_global_dofs(dh::DistributedDofHandler) = MPI.Allreduce(nltdofs(dh), MPI.SUM, global_comm(dgrid)) +num_global_dofs(dh::DistributedDofHandler) = MPI.Allreduce(num_local_true_dofs(dh), MPI.SUM, global_comm(dgrid)) """ Renumber the dofs in local ordering to their corresponding global numbering. From c6c7acd4c5484d64fb16ff418daba471d8540abc Mon Sep 17 00:00:00 2001 From: termi-official Date: Sun, 9 Oct 2022 21:24:01 +0200 Subject: [PATCH 031/124] Derp. 
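The dof counters no longer reach for the undefined `dgrid`/`dof_owner` globals; everything
is derived from the handler itself, and `close!` now records the number of locally visible
dofs in `dh.ndofs`. For reference, the intended semantics of the three counters on each
rank, as an illustrative sketch only (assuming `dh.ldof_to_rank` holds the 1-based owning
rank of every locally visible dof):

    my_rank = MPI.Comm_rank(global_comm(dh.grid)) + 1
    num_local_true_dofs(dh)  # == count(==(my_rank), dh.ldof_to_rank), dofs owned by this rank
    num_local_dofs(dh)       # == length(dh.ldof_to_gdof), dofs visible on this rank
    num_global_dofs(dh)      # == MPI.Allreduce(num_local_true_dofs(dh), MPI.SUM, global_comm(dh.grid))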
--- src/Dofs/DistributedDofHandler.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index 2d4384b46a..dddfd548a9 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -103,7 +103,7 @@ end """ Compute the number of dofs owned by the current process. """ -num_local_true_dofs(dh::DistributedDofHandler) = sum(dof_owner.==(MPI.Comm_rank(global_comm(dgrid))+1)) +num_local_true_dofs(dh::DistributedDofHandler) = sum(dh.ldof_to_rank==(MPI.Comm_rank(global_comm(dh.grid))+1)) """ Compute the number of dofs visible to the current process. @@ -113,7 +113,7 @@ num_local_dofs(dh::DistributedDofHandler) = length(dh.ldof_to_gdof) """ Compute the number of dofs in the global system. """ -num_global_dofs(dh::DistributedDofHandler) = MPI.Allreduce(num_local_true_dofs(dh), MPI.SUM, global_comm(dgrid)) +num_global_dofs(dh::DistributedDofHandler) = MPI.Allreduce(num_local_true_dofs(dh), MPI.SUM, global_comm(dh.grid)) """ Renumber the dofs in local ordering to their corresponding global numbering. @@ -309,6 +309,7 @@ function close!(dh::DistributedDofHandler) __close!(dh) append!(dh.ldof_to_gdof, local_to_global_numbering(dh, getglobalgrid(dh))) append!(dh.ldof_to_rank, compute_dof_ownership(dh, getglobalgrid(dh))) + dh.ndofs.x = num_local_dofs(dh) end # TODO this is copy pasta from DofHandler.jl From 3e742da6de476e24c97984f10163318a5525127b Mon Sep 17 00:00:00 2001 From: termi-official Date: Sun, 9 Oct 2022 21:29:06 +0200 Subject: [PATCH 032/124] Add some missing exports. --- src/exports.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/exports.jl b/src/exports.jl index ec50702912..c3fed3485a 100644 --- a/src/exports.jl +++ b/src/exports.jl @@ -122,6 +122,10 @@ export FieldHandler, Field, reshape_to_nodes, + num_fields, + getfieldnames, + dof_range, + entity_dofs, # Constraints ConstraintHandler, From 5ad1c897faf5192e2e7b8708fd429a89e6fa6f7c Mon Sep 17 00:00:00 2001 From: termi-official Date: Sun, 9 Oct 2022 21:32:26 +0200 Subject: [PATCH 033/124] Fix cell iterator interface. --- src/iterators.jl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/iterators.jl b/src/iterators.jl index a643158fbb..67a2a8b620 100644 --- a/src/iterators.jl +++ b/src/iterators.jl @@ -41,15 +41,15 @@ struct CellIterator{dim,C,T,DH<:Union{AbstractDofHandler,Nothing}} dh::Union{DH,Nothing} celldofs::Vector{Int} - function CellIterator{dim,C,T}(dh::Union{DofHandler{dim,T,G},MixedDofHandler{dim,T,G},Nothing}, cellset::Union{AbstractVector{Int},Nothing}, flags::UpdateFlags) where {dim,C,T,G} - isconcretetype(C) || _check_same_celltype(dh.grid, cellset) - N = nnodes_per_cell(dh.grid, cellset === nothing ? 1 : first(cellset)) + function CellIterator{dim,C,T}(dh::Union{DofHandler{dim,T,G},DistributedDofHandler{dim,T,G},MixedDofHandler{dim,T,G},Nothing}, cellset::Union{AbstractVector{Int},Nothing}, flags::UpdateFlags) where {dim,C,T,G} + isconcretetype(C) || _check_same_celltype(getgrid(dh), cellset) + N = nnodes_per_cell(getgrid(dh), cellset === nothing ? 1 : first(cellset)) cell = ScalarWrapper(0) nodes = zeros(Int, N) coords = zeros(Vec{dim,T}, N) n = ndofs_per_cell(dh, cellset === nothing ? 
1 : first(cellset)) celldofs = zeros(Int, n) - return new{dim,C,T,typeof(dh)}(flags, dh.grid, cell, nodes, coords, cellset, dh, celldofs) + return new{dim,C,T,typeof(dh)}(flags, getgrid(dh), cell, nodes, coords, cellset, dh, celldofs) end function CellIterator{dim,C,T}(grid::Grid{dim,C,T}, cellset::Union{AbstractVector{Int},Nothing}, flags::UpdateFlags) where {dim,C,T} @@ -66,6 +66,8 @@ CellIterator(grid::Grid{dim,C,T}, cellset::Union{AbstractVector{Int},Nothing}=no CellIterator{dim,C,T}(grid, cellset, flags) CellIterator(dh::DofHandler{dim,T}, cellset::Union{AbstractVector{Int},Nothing}=nothing, flags::UpdateFlags=UpdateFlags()) where {dim,C,T} = CellIterator{dim,getcelltype(dh.grid),T}(dh, cellset, flags) +CellIterator(dh::DistributedDofHandler{dim,T}, cellset::Union{AbstractVector{Int},Nothing}=nothing, flags::UpdateFlags=UpdateFlags()) where {dim,C,T} = + CellIterator{dim,getcelltype(getlocalgrid(dh)),T}(dh, cellset, flags) CellIterator(dh::MixedDofHandler{dim,T}, cellset::Union{AbstractVector{Int},Nothing}=nothing, flags::UpdateFlags=UpdateFlags()) where {dim,T} = CellIterator{dim,getcelltype(dh.grid),T}(dh, cellset, flags) From 83f3b5c19896c105221cf17291e9413d499784c9 Mon Sep 17 00:00:00 2001 From: termi-official Date: Sun, 9 Oct 2022 21:39:06 +0200 Subject: [PATCH 034/124] Add 'reshape_to_nodes' to distributed dof handler. --- src/Dofs/DistributedDofHandler.jl | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index dddfd548a9..758b5eee95 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -462,3 +462,20 @@ function __close!(dh::DistributedDofHandler{dim}) where {dim} return dh end + +# TODO this is copy pasta from DofHandler.jl +function reshape_to_nodes(dh::DistributedDofHandler, u::Vector{T}, fieldname::Symbol) where T + # make sure the field exists + fieldname ∈ Ferrite.getfieldnames(dh) || error("Field $fieldname not found.") + + field_idx = findfirst(i->i==fieldname, getfieldnames(dh)) + offset = field_offset(dh, fieldname) + field_dim = getfielddim(dh, field_idx) + + space_dim = field_dim == 2 ? 3 : field_dim + data = fill(zero(T), space_dim, getnnodes(getgrid(dh))) + + reshape_field_data!(data, dh, u, offset, field_dim) + + return data +end From 2c23b5b52ed771a6fd122c1887b77220d1f058c0 Mon Sep 17 00:00:00 2001 From: termi-official Date: Sun, 9 Oct 2022 22:08:54 +0200 Subject: [PATCH 035/124] Turn on debug mode. --- src/utils.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.jl b/src/utils.jl index 8da7a34887..d856b78eb4 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,4 +1,4 @@ -const DEBUG = false +const DEBUG = true @static if DEBUG @eval begin From 217594f8bdae2a4e9ecaf3e29166b99e4a209823 Mon Sep 17 00:00:00 2001 From: termi-official Date: Sun, 9 Oct 2022 22:21:04 +0200 Subject: [PATCH 036/124] Minor refactor. 
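`compute_dof_ownership` and `local_to_global_numbering` now pull the distributed grid from
`dh.grid` instead of taking it as a second argument. The affected call sites in `close!`
reduce to (sketch of the two calls):

    append!(dh.ldof_to_gdof, local_to_global_numbering(dh))  # was local_to_global_numbering(dh, getglobalgrid(dh))
    append!(dh.ldof_to_rank, compute_dof_ownership(dh))      # was compute_dof_ownership(dh, getglobalgrid(dh))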
--- src/Dofs/DistributedDofHandler.jl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index 758b5eee95..30a01cf2ba 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -77,7 +77,8 @@ end renumber!(dh::DistributedDofHandler, perm::AbstractVector{<:Integer}) = (@assert false) && "Unimplemented" -function compute_dof_ownership(dh, dgrid) +function compute_dof_ownership(dh) + dgrid = dh.grid my_rank = MPI.Comm_rank(global_comm(dgrid))+1 dof_owner = Vector{Int}(undef,ndofs(dh)) @@ -120,7 +121,8 @@ Renumber the dofs in local ordering to their corresponding global numbering. TODO: Refactor for MixedDofHandler integration """ -function local_to_global_numbering(dh::DistributedDofHandler, dgrid::AbstractDistributedGrid) +function local_to_global_numbering(dh::DistributedDofHandler) + dgrid = dh.grid # MPI rank starting with 1 to match Julia's index convention my_rank = MPI.Comm_rank(global_comm(dgrid))+1 @@ -307,8 +309,8 @@ end function close!(dh::DistributedDofHandler) __close!(dh) - append!(dh.ldof_to_gdof, local_to_global_numbering(dh, getglobalgrid(dh))) - append!(dh.ldof_to_rank, compute_dof_ownership(dh, getglobalgrid(dh))) + append!(dh.ldof_to_gdof, local_to_global_numbering(dh)) + append!(dh.ldof_to_rank, compute_dof_ownership(dh)) dh.ndofs.x = num_local_dofs(dh) end From 602360f1af9ba336bcc1fcf93a90bda063e7c368 Mon Sep 17 00:00:00 2001 From: termi-official Date: Sun, 9 Oct 2022 23:20:30 +0200 Subject: [PATCH 037/124] Fix typo. --- src/Dofs/DistributedDofHandler.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index 30a01cf2ba..37efd6f232 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -104,7 +104,7 @@ end """ Compute the number of dofs owned by the current process. """ -num_local_true_dofs(dh::DistributedDofHandler) = sum(dh.ldof_to_rank==(MPI.Comm_rank(global_comm(dh.grid))+1)) +num_local_true_dofs(dh::DistributedDofHandler) = sum(dh.ldof_to_rank .== (MPI.Comm_rank(global_comm(dh.grid))+1)) """ Compute the number of dofs visible to the current process. From 2447b00156a51770cf8f195512ce88220ce9b1eb Mon Sep 17 00:00:00 2001 From: termi-official Date: Sun, 9 Oct 2022 23:42:27 +0200 Subject: [PATCH 038/124] Add pvtk export. --- src/Export/VTK.jl | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/src/Export/VTK.jl b/src/Export/VTK.jl index ee749d3b3d..e6508a416b 100644 --- a/src/Export/VTK.jl +++ b/src/Export/VTK.jl @@ -25,7 +25,7 @@ which data can be appended to, see `vtk_point_data` and `vtk_cell_data`. 
""" function WriteVTK.vtk_grid(filename::AbstractString, grid::Grid{dim,C,T}; compress::Bool=true) where {dim,C,T} cls = MeshCell[] - for cell in grid.cells + for cell in getcells(grid) celltype = Ferrite.cell_to_vtkcell(typeof(cell)) push!(cls, MeshCell(celltype, nodes_to_vtkorder(cell))) end @@ -34,8 +34,16 @@ function WriteVTK.vtk_grid(filename::AbstractString, grid::Grid{dim,C,T}; compre end function WriteVTK.vtk_grid(filename::AbstractString, dgrid::DistributedGrid{dim,C,T}; compress::Bool=true) where {dim,C,T} - my_rank = MPI.Comm_rank(dgrid.grid_comm) - return vtk_grid("$filename.$my_rank", dgrid.local_grid; compress=compress) + part = MPI.Comm_rank(global_comm(dgrid))+1 + nparts = MPI.Comm_size(global_comm(dgrid)) + grid = getlocalgrid(dgrid) + cls = MeshCell[] + for cell in getcells(grid) + celltype = Ferrite.cell_to_vtkcell(typeof(cell)) + push!(cls, MeshCell(celltype, nodes_to_vtkorder(cell))) + end + coords = reshape(reinterpret(T, getnodes(grid)), (dim, getnnodes(grid))) + return pvtk_grid(filename, coords, cls; part=part, nparts=nparts, compress=compress) end @@ -68,6 +76,15 @@ function WriteVTK.vtk_point_data( return vtk_point_data(vtk, out, name; component_names=component_names(S)) end + +function WriteVTK.vtk_point_data( + vtk::WriteVTK.PVTKFile, + data::Vector{S}, + name::AbstractString + ) where {O, D, T, M, S <: Union{AbstractFloat, Tensor{O, D, T, M}, SymmetricTensor{O, D, T, M}}} + return vtk_point_data(vtk.vtk, data, name) +end + function component_names(::Type{S}) where S names = S <: Vec{1} ? ["x"] : @@ -94,7 +111,7 @@ end Export all cell sets in the grid. Each cell set is exported with `vtk_cell_data` with value 1 if the cell is in the set, and 0 otherwise. """ -function vtk_cellset(vtk::WriteVTK.DatasetFile, grid::AbstractGrid, cellsets=keys(grid.cellsets)) +function vtk_cellset(vtk::WriteVTK.DatasetFile, grid::AbstractGrid, cellsets=keys(getcells(grid)ets)) z = zeros(getncells(grid)) for cellset in cellsets z .= 0.0 @@ -124,3 +141,13 @@ function WriteVTK.vtk_point_data(vtkfile, dh::AbstractDofHandler, u::Vector, suf return vtkfile end + +using PartitionedArrays + +""" +""" +function WriteVTK.vtk_point_data(vtk, dh::AbstractDofHandler, u::PVector) + map_parts(local_view(u, u.rows)) do u_local + vtk_point_data(vtk, dh, u_local) + end +end From 04f3374b216e2d2651e932a7bd3cfcbcedce4273 Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 10 Oct 2022 00:04:23 +0200 Subject: [PATCH 039/124] Add some viz utils to aid debugging. 
--- src/Dofs/DistributedDofHandler.jl | 4 +++ src/Export/VTK.jl | 46 +++++++++++++++++++++++++------ src/Grid/DistributedGrid.jl | 19 +++++++++---- src/exports.jl | 5 ++++ 4 files changed, 61 insertions(+), 13 deletions(-) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index 37efd6f232..57e768cd7f 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -481,3 +481,7 @@ function reshape_to_nodes(dh::DistributedDofHandler, u::Vector{T}, fieldname::Sy return data end + +function WriteVTK.vtk_grid(filename::AbstractString, dh::DistributedDofHandler; compress::Bool=true) + vtk_grid(filename, getglobalgrid(dh); compress=compress) +end diff --git a/src/Export/VTK.jl b/src/Export/VTK.jl index e6508a416b..e9dc6f898d 100644 --- a/src/Export/VTK.jl +++ b/src/Export/VTK.jl @@ -17,6 +17,9 @@ cell_to_vtkcell(::Type{QuadraticTetrahedron}) = VTKCellTypes.VTK_QUADRATIC_TETRA nodes_to_vtkorder(cell::AbstractCell) = collect(cell.nodes) +pvtkwrapper(vtkfile) = vtkfile +pvtkwrapper(pvtkfile::WriteVTK.PVTKFile) = pvtkfile.vtk + """ vtk_grid(filename::AbstractString, grid::Grid) @@ -36,13 +39,12 @@ end function WriteVTK.vtk_grid(filename::AbstractString, dgrid::DistributedGrid{dim,C,T}; compress::Bool=true) where {dim,C,T} part = MPI.Comm_rank(global_comm(dgrid))+1 nparts = MPI.Comm_size(global_comm(dgrid)) - grid = getlocalgrid(dgrid) cls = MeshCell[] - for cell in getcells(grid) + for cell in getcells(dgrid) celltype = Ferrite.cell_to_vtkcell(typeof(cell)) push!(cls, MeshCell(celltype, nodes_to_vtkorder(cell))) end - coords = reshape(reinterpret(T, getnodes(grid)), (dim, getnnodes(grid))) + coords = reshape(reinterpret(T, getnodes(dgrid)), (dim, getnnodes(dgrid))) return pvtk_grid(filename, coords, cls; part=part, nparts=nparts, compress=compress) end @@ -73,16 +75,16 @@ function WriteVTK.vtk_point_data( for i in 1:npoints toparaview!(@view(out[:, i]), data[i]) end - return vtk_point_data(vtk, out, name; component_names=component_names(S)) + return vtk_point_data(pvtkwrapper(vtk), out, name; component_names=component_names(S)) end function WriteVTK.vtk_point_data( - vtk::WriteVTK.PVTKFile, + pvtk::WriteVTK.PVTKFile, data::Vector{S}, name::AbstractString ) where {O, D, T, M, S <: Union{AbstractFloat, Tensor{O, D, T, M}, SymmetricTensor{O, D, T, M}}} - return vtk_point_data(vtk.vtk, data, name) + return vtk_point_data(pvtk.vtk, data, name) end function component_names(::Type{S}) where S @@ -136,7 +138,7 @@ function WriteVTK.vtk_point_data(vtkfile, dh::AbstractDofHandler, u::Vector, suf for name in fieldnames data = reshape_to_nodes(dh, u, name) - vtk_point_data(vtkfile, data, string(name, suffix)) + vtk_point_data(pvtkwrapper(vtkfile), data, string(name, suffix)) end return vtkfile @@ -148,6 +150,34 @@ using PartitionedArrays """ function WriteVTK.vtk_point_data(vtk, dh::AbstractDofHandler, u::PVector) map_parts(local_view(u, u.rows)) do u_local - vtk_point_data(vtk, dh, u_local) + vtk_point_data(pvtkwrapper(vtk), dh, u_local) + end +end + +""" +Enrich the VTK file with meta information about shared vertices. 
+""" +function vtk_shared_vertices(vtk, dgrid::DistributedGrid) + u = Vector{Float64}(undef, getnnodes(dgrid)) + my_rank = MPI.Comm_rank(global_comm(dgrid))+1 + for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) + fill!(u, 0.0) + for sv ∈ values(get_shared_vertices(dgrid)) + if haskey(sv.remote_vertices, rank) + (cellidx, i) = sv.local_idx + cell = getcells(dgrid, cellidx) + u[vertices(cell)[i]] = rank + end + end + vtk_point_data(pvtkwrapper(vtk), u, "shared vertices of $my_rank") end end + +""" +Enrich the VTK file with partitioning meta information. +""" +function vtk_partitioning(vtk, dgrid::DistributedGrid) + u = Vector{Float64}(undef, getncells(dgrid)) + u .= MPI.Comm_rank(global_comm(dgrid))+1 + vtk_cell_data(pvtkwrapper(vtk), u, "partitioning") +end diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index a9b4d864ba..77379e1200 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -54,21 +54,25 @@ mutable struct DistributedGrid{dim,C<:AbstractCell,T<:Real} <: AbstractDistribut shared_faces::Dict{FaceIndex,SharedFace} end +@inline get_shared_vertices(dgrid::AbstractDistributedGrid) = dgrid.shared_vertices +@inline get_shared_edges(dgrid::AbstractDistributedGrid) = dgrid.shared_edges +@inline get_shared_faces(dgrid::AbstractDistributedGrid) = dgrid.shared_faces + """ """ -is_shared_vertex(dgrid::AbstractDistributedGrid, vi::VertexIndex) = haskey(dgrid.shared_vertices, vi) +@inline is_shared_vertex(dgrid::AbstractDistributedGrid, vi::VertexIndex) = haskey(dgrid.shared_vertices, vi) """ Global dense communicator of the distributed grid. """ -global_comm(dgrid::AbstractDistributedGrid) = dgrid.grid_comm +@inline global_comm(dgrid::AbstractDistributedGrid) = dgrid.grid_comm """ Graph communicator for shared vertices. Guaranteed to be derived from the communicator returned by @global_comm . """ -vertex_comm(dgrid::AbstractDistributedGrid) = dgrid.interface_comm +@inline vertex_comm(dgrid::AbstractDistributedGrid) = dgrid.interface_comm """ """ @@ -303,11 +307,16 @@ end @inline getlocalgrid(dgrid::AbstractDistributedGrid) = dgrid.local_grid +@inline getnodes(dgrid::AbstractDistributedGrid) = getnodes(getlocalgrid(dgrid)) +@inline getnodes(grid::AbstractDistributedGrid, v::Union{Int, Vector{Int}}) = getnodes(getlocalgrid(dgrid), v) +@inline getnodes(grid::AbstractDistributedGrid, setname::String) = getnodes(getlocalgrid(dgrid), setname) +@inline getnnodes(dgrid::AbstractDistributedGrid) = getnnodes(getlocalgrid(dgrid)) + @inline getcells(dgrid::AbstractDistributedGrid) = getcells(getlocalgrid(dgrid)) @inline getcells(dgrid::AbstractDistributedGrid, v::Union{Int, Vector{Int}}) = getcells(getlocalgrid(dgrid),v) @inline getcells(dgrid::AbstractDistributedGrid, setname::String) = getcells(getlocalgrid(dgrid),setname) "Returns the number of cells in the `<:AbstractDistributedGrid`." -@inline getncells(dgrid::AbstractDistributedGrid) = length(getcells(getlocalgrid(dgrid))) +@inline getncells(dgrid::AbstractDistributedGrid) = getncells(getlocalgrid(dgrid)) "Returns the celltype of the `<:AbstractDistributedGrid`." 
@inline getcelltype(dgrid::AbstractDistributedGrid) = eltype(getcells(getlocalgrid(dgrid))) @inline getcelltype(dgrid::AbstractDistributedGrid, i::Int) = typeof(getcells(getlocalgrid(dgrid),i)) @@ -316,4 +325,4 @@ end function compute_owner(dgrid::AbstractDistributedGrid, shared_entity::SharedEntity)::Int32 my_rank = MPI.Comm_rank(global_comm(dgrid))+1 # Shift rank up by 1 to match Julia's indexing convention return minimum([my_rank; [remote_rank for (remote_rank, _) ∈ remote_entities(shared_entity)]]) -end +end \ No newline at end of file diff --git a/src/exports.jl b/src/exports.jl index c3fed3485a..8a4e257ee7 100644 --- a/src/exports.jl +++ b/src/exports.jl @@ -97,6 +97,9 @@ export generate_distributed_grid, compute_vertex_values, is_shared_vertex, + get_shared_vertices, + get_shared_faces, + get_shared_edges, # Grid coloring create_coloring, @@ -158,6 +161,8 @@ export vtk_nodeset, vtk_cellset, vtk_save, + vtk_shared_vertices, + vtk_partitioning, # L2 Projection project, From dc91cfc8440d073fcac9f999eeabff55de2bf35c Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 10 Oct 2022 01:01:55 +0200 Subject: [PATCH 040/124] Refactor DBC management from docs to Ferrite.jl. --- src/Dofs/ConstraintHandler.jl | 87 +++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/src/Dofs/ConstraintHandler.jl b/src/Dofs/ConstraintHandler.jl index d3160dc495..08941f7139 100644 --- a/src/Dofs/ConstraintHandler.jl +++ b/src/Dofs/ConstraintHandler.jl @@ -1057,3 +1057,90 @@ end function mirror_local_dofs(_, _, ::Lagrange{3}) error("not implemented yet, please contribute :)") end + +using PartitionedArrays + +""" +Poor man's Dirichlet BC application for PartitionedArrays. :) + + TODO integrate with constraints. +""" +function apply_zero!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) + map_parts(local_view(f, f.rows), f.rows.partition) do f_local, partition + f_local[ch.prescribed_dofs] .= 0.0 + end + + map_parts(local_view(K, K.rows, K.cols)) do K_local + for cdof in ch.prescribed_dofs + K_local[cdof, :] .= 0.0 + K_local[:, cdof] .= 0.0 + K_local[cdof, cdof] = 1.0 + end + end +end + +""" +Poor man's Dirichlet BC application for PartitionedArrays. :) + + TODO integrate with constraints. + TODO optimize. +""" +function apply!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) + map_parts(local_view(f, f.rows), f.rows.partition) do f_local, partition + # Note: RHS only non-zero for owned RHS entries + f_local[ch.prescribed_dofs] .= ch.inhomogeneities .* map(p -> p == partition.part, partition.lid_to_part[ch.prescribed_dofs]) + end + + # Zero out locally visible rows and columns + map_parts(local_view(K, K.rows, K.cols)) do K_local + for cdof ∈ ch.prescribed_dofs + K_local[cdof, :] .= 0.0 + K_local[:, cdof] .= 0.0 + K_local[cdof, cdof] = 1.0 + end + end + + # Zero out columns associated to the ghost dofs constrained on a remote process + # TODO optimize + + # Step 1: Send out all local ghosts to all other processes... 
+ remote_ghost_gdofs, remote_ghost_parts = map_parts(K.cols.partition) do partition + remote_ghost_ldofs = partition.hid_to_lid + remote_ghost_parts = partition.lid_to_part[remote_ghost_ldofs] + remote_ghost_gdofs = partition.lid_to_gid[remote_ghost_ldofs] + return (remote_ghost_gdofs, remote_ghost_parts) + end + + comm = remote_ghost_parts.comm + my_rank = MPI.Comm_rank(comm)+1 + buffer_sizes_send = zeros(Cint, MPI.Comm_size(comm)) + buffer_sizes_recv = Vector{Cint}(undef, MPI.Comm_size(comm)) + for part ∈ remote_ghost_parts.part + buffer_sizes_send[part] += 1 + end + MPI.Alltoall!(UBuffer(buffer_sizes_send, 1), UBuffer(buffer_sizes_recv, 1), comm) + @debug println("Got $buffer_sizes_recv (R$my_rank)") + + remote_ghosts_recv = Vector{Int}(undef, sum(buffer_sizes_recv)) + MPI.Alltoallv!(VBuffer(remote_ghost_gdofs.part, buffer_sizes_send), VBuffer(remote_ghosts_recv, buffer_sizes_recv), comm) + @debug println("Got $remote_ghosts_recv (R$my_rank)") + + # Step 2: Union with all locally constrained dofs + remote_ghosts_constrained_send = copy(remote_ghosts_recv) + for (i, remote_ghost_dof) ∈ enumerate(remote_ghosts_recv) + remote_ghosts_constrained_send[i] = remote_ghost_dof ∈ K.cols.partition.part.lid_to_gid[ch.prescribed_dofs] + end + + # Step 3: Send trash back + remote_ghosts_constrained_recv = Vector{Int}(undef, sum(buffer_sizes_send)) + MPI.Alltoallv!(VBuffer(remote_ghosts_constrained_send, buffer_sizes_recv), VBuffer(remote_ghosts_constrained_recv, buffer_sizes_send), comm) + + @debug println("$my_rank : remote constraints on $(remote_ghost_gdofs.part[remote_ghosts_constrained_recv .== 1])") + + # Step 4: Constrain remaining columns + map_parts(local_view(K, K.rows, K.cols), K.cols.partition) do K_local, partition + for cdof ∈ partition.hid_to_lid[remote_ghosts_constrained_recv .== 1] + K_local[:, cdof] .= 0.0 + end + end +end From 5429962fe944613b61e9d467e43fd72bd1ecd57d Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 10 Oct 2022 01:35:09 +0200 Subject: [PATCH 041/124] Factor out COO assembly infrastructure. --- src/assembler.jl | 262 +++++++++++++++++++++++++++++++++++++++++++++++ src/exports.jl | 1 + 2 files changed, 263 insertions(+) diff --git a/src/assembler.jl b/src/assembler.jl index b99b74f0af..67743ae5fc 100644 --- a/src/assembler.jl +++ b/src/assembler.jl @@ -208,3 +208,265 @@ function InsertionSort!(A, order, ii=1, jj=length(A)) end # i return end + +using PartitionedArrays + +""" +Simplest partitioned assembler in COO format to obtain a PSparseMatrix and a PVector. +""" +struct PartitionedArraysCOOAssembler{T} + I::Vector{Int} + J::Vector{Int} + V::Vector{T} + + cols + rows + f::PVector + + unique_ghosts_dre + dh + + # TODO PartitionedArrays backend as additional input arg + PartitionedArraysCOOAssembler(dh::DistributedDofHandler) = PartitionedArraysCOOAssembler{Float64}(dh) + + # TODO PartitionedArrays backend as additional input arg + function PartitionedArraysCOOAssembler{T}(dh::DistributedDofHandler) where {T} + ldof_to_gdof = dh.ldof_to_gdof + ldof_to_rank = dh.ldof_to_rank + nldofs = num_local_dofs(dh) + ngdofs = num_global_dofs(dh) + dgrid = getglobalgrid(dh) + + I = Int[] + J = Int[] + V = T[] + sizehint!(I, nldofs) + sizehint!(J, nldofs) + sizehint!(V, nldofs) + + # @TODO the code below can be massively simplified by introducing a ghost layer to the + # distributed grid, which can efficiently precompute some of the values below. + comm = global_comm(dgrid) + np = MPI.Comm_size(comm) + my_rank = MPI.Comm_rank(comm)+1 + + @debug println("starting assembly... 
(R$my_rank)") + + # Neighborhood graph + # @TODO cleanup old code below and use graph primitives instead. + (source_len, destination_len, _) = MPI.Dist_graph_neighbors_count(vertex_comm(dgrid)) + sources = Vector{Cint}(undef, source_len) + destinations = Vector{Cint}(undef, destination_len) + MPI.Dist_graph_neighbors!(vertex_comm(dgrid), sources, destinations) + + # Adjust to Julia index convention + sources .+= 1 + destinations .+= 1 + + @debug println("Neighborhood | $sources | $destinations (R$my_rank)") + + # Invert the relations to clarify the code + source_index = Dict{Cint, Int}() + for (i,remote_rank) ∈ enumerate(sources) + source_index[remote_rank] = i + end + destination_index = Dict{Int, Cint}() + for (i,remote_rank) ∈ enumerate(destinations) + destination_index[remote_rank] = i + end + + # Note: We assume a symmetric neighborhood for now... this may not be true in general. + neighbors = MPIData(Int32.(sources), comm, (np,)) + + # Extract locally owned dofs + ltdof_indices = ldof_to_rank.==my_rank + ltdof_to_gdof = ldof_to_gdof[ltdof_indices] + + @debug println("ltdof_to_gdof $ltdof_to_gdof (R$my_rank)") + @debug println("ldof_to_gdof $ldof_to_gdof (R$my_rank)") + @debug println("ldof_to_rank $ldof_to_rank (R$my_rank)") + + # Process owns rows of owned dofs. The process also may write to some remote dofs, + # which correspond to non-owned share entities. Here we construct the rows for the + # distributed matrix. + # We decide for row (i.e. test function) ownership, because it the image of + # SpMV is process local. + row_indices = PartitionedArrays.IndexSet(my_rank, ldof_to_gdof, Int32.(ldof_to_rank)) + #FIXME: This below must be fixed before we can assemble to HYPRE IJ. Problem seems to be that rows and cols must be continuously assigned. + #row_indices = PartitionedArrays.IndexRange(my_rank, length(ltdof_indices), ltdof_to_gdof[1], ldof_to_gdof[.!ltdof_indices], Int32.(ldof_to_rank[.!ltdof_indices])) + row_data = MPIData(row_indices, comm, (np,)) + row_exchanger = Exchanger(row_data,neighbors) + rows = PRange(ngdofs,row_data,row_exchanger) + + @debug println("rows done (R$my_rank)") + + # For the locally visible columns we also have to take into account that remote + # processes will write their data in some of these, because their remotely + # owned trial functions overlap with the locally owned test functions. 
+ ghost_dof_to_global = Int[] + ghost_dof_rank = Int32[] + + # ------------ Ghost dof synchronization ---------- + # Prepare sending ghost dofs to neighbors + #@TODO move relevant parts into dof handler + #@TODO communication can be optimized by deduplicating entries in, and compressing the following arrays + #@TODO reorder communication by field to eliminate need for `ghost_dof_field_index_to_send` + ghost_dof_to_send = [Int[] for i ∈ 1:destination_len] # global dof id + ghost_rank_to_send = [Int[] for i ∈ 1:destination_len] # rank of dof + ghost_dof_field_index_to_send = [Int[] for i ∈ 1:destination_len] + ghost_element_to_send = [Int[] for i ∈ 1:destination_len] # corresponding element + ghost_dof_owner = [Int[] for i ∈ 1:destination_len] # corresponding owner + ghost_dof_pivot_to_send = [Int[] for i ∈ 1:destination_len] # corresponding dof to interact with + for shared_entity_set ∈ [dgrid.shared_vertices, dgrid.shared_faces, dgrid.shared_edges] + for (pivot_entity, pivot_shared_entity) ∈ shared_entity_set + # Start by searching shared entities which are not owned + pivot_entity_owner_rank = Ferrite.compute_owner(dgrid, pivot_shared_entity) + pivot_cell_idx = pivot_entity[1] + + if my_rank != pivot_entity_owner_rank + sender_slot = destination_index[pivot_entity_owner_rank] + + @debug println("$pivot_entity may require synchronization (R$my_rank)") + # Note: We have to send ALL dofs on the element to the remote. + cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] + cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] + + pivot_entity_global = Ferrite.toglobal(getlocalgrid(dgrid), pivot_entity) + + for (field_idx, field_name) in zip(1:num_fields(dh), Ferrite.getfieldnames(dh)) + pivot_entity_dof = Ferrite.entity_dofs(dh, field_idx, pivot_entity_global) + # Extract dofs belonging to the current field + cell_field_dofs = cell_dofs[Ferrite.dof_range(dh, field_name)] + for cell_field_dof ∈ cell_field_dofs + append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_entity_dof]) + append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_field_dof]) + append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_field_dof]) + append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + append!(ghost_element_to_send[sender_slot], pivot_cell_idx) + end + end + end + end + end + + ghost_send_buffer_lengths = Int[length(i) for i ∈ ghost_element_to_send] + ghost_recv_buffer_lengths = zeros(Int, destination_len) + MPI.Neighbor_alltoall!(UBuffer(ghost_send_buffer_lengths,1), UBuffer(ghost_recv_buffer_lengths,1), vertex_comm(dgrid)); + @debug for (i,ghost_recv_buffer_length) ∈ enumerate(ghost_recv_buffer_lengths) + println("receiving $ghost_recv_buffer_length ghosts from $(sources[i]) (R$my_rank)") + end + + # Communicate ghost information + # @TODO coalesce communication + ghost_send_buffer_dofs = vcat(ghost_dof_to_send...) + ghost_recv_buffer_dofs = zeros(Int, sum(ghost_recv_buffer_lengths)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_dofs,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_dofs,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + + ghost_send_buffer_elements = vcat(ghost_element_to_send...) 
+ ghost_recv_buffer_elements = zeros(Int, sum(ghost_recv_buffer_lengths)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_elements,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_elements,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + + ghost_send_buffer_fields = vcat(ghost_dof_field_index_to_send...) + ghost_recv_buffer_fields = zeros(Int, sum(ghost_recv_buffer_lengths)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_fields,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_fields,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + + ghost_send_buffer_ranks = vcat(ghost_rank_to_send...) + ghost_recv_buffer_ranks = zeros(Int, sum(ghost_recv_buffer_lengths)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_ranks,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_ranks,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + + ghost_send_buffer_dofs_piv = vcat(ghost_dof_pivot_to_send...) + ghost_recv_buffer_dofs_piv = zeros(Int, sum(ghost_recv_buffer_lengths)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_dofs_piv,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_dofs_piv,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + + # Reconstruct source ranks + ghost_recv_buffer_source_ranks = Int[] + for (source_idx, recv_len) ∈ enumerate(ghost_recv_buffer_lengths) + append!(ghost_recv_buffer_source_ranks, ones(recv_len)*sources[source_idx]) + end + + @debug println("received $ghost_recv_buffer_dofs with owners $ghost_recv_buffer_ranks (R$my_rank)") + + unique_ghosts_dr = sort(unique(first,zip(ghost_recv_buffer_dofs,ghost_recv_buffer_ranks))) + # unzip manually and make sure we do not add duplicate entries to our columns + for (dof,rank) ∈ unique_ghosts_dr + if rank != my_rank && dof ∉ ldof_to_gdof + push!(ghost_dof_to_global, dof) + push!(ghost_dof_rank, rank) + end + end + + # ------------- Construct rows and cols of distributed matrix -------- + all_local_cols = Int[ldof_to_gdof; ghost_dof_to_global] + all_local_col_ranks = Int32[ldof_to_rank; ghost_dof_rank] + @debug println("all_local_cols $all_local_cols (R$my_rank)") + @debug println("all_local_col_ranks $all_local_col_ranks (R$my_rank)") + + col_indices = PartitionedArrays.IndexSet(my_rank, all_local_cols, all_local_col_ranks) + #FIXME: This below must be fixed before we can assemble to HYPRE IJ. Problem seems to be that rows and cols must be continuously assigned. 
+ #col_indices = PartitionedArrays.IndexRange(my_rank, length(ltdof_indices), ltdof_to_gdof[1], all_local_cols[all_local_col_ranks .!= my_rank], Int32.(all_local_col_ranks[all_local_col_ranks .!= my_rank])) + col_data = MPIData(col_indices, comm, (np,)) + col_exchanger = Exchanger(col_data,neighbors) + cols = PRange(ngdofs,col_data,col_exchanger) + + @debug println("cols and rows constructed (R$my_rank)") + + f = PartitionedArrays.PVector(0.0,rows) + @debug println("f constructed (R$my_rank)") + + unique_ghosts_dre = zip(ghost_recv_buffer_dofs_piv, ghost_recv_buffer_dofs, ghost_recv_buffer_ranks) + @debug println("unique_ghosts_dre $unique_ghosts_dre (R$my_rank)") + + return new(I, J, V, cols, rows, f, unique_ghosts_dre, dh) + end +end + +@propagate_inbounds function assemble!(a::PartitionedArraysCOOAssembler{T}, edof::AbstractVector{Int}, Ke::AbstractMatrix{T}) where {T} + n_dofs = length(edof) + append!(a.V, Ke) + @inbounds for j in 1:n_dofs + append!(a.I, edof) + for i in 1:n_dofs + push!(a.J, edof[j]) + end + end +end + +@propagate_inbounds function assemble!(a::PartitionedArraysCOOAssembler{T}, dofs::AbstractVector{Int}, fe::AbstractVector{T}, Ke::AbstractMatrix{T}) where {T} + Ferrite.assemble!(a, dofs, Ke) + map_parts(local_view(a.f, a.f.rows)) do f_local + Ferrite.assemble!(f_local, dofs, fe) + end +end + +function end_assemble(assembler::PartitionedArraysCOOAssembler{T}) where {T} + comm = global_comm(getglobalgrid(assembler.dh)) + np = MPI.Comm_size(comm) + my_rank = MPI.Comm_rank(comm)+1 + + # --------------------- Add ghost entries in IJ -------------------- + I = map(i->assembler.dh.ldof_to_gdof[i], assembler.I) + J = map(j->assembler.dh.ldof_to_gdof[j], assembler.J) + V = map(v->v, assembler.V) + + # Fix ghost layer! Note that the locations for remote processes to write their + # data into are missing up to this point. + for (i,(pivot_dof, global_ghost_dof, ghost_owner_rank)) ∈ enumerate(assembler.unique_ghosts_dre) + push!(I, pivot_dof) + push!(J, global_ghost_dof) + push!(V, 0.0) + end + + @debug println("I=$(I) (R$my_rank)") + @debug println("J=$(J) (R$my_rank)") + K = PartitionedArrays.PSparseMatrix( + MPIData(I, comm, (np,)), + MPIData(J, comm, (np,)), + MPIData(V, comm, (np,)), + assembler.rows, assembler.cols, ids=:global + ) + + PartitionedArrays.assemble!(K) + PartitionedArrays.assemble!(assembler.f) + + return K, assembler.f +end diff --git a/src/exports.jl b/src/exports.jl index 8a4e257ee7..71264a6b10 100644 --- a/src/exports.jl +++ b/src/exports.jl @@ -153,6 +153,7 @@ export start_assemble, assemble!, end_assemble, + PartitionedArraysCOOAssembler, # VTK export vtk_grid, From 3b512651a73b923518e5030c941194ca2e573e75 Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 10 Oct 2022 02:20:06 +0200 Subject: [PATCH 042/124] Clean up example. --- docs/src/literate/distributed_assembly.jl | 560 +++------------------- 1 file changed, 57 insertions(+), 503 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 02bf83bea2..582c6f1852 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -1,34 +1,12 @@ -# # Heat Equation -# -# ![](heat_square.png) -# -# *Figure 1*: Temperature field on the unit square with an internal uniform heat source -# solved with homogeneous Dirichlet boundary conditions on the boundary. -# +# # Distributed Assembly of Heat Equation # # ## Introduction # -# The heat equation is the "Hello, world!" equation of finite elements. 
-# Here we solve the equation on a unit square, with a uniform internal source. -# The strong form of the (linear) heat equation is given by -# -# ```math -# -\nabla \cdot (k \nabla u) = f \quad x \in \Omega, -# ``` +# Now we want to solve the heat problem in parallel. To be specific, this example shows +# how to utilize process parallelism to assemble finite element matrices in parallel. +# This example presumes that the reader is familiar with solving the heat problem in +# serial with Ferrite.jl, as presented in [the first example](@ref heat_example). # -# where $u$ is the unknown temperature field, $k$ the heat conductivity, -# $f$ the heat source and $\Omega$ the domain. For simplicity we set $f = 1$ -# and $k = 1$. We will consider homogeneous Dirichlet boundary conditions such that -# ```math -# u(x) = 0 \quad x \in \partial \Omega, -# ``` -# where $\partial \Omega$ denotes the boundary of $\Omega$. -# -# The resulting weak form is given by -# ```math -# \int_{\Omega} \nabla v \cdot \nabla u \ d\Omega = \int_{\Omega} v \ d\Omega, -# ``` -# where $v$ is a suitable test function. #- # ## Commented Program # @@ -36,107 +14,18 @@ #md # The full program, without comments, can be found in the next [section](@ref heat_equation-plain-program). # # First we load Ferrite, and some other packages we need -using Ferrite, SparseArrays, MPI, PartitionedArrays, IterativeSolvers, HYPRE - -macro debug(ex) - return :($(esc(ex))) -end - -# @TODO contribute diagnostics upstream -function PartitionedArrays.matrix_exchanger(values,row_exchanger,row_lids,col_lids) - part = get_part_ids(row_lids) - parts_rcv = row_exchanger.parts_rcv - parts_snd = row_exchanger.parts_snd - - function setup_rcv(part,parts_rcv,row_lids,col_lids,values) - owner_to_i = Dict(( owner=>i for (i,owner) in enumerate(parts_rcv) )) - ptrs = zeros(Int32,length(parts_rcv)+1) - for (li,lj,v) in nziterator(values) - owner = row_lids.lid_to_part[li] - if owner != part - ptrs[owner_to_i[owner]+1] +=1 - end - end - length_to_ptrs!(ptrs) - k_rcv_data = zeros(Int,ptrs[end]-1) - gi_rcv_data = zeros(Int,ptrs[end]-1) - gj_rcv_data = zeros(Int,ptrs[end]-1) - for (k,(li,lj,v)) in enumerate(nziterator(values)) - owner = row_lids.lid_to_part[li] - if owner != part - p = ptrs[owner_to_i[owner]] - k_rcv_data[p] = k - gi_rcv_data[p] = row_lids.lid_to_gid[li] - gj_rcv_data[p] = col_lids.lid_to_gid[lj] - ptrs[owner_to_i[owner]] += 1 - end - end - rewind_ptrs!(ptrs) - k_rcv = Table(k_rcv_data,ptrs) - gi_rcv = Table(gi_rcv_data,ptrs) - gj_rcv = Table(gj_rcv_data,ptrs) - k_rcv, gi_rcv, gj_rcv - end - - k_rcv, gi_rcv, gj_rcv = map_parts(setup_rcv,part,parts_rcv,row_lids,col_lids,values) - - gi_snd = exchange(gi_rcv,parts_snd,parts_rcv) - gj_snd = exchange(gj_rcv,parts_snd,parts_rcv) - - function setup_snd(part,row_lids,col_lids,gi_snd,gj_snd,values) - ptrs = gi_snd.ptrs - k_snd_data = zeros(Int,ptrs[end]-1) - for p in 1:length(gi_snd.data) - gi = gi_snd.data[p] - gj = gj_snd.data[p] - li = row_lids.gid_to_lid[gi] - lj = col_lids.gid_to_lid[gj] - k = nzindex(values,li,lj) - PartitionedArrays.@check k > 0 "The sparsity pattern of the ghost layer is inconsistent - $part | ($li, $lj) | ($gi, $gj)" - k_snd_data[p] = k - end - k_snd = Table(k_snd_data,ptrs) - k_snd - end - - k_snd = map_parts(setup_snd,part,row_lids,col_lids,gi_snd,gj_snd,values) - - Exchanger(parts_rcv,parts_snd,k_rcv,k_snd) -end +using Ferrite, MPI #, PartitionedArrays +using IterativeSolvers #, HYPRE # Launch MPI MPI.Init() -# We start generating a simple grid with 20x20 quadrilateral elements 
-# using `generate_grid`. The generator defaults to the unit square, -# so we don't need to specify the corners of the domain. +# We start generating a simple grid with 20x20 quadrilateral elements +# and distribute it across our processors using `generate_distributed_grid`. dgrid = generate_distributed_grid(Quadrilateral, (20, 20)); -# TODO refactor this into a utility function -@debug vtk_grid("grid", dgrid; compress=false) do vtk - u = Vector{Float64}(undef,length(dgrid.local_grid.nodes)) - for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) - fill!(u, 0.0) - for sv ∈ values(dgrid.shared_vertices) - if haskey(sv.remote_vertices,rank) - (cellidx,i) = sv.local_idx - nodeidx = dgrid.local_grid.cells[cellidx].nodes[i] - u[nodeidx] = rank - end - end - vtk_point_data(vtk, u,"sv $rank") - end -end - # ### Trial and test functions -# A `CellValues` facilitates the process of evaluating values and gradients of -# test and trial functions (among other things). Since the problem -# is a scalar problem we will use a `CellScalarValues` object. To define -# this we need to specify an interpolation space for the shape functions. -# We use Lagrange functions (both for interpolating the function and the geometry) -# based on the reference "cube". We also define a quadrature rule based on the -# same reference cube. We combine the interpolation and the quadrature rule -# to a `CellScalarValues` object. +# Nothing changes here. dim = 2 ip = Lagrange{dim, RefCube, 1}() ip_geo = Lagrange{dim, RefCube, 1}() @@ -144,265 +33,47 @@ qr = QuadratureRule{dim, RefCube}(3) cellvalues = CellScalarValues(qr, ip, ip_geo); # ### Degrees of freedom -# Next we need to define a `DofHandler`, which will take care of numbering -# and distribution of degrees of freedom for our approximated fields. -# We create the `DofHandler` and then add a single field called `u`. -# Lastly we `close!` the `DofHandler`, it is now that the dofs are distributed -# for all the elements. +# To handle the dofs correctly we now utilize the `DistributedDofHandle` +# instead of the `DofHandler`. For the user the interface is the same. dh = DistributedDofHandler(dgrid) push!(dh, :u, 1, ip) close!(dh); # ### Boundary conditions -# In Ferrite constraints like Dirichlet boundary conditions -# are handled by a `ConstraintHandler`. +# Nothing has to be changed here either. ch = ConstraintHandler(dh); - -# Next we need to add constraints to `ch`. For this problem we define -# homogeneous Dirichlet boundary conditions on the whole boundary, i.e. -# the `union` of all the face sets on the boundary. ∂Ω = union(getfaceset.((getlocalgrid(dgrid), ), ["left", "right", "top", "bottom"])...); - -# Now we are set up to define our constraint. We specify which field -# the condition is for, and our combined face set `∂Ω`. The last -# argument is a function which takes the spatial coordinate $x$ and -# the current time $t$ and returns the prescribed value. In this case -# it is trivial -- no matter what $x$ and $t$ we return $0$. When we have -# specified our constraint we `add!` it to `ch`. dbc = Dirichlet(:u, ∂Ω, (x, t) -> 1) add!(ch, dbc); - -# We also need to `close!` and `update!` our boundary conditions. When we call `close!` -# the dofs which will be constrained by the boundary conditions are calculated and stored -# in our `ch` object. Since the boundary conditions are, in this case, -# independent of time we can `update!` them directly with e.g. $t = 0$. 
close!(ch) update!(ch, 0.0); # ### Assembling the linear system -# Now we have all the pieces needed to assemble the linear system, $K u = f$. -# We define a function, `doassemble` to do the assembly, which takes our `cellvalues`, -# the sparse matrix and our DofHandler as input arguments. The function returns the -# assembled stiffness matrix, and the force vector. +# Assembling the system works also mostly analogue. function doassemble(cellvalues::CellScalarValues{dim}, dh::DistributedDofHandler) where {dim} - ldof_to_gdof = dh.ldof_to_gdof - ldof_to_rank = dh.ldof_to_rank - ngdofs = num_global_dofs(dh) - dgrid = getglobalgrid(dh) - - # We allocate the element stiffness matrix and element force vector - # just once before looping over all the cells instead of allocating - # them every time in the loop. - #+ n_basefuncs = getnbasefunctions(cellvalues) Ke = zeros(n_basefuncs, n_basefuncs) fe = zeros(n_basefuncs) - # @TODO put the code below into a "distributed assembler" struct and functions - # @TODO the code below can be massively simplified by introducing a ghost layer to the - # distributed grid, which can efficiently precompute some of the values below. - comm = global_comm(dgrid) - np = MPI.Comm_size(comm) - my_rank = MPI.Comm_rank(comm)+1 - - @debug println("starting assembly... (R$my_rank)") - - # Neighborhood graph - # @TODO cleanup old code below and use graph primitives instead. - (source_len, destination_len, _) = MPI.Dist_graph_neighbors_count(vertex_comm(dgrid)) - sources = Vector{Cint}(undef, source_len) - destinations = Vector{Cint}(undef, destination_len) - MPI.Dist_graph_neighbors!(vertex_comm(dgrid), sources, destinations) - - # Adjust to Julia index convention - sources .+= 1 - destinations .+= 1 - - @debug println("Neighborhood | $sources | $destinations (R$my_rank)") - - # Invert the relations to clarify the code - source_index = Dict{Cint, Int}() - for (i,remote_rank) ∈ enumerate(sources) - source_index[remote_rank] = i - end - destination_index = Dict{Int, Cint}() - for (i,remote_rank) ∈ enumerate(destinations) - destination_index[remote_rank] = i - end - - # Note: We assume a symmetric neighborhood for now... this may not be true in general. - neighbors = MPIData(Int32.(sources), comm, (np,)) - - # Extract locally owned dofs - ltdof_indices = ldof_to_rank.==my_rank - ltdof_to_gdof = ldof_to_gdof[ltdof_indices] - - @debug println("ltdof_to_gdof $ltdof_to_gdof (R$my_rank)") - @debug println("ldof_to_gdof $ldof_to_gdof (R$my_rank)") - @debug println("ldof_to_rank $ldof_to_rank (R$my_rank)") - - # Process owns rows of owned dofs. The process also may write to some remote dofs, - # which correspond to non-owned share entities. Here we construct the rows for the - # distributed matrix. - # We decide for row (i.e. test function) ownership, because it the image of - # SpMV is process local. - row_indices = PartitionedArrays.IndexSet(my_rank, ldof_to_gdof, Int32.(ldof_to_rank)) - #FIXME: This below must be fixed before we can assemble to HYPRE IJ. Problem seems to be that rows and cols must be continuously assigned. 
- #row_indices = PartitionedArrays.IndexRange(my_rank, length(ltdof_indices), ltdof_to_gdof[1], ldof_to_gdof[.!ltdof_indices], Int32.(ldof_to_rank[.!ltdof_indices])) - row_data = MPIData(row_indices, comm, (np,)) - row_exchanger = Exchanger(row_data,neighbors) - rows = PRange(ngdofs,row_data,row_exchanger) - - @debug println("rows done (R$my_rank)") - - # For the locally visible columns we also have to take into account that remote - # processes will write their data in some of these, because their remotely - # owned trial functions overlap with the locally owned test functions. - ghost_dof_to_global = Int[] - ghost_dof_element_index = Int[] - ghost_dof_rank = Int32[] - - # ------------ Ghost dof synchronization ---------- - # Prepare sending ghost dofs to neighbors - #@TODO communication can be optimized by deduplicating entries in, and compressing the following arrays - #@TODO reorder communication by field to eliminate need for `ghost_dof_field_index_to_send` - ghost_dof_to_send = [Int[] for i ∈ 1:destination_len] # global dof id - ghost_rank_to_send = [Int[] for i ∈ 1:destination_len] # rank of dof - ghost_dof_field_index_to_send = [Int[] for i ∈ 1:destination_len] - ghost_element_to_send = [Int[] for i ∈ 1:destination_len] # corresponding element - ghost_dof_owner = [Int[] for i ∈ 1:destination_len] # corresponding owner - ghost_dof_pivot_to_send = [Int[] for i ∈ 1:destination_len] # corresponding dof to interact with - for shared_entity_set ∈ [dgrid.shared_vertices, dgrid.shared_faces, dgrid.shared_edges] - for (pivot_entity, pivot_shared_entity) ∈ shared_entity_set - # Start by searching shared entities which are not owned - pivot_entity_owner_rank = Ferrite.compute_owner(dgrid, pivot_shared_entity) - pivot_cell_idx = pivot_entity[1] - - if my_rank != pivot_entity_owner_rank - sender_slot = destination_index[pivot_entity_owner_rank] - - @debug println("$pivot_entity may require synchronization (R$my_rank)") - # Note: We have to send ALL dofs on the element to the remote. - cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? 
length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] - cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] - - pivot_entity_global = Ferrite.toglobal(getlocalgrid(dgrid), pivot_entity) - - for (field_idx, field_name) in zip(1:Ferrite.nfields(dh), Ferrite.getfieldnames(dh)) - pivot_entity_dof = Ferrite.entity_dofs(dh, field_idx, pivot_entity_global) - # Extract dofs belonging to the current field - cell_field_dofs = cell_dofs[Ferrite.dof_range(dh, field_name)] - for cell_field_dof ∈ cell_field_dofs - append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_entity_dof]) - append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_field_dof]) - append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_field_dof]) - append!(ghost_dof_field_index_to_send[sender_slot], field_idx) - append!(ghost_element_to_send[sender_slot], pivot_cell_idx) - end - end - end - end - end - - ghost_send_buffer_lengths = Int[length(i) for i ∈ ghost_element_to_send] - ghost_recv_buffer_lengths = zeros(Int, destination_len) - MPI.Neighbor_alltoall!(UBuffer(ghost_send_buffer_lengths,1), UBuffer(ghost_recv_buffer_lengths,1), vertex_comm(dgrid)); - @debug for (i,ghost_recv_buffer_length) ∈ enumerate(ghost_recv_buffer_lengths) - println("receiving $ghost_recv_buffer_length ghosts from $(sources[i]) (R$my_rank)") - end - - # Communicate ghost information - # @TODO coalesce communication - ghost_send_buffer_dofs = vcat(ghost_dof_to_send...) - ghost_recv_buffer_dofs = zeros(Int, sum(ghost_recv_buffer_lengths)) - MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_dofs,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_dofs,ghost_recv_buffer_lengths), vertex_comm(dgrid)) - - ghost_send_buffer_elements = vcat(ghost_element_to_send...) - ghost_recv_buffer_elements = zeros(Int, sum(ghost_recv_buffer_lengths)) - MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_elements,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_elements,ghost_recv_buffer_lengths), vertex_comm(dgrid)) - - ghost_send_buffer_fields = vcat(ghost_dof_field_index_to_send...) - ghost_recv_buffer_fields = zeros(Int, sum(ghost_recv_buffer_lengths)) - MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_fields,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_fields,ghost_recv_buffer_lengths), vertex_comm(dgrid)) - - ghost_send_buffer_ranks = vcat(ghost_rank_to_send...) - ghost_recv_buffer_ranks = zeros(Int, sum(ghost_recv_buffer_lengths)) - MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_ranks,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_ranks,ghost_recv_buffer_lengths), vertex_comm(dgrid)) - - ghost_send_buffer_dofs_piv = vcat(ghost_dof_pivot_to_send...) 
-    ghost_recv_buffer_dofs_piv = zeros(Int, sum(ghost_recv_buffer_lengths))
-    MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_dofs_piv,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_dofs_piv,ghost_recv_buffer_lengths), vertex_comm(dgrid))
-
-    # Reconstruct source ranks
-    ghost_recv_buffer_source_ranks = Int[]
-    for (source_idx, recv_len) ∈ enumerate(ghost_recv_buffer_lengths)
-        append!(ghost_recv_buffer_source_ranks, ones(recv_len)*sources[source_idx])
-    end
-
-    @debug println("received $ghost_recv_buffer_dofs with owners $ghost_recv_buffer_ranks (R$my_rank)")
-
-    unique_ghosts_dr = sort(unique(first,zip(ghost_recv_buffer_dofs,ghost_recv_buffer_ranks)))
-    # unzip manually and make sure we do not add duplicate entries to our columns
-    for (dof,rank) ∈ unique_ghosts_dr
-        if rank != my_rank && dof ∉ ldof_to_gdof
-            push!(ghost_dof_to_global, dof)
-            push!(ghost_dof_rank, rank)
-        end
-    end
-
-    # ------------- Construct rows and cols of distributed matrix --------
-    all_local_cols = Int[ldof_to_gdof; ghost_dof_to_global]
-    all_local_col_ranks = Int32[ldof_to_rank; ghost_dof_rank]
-    @debug println("all_local_cols $all_local_cols (R$my_rank)")
-    @debug println("all_local_col_ranks $all_local_col_ranks (R$my_rank)")
+    # --------------------- Distributed assembly --------------------
+    # The synchronization with the global sparse matrix is handled by
+    # an assembler again. You can choose from different backends, which
+    # are described in the docs and will be expanded over time. This call
+    # may trigger a large amount of communication.
+    # NOTE: At the time of writing the only backend available is a COO
+    # assembly via PartitionedArrays.jl.
+    assembler = PartitionedArraysCOOAssembler{Float64}(dh)
-    col_indices = PartitionedArrays.IndexSet(my_rank, all_local_cols, all_local_col_ranks)
-    #FIXME: This below must be fixed before we can assemble to HYPRE IJ. Problem seems to be that rows and cols must be continuously assigned.
-    #col_indices = PartitionedArrays.IndexRange(my_rank, length(ltdof_indices), ltdof_to_gdof[1], all_local_cols[all_local_col_ranks .!= my_rank], Int32.(all_local_col_ranks[all_local_col_ranks .!= my_rank]))
-    col_data = MPIData(col_indices, comm, (np,))
-    col_exchanger = Exchanger(col_data,neighbors)
-    cols = PRange(ngdofs,col_data,col_exchanger)
-
-    # --------------------- Local assembly --------------------
-    # Next we define the global force vector `f` and use that and
-    # the stiffness matrix `K` and create an assembler. The assembler
-    # is just a thin wrapper around `f` and `K` and some extra storage
-    # to make the assembling faster.
-    #+
-    @debug println("cols and rows constructed (R$my_rank)")
-    f = PartitionedArrays.PVector(0.0,rows)
-    @debug println("f constructed (R$my_rank)")
-    assembler = start_assemble()
-    @debug println("starting assembly (R$my_rank)")
-
-    # It is now time to loop over all the cells in our grid. We do this by iterating
-    # over a `CellIterator`. The iterator caches some useful things for us, for example
-    # the nodal coordinates for the cell, and the local degrees of freedom.
-    #+
+    # For the local assembly nothing changes
     for cell in CellIterator(dh)
-        @debug println("assembling cell #$(cell.current_cellid.x) (R$my_rank)")
-
-        # Always remember to reset the element stiffness matrix and
-        # force vector since we reuse them for all elements.
-        #+
         fill!(Ke, 0)
         fill!(fe, 0)
-        # For each cell we also need to reinitialize the cached values in `cellvalues`.
- #+ reinit!(cellvalues, cell) - # It is now time to loop over all the quadrature points in the cell and - # assemble the contribution to `Ke` and `fe`. The integration weight - # can be queried from `cellvalues` by `getdetJdV`. - #+ for q_point in 1:getnquadpoints(cellvalues) - @debug println("assembling qp $q_point (R$my_rank)") dΩ = getdetJdV(cellvalues, q_point) - # For each quadrature point we loop over all the (local) shape functions. - # We need the value and gradient of the testfunction `v` and also the gradient - # of the trial function `u`. We get all of these from `cellvalues`. - #+ + for i in 1:n_basefuncs v = shape_value(cellvalues, q_point, i) ∇v = shape_gradient(cellvalues, q_point, i) @@ -414,178 +85,61 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DistributedDofHandler end end - # The last step in the element loop is to assemble `Ke` and `fe` - # into the global `K` and `f` with `assemble!`. - #+ - @debug println("assembling cell finished local (R$my_rank)") - Ferrite.assemble!(assembler, celldofs(cell), Ke) - @debug println("assembling cell finished global (R$my_rank)") - map_parts(local_view(f, f.rows)) do f_local - Ferrite.assemble!(f_local, celldofs(cell), fe) - end + # Note that this call should be communication-free! + Ferrite.assemble!(assembler, celldofs(cell), fe, Ke) end - @debug println("done assembling (R$my_rank)") - - # --------------------- Add ghost entries in IJ -------------------- - # Fix ghost layer - the locations for remote processes to write their data into - unique_ghosts_dre = zip(ghost_recv_buffer_dofs_piv, ghost_recv_buffer_dofs, ghost_recv_buffer_ranks) - @debug println("unique_ghosts_dre $unique_ghosts_dre (R$my_rank)") - IJfix = [] - for (i,(pivot_dof, global_ghost_dof, ghost_owner_rank)) ∈ enumerate(unique_ghosts_dre) - push!(IJfix, (pivot_dof, global_ghost_dof)) - end - @debug println("IJfix $IJfix (R$my_rank)") - - I = map(i->ldof_to_gdof[i], assembler.I) - J = map(j->ldof_to_gdof[j], assembler.J) - V = map(v->v, assembler.V) - for (i,j) ∈ IJfix - push!(I, i) - push!(J, j) - push!(V, 0.0) - end - - @debug println("I=$(I) (R$my_rank)") - @debug println("J=$(J) (R$my_rank)") - K = PartitionedArrays.PSparseMatrix( - MPIData(I, comm, (np,)), - MPIData(J, comm, (np,)), - MPIData(V, comm, (np,)), - rows, cols, ids=:global - ) - - PartitionedArrays.assemble!(K) - PartitionedArrays.assemble!(f) - - return K, f + # Finally, for the `PartitionedArraysCOOAssembler` we have to call + # `end_assemble` to construct the global sparse matrix and the global + # right hand side vector. + return end_assemble(assembler) end #md nothing # hide -my_rank = MPI.Comm_rank(global_comm(dgrid))+1 - # ### Solution of the system -# The last step is to solve the system. First we call `doassemble` -# to obtain the global stiffness matrix `K` and force vector `f`. -K, f = doassemble(cellvalues, dh, local_to_global, dof_owner, ndofs_total, dgrid); - -# To account for the boundary conditions we use the `apply!` function. -# This modifies elements in `K` and `f` respectively, such that -# we can get the correct solution vector `u` by using a parallel -# iterative solver. -""" -Poor man's Dirichlet BC application for PartitionedArrays. 
:) -""" -function apply_zero!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) - map_parts(local_view(f, f.rows), f.rows.partition) do f_local, partition - f_local[ch.prescribed_dofs] .= 0.0 - end - - map_parts(local_view(K, K.rows, K.cols)) do K_local - for cdof in ch.prescribed_dofs - K_local[cdof, :] .= 0.0 - K_local[:, cdof] .= 0.0 - K_local[cdof, cdof] = 1.0 - end - end -end - -function apply!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) - map_parts(local_view(f, f.rows), f.rows.partition) do f_local, partition - # Note: RHS only non-zero for owned RHS entries - f_local[ch.prescribed_dofs] .= ch.inhomogeneities .* map(p -> p == partition.part, partition.lid_to_part[ch.prescribed_dofs]) - end - - # Zero out locally visible rows and columns - map_parts(local_view(K, K.rows, K.cols)) do K_local - for cdof ∈ ch.prescribed_dofs - K_local[cdof, :] .= 0.0 - K_local[:, cdof] .= 0.0 - K_local[cdof, cdof] = 1.0 - end - end - - # Zero out columns associated to the ghost dofs constrained on a remote process - # TODO optimize - - # Step 1: Send out all local ghosts to all other processes... - remote_ghost_gdofs, remote_ghost_parts = map_parts(K.cols.partition) do partition - remote_ghost_ldofs = partition.hid_to_lid - remote_ghost_parts = partition.lid_to_part[remote_ghost_ldofs] - remote_ghost_gdofs = partition.lid_to_gid[remote_ghost_ldofs] - return (remote_ghost_gdofs, remote_ghost_parts) - end - - comm = remote_ghost_parts.comm - my_rank = MPI.Comm_rank(comm)+1 - buffer_sizes_send = zeros(Cint, MPI.Comm_size(comm)) - buffer_sizes_recv = Vector{Cint}(undef, MPI.Comm_size(comm)) - for part ∈ remote_ghost_parts.part - buffer_sizes_send[part] += 1 - end - MPI.Alltoall!(UBuffer(buffer_sizes_send, 1), UBuffer(buffer_sizes_recv, 1), comm) - @debug println("Got $buffer_sizes_recv (R$my_rank)") - - remote_ghosts_recv = Vector{Int}(undef, sum(buffer_sizes_recv)) - MPI.Alltoallv!(VBuffer(remote_ghost_gdofs.part, buffer_sizes_send), VBuffer(remote_ghosts_recv, buffer_sizes_recv), comm) - @debug println("Got $remote_ghosts_recv (R$my_rank)") - - # Step 2: Union with all locally constrained dofs - remote_ghosts_constrained_send = copy(remote_ghosts_recv) - for (i, remote_ghost_dof) ∈ enumerate(remote_ghosts_recv) - remote_ghosts_constrained_send[i] = remote_ghost_dof ∈ K.cols.partition.part.lid_to_gid[ch.prescribed_dofs] - end - - # Step 3: Send trash back - remote_ghosts_constrained_recv = Vector{Int}(undef, sum(buffer_sizes_send)) - MPI.Alltoallv!(VBuffer(remote_ghosts_constrained_send, buffer_sizes_recv), VBuffer(remote_ghosts_constrained_recv, buffer_sizes_send), comm) - - @debug println("$my_rank : remote constraints on $(remote_ghost_gdofs.part[remote_ghosts_constrained_recv .== 1])") - - # Step 4: Constrain remaining columns - map_parts(local_view(K, K.rows, K.cols), K.cols.partition) do K_local, partition - for cdof ∈ partition.hid_to_lid[remote_ghosts_constrained_recv .== 1] - K_local[:, cdof] .= 0.0 - end - end -end - +# Again, we assemble our problem and apply the constraints as needed. +K, f = doassemble(cellvalues, dh); apply!(K, f, ch) -# Compute the solution -# Note: At the moment of writing this we have no good preconditioners for PSparseMatrix in Julia, +# To compute the solution we utilize conjugate gradients because at the time of writing +# this is the only available scalable working solver. 
+# Additional note: At the moment of writing this we have no good preconditioners for PSparseMatrix in Julia, # partly due to unimplemented multiplication operators for the matrix data type. u = cg(K, f) -# Compute the solution with HYPRE (needs the hotfix in https://github.com/fredrikekre/HYPRE.jl/pull/4 to function) -# u_ = HYPRE.solve( -# HYPRE.PCG( -# global_comm(dgrid); -# Precond = HYPRE.BoomerAMG() -# ), -# HYPRE.HYPREMatrix(K), -# HYPRE.HYPREVector(f) +#FIXME #src +# Compute the solution with HYPRE (needs the hotfix in https://github.com/fredrikekre/HYPRE.jl/pull/4 to function partially) #src +# u_ = HYPRE.solve( #src +# HYPRE.PCG( #src +# global_comm(dgrid); #src +# Precond = HYPRE.BoomerAMG() #src +# ), #src +# HYPRE.HYPREMatrix(K), #src +# HYPRE.HYPREVector(f) #src # ) -# Convert back to PartitionedArrays vector -# u = PVector(0.0, K.cols) -# copy!(u, u_) -# PartitionedArrays.assemble!(u) +# Convert back to PartitionedArrays vector #src +# u = PVector(0.0, K.cols) #src +# copy!(u, u_) #src +# PartitionedArrays.assemble!(u) #src -# ### Exporting to VTK +# ### Exporting via PVTK # To visualize the result we export the grid and our field `u` # to a VTK-file, which can be viewed in e.g. [ParaView](https://www.paraview.org/). -vtk_grid("heat_equation_distributed-$my_rank", dh) do vtk - map_parts(local_view(u, u.rows)) do u_local - vtk_point_data(vtk, dh, u_local) - end +vtk_grid("heat_equation_distributed", dh) do vtk + vtk_point_data(vtk, dh, u) + # For debugging purposes it can be helpful to enrich + # the visualization with some meta information about + # the grid and its partitioning + vtk_shared_vertices(vtk, dgrid) + vtk_partitioning(vtk, dgrid) end ## test the result #src using Test #src @test norm(u) ≈ 9.536307974872432 #src -# Shutdown MPI +# Finally, we gracefully shutdown MPI MPI.Finalize() #md # ## [Plain program](@id distributed-assembly-plain-program) From e7a9cbe5d0836c26f7642c1db226f511c78bbb72 Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 10 Oct 2022 02:21:35 +0200 Subject: [PATCH 043/124] Add PartitionedArrays.jl as dependency. --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index df0a18711a..7ce3b129d2 100644 --- a/Project.toml +++ b/Project.toml @@ -9,6 +9,7 @@ MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce" +PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Tensors = "48a634ad-e948-5137-8d70-aa71f2a747f4" From d4934e872263032e5da0189fa1cb387da813b9ed Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 10 Oct 2022 02:25:56 +0200 Subject: [PATCH 044/124] Improve variable naming and docs for ghost layer stuff. 
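
Rename the received-ghost bookkeeping field of `PartitionedArraysCOOAssembler`
from `unique_ghosts_dre` to `👻` and clarify the surrounding comments: the field
holds one (pivot dof, global ghost dof, owning rank) triple per received ghost
entry. As a minimal, illustrative sketch of how `end_assemble` consumes these
triples (the arrays below are toy values for demonstration only, not taken from
a real run; the real code first maps the local COO indices through
`ldof_to_gdof`):

    # Local COO data: global row indices, column indices, values ...
    I, J, V = [1, 2], [1, 2], [0.4, 0.7]
    # ... and the received ghost triples.
    👻 = [(1, 5, 2), (2, 6, 2)]
    # Reserve one zero-valued slot per triple so that the owning process
    # can write its contribution into the assembled matrix later.
    for (pivot_dof, global_ghost_dof, ghost_owner_rank) in 👻
        push!(I, pivot_dof)
        push!(J, global_ghost_dof)
        push!(V, 0.0)
    end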
--- src/assembler.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/assembler.jl b/src/assembler.jl index 67743ae5fc..dc6c444419 100644 --- a/src/assembler.jl +++ b/src/assembler.jl @@ -223,7 +223,7 @@ struct PartitionedArraysCOOAssembler{T} rows f::PVector - unique_ghosts_dre + 👻 dh # TODO PartitionedArrays backend as additional input arg @@ -307,7 +307,7 @@ struct PartitionedArraysCOOAssembler{T} ghost_dof_rank = Int32[] # ------------ Ghost dof synchronization ---------- - # Prepare sending ghost dofs to neighbors + # Prepare sending 👻 ghost dofs to neighbors #@TODO move relevant parts into dof handler #@TODO communication can be optimized by deduplicating entries in, and compressing the following arrays #@TODO reorder communication by field to eliminate need for `ghost_dof_field_index_to_send` @@ -356,7 +356,7 @@ struct PartitionedArraysCOOAssembler{T} println("receiving $ghost_recv_buffer_length ghosts from $(sources[i]) (R$my_rank)") end - # Communicate ghost information + # Communicate ghost information 👻 # @TODO coalesce communication ghost_send_buffer_dofs = vcat(ghost_dof_to_send...) ghost_recv_buffer_dofs = zeros(Int, sum(ghost_recv_buffer_lengths)) @@ -413,10 +413,10 @@ struct PartitionedArraysCOOAssembler{T} f = PartitionedArrays.PVector(0.0,rows) @debug println("f constructed (R$my_rank)") - unique_ghosts_dre = zip(ghost_recv_buffer_dofs_piv, ghost_recv_buffer_dofs, ghost_recv_buffer_ranks) - @debug println("unique_ghosts_dre $unique_ghosts_dre (R$my_rank)") + 👻 = zip(ghost_recv_buffer_dofs_piv, ghost_recv_buffer_dofs, ghost_recv_buffer_ranks) + @debug println("👻 $👻 (R$my_rank)") - return new(I, J, V, cols, rows, f, unique_ghosts_dre, dh) + return new(I, J, V, cols, rows, f, 👻, dh) end end @@ -443,14 +443,14 @@ function end_assemble(assembler::PartitionedArraysCOOAssembler{T}) where {T} np = MPI.Comm_size(comm) my_rank = MPI.Comm_rank(comm)+1 - # --------------------- Add ghost entries in IJ -------------------- + # --------------------- Add ghost entries in IJ 👻 -------------------- I = map(i->assembler.dh.ldof_to_gdof[i], assembler.I) J = map(j->assembler.dh.ldof_to_gdof[j], assembler.J) V = map(v->v, assembler.V) - # Fix ghost layer! Note that the locations for remote processes to write their + # Fix ghost layer 👻! Note that the locations for remote processes to write their # data into are missing up to this point. - for (i,(pivot_dof, global_ghost_dof, ghost_owner_rank)) ∈ enumerate(assembler.unique_ghosts_dre) + for (i, (pivot_dof, global_ghost_dof, ghost_owner_rank)) ∈ enumerate(assembler.👻) push!(I, pivot_dof) push!(J, global_ghost_dof) push!(V, 0.0) From 2fdbdadab31a7a0425efa8fe1e4f481bb767c6c8 Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 10 Oct 2022 02:37:39 +0200 Subject: [PATCH 045/124] Some minor fixes. 
--- docs/Manifest.toml | 834 +++++++++++++++++++----------- docs/Project.toml | 5 + src/Dofs/DistributedDofHandler.jl | 15 +- src/assembler.jl | 30 +- 4 files changed, 550 insertions(+), 334 deletions(-) diff --git a/docs/Manifest.toml b/docs/Manifest.toml index d60213634e..7501110aa1 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -7,30 +7,60 @@ version = "0.0.1" [[Adapt]] deps = ["LinearAlgebra"] -git-tree-sha1 = "af92965fb30777147966f58acb05da51c5616b5f" +git-tree-sha1 = "195c5505521008abea5aee4f96930717958eac6f" uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -version = "3.3.3" +version = "3.4.0" [[ArgTools]] uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" [[ArnoldiMethod]] deps = ["LinearAlgebra", "Random", "StaticArrays"] -git-tree-sha1 = "62e51b39331de8911e4a7ff6f5aaf38a5f4cc0ae" +git-tree-sha1 = "f87e559f87a45bece9c9ed97458d3afe98b1ebb9" uuid = "ec485272-7323-5ecc-a04f-4719b315124d" -version = "0.2.0" +version = "0.1.0" [[ArrayInterface]] -deps = ["Compat", "IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"] -git-tree-sha1 = "1ee88c4c76caa995a885dc2f22a5d548dfbbc0ba" +deps = ["ArrayInterfaceCore", "Compat", "IfElse", "LinearAlgebra", "Static"] +git-tree-sha1 = "d6173480145eb632d6571c148d94b9d3d773820e" uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" -version = "3.2.2" +version = "6.0.23" + +[[ArrayInterfaceCore]] +deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] +git-tree-sha1 = "5bb0f8292405a516880a3809954cb832ae7a31c5" +uuid = "30b0a656-2188-435a-8636-2ec0e6a096e2" +version = "0.1.20" + +[[ArrayInterfaceGPUArrays]] +deps = ["Adapt", "ArrayInterfaceCore", "GPUArraysCore", "LinearAlgebra"] +git-tree-sha1 = "fc114f550b93d4c79632c2ada2924635aabfa5ed" +uuid = "6ba088a2-8465-4c0a-af30-387133b534db" +version = "0.2.2" + +[[ArrayInterfaceOffsetArrays]] +deps = ["ArrayInterface", "OffsetArrays", "Static"] +git-tree-sha1 = "c49f6bad95a30defff7c637731f00934c7289c50" +uuid = "015c0d05-e682-4f19-8f0a-679ce4c54826" +version = "0.1.6" + +[[ArrayInterfaceStaticArrays]] +deps = ["Adapt", "ArrayInterface", "ArrayInterfaceStaticArraysCore", "LinearAlgebra", "Static", "StaticArrays"] +git-tree-sha1 = "efb000a9f643f018d5154e56814e338b5746c560" +uuid = "b0d46f97-bff5-4637-a19a-dd75974142cd" +version = "0.1.4" + +[[ArrayInterfaceStaticArraysCore]] +deps = ["Adapt", "ArrayInterfaceCore", "LinearAlgebra", "StaticArraysCore"] +git-tree-sha1 = "a1e2cf6ced6505cbad2490532388683f1e88c3ed" +uuid = "dd5226c6-a4d4-4bc7-8575-46859f9c95b9" +version = "0.1.0" [[ArrayLayouts]] deps = ["FillArrays", "LinearAlgebra", "SparseArrays"] -git-tree-sha1 = "56c347caf09ad8acb3e261fe75f8e09652b7b05b" +git-tree-sha1 = "ac5cc6021f32a272ee572dd2a325049a1fa0d034" uuid = "4c555306-a7a7-4459-81d9-ec55ddd5c99a" -version = "0.7.10" +version = "0.8.11" [[Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" @@ -38,17 +68,22 @@ uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [[Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +[[BitFlags]] +git-tree-sha1 = "84259bb6172806304b9101094a7cc4bc6f56dbc6" +uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35" +version = "0.1.5" + [[BitTwiddlingConvenienceFunctions]] deps = ["Static"] -git-tree-sha1 = "28bbdbf0354959db89358d1d79d421ff31ef0b5e" +git-tree-sha1 = "eaee37f76339077f86679787a71990c4e465477f" uuid = "62783981-4cbd-42fc-bca8-16325de8dc4b" -version = "0.1.3" +version = "0.1.4" [[BlockArrays]] deps = ["ArrayLayouts", "FillArrays", "LinearAlgebra"] -git-tree-sha1 = "21490270d1fcf2efa9ddb2126d6958e9b72a4db0" +git-tree-sha1 = 
"0c0dd27be59bc76a3da6243d8172aeedd6420037" uuid = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" -version = "0.16.11" +version = "0.16.20" [[Bzip2_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -57,10 +92,10 @@ uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" version = "1.0.8+0" [[CPUSummary]] -deps = ["Hwloc", "IfElse", "Preferences", "Static"] -git-tree-sha1 = "68150205edbf60f0410ba2463b5b38eae44cad1f" +deps = ["CpuId", "IfElse", "Static"] +git-tree-sha1 = "9bdd5aceea9fa109073ace6b430a24839d79315e" uuid = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" -version = "0.1.15" +version = "0.1.27" [[Cairo_jll]] deps = ["Artifacts", "Bzip2_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] @@ -76,21 +111,21 @@ version = "0.5.1" [[ChainRulesCore]] deps = ["Compat", "LinearAlgebra", "SparseArrays"] -git-tree-sha1 = "c9a6160317d1abe9c44b3beb367fd448117679ca" +git-tree-sha1 = "e7ff6cadf743c098e08fca25c91103ee4303c9bb" uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -version = "1.13.0" +version = "1.15.6" [[ChangesOfVariables]] deps = ["ChainRulesCore", "LinearAlgebra", "Test"] -git-tree-sha1 = "bf98fa45a0a4cee295de98d4c1462be26345b9a1" +git-tree-sha1 = "38f7a08f19d8810338d4f5085211c7dfa5d5bdd8" uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" -version = "0.1.2" +version = "0.1.4" [[CloseOpenIntervals]] deps = ["ArrayInterface", "Static"] -git-tree-sha1 = "f576084239e6bdf801007c80e27e2cc2cd963fe0" +git-tree-sha1 = "5522c338564580adf5d58d91e43a55db0fa5fb39" uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9" -version = "0.1.6" +version = "0.1.10" [[CodecZlib]] deps = ["TranscodingStreams", "Zlib_jll"] @@ -99,16 +134,22 @@ uuid = "944b1d66-785c-5afd-91f1-9de20f533193" version = "0.7.0" [[ColorSchemes]] -deps = ["ColorTypes", "Colors", "FixedPointNumbers", "Random"] -git-tree-sha1 = "12fc73e5e0af68ad3137b886e3f7c1eacfca2640" +deps = ["ColorTypes", "ColorVectorSpace", "Colors", "FixedPointNumbers", "Random"] +git-tree-sha1 = "1fd869cc3875b57347f7027521f561cf46d1fcd8" uuid = "35d6a980-a343-548e-a6ea-1d62b119f2f4" -version = "3.17.1" +version = "3.19.0" [[ColorTypes]] deps = ["FixedPointNumbers", "Random"] -git-tree-sha1 = "024fe24d83e4a5bf5fc80501a314ce0d1aa35597" +git-tree-sha1 = "eb7f0f8307f71fac7c606984ea5fb2817275d6e4" uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" -version = "0.11.0" +version = "0.11.4" + +[[ColorVectorSpace]] +deps = ["ColorTypes", "FixedPointNumbers", "LinearAlgebra", "SpecialFunctions", "Statistics", "TensorCore"] +git-tree-sha1 = "d08c20eef1f2cbc6e60fd3612ac4340b89fea322" +uuid = "c3611d14-8923-5661-9e6a-0046d554d3a4" +version = "0.9.9" [[Colors]] deps = ["ColorTypes", "FixedPointNumbers", "Reexport"] @@ -117,9 +158,9 @@ uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" version = "0.12.8" [[CommonSolve]] -git-tree-sha1 = "68a0743f578349ada8bc911a5cbd5a2ef6ed6d1f" +git-tree-sha1 = "332a332c97c7071600984b3c31d9067e1a4e6e25" uuid = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2" -version = "0.2.0" +version = "0.2.1" [[CommonSubexpressions]] deps = ["MacroTools", "Test"] @@ -128,10 +169,10 @@ uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" version = "0.3.0" [[Compat]] -deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] -git-tree-sha1 = 
"96b0bc6c52df76506efc8a441c6cf1adcb1babc4" +deps = ["Dates", "LinearAlgebra", "UUIDs"] +git-tree-sha1 = "3ca828fe1b75fa84b021a7860bd039eaea84d2f2" uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "3.42.0" +version = "4.3.0" [[CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] @@ -139,32 +180,31 @@ uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" [[ConstructionBase]] deps = ["LinearAlgebra"] -git-tree-sha1 = "f74e9d5388b8620b4cee35d4c5a618dd4dc547f4" +git-tree-sha1 = "fb21ddd70a051d882a1686a5a550990bbe371a95" uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9" -version = "1.3.0" +version = "1.4.1" [[Contour]] -deps = ["StaticArrays"] -git-tree-sha1 = "9f02045d934dc030edad45944ea80dbd1f0ebea7" +git-tree-sha1 = "d05d9e7b7aedff4e5b51a029dced05cfb6125781" uuid = "d38c429a-6771-53c6-b99e-75d170b6e991" -version = "0.5.7" +version = "0.6.2" -[[DEDataArrays]] -deps = ["ArrayInterface", "DocStringExtensions", "LinearAlgebra", "RecursiveArrayTools", "SciMLBase", "StaticArrays"] -git-tree-sha1 = "5e5f8f363c8c9a2415ef9185c4e0ff6966c87d52" -uuid = "754358af-613d-5f8d-9788-280bf1605d4c" -version = "0.2.2" +[[CpuId]] +deps = ["Markdown"] +git-tree-sha1 = "fcbb72b032692610bfbdb15018ac16a36cf2e406" +uuid = "adafc99b-e345-5852-983c-f28acb93d879" +version = "0.3.1" [[DataAPI]] -git-tree-sha1 = "cc70b17275652eb47bc9e5f81635981f13cea5c8" +git-tree-sha1 = "46d2680e618f8abd007bce0c3026cb0c4a8f2032" uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" -version = "1.9.0" +version = "1.12.0" [[DataStructures]] deps = ["Compat", "InteractiveUtils", "OrderedCollections"] -git-tree-sha1 = "3daef5523dd2e769dad2365274f760ff5f282c7d" +git-tree-sha1 = "d1fff3a548102f48987a52a2e0d114fa97d730f0" uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -version = "0.18.11" +version = "0.18.13" [[DataValueInterfaces]] git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" @@ -186,22 +226,22 @@ uuid = "b429d917-457f-4dbc-8f4c-0cc954292b1d" version = "0.4.0" [[DiffEqBase]] -deps = ["ArrayInterface", "ChainRulesCore", "DEDataArrays", "DataStructures", "Distributions", "DocStringExtensions", "FastBroadcast", "ForwardDiff", "FunctionWrappers", "IterativeSolvers", "LabelledArrays", "LinearAlgebra", "Logging", "MuladdMacro", "NonlinearSolve", "Parameters", "PreallocationTools", "Printf", "RecursiveArrayTools", "RecursiveFactorization", "Reexport", "Requires", "SciMLBase", "Setfield", "SparseArrays", "StaticArrays", "Statistics", "SuiteSparse", "ZygoteRules"] -git-tree-sha1 = "df03eb34293066d699f8a535d1ccdcff94cb9765" +deps = ["ArrayInterfaceCore", "ChainRulesCore", "DataStructures", "Distributions", "DocStringExtensions", "FastBroadcast", "ForwardDiff", "FunctionWrappers", "FunctionWrappersWrappers", "LinearAlgebra", "Logging", "MuladdMacro", "NonlinearSolve", "Parameters", "Printf", "RecursiveArrayTools", "Reexport", "Requires", "SciMLBase", "Setfield", "SparseArrays", "Static", "StaticArrays", "Statistics", "Tricks", "ZygoteRules"] +git-tree-sha1 = "c272e6fb3c3558d807886d5247ed2a0b9c6f3823" uuid = "2b5f629d-d688-5b77-993f-72d75c75574e" -version = "6.82.1" +version = "6.105.1" [[DiffResults]] -deps = ["StaticArrays"] -git-tree-sha1 = "c18e98cba888c6c25d1c3b048e4b3380ca956805" +deps = ["StaticArraysCore"] +git-tree-sha1 = "782dd5f4561f5d267313f23853baaaa4c52ea621" uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" -version = "1.0.3" +version = "1.1.0" [[DiffRules]] deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] -git-tree-sha1 = "dd933c4ef7b4c270aacd4eb88fa64c147492acf0" +git-tree-sha1 = 
"992a23afdb109d0d2f8802a30cf5ae4b1fe7ea68" uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" -version = "1.10.0" +version = "1.11.1" [[Distances]] deps = ["LinearAlgebra", "SparseArrays", "Statistics", "StatsAPI"] @@ -215,21 +255,21 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" [[Distributions]] deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"] -git-tree-sha1 = "c43e992f186abaf9965cc45e372f4693b7754b22" +git-tree-sha1 = "0d7d213133d948c56e8c2d9f4eab0293491d8e4a" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" -version = "0.25.52" +version = "0.25.75" [[DocStringExtensions]] deps = ["LibGit2"] -git-tree-sha1 = "b19534d1895d702889b219c382a6e18010797f0b" +git-tree-sha1 = "5158c2b41018c5f7eb1470d558127ac274eca0c9" uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -version = "0.8.6" +version = "0.9.1" [[Documenter]] deps = ["ANSIColoredPrinters", "Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"] -git-tree-sha1 = "7d9a46421aef53cbd6b8ecc40c3dcbacbceaf40e" +git-tree-sha1 = "6030186b00a38e9d0434518627426570aac2ef95" uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "0.27.15" +version = "0.27.23" [[Downloads]] deps = ["ArgTools", "LibCURL", "NetworkOptions"] @@ -237,32 +277,26 @@ uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" [[DualNumbers]] deps = ["Calculus", "NaNMath", "SpecialFunctions"] -git-tree-sha1 = "90b158083179a6ccbce2c7eb1446d5bf9d7ae571" +git-tree-sha1 = "5837a837389fccf076445fce071c8ddaea35a566" uuid = "fa6b7ba4-c1ee-5f82-b5fc-ecf0adba8f74" -version = "0.6.7" - -[[EarCut_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "3f3a2501fa7236e9b911e0f7a588c657e822bb6d" -uuid = "5ae413db-bbd1-5e63-b57d-d24a61df00f5" -version = "2.2.3+0" +version = "0.6.8" [[EnumX]] -git-tree-sha1 = "1d2621e1a6246c5cf1116be0055686f305210b80" +git-tree-sha1 = "e5333cd1e1c713ee21d07b6ed8b0d8853fabe650" uuid = "4e289a0a-7415-4d19-859d-a7e5c4648b56" -version = "1.0.2" +version = "1.0.3" [[Expat_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "ae13fcbc7ab8f16b0856729b050ef0c446aa3492" +git-tree-sha1 = "bad72f730e9e91c08d9427d5e8db95478a3c323d" uuid = "2e619515-83b5-522b-bb60-26c02a35a201" -version = "2.4.4+0" +version = "2.4.8+0" [[ExponentialUtilities]] -deps = ["ArrayInterface", "LinearAlgebra", "Printf", "Requires", "SparseArrays", "libblastrampoline_jll"] -git-tree-sha1 = "b026981973ccbe38682fbb4ccb0732fd6b1e1207" +deps = ["ArrayInterfaceCore", "GPUArraysCore", "GenericSchur", "LinearAlgebra", "Printf", "SparseArrays", "libblastrampoline_jll"] +git-tree-sha1 = "b19c3f5001b11b71d0f970f354677d604f3a1a97" uuid = "d4d017d3-3776-5f7e-afef-a10c40355c18" -version = "1.13.0" +version = "1.19.0" [[ExprTools]] git-tree-sha1 = "56559bbef6ca5ea0c0818fa5c90320398a6fbf8d" @@ -276,47 +310,61 @@ uuid = "c87230d0-a227-11e9-1b43-d7ebe4e7570a" version = "0.4.1" [[FFMPEG_jll]] -deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "LAME_jll", "Libdl", "Ogg_jll", "OpenSSL_jll", "Opus_jll", "Pkg", "Zlib_jll", "libass_jll", "libfdk_aac_jll", "libvorbis_jll", "x264_jll", "x265_jll"] -git-tree-sha1 = "d8a578692e3077ac998b50c0217dfd67f21d1e5f" +deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "LAME_jll", "Libdl", "Ogg_jll", "OpenSSL_jll", "Opus_jll", "PCRE2_jll", "Pkg", "Zlib_jll", 
"libaom_jll", "libass_jll", "libfdk_aac_jll", "libvorbis_jll", "x264_jll", "x265_jll"] +git-tree-sha1 = "74faea50c1d007c85837327f6775bea60b5492dd" uuid = "b22a6f82-2f65-5046-a5b2-351ab43fb4e5" -version = "4.4.0+0" +version = "4.4.2+2" + +[[FLTK_jll]] +deps = ["Artifacts", "Fontconfig_jll", "FreeType2_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libglvnd_jll", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll", "Xorg_libXfixes_jll", "Xorg_libXft_jll", "Xorg_libXinerama_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] +git-tree-sha1 = "72a4842f93e734f378cf381dae2ca4542f019d23" +uuid = "4fce6fc7-ba6a-5f4c-898f-77e99806d6f8" +version = "1.3.8+0" [[FastBroadcast]] -deps = ["LinearAlgebra", "Polyester", "Static"] -git-tree-sha1 = "f39bcc05eb0dcbd2c0195762df7a5737041289b9" +deps = ["ArrayInterface", "ArrayInterfaceCore", "LinearAlgebra", "Polyester", "Static", "StrideArraysCore"] +git-tree-sha1 = "21cdeff41e5a1822c2acd7fc7934c5f450588e00" uuid = "7034ab61-46d4-4ed7-9d0f-46aef9175898" -version = "0.1.14" +version = "0.2.1" [[FastClosures]] git-tree-sha1 = "acebe244d53ee1b461970f8910c235b259e772ef" uuid = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a" version = "0.3.2" +[[FastLapackInterface]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "14a6f7a21125f715d935fe8f83560ee833f7d79d" +uuid = "29a986be-02c6-4525-aec4-84b980013641" +version = "1.2.7" + [[Ferrite]] -deps = ["EnumX", "LinearAlgebra", "NearestNeighbors", "Reexport", "SparseArrays", "Tensors", "WriteVTK"] -path = ".." +deps = ["EnumX", "LinearAlgebra", "MPI", "MPIPreferences", "Metis", "NearestNeighbors", "PartitionedArrays", "Reexport", "SparseArrays", "Tensors", "WriteVTK"] +git-tree-sha1 = "405029dd65d15cbc8682b1715da69c8d0bd24a82" +repo-rev = "do/distributed-assembly" +repo-url = ".." uuid = "c061ca5d-56c9-439f-9c0e-210fe06d3992" version = "0.3.4" [[FerriteGmsh]] -deps = ["Ferrite", "Reexport", "gmsh_jll"] -git-tree-sha1 = "502e8fe43ff4342e42207d8ecbdc76787970ef69" +deps = ["Ferrite", "Gmsh"] +git-tree-sha1 = "410520ec83bdd07c10f387f2489e7215af8f03c5" repo-rev = "master" repo-url = "https://github.com/Ferrite-FEM/FerriteGmsh.jl.git" uuid = "4f95f4f8-b27c-4ae5-9a39-ea55e634e36b" -version = "0.1.0" +version = "1.0.0" [[FillArrays]] deps = ["LinearAlgebra", "Random", "SparseArrays", "Statistics"] -git-tree-sha1 = "0dbc5b9683245f905993b51d2814202d75b34f1a" +git-tree-sha1 = "87519eb762f85534445f5cda35be12e32759ee14" uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" -version = "0.13.1" +version = "0.13.4" [[FiniteDiff]] -deps = ["ArrayInterface", "LinearAlgebra", "Requires", "SparseArrays", "StaticArrays"] -git-tree-sha1 = "56956d1e4c1221000b7781104c58c34019792951" +deps = ["ArrayInterfaceCore", "LinearAlgebra", "Requires", "Setfield", "SparseArrays", "StaticArrays"] +git-tree-sha1 = "5a2cff9b6b77b33b89f3d97a4d367747adce647e" uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" -version = "2.11.0" +version = "2.15.0" [[FixedPointNumbers]] deps = ["Statistics"] @@ -338,9 +386,9 @@ version = "0.4.2" [[ForwardDiff]] deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions", "StaticArrays"] -git-tree-sha1 = "1bd6fc0c344fc0cbee1f42f8d2e7ec8253dda2d2" +git-tree-sha1 = "187198a4ed8ccd7b5d99c41b69c679269ea2b2d4" uuid = "f6369f11-7733-5829-9624-2563aa707210" -version = "0.10.25" +version = "0.10.32" [[FreeType2_jll]] deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] @@ -355,9 +403,15 @@ uuid = "559328eb-81f9-559d-9380-de523a88c83c" version = 
"1.0.10+0" [[FunctionWrappers]] -git-tree-sha1 = "241552bc2209f0fa068b6415b1942cc0aa486bcc" +git-tree-sha1 = "d62485945ce5ae9c0c48f124a84998d755bae00e" uuid = "069b7b12-0de2-55c6-9aab-29f3d0a68a2e" -version = "1.1.2" +version = "1.1.3" + +[[FunctionWrappersWrappers]] +deps = ["FunctionWrappers"] +git-tree-sha1 = "a5e6e7f12607e90d71b09e6ce2c965e41b337968" +uuid = "77dc65aa-8811-40c2-897b-53d922fa7daf" +version = "0.1.1" [[Future]] deps = ["Random"] @@ -365,27 +419,43 @@ uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" [[GLFW_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg", "Xorg_libXcursor_jll", "Xorg_libXi_jll", "Xorg_libXinerama_jll", "Xorg_libXrandr_jll"] -git-tree-sha1 = "51d2dfe8e590fbd74e7a842cf6d13d8a2f45dc01" +git-tree-sha1 = "d972031d28c8c8d9d7b41a536ad7bb0c2579caca" uuid = "0656b61e-2033-5cc2-a64a-77c0f6c09b89" -version = "3.3.6+0" +version = "3.3.8+0" + +[[GLU_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg"] +git-tree-sha1 = "65af046f4221e27fb79b28b6ca89dd1d12bc5ec7" +uuid = "bd17208b-e95e-5925-bf81-e2f59b3e5c61" +version = "9.0.1+0" + +[[GMP_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "781609d7-10c4-51f6-84f2-b8444358ff6d" + +[[GPUArraysCore]] +deps = ["Adapt"] +git-tree-sha1 = "6872f5ec8fd1a38880f027a26739d42dcda6691f" +uuid = "46192b85-c4d5-4398-a991-12ede77f4527" +version = "0.1.2" [[GR]] -deps = ["Base64", "DelimitedFiles", "GR_jll", "HTTP", "JSON", "Libdl", "LinearAlgebra", "Pkg", "Printf", "Random", "RelocatableFolders", "Serialization", "Sockets", "Test", "UUIDs"] -git-tree-sha1 = "9f836fb62492f4b0f0d3b06f55983f2704ed0883" +deps = ["Base64", "DelimitedFiles", "GR_jll", "HTTP", "JSON", "Libdl", "LinearAlgebra", "Pkg", "Preferences", "Printf", "Random", "Serialization", "Sockets", "Test", "UUIDs"] +git-tree-sha1 = "cf7bf90e483228f6c988e474b420064e5351b892" uuid = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" -version = "0.64.0" +version = "0.69.4" [[GR_jll]] deps = ["Artifacts", "Bzip2_jll", "Cairo_jll", "FFMPEG_jll", "Fontconfig_jll", "GLFW_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "Pixman_jll", "Pkg", "Qt5Base_jll", "Zlib_jll", "libpng_jll"] -git-tree-sha1 = "a6c850d77ad5118ad3be4bd188919ce97fffac47" +git-tree-sha1 = "bc9f7725571ddb4ab2c4bc74fa397c1c5ad08943" uuid = "d2c73de3-f751-5644-a686-071e5b155ba9" -version = "0.64.0+0" +version = "0.69.1+0" -[[GeometryBasics]] -deps = ["EarCut_jll", "IterTools", "LinearAlgebra", "StaticArrays", "StructArrays", "Tables"] -git-tree-sha1 = "83ea630384a13fc4f002b77690bc0afeb4255ac9" -uuid = "5c1252a2-5f33-56bf-86c9-59e7332b4326" -version = "0.4.2" +[[GenericSchur]] +deps = ["LinearAlgebra", "Printf"] +git-tree-sha1 = "fb69b2a645fa69ba5f474af09221b9308b160ce6" +uuid = "c145ed77-6b09-5dd9-b285-bf645a82121e" +version = "0.5.3" [[Gettext_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "XML2_jll"] @@ -394,10 +464,16 @@ uuid = "78b55507-aeef-58d4-861c-77aaff3498b1" version = "0.21.0+0" [[Glib_jll]] -deps = ["Artifacts", "Gettext_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Libiconv_jll", "Libmount_jll", "PCRE_jll", "Pkg", "Zlib_jll"] -git-tree-sha1 = "a32d672ac2c967f3deb8a81d828afc739c838a06" +deps = ["Artifacts", "Gettext_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Libiconv_jll", "Libmount_jll", "PCRE2_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "fb83fbe02fe57f2c068013aa94bcdf6760d3a7a7" uuid = "7746bdde-850d-59dc-9ae8-88ece973131d" -version = "2.68.3+2" +version = "2.74.0+1" + +[[Gmsh]] +deps = ["gmsh_jll"] +git-tree-sha1 
= "4d4dedef84147934837c683538467cea54c44d44" +uuid = "705231aa-382f-11e9-3f0c-b7cb4346fdeb" +version = "0.2.2" [[Graphite2_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -407,20 +483,26 @@ version = "1.3.14+0" [[Graphs]] deps = ["ArnoldiMethod", "Compat", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"] -git-tree-sha1 = "57c021de207e234108a6f1454003120a1bf350c4" +git-tree-sha1 = "ba2d094a88b6b287bd25cfa86f301e7693ffae2f" uuid = "86223c79-3864-5bf0-83f7-82e725a168b6" -version = "1.6.0" +version = "1.7.4" [[Grisu]] git-tree-sha1 = "53bb909d1151e57e2484c3d1b53e19552b887fb2" uuid = "42e2da0e-8278-4e71-bc24-59509adca0fe" version = "1.0.2" +[[HDF5_jll]] +deps = ["Artifacts", "JLLWrappers", "LibCURL_jll", "Libdl", "OpenSSL_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "4cc2bb72df6ff40b055295fdef6d92955f9dede8" +uuid = "0234f1f7-429e-5d53-9886-15a909be8d59" +version = "1.12.2+2" + [[HTTP]] -deps = ["Base64", "Dates", "IniFile", "Logging", "MbedTLS", "NetworkOptions", "Sockets", "URIs"] -git-tree-sha1 = "0fa77022fe4b511826b39c894c90daf5fce3334a" +deps = ["Base64", "CodecZlib", "Dates", "IniFile", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"] +git-tree-sha1 = "4abede886fcba15cd5fd041fef776b230d004cee" uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" -version = "0.9.17" +version = "1.4.0" [[HarfBuzz_jll]] deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "Graphite2_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg"] @@ -430,27 +512,15 @@ version = "2.8.1+1" [[HostCPUFeatures]] deps = ["BitTwiddlingConvenienceFunctions", "IfElse", "Libdl", "Static"] -git-tree-sha1 = "18be5268cf415b5e27f34980ed25a7d34261aa83" +git-tree-sha1 = "b7b88a4716ac33fe31d6556c02fc60017594343c" uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0" -version = "0.1.7" - -[[Hwloc]] -deps = ["Hwloc_jll"] -git-tree-sha1 = "92d99146066c5c6888d5a3abc871e6a214388b91" -uuid = "0e44f5e4-bd66-52a0-8798-143a42290a1d" -version = "2.0.0" - -[[Hwloc_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "d8bccde6fc8300703673ef9e1383b11403ac1313" -uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8" -version = "2.7.0+0" +version = "0.1.8" [[HypergeometricFunctions]] -deps = ["DualNumbers", "LinearAlgebra", "SpecialFunctions", "Test"] -git-tree-sha1 = "65e4589030ef3c44d3b90bdc5aac462b4bb05567" +deps = ["DualNumbers", "LinearAlgebra", "OpenLibm_jll", "SpecialFunctions", "Test"] +git-tree-sha1 = "709d864e3ed6e3545230601f94e11ebc65994641" uuid = "34004b35-14d8-5ef3-9330-4cdb6864b03a" -version = "0.3.8" +version = "0.3.11" [[IOCapture]] deps = ["Logging", "Random"] @@ -464,9 +534,9 @@ uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" version = "0.1.1" [[Inflate]] -git-tree-sha1 = "f5fc07d4e706b84f72d54eedcc1c13d92fb0871c" +git-tree-sha1 = "5cd07aab533df5170988219191dfad0519391428" uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9" -version = "0.1.2" +version = "0.1.3" [[IniFile]] git-tree-sha1 = "f550e6e32074c939295eb5ea6de31849ac2c9625" @@ -479,20 +549,15 @@ uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" [[InverseFunctions]] deps = ["Test"] -git-tree-sha1 = "91b5dcf362c5add98049e6c29ee756910b03051d" +git-tree-sha1 = "49510dfcb407e572524ba94aeae2fced1f3feb0f" uuid = "3587e190-3f89-42d0-90ee-14403ec27112" -version = "0.1.3" +version = "0.1.8" [[IrrationalConstants]] git-tree-sha1 = "7fd44fd4ff43fc60815f8e764c0f352b83c49151" uuid = 
"92d709cd-6900-40b7-9082-c6be49f344b6" version = "0.1.1" -[[IterTools]] -git-tree-sha1 = "fa6287a4469f5e048d763df38279ee729fbd44e5" -uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" -version = "1.4.0" - [[IterativeSolvers]] deps = ["LinearAlgebra", "Printf", "Random", "RecipesBase", "SparseArrays"] git-tree-sha1 = "1169632f425f79429f245113b775a0e3d121457c" @@ -504,6 +569,12 @@ git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" uuid = "82899510-4779-5014-852e-03e436cf321d" version = "1.0.0" +[[JLFzf]] +deps = ["Pipe", "REPL", "Random", "fzf_jll"] +git-tree-sha1 = "f377670cda23b6b7c1c0b3893e37451c5c1a2185" +uuid = "1019f520-868f-41f5-a6de-eb00f4b6a39c" +version = "0.1.5" + [[JLLWrappers]] deps = ["Preferences"] git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1" @@ -530,15 +601,15 @@ version = "0.3.0" [[Krylov]] deps = ["LinearAlgebra", "Printf", "SparseArrays"] -git-tree-sha1 = "a024280a69c49f51ba29d2deb66f07508f0b9b49" +git-tree-sha1 = "92256444f81fb094ff5aa742ed10835a621aef75" uuid = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" -version = "0.7.13" +version = "0.8.4" [[KrylovKit]] deps = ["LinearAlgebra", "Printf"] -git-tree-sha1 = "0328ad9966ae29ccefb4e1b9bfd8c8867e4360df" +git-tree-sha1 = "49b0c1dd5c292870577b8f58c51072bd558febb9" uuid = "0b1a1467-8014-51b9-945f-bf0ae24f4b77" -version = "0.5.3" +version = "0.5.4" [[KrylovMethods]] deps = ["LinearAlgebra", "Printf", "SparseArrays"] @@ -558,6 +629,12 @@ git-tree-sha1 = "bf36f528eec6634efc60d7ec062008f171071434" uuid = "88015f11-f218-50d7-93a8-a6af411a945d" version = "3.0.0+1" +[[LLVMOpenMP_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "ad927676766e6529a2d5152f12040620447c0c9b" +uuid = "1d63c593-3942-5779-bab2-d838dc0a180e" +version = "14.0.4+0" + [[LZO_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "e5b909bcf985c5e2605737d2ce278ed791b89be6" @@ -569,23 +646,21 @@ git-tree-sha1 = "f2355693d6778a178ade15952b7ac47a4ff97996" uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" version = "1.3.0" -[[LabelledArrays]] -deps = ["ArrayInterface", "ChainRulesCore", "LinearAlgebra", "MacroTools", "StaticArrays"] -git-tree-sha1 = "fbd884a02f8bf98fd90c53c1c9d2b21f9f30f42a" -uuid = "2ee39098-c373-598a-b85f-a56591580800" -version = "1.8.0" - [[Latexify]] -deps = ["Formatting", "InteractiveUtils", "LaTeXStrings", "MacroTools", "Markdown", "Printf", "Requires"] -git-tree-sha1 = "4f00cc36fede3c04b8acf9b2e2763decfdcecfa6" +deps = ["Formatting", "InteractiveUtils", "LaTeXStrings", "MacroTools", "Markdown", "OrderedCollections", "Printf", "Requires"] +git-tree-sha1 = "ab9aa169d2160129beb241cb2750ca499b4e90e9" uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" -version = "0.15.13" +version = "0.15.17" [[LayoutPointers]] -deps = ["ArrayInterface", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static"] -git-tree-sha1 = "b651f573812d6c36c22c944dd66ef3ab2283dfa1" +deps = ["ArrayInterface", "ArrayInterfaceOffsetArrays", "ArrayInterfaceStaticArrays", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static"] +git-tree-sha1 = "b67e749fb35530979839e7b4b606a97105fe4f1c" uuid = "10f19ff3-798f-405d-979b-55457f8fc047" -version = "0.1.6" +version = "0.1.10" + +[[LazyArtifacts]] +deps = ["Artifacts", "Pkg"] +uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" [[LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] @@ -644,9 +719,9 @@ version = "2.35.0+0" [[Libtiff_jll]] deps = ["Artifacts", "JLLWrappers", "JpegTurbo_jll", "LERC_jll", "Libdl", "Pkg", "Zlib_jll", "Zstd_jll"] -git-tree-sha1 = "c9551dd26e31ab17b86cbd00c2ede019c08758eb" 
+git-tree-sha1 = "3eb79b0ca5764d4799c06699573fd8f533259713" uuid = "89763e89-9b03-5906-acba-b20f662cd828" -version = "4.3.0+1" +version = "4.4.0+0" [[Libuuid_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -654,6 +729,12 @@ git-tree-sha1 = "7f3efec06033682db852f8b3bc3c1d2b0a0ab066" uuid = "38a345b3-de98-5d2b-a5d3-14cd9215e700" version = "2.36.0+0" +[[LightGraphs]] +deps = ["ArnoldiMethod", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"] +git-tree-sha1 = "432428df5f360964040ed60418dd5601ecd240b6" +uuid = "093fc24a-ae57-5d10-9952-331d41423f4d" +version = "1.3.5" + [[LightXML]] deps = ["Libdl", "XML2_jll"] git-tree-sha1 = "e129d9391168c677cd4800f5c0abb1ed8cb3794f" @@ -662,46 +743,94 @@ version = "0.9.0" [[LineSearches]] deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "Printf"] -git-tree-sha1 = "f27132e551e959b3667d8c93eae90973225032dd" +git-tree-sha1 = "7bbea35cec17305fc70a0e5b4641477dc0789d9d" uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" -version = "7.1.1" +version = "7.2.0" [[LinearAlgebra]] deps = ["Libdl", "libblastrampoline_jll"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +[[LinearElasticity_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "71e8ee0f9fe0e86a8f8c7f28361e5118eab2f93f" +uuid = "18c40d15-f7cd-5a6d-bc92-87468d86c5db" +version = "5.0.0+0" + [[LinearSolve]] -deps = ["ArrayInterface", "DocStringExtensions", "IterativeSolvers", "KLU", "Krylov", "KrylovKit", "LinearAlgebra", "RecursiveFactorization", "Reexport", "Requires", "SciMLBase", "Setfield", "SparseArrays", "SuiteSparse", "UnPack"] -git-tree-sha1 = "a25bc80647e44d0e1e1694b47000603497700b18" +deps = ["ArrayInterfaceCore", "DocStringExtensions", "FastLapackInterface", "GPUArraysCore", "IterativeSolvers", "KLU", "Krylov", "KrylovKit", "LinearAlgebra", "RecursiveFactorization", "Reexport", "SciMLBase", "Setfield", "SnoopPrecompile", "SparseArrays", "SuiteSparse", "UnPack"] +git-tree-sha1 = "d1a5a61fa3728fcf63c5798458bce6ec57129065" uuid = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" -version = "1.13.0" +version = "1.26.1" [[Literate]] deps = ["Base64", "IOCapture", "JSON", "REPL"] -git-tree-sha1 = "b856be4fe3dfa4146e5c890f1d9865f4e2e4779d" +git-tree-sha1 = "1c4418beaa6664041e0f9b48f0710f57bff2fcbe" uuid = "98b081ad-f1c9-55d3-8b20-4c87d4299306" -version = "2.13.0" +version = "2.14.0" [[LogExpFunctions]] deps = ["ChainRulesCore", "ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"] -git-tree-sha1 = "58f25e56b706f95125dcb796f39e1fb01d913a71" +git-tree-sha1 = "94d9c52ca447e23eac0c0f074effbcd38830deb5" uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" -version = "0.3.10" +version = "0.3.18" [[Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +[[LoggingExtras]] +deps = ["Dates", "Logging"] +git-tree-sha1 = "5d4d2d9904227b8bd66386c1138cf4d5ffa826bf" +uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" +version = "0.4.9" + [[LoopVectorization]] -deps = ["ArrayInterface", "CPUSummary", "ChainRulesCore", "CloseOpenIntervals", "DocStringExtensions", "ForwardDiff", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "SIMDDualNumbers", "SLEEFPirates", "SpecialFunctions", "Static", "ThreadingUtilities", "UnPack", "VectorizationBase"] -git-tree-sha1 = "077c7c9d746cbe30ac5f001ea4c1277f64cc5dad" +deps = ["ArrayInterface", "ArrayInterfaceCore", "ArrayInterfaceOffsetArrays", "ArrayInterfaceStaticArrays", 
"CPUSummary", "ChainRulesCore", "CloseOpenIntervals", "DocStringExtensions", "ForwardDiff", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "SIMDDualNumbers", "SIMDTypes", "SLEEFPirates", "SnoopPrecompile", "SpecialFunctions", "Static", "ThreadingUtilities", "UnPack", "VectorizationBase"] +git-tree-sha1 = "39af6a1e398a29f568dc9fe469f459ad3aacb03b" uuid = "bdcacae8-1622-11e9-2a5c-532679323890" -version = "0.12.103" +version = "0.12.133" + +[[METIS_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "1d31872bb9c5e7ec1f618e8c4a56c8b0d9bddc7e" +uuid = "d00139f3-1899-568f-a2f0-47f597d42d70" +version = "5.1.1+0" + +[[MMG_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "LinearElasticity_jll", "Pkg", "SCOTCH_jll"] +git-tree-sha1 = "70a59df96945782bb0d43b56d0fbfdf1ce2e4729" +uuid = "86086c02-e288-5929-a127-40944b0018b7" +version = "5.6.0+0" + +[[MPI]] +deps = ["Distributed", "DocStringExtensions", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "Requires", "Serialization", "Sockets"] +git-tree-sha1 = "a330c3fc517b52723645283a1d18569c58f703dd" +uuid = "da04e1cc-30fd-572f-bb4f-1f8673147195" +version = "0.20.2" + +[[MPICH_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "Pkg", "TOML"] +git-tree-sha1 = "6d4fa43afab4611d090b11617ecea1a144b21d35" +uuid = "7cb0a576-ebde-5e09-9194-50597f1243b4" +version = "4.0.2+5" + +[[MPIPreferences]] +deps = ["Libdl", "Preferences"] +git-tree-sha1 = "34892fb69751a76bcf8b7add84ec77015208a1ec" +uuid = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" +version = "0.1.6" + +[[MPItrampoline_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "Pkg", "TOML"] +git-tree-sha1 = "b3f9e42685b4ad614eca0b44bd863cd41b1c86ea" +uuid = "f1f71cc9-e9ae-5b93-9b94-4fe0e1ad3748" +version = "5.0.2+1" [[MacroTools]] deps = ["Markdown", "Random"] -git-tree-sha1 = "3d3e902b31198a27340d0bf00d6ac452866021cf" +git-tree-sha1 = "42324d08725e200c23d4dfb549e0d5d89dede2d2" uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -version = "0.5.9" +version = "0.5.10" [[ManualMemory]] git-tree-sha1 = "bcaef4fc7a0cfe2cba636d84cda54b5e4e4ca3cd" @@ -713,10 +842,10 @@ deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" [[MbedTLS]] -deps = ["Dates", "MbedTLS_jll", "Random", "Sockets"] -git-tree-sha1 = "1c38e51c3d08ef2278062ebceade0e46cefc96fe" +deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "Random", "Sockets"] +git-tree-sha1 = "6872f9594ff273da6d13c7c1a1545d5a8c7d0c1c" uuid = "739be429-bea8-5141-9913-cc70e7f3736d" -version = "1.0.3" +version = "1.1.6" [[MbedTLS_jll]] deps = ["Artifacts", "Libdl"] @@ -727,6 +856,18 @@ git-tree-sha1 = "e498ddeee6f9fdb4551ce855a46f54dbd900245f" uuid = "442fdcdd-2543-5da2-b0f3-8c86c306513e" version = "0.3.1" +[[Metis]] +deps = ["Graphs", "LightGraphs", "LinearAlgebra", "METIS_jll", "SparseArrays"] +git-tree-sha1 = "3285c93a67ed2effccf6ecf862a6346fcf5c565e" +uuid = "2679e427-3c69-5b7f-982b-ece356f1e94b" +version = "1.2.0" + +[[MicrosoftMPI_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "a16aa086d335ed7e0170c5265247db29172af2f9" +uuid = "9237b28f-5490-5468-be7b-bb81f5f5e6cf" +version = "10.1.3+2" + [[Missings]] deps = ["DataAPI"] git-tree-sha1 = "bf210ce90b6c9eed32d25dbcae1ebc565df2687f" @@ -757,30 +898,37 @@ uuid = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" version = "4.5.1" [[NaNMath]] -git-tree-sha1 
= "b086b7ea07f8e38cf122f5016af580881ac914fe" +deps = ["OpenLibm_jll"] +git-tree-sha1 = "a7c3d1da1189a1c2fe843a3bfa04d18d20eb3211" uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" -version = "0.3.7" +version = "1.0.1" [[NearestNeighbors]] deps = ["Distances", "StaticArrays"] -git-tree-sha1 = "16baacfdc8758bc374882566c9187e785e85c2f0" +git-tree-sha1 = "440165bf08bc500b8fe4a7be2dc83271a00c0716" uuid = "b8a86587-4115-5ab1-83bc-aa920d37bbce" -version = "0.4.9" +version = "0.4.12" [[NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" [[NonlinearSolve]] -deps = ["ArrayInterface", "FiniteDiff", "ForwardDiff", "IterativeSolvers", "LinearAlgebra", "RecursiveArrayTools", "RecursiveFactorization", "Reexport", "SciMLBase", "Setfield", "StaticArrays", "UnPack"] -git-tree-sha1 = "aeebff6a2a23506e5029fd2248a26aca98e477b3" +deps = ["ArrayInterfaceCore", "FiniteDiff", "ForwardDiff", "IterativeSolvers", "LinearAlgebra", "RecursiveArrayTools", "RecursiveFactorization", "Reexport", "SciMLBase", "Setfield", "StaticArrays", "UnPack"] +git-tree-sha1 = "a754a21521c0ab48d37f44bbac1eefd1387bdcfc" uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec" -version = "0.3.16" +version = "0.3.22" + +[[OCCT_jll]] +deps = ["Artifacts", "FreeType2_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll", "Xorg_libXfixes_jll", "Xorg_libXft_jll", "Xorg_libXinerama_jll", "Xorg_libXrender_jll"] +git-tree-sha1 = "acc8099ae8ed10226dc8424fb256ec9fe367a1f0" +uuid = "baad4e97-8daa-5946-aac2-2edac59d34e1" +version = "7.6.2+2" [[OffsetArrays]] deps = ["Adapt"] -git-tree-sha1 = "043017e0bdeff61cfbb7afeb558ab29536bbb5ed" +git-tree-sha1 = "1ea784113a6aa054c5ebd95945fa5e52c2f378e7" uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" -version = "1.10.8" +version = "1.12.7" [[Ogg_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -796,11 +944,23 @@ uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" deps = ["Artifacts", "Libdl"] uuid = "05823500-19ac-5b8b-9628-191a04bc5112" +[[OpenMPI_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "Pkg", "TOML"] +git-tree-sha1 = "346d6b357a480300ed7854dbc70e746ac52e10fd" +uuid = "fe0851c0-eecd-5654-98d4-656369965a5c" +version = "4.1.3+3" + +[[OpenSSL]] +deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"] +git-tree-sha1 = "ebe81469e9d7b471d7ddb611d9e147ea16de0add" +uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c" +version = "1.2.1" + [[OpenSSL_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "ab05aa4cc89736e95915b01e7279e61b1bfe33b8" +git-tree-sha1 = "e60321e3f2616584ff98f0a4f18d98ae6f89bbb3" uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" -version = "1.1.14+0" +version = "1.1.17+0" [[OpenSpecFun_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] @@ -810,9 +970,9 @@ version = "0.5.5+0" [[Optim]] deps = ["Compat", "FillArrays", "ForwardDiff", "LineSearches", "LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "PositiveFactorizations", "Printf", "SparseArrays", "StatsBase"] -git-tree-sha1 = "bc0a748740e8bc5eeb9ea6031e6f050de1fc0ba2" +git-tree-sha1 = "b9fe76d1a39807fdcf790b991981a922de0c3050" uuid = "429524aa-4258-5aef-a3af-852621145aeb" -version = "1.6.2" +version = "1.7.3" [[Opus_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -826,22 +986,20 @@ uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" version = "1.4.1" [[OrdinaryDiffEq]] -deps = ["Adapt", "ArrayInterface", "DataStructures", "DiffEqBase", 
"DocStringExtensions", "ExponentialUtilities", "FastClosures", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "LinearSolve", "Logging", "LoopVectorization", "MacroTools", "MuladdMacro", "NLsolve", "NonlinearSolve", "Polyester", "PreallocationTools", "RecursiveArrayTools", "Reexport", "SciMLBase", "SparseArrays", "SparseDiffTools", "StaticArrays", "UnPack"] -git-tree-sha1 = "509aa6d3b2773e5109d4a4dd9a300259ac727961" +deps = ["Adapt", "ArrayInterface", "ArrayInterfaceGPUArrays", "ArrayInterfaceStaticArrays", "DataStructures", "DiffEqBase", "DocStringExtensions", "ExponentialUtilities", "FastBroadcast", "FastClosures", "FiniteDiff", "ForwardDiff", "FunctionWrappersWrappers", "LinearAlgebra", "LinearSolve", "Logging", "LoopVectorization", "MacroTools", "MuladdMacro", "NLsolve", "NonlinearSolve", "Polyester", "PreallocationTools", "Preferences", "RecursiveArrayTools", "Reexport", "SciMLBase", "SnoopPrecompile", "SparseArrays", "SparseDiffTools", "StaticArrays", "UnPack"] +git-tree-sha1 = "06dbf3ab4f2530d5c5464f78c9aba4cc300ed069" uuid = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" -version = "6.7.1" +version = "6.28.0" -[[PCRE_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "b2a7af664e098055a7529ad1a900ded962bca488" -uuid = "2f80f16e-611a-54ab-bc61-aa92de5b98fc" -version = "8.44.0+0" +[[PCRE2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" [[PDMats]] deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] -git-tree-sha1 = "e8185b83b9fc56eb6456200e873ce598ebc7f262" +git-tree-sha1 = "cf494dca75a69712a72b80bc48f59dcf3dea63ec" uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" -version = "0.11.7" +version = "0.11.16" [[Parameters]] deps = ["OrderedCollections", "UnPack"] @@ -851,9 +1009,22 @@ version = "0.12.3" [[Parsers]] deps = ["Dates"] -git-tree-sha1 = "85b5da0fa43588c75bb1ff986493443f821c70b7" +git-tree-sha1 = "595c0b811cf2bab8b0849a70d9bd6379cc1cfb52" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "2.2.3" +version = "2.4.1" + +[[PartitionedArrays]] +deps = ["Distances", "IterativeSolvers", "LinearAlgebra", "MPI", "Printf", "SparseArrays", "SparseMatricesCSR"] +git-tree-sha1 = "94291b7ddeac39816572660383055870b41bca64" +repo-rev = "master" +repo-url = "https://github.com/fverdugo/PartitionedArrays.jl" +uuid = "5a9dfac6-5c52-46f7-8278-5e2210713be9" +version = "0.2.11" + +[[Pipe]] +git-tree-sha1 = "6842804e7867b115ca9de748a0cf6b364523c16d" +uuid = "b98c9c47-44ae-5843-9183-064241ee97a0" +version = "1.3.0" [[Pixman_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -866,34 +1037,34 @@ deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markd uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" [[PlotThemes]] -deps = ["PlotUtils", "Requires", "Statistics"] -git-tree-sha1 = "a3a964ce9dc7898193536002a6dd892b1b5a6f1d" +deps = ["PlotUtils", "Statistics"] +git-tree-sha1 = "8162b2f8547bc23876edd0c5181b27702ae58dce" uuid = "ccf2f8ad-2431-5c83-bf29-c5338b663b6a" -version = "2.0.1" +version = "3.0.0" [[PlotUtils]] -deps = ["ColorSchemes", "Colors", "Dates", "Printf", "Random", "Reexport", "Statistics"] -git-tree-sha1 = "bb16469fd5224100e422f0b027d26c5a25de1200" +deps = ["ColorSchemes", "Colors", "Dates", "Printf", "Random", "Reexport", "SnoopPrecompile", "Statistics"] +git-tree-sha1 = "21303256d239f6b484977314674aef4bb1fe4420" uuid = "995b91a9-d308-5afd-9ec6-746e21dbc043" -version = "1.2.0" +version = "1.3.1" [[Plots]] -deps = ["Base64", "Contour", "Dates", "Downloads", "FFMPEG", "FixedPointNumbers", "GR", 
"GeometryBasics", "JSON", "Latexify", "LinearAlgebra", "Measures", "NaNMath", "Pkg", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "Requires", "Scratch", "Showoff", "SparseArrays", "Statistics", "StatsBase", "UUIDs", "UnicodeFun", "Unzip"] -git-tree-sha1 = "1690b713c3b460c955a2957cd7487b1b725878a7" +deps = ["Base64", "Contour", "Dates", "Downloads", "FFMPEG", "FixedPointNumbers", "GR", "JLFzf", "JSON", "LaTeXStrings", "Latexify", "LinearAlgebra", "Measures", "NaNMath", "Pkg", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "RelocatableFolders", "Requires", "Scratch", "Showoff", "SnoopPrecompile", "SparseArrays", "Statistics", "StatsBase", "UUIDs", "UnicodeFun", "Unzip"] +git-tree-sha1 = "524d9ff1b2f4473fef59678c06f9f77160a204b1" uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -version = "1.27.1" +version = "1.35.3" [[Polyester]] deps = ["ArrayInterface", "BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "ManualMemory", "PolyesterWeave", "Requires", "Static", "StrideArraysCore", "ThreadingUtilities"] -git-tree-sha1 = "ad769d3f29cffb33380ab28318a10c1ccb19c827" +git-tree-sha1 = "cb2ede4b9cc432c1cba4d4452a62ae1d2a4141bb" uuid = "f517fe37-dbe3-4b94-8317-1923a5111588" -version = "0.6.7" +version = "0.6.16" [[PolyesterWeave]] deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"] -git-tree-sha1 = "7e597df97e46ffb1c8adbaddfa56908a7a20194b" +git-tree-sha1 = "b42fb2292fbbaed36f25d33a15c8cc0b4f287fcf" uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad" -version = "0.1.5" +version = "0.1.10" [[PositiveFactorizations]] deps = ["LinearAlgebra"] @@ -902,16 +1073,16 @@ uuid = "85a6dd25-e78a-55b7-8502-1745935b8125" version = "0.2.4" [[PreallocationTools]] -deps = ["Adapt", "ArrayInterface", "ForwardDiff", "LabelledArrays"] -git-tree-sha1 = "6c138c8510111fa47b5d2ed8ada482d97e279bee" +deps = ["Adapt", "ArrayInterfaceCore", "ForwardDiff"] +git-tree-sha1 = "3953d18698157e1d27a51678c89c88d53e071a42" uuid = "d236fae5-4411-538c-8e31-a6e3d9e00b46" -version = "0.2.4" +version = "0.4.4" [[Preferences]] deps = ["TOML"] -git-tree-sha1 = "d3538e7f8a790dc8903519090857ef8e1283eecd" +git-tree-sha1 = "47e5f437cc0e7ef2ce8406ce1e7e24d44915f88d" uuid = "21216c6a-2e73-6563-6e65-726566657250" -version = "1.2.5" +version = "1.3.0" [[Printf]] deps = ["Unicode"] @@ -919,21 +1090,21 @@ uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" [[ProgressMeter]] deps = ["Distributed", "Printf"] -git-tree-sha1 = "afadeba63d90ff223a6a48d2009434ecee2ec9e8" +git-tree-sha1 = "d7a7aef8f8f2d537104f170139553b14dfe39fe9" uuid = "92933f4c-e287-5a05-a399-4b506db050ca" -version = "1.7.1" +version = "1.7.2" [[Qt5Base_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Fontconfig_jll", "Glib_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "OpenSSL_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libxcb_jll", "Xorg_xcb_util_image_jll", "Xorg_xcb_util_keysyms_jll", "Xorg_xcb_util_renderutil_jll", "Xorg_xcb_util_wm_jll", "Zlib_jll", "xkbcommon_jll"] -git-tree-sha1 = "ad368663a5e20dbb8d6dc2fddeefe4dae0781ae8" +git-tree-sha1 = "c6c0f690d0cc7caddb74cef7aa847b824a16b256" uuid = "ea2cea3b-5b76-57ae-a6ef-0a8af62496e1" -version = "5.15.3+0" +version = "5.15.3+1" [[QuadGK]] deps = ["DataStructures", "LinearAlgebra"] -git-tree-sha1 = "78aadffb3efd2155af139781b8a8df1ef279ea39" +git-tree-sha1 = "3c009334f45dfd546a16a57960a821a1a023d241" uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" -version = "2.4.2" +version = "2.5.0" 
[[REPL]] deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] @@ -944,27 +1115,28 @@ deps = ["SHA", "Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [[RecipesBase]] -git-tree-sha1 = "6bf3f380ff52ce0832ddd3a2a7b9538ed1bcca7d" +deps = ["SnoopPrecompile"] +git-tree-sha1 = "612a4d76ad98e9722c8ba387614539155a59e30c" uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" -version = "1.2.1" +version = "1.3.0" [[RecipesPipeline]] -deps = ["Dates", "NaNMath", "PlotUtils", "RecipesBase"] -git-tree-sha1 = "995a812c6f7edea7527bb570f0ac39d0fb15663c" +deps = ["Dates", "NaNMath", "PlotUtils", "RecipesBase", "SnoopPrecompile"] +git-tree-sha1 = "9b1c0c8e9188950e66fc28f40bfe0f8aac311fe0" uuid = "01d81517-befc-4cb6-b9ec-a95719d0359c" -version = "0.5.1" +version = "0.6.7" [[RecursiveArrayTools]] -deps = ["Adapt", "ArrayInterface", "ChainRulesCore", "DocStringExtensions", "FillArrays", "LinearAlgebra", "RecipesBase", "Requires", "StaticArrays", "Statistics", "ZygoteRules"] -git-tree-sha1 = "f5dd036acee4462949cc10c55544cc2bee2545d6" +deps = ["Adapt", "ArrayInterfaceCore", "ArrayInterfaceStaticArraysCore", "ChainRulesCore", "DocStringExtensions", "FillArrays", "GPUArraysCore", "IteratorInterfaceExtensions", "LinearAlgebra", "RecipesBase", "StaticArraysCore", "Statistics", "Tables", "ZygoteRules"] +git-tree-sha1 = "3004608dc42101a944e44c1c68b599fa7c669080" uuid = "731186ca-8d62-57ce-b412-fbd966d074cd" -version = "2.25.1" +version = "2.32.0" [[RecursiveFactorization]] -deps = ["LinearAlgebra", "LoopVectorization", "Polyester", "StrideArraysCore", "TriangularSolve"] -git-tree-sha1 = "7ad4c2ef15b7aecd767b3921c0d255d39b3603ea" +deps = ["LinearAlgebra", "LoopVectorization", "Polyester", "SnoopPrecompile", "StrideArraysCore", "TriangularSolve"] +git-tree-sha1 = "0a2dfb3358fcde3676beb75405e782faa8c9aded" uuid = "f2c3362d-daeb-58d1-803e-2bc74f2840b4" -version = "0.2.9" +version = "0.2.12" [[Reexport]] git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" @@ -973,9 +1145,9 @@ version = "1.2.2" [[RelocatableFolders]] deps = ["SHA", "Scratch"] -git-tree-sha1 = "cdbd3b1338c72ce29d9584fdbe9e9b70eeb5adca" +git-tree-sha1 = "90bc7a7c96410424509e4263e277e43250c05691" uuid = "05181044-ff0b-4ac5-8273-598c1e38db00" -version = "0.1.3" +version = "1.0.0" [[Requires]] deps = ["UUIDs"] @@ -995,6 +1167,12 @@ git-tree-sha1 = "68db32dff12bb6127bac73c209881191bf0efbb7" uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f" version = "0.3.0+0" +[[SCOTCH_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] +git-tree-sha1 = "7110b749766853054ce8a2afaa73325d72d32129" +uuid = "a8d0f55d-b80e-548d-aff6-1a04c175f0f9" +version = "6.1.3+0" + [[SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" @@ -1005,9 +1183,9 @@ version = "3.4.1" [[SIMDDualNumbers]] deps = ["ForwardDiff", "IfElse", "SLEEFPirates", "VectorizationBase"] -git-tree-sha1 = "62c2da6eb66de8bb88081d20528647140d4daa0e" +git-tree-sha1 = "dd4195d308df24f33fb10dde7c22103ba88887fa" uuid = "3cdde19b-5bb0-4aaf-8931-af3e248e098b" -version = "0.1.0" +version = "0.1.1" [[SIMDTypes]] git-tree-sha1 = "330289636fb8107c5f32088d2741e9fd7a061a5c" @@ -1016,30 +1194,30 @@ version = "0.1.0" [[SLEEFPirates]] deps = ["IfElse", "Static", "VectorizationBase"] -git-tree-sha1 = "d4c366b135fc2e1af7a000473e08edc5afd94819" +git-tree-sha1 = "938c9ecffb28338a6b8b970bda0f3806a65e7906" uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" -version = "0.6.31" +version = "0.6.36" [[SciMLBase]] -deps = ["ArrayInterface", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", 
"IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "RecipesBase", "RecursiveArrayTools", "StaticArrays", "Statistics", "Tables", "TreeViews"] -git-tree-sha1 = "c086056df381502621dc6b5f1d1a0a1c2d0185e7" +deps = ["ArrayInterfaceCore", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "FunctionWrappersWrappers", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "Markdown", "Preferences", "RecipesBase", "RecursiveArrayTools", "StaticArraysCore", "Statistics", "Tables"] +git-tree-sha1 = "1da462b691464c9170bcbf35022c8ea12329053d" uuid = "0bca4576-84f4-4d90-8ffe-ffa030f20462" -version = "1.28.0" +version = "1.59.5" [[Scratch]] deps = ["Dates"] -git-tree-sha1 = "0b4b7f1393cff97c33891da2a0bf69c6ed241fda" +git-tree-sha1 = "f94f779c94e58bf9ea243e77a37e16d9de9126bd" uuid = "6c6a2e73-6563-6170-7368-637461726353" -version = "1.1.0" +version = "1.1.1" [[Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" [[Setfield]] -deps = ["ConstructionBase", "Future", "MacroTools", "Requires"] -git-tree-sha1 = "38d88503f695eb0301479bc9b0d4320b378bafe5" +deps = ["ConstructionBase", "Future", "MacroTools", "StaticArraysCore"] +git-tree-sha1 = "e2cc6d8c88613c05e1defb55170bf5ff211fbeac" uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46" -version = "0.8.2" +version = "1.1.1" [[SharedArrays]] deps = ["Distributed", "Mmap", "Random", "Serialization"] @@ -1051,12 +1229,22 @@ git-tree-sha1 = "91eddf657aca81df9ae6ceb20b959ae5653ad1de" uuid = "992d4aef-0814-514b-bc4d-f2e9a6c4116f" version = "1.0.3" +[[SimpleBufferStream]] +git-tree-sha1 = "874e8867b33a00e784c8a7e4b60afe9e037b74e1" +uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7" +version = "1.1.0" + [[SimpleTraits]] deps = ["InteractiveUtils", "MacroTools"] git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231" uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d" version = "0.9.4" +[[SnoopPrecompile]] +git-tree-sha1 = "f604441450a3c0569830946e5b33b78c928e1a85" +uuid = "66db9d55-30c0-4569-8b51-7e840670fc0c" +version = "1.0.1" + [[Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" @@ -1071,28 +1259,39 @@ deps = ["LinearAlgebra", "Random"] uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [[SparseDiffTools]] -deps = ["Adapt", "ArrayInterface", "Compat", "DataStructures", "FiniteDiff", "ForwardDiff", "Graphs", "LinearAlgebra", "Requires", "SparseArrays", "StaticArrays", "VertexSafeGraphs"] -git-tree-sha1 = "87efd1676d87706f4079e8e717a7a5f02b6ea1ad" +deps = ["Adapt", "ArrayInterfaceCore", "ArrayInterfaceStaticArrays", "Compat", "DataStructures", "FiniteDiff", "ForwardDiff", "Graphs", "LinearAlgebra", "Requires", "SparseArrays", "StaticArrays", "VertexSafeGraphs"] +git-tree-sha1 = "5fb8ba9180f467885e87a2c99cae178b67934be1" uuid = "47a9eef4-7e08-11e9-0b38-333d64bd3804" -version = "1.20.2" +version = "1.26.2" + +[[SparseMatricesCSR]] +deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] +git-tree-sha1 = "4870b3e7db7063927b163fb981bd579410b68b2d" +uuid = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1" +version = "0.6.6" [[SpecialFunctions]] deps = ["ChainRulesCore", "IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] -git-tree-sha1 = "5ba658aeecaaf96923dce0da9e703bd1fe7666f9" +git-tree-sha1 = "d75bda01f8c31ebb72df80a46c88b25d1c79c56d" uuid = "276daf66-3868-5448-9aa4-cd146d93841b" -version = "2.1.4" +version = "2.1.7" [[Static]] deps = ["IfElse"] -git-tree-sha1 = "7f5a513baec6f122401abfc8e9c074fdac54f6c1" +git-tree-sha1 = "de4f0a4f049a4c87e4948c04acff37baf1be01a6" uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" -version = "0.4.1" 
+version = "0.7.7" [[StaticArrays]] -deps = ["LinearAlgebra", "Random", "Statistics"] -git-tree-sha1 = "6976fab022fea2ffea3d945159317556e5dad87c" +deps = ["LinearAlgebra", "Random", "StaticArraysCore", "Statistics"] +git-tree-sha1 = "f86b3a049e5d05227b10e15dbb315c5b90f14988" uuid = "90137ffa-7385-5640-81b9-e52037218182" -version = "1.4.2" +version = "1.5.9" + +[[StaticArraysCore]] +git-tree-sha1 = "6b7ba252635a5eff6a0b0664a41ee140a1c9e72a" +uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" +version = "1.4.0" [[Statistics]] deps = ["LinearAlgebra", "SparseArrays"] @@ -1100,33 +1299,27 @@ uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [[StatsAPI]] deps = ["LinearAlgebra"] -git-tree-sha1 = "c3d8ba7f3fa0625b062b82853a7d5229cb728b6b" +git-tree-sha1 = "f9af7f195fb13589dd2e2d57fdb401717d2eb1f6" uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" -version = "1.2.1" +version = "1.5.0" [[StatsBase]] deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] -git-tree-sha1 = "8977b17906b0a1cc74ab2e3a05faa16cf08a8291" +git-tree-sha1 = "d1bf48bfcc554a3761a133fe3a9bb01488e06916" uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -version = "0.33.16" +version = "0.33.21" [[StatsFuns]] deps = ["ChainRulesCore", "HypergeometricFunctions", "InverseFunctions", "IrrationalConstants", "LogExpFunctions", "Reexport", "Rmath", "SpecialFunctions"] -git-tree-sha1 = "25405d7016a47cf2bd6cd91e66f4de437fd54a07" +git-tree-sha1 = "5783b877201a82fc0014cbf381e7e6eb130473a4" uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -version = "0.9.16" +version = "1.0.1" [[StrideArraysCore]] -deps = ["ArrayInterface", "CloseOpenIntervals", "IfElse", "LayoutPointers", "ManualMemory", "Requires", "SIMDTypes", "Static", "ThreadingUtilities"] -git-tree-sha1 = "49d616ef230fec080d02ada0ca5639e652cca06b" +deps = ["ArrayInterface", "CloseOpenIntervals", "IfElse", "LayoutPointers", "ManualMemory", "SIMDTypes", "Static", "ThreadingUtilities"] +git-tree-sha1 = "ac730bd978bf35f9fe45daa0bd1f51e493e97eb4" uuid = "7792a7ef-975c-4747-a70f-980b88e8d1da" -version = "0.2.13" - -[[StructArrays]] -deps = ["Adapt", "DataAPI", "StaticArrays", "Tables"] -git-tree-sha1 = "57617b34fa34f91d536eb265df67c2d4519b8b98" -uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" -version = "0.6.5" +version = "0.3.15" [[SuiteSparse]] deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] @@ -1148,19 +1341,25 @@ version = "1.0.1" [[Tables]] deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "OrderedCollections", "TableTraits", "Test"] -git-tree-sha1 = "5ce79ce186cc678bbb5c5681ca3379d1ddae11a1" +git-tree-sha1 = "2d7164f7b8a066bcfa6224e67736ce0eb54aef5b" uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" -version = "1.7.0" +version = "1.9.0" [[Tar]] deps = ["ArgTools", "SHA"] uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +[[TensorCore]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "1feb45f88d133a655e001435632f019a9a1bcdb6" +uuid = "62fd8b95-f654-4bbd-a8a5-9c27f68ccd50" +version = "0.1.1" + [[Tensors]] -deps = ["ForwardDiff", "LinearAlgebra", "SIMD", "Statistics"] -git-tree-sha1 = "986ddcbf240792fc81898eb5d662540fc32cbcd5" +deps = ["ForwardDiff", "LinearAlgebra", "SIMD", "StaticArrays", "Statistics"] +git-tree-sha1 = "2aeb143305a3ff33d3241263d13d14db64948a2d" uuid = "48a634ad-e948-5137-8d70-aa71f2a747f4" -version = "1.10.0" +version = "1.12.0" [[Test]] deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] @@ -1174,32 +1373,31 @@ version = 
"0.5.0" [[TimerOutputs]] deps = ["ExprTools", "Printf"] -git-tree-sha1 = "d60b0c96a16aaa42138d5d38ad386df672cb8bd8" +git-tree-sha1 = "9dfcb767e17b0849d6aaf85997c98a5aea292513" uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -version = "0.5.16" +version = "0.5.21" [[TranscodingStreams]] deps = ["Random", "Test"] -git-tree-sha1 = "216b95ea110b5972db65aa90f88d8d89dcb8851c" +git-tree-sha1 = "8a75929dcd3c38611db2f8d08546decb514fcadf" uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" -version = "0.9.6" - -[[TreeViews]] -deps = ["Test"] -git-tree-sha1 = "8d0d7a3fe2f30d6a7f833a5f19f7c7a5b396eae6" -uuid = "a2a6695c-b41b-5b7d-aed9-dbfdeacea5d7" -version = "0.3.0" +version = "0.9.9" [[TriangularSolve]] -deps = ["CloseOpenIntervals", "IfElse", "LayoutPointers", "LinearAlgebra", "LoopVectorization", "Polyester", "Static", "VectorizationBase"] -git-tree-sha1 = "b8d08f55b02625770c09615d96927b3a8396925e" +deps = ["CloseOpenIntervals", "IfElse", "LayoutPointers", "LinearAlgebra", "LoopVectorization", "Polyester", "SnoopPrecompile", "Static", "VectorizationBase"] +git-tree-sha1 = "fdddcf6b2c7751cd97de69c18157aacc18fbc660" uuid = "d5829a12-d9aa-46ab-831f-fb7c9ab06edf" -version = "0.1.11" +version = "0.1.14" + +[[Tricks]] +git-tree-sha1 = "6bac775f2d42a611cdfcd1fb217ee719630c4175" +uuid = "410a4b4d-49e4-4fbc-ab6d-cb71b17b3775" +version = "0.1.6" [[URIs]] -git-tree-sha1 = "97bbe755a53fe859669cd907f2d96aee8d2c1355" +git-tree-sha1 = "e59ecc5a41b000fa94423a578d29290c7266fc10" uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" -version = "1.3.0" +version = "1.4.0" [[UUIDs]] deps = ["Random", "SHA"] @@ -1220,15 +1418,15 @@ uuid = "1cfade01-22cf-5700-b092-accc4b62d6e1" version = "0.4.1" [[Unzip]] -git-tree-sha1 = "34db80951901073501137bdbc3d5a8e7bbd06670" +git-tree-sha1 = "ca0969166a028236229f63514992fc073799bb78" uuid = "41fe7b60-77ed-43a1-b4f0-825fd5a5650d" -version = "0.1.2" +version = "0.2.0" [[VectorizationBase]] -deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "Hwloc", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static"] -git-tree-sha1 = "1901efb08ce6c4526ddf7fdfa9181dc3593fe6a2" +deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static"] +git-tree-sha1 = "3bc5ea8fbf25f233c4c49c0a75f14b276d2f9a69" uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" -version = "0.21.25" +version = "0.21.51" [[VertexSafeGraphs]] deps = ["Graphs"] @@ -1250,15 +1448,15 @@ version = "1.25.0+0" [[WriteVTK]] deps = ["Base64", "CodecZlib", "FillArrays", "LightXML", "TranscodingStreams"] -git-tree-sha1 = "bff2f6b5ff1e60d89ae2deba51500ce80014f8f6" +git-tree-sha1 = "f50c47d715199601a54afdd5267f24c8174842ae" uuid = "64499a7a-5c06-52f2-abe2-ccb03c286192" -version = "1.14.2" +version = "1.16.0" [[XML2_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"] -git-tree-sha1 = "1acf5bdf07aa0907e0a37d3718bb88d4b687b74a" +git-tree-sha1 = "58443b63fb7e465a8a7210828c91c08b92132dff" uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" -version = "2.9.12+0" +version = "2.9.14+0" [[XSLT_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Libgpg_error_jll", "Libiconv_jll", "Pkg", "XML2_jll", "Zlib_jll"] @@ -1302,6 +1500,12 @@ git-tree-sha1 = "0e0dc7431e7a0587559f9294aeec269471c991a4" uuid = "d091e8ba-531a-589c-9de9-94069b037ed8" version = "5.0.3+4" +[[Xorg_libXft_jll]] +deps = ["Fontconfig_jll", "Libdl", "Pkg", "Xorg_libXrender_jll"] +git-tree-sha1 = "754b542cdc1057e0a2f1888ec5414ee17a4ca2a1" +uuid = 
"2c808117-e144-5220-80d1-69d4eaa9352c" +version = "2.3.3+1" + [[Xorg_libXi_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXfixes_jll"] git-tree-sha1 = "89b52bc2160aadc84d707093930ef0bffa641246" @@ -1408,11 +1612,23 @@ git-tree-sha1 = "8c1a8e4dfacb1fd631745552c8db35d0deb09ea0" uuid = "700de1a5-db45-46bc-99cf-38207098b444" version = "0.2.2" +[[fzf_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "868e669ccb12ba16eaf50cb2957ee2ff61261c56" +uuid = "214eeab7-80f7-51ab-84ad-2988db7cef09" +version = "0.29.0+0" + [[gmsh_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "9554bb1cad1926e7d3afb68b0ab117d0b9bb73ee" +deps = ["Artifacts", "Cairo_jll", "CompilerSupportLibraries_jll", "FLTK_jll", "FreeType2_jll", "GLU_jll", "GMP_jll", "HDF5_jll", "JLLWrappers", "JpegTurbo_jll", "LLVMOpenMP_jll", "Libdl", "Libglvnd_jll", "METIS_jll", "MMG_jll", "OCCT_jll", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll", "Xorg_libXfixes_jll", "Xorg_libXft_jll", "Xorg_libXinerama_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] +git-tree-sha1 = "9774ebf68348b3b56c74a78b829051310163fd76" uuid = "630162c2-fc9b-58b3-9910-8442a8a132e6" -version = "4.9.3+0" +version = "4.10.2+0" + +[[libaom_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "3a2ea60308f0996d26f1e5354e10c24e9ef905d4" +uuid = "a4ae2306-e953-59d6-aa16-d00cac43593b" +version = "3.4.0+0" [[libass_jll]] deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "HarfBuzz_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] @@ -1464,6 +1680,6 @@ version = "3.5.0+0" [[xkbcommon_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Wayland_jll", "Wayland_protocols_jll", "Xorg_libxcb_jll", "Xorg_xkeyboard_config_jll"] -git-tree-sha1 = "ece2350174195bb31de1a63bea3a41ae1aa593b6" +git-tree-sha1 = "9ebfc140cc56e8c2156a15ceac2f0302e327ac0a" uuid = "d8fb68d0-12a3-5cfd-a85a-d49703b185fd" -version = "0.9.1+5" +version = "1.4.1+0" diff --git a/docs/Project.toml b/docs/Project.toml index 4cc65e01bc..31546ceb6d 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -8,11 +8,16 @@ IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" KrylovMethods = "9a2cd570-f05c-5dc1-9209-93ad6f5727f7" LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" Optim = "429524aa-4258-5aef-a3af-852621145aeb" OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" +PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Tensors = "48a634ad-e948-5137-8d70-aa71f2a747f4" TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" + +[extras] +MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index 57e768cd7f..cc209fc8f1 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -154,8 +154,9 @@ function local_to_global_numbering(dh::DistributedDofHandler) if interpolation_info.nvertexdofs > 0 for (vi,vertex) in enumerate(Ferrite.vertices(cell)) @debug println(" vertex#$vertex (R$my_rank)") + lvi = VertexIndex(ci,vi) # Dof is owned if it is local or if my rank is the smallest in the neighborhood - if 
!haskey(dgrid.shared_vertices,VertexIndex(ci,vi)) || all(keys(dgrid.shared_vertices[VertexIndex(ci,vi)].remote_vertices) .> my_rank) + if !haskey(dgrid.shared_vertices,lvi) || all(keys(dgrid.shared_vertices[lvi].remote_vertices) .> my_rank) # Update dof assignment dof_local_idx = dh.vertexdicts[fi][vertex] if local_to_global[dof_local_idx] == 0 @@ -168,19 +169,19 @@ function local_to_global_numbering(dh::DistributedDofHandler) end # Update shared vertex lookup table - if haskey(dgrid.shared_vertices,VertexIndex(ci,vi)) + if haskey(dgrid.shared_vertices,lvi) master_rank = my_rank - for master_rank_new ∈ keys(dgrid.shared_vertices[VertexIndex(ci,vi)].remote_vertices) + for master_rank_new ∈ keys(dgrid.shared_vertices[lvi].remote_vertices) master_rank = min(master_rank, master_rank_new) end - for (remote_rank, svs) ∈ dgrid.shared_vertices[VertexIndex(ci,vi)].remote_vertices + for (remote_rank, svs) ∈ dgrid.shared_vertices[lvi].remote_vertices if master_rank == my_rank # I own the dof - we have to send information if !haskey(vertices_send,remote_rank) vertices_send[remote_rank] = Vector{Ferrite.VertexIndex}() end - @debug println(" prepare sending vertex #$(VertexIndex(ci,vi)) to $remote_rank (R$my_rank)") + @debug println(" prepare sending vertex #$(lvi) to $remote_rank (R$my_rank)") for i ∈ svs - push!(vertices_send[remote_rank],VertexIndex(ci,vi)) + push!(vertices_send[remote_rank],lvi) end elseif master_rank == remote_rank # dof is owned by remote - we have to receive information if !haskey(n_vertices_recv,remote_rank) @@ -188,7 +189,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) else n_vertices_recv[remote_rank] += length(svs) end - @debug println(" prepare receiving vertex #$(VertexIndex(ci,vi)) from $remote_rank (R$my_rank)") + @debug println(" prepare receiving vertex #$(lvi) from $remote_rank (R$my_rank)") end end end diff --git a/src/assembler.jl b/src/assembler.jl index dc6c444419..9a79fe78e4 100644 --- a/src/assembler.jl +++ b/src/assembler.jl @@ -223,7 +223,7 @@ struct PartitionedArraysCOOAssembler{T} rows f::PVector - 👻 + 👻remotes dh # TODO PartitionedArrays backend as additional input arg @@ -307,14 +307,13 @@ struct PartitionedArraysCOOAssembler{T} ghost_dof_rank = Int32[] # ------------ Ghost dof synchronization ---------- - # Prepare sending 👻 ghost dofs to neighbors + # Prepare sending ghost dofs to neighbors 👻 #@TODO move relevant parts into dof handler #@TODO communication can be optimized by deduplicating entries in, and compressing the following arrays #@TODO reorder communication by field to eliminate need for `ghost_dof_field_index_to_send` ghost_dof_to_send = [Int[] for i ∈ 1:destination_len] # global dof id ghost_rank_to_send = [Int[] for i ∈ 1:destination_len] # rank of dof - ghost_dof_field_index_to_send = [Int[] for i ∈ 1:destination_len] - ghost_element_to_send = [Int[] for i ∈ 1:destination_len] # corresponding element + # ghost_dof_field_index_to_send = [Int[] for i ∈ 1:destination_len] ghost_dof_owner = [Int[] for i ∈ 1:destination_len] # corresponding owner ghost_dof_pivot_to_send = [Int[] for i ∈ 1:destination_len] # corresponding dof to interact with for shared_entity_set ∈ [dgrid.shared_vertices, dgrid.shared_faces, dgrid.shared_edges] @@ -341,15 +340,14 @@ struct PartitionedArraysCOOAssembler{T} append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_entity_dof]) append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_field_dof]) append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_field_dof]) - 
append!(ghost_dof_field_index_to_send[sender_slot], field_idx) - append!(ghost_element_to_send[sender_slot], pivot_cell_idx) + # append!(ghost_dof_field_index_to_send[sender_slot], field_idx) end end end end end - ghost_send_buffer_lengths = Int[length(i) for i ∈ ghost_element_to_send] + ghost_send_buffer_lengths = Int[length(i) for i ∈ ghost_dof_to_send] ghost_recv_buffer_lengths = zeros(Int, destination_len) MPI.Neighbor_alltoall!(UBuffer(ghost_send_buffer_lengths,1), UBuffer(ghost_recv_buffer_lengths,1), vertex_comm(dgrid)); @debug for (i,ghost_recv_buffer_length) ∈ enumerate(ghost_recv_buffer_lengths) @@ -362,13 +360,9 @@ struct PartitionedArraysCOOAssembler{T} ghost_recv_buffer_dofs = zeros(Int, sum(ghost_recv_buffer_lengths)) MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_dofs,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_dofs,ghost_recv_buffer_lengths), vertex_comm(dgrid)) - ghost_send_buffer_elements = vcat(ghost_element_to_send...) - ghost_recv_buffer_elements = zeros(Int, sum(ghost_recv_buffer_lengths)) - MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_elements,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_elements,ghost_recv_buffer_lengths), vertex_comm(dgrid)) - - ghost_send_buffer_fields = vcat(ghost_dof_field_index_to_send...) - ghost_recv_buffer_fields = zeros(Int, sum(ghost_recv_buffer_lengths)) - MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_fields,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_fields,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + # ghost_send_buffer_fields = vcat(ghost_dof_field_index_to_send...) + # ghost_recv_buffer_fields = zeros(Int, sum(ghost_recv_buffer_lengths)) + # MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_fields,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_fields,ghost_recv_buffer_lengths), vertex_comm(dgrid)) ghost_send_buffer_ranks = vcat(ghost_rank_to_send...) ghost_recv_buffer_ranks = zeros(Int, sum(ghost_recv_buffer_lengths)) @@ -413,10 +407,10 @@ struct PartitionedArraysCOOAssembler{T} f = PartitionedArrays.PVector(0.0,rows) @debug println("f constructed (R$my_rank)") - 👻 = zip(ghost_recv_buffer_dofs_piv, ghost_recv_buffer_dofs, ghost_recv_buffer_ranks) - @debug println("👻 $👻 (R$my_rank)") + 👻remotes = zip(ghost_recv_buffer_dofs_piv, ghost_recv_buffer_dofs, ghost_recv_buffer_ranks) + @debug println("👻remotes $👻remotes (R$my_rank)") - return new(I, J, V, cols, rows, f, 👻, dh) + return new(I, J, V, cols, rows, f, 👻remotes, dh) end end @@ -450,7 +444,7 @@ function end_assemble(assembler::PartitionedArraysCOOAssembler{T}) where {T} # Fix ghost layer 👻! Note that the locations for remote processes to write their # data into are missing up to this point. - for (i, (pivot_dof, global_ghost_dof, ghost_owner_rank)) ∈ enumerate(assembler.👻) + for (i, (pivot_dof, global_ghost_dof, ghost_owner_rank)) ∈ enumerate(assembler.👻remotes) push!(I, pivot_dof) push!(J, global_ghost_dof) push!(V, 0.0) From b017d2d5bcabafe84526506e5a05f9bbc62abfaf Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 10 Oct 2022 17:50:56 +0200 Subject: [PATCH 046/124] Change to manufactured solution because closed form for f=1 is quite complicated. 
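The manufactured solution used below is chosen so that both the exact solution and the source term are available in closed form. Assuming the default `generate_grid` domain `[-1,1]^dim`, the ansatz vanishes on the whole boundary (since cos(±π/2) = 0), and inserting it into the Poisson problem -Δu = f gives, with d the spatial dimension,

```math
u(x) = \prod_{i=1}^{d} \cos\!\Big(\frac{\pi x_i}{2}\Big), \qquad
f(x) = -\Delta u(x) = d \Big(\frac{\pi}{2}\Big)^{2} \prod_{i=1}^{d} \cos\!\Big(\frac{\pi x_i}{2}\Big),
```

which is exactly the `(π/2)^2 * dim * prod(cos, x*π/2)` factor assembled into `fe[i]` in the element routine below.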
--- docs/src/literate/distributed_assembly.jl | 34 ++++++++++++++++++----- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 582c6f1852..3eb109effe 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -14,8 +14,9 @@ #md # The full program, without comments, can be found in the next [section](@ref heat_equation-plain-program). # # First we load Ferrite, and some other packages we need -using Ferrite, MPI #, PartitionedArrays +using Ferrite, MPI using IterativeSolvers #, HYPRE +using PartitionedArrays #src # Launch MPI MPI.Init() @@ -43,7 +44,7 @@ close!(dh); # Nothing has to be changed here either. ch = ConstraintHandler(dh); ∂Ω = union(getfaceset.((getlocalgrid(dgrid), ), ["left", "right", "top", "bottom"])...); -dbc = Dirichlet(:u, ∂Ω, (x, t) -> 1) +dbc = Dirichlet(:u, ∂Ω, (x, t) -> 0) add!(ch, dbc); close!(ch) update!(ch, 0.0); @@ -70,14 +71,18 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DistributedDofHandler fill!(fe, 0) reinit!(cellvalues, cell) - + coords = getcoordinates(cell) + for q_point in 1:getnquadpoints(cellvalues) dΩ = getdetJdV(cellvalues, q_point) for i in 1:n_basefuncs v = shape_value(cellvalues, q_point, i) ∇v = shape_gradient(cellvalues, q_point, i) - fe[i] += v * dΩ + # Manufactured solution of Π cos(xᵢπ) + x = spatial_coordinate(cellvalues, q_point, coords) + fe[i] += (π/2)^2 * dim * prod(cos, x*π/2) * v * dΩ + for j in 1:n_basefuncs ∇u = shape_gradient(cellvalues, q_point, j) Ke[i, j] += (∇v ⋅ ∇u) * dΩ @@ -135,9 +140,24 @@ vtk_grid("heat_equation_distributed", dh) do vtk vtk_partitioning(vtk, dgrid) end -## test the result #src -using Test #src -@test norm(u) ≈ 9.536307974872432 #src +## Test the result against the manufactured solution #src +using Test #src +for cell in CellIterator(dh) #src + reinit!(cellvalues, cell) #src + n_basefuncs = getnbasefunctions(cellvalues) #src + coords = getcoordinates(cell) #src + map_parts(local_view(u, u.rows)) do u_local #src + uₑ = u_local[celldofs(cell)] #src + for q_point in 1:getnquadpoints(cellvalues) #src + x = spatial_coordinate(cellvalues, q_point, coords) #src + for i in 1:n_basefuncs #src + uₐₙₐ = prod(cos, x*π/2) #src + uₐₚₚᵣₒₓ = function_value(cellvalues, q_point, uₑ) #src + @test isapprox(uₐₙₐ, uₐₚₚᵣₒₓ; atol=1e-1) #src + end #src + end #src + end #src +end #src # Finally, we gracefully shutdown MPI MPI.Finalize() From 979a70d331e787421a4956088dd44f2ade281e7e Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 10 Oct 2022 18:10:38 +0200 Subject: [PATCH 047/124] Fix inhomogeneous boundary condition application for distributed problems. --- src/Dofs/ConstraintHandler.jl | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/src/Dofs/ConstraintHandler.jl b/src/Dofs/ConstraintHandler.jl index 08941f7139..0ab7232b02 100644 --- a/src/Dofs/ConstraintHandler.jl +++ b/src/Dofs/ConstraintHandler.jl @@ -1060,6 +1060,19 @@ end using PartitionedArrays +function meandiag(K::PartitionedArrays.PSparseMatrix) + # Get local portion of z + z_pa = map_parts(local_view(K, K.rows, K.cols)) do K_local + z = zero(eltype(K_local)) + for i in 1:size(K_local, 1) + z += abs(K_local[i, i]) + end + return z; + end + # z = get_part(z_pa, MPI.Comm_rank(z_pa.comm)+1) # Crashes :) + return MPI.Allreduce(z_pa.part, MPI.SUM, z_pa.comm) / size(K, 1) +end + """ Poor man's Dirichlet BC application for PartitionedArrays. 
:) @@ -1070,7 +1083,7 @@ function apply_zero!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PV f_local[ch.prescribed_dofs] .= 0.0 end - map_parts(local_view(K, K.rows, K.cols)) do K_local + map_parts(local_view(K, K.rows, K.cols), local_view(f, f.rows)) do K_local, f_local for cdof in ch.prescribed_dofs K_local[cdof, :] .= 0.0 K_local[:, cdof] .= 0.0 @@ -1086,9 +1099,19 @@ Poor man's Dirichlet BC application for PartitionedArrays. :) TODO optimize. """ function apply!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) + # Start by substracting the inhomogeneous solution from the right hand side + u_constrained = PartitionedArrays.PVector(0.0, K.cols) + map_parts(local_view(u_constrained, u_constrained.rows)) do u_local + u_local[ch.prescribed_dofs] .= ch.inhomogeneities + end + f .-= K*u_constrained + + m = meandiag(K) + + # Then fix the map_parts(local_view(f, f.rows), f.rows.partition) do f_local, partition # Note: RHS only non-zero for owned RHS entries - f_local[ch.prescribed_dofs] .= ch.inhomogeneities .* map(p -> p == partition.part, partition.lid_to_part[ch.prescribed_dofs]) + f_local[ch.prescribed_dofs] .= ch.inhomogeneities .* map(p -> p == partition.part, partition.lid_to_part[ch.prescribed_dofs]) * m end # Zero out locally visible rows and columns @@ -1096,12 +1119,13 @@ function apply!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector for cdof ∈ ch.prescribed_dofs K_local[cdof, :] .= 0.0 K_local[:, cdof] .= 0.0 - K_local[cdof, cdof] = 1.0 + K_local[cdof, cdof] = m end end # Zero out columns associated to the ghost dofs constrained on a remote process - # TODO optimize + # TODO optimize. If we assume that the sparsity pattern is symmetric, then we can constrain + # via the column information of the matrix. # Step 1: Send out all local ghosts to all other processes... remote_ghost_gdofs, remote_ghost_parts = map_parts(K.cols.partition) do partition From 0b73395f8e81f0bbd2bab2f03962996291519d53 Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 10 Oct 2022 19:45:10 +0200 Subject: [PATCH 048/124] Change to inhomogeneous boundary in example. --- docs/src/literate/distributed_assembly.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 3eb109effe..8c2fa9fc2c 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -44,7 +44,7 @@ close!(dh); # Nothing has to be changed here either. ch = ConstraintHandler(dh); ∂Ω = union(getfaceset.((getlocalgrid(dgrid), ), ["left", "right", "top", "bottom"])...); -dbc = Dirichlet(:u, ∂Ω, (x, t) -> 0) +dbc = Dirichlet(:u, ∂Ω, (x, t) -> 1) add!(ch, dbc); close!(ch) update!(ch, 0.0); @@ -151,7 +151,7 @@ for cell in CellIterator(dh) #src for q_point in 1:getnquadpoints(cellvalues) #src x = spatial_coordinate(cellvalues, q_point, coords) #src for i in 1:n_basefuncs #src - uₐₙₐ = prod(cos, x*π/2) #src + uₐₙₐ = prod(cos, x*π/2)+1.0 #src uₐₚₚᵣₒₓ = function_value(cellvalues, q_point, uₑ) #src @test isapprox(uₐₙₐ, uₐₚₚᵣₒₓ; atol=1e-1) #src end #src From 4810b75e255e3f371d99ec901f1dc4f7241f436b Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 10 Oct 2022 19:47:45 +0200 Subject: [PATCH 049/124] Fix shared vertices viz. 
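With this fix the marker field stores the local rank at every vertex shared with a given neighbor (and 0 elsewhere), writing one point-data array per neighboring rank. A minimal usage sketch for debugging a partitioning (the file name is arbitrary; `dh` and `dgrid` as in the distributed assembly example):

```julia
# Write one debug field per neighboring rank, marking the local vertices that
# are shared with that rank, plus the element-wise partitioning.
vtk_grid("debug_shared_vertices", dh) do vtk
    vtk_shared_vertices(vtk, dgrid)
    vtk_partitioning(vtk, dgrid)
end
```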
--- src/Export/VTK.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Export/VTK.jl b/src/Export/VTK.jl index e9dc6f898d..4ef629a23b 100644 --- a/src/Export/VTK.jl +++ b/src/Export/VTK.jl @@ -166,10 +166,10 @@ function vtk_shared_vertices(vtk, dgrid::DistributedGrid) if haskey(sv.remote_vertices, rank) (cellidx, i) = sv.local_idx cell = getcells(dgrid, cellidx) - u[vertices(cell)[i]] = rank + u[vertices(cell)[i]] = my_rank end end - vtk_point_data(pvtkwrapper(vtk), u, "shared vertices of $my_rank") + vtk_point_data(pvtkwrapper(vtk), u, "shared vertices with $rank") end end From e5442b8a36d463fa802ecb774dbf212d5482dff0 Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 10 Oct 2022 20:12:38 +0200 Subject: [PATCH 050/124] Add edges and faces to distributed grid. --- src/Export/VTK.jl | 42 +++++++++++++++++++ src/Grid/DistributedGrid.jl | 83 ++++++++++++++++++++++++------------- src/Grid/grid.jl | 4 +- src/exports.jl | 2 + 4 files changed, 101 insertions(+), 30 deletions(-) diff --git a/src/Export/VTK.jl b/src/Export/VTK.jl index 4ef629a23b..13f6592a17 100644 --- a/src/Export/VTK.jl +++ b/src/Export/VTK.jl @@ -173,6 +173,48 @@ function vtk_shared_vertices(vtk, dgrid::DistributedGrid) end end + +""" +Enrich the VTK file with meta information about shared faces. +""" +function vtk_shared_faces(vtk, dgrid::DistributedGrid) + u = Vector{Float64}(undef, getnnodes(dgrid)) + my_rank = MPI.Comm_rank(global_comm(dgrid))+1 + for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) + fill!(u, 0.0) + for sf ∈ values(get_shared_faces(dgrid)) + if haskey(sf.remote_faces, rank) + (cellidx, i) = sf.local_idx + cell = getcells(dgrid, cellidx) + facenodes = faces(cell)[i] + u[[facenodes...]] .= my_rank + end + end + vtk_point_data(pvtkwrapper(vtk), u, "shared faces with $rank") + end +end + + +""" +Enrich the VTK file with meta information about shared edges. +""" +function vtk_shared_edges(vtk, dgrid::DistributedGrid) + u = Vector{Float64}(undef, getnnodes(dgrid)) + my_rank = MPI.Comm_rank(global_comm(dgrid))+1 + for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) + fill!(u, 0.0) + for se ∈ values(get_shared_edges(dgrid)) + if haskey(se.remote_edges, rank) + (cellidx, i) = se.local_idx + cell = getcells(dgrid, cellidx) + edgenodes = edges(cell)[i] + u[[edgenodes...]] .= my_rank + end + end + vtk_point_data(pvtkwrapper(vtk), u, "shared edges with $rank") + end +end + """ Enrich the VTK file with partitioning meta information. 
""" diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index 77379e1200..0b2d648c8d 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -243,16 +243,15 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu for (i, _) ∈ enumerate(vertices(global_cell)) cell_vertex = VertexIndex(global_cell_idx, i) remote_vertices = Dict{Int,Vector{VertexIndex}}() - for (global_cell_neighbor_idx, j) ∈ getneighborhood(grid_topology, grid_to_distribute, cell_vertex, true) + for other_vertex ∈ getneighborhood(grid_topology, grid_to_distribute, cell_vertex, true) + (global_cell_neighbor_idx, j) = other_vertex other_rank = parts[global_cell_neighbor_idx] if other_rank != my_rank - n1 = vertices(getcells(grid_to_distribute,global_cell_idx))[i] - n2 = vertices(getcells(grid_to_distribute,global_cell_neighbor_idx))[j] - if n1 == n2 + if toglobal(grid_to_distribute,cell_vertex) == toglobal(grid_to_distribute,other_vertex) if !haskey(remote_vertices,other_rank) remote_vertices[other_rank] = Vector(undef,0) end - @debug println("Detected shared vertex $cell_vertex neighbor $(VertexIndex(global_cell_neighbor_idx,j)) (R$my_rank)") + @debug println("Detected shared vertex $cell_vertex neighbor $other_vertex (R$my_rank)") push!(remote_vertices[other_rank], VertexIndex(global_to_local_cell_map[other_rank][global_cell_neighbor_idx], j)) end end @@ -264,29 +263,57 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu end end - # # Edge - # if dim > 2 - # for (i, global_vertex_idx) ∈ enumerate(edges(global_cell)) - # cell_edge = EdgeIndex(global_cell_idx, i) - # for (global_cell_neighbor_idx, j) ∈ getneighborhood(grid_topology, grid_to_distribute, cell_edge) - # if parts[global_cell_neighbor_idx] != my_rank - # push!(shared_edges, cell_edge) - # end - # end - # end - # end - - # # Face - # if dim > 1 - # for (i, global_vertex_idx) ∈ enumerate(faces(global_cell)) - # cell_face = FaceIndex(global_cell_idx, i) - # for (global_cell_neighbor_idx, j) ∈ getneighborhood(grid_topology, grid_to_distribute, cell_face) - # if parts[global_cell_neighbor_idx] != my_rank - # push!(shared_faces, cell_face) - # end - # end - # end - # end + # Face + if dim > 1 + for (i, global_face_idx) ∈ enumerate(faces(global_cell)) + cell_face = FaceIndex(global_cell_idx, i) + remote_faces = Dict{Int,Vector{FaceIndex}}() + for other_face ∈ getneighborhood(grid_topology, grid_to_distribute, cell_face, true) + (global_cell_neighbor_idx, j) = other_face + other_rank = parts[global_cell_neighbor_idx] + if other_rank != my_rank + if toglobal(grid_to_distribute,cell_face) == toglobal(grid_to_distribute,other_face) + if !haskey(remote_faces,other_rank) + remote_faces[other_rank] = Vector(undef,0) + end + @debug println("Detected shared face $cell_face neighbor $other_face (R$my_rank)") + push!(remote_faces[other_rank], FaceIndex(global_to_local_cell_map[other_rank][global_cell_neighbor_idx], j)) + end + end + end + + if length(remote_faces) > 0 + idx = FaceIndex(global_to_local_cell_map[my_rank][global_cell_idx], i) + shared_faces[idx] = SharedFace(idx, remote_faces) + end + end + end + + # Edge + if dim > 2 + for (i, global_vertex_idx) ∈ enumerate(edges(global_cell)) + cell_edge = EdgeIndex(global_cell_idx, i) + remote_edges = Dict{Int,Vector{EdgeIndex}}() + for other_edge ∈ getneighborhood(grid_topology, grid_to_distribute, cell_edge, true) + (global_cell_neighbor_idx, j) = other_edge + other_rank = parts[global_cell_neighbor_idx] + if other_rank != my_rank 
+ if toglobal(grid_to_distribute,cell_edge) == toglobal(grid_to_distribute,other_edge) + if !haskey(remote_edges,other_edge) + remote_edges[other_edge] = Vector(undef,0) + end + @debug println("Detected shared edge $cell_edge neighbor $other_edge (R$my_rank)") + push!(remote_edges[other_edge], EdgeIndex(global_to_local_cell_map[other_rank][global_cell_neighbor_idx], j)) + end + end + end + + if length(remote_edges) > 0 + idx = EdgeIndex(global_to_local_cell_map[my_rank][global_cell_idx], i) + shared_edges[idx] = SharedEdge(idx, remote_edges) + end + end + end end end diff --git a/src/Grid/grid.jl b/src/Grid/grid.jl index 9c1da24547..f2480f1f35 100644 --- a/src/Grid/grid.jl +++ b/src/Grid/grid.jl @@ -409,10 +409,10 @@ faceskeleton(top::ExclusiveTopology, grid::AbstractGrid) = top.face_skeleton toglobal(grid::AbstractGrid,vertexidx::VertexIndex) = vertices(getcells(grid,vertexidx[1]))[vertexidx[2]] toglobal(grid::AbstractGrid,vertexidx::Vector{VertexIndex}) = unique(toglobal.((grid,),vertexidx)) -toglobal(grid::AbstractGrid,faceidx::FaceIndex) = sortface(faces(getcells(grid,faceidx[1])[faceidx[2]])) +toglobal(grid::AbstractGrid,faceidx::FaceIndex) = sortface(faces(getcells(grid,faceidx[1]))[faceidx[2]]) toglobal(grid::AbstractGrid,faceidx::Vector{FaceIndex}) = unique(toglobal.((grid,),faceidx)) -toglobal(grid::AbstractGrid,edgeidx::EdgeIndex) = sortedge(faces(getcells(grid,edgeidx[1])[edgeidx[2]])) +toglobal(grid::AbstractGrid,edgeidx::EdgeIndex) = sortedge(edges(getcells(grid,edgeidx[1]))[edgeidx[2]]) toglobal(grid::AbstractGrid,edgeidx::Vector{EdgeIndex}) = unique(toglobal.((grid,),edgeidx)) @inline getdim(::AbstractGrid{dim}) where {dim} = dim diff --git a/src/exports.jl b/src/exports.jl index 71264a6b10..1d75ddb713 100644 --- a/src/exports.jl +++ b/src/exports.jl @@ -163,6 +163,8 @@ export vtk_cellset, vtk_save, vtk_shared_vertices, + vtk_shared_faces, + vtk_shared_edges, vtk_partitioning, # L2 Projection From 49c0390505f7d7280d8036021136ba51d6c6ec04 Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 10 Oct 2022 20:59:08 +0200 Subject: [PATCH 051/124] Fix dof distribution for higher order. 
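With higher-order interpolations a field carries dofs not only on vertices but also on faces (and edges in 3D), and not every field has a dof on a given shared entity — e.g. a linear Lagrange field has vertex dofs only, while a quadratic one also has face dofs. The ghost synchronization in the assembler therefore has to skip fields without a matching entry in the dof dicts. Minimal sketch of the guard added here (names as introduced in this patch; `pivot_entity_global` is the globally identified shared entity from the surrounding loop):

```julia
for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh))
    # Skip fields that do not have a dof on this shared entity.
    has_entity_dof(dh, field_idx, pivot_entity_global) || continue
    pivot_entity_dof = entity_dofs(dh, field_idx, pivot_entity_global)
    # ... collect the pivot dof and this field's cell dofs for the ghost exchange ...
end
```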
--- src/Dofs/DofHandler.jl | 8 ++++---- src/assembler.jl | 11 ++++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index 92383057f8..d4686de640 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -46,12 +46,12 @@ function Base.show(io::IO, ::MIME"text/plain", dh::DofHandler) end end -has_entity_dof(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = haskey(dh.vertexdicts[fi], vertex) -has_entity_dof(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = haskey(dh.vertexdicts[fi], vertex) -has_entity_dof(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = haskey(dh.vertexdicts[fi], vertex) +has_entity_dof(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = haskey(dh.vertexdicts[field_idx], vertex) +#has_entity_dof(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = haskey(dh.edgedicts[field_idx], edge) +has_entity_dof(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = haskey(dh.facedicts[field_idx], face) entity_dofs(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = dh.vertexdicts[field_idx][vertex] -entity_dofs(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = dh.edgedicts[field_idx][edge] +#entity_dofs(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = dh.edgedicts[field_idx][edge] entity_dofs(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = dh.facedicts[field_idx][face] """ diff --git a/src/assembler.jl b/src/assembler.jl index 9a79fe78e4..97a3dfa408 100644 --- a/src/assembler.jl +++ b/src/assembler.jl @@ -319,7 +319,7 @@ struct PartitionedArraysCOOAssembler{T} for shared_entity_set ∈ [dgrid.shared_vertices, dgrid.shared_faces, dgrid.shared_edges] for (pivot_entity, pivot_shared_entity) ∈ shared_entity_set # Start by searching shared entities which are not owned - pivot_entity_owner_rank = Ferrite.compute_owner(dgrid, pivot_shared_entity) + pivot_entity_owner_rank = compute_owner(dgrid, pivot_shared_entity) pivot_cell_idx = pivot_entity[1] if my_rank != pivot_entity_owner_rank @@ -330,12 +330,13 @@ struct PartitionedArraysCOOAssembler{T} cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] - pivot_entity_global = Ferrite.toglobal(getlocalgrid(dgrid), pivot_entity) + pivot_entity_global = toglobal(getlocalgrid(dgrid), pivot_entity) - for (field_idx, field_name) in zip(1:num_fields(dh), Ferrite.getfieldnames(dh)) - pivot_entity_dof = Ferrite.entity_dofs(dh, field_idx, pivot_entity_global) + for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) + !has_entity_dof(dh, field_idx, pivot_entity_global) && continue + pivot_entity_dof = entity_dofs(dh, field_idx, pivot_entity_global) # Extract dofs belonging to the current field - cell_field_dofs = cell_dofs[Ferrite.dof_range(dh, field_name)] + cell_field_dofs = cell_dofs[dof_range(dh, field_name)] for cell_field_dof ∈ cell_field_dofs append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_entity_dof]) append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_field_dof]) From ae8823dd2dd29856f6b883a49b473b1aa87bd61d Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 10 Oct 2022 21:10:58 +0200 Subject: [PATCH 052/124] Typo. 
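The shared-edge lookup in `DistributedGrid` was keyed by the remote edge index instead of the neighboring rank; this change aligns it with the vertex and face bookkeeping. Sketch of the intended per-rank grouping (`local_neighbor_cell` is a placeholder for the mapped cell index used in the actual code):

```julia
remote_edges = Dict{Int,Vector{EdgeIndex}}()  # neighbor rank => edges known to that rank
if !haskey(remote_edges, other_rank)
    remote_edges[other_rank] = EdgeIndex[]
end
push!(remote_edges[other_rank], EdgeIndex(local_neighbor_cell, j))
```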
--- src/Grid/DistributedGrid.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index 0b2d648c8d..0ee9ab45db 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -300,10 +300,10 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu if other_rank != my_rank if toglobal(grid_to_distribute,cell_edge) == toglobal(grid_to_distribute,other_edge) if !haskey(remote_edges,other_edge) - remote_edges[other_edge] = Vector(undef,0) + remote_edges[other_rank] = Vector(undef,0) end @debug println("Detected shared edge $cell_edge neighbor $other_edge (R$my_rank)") - push!(remote_edges[other_edge], EdgeIndex(global_to_local_cell_map[other_rank][global_cell_neighbor_idx], j)) + push!(remote_edges[other_rank], EdgeIndex(global_to_local_cell_map[other_rank][global_cell_neighbor_idx], j)) end end end From a53b258f7f4584ed40ef79c20375d235cd013532 Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 10 Oct 2022 21:15:54 +0200 Subject: [PATCH 053/124] Fix dof assignment try 2. --- src/Dofs/DofHandler.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index d4686de640..5c74db857a 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -47,11 +47,11 @@ function Base.show(io::IO, ::MIME"text/plain", dh::DofHandler) end has_entity_dof(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = haskey(dh.vertexdicts[field_idx], vertex) -#has_entity_dof(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = haskey(dh.edgedicts[field_idx], edge) +has_entity_dof(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = haskey(dh.edgedicts[field_idx], edge) has_entity_dof(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = haskey(dh.facedicts[field_idx], face) entity_dofs(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = dh.vertexdicts[field_idx][vertex] -#entity_dofs(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = dh.edgedicts[field_idx][edge] +entity_dofs(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = dh.edgedicts[field_idx][edge] entity_dofs(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = dh.facedicts[field_idx][face] """ From eddb222b379545a79f7e3a76cd7d9af26be2e31e Mon Sep 17 00:00:00 2001 From: termi-official Date: Tue, 11 Oct 2022 15:14:51 +0200 Subject: [PATCH 054/124] Revert generalization of entity loops.
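The single generic loop over shared vertices, faces and edges is split back into one explicit loop per entity kind. The entity dicts are typed differently (the edge dict stores a `(dof, orientation)` tuple), and in 2D a face key is itself a `Tuple{Int,Int}`, so generic `has_entity_dof`/`entity_dofs` methods dispatching on the key type cannot tell faces from edges. Small illustration with hypothetical key values:

```julia
# In 2D a cell "face" is a line described by two vertex ids, so its key has
# exactly the same type as an edge key.
face_key = (3, 7)   # key into facedicts for dim == 2
edge_key = (3, 7)   # key into edgedicts
typeof(face_key) == typeof(edge_key)  # true — dispatch on the key type cannot separate them
```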
--- src/Dofs/DofHandler.jl | 24 ++++++++---- src/Grid/grid.jl | 2 +- src/assembler.jl | 85 ++++++++++++++++++++++++++++++++++++------ 3 files changed, 92 insertions(+), 19 deletions(-) diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index 5c74db857a..723d307653 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -46,13 +46,23 @@ function Base.show(io::IO, ::MIME"text/plain", dh::DofHandler) end end -has_entity_dof(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = haskey(dh.vertexdicts[field_idx], vertex) -has_entity_dof(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = haskey(dh.edgedicts[field_idx], edge) -has_entity_dof(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = haskey(dh.facedicts[field_idx], face) - -entity_dofs(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = dh.vertexdicts[field_idx][vertex] -entity_dofs(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = dh.edgedicts[field_idx][edge] -entity_dofs(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = dh.facedicts[field_idx][face] +# has_entity_dof(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = haskey(dh.vertexdicts[field_idx], vertex) +# has_entity_dof(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = haskey(dh.edgedicts[field_idx], edge) +# has_entity_dof(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = haskey(dh.facedicts[field_idx], face) + +has_cell_dofs(dh::AbstractDofHandler, field_idx::Int, cell::Int) = haskey(dh.celldicts[field_idx], cell) +has_vertex_dofs(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = haskey(dh.vertexdicts[field_idx], vertex) +has_edge_dofs(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = haskey(dh.edgedicts[field_idx], edge) +has_face_dofs(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = haskey(dh.facedicts[field_idx], face) + +# entity_dofs(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = dh.vertexdicts[field_idx][vertex] +# entity_dofs(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = dh.edgedicts[field_idx][edge] +# entity_dofs(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = dh.facedicts[field_idx][face] + +cell_dofs(dh::AbstractDofHandler, field_idx::Int, cell::Int) = dh.celldicts[field_idx][cell] +vertex_dofs(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = dh.vertexdicts[field_idx][vertex] +edge_dofs(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = dh.edgedicts[field_idx][edge] +face_dofs(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = dh.facedicts[field_idx][face] """ ndofs(dh::AbstractDofHandler) diff --git a/src/Grid/grid.jl b/src/Grid/grid.jl index f2480f1f35..d69a387c31 100644 --- a/src/Grid/grid.jl +++ b/src/Grid/grid.jl @@ -412,7 +412,7 @@ toglobal(grid::AbstractGrid,vertexidx::Vector{VertexIndex}) = unique(toglobal.(( toglobal(grid::AbstractGrid,faceidx::FaceIndex) = sortface(faces(getcells(grid,faceidx[1]))[faceidx[2]]) toglobal(grid::AbstractGrid,faceidx::Vector{FaceIndex}) = unique(toglobal.((grid,),faceidx)) -toglobal(grid::AbstractGrid,edgeidx::EdgeIndex) = sortedge(edges(getcells(grid,edgeidx[1]))[edgeidx[2]]) +toglobal(grid::AbstractGrid,edgeidx::EdgeIndex) = sortedge(edges(getcells(grid,edgeidx[1]))[edgeidx[2]])[1] toglobal(grid::AbstractGrid,edgeidx::Vector{EdgeIndex}) = unique(toglobal.((grid,),edgeidx)) @inline getdim(::AbstractGrid{dim}) where {dim} = dim diff 
--git a/src/assembler.jl b/src/assembler.jl index 97a3dfa408..134fccc520 100644 --- a/src/assembler.jl +++ b/src/assembler.jl @@ -236,6 +236,7 @@ struct PartitionedArraysCOOAssembler{T} nldofs = num_local_dofs(dh) ngdofs = num_global_dofs(dh) dgrid = getglobalgrid(dh) + dim = getdim(dgrid) I = Int[] J = Int[] @@ -316,29 +317,91 @@ struct PartitionedArraysCOOAssembler{T} # ghost_dof_field_index_to_send = [Int[] for i ∈ 1:destination_len] ghost_dof_owner = [Int[] for i ∈ 1:destination_len] # corresponding owner ghost_dof_pivot_to_send = [Int[] for i ∈ 1:destination_len] # corresponding dof to interact with - for shared_entity_set ∈ [dgrid.shared_vertices, dgrid.shared_faces, dgrid.shared_edges] - for (pivot_entity, pivot_shared_entity) ∈ shared_entity_set + for (pivot_vertex, pivot_shared_vertex) ∈ dgrid.shared_vertices + # Start by searching shared entities which are not owned + pivot_vertex_owner_rank = compute_owner(dgrid, pivot_shared_vertex) + pivot_cell_idx = pivot_vertex[1] + + if my_rank != pivot_vertex_owner_rank + sender_slot = destination_index[pivot_vertex_owner_rank] + + @debug println("$pivot_vertex may require synchronization (R$my_rank)") + # Note: We have to send ALL dofs on the element to the remote. + cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] + cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] + + pivot_vertex_global = toglobal(getlocalgrid(dgrid), pivot_vertex) + + for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) + !has_vertex_dofs(dh, field_idx, pivot_vertex_global) && continue + pivot_vertex_dof = vertex_dofs(dh, field_idx, pivot_vertex_global) + # Extract dofs belonging to the current field + cell_field_dofs = cell_dofs[dof_range(dh, field_name)] + for cell_field_dof ∈ cell_field_dofs + append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_vertex_dof]) + append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_field_dof]) + append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_field_dof]) + # append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + end + end + end + end + + if dim > 1 + for (pivot_face, pivot_shared_face) ∈ dgrid.shared_faces + # Start by searching shared entities which are not owned + pivot_face_owner_rank = compute_owner(dgrid, pivot_shared_face) + pivot_cell_idx = pivot_face[1] + + if my_rank != pivot_face_owner_rank + sender_slot = destination_index[pivot_face_owner_rank] + + @debug println("$pivot_face may require synchronization (R$my_rank)") + # Note: We have to send ALL dofs on the element to the remote. + cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? 
length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] + cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] + + pivot_face_global = toglobal(getlocalgrid(dgrid), pivot_face) + + for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) + !has_face_dofs(dh, field_idx, pivot_face_global) && continue + pivot_face_dof = face_dofs(dh, field_idx, pivot_face_global) + # Extract dofs belonging to the current field + cell_field_dofs = cell_dofs[dof_range(dh, field_name)] + for cell_field_dof ∈ cell_field_dofs + append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_face_dof]) + append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_field_dof]) + append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_field_dof]) + # append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + end + end + end + end + end + + if dim > 2 + for (pivot_edge, pivot_shared_edge) ∈ dgrid.shared_edges # Start by searching shared entities which are not owned - pivot_entity_owner_rank = compute_owner(dgrid, pivot_shared_entity) - pivot_cell_idx = pivot_entity[1] + pivot_edge_owner_rank = compute_owner(dgrid, pivot_shared_edge) + pivot_cell_idx = pivot_edge[1] - if my_rank != pivot_entity_owner_rank - sender_slot = destination_index[pivot_entity_owner_rank] + if my_rank != pivot_edge_owner_rank + sender_slot = destination_index[pivot_edge_owner_rank] - @debug println("$pivot_entity may require synchronization (R$my_rank)") + @debug println("$pivot_edge may require synchronization (R$my_rank)") # Note: We have to send ALL dofs on the element to the remote. cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] - pivot_entity_global = toglobal(getlocalgrid(dgrid), pivot_entity) + pivot_edge_global = toglobal(getlocalgrid(dgrid), pivot_edge) for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) - !has_entity_dof(dh, field_idx, pivot_entity_global) && continue - pivot_entity_dof = entity_dofs(dh, field_idx, pivot_entity_global) + !has_edge_dofs(dh, field_idx, pivot_edge_global) && continue + pivot_edge_dof = edge_dofs(dh, field_idx, pivot_edge_global) # Extract dofs belonging to the current field cell_field_dofs = cell_dofs[dof_range(dh, field_name)] for cell_field_dof ∈ cell_field_dofs - append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_entity_dof]) + append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_edge_dof]) append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_field_dof]) append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_field_dof]) # append!(ghost_dof_field_index_to_send[sender_slot], field_idx) From 5670dd2f635bdd5ca8eaf253b03d003127910c79 Mon Sep 17 00:00:00 2001 From: termi-official Date: Tue, 11 Oct 2022 16:05:11 +0200 Subject: [PATCH 055/124] Copy pasta party. 
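The vertex logic of `local_to_global_numbering` is duplicated for faces and edges (hence the name): each entity kind gets its own send/receive bookkeeping (`faces_send`/`n_faces_recv`, `edges_send`/`n_edges_recv`), a `celldicts` field is added to the handler, and the same ownership rule is applied throughout. Sketch of that rule for a local vertex index `lvi`, as used below (faces and edges follow the same pattern with their shared-face/shared-edge lookups):

```julia
# A local entity (and its dofs) is owned by this rank if it is not shared, or
# if this rank is the smallest rank among all ranks sharing it.
my_rank = MPI.Comm_rank(global_comm(dgrid)) + 1
owned = !is_shared_vertex(dgrid, lvi) ||
        compute_owner(dgrid, get_shared_vertex(dgrid, lvi)) == my_rank
```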
--- src/Dofs/DistributedDofHandler.jl | 315 ++++++++++++++++++++++++++---- src/Dofs/DofHandler.jl | 2 +- src/Grid/DistributedGrid.jl | 8 +- 3 files changed, 288 insertions(+), 37 deletions(-) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index cc209fc8f1..3eea3885b0 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -25,6 +25,7 @@ struct DistributedDofHandler{dim,T,G<:AbstractDistributedGrid{dim}} <: AbstractD vertexdicts::Vector{Dict{Int,Int}} edgedicts::Vector{Dict{Tuple{Int,Int},Tuple{Int,Bool}}} facedicts::Vector{Dict{NTuple{dim,Int},Int}} + celldicts::Vector{Dict{Int,Vector{Int}}} ldof_to_gdof::Vector{Int} ldof_to_rank::Vector{Int32} @@ -32,7 +33,7 @@ end function DistributedDofHandler(grid::AbstractDistributedGrid{dim}) where {dim} isconcretetype(getcelltype(grid)) || error("Grid includes different celltypes. DistributedMixedDofHandler not implemented yet.") - DistributedDofHandler(Symbol[], Int[], Interpolation[], BCValues{Float64}[], Int[], Int[], ScalarWrapper(false), grid, Ferrite.ScalarWrapper(-1), Dict{Int,Int}[], Dict{Tuple{Int,Int},Tuple{Int,Bool}}[],Dict{NTuple{dim,Int},Int}[], Int[], Int32[]) + DistributedDofHandler(Symbol[], Int[], Interpolation[], BCValues{Float64}[], Int[], Int[], ScalarWrapper(false), grid, ScalarWrapper(-1), Dict{Int,Int}[], Dict{Tuple{Int,Int},Tuple{Int,Bool}}[],Dict{NTuple{dim,Int},Int}[], Dict{Int,Vector{Int}}[], Int[], Int32[]) end function Base.show(io::IO, ::MIME"text/plain", dh::DistributedDofHandler) @@ -88,8 +89,8 @@ function compute_dof_ownership(dh) owner_rank = minimum([collect(keys(sv.remote_vertices));my_rank]) if owner_rank != my_rank - for fi in 1:Ferrite.num_fields(dh) - vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] + for fi in 1:num_fields(dh) + vi = vertices(getcells(getgrid(dh),lci))[lclvi] if haskey(dh.vertexdicts[fi], vi) local_dof_idx = dh.vertexdicts[fi][vi] dof_owner[local_dof_idx] = owner_rank @@ -122,7 +123,8 @@ Renumber the dofs in local ordering to their corresponding global numbering. TODO: Refactor for MixedDofHandler integration """ function local_to_global_numbering(dh::DistributedDofHandler) - dgrid = dh.grid + dgrid = getglobalgrid(dh) + dim = getdim(dgrid) # MPI rank starting with 1 to match Julia's index convention my_rank = MPI.Comm_rank(global_comm(dgrid))+1 @@ -134,6 +136,10 @@ function local_to_global_numbering(dh::DistributedDofHandler) # @TODO replace dict with vector and tie to MPI neighborhood graph of the mesh vertices_send = Dict{Int,Vector{VertexIndex}}() n_vertices_recv = Dict{Int,Int}() + faces_send = Dict{Int,Vector{FaceIndex}}() + n_faces_recv = Dict{Int,Int}() + edges_send = Dict{Int,Vector{EdgeIndex}}() + n_edges_recv = Dict{Int,Int}() # We start by assigning a local dof to all owned entities. 
# An entity is owned if: @@ -148,17 +154,17 @@ function local_to_global_numbering(dh::DistributedDofHandler) next_local_idx = 1 for (ci, cell) in enumerate(getcells(getgrid(dh))) @debug println("cell #$ci (R$my_rank)") - for fi in 1:Ferrite.num_fields(dh) - @debug println(" field: $(dh.field_names[fi]) (R$my_rank)") - interpolation_info = Ferrite.InterpolationInfo(dh.field_interpolations[fi]) + for field_idx in 1:num_fields(dh) + @debug println(" field: $(dh.field_names[field_idx]) (R$my_rank)") + interpolation_info = InterpolationInfo(dh.field_interpolations[field_idx]) if interpolation_info.nvertexdofs > 0 - for (vi,vertex) in enumerate(Ferrite.vertices(cell)) + for (vi,vertex) in enumerate(vertices(cell)) @debug println(" vertex#$vertex (R$my_rank)") lvi = VertexIndex(ci,vi) # Dof is owned if it is local or if my rank is the smallest in the neighborhood - if !haskey(dgrid.shared_vertices,lvi) || all(keys(dgrid.shared_vertices[lvi].remote_vertices) .> my_rank) + if !is_shared_vertex(dgrid, lvi) || (compute_owner(dgrid, get_shared_vertex(dgrid, lvi)) == my_rank) # Update dof assignment - dof_local_idx = dh.vertexdicts[fi][vertex] + dof_local_idx = dh.vertexdicts[field_idx][vertex] if local_to_global[dof_local_idx] == 0 @debug println(" mapping vertex dof#$dof_local_idx to $next_local_idx (R$my_rank)") local_to_global[dof_local_idx] = next_local_idx @@ -169,15 +175,16 @@ function local_to_global_numbering(dh::DistributedDofHandler) end # Update shared vertex lookup table - if haskey(dgrid.shared_vertices,lvi) + if is_shared_vertex(dgrid, lvi) master_rank = my_rank - for master_rank_new ∈ keys(dgrid.shared_vertices[lvi].remote_vertices) + remote_vertex_dict = remote_entities(get_shared_vertex(dgrid, lvi)) + for master_rank_new ∈ keys(remote_vertex_dict) master_rank = min(master_rank, master_rank_new) end - for (remote_rank, svs) ∈ dgrid.shared_vertices[lvi].remote_vertices + for (remote_rank, svs) ∈ remote_vertex_dict if master_rank == my_rank # I own the dof - we have to send information if !haskey(vertices_send,remote_rank) - vertices_send[remote_rank] = Vector{Ferrite.VertexIndex}() + vertices_send[remote_rank] = Vector{VertexIndex}() end @debug println(" prepare sending vertex #$(lvi) to $remote_rank (R$my_rank)") for i ∈ svs @@ -195,7 +202,119 @@ function local_to_global_numbering(dh::DistributedDofHandler) end end end - end + + if dim > 2 # edges only in 3D + if interpolation_info.nedgedofs > 0 + for (ei,edge) in enumerate(edges(cell)) + @debug println(" edge#$edge (R$my_rank)") + lei = EdgeIndex(ci,ei) + # Dof is owned if it is local or if my rank is the smallest in the neighborhood + if !is_shared_edge(dgrid, lei) || (compute_owner(dgrid, get_shared_edge(dgrid, lei)) == my_rank) + # Update dof assignment + dof_local_idx = dh.edgedicts[field_idx][toglobal(getlocalgrid(dgrid), lei)][1] + if local_to_global[dof_local_idx] == 0 + @debug println(" mapping edge dof#$dof_local_idx to $next_local_idx (R$my_rank)") + local_to_global[dof_local_idx] = next_local_idx + next_local_idx += 1 + else + @debug println(" edge dof#$dof_local_idx already mapped to $(local_to_global[dof_local_idx]) (R$my_rank)") + end + end + + # Update shared edge lookup table + if is_shared_edge(dgrid, lei) + master_rank = my_rank + remote_edge_dict = remote_entities(get_shared_edge(dgrid, lei)) + for master_rank_new ∈ keys(remote_edge_dict) + master_rank = min(master_rank, master_rank_new) + end + for (remote_rank, svs) ∈ remote_edge_dict + if master_rank == my_rank # I own the dof - we have to send information + if 
!haskey(edges_send,remote_rank) + edges_send[remote_rank] = Vector{EdgeIndex}() + end + @debug println(" prepare sending edge #$(lei) to $remote_rank (R$my_rank)") + for i ∈ svs + push!(edges_send[remote_rank],lei) + end + elseif master_rank == remote_rank # dof is owned by remote - we have to receive information + if !haskey(n_edges_recv,remote_rank) + n_edges_recv[remote_rank] = length(svs) + else + n_edges_recv[remote_rank] += length(svs) + end + @debug println(" prepare receiving edge #$(lei) from $remote_rank (R$my_rank)") + end + end + end + end + end + end + + if interpolation_info.nfacedofs > 0 && (interpolation_info.dim == dim) + for (fi,face) in enumerate(faces(cell)) + @debug println(" face#$face (R$my_rank)") + lfi = FaceIndex(ci,fi) + # Dof is owned if it is local or if my rank is the smallest in the neighborhood + if !is_shared_face(dgrid, lfi) || (compute_owner(dgrid, get_shared_face(dgrid, lfi)) == my_rank) + # Update dof assignment + dof_local_idx = dh.facedicts[field_idx][toglobal(getlocalgrid(dgrid), lfi)] + if local_to_global[dof_local_idx] == 0 + @debug println(" mapping face dof#$dof_local_idx to $next_local_idx (R$my_rank)") + local_to_global[dof_local_idx] = next_local_idx + next_local_idx += 1 + else + @debug println(" face dof#$dof_local_idx already mapped to $(local_to_global[dof_local_idx]) (R$my_rank)") + end + end + + # Update shared face lookup table + if is_shared_face(dgrid, lfi) + master_rank = my_rank + remote_face_dict = remote_entities(get_shared_face(dgrid, lfi)) + for master_rank_new ∈ keys(remote_face_dict) + master_rank = min(master_rank, master_rank_new) + end + for (remote_rank, svs) ∈ remote_face_dict + if master_rank == my_rank # I own the dof - we have to send information + if !haskey(faces_send,remote_rank) + faces_send[remote_rank] = Vector{FaceIndex}() + end + @debug println(" prepare sending face #$(lfi) to $remote_rank (R$my_rank)") + for i ∈ svs + push!(faces_send[remote_rank],lfi) + end + elseif master_rank == remote_rank # dof is owned by remote - we have to receive information + if !haskey(n_faces_recv,remote_rank) + n_faces_recv[remote_rank] = length(svs) + else + n_faces_recv[remote_rank] += length(svs) + end + @debug println(" prepare receiving face #$(lfi) from $remote_rank (R$my_rank)") + end + end + end + end # face loop + end + + if interpolation_info.ncelldofs > 0 # always distribute new dofs for cell + @debug println(" cell#$ci") + for celldof in 1:interpolation_info.ncelldofs + for d in 1:dh.field_dims[field_idx] + # Update dof assignment + dof_local_idx = dh.celldicts[field_idx][ci][celldof] + if local_to_global[dof_local_idx] == 0 + @debug println(" mapping cell dof#$dof_local_idx to $next_local_idx (R$my_rank)") + local_to_global[dof_local_idx] = next_local_idx + next_local_idx += 1 + else + # Should never happen... + @debug println(" cell dof#$dof_local_idx already mapped to $(local_to_global[dof_local_idx]) (R$my_rank)") + end + end + end # cell loop + end + end # field loop end # @@ -225,6 +344,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) # Sync non-owned dofs with neighboring processes. # TODO: implement for entitied with dim > 0 # TODO: Use MPI graph primitives to simplify this code + # TODO: Simplify with dimension-agnostic code... 
for sending_rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) if my_rank == sending_rank for remote_rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) @@ -245,7 +365,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) end MPI.Send(remote_cells, global_comm(dgrid); dest=remote_rank-1) MPI.Send(remote_cell_vis, global_comm(dgrid); dest=remote_rank-1) - for fi ∈ 1:Ferrite.num_fields(dh) + for fi ∈ 1:num_fields(dh) next_buffer_idx = 1 if length(dh.vertexdicts[fi]) == 0 @debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") @@ -254,7 +374,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) # fill correspondence array corresponding_global_dofs = Array{Int64}(undef,n_vertices) for (lci,lclvi) ∈ vertices_send[remote_rank] - vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] + vi = vertices(getcells(getgrid(dh),lci))[lclvi] if haskey(dh.vertexdicts[fi], vi) corresponding_global_dofs[next_buffer_idx] = local_to_global[dh.vertexdicts[fi][vi]] end @@ -263,6 +383,78 @@ function local_to_global_numbering(dh::DistributedDofHandler) MPI.Send(corresponding_global_dofs, global_comm(dgrid); dest=remote_rank-1) end end + + if haskey(faces_send, remote_rank) + n_faces = length(faces_send[remote_rank]) + @debug println("Sending $n_faces faces to rank $remote_rank (R$my_rank)") + remote_cells = Array{Int64}(undef,n_faces) + remote_cell_vis = Array{Int64}(undef,n_faces) + next_buffer_idx = 1 + for lvi ∈ faces_send[remote_rank] + sv = dgrid.shared_faces[lvi] + @assert haskey(sv.remote_faces, remote_rank) + for (cvi, llvi) ∈ sv.remote_faces[remote_rank][1:1] # Just don't ask :) + remote_cells[next_buffer_idx] = cvi + remote_cell_vis[next_buffer_idx] = llvi + next_buffer_idx += 1 + end + end + MPI.Send(remote_cells, global_comm(dgrid); dest=remote_rank-1) + MPI.Send(remote_cell_vis, global_comm(dgrid); dest=remote_rank-1) + for fi ∈ 1:num_fields(dh) + next_buffer_idx = 1 + if length(dh.facedicts[fi]) == 0 + @debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") + continue + end + # fill correspondence array + corresponding_global_dofs = Array{Int64}(undef,n_faces) + for (lci,lclvi) ∈ faces_send[remote_rank] + vi = sortface(faces(getcells(getgrid(dh),lci))[lclvi]) + if haskey(dh.facedicts[fi], vi) + corresponding_global_dofs[next_buffer_idx] = local_to_global[dh.facedicts[fi][vi]] + end + next_buffer_idx += 1 + end + MPI.Send(corresponding_global_dofs, global_comm(dgrid); dest=remote_rank-1) + end + end + + if haskey(edges_send, remote_rank) + n_edges = length(edges_send[remote_rank]) + @debug println("Sending $n_edges edges to rank $remote_rank (R$my_rank)") + remote_cells = Array{Int64}(undef,n_edges) + remote_cell_vis = Array{Int64}(undef,n_edges) + next_buffer_idx = 1 + for lvi ∈ edges_send[remote_rank] + sv = dgrid.shared_edges[lvi] + @assert haskey(sv.remote_edges, remote_rank) + for (cvi, llvi) ∈ sv.remote_edges[remote_rank][1:1] # Just don't ask :) + remote_cells[next_buffer_idx] = cvi + remote_cell_vis[next_buffer_idx] = llvi + next_buffer_idx += 1 + end + end + MPI.Send(remote_cells, global_comm(dgrid); dest=remote_rank-1) + MPI.Send(remote_cell_vis, global_comm(dgrid); dest=remote_rank-1) + for fi ∈ 1:num_fields(dh) + next_buffer_idx = 1 + if length(dh.facedicts[fi]) == 0 + @debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") + continue + end + # fill correspondence array + corresponding_global_dofs = Array{Int64}(undef,n_edges) + for (lci,lclvi) ∈ edges_send[remote_rank] + vi = 
sortedge(edges(getcells(getgrid(dh),lci))[lclvi])[1] + if haskey(dh.edgedicts[fi], vi) + corresponding_global_dofs[next_buffer_idx] = local_to_global[dh.edgedicts[fi][vi][1]] + end + next_buffer_idx += 1 + end + MPI.Send(corresponding_global_dofs, global_comm(dgrid); dest=remote_rank-1) + end + end end else if haskey(n_vertices_recv, sending_rank) @@ -272,20 +464,72 @@ function local_to_global_numbering(dh::DistributedDofHandler) local_cell_vis = Array{Int64}(undef,n_vertices) MPI.Recv!(local_cells, global_comm(dgrid); source=sending_rank-1) MPI.Recv!(local_cell_vis, global_comm(dgrid); source=sending_rank-1) - for fi in 1:Ferrite.num_fields(dh) - if length(dh.vertexdicts[fi]) == 0 - @debug println(" Skipping recv on field $(dh.field_names[fi]) (R$my_rank)") + for field_idx in 1:num_fields(dh) + if length(dh.vertexdicts[field_idx]) == 0 + @debug println(" Skipping recv on field $(dh.field_names[field_idx]) (R$my_rank)") continue end corresponding_global_dofs = Array{Int64}(undef,n_vertices) MPI.Recv!(corresponding_global_dofs, global_comm(dgrid); source=sending_rank-1) for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) - vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] - if haskey(dh.vertexdicts[fi], vi) - local_to_global[dh.vertexdicts[fi][vi]] = corresponding_global_dofs[cdi] - @debug println(" Updating field $(dh.field_names[fi]) vertex $(VertexIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") + vi = vertices(getcells(getgrid(dh),lci))[lclvi] + if haskey(dh.vertexdicts[field_idx], vi) + local_to_global[dh.vertexdicts[field_idx][vi]] = corresponding_global_dofs[cdi] + @debug println(" Updating field $(dh.field_names[field_idx]) vertex $(VertexIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") else - @debug println(" Skipping recv on field $(dh.field_names[fi]) vertex $vi (R$my_rank)") + @debug println(" Skipping recv on field $(dh.field_names[field_idx]) vertex $vi (R$my_rank)") + end + end + end + end + + if haskey(n_faces_recv, sending_rank) + n_faces = n_faces_recv[sending_rank] + @debug println("Receiving $n_faces faces from rank $sending_rank (R$my_rank)") + local_cells = Array{Int64}(undef,n_faces) + local_cell_vis = Array{Int64}(undef,n_faces) + MPI.Recv!(local_cells, global_comm(dgrid); source=sending_rank-1) + MPI.Recv!(local_cell_vis, global_comm(dgrid); source=sending_rank-1) + for field_idx in 1:num_fields(dh) + if length(dh.facedicts[field_idx]) == 0 + @debug println(" Skipping recv on field $(dh.field_names[field_idx]) (R$my_rank)") + continue + end + corresponding_global_dofs = Array{Int64}(undef,n_faces) + MPI.Recv!(corresponding_global_dofs, global_comm(dgrid); source=sending_rank-1) + for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) + vi = sortface(faces(getcells(getgrid(dh),lci))[lclvi]) + if haskey(dh.facedicts[field_idx], vi) + local_to_global[dh.facedicts[field_idx][vi]] = corresponding_global_dofs[cdi] + @debug println(" Updating field $(dh.field_names[field_idx]) face $(FaceIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") + else + @debug println(" Skipping recv on field $(dh.field_names[field_idx]) face $vi (R$my_rank)") + end + end + end + end + + if haskey(n_edges_recv, sending_rank) + n_edges = n_edges_recv[sending_rank] + @debug println("Receiving $n_edges edges from rank $sending_rank (R$my_rank)") + local_cells = Array{Int64}(undef,n_edges) + local_cell_vis = Array{Int64}(undef,n_edges) + MPI.Recv!(local_cells, global_comm(dgrid); source=sending_rank-1) + 
MPI.Recv!(local_cell_vis, global_comm(dgrid); source=sending_rank-1) + for field_idx in 1:num_fields(dh) + if length(dh.edgedicts[field_idx]) == 0 + @debug println(" Skipping recv on field $(dh.field_names[field_idx]) (R$my_rank)") + continue + end + corresponding_global_dofs = Array{Int64}(undef,n_edges) + MPI.Recv!(corresponding_global_dofs, global_comm(dgrid); source=sending_rank-1) + for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) + vi = sortedge(edges(getcells(getgrid(dh),lci))[lclvi])[1] + if haskey(dh.edgedicts[field_idx], vi) + local_to_global[dh.edgedicts[field_idx][vi][1]] = corresponding_global_dofs[cdi] + @debug println(" Updating field $(dh.field_names[field_idx]) edge $(EdgeIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") + else + @debug println(" Skipping recv on field $(dh.field_names[field_idx]) edge $vi (R$my_rank)") end end end @@ -294,17 +538,9 @@ function local_to_global_numbering(dh::DistributedDofHandler) end # Postcondition: All local dofs need a corresponding global dof! + @debug println("Local to global mapping: $local_to_global") @assert findfirst(local_to_global .== 0) === nothing - @debug vtk_grid("dofs", dgrid; compress=false) do vtk - u = Vector{Float64}(undef,length(dgrid.local_grid.nodes)) - fill!(u, 0.0) - for i=1:length(u) - u[i] = local_to_global[dh.vertexdicts[1][i]] - end - vtk_point_data(vtk, u,"dof") - end - return local_to_global end @@ -346,6 +582,11 @@ function __close!(dh::DistributedDofHandler{dim}) where {dim} dh.facedicts[i] = Dict{NTuple{dim,Int},Int}() end + resize!(dh.celldicts, num_fields(dh)) + for i in 1:num_fields(dh) + dh.celldicts[i] = Dict{Int,Vector{Int}}() + end + # celldofs are never shared between different cells so there is no need # for a `celldict` to keep track of which cells we have added dofs too. 
@@ -451,6 +692,10 @@ function __close!(dh::DistributedDofHandler{dim}) where {dim} for celldof in 1:interpolation_info.ncelldofs for d in 1:dh.field_dims[fi] @debug println(" adding dof#$nextdof") + if !haskey(dh.celldicts[fi], ci) + dh.celldicts[fi][ci] = Vector{Int}(undef,0) + end + push!(dh.celldicts[fi][ci], nextdof) push!(dh.cell_dofs, nextdof) nextdof += 1 end @@ -469,7 +714,7 @@ end # TODO this is copy pasta from DofHandler.jl function reshape_to_nodes(dh::DistributedDofHandler, u::Vector{T}, fieldname::Symbol) where T # make sure the field exists - fieldname ∈ Ferrite.getfieldnames(dh) || error("Field $fieldname not found.") + fieldname ∈ getfieldnames(dh) || error("Field $fieldname not found.") field_idx = findfirst(i->i==fieldname, getfieldnames(dh)) offset = field_offset(dh, fieldname) diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index 723d307653..c6b6f0c1c2 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -61,7 +61,7 @@ has_face_dofs(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) whe cell_dofs(dh::AbstractDofHandler, field_idx::Int, cell::Int) = dh.celldicts[field_idx][cell] vertex_dofs(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = dh.vertexdicts[field_idx][vertex] -edge_dofs(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = dh.edgedicts[field_idx][edge] +edge_dofs(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = dh.edgedicts[field_idx][edge][1] face_dofs(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = dh.facedicts[field_idx][face] """ diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index 0ee9ab45db..058eaf6d46 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -58,9 +58,15 @@ end @inline get_shared_edges(dgrid::AbstractDistributedGrid) = dgrid.shared_edges @inline get_shared_faces(dgrid::AbstractDistributedGrid) = dgrid.shared_faces +@inline get_shared_vertex(dgrid::AbstractDistributedGrid, vi::VertexIndex) = dgrid.shared_vertices[vi] +@inline get_shared_edge(dgrid::AbstractDistributedGrid, ei::EdgeIndex) = dgrid.shared_edges[ei] +@inline get_shared_face(dgrid::AbstractDistributedGrid, fi::FaceIndex) = dgrid.shared_faces[fi] + """ """ @inline is_shared_vertex(dgrid::AbstractDistributedGrid, vi::VertexIndex) = haskey(dgrid.shared_vertices, vi) +@inline is_shared_edge(dgrid::AbstractDistributedGrid, ei::EdgeIndex) = haskey(dgrid.shared_edges, ei) +@inline is_shared_face(dgrid::AbstractDistributedGrid, fi::FaceIndex) = haskey(dgrid.shared_faces, fi) """ @@ -352,4 +358,4 @@ end function compute_owner(dgrid::AbstractDistributedGrid, shared_entity::SharedEntity)::Int32 my_rank = MPI.Comm_rank(global_comm(dgrid))+1 # Shift rank up by 1 to match Julia's indexing convention return minimum([my_rank; [remote_rank for (remote_rank, _) ∈ remote_entities(shared_entity)]]) -end \ No newline at end of file +end From 9c777472e58bcfa0ff23f71dab4aee9386a64891 Mon Sep 17 00:00:00 2001 From: termi-official Date: Tue, 11 Oct 2022 21:16:48 +0200 Subject: [PATCH 056/124] Debug distributed grid construction.... 
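The @debug statements used throughout this series only show up when debug-level logging is enabled. A quick way to switch them on for a local run (plain Julia Logging, nothing Ferrite-specific; the script name is just a placeholder):

    # either via the environment:
    #   JULIA_DEBUG=Ferrite julia --project my_distributed_script.jl
    # or programmatically at the top of the script:
    using Logging
    global_logger(ConsoleLogger(stderr, Logging.Debug))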
--- src/Grid/DistributedGrid.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index 058eaf6d46..17211f417a 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -240,6 +240,10 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu vertexsets=vertexsets ) + @debug if my_rank == 1 + @show grid_topology + end + shared_vertices = Dict{VertexIndex,SharedVertex}() shared_edges = Dict{EdgeIndex,SharedEdge}() shared_faces = Dict{FaceIndex,SharedFace}() From 60242a55944976315dcfb19f3736512873379ff7 Mon Sep 17 00:00:00 2001 From: termi-official Date: Tue, 11 Oct 2022 21:55:47 +0200 Subject: [PATCH 057/124] Add Maxi's fix for the edge topology. --- src/Grid/grid.jl | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/Grid/grid.jl b/src/Grid/grid.jl index d69a387c31..91782e19e3 100644 --- a/src/Grid/grid.jl +++ b/src/Grid/grid.jl @@ -392,13 +392,25 @@ function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, vertexidx:: end function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid{3}, edgeidx::EdgeIndex, include_self=false) - if include_self - return [top.edge_neighbor[edgeidx[1],edgeidx[2]].neighbor_info; edgeidx] + cellid, local_edgeidx = edgeidx[1], edgeidx[2] + cell_edges = edges(getcells(grid,cellid)) + nonlocal_edgeid = cell_edges[local_edgeidx] + if include_self + cell_neighbors = getneighborhood(top,grid,CellIndex(cellid)) + self_reference_local = EdgeIndex[] + for cellid in cell_neighbors + local_neighbor_edgeid = findfirst(x->issubset(x,nonlocal_edgeid),edges(getcells(grid,cellid))) + local_neighbor_edgeid === nothing && continue + local_edge = EdgeIndex(cellid,local_neighbor_edgeid) + push!(self_reference_local, local_edge) + end + return unique([top.edge_neighbor[cellid, local_edgeidx].neighbor_info; self_reference_local; edgeidx]) else - return top.edge_neighbor[edgeidx[1],edgeidx[2]].neighbor_info + return top.edge_neighbor[cellid, local_edgeidx].neighbor_info end end + """ faceskeleton(grid) -> Vector{FaceIndex} Returns an iterateable face skeleton. The skeleton consists of `FaceIndex` that can be used to `reinit` From 3297c0611219a1240908019ec5366a568c89defd Mon Sep 17 00:00:00 2001 From: termi-official Date: Tue, 11 Oct 2022 22:08:14 +0200 Subject: [PATCH 058/124] Fix some bugs. 
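Among the fixes below, compute_dof_ownership now treats shared vertices, faces and edges uniformly: every dof starts out owned by the local rank, and dofs on a shared entity go to the smallest rank that sees that entity. The rule in isolation, with plain containers standing in for the Ferrite types (illustrative only):

    my_rank = 2
    shared = Dict(5 => [1, 3], 9 => [3])    # local dof index => remote ranks that also see it
    dof_owner = fill(my_rank, 10)           # default: everything is owned locally
    for (dof, ranks) in shared
        dof_owner[dof] = minimum([my_rank; ranks])
    end
    # dof_owner[5] == 1 (rank 1 wins), dof_owner[9] == 2 (kept locally)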
--- src/Dofs/ConstraintHandler.jl | 2 ++ src/Dofs/DistributedDofHandler.jl | 52 ++++++++++++++++++++++++------- src/Grid/DistributedGrid.jl | 21 ++++++++----- src/assembler.jl | 14 ++++++--- 4 files changed, 64 insertions(+), 25 deletions(-) diff --git a/src/Dofs/ConstraintHandler.jl b/src/Dofs/ConstraintHandler.jl index 0ab7232b02..ce0180900a 100644 --- a/src/Dofs/ConstraintHandler.jl +++ b/src/Dofs/ConstraintHandler.jl @@ -1150,12 +1150,14 @@ function apply!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector @debug println("Got $remote_ghosts_recv (R$my_rank)") # Step 2: Union with all locally constrained dofs + @debug println("$my_rank : Step 2....") remote_ghosts_constrained_send = copy(remote_ghosts_recv) for (i, remote_ghost_dof) ∈ enumerate(remote_ghosts_recv) remote_ghosts_constrained_send[i] = remote_ghost_dof ∈ K.cols.partition.part.lid_to_gid[ch.prescribed_dofs] end # Step 3: Send trash back + @debug println("$my_rank : Step 3....") remote_ghosts_constrained_recv = Vector{Int}(undef, sum(buffer_sizes_send)) MPI.Alltoallv!(VBuffer(remote_ghosts_constrained_send, buffer_sizes_recv), VBuffer(remote_ghosts_constrained_recv, buffer_sizes_send), comm) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index 3eea3885b0..5ae88338ee 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -79,22 +79,38 @@ end renumber!(dh::DistributedDofHandler, perm::AbstractVector{<:Integer}) = (@assert false) && "Unimplemented" function compute_dof_ownership(dh) - dgrid = dh.grid + dgrid = getglobalgrid(dh) my_rank = MPI.Comm_rank(global_comm(dgrid))+1 dof_owner = Vector{Int}(undef,ndofs(dh)) fill!(dof_owner, my_rank) - for ((lci, lclvi),sv) ∈ dgrid.shared_vertices - owner_rank = minimum([collect(keys(sv.remote_vertices));my_rank]) + for (lvi, sv) ∈ get_shared_vertices(dgrid) + for field_idx in 1:num_fields(dh) + vi = toglobal(dgrid, lvi) + if has_vertex_dofs(dh, field_idx, vi) + local_dof_idx = vertex_dofs(dh, field_idx, vi) + dof_owner[local_dof_idx] = compute_owner(dgrid, sv) + end + end + end - if owner_rank != my_rank - for fi in 1:num_fields(dh) - vi = vertices(getcells(getgrid(dh),lci))[lclvi] - if haskey(dh.vertexdicts[fi], vi) - local_dof_idx = dh.vertexdicts[fi][vi] - dof_owner[local_dof_idx] = owner_rank - end + for (lfi, sf) ∈ get_shared_faces(dgrid) + for field_idx in 1:num_fields(dh) + fi = toglobal(dgrid, lfi) + if has_face_dofs(dh, field_idx, fi) + local_dof_idx = face_dofs(dh, field_idx, fi) + dof_owner[local_dof_idx] = compute_owner(dgrid, sf) + end + end + end + + for (lei, se) ∈ get_shared_edges(dgrid) + for field_idx in 1:num_fields(dh) + ei = toglobal(dgrid, lei) + if has_edge_dofs(dh, field_idx, ei) + local_dof_idx = edge_dofs(dh, field_idx, ei) + dof_owner[local_dof_idx] = compute_owner(dgrid, se) end end end @@ -205,6 +221,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) if dim > 2 # edges only in 3D if interpolation_info.nedgedofs > 0 + error("Broken. 
Each process counts a different number of local edges and hence we have a mismatch in the MPI messages.") for (ei,edge) in enumerate(edges(cell)) @debug println(" edge#$edge (R$my_rank)") lei = EdgeIndex(ci,ei) @@ -235,7 +252,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) end @debug println(" prepare sending edge #$(lei) to $remote_rank (R$my_rank)") for i ∈ svs - push!(edges_send[remote_rank],lei) + push!(edges_send[remote_rank], lei) end elseif master_rank == remote_rank # dof is owned by remote - we have to receive information if !haskey(n_edges_recv,remote_rank) @@ -523,6 +540,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) end corresponding_global_dofs = Array{Int64}(undef,n_edges) MPI.Recv!(corresponding_global_dofs, global_comm(dgrid); source=sending_rank-1) + @debug println(" Received $corresponding_global_dofs edge dofs from $sending_rank (R$my_rank)") for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) vi = sortedge(edges(getcells(getgrid(dh),lci))[lclvi])[1] if haskey(dh.edgedicts[field_idx], vi) @@ -538,9 +556,19 @@ function local_to_global_numbering(dh::DistributedDofHandler) end # Postcondition: All local dofs need a corresponding global dof! - @debug println("Local to global mapping: $local_to_global") + @debug println("Local to global mapping: $local_to_global (R$my_rank)") @assert findfirst(local_to_global .== 0) === nothing + @debug vtk_grid("dofs", dgrid; compress=false) do vtk + u = Vector{Float64}(undef,length(dgrid.local_grid.nodes)) + fill!(u, 0.0) + for i=1:length(u) + u[i] = local_to_global[dh.vertexdicts[1][i]] + end + vtk_point_data(vtk, u,"dof") + vtk_partitioning(vtk, dgrid) + end + return local_to_global end diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index 17211f417a..ffb162b5eb 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -126,10 +126,19 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu N = getncells(grid_to_distribute) @assert N > 0 - my_rank = MPI.Comm_rank(grid_comm)+1 - parts = create_partitioning(grid_to_distribute, grid_topology, MPI.Comm_size(grid_comm), partition_alg) + DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm, parts) +end + +""" +""" +function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm, parts::Vector{Int32}) where {dim,C,T} + N = getncells(grid_to_distribute) + @assert N > 0 + + my_rank = MPI.Comm_rank(grid_comm)+1 + # Start extraction of local grid # 1. 
Extract local cells local_cells = getcells(grid_to_distribute)[[i for i ∈ 1:N if parts[i] == my_rank]] @@ -240,10 +249,6 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu vertexsets=vertexsets ) - @debug if my_rank == 1 - @show grid_topology - end - shared_vertices = Dict{VertexIndex,SharedVertex}() shared_edges = Dict{EdgeIndex,SharedEdge}() shared_faces = Dict{FaceIndex,SharedFace}() @@ -275,7 +280,7 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu # Face if dim > 1 - for (i, global_face_idx) ∈ enumerate(faces(global_cell)) + for (i, _) ∈ enumerate(faces(global_cell)) cell_face = FaceIndex(global_cell_idx, i) remote_faces = Dict{Int,Vector{FaceIndex}}() for other_face ∈ getneighborhood(grid_topology, grid_to_distribute, cell_face, true) @@ -301,7 +306,7 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu # Edge if dim > 2 - for (i, global_vertex_idx) ∈ enumerate(edges(global_cell)) + for (i, _) ∈ enumerate(edges(global_cell)) cell_edge = EdgeIndex(global_cell_idx, i) remote_edges = Dict{Int,Vector{EdgeIndex}}() for other_edge ∈ getneighborhood(grid_topology, grid_to_distribute, cell_edge, true) diff --git a/src/assembler.jl b/src/assembler.jl index 134fccc520..166fc128cd 100644 --- a/src/assembler.jl +++ b/src/assembler.jl @@ -296,7 +296,7 @@ struct PartitionedArraysCOOAssembler{T} #FIXME: This below must be fixed before we can assemble to HYPRE IJ. Problem seems to be that rows and cols must be continuously assigned. #row_indices = PartitionedArrays.IndexRange(my_rank, length(ltdof_indices), ltdof_to_gdof[1], ldof_to_gdof[.!ltdof_indices], Int32.(ldof_to_rank[.!ltdof_indices])) row_data = MPIData(row_indices, comm, (np,)) - row_exchanger = Exchanger(row_data,neighbors) + row_exchanger = Exchanger(row_data) rows = PRange(ngdofs,row_data,row_exchanger) @debug println("rows done (R$my_rank)") @@ -321,6 +321,7 @@ struct PartitionedArraysCOOAssembler{T} # Start by searching shared entities which are not owned pivot_vertex_owner_rank = compute_owner(dgrid, pivot_shared_vertex) pivot_cell_idx = pivot_vertex[1] + pivot_vertex_global = toglobal(getlocalgrid(dgrid), pivot_vertex) if my_rank != pivot_vertex_owner_rank sender_slot = destination_index[pivot_vertex_owner_rank] @@ -330,11 +331,12 @@ struct PartitionedArraysCOOAssembler{T} cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? 
length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] - pivot_vertex_global = toglobal(getlocalgrid(dgrid), pivot_vertex) - for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) !has_vertex_dofs(dh, field_idx, pivot_vertex_global) && continue pivot_vertex_dof = vertex_dofs(dh, field_idx, pivot_vertex_global) + + @debug println(" adding dof $pivot_vertex_dof to ghost sync synchronization on slot $sender_slot (R$my_rank)") + # Extract dofs belonging to the current field cell_field_dofs = cell_dofs[dof_range(dh, field_name)] for cell_field_dof ∈ cell_field_dofs @@ -366,6 +368,9 @@ struct PartitionedArraysCOOAssembler{T} for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) !has_face_dofs(dh, field_idx, pivot_face_global) && continue pivot_face_dof = face_dofs(dh, field_idx, pivot_face_global) + + @debug println(" adding dof $pivot_face_dof to ghost sync synchronization on slot $sender_slot (R$my_rank)") + # Extract dofs belonging to the current field cell_field_dofs = cell_dofs[dof_range(dh, field_name)] for cell_field_dof ∈ cell_field_dofs @@ -463,11 +468,10 @@ struct PartitionedArraysCOOAssembler{T} #FIXME: This below must be fixed before we can assemble to HYPRE IJ. Problem seems to be that rows and cols must be continuously assigned. #col_indices = PartitionedArrays.IndexRange(my_rank, length(ltdof_indices), ltdof_to_gdof[1], all_local_cols[all_local_col_ranks .!= my_rank], Int32.(all_local_col_ranks[all_local_col_ranks .!= my_rank])) col_data = MPIData(col_indices, comm, (np,)) - col_exchanger = Exchanger(col_data,neighbors) + col_exchanger = Exchanger(col_data) cols = PRange(ngdofs,col_data,col_exchanger) @debug println("cols and rows constructed (R$my_rank)") - f = PartitionedArrays.PVector(0.0,rows) @debug println("f constructed (R$my_rank)") From 2cd15b92f48479ea8d810daad5234bba4eb80a76 Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 12 Oct 2022 03:15:41 +0200 Subject: [PATCH 059/124] Fix edge dof distribution. --- src/Dofs/DistributedDofHandler.jl | 36 +++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index 5ae88338ee..9aa15556a9 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -155,7 +155,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) faces_send = Dict{Int,Vector{FaceIndex}}() n_faces_recv = Dict{Int,Int}() edges_send = Dict{Int,Vector{EdgeIndex}}() - n_edges_recv = Dict{Int,Int}() + edges_recv = Dict{Int,Vector{EdgeIndex}}() # We start by assigning a local dof to all owned entities. # An entity is owned if: @@ -221,7 +221,6 @@ function local_to_global_numbering(dh::DistributedDofHandler) if dim > 2 # edges only in 3D if interpolation_info.nedgedofs > 0 - error("Broken. 
Each process counts a different number of local edges and hence we have a mismatch in the MPI messages.") for (ei,edge) in enumerate(edges(cell)) @debug println(" edge#$edge (R$my_rank)") lei = EdgeIndex(ci,ei) @@ -255,11 +254,10 @@ function local_to_global_numbering(dh::DistributedDofHandler) push!(edges_send[remote_rank], lei) end elseif master_rank == remote_rank # dof is owned by remote - we have to receive information - if !haskey(n_edges_recv,remote_rank) - n_edges_recv[remote_rank] = length(svs) - else - n_edges_recv[remote_rank] += length(svs) + if !haskey(edges_recv,remote_rank) + edges_recv[remote_rank] = Array{EdgeIndex}() end + push!(edges_recv[remote_rank], lei) @debug println(" prepare receiving edge #$(lei) from $remote_rank (R$my_rank)") end end @@ -438,12 +436,22 @@ function local_to_global_numbering(dh::DistributedDofHandler) end if haskey(edges_send, remote_rank) - n_edges = length(edges_send[remote_rank]) + # Well .... that some hotfix straight outta hell. + edges_send_unique_set = Set{Tuple{Int,Int}}() + edges_send_unique = Set{EdgeIndex}() + for lei ∈ edges_send[remote_rank] + edge = toglobal(dgrid, lei) + if edge ∉ edges_send_unique_set + push!(edges_send_unique_set, edge) + push!(edges_send_unique, lei) + end + end + n_edges = length(edges_send_unique) @debug println("Sending $n_edges edges to rank $remote_rank (R$my_rank)") remote_cells = Array{Int64}(undef,n_edges) remote_cell_vis = Array{Int64}(undef,n_edges) next_buffer_idx = 1 - for lvi ∈ edges_send[remote_rank] + for lvi ∈ edges_send_unique sv = dgrid.shared_edges[lvi] @assert haskey(sv.remote_edges, remote_rank) for (cvi, llvi) ∈ sv.remote_edges[remote_rank][1:1] # Just don't ask :) @@ -462,7 +470,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) end # fill correspondence array corresponding_global_dofs = Array{Int64}(undef,n_edges) - for (lci,lclvi) ∈ edges_send[remote_rank] + for (lci,lclvi) ∈ edges_send_unique vi = sortedge(edges(getcells(getgrid(dh),lci))[lclvi])[1] if haskey(dh.edgedicts[fi], vi) corresponding_global_dofs[next_buffer_idx] = local_to_global[dh.edgedicts[fi][vi][1]] @@ -526,8 +534,14 @@ function local_to_global_numbering(dh::DistributedDofHandler) end end - if haskey(n_edges_recv, sending_rank) - n_edges = n_edges_recv[sending_rank] + if haskey(edges_recv, sending_rank) + edges_recv_unique_set = Set{Tuple{Int,Int}}() + for lei ∈ edges_recv[remote_rank] + edge = toglobal(dgrid, lei) + push!(edges_send_unique_set, edge) + end + end + n_edges = length(edges_recv_unique_set) @debug println("Receiving $n_edges edges from rank $sending_rank (R$my_rank)") local_cells = Array{Int64}(undef,n_edges) local_cell_vis = Array{Int64}(undef,n_edges) From a49342291e034a48c9522c00f6b1e3af46dd5514 Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 12 Oct 2022 03:17:00 +0200 Subject: [PATCH 060/124] Fix edge dof distribution. 
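Follow-up to the deduplication introduced in the previous commit: several local EdgeIndex entries can refer to the same global edge, but only one representative per global edge may enter the send buffer, otherwise the message sizes of sender and receiver disagree. The core of that filter, with plain tuples standing in for EdgeIndex and the global edge key (illustrative only):

    locals  = [(1, 4), (2, 1)]       # two local (cell, local_edge) views ...
    globals = [(3, 7), (3, 7)]       # ... of one and the same global edge
    seen = Set{Tuple{Int,Int}}()
    keep = NTuple{2,Int}[]
    for (l, g) in zip(locals, globals)
        g in seen || (push!(seen, g); push!(keep, l))
    end
    # keep == [(1, 4)]  ->  exactly one buffer entry per global edge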
--- src/Dofs/DistributedDofHandler.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index 9aa15556a9..6b00b3d33a 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -539,7 +539,6 @@ function local_to_global_numbering(dh::DistributedDofHandler) for lei ∈ edges_recv[remote_rank] edge = toglobal(dgrid, lei) push!(edges_send_unique_set, edge) - end end n_edges = length(edges_recv_unique_set) @debug println("Receiving $n_edges edges from rank $sending_rank (R$my_rank)") From 5a50b85c683b9d3ef77107721c51fbb3422638b8 Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 12 Oct 2022 03:19:16 +0200 Subject: [PATCH 061/124] Fix edge dof distribution. --- src/Dofs/DistributedDofHandler.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index 6b00b3d33a..6a87c1bc12 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -247,7 +247,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) for (remote_rank, svs) ∈ remote_edge_dict if master_rank == my_rank # I own the dof - we have to send information if !haskey(edges_send,remote_rank) - edges_send[remote_rank] = Vector{EdgeIndex}() + edges_send[remote_rank] = EdgeIndex[] end @debug println(" prepare sending edge #$(lei) to $remote_rank (R$my_rank)") for i ∈ svs @@ -255,7 +255,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) end elseif master_rank == remote_rank # dof is owned by remote - we have to receive information if !haskey(edges_recv,remote_rank) - edges_recv[remote_rank] = Array{EdgeIndex}() + edges_recv[remote_rank] = EdgeIndex[] end push!(edges_recv[remote_rank], lei) @debug println(" prepare receiving edge #$(lei) from $remote_rank (R$my_rank)") @@ -293,7 +293,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) for (remote_rank, svs) ∈ remote_face_dict if master_rank == my_rank # I own the dof - we have to send information if !haskey(faces_send,remote_rank) - faces_send[remote_rank] = Vector{FaceIndex}() + faces_send[remote_rank] = FaceIndex[] end @debug println(" prepare sending face #$(lfi) to $remote_rank (R$my_rank)") for i ∈ svs From c9bbcb3c191cc6af0251afe488784f7359b53fb4 Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 12 Oct 2022 03:21:13 +0200 Subject: [PATCH 062/124] Fix edge dof distribution. --- src/Dofs/DistributedDofHandler.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index 6a87c1bc12..fde001c87e 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -536,7 +536,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) if haskey(edges_recv, sending_rank) edges_recv_unique_set = Set{Tuple{Int,Int}}() - for lei ∈ edges_recv[remote_rank] + for lei ∈ edges_recv[sending_rank] edge = toglobal(dgrid, lei) push!(edges_send_unique_set, edge) end From 13c974a412331cc8d9841610c9a648a259074001 Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 12 Oct 2022 03:22:45 +0200 Subject: [PATCH 063/124] Fix edge dof distribution. 
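Same deduplication on the receiving side: the expected message length is the number of distinct global edges, not the number of local views of them, so the collected edge indices are collapsed through a set before sizing the receive buffer (plain tuples as stand-ins, illustrative only):

    incoming = [(3, 7), (3, 7), (2, 3)]   # global edge keys, e.g. after toglobal + sortedge
    n_edges = length(Set(incoming))       # == 2, matching what the owning rank sends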
--- src/Dofs/DistributedDofHandler.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index fde001c87e..bd9030fb7f 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -538,7 +538,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) edges_recv_unique_set = Set{Tuple{Int,Int}}() for lei ∈ edges_recv[sending_rank] edge = toglobal(dgrid, lei) - push!(edges_send_unique_set, edge) + push!(edges_recv_unique_set, edge) end n_edges = length(edges_recv_unique_set) @debug println("Receiving $n_edges edges from rank $sending_rank (R$my_rank)") From d2622c99d83e2bbc3431787b0612a8c4cdd3e58b Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 12 Oct 2022 04:19:29 +0200 Subject: [PATCH 064/124] Hotfix for bug in edge neighborhood access. --- src/Grid/grid.jl | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/Grid/grid.jl b/src/Grid/grid.jl index 91782e19e3..926b2b87db 100644 --- a/src/Grid/grid.jl +++ b/src/Grid/grid.jl @@ -359,10 +359,12 @@ function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, cellidx::Ce end function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, faceidx::FaceIndex, include_self=false) + # TODO cleaner solution... + data = faceidx[2] <= size(top.edge_neighbor, 2) ? top.face_neighbor[faceidx[1],faceidx[2]].neighbor_info : [] if include_self - return [top.face_neighbor[faceidx[1],faceidx[2]].neighbor_info; faceidx] + return [data; faceidx] else - return top.face_neighbor[faceidx[1],faceidx[2]].neighbor_info + return data end end @@ -394,8 +396,10 @@ end function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid{3}, edgeidx::EdgeIndex, include_self=false) cellid, local_edgeidx = edgeidx[1], edgeidx[2] cell_edges = edges(getcells(grid,cellid)) - nonlocal_edgeid = cell_edges[local_edgeidx] - if include_self + nonlocal_edgeid = cell_edges[local_edgeidx] + # TODO cleaner solution... + data = local_edgeidx <= size(top.edge_neighbor, 2) ? top.edge_neighbor[cellid, local_edgeidx].neighbor_info : [] + if include_self cell_neighbors = getneighborhood(top,grid,CellIndex(cellid)) self_reference_local = EdgeIndex[] for cellid in cell_neighbors @@ -404,9 +408,9 @@ function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid{3}, edgeidx: local_edge = EdgeIndex(cellid,local_neighbor_edgeid) push!(self_reference_local, local_edge) end - return unique([top.edge_neighbor[cellid, local_edgeidx].neighbor_info; self_reference_local; edgeidx]) + return unique([data; self_reference_local; edgeidx]) else - return top.edge_neighbor[cellid, local_edgeidx].neighbor_info + return unique(data) end end From abd68c2cdac597235d58578a529f9590cbedb9eb Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 12 Oct 2022 04:35:41 +0200 Subject: [PATCH 065/124] Hotfix for bug in edge neighborhood access. Again. --- src/Grid/grid.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Grid/grid.jl b/src/Grid/grid.jl index 926b2b87db..8b281ab0ed 100644 --- a/src/Grid/grid.jl +++ b/src/Grid/grid.jl @@ -360,7 +360,7 @@ end function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, faceidx::FaceIndex, include_self=false) # TODO cleaner solution... - data = faceidx[2] <= size(top.edge_neighbor, 2) ? top.face_neighbor[faceidx[1],faceidx[2]].neighbor_info : [] + data = faceidx[2] <= size(top.face_neighbor, 2) ? 
top.face_neighbor[faceidx[1],faceidx[2]].neighbor_info : [] if include_self return [data; faceidx] else From 900ac88eaa37bd2299b67c25241c4ed965b7d5c6 Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 12 Oct 2022 18:40:57 +0200 Subject: [PATCH 066/124] Apply Fredriks bugfix for 3D P2 on hex --- src/Dofs/DistributedDofHandler.jl | 2 +- src/interpolations.jl | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index bd9030fb7f..5c34b1c85e 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -674,7 +674,7 @@ function __close!(dh::DistributedDofHandler{dim}) where {dim} end end # vertex loop end - if dim == 3 # edges only in 3D + if dim > 2 # edges only in 3D if interpolation_info.nedgedofs > 0 for edge in edges(cell) sedge, dir = sortedge(edge) diff --git a/src/interpolations.jl b/src/interpolations.jl index b4c2f0946a..849223f07c 100644 --- a/src/interpolations.jl +++ b/src/interpolations.jl @@ -446,7 +446,14 @@ nedgedofs(::Lagrange{3,RefCube,2}) = 1 nfacedofs(::Lagrange{3,RefCube,2}) = 1 ncelldofs(::Lagrange{3,RefCube,2}) = 1 -faces(::Lagrange{3,RefCube,2}) = ((1,2,6,5, 9,18,13,17, 23), (2,3,7,6, 10,19,14,18, 22), (3,4,8,7, 11,20,15,19, 24), (1,5,8,4, 12,17,16,20, 21), (1,4,3,2, 9,10,11,12, 25), (5,6,7,8, 13,14,15,16, 26)) +faces(::Lagrange{3,RefCube,2}) = ( + (1,4,3,2, 9,10,11,12, 25), + (1,2,6,5, 9,18,13,17, 23), + (2,3,7,6, 10,19,14,18, 22), + (3,4,8,7, 11,20,15,19, 24), + (1,5,8,4, 12,17,16,20, 21), + (5,6,7,8, 13,14,15,16, 26), +) edges(::Lagrange{3,RefCube,2}) = ((1,2, 9), (2,3, 10), (3,4, 11), (4,1, 12), (1,5, 16), (2,6, 19), (3,7, 18), (4,8, 19), (5,6, 13), (6,7, 14), (7,8, 15), (8,5, 16)) function reference_coordinates(::Lagrange{3,RefCube,2}) From 13f904b5739775c8be78a60a2b326695d066709a Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 12 Oct 2022 19:35:32 +0200 Subject: [PATCH 067/124] Fix higher order assembly. --- src/Dofs/DistributedDofHandler.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index 5c34b1c85e..284e3c78b4 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -572,15 +572,15 @@ function local_to_global_numbering(dh::DistributedDofHandler) @debug println("Local to global mapping: $local_to_global (R$my_rank)") @assert findfirst(local_to_global .== 0) === nothing - @debug vtk_grid("dofs", dgrid; compress=false) do vtk - u = Vector{Float64}(undef,length(dgrid.local_grid.nodes)) - fill!(u, 0.0) - for i=1:length(u) - u[i] = local_to_global[dh.vertexdicts[1][i]] - end - vtk_point_data(vtk, u,"dof") - vtk_partitioning(vtk, dgrid) - end + # @debug vtk_grid("dofs", dgrid; compress=false) do vtk + # u = Vector{Float64}(undef,length(dgrid.local_grid.nodes)) + # fill!(u, 0.0) + # for i=1:length(u) + # u[i] = local_to_global[dh.vertexdicts[1][i]] + # end + # vtk_point_data(vtk, u,"dof") + # vtk_partitioning(vtk, dgrid) + # end return local_to_global end From d3b26232d7f8065449f6500df92ee359ddbf7222 Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 12 Oct 2022 20:11:06 +0200 Subject: [PATCH 068/124] Add simple manufactured solution to heat example to quickly check elements. 
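The manufactured solution wired into the example below is u(x) = ∏ᵢ cos(π xᵢ / 2); each factor contributes -(π/2)² u under ∂²/∂xᵢ², so -Δu = dim · (π/2)² · u, which is exactly the source term added to the assembly loop and the reference used in the test at the end. A standalone finite-difference sanity check of that identity in 2D (illustrative, independent of Ferrite):

    u(x) = prod(cos, x .* (π/2))
    f(x, dim) = (π/2)^2 * dim * u(x)
    h = 1e-4; x = [0.3, 0.4]
    # second-order central differences for the Laplacian at x
    lap = sum(i -> (u(x .+ h .* (1:2 .== i)) - 2u(x) + u(x .- h .* (1:2 .== i))) / h^2, 1:2)
    # -lap ≈ f(x, 2) up to the O(h^2) discretization error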
--- docs/src/literate/heat_equation.jl | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/docs/src/literate/heat_equation.jl b/docs/src/literate/heat_equation.jl index 46c8b2d85b..3feae68a4f 100644 --- a/docs/src/literate/heat_equation.jl +++ b/docs/src/literate/heat_equation.jl @@ -138,6 +138,7 @@ function doassemble(cellvalues::CellScalarValues{dim}, K::SparseMatrixCSC, dh::D # For each cell we also need to reinitialize the cached values in `cellvalues`. #+ reinit!(cellvalues, cell) + coords = getcoordinates(cell) #src # It is now time to loop over all the quadrature points in the cell and # assemble the contribution to `Ke` and `fe`. The integration weight @@ -149,10 +150,15 @@ function doassemble(cellvalues::CellScalarValues{dim}, K::SparseMatrixCSC, dh::D # We need the value and gradient of the testfunction `v` and also the gradient # of the trial function `u`. We get all of these from `cellvalues`. #+ + x = spatial_coordinate(cellvalues, q_point, coords) #src for i in 1:n_basefuncs v = shape_value(cellvalues, q_point, i) ∇v = shape_gradient(cellvalues, q_point, i) + if false #src fe[i] += v * dΩ + else #src + fe[i] += (π/2)^2 * dim * prod(cos, x*π/2) * v * dΩ #src + end #src for j in 1:n_basefuncs ∇u = shape_gradient(cellvalues, q_point, j) Ke[i, j] += (∇v ⋅ ∇u) * dΩ @@ -187,9 +193,22 @@ vtk_grid("heat_equation", dh) do vtk vtk_point_data(vtk, dh, u) end -## test the result #src -using Test #src -@test norm(u) ≈ 3.307743912641305 #src +## Test the result against the manufactured solution #src +using Test #src +for cell in CellIterator(dh) #src + reinit!(cellvalues, cell) #src + n_basefuncs = getnbasefunctions(cellvalues) #src + coords = getcoordinates(cell) #src + uₑ = u[celldofs(cell)] #src + for q_point in 1:getnquadpoints(cellvalues) #src + x = spatial_coordinate(cellvalues, q_point, coords) #src + for i in 1:n_basefuncs #src + uₐₙₐ = prod(cos, x*π/2) #src + uₐₚₚᵣₒₓ = function_value(cellvalues, q_point, uₑ) #src + @test isapprox(uₐₙₐ, uₐₚₚᵣₒₓ; atol=1e-1) #src + end #src + end #src +end #src #md # ## [Plain program](@id heat_equation-plain-program) #md # From 3250c7730c67a22b2c80d17ed4284e13dfdb1d4b Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 12 Oct 2022 20:58:21 +0200 Subject: [PATCH 069/124] Fix bug in distributed dof assignment. --- src/Dofs/DistributedDofHandler.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index 284e3c78b4..06994c3f7a 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -76,7 +76,7 @@ function celldofs(dh::DistributedDofHandler, i::Int) return global_dofs end -renumber!(dh::DistributedDofHandler, perm::AbstractVector{<:Integer}) = (@assert false) && "Unimplemented" +renumber!(dh::DistributedDofHandler, perm::AbstractVector{<:Integer}) = error("Not implemented.") function compute_dof_ownership(dh) dgrid = getglobalgrid(dh) @@ -464,7 +464,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) MPI.Send(remote_cell_vis, global_comm(dgrid); dest=remote_rank-1) for fi ∈ 1:num_fields(dh) next_buffer_idx = 1 - if length(dh.facedicts[fi]) == 0 + if length(dh.edgedicts[fi]) == 0 @debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") continue end From f88ae96a3517901a3008eebd5721274796ad005f Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 12 Oct 2022 21:13:26 +0200 Subject: [PATCH 070/124] Apply Fredriks bugfix for 3D P2 on hex again. 
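The renumbering below has to keep the faces/edges tables, the reference coordinates and the shape function indices of Lagrange{3,RefCube,2} mutually consistent. One cheap invariant that catches the kind of ordering typo fixed here: every edge midnode must sit at the midpoint of its two edge vertices. A small check one could run against the interpolation (assuming the edges and reference_coordinates methods from this file; a sketch, not part of the test suite):

    # sketch: verify edge midnodes of a quadratic Lagrange interpolation
    function check_edge_midpoints(ip)
        x = reference_coordinates(ip)
        all(e -> x[e[3]] ≈ (x[e[1]] + x[e[2]]) / 2, edges(ip))
    end
    # check_edge_midpoints(Lagrange{3,RefCube,2}()) is expected to hold after this change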
--- src/interpolations.jl | 49 +++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/src/interpolations.jl b/src/interpolations.jl index 849223f07c..e3449a2c06 100644 --- a/src/interpolations.jl +++ b/src/interpolations.jl @@ -447,27 +447,27 @@ nfacedofs(::Lagrange{3,RefCube,2}) = 1 ncelldofs(::Lagrange{3,RefCube,2}) = 1 faces(::Lagrange{3,RefCube,2}) = ( - (1,4,3,2, 9,10,11,12, 25), - (1,2,6,5, 9,18,13,17, 23), - (2,3,7,6, 10,19,14,18, 22), + (1,4,3,2, 12,11,10,9, 21), + (1,2,6,5, 9,18,13,17, 22), + (2,3,7,6, 10,19,14,18, 23), (3,4,8,7, 11,20,15,19, 24), - (1,5,8,4, 12,17,16,20, 21), + (1,5,8,4, 17,16,20,12, 25), (5,6,7,8, 13,14,15,16, 26), ) -edges(::Lagrange{3,RefCube,2}) = ((1,2, 9), (2,3, 10), (3,4, 11), (4,1, 12), (1,5, 16), (2,6, 19), (3,7, 18), (4,8, 19), (5,6, 13), (6,7, 14), (7,8, 15), (8,5, 16)) +edges(::Lagrange{3,RefCube,2}) = ((1,2, 9), (2,3, 10), (3,4, 11), (4,1, 12), (5,6, 13), (6,7, 14), (7,8, 15), (8,5, 16), (1,5, 17), (2,6, 18), (3,7, 19), (4,8, 20)) function reference_coordinates(::Lagrange{3,RefCube,2}) # vertex - return [Vec{3, Float64}((-1.0, -1.0, -1.0)), # 0 - Vec{3, Float64}(( 1.0, -1.0, -1.0)), # 1 - Vec{3, Float64}(( 1.0, 1.0, -1.0)), # 2 - Vec{3, Float64}((-1.0, 1.0, -1.0)), # 3 - Vec{3, Float64}((-1.0, -1.0, 1.0)), # 4 - Vec{3, Float64}(( 1.0, -1.0, 1.0)), # 5 - Vec{3, Float64}(( 1.0, 1.0, 1.0)), # 6 - Vec{3, Float64}((-1.0, 1.0, 1.0)), # 7 + return [Vec{3, Float64}((-1.0, -1.0, -1.0)), # 1 + Vec{3, Float64}(( 1.0, -1.0, -1.0)), # 2 + Vec{3, Float64}(( 1.0, 1.0, -1.0)), # 3 + Vec{3, Float64}((-1.0, 1.0, -1.0)), # 4 + Vec{3, Float64}((-1.0, -1.0, 1.0)), # 5 + Vec{3, Float64}(( 1.0, -1.0, 1.0)), # 6 + Vec{3, Float64}(( 1.0, 1.0, 1.0)), # 7 + Vec{3, Float64}((-1.0, 1.0, 1.0)), # 8 # edge - Vec{3, Float64}(( 0.0, -1.0, -1.0)), # 8 + Vec{3, Float64}(( 0.0, -1.0, -1.0)), # 9 Vec{3, Float64}(( 1.0, 0.0, -1.0)), Vec{3, Float64}(( 0.0, 1.0, -1.0)), Vec{3, Float64}((-1.0, 0.0, -1.0)), @@ -478,16 +478,15 @@ function reference_coordinates(::Lagrange{3,RefCube,2}) Vec{3, Float64}((-1.0, -1.0, 0.0)), Vec{3, Float64}(( 1.0, -1.0, 0.0)), Vec{3, Float64}(( 1.0, 1.0, 0.0)), - Vec{3, Float64}((-1.0, 1.0, 0.0)), # 19 - # face - Vec{3, Float64}(( 0.0, -1.0, 0.0)), # 20 + Vec{3, Float64}((-1.0, 1.0, 0.0)), # 20 + Vec{3, Float64}(( 0.0, 0.0, -1.0)), + Vec{3, Float64}(( 0.0, -1.0, 0.0)), Vec{3, Float64}(( 1.0, 0.0, 0.0)), Vec{3, Float64}(( 0.0, 1.0, 0.0)), Vec{3, Float64}((-1.0, 0.0, 0.0)), - Vec{3, Float64}(( 0.0, 0.0, -1.0)), - Vec{3, Float64}(( 0.0, 0.0, 1.0)), # 25 + Vec{3, Float64}(( 0.0, 0.0, 1.0)), # 26 # interior - Vec{3, Float64}((0.0, 0.0, 0.0)), # 26 + Vec{3, Float64}((0.0, 0.0, 0.0)), # 27 ] end @@ -520,11 +519,11 @@ function value(ip::Lagrange{3,RefCube,2}, i::Int, ξ::Vec{3, T}) where {T} i == 19 && return φ₃(ξ_x) * φ₃(ξ_y) * φ₂(ξ_z) i == 20 && return φ₁(ξ_x) * φ₃(ξ_y) * φ₂(ξ_z) # faces - i == 21 && return φ₂(ξ_x) * φ₁(ξ_y) * φ₂(ξ_z) - i == 22 && return φ₃(ξ_x) * φ₂(ξ_y) * φ₂(ξ_z) - i == 23 && return φ₂(ξ_x) * φ₃(ξ_y) * φ₂(ξ_z) - i == 24 && return φ₁(ξ_x) * φ₂(ξ_y) * φ₂(ξ_z) - i == 25 && return φ₂(ξ_x) * φ₂(ξ_y) * φ₁(ξ_z) + i == 21 && return φ₂(ξ_x) * φ₂(ξ_y) * φ₁(ξ_z) + i == 22 && return φ₂(ξ_x) * φ₁(ξ_y) * φ₂(ξ_z) + i == 23 && return φ₃(ξ_x) * φ₂(ξ_y) * φ₂(ξ_z) + i == 24 && return φ₂(ξ_x) * φ₃(ξ_y) * φ₂(ξ_z) + i == 25 && return φ₁(ξ_x) * φ₂(ξ_y) * φ₂(ξ_z) i == 26 && return φ₂(ξ_x) * φ₂(ξ_y) * φ₃(ξ_z) # interior i == 27 && return φ₂(ξ_x) * φ₂(ξ_y) * φ₂(ξ_z) From 49852a9afd0d56752485562c8aa0628db684af79 Mon Sep 17 
00:00:00 2001 From: termi-official Date: Wed, 12 Oct 2022 21:18:22 +0200 Subject: [PATCH 071/124] Update distributed assembly example. --- docs/src/literate/distributed_assembly.jl | 35 +++++++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 8c2fa9fc2c..fa81cc1151 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -23,14 +23,22 @@ MPI.Init() # We start generating a simple grid with 20x20 quadrilateral elements # and distribute it across our processors using `generate_distributed_grid`. -dgrid = generate_distributed_grid(Quadrilateral, (20, 20)); +# dgrid = generate_distributed_grid(QuadraticQuadrilateral, (3, 1)); +# dgrid = generate_distributed_grid(Tetrahedron, (2, 2, 2)); +dgrid = generate_distributed_grid(Hexahedron, (2, 2, 2)); #src +# dgrid = generate_distributed_grid(Tetrahedron, (3, 3, 3)); #src # ### Trial and test functions # Nothing changes here. dim = 2 -ip = Lagrange{dim, RefCube, 1}() -ip_geo = Lagrange{dim, RefCube, 1}() -qr = QuadratureRule{dim, RefCube}(3) +dim = 3 #src +ref = RefCube +# ref = RefTetrahedron #src +ip = Lagrange{dim, ref, 1}() +ip = Lagrange{dim, ref, 2}() #src +ip_geo = Lagrange{dim, ref, 1}() +qr = QuadratureRule{dim, ref}(2) +qr = QuadratureRule{dim, ref}(4) #src cellvalues = CellScalarValues(qr, ip, ip_geo); # ### Degrees of freedom @@ -44,11 +52,17 @@ close!(dh); # Nothing has to be changed here either. ch = ConstraintHandler(dh); ∂Ω = union(getfaceset.((getlocalgrid(dgrid), ), ["left", "right", "top", "bottom"])...); -dbc = Dirichlet(:u, ∂Ω, (x, t) -> 1) +∂Ω = union(getfaceset.((getlocalgrid(dgrid), ), ["left", "right", "top", "bottom", "front", "back"])...); #src +dbc = Dirichlet(:u, ∂Ω, (x, t) -> 0) +dbc_val = 0 #src +dbc = Dirichlet(:u, ∂Ω, (x, t) -> dbc_val) #src add!(ch, dbc); close!(ch) update!(ch, 0.0); +my_rank = MPI.Comm_rank(MPI.COMM_WORLD) +println("R$my_rank: prescribing $(ch.prescribed_dofs) on $∂Ω") + # ### Assembling the linear system # Assembling the system works also mostly analogue. function doassemble(cellvalues::CellScalarValues{dim}, dh::DistributedDofHandler) where {dim} @@ -137,9 +151,18 @@ vtk_grid("heat_equation_distributed", dh) do vtk # the visualization with some meta information about # the grid and its partitioning vtk_shared_vertices(vtk, dgrid) + vtk_shared_faces(vtk, dgrid) + vtk_shared_edges(vtk, dgrid) #src vtk_partitioning(vtk, dgrid) end +map_parts(local_view(u, u.rows)) do u_local + my_rank = MPI.Comm_rank(MPI.COMM_WORLD) + open("solution-$(my_rank)","w") do io + println(io, u_local) + end +end + ## Test the result against the manufactured solution #src using Test #src for cell in CellIterator(dh) #src @@ -151,7 +174,7 @@ for cell in CellIterator(dh) #src for q_point in 1:getnquadpoints(cellvalues) #src x = spatial_coordinate(cellvalues, q_point, coords) #src for i in 1:n_basefuncs #src - uₐₙₐ = prod(cos, x*π/2)+1.0 #src + uₐₙₐ = prod(cos, x*π/2)+dbc_val #src uₐₚₚᵣₒₓ = function_value(cellvalues, q_point, uₑ) #src @test isapprox(uₐₙₐ, uₐₚₚᵣₒₓ; atol=1e-1) #src end #src From 773528cd51b313be507fd375a8517dcdb106635b Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 12 Oct 2022 21:47:32 +0200 Subject: [PATCH 072/124] Allow empty Dirichlet sets. 
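With a partitioned grid a rank can easily end up with an empty face or node set for a Dirichlet constraint, simply because its part of the domain does not touch that piece of ∂Ω, and first(faces) on an empty set throws. The guards below turn the update into a no-op in that case; the pattern in isolation (minimal stand-in, not the actual _update!):

    # illustrative stand-in for the early-return guard
    function update_sketch!(values::Vector{Float64}, faces::Set{Int})
        isempty(faces) && return      # nothing to prescribe on this rank
        cellid = first(faces)         # safe now: the set is known to be non-empty
        # ... evaluate the boundary function and fill `values` as usual ...
        return
    end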
--- src/Dofs/ConstraintHandler.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Dofs/ConstraintHandler.jl b/src/Dofs/ConstraintHandler.jl index ce0180900a..e52006747a 100644 --- a/src/Dofs/ConstraintHandler.jl +++ b/src/Dofs/ConstraintHandler.jl @@ -360,6 +360,7 @@ end function _update!(inhomogeneities::Vector{Float64}, f::Function, faces::Set{<:BoundaryIndex}, field::Symbol, local_face_dofs::Vector{Int}, local_face_dofs_offset::Vector{Int}, components::Vector{Int}, dh::AbstractDofHandler, facevalues::BCValues, dofmapping::Dict{Int,Int}, time::T) where {T} + length(faces) == 0 && return dim = getdim(getgrid(dh)) _tmp_cellid = first(faces)[1] @@ -401,6 +402,8 @@ end function _update!(inhomogeneities::Vector{Float64}, f::Function, nodes::Set{Int}, field::Symbol, nodeidxs::Vector{Int}, globaldofs::Vector{Int}, components::Vector{Int}, dh::AbstractDofHandler, facevalues::BCValues, dofmapping::Dict{Int,Int}, time::Float64) + length(nodes) == 0 && return + counter = 1 for (idx, nodenumber) in enumerate(nodeidxs) x = getgrid(dh).nodes[nodenumber].x From 9c9042bf17fa07599df6a72dbb114692d82772b9 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Fri, 14 Oct 2022 06:17:28 +0200 Subject: [PATCH 073/124] Metis type. Co-authored-by: Fredrik Ekre --- src/Grid/DistributedGrid.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index ffb162b5eb..85c96c842a 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -92,7 +92,7 @@ function create_partitioning(grid::Grid{dim,C,T}, grid_topology::ExclusiveTopolo @assert N > 0 if n_partitions == 1 - return ones(N) + return ones(Metis.idx_t, N) end # Set up the element connectivity graph From a2e65c89b661a0a8b85608c2995d8f53f8b5af58 Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 19 Oct 2022 00:45:26 +0200 Subject: [PATCH 074/124] Fetch empty partitions. 
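A partitioning may assign no cells to a rank, e.g. when the partition vector simply never mentions that rank. The local grid construction cannot handle such empty partitions yet, so the cell extraction now fails early with an explicit assertion instead of silently building an empty local grid, and the ambiguous counter `N` is renamed to `n_cells_global`.

Sketch of a run that now trips the assertion (MPI setup as in the tests; the partition vector is deliberately contrived so that rank 2 receives nothing):

    grid  = generate_grid(Hexahedron, (2, 1, 1))
    topo  = ExclusiveTopology(grid)
    dgrid = DistributedGrid(grid, topo, MPI.COMM_WORLD, Int32[1, 1])
    # on rank 2 the new `@assert length(local_cells) > 0` fires: "Cannot handle empty partitions yet"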
--- src/Grid/DistributedGrid.jl | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index 85c96c842a..bbdad19fc2 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -88,18 +88,18 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}; grid_comm::MPI.Comm end function create_partitioning(grid::Grid{dim,C,T}, grid_topology::ExclusiveTopology, n_partitions, partition_alg) where {dim,C,T} - N = getncells(grid) - @assert N > 0 + n_cells_global = getncells(grid) + @assert n_cells_global > 0 if n_partitions == 1 - return ones(Metis.idx_t, N) + return ones(Metis.idx_t, n_cells_global) end # Set up the element connectivity graph - xadj = Vector{Metis.idx_t}(undef, N+1) + xadj = Vector{Metis.idx_t}(undef, n_cells_global+1) xadj[1] = 1 adjncy = Vector{Metis.idx_t}(undef, 0) - @inbounds for i in 1:N + @inbounds for i in 1:n_cells_global n_neighbors = 0 for neighbor ∈ getneighborhood(grid_topology, grid, CellIndex(i)) push!(adjncy, neighbor) @@ -111,7 +111,7 @@ function create_partitioning(grid::Grid{dim,C,T}, grid_topology::ExclusiveTopolo # Generate a partitioning return Metis.partition( Metis.Graph( - Metis.idx_t(N), + Metis.idx_t(n_cells_global), xadj, adjncy ), @@ -123,8 +123,8 @@ end """ """ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm; partition_alg = :RECURSIVE) where {dim,C,T} - N = getncells(grid_to_distribute) - @assert N > 0 + n_cells_global = getncells(grid_to_distribute) + @assert n_cells_global > 0 parts = create_partitioning(grid_to_distribute, grid_topology, MPI.Comm_size(grid_comm), partition_alg) @@ -134,14 +134,15 @@ end """ """ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm, parts::Vector{Int32}) where {dim,C,T} - N = getncells(grid_to_distribute) - @assert N > 0 + n_cells_global = getncells(grid_to_distribute) + @assert n_cells_global > 0 # Empty input mesh... my_rank = MPI.Comm_rank(grid_comm)+1 # Start extraction of local grid # 1. Extract local cells - local_cells = getcells(grid_to_distribute)[[i for i ∈ 1:N if parts[i] == my_rank]] + local_cells = getcells(grid_to_distribute)[[i for i ∈ 1:n_cells_global if parts[i] == my_rank]] + @assert length(local_cells) > 0 # Cannot handle empty partitions yet # 2. Find unique nodes local_node_index_set = Set{Int}() @@ -181,7 +182,7 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu for rank ∈ 1:MPI.Comm_size(grid_comm) global_to_local_cell_map[rank] = Dict{Int,Int}() next_local_cell_idx = 1 - for global_cell_idx ∈ 1:N + for global_cell_idx ∈ 1:n_cells_global if parts[global_cell_idx] == rank global_to_local_cell_map[rank][global_cell_idx] = next_local_cell_idx next_local_cell_idx += 1 From c55abb59e1bb406b23f3a2772708570682c4c1b1 Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 19 Oct 2022 01:28:08 +0200 Subject: [PATCH 075/124] Fix set accessors. --- docs/src/literate/distributed_assembly.jl | 4 ++-- src/Grid/DistributedGrid.jl | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index fa81cc1151..7a8a09adc5 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -51,8 +51,8 @@ close!(dh); # ### Boundary conditions # Nothing has to be changed here either. 
ch = ConstraintHandler(dh); -∂Ω = union(getfaceset.((getlocalgrid(dgrid), ), ["left", "right", "top", "bottom"])...); -∂Ω = union(getfaceset.((getlocalgrid(dgrid), ), ["left", "right", "top", "bottom", "front", "back"])...); #src +∂Ω = union(getfaceset.((dgrid, ), ["left", "right", "top", "bottom"])...); +∂Ω = union(getfaceset.((dgrid, ), ["left", "right", "top", "bottom", "front", "back"])...); #src dbc = Dirichlet(:u, ∂Ω, (x, t) -> 0) dbc_val = 0 #src dbc = Dirichlet(:u, ∂Ω, (x, t) -> dbc_val) #src diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index bbdad19fc2..4ad75ee82d 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -369,3 +369,18 @@ function compute_owner(dgrid::AbstractDistributedGrid, shared_entity::SharedEnti my_rank = MPI.Comm_rank(global_comm(dgrid))+1 # Shift rank up by 1 to match Julia's indexing convention return minimum([my_rank; [remote_rank for (remote_rank, _) ∈ remote_entities(shared_entity)]]) end + +@inline getcellset(grid::AbstractDistributedGrid, setname::String) = getcellset(getlocalgrid(grid), setname) +@inline getcellsets(grid::AbstractDistributedGrid) = getcellsets(getlocalgrid(grid)) + +@inline getnodeset(grid::AbstractDistributedGrid, setname::String) = getnodeset(getlocalgrid(grid), setname) +@inline getnodesets(grid::AbstractDistributedGrid) = getnodeset(getlocalgrid(grid), setname) + +@inline getfaceset(grid::AbstractDistributedGrid, setname::String) = getfaceset(getlocalgrid(grid), setname) +@inline getfacesets(grid::AbstractDistributedGrid) = getfaceset(getlocalgrid(grid), setname) + +@inline getedgeset(grid::AbstractDistributedGrid, setname::String) = getedgeset(getlocalgrid(grid), setname) +@inline getedgesets(grid::AbstractDistributedGrid) = getedgeset(getlocalgrid(grid), setname) + +@inline getvertexset(grid::AbstractDistributedGrid, setname::String) = getvertexset(getlocalgrid(grid), setname) +@inline getvertexsets(grid::AbstractDistributedGrid) = getvertexset(getlocalgrid(grid), setname) From 2c4152ca74a0b58951402e917b443b5aa14cb8c4 Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 19 Oct 2022 02:35:15 +0200 Subject: [PATCH 076/124] Fix vector assembly. 
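Vector-valued fields place `field_dim` consecutive dofs on each vertex, edge and face, but the ownership computation, the local-to-global numbering and the ghost-layer bookkeeping only handled the first dof of every shared entity. These loops now expand per component (`dof + d - 1` for `d in 1:field_dims[field_idx]`), and the ghost records again carry the originating field index so `end_assemble` can insert the per-component couplings (coupling between *different* fields is still marked TODO).

Any vector field exercises the new path; a sketch mirroring the API used in the examples:

    dh = DistributedDofHandler(dgrid)
    push!(dh, :u, 2, Lagrange{2, RefCube, 1}())   # two dofs per shared vertex
    close!(dh)   # ownership and global numbering now cover both components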
--- src/Dofs/DistributedDofHandler.jl | 74 ++++++++++++++++++++----------- src/assembler.jl | 29 +++++++----- 2 files changed, 65 insertions(+), 38 deletions(-) diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index 06994c3f7a..496e4ff859 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -90,7 +90,9 @@ function compute_dof_ownership(dh) vi = toglobal(dgrid, lvi) if has_vertex_dofs(dh, field_idx, vi) local_dof_idx = vertex_dofs(dh, field_idx, vi) - dof_owner[local_dof_idx] = compute_owner(dgrid, sv) + for d in 1:dh.field_dims[field_idx] + dof_owner[local_dof_idx+d-1] = compute_owner(dgrid, sv) + end end end end @@ -100,7 +102,9 @@ function compute_dof_ownership(dh) fi = toglobal(dgrid, lfi) if has_face_dofs(dh, field_idx, fi) local_dof_idx = face_dofs(dh, field_idx, fi) - dof_owner[local_dof_idx] = compute_owner(dgrid, sf) + for d in 1:dh.field_dims[field_idx] + dof_owner[local_dof_idx+d-1] = compute_owner(dgrid, sf) + end end end end @@ -110,7 +114,9 @@ function compute_dof_ownership(dh) ei = toglobal(dgrid, lei) if has_edge_dofs(dh, field_idx, ei) local_dof_idx = edge_dofs(dh, field_idx, ei) - dof_owner[local_dof_idx] = compute_owner(dgrid, se) + for d in 1:dh.field_dims[field_idx] + dof_owner[local_dof_idx+d-1] = compute_owner(dgrid, se) + end end end end @@ -182,11 +188,15 @@ function local_to_global_numbering(dh::DistributedDofHandler) # Update dof assignment dof_local_idx = dh.vertexdicts[field_idx][vertex] if local_to_global[dof_local_idx] == 0 - @debug println(" mapping vertex dof#$dof_local_idx to $next_local_idx (R$my_rank)") - local_to_global[dof_local_idx] = next_local_idx - next_local_idx += 1 + for d in 1:dh.field_dims[field_idx] + @debug println(" mapping vertex dof#$dof_local_idx to $next_local_idx (R$my_rank)") + local_to_global[dof_local_idx+d-1] = next_local_idx + next_local_idx += 1 + end else - @debug println(" vertex dof#$dof_local_idx already mapped to $(local_to_global[dof_local_idx]) (R$my_rank)") + for d in 1:dh.field_dims[field_idx] + @debug println(" vertex dof#$(dof_local_idx+d-1) already mapped to $(local_to_global[dof_local_idx+d-1]) (R$my_rank)") + end end end @@ -229,11 +239,15 @@ function local_to_global_numbering(dh::DistributedDofHandler) # Update dof assignment dof_local_idx = dh.edgedicts[field_idx][toglobal(getlocalgrid(dgrid), lei)][1] if local_to_global[dof_local_idx] == 0 - @debug println(" mapping edge dof#$dof_local_idx to $next_local_idx (R$my_rank)") - local_to_global[dof_local_idx] = next_local_idx - next_local_idx += 1 + for d in 1:dh.field_dims[field_idx] + @debug println(" mapping edge dof#$(dof_local_idx+d-1) to $next_local_idx (R$my_rank)") + local_to_global[dof_local_idx+d-1] = next_local_idx + next_local_idx += 1 + end else - @debug println(" edge dof#$dof_local_idx already mapped to $(local_to_global[dof_local_idx]) (R$my_rank)") + for d in 1:dh.field_dims[field_idx] + @debug println(" edge dof#$(dof_local_idx+d-1) already mapped to $(local_to_global[dof_local_idx+d-1]) (R$my_rank)") + end end end @@ -275,11 +289,15 @@ function local_to_global_numbering(dh::DistributedDofHandler) # Update dof assignment dof_local_idx = dh.facedicts[field_idx][toglobal(getlocalgrid(dgrid), lfi)] if local_to_global[dof_local_idx] == 0 - @debug println(" mapping face dof#$dof_local_idx to $next_local_idx (R$my_rank)") - local_to_global[dof_local_idx] = next_local_idx - next_local_idx += 1 + for d in 1:dh.field_dims[field_idx] + @debug println(" mapping face dof#$(dof_local_idx+d-1) 
to $next_local_idx (R$my_rank)") + local_to_global[dof_local_idx+d-1] = next_local_idx + next_local_idx += 1 + end else - @debug println(" face dof#$dof_local_idx already mapped to $(local_to_global[dof_local_idx]) (R$my_rank)") + for d in 1:dh.field_dims[field_idx] + @debug println(" face dof#$(dof_local_idx+d-1) already mapped to $(local_to_global[dof_local_idx+d-1]) (R$my_rank)") + end end end @@ -315,16 +333,18 @@ function local_to_global_numbering(dh::DistributedDofHandler) if interpolation_info.ncelldofs > 0 # always distribute new dofs for cell @debug println(" cell#$ci") for celldof in 1:interpolation_info.ncelldofs - for d in 1:dh.field_dims[field_idx] - # Update dof assignment - dof_local_idx = dh.celldicts[field_idx][ci][celldof] - if local_to_global[dof_local_idx] == 0 - @debug println(" mapping cell dof#$dof_local_idx to $next_local_idx (R$my_rank)") - local_to_global[dof_local_idx] = next_local_idx + # Update dof assignment + dof_local_idx = dh.celldicts[field_idx][ci][celldof] + if local_to_global[dof_local_idx+d-1] == 0 + for d in 1:dh.field_dims[field_idx] + @debug println(" mapping cell dof#$(dof_local_idx+d-1) to $next_local_idx (R$my_rank)") + local_to_global[dof_local_idx+d-1] = next_local_idx next_local_idx += 1 - else + end + else + for d in 1:dh.field_dims[field_idx] # Should never happen... - @debug println(" cell dof#$dof_local_idx already mapped to $(local_to_global[dof_local_idx]) (R$my_rank)") + @debug println(" WARNING! cell dof#$(dof_local_idx+d-1) already mapped to $(local_to_global[dof_local_idx+d-1]) (R$my_rank)") end end end # cell loop @@ -374,7 +394,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) @assert haskey(sv.remote_vertices, remote_rank) for (cvi, llvi) ∈ sv.remote_vertices[remote_rank][1:1] # Just don't ask :) remote_cells[next_buffer_idx] = cvi - remote_cell_vis[next_buffer_idx] = llvi + remote_cell_vis[next_buffer_idx] = llvi next_buffer_idx += 1 end end @@ -499,8 +519,10 @@ function local_to_global_numbering(dh::DistributedDofHandler) for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) vi = vertices(getcells(getgrid(dh),lci))[lclvi] if haskey(dh.vertexdicts[field_idx], vi) - local_to_global[dh.vertexdicts[field_idx][vi]] = corresponding_global_dofs[cdi] - @debug println(" Updating field $(dh.field_names[field_idx]) vertex $(VertexIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") + for d in 1:dh.field_dims[field_idx] + local_to_global[dh.vertexdicts[field_idx][vi]+d-1] = corresponding_global_dofs[cdi]+d-1 + @debug println(" Updating field $(dh.field_names[field_idx]) vertex $(VertexIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]+d-1) (R$my_rank)") + end else @debug println(" Skipping recv on field $(dh.field_names[field_idx]) vertex $vi (R$my_rank)") end diff --git a/src/assembler.jl b/src/assembler.jl index e2da208bf9..56d2937cbe 100644 --- a/src/assembler.jl +++ b/src/assembler.jl @@ -371,7 +371,7 @@ struct PartitionedArraysCOOAssembler{T} #@TODO reorder communication by field to eliminate need for `ghost_dof_field_index_to_send` ghost_dof_to_send = [Int[] for i ∈ 1:destination_len] # global dof id ghost_rank_to_send = [Int[] for i ∈ 1:destination_len] # rank of dof - # ghost_dof_field_index_to_send = [Int[] for i ∈ 1:destination_len] + ghost_dof_field_index_to_send = [Int[] for i ∈ 1:destination_len] ghost_dof_owner = [Int[] for i ∈ 1:destination_len] # corresponding owner ghost_dof_pivot_to_send = [Int[] for i ∈ 1:destination_len] # corresponding dof to interact with for 
(pivot_vertex, pivot_shared_vertex) ∈ dgrid.shared_vertices @@ -400,7 +400,7 @@ struct PartitionedArraysCOOAssembler{T} append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_vertex_dof]) append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_field_dof]) append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_field_dof]) - # append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + append!(ghost_dof_field_index_to_send[sender_slot], field_idx) end end end @@ -434,7 +434,7 @@ struct PartitionedArraysCOOAssembler{T} append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_face_dof]) append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_field_dof]) append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_field_dof]) - # append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + append!(ghost_dof_field_index_to_send[sender_slot], field_idx) end end end @@ -466,7 +466,7 @@ struct PartitionedArraysCOOAssembler{T} append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_edge_dof]) append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_field_dof]) append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_field_dof]) - # append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + append!(ghost_dof_field_index_to_send[sender_slot], field_idx) end end end @@ -486,9 +486,9 @@ struct PartitionedArraysCOOAssembler{T} ghost_recv_buffer_dofs = zeros(Int, sum(ghost_recv_buffer_lengths)) MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_dofs,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_dofs,ghost_recv_buffer_lengths), vertex_comm(dgrid)) - # ghost_send_buffer_fields = vcat(ghost_dof_field_index_to_send...) - # ghost_recv_buffer_fields = zeros(Int, sum(ghost_recv_buffer_lengths)) - # MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_fields,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_fields,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + ghost_send_buffer_fields = vcat(ghost_dof_field_index_to_send...) + ghost_recv_buffer_fields = zeros(Int, sum(ghost_recv_buffer_lengths)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_fields,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_fields,ghost_recv_buffer_lengths), vertex_comm(dgrid)) ghost_send_buffer_ranks = vcat(ghost_rank_to_send...) ghost_recv_buffer_ranks = zeros(Int, sum(ghost_recv_buffer_lengths)) @@ -532,7 +532,7 @@ struct PartitionedArraysCOOAssembler{T} f = PartitionedArrays.PVector(0.0,rows) @debug println("f constructed (R$my_rank)") - 👻remotes = zip(ghost_recv_buffer_dofs_piv, ghost_recv_buffer_dofs, ghost_recv_buffer_ranks) + 👻remotes = zip(ghost_recv_buffer_dofs_piv, ghost_recv_buffer_dofs, ghost_recv_buffer_ranks,ghost_recv_buffer_fields) @debug println("👻remotes $👻remotes (R$my_rank)") return new(I, J, V, cols, rows, f, 👻remotes, dh) @@ -569,10 +569,15 @@ function end_assemble(assembler::PartitionedArraysCOOAssembler{T}) where {T} # Fix ghost layer 👻! Note that the locations for remote processes to write their # data into are missing up to this point. - for (i, (pivot_dof, global_ghost_dof, ghost_owner_rank)) ∈ enumerate(assembler.👻remotes) - push!(I, pivot_dof) - push!(J, global_ghost_dof) - push!(V, 0.0) + # TODO here still the interaction between fields is missing... 
+ for (i, (pivot_dof, global_ghost_dof, ghost_owner_rank, ghost_field_idx)) ∈ enumerate(assembler.👻remotes) + for dᵢ ∈ 1:assembler.dh.field_dims[ghost_field_idx] + for dⱼ ∈ 1:assembler.dh.field_dims[ghost_field_idx] + push!(I, pivot_dof+dᵢ-1) + push!(J, global_ghost_dof+dⱼ-1) + push!(V, 0.0) + end + end end @debug println("I=$(I) (R$my_rank)") From a11493378b8dbba173d51b7ad8b43e6ea4c6df82 Mon Sep 17 00:00:00 2001 From: termi-official Date: Wed, 19 Oct 2022 13:13:36 +0200 Subject: [PATCH 077/124] Update manifest --- docs/Manifest.toml | 115 ++++++++++++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 42 deletions(-) diff --git a/docs/Manifest.toml b/docs/Manifest.toml index e26bd13eaf..b08369986d 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -1,8 +1,8 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.7.2" +julia_version = "1.8.2" manifest_format = "2.0" -project_hash = "36aa80ebfd72e2016135d6d7b3122eb6efdc74ea" +project_hash = "aa7aaa4a7e0a82eb77e2b1e6ab54d762b5e9cff6" [[deps.ANSIColoredPrinters]] git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c" @@ -17,6 +17,7 @@ version = "3.4.0" [[deps.ArgTools]] uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.1" [[deps.ArnoldiMethod]] deps = ["LinearAlgebra", "Random", "StaticArrays"] @@ -32,9 +33,9 @@ version = "6.0.23" [[deps.ArrayInterfaceCore]] deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] -git-tree-sha1 = "5bb0f8292405a516880a3809954cb832ae7a31c5" +git-tree-sha1 = "e9f7992287edfc27b3cbe0046c544bace004ca5b" uuid = "30b0a656-2188-435a-8636-2ec0e6a096e2" -version = "0.1.20" +version = "0.1.22" [[deps.ArrayInterfaceGPUArrays]] deps = ["Adapt", "ArrayInterfaceCore", "GPUArraysCore", "LinearAlgebra"] @@ -56,9 +57,9 @@ version = "0.1.4" [[deps.ArrayInterfaceStaticArraysCore]] deps = ["Adapt", "ArrayInterfaceCore", "LinearAlgebra", "StaticArraysCore"] -git-tree-sha1 = "a1e2cf6ced6505cbad2490532388683f1e88c3ed" +git-tree-sha1 = "93c8ba53d8d26e124a5a8d4ec914c3a16e6a0970" uuid = "dd5226c6-a4d4-4bc7-8575-46859f9c95b9" -version = "0.1.0" +version = "0.1.3" [[deps.ArrayLayouts]] deps = ["FillArrays", "LinearAlgebra", "SparseArrays"] @@ -85,9 +86,9 @@ version = "0.1.4" [[deps.BlockArrays]] deps = ["ArrayLayouts", "FillArrays", "LinearAlgebra"] -git-tree-sha1 = "0c0dd27be59bc76a3da6243d8172aeedd6420037" +git-tree-sha1 = "f9f6d3f5e6ac9d78e461c183bfe0945db679f514" uuid = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" -version = "0.16.20" +version = "0.16.21" [[deps.Bzip2_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -181,6 +182,7 @@ version = "4.3.0" [[deps.CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "0.5.2+0" [[deps.ConstructionBase]] deps = ["LinearAlgebra"] @@ -243,9 +245,9 @@ version = "1.1.0" [[deps.DiffRules]] deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] -git-tree-sha1 = "992a23afdb109d0d2f8802a30cf5ae4b1fe7ea68" +git-tree-sha1 = "8b7a4d23e22f5d44883671da70865ca98f2ebf9d" uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" -version = "1.11.1" +version = "1.12.0" [[deps.Distances]] deps = ["LinearAlgebra", "SparseArrays", "Statistics", "StatsAPI"] @@ -276,8 +278,9 @@ uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" version = "0.27.23" [[deps.Downloads]] -deps = ["ArgTools", "LibCURL", "NetworkOptions"] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" 
[[deps.DualNumbers]] deps = ["Calculus", "NaNMath", "SpecialFunctions"] @@ -360,11 +363,14 @@ git-tree-sha1 = "0d698039c5a670d88a5f058d60b4245ad22fd713" uuid = "0f8c756f-80dd-4a75-85c6-b0a5ab9d4620" version = "0.1.3" +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" + [[deps.FillArrays]] deps = ["LinearAlgebra", "Random", "SparseArrays", "Statistics"] -git-tree-sha1 = "87519eb762f85534445f5cda35be12e32759ee14" +git-tree-sha1 = "802bfc139833d2ba893dd9e62ba1767c88d708ae" uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" -version = "0.13.4" +version = "0.13.5" [[deps.FiniteDiff]] deps = ["ArrayInterfaceCore", "LinearAlgebra", "Requires", "Setfield", "SparseArrays", "StaticArrays"] @@ -438,6 +444,7 @@ version = "9.0.1+0" [[deps.GMP_jll]] deps = ["Artifacts", "Libdl"] uuid = "781609d7-10c4-51f6-84f2-b8444358ff6d" +version = "6.2.1+2" [[deps.GPUArraysCore]] deps = ["Adapt"] @@ -506,9 +513,9 @@ version = "1.12.2+2" [[deps.HTTP]] deps = ["Base64", "CodecZlib", "Dates", "IniFile", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"] -git-tree-sha1 = "e8c58d5f03b9d9eb9ed7067a2f34c7c371ab130b" +git-tree-sha1 = "3cdd8948c55d8b53b5323f23c9581555dc2e30e1" uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" -version = "1.4.1" +version = "1.5.0" [[deps.HarfBuzz_jll]] deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "Graphite2_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg"] @@ -654,9 +661,9 @@ version = "0.15.17" [[deps.LayoutPointers]] deps = ["ArrayInterface", "ArrayInterfaceOffsetArrays", "ArrayInterfaceStaticArrays", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static"] -git-tree-sha1 = "b67e749fb35530979839e7b4b606a97105fe4f1c" +git-tree-sha1 = "73e2e40eb02d6ccd191a8a9f8cee20db8d5df010" uuid = "10f19ff3-798f-405d-979b-55457f8fc047" -version = "0.1.10" +version = "0.1.11" [[deps.LazyArtifacts]] deps = ["Artifacts", "Pkg"] @@ -665,10 +672,12 @@ uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" [[deps.LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.3" [[deps.LibCURL_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "7.84.0+0" [[deps.LibGit2]] deps = ["Base64", "NetworkOptions", "Printf", "SHA"] @@ -677,6 +686,7 @@ uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" [[deps.LibSSH2_jll]] deps = ["Artifacts", "Libdl", "MbedTLS_jll"] uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.10.2+0" [[deps.Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" @@ -758,10 +768,10 @@ uuid = "18c40d15-f7cd-5a6d-bc92-87468d86c5db" version = "5.0.0+0" [[deps.LinearSolve]] -deps = ["ArrayInterfaceCore", "DocStringExtensions", "FastLapackInterface", "GPUArraysCore", "IterativeSolvers", "KLU", "Krylov", "KrylovKit", "LinearAlgebra", "RecursiveFactorization", "Reexport", "SciMLBase", "Setfield", "SparseArrays", "SuiteSparse", "UnPack"] -git-tree-sha1 = "92cc95b66f1459d230af9e67089eeeea6c6b2ee9" +deps = ["ArrayInterfaceCore", "DocStringExtensions", "FastLapackInterface", "GPUArraysCore", "IterativeSolvers", "KLU", "Krylov", "KrylovKit", "LinearAlgebra", "RecursiveFactorization", "Reexport", "SciMLBase", "Setfield", "SnoopPrecompile", "SparseArrays", "SuiteSparse", "UnPack"] +git-tree-sha1 = "70db49cbaec1cdf4def39c4ac51a3abe56b2e421" uuid = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" -version = "1.23.0" +version = "1.27.0" [[deps.Literate]] deps = 
["Base64", "IOCapture", "JSON", "REPL"] @@ -792,9 +802,9 @@ version = "0.4.9" [[deps.LoopVectorization]] deps = ["ArrayInterface", "ArrayInterfaceCore", "ArrayInterfaceOffsetArrays", "ArrayInterfaceStaticArrays", "CPUSummary", "ChainRulesCore", "CloseOpenIntervals", "DocStringExtensions", "ForwardDiff", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "SIMDDualNumbers", "SIMDTypes", "SLEEFPirates", "SnoopPrecompile", "SpecialFunctions", "Static", "ThreadingUtilities", "UnPack", "VectorizationBase"] -git-tree-sha1 = "39af6a1e398a29f568dc9fe469f459ad3aacb03b" +git-tree-sha1 = "9f6030ca92d1a816e931abb657219c9fc4991a96" uuid = "bdcacae8-1622-11e9-2a5c-532679323890" -version = "0.12.133" +version = "0.12.136" [[deps.METIS_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -861,6 +871,7 @@ version = "1.1.6" [[deps.MbedTLS_jll]] deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.0+0" [[deps.Measures]] git-tree-sha1 = "e498ddeee6f9fdb4551ce855a46f54dbd900245f" @@ -890,6 +901,7 @@ uuid = "a63ad114-7e13-5084-954f-fe012c677804" [[deps.MozillaCACerts_jll]] uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2022.2.1" [[deps.MuladdMacro]] git-tree-sha1 = "c6190f9a7fc5d9d5915ab29f2134421b12d24a68" @@ -922,6 +934,7 @@ version = "0.4.12" [[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" [[deps.NonlinearSolve]] deps = ["ArrayInterfaceCore", "FiniteDiff", "ForwardDiff", "IterativeSolvers", "LinearAlgebra", "RecursiveArrayTools", "RecursiveFactorization", "Reexport", "SciMLBase", "Setfield", "StaticArrays", "UnPack"] @@ -950,10 +963,12 @@ version = "1.3.5+1" [[deps.OpenBLAS_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.20+0" [[deps.OpenLibm_jll]] deps = ["Artifacts", "Libdl"] uuid = "05823500-19ac-5b8b-9628-191a04bc5112" +version = "0.8.1+0" [[deps.OpenMPI_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "Pkg", "TOML"] @@ -963,15 +978,15 @@ version = "4.1.3+3" [[deps.OpenSSL]] deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"] -git-tree-sha1 = "ebe81469e9d7b471d7ddb611d9e147ea16de0add" +git-tree-sha1 = "3c3c4a401d267b04942545b1e964a20279587fd7" uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c" -version = "1.2.1" +version = "1.3.0" [[deps.OpenSSL_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "a94dc0169bffbf7e5250fb7e1efb1a85b09105c7" +git-tree-sha1 = "e60321e3f2616584ff98f0a4f18d98ae6f89bbb3" uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" -version = "1.1.18+0" +version = "1.1.17+0" [[deps.OpenSpecFun_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] @@ -997,14 +1012,15 @@ uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" version = "1.4.1" [[deps.OrdinaryDiffEq]] -deps = ["Adapt", "ArrayInterface", "ArrayInterfaceGPUArrays", "ArrayInterfaceStaticArrays", "DataStructures", "DiffEqBase", "DocStringExtensions", "ExponentialUtilities", "FastBroadcast", "FastClosures", "FiniteDiff", "ForwardDiff", "FunctionWrappersWrappers", "LinearAlgebra", "LinearSolve", "Logging", "LoopVectorization", "MacroTools", "MuladdMacro", "NLsolve", "NonlinearSolve", "Polyester", "PreallocationTools", "Preferences", "RecursiveArrayTools", "Reexport", "SciMLBase", "SnoopPrecompile", "SparseArrays", "SparseDiffTools", "StaticArrays", "UnPack"] -git-tree-sha1 = 
"33a819c1355faeccc68d57a3c7d7c871680d49f2" +deps = ["Adapt", "ArrayInterface", "ArrayInterfaceCore", "ArrayInterfaceGPUArrays", "ArrayInterfaceStaticArrays", "ArrayInterfaceStaticArraysCore", "DataStructures", "DiffEqBase", "DocStringExtensions", "ExponentialUtilities", "FastBroadcast", "FastClosures", "FiniteDiff", "ForwardDiff", "FunctionWrappersWrappers", "LinearAlgebra", "LinearSolve", "Logging", "LoopVectorization", "MacroTools", "MuladdMacro", "NLsolve", "NonlinearSolve", "Polyester", "PreallocationTools", "Preferences", "RecursiveArrayTools", "Reexport", "SciMLBase", "SnoopPrecompile", "SparseArrays", "SparseDiffTools", "StaticArrays", "UnPack"] +git-tree-sha1 = "88b3bc390fe76e559bef97b6abe55e8d3a440a56" uuid = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" -version = "6.28.1" +version = "6.29.3" [[deps.PCRE2_jll]] deps = ["Artifacts", "Libdl"] uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" +version = "10.40.0+0" [[deps.PDMats]] deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] @@ -1020,9 +1036,9 @@ version = "0.12.3" [[deps.Parsers]] deps = ["Dates"] -git-tree-sha1 = "595c0b811cf2bab8b0849a70d9bd6379cc1cfb52" +git-tree-sha1 = "6c01a9b494f6d2a9fc180a08b182fcb06f0958a0" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "2.4.1" +version = "2.4.2" [[deps.PartitionedArrays]] deps = ["Distances", "IterativeSolvers", "LinearAlgebra", "MPI", "Printf", "SparseArrays", "SparseMatricesCSR"] @@ -1044,6 +1060,7 @@ version = "0.40.1+0" [[deps.Pkg]] deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.8.0" [[deps.PlotThemes]] deps = ["PlotUtils", "Statistics"] @@ -1059,9 +1076,9 @@ version = "1.3.1" [[deps.Plots]] deps = ["Base64", "Contour", "Dates", "Downloads", "FFMPEG", "FixedPointNumbers", "GR", "JLFzf", "JSON", "LaTeXStrings", "Latexify", "LinearAlgebra", "Measures", "NaNMath", "Pkg", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "RelocatableFolders", "Requires", "Scratch", "Showoff", "SnoopPrecompile", "SparseArrays", "Statistics", "StatsBase", "UUIDs", "UnicodeFun", "Unzip"] -git-tree-sha1 = "524d9ff1b2f4473fef59678c06f9f77160a204b1" +git-tree-sha1 = "041704a5182f25cdcbb1369f13d9d9f94a86b5fd" uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -version = "1.35.3" +version = "1.35.4" [[deps.Polyester]] deps = ["ArrayInterface", "BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "ManualMemory", "PolyesterWeave", "Requires", "Static", "StrideArraysCore", "ThreadingUtilities"] @@ -1125,9 +1142,9 @@ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [[deps.RecipesBase]] deps = ["SnoopPrecompile"] -git-tree-sha1 = "612a4d76ad98e9722c8ba387614539155a59e30c" +git-tree-sha1 = "d12e612bba40d189cead6ff857ddb67bd2e6a387" uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" -version = "1.3.0" +version = "1.3.1" [[deps.RecipesPipeline]] deps = ["Dates", "NaNMath", "PlotUtils", "RecipesBase", "SnoopPrecompile"] @@ -1176,6 +1193,12 @@ git-tree-sha1 = "68db32dff12bb6127bac73c209881191bf0efbb7" uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f" version = "0.3.0+0" +[[deps.RuntimeGeneratedFunctions]] +deps = ["ExprTools", "SHA", "Serialization"] +git-tree-sha1 = "cdc1e4278e91a6ad530770ebb327f9ed83cf10c4" +uuid = "7e49a35a-f44a-4d26-94aa-eba1b4ca6b47" +version = "0.5.3" + [[deps.SCOTCH_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] git-tree-sha1 = 
"7110b749766853054ce8a2afaa73325d72d32129" @@ -1184,6 +1207,7 @@ version = "6.1.3+0" [[deps.SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" [[deps.SIMD]] git-tree-sha1 = "7dbc15af7ed5f751a82bf3ed37757adf76c32402" @@ -1208,10 +1232,10 @@ uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" version = "0.6.36" [[deps.SciMLBase]] -deps = ["ArrayInterfaceCore", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "FunctionWrappersWrappers", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "Markdown", "Preferences", "RecipesBase", "RecursiveArrayTools", "StaticArraysCore", "Statistics", "Tables"] -git-tree-sha1 = "5227af27f04ad30a68e2bb48300bf1b1a965d145" +deps = ["ArrayInterfaceCore", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "EnumX", "FunctionWrappersWrappers", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "Markdown", "Preferences", "RecipesBase", "RecursiveArrayTools", "RuntimeGeneratedFunctions", "StaticArraysCore", "Statistics", "Tables"] +git-tree-sha1 = "d41daf11db3383bd979ba00e1590d2f4297ace61" uuid = "0bca4576-84f4-4d90-8ffe-ffa030f20462" -version = "1.61.2" +version = "1.63.0" [[deps.Scratch]] deps = ["Dates"] @@ -1337,10 +1361,12 @@ uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" [[deps.SuiteSparse_jll]] deps = ["Artifacts", "Libdl", "Pkg", "libblastrampoline_jll"] uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" +version = "5.10.1+0" [[deps.TOML]] deps = ["Dates"] uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.0" [[deps.TableTraits]] deps = ["IteratorInterfaceExtensions"] @@ -1357,6 +1383,7 @@ version = "1.10.0" [[deps.Tar]] deps = ["ArgTools", "SHA"] uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.1" [[deps.TensorCore]] deps = ["LinearAlgebra"] @@ -1433,9 +1460,9 @@ version = "0.2.0" [[deps.VectorizationBase]] deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static"] -git-tree-sha1 = "3bc5ea8fbf25f233c4c49c0a75f14b276d2f9a69" +git-tree-sha1 = "866e77ea9c675306652f5b5b9010ccbccc684c79" uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" -version = "0.21.51" +version = "0.21.53" [[deps.VertexSafeGraphs]] deps = ["Graphs"] @@ -1608,6 +1635,7 @@ version = "1.4.0+3" [[deps.Zlib_jll]] deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.12+3" [[deps.Zstd_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -1629,9 +1657,9 @@ version = "0.29.0+0" [[deps.gmsh_jll]] deps = ["Artifacts", "Cairo_jll", "CompilerSupportLibraries_jll", "FLTK_jll", "FreeType2_jll", "GLU_jll", "GMP_jll", "HDF5_jll", "JLLWrappers", "JpegTurbo_jll", "LLVMOpenMP_jll", "Libdl", "Libglvnd_jll", "METIS_jll", "MMG_jll", "OCCT_jll", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll", "Xorg_libXfixes_jll", "Xorg_libXft_jll", "Xorg_libXinerama_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] -git-tree-sha1 = "9774ebf68348b3b56c74a78b829051310163fd76" +git-tree-sha1 = "d4cf3bb87fa0669f569e51f6f06cd083771bab65" uuid = "630162c2-fc9b-58b3-9910-8442a8a132e6" -version = "4.10.2+0" +version = "4.10.2+1" [[deps.libaom_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -1648,6 +1676,7 @@ version = "0.15.1+0" [[deps.libblastrampoline_jll]] deps = ["Artifacts", "Libdl", "OpenBLAS_jll"] uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.1.1+0" [[deps.libfdk_aac_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -1670,10 +1699,12 @@ version = "1.3.7+1" [[deps.nghttp2_jll]] deps = 
["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.48.0+0" [[deps.p7zip_jll]] deps = ["Artifacts", "Libdl"] uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+0" [[deps.x264_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] From aa2fb90844ba4a4b368bbeb3ab4ef219c1cb7c1c Mon Sep 17 00:00:00 2001 From: termi-official Date: Mon, 24 Oct 2022 21:28:33 +0200 Subject: [PATCH 078/124] Fix distributed assembly for multi-field vectorial problems --- .../distributed_assembly_plasticity.jl | 295 ++++++++++++++++++ src/Dofs/DistributedDofHandler.jl | 12 +- src/assembler.jl | 65 ++-- 3 files changed, 341 insertions(+), 31 deletions(-) create mode 100644 docs/src/literate/distributed_assembly_plasticity.jl diff --git a/docs/src/literate/distributed_assembly_plasticity.jl b/docs/src/literate/distributed_assembly_plasticity.jl new file mode 100644 index 0000000000..9bcf2b6fa6 --- /dev/null +++ b/docs/src/literate/distributed_assembly_plasticity.jl @@ -0,0 +1,295 @@ +# # Distributed Assembly of Heat Equation +# +# ## Introduction +# +# Now we want to solve the heat problem in parallel. To be specific, this example shows +# how to utilize process parallelism to assemble finite element matrices in parallel. +# This example presumes that the reader is familiar with solving the heat problem in +# serial with Ferrite.jl, as presented in [the first example](@ref heat_example). +# +#- +# ## Commented Program +# +# Now we solve the problem in Ferrite. What follows is a program spliced with comments. +#md # The full program, without comments, can be found in the next [section](@ref heat_equation-plain-program). +# +# First we load Ferrite, and some other packages we need +using Ferrite, MPI +using IterativeSolvers #, HYPRE +using PartitionedArrays #src +using SparseArrays, BlockArrays + + +function PartitionedArrays.matrix_exchanger(values,row_exchanger,row_lids,col_lids) + + part = PartitionedArrays.get_part_ids(row_lids) + parts_rcv = row_exchanger.parts_rcv + parts_snd = row_exchanger.parts_snd + + function setup_rcv(part,parts_rcv,row_lids,col_lids,values) + owner_to_i = Dict(( owner=>i for (i,owner) in enumerate(parts_rcv) )) + ptrs = zeros(Int32,length(parts_rcv)+1) + for (li,lj,v) in nziterator(values) + owner = row_lids.lid_to_part[li] + if owner != part + ptrs[owner_to_i[owner]+1] +=1 + end + end + length_to_ptrs!(ptrs) + k_rcv_data = zeros(Int,ptrs[end]-1) + gi_rcv_data = zeros(Int,ptrs[end]-1) + gj_rcv_data = zeros(Int,ptrs[end]-1) + for (k,(li,lj,v)) in enumerate(nziterator(values)) + owner = row_lids.lid_to_part[li] + if owner != part + p = ptrs[owner_to_i[owner]] + k_rcv_data[p] = k + gi_rcv_data[p] = row_lids.lid_to_gid[li] + gj_rcv_data[p] = col_lids.lid_to_gid[lj] + ptrs[owner_to_i[owner]] += 1 + end + end + rewind_ptrs!(ptrs) + k_rcv = Table(k_rcv_data,ptrs) + gi_rcv = Table(gi_rcv_data,ptrs) + gj_rcv = Table(gj_rcv_data,ptrs) + k_rcv, gi_rcv, gj_rcv + end + + k_rcv, gi_rcv, gj_rcv = PartitionedArrays.map_parts(setup_rcv,part,parts_rcv,row_lids,col_lids,values) + + gi_snd = PartitionedArrays.exchange(gi_rcv,parts_snd,parts_rcv) + gj_snd = PartitionedArrays.exchange(gj_rcv,parts_snd,parts_rcv) + + function setup_snd(part,row_lids,col_lids,gi_snd,gj_snd,values) + ptrs = gi_snd.ptrs + k_snd_data = zeros(Int,ptrs[end]-1) + for p in 1:length(gi_snd.data) + gi = gi_snd.data[p] + gj = gj_snd.data[p] + li = row_lids.gid_to_lid[gi] + lj = col_lids.gid_to_lid[gj] + k = nzindex(values,li,lj) + PartitionedArrays.@check k > 0 "The sparsity pattern of the 
ghost layer is inconsistent on part $part | local index ($li,$lj) | global index ($gi, $gj)" + k_snd_data[p] = k + end + k_snd = Table(k_snd_data,ptrs) + k_snd + end + + k_snd = map_parts(setup_snd,part,row_lids,col_lids,gi_snd,gj_snd,values) + + PartitionedArrays.Exchanger(parts_rcv,parts_snd,k_rcv,k_snd) + end + +# Launch MPI +MPI.Init() +# First we generate a simple grid, specifying the 4 corners of Cooks membrane. +function create_cook_grid(nx, ny) + corners = [Vec{2}((0.0, 0.0)), + Vec{2}((48.0, 44.0)), + Vec{2}((48.0, 60.0)), + Vec{2}((0.0, 44.0))] + grid = generate_grid(Triangle, (nx, ny), corners); + ## facesets for boundary conditions + addfaceset!(grid, "clamped", x -> norm(x[1]) ≈ 0.0); + addfaceset!(grid, "traction", x -> norm(x[1]) ≈ 48.0); + return DistributedGrid(grid) +end; + +# Next we define a function to set up our cell- and facevalues. +function create_values(interpolation_u, interpolation_p) + ## quadrature rules + qr = QuadratureRule{2,RefTetrahedron}(3) + face_qr = QuadratureRule{1,RefTetrahedron}(3) + + ## geometric interpolation + interpolation_geom = Lagrange{2,RefTetrahedron,1}() + + ## cell and facevalues for u + cellvalues_u = CellVectorValues(qr, interpolation_u, interpolation_geom) + facevalues_u = FaceVectorValues(face_qr, interpolation_u, interpolation_geom) + + ## cellvalues for p + cellvalues_p = CellScalarValues(qr, interpolation_p, interpolation_geom) + + return cellvalues_u, cellvalues_p, facevalues_u +end; + + +# We create a DofHandler, with two fields, `:u` and `:p`, +# with possibly different interpolations +function create_dofhandler(grid, ipu, ipp) + dh = DistributedDofHandler(grid) + push!(dh, :u, 2, ipu) # displacement + push!(dh, :p, 1, ipp) # pressure + close!(dh) + return dh +end; + +# We also need to add Dirichlet boundary conditions on the `"clamped"` faceset. +# We specify a homogeneous Dirichlet bc on the displacement field, `:u`. +function create_bc(dh) + dbc = ConstraintHandler(dh) + add!(dbc, Dirichlet(:u, getfaceset(dh.grid, "clamped"), (x,t) -> zero(Vec{2}), [1,2])) + close!(dbc) + t = 0.0 + update!(dbc, t) + return dbc +end; + +# The material is linear elastic, which is here specified by the shear and bulk moduli +struct LinearElasticity{T} + G::T + K::T +end + +# Now to the assembling of the stiffness matrix. This mixed formulation leads to a blocked +# element matrix. Since Ferrite does not force us to use any particular matrix type we will +# use a `PseudoBlockArray` from `BlockArrays.jl`. 
+function doassemble(cellvalues_u::CellVectorValues{dim}, cellvalues_p::CellScalarValues{dim}, + facevalues_u::FaceVectorValues{dim}, grid::DistributedGrid, + dh::DistributedDofHandler, mp::LinearElasticity) where {dim} + + assembler = PartitionedArraysCOOAssembler{Float64}(dh) + nu = getnbasefunctions(cellvalues_u) + np = getnbasefunctions(cellvalues_p) + + fe = PseudoBlockArray(zeros(nu + np), [nu, np]) # local force vector + ke = PseudoBlockArray(zeros(nu + np, nu + np), [nu, np], [nu, np]) # local stiffness matrix + + ## traction vector + t = Vec{2}((0.0, 1/16)) + ## cache ɛdev outside the element routine to avoid some unnecessary allocations + ɛdev = [zero(SymmetricTensor{2, dim}) for i in 1:getnbasefunctions(cellvalues_u)] + + for cell in CellIterator(dh) + fill!(ke, 0) + fill!(fe, 0) + assemble_up!(ke, fe, cell, cellvalues_u, cellvalues_p, facevalues_u, grid, mp, ɛdev, t) + Ferrite.assemble!(assembler, celldofs(cell), fe, ke) + end + + return end_assemble(assembler) +end; + +# The element routine integrates the local stiffness and force vector for all elements. +# Since the problem results in a symmetric matrix we choose to only assemble the lower part, +# and then symmetrize it after the loop over the quadrature points. +function assemble_up!(Ke, fe, cell, cellvalues_u, cellvalues_p, facevalues_u, grid, mp, ɛdev, t) + + n_basefuncs_u = getnbasefunctions(cellvalues_u) + n_basefuncs_p = getnbasefunctions(cellvalues_p) + u▄, p▄ = 1, 2 + reinit!(cellvalues_u, cell) + reinit!(cellvalues_p, cell) + + ## We only assemble lower half triangle of the stiffness matrix and then symmetrize it. + @inbounds for q_point in 1:getnquadpoints(cellvalues_u) + for i in 1:n_basefuncs_u + ɛdev[i] = dev(symmetric(shape_gradient(cellvalues_u, q_point, i))) + end + dΩ = getdetJdV(cellvalues_u, q_point) + for i in 1:n_basefuncs_u + divδu = shape_divergence(cellvalues_u, q_point, i) + δu = shape_value(cellvalues_u, q_point, i) + for j in 1:i + Ke[BlockIndex((u▄, u▄), (i, j))] += 2 * mp.G * ɛdev[i] ⊡ ɛdev[j] * dΩ + end + end + + for i in 1:n_basefuncs_p + δp = shape_value(cellvalues_p, q_point, i) + for j in 1:n_basefuncs_u + divδu = shape_divergence(cellvalues_u, q_point, j) + Ke[BlockIndex((p▄, u▄), (i, j))] += -δp * divδu * dΩ + end + for j in 1:i + p = shape_value(cellvalues_p, q_point, j) + Ke[BlockIndex((p▄, p▄), (i, j))] += - 1/mp.K * δp * p * dΩ + end + + end + end + + symmetrize_lower!(Ke) + + ## We integrate the Neumann boundary using the facevalues. + ## We loop over all the faces in the cell, then check if the face + ## is in our `"traction"` faceset. + @inbounds for face in 1:nfaces(cell) + if (cellid(cell), face) ∈ getfaceset(grid, "traction") + reinit!(facevalues_u, cell, face) + for q_point in 1:getnquadpoints(facevalues_u) + dΓ = getdetJdV(facevalues_u, q_point) + for i in 1:n_basefuncs_u + δu = shape_value(facevalues_u, q_point, i) + fe[i] += (δu ⋅ t) * dΓ + end + end + end + end +end + +function symmetrize_lower!(K) + for i in 1:size(K,1) + for j in i+1:size(K,1) + K[i,j] = K[j,i] + end + end +end; + + +function solve(ν, interpolation_u, interpolation_p) + ## material + Emod = 1. 
+ Gmod = Emod / 2(1 + ν) + Kmod = Emod * ν / ((1+ν) * (1-2ν)) + mp = LinearElasticity(Gmod, Kmod) + + ## grid, dofhandler, boundary condition + #n = 2 + grid = create_cook_grid(50, 40) + dh = create_dofhandler(grid, interpolation_u, interpolation_p) + dbc = create_bc(dh) + vtk_grid("cook_dgrid", dh) do vtk + vtk_partitioning(vtk, grid) + end + ## cellvalues + cellvalues_u, cellvalues_p, facevalues_u = create_values(interpolation_u, interpolation_p) + + ## assembly and solve + K, f = doassemble(cellvalues_u, cellvalues_p, facevalues_u, grid, dh, mp); + apply!(K, f, dbc) + u = cg(K, f); + + ## export + filename = "cook_distributed_" * (isa(interpolation_u, Lagrange{2,RefTetrahedron,1}) ? "linear" : "quadratic") * + "_linear" + vtk_grid(filename, dh) do vtkfile + vtk_point_data(vtkfile, dh, u) + vtk_partitioning(vtkfile, grid) + end + return u +end + +linear = Lagrange{2,RefTetrahedron,1}() +quadratic = Lagrange{2,RefTetrahedron,2}() +u1 = solve(0.4999999, linear, linear); +u2 = solve(0.4999999, quadratic, linear); + +## test the result #src +using Test #src +# @test norm(u2) ≈ 919.2122668839389 #src + +# Finally, we gracefully shutdown MPI +MPI.Finalize() + +#md # ## [Plain program](@id distributed-assembly-plain-program) +#md # +#md # Here follows a version of the program without any comments. +#md # The file is also available here: [`distributed_assembly.jl`](distributed_assembly.jl). +#md # +#md # ```julia +#md # @__CODE__ +#md # ``` diff --git a/src/Dofs/DistributedDofHandler.jl b/src/Dofs/DistributedDofHandler.jl index 496e4ff859..6fbedc04f0 100644 --- a/src/Dofs/DistributedDofHandler.jl +++ b/src/Dofs/DistributedDofHandler.jl @@ -547,8 +547,10 @@ function local_to_global_numbering(dh::DistributedDofHandler) for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) vi = sortface(faces(getcells(getgrid(dh),lci))[lclvi]) if haskey(dh.facedicts[field_idx], vi) - local_to_global[dh.facedicts[field_idx][vi]] = corresponding_global_dofs[cdi] - @debug println(" Updating field $(dh.field_names[field_idx]) face $(FaceIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") + for d in 1:dh.field_dims[field_idx] + local_to_global[dh.facedicts[field_idx][vi]+d-1] = corresponding_global_dofs[cdi]+d-1 + @debug println(" Updating field $(dh.field_names[field_idx]) face $(FaceIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") + end else @debug println(" Skipping recv on field $(dh.field_names[field_idx]) face $vi (R$my_rank)") end @@ -579,8 +581,10 @@ function local_to_global_numbering(dh::DistributedDofHandler) for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) vi = sortedge(edges(getcells(getgrid(dh),lci))[lclvi])[1] if haskey(dh.edgedicts[field_idx], vi) - local_to_global[dh.edgedicts[field_idx][vi][1]] = corresponding_global_dofs[cdi] - @debug println(" Updating field $(dh.field_names[field_idx]) edge $(EdgeIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") + for d in 1:dh.field_dims[field_idx] + local_to_global[dh.edgedicts[field_idx][vi][1]+d-1] = corresponding_global_dofs[cdi]+d-1 + @debug println(" Updating field $(dh.field_names[field_idx]) edge $(EdgeIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") + end else @debug println(" Skipping recv on field $(dh.field_names[field_idx]) edge $vi (R$my_rank)") end diff --git a/src/assembler.jl b/src/assembler.jl index 56d2937cbe..750d24e2fe 100644 --- a/src/assembler.jl +++ b/src/assembler.jl @@ -391,16 +391,19 @@ struct PartitionedArraysCOOAssembler{T} for 
(field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) !has_vertex_dofs(dh, field_idx, pivot_vertex_global) && continue pivot_vertex_dof = vertex_dofs(dh, field_idx, pivot_vertex_global) + + for d ∈ 1:dh.field_dims[field_idx] + @debug println(" adding dof $(pivot_vertex_dof+d-1) to ghost sync synchronization on slot $sender_slot (R$my_rank)") - @debug println(" adding dof $pivot_vertex_dof to ghost sync synchronization on slot $sender_slot (R$my_rank)") - - # Extract dofs belonging to the current field - cell_field_dofs = cell_dofs[dof_range(dh, field_name)] - for cell_field_dof ∈ cell_field_dofs - append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_vertex_dof]) - append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_field_dof]) - append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_field_dof]) - append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + # Extract dofs belonging to the current field + #cell_field_dofs = cell_dofs[dof_range(dh, field_name)] + #for cell_field_dof ∈ cell_field_dofs + for cell_dof ∈ cell_dofs + append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_vertex_dof+d-1]) + append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_dof]) + append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_dof]) + append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + end end end end @@ -426,15 +429,18 @@ struct PartitionedArraysCOOAssembler{T} !has_face_dofs(dh, field_idx, pivot_face_global) && continue pivot_face_dof = face_dofs(dh, field_idx, pivot_face_global) - @debug println(" adding dof $pivot_face_dof to ghost sync synchronization on slot $sender_slot (R$my_rank)") - - # Extract dofs belonging to the current field - cell_field_dofs = cell_dofs[dof_range(dh, field_name)] - for cell_field_dof ∈ cell_field_dofs - append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_face_dof]) - append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_field_dof]) - append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_field_dof]) - append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + for d ∈ 1:dh.field_dims[field_idx] + @debug println(" adding dof $(pivot_face_dof+d-1) to ghost sync synchronization on slot $sender_slot (R$my_rank)") + + # Extract dofs belonging to the current field + #cell_field_dofs = cell_dofs[dof_range(dh, field_name)] + #for cell_field_dof ∈ cell_field_dofs + for cell_dof ∈ cell_dofs + append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_face_dof+d-1]) + append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_dof]) + append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_dof]) + append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + end end end end @@ -460,13 +466,18 @@ struct PartitionedArraysCOOAssembler{T} for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) !has_edge_dofs(dh, field_idx, pivot_edge_global) && continue pivot_edge_dof = edge_dofs(dh, field_idx, pivot_edge_global) - # Extract dofs belonging to the current field - cell_field_dofs = cell_dofs[dof_range(dh, field_name)] - for cell_field_dof ∈ cell_field_dofs - append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_edge_dof]) - append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_field_dof]) - append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_field_dof]) - append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + + for d ∈ 1:dh.field_dims[field_idx] + @debug println(" adding dof $(pivot_edge_dof+d-1) to ghost sync synchronization on slot 
$sender_slot (R$my_rank)") + # Extract dofs belonging to the current field + #cell_field_dofs = cell_dofs[dof_range(dh, field_name)] + #for cell_field_dof ∈ cell_field_dofs + for cell_dof ∈ cell_dofs + append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_edge_dof+d-1]) + append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_dof]) + append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_dof]) + # append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + end end end end @@ -571,8 +582,8 @@ function end_assemble(assembler::PartitionedArraysCOOAssembler{T}) where {T} # data into are missing up to this point. # TODO here still the interaction between fields is missing... for (i, (pivot_dof, global_ghost_dof, ghost_owner_rank, ghost_field_idx)) ∈ enumerate(assembler.👻remotes) - for dᵢ ∈ 1:assembler.dh.field_dims[ghost_field_idx] - for dⱼ ∈ 1:assembler.dh.field_dims[ghost_field_idx] + for dᵢ ∈ 1:1#assembler.dh.field_dims[ghost_field_idx] + for dⱼ ∈ 1:1#assembler.dh.field_dims[ghost_field_idx] push!(I, pivot_dof+dᵢ-1) push!(J, global_ghost_dof+dⱼ-1) push!(V, 0.0) From 5cb2c2c3169516784204cd5ca4f4d1cc3cdd24df Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Thu, 16 Feb 2023 14:56:06 +0100 Subject: [PATCH 079/124] Add first MPI test. --- test/test_mpi_distributed.jl | 67 ++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 test/test_mpi_distributed.jl diff --git a/test/test_mpi_distributed.jl b/test/test_mpi_distributed.jl new file mode 100644 index 0000000000..89a3bb7a6f --- /dev/null +++ b/test/test_mpi_distributed.jl @@ -0,0 +1,67 @@ +using Test, Ferrite, MPI + +# @testset "dof distribution" begin +# MPI.Init() +# my_rank = MPI.Comm_rank(MPI.COMM_WORLD)+1 + +# dim = 2 +# ref = RefCube +# ip = Lagrange{dim, ref, 1}() +# global_grid = generate_grid(Quadrilateral, (3, 3)) +# global_topology = ExclusiveTopology(global_grid) +# dgrid = DistributedGrid(global_grid, global_topology, MPI.COMM_WORLD, Int32[3,3,4,2,5,4,1,2,5]) + +# dh = DistributedDofHandler(dgrid) +# push!(dh, :u, 1, ip) +# close!(dh); + +# @test length(dh.ldof_to_gdof) == length(dh.ldof_to_rank) +# if my_rank == 1 +# @test dh.ldof_to_gdof == [1,2,3,4] +# @test dh.ldof_to_rank == [1,1,1,1] +# elseif my_rank == 2 +# @test dh.ldof_to_gdof == [5,6,2,1,7,8,3] +# @test dh.ldof_to_rank == [2,2,1,1,2,2,1] +# elseif my_rank == 3 +# @test dh.ldof_to_gdof == [9,10, 6, 5,11,12] +# @test dh.ldof_to_rank == [3, 3, 2, 2, 3, 3] +# elseif my_rank == 4 +# @test dh.ldof_to_gdof == [11,13,14,12,15, 7] +# @test dh.ldof_to_rank == [ 3, 4, 4, 3, 4, 2] +# elseif my_rank == 5 +# @test dh.ldof_to_gdof == [6,12, 7, 2,15,16, 8] +# @test dh.ldof_to_rank == [2, 3, 2, 1, 4, 5, 2] +# end +# MPI.Finalize() +# end + +@testset "distributed grid generation" begin + MPI.Init() + my_rank = MPI.Comm_rank(MPI.COMM_WORLD)+1 + + global_grid = generate_grid(Hexahedron, (2, 1, 1)) + global_topology = ExclusiveTopology(global_grid) + dgrid = DistributedGrid(global_grid, global_topology, MPI.COMM_WORLD, Int32[2, 1]) + if my_rank == 1 + @test length(Ferrite.get_shared_edges(dgrid)) == 4 + function check_edge_correctly_shared_1(idx_local, idx_nonlocal) + se = Ferrite.get_shared_edge(dgrid, idx_local) + @test Ferrite.remote_entities(se) == Dict(2 => [idx_nonlocal]) + end + check_edge_correctly_shared_1(EdgeIndex(1,4), EdgeIndex(1,2)) + check_edge_correctly_shared_1(EdgeIndex(1,9), EdgeIndex(1,10)) + check_edge_correctly_shared_1(EdgeIndex(1,12), EdgeIndex(1,11)) + check_edge_correctly_shared_1(EdgeIndex(1,8), 
EdgeIndex(1,6)) + elseif my_rank == 2 + @test length(Ferrite.get_shared_edges(dgrid)) == 4 + function check_edge_correctly_shared_2(idx_nonlocal, idx_local) + se = Ferrite.get_shared_edge(dgrid, idx_local) + @test Ferrite.remote_entities(se) == Dict(1 => [idx_nonlocal]) + end + check_edge_correctly_shared_2(EdgeIndex(1,4), EdgeIndex(1,2)) + check_edge_correctly_shared_2(EdgeIndex(1,9), EdgeIndex(1,10)) + check_edge_correctly_shared_2(EdgeIndex(1,12), EdgeIndex(1,11)) + check_edge_correctly_shared_2(EdgeIndex(1,8), EdgeIndex(1,6)) + end + MPI.Finalize() +end From 140185eb2203e88681730ec60a52f9d9590a18a4 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Thu, 16 Feb 2023 16:03:00 +0100 Subject: [PATCH 080/124] Move most of the distributed code into an extension. --- Project.toml | 9 +- ext/FerritePartitionedArrays.jl | 25 ++ .../DistributedDofHandler.jl | 0 ext/FerritePartitionedArrays/assembler.jl | 338 +++++++++++++++++ ext/FerritePartitionedArrays/constraints.jl | 110 ++++++ ext/FerritePartitionedArrays/grid.jl | 331 +++++++++++++++++ ext/FerritePartitionedArrays/vtk-export.jl | 91 +++++ src/Dofs/ConstraintHandler.jl | 113 ------ src/Export/VTK.jl | 93 ----- src/Ferrite.jl | 1 - src/Grid/DistributedGrid.jl | 324 +---------------- src/Grid/grid_generators.jl | 8 - src/assembler.jl | 340 ------------------ src/exports.jl | 11 +- 14 files changed, 906 insertions(+), 888 deletions(-) create mode 100644 ext/FerritePartitionedArrays.jl rename {src/Dofs => ext/FerritePartitionedArrays}/DistributedDofHandler.jl (100%) create mode 100644 ext/FerritePartitionedArrays/assembler.jl create mode 100644 ext/FerritePartitionedArrays/constraints.jl create mode 100644 ext/FerritePartitionedArrays/grid.jl create mode 100644 ext/FerritePartitionedArrays/vtk-export.jl diff --git a/Project.toml b/Project.toml index ed70ccd5d5..9a51fbd627 100644 --- a/Project.toml +++ b/Project.toml @@ -5,16 +5,17 @@ version = "0.3.8" [deps] EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" -MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce" -PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Tensors = "48a634ad-e948-5137-8d70-aa71f2a747f4" WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192" +[weakdeps] +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9" + [compat] EnumX = "1" MPI = "^0.20.2" @@ -31,7 +32,9 @@ FerriteGmsh = "4f95f4f8-b27c-4ae5-9a39-ea55e634e36b" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Gmsh = "705231aa-382f-11e9-3f0c-b7cb4346fdeb" IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" NBInclude = "0db19996-df87-5ea3-a455-e3a50d440464" +PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" diff --git a/ext/FerritePartitionedArrays.jl b/ext/FerritePartitionedArrays.jl new file mode 100644 index 0000000000..6037578659 --- /dev/null +++ b/ext/FerritePartitionedArrays.jl @@ -0,0 +1,25 @@ +""" +Module containing the code for distributed assembly via PartitionedArrays.jl +""" +module FerritePartitionedArrays + +using Ferrite +using Metis 
+using MPI +using PartitionedArrays + +include("FerritePartitionedArrays/assembler.jl") +include("FerritePartitionedArrays/grid.jl") +include("FerritePartitionedArrays/vtk-export.jl") + +export + # assembler + COOAssembler, + # grid + DistributedGrid, + # vtk-export + vtk_shared_vertices, + vtk_shared_faces, + vtk_shared_edges, + vtk_partitioning, +end # module FerritePartitionedArrays diff --git a/src/Dofs/DistributedDofHandler.jl b/ext/FerritePartitionedArrays/DistributedDofHandler.jl similarity index 100% rename from src/Dofs/DistributedDofHandler.jl rename to ext/FerritePartitionedArrays/DistributedDofHandler.jl diff --git a/ext/FerritePartitionedArrays/assembler.jl b/ext/FerritePartitionedArrays/assembler.jl new file mode 100644 index 0000000000..6ee1cc2ea7 --- /dev/null +++ b/ext/FerritePartitionedArrays/assembler.jl @@ -0,0 +1,338 @@ + +""" +Simplest partitioned assembler in COO format to obtain a PSparseMatrix and a PVector. +""" +struct COOAssembler{T} + I::Vector{Int} + J::Vector{Int} + V::Vector{T} + + cols + rows + f::PVector + + 👻remotes + dh + + # TODO PartitionedArrays backend as additional input arg + COOAssembler(dh::DistributedDofHandler) = COOAssembler{Float64}(dh) + + # TODO PartitionedArrays backend as additional input arg + function COOAssembler{T}(dh::DistributedDofHandler) where {T} + ldof_to_gdof = dh.ldof_to_gdof + ldof_to_rank = dh.ldof_to_rank + nldofs = num_local_dofs(dh) + ngdofs = num_global_dofs(dh) + dgrid = getglobalgrid(dh) + dim = getdim(dgrid) + + I = Int[] + J = Int[] + V = T[] + sizehint!(I, nldofs) + sizehint!(J, nldofs) + sizehint!(V, nldofs) + + # @TODO the code below can be massively simplified by introducing a ghost layer to the + # distributed grid, which can efficiently precompute some of the values below. + comm = global_comm(dgrid) + np = MPI.Comm_size(comm) + my_rank = MPI.Comm_rank(comm)+1 + + @debug println("starting assembly... (R$my_rank)") + + # Neighborhood graph + # @TODO cleanup old code below and use graph primitives instead. + (source_len, destination_len, _) = MPI.Dist_graph_neighbors_count(vertex_comm(dgrid)) + sources = Vector{Cint}(undef, source_len) + destinations = Vector{Cint}(undef, destination_len) + MPI.Dist_graph_neighbors!(vertex_comm(dgrid), sources, destinations) + + # Adjust to Julia index convention + sources .+= 1 + destinations .+= 1 + + @debug println("Neighborhood | $sources | $destinations (R$my_rank)") + + # Invert the relations to clarify the code + source_index = Dict{Cint, Int}() + for (i,remote_rank) ∈ enumerate(sources) + source_index[remote_rank] = i + end + destination_index = Dict{Int, Cint}() + for (i,remote_rank) ∈ enumerate(destinations) + destination_index[remote_rank] = i + end + + # Note: We assume a symmetric neighborhood for now... this may not be true in general. + neighbors = MPIData(Int32.(sources), comm, (np,)) + + # Extract locally owned dofs + ltdof_indices = ldof_to_rank.==my_rank + ltdof_to_gdof = ldof_to_gdof[ltdof_indices] + + @debug println("ltdof_to_gdof $ltdof_to_gdof (R$my_rank)") + @debug println("ldof_to_gdof $ldof_to_gdof (R$my_rank)") + @debug println("ldof_to_rank $ldof_to_rank (R$my_rank)") + + # Process owns rows of owned dofs. The process also may write to some remote dofs, + # which correspond to non-owned share entities. Here we construct the rows for the + # distributed matrix. + # We decide for row (i.e. test function) ownership, because it the image of + # SpMV is process local. 
+ row_indices = PartitionedArrays.IndexSet(my_rank, ldof_to_gdof, Int32.(ldof_to_rank)) + #FIXME: This below must be fixed before we can assemble to HYPRE IJ. Problem seems to be that rows and cols must be continuously assigned. + #row_indices = PartitionedArrays.IndexRange(my_rank, length(ltdof_indices), ltdof_to_gdof[1], ldof_to_gdof[.!ltdof_indices], Int32.(ldof_to_rank[.!ltdof_indices])) + row_data = MPIData(row_indices, comm, (np,)) + row_exchanger = Exchanger(row_data) + rows = PRange(ngdofs,row_data,row_exchanger) + + @debug println("rows done (R$my_rank)") + + # For the locally visible columns we also have to take into account that remote + # processes will write their data in some of these, because their remotely + # owned trial functions overlap with the locally owned test functions. + ghost_dof_to_global = Int[] + ghost_dof_rank = Int32[] + + # ------------ Ghost dof synchronization ---------- + # Prepare sending ghost dofs to neighbors 👻 + #@TODO move relevant parts into dof handler + #@TODO communication can be optimized by deduplicating entries in, and compressing the following arrays + #@TODO reorder communication by field to eliminate need for `ghost_dof_field_index_to_send` + ghost_dof_to_send = [Int[] for i ∈ 1:destination_len] # global dof id + ghost_rank_to_send = [Int[] for i ∈ 1:destination_len] # rank of dof + ghost_dof_field_index_to_send = [Int[] for i ∈ 1:destination_len] + ghost_dof_owner = [Int[] for i ∈ 1:destination_len] # corresponding owner + ghost_dof_pivot_to_send = [Int[] for i ∈ 1:destination_len] # corresponding dof to interact with + for (pivot_vertex, pivot_shared_vertex) ∈ dgrid.shared_vertices + # Start by searching shared entities which are not owned + pivot_vertex_owner_rank = compute_owner(dgrid, pivot_shared_vertex) + pivot_cell_idx = pivot_vertex[1] + pivot_vertex_global = toglobal(getlocalgrid(dgrid), pivot_vertex) + + if my_rank != pivot_vertex_owner_rank + sender_slot = destination_index[pivot_vertex_owner_rank] + + @debug println("$pivot_vertex may require synchronization (R$my_rank)") + # Note: We have to send ALL dofs on the element to the remote. + cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? 
length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] + cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] + + for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) + !has_vertex_dofs(dh, field_idx, pivot_vertex_global) && continue + pivot_vertex_dof = vertex_dofs(dh, field_idx, pivot_vertex_global) + + for d ∈ 1:dh.field_dims[field_idx] + @debug println(" adding dof $(pivot_vertex_dof+d-1) to ghost sync synchronization on slot $sender_slot (R$my_rank)") + + # Extract dofs belonging to the current field + #cell_field_dofs = cell_dofs[dof_range(dh, field_name)] + #for cell_field_dof ∈ cell_field_dofs + for cell_dof ∈ cell_dofs + append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_vertex_dof+d-1]) + append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_dof]) + append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_dof]) + append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + end + end + end + end + end + + if dim > 1 + for (pivot_face, pivot_shared_face) ∈ dgrid.shared_faces + # Start by searching shared entities which are not owned + pivot_face_owner_rank = compute_owner(dgrid, pivot_shared_face) + pivot_cell_idx = pivot_face[1] + + if my_rank != pivot_face_owner_rank + sender_slot = destination_index[pivot_face_owner_rank] + + @debug println("$pivot_face may require synchronization (R$my_rank)") + # Note: We have to send ALL dofs on the element to the remote. + cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] + cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] + + pivot_face_global = toglobal(getlocalgrid(dgrid), pivot_face) + + for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) + !has_face_dofs(dh, field_idx, pivot_face_global) && continue + pivot_face_dof = face_dofs(dh, field_idx, pivot_face_global) + + for d ∈ 1:dh.field_dims[field_idx] + @debug println(" adding dof $(pivot_face_dof+d-1) to ghost sync synchronization on slot $sender_slot (R$my_rank)") + + # Extract dofs belonging to the current field + #cell_field_dofs = cell_dofs[dof_range(dh, field_name)] + #for cell_field_dof ∈ cell_field_dofs + for cell_dof ∈ cell_dofs + append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_face_dof+d-1]) + append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_dof]) + append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_dof]) + append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + end + end + end + end + end + end + + if dim > 2 + for (pivot_edge, pivot_shared_edge) ∈ dgrid.shared_edges + # Start by searching shared entities which are not owned + pivot_edge_owner_rank = compute_owner(dgrid, pivot_shared_edge) + pivot_cell_idx = pivot_edge[1] + + if my_rank != pivot_edge_owner_rank + sender_slot = destination_index[pivot_edge_owner_rank] + + @debug println("$pivot_edge may require synchronization (R$my_rank)") + # Note: We have to send ALL dofs on the element to the remote. + cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? 
length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] + cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] + + pivot_edge_global = toglobal(getlocalgrid(dgrid), pivot_edge) + + for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) + !has_edge_dofs(dh, field_idx, pivot_edge_global) && continue + pivot_edge_dof = edge_dofs(dh, field_idx, pivot_edge_global) + + for d ∈ 1:dh.field_dims[field_idx] + @debug println(" adding dof $(pivot_edge_dof+d-1) to ghost sync synchronization on slot $sender_slot (R$my_rank)") + # Extract dofs belonging to the current field + #cell_field_dofs = cell_dofs[dof_range(dh, field_name)] + #for cell_field_dof ∈ cell_field_dofs + for cell_dof ∈ cell_dofs + append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_edge_dof+d-1]) + append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_dof]) + append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_dof]) + # append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + end + end + end + end + end + end + + ghost_send_buffer_lengths = Int[length(i) for i ∈ ghost_dof_to_send] + ghost_recv_buffer_lengths = zeros(Int, destination_len) + MPI.Neighbor_alltoall!(UBuffer(ghost_send_buffer_lengths,1), UBuffer(ghost_recv_buffer_lengths,1), vertex_comm(dgrid)); + @debug for (i,ghost_recv_buffer_length) ∈ enumerate(ghost_recv_buffer_lengths) + println("receiving $ghost_recv_buffer_length ghosts from $(sources[i]) (R$my_rank)") + end + + # Communicate ghost information 👻 + # @TODO coalesce communication + ghost_send_buffer_dofs = vcat(ghost_dof_to_send...) + ghost_recv_buffer_dofs = zeros(Int, sum(ghost_recv_buffer_lengths)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_dofs,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_dofs,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + + ghost_send_buffer_fields = vcat(ghost_dof_field_index_to_send...) + ghost_recv_buffer_fields = zeros(Int, sum(ghost_recv_buffer_lengths)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_fields,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_fields,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + + ghost_send_buffer_ranks = vcat(ghost_rank_to_send...) + ghost_recv_buffer_ranks = zeros(Int, sum(ghost_recv_buffer_lengths)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_ranks,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_ranks,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + + ghost_send_buffer_dofs_piv = vcat(ghost_dof_pivot_to_send...) 
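As an illustration outside the patch: the exchange above always follows the same two-step pattern, buffer lengths first via MPI.Neighbor_alltoall!, then the variable-length payloads via MPI.Neighbor_alltoallv! on the graph communicator. A self-contained sketch of that pattern with made-up payloads, assuming MPI.jl 0.20 and at least two ranks (all names are illustrative; run with mpiexec -n 2):

    using MPI
    MPI.Init()
    comm = MPI.COMM_WORLD
    my_rank = MPI.Comm_rank(comm) + 1
    # Each rank declares its outgoing edges (here simply: every other rank).
    dest = Cint[r for r in 0:MPI.Comm_size(comm)-1 if r != my_rank - 1]
    graph_comm = MPI.Dist_graph_create(comm, Cint[my_rank-1], Cint[length(dest)], dest)
    # Query the neighbor ordering the neighborhood collectives will use.
    (source_len, destination_len, _) = MPI.Dist_graph_neighbors_count(graph_comm)
    sources = Vector{Cint}(undef, source_len)
    destinations = Vector{Cint}(undef, destination_len)
    MPI.Dist_graph_neighbors!(graph_comm, sources, destinations)
    # One variable-length payload per destination neighbor (made-up content).
    send_bufs = [fill(my_rank, my_rank) for _ in 1:destination_len]
    send_lens = Cint[length(b) for b in send_bufs]
    recv_lens = zeros(Cint, source_len)
    MPI.Neighbor_alltoall!(UBuffer(send_lens, 1), UBuffer(recv_lens, 1), graph_comm)
    recv_buf = zeros(Int, sum(recv_lens))
    MPI.Neighbor_alltoallv!(VBuffer(reduce(vcat, send_bufs), send_lens),
                            VBuffer(recv_buf, recv_lens), graph_comm)
    println("rank $my_rank received $recv_buf from ranks $(sources .+ 1)")
    MPI.Finalize()
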
+ ghost_recv_buffer_dofs_piv = zeros(Int, sum(ghost_recv_buffer_lengths)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_dofs_piv,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_dofs_piv,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + + # Reconstruct source ranks + ghost_recv_buffer_source_ranks = Int[] + for (source_idx, recv_len) ∈ enumerate(ghost_recv_buffer_lengths) + append!(ghost_recv_buffer_source_ranks, ones(recv_len)*sources[source_idx]) + end + + @debug println("received $ghost_recv_buffer_dofs with owners $ghost_recv_buffer_ranks (R$my_rank)") + + unique_ghosts_dr = sort(unique(first,zip(ghost_recv_buffer_dofs,ghost_recv_buffer_ranks))) + # unzip manually and make sure we do not add duplicate entries to our columns + for (dof,rank) ∈ unique_ghosts_dr + if rank != my_rank && dof ∉ ldof_to_gdof + push!(ghost_dof_to_global, dof) + push!(ghost_dof_rank, rank) + end + end + + # ------------- Construct rows and cols of distributed matrix -------- + all_local_cols = Int[ldof_to_gdof; ghost_dof_to_global] + all_local_col_ranks = Int32[ldof_to_rank; ghost_dof_rank] + @debug println("all_local_cols $all_local_cols (R$my_rank)") + @debug println("all_local_col_ranks $all_local_col_ranks (R$my_rank)") + + col_indices = PartitionedArrays.IndexSet(my_rank, all_local_cols, all_local_col_ranks) + #FIXME: This below must be fixed before we can assemble to HYPRE IJ. Problem seems to be that rows and cols must be continuously assigned. + #col_indices = PartitionedArrays.IndexRange(my_rank, length(ltdof_indices), ltdof_to_gdof[1], all_local_cols[all_local_col_ranks .!= my_rank], Int32.(all_local_col_ranks[all_local_col_ranks .!= my_rank])) + col_data = MPIData(col_indices, comm, (np,)) + col_exchanger = Exchanger(col_data) + cols = PRange(ngdofs,col_data,col_exchanger) + + @debug println("cols and rows constructed (R$my_rank)") + f = PartitionedArrays.PVector(0.0,rows) + @debug println("f constructed (R$my_rank)") + + 👻remotes = zip(ghost_recv_buffer_dofs_piv, ghost_recv_buffer_dofs, ghost_recv_buffer_ranks,ghost_recv_buffer_fields) + @debug println("👻remotes $👻remotes (R$my_rank)") + + return new(I, J, V, cols, rows, f, 👻remotes, dh) + end +end + +@propagate_inbounds function assemble!(a::COOAssembler{T}, edof::AbstractVector{Int}, Ke::AbstractMatrix{T}) where {T} + n_dofs = length(edof) + append!(a.V, Ke) + @inbounds for j in 1:n_dofs + append!(a.I, edof) + for i in 1:n_dofs + push!(a.J, edof[j]) + end + end +end + +@propagate_inbounds function assemble!(a::COOAssembler{T}, dofs::AbstractVector{Int}, fe::AbstractVector{T}, Ke::AbstractMatrix{T}) where {T} + Ferrite.assemble!(a, dofs, Ke) + map_parts(local_view(a.f, a.f.rows)) do f_local + Ferrite.assemble!(f_local, dofs, fe) + end +end + +function end_assemble(assembler::COOAssembler{T}) where {T} + comm = global_comm(getglobalgrid(assembler.dh)) + np = MPI.Comm_size(comm) + my_rank = MPI.Comm_rank(comm)+1 + + # --------------------- Add ghost entries in IJ 👻 -------------------- + I = map(i->assembler.dh.ldof_to_gdof[i], assembler.I) + J = map(j->assembler.dh.ldof_to_gdof[j], assembler.J) + V = map(v->v, assembler.V) + + # Fix ghost layer 👻! Note that the locations for remote processes to write their + # data into are missing up to this point. + # TODO here still the interaction between fields is missing... 
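As an aside outside the patch: the loop that follows reserves matrix slots for remote contributions by pushing explicit zeros into the COO triplets. The serial analogue with SparseArrays shows why this works: duplicate (i, j) pairs are summed, and a stored zero keeps its structural entry, so the sparsity pattern is fixed before any remote values arrive.

    using SparseArrays
    I = [1, 1, 2, 1]
    J = [1, 1, 2, 2]
    V = [1.0, 2.0, 4.0, 0.0]   # the trailing 0.0 only reserves slot (1, 2)
    K = sparse(I, J, V, 2, 2)  # duplicate (i, j) pairs are summed
    @assert K[1, 1] == 3.0
    @show nnz(K)               # expected 3: the stored zero keeps its structural entry
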
+ for (i, (pivot_dof, global_ghost_dof, ghost_owner_rank, ghost_field_idx)) ∈ enumerate(assembler.👻remotes) + for dᵢ ∈ 1:1#assembler.dh.field_dims[ghost_field_idx] + for dⱼ ∈ 1:1#assembler.dh.field_dims[ghost_field_idx] + push!(I, pivot_dof+dᵢ-1) + push!(J, global_ghost_dof+dⱼ-1) + push!(V, 0.0) + end + end + end + + @debug println("I=$(I) (R$my_rank)") + @debug println("J=$(J) (R$my_rank)") + K = PartitionedArrays.PSparseMatrix( + MPIData(I, comm, (np,)), + MPIData(J, comm, (np,)), + MPIData(V, comm, (np,)), + assembler.rows, assembler.cols, ids=:global + ) + + PartitionedArrays.assemble!(K) + PartitionedArrays.assemble!(assembler.f) + + return K, assembler.f +end diff --git a/ext/FerritePartitionedArrays/constraints.jl b/ext/FerritePartitionedArrays/constraints.jl new file mode 100644 index 0000000000..9ca27f3395 --- /dev/null +++ b/ext/FerritePartitionedArrays/constraints.jl @@ -0,0 +1,110 @@ +function meandiag(K::PartitionedArrays.PSparseMatrix) + # Get local portion of z + z_pa = map_parts(local_view(K, K.rows, K.cols)) do K_local + z = zero(eltype(K_local)) + for i in 1:size(K_local, 1) + z += abs(K_local[i, i]) + end + return z; + end + # z = get_part(z_pa, MPI.Comm_rank(z_pa.comm)+1) # Crashes :) + return MPI.Allreduce(z_pa.part, MPI.SUM, z_pa.comm) / size(K, 1) +end + +""" +Poor man's Dirichlet BC application for PartitionedArrays. :) + + TODO integrate with constraints. +""" +function apply_zero!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) + map_parts(local_view(f, f.rows), f.rows.partition) do f_local, partition + f_local[ch.prescribed_dofs] .= 0.0 + end + + map_parts(local_view(K, K.rows, K.cols), local_view(f, f.rows)) do K_local, f_local + for cdof in ch.prescribed_dofs + K_local[cdof, :] .= 0.0 + K_local[:, cdof] .= 0.0 + K_local[cdof, cdof] = 1.0 + end + end +end + +""" +Poor man's Dirichlet BC application for PartitionedArrays. :) + + TODO integrate with constraints. + TODO optimize. +""" +function apply!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) + # Start by substracting the inhomogeneous solution from the right hand side + u_constrained = PartitionedArrays.PVector(0.0, K.cols) + map_parts(local_view(u_constrained, u_constrained.rows)) do u_local + u_local[ch.prescribed_dofs] .= ch.inhomogeneities + end + f .-= K*u_constrained + + m = meandiag(K) + + # Then fix the + map_parts(local_view(f, f.rows), f.rows.partition) do f_local, partition + # Note: RHS only non-zero for owned RHS entries + f_local[ch.prescribed_dofs] .= ch.inhomogeneities .* map(p -> p == partition.part, partition.lid_to_part[ch.prescribed_dofs]) * m + end + + # Zero out locally visible rows and columns + map_parts(local_view(K, K.rows, K.cols)) do K_local + for cdof ∈ ch.prescribed_dofs + K_local[cdof, :] .= 0.0 + K_local[:, cdof] .= 0.0 + K_local[cdof, cdof] = m + end + end + + # Zero out columns associated to the ghost dofs constrained on a remote process + # TODO optimize. If we assume that the sparsity pattern is symmetric, then we can constrain + # via the column information of the matrix. + + # Step 1: Send out all local ghosts to all other processes... 
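For illustration outside the patch: stripped of the ghost-column communication below, the per-partition logic of apply! reduces to the usual serial elimination, that is, scale by the mean diagonal, move the inhomogeneity to the right-hand side, zero the constrained row and column, and place the scaled value on the diagonal. A hypothetical dense, single-process helper sketching just that (not the patch's API):

    using LinearAlgebra
    function apply_dirichlet!(K::Matrix{Float64}, f::Vector{Float64},
                              prescribed::Vector{Int}, values::Vector{Float64})
        m = sum(abs, diag(K)) / size(K, 1)   # same scaling idea as meandiag(K)
        f .-= K[:, prescribed] * values      # subtract the constrained part of the solution
        for (d, v) in zip(prescribed, values)
            K[d, :] .= 0.0
            K[:, d] .= 0.0
            K[d, d] = m
            f[d] = m * v
        end
        return K, f
    end

    K = [4.0 1.0; 1.0 3.0]; f = [1.0, 2.0]
    apply_dirichlet!(K, f, [1], [0.5])
    @show K \ f    # first entry recovers the prescribed 0.5

The communication in Steps 1 to 4 below is only needed because a ghost column visible on this process may belong to a dof that is constrained on its owning process, which this process cannot detect locally.
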
+ remote_ghost_gdofs, remote_ghost_parts = map_parts(K.cols.partition) do partition + remote_ghost_ldofs = partition.hid_to_lid + remote_ghost_parts = partition.lid_to_part[remote_ghost_ldofs] + remote_ghost_gdofs = partition.lid_to_gid[remote_ghost_ldofs] + return (remote_ghost_gdofs, remote_ghost_parts) + end + + comm = remote_ghost_parts.comm + my_rank = MPI.Comm_rank(comm)+1 + buffer_sizes_send = zeros(Cint, MPI.Comm_size(comm)) + buffer_sizes_recv = Vector{Cint}(undef, MPI.Comm_size(comm)) + for part ∈ remote_ghost_parts.part + buffer_sizes_send[part] += 1 + end + MPI.Alltoall!(UBuffer(buffer_sizes_send, 1), UBuffer(buffer_sizes_recv, 1), comm) + @debug println("Got $buffer_sizes_recv (R$my_rank)") + + remote_ghosts_recv = Vector{Int}(undef, sum(buffer_sizes_recv)) + MPI.Alltoallv!(VBuffer(remote_ghost_gdofs.part, buffer_sizes_send), VBuffer(remote_ghosts_recv, buffer_sizes_recv), comm) + @debug println("Got $remote_ghosts_recv (R$my_rank)") + + # Step 2: Union with all locally constrained dofs + @debug println("$my_rank : Step 2....") + remote_ghosts_constrained_send = copy(remote_ghosts_recv) + for (i, remote_ghost_dof) ∈ enumerate(remote_ghosts_recv) + remote_ghosts_constrained_send[i] = remote_ghost_dof ∈ K.cols.partition.part.lid_to_gid[ch.prescribed_dofs] + end + + # Step 3: Send trash back + @debug println("$my_rank : Step 3....") + remote_ghosts_constrained_recv = Vector{Int}(undef, sum(buffer_sizes_send)) + MPI.Alltoallv!(VBuffer(remote_ghosts_constrained_send, buffer_sizes_recv), VBuffer(remote_ghosts_constrained_recv, buffer_sizes_send), comm) + + @debug println("$my_rank : remote constraints on $(remote_ghost_gdofs.part[remote_ghosts_constrained_recv .== 1])") + + # Step 4: Constrain remaining columns + map_parts(local_view(K, K.rows, K.cols), K.cols.partition) do K_local, partition + for cdof ∈ partition.hid_to_lid[remote_ghosts_constrained_recv .== 1] + K_local[:, cdof] .= 0.0 + end + end +end diff --git a/ext/FerritePartitionedArrays/grid.jl b/ext/FerritePartitionedArrays/grid.jl new file mode 100644 index 0000000000..e7ffafd6e6 --- /dev/null +++ b/ext/FerritePartitionedArrays/grid.jl @@ -0,0 +1,331 @@ + +# TODO the following three structs can be merged to one struct with type parameter. +""" +""" +struct SharedVertex <: SharedEntity + local_idx::VertexIndex + remote_vertices::Dict{Int,Vector{VertexIndex}} +end + +@inline remote_entities(sv::SharedVertex) = sv.remote_vertices + +""" +""" +struct SharedFace <: SharedEntity + local_idx::FaceIndex + remote_faces::Dict{Int,Vector{FaceIndex}} +end + +@inline remote_entities(sf::SharedFace) = sf.remote_faces + +""" +""" +struct SharedEdge <: SharedEntity + local_idx::EdgeIndex + remote_edges::Dict{Int,Vector{EdgeIndex}} +end + +@inline remote_entities(se::SharedEdge) = se.remote_edges + +""" +@TODO docs +@TODO PArrays ready constructor +""" +mutable struct DistributedGrid{dim,C<:AbstractCell,T<:Real} <: AbstractDistributedGrid{dim} + # Dense comminicator on the grid + grid_comm::MPI.Comm + # Sparse communicator along the shared vertex neighbors + # We only need this one because the vertices induce the edge and face neighbors. + interface_comm::MPI.Comm + # Here we store the full local grid + local_grid::Grid{dim,C,T} + # Local copies of the shared entities of the form (local index, (process id in grid_comm, remote index)) + # The entities consistently contain their *Index, because faces and edges are not materialized. 
+ shared_vertices::Dict{VertexIndex,SharedVertex} + shared_edges::Dict{EdgeIndex,SharedEdge} + shared_faces::Dict{FaceIndex,SharedFace} +end + + +""" +""" +function DistributedGrid(grid_to_distribute::Grid{dim,C,T}; grid_comm::MPI.Comm = MPI.COMM_WORLD, partition_alg = :RECURSIVE) where {dim,C,T} + grid_topology = ExclusiveTopology(grid_to_distribute) + return DistributedGrid(grid_to_distribute, grid_topology, grid_comm; partition_alg=partition_alg) +end + +function create_partitioning(grid::Grid{dim,C,T}, grid_topology::ExclusiveTopology, n_partitions, partition_alg) where {dim,C,T} + n_cells_global = getncells(grid) + @assert n_cells_global > 0 + + if n_partitions == 1 + return ones(Metis.idx_t, n_cells_global) + end + + # Set up the element connectivity graph + xadj = Vector{Metis.idx_t}(undef, n_cells_global+1) + xadj[1] = 1 + adjncy = Vector{Metis.idx_t}(undef, 0) + @inbounds for i in 1:n_cells_global + n_neighbors = 0 + for neighbor ∈ getneighborhood(grid_topology, grid, CellIndex(i)) + push!(adjncy, neighbor) + n_neighbors += 1 + end + xadj[i+1] = xadj[i] + n_neighbors + end + + # Generate a partitioning + return Metis.partition( + Metis.Graph( + Metis.idx_t(n_cells_global), + xadj, + adjncy + ), + n_partitions; + alg=partition_alg + ) +end + +""" +""" +function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm; partition_alg = :RECURSIVE) where {dim,C,T} + n_cells_global = getncells(grid_to_distribute) + @assert n_cells_global > 0 + + parts = create_partitioning(grid_to_distribute, grid_topology, MPI.Comm_size(grid_comm), partition_alg) + + DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm, parts) +end + +""" +""" +function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm, parts::Vector{Int32}) where {dim,C,T} + n_cells_global = getncells(grid_to_distribute) + @assert n_cells_global > 0 # Empty input mesh... + + my_rank = MPI.Comm_rank(grid_comm)+1 + + # Start extraction of local grid + # 1. Extract local cells + local_cells = getcells(grid_to_distribute)[[i for i ∈ 1:n_cells_global if parts[i] == my_rank]] + @assert length(local_cells) > 0 # Cannot handle empty partitions yet + + # 2. Find unique nodes + local_node_index_set = Set{Int}() + for cell ∈ local_cells + for global_node_idx ∈ cell.nodes # @TODO abstraction + push!(local_node_index_set, global_node_idx) + end + end + + # 3. Build a map for global to local node indices + next_local_node_idx = 1 + global_to_local_node_map = Dict{Int,Int}() + for global_node_idx ∈ local_node_index_set + global_to_local_node_map[global_node_idx] = next_local_node_idx + next_local_node_idx += 1 + end + + # 4. Extract local nodes + local_nodes = Vector{Node{dim,T}}(undef,length(local_node_index_set)) + begin + global_nodes = getnodes(grid_to_distribute) + for global_node_idx ∈ local_node_index_set + local_node_idx = global_to_local_node_map[global_node_idx] + local_nodes[local_node_idx] = global_nodes[global_node_idx] + end + end + + # 5. Transform cell indices + for local_cell_idx ∈ 1:length(local_cells) + local_cells[local_cell_idx] = C(map(global_node_idx -> global_to_local_node_map[global_node_idx], local_cells[local_cell_idx].nodes)) + end + + # 6. Extract sets + # @TODO deduplicate the code. We should be able to merge each of these into a macro or function. + # We build this map now, so we avoid the communication later. 
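As an aside outside the patch: steps 2, 3 and 5 above boil down to a small renumbering pattern: gather the global node ids touched by the local cells, enumerate them to obtain local ids, and rewrite the connectivity. A standalone sketch with made-up connectivity (the patch iterates a Set, so its local ordering differs from the sorted one used here):

    # Two cells of this partition, given by their *global* node ids (made-up data).
    local_cells_global = [(1, 2, 5), (2, 3, 5)]
    # Step 2: unique global node ids touched by the local cells.
    global_node_ids = sort!(unique!(collect(Iterators.flatten(local_cells_global))))
    # Step 3: global -> local node map.
    g2l = Dict(g => l for (l, g) in enumerate(global_node_ids))
    # Step 5: rewrite the cell connectivity in local numbering.
    local_cells = [map(n -> g2l[n], cell) for cell in local_cells_global]
    @show local_cells   # [(1, 2, 4), (2, 3, 4)]
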
+ global_to_local_cell_map = Dict{Int,Dict{Int,Int}}() + for rank ∈ 1:MPI.Comm_size(grid_comm) + global_to_local_cell_map[rank] = Dict{Int,Int}() + next_local_cell_idx = 1 + for global_cell_idx ∈ 1:n_cells_global + if parts[global_cell_idx] == rank + global_to_local_cell_map[rank][global_cell_idx] = next_local_cell_idx + next_local_cell_idx += 1 + end + end + end + + cellsets = Dict{String,Set{Int}}() + for key ∈ keys(grid_to_distribute.cellsets) + cellsets[key] = Set{Int}() # create empty set, so it does not crash during assembly + for global_cell_idx ∈ grid_to_distribute.cellsets[key] + if haskey(global_to_local_cell_map[my_rank], global_cell_idx) + push!(cellsets[key], global_to_local_cell_map[my_rank][global_cell_idx]) + end + end + end + + nodesets = Dict{String,Set{Int}}() + for key ∈ keys(grid_to_distribute.nodesets) + nodesets[key] = Set{Int}() # create empty set, so it does not crash during assembly + for global_node_idx ∈ grid_to_distribute.nodesets[key] + if haskey(global_to_local_node_map, global_node_idx) + push!(nodesets[key], global_to_local_node_map[global_node_idx]) + end + end + end + + facesets = Dict{String,Set{FaceIndex}}() + for key ∈ keys(grid_to_distribute.facesets) + facesets[key] = Set{FaceIndex}() # create empty set, so it does not crash during assembly + for (global_cell_idx, i) ∈ grid_to_distribute.facesets[key] + if haskey(global_to_local_cell_map[my_rank], global_cell_idx) + push!(facesets[key], FaceIndex(global_to_local_cell_map[my_rank][global_cell_idx], i)) + end + end + end + + edgesets = Dict{String,Set{EdgeIndex}}() + for key ∈ keys(grid_to_distribute.edgesets) + edgesets[key] = Set{EdgeIndex}() # create empty set, so it does not crash during assembly + for (global_cell_idx, i) ∈ grid_to_distribute.edgesets[key] + if haskey(global_to_local_cell_map[my_rank], global_cell_idx) + push!(edgesets[key], EdgeIndex(global_to_local_cell_map[my_rank][global_cell_idx], i)) + end + end + end + + vertexsets = Dict{String,Set{VertexIndex}}() + for key ∈ keys(grid_to_distribute.vertexsets) + vertexsets[key] = Set{VertexIndex}() # create empty set, so it does not crash during assembly + for (global_cell_idx, i) ∈ grid_to_distribute.vertexsets[key] + if haskey(global_to_local_cell_map[my_rank], global_cell_idx) + push!(vertexsets[key], VertexIndex(global_to_local_cell_map[my_rank][global_cell_idx], i)) + end + end + end + + local_grid = Grid( + local_cells, + local_nodes, + cellsets=cellsets, + nodesets=nodesets, + facesets=facesets, + edgesets=edgesets, + vertexsets=vertexsets + ) + + shared_vertices = Dict{VertexIndex,SharedVertex}() + shared_edges = Dict{EdgeIndex,SharedEdge}() + shared_faces = Dict{FaceIndex,SharedFace}() + for (global_cell_idx,global_cell) ∈ enumerate(getcells(grid_to_distribute)) + if parts[global_cell_idx] == my_rank + # Vertex + for (i, _) ∈ enumerate(vertices(global_cell)) + cell_vertex = VertexIndex(global_cell_idx, i) + remote_vertices = Dict{Int,Vector{VertexIndex}}() + for other_vertex ∈ getneighborhood(grid_topology, grid_to_distribute, cell_vertex, true) + (global_cell_neighbor_idx, j) = other_vertex + other_rank = parts[global_cell_neighbor_idx] + if other_rank != my_rank + if toglobal(grid_to_distribute,cell_vertex) == toglobal(grid_to_distribute,other_vertex) + if !haskey(remote_vertices,other_rank) + remote_vertices[other_rank] = Vector(undef,0) + end + @debug println("Detected shared vertex $cell_vertex neighbor $other_vertex (R$my_rank)") + push!(remote_vertices[other_rank], 
VertexIndex(global_to_local_cell_map[other_rank][global_cell_neighbor_idx], j)) + end + end + end + + if length(remote_vertices) > 0 + idx = VertexIndex(global_to_local_cell_map[my_rank][global_cell_idx], i) + shared_vertices[idx] = SharedVertex(idx, remote_vertices) + end + end + + # Face + if dim > 1 + for (i, _) ∈ enumerate(faces(global_cell)) + cell_face = FaceIndex(global_cell_idx, i) + remote_faces = Dict{Int,Vector{FaceIndex}}() + for other_face ∈ getneighborhood(grid_topology, grid_to_distribute, cell_face, true) + (global_cell_neighbor_idx, j) = other_face + other_rank = parts[global_cell_neighbor_idx] + if other_rank != my_rank + if toglobal(grid_to_distribute,cell_face) == toglobal(grid_to_distribute,other_face) + if !haskey(remote_faces,other_rank) + remote_faces[other_rank] = Vector(undef,0) + end + @debug println("Detected shared face $cell_face neighbor $other_face (R$my_rank)") + push!(remote_faces[other_rank], FaceIndex(global_to_local_cell_map[other_rank][global_cell_neighbor_idx], j)) + end + end + end + + if length(remote_faces) > 0 + idx = FaceIndex(global_to_local_cell_map[my_rank][global_cell_idx], i) + shared_faces[idx] = SharedFace(idx, remote_faces) + end + end + end + + # Edge + if dim > 2 + for (i, _) ∈ enumerate(edges(global_cell)) + cell_edge = EdgeIndex(global_cell_idx, i) + remote_edges = Dict{Int,Vector{EdgeIndex}}() + for other_edge ∈ getneighborhood(grid_topology, grid_to_distribute, cell_edge, true) + (global_cell_neighbor_idx, j) = other_edge + other_rank = parts[global_cell_neighbor_idx] + if other_rank != my_rank + if toglobal(grid_to_distribute,cell_edge) == toglobal(grid_to_distribute,other_edge) + if !haskey(remote_edges,other_edge) + remote_edges[other_rank] = Vector(undef,0) + end + @debug println("Detected shared edge $cell_edge neighbor $other_edge (R$my_rank)") + push!(remote_edges[other_rank], EdgeIndex(global_to_local_cell_map[other_rank][global_cell_neighbor_idx], j)) + end + end + end + + if length(remote_edges) > 0 + idx = EdgeIndex(global_to_local_cell_map[my_rank][global_cell_idx], i) + shared_edges[idx] = SharedEdge(idx, remote_edges) + end + end + end + end + end + + # Neighborhood graph + neighbors_set = Set{Cint}() + for (vi, sv) ∈ shared_vertices + for (rank, vvi) ∈ sv.remote_vertices + push!(neighbors_set, rank) + end + end + # Adjust ranks back to to C index convention + dest = collect(neighbors_set).-1 + degree = length(dest) + interface_comm = MPI.Dist_graph_create(grid_comm, Cint[my_rank-1], Cint[degree], Cint.(dest)) + + return DistributedGrid(grid_comm,interface_comm,local_grid,shared_vertices,shared_edges,shared_faces) +end + + +# Here we define the entity ownership by the process sharing an entity with lowest rank in the grid communicator. +function compute_owner(dgrid::AbstractDistributedGrid, shared_entity::SharedEntity)::Int32 + my_rank = MPI.Comm_rank(global_comm(dgrid))+1 # Shift rank up by 1 to match Julia's indexing convention + return minimum([my_rank; [remote_rank for (remote_rank, _) ∈ remote_entities(shared_entity)]]) +end + +""" +Helper to generate distributed grids. +It is designed to replace the call to [`generate_grid`](@ref) for use in distributed environments. +""" +function generate_distributed_grid(args...) 
+ return DistributedGrid(generate_grid(args...)) +end diff --git a/ext/FerritePartitionedArrays/vtk-export.jl b/ext/FerritePartitionedArrays/vtk-export.jl new file mode 100644 index 0000000000..3db26fac55 --- /dev/null +++ b/ext/FerritePartitionedArrays/vtk-export.jl @@ -0,0 +1,91 @@ +""" +""" +function WriteVTK.vtk_grid(filename::AbstractString, dgrid::DistributedGrid{dim,C,T}; compress::Bool=true) where {dim,C,T} + part = MPI.Comm_rank(global_comm(dgrid))+1 + nparts = MPI.Comm_size(global_comm(dgrid)) + cls = MeshCell[] + for cell in getcells(dgrid) + celltype = Ferrite.cell_to_vtkcell(typeof(cell)) + push!(cls, MeshCell(celltype, nodes_to_vtkorder(cell))) + end + coords = reshape(reinterpret(T, getnodes(dgrid)), (dim, getnnodes(dgrid))) + return pvtk_grid(filename, coords, cls; part=part, nparts=nparts, compress=compress) +end + +""" +""" +function WriteVTK.vtk_point_data(vtk, dh::AbstractDofHandler, u::PVector) + map_parts(local_view(u, u.rows)) do u_local + vtk_point_data(pvtkwrapper(vtk), dh, u_local) + end +end + +""" +Enrich the VTK file with meta information about shared vertices. +""" +function vtk_shared_vertices(vtk, dgrid::DistributedGrid) + u = Vector{Float64}(undef, getnnodes(dgrid)) + my_rank = MPI.Comm_rank(global_comm(dgrid))+1 + for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) + fill!(u, 0.0) + for sv ∈ values(get_shared_vertices(dgrid)) + if haskey(sv.remote_vertices, rank) + (cellidx, i) = sv.local_idx + cell = getcells(dgrid, cellidx) + u[vertices(cell)[i]] = my_rank + end + end + vtk_point_data(pvtkwrapper(vtk), u, "shared vertices with $rank") + end +end + + +""" +Enrich the VTK file with meta information about shared faces. +""" +function vtk_shared_faces(vtk, dgrid::DistributedGrid) + u = Vector{Float64}(undef, getnnodes(dgrid)) + my_rank = MPI.Comm_rank(global_comm(dgrid))+1 + for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) + fill!(u, 0.0) + for sf ∈ values(get_shared_faces(dgrid)) + if haskey(sf.remote_faces, rank) + (cellidx, i) = sf.local_idx + cell = getcells(dgrid, cellidx) + facenodes = faces(cell)[i] + u[[facenodes...]] .= my_rank + end + end + vtk_point_data(pvtkwrapper(vtk), u, "shared faces with $rank") + end +end + + +""" +Enrich the VTK file with meta information about shared edges. +""" +function vtk_shared_edges(vtk, dgrid::DistributedGrid) + u = Vector{Float64}(undef, getnnodes(dgrid)) + my_rank = MPI.Comm_rank(global_comm(dgrid))+1 + for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) + fill!(u, 0.0) + for se ∈ values(get_shared_edges(dgrid)) + if haskey(se.remote_edges, rank) + (cellidx, i) = se.local_idx + cell = getcells(dgrid, cellidx) + edgenodes = edges(cell)[i] + u[[edgenodes...]] .= my_rank + end + end + vtk_point_data(pvtkwrapper(vtk), u, "shared edges with $rank") + end +end + +""" +Enrich the VTK file with partitioning meta information. 
+""" +function vtk_partitioning(vtk, dgrid::DistributedGrid) + u = Vector{Float64}(undef, getncells(dgrid)) + u .= MPI.Comm_rank(global_comm(dgrid))+1 + vtk_cell_data(pvtkwrapper(vtk), u, "partitioning") +end diff --git a/src/Dofs/ConstraintHandler.jl b/src/Dofs/ConstraintHandler.jl index 34625713d4..e6a85b770d 100644 --- a/src/Dofs/ConstraintHandler.jl +++ b/src/Dofs/ConstraintHandler.jl @@ -1568,116 +1568,3 @@ function __check_periodic_faces_f(grid::Grid, fi::FaceIndex, fj::FaceIndex, xmi, return PeriodicFacePair(fi, fj, node_rot, mirror) end - -using PartitionedArrays - -function meandiag(K::PartitionedArrays.PSparseMatrix) - # Get local portion of z - z_pa = map_parts(local_view(K, K.rows, K.cols)) do K_local - z = zero(eltype(K_local)) - for i in 1:size(K_local, 1) - z += abs(K_local[i, i]) - end - return z; - end - # z = get_part(z_pa, MPI.Comm_rank(z_pa.comm)+1) # Crashes :) - return MPI.Allreduce(z_pa.part, MPI.SUM, z_pa.comm) / size(K, 1) -end - -""" -Poor man's Dirichlet BC application for PartitionedArrays. :) - - TODO integrate with constraints. -""" -function apply_zero!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) - map_parts(local_view(f, f.rows), f.rows.partition) do f_local, partition - f_local[ch.prescribed_dofs] .= 0.0 - end - - map_parts(local_view(K, K.rows, K.cols), local_view(f, f.rows)) do K_local, f_local - for cdof in ch.prescribed_dofs - K_local[cdof, :] .= 0.0 - K_local[:, cdof] .= 0.0 - K_local[cdof, cdof] = 1.0 - end - end -end - -""" -Poor man's Dirichlet BC application for PartitionedArrays. :) - - TODO integrate with constraints. - TODO optimize. -""" -function apply!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) - # Start by substracting the inhomogeneous solution from the right hand side - u_constrained = PartitionedArrays.PVector(0.0, K.cols) - map_parts(local_view(u_constrained, u_constrained.rows)) do u_local - u_local[ch.prescribed_dofs] .= ch.inhomogeneities - end - f .-= K*u_constrained - - m = meandiag(K) - - # Then fix the - map_parts(local_view(f, f.rows), f.rows.partition) do f_local, partition - # Note: RHS only non-zero for owned RHS entries - f_local[ch.prescribed_dofs] .= ch.inhomogeneities .* map(p -> p == partition.part, partition.lid_to_part[ch.prescribed_dofs]) * m - end - - # Zero out locally visible rows and columns - map_parts(local_view(K, K.rows, K.cols)) do K_local - for cdof ∈ ch.prescribed_dofs - K_local[cdof, :] .= 0.0 - K_local[:, cdof] .= 0.0 - K_local[cdof, cdof] = m - end - end - - # Zero out columns associated to the ghost dofs constrained on a remote process - # TODO optimize. If we assume that the sparsity pattern is symmetric, then we can constrain - # via the column information of the matrix. - - # Step 1: Send out all local ghosts to all other processes... 
- remote_ghost_gdofs, remote_ghost_parts = map_parts(K.cols.partition) do partition - remote_ghost_ldofs = partition.hid_to_lid - remote_ghost_parts = partition.lid_to_part[remote_ghost_ldofs] - remote_ghost_gdofs = partition.lid_to_gid[remote_ghost_ldofs] - return (remote_ghost_gdofs, remote_ghost_parts) - end - - comm = remote_ghost_parts.comm - my_rank = MPI.Comm_rank(comm)+1 - buffer_sizes_send = zeros(Cint, MPI.Comm_size(comm)) - buffer_sizes_recv = Vector{Cint}(undef, MPI.Comm_size(comm)) - for part ∈ remote_ghost_parts.part - buffer_sizes_send[part] += 1 - end - MPI.Alltoall!(UBuffer(buffer_sizes_send, 1), UBuffer(buffer_sizes_recv, 1), comm) - @debug println("Got $buffer_sizes_recv (R$my_rank)") - - remote_ghosts_recv = Vector{Int}(undef, sum(buffer_sizes_recv)) - MPI.Alltoallv!(VBuffer(remote_ghost_gdofs.part, buffer_sizes_send), VBuffer(remote_ghosts_recv, buffer_sizes_recv), comm) - @debug println("Got $remote_ghosts_recv (R$my_rank)") - - # Step 2: Union with all locally constrained dofs - @debug println("$my_rank : Step 2....") - remote_ghosts_constrained_send = copy(remote_ghosts_recv) - for (i, remote_ghost_dof) ∈ enumerate(remote_ghosts_recv) - remote_ghosts_constrained_send[i] = remote_ghost_dof ∈ K.cols.partition.part.lid_to_gid[ch.prescribed_dofs] - end - - # Step 3: Send trash back - @debug println("$my_rank : Step 3....") - remote_ghosts_constrained_recv = Vector{Int}(undef, sum(buffer_sizes_send)) - MPI.Alltoallv!(VBuffer(remote_ghosts_constrained_send, buffer_sizes_recv), VBuffer(remote_ghosts_constrained_recv, buffer_sizes_send), comm) - - @debug println("$my_rank : remote constraints on $(remote_ghost_gdofs.part[remote_ghosts_constrained_recv .== 1])") - - # Step 4: Constrain remaining columns - map_parts(local_view(K, K.rows, K.cols), K.cols.partition) do K_local, partition - for cdof ∈ partition.hid_to_lid[remote_ghosts_constrained_recv .== 1] - K_local[:, cdof] .= 0.0 - end - end -end diff --git a/src/Export/VTK.jl b/src/Export/VTK.jl index 13f6592a17..f0f12ba8c3 100644 --- a/src/Export/VTK.jl +++ b/src/Export/VTK.jl @@ -36,19 +36,6 @@ function WriteVTK.vtk_grid(filename::AbstractString, grid::Grid{dim,C,T}; compre return vtk_grid(filename, coords, cls; compress=compress) end -function WriteVTK.vtk_grid(filename::AbstractString, dgrid::DistributedGrid{dim,C,T}; compress::Bool=true) where {dim,C,T} - part = MPI.Comm_rank(global_comm(dgrid))+1 - nparts = MPI.Comm_size(global_comm(dgrid)) - cls = MeshCell[] - for cell in getcells(dgrid) - celltype = Ferrite.cell_to_vtkcell(typeof(cell)) - push!(cls, MeshCell(celltype, nodes_to_vtkorder(cell))) - end - coords = reshape(reinterpret(T, getnodes(dgrid)), (dim, getnnodes(dgrid))) - return pvtk_grid(filename, coords, cls; part=part, nparts=nparts, compress=compress) -end - - function toparaview!(v, x::Vec{D}) where D v[1:D] .= x end @@ -143,83 +130,3 @@ function WriteVTK.vtk_point_data(vtkfile, dh::AbstractDofHandler, u::Vector, suf return vtkfile end - -using PartitionedArrays - -""" -""" -function WriteVTK.vtk_point_data(vtk, dh::AbstractDofHandler, u::PVector) - map_parts(local_view(u, u.rows)) do u_local - vtk_point_data(pvtkwrapper(vtk), dh, u_local) - end -end - -""" -Enrich the VTK file with meta information about shared vertices. 
-""" -function vtk_shared_vertices(vtk, dgrid::DistributedGrid) - u = Vector{Float64}(undef, getnnodes(dgrid)) - my_rank = MPI.Comm_rank(global_comm(dgrid))+1 - for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) - fill!(u, 0.0) - for sv ∈ values(get_shared_vertices(dgrid)) - if haskey(sv.remote_vertices, rank) - (cellidx, i) = sv.local_idx - cell = getcells(dgrid, cellidx) - u[vertices(cell)[i]] = my_rank - end - end - vtk_point_data(pvtkwrapper(vtk), u, "shared vertices with $rank") - end -end - - -""" -Enrich the VTK file with meta information about shared faces. -""" -function vtk_shared_faces(vtk, dgrid::DistributedGrid) - u = Vector{Float64}(undef, getnnodes(dgrid)) - my_rank = MPI.Comm_rank(global_comm(dgrid))+1 - for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) - fill!(u, 0.0) - for sf ∈ values(get_shared_faces(dgrid)) - if haskey(sf.remote_faces, rank) - (cellidx, i) = sf.local_idx - cell = getcells(dgrid, cellidx) - facenodes = faces(cell)[i] - u[[facenodes...]] .= my_rank - end - end - vtk_point_data(pvtkwrapper(vtk), u, "shared faces with $rank") - end -end - - -""" -Enrich the VTK file with meta information about shared edges. -""" -function vtk_shared_edges(vtk, dgrid::DistributedGrid) - u = Vector{Float64}(undef, getnnodes(dgrid)) - my_rank = MPI.Comm_rank(global_comm(dgrid))+1 - for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) - fill!(u, 0.0) - for se ∈ values(get_shared_edges(dgrid)) - if haskey(se.remote_edges, rank) - (cellidx, i) = se.local_idx - cell = getcells(dgrid, cellidx) - edgenodes = edges(cell)[i] - u[[edgenodes...]] .= my_rank - end - end - vtk_point_data(pvtkwrapper(vtk), u, "shared edges with $rank") - end -end - -""" -Enrich the VTK file with partitioning meta information. -""" -function vtk_partitioning(vtk, dgrid::DistributedGrid) - u = Vector{Float64}(undef, getncells(dgrid)) - u .= MPI.Comm_rank(global_comm(dgrid))+1 - vtk_cell_data(pvtkwrapper(vtk), u, "partitioning") -end diff --git a/src/Ferrite.jl b/src/Ferrite.jl index bc05a0e79e..5f195d7bd1 100644 --- a/src/Ferrite.jl +++ b/src/Ferrite.jl @@ -56,7 +56,6 @@ include("Grid/coloring.jl") # Dofs include("Dofs/DofHandler.jl") -include("Dofs/DistributedDofHandler.jl") include("Dofs/MixedDofHandler.jl") include("Dofs/ConstraintHandler.jl") diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index 4ad75ee82d..bb845b699d 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -1,58 +1,10 @@ -using Metis -using MPI - """ """ abstract type AbstractDistributedGrid{sdim} <: AbstractGrid{sdim} end -abstract type SharedEntity end - -# TODO the following three structs can be merged to one struct with type parameter. 
-""" -""" -struct SharedVertex <: SharedEntity - local_idx::VertexIndex - remote_vertices::Dict{Int,Vector{VertexIndex}} -end - -@inline remote_entities(sv::SharedVertex) = sv.remote_vertices - -""" -""" -struct SharedFace <: SharedEntity - local_idx::FaceIndex - remote_faces::Dict{Int,Vector{FaceIndex}} -end - -@inline remote_entities(sf::SharedFace) = sf.remote_faces - -""" -""" -struct SharedEdge <: SharedEntity - local_idx::EdgeIndex - remote_edges::Dict{Int,Vector{EdgeIndex}} -end - -@inline remote_entities(se::SharedEdge) = se.remote_edges - """ -@TODO docs -@TODO PArrays ready constructor """ -mutable struct DistributedGrid{dim,C<:AbstractCell,T<:Real} <: AbstractDistributedGrid{dim} - # Dense comminicator on the grid - grid_comm::MPI.Comm - # Sparse communicator along the shared vertex neighbors - # We only need this one because the vertices induce the edge and face neighbors. - interface_comm::MPI.Comm - # Here we store the full local grid - local_grid::Grid{dim,C,T} - # Local copies of the shared entities of the form (local index, (process id in grid_comm, remote index)) - # The entities consistently contain their *Index, because faces and edges are not materialized. - shared_vertices::Dict{VertexIndex,SharedVertex} - shared_edges::Dict{EdgeIndex,SharedEdge} - shared_faces::Dict{FaceIndex,SharedFace} -end +abstract type SharedEntity end @inline get_shared_vertices(dgrid::AbstractDistributedGrid) = dgrid.shared_vertices @inline get_shared_edges(dgrid::AbstractDistributedGrid) = dgrid.shared_edges @@ -80,274 +32,6 @@ returned by @global_comm . """ @inline vertex_comm(dgrid::AbstractDistributedGrid) = dgrid.interface_comm -""" -""" -function DistributedGrid(grid_to_distribute::Grid{dim,C,T}; grid_comm::MPI.Comm = MPI.COMM_WORLD, partition_alg = :RECURSIVE) where {dim,C,T} - grid_topology = ExclusiveTopology(grid_to_distribute) - return DistributedGrid(grid_to_distribute, grid_topology, grid_comm; partition_alg=partition_alg) -end - -function create_partitioning(grid::Grid{dim,C,T}, grid_topology::ExclusiveTopology, n_partitions, partition_alg) where {dim,C,T} - n_cells_global = getncells(grid) - @assert n_cells_global > 0 - - if n_partitions == 1 - return ones(Metis.idx_t, n_cells_global) - end - - # Set up the element connectivity graph - xadj = Vector{Metis.idx_t}(undef, n_cells_global+1) - xadj[1] = 1 - adjncy = Vector{Metis.idx_t}(undef, 0) - @inbounds for i in 1:n_cells_global - n_neighbors = 0 - for neighbor ∈ getneighborhood(grid_topology, grid, CellIndex(i)) - push!(adjncy, neighbor) - n_neighbors += 1 - end - xadj[i+1] = xadj[i] + n_neighbors - end - - # Generate a partitioning - return Metis.partition( - Metis.Graph( - Metis.idx_t(n_cells_global), - xadj, - adjncy - ), - n_partitions; - alg=partition_alg - ) -end - -""" -""" -function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm; partition_alg = :RECURSIVE) where {dim,C,T} - n_cells_global = getncells(grid_to_distribute) - @assert n_cells_global > 0 - - parts = create_partitioning(grid_to_distribute, grid_topology, MPI.Comm_size(grid_comm), partition_alg) - - DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm, parts) -end - -""" -""" -function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm, parts::Vector{Int32}) where {dim,C,T} - n_cells_global = getncells(grid_to_distribute) - @assert n_cells_global > 0 # Empty input mesh... 
- - my_rank = MPI.Comm_rank(grid_comm)+1 - - # Start extraction of local grid - # 1. Extract local cells - local_cells = getcells(grid_to_distribute)[[i for i ∈ 1:n_cells_global if parts[i] == my_rank]] - @assert length(local_cells) > 0 # Cannot handle empty partitions yet - - # 2. Find unique nodes - local_node_index_set = Set{Int}() - for cell ∈ local_cells - for global_node_idx ∈ cell.nodes # @TODO abstraction - push!(local_node_index_set, global_node_idx) - end - end - - # 3. Build a map for global to local node indices - next_local_node_idx = 1 - global_to_local_node_map = Dict{Int,Int}() - for global_node_idx ∈ local_node_index_set - global_to_local_node_map[global_node_idx] = next_local_node_idx - next_local_node_idx += 1 - end - - # 4. Extract local nodes - local_nodes = Vector{Node{dim,T}}(undef,length(local_node_index_set)) - begin - global_nodes = getnodes(grid_to_distribute) - for global_node_idx ∈ local_node_index_set - local_node_idx = global_to_local_node_map[global_node_idx] - local_nodes[local_node_idx] = global_nodes[global_node_idx] - end - end - - # 5. Transform cell indices - for local_cell_idx ∈ 1:length(local_cells) - local_cells[local_cell_idx] = C(map(global_node_idx -> global_to_local_node_map[global_node_idx], local_cells[local_cell_idx].nodes)) - end - - # 6. Extract sets - # @TODO deduplicate the code. We should be able to merge each of these into a macro or function. - # We build this map now, so we avoid the communication later. - global_to_local_cell_map = Dict{Int,Dict{Int,Int}}() - for rank ∈ 1:MPI.Comm_size(grid_comm) - global_to_local_cell_map[rank] = Dict{Int,Int}() - next_local_cell_idx = 1 - for global_cell_idx ∈ 1:n_cells_global - if parts[global_cell_idx] == rank - global_to_local_cell_map[rank][global_cell_idx] = next_local_cell_idx - next_local_cell_idx += 1 - end - end - end - - cellsets = Dict{String,Set{Int}}() - for key ∈ keys(grid_to_distribute.cellsets) - cellsets[key] = Set{Int}() # create empty set, so it does not crash during assembly - for global_cell_idx ∈ grid_to_distribute.cellsets[key] - if haskey(global_to_local_cell_map[my_rank], global_cell_idx) - push!(cellsets[key], global_to_local_cell_map[my_rank][global_cell_idx]) - end - end - end - - nodesets = Dict{String,Set{Int}}() - for key ∈ keys(grid_to_distribute.nodesets) - nodesets[key] = Set{Int}() # create empty set, so it does not crash during assembly - for global_node_idx ∈ grid_to_distribute.nodesets[key] - if haskey(global_to_local_node_map, global_node_idx) - push!(nodesets[key], global_to_local_node_map[global_node_idx]) - end - end - end - - facesets = Dict{String,Set{FaceIndex}}() - for key ∈ keys(grid_to_distribute.facesets) - facesets[key] = Set{FaceIndex}() # create empty set, so it does not crash during assembly - for (global_cell_idx, i) ∈ grid_to_distribute.facesets[key] - if haskey(global_to_local_cell_map[my_rank], global_cell_idx) - push!(facesets[key], FaceIndex(global_to_local_cell_map[my_rank][global_cell_idx], i)) - end - end - end - - edgesets = Dict{String,Set{EdgeIndex}}() - for key ∈ keys(grid_to_distribute.edgesets) - edgesets[key] = Set{EdgeIndex}() # create empty set, so it does not crash during assembly - for (global_cell_idx, i) ∈ grid_to_distribute.edgesets[key] - if haskey(global_to_local_cell_map[my_rank], global_cell_idx) - push!(edgesets[key], EdgeIndex(global_to_local_cell_map[my_rank][global_cell_idx], i)) - end - end - end - - vertexsets = Dict{String,Set{VertexIndex}}() - for key ∈ keys(grid_to_distribute.vertexsets) - vertexsets[key] = 
Set{VertexIndex}() # create empty set, so it does not crash during assembly - for (global_cell_idx, i) ∈ grid_to_distribute.vertexsets[key] - if haskey(global_to_local_cell_map[my_rank], global_cell_idx) - push!(vertexsets[key], VertexIndex(global_to_local_cell_map[my_rank][global_cell_idx], i)) - end - end - end - - local_grid = Grid( - local_cells, - local_nodes, - cellsets=cellsets, - nodesets=nodesets, - facesets=facesets, - edgesets=edgesets, - vertexsets=vertexsets - ) - - shared_vertices = Dict{VertexIndex,SharedVertex}() - shared_edges = Dict{EdgeIndex,SharedEdge}() - shared_faces = Dict{FaceIndex,SharedFace}() - for (global_cell_idx,global_cell) ∈ enumerate(getcells(grid_to_distribute)) - if parts[global_cell_idx] == my_rank - # Vertex - for (i, _) ∈ enumerate(vertices(global_cell)) - cell_vertex = VertexIndex(global_cell_idx, i) - remote_vertices = Dict{Int,Vector{VertexIndex}}() - for other_vertex ∈ getneighborhood(grid_topology, grid_to_distribute, cell_vertex, true) - (global_cell_neighbor_idx, j) = other_vertex - other_rank = parts[global_cell_neighbor_idx] - if other_rank != my_rank - if toglobal(grid_to_distribute,cell_vertex) == toglobal(grid_to_distribute,other_vertex) - if !haskey(remote_vertices,other_rank) - remote_vertices[other_rank] = Vector(undef,0) - end - @debug println("Detected shared vertex $cell_vertex neighbor $other_vertex (R$my_rank)") - push!(remote_vertices[other_rank], VertexIndex(global_to_local_cell_map[other_rank][global_cell_neighbor_idx], j)) - end - end - end - - if length(remote_vertices) > 0 - idx = VertexIndex(global_to_local_cell_map[my_rank][global_cell_idx], i) - shared_vertices[idx] = SharedVertex(idx, remote_vertices) - end - end - - # Face - if dim > 1 - for (i, _) ∈ enumerate(faces(global_cell)) - cell_face = FaceIndex(global_cell_idx, i) - remote_faces = Dict{Int,Vector{FaceIndex}}() - for other_face ∈ getneighborhood(grid_topology, grid_to_distribute, cell_face, true) - (global_cell_neighbor_idx, j) = other_face - other_rank = parts[global_cell_neighbor_idx] - if other_rank != my_rank - if toglobal(grid_to_distribute,cell_face) == toglobal(grid_to_distribute,other_face) - if !haskey(remote_faces,other_rank) - remote_faces[other_rank] = Vector(undef,0) - end - @debug println("Detected shared face $cell_face neighbor $other_face (R$my_rank)") - push!(remote_faces[other_rank], FaceIndex(global_to_local_cell_map[other_rank][global_cell_neighbor_idx], j)) - end - end - end - - if length(remote_faces) > 0 - idx = FaceIndex(global_to_local_cell_map[my_rank][global_cell_idx], i) - shared_faces[idx] = SharedFace(idx, remote_faces) - end - end - end - - # Edge - if dim > 2 - for (i, _) ∈ enumerate(edges(global_cell)) - cell_edge = EdgeIndex(global_cell_idx, i) - remote_edges = Dict{Int,Vector{EdgeIndex}}() - for other_edge ∈ getneighborhood(grid_topology, grid_to_distribute, cell_edge, true) - (global_cell_neighbor_idx, j) = other_edge - other_rank = parts[global_cell_neighbor_idx] - if other_rank != my_rank - if toglobal(grid_to_distribute,cell_edge) == toglobal(grid_to_distribute,other_edge) - if !haskey(remote_edges,other_edge) - remote_edges[other_rank] = Vector(undef,0) - end - @debug println("Detected shared edge $cell_edge neighbor $other_edge (R$my_rank)") - push!(remote_edges[other_rank], EdgeIndex(global_to_local_cell_map[other_rank][global_cell_neighbor_idx], j)) - end - end - end - - if length(remote_edges) > 0 - idx = EdgeIndex(global_to_local_cell_map[my_rank][global_cell_idx], i) - shared_edges[idx] = SharedEdge(idx, remote_edges) 
- end - end - end - end - end - - # Neighborhood graph - neighbors_set = Set{Cint}() - for (vi, sv) ∈ shared_vertices - for (rank, vvi) ∈ sv.remote_vertices - push!(neighbors_set, rank) - end - end - # Adjust ranks back to to C index convention - dest = collect(neighbors_set).-1 - degree = length(dest) - interface_comm = MPI.Dist_graph_create(grid_comm, Cint[my_rank-1], Cint[degree], Cint.(dest)) - - return DistributedGrid(grid_comm,interface_comm,local_grid,shared_vertices,shared_edges,shared_faces) -end - @inline getlocalgrid(dgrid::AbstractDistributedGrid) = dgrid.local_grid @inline getnodes(dgrid::AbstractDistributedGrid) = getnodes(getlocalgrid(dgrid)) @@ -364,12 +48,6 @@ end @inline getcelltype(dgrid::AbstractDistributedGrid) = eltype(getcells(getlocalgrid(dgrid))) @inline getcelltype(dgrid::AbstractDistributedGrid, i::Int) = typeof(getcells(getlocalgrid(dgrid),i)) -# Here we define the entity ownership by the process sharing an entity with lowest rank in the grid communicator. -function compute_owner(dgrid::AbstractDistributedGrid, shared_entity::SharedEntity)::Int32 - my_rank = MPI.Comm_rank(global_comm(dgrid))+1 # Shift rank up by 1 to match Julia's indexing convention - return minimum([my_rank; [remote_rank for (remote_rank, _) ∈ remote_entities(shared_entity)]]) -end - @inline getcellset(grid::AbstractDistributedGrid, setname::String) = getcellset(getlocalgrid(grid), setname) @inline getcellsets(grid::AbstractDistributedGrid) = getcellsets(getlocalgrid(grid)) diff --git a/src/Grid/grid_generators.jl b/src/Grid/grid_generators.jl index aef0b1a4ab..7b2b0e424a 100644 --- a/src/Grid/grid_generators.jl +++ b/src/Grid/grid_generators.jl @@ -454,11 +454,3 @@ function generate_grid(::Type{Tetrahedron}, cells_per_dim::NTuple{3,Int}, left:: return Grid(cells, nodes, facesets=facesets, boundary_matrix=boundary_matrix) end - -""" -Helper to generate distributed grids. -It is designed to replace the call to [`generate_grid`](@ref) for use in distributed environments. -""" -function generate_distributed_grid(args...) - return DistributedGrid(generate_grid(args...)) -end diff --git a/src/assembler.jl b/src/assembler.jl index 750d24e2fe..1491c671b2 100644 --- a/src/assembler.jl +++ b/src/assembler.jl @@ -265,343 +265,3 @@ function InsertionSort!(A, order, ii=1, jj=length(A)) end # i return end - -using PartitionedArrays - -""" -Simplest partitioned assembler in COO format to obtain a PSparseMatrix and a PVector. -""" -struct PartitionedArraysCOOAssembler{T} - I::Vector{Int} - J::Vector{Int} - V::Vector{T} - - cols - rows - f::PVector - - 👻remotes - dh - - # TODO PartitionedArrays backend as additional input arg - PartitionedArraysCOOAssembler(dh::DistributedDofHandler) = PartitionedArraysCOOAssembler{Float64}(dh) - - # TODO PartitionedArrays backend as additional input arg - function PartitionedArraysCOOAssembler{T}(dh::DistributedDofHandler) where {T} - ldof_to_gdof = dh.ldof_to_gdof - ldof_to_rank = dh.ldof_to_rank - nldofs = num_local_dofs(dh) - ngdofs = num_global_dofs(dh) - dgrid = getglobalgrid(dh) - dim = getdim(dgrid) - - I = Int[] - J = Int[] - V = T[] - sizehint!(I, nldofs) - sizehint!(J, nldofs) - sizehint!(V, nldofs) - - # @TODO the code below can be massively simplified by introducing a ghost layer to the - # distributed grid, which can efficiently precompute some of the values below. - comm = global_comm(dgrid) - np = MPI.Comm_size(comm) - my_rank = MPI.Comm_rank(comm)+1 - - @debug println("starting assembly... 
(R$my_rank)") - - # Neighborhood graph - # @TODO cleanup old code below and use graph primitives instead. - (source_len, destination_len, _) = MPI.Dist_graph_neighbors_count(vertex_comm(dgrid)) - sources = Vector{Cint}(undef, source_len) - destinations = Vector{Cint}(undef, destination_len) - MPI.Dist_graph_neighbors!(vertex_comm(dgrid), sources, destinations) - - # Adjust to Julia index convention - sources .+= 1 - destinations .+= 1 - - @debug println("Neighborhood | $sources | $destinations (R$my_rank)") - - # Invert the relations to clarify the code - source_index = Dict{Cint, Int}() - for (i,remote_rank) ∈ enumerate(sources) - source_index[remote_rank] = i - end - destination_index = Dict{Int, Cint}() - for (i,remote_rank) ∈ enumerate(destinations) - destination_index[remote_rank] = i - end - - # Note: We assume a symmetric neighborhood for now... this may not be true in general. - neighbors = MPIData(Int32.(sources), comm, (np,)) - - # Extract locally owned dofs - ltdof_indices = ldof_to_rank.==my_rank - ltdof_to_gdof = ldof_to_gdof[ltdof_indices] - - @debug println("ltdof_to_gdof $ltdof_to_gdof (R$my_rank)") - @debug println("ldof_to_gdof $ldof_to_gdof (R$my_rank)") - @debug println("ldof_to_rank $ldof_to_rank (R$my_rank)") - - # Process owns rows of owned dofs. The process also may write to some remote dofs, - # which correspond to non-owned share entities. Here we construct the rows for the - # distributed matrix. - # We decide for row (i.e. test function) ownership, because it the image of - # SpMV is process local. - row_indices = PartitionedArrays.IndexSet(my_rank, ldof_to_gdof, Int32.(ldof_to_rank)) - #FIXME: This below must be fixed before we can assemble to HYPRE IJ. Problem seems to be that rows and cols must be continuously assigned. - #row_indices = PartitionedArrays.IndexRange(my_rank, length(ltdof_indices), ltdof_to_gdof[1], ldof_to_gdof[.!ltdof_indices], Int32.(ldof_to_rank[.!ltdof_indices])) - row_data = MPIData(row_indices, comm, (np,)) - row_exchanger = Exchanger(row_data) - rows = PRange(ngdofs,row_data,row_exchanger) - - @debug println("rows done (R$my_rank)") - - # For the locally visible columns we also have to take into account that remote - # processes will write their data in some of these, because their remotely - # owned trial functions overlap with the locally owned test functions. 
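The neighborhood setup above boils down to one MPI distributed-graph round trip: create the sparse communicator from the locally known destinations, then query sources and destinations back before the ghost exchange. A minimal sketch of that pattern with MPI.jl, where the ring-shaped neighborhood is invented purely for illustration:

    using MPI
    MPI.Init()
    comm = MPI.COMM_WORLD
    rank = MPI.Comm_rank(comm)              # 0-based, as MPI expects
    nranks = MPI.Comm_size(comm)

    # Invented neighborhood for illustration: every rank sends to its successor in a ring.
    dest = Cint[(rank + 1) % nranks]
    graph_comm = MPI.Dist_graph_create(comm, Cint[rank], Cint[length(dest)], dest)

    # Query the topology back, mirroring the calls at the start of the assembler constructor.
    (source_len, destination_len, _) = MPI.Dist_graph_neighbors_count(graph_comm)
    sources = Vector{Cint}(undef, source_len)
    destinations = Vector{Cint}(undef, destination_len)
    MPI.Dist_graph_neighbors!(graph_comm, sources, destinations)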
- ghost_dof_to_global = Int[] - ghost_dof_rank = Int32[] - - # ------------ Ghost dof synchronization ---------- - # Prepare sending ghost dofs to neighbors 👻 - #@TODO move relevant parts into dof handler - #@TODO communication can be optimized by deduplicating entries in, and compressing the following arrays - #@TODO reorder communication by field to eliminate need for `ghost_dof_field_index_to_send` - ghost_dof_to_send = [Int[] for i ∈ 1:destination_len] # global dof id - ghost_rank_to_send = [Int[] for i ∈ 1:destination_len] # rank of dof - ghost_dof_field_index_to_send = [Int[] for i ∈ 1:destination_len] - ghost_dof_owner = [Int[] for i ∈ 1:destination_len] # corresponding owner - ghost_dof_pivot_to_send = [Int[] for i ∈ 1:destination_len] # corresponding dof to interact with - for (pivot_vertex, pivot_shared_vertex) ∈ dgrid.shared_vertices - # Start by searching shared entities which are not owned - pivot_vertex_owner_rank = compute_owner(dgrid, pivot_shared_vertex) - pivot_cell_idx = pivot_vertex[1] - pivot_vertex_global = toglobal(getlocalgrid(dgrid), pivot_vertex) - - if my_rank != pivot_vertex_owner_rank - sender_slot = destination_index[pivot_vertex_owner_rank] - - @debug println("$pivot_vertex may require synchronization (R$my_rank)") - # Note: We have to send ALL dofs on the element to the remote. - cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] - cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] - - for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) - !has_vertex_dofs(dh, field_idx, pivot_vertex_global) && continue - pivot_vertex_dof = vertex_dofs(dh, field_idx, pivot_vertex_global) - - for d ∈ 1:dh.field_dims[field_idx] - @debug println(" adding dof $(pivot_vertex_dof+d-1) to ghost sync synchronization on slot $sender_slot (R$my_rank)") - - # Extract dofs belonging to the current field - #cell_field_dofs = cell_dofs[dof_range(dh, field_name)] - #for cell_field_dof ∈ cell_field_dofs - for cell_dof ∈ cell_dofs - append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_vertex_dof+d-1]) - append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_dof]) - append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_dof]) - append!(ghost_dof_field_index_to_send[sender_slot], field_idx) - end - end - end - end - end - - if dim > 1 - for (pivot_face, pivot_shared_face) ∈ dgrid.shared_faces - # Start by searching shared entities which are not owned - pivot_face_owner_rank = compute_owner(dgrid, pivot_shared_face) - pivot_cell_idx = pivot_face[1] - - if my_rank != pivot_face_owner_rank - sender_slot = destination_index[pivot_face_owner_rank] - - @debug println("$pivot_face may require synchronization (R$my_rank)") - # Note: We have to send ALL dofs on the element to the remote. - cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? 
length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] - cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] - - pivot_face_global = toglobal(getlocalgrid(dgrid), pivot_face) - - for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) - !has_face_dofs(dh, field_idx, pivot_face_global) && continue - pivot_face_dof = face_dofs(dh, field_idx, pivot_face_global) - - for d ∈ 1:dh.field_dims[field_idx] - @debug println(" adding dof $(pivot_face_dof+d-1) to ghost sync synchronization on slot $sender_slot (R$my_rank)") - - # Extract dofs belonging to the current field - #cell_field_dofs = cell_dofs[dof_range(dh, field_name)] - #for cell_field_dof ∈ cell_field_dofs - for cell_dof ∈ cell_dofs - append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_face_dof+d-1]) - append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_dof]) - append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_dof]) - append!(ghost_dof_field_index_to_send[sender_slot], field_idx) - end - end - end - end - end - end - - if dim > 2 - for (pivot_edge, pivot_shared_edge) ∈ dgrid.shared_edges - # Start by searching shared entities which are not owned - pivot_edge_owner_rank = compute_owner(dgrid, pivot_shared_edge) - pivot_cell_idx = pivot_edge[1] - - if my_rank != pivot_edge_owner_rank - sender_slot = destination_index[pivot_edge_owner_rank] - - @debug println("$pivot_edge may require synchronization (R$my_rank)") - # Note: We have to send ALL dofs on the element to the remote. - cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] - cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] - - pivot_edge_global = toglobal(getlocalgrid(dgrid), pivot_edge) - - for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) - !has_edge_dofs(dh, field_idx, pivot_edge_global) && continue - pivot_edge_dof = edge_dofs(dh, field_idx, pivot_edge_global) - - for d ∈ 1:dh.field_dims[field_idx] - @debug println(" adding dof $(pivot_edge_dof+d-1) to ghost sync synchronization on slot $sender_slot (R$my_rank)") - # Extract dofs belonging to the current field - #cell_field_dofs = cell_dofs[dof_range(dh, field_name)] - #for cell_field_dof ∈ cell_field_dofs - for cell_dof ∈ cell_dofs - append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_edge_dof+d-1]) - append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_dof]) - append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_dof]) - # append!(ghost_dof_field_index_to_send[sender_slot], field_idx) - end - end - end - end - end - end - - ghost_send_buffer_lengths = Int[length(i) for i ∈ ghost_dof_to_send] - ghost_recv_buffer_lengths = zeros(Int, destination_len) - MPI.Neighbor_alltoall!(UBuffer(ghost_send_buffer_lengths,1), UBuffer(ghost_recv_buffer_lengths,1), vertex_comm(dgrid)); - @debug for (i,ghost_recv_buffer_length) ∈ enumerate(ghost_recv_buffer_lengths) - println("receiving $ghost_recv_buffer_length ghosts from $(sources[i]) (R$my_rank)") - end - - # Communicate ghost information 👻 - # @TODO coalesce communication - ghost_send_buffer_dofs = vcat(ghost_dof_to_send...) - ghost_recv_buffer_dofs = zeros(Int, sum(ghost_recv_buffer_lengths)) - MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_dofs,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_dofs,ghost_recv_buffer_lengths), vertex_comm(dgrid)) - - ghost_send_buffer_fields = vcat(ghost_dof_field_index_to_send...) 
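The ghost exchange above always has the same two-step shape: first a fixed-size exchange of per-neighbor message lengths, then a variable-length payload exchange sized by what was just received. A condensed sketch of that shape with MPI.jl's UBuffer/VBuffer, assuming a symmetric neighborhood (as the code above does) and placeholder inputs `graph_comm`, `sources`, and per-destination payloads `to_send::Vector{Vector{Int}}`:

    send_lengths = Int[length(p) for p in to_send]
    recv_lengths = zeros(Int, length(sources))   # one slot per neighbor
    MPI.Neighbor_alltoall!(UBuffer(send_lengths, 1), UBuffer(recv_lengths, 1), graph_comm)

    send_data = vcat(to_send...)
    recv_data = zeros(Int, sum(recv_lengths))
    MPI.Neighbor_alltoallv!(VBuffer(send_data, send_lengths),
                            VBuffer(recv_data, recv_lengths),
                            graph_comm)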
- ghost_recv_buffer_fields = zeros(Int, sum(ghost_recv_buffer_lengths)) - MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_fields,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_fields,ghost_recv_buffer_lengths), vertex_comm(dgrid)) - - ghost_send_buffer_ranks = vcat(ghost_rank_to_send...) - ghost_recv_buffer_ranks = zeros(Int, sum(ghost_recv_buffer_lengths)) - MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_ranks,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_ranks,ghost_recv_buffer_lengths), vertex_comm(dgrid)) - - ghost_send_buffer_dofs_piv = vcat(ghost_dof_pivot_to_send...) - ghost_recv_buffer_dofs_piv = zeros(Int, sum(ghost_recv_buffer_lengths)) - MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_dofs_piv,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_dofs_piv,ghost_recv_buffer_lengths), vertex_comm(dgrid)) - - # Reconstruct source ranks - ghost_recv_buffer_source_ranks = Int[] - for (source_idx, recv_len) ∈ enumerate(ghost_recv_buffer_lengths) - append!(ghost_recv_buffer_source_ranks, ones(recv_len)*sources[source_idx]) - end - - @debug println("received $ghost_recv_buffer_dofs with owners $ghost_recv_buffer_ranks (R$my_rank)") - - unique_ghosts_dr = sort(unique(first,zip(ghost_recv_buffer_dofs,ghost_recv_buffer_ranks))) - # unzip manually and make sure we do not add duplicate entries to our columns - for (dof,rank) ∈ unique_ghosts_dr - if rank != my_rank && dof ∉ ldof_to_gdof - push!(ghost_dof_to_global, dof) - push!(ghost_dof_rank, rank) - end - end - - # ------------- Construct rows and cols of distributed matrix -------- - all_local_cols = Int[ldof_to_gdof; ghost_dof_to_global] - all_local_col_ranks = Int32[ldof_to_rank; ghost_dof_rank] - @debug println("all_local_cols $all_local_cols (R$my_rank)") - @debug println("all_local_col_ranks $all_local_col_ranks (R$my_rank)") - - col_indices = PartitionedArrays.IndexSet(my_rank, all_local_cols, all_local_col_ranks) - #FIXME: This below must be fixed before we can assemble to HYPRE IJ. Problem seems to be that rows and cols must be continuously assigned. 
- #col_indices = PartitionedArrays.IndexRange(my_rank, length(ltdof_indices), ltdof_to_gdof[1], all_local_cols[all_local_col_ranks .!= my_rank], Int32.(all_local_col_ranks[all_local_col_ranks .!= my_rank])) - col_data = MPIData(col_indices, comm, (np,)) - col_exchanger = Exchanger(col_data) - cols = PRange(ngdofs,col_data,col_exchanger) - - @debug println("cols and rows constructed (R$my_rank)") - f = PartitionedArrays.PVector(0.0,rows) - @debug println("f constructed (R$my_rank)") - - 👻remotes = zip(ghost_recv_buffer_dofs_piv, ghost_recv_buffer_dofs, ghost_recv_buffer_ranks,ghost_recv_buffer_fields) - @debug println("👻remotes $👻remotes (R$my_rank)") - - return new(I, J, V, cols, rows, f, 👻remotes, dh) - end -end - -@propagate_inbounds function assemble!(a::PartitionedArraysCOOAssembler{T}, edof::AbstractVector{Int}, Ke::AbstractMatrix{T}) where {T} - n_dofs = length(edof) - append!(a.V, Ke) - @inbounds for j in 1:n_dofs - append!(a.I, edof) - for i in 1:n_dofs - push!(a.J, edof[j]) - end - end -end - -@propagate_inbounds function assemble!(a::PartitionedArraysCOOAssembler{T}, dofs::AbstractVector{Int}, fe::AbstractVector{T}, Ke::AbstractMatrix{T}) where {T} - Ferrite.assemble!(a, dofs, Ke) - map_parts(local_view(a.f, a.f.rows)) do f_local - Ferrite.assemble!(f_local, dofs, fe) - end -end - -function end_assemble(assembler::PartitionedArraysCOOAssembler{T}) where {T} - comm = global_comm(getglobalgrid(assembler.dh)) - np = MPI.Comm_size(comm) - my_rank = MPI.Comm_rank(comm)+1 - - # --------------------- Add ghost entries in IJ 👻 -------------------- - I = map(i->assembler.dh.ldof_to_gdof[i], assembler.I) - J = map(j->assembler.dh.ldof_to_gdof[j], assembler.J) - V = map(v->v, assembler.V) - - # Fix ghost layer 👻! Note that the locations for remote processes to write their - # data into are missing up to this point. - # TODO here still the interaction between fields is missing... - for (i, (pivot_dof, global_ghost_dof, ghost_owner_rank, ghost_field_idx)) ∈ enumerate(assembler.👻remotes) - for dᵢ ∈ 1:1#assembler.dh.field_dims[ghost_field_idx] - for dⱼ ∈ 1:1#assembler.dh.field_dims[ghost_field_idx] - push!(I, pivot_dof+dᵢ-1) - push!(J, global_ghost_dof+dⱼ-1) - push!(V, 0.0) - end - end - end - - @debug println("I=$(I) (R$my_rank)") - @debug println("J=$(J) (R$my_rank)") - K = PartitionedArrays.PSparseMatrix( - MPIData(I, comm, (np,)), - MPIData(J, comm, (np,)), - MPIData(V, comm, (np,)), - assembler.rows, assembler.cols, ids=:global - ) - - PartitionedArrays.assemble!(K) - PartitionedArrays.assemble!(assembler.f) - - return K, assembler.f -end diff --git a/src/exports.jl b/src/exports.jl index edf0e7c201..fbb2c8ea3f 100644 --- a/src/exports.jl +++ b/src/exports.jl @@ -94,7 +94,6 @@ export addcellset!, transform!, generate_grid, - generate_distributed_grid, compute_vertex_values, is_shared_vertex, get_shared_vertices, @@ -108,7 +107,6 @@ export # Dofs DofHandler, - DistributedDofHandler, close!, ndofs, num_local_true_dofs, @@ -156,7 +154,6 @@ export start_assemble, assemble!, end_assemble, - PartitionedArraysCOOAssembler, # VTK export vtk_grid, @@ -165,10 +162,10 @@ export vtk_nodeset, vtk_cellset, vtk_save, - vtk_shared_vertices, - vtk_shared_faces, - vtk_shared_edges, - vtk_partitioning, + # vtk_shared_vertices, + # vtk_shared_faces, + # vtk_shared_edges, + # vtk_partitioning, # L2 Projection project, From afd3f13f693039705a4c72f4698f1b893366b212 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Thu, 16 Feb 2023 16:04:10 +0100 Subject: [PATCH 081/124] Forgot two files. 
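For context, the assembler removed from src/assembler.jl above lives on as `COOAssembler` inside the extension and is driven just like the serial assemblers. A condensed usage sketch, with the element routine and the local `Ke`/`fe` buffers left as placeholders:

    assembler = COOAssembler{Float64}(dh)        # dh::DistributedDofHandler, already close!'d
    for cell in CellIterator(dh)
        fill!(Ke, 0.0)
        fill!(fe, 0.0)
        assemble_element!(Ke, fe, cell)          # placeholder element routine
        assemble!(assembler, celldofs(cell), fe, Ke)
    end
    K, f = end_assemble(assembler)               # runs the ghost exchange, returns PSparseMatrix and PVector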
--- ext/FerritePartitionedArrays.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ext/FerritePartitionedArrays.jl b/ext/FerritePartitionedArrays.jl index 6037578659..be5babd8af 100644 --- a/ext/FerritePartitionedArrays.jl +++ b/ext/FerritePartitionedArrays.jl @@ -9,6 +9,8 @@ using MPI using PartitionedArrays include("FerritePartitionedArrays/assembler.jl") +include("FerritePartitionedArrays/constraints.jl") +include("FerritePartitionedArrays/DistributedDofHandler.jl") include("FerritePartitionedArrays/grid.jl") include("FerritePartitionedArrays/vtk-export.jl") From 959eceaadc29a05f81195c51ad9291f058953388 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Thu, 16 Feb 2023 16:28:24 +0100 Subject: [PATCH 082/124] Couple some functions to the Ferrite namespace. --- .../DistributedDofHandler.jl | 16 ++++++++-------- ext/FerritePartitionedArrays/assembler.jl | 6 +++--- ext/FerritePartitionedArrays/constraints.jl | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/ext/FerritePartitionedArrays/DistributedDofHandler.jl b/ext/FerritePartitionedArrays/DistributedDofHandler.jl index 6fbedc04f0..2e031f40d4 100644 --- a/ext/FerritePartitionedArrays/DistributedDofHandler.jl +++ b/ext/FerritePartitionedArrays/DistributedDofHandler.jl @@ -54,10 +54,10 @@ getlocalgrid(dh::DistributedDofHandler) = getlocalgrid(dh.grid) getglobalgrid(dh::DistributedDofHandler) = dh.grid # Compat layer against serial code -getgrid(dh::DistributedDofHandler) = getlocalgrid(dh) +Ferrite.getgrid(dh::DistributedDofHandler) = getlocalgrid(dh) # TODO this is copy pasta from DofHandler.jl -function celldofs!(global_dofs::Vector{Int}, dh::DistributedDofHandler, i::Int) +function Ferrite.celldofs!(global_dofs::Vector{Int}, dh::DistributedDofHandler, i::Int) @assert isclosed(dh) @assert length(global_dofs) == ndofs_per_cell(dh, i) unsafe_copyto!(global_dofs, 1, dh.cell_dofs, dh.cell_dofs_offset[i], length(global_dofs)) @@ -65,10 +65,10 @@ function celldofs!(global_dofs::Vector{Int}, dh::DistributedDofHandler, i::Int) end # TODO this is copy pasta from DofHandler.jl -cellcoords!(global_coords::Vector{<:Vec}, dh::DistributedDofHandler, i::Int) = cellcoords!(global_coords, getgrid(dh), i) +Ferrite.cellcoords!(global_coords::Vector{<:Vec}, dh::DistributedDofHandler, i::Int) = cellcoords!(global_coords, getgrid(dh), i) # TODO this is copy pasta from DofHandler.jl -function celldofs(dh::DistributedDofHandler, i::Int) +function Ferrite.celldofs(dh::DistributedDofHandler, i::Int) @assert isclosed(dh) n = ndofs_per_cell(dh, i) global_dofs = zeros(Int, n) @@ -76,7 +76,7 @@ function celldofs(dh::DistributedDofHandler, i::Int) return global_dofs end -renumber!(dh::DistributedDofHandler, perm::AbstractVector{<:Integer}) = error("Not implemented.") +Ferrite.renumber!(dh::DistributedDofHandler, perm::AbstractVector{<:Integer}) = error("Not implemented.") function compute_dof_ownership(dh) dgrid = getglobalgrid(dh) @@ -611,7 +611,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) return local_to_global end -function close!(dh::DistributedDofHandler) +function Ferrite.close!(dh::DistributedDofHandler) __close!(dh) append!(dh.ldof_to_gdof, local_to_global_numbering(dh)) append!(dh.ldof_to_rank, compute_dof_ownership(dh)) @@ -620,7 +620,7 @@ end # TODO this is copy pasta from DofHandler.jl # close the DofHandler and distribute all the dofs -function __close!(dh::DistributedDofHandler{dim}) where {dim} +function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} @assert !isclosed(dh) # `vertexdict` 
keeps track of the visited vertices. We store the global vertex @@ -779,7 +779,7 @@ function __close!(dh::DistributedDofHandler{dim}) where {dim} end # TODO this is copy pasta from DofHandler.jl -function reshape_to_nodes(dh::DistributedDofHandler, u::Vector{T}, fieldname::Symbol) where T +function Ferrite.reshape_to_nodes(dh::DistributedDofHandler, u::Vector{T}, fieldname::Symbol) where T # make sure the field exists fieldname ∈ getfieldnames(dh) || error("Field $fieldname not found.") diff --git a/ext/FerritePartitionedArrays/assembler.jl b/ext/FerritePartitionedArrays/assembler.jl index 6ee1cc2ea7..a2ffd3348b 100644 --- a/ext/FerritePartitionedArrays/assembler.jl +++ b/ext/FerritePartitionedArrays/assembler.jl @@ -281,7 +281,7 @@ struct COOAssembler{T} end end -@propagate_inbounds function assemble!(a::COOAssembler{T}, edof::AbstractVector{Int}, Ke::AbstractMatrix{T}) where {T} +@propagate_inbounds function Ferrite.assemble!(a::COOAssembler{T}, edof::AbstractVector{Int}, Ke::AbstractMatrix{T}) where {T} n_dofs = length(edof) append!(a.V, Ke) @inbounds for j in 1:n_dofs @@ -292,14 +292,14 @@ end end end -@propagate_inbounds function assemble!(a::COOAssembler{T}, dofs::AbstractVector{Int}, fe::AbstractVector{T}, Ke::AbstractMatrix{T}) where {T} +@propagate_inbounds function Ferrite.assemble!(a::COOAssembler{T}, dofs::AbstractVector{Int}, fe::AbstractVector{T}, Ke::AbstractMatrix{T}) where {T} Ferrite.assemble!(a, dofs, Ke) map_parts(local_view(a.f, a.f.rows)) do f_local Ferrite.assemble!(f_local, dofs, fe) end end -function end_assemble(assembler::COOAssembler{T}) where {T} +function Ferrite.end_assemble(assembler::COOAssembler{T}) where {T} comm = global_comm(getglobalgrid(assembler.dh)) np = MPI.Comm_size(comm) my_rank = MPI.Comm_rank(comm)+1 diff --git a/ext/FerritePartitionedArrays/constraints.jl b/ext/FerritePartitionedArrays/constraints.jl index 9ca27f3395..feadb398f4 100644 --- a/ext/FerritePartitionedArrays/constraints.jl +++ b/ext/FerritePartitionedArrays/constraints.jl @@ -1,4 +1,4 @@ -function meandiag(K::PartitionedArrays.PSparseMatrix) +function Ferrite.meandiag(K::PartitionedArrays.PSparseMatrix) # Get local portion of z z_pa = map_parts(local_view(K, K.rows, K.cols)) do K_local z = zero(eltype(K_local)) @@ -16,7 +16,7 @@ Poor man's Dirichlet BC application for PartitionedArrays. :) TODO integrate with constraints. """ -function apply_zero!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) +function Ferrite.apply_zero!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) map_parts(local_view(f, f.rows), f.rows.partition) do f_local, partition f_local[ch.prescribed_dofs] .= 0.0 end @@ -36,7 +36,7 @@ Poor man's Dirichlet BC application for PartitionedArrays. :) TODO integrate with constraints. TODO optimize. """ -function apply!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) +function Ferrite.apply!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays.PVector, ch::ConstraintHandler) # Start by substracting the inhomogeneous solution from the right hand side u_constrained = PartitionedArrays.PVector(0.0, K.cols) map_parts(local_view(u_constrained, u_constrained.rows)) do u_local From d517ec22802eb1a3c524e12b8d12e6928dcba814 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Thu, 16 Feb 2023 16:28:41 +0100 Subject: [PATCH 083/124] Forgot some exports. 
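The `apply!`/`apply_zero!` wrappers for the partitioned system above are meant to slot into the usual solve sequence. A short sketch of the intended order; the `cg` call is an assumption based on the tutorial pulling in IterativeSolvers, not an API guaranteed by this patch:

    apply!(K, f, ch)    # ch::ConstraintHandler; zero constrained rows/columns, fix the rhs
    u = cg(K, f)        # illustrative Krylov solve on the PSparseMatrix/PVector pair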
---
 ext/FerritePartitionedArrays.jl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ext/FerritePartitionedArrays.jl b/ext/FerritePartitionedArrays.jl
index be5babd8af..208d3e0204 100644
--- a/ext/FerritePartitionedArrays.jl
+++ b/ext/FerritePartitionedArrays.jl
@@ -17,8 +17,11 @@ include("FerritePartitionedArrays/vtk-export.jl")
 export
     # assembler
     COOAssembler,
+    # dofhandler
+    DistributedDofHandler,
     # grid
     DistributedGrid,
+    generate_distributed_grid,
     # vtk-export
     vtk_shared_vertices,
     vtk_shared_faces,

From 3c4e8c32af3dc08938c21719be4e55ce82d0c7e0 Mon Sep 17 00:00:00 2001
From: Dennis Ogiermann
Date: Thu, 16 Feb 2023 16:43:07 +0100
Subject: [PATCH 084/124] Move iterator overload into the extension.

---
 ext/FerritePartitionedArrays.jl           |  1 +
 ext/FerritePartitionedArrays/iterators.jl | 13 +++++++++++++
 src/iterators.jl                          |  4 +---
 3 files changed, 15 insertions(+), 3 deletions(-)
 create mode 100644 ext/FerritePartitionedArrays/iterators.jl

diff --git a/ext/FerritePartitionedArrays.jl b/ext/FerritePartitionedArrays.jl
index 208d3e0204..b8ae69b501 100644
--- a/ext/FerritePartitionedArrays.jl
+++ b/ext/FerritePartitionedArrays.jl
@@ -12,6 +12,7 @@ include("FerritePartitionedArrays/assembler.jl")
 include("FerritePartitionedArrays/constraints.jl")
 include("FerritePartitionedArrays/DistributedDofHandler.jl")
 include("FerritePartitionedArrays/grid.jl")
+include("FerritePartitionedArrays/iterators.jl")
 include("FerritePartitionedArrays/vtk-export.jl")

 export
diff --git a/ext/FerritePartitionedArrays/iterators.jl b/ext/FerritePartitionedArrays/iterators.jl
new file mode 100644
index 0000000000..6aced4a20d
--- /dev/null
+++ b/ext/FerritePartitionedArrays/iterators.jl
@@ -0,0 +1,13 @@
+"""
+This is copy pasta for now.
+"""
+function Ferrite.CellIterator(dh::DistributedDofHandler{dim,T}, cellset::Union{AbstractVector{Int},Nothing}=nothing, flags::UpdateFlags=UpdateFlags()) where {dim,C,T}
+    isconcretetype(C) || _check_same_celltype(getgrid(dh), cellset)
+    N = nnodes_per_cell(getgrid(dh), cellset === nothing ? 1 : first(cellset))
+    cell = ScalarWrapper(0)
+    nodes = zeros(Int, N)
+    coords = zeros(Vec{dim,T}, N)
+    n = ndofs_per_cell(dh, cellset === nothing ? 1 : first(cellset))
+    celldofs = zeros(Int, n)
+    return Ferrite.CellIterator{dim,C,T,typeof(dh)}(flags, getgrid(dh), cell, nodes, coords, cellset, dh, celldofs)
+end
diff --git a/src/iterators.jl b/src/iterators.jl
index c049018ffb..211b07693c 100644
--- a/src/iterators.jl
+++ b/src/iterators.jl
@@ -41,7 +41,7 @@ struct CellIterator{dim,C,T,DH<:Union{AbstractDofHandler,Nothing}}
     dh::Union{DH,Nothing}
     celldofs::Vector{Int}

-    function CellIterator{dim,C,T}(dh::Union{DofHandler{dim,T,G},DistributedDofHandler{dim,T,G},MixedDofHandler{dim,T,G},Nothing}, cellset::Union{AbstractVector{Int},Nothing}, flags::UpdateFlags) where {dim,C,T,G}
+    function CellIterator{dim,C,T}(dh::Union{DofHandler{dim,T,G},MixedDofHandler{dim,T,G},Nothing}, cellset::Union{AbstractVector{Int},Nothing}, flags::UpdateFlags) where {dim,C,T,G}
         isconcretetype(C) || _check_same_celltype(getgrid(dh), cellset)
         N = nnodes_per_cell(getgrid(dh), cellset === nothing ? 1 : first(cellset))
         cell = ScalarWrapper(0)
@@ -66,8 +66,6 @@ CellIterator(grid::Grid{dim,C,T}, cellset::Union{AbstractVector{Int},Nothing}=no
     CellIterator{dim,C,T}(grid, cellset, flags)
 CellIterator(dh::DofHandler{dim,T}, cellset::Union{AbstractVector{Int},Nothing}=nothing, flags::UpdateFlags=UpdateFlags()) where {dim,T} =
     CellIterator{dim,getcelltype(dh.grid),T}(dh, cellset, flags)
-CellIterator(dh::DistributedDofHandler{dim,T}, cellset::Union{AbstractVector{Int},Nothing}=nothing, flags::UpdateFlags=UpdateFlags()) where {dim,C,T} =
-    CellIterator{dim,getcelltype(getlocalgrid(dh)),T}(dh, cellset, flags)
 CellIterator(dh::MixedDofHandler{dim,T}, cellset::Union{AbstractVector{Int},Nothing}=nothing, flags::UpdateFlags=UpdateFlags()) where {dim,T} =
     CellIterator{dim,getcelltype(dh.grid),T}(dh, cellset, flags)


From 51f9aefd5b1d4b066ab5abd42b734e9daf873ad4 Mon Sep 17 00:00:00 2001
From: Dennis Ogiermann
Date: Thu, 16 Feb 2023 16:49:39 +0100
Subject: [PATCH 085/124] Actually add extension.

---
 Project.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Project.toml b/Project.toml
index 9a51fbd627..bc6cffa427 100644
--- a/Project.toml
+++ b/Project.toml
@@ -16,6 +16,9 @@ WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192"
 MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
 PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9"

+[extensions]
+FerritePartitionedArrays = "PartitionedArrays"
+
 [compat]
 EnumX = "1"

From 1d5c8169868a5b700e08fc16922eeeaf6ab9dfa3 Mon Sep 17 00:00:00 2001
From: Dennis Ogiermann
Date: Thu, 16 Feb 2023 17:14:12 +0100
Subject: [PATCH 086/124] Fix typo.

---
 ext/FerritePartitionedArrays.jl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ext/FerritePartitionedArrays.jl b/ext/FerritePartitionedArrays.jl
index b8ae69b501..822a3eacb6 100644
--- a/ext/FerritePartitionedArrays.jl
+++ b/ext/FerritePartitionedArrays.jl
@@ -27,5 +27,7 @@ export
     vtk_shared_vertices,
     vtk_shared_faces,
     vtk_shared_edges,
-    vtk_partitioning,
+    vtk_partitioning
+
+
 end # module FerritePartitionedArrays

From ace4c157a628aa5284c2527feca9cce6baab5b96 Mon Sep 17 00:00:00 2001
From: Dennis Ogiermann
Date: Thu, 16 Feb 2023 17:21:51 +0100
Subject: [PATCH 087/124] Fix extension dependencies.

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index bc6cffa427..edaae69973 100644
--- a/Project.toml
+++ b/Project.toml
@@ -17,7 +17,7 @@ MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
 PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9"

 [extensions]
-FerritePartitionedArrays = "PartitionedArrays"
+FerritePartitionedArrays = ["Metis", "MPI", "PartitionedArrays"]

 [compat]
 EnumX = "1"

From 8048c1d5f5c95fb711464a2da6af582df9a9938d Mon Sep 17 00:00:00 2001
From: Dennis Ogiermann
Date: Thu, 16 Feb 2023 17:35:25 +0100
Subject: [PATCH 088/124] Fix compile errors in extension.
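Patches 085 through 087 register the code as a package extension via `[weakdeps]`/`[extensions]`: the extension is compiled only once the weak dependencies are loaded, and its module is fetched explicitly, which is exactly what the updated tutorial below does. A short usage sketch built from the calls that appear later in this series:

    using Ferrite
    using MPI, PartitionedArrays    # loading the weak deps activates FerritePartitionedArrays

    FerritePartitionedArrays = Base.get_extension(Ferrite, :FerritePartitionedArrays)

    MPI.Init()
    dgrid = FerritePartitionedArrays.generate_distributed_grid(Hexahedron, (2, 1, 1))
    dh = FerritePartitionedArrays.DistributedDofHandler(dgrid)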
--- Project.toml | 2 +- ext/FerritePartitionedArrays.jl | 5 +++-- ext/FerritePartitionedArrays/DistributedDofHandler.jl | 10 +++++----- ext/FerritePartitionedArrays/grid.jl | 10 +++++----- ext/FerritePartitionedArrays/vtk-export.jl | 2 +- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/Project.toml b/Project.toml index edaae69973..f8a8c32e64 100644 --- a/Project.toml +++ b/Project.toml @@ -17,7 +17,7 @@ MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9" [extensions] -FerritePartitionedArrays = ["Metis", "MPI", "PartitionedArrays"] +FerritePartitionedArrays = ["MPI", "PartitionedArrays"] [compat] EnumX = "1" diff --git a/ext/FerritePartitionedArrays.jl b/ext/FerritePartitionedArrays.jl index 822a3eacb6..89c3f0b551 100644 --- a/ext/FerritePartitionedArrays.jl +++ b/ext/FerritePartitionedArrays.jl @@ -7,11 +7,12 @@ using Ferrite using Metis using MPI using PartitionedArrays +using Base: @propagate_inbounds +include("FerritePartitionedArrays/grid.jl") +include("FerritePartitionedArrays/DistributedDofHandler.jl") include("FerritePartitionedArrays/assembler.jl") include("FerritePartitionedArrays/constraints.jl") -include("FerritePartitionedArrays/DistributedDofHandler.jl") -include("FerritePartitionedArrays/grid.jl") include("FerritePartitionedArrays/iterators.jl") include("FerritePartitionedArrays/vtk-export.jl") diff --git a/ext/FerritePartitionedArrays/DistributedDofHandler.jl b/ext/FerritePartitionedArrays/DistributedDofHandler.jl index 2e031f40d4..9272bc71fb 100644 --- a/ext/FerritePartitionedArrays/DistributedDofHandler.jl +++ b/ext/FerritePartitionedArrays/DistributedDofHandler.jl @@ -9,18 +9,18 @@ Supports: - `Grid`s with a single concrete cell type. - One or several fields on the whole domaine. """ -struct DistributedDofHandler{dim,T,G<:AbstractDistributedGrid{dim}} <: AbstractDofHandler +struct DistributedDofHandler{dim,T,G<:Ferrite.AbstractDistributedGrid{dim}} <: Ferrite.AbstractDofHandler field_names::Vector{Symbol} field_dims::Vector{Int} # TODO: field_interpolations can probably be better typed: We should at least require # all the interpolations to have the same dimension and reference shape field_interpolations::Vector{Interpolation} - bc_values::Vector{BCValues{T}} # TODO: BcValues is created/handeld by the constrainthandler, so this can be removed + bc_values::Vector{Ferrite.BCValues{T}} # TODO: BcValues is created/handeld by the constrainthandler, so this can be removed cell_dofs::Vector{Int} cell_dofs_offset::Vector{Int} - closed::ScalarWrapper{Bool} + closed::Ferrite.ScalarWrapper{Bool} grid::G - ndofs::ScalarWrapper{Int} + ndofs::Ferrite.ScalarWrapper{Int} vertexdicts::Vector{Dict{Int,Int}} edgedicts::Vector{Dict{Tuple{Int,Int},Tuple{Int,Bool}}} @@ -31,7 +31,7 @@ struct DistributedDofHandler{dim,T,G<:AbstractDistributedGrid{dim}} <: AbstractD ldof_to_rank::Vector{Int32} end -function DistributedDofHandler(grid::AbstractDistributedGrid{dim}) where {dim} +function DistributedDofHandler(grid::Ferrite.AbstractDistributedGrid{dim}) where {dim} isconcretetype(getcelltype(grid)) || error("Grid includes different celltypes. 
DistributedMixedDofHandler not implemented yet.") DistributedDofHandler(Symbol[], Int[], Interpolation[], BCValues{Float64}[], Int[], Int[], ScalarWrapper(false), grid, ScalarWrapper(-1), Dict{Int,Int}[], Dict{Tuple{Int,Int},Tuple{Int,Bool}}[],Dict{NTuple{dim,Int},Int}[], Dict{Int,Vector{Int}}[], Int[], Int32[]) end diff --git a/ext/FerritePartitionedArrays/grid.jl b/ext/FerritePartitionedArrays/grid.jl index e7ffafd6e6..a7154bc6d9 100644 --- a/ext/FerritePartitionedArrays/grid.jl +++ b/ext/FerritePartitionedArrays/grid.jl @@ -2,7 +2,7 @@ # TODO the following three structs can be merged to one struct with type parameter. """ """ -struct SharedVertex <: SharedEntity +struct SharedVertex <: Ferrite.SharedEntity local_idx::VertexIndex remote_vertices::Dict{Int,Vector{VertexIndex}} end @@ -11,7 +11,7 @@ end """ """ -struct SharedFace <: SharedEntity +struct SharedFace <: Ferrite.SharedEntity local_idx::FaceIndex remote_faces::Dict{Int,Vector{FaceIndex}} end @@ -20,7 +20,7 @@ end """ """ -struct SharedEdge <: SharedEntity +struct SharedEdge <: Ferrite.SharedEntity local_idx::EdgeIndex remote_edges::Dict{Int,Vector{EdgeIndex}} end @@ -31,7 +31,7 @@ end @TODO docs @TODO PArrays ready constructor """ -mutable struct DistributedGrid{dim,C<:AbstractCell,T<:Real} <: AbstractDistributedGrid{dim} +mutable struct DistributedGrid{dim,C<:Ferrite.AbstractCell,T<:Real} <: Ferrite.AbstractDistributedGrid{dim} # Dense comminicator on the grid grid_comm::MPI.Comm # Sparse communicator along the shared vertex neighbors @@ -317,7 +317,7 @@ end # Here we define the entity ownership by the process sharing an entity with lowest rank in the grid communicator. -function compute_owner(dgrid::AbstractDistributedGrid, shared_entity::SharedEntity)::Int32 +function compute_owner(dgrid::Ferrite.AbstractDistributedGrid, shared_entity::Ferrite.SharedEntity)::Int32 my_rank = MPI.Comm_rank(global_comm(dgrid))+1 # Shift rank up by 1 to match Julia's indexing convention return minimum([my_rank; [remote_rank for (remote_rank, _) ∈ remote_entities(shared_entity)]]) end diff --git a/ext/FerritePartitionedArrays/vtk-export.jl b/ext/FerritePartitionedArrays/vtk-export.jl index 3db26fac55..3257ff312b 100644 --- a/ext/FerritePartitionedArrays/vtk-export.jl +++ b/ext/FerritePartitionedArrays/vtk-export.jl @@ -14,7 +14,7 @@ end """ """ -function WriteVTK.vtk_point_data(vtk, dh::AbstractDofHandler, u::PVector) +function WriteVTK.vtk_point_data(vtk, dh::Ferrite.AbstractDofHandler, u::PVector) map_parts(local_view(u, u.rows)) do u_local vtk_point_data(pvtkwrapper(vtk), dh, u_local) end From 78d7c724556a5964c00dde31aa6f0ce711c855d6 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Thu, 16 Feb 2023 18:39:24 +0100 Subject: [PATCH 089/124] Add Pkg to ext for debugging. --- Project.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index f8a8c32e64..d7de90cd1b 100644 --- a/Project.toml +++ b/Project.toml @@ -15,9 +15,10 @@ WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192" [weakdeps] MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" [extensions] -FerritePartitionedArrays = ["MPI", "PartitionedArrays"] +FerritePartitionedArrays = ["Pkg", "MPI", "PartitionedArrays"] [compat] EnumX = "1" From 09e671209642066978455bc391799604a663d11d Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Thu, 16 Feb 2023 19:18:52 +0100 Subject: [PATCH 090/124] Remove Pkg dep again. 
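The ownership convention in `compute_owner` above (a shared entity belongs to the lowest rank among all processes that share it) is easy to check by hand; a tiny sketch with made-up ranks:

    my_rank = 4                               # 1-based rank of this process, as used throughout
    remote_ranks = [2, 7]                     # ranks listed in, e.g., SharedVertex.remote_vertices
    owner = minimum([my_rank; remote_ranks])  # == 2: rank 2 owns the entity and numbers its dofs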
--- Project.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index d7de90cd1b..f8a8c32e64 100644 --- a/Project.toml +++ b/Project.toml @@ -15,10 +15,9 @@ WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192" [weakdeps] MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9" -Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" [extensions] -FerritePartitionedArrays = ["Pkg", "MPI", "PartitionedArrays"] +FerritePartitionedArrays = ["MPI", "PartitionedArrays"] [compat] EnumX = "1" From c4abb01e7d8fa37e730ddc54fbf29cb8318c3daa Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Thu, 16 Feb 2023 19:49:47 +0100 Subject: [PATCH 091/124] Remove debug code. --- ext/FerritePartitionedArrays.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ext/FerritePartitionedArrays.jl b/ext/FerritePartitionedArrays.jl index 89c3f0b551..4e0fdc21ec 100644 --- a/ext/FerritePartitionedArrays.jl +++ b/ext/FerritePartitionedArrays.jl @@ -28,7 +28,6 @@ export vtk_shared_vertices, vtk_shared_faces, vtk_shared_edges, - vtk_partitioning + vtk_partitioning - end # module FerritePartitionedArrays From 4331fbf39a64e11a136fabec27637cd0a8ff00aa Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Thu, 16 Feb 2023 22:36:07 +0100 Subject: [PATCH 092/124] Fix namespace errors and add some functionality from master. --- docs/src/literate/distributed_assembly.jl | 24 ++- docs/src/reference/grid.md | 1 + .../DistributedDofHandler.jl | 194 +++++++++--------- ext/FerritePartitionedArrays/assembler.jl | 20 +- ext/FerritePartitionedArrays/constraints.jl | 2 +- ext/FerritePartitionedArrays/grid.jl | 22 +- ext/FerritePartitionedArrays/iterators.jl | 13 -- ext/FerritePartitionedArrays/vtk-export.jl | 18 +- src/Grid/DistributedGrid.jl | 11 - src/Grid/grid.jl | 3 + src/exports.jl | 10 +- src/iterators.jl | 10 +- 12 files changed, 164 insertions(+), 164 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 7a8a09adc5..d5338ee95c 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -18,15 +18,17 @@ using Ferrite, MPI using IterativeSolvers #, HYPRE using PartitionedArrays #src +FerritePartitionedArrays = Base.get_extension(Ferrite, :FerritePartitionedArrays) + # Launch MPI MPI.Init() # We start generating a simple grid with 20x20 quadrilateral elements # and distribute it across our processors using `generate_distributed_grid`. -# dgrid = generate_distributed_grid(QuadraticQuadrilateral, (3, 1)); -# dgrid = generate_distributed_grid(Tetrahedron, (2, 2, 2)); -dgrid = generate_distributed_grid(Hexahedron, (2, 2, 2)); #src -# dgrid = generate_distributed_grid(Tetrahedron, (3, 3, 3)); #src +# dgrid = FerritePartitionedArrays.generate_distributed_grid(QuadraticQuadrilateral, (3, 1)); +# dgrid = FerritePartitionedArrays.generate_distributed_grid(Tetrahedron, (2, 2, 2)); +dgrid = FerritePartitionedArrays.generate_distributed_grid(Hexahedron, (2, 1, 1)); #src +# dgrid = FerritePartitionedArrays.generate_distributed_grid(Tetrahedron, (3, 3, 3)); #src # ### Trial and test functions # Nothing changes here. @@ -44,7 +46,7 @@ cellvalues = CellScalarValues(qr, ip, ip_geo); # ### Degrees of freedom # To handle the dofs correctly we now utilize the `DistributedDofHandle` # instead of the `DofHandler`. For the user the interface is the same. 
-dh = DistributedDofHandler(dgrid) +dh = FerritePartitionedArrays.DistributedDofHandler(dgrid) push!(dh, :u, 1, ip) close!(dh); @@ -65,7 +67,7 @@ println("R$my_rank: prescribing $(ch.prescribed_dofs) on $∂Ω") # ### Assembling the linear system # Assembling the system works also mostly analogue. -function doassemble(cellvalues::CellScalarValues{dim}, dh::DistributedDofHandler) where {dim} +function doassemble(cellvalues::CellScalarValues{dim}, dh::FerritePartitionedArrays.DistributedDofHandler) where {dim} n_basefuncs = getnbasefunctions(cellvalues) Ke = zeros(n_basefuncs, n_basefuncs) fe = zeros(n_basefuncs) @@ -77,7 +79,7 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::DistributedDofHandler # may trigger a large amount of communication. # NOTE: At the time of writing the only backend available is a COO # assembly via PartitionedArrays.jl . - assembler = PartitionedArraysCOOAssembler{Float64}(dh) + assembler = FerritePartitionedArrays.COOAssembler{Float64}(dh) # For the local assembly nothing changes for cell in CellIterator(dh) @@ -150,10 +152,10 @@ vtk_grid("heat_equation_distributed", dh) do vtk # For debugging purposes it can be helpful to enrich # the visualization with some meta information about # the grid and its partitioning - vtk_shared_vertices(vtk, dgrid) - vtk_shared_faces(vtk, dgrid) - vtk_shared_edges(vtk, dgrid) #src - vtk_partitioning(vtk, dgrid) + FerritePartitionedArrays.vtk_shared_vertices(vtk, dgrid) + FerritePartitionedArrays.vtk_shared_faces(vtk, dgrid) + FerritePartitionedArrays.vtk_shared_edges(vtk, dgrid) #src + FerritePartitionedArrays.vtk_partitioning(vtk, dgrid) end map_parts(local_view(u, u.rows)) do u_local diff --git a/docs/src/reference/grid.md b/docs/src/reference/grid.md index 4e834d9770..2f3ae62b83 100644 --- a/docs/src/reference/grid.md +++ b/docs/src/reference/grid.md @@ -24,6 +24,7 @@ getcells getncells getnodes getnnodes +getdim Ferrite.nnodes_per_cell getcellset getcellsets diff --git a/ext/FerritePartitionedArrays/DistributedDofHandler.jl b/ext/FerritePartitionedArrays/DistributedDofHandler.jl index 9272bc71fb..6cf47746b9 100644 --- a/ext/FerritePartitionedArrays/DistributedDofHandler.jl +++ b/ext/FerritePartitionedArrays/DistributedDofHandler.jl @@ -33,7 +33,7 @@ end function DistributedDofHandler(grid::Ferrite.AbstractDistributedGrid{dim}) where {dim} isconcretetype(getcelltype(grid)) || error("Grid includes different celltypes. 
DistributedMixedDofHandler not implemented yet.") - DistributedDofHandler(Symbol[], Int[], Interpolation[], BCValues{Float64}[], Int[], Int[], ScalarWrapper(false), grid, ScalarWrapper(-1), Dict{Int,Int}[], Dict{Tuple{Int,Int},Tuple{Int,Bool}}[],Dict{NTuple{dim,Int},Int}[], Dict{Int,Vector{Int}}[], Int[], Int32[]) + DistributedDofHandler(Symbol[], Int[], Interpolation[], Ferrite.BCValues{Float64}[], Int[], Int[], Ferrite.ScalarWrapper(false), grid, Ferrite.ScalarWrapper(-1), Dict{Int,Int}[], Dict{Tuple{Int,Int},Tuple{Int,Bool}}[],Dict{NTuple{dim,Int},Int}[], Dict{Int,Vector{Int}}[], Int[], Int32[]) end function Base.show(io::IO, ::MIME"text/plain", dh::DistributedDofHandler) @@ -42,7 +42,7 @@ function Base.show(io::IO, ::MIME"text/plain", dh::DistributedDofHandler) for i in 1:num_fields(dh) println(io, " ", repr(dh.field_names[i]), ", interpolation: ", dh.field_interpolations[i],", dim: ", dh.field_dims[i]) end - if !isclosed(dh) + if !Ferrite.isclosed(dh) print(io, " Not closed!") else println(io, " Dofs per cell: ", ndofs_per_cell(dh)) @@ -50,7 +50,7 @@ function Base.show(io::IO, ::MIME"text/plain", dh::DistributedDofHandler) end end -getlocalgrid(dh::DistributedDofHandler) = getlocalgrid(dh.grid) +Ferrite.getlocalgrid(dh::DistributedDofHandler) = Ferrite.getlocalgrid(dh.grid) getglobalgrid(dh::DistributedDofHandler) = dh.grid # Compat layer against serial code @@ -58,7 +58,7 @@ Ferrite.getgrid(dh::DistributedDofHandler) = getlocalgrid(dh) # TODO this is copy pasta from DofHandler.jl function Ferrite.celldofs!(global_dofs::Vector{Int}, dh::DistributedDofHandler, i::Int) - @assert isclosed(dh) + @assert Ferrite.isclosed(dh) @assert length(global_dofs) == ndofs_per_cell(dh, i) unsafe_copyto!(global_dofs, 1, dh.cell_dofs, dh.cell_dofs_offset[i], length(global_dofs)) return global_dofs @@ -69,7 +69,7 @@ Ferrite.cellcoords!(global_coords::Vector{<:Vec}, dh::DistributedDofHandler, i:: # TODO this is copy pasta from DofHandler.jl function Ferrite.celldofs(dh::DistributedDofHandler, i::Int) - @assert isclosed(dh) + @assert Ferrite.isclosed(dh) n = ndofs_per_cell(dh, i) global_dofs = zeros(Int, n) unsafe_copyto!(global_dofs, 1, dh.cell_dofs, dh.cell_dofs_offset[i], n) @@ -87,9 +87,9 @@ function compute_dof_ownership(dh) for (lvi, sv) ∈ get_shared_vertices(dgrid) for field_idx in 1:num_fields(dh) - vi = toglobal(dgrid, lvi) - if has_vertex_dofs(dh, field_idx, vi) - local_dof_idx = vertex_dofs(dh, field_idx, vi) + vi = Ferrite.toglobal(dgrid, lvi) + if Ferrite.has_vertex_dofs(dh, field_idx, vi) + local_dof_idx = Ferrite.vertex_dofs(dh, field_idx, vi) for d in 1:dh.field_dims[field_idx] dof_owner[local_dof_idx+d-1] = compute_owner(dgrid, sv) end @@ -99,9 +99,9 @@ function compute_dof_ownership(dh) for (lfi, sf) ∈ get_shared_faces(dgrid) for field_idx in 1:num_fields(dh) - fi = toglobal(dgrid, lfi) - if has_face_dofs(dh, field_idx, fi) - local_dof_idx = face_dofs(dh, field_idx, fi) + fi = Ferrite.toglobal(dgrid, lfi) + if Ferrite.has_face_dofs(dh, field_idx, fi) + local_dof_idx = Ferrite.face_dofs(dh, field_idx, fi) for d in 1:dh.field_dims[field_idx] dof_owner[local_dof_idx+d-1] = compute_owner(dgrid, sf) end @@ -111,9 +111,9 @@ function compute_dof_ownership(dh) for (lei, se) ∈ get_shared_edges(dgrid) for field_idx in 1:num_fields(dh) - ei = toglobal(dgrid, lei) - if has_edge_dofs(dh, field_idx, ei) - local_dof_idx = edge_dofs(dh, field_idx, ei) + ei = Ferrite.toglobal(dgrid, lei) + if Ferrite.has_edge_dofs(dh, field_idx, ei) + local_dof_idx = Ferrite.edge_dofs(dh, field_idx, ei) for d in 
1:dh.field_dims[field_idx] dof_owner[local_dof_idx+d-1] = compute_owner(dgrid, se) end @@ -175,13 +175,13 @@ function local_to_global_numbering(dh::DistributedDofHandler) # TODO: implement for entitied with dim > 0 next_local_idx = 1 for (ci, cell) in enumerate(getcells(getgrid(dh))) - @debug println("cell #$ci (R$my_rank)") + Ferrite.@debug println("cell #$ci (R$my_rank)") for field_idx in 1:num_fields(dh) - @debug println(" field: $(dh.field_names[field_idx]) (R$my_rank)") - interpolation_info = InterpolationInfo(dh.field_interpolations[field_idx]) + Ferrite.@debug println(" field: $(dh.field_names[field_idx]) (R$my_rank)") + interpolation_info = Ferrite.InterpolationInfo(dh.field_interpolations[field_idx]) if interpolation_info.nvertexdofs > 0 - for (vi,vertex) in enumerate(vertices(cell)) - @debug println(" vertex#$vertex (R$my_rank)") + for (vi,vertex) in enumerate(Ferrite.vertices(cell)) + Ferrite.@debug println(" vertex#$vertex (R$my_rank)") lvi = VertexIndex(ci,vi) # Dof is owned if it is local or if my rank is the smallest in the neighborhood if !is_shared_vertex(dgrid, lvi) || (compute_owner(dgrid, get_shared_vertex(dgrid, lvi)) == my_rank) @@ -189,13 +189,13 @@ function local_to_global_numbering(dh::DistributedDofHandler) dof_local_idx = dh.vertexdicts[field_idx][vertex] if local_to_global[dof_local_idx] == 0 for d in 1:dh.field_dims[field_idx] - @debug println(" mapping vertex dof#$dof_local_idx to $next_local_idx (R$my_rank)") + Ferrite.@debug println(" mapping vertex dof#$dof_local_idx to $next_local_idx (R$my_rank)") local_to_global[dof_local_idx+d-1] = next_local_idx next_local_idx += 1 end else for d in 1:dh.field_dims[field_idx] - @debug println(" vertex dof#$(dof_local_idx+d-1) already mapped to $(local_to_global[dof_local_idx+d-1]) (R$my_rank)") + Ferrite.@debug println(" vertex dof#$(dof_local_idx+d-1) already mapped to $(local_to_global[dof_local_idx+d-1]) (R$my_rank)") end end end @@ -212,7 +212,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) if !haskey(vertices_send,remote_rank) vertices_send[remote_rank] = Vector{VertexIndex}() end - @debug println(" prepare sending vertex #$(lvi) to $remote_rank (R$my_rank)") + Ferrite.@debug println(" prepare sending vertex #$(lvi) to $remote_rank (R$my_rank)") for i ∈ svs push!(vertices_send[remote_rank],lvi) end @@ -222,7 +222,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) else n_vertices_recv[remote_rank] += length(svs) end - @debug println(" prepare receiving vertex #$(lvi) from $remote_rank (R$my_rank)") + Ferrite.@debug println(" prepare receiving vertex #$(lvi) from $remote_rank (R$my_rank)") end end end @@ -231,22 +231,22 @@ function local_to_global_numbering(dh::DistributedDofHandler) if dim > 2 # edges only in 3D if interpolation_info.nedgedofs > 0 - for (ei,edge) in enumerate(edges(cell)) - @debug println(" edge#$edge (R$my_rank)") + for (ei,edge) in enumerate(Ferrite.edges(cell)) + Ferrite.@debug println(" edge#$edge (R$my_rank)") lei = EdgeIndex(ci,ei) # Dof is owned if it is local or if my rank is the smallest in the neighborhood if !is_shared_edge(dgrid, lei) || (compute_owner(dgrid, get_shared_edge(dgrid, lei)) == my_rank) # Update dof assignment - dof_local_idx = dh.edgedicts[field_idx][toglobal(getlocalgrid(dgrid), lei)][1] + dof_local_idx = dh.edgedicts[field_idx][Ferrite.toglobal(getlocalgrid(dgrid), lei)][1] if local_to_global[dof_local_idx] == 0 for d in 1:dh.field_dims[field_idx] - @debug println(" mapping edge dof#$(dof_local_idx+d-1) to $next_local_idx (R$my_rank)") 
+ Ferrite.@debug println(" mapping edge dof#$(dof_local_idx+d-1) to $next_local_idx (R$my_rank)") local_to_global[dof_local_idx+d-1] = next_local_idx next_local_idx += 1 end else for d in 1:dh.field_dims[field_idx] - @debug println(" edge dof#$(dof_local_idx+d-1) already mapped to $(local_to_global[dof_local_idx+d-1]) (R$my_rank)") + Ferrite.@debug println(" edge dof#$(dof_local_idx+d-1) already mapped to $(local_to_global[dof_local_idx+d-1]) (R$my_rank)") end end end @@ -263,7 +263,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) if !haskey(edges_send,remote_rank) edges_send[remote_rank] = EdgeIndex[] end - @debug println(" prepare sending edge #$(lei) to $remote_rank (R$my_rank)") + Ferrite.@debug println(" prepare sending edge #$(lei) to $remote_rank (R$my_rank)") for i ∈ svs push!(edges_send[remote_rank], lei) end @@ -272,7 +272,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) edges_recv[remote_rank] = EdgeIndex[] end push!(edges_recv[remote_rank], lei) - @debug println(" prepare receiving edge #$(lei) from $remote_rank (R$my_rank)") + Ferrite.@debug println(" prepare receiving edge #$(lei) from $remote_rank (R$my_rank)") end end end @@ -281,22 +281,22 @@ function local_to_global_numbering(dh::DistributedDofHandler) end if interpolation_info.nfacedofs > 0 && (interpolation_info.dim == dim) - for (fi,face) in enumerate(faces(cell)) - @debug println(" face#$face (R$my_rank)") + for (fi,face) in enumerate(Ferrite.faces(cell)) + Ferrite.@debug println(" face#$face (R$my_rank)") lfi = FaceIndex(ci,fi) # Dof is owned if it is local or if my rank is the smallest in the neighborhood if !is_shared_face(dgrid, lfi) || (compute_owner(dgrid, get_shared_face(dgrid, lfi)) == my_rank) # Update dof assignment - dof_local_idx = dh.facedicts[field_idx][toglobal(getlocalgrid(dgrid), lfi)] + dof_local_idx = dh.facedicts[field_idx][Ferrite.toglobal(getlocalgrid(dgrid), lfi)] if local_to_global[dof_local_idx] == 0 for d in 1:dh.field_dims[field_idx] - @debug println(" mapping face dof#$(dof_local_idx+d-1) to $next_local_idx (R$my_rank)") + Ferrite.@debug println(" mapping face dof#$(dof_local_idx+d-1) to $next_local_idx (R$my_rank)") local_to_global[dof_local_idx+d-1] = next_local_idx next_local_idx += 1 end else for d in 1:dh.field_dims[field_idx] - @debug println(" face dof#$(dof_local_idx+d-1) already mapped to $(local_to_global[dof_local_idx+d-1]) (R$my_rank)") + Ferrite.@debug println(" face dof#$(dof_local_idx+d-1) already mapped to $(local_to_global[dof_local_idx+d-1]) (R$my_rank)") end end end @@ -313,7 +313,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) if !haskey(faces_send,remote_rank) faces_send[remote_rank] = FaceIndex[] end - @debug println(" prepare sending face #$(lfi) to $remote_rank (R$my_rank)") + Ferrite.@debug println(" prepare sending face #$(lfi) to $remote_rank (R$my_rank)") for i ∈ svs push!(faces_send[remote_rank],lfi) end @@ -323,7 +323,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) else n_faces_recv[remote_rank] += length(svs) end - @debug println(" prepare receiving face #$(lfi) from $remote_rank (R$my_rank)") + Ferrite.@debug println(" prepare receiving face #$(lfi) from $remote_rank (R$my_rank)") end end end @@ -331,20 +331,20 @@ function local_to_global_numbering(dh::DistributedDofHandler) end if interpolation_info.ncelldofs > 0 # always distribute new dofs for cell - @debug println(" cell#$ci") + Ferrite.@debug println(" cell#$ci") for celldof in 1:interpolation_info.ncelldofs # Update dof assignment 
dof_local_idx = dh.celldicts[field_idx][ci][celldof] - if local_to_global[dof_local_idx+d-1] == 0 + if local_to_global[dof_local_idx] == 0 for d in 1:dh.field_dims[field_idx] - @debug println(" mapping cell dof#$(dof_local_idx+d-1) to $next_local_idx (R$my_rank)") + Ferrite.@debug println(" mapping cell dof#$(dof_local_idx+d-1) to $next_local_idx (R$my_rank)") local_to_global[dof_local_idx+d-1] = next_local_idx next_local_idx += 1 end else for d in 1:dh.field_dims[field_idx] # Should never happen... - @debug println(" WARNING! cell dof#$(dof_local_idx+d-1) already mapped to $(local_to_global[dof_local_idx+d-1]) (R$my_rank)") + Ferrite.@debug println(" WARNING! cell dof#$(dof_local_idx+d-1) already mapped to $(local_to_global[dof_local_idx+d-1]) (R$my_rank)") end end end # cell loop @@ -354,7 +354,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) # num_true_local_dofs = next_local_idx-1 - @debug println("#true local dofs $num_true_local_dofs (R$my_rank)") + Ferrite.@debug println("#true local dofs $num_true_local_dofs (R$my_rank)") # @TODO optimize the following synchronization with MPI line graph topology # and allgather @@ -366,7 +366,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) if my_rank < MPI.Comm_size(global_comm(dgrid)) MPI.Send(local_offset+num_true_local_dofs, global_comm(dgrid); dest=my_rank+1-1) end - @debug println("#shifted local dof range $(local_offset+1):$(local_offset+num_true_local_dofs) (R$my_rank)") + Ferrite.@debug println("#shifted local dof range $(local_offset+1):$(local_offset+num_true_local_dofs) (R$my_rank)") # Shift assigned local dofs (dofs with value >0) into the global range # At this point in the algorithm the dofs with value 0 are the dofs owned of neighboring processes @@ -385,7 +385,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) for remote_rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) if haskey(vertices_send, remote_rank) n_vertices = length(vertices_send[remote_rank]) - @debug println("Sending $n_vertices vertices to rank $remote_rank (R$my_rank)") + Ferrite.@debug println("Sending $n_vertices vertices to rank $remote_rank (R$my_rank)") remote_cells = Array{Int64}(undef,n_vertices) remote_cell_vis = Array{Int64}(undef,n_vertices) next_buffer_idx = 1 @@ -403,13 +403,13 @@ function local_to_global_numbering(dh::DistributedDofHandler) for fi ∈ 1:num_fields(dh) next_buffer_idx = 1 if length(dh.vertexdicts[fi]) == 0 - @debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") + Ferrite.@debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") continue end # fill correspondence array corresponding_global_dofs = Array{Int64}(undef,n_vertices) for (lci,lclvi) ∈ vertices_send[remote_rank] - vi = vertices(getcells(getgrid(dh),lci))[lclvi] + vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] if haskey(dh.vertexdicts[fi], vi) corresponding_global_dofs[next_buffer_idx] = local_to_global[dh.vertexdicts[fi][vi]] end @@ -421,7 +421,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) if haskey(faces_send, remote_rank) n_faces = length(faces_send[remote_rank]) - @debug println("Sending $n_faces faces to rank $remote_rank (R$my_rank)") + Ferrite.@debug println("Sending $n_faces faces to rank $remote_rank (R$my_rank)") remote_cells = Array{Int64}(undef,n_faces) remote_cell_vis = Array{Int64}(undef,n_faces) next_buffer_idx = 1 @@ -439,13 +439,13 @@ function local_to_global_numbering(dh::DistributedDofHandler) for fi ∈ 1:num_fields(dh) next_buffer_idx = 1 if 
length(dh.facedicts[fi]) == 0 - @debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") + Ferrite.@debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") continue end # fill correspondence array corresponding_global_dofs = Array{Int64}(undef,n_faces) for (lci,lclvi) ∈ faces_send[remote_rank] - vi = sortface(faces(getcells(getgrid(dh),lci))[lclvi]) + vi = Ferrite.sortface(Ferrite.faces(getcells(getgrid(dh),lci))[lclvi]) if haskey(dh.facedicts[fi], vi) corresponding_global_dofs[next_buffer_idx] = local_to_global[dh.facedicts[fi][vi]] end @@ -460,14 +460,14 @@ function local_to_global_numbering(dh::DistributedDofHandler) edges_send_unique_set = Set{Tuple{Int,Int}}() edges_send_unique = Set{EdgeIndex}() for lei ∈ edges_send[remote_rank] - edge = toglobal(dgrid, lei) + edge = Ferrite.toglobal(dgrid, lei) if edge ∉ edges_send_unique_set push!(edges_send_unique_set, edge) push!(edges_send_unique, lei) end end n_edges = length(edges_send_unique) - @debug println("Sending $n_edges edges to rank $remote_rank (R$my_rank)") + Ferrite.@debug println("Sending $n_edges edges to rank $remote_rank (R$my_rank)") remote_cells = Array{Int64}(undef,n_edges) remote_cell_vis = Array{Int64}(undef,n_edges) next_buffer_idx = 1 @@ -485,13 +485,13 @@ function local_to_global_numbering(dh::DistributedDofHandler) for fi ∈ 1:num_fields(dh) next_buffer_idx = 1 if length(dh.edgedicts[fi]) == 0 - @debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") + Ferrite.@debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") continue end # fill correspondence array corresponding_global_dofs = Array{Int64}(undef,n_edges) for (lci,lclvi) ∈ edges_send_unique - vi = sortedge(edges(getcells(getgrid(dh),lci))[lclvi])[1] + vi = Ferrite.sortedge(Ferrite.edges(getcells(getgrid(dh),lci))[lclvi])[1] if haskey(dh.edgedicts[fi], vi) corresponding_global_dofs[next_buffer_idx] = local_to_global[dh.edgedicts[fi][vi][1]] end @@ -504,27 +504,27 @@ function local_to_global_numbering(dh::DistributedDofHandler) else if haskey(n_vertices_recv, sending_rank) n_vertices = n_vertices_recv[sending_rank] - @debug println("Receiving $n_vertices vertices from rank $sending_rank (R$my_rank)") + Ferrite.@debug println("Receiving $n_vertices vertices from rank $sending_rank (R$my_rank)") local_cells = Array{Int64}(undef,n_vertices) local_cell_vis = Array{Int64}(undef,n_vertices) MPI.Recv!(local_cells, global_comm(dgrid); source=sending_rank-1) MPI.Recv!(local_cell_vis, global_comm(dgrid); source=sending_rank-1) for field_idx in 1:num_fields(dh) if length(dh.vertexdicts[field_idx]) == 0 - @debug println(" Skipping recv on field $(dh.field_names[field_idx]) (R$my_rank)") + Ferrite.@debug println(" Skipping recv on field $(dh.field_names[field_idx]) (R$my_rank)") continue end corresponding_global_dofs = Array{Int64}(undef,n_vertices) MPI.Recv!(corresponding_global_dofs, global_comm(dgrid); source=sending_rank-1) for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) - vi = vertices(getcells(getgrid(dh),lci))[lclvi] + vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] if haskey(dh.vertexdicts[field_idx], vi) for d in 1:dh.field_dims[field_idx] local_to_global[dh.vertexdicts[field_idx][vi]+d-1] = corresponding_global_dofs[cdi]+d-1 - @debug println(" Updating field $(dh.field_names[field_idx]) vertex $(VertexIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]+d-1) (R$my_rank)") + Ferrite.@debug println(" Updating field $(dh.field_names[field_idx]) vertex 
$(VertexIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]+d-1) (R$my_rank)") end else - @debug println(" Skipping recv on field $(dh.field_names[field_idx]) vertex $vi (R$my_rank)") + Ferrite.@debug println(" Skipping recv on field $(dh.field_names[field_idx]) vertex $vi (R$my_rank)") end end end @@ -532,27 +532,27 @@ function local_to_global_numbering(dh::DistributedDofHandler) if haskey(n_faces_recv, sending_rank) n_faces = n_faces_recv[sending_rank] - @debug println("Receiving $n_faces faces from rank $sending_rank (R$my_rank)") + Ferrite.@debug println("Receiving $n_faces faces from rank $sending_rank (R$my_rank)") local_cells = Array{Int64}(undef,n_faces) local_cell_vis = Array{Int64}(undef,n_faces) MPI.Recv!(local_cells, global_comm(dgrid); source=sending_rank-1) MPI.Recv!(local_cell_vis, global_comm(dgrid); source=sending_rank-1) for field_idx in 1:num_fields(dh) if length(dh.facedicts[field_idx]) == 0 - @debug println(" Skipping recv on field $(dh.field_names[field_idx]) (R$my_rank)") + Ferrite.@debug println(" Skipping recv on field $(dh.field_names[field_idx]) (R$my_rank)") continue end corresponding_global_dofs = Array{Int64}(undef,n_faces) MPI.Recv!(corresponding_global_dofs, global_comm(dgrid); source=sending_rank-1) for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) - vi = sortface(faces(getcells(getgrid(dh),lci))[lclvi]) + vi = Ferrite.sortface(Ferrite.faces(getcells(getgrid(dh),lci))[lclvi]) if haskey(dh.facedicts[field_idx], vi) for d in 1:dh.field_dims[field_idx] local_to_global[dh.facedicts[field_idx][vi]+d-1] = corresponding_global_dofs[cdi]+d-1 - @debug println(" Updating field $(dh.field_names[field_idx]) face $(FaceIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") + Ferrite.@debug println(" Updating field $(dh.field_names[field_idx]) face $(FaceIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") end else - @debug println(" Skipping recv on field $(dh.field_names[field_idx]) face $vi (R$my_rank)") + Ferrite.@debug println(" Skipping recv on field $(dh.field_names[field_idx]) face $vi (R$my_rank)") end end end @@ -561,32 +561,32 @@ function local_to_global_numbering(dh::DistributedDofHandler) if haskey(edges_recv, sending_rank) edges_recv_unique_set = Set{Tuple{Int,Int}}() for lei ∈ edges_recv[sending_rank] - edge = toglobal(dgrid, lei) + edge = Ferrite.toglobal(dgrid, lei) push!(edges_recv_unique_set, edge) end n_edges = length(edges_recv_unique_set) - @debug println("Receiving $n_edges edges from rank $sending_rank (R$my_rank)") + Ferrite.@debug println("Receiving $n_edges edges from rank $sending_rank (R$my_rank)") local_cells = Array{Int64}(undef,n_edges) local_cell_vis = Array{Int64}(undef,n_edges) MPI.Recv!(local_cells, global_comm(dgrid); source=sending_rank-1) MPI.Recv!(local_cell_vis, global_comm(dgrid); source=sending_rank-1) for field_idx in 1:num_fields(dh) if length(dh.edgedicts[field_idx]) == 0 - @debug println(" Skipping recv on field $(dh.field_names[field_idx]) (R$my_rank)") + Ferrite.@debug println(" Skipping recv on field $(dh.field_names[field_idx]) (R$my_rank)") continue end corresponding_global_dofs = Array{Int64}(undef,n_edges) MPI.Recv!(corresponding_global_dofs, global_comm(dgrid); source=sending_rank-1) - @debug println(" Received $corresponding_global_dofs edge dofs from $sending_rank (R$my_rank)") + Ferrite.@debug println(" Received $corresponding_global_dofs edge dofs from $sending_rank (R$my_rank)") for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) - vi = 
sortedge(edges(getcells(getgrid(dh),lci))[lclvi])[1] + vi = Ferrite.sortedge(Ferrite.edges(getcells(getgrid(dh),lci))[lclvi])[1] if haskey(dh.edgedicts[field_idx], vi) for d in 1:dh.field_dims[field_idx] local_to_global[dh.edgedicts[field_idx][vi][1]+d-1] = corresponding_global_dofs[cdi]+d-1 - @debug println(" Updating field $(dh.field_names[field_idx]) edge $(EdgeIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") + Ferrite.@debug println(" Updating field $(dh.field_names[field_idx]) edge $(EdgeIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") end else - @debug println(" Skipping recv on field $(dh.field_names[field_idx]) edge $vi (R$my_rank)") + Ferrite.@debug println(" Skipping recv on field $(dh.field_names[field_idx]) edge $vi (R$my_rank)") end end end @@ -595,10 +595,10 @@ function local_to_global_numbering(dh::DistributedDofHandler) end # Postcondition: All local dofs need a corresponding global dof! - @debug println("Local to global mapping: $local_to_global (R$my_rank)") + Ferrite.@debug println("Local to global mapping: $local_to_global (R$my_rank)") @assert findfirst(local_to_global .== 0) === nothing - # @debug vtk_grid("dofs", dgrid; compress=false) do vtk + # Ferrite.@debug vtk_grid("dofs", dgrid; compress=false) do vtk # u = Vector{Float64}(undef,length(dgrid.local_grid.nodes)) # fill!(u, 0.0) # for i=1:length(u) @@ -612,7 +612,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) end function Ferrite.close!(dh::DistributedDofHandler) - __close!(dh) + Ferrite.__close!(dh) append!(dh.ldof_to_gdof, local_to_global_numbering(dh)) append!(dh.ldof_to_rank, compute_dof_ownership(dh)) dh.ndofs.x = num_local_dofs(dh) @@ -621,7 +621,7 @@ end # TODO this is copy pasta from DofHandler.jl # close the DofHandler and distribute all the dofs function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} - @assert !isclosed(dh) + @assert !Ferrite.isclosed(dh) # `vertexdict` keeps track of the visited vertices. We store the global vertex # number and the first dof we added to that vertex. @@ -657,13 +657,13 @@ function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} # celldofs are never shared between different cells so there is no need # for a `celldict` to keep track of which cells we have added dofs too. 
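Aside on the numbering scheme above (not part of the patch): the rank-by-rank MPI.Recv/MPI.Send chain that computes local_offset is exactly the serial step the @TODO proposes to replace with an allgather. A minimal, untested sketch of that variant is given below; the helper name compute_dof_offset is hypothetical, and only MPI.Allgather from MPI.jl is assumed.

using MPI

# Hypothetical helper, illustrative only: gather every rank's count of owned
# ("true local") dofs and derive the shift locally instead of chaining Send/Recv.
function compute_dof_offset(num_true_local_dofs::Int, comm::MPI.Comm)
    my_rank = MPI.Comm_rank(comm) + 1                     # 1-based rank, as in the code above
    counts  = MPI.Allgather([num_true_local_dofs], comm)  # owned dof count of every rank
    return sum(counts[1:my_rank-1])                       # dofs owned by all lower ranks
end

The dictionaries described in the comments above drive the dof reuse in the cell loop that follows. Stripped of the Base.ht_keyindex2! token optimization used there, and simplified to one dof per entity and field component, the per-entity pattern is essentially the following sketch (not the actual implementation):

# Simplified view of the reuse logic below (the real code avoids hashing each key twice
# and handles edge direction and face orientation when reusing dofs).
function entity_dofs!(cell_dofs::Vector{Int}, entitydict::Dict, key, nextdof::Int, fielddim::Int)
    if haskey(entitydict, key)                  # entity seen before -> reuse its dofs
        first_dof = entitydict[key]
        append!(cell_dofs, first_dof:(first_dof + fielddim - 1))
    else                                        # first visit -> hand out fresh dofs
        entitydict[key] = nextdof
        append!(cell_dofs, nextdof:(nextdof + fielddim - 1))
        nextdof += fielddim
    end
    return nextdof
end

Here key would be the raw vertex number, sortedge(edge)[1], or sortface(face), so that neighboring cells (and, for shared entities, neighboring ranks) agree on the lookup key.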
- # We create the `InterpolationInfo` structs with precomputed information for each + # We create the `Ferrite.InterpolationInfo` structs with precomputed information for each # interpolation since that allows having the cell loop as the outermost loop, # and the interpolation loop inside without using a function barrier - interpolation_infos = InterpolationInfo[] + interpolation_infos = Ferrite.InterpolationInfo[] for interpolation in dh.field_interpolations - # push!(dh.interpolation_info, InterpolationInfo(interpolation)) - push!(interpolation_infos, InterpolationInfo(interpolation)) + # push!(dh.interpolation_info, Ferrite.InterpolationInfo(interpolation)) + push!(interpolation_infos, Ferrite.InterpolationInfo(interpolation)) end # not implemented yet: more than one facedof per face in 3D @@ -674,25 +674,25 @@ function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} # loop over all the cells, and distribute dofs for all the fields for (ci, cell) in enumerate(getcells(getgrid(dh))) - @debug println("cell #$ci") + Ferrite.@debug println("cell #$ci") for fi in 1:num_fields(dh) interpolation_info = interpolation_infos[fi] - @debug println(" field: $(dh.field_names[fi])") + Ferrite.@debug println(" field: $(dh.field_names[fi])") if interpolation_info.nvertexdofs > 0 - for vertex in vertices(cell) - @debug println(" vertex#$vertex") + for vertex in Ferrite.vertices(cell) + Ferrite.@debug println(" vertex#$vertex") token = Base.ht_keyindex2!(dh.vertexdicts[fi], vertex) if token > 0 # haskey(dh.vertexdicts[fi], vertex) # reuse dofs reuse_dof = dh.vertexdicts[fi].vals[token] # dh.vertexdicts[fi][vertex] for d in 1:dh.field_dims[fi] - @debug println(" reusing dof #$(reuse_dof + (d-1))") + Ferrite.@debug println(" reusing dof #$(reuse_dof + (d-1))") push!(dh.cell_dofs, reuse_dof + (d-1)) end else # token <= 0, distribute new dofs for vertexdof in 1:interpolation_info.nvertexdofs Base._setindex!(dh.vertexdicts[fi], nextdof, vertex, -token) # dh.vertexdicts[fi][vertex] = nextdof for d in 1:dh.field_dims[fi] - @debug println(" adding dof#$nextdof") + Ferrite.@debug println(" adding dof#$nextdof") push!(dh.cell_dofs, nextdof) nextdof += 1 end @@ -702,16 +702,16 @@ function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} end if dim > 2 # edges only in 3D if interpolation_info.nedgedofs > 0 - for edge in edges(cell) - sedge, dir = sortedge(edge) - @debug println(" edge#$sedge dir: $(dir)") + for edge in Ferrite.edges(cell) + sedge, dir = Ferrite.sortedge(edge) + Ferrite.@debug println(" edge#$sedge dir: $(dir)") token = Base.ht_keyindex2!(dh.edgedicts[fi], sedge) if token > 0 # haskey(dh.edgedicts[fi], sedge), reuse dofs startdof, olddir = dh.edgedicts[fi].vals[token] # dh.edgedicts[fi][sedge] # first dof for this edge (if dir == true) for edgedof in (dir == olddir ? 
(1:interpolation_info.nedgedofs) : (interpolation_info.nedgedofs:-1:1)) for d in 1:dh.field_dims[fi] reuse_dof = startdof + (d-1) + (edgedof-1)*dh.field_dims[fi] - @debug println(" reusing dof#$(reuse_dof)") + Ferrite.@debug println(" reusing dof#$(reuse_dof)") push!(dh.cell_dofs, reuse_dof) end end @@ -719,7 +719,7 @@ function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} Base._setindex!(dh.edgedicts[fi], (nextdof, dir), sedge, -token) # dh.edgedicts[fi][sedge] = (nextdof, dir), store only the first dof for the edge for edgedof in 1:interpolation_info.nedgedofs for d in 1:dh.field_dims[fi] - @debug println(" adding dof#$nextdof") + Ferrite.@debug println(" adding dof#$nextdof") push!(dh.cell_dofs, nextdof) nextdof += 1 end @@ -729,16 +729,16 @@ function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} end end if interpolation_info.nfacedofs > 0 && (interpolation_info.dim == dim) - for face in faces(cell) - sface = sortface(face) # TODO: faces(cell) may as well just return the sorted list - @debug println(" face#$sface") + for face in Ferrite.faces(cell) + sface = Ferrite.sortface(face) # TODO: faces(cell) may as well just return the sorted list + Ferrite.@debug println(" face#$sface") token = Base.ht_keyindex2!(dh.facedicts[fi], sface) if token > 0 # haskey(dh.facedicts[fi], sface), reuse dofs startdof = dh.facedicts[fi].vals[token] # dh.facedicts[fi][sface] for facedof in interpolation_info.nfacedofs:-1:1 # always reverse (YOLO) for d in 1:dh.field_dims[fi] reuse_dof = startdof + (d-1) + (facedof-1)*dh.field_dims[fi] - @debug println(" reusing dof#$(reuse_dof)") + Ferrite.@debug println(" reusing dof#$(reuse_dof)") push!(dh.cell_dofs, reuse_dof) end end @@ -746,7 +746,7 @@ function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} Base._setindex!(dh.facedicts[fi], nextdof, sface, -token)# dh.facedicts[fi][sface] = nextdof, store the first dof for this face for facedof in 1:interpolation_info.nfacedofs for d in 1:dh.field_dims[fi] - @debug println(" adding dof#$nextdof") + Ferrite.@debug println(" adding dof#$nextdof") push!(dh.cell_dofs, nextdof) nextdof += 1 end @@ -755,10 +755,10 @@ function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} end # face loop end if interpolation_info.ncelldofs > 0 # always distribute new dofs for cell - @debug println(" cell#$ci") + Ferrite.@debug println(" cell#$ci") for celldof in 1:interpolation_info.ncelldofs for d in 1:dh.field_dims[fi] - @debug println(" adding dof#$nextdof") + Ferrite.@debug println(" adding dof#$nextdof") if !haskey(dh.celldicts[fi], ci) dh.celldicts[fi][ci] = Vector{Int}(undef,0) end @@ -790,7 +790,7 @@ function Ferrite.reshape_to_nodes(dh::DistributedDofHandler, u::Vector{T}, field space_dim = field_dim == 2 ? 
3 : field_dim data = fill(zero(T), space_dim, getnnodes(getgrid(dh))) - reshape_field_data!(data, dh, u, offset, field_dim) + Ferrite.reshape_field_data!(data, dh, u, offset, field_dim) return data end diff --git a/ext/FerritePartitionedArrays/assembler.jl b/ext/FerritePartitionedArrays/assembler.jl index a2ffd3348b..6a9ce8a93a 100644 --- a/ext/FerritePartitionedArrays/assembler.jl +++ b/ext/FerritePartitionedArrays/assembler.jl @@ -109,7 +109,7 @@ struct COOAssembler{T} # Start by searching shared entities which are not owned pivot_vertex_owner_rank = compute_owner(dgrid, pivot_shared_vertex) pivot_cell_idx = pivot_vertex[1] - pivot_vertex_global = toglobal(getlocalgrid(dgrid), pivot_vertex) + pivot_vertex_global = Ferrite.toglobal(getlocalgrid(dgrid), pivot_vertex) if my_rank != pivot_vertex_owner_rank sender_slot = destination_index[pivot_vertex_owner_rank] @@ -120,8 +120,8 @@ struct COOAssembler{T} cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) - !has_vertex_dofs(dh, field_idx, pivot_vertex_global) && continue - pivot_vertex_dof = vertex_dofs(dh, field_idx, pivot_vertex_global) + !Ferrite.has_vertex_dofs(dh, field_idx, pivot_vertex_global) && continue + pivot_vertex_dof = Ferrite.vertex_dofs(dh, field_idx, pivot_vertex_global) for d ∈ 1:dh.field_dims[field_idx] @debug println(" adding dof $(pivot_vertex_dof+d-1) to ghost sync synchronization on slot $sender_slot (R$my_rank)") @@ -154,11 +154,11 @@ struct COOAssembler{T} cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] - pivot_face_global = toglobal(getlocalgrid(dgrid), pivot_face) + pivot_face_global = Ferrite.toglobal(getlocalgrid(dgrid), pivot_face) for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) - !has_face_dofs(dh, field_idx, pivot_face_global) && continue - pivot_face_dof = face_dofs(dh, field_idx, pivot_face_global) + !Ferrite.has_face_dofs(dh, field_idx, pivot_face_global) && continue + pivot_face_dof = Ferrite.face_dofs(dh, field_idx, pivot_face_global) for d ∈ 1:dh.field_dims[field_idx] @debug println(" adding dof $(pivot_face_dof+d-1) to ghost sync synchronization on slot $sender_slot (R$my_rank)") @@ -192,11 +192,11 @@ struct COOAssembler{T} cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? 
length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] - pivot_edge_global = toglobal(getlocalgrid(dgrid), pivot_edge) + pivot_edge_global = Ferrite.toglobal(getlocalgrid(dgrid), pivot_edge) for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) - !has_edge_dofs(dh, field_idx, pivot_edge_global) && continue - pivot_edge_dof = edge_dofs(dh, field_idx, pivot_edge_global) + !Ferrite.has_edge_dofs(dh, field_idx, pivot_edge_global) && continue + pivot_edge_dof = Ferrite.edge_dofs(dh, field_idx, pivot_edge_global) for d ∈ 1:dh.field_dims[field_idx] @debug println(" adding dof $(pivot_edge_dof+d-1) to ghost sync synchronization on slot $sender_slot (R$my_rank)") @@ -207,7 +207,7 @@ struct COOAssembler{T} append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_edge_dof+d-1]) append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_dof]) append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_dof]) - # append!(ghost_dof_field_index_to_send[sender_slot], field_idx) + append!(ghost_dof_field_index_to_send[sender_slot], field_idx) end end end diff --git a/ext/FerritePartitionedArrays/constraints.jl b/ext/FerritePartitionedArrays/constraints.jl index feadb398f4..7b07239efc 100644 --- a/ext/FerritePartitionedArrays/constraints.jl +++ b/ext/FerritePartitionedArrays/constraints.jl @@ -44,7 +44,7 @@ function Ferrite.apply!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays end f .-= K*u_constrained - m = meandiag(K) + m = Ferrite.meandiag(K) # Then fix the map_parts(local_view(f, f.rows), f.rows.partition) do f_local, partition diff --git a/ext/FerritePartitionedArrays/grid.jl b/ext/FerritePartitionedArrays/grid.jl index a7154bc6d9..92eb208c94 100644 --- a/ext/FerritePartitionedArrays/grid.jl +++ b/ext/FerritePartitionedArrays/grid.jl @@ -46,6 +46,16 @@ mutable struct DistributedGrid{dim,C<:Ferrite.AbstractCell,T<:Real} <: Ferrite.A shared_faces::Dict{FaceIndex,SharedFace} end +""" +Global dense communicator of the distributed grid. +""" +@inline global_comm(dgrid::DistributedGrid) = dgrid.grid_comm + +""" +Graph communicator for shared vertices. Guaranteed to be derived from the communicator +returned by @global_comm . 
+""" +@inline vertex_comm(dgrid::DistributedGrid) = dgrid.interface_comm """ """ @@ -223,14 +233,14 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu for (global_cell_idx,global_cell) ∈ enumerate(getcells(grid_to_distribute)) if parts[global_cell_idx] == my_rank # Vertex - for (i, _) ∈ enumerate(vertices(global_cell)) + for (i, _) ∈ enumerate(Ferrite.vertices(global_cell)) cell_vertex = VertexIndex(global_cell_idx, i) remote_vertices = Dict{Int,Vector{VertexIndex}}() for other_vertex ∈ getneighborhood(grid_topology, grid_to_distribute, cell_vertex, true) (global_cell_neighbor_idx, j) = other_vertex other_rank = parts[global_cell_neighbor_idx] if other_rank != my_rank - if toglobal(grid_to_distribute,cell_vertex) == toglobal(grid_to_distribute,other_vertex) + if Ferrite.toglobal(grid_to_distribute,cell_vertex) == Ferrite.toglobal(grid_to_distribute,other_vertex) if !haskey(remote_vertices,other_rank) remote_vertices[other_rank] = Vector(undef,0) end @@ -248,14 +258,14 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu # Face if dim > 1 - for (i, _) ∈ enumerate(faces(global_cell)) + for (i, _) ∈ enumerate(Ferrite.faces(global_cell)) cell_face = FaceIndex(global_cell_idx, i) remote_faces = Dict{Int,Vector{FaceIndex}}() for other_face ∈ getneighborhood(grid_topology, grid_to_distribute, cell_face, true) (global_cell_neighbor_idx, j) = other_face other_rank = parts[global_cell_neighbor_idx] if other_rank != my_rank - if toglobal(grid_to_distribute,cell_face) == toglobal(grid_to_distribute,other_face) + if Ferrite.toglobal(grid_to_distribute,cell_face) == Ferrite.toglobal(grid_to_distribute,other_face) if !haskey(remote_faces,other_rank) remote_faces[other_rank] = Vector(undef,0) end @@ -274,14 +284,14 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu # Edge if dim > 2 - for (i, _) ∈ enumerate(edges(global_cell)) + for (i, _) ∈ enumerate(Ferrite.edges(global_cell)) cell_edge = EdgeIndex(global_cell_idx, i) remote_edges = Dict{Int,Vector{EdgeIndex}}() for other_edge ∈ getneighborhood(grid_topology, grid_to_distribute, cell_edge, true) (global_cell_neighbor_idx, j) = other_edge other_rank = parts[global_cell_neighbor_idx] if other_rank != my_rank - if toglobal(grid_to_distribute,cell_edge) == toglobal(grid_to_distribute,other_edge) + if Ferrite.toglobal(grid_to_distribute,cell_edge) == Ferrite.toglobal(grid_to_distribute,other_edge) if !haskey(remote_edges,other_edge) remote_edges[other_rank] = Vector(undef,0) end diff --git a/ext/FerritePartitionedArrays/iterators.jl b/ext/FerritePartitionedArrays/iterators.jl index 6aced4a20d..e69de29bb2 100644 --- a/ext/FerritePartitionedArrays/iterators.jl +++ b/ext/FerritePartitionedArrays/iterators.jl @@ -1,13 +0,0 @@ -""" -This is copy pasta for now. -""" -function Ferrite.CellIterator(dh::DistributedDofHandler{dim,T}, cellset::Union{AbstractVector{Int},Nothing}=nothing, flags::UpdateFlags=UpdateFlags()) where {dim,C,T} - isconcretetype(C) || _check_same_celltype(getgrid(dh), cellset) - N = nnodes_per_cell(getgrid(dh), cellset === nothing ? 1 : first(cellset)) - cell = ScalarWrapper(0) - nodes = zeros(Int, N) - coords = zeros(Vec{dim,T}, N) - n = ndofs_per_cell(dh, cellset === nothing ? 
1 : first(cellset)) - celldofs = zeros(Int, n) - return Ferrite.CellIterator{dim,C,T,typeof(dh)}(flags, getgrid(dh), cell, nodes, coords, cellset, dh, celldofs) -end diff --git a/ext/FerritePartitionedArrays/vtk-export.jl b/ext/FerritePartitionedArrays/vtk-export.jl index 3257ff312b..e6cd873178 100644 --- a/ext/FerritePartitionedArrays/vtk-export.jl +++ b/ext/FerritePartitionedArrays/vtk-export.jl @@ -6,7 +6,7 @@ function WriteVTK.vtk_grid(filename::AbstractString, dgrid::DistributedGrid{dim, cls = MeshCell[] for cell in getcells(dgrid) celltype = Ferrite.cell_to_vtkcell(typeof(cell)) - push!(cls, MeshCell(celltype, nodes_to_vtkorder(cell))) + push!(cls, MeshCell(celltype, Ferrite.nodes_to_vtkorder(cell))) end coords = reshape(reinterpret(T, getnodes(dgrid)), (dim, getnnodes(dgrid))) return pvtk_grid(filename, coords, cls; part=part, nparts=nparts, compress=compress) @@ -16,7 +16,7 @@ end """ function WriteVTK.vtk_point_data(vtk, dh::Ferrite.AbstractDofHandler, u::PVector) map_parts(local_view(u, u.rows)) do u_local - vtk_point_data(pvtkwrapper(vtk), dh, u_local) + vtk_point_data(Ferrite.pvtkwrapper(vtk), dh, u_local) end end @@ -32,10 +32,10 @@ function vtk_shared_vertices(vtk, dgrid::DistributedGrid) if haskey(sv.remote_vertices, rank) (cellidx, i) = sv.local_idx cell = getcells(dgrid, cellidx) - u[vertices(cell)[i]] = my_rank + u[Ferrite.vertices(cell)[i]] = my_rank end end - vtk_point_data(pvtkwrapper(vtk), u, "shared vertices with $rank") + vtk_point_data(Ferrite.pvtkwrapper(vtk), u, "shared vertices with $rank") end end @@ -52,11 +52,11 @@ function vtk_shared_faces(vtk, dgrid::DistributedGrid) if haskey(sf.remote_faces, rank) (cellidx, i) = sf.local_idx cell = getcells(dgrid, cellidx) - facenodes = faces(cell)[i] + facenodes = Ferrite.faces(cell)[i] u[[facenodes...]] .= my_rank end end - vtk_point_data(pvtkwrapper(vtk), u, "shared faces with $rank") + vtk_point_data(Ferrite.pvtkwrapper(vtk), u, "shared faces with $rank") end end @@ -73,11 +73,11 @@ function vtk_shared_edges(vtk, dgrid::DistributedGrid) if haskey(se.remote_edges, rank) (cellidx, i) = se.local_idx cell = getcells(dgrid, cellidx) - edgenodes = edges(cell)[i] + edgenodes = Ferrite.edges(cell)[i] u[[edgenodes...]] .= my_rank end end - vtk_point_data(pvtkwrapper(vtk), u, "shared edges with $rank") + vtk_point_data(Ferrite.pvtkwrapper(vtk), u, "shared edges with $rank") end end @@ -87,5 +87,5 @@ Enrich the VTK file with partitioning meta information. function vtk_partitioning(vtk, dgrid::DistributedGrid) u = Vector{Float64}(undef, getncells(dgrid)) u .= MPI.Comm_rank(global_comm(dgrid))+1 - vtk_cell_data(pvtkwrapper(vtk), u, "partitioning") + vtk_cell_data(Ferrite.pvtkwrapper(vtk), u, "partitioning") end diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index bb845b699d..17bca38120 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -21,17 +21,6 @@ abstract type SharedEntity end @inline is_shared_face(dgrid::AbstractDistributedGrid, fi::FaceIndex) = haskey(dgrid.shared_faces, fi) -""" -Global dense communicator of the distributed grid. -""" -@inline global_comm(dgrid::AbstractDistributedGrid) = dgrid.grid_comm - -""" -Graph communicator for shared vertices. Guaranteed to be derived from the communicator -returned by @global_comm . 
-""" -@inline vertex_comm(dgrid::AbstractDistributedGrid) = dgrid.interface_comm - @inline getlocalgrid(dgrid::AbstractDistributedGrid) = dgrid.local_grid @inline getnodes(dgrid::AbstractDistributedGrid) = getnodes(getlocalgrid(dgrid)) diff --git a/src/Grid/grid.jl b/src/Grid/grid.jl index 489b99e4bc..f2c68e2120 100644 --- a/src/Grid/grid.jl +++ b/src/Grid/grid.jl @@ -14,6 +14,7 @@ struct Node{dim,T} end Node(x::NTuple{dim,T}) where {dim,T} = Node(Vec{dim,T}(x)) getcoordinates(n::Node) = n.x +get_coordinate_eltype(::Node{dim,T}) where {dim,T} = T abstract type AbstractCell{dim,N,M} end @@ -465,6 +466,8 @@ to a Node. @inline getnnodes(grid::AbstractGrid) = length(grid.nodes) "Returns the number of nodes of the `i`-th cell." @inline nnodes_per_cell(grid::AbstractGrid, i::Int=1) = nnodes(grid.cells[i]) +"Return the number type of the nodal coordinates." +@inline get_coordinate_eltype(grid::AbstractGrid) = get_coordinate_eltype(first(getnodes(grid))) """ getcellset(grid::AbstractGrid, setname::String) diff --git a/src/exports.jl b/src/exports.jl index fbb2c8ea3f..efcb0f1b8b 100644 --- a/src/exports.jl +++ b/src/exports.jl @@ -83,7 +83,7 @@ export getfacesets, getedgesets, getvertexsets, - global_comm, + getdim, vertex_comm, onboundary, nfaces, @@ -96,8 +96,13 @@ export generate_grid, compute_vertex_values, is_shared_vertex, + get_shared_vertex, get_shared_vertices, + is_shared_face, + get_shared_face, get_shared_faces, + is_shared_edge, + get_shared_edge, get_shared_edges, # Grid coloring @@ -115,6 +120,7 @@ export ndofs_per_cell, celldofs!, celldofs, + cellcoords!, create_sparsity_pattern, create_symmetric_sparsity_pattern, dof_range, @@ -124,7 +130,9 @@ export Field, reshape_to_nodes, num_fields, + field_offset, getfieldnames, + getfielddim, dof_range, #entity_dofs, diff --git a/src/iterators.jl b/src/iterators.jl index 211b07693c..f9b3c99ed6 100644 --- a/src/iterators.jl +++ b/src/iterators.jl @@ -41,7 +41,11 @@ struct CellIterator{dim,C,T,DH<:Union{AbstractDofHandler,Nothing}} dh::Union{DH,Nothing} celldofs::Vector{Int} - function CellIterator{dim,C,T}(dh::Union{DofHandler{dim,T,G},MixedDofHandler{dim,T,G},Nothing}, cellset::Union{AbstractVector{Int},Nothing}, flags::UpdateFlags) where {dim,C,T,G} + function CellIterator(dh::DH, cellset::Union{AbstractVector{Int},Nothing}=nothing, flags::UpdateFlags=UpdateFlags()) where {DH<:AbstractDofHandler} + grid = getgrid(dh) + dim = getdim(grid) + C = getcelltype(grid) + T = get_coordinate_eltype(grid) isconcretetype(C) || _check_same_celltype(getgrid(dh), cellset) N = nnodes_per_cell(getgrid(dh), cellset === nothing ? 
1 : first(cellset)) cell = ScalarWrapper(0) @@ -64,10 +68,6 @@ end CellIterator(grid::Grid{dim,C,T}, cellset::Union{AbstractVector{Int},Nothing}=nothing, flags::UpdateFlags=UpdateFlags()) where {dim,C,T} = CellIterator{dim,C,T}(grid, cellset, flags) -CellIterator(dh::DofHandler{dim,T}, cellset::Union{AbstractVector{Int},Nothing}=nothing, flags::UpdateFlags=UpdateFlags()) where {dim,T} = - CellIterator{dim,getcelltype(dh.grid),T}(dh, cellset, flags) -CellIterator(dh::MixedDofHandler{dim,T}, cellset::Union{AbstractVector{Int},Nothing}=nothing, flags::UpdateFlags=UpdateFlags()) where {dim,T} = - CellIterator{dim,getcelltype(dh.grid),T}(dh, cellset, flags) # iterator interface function Base.iterate(ci::CellIterator, state = 1) From 224785aedf58e9d85a475d2734eb65cec2485c2a Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Thu, 16 Feb 2023 22:41:12 +0100 Subject: [PATCH 093/124] Freeze partitioned arrays because 0.3.0 contains breaking changes. --- docs/LocalPreferences.toml | 6 + docs/Manifest.toml | 689 +++++++++++----------- docs/Project.toml | 4 +- docs/src/literate/distributed_assembly.jl | 2 +- 4 files changed, 370 insertions(+), 331 deletions(-) create mode 100644 docs/LocalPreferences.toml diff --git a/docs/LocalPreferences.toml b/docs/LocalPreferences.toml new file mode 100644 index 0000000000..d88be56147 --- /dev/null +++ b/docs/LocalPreferences.toml @@ -0,0 +1,6 @@ +[MPIPreferences] +_format = "1.0" +abi = "OpenMPI" +binary = "system" +libmpi = "libmpi" +mpiexec = "mpiexec" diff --git a/docs/Manifest.toml b/docs/Manifest.toml index b08369986d..22df2a8d90 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -1,8 +1,8 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.8.2" +julia_version = "1.9.0-beta4" manifest_format = "2.0" -project_hash = "aa7aaa4a7e0a82eb77e2b1e6ab54d762b5e9cff6" +project_hash = "496d72ebf77bfce22849d2f38c0edb3979829e55" [[deps.ANSIColoredPrinters]] git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c" @@ -11,9 +11,9 @@ version = "0.0.1" [[deps.Adapt]] deps = ["LinearAlgebra"] -git-tree-sha1 = "195c5505521008abea5aee4f96930717958eac6f" +git-tree-sha1 = "0310e08cb19f5da31d08341c6120c047598f5b9c" uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -version = "3.4.0" +version = "3.5.0" [[deps.ArgTools]] uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" @@ -21,21 +21,21 @@ version = "1.1.1" [[deps.ArnoldiMethod]] deps = ["LinearAlgebra", "Random", "StaticArrays"] -git-tree-sha1 = "f87e559f87a45bece9c9ed97458d3afe98b1ebb9" +git-tree-sha1 = "62e51b39331de8911e4a7ff6f5aaf38a5f4cc0ae" uuid = "ec485272-7323-5ecc-a04f-4719b315124d" -version = "0.1.0" +version = "0.2.0" [[deps.ArrayInterface]] -deps = ["ArrayInterfaceCore", "Compat", "IfElse", "LinearAlgebra", "Static"] -git-tree-sha1 = "d6173480145eb632d6571c148d94b9d3d773820e" +deps = ["ArrayInterfaceCore", "Compat", "IfElse", "LinearAlgebra", "SnoopPrecompile", "Static"] +git-tree-sha1 = "dedc16cbdd1d32bead4617d27572f582216ccf23" uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" -version = "6.0.23" +version = "6.0.25" [[deps.ArrayInterfaceCore]] -deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] -git-tree-sha1 = "e9f7992287edfc27b3cbe0046c544bace004ca5b" +deps = ["LinearAlgebra", "SnoopPrecompile", "SparseArrays", "SuiteSparse"] +git-tree-sha1 = "e5f08b5689b1aad068e01751889f2f615c7db36d" uuid = "30b0a656-2188-435a-8636-2ec0e6a096e2" -version = "0.1.22" +version = "0.1.29" [[deps.ArrayInterfaceGPUArrays]] deps = ["Adapt", "ArrayInterfaceCore", "GPUArraysCore", 
"LinearAlgebra"] @@ -45,15 +45,15 @@ version = "0.2.2" [[deps.ArrayInterfaceOffsetArrays]] deps = ["ArrayInterface", "OffsetArrays", "Static"] -git-tree-sha1 = "c49f6bad95a30defff7c637731f00934c7289c50" +git-tree-sha1 = "3d1a9a01976971063b3930d1aed1d9c4af0817f8" uuid = "015c0d05-e682-4f19-8f0a-679ce4c54826" -version = "0.1.6" +version = "0.1.7" [[deps.ArrayInterfaceStaticArrays]] -deps = ["Adapt", "ArrayInterface", "ArrayInterfaceStaticArraysCore", "LinearAlgebra", "Static", "StaticArrays"] -git-tree-sha1 = "efb000a9f643f018d5154e56814e338b5746c560" +deps = ["Adapt", "ArrayInterface", "ArrayInterfaceCore", "ArrayInterfaceStaticArraysCore", "LinearAlgebra", "Static", "StaticArrays"] +git-tree-sha1 = "f12dc65aef03d0a49650b20b2fdaf184928fd886" uuid = "b0d46f97-bff5-4637-a19a-dd75974142cd" -version = "0.1.4" +version = "0.1.5" [[deps.ArrayInterfaceStaticArraysCore]] deps = ["Adapt", "ArrayInterfaceCore", "LinearAlgebra", "StaticArraysCore"] @@ -63,9 +63,9 @@ version = "0.1.3" [[deps.ArrayLayouts]] deps = ["FillArrays", "LinearAlgebra", "SparseArrays"] -git-tree-sha1 = "9a8017694c92ca097b23b3b43806be560af4c2ce" +git-tree-sha1 = "4aff5fa660eb95c2e0deb6bcdabe4d9a96bc4667" uuid = "4c555306-a7a7-4459-81d9-ec55ddd5c99a" -version = "0.8.12" +version = "0.8.18" [[deps.Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" @@ -74,21 +74,21 @@ uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" [[deps.BitFlags]] -git-tree-sha1 = "84259bb6172806304b9101094a7cc4bc6f56dbc6" +git-tree-sha1 = "43b1a4a8f797c1cddadf60499a8a077d4af2cd2d" uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35" -version = "0.1.5" +version = "0.1.7" [[deps.BitTwiddlingConvenienceFunctions]] deps = ["Static"] -git-tree-sha1 = "eaee37f76339077f86679787a71990c4e465477f" +git-tree-sha1 = "0c5f81f47bbbcf4aea7b2959135713459170798b" uuid = "62783981-4cbd-42fc-bca8-16325de8dc4b" -version = "0.1.4" +version = "0.1.5" [[deps.BlockArrays]] deps = ["ArrayLayouts", "FillArrays", "LinearAlgebra"] -git-tree-sha1 = "f9f6d3f5e6ac9d78e461c183bfe0945db679f514" +git-tree-sha1 = "8e5457e5f1335822210ed74bacaabecf11d3bdf7" uuid = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" -version = "0.16.21" +version = "0.16.24" [[deps.Bzip2_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -96,53 +96,46 @@ git-tree-sha1 = "19a35467a82e236ff51bc17a3a44b69ef35185a2" uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" version = "1.0.8+0" +[[deps.CEnum]] +git-tree-sha1 = "eb4cb44a499229b3b8426dcfb5dd85333951ff90" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.4.2" + [[deps.CPUSummary]] deps = ["CpuId", "IfElse", "Static"] -git-tree-sha1 = "9bdd5aceea9fa109073ace6b430a24839d79315e" +git-tree-sha1 = "2c144ddb46b552f72d7eafe7cc2f50746e41ea21" uuid = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" -version = "0.1.27" +version = "0.2.2" [[deps.Cairo_jll]] -deps = ["Artifacts", "Bzip2_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] +deps = ["Artifacts", "Bzip2_jll", "CompilerSupportLibraries_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] git-tree-sha1 = "4b859a208b2397a7a623a03449e4636bdb17bcf2" uuid = "83423d85-b0ee-5818-9007-b63ccbeb887a" version = "1.16.1+1" -[[deps.Calculus]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "f641eb0a4f00c343bbc32346e1217b86f3ce9dad" -uuid = 
"49dc2e85-a5d0-5ad3-a950-438e2897f1b9" -version = "0.5.1" - [[deps.ChainRulesCore]] deps = ["Compat", "LinearAlgebra", "SparseArrays"] -git-tree-sha1 = "e7ff6cadf743c098e08fca25c91103ee4303c9bb" +git-tree-sha1 = "c6d890a52d2c4d55d326439580c3b8d0875a77d9" uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -version = "1.15.6" - -[[deps.ChangesOfVariables]] -deps = ["ChainRulesCore", "LinearAlgebra", "Test"] -git-tree-sha1 = "38f7a08f19d8810338d4f5085211c7dfa5d5bdd8" -uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" -version = "0.1.4" +version = "1.15.7" [[deps.CloseOpenIntervals]] deps = ["ArrayInterface", "Static"] -git-tree-sha1 = "5522c338564580adf5d58d91e43a55db0fa5fb39" +git-tree-sha1 = "d61300b9895f129f4bd684b2aff97cf319b6c493" uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9" -version = "0.1.10" +version = "0.1.11" [[deps.CodecZlib]] deps = ["TranscodingStreams", "Zlib_jll"] -git-tree-sha1 = "ded953804d019afa9a3f98981d99b33e3db7b6da" +git-tree-sha1 = "9c209fb7536406834aa938fb149964b985de6c83" uuid = "944b1d66-785c-5afd-91f1-9de20f533193" -version = "0.7.0" +version = "0.7.1" [[deps.ColorSchemes]] -deps = ["ColorTypes", "ColorVectorSpace", "Colors", "FixedPointNumbers", "Random"] -git-tree-sha1 = "1fd869cc3875b57347f7027521f561cf46d1fcd8" +deps = ["ColorTypes", "ColorVectorSpace", "Colors", "FixedPointNumbers", "Random", "SnoopPrecompile"] +git-tree-sha1 = "aa3edc8f8dea6cbfa176ee12f7c2fc82f0608ed3" uuid = "35d6a980-a343-548e-a6ea-1d62b119f2f4" -version = "3.19.0" +version = "3.20.0" [[deps.ColorTypes]] deps = ["FixedPointNumbers", "Random"] @@ -152,20 +145,20 @@ version = "0.11.4" [[deps.ColorVectorSpace]] deps = ["ColorTypes", "FixedPointNumbers", "LinearAlgebra", "SpecialFunctions", "Statistics", "TensorCore"] -git-tree-sha1 = "d08c20eef1f2cbc6e60fd3612ac4340b89fea322" +git-tree-sha1 = "600cc5508d66b78aae350f7accdb58763ac18589" uuid = "c3611d14-8923-5661-9e6a-0046d554d3a4" -version = "0.9.9" +version = "0.9.10" [[deps.Colors]] deps = ["ColorTypes", "FixedPointNumbers", "Reexport"] -git-tree-sha1 = "417b0ed7b8b838aa6ca0a87aadf1bb9eb111ce40" +git-tree-sha1 = "fc08e5930ee9a4e03f84bfb5211cb54e7769758a" uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" -version = "0.12.8" +version = "0.12.10" [[deps.CommonSolve]] -git-tree-sha1 = "332a332c97c7071600984b3c31d9067e1a4e6e25" +git-tree-sha1 = "9441451ee712d1aec22edad62db1a9af3dc8d852" uuid = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2" -version = "0.2.1" +version = "0.2.3" [[deps.CommonSubexpressions]] deps = ["MacroTools", "Test"] @@ -175,14 +168,14 @@ version = "0.3.0" [[deps.Compat]] deps = ["Dates", "LinearAlgebra", "UUIDs"] -git-tree-sha1 = "3ca828fe1b75fa84b021a7860bd039eaea84d2f2" +git-tree-sha1 = "61fdd77467a5c3ad071ef8277ac6bd6af7dd4c04" uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "4.3.0" +version = "4.6.0" [[deps.CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "0.5.2+0" +version = "1.0.2+0" [[deps.ConstructionBase]] deps = ["LinearAlgebra"] @@ -202,9 +195,9 @@ uuid = "adafc99b-e345-5852-983c-f28acb93d879" version = "0.3.1" [[deps.DataAPI]] -git-tree-sha1 = "46d2680e618f8abd007bce0c3026cb0c4a8f2032" +git-tree-sha1 = "e8119c1a33d267e16108be441a287a6981ba1630" uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" -version = "1.12.0" +version = "1.14.0" [[deps.DataStructures]] deps = ["Compat", "InteractiveUtils", "OrderedCollections"] @@ -223,19 +216,37 @@ uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" [[deps.DelimitedFiles]] deps = ["Mmap"] +git-tree-sha1 = 
"9e2f36d3c96a820c678f2f1f1782582fcf685bae" uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" - -[[deps.DensityInterface]] -deps = ["InverseFunctions", "Test"] -git-tree-sha1 = "80c3e8639e3353e5d2912fb3a1916b8455e2494b" -uuid = "b429d917-457f-4dbc-8f4c-0cc954292b1d" -version = "0.4.0" +version = "1.9.1" [[deps.DiffEqBase]] -deps = ["ArrayInterfaceCore", "ChainRulesCore", "DataStructures", "Distributions", "DocStringExtensions", "FastBroadcast", "ForwardDiff", "FunctionWrappers", "FunctionWrappersWrappers", "LinearAlgebra", "Logging", "MuladdMacro", "NonlinearSolve", "Parameters", "Printf", "RecursiveArrayTools", "Reexport", "Requires", "SciMLBase", "Setfield", "SparseArrays", "Static", "StaticArrays", "Statistics", "Tricks", "ZygoteRules"] -git-tree-sha1 = "c272e6fb3c3558d807886d5247ed2a0b9c6f3823" +deps = ["ArrayInterfaceCore", "ChainRulesCore", "DataStructures", "DocStringExtensions", "FastBroadcast", "ForwardDiff", "FunctionWrappers", "FunctionWrappersWrappers", "LinearAlgebra", "Logging", "MuladdMacro", "Parameters", "PreallocationTools", "Printf", "RecursiveArrayTools", "Reexport", "Requires", "SciMLBase", "Setfield", "SparseArrays", "Static", "StaticArrays", "Statistics", "Tricks", "ZygoteRules"] +git-tree-sha1 = "b91fef836ef8c2c4480bce90bb8981e1ac21b444" uuid = "2b5f629d-d688-5b77-993f-72d75c75574e" -version = "6.105.1" +version = "6.117.0" + + [deps.DiffEqBase.extensions] + DiffEqBaseDistributionsExt = "Distributions" + DiffEqBaseGeneralizedGeneratedExt = "GeneralizedGenerated" + DiffEqBaseMPIExt = "MPI" + DiffEqBaseMeasurementsExt = "Measurements" + DiffEqBaseMonteCarloMeasurementsExt = "MonteCarloMeasurements" + DiffEqBaseReverseDiffExt = "ReverseDiff" + DiffEqBaseTrackerExt = "Tracker" + DiffEqBaseUnitfulExt = "Unitful" + DiffEqBaseZygoteExt = "Zygote" + + [deps.DiffEqBase.weakdeps] + Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" + GeneralizedGenerated = "6b9d7cbe-bcb9-11e9-073f-15a7a543e2eb" + MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" + Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7" + MonteCarloMeasurements = "0987c9cc-fe09-11e8-30f0-b96dd679fdca" + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" + Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [[deps.DiffResults]] deps = ["StaticArraysCore"] @@ -245,9 +256,9 @@ version = "1.1.0" [[deps.DiffRules]] deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] -git-tree-sha1 = "8b7a4d23e22f5d44883671da70865ca98f2ebf9d" +git-tree-sha1 = "c5b6685d53f933c11404a3ae9822afe30d522494" uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" -version = "1.12.0" +version = "1.12.2" [[deps.Distances]] deps = ["LinearAlgebra", "SparseArrays", "Statistics", "StatsAPI"] @@ -259,39 +270,27 @@ version = "0.10.7" deps = ["Random", "Serialization", "Sockets"] uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" -[[deps.Distributions]] -deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"] -git-tree-sha1 = "04db820ebcfc1e053bd8cbb8d8bccf0ff3ead3f7" -uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" -version = "0.25.76" - [[deps.DocStringExtensions]] deps = ["LibGit2"] -git-tree-sha1 = "5158c2b41018c5f7eb1470d558127ac274eca0c9" +git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -version = "0.9.1" +version = "0.9.3" 
[[deps.Documenter]] deps = ["ANSIColoredPrinters", "Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"] -git-tree-sha1 = "6030186b00a38e9d0434518627426570aac2ef95" +git-tree-sha1 = "58fea7c536acd71f3eef6be3b21c0df5f3df88fd" uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "0.27.23" +version = "0.27.24" [[deps.Downloads]] deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" version = "1.6.0" -[[deps.DualNumbers]] -deps = ["Calculus", "NaNMath", "SpecialFunctions"] -git-tree-sha1 = "5837a837389fccf076445fce071c8ddaea35a566" -uuid = "fa6b7ba4-c1ee-5f82-b5fc-ecf0adba8f74" -version = "0.6.8" - [[deps.EnumX]] -git-tree-sha1 = "e5333cd1e1c713ee21d07b6ed8b0d8853fabe650" +git-tree-sha1 = "bdb1942cd4c45e3c678fd11569d5cccd80976237" uuid = "4e289a0a-7415-4d19-859d-a7e5c4648b56" -version = "1.0.3" +version = "1.0.4" [[deps.Expat_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -300,10 +299,10 @@ uuid = "2e619515-83b5-522b-bb60-26c02a35a201" version = "2.4.8+0" [[deps.ExponentialUtilities]] -deps = ["ArrayInterfaceCore", "GPUArraysCore", "GenericSchur", "LinearAlgebra", "Printf", "SparseArrays", "libblastrampoline_jll"] -git-tree-sha1 = "b19c3f5001b11b71d0f970f354677d604f3a1a97" +deps = ["Adapt", "ArrayInterfaceCore", "ArrayInterfaceGPUArrays", "GPUArraysCore", "GenericSchur", "LinearAlgebra", "Printf", "SnoopPrecompile", "SparseArrays", "libblastrampoline_jll"] +git-tree-sha1 = "1c06afe6eb356a6148a2e5f07eddaf30f018bd5b" uuid = "d4d017d3-3776-5f7e-afef-a10c40355c18" -version = "1.19.0" +version = "1.22.1" [[deps.ExprTools]] git-tree-sha1 = "56559bbef6ca5ea0c0818fa5c90320398a6fbf8d" @@ -330,9 +329,9 @@ version = "1.3.8+0" [[deps.FastBroadcast]] deps = ["ArrayInterface", "ArrayInterfaceCore", "LinearAlgebra", "Polyester", "Static", "StrideArraysCore"] -git-tree-sha1 = "21cdeff41e5a1822c2acd7fc7934c5f450588e00" +git-tree-sha1 = "4bef892787c972913d4d84e7255400759bb650e5" uuid = "7034ab61-46d4-4ed7-9d0f-46aef9175898" -version = "0.2.1" +version = "0.2.4" [[deps.FastClosures]] git-tree-sha1 = "acebe244d53ee1b461970f8910c235b259e772ef" @@ -341,15 +340,19 @@ version = "0.3.2" [[deps.FastLapackInterface]] deps = ["LinearAlgebra"] -git-tree-sha1 = "14a6f7a21125f715d935fe8f83560ee833f7d79d" +git-tree-sha1 = "7fbaf9f73cd4c8561702ea9b16acf3f99d913fe4" uuid = "29a986be-02c6-4525-aec4-84b980013641" -version = "1.2.7" +version = "1.2.8" [[deps.Ferrite]] -deps = ["EnumX", "LinearAlgebra", "MPI", "MPIPreferences", "Metis", "NearestNeighbors", "PartitionedArrays", "Reexport", "SparseArrays", "Tensors", "WriteVTK"] +deps = ["EnumX", "LinearAlgebra", "Metis", "NearestNeighbors", "Reexport", "SparseArrays", "Tensors", "WriteVTK"] path = ".." 
uuid = "c061ca5d-56c9-439f-9c0e-210fe06d3992" version = "0.3.8" +weakdeps = ["MPI", "PartitionedArrays"] + + [deps.Ferrite.extensions] + FerritePartitionedArrays = ["MPI", "PartitionedArrays"] [[deps.FerriteGmsh]] deps = ["Ferrite", "Gmsh"] @@ -368,15 +371,15 @@ uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" [[deps.FillArrays]] deps = ["LinearAlgebra", "Random", "SparseArrays", "Statistics"] -git-tree-sha1 = "802bfc139833d2ba893dd9e62ba1767c88d708ae" +git-tree-sha1 = "d3ba08ab64bdfd27234d3f61956c966266757fe6" uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" -version = "0.13.5" +version = "0.13.7" [[deps.FiniteDiff]] deps = ["ArrayInterfaceCore", "LinearAlgebra", "Requires", "Setfield", "SparseArrays", "StaticArrays"] -git-tree-sha1 = "5a2cff9b6b77b33b89f3d97a4d367747adce647e" +git-tree-sha1 = "04ed1f0029b6b3af88343e439b995141cb0d0b8d" uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" -version = "2.15.0" +version = "2.17.0" [[deps.FixedPointNumbers]] deps = ["Statistics"] @@ -398,9 +401,9 @@ version = "0.4.2" [[deps.ForwardDiff]] deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions", "StaticArrays"] -git-tree-sha1 = "187198a4ed8ccd7b5d99c41b69c679269ea2b2d4" +git-tree-sha1 = "a69dd6db8a809f78846ff259298678f0d6212180" uuid = "f6369f11-7733-5829-9624-2563aa707210" -version = "0.10.32" +version = "0.10.34" [[deps.FreeType2_jll]] deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] @@ -421,9 +424,9 @@ version = "1.1.3" [[deps.FunctionWrappersWrappers]] deps = ["FunctionWrappers"] -git-tree-sha1 = "a5e6e7f12607e90d71b09e6ce2c965e41b337968" +git-tree-sha1 = "b104d487b34566608f8b4e1c39fb0b10aa279ff8" uuid = "77dc65aa-8811-40c2-897b-53d922fa7daf" -version = "0.1.1" +version = "0.1.3" [[deps.Future]] deps = ["Random"] @@ -448,21 +451,21 @@ version = "6.2.1+2" [[deps.GPUArraysCore]] deps = ["Adapt"] -git-tree-sha1 = "6872f5ec8fd1a38880f027a26739d42dcda6691f" +git-tree-sha1 = "1cd7f0af1aa58abc02ea1d872953a97359cb87fa" uuid = "46192b85-c4d5-4398-a991-12ede77f4527" -version = "0.1.2" +version = "0.1.4" [[deps.GR]] -deps = ["Base64", "DelimitedFiles", "GR_jll", "HTTP", "JSON", "Libdl", "LinearAlgebra", "Pkg", "Preferences", "Printf", "Random", "Serialization", "Sockets", "Test", "UUIDs"] -git-tree-sha1 = "00a9d4abadc05b9476e937a5557fcce476b9e547" +deps = ["Artifacts", "Base64", "DelimitedFiles", "Downloads", "GR_jll", "HTTP", "JSON", "Libdl", "LinearAlgebra", "Pkg", "Preferences", "Printf", "Random", "Serialization", "Sockets", "TOML", "Tar", "Test", "UUIDs", "p7zip_jll"] +git-tree-sha1 = "660b2ea2ec2b010bb02823c6d0ff6afd9bdc5c16" uuid = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" -version = "0.69.5" +version = "0.71.7" [[deps.GR_jll]] -deps = ["Artifacts", "Bzip2_jll", "Cairo_jll", "FFMPEG_jll", "Fontconfig_jll", "GLFW_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "Pixman_jll", "Pkg", "Qt5Base_jll", "Zlib_jll", "libpng_jll"] -git-tree-sha1 = "bc9f7725571ddb4ab2c4bc74fa397c1c5ad08943" +deps = ["Artifacts", "Bzip2_jll", "Cairo_jll", "FFMPEG_jll", "Fontconfig_jll", "GLFW_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "Pixman_jll", "Qt5Base_jll", "Zlib_jll", "libpng_jll"] +git-tree-sha1 = "d5e1fd17ac7f3aa4c5287a61ee28d4f8b8e98873" uuid = "d2c73de3-f751-5644-a686-071e5b155ba9" -version = "0.69.1+0" +version = "0.71.7+0" [[deps.GenericSchur]] deps = ["LinearAlgebra", "Printf"] @@ -478,9 +481,9 @@ version = "0.21.0+0" [[deps.Glib_jll]] deps = ["Artifacts", 
"Gettext_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Libiconv_jll", "Libmount_jll", "PCRE2_jll", "Pkg", "Zlib_jll"] -git-tree-sha1 = "fb83fbe02fe57f2c068013aa94bcdf6760d3a7a7" +git-tree-sha1 = "d3b3624125c1474292d0d8ed0f65554ac37ddb23" uuid = "7746bdde-850d-59dc-9ae8-88ece973131d" -version = "2.74.0+1" +version = "2.74.0+2" [[deps.Gmsh]] deps = ["gmsh_jll"] @@ -496,9 +499,9 @@ version = "1.3.14+0" [[deps.Graphs]] deps = ["ArnoldiMethod", "Compat", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"] -git-tree-sha1 = "ba2d094a88b6b287bd25cfa86f301e7693ffae2f" +git-tree-sha1 = "1cf1d7dcb4bc32d7b4a5add4232db3750c27ecb4" uuid = "86223c79-3864-5bf0-83f7-82e725a168b6" -version = "1.7.4" +version = "1.8.0" [[deps.Grisu]] git-tree-sha1 = "53bb909d1151e57e2484c3d1b53e19552b887fb2" @@ -513,9 +516,9 @@ version = "1.12.2+2" [[deps.HTTP]] deps = ["Base64", "CodecZlib", "Dates", "IniFile", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"] -git-tree-sha1 = "3cdd8948c55d8b53b5323f23c9581555dc2e30e1" +git-tree-sha1 = "37e4657cd56b11abe3d10cd4a1ec5fbdb4180263" uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" -version = "1.5.0" +version = "1.7.4" [[deps.HarfBuzz_jll]] deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "Graphite2_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg"] @@ -525,15 +528,9 @@ version = "2.8.1+1" [[deps.HostCPUFeatures]] deps = ["BitTwiddlingConvenienceFunctions", "IfElse", "Libdl", "Static"] -git-tree-sha1 = "b7b88a4716ac33fe31d6556c02fc60017594343c" +git-tree-sha1 = "734fd90dd2f920a2f1921d5388dcebe805b262dc" uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0" -version = "0.1.8" - -[[deps.HypergeometricFunctions]] -deps = ["DualNumbers", "LinearAlgebra", "OpenLibm_jll", "SpecialFunctions", "Test"] -git-tree-sha1 = "709d864e3ed6e3545230601f94e11ebc65994641" -uuid = "34004b35-14d8-5ef3-9330-4cdb6864b03a" -version = "0.3.11" +version = "0.1.14" [[deps.IOCapture]] deps = ["Logging", "Random"] @@ -560,12 +557,6 @@ version = "0.5.1" deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" -[[deps.InverseFunctions]] -deps = ["Test"] -git-tree-sha1 = "49510dfcb407e572524ba94aeae2fced1f3feb0f" -uuid = "3587e190-3f89-42d0-90ee-14403ec27112" -version = "0.1.8" - [[deps.IrrationalConstants]] git-tree-sha1 = "7fd44fd4ff43fc60815f8e764c0f352b83c49151" uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" @@ -608,21 +599,21 @@ version = "2.1.2+0" [[deps.KLU]] deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse_jll"] -git-tree-sha1 = "cae5e3dfd89b209e01bcd65b3a25e74462c67ee0" +git-tree-sha1 = "764164ed65c30738750965d55652db9c94c59bfe" uuid = "ef3ab10e-7fda-4108-b977-705223b18434" -version = "0.3.0" +version = "0.4.0" [[deps.Krylov]] deps = ["LinearAlgebra", "Printf", "SparseArrays"] -git-tree-sha1 = "92256444f81fb094ff5aa742ed10835a621aef75" +git-tree-sha1 = "dd90aacbfb622f898a97c2a4411ac49101ebab8a" uuid = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" -version = "0.8.4" +version = "0.9.0" [[deps.KrylovKit]] -deps = ["LinearAlgebra", "Printf"] -git-tree-sha1 = "49b0c1dd5c292870577b8f58c51072bd558febb9" +deps = ["ChainRulesCore", "GPUArraysCore", "LinearAlgebra", "Printf"] +git-tree-sha1 = "1a5e1d9941c783b0119897d29f2eb665d876ecf3" uuid = "0b1a1467-8014-51b9-945f-bf0ae24f4b77" -version = "0.5.4" +version = "0.6.0" [[deps.LAME_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -638,9 +629,9 @@ version = "3.0.0+1" 
[[deps.LLVMOpenMP_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "ad927676766e6529a2d5152f12040620447c0c9b" +git-tree-sha1 = "f689897ccbe049adb19a065c495e75f372ecd42b" uuid = "1d63c593-3942-5779-bab2-d838dc0a180e" -version = "14.0.4+0" +version = "15.0.4+0" [[deps.LZO_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -655,15 +646,21 @@ version = "1.3.0" [[deps.Latexify]] deps = ["Formatting", "InteractiveUtils", "LaTeXStrings", "MacroTools", "Markdown", "OrderedCollections", "Printf", "Requires"] -git-tree-sha1 = "ab9aa169d2160129beb241cb2750ca499b4e90e9" +git-tree-sha1 = "2422f47b34d4b127720a18f86fa7b1aa2e141f29" uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" -version = "0.15.17" +version = "0.15.18" [[deps.LayoutPointers]] deps = ["ArrayInterface", "ArrayInterfaceOffsetArrays", "ArrayInterfaceStaticArrays", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static"] -git-tree-sha1 = "73e2e40eb02d6ccd191a8a9f8cee20db8d5df010" +git-tree-sha1 = "0ad6f0c51ce004dadc24a28a0dfecfb568e52242" uuid = "10f19ff3-798f-405d-979b-55457f8fc047" -version = "0.1.11" +version = "0.1.13" + +[[deps.Lazy]] +deps = ["MacroTools"] +git-tree-sha1 = "1370f8202dac30758f3c345f9909b97f53d87d3f" +uuid = "50d2b5c4-7a5e-59d5-8109-a42b560f39c0" +version = "0.15.1" [[deps.LazyArtifacts]] deps = ["Artifacts", "Pkg"] @@ -705,9 +702,9 @@ version = "1.8.7+0" [[deps.Libglvnd_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll"] -git-tree-sha1 = "7739f837d6447403596a75d19ed01fd08d6f56bf" +git-tree-sha1 = "6f73d1dd803986947b2c750138528a999a6c7733" uuid = "7e76a0d4-f3c7-5321-8279-8d96eeed0f29" -version = "1.3.0+3" +version = "1.6.0+0" [[deps.Libgpg_error_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -717,9 +714,9 @@ version = "1.42.0+0" [[deps.Libiconv_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "42b62845d70a619f063a7da093d995ec8e15e778" +git-tree-sha1 = "c7cb1f5d892775ba13767a87c7ada0b980ea0a71" uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" -version = "1.16.1+1" +version = "1.16.1+2" [[deps.Libmount_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -739,12 +736,6 @@ git-tree-sha1 = "7f3efec06033682db852f8b3bc3c1d2b0a0ab066" uuid = "38a345b3-de98-5d2b-a5d3-14cd9215e700" version = "2.36.0+0" -[[deps.LightGraphs]] -deps = ["ArnoldiMethod", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"] -git-tree-sha1 = "432428df5f360964040ed60418dd5601ecd240b6" -uuid = "093fc24a-ae57-5d10-9952-331d41423f4d" -version = "1.3.5" - [[deps.LightXML]] deps = ["Libdl", "XML2_jll"] git-tree-sha1 = "e129d9391168c677cd4800f5c0abb1ed8cb3794f" @@ -758,7 +749,7 @@ uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" version = "7.2.0" [[deps.LinearAlgebra]] -deps = ["Libdl", "libblastrampoline_jll"] +deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [[deps.LinearElasticity_jll]] @@ -768,10 +759,16 @@ uuid = "18c40d15-f7cd-5a6d-bc92-87468d86c5db" version = "5.0.0+0" [[deps.LinearSolve]] -deps = ["ArrayInterfaceCore", "DocStringExtensions", "FastLapackInterface", "GPUArraysCore", "IterativeSolvers", "KLU", "Krylov", "KrylovKit", "LinearAlgebra", "RecursiveFactorization", "Reexport", "SciMLBase", "Setfield", "SnoopPrecompile", "SparseArrays", "SuiteSparse", "UnPack"] -git-tree-sha1 = "70db49cbaec1cdf4def39c4ac51a3abe56b2e421" +deps = ["ArrayInterfaceCore", "DocStringExtensions", "FastLapackInterface", 
"GPUArraysCore", "IterativeSolvers", "KLU", "Krylov", "KrylovKit", "LinearAlgebra", "Preferences", "RecursiveFactorization", "Reexport", "SciMLBase", "SciMLOperators", "Setfield", "SnoopPrecompile", "SparseArrays", "Sparspak", "SuiteSparse", "UnPack"] +git-tree-sha1 = "ed97c2b4e46d02d4c866d3ccfae039a6c09568b1" uuid = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" -version = "1.27.0" +version = "1.35.0" + + [deps.LinearSolve.extensions] + LinearSolveHYPRE = "HYPRE" + + [deps.LinearSolve.weakdeps] + HYPRE = "b5ffcf37-a2bd-41ab-a3da-4bd9bc8ad771" [[deps.Literate]] deps = ["Base64", "IOCapture", "JSON", "REPL"] @@ -786,31 +783,46 @@ uuid = "16fef848-5104-11e9-1b77-fb7a48bbb589" version = "1.1.0" [[deps.LogExpFunctions]] -deps = ["ChainRulesCore", "ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"] -git-tree-sha1 = "94d9c52ca447e23eac0c0f074effbcd38830deb5" +deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"] +git-tree-sha1 = "680e733c3a0a9cea9e935c8c2184aea6a63fa0b5" uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" -version = "0.3.18" +version = "0.3.21" + + [deps.LogExpFunctions.extensions] + ChainRulesCoreExt = "ChainRulesCore" + ChangesOfVariablesExt = "ChangesOfVariables" + InverseFunctionsExt = "InverseFunctions" + + [deps.LogExpFunctions.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" + InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" [[deps.Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" [[deps.LoggingExtras]] deps = ["Dates", "Logging"] -git-tree-sha1 = "5d4d2d9904227b8bd66386c1138cf4d5ffa826bf" +git-tree-sha1 = "cedb76b37bc5a6c702ade66be44f831fa23c681e" uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" -version = "0.4.9" +version = "1.0.0" [[deps.LoopVectorization]] -deps = ["ArrayInterface", "ArrayInterfaceCore", "ArrayInterfaceOffsetArrays", "ArrayInterfaceStaticArrays", "CPUSummary", "ChainRulesCore", "CloseOpenIntervals", "DocStringExtensions", "ForwardDiff", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "SIMDDualNumbers", "SIMDTypes", "SLEEFPirates", "SnoopPrecompile", "SpecialFunctions", "Static", "ThreadingUtilities", "UnPack", "VectorizationBase"] -git-tree-sha1 = "9f6030ca92d1a816e931abb657219c9fc4991a96" +deps = ["ArrayInterface", "ArrayInterfaceCore", "ArrayInterfaceOffsetArrays", "ArrayInterfaceStaticArrays", "CPUSummary", "CloseOpenIntervals", "DocStringExtensions", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "SIMDTypes", "SLEEFPirates", "SnoopPrecompile", "Static", "ThreadingUtilities", "UnPack", "VectorizationBase"] +git-tree-sha1 = "9696a80c21a56b937e3fd89e972f8db5db3186e2" uuid = "bdcacae8-1622-11e9-2a5c-532679323890" -version = "0.12.136" +version = "0.12.150" +weakdeps = ["ChainRulesCore", "ForwardDiff", "SpecialFunctions"] + + [deps.LoopVectorization.extensions] + ForwardDiffExt = ["ChainRulesCore", "ForwardDiff"] + SpecialFunctionsExt = "SpecialFunctions" [[deps.METIS_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "1d31872bb9c5e7ec1f618e8c4a56c8b0d9bddc7e" +git-tree-sha1 = "1fd0a97409e418b78c53fac671cf4622efdf0f21" uuid = "d00139f3-1899-568f-a2f0-47f597d42d70" -version = "5.1.1+0" +version = "5.1.2+0" [[deps.MIMEs]] git-tree-sha1 = "65f28ad4b594aebe22157d6fac869786a255b7eb" @@ -825,9 +837,9 @@ version = "5.6.0+0" [[deps.MPI]] deps = ["Distributed", "DocStringExtensions", "Libdl", 
"MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "Requires", "Serialization", "Sockets"] -git-tree-sha1 = "a330c3fc517b52723645283a1d18569c58f703dd" +git-tree-sha1 = "6d72bafd3960f9c119ceb8f034fef28346490fe5" uuid = "da04e1cc-30fd-572f-bb4f-1f8673147195" -version = "0.20.2" +version = "0.20.8" [[deps.MPICH_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "Pkg", "TOML"] @@ -837,9 +849,9 @@ version = "4.0.2+5" [[deps.MPIPreferences]] deps = ["Libdl", "Preferences"] -git-tree-sha1 = "34892fb69751a76bcf8b7add84ec77015208a1ec" +git-tree-sha1 = "71f937129731a29eabe6969db2c90368a4408933" uuid = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" -version = "0.1.6" +version = "0.1.7" [[deps.MPItrampoline_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "Pkg", "TOML"] @@ -864,9 +876,9 @@ uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" [[deps.MbedTLS]] deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "Random", "Sockets"] -git-tree-sha1 = "6872f9594ff273da6d13c7c1a1545d5a8c7d0c1c" +git-tree-sha1 = "03a9b9718f5682ecb107ac9f7308991db4ce395b" uuid = "739be429-bea8-5141-9913-cc70e7f3736d" -version = "1.1.6" +version = "1.1.7" [[deps.MbedTLS_jll]] deps = ["Artifacts", "Libdl"] @@ -874,15 +886,25 @@ uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" version = "2.28.0+0" [[deps.Measures]] -git-tree-sha1 = "e498ddeee6f9fdb4551ce855a46f54dbd900245f" +git-tree-sha1 = "c13304c81eec1ed3af7fc20e75fb6b26092a1102" uuid = "442fdcdd-2543-5da2-b0f3-8c86c306513e" -version = "0.3.1" +version = "0.3.2" [[deps.Metis]] -deps = ["Graphs", "LightGraphs", "LinearAlgebra", "METIS_jll", "SparseArrays"] -git-tree-sha1 = "3285c93a67ed2effccf6ecf862a6346fcf5c565e" +deps = ["CEnum", "LinearAlgebra", "METIS_jll", "SparseArrays"] +git-tree-sha1 = "66a4f74edb3ac5f28c74de60f9acc2a541fbbe28" uuid = "2679e427-3c69-5b7f-982b-ece356f1e94b" -version = "1.2.0" +version = "1.4.0" + + [deps.Metis.extensions] + MetisGraphs = "Graphs" + MetisLightGraphs = "LightGraphs" + MetisSimpleWeightedGraphs = ["SimpleWeightedGraphs", "Graphs"] + + [deps.Metis.weakdeps] + Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" + LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d" + SimpleWeightedGraphs = "47aef6b3-ad0c-573a-a1e2-d07658019622" [[deps.MicrosoftMPI_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -892,27 +914,27 @@ version = "10.1.3+2" [[deps.Missings]] deps = ["DataAPI"] -git-tree-sha1 = "bf210ce90b6c9eed32d25dbcae1ebc565df2687f" +git-tree-sha1 = "f66bdc5de519e8f8ae43bdc598782d35a25b1272" uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" -version = "1.0.2" +version = "1.1.0" [[deps.Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" [[deps.MozillaCACerts_jll]] uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -version = "2022.2.1" +version = "2022.10.11" [[deps.MuladdMacro]] -git-tree-sha1 = "c6190f9a7fc5d9d5915ab29f2134421b12d24a68" +git-tree-sha1 = "cac9cc5499c25554cba55cd3c30543cff5ca4fab" uuid = "46d2c3a1-f734-5fdb-9937-b9b9aeba4221" -version = "0.2.2" +version = "0.2.4" [[deps.NLSolversBase]] deps = ["DiffResults", "Distributed", "FiniteDiff", "ForwardDiff"] -git-tree-sha1 = "50310f934e55e5ca3912fb941dec199b49ca9b68" +git-tree-sha1 = "a0b464d183da839699f4c79e7606d9d186ec172c" uuid = "d41bc354-129a-5804-8e4c-c37616107c6c" -version = "7.8.2" +version = "7.8.3" [[deps.NLsolve]] deps = ["Distances", "LineSearches", "LinearAlgebra", "NLSolversBase", "Printf", "Reexport"] @@ -922,25 
+944,25 @@ version = "4.5.1" [[deps.NaNMath]] deps = ["OpenLibm_jll"] -git-tree-sha1 = "a7c3d1da1189a1c2fe843a3bfa04d18d20eb3211" +git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4" uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" -version = "1.0.1" +version = "1.0.2" [[deps.NearestNeighbors]] deps = ["Distances", "StaticArrays"] -git-tree-sha1 = "440165bf08bc500b8fe4a7be2dc83271a00c0716" +git-tree-sha1 = "2c3726ceb3388917602169bed973dbc97f1b51a8" uuid = "b8a86587-4115-5ab1-83bc-aa920d37bbce" -version = "0.4.12" +version = "0.4.13" [[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" version = "1.2.0" [[deps.NonlinearSolve]] -deps = ["ArrayInterfaceCore", "FiniteDiff", "ForwardDiff", "IterativeSolvers", "LinearAlgebra", "RecursiveArrayTools", "RecursiveFactorization", "Reexport", "SciMLBase", "Setfield", "StaticArrays", "UnPack"] -git-tree-sha1 = "a754a21521c0ab48d37f44bbac1eefd1387bdcfc" +deps = ["ArrayInterfaceCore", "DiffEqBase", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "LinearSolve", "RecursiveArrayTools", "Reexport", "SciMLBase", "SimpleNonlinearSolve", "SnoopPrecompile", "SparseArrays", "SparseDiffTools", "StaticArraysCore", "UnPack"] +git-tree-sha1 = "e2b063236a3103a3640ff1f2e3945ca387281cbe" uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec" -version = "0.3.22" +version = "1.3.0" [[deps.OCCT_jll]] deps = ["Artifacts", "FreeType2_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll", "Xorg_libXfixes_jll", "Xorg_libXft_jll", "Xorg_libXinerama_jll", "Xorg_libXrender_jll"] @@ -950,9 +972,9 @@ version = "7.6.2+2" [[deps.OffsetArrays]] deps = ["Adapt"] -git-tree-sha1 = "f71d8950b724e9ff6110fc948dff5a329f901d64" +git-tree-sha1 = "82d7c9e310fe55aa54996e6f7f94674e2a38fcb4" uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" -version = "1.12.8" +version = "1.12.9" [[deps.Ogg_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -963,7 +985,7 @@ version = "1.3.5+1" [[deps.OpenBLAS_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.20+0" +version = "0.3.21+0" [[deps.OpenLibm_jll]] deps = ["Artifacts", "Libdl"] @@ -978,15 +1000,15 @@ version = "4.1.3+3" [[deps.OpenSSL]] deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"] -git-tree-sha1 = "3c3c4a401d267b04942545b1e964a20279587fd7" +git-tree-sha1 = "6503b77492fd7fcb9379bf73cd31035670e3c509" uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c" -version = "1.3.0" +version = "1.3.3" [[deps.OpenSSL_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "e60321e3f2616584ff98f0a4f18d98ae6f89bbb3" +git-tree-sha1 = "9ff31d101d987eb9d66bd8b176ac7c277beccd09" uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" -version = "1.1.17+0" +version = "1.1.20+0" [[deps.OpenSpecFun_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] @@ -996,9 +1018,9 @@ version = "0.5.5+0" [[deps.Optim]] deps = ["Compat", "FillArrays", "ForwardDiff", "LineSearches", "LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "PositiveFactorizations", "Printf", "SparseArrays", "StatsBase"] -git-tree-sha1 = "b9fe76d1a39807fdcf790b991981a922de0c3050" +git-tree-sha1 = "1903afc76b7d01719d9c30d3c7d501b61db96721" uuid = "429524aa-4258-5aef-a3af-852621145aeb" -version = "1.7.3" +version = "1.7.4" [[deps.Opus_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -1012,21 +1034,15 @@ uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" version = "1.4.1" [[deps.OrdinaryDiffEq]] -deps = 
["Adapt", "ArrayInterface", "ArrayInterfaceCore", "ArrayInterfaceGPUArrays", "ArrayInterfaceStaticArrays", "ArrayInterfaceStaticArraysCore", "DataStructures", "DiffEqBase", "DocStringExtensions", "ExponentialUtilities", "FastBroadcast", "FastClosures", "FiniteDiff", "ForwardDiff", "FunctionWrappersWrappers", "LinearAlgebra", "LinearSolve", "Logging", "LoopVectorization", "MacroTools", "MuladdMacro", "NLsolve", "NonlinearSolve", "Polyester", "PreallocationTools", "Preferences", "RecursiveArrayTools", "Reexport", "SciMLBase", "SnoopPrecompile", "SparseArrays", "SparseDiffTools", "StaticArrays", "UnPack"] -git-tree-sha1 = "88b3bc390fe76e559bef97b6abe55e8d3a440a56" +deps = ["Adapt", "ArrayInterface", "ArrayInterfaceCore", "ArrayInterfaceGPUArrays", "ArrayInterfaceStaticArrays", "ArrayInterfaceStaticArraysCore", "DataStructures", "DiffEqBase", "DocStringExtensions", "ExponentialUtilities", "FastBroadcast", "FastClosures", "FiniteDiff", "ForwardDiff", "FunctionWrappersWrappers", "LinearAlgebra", "LinearSolve", "Logging", "LoopVectorization", "MacroTools", "MuladdMacro", "NLsolve", "NonlinearSolve", "Polyester", "PreallocationTools", "Preferences", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLNLSolve", "SimpleNonlinearSolve", "SnoopPrecompile", "SparseArrays", "SparseDiffTools", "StaticArrays", "UnPack"] +git-tree-sha1 = "9e846d9c0f66fed04e7617a8bc380918a1ffe7ff" uuid = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" -version = "6.29.3" +version = "6.44.0" [[deps.PCRE2_jll]] deps = ["Artifacts", "Libdl"] uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" -version = "10.40.0+0" - -[[deps.PDMats]] -deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] -git-tree-sha1 = "cf494dca75a69712a72b80bc48f59dcf3dea63ec" -uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" -version = "0.11.16" +version = "10.42.0+0" [[deps.Parameters]] deps = ["OrderedCollections", "UnPack"] @@ -1035,16 +1051,16 @@ uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a" version = "0.12.3" [[deps.Parsers]] -deps = ["Dates"] -git-tree-sha1 = "6c01a9b494f6d2a9fc180a08b182fcb06f0958a0" +deps = ["Dates", "SnoopPrecompile"] +git-tree-sha1 = "6f4fbcd1ad45905a5dee3f4256fabb49aa2110c6" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "2.4.2" +version = "2.5.7" [[deps.PartitionedArrays]] deps = ["Distances", "IterativeSolvers", "LinearAlgebra", "MPI", "Printf", "SparseArrays", "SparseMatricesCSR"] -git-tree-sha1 = "94291b7ddeac39816572660383055870b41bca64" +git-tree-sha1 = "8a8a72723ffb62a395b0475b78b4695fb7090441" uuid = "5a9dfac6-5c52-46f7-8278-5e2210713be9" -version = "0.2.11" +version = "0.2.15" [[deps.Pipe]] git-tree-sha1 = "6842804e7867b115ca9de748a0cf6b364523c16d" @@ -1058,9 +1074,9 @@ uuid = "30392449-352a-5448-841d-b1acce4e97dc" version = "0.40.1+0" [[deps.Pkg]] -deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -version = "1.8.0" +version = "1.9.0" [[deps.PlotThemes]] deps = ["PlotUtils", "Statistics"] @@ -1070,27 +1086,27 @@ version = "3.1.0" [[deps.PlotUtils]] deps = ["ColorSchemes", "Colors", "Dates", "Printf", "Random", "Reexport", "SnoopPrecompile", "Statistics"] -git-tree-sha1 = "21303256d239f6b484977314674aef4bb1fe4420" +git-tree-sha1 = 
"c95373e73290cf50a8a22c3375e4625ded5c5280" uuid = "995b91a9-d308-5afd-9ec6-746e21dbc043" -version = "1.3.1" +version = "1.3.4" [[deps.Plots]] -deps = ["Base64", "Contour", "Dates", "Downloads", "FFMPEG", "FixedPointNumbers", "GR", "JLFzf", "JSON", "LaTeXStrings", "Latexify", "LinearAlgebra", "Measures", "NaNMath", "Pkg", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "RelocatableFolders", "Requires", "Scratch", "Showoff", "SnoopPrecompile", "SparseArrays", "Statistics", "StatsBase", "UUIDs", "UnicodeFun", "Unzip"] -git-tree-sha1 = "041704a5182f25cdcbb1369f13d9d9f94a86b5fd" +deps = ["Base64", "Contour", "Dates", "Downloads", "FFMPEG", "FixedPointNumbers", "GR", "JLFzf", "JSON", "LaTeXStrings", "Latexify", "LinearAlgebra", "Measures", "NaNMath", "Pkg", "PlotThemes", "PlotUtils", "Preferences", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "RelocatableFolders", "Requires", "Scratch", "Showoff", "SnoopPrecompile", "SparseArrays", "Statistics", "StatsBase", "UUIDs", "UnicodeFun", "Unzip"] +git-tree-sha1 = "8ac949bd0ebc46a44afb1fdca1094554a84b086e" uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -version = "1.35.4" +version = "1.38.5" [[deps.Polyester]] deps = ["ArrayInterface", "BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "ManualMemory", "PolyesterWeave", "Requires", "Static", "StrideArraysCore", "ThreadingUtilities"] -git-tree-sha1 = "cb2ede4b9cc432c1cba4d4452a62ae1d2a4141bb" +git-tree-sha1 = "e8e0fabcff4df8686c4267503887202a783d498e" uuid = "f517fe37-dbe3-4b94-8317-1923a5111588" -version = "0.6.16" +version = "0.7.2" [[deps.PolyesterWeave]] deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"] -git-tree-sha1 = "b42fb2292fbbaed36f25d33a15c8cc0b4f287fcf" +git-tree-sha1 = "240d7170f5ffdb285f9427b92333c3463bf65bf6" uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad" -version = "0.1.10" +version = "0.2.1" [[deps.PositiveFactorizations]] deps = ["LinearAlgebra"] @@ -1099,10 +1115,16 @@ uuid = "85a6dd25-e78a-55b7-8502-1745935b8125" version = "0.2.4" [[deps.PreallocationTools]] -deps = ["Adapt", "ArrayInterfaceCore", "ForwardDiff"] -git-tree-sha1 = "3953d18698157e1d27a51678c89c88d53e071a42" +deps = ["Adapt", "ArrayInterfaceCore", "ForwardDiff", "Requires"] +git-tree-sha1 = "2c7658dd593e3adc118b00429e1048829f1abb8c" uuid = "d236fae5-4411-538c-8e31-a6e3d9e00b46" -version = "0.4.4" +version = "0.4.11" + + [deps.PreallocationTools.extensions] + PreallocationToolsReverseDiffExt = "ReverseDiff" + + [deps.PreallocationTools.weakdeps] + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" [[deps.Preferences]] deps = ["TOML"] @@ -1122,15 +1144,9 @@ version = "1.7.2" [[deps.Qt5Base_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Fontconfig_jll", "Glib_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "OpenSSL_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libxcb_jll", "Xorg_xcb_util_image_jll", "Xorg_xcb_util_keysyms_jll", "Xorg_xcb_util_renderutil_jll", "Xorg_xcb_util_wm_jll", "Zlib_jll", "xkbcommon_jll"] -git-tree-sha1 = "c6c0f690d0cc7caddb74cef7aa847b824a16b256" +git-tree-sha1 = "0c03844e2231e12fda4d0086fd7cbe4098ee8dc5" uuid = "ea2cea3b-5b76-57ae-a6ef-0a8af62496e1" -version = "5.15.3+1" - -[[deps.QuadGK]] -deps = ["DataStructures", "LinearAlgebra"] -git-tree-sha1 = "3c009334f45dfd546a16a57960a821a1a023d241" -uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" -version = "2.5.0" +version = "5.15.3+2" [[deps.REPL]] deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] @@ 
-1142,27 +1158,33 @@ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [[deps.RecipesBase]] deps = ["SnoopPrecompile"] -git-tree-sha1 = "d12e612bba40d189cead6ff857ddb67bd2e6a387" +git-tree-sha1 = "261dddd3b862bd2c940cf6ca4d1c8fe593e457c8" uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" -version = "1.3.1" +version = "1.3.3" [[deps.RecipesPipeline]] deps = ["Dates", "NaNMath", "PlotUtils", "RecipesBase", "SnoopPrecompile"] -git-tree-sha1 = "9b1c0c8e9188950e66fc28f40bfe0f8aac311fe0" +git-tree-sha1 = "e974477be88cb5e3040009f3767611bc6357846f" uuid = "01d81517-befc-4cb6-b9ec-a95719d0359c" -version = "0.6.7" +version = "0.6.11" [[deps.RecursiveArrayTools]] -deps = ["Adapt", "ArrayInterfaceCore", "ArrayInterfaceStaticArraysCore", "ChainRulesCore", "DocStringExtensions", "FillArrays", "GPUArraysCore", "IteratorInterfaceExtensions", "LinearAlgebra", "RecipesBase", "StaticArraysCore", "Statistics", "Tables", "ZygoteRules"] -git-tree-sha1 = "3004608dc42101a944e44c1c68b599fa7c669080" +deps = ["Adapt", "ArrayInterfaceCore", "ArrayInterfaceStaticArraysCore", "ChainRulesCore", "DocStringExtensions", "FillArrays", "GPUArraysCore", "IteratorInterfaceExtensions", "LinearAlgebra", "RecipesBase", "Requires", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface", "Tables", "ZygoteRules"] +git-tree-sha1 = "54e055256bbd41fd10566880bc4baa5316bca6fe" uuid = "731186ca-8d62-57ce-b412-fbd966d074cd" -version = "2.32.0" +version = "2.37.0" + + [deps.RecursiveArrayTools.extensions] + RecursiveArrayToolsTrackerExt = "Tracker" + + [deps.RecursiveArrayTools.weakdeps] + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" [[deps.RecursiveFactorization]] deps = ["LinearAlgebra", "LoopVectorization", "Polyester", "SnoopPrecompile", "StrideArraysCore", "TriangularSolve"] -git-tree-sha1 = "0a2dfb3358fcde3676beb75405e782faa8c9aded" +git-tree-sha1 = "9088515ad915c99026beb5436d0a09cd8c18163e" uuid = "f2c3362d-daeb-58d1-803e-2bc74f2840b4" -version = "0.2.12" +version = "0.2.18" [[deps.Reexport]] git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" @@ -1181,23 +1203,11 @@ git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" uuid = "ae029012-a4dd-5104-9daa-d747884805df" version = "1.3.0" -[[deps.Rmath]] -deps = ["Random", "Rmath_jll"] -git-tree-sha1 = "bf3188feca147ce108c76ad82c2792c57abe7b1f" -uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa" -version = "0.7.0" - -[[deps.Rmath_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "68db32dff12bb6127bac73c209881191bf0efbb7" -uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f" -version = "0.3.0+0" - [[deps.RuntimeGeneratedFunctions]] deps = ["ExprTools", "SHA", "Serialization"] -git-tree-sha1 = "cdc1e4278e91a6ad530770ebb327f9ed83cf10c4" +git-tree-sha1 = "50314d2ef65fce648975a8e80ae6d8409ebbf835" uuid = "7e49a35a-f44a-4d26-94aa-eba1b4ca6b47" -version = "0.5.3" +version = "0.5.5" [[deps.SCOTCH_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] @@ -1210,15 +1220,10 @@ uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" version = "0.7.0" [[deps.SIMD]] -git-tree-sha1 = "7dbc15af7ed5f751a82bf3ed37757adf76c32402" +deps = ["SnoopPrecompile"] +git-tree-sha1 = "8b20084a97b004588125caebf418d8cab9e393d1" uuid = "fdea26ae-647d-5447-a871-4b548cad5224" -version = "3.4.1" - -[[deps.SIMDDualNumbers]] -deps = ["ForwardDiff", "IfElse", "SLEEFPirates", "VectorizationBase"] -git-tree-sha1 = "dd4195d308df24f33fb10dde7c22103ba88887fa" -uuid = "3cdde19b-5bb0-4aaf-8931-af3e248e098b" -version = "0.1.1" +version = "3.4.4" [[deps.SIMDTypes]] git-tree-sha1 = 
"330289636fb8107c5f32088d2741e9fd7a061a5c" @@ -1227,15 +1232,27 @@ version = "0.1.0" [[deps.SLEEFPirates]] deps = ["IfElse", "Static", "VectorizationBase"] -git-tree-sha1 = "938c9ecffb28338a6b8b970bda0f3806a65e7906" +git-tree-sha1 = "cda0aece8080e992f6370491b08ef3909d1c04e7" uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" -version = "0.6.36" +version = "0.6.38" [[deps.SciMLBase]] -deps = ["ArrayInterfaceCore", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "EnumX", "FunctionWrappersWrappers", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "Markdown", "Preferences", "RecipesBase", "RecursiveArrayTools", "RuntimeGeneratedFunctions", "StaticArraysCore", "Statistics", "Tables"] -git-tree-sha1 = "d41daf11db3383bd979ba00e1590d2f4297ace61" +deps = ["ArrayInterfaceCore", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "EnumX", "FunctionWrappersWrappers", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "Markdown", "Preferences", "RecipesBase", "RecursiveArrayTools", "Reexport", "RuntimeGeneratedFunctions", "SciMLOperators", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface", "Tables"] +git-tree-sha1 = "76eec814289c4a249ee3747ceeea0d83defbeb8d" uuid = "0bca4576-84f4-4d90-8ffe-ffa030f20462" -version = "1.63.0" +version = "1.84.1" + +[[deps.SciMLNLSolve]] +deps = ["DiffEqBase", "LineSearches", "NLsolve", "Reexport", "SciMLBase"] +git-tree-sha1 = "66c7f901dbcad51791136e2d90ee67240256ecde" +uuid = "e9a6253c-8580-4d32-9898-8661bb511710" +version = "0.1.3" + +[[deps.SciMLOperators]] +deps = ["ArrayInterfaceCore", "DocStringExtensions", "Lazy", "LinearAlgebra", "Setfield", "SparseArrays", "StaticArraysCore", "Tricks"] +git-tree-sha1 = "c737d575c18bdf9aba0a3c7071d5249d09f45dd8" +uuid = "c0aeaf25-5076-4817-a8d5-81caf7dfa961" +version = "0.1.21" [[deps.Scratch]] deps = ["Dates"] @@ -1267,6 +1284,12 @@ git-tree-sha1 = "874e8867b33a00e784c8a7e4b60afe9e037b74e1" uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7" version = "1.1.0" +[[deps.SimpleNonlinearSolve]] +deps = ["ArrayInterfaceCore", "DiffEqBase", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "Reexport", "SciMLBase", "SnoopPrecompile", "StaticArraysCore"] +git-tree-sha1 = "3f558105e8ef4aac1e22bf30bd1f1e95698bfc95" +uuid = "727e6d20-b764-4bd8-a329-72de5adea6c7" +version = "0.1.10" + [[deps.SimpleTraits]] deps = ["InteractiveUtils", "MacroTools"] git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231" @@ -1274,34 +1297,41 @@ uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d" version = "0.9.4" [[deps.SnoopPrecompile]] -git-tree-sha1 = "f604441450a3c0569830946e5b33b78c928e1a85" +deps = ["Preferences"] +git-tree-sha1 = "e760a70afdcd461cf01a575947738d359234665c" uuid = "66db9d55-30c0-4569-8b51-7e840670fc0c" -version = "1.0.1" +version = "1.0.3" [[deps.Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" [[deps.SortingAlgorithms]] deps = ["DataStructures"] -git-tree-sha1 = "b3363d7460f7d098ca0912c69b082f75625d7508" +git-tree-sha1 = "a4ada03f999bd01b3a25dcaa30b2d929fe537e00" uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" -version = "1.0.1" +version = "1.1.0" [[deps.SparseArrays]] -deps = ["LinearAlgebra", "Random"] +deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [[deps.SparseDiffTools]] deps = ["Adapt", "ArrayInterfaceCore", "ArrayInterfaceStaticArrays", "Compat", "DataStructures", "FiniteDiff", "ForwardDiff", "Graphs", "LinearAlgebra", "Requires", "SparseArrays", "StaticArrays", "VertexSafeGraphs"] 
-git-tree-sha1 = "a434a4a3a5757440cb3b6500eb9690ff5a516cf6" +git-tree-sha1 = "4245283bee733122a9cb4545748d64e0c63337c0" uuid = "47a9eef4-7e08-11e9-0b38-333d64bd3804" -version = "1.27.0" +version = "1.30.0" [[deps.SparseMatricesCSR]] deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] -git-tree-sha1 = "4870b3e7db7063927b163fb981bd579410b68b2d" +git-tree-sha1 = "38677ca58e80b5cad2382e5a1848f93b054ad28d" uuid = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1" -version = "0.6.6" +version = "0.6.7" + +[[deps.Sparspak]] +deps = ["Libdl", "LinearAlgebra", "Logging", "OffsetArrays", "Printf", "SparseArrays", "Test"] +git-tree-sha1 = "342cf4b449c299d8d1ceaf00b7a49f4fbc7940e7" +uuid = "e56a9233-b9d6-4f03-8d0f-1825330902ac" +version = "0.3.9" [[deps.SpecialFunctions]] deps = ["ChainRulesCore", "IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] @@ -1311,15 +1341,15 @@ version = "2.1.7" [[deps.Static]] deps = ["IfElse"] -git-tree-sha1 = "de4f0a4f049a4c87e4948c04acff37baf1be01a6" +git-tree-sha1 = "c35b107b61e7f34fa3f124026f2a9be97dea9e1c" uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" -version = "0.7.7" +version = "0.8.3" [[deps.StaticArrays]] deps = ["LinearAlgebra", "Random", "StaticArraysCore", "Statistics"] -git-tree-sha1 = "f86b3a049e5d05227b10e15dbb315c5b90f14988" +git-tree-sha1 = "67d3e75e8af8089ea34ce96974d5468d4a008ca6" uuid = "90137ffa-7385-5640-81b9-e52037218182" -version = "1.5.9" +version = "1.5.15" [[deps.StaticArraysCore]] git-tree-sha1 = "6b7ba252635a5eff6a0b0664a41ee140a1c9e72a" @@ -1329,6 +1359,7 @@ version = "1.4.0" [[deps.Statistics]] deps = ["LinearAlgebra", "SparseArrays"] uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +version = "1.9.0" [[deps.StatsAPI]] deps = ["LinearAlgebra"] @@ -1342,17 +1373,11 @@ git-tree-sha1 = "d1bf48bfcc554a3761a133fe3a9bb01488e06916" uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" version = "0.33.21" -[[deps.StatsFuns]] -deps = ["ChainRulesCore", "HypergeometricFunctions", "InverseFunctions", "IrrationalConstants", "LogExpFunctions", "Reexport", "Rmath", "SpecialFunctions"] -git-tree-sha1 = "5783b877201a82fc0014cbf381e7e6eb130473a4" -uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -version = "1.0.1" - [[deps.StrideArraysCore]] deps = ["ArrayInterface", "CloseOpenIntervals", "IfElse", "LayoutPointers", "ManualMemory", "SIMDTypes", "Static", "ThreadingUtilities"] -git-tree-sha1 = "ac730bd978bf35f9fe45daa0bd1f51e493e97eb4" +git-tree-sha1 = "8114ba9c3694827838d45ea3c9f6b9ccb4182cf2" uuid = "7792a7ef-975c-4747-a70f-980b88e8d1da" -version = "0.3.15" +version = "0.4.7" [[deps.SuiteSparse]] deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] @@ -1361,12 +1386,18 @@ uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" [[deps.SuiteSparse_jll]] deps = ["Artifacts", "Libdl", "Pkg", "libblastrampoline_jll"] uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" -version = "5.10.1+0" +version = "5.10.1+6" + +[[deps.SymbolicIndexingInterface]] +deps = ["DocStringExtensions"] +git-tree-sha1 = "6b764c160547240d868be4e961a5037f47ad7379" +uuid = "2efcf032-c050-4f8e-a9bb-153293bab1f5" +version = "0.2.1" [[deps.TOML]] deps = ["Dates"] uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" -version = "1.0.0" +version = "1.0.3" [[deps.TableTraits]] deps = ["IteratorInterfaceExtensions"] @@ -1383,7 +1414,7 @@ version = "1.10.0" [[deps.Tar]] deps = ["ArgTools", "SHA"] uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" -version = "1.10.1" +version = "1.10.0" [[deps.TensorCore]] deps = ["LinearAlgebra"] @@ -1392,10 +1423,10 @@ uuid = "62fd8b95-f654-4bbd-a8a5-9c27f68ccd50" version = 
"0.1.1" [[deps.Tensors]] -deps = ["ForwardDiff", "LinearAlgebra", "SIMD", "StaticArrays", "Statistics"] -git-tree-sha1 = "2aeb143305a3ff33d3241263d13d14db64948a2d" +deps = ["ForwardDiff", "LinearAlgebra", "SIMD", "SnoopPrecompile", "StaticArrays", "Statistics"] +git-tree-sha1 = "71f054343e85ab1eab12bf8336004309002ff82d" uuid = "48a634ad-e948-5137-8d70-aa71f2a747f4" -version = "1.12.0" +version = "1.13.1" [[deps.Test]] deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] @@ -1403,27 +1434,27 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [[deps.ThreadingUtilities]] deps = ["ManualMemory"] -git-tree-sha1 = "f8629df51cab659d70d2e5618a430b4d3f37f2c3" +git-tree-sha1 = "c97f60dd4f2331e1a495527f80d242501d2f9865" uuid = "8290d209-cae3-49c0-8002-c8c24d57dab5" -version = "0.5.0" +version = "0.5.1" [[deps.TimerOutputs]] deps = ["ExprTools", "Printf"] -git-tree-sha1 = "9dfcb767e17b0849d6aaf85997c98a5aea292513" +git-tree-sha1 = "f2fd3f288dfc6f507b0c3a2eb3bac009251e548b" uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -version = "0.5.21" +version = "0.5.22" [[deps.TranscodingStreams]] deps = ["Random", "Test"] -git-tree-sha1 = "8a75929dcd3c38611db2f8d08546decb514fcadf" +git-tree-sha1 = "94f38103c984f89cf77c402f2a68dbd870f8165f" uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" -version = "0.9.9" +version = "0.9.11" [[deps.TriangularSolve]] -deps = ["CloseOpenIntervals", "IfElse", "LayoutPointers", "LinearAlgebra", "LoopVectorization", "Polyester", "SnoopPrecompile", "Static", "VectorizationBase"] -git-tree-sha1 = "fdddcf6b2c7751cd97de69c18157aacc18fbc660" +deps = ["CloseOpenIntervals", "IfElse", "LayoutPointers", "LinearAlgebra", "LoopVectorization", "Polyester", "Static", "VectorizationBase"] +git-tree-sha1 = "31eedbc0b6d07c08a700e26d31298ac27ef330eb" uuid = "d5829a12-d9aa-46ab-831f-fb7c9ab06edf" -version = "0.1.14" +version = "0.1.19" [[deps.Tricks]] git-tree-sha1 = "6bac775f2d42a611cdfcd1fb217ee719630c4175" @@ -1431,9 +1462,9 @@ uuid = "410a4b4d-49e4-4fbc-ab6d-cb71b17b3775" version = "0.1.6" [[deps.URIs]] -git-tree-sha1 = "e59ecc5a41b000fa94423a578d29290c7266fc10" +git-tree-sha1 = "074f993b0ca030848b897beff716d93aca60f06a" uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" -version = "1.4.0" +version = "1.4.2" [[deps.UUIDs]] deps = ["Random", "SHA"] @@ -1460,9 +1491,9 @@ version = "0.2.0" [[deps.VectorizationBase]] deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static"] -git-tree-sha1 = "866e77ea9c675306652f5b5b9010ccbccc684c79" +git-tree-sha1 = "4c59c2df8d2676c4691a39fa70495a6db0c5d290" uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" -version = "0.21.53" +version = "0.21.58" [[deps.VertexSafeGraphs]] deps = ["Graphs"] @@ -1472,9 +1503,9 @@ version = "0.2.0" [[deps.Wayland_jll]] deps = ["Artifacts", "Expat_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg", "XML2_jll"] -git-tree-sha1 = "3e61f0b86f90dacb0bc0e73a0c5a83f6a8636e23" +git-tree-sha1 = "ed8d92d9774b077c53e1da50fd81a36af3744c1c" uuid = "a2964d1f-97da-50d4-b82a-358c7fce9d89" -version = "1.19.0+0" +version = "1.21.0+0" [[deps.Wayland_protocols_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -1484,15 +1515,15 @@ version = "1.25.0+0" [[deps.WriteVTK]] deps = ["Base64", "CodecZlib", "FillArrays", "LightXML", "TranscodingStreams"] -git-tree-sha1 = "f50c47d715199601a54afdd5267f24c8174842ae" +git-tree-sha1 = "49353f30da65f377cff0f934bb9f562a2c0441b9" uuid = "64499a7a-5c06-52f2-abe2-ccb03c286192" -version = "1.16.0" +version = "1.17.1" [[deps.XML2_jll]] deps 
= ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"] -git-tree-sha1 = "58443b63fb7e465a8a7210828c91c08b92132dff" +git-tree-sha1 = "93c41695bc1c08c46c5899f4fe06d6ead504bb73" uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" -version = "2.9.14+0" +version = "2.10.3+0" [[deps.XSLT_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Libgpg_error_jll", "Libiconv_jll", "Pkg", "XML2_jll", "Zlib_jll"] @@ -1635,13 +1666,13 @@ version = "1.4.0+3" [[deps.Zlib_jll]] deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.12+3" +version = "1.2.13+0" [[deps.Zstd_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "e45044cd873ded54b6a5bac0eb5c971392cf1927" +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "c6edfe154ad7b313c01aceca188c05c835c67360" uuid = "3161d3a3-bdf6-5164-811a-617609db77b4" -version = "1.5.2+0" +version = "1.5.4+0" [[deps.ZygoteRules]] deps = ["MacroTools"] @@ -1674,9 +1705,9 @@ uuid = "0ac62f75-1d6f-5e53-bd7c-93b484bb37c0" version = "0.15.1+0" [[deps.libblastrampoline_jll]] -deps = ["Artifacts", "Libdl", "OpenBLAS_jll"] +deps = ["Artifacts", "Libdl"] uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" -version = "5.1.1+0" +version = "5.4.0+0" [[deps.libfdk_aac_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] diff --git a/docs/Project.toml b/docs/Project.toml index f80e8e3bfa..2b86e56fae 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -10,12 +10,14 @@ IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" +LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" +Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" Optim = "429524aa-4258-5aef-a3af-852621145aeb" OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Tensors = "48a634ad-e948-5137-8d70-aa71f2a747f4" diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index d5338ee95c..16c653bfd5 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -27,7 +27,7 @@ MPI.Init() # and distribute it across our processors using `generate_distributed_grid`. # dgrid = FerritePartitionedArrays.generate_distributed_grid(QuadraticQuadrilateral, (3, 1)); # dgrid = FerritePartitionedArrays.generate_distributed_grid(Tetrahedron, (2, 2, 2)); -dgrid = FerritePartitionedArrays.generate_distributed_grid(Hexahedron, (2, 1, 1)); #src +dgrid = FerritePartitionedArrays.generate_distributed_grid(Hexahedron, (2, 2, 2)); #src # dgrid = FerritePartitionedArrays.generate_distributed_grid(Tetrahedron, (3, 3, 3)); #src # ### Trial and test functions From c6c40d2765a99d9cd312db1f0a4f623676f1fa67 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Thu, 16 Feb 2023 22:42:11 +0100 Subject: [PATCH 094/124] Derp. 
--- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index f8a8c32e64..573d7c062c 100644 --- a/Project.toml +++ b/Project.toml @@ -23,6 +23,7 @@ FerritePartitionedArrays = ["MPI", "PartitionedArrays"] EnumX = "1" MPI = "^0.20.2" NearestNeighbors = "0.4" +PartitionedArrays = "0.2.15" Reexport = "1" Tensors = "^1.12" WriteVTK = "1.13" From 4911675ec81928b73dbe2ee90b43f6c645d47656 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Thu, 16 Feb 2023 23:03:13 +0100 Subject: [PATCH 095/124] Remove empty file. --- ext/FerritePartitionedArrays.jl | 1 - ext/FerritePartitionedArrays/iterators.jl | 0 2 files changed, 1 deletion(-) delete mode 100644 ext/FerritePartitionedArrays/iterators.jl diff --git a/ext/FerritePartitionedArrays.jl b/ext/FerritePartitionedArrays.jl index 4e0fdc21ec..9ec313103a 100644 --- a/ext/FerritePartitionedArrays.jl +++ b/ext/FerritePartitionedArrays.jl @@ -13,7 +13,6 @@ include("FerritePartitionedArrays/grid.jl") include("FerritePartitionedArrays/DistributedDofHandler.jl") include("FerritePartitionedArrays/assembler.jl") include("FerritePartitionedArrays/constraints.jl") -include("FerritePartitionedArrays/iterators.jl") include("FerritePartitionedArrays/vtk-export.jl") export diff --git a/ext/FerritePartitionedArrays/iterators.jl b/ext/FerritePartitionedArrays/iterators.jl deleted file mode 100644 index e69de29bb2..0000000000 From 9d5ee9f72d49ba0b31b42063c1514597b5f30d10 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Fri, 17 Feb 2023 01:27:08 +0100 Subject: [PATCH 096/124] Replace precomputed dofs with ad-hoc computed dofs. --- .../DistributedDofHandler.jl | 29 ++-- ext/FerritePartitionedArrays/assembler.jl | 29 ++-- src/Dofs/DofHandler.jl | 125 +++++++++++++++--- test/test_grid_dofhandler_vtk.jl | 54 ++++++++ 4 files changed, 182 insertions(+), 55 deletions(-) diff --git a/ext/FerritePartitionedArrays/DistributedDofHandler.jl b/ext/FerritePartitionedArrays/DistributedDofHandler.jl index 6cf47746b9..61c3e8508c 100644 --- a/ext/FerritePartitionedArrays/DistributedDofHandler.jl +++ b/ext/FerritePartitionedArrays/DistributedDofHandler.jl @@ -50,6 +50,8 @@ function Base.show(io::IO, ::MIME"text/plain", dh::DistributedDofHandler) end end +Ferrite.getdim(dh::DistributedDofHandler{dim}) where {dim} = dim + Ferrite.getlocalgrid(dh::DistributedDofHandler) = Ferrite.getlocalgrid(dh.grid) getglobalgrid(dh::DistributedDofHandler) = dh.grid @@ -87,36 +89,27 @@ function compute_dof_ownership(dh) for (lvi, sv) ∈ get_shared_vertices(dgrid) for field_idx in 1:num_fields(dh) - vi = Ferrite.toglobal(dgrid, lvi) - if Ferrite.has_vertex_dofs(dh, field_idx, vi) - local_dof_idx = Ferrite.vertex_dofs(dh, field_idx, vi) - for d in 1:dh.field_dims[field_idx] - dof_owner[local_dof_idx+d-1] = compute_owner(dgrid, sv) - end + if Ferrite.has_vertex_dofs(dh, field_idx, lvi) + local_dof_indices = Ferrite.vertex_dofs(dh, field_idx, lvi) + dof_owner[local_dof_indices] .= compute_owner(dgrid, sv) end end end for (lfi, sf) ∈ get_shared_faces(dgrid) for field_idx in 1:num_fields(dh) - fi = Ferrite.toglobal(dgrid, lfi) - if Ferrite.has_face_dofs(dh, field_idx, fi) - local_dof_idx = Ferrite.face_dofs(dh, field_idx, fi) - for d in 1:dh.field_dims[field_idx] - dof_owner[local_dof_idx+d-1] = compute_owner(dgrid, sf) - end + if Ferrite.has_face_dofs(dh, field_idx, lfi) + local_dof_indices = Ferrite.face_dofs(dh, field_idx, lfi) + dof_owner[local_dof_indices] .= compute_owner(dgrid, sf) end end end for (lei, se) ∈ get_shared_edges(dgrid) for field_idx in 
1:num_fields(dh) - ei = Ferrite.toglobal(dgrid, lei) - if Ferrite.has_edge_dofs(dh, field_idx, ei) - local_dof_idx = Ferrite.edge_dofs(dh, field_idx, ei) - for d in 1:dh.field_dims[field_idx] - dof_owner[local_dof_idx+d-1] = compute_owner(dgrid, se) - end + if Ferrite.has_edge_dofs(dh, field_idx, lei) + local_dof_indices = Ferrite.edge_dofs(dh, field_idx, lei) + dof_owner[local_dof_indices] .= compute_owner(dgrid, se) end end end diff --git a/ext/FerritePartitionedArrays/assembler.jl b/ext/FerritePartitionedArrays/assembler.jl index 6a9ce8a93a..8542abd33a 100644 --- a/ext/FerritePartitionedArrays/assembler.jl +++ b/ext/FerritePartitionedArrays/assembler.jl @@ -109,7 +109,6 @@ struct COOAssembler{T} # Start by searching shared entities which are not owned pivot_vertex_owner_rank = compute_owner(dgrid, pivot_shared_vertex) pivot_cell_idx = pivot_vertex[1] - pivot_vertex_global = Ferrite.toglobal(getlocalgrid(dgrid), pivot_vertex) if my_rank != pivot_vertex_owner_rank sender_slot = destination_index[pivot_vertex_owner_rank] @@ -120,17 +119,17 @@ struct COOAssembler{T} cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) - !Ferrite.has_vertex_dofs(dh, field_idx, pivot_vertex_global) && continue - pivot_vertex_dof = Ferrite.vertex_dofs(dh, field_idx, pivot_vertex_global) + !Ferrite.has_vertex_dofs(dh, field_idx, pivot_vertex) && continue + pivot_vertex_dofs = Ferrite.vertex_dofs(dh, field_idx, pivot_vertex) for d ∈ 1:dh.field_dims[field_idx] - @debug println(" adding dof $(pivot_vertex_dof+d-1) to ghost sync synchronization on slot $sender_slot (R$my_rank)") + @debug println(" adding dof $(pivot_vertex_dofs[d]) to ghost sync synchronization on slot $sender_slot (R$my_rank)") # Extract dofs belonging to the current field #cell_field_dofs = cell_dofs[dof_range(dh, field_name)] #for cell_field_dof ∈ cell_field_dofs for cell_dof ∈ cell_dofs - append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_vertex_dof+d-1]) + append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_vertex_dofs[d]]) append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_dof]) append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_dof]) append!(ghost_dof_field_index_to_send[sender_slot], field_idx) @@ -154,20 +153,18 @@ struct COOAssembler{T} cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? 
length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] - pivot_face_global = Ferrite.toglobal(getlocalgrid(dgrid), pivot_face) - for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) - !Ferrite.has_face_dofs(dh, field_idx, pivot_face_global) && continue - pivot_face_dof = Ferrite.face_dofs(dh, field_idx, pivot_face_global) + !Ferrite.has_face_dofs(dh, field_idx, pivot_face) && continue + pivot_face_dofs = Ferrite.face_dofs(dh, field_idx, pivot_face) for d ∈ 1:dh.field_dims[field_idx] - @debug println(" adding dof $(pivot_face_dof+d-1) to ghost sync synchronization on slot $sender_slot (R$my_rank)") + @debug println(" adding dof $(pivot_face_dofs[d]) to ghost sync synchronization on slot $sender_slot (R$my_rank)") # Extract dofs belonging to the current field #cell_field_dofs = cell_dofs[dof_range(dh, field_name)] #for cell_field_dof ∈ cell_field_dofs for cell_dof ∈ cell_dofs - append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_face_dof+d-1]) + append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_face_dofs[d]]) append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_dof]) append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_dof]) append!(ghost_dof_field_index_to_send[sender_slot], field_idx) @@ -192,19 +189,17 @@ struct COOAssembler{T} cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] - pivot_edge_global = Ferrite.toglobal(getlocalgrid(dgrid), pivot_edge) - for (field_idx, field_name) in zip(1:num_fields(dh), getfieldnames(dh)) - !Ferrite.has_edge_dofs(dh, field_idx, pivot_edge_global) && continue - pivot_edge_dof = Ferrite.edge_dofs(dh, field_idx, pivot_edge_global) + !Ferrite.has_edge_dofs(dh, field_idx, pivot_edge) && continue + pivot_edge_dofs = Ferrite.edge_dofs(dh, field_idx, pivot_edge) for d ∈ 1:dh.field_dims[field_idx] - @debug println(" adding dof $(pivot_edge_dof+d-1) to ghost sync synchronization on slot $sender_slot (R$my_rank)") + @debug println(" adding dof $(pivot_edge_dofs[d]) to ghost sync synchronization on slot $sender_slot (R$my_rank)") # Extract dofs belonging to the current field #cell_field_dofs = cell_dofs[dof_range(dh, field_name)] #for cell_field_dof ∈ cell_field_dofs for cell_dof ∈ cell_dofs - append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_edge_dof+d-1]) + append!(ghost_dof_pivot_to_send[sender_slot], ldof_to_gdof[pivot_edge_dofs[d]]) append!(ghost_dof_to_send[sender_slot], ldof_to_gdof[cell_dof]) append!(ghost_rank_to_send[sender_slot], ldof_to_rank[cell_dof]) append!(ghost_dof_field_index_to_send[sender_slot], field_idx) diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index e59ca0f1f4..c4551826a7 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -46,23 +46,98 @@ function Base.show(io::IO, ::MIME"text/plain", dh::DofHandler) end end +""" +Get the spatial dimension of a dofhandler. 
+""" +getdim(dh::DofHandler{dim}) where {dim} = dim + # has_entity_dof(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = haskey(dh.vertexdicts[field_idx], vertex) # has_entity_dof(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = haskey(dh.edgedicts[field_idx], edge) # has_entity_dof(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = haskey(dh.facedicts[field_idx], face) -has_cell_dofs(dh::AbstractDofHandler, field_idx::Int, cell::Int) = haskey(dh.celldicts[field_idx], cell) -has_vertex_dofs(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = haskey(dh.vertexdicts[field_idx], vertex) -has_edge_dofs(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = haskey(dh.edgedicts[field_idx], edge) -has_face_dofs(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = haskey(dh.facedicts[field_idx], face) +has_cell_dofs(dh::AbstractDofHandler, field_idx::Int, cell::Int) = ncelldofs(getfieldinterpolation(dh, field_idx)) > 0 +has_vertex_dofs(dh::AbstractDofHandler, field_idx::Int, vertex::VertexIndex) = nvertexdofs(getfieldinterpolation(dh, field_idx)) > 0 +has_edge_dofs(dh::AbstractDofHandler, field_idx::Int, edge::EdgeIndex) = nedgedofs(getfieldinterpolation(dh, field_idx)) > 0 +has_face_dofs(dh::AbstractDofHandler, field_idx::Int, face::FaceIndex) where {dim} = nfacedofs(getfieldinterpolation(dh, field_idx)) > 0 # entity_dofs(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = dh.vertexdicts[field_idx][vertex] # entity_dofs(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = dh.edgedicts[field_idx][edge] # entity_dofs(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = dh.facedicts[field_idx][face] -cell_dofs(dh::AbstractDofHandler, field_idx::Int, cell::Int) = dh.celldicts[field_idx][cell] -vertex_dofs(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = dh.vertexdicts[field_idx][vertex] -edge_dofs(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = dh.edgedicts[field_idx][edge][1] -face_dofs(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = dh.facedicts[field_idx][face] +""" +Compute the dofs belonging to a given cell of a given field. +""" +function cell_dofs(dh::AbstractDofHandler, field_idx::Int, cell::Int) + ip = getfieldinterpolation(dh, field_idx) + fdim = getfielddim(dh, field_idx) + nentitydofs = fdim*ncelldofs(ip) + totaldofs = fdim*getnbasefunctions(ip) + ldofs = dof_range(dh, field_idx)[(totaldofs-nentitydofs+1):totaldofs] + return celldofs(dh, cell)[ldofs] +end + +""" +Compute the dofs belonging to a given vertex of a given field. +""" +function vertex_dofs(dh::AbstractDofHandler, field_idx::Int, vertex::VertexIndex) + ip = getfieldinterpolation(dh, field_idx) + nvdofs = Ferrite.nvertexdofs(ip) + nvdofs == 0 && return Int[] + fdim = getfielddim(dh, field_idx) + cell,local_vertex_index = vertex + cell_geo = getcells(getgrid(dh), cell) + nvertices = length(Ferrite.vertices(cell_geo)) + nentitydofs = fdim*nvdofs*nvertices + ldofr = Ferrite.dof_range(dh, field_idx)[1:nentitydofs] + vdofs = Ferrite.celldofs(dh, cell)[ldofr] + return reshape(vdofs, (fdim,nvertices))[:, local_vertex_index] +end + +""" +Compute the dofs belonging to a given edge of a given field. 
+""" +function edge_dofs(dh::AbstractDofHandler, field_idx::Int, edge::EdgeIndex) + ip = getfieldinterpolation(dh, field_idx) + nedofs = Ferrite.nedgedofs(ip) + nedofs == 0 && return Int[] + nvdofs = Ferrite.nvertexdofs(ip) + fdim = getfielddim(dh, field_idx) + cell,local_edge_index = edge + cell_geo = getcells(getgrid(dh), cell) + nedges_on_cell = length(Ferrite.edges(cell_geo)) + nvertices_on_cell = length(Ferrite.vertices(cell_geo)) + nentitydofs = fdim*nedofs*nedges_on_cell + offset = fdim*nvdofs*nvertices_on_cell + edge_dofrange = Ferrite.dof_range(dh, field_idx)[(offset+1):(offset+nentitydofs)] + lodal_edgedofs = Ferrite.celldofs(dh, cell)[edge_dofrange] + return reshape(lodal_edgedofs, (fdim,nedges_on_cell))[:, local_edge_index] +end + +""" +Compute the dofs belonging to a given face of a given field. +""" +function face_dofs(dh::AbstractDofHandler, field_idx::Int, face::FaceIndex) + ip = Ferrite.getfieldinterpolation(dh, field_idx) + dim = getdim(dh) + nfdofs = Ferrite.nfacedofs(ip) + nfdofs == 0 && return Int[] + nvdofs = Ferrite.nvertexdofs(ip) + fdim = getfielddim(dh, field_idx) + cell,local_face_index = face + cell_geo = getcells(getgrid(dh), cell) + nedges_on_cell = length(Ferrite.edges(cell_geo)) + nfaces_on_cell = length(Ferrite.faces(cell_geo)) + nvertices_on_cell = length(Ferrite.vertices(cell_geo)) + nentitydofs = fdim*Ferrite.nfacedofs(ip)*nfaces_on_cell + offset = fdim*nvdofs*nvertices_on_cell + if dim > 2 + nedofs = Ferrite.nedgedofs(ip) + offset += fdim*nedofs*nedges_on_cell + end + face_dofrange = Ferrite.dof_range(dh, field_idx)[(offset+1):(offset+nentitydofs)] + local_facedofs = Ferrite.celldofs(dh, cell)[face_dofrange] + return reshape(local_facedofs, (fdim,nfaces_on_cell))[:, local_face_index] +end """ ndofs(dh::AbstractDofHandler) @@ -78,6 +153,7 @@ getfieldinterpolation(dh::AbstractDofHandler, field_idx::Int) = dh.field_interpo getfielddim(dh::AbstractDofHandler, field_idx::Int) = dh.field_dims[field_idx] getbcvalue(dh::AbstractDofHandler, field_idx::Int) = dh.bc_values[field_idx] getgrid(dh::AbstractDofHandler) = dh.grid + function find_field(dh::AbstractDofHandler, field_name::Symbol) j = findfirst(i->i == field_name, dh.field_names) j === nothing && error("could not find field :$field_name in DofHandler (existing fields: $(getfieldnames(dh)))") @@ -85,18 +161,27 @@ function find_field(dh::AbstractDofHandler, field_name::Symbol) end # Calculate the offset to the first local dof of a field -function field_offset(dh::AbstractDofHandler, field_name::Symbol) +function field_offset(dh::AbstractDofHandler, field_idx::Int) offset = 0 - for i in 1:find_field(dh, field_name)-1 + for i in 1:field_idx-1 offset += getnbasefunctions(getfieldinterpolation(dh,i))::Int * getfielddim(dh, i) end return offset end -function getfielddim(dh::AbstractDofHandler, name::Symbol) - field_pos = findfirst(i->i == name, getfieldnames(dh)) - field_pos === nothing && error("did not find field $name") - return getfielddim(dh, field_pos) +function field_offset(dh::AbstractDofHandler, field_name::Symbol) + field_idx = findfirst(i->i == field_name, getfieldnames(dh)) + field_idx === nothing && error("did not find field $field_name") + return field_offset(dh,field_idx) +end + + +""" +""" +function dof_range(dh::AbstractDofHandler, field_idx::Int) + offset = field_offset(dh, field_idx) + n_field_dofs = getnbasefunctions(getfieldinterpolation(dh, field_idx))::Int * getfielddim(dh, field_idx) + return (offset+1):(offset+n_field_dofs) end """ @@ -118,10 +203,9 @@ julia> dof_range(dh, :p) ``` """ 
function dof_range(dh::AbstractDofHandler, field_name::Symbol) - f = find_field(dh, field_name) - offset = field_offset(dh, field_name) - n_field_dofs = getnbasefunctions(dh.field_interpolations[f])::Int * getfielddim(dh, f) - return (offset+1):(offset+n_field_dofs) + field_idx = findfirst(i->i == field_name, getfieldnames(dh)) + field_idx === nothing && error("did not find field $field_name") + return dof_range(dh, field_idx) end """ @@ -170,7 +254,8 @@ function sortface(face::Tuple{Int,Int,Int,Int}) end function close!(dh::DofHandler) - return __close!(dh) + __close!(dh) + return dh end # close the DofHandler and distribute all the dofs @@ -320,7 +405,7 @@ function __close!(dh::DofHandler{dim}) where {dim} dh.ndofs[] = maximum(dh.cell_dofs) dh.closed[] = true - return dh + return dh.vertexdicts, dh.edgedicts, dh.facedicts end function celldofs!(global_dofs::Vector{Int}, dh::DofHandler, i::Int) diff --git a/test/test_grid_dofhandler_vtk.jl b/test/test_grid_dofhandler_vtk.jl index cddecf9020..02c0647d91 100644 --- a/test/test_grid_dofhandler_vtk.jl +++ b/test/test_grid_dofhandler_vtk.jl @@ -454,4 +454,58 @@ end close!(dh) @test celldofs(dh, 1) == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] @test celldofs(dh, 2) == [2, 22, 3, 23, 24, 25, 26, 27, 28, 29, 30, 11, 10, 9, 8, 31, 32, 33, 34, 35, 36] + + # Consistency check for dof computation. + grid = generate_grid(Hexahedron, (2, 2, 2)) + dh = DofHandler(grid) + push!(dh, :u, 3, Lagrange{3,RefCube,2}()) + push!(dh, :v, 1, Lagrange{3,RefCube,2}()) + push!(dh, :w, 3, Lagrange{3,RefCube,1}()) + push!(dh, :x, 3, Lagrange{3,RefCube,2}()) + vertexdicts, edgedicts, facedicts = Ferrite.__close!(dh) + @test Ferrite.find_field(dh, :u) == 1 + @test Ferrite.find_field(dh, :v) == 2 + @test Ferrite.find_field(dh, :w) == 3 + for (ci,cell) ∈ enumerate(getcells(grid)) + for field_idx ∈ 1:3 + for vi ∈ 1:length(Ferrite.vertices(cell)) + vertex = VertexIndex(ci,vi) + global_vertex = Ferrite.toglobal(grid, vertex) + dofs = Ferrite.vertex_dofs(dh, field_idx, vertex) + if !haskey(vertexdicts[field_idx], global_vertex) + @test isempty(dofs) + @test !Ferrite.has_vertex_dofs(dh, field_idx, vertex) + else + @test vertexdicts[field_idx][global_vertex] == dofs[1] + @test Ferrite.has_vertex_dofs(dh, field_idx, vertex) + end + end + + for fi ∈ 1:length(Ferrite.faces(cell)) + face = FaceIndex(ci,fi) + global_face = Ferrite.toglobal(grid, face) + dofs = Ferrite.face_dofs(dh, field_idx, face) + if !haskey(facedicts[field_idx], global_face) + @test isempty(dofs) + @test !Ferrite.has_face_dofs(dh, field_idx, face) + else + @test facedicts[field_idx][global_face] == dofs[1] + @test Ferrite.has_face_dofs(dh, field_idx, face) + end + end + + for ei ∈ 1:length(Ferrite.edges(cell)) + edge = EdgeIndex(ci,ei) + global_edge = Ferrite.toglobal(grid, edge) + dofs = Ferrite.edge_dofs(dh, field_idx, edge) + if !haskey(edgedicts[field_idx], global_edge) + @test isempty(dofs) + @test !Ferrite.has_edge_dofs(dh, field_idx, edge) + else + @test edgedicts[field_idx][global_edge][1] == dofs[1] + @test Ferrite.has_edge_dofs(dh, field_idx, edge) + end + end + end + end end From d0f7918767766abccba81bc1afeca7549d5876b4 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Fri, 17 Feb 2023 02:50:51 +0100 Subject: [PATCH 097/124] Remove helper dicts in dof handlers.
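With the previous commit the entity dof queries (`cell_dofs`, `vertex_dofs`, `edge_dofs`, `face_dofs`) are computed on the fly from `celldofs` plus the interpolation layout, so the `vertexdicts`/`edgedicts`/`facedicts`/`celldicts` caches no longer have to be stored on the dof handlers. The sketch below illustrates the lookup pattern that replaces the dict access; the grid and field setup are only an illustrative assumption and not part of this patch.

```julia
using Ferrite

# Illustrative setup (assumption): a small serial grid with one vector field.
grid = generate_grid(Hexahedron, (2, 2, 2))
dh = DofHandler(grid)
push!(dh, :u, 3, Lagrange{3,RefCube,2}())
close!(dh)

# Entity dofs are recovered from the cell dofs and the interpolation layout
# instead of being looked up in vertexdicts/edgedicts/facedicts.
vdofs = Ferrite.vertex_dofs(dh, 1, VertexIndex(1, 1))  # field 1, local vertex 1 of cell 1
edofs = Ferrite.edge_dofs(dh, 1, EdgeIndex(1, 1))      # field 1, local edge 1 of cell 1
fdofs = Ferrite.face_dofs(dh, 1, FaceIndex(1, 1))      # field 1, local face 1 of cell 1
```

Trading the dict lookups for this bit of index arithmetic keeps the distributed handler free of the global entity dictionaries after `close!`.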
--- .../DistributedDofHandler.jl | 221 ++++++++---------- src/Dofs/DofHandler.jl | 61 ++--- test/test_grid_dofhandler_vtk.jl | 2 +- 3 files changed, 128 insertions(+), 156 deletions(-) diff --git a/ext/FerritePartitionedArrays/DistributedDofHandler.jl b/ext/FerritePartitionedArrays/DistributedDofHandler.jl index 61c3e8508c..278f76eae0 100644 --- a/ext/FerritePartitionedArrays/DistributedDofHandler.jl +++ b/ext/FerritePartitionedArrays/DistributedDofHandler.jl @@ -22,18 +22,13 @@ struct DistributedDofHandler{dim,T,G<:Ferrite.AbstractDistributedGrid{dim}} <: F grid::G ndofs::Ferrite.ScalarWrapper{Int} - vertexdicts::Vector{Dict{Int,Int}} - edgedicts::Vector{Dict{Tuple{Int,Int},Tuple{Int,Bool}}} - facedicts::Vector{Dict{NTuple{dim,Int},Int}} - celldicts::Vector{Dict{Int,Vector{Int}}} - ldof_to_gdof::Vector{Int} ldof_to_rank::Vector{Int32} end function DistributedDofHandler(grid::Ferrite.AbstractDistributedGrid{dim}) where {dim} isconcretetype(getcelltype(grid)) || error("Grid includes different celltypes. DistributedMixedDofHandler not implemented yet.") - DistributedDofHandler(Symbol[], Int[], Interpolation[], Ferrite.BCValues{Float64}[], Int[], Int[], Ferrite.ScalarWrapper(false), grid, Ferrite.ScalarWrapper(-1), Dict{Int,Int}[], Dict{Tuple{Int,Int},Tuple{Int,Bool}}[],Dict{NTuple{dim,Int},Int}[], Dict{Int,Vector{Int}}[], Int[], Int32[]) + DistributedDofHandler(Symbol[], Int[], Interpolation[], Ferrite.BCValues{Float64}[], Int[], Int[], Ferrite.ScalarWrapper(false), grid, Ferrite.ScalarWrapper(-1), Int[], Int32[]) end function Base.show(io::IO, ::MIME"text/plain", dh::DistributedDofHandler) @@ -165,13 +160,12 @@ function local_to_global_numbering(dh::DistributedDofHandler) # * cell: Just the cell itself # * All other entities: All cells for which one of the corresponding entities interior intersects # with the interior of the entity in question. 
- # TODO: implement for entitied with dim > 0 next_local_idx = 1 for (ci, cell) in enumerate(getcells(getgrid(dh))) Ferrite.@debug println("cell #$ci (R$my_rank)") for field_idx in 1:num_fields(dh) Ferrite.@debug println(" field: $(dh.field_names[field_idx]) (R$my_rank)") - interpolation_info = Ferrite.InterpolationInfo(dh.field_interpolations[field_idx]) + interpolation_info = Ferrite.InterpolationInfo(Ferrite.getfieldinterpolation(dh, field_idx)) if interpolation_info.nvertexdofs > 0 for (vi,vertex) in enumerate(Ferrite.vertices(cell)) Ferrite.@debug println(" vertex#$vertex (R$my_rank)") @@ -179,16 +173,16 @@ function local_to_global_numbering(dh::DistributedDofHandler) # Dof is owned if it is local or if my rank is the smallest in the neighborhood if !is_shared_vertex(dgrid, lvi) || (compute_owner(dgrid, get_shared_vertex(dgrid, lvi)) == my_rank) # Update dof assignment - dof_local_idx = dh.vertexdicts[field_idx][vertex] - if local_to_global[dof_local_idx] == 0 - for d in 1:dh.field_dims[field_idx] - Ferrite.@debug println(" mapping vertex dof#$dof_local_idx to $next_local_idx (R$my_rank)") - local_to_global[dof_local_idx+d-1] = next_local_idx + dof_local_indices = Ferrite.vertex_dofs(dh, field_idx, lvi) + if local_to_global[dof_local_indices[1]] == 0 + for d in 1:getfielddim(dh, field_idx) + Ferrite.@debug println(" mapping vertex dof#$dof_local_indices[d] to $next_local_idx (R$my_rank)") + local_to_global[dof_local_indices[d]] = next_local_idx next_local_idx += 1 end else - for d in 1:dh.field_dims[field_idx] - Ferrite.@debug println(" vertex dof#$(dof_local_idx+d-1) already mapped to $(local_to_global[dof_local_idx+d-1]) (R$my_rank)") + for d in 1:getfielddim(dh, field_idx) + Ferrite.@debug println(" vertex dof#$(dof_local_indices[d]) already mapped to $(local_to_global[dof_local_indices[d]]) (R$my_rank)") end end end @@ -230,16 +224,16 @@ function local_to_global_numbering(dh::DistributedDofHandler) # Dof is owned if it is local or if my rank is the smallest in the neighborhood if !is_shared_edge(dgrid, lei) || (compute_owner(dgrid, get_shared_edge(dgrid, lei)) == my_rank) # Update dof assignment - dof_local_idx = dh.edgedicts[field_idx][Ferrite.toglobal(getlocalgrid(dgrid), lei)][1] - if local_to_global[dof_local_idx] == 0 - for d in 1:dh.field_dims[field_idx] - Ferrite.@debug println(" mapping edge dof#$(dof_local_idx+d-1) to $next_local_idx (R$my_rank)") - local_to_global[dof_local_idx+d-1] = next_local_idx + dof_local_indices = Ferrite.edge_dofs(dh, field_idx, lei) + if local_to_global[dof_local_indices[1]] == 0 + for d in 1:getfielddim(dh, field_idx) + Ferrite.@debug println(" mapping edge dof#$(dof_local_indices[d]) to $next_local_idx (R$my_rank)") + local_to_global[dof_local_indices[d]] = next_local_idx next_local_idx += 1 end else - for d in 1:dh.field_dims[field_idx] - Ferrite.@debug println(" edge dof#$(dof_local_idx+d-1) already mapped to $(local_to_global[dof_local_idx+d-1]) (R$my_rank)") + for d in 1:getfielddim(dh, field_idx) + Ferrite.@debug println(" edge dof#$(dof_local_indices[d]) already mapped to $(local_to_global[dof_local_indices[d]]) (R$my_rank)") end end end @@ -280,16 +274,16 @@ function local_to_global_numbering(dh::DistributedDofHandler) # Dof is owned if it is local or if my rank is the smallest in the neighborhood if !is_shared_face(dgrid, lfi) || (compute_owner(dgrid, get_shared_face(dgrid, lfi)) == my_rank) # Update dof assignment - dof_local_idx = dh.facedicts[field_idx][Ferrite.toglobal(getlocalgrid(dgrid), lfi)] - if local_to_global[dof_local_idx] 
== 0 - for d in 1:dh.field_dims[field_idx] - Ferrite.@debug println(" mapping face dof#$(dof_local_idx+d-1) to $next_local_idx (R$my_rank)") - local_to_global[dof_local_idx+d-1] = next_local_idx + dof_local_indices = Ferrite.face_dofs(dh, field_idx, lfi) + if local_to_global[dof_local_indices[1]] == 0 + for d in 1:getfielddim(dh, field_idx) + Ferrite.@debug println(" mapping face dof#$(dof_local_indices[d]) to $next_local_idx (R$my_rank)") + local_to_global[dof_local_indices[d]] = next_local_idx next_local_idx += 1 end else - for d in 1:dh.field_dims[field_idx] - Ferrite.@debug println(" face dof#$(dof_local_idx+d-1) already mapped to $(local_to_global[dof_local_idx+d-1]) (R$my_rank)") + for d in 1:getfielddim(dh, field_idx) + Ferrite.@debug println(" face dof#$(dof_local_indices[d]) already mapped to $(local_to_global[dof_local_indices[d]]) (R$my_rank)") end end end @@ -325,19 +319,19 @@ function local_to_global_numbering(dh::DistributedDofHandler) if interpolation_info.ncelldofs > 0 # always distribute new dofs for cell Ferrite.@debug println(" cell#$ci") - for celldof in 1:interpolation_info.ncelldofs + if interpolation_info.ncelldofs > 0 # Update dof assignment - dof_local_idx = dh.celldicts[field_idx][ci][celldof] - if local_to_global[dof_local_idx] == 0 - for d in 1:dh.field_dims[field_idx] - Ferrite.@debug println(" mapping cell dof#$(dof_local_idx+d-1) to $next_local_idx (R$my_rank)") - local_to_global[dof_local_idx+d-1] = next_local_idx + dof_local_indices = Ferrite.cell_dofs(dh, field_idx, ci) + if local_to_global[dof_local_indices[1]] == 0 + for d in 1:getfielddim(dh, field_idx) + Ferrite.@debug println(" mapping cell dof#$(dof_local_indices[d]) to $next_local_idx (R$my_rank)") + local_to_global[dof_local_indices[d]] = next_local_idx next_local_idx += 1 end else - for d in 1:dh.field_dims[field_idx] + for d in 1:getfielddim(dh, field_idx) # Should never happen... - Ferrite.@debug println(" WARNING! cell dof#$(dof_local_idx+d-1) already mapped to $(local_to_global[dof_local_idx+d-1]) (R$my_rank)") + Ferrite.@debug println(" WARNING! 
cell dof#$(dof_local_indices[d]) already mapped to $(local_to_global[dof_local_indices[d]]) (R$my_rank)") end end end # cell loop @@ -393,18 +387,20 @@ function local_to_global_numbering(dh::DistributedDofHandler) end MPI.Send(remote_cells, global_comm(dgrid); dest=remote_rank-1) MPI.Send(remote_cell_vis, global_comm(dgrid); dest=remote_rank-1) - for fi ∈ 1:num_fields(dh) + for field_idx ∈ 1:num_fields(dh) next_buffer_idx = 1 - if length(dh.vertexdicts[fi]) == 0 - Ferrite.@debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") + ip = Ferrite.getfieldinterpolation(dh, field_idx) + if Ferrite.nvertexdofs(ip) == 0 + Ferrite.@debug println("Skipping send vertex on field $(dh.field_names[field_idx]) (R$my_rank)") continue end # fill correspondence array corresponding_global_dofs = Array{Int64}(undef,n_vertices) - for (lci,lclvi) ∈ vertices_send[remote_rank] - vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] - if haskey(dh.vertexdicts[fi], vi) - corresponding_global_dofs[next_buffer_idx] = local_to_global[dh.vertexdicts[fi][vi]] + for vertex ∈ vertices_send[remote_rank] + if Ferrite.has_vertex_dofs(dh, field_idx, vertex) + # We just put the first dof into the array to reduce communication + vdofs = Ferrite.vertex_dofs(dh, field_idx, vertex) + corresponding_global_dofs[next_buffer_idx] = local_to_global[vdofs[1]] end next_buffer_idx += 1 end @@ -429,18 +425,20 @@ function local_to_global_numbering(dh::DistributedDofHandler) end MPI.Send(remote_cells, global_comm(dgrid); dest=remote_rank-1) MPI.Send(remote_cell_vis, global_comm(dgrid); dest=remote_rank-1) - for fi ∈ 1:num_fields(dh) + for field_idx ∈ 1:num_fields(dh) next_buffer_idx = 1 - if length(dh.facedicts[fi]) == 0 - Ferrite.@debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") + ip = Ferrite.getfieldinterpolation(dh, field_idx) + if Ferrite.nfacedofs(ip) == 0 + Ferrite.@debug println("Skipping send faces on field $(dh.field_names[field_idx]) (R$my_rank)") continue end # fill correspondence array corresponding_global_dofs = Array{Int64}(undef,n_faces) - for (lci,lclvi) ∈ faces_send[remote_rank] - vi = Ferrite.sortface(Ferrite.faces(getcells(getgrid(dh),lci))[lclvi]) - if haskey(dh.facedicts[fi], vi) - corresponding_global_dofs[next_buffer_idx] = local_to_global[dh.facedicts[fi][vi]] + for face ∈ faces_send[remote_rank] + if Ferrite.has_face_dofs(dh, field_idx, face) + # We just put the first dof into the array to reduce communication + fdofs = Ferrite.face_dofs(dh, field_idx, face) + corresponding_global_dofs[next_buffer_idx] = local_to_global[fdofs[1]] end next_buffer_idx += 1 end @@ -475,18 +473,20 @@ function local_to_global_numbering(dh::DistributedDofHandler) end MPI.Send(remote_cells, global_comm(dgrid); dest=remote_rank-1) MPI.Send(remote_cell_vis, global_comm(dgrid); dest=remote_rank-1) - for fi ∈ 1:num_fields(dh) + for field_idx ∈ 1:num_fields(dh) next_buffer_idx = 1 - if length(dh.edgedicts[fi]) == 0 - Ferrite.@debug println("Skipping send on field $(dh.field_names[fi]) (R$my_rank)") + ip = Ferrite.getfieldinterpolation(dh, field_idx) + if Ferrite.nedgedofs(ip) == 0 + Ferrite.@debug println("Skipping send edges on field $(dh.field_names[field_idx]) (R$my_rank)") continue end # fill correspondence array corresponding_global_dofs = Array{Int64}(undef,n_edges) - for (lci,lclvi) ∈ edges_send_unique - vi = Ferrite.sortedge(Ferrite.edges(getcells(getgrid(dh),lci))[lclvi])[1] - if haskey(dh.edgedicts[fi], vi) - corresponding_global_dofs[next_buffer_idx] = 
local_to_global[dh.edgedicts[fi][vi][1]] + for edge ∈ edges_send_unique + if Ferrite.has_edge_dofs(dh, field_idx, edge) + # We just put the first dof into the array to reduce communication + edofs = Ferrite.edge_dofs(dh, field_idx, edge) + corresponding_global_dofs[next_buffer_idx] = local_to_global[edofs[1]] end next_buffer_idx += 1 end @@ -503,21 +503,22 @@ function local_to_global_numbering(dh::DistributedDofHandler) MPI.Recv!(local_cells, global_comm(dgrid); source=sending_rank-1) MPI.Recv!(local_cell_vis, global_comm(dgrid); source=sending_rank-1) for field_idx in 1:num_fields(dh) - if length(dh.vertexdicts[field_idx]) == 0 - Ferrite.@debug println(" Skipping recv on field $(dh.field_names[field_idx]) (R$my_rank)") + ip = Ferrite.getfieldinterpolation(dh, field_idx) + if Ferrite.nvertexdofs(ip) == 0 + Ferrite.@debug println(" Skipping recv of vertices on field $(dh.field_names[field_idx]) (R$my_rank)") continue end corresponding_global_dofs = Array{Int64}(undef,n_vertices) MPI.Recv!(corresponding_global_dofs, global_comm(dgrid); source=sending_rank-1) - for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) - vi = Ferrite.vertices(getcells(getgrid(dh),lci))[lclvi] - if haskey(dh.vertexdicts[field_idx], vi) - for d in 1:dh.field_dims[field_idx] - local_to_global[dh.vertexdicts[field_idx][vi]+d-1] = corresponding_global_dofs[cdi]+d-1 - Ferrite.@debug println(" Updating field $(dh.field_names[field_idx]) vertex $(VertexIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]+d-1) (R$my_rank)") + for (cdi,vertex) ∈ enumerate(VertexIndex.(zip(local_cells,local_cell_vis))) + if Ferrite.has_vertex_dofs(dh, field_idx, vertex) + vdofs = Ferrite.vertex_dofs(dh, field_idx, vertex) + for d in 1:getfielddim(dh, field_idx) + local_to_global[vdofs[d]] = corresponding_global_dofs[cdi]+d-1 + Ferrite.@debug println(" Updating field $(dh.field_names[field_idx]) vertex $vertex to $(corresponding_global_dofs[cdi]+d-1) (R$my_rank)") end else - Ferrite.@debug println(" Skipping recv on field $(dh.field_names[field_idx]) vertex $vi (R$my_rank)") + Ferrite.@debug println(" Skipping recv on field $(dh.field_names[field_idx]) vertex $vertex (R$my_rank)") end end end @@ -531,21 +532,22 @@ function local_to_global_numbering(dh::DistributedDofHandler) MPI.Recv!(local_cells, global_comm(dgrid); source=sending_rank-1) MPI.Recv!(local_cell_vis, global_comm(dgrid); source=sending_rank-1) for field_idx in 1:num_fields(dh) - if length(dh.facedicts[field_idx]) == 0 - Ferrite.@debug println(" Skipping recv on field $(dh.field_names[field_idx]) (R$my_rank)") + ip = Ferrite.getfieldinterpolation(dh, field_idx) + if Ferrite.nfacedofs(ip) == 0 + Ferrite.@debug println(" Skipping recv of faces on field $(dh.field_names[field_idx]) (R$my_rank)") continue end corresponding_global_dofs = Array{Int64}(undef,n_faces) MPI.Recv!(corresponding_global_dofs, global_comm(dgrid); source=sending_rank-1) - for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) - vi = Ferrite.sortface(Ferrite.faces(getcells(getgrid(dh),lci))[lclvi]) - if haskey(dh.facedicts[field_idx], vi) - for d in 1:dh.field_dims[field_idx] - local_to_global[dh.facedicts[field_idx][vi]+d-1] = corresponding_global_dofs[cdi]+d-1 - Ferrite.@debug println(" Updating field $(dh.field_names[field_idx]) face $(FaceIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") + for (cdi,face) ∈ enumerate(FaceIndex.(zip(local_cells,local_cell_vis))) + if Ferrite.has_face_dofs(dh, field_idx, face) + fdofs = Ferrite.face_dofs(dh, field_idx, face) + for d 
in 1:getfielddim(dh, field_idx) + local_to_global[fdofs[d]] = corresponding_global_dofs[cdi]+d-1 + Ferrite.@debug println(" Updating field $(dh.field_names[field_idx]) face $face to $(corresponding_global_dofs[cdi]) (R$my_rank)") end else - Ferrite.@debug println(" Skipping recv on field $(dh.field_names[field_idx]) face $vi (R$my_rank)") + Ferrite.@debug println(" Skipping recv on field $(dh.field_names[field_idx]) face $face (R$my_rank)") end end end @@ -564,22 +566,23 @@ function local_to_global_numbering(dh::DistributedDofHandler) MPI.Recv!(local_cells, global_comm(dgrid); source=sending_rank-1) MPI.Recv!(local_cell_vis, global_comm(dgrid); source=sending_rank-1) for field_idx in 1:num_fields(dh) - if length(dh.edgedicts[field_idx]) == 0 + ip = Ferrite.getfieldinterpolation(dh, field_idx) + if Ferrite.nedgedofs(ip) == 0 Ferrite.@debug println(" Skipping recv on field $(dh.field_names[field_idx]) (R$my_rank)") continue end corresponding_global_dofs = Array{Int64}(undef,n_edges) MPI.Recv!(corresponding_global_dofs, global_comm(dgrid); source=sending_rank-1) Ferrite.@debug println(" Received $corresponding_global_dofs edge dofs from $sending_rank (R$my_rank)") - for (cdi,(lci,lclvi)) ∈ enumerate(zip(local_cells,local_cell_vis)) - vi = Ferrite.sortedge(Ferrite.edges(getcells(getgrid(dh),lci))[lclvi])[1] - if haskey(dh.edgedicts[field_idx], vi) - for d in 1:dh.field_dims[field_idx] - local_to_global[dh.edgedicts[field_idx][vi][1]+d-1] = corresponding_global_dofs[cdi]+d-1 - Ferrite.@debug println(" Updating field $(dh.field_names[field_idx]) edge $(EdgeIndex(lci,lclvi)) to $(corresponding_global_dofs[cdi]) (R$my_rank)") + for (cdi,edge) ∈ enumerate(EdgeIndex.(zip(local_cells,local_cell_vis))) + if Ferrite.has_edge_dofs(dh, field_idx, edge) + edofs = Ferrite.edge_dofs(dh, field_idx, edge) + for d in 1:getfielddim(dh, field_idx) + local_to_global[edofs[d]] = corresponding_global_dofs[cdi]+d-1 + Ferrite.@debug println(" Updating field $(dh.field_names[field_idx]) edge $edge to $(corresponding_global_dofs[cdi]) (R$my_rank)") end else - Ferrite.@debug println(" Skipping recv on field $(dh.field_names[field_idx]) edge $vi (R$my_rank)") + Ferrite.@debug println(" Skipping recv on field $(dh.field_names[field_idx]) edge $edge (R$my_rank)") end end end @@ -618,34 +621,20 @@ function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} # `vertexdict` keeps track of the visited vertices. We store the global vertex # number and the first dof we added to that vertex. - resize!(dh.vertexdicts, num_fields(dh)) - for i in 1:num_fields(dh) - dh.vertexdicts[i] = Dict{Tuple{Int,Int},Tuple{Int,Bool}}() - end + vertexdicts = [Dict{Int,Int}() for _ in 1:num_fields(dh)] # `edgedict` keeps track of the visited edges, this will only be used for a 3D problem # An edge is determined from two vertices, but we also need to store the direction # of the first edge we encounter and add dofs too. When we encounter the same edge # the next time we check if the direction is the same, otherwise we reuse the dofs # in the reverse order - resize!(dh.edgedicts, num_fields(dh)) - for i in 1:num_fields(dh) - dh.edgedicts[i] = Dict{Tuple{Int,Int},Tuple{Int,Bool}}() - end + edgedicts = [Dict{Tuple{Int,Int},Tuple{Int,Bool}}() for _ in 1:num_fields(dh)] # `facedict` keeps track of the visited faces. We only need to store the first dof we # added to the face; if we encounter the same face again we *always* reverse the order # In 2D a face (i.e. a line) is uniquely determined by 2 vertices, and in 3D a # face (i.e. 
a surface) is uniquely determined by 3 vertices. - resize!(dh.facedicts, num_fields(dh)) - for i in 1:num_fields(dh) - dh.facedicts[i] = Dict{NTuple{dim,Int},Int}() - end - - resize!(dh.celldicts, num_fields(dh)) - for i in 1:num_fields(dh) - dh.celldicts[i] = Dict{Int,Vector{Int}}() - end + facedicts = [Dict{NTuple{dim,Int},Int}() for _ in 1:num_fields(dh)] # celldofs are never shared between different cells so there is no need # for a `celldict` to keep track of which cells we have added dofs too. @@ -674,16 +663,16 @@ function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} if interpolation_info.nvertexdofs > 0 for vertex in Ferrite.vertices(cell) Ferrite.@debug println(" vertex#$vertex") - token = Base.ht_keyindex2!(dh.vertexdicts[fi], vertex) - if token > 0 # haskey(dh.vertexdicts[fi], vertex) # reuse dofs - reuse_dof = dh.vertexdicts[fi].vals[token] # dh.vertexdicts[fi][vertex] + token = Base.ht_keyindex2!(vertexdicts[fi], vertex) + if token > 0 # haskey(vertexdicts[fi], vertex) # reuse dofs + reuse_dof = vertexdicts[fi].vals[token] # vertexdicts[fi][vertex] for d in 1:dh.field_dims[fi] Ferrite.@debug println(" reusing dof #$(reuse_dof + (d-1))") push!(dh.cell_dofs, reuse_dof + (d-1)) end else # token <= 0, distribute new dofs for vertexdof in 1:interpolation_info.nvertexdofs - Base._setindex!(dh.vertexdicts[fi], nextdof, vertex, -token) # dh.vertexdicts[fi][vertex] = nextdof + Base._setindex!(vertexdicts[fi], nextdof, vertex, -token) # vertexdicts[fi][vertex] = nextdof for d in 1:dh.field_dims[fi] Ferrite.@debug println(" adding dof#$nextdof") push!(dh.cell_dofs, nextdof) @@ -698,9 +687,9 @@ function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} for edge in Ferrite.edges(cell) sedge, dir = Ferrite.sortedge(edge) Ferrite.@debug println(" edge#$sedge dir: $(dir)") - token = Base.ht_keyindex2!(dh.edgedicts[fi], sedge) - if token > 0 # haskey(dh.edgedicts[fi], sedge), reuse dofs - startdof, olddir = dh.edgedicts[fi].vals[token] # dh.edgedicts[fi][sedge] # first dof for this edge (if dir == true) + token = Base.ht_keyindex2!(edgedicts[fi], sedge) + if token > 0 # haskey(edgedicts[fi], sedge), reuse dofs + startdof, olddir = edgedicts[fi].vals[token] # edgedicts[fi][sedge] # first dof for this edge (if dir == true) for edgedof in (dir == olddir ? 
(1:interpolation_info.nedgedofs) : (interpolation_info.nedgedofs:-1:1)) for d in 1:dh.field_dims[fi] reuse_dof = startdof + (d-1) + (edgedof-1)*dh.field_dims[fi] @@ -709,7 +698,7 @@ function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} end end else # token <= 0, distribute new dofs - Base._setindex!(dh.edgedicts[fi], (nextdof, dir), sedge, -token) # dh.edgedicts[fi][sedge] = (nextdof, dir), store only the first dof for the edge + Base._setindex!(edgedicts[fi], (nextdof, dir), sedge, -token) # edgedicts[fi][sedge] = (nextdof, dir), store only the first dof for the edge for edgedof in 1:interpolation_info.nedgedofs for d in 1:dh.field_dims[fi] Ferrite.@debug println(" adding dof#$nextdof") @@ -725,9 +714,9 @@ function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} for face in Ferrite.faces(cell) sface = Ferrite.sortface(face) # TODO: faces(cell) may as well just return the sorted list Ferrite.@debug println(" face#$sface") - token = Base.ht_keyindex2!(dh.facedicts[fi], sface) - if token > 0 # haskey(dh.facedicts[fi], sface), reuse dofs - startdof = dh.facedicts[fi].vals[token] # dh.facedicts[fi][sface] + token = Base.ht_keyindex2!(facedicts[fi], sface) + if token > 0 # haskey(facedicts[fi], sface), reuse dofs + startdof = facedicts[fi].vals[token] # facedicts[fi][sface] for facedof in interpolation_info.nfacedofs:-1:1 # always reverse (YOLO) for d in 1:dh.field_dims[fi] reuse_dof = startdof + (d-1) + (facedof-1)*dh.field_dims[fi] @@ -736,7 +725,7 @@ function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} end end else # distribute new dofs - Base._setindex!(dh.facedicts[fi], nextdof, sface, -token)# dh.facedicts[fi][sface] = nextdof, store the first dof for this face + Base._setindex!(facedicts[fi], nextdof, sface, -token)# facedicts[fi][sface] = nextdof, store the first dof for this face for facedof in 1:interpolation_info.nfacedofs for d in 1:dh.field_dims[fi] Ferrite.@debug println(" adding dof#$nextdof") @@ -752,10 +741,6 @@ function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} for celldof in 1:interpolation_info.ncelldofs for d in 1:dh.field_dims[fi] Ferrite.@debug println(" adding dof#$nextdof") - if !haskey(dh.celldicts[fi], ci) - dh.celldicts[fi][ci] = Vector{Int}(undef,0) - end - push!(dh.celldicts[fi][ci], nextdof) push!(dh.cell_dofs, nextdof) nextdof += 1 end @@ -768,7 +753,7 @@ function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} dh.ndofs[] = maximum(dh.cell_dofs) dh.closed[] = true - return dh + return dh, vertexdicts, edgedicts, facedicts end # TODO this is copy pasta from DofHandler.jl diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index c4551826a7..2e0d1a99f2 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -21,15 +21,11 @@ struct DofHandler{dim,T,G<:AbstractGrid{dim}} <: AbstractDofHandler closed::ScalarWrapper{Bool} grid::G ndofs::ScalarWrapper{Int} - - vertexdicts::Vector{Dict{Int,Int}} - edgedicts::Vector{Dict{Tuple{Int,Int},Tuple{Int,Bool}}} - facedicts::Vector{Dict{NTuple{dim,Int},Int}} end function DofHandler(grid::AbstractGrid{dim}) where {dim} isconcretetype(getcelltype(grid)) || error("Grid includes different celltypes. 
Use MixedDofHandler instead of DofHandler") - DofHandler(Symbol[], Int[], Interpolation[], BCValues{Float64}[], Int[], Int[], ScalarWrapper(false), grid, Ferrite.ScalarWrapper(-1), Dict{Int,Int}[], Dict{Tuple{Int,Int},Tuple{Int,Bool}}[],Dict{NTuple{dim,Int},Int}[]) + DofHandler(Symbol[], Int[], Interpolation[], BCValues{Float64}[], Int[], Int[], ScalarWrapper(false), grid, Ferrite.ScalarWrapper(-1)) end function Base.show(io::IO, ::MIME"text/plain", dh::DofHandler) @@ -51,18 +47,18 @@ Get the spatial dimension of a dofhandler. """ getdim(dh::DofHandler{dim}) where {dim} = dim -# has_entity_dof(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = haskey(dh.vertexdicts[field_idx], vertex) -# has_entity_dof(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = haskey(dh.edgedicts[field_idx], edge) -# has_entity_dof(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = haskey(dh.facedicts[field_idx], face) +# has_entity_dof(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = haskey(vertexdicts[field_idx], vertex) +# has_entity_dof(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = haskey(edgedicts[field_idx], edge) +# has_entity_dof(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = haskey(facedicts[field_idx], face) has_cell_dofs(dh::AbstractDofHandler, field_idx::Int, cell::Int) = ncelldofs(getfieldinterpolation(dh, field_idx)) > 0 has_vertex_dofs(dh::AbstractDofHandler, field_idx::Int, vertex::VertexIndex) = nvertexdofs(getfieldinterpolation(dh, field_idx)) > 0 has_edge_dofs(dh::AbstractDofHandler, field_idx::Int, edge::EdgeIndex) = nedgedofs(getfieldinterpolation(dh, field_idx)) > 0 -has_face_dofs(dh::AbstractDofHandler, field_idx::Int, face::FaceIndex) where {dim} = nfacedofs(getfieldinterpolation(dh, field_idx)) > 0 +has_face_dofs(dh::AbstractDofHandler, field_idx::Int, face::FaceIndex) = nfacedofs(getfieldinterpolation(dh, field_idx)) > 0 -# entity_dofs(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = dh.vertexdicts[field_idx][vertex] -# entity_dofs(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = dh.edgedicts[field_idx][edge] -# entity_dofs(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = dh.facedicts[field_idx][face] +# entity_dofs(dh::AbstractDofHandler, field_idx::Int, vertex::Int) = vertexdicts[field_idx][vertex] +# entity_dofs(dh::AbstractDofHandler, field_idx::Int, edge::Tuple{Int,Int}) = edgedicts[field_idx][edge] +# entity_dofs(dh::AbstractDofHandler, field_idx::Int, face::NTuple{dim,Int}) where {dim} = facedicts[field_idx][face] """ Compute the dofs belonging to a given cell of a given field. @@ -264,29 +260,20 @@ function __close!(dh::DofHandler{dim}) where {dim} # `vertexdict` keeps track of the visited vertices. We store the global vertex # number and the first dof we added to that vertex. - resize!(dh.vertexdicts, num_fields(dh)) - for i in 1:num_fields(dh) - dh.vertexdicts[i] = Dict{Tuple{Int,Int},Tuple{Int,Bool}}() - end + vertexdicts = [Dict{Int,Int}() for _ in 1:nfields(dh)] # `edgedict` keeps track of the visited edges, this will only be used for a 3D problem # An edge is determined from two vertices, but we also need to store the direction # of the first edge we encounter and add dofs too. 
When we encounter the same edge # the next time we check if the direction is the same, otherwise we reuse the dofs # in the reverse order - resize!(dh.edgedicts, num_fields(dh)) - for i in 1:num_fields(dh) - dh.edgedicts[i] = Dict{Tuple{Int,Int},Tuple{Int,Bool}}() - end + edgedicts = [Dict{Tuple{Int,Int},Tuple{Int,Bool}}() for _ in 1:nfields(dh)] # `facedict` keeps track of the visited faces. We only need to store the first dof we # added to the face; if we encounter the same face again we *always* reverse the order # In 2D a face (i.e. a line) is uniquely determined by 2 vertices, and in 3D a # face (i.e. a surface) is uniquely determined by 3 vertices. - resize!(dh.facedicts, num_fields(dh)) - for i in 1:num_fields(dh) - dh.facedicts[i] = Dict{NTuple{dim,Int},Int}() - end + facedicts = [Dict{NTuple{dim,Int},Int}() for _ in 1:nfields(dh)] # celldofs are never shared between different cells so there is no need # for a `celldict` to keep track of which cells we have added dofs too. @@ -315,16 +302,16 @@ function __close!(dh::DofHandler{dim}) where {dim} if interpolation_info.nvertexdofs > 0 for vertex in vertices(cell) @debug println(" vertex#$vertex") - token = Base.ht_keyindex2!(dh.vertexdicts[fi], vertex) - if token > 0 # haskey(dh.vertexdicts[fi], vertex) # reuse dofs - reuse_dof = dh.vertexdicts[fi].vals[token] # dh.vertexdicts[fi][vertex] + token = Base.ht_keyindex2!(vertexdicts[fi], vertex) + if token > 0 # haskey(vertexdicts[fi], vertex) # reuse dofs + reuse_dof = vertexdicts[fi].vals[token] # vertexdicts[fi][vertex] for d in 1:dh.field_dims[fi] @debug println(" reusing dof #$(reuse_dof + (d-1))") push!(dh.cell_dofs, reuse_dof + (d-1)) end else # token <= 0, distribute new dofs for vertexdof in 1:interpolation_info.nvertexdofs - Base._setindex!(dh.vertexdicts[fi], nextdof, vertex, -token) # dh.vertexdicts[fi][vertex] = nextdof + Base._setindex!(vertexdicts[fi], nextdof, vertex, -token) # vertexdicts[fi][vertex] = nextdof for d in 1:dh.field_dims[fi] @debug println(" adding dof#$nextdof") push!(dh.cell_dofs, nextdof) @@ -339,9 +326,9 @@ function __close!(dh::DofHandler{dim}) where {dim} for edge in edges(cell) sedge, dir = sortedge(edge) @debug println(" edge#$sedge dir: $(dir)") - token = Base.ht_keyindex2!(dh.edgedicts[fi], sedge) - if token > 0 # haskey(dh.edgedicts[fi], sedge), reuse dofs - startdof, olddir = dh.edgedicts[fi].vals[token] # dh.edgedicts[fi][sedge] # first dof for this edge (if dir == true) + token = Base.ht_keyindex2!(edgedicts[fi], sedge) + if token > 0 # haskey(edgedicts[fi], sedge), reuse dofs + startdof, olddir = edgedicts[fi].vals[token] # edgedicts[fi][sedge] # first dof for this edge (if dir == true) for edgedof in (dir == olddir ? 
(1:interpolation_info.nedgedofs) : (interpolation_info.nedgedofs:-1:1)) for d in 1:dh.field_dims[fi] reuse_dof = startdof + (d-1) + (edgedof-1)*dh.field_dims[fi] @@ -350,7 +337,7 @@ function __close!(dh::DofHandler{dim}) where {dim} end end else # token <= 0, distribute new dofs - Base._setindex!(dh.edgedicts[fi], (nextdof, dir), sedge, -token) # dh.edgedicts[fi][sedge] = (nextdof, dir), store only the first dof for the edge + Base._setindex!(edgedicts[fi], (nextdof, dir), sedge, -token) # edgedicts[fi][sedge] = (nextdof, dir), store only the first dof for the edge for edgedof in 1:interpolation_info.nedgedofs for d in 1:dh.field_dims[fi] @debug println(" adding dof#$nextdof") @@ -366,9 +353,9 @@ function __close!(dh::DofHandler{dim}) where {dim} for face in faces(cell) sface = sortface(face) # TODO: faces(cell) may as well just return the sorted list @debug println(" face#$sface") - token = Base.ht_keyindex2!(dh.facedicts[fi], sface) - if token > 0 # haskey(dh.facedicts[fi], sface), reuse dofs - startdof = dh.facedicts[fi].vals[token] # dh.facedicts[fi][sface] + token = Base.ht_keyindex2!(facedicts[fi], sface) + if token > 0 # haskey(facedicts[fi], sface), reuse dofs + startdof = facedicts[fi].vals[token] # facedicts[fi][sface] for facedof in interpolation_info.nfacedofs:-1:1 # always reverse (YOLO) for d in 1:dh.field_dims[fi] reuse_dof = startdof + (d-1) + (facedof-1)*dh.field_dims[fi] @@ -377,7 +364,7 @@ function __close!(dh::DofHandler{dim}) where {dim} end end else # distribute new dofs - Base._setindex!(dh.facedicts[fi], nextdof, sface, -token)# dh.facedicts[fi][sface] = nextdof, store the first dof for this face + Base._setindex!(facedicts[fi], nextdof, sface, -token)# facedicts[fi][sface] = nextdof, store the first dof for this face for facedof in 1:interpolation_info.nfacedofs for d in 1:dh.field_dims[fi] @debug println(" adding dof#$nextdof") @@ -405,7 +392,7 @@ function __close!(dh::DofHandler{dim}) where {dim} dh.ndofs[] = maximum(dh.cell_dofs) dh.closed[] = true - return dh.vertexdicts, dh.edgedicts, dh.facedicts + return dh, vertexdicts, edgedicts, facedicts end function celldofs!(global_dofs::Vector{Int}, dh::DofHandler, i::Int) diff --git a/test/test_grid_dofhandler_vtk.jl b/test/test_grid_dofhandler_vtk.jl index 02c0647d91..c57fbb691a 100644 --- a/test/test_grid_dofhandler_vtk.jl +++ b/test/test_grid_dofhandler_vtk.jl @@ -462,7 +462,7 @@ end push!(dh, :v, 1, Lagrange{3,RefCube,2}()) push!(dh, :w, 3, Lagrange{3,RefCube,1}()) push!(dh, :x, 3, Lagrange{3,RefCube,2}()) - vertexdicts, edgedicts, facedicts = Ferrite.__close!(dh) + _, vertexdicts, edgedicts, facedicts = Ferrite.__close!(dh) @test Ferrite.find_field(dh, :u) == 1 @test Ferrite.find_field(dh, :v) == 2 @test Ferrite.find_field(dh, :w) == 3 From ddb763bfee725d069251995ba0ed44977ac58c28 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Fri, 17 Feb 2023 16:04:35 +0100 Subject: [PATCH 098/124] Reduce redundancies. --- .../DistributedDofHandler.jl | 170 ------------------ src/Dofs/DofHandler.jl | 6 +- 2 files changed, 4 insertions(+), 172 deletions(-) diff --git a/ext/FerritePartitionedArrays/DistributedDofHandler.jl b/ext/FerritePartitionedArrays/DistributedDofHandler.jl index 278f76eae0..ce65fe104c 100644 --- a/ext/FerritePartitionedArrays/DistributedDofHandler.jl +++ b/ext/FerritePartitionedArrays/DistributedDofHandler.jl @@ -364,7 +364,6 @@ function local_to_global_numbering(dh::DistributedDofHandler) end # Sync non-owned dofs with neighboring processes. 
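# A minimal standalone sketch of the owner-to-ghost synchronization pattern used below,
# assuming exactly two ranks and a single shared vertex (run with `mpiexec -n 2`); the
# buffer layout and the `local_to_global` contents are illustrative only.
using MPI

MPI.Init()
comm = MPI.COMM_WORLD
my_rank = MPI.Comm_rank(comm) + 1                 # 1-based ranks, matching the convention here

# Pretend rank 1 owns the shared vertex; rank 2 has not numbered that dof yet (0 = invalid).
local_to_global = my_rank == 1 ? [1, 2, 3] : [0, 4, 5]

if my_rank == 1
    MPI.Send([local_to_global[1]], comm; dest=1)  # the owner sends its global dof number
else
    recv = Vector{Int}(undef, 1)
    MPI.Recv!(recv, comm; source=0)               # the ghost overwrites its invalid entry
    local_to_global[1] = recv[1]
end

MPI.Finalize()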
- # TODO: implement for entitied with dim > 0 # TODO: Use MPI graph primitives to simplify this code # TODO: Simplify with dimension-agnostic code... for sending_rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) @@ -594,16 +593,6 @@ function local_to_global_numbering(dh::DistributedDofHandler) Ferrite.@debug println("Local to global mapping: $local_to_global (R$my_rank)") @assert findfirst(local_to_global .== 0) === nothing - # Ferrite.@debug vtk_grid("dofs", dgrid; compress=false) do vtk - # u = Vector{Float64}(undef,length(dgrid.local_grid.nodes)) - # fill!(u, 0.0) - # for i=1:length(u) - # u[i] = local_to_global[dh.vertexdicts[1][i]] - # end - # vtk_point_data(vtk, u,"dof") - # vtk_partitioning(vtk, dgrid) - # end - return local_to_global end @@ -614,165 +603,6 @@ function Ferrite.close!(dh::DistributedDofHandler) dh.ndofs.x = num_local_dofs(dh) end -# TODO this is copy pasta from DofHandler.jl -# close the DofHandler and distribute all the dofs -function Ferrite.__close!(dh::DistributedDofHandler{dim}) where {dim} - @assert !Ferrite.isclosed(dh) - - # `vertexdict` keeps track of the visited vertices. We store the global vertex - # number and the first dof we added to that vertex. - vertexdicts = [Dict{Int,Int}() for _ in 1:num_fields(dh)] - - # `edgedict` keeps track of the visited edges, this will only be used for a 3D problem - # An edge is determined from two vertices, but we also need to store the direction - # of the first edge we encounter and add dofs too. When we encounter the same edge - # the next time we check if the direction is the same, otherwise we reuse the dofs - # in the reverse order - edgedicts = [Dict{Tuple{Int,Int},Tuple{Int,Bool}}() for _ in 1:num_fields(dh)] - - # `facedict` keeps track of the visited faces. We only need to store the first dof we - # added to the face; if we encounter the same face again we *always* reverse the order - # In 2D a face (i.e. a line) is uniquely determined by 2 vertices, and in 3D a - # face (i.e. a surface) is uniquely determined by 3 vertices. - facedicts = [Dict{NTuple{dim,Int},Int}() for _ in 1:num_fields(dh)] - - # celldofs are never shared between different cells so there is no need - # for a `celldict` to keep track of which cells we have added dofs too. 
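# A minimal self-contained sketch of the dict-based bookkeeping described in the comments
# above: dofs sitting on shared entities (vertices here) are recorded on first visit and
# reused afterwards, whereas cell-interior dofs never need such a dict. The two-triangle
# "mesh" below is purely illustrative.
function distribute_vertex_dofs(cells)
    vertexdict = Dict{Int,Int}()                  # vertex id => first dof placed on it
    cell_dofs = Vector{Vector{Int}}()
    nextdof = 1
    for cell in cells
        dofs = Int[]
        for v in cell
            if haskey(vertexdict, v)              # vertex seen before: reuse its dof
                push!(dofs, vertexdict[v])
            else                                  # new vertex: hand out the next free dof
                vertexdict[v] = nextdof
                push!(dofs, nextdof)
                nextdof += 1
            end
        end
        push!(cell_dofs, dofs)
    end
    return cell_dofs
end

distribute_vertex_dofs([(1, 2, 3), (2, 4, 3)])    # [[1, 2, 3], [2, 4, 3]]; vertices 2 and 3 are reused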
- - # We create the `Ferrite.InterpolationInfo` structs with precomputed information for each - # interpolation since that allows having the cell loop as the outermost loop, - # and the interpolation loop inside without using a function barrier - interpolation_infos = Ferrite.InterpolationInfo[] - for interpolation in dh.field_interpolations - # push!(dh.interpolation_info, Ferrite.InterpolationInfo(interpolation)) - push!(interpolation_infos, Ferrite.InterpolationInfo(interpolation)) - end - - # not implemented yet: more than one facedof per face in 3D - dim == 3 && @assert(!any(x->x.nfacedofs > 1, interpolation_infos)) - - nextdof = 1 # next free dof to distribute - push!(dh.cell_dofs_offset, 1) # dofs for the first cell start at 1 - - # loop over all the cells, and distribute dofs for all the fields - for (ci, cell) in enumerate(getcells(getgrid(dh))) - Ferrite.@debug println("cell #$ci") - for fi in 1:num_fields(dh) - interpolation_info = interpolation_infos[fi] - Ferrite.@debug println(" field: $(dh.field_names[fi])") - if interpolation_info.nvertexdofs > 0 - for vertex in Ferrite.vertices(cell) - Ferrite.@debug println(" vertex#$vertex") - token = Base.ht_keyindex2!(vertexdicts[fi], vertex) - if token > 0 # haskey(vertexdicts[fi], vertex) # reuse dofs - reuse_dof = vertexdicts[fi].vals[token] # vertexdicts[fi][vertex] - for d in 1:dh.field_dims[fi] - Ferrite.@debug println(" reusing dof #$(reuse_dof + (d-1))") - push!(dh.cell_dofs, reuse_dof + (d-1)) - end - else # token <= 0, distribute new dofs - for vertexdof in 1:interpolation_info.nvertexdofs - Base._setindex!(vertexdicts[fi], nextdof, vertex, -token) # vertexdicts[fi][vertex] = nextdof - for d in 1:dh.field_dims[fi] - Ferrite.@debug println(" adding dof#$nextdof") - push!(dh.cell_dofs, nextdof) - nextdof += 1 - end - end - end - end # vertex loop - end - if dim > 2 # edges only in 3D - if interpolation_info.nedgedofs > 0 - for edge in Ferrite.edges(cell) - sedge, dir = Ferrite.sortedge(edge) - Ferrite.@debug println(" edge#$sedge dir: $(dir)") - token = Base.ht_keyindex2!(edgedicts[fi], sedge) - if token > 0 # haskey(edgedicts[fi], sedge), reuse dofs - startdof, olddir = edgedicts[fi].vals[token] # edgedicts[fi][sedge] # first dof for this edge (if dir == true) - for edgedof in (dir == olddir ? 
(1:interpolation_info.nedgedofs) : (interpolation_info.nedgedofs:-1:1)) - for d in 1:dh.field_dims[fi] - reuse_dof = startdof + (d-1) + (edgedof-1)*dh.field_dims[fi] - Ferrite.@debug println(" reusing dof#$(reuse_dof)") - push!(dh.cell_dofs, reuse_dof) - end - end - else # token <= 0, distribute new dofs - Base._setindex!(edgedicts[fi], (nextdof, dir), sedge, -token) # edgedicts[fi][sedge] = (nextdof, dir), store only the first dof for the edge - for edgedof in 1:interpolation_info.nedgedofs - for d in 1:dh.field_dims[fi] - Ferrite.@debug println(" adding dof#$nextdof") - push!(dh.cell_dofs, nextdof) - nextdof += 1 - end - end - end - end # edge loop - end - end - if interpolation_info.nfacedofs > 0 && (interpolation_info.dim == dim) - for face in Ferrite.faces(cell) - sface = Ferrite.sortface(face) # TODO: faces(cell) may as well just return the sorted list - Ferrite.@debug println(" face#$sface") - token = Base.ht_keyindex2!(facedicts[fi], sface) - if token > 0 # haskey(facedicts[fi], sface), reuse dofs - startdof = facedicts[fi].vals[token] # facedicts[fi][sface] - for facedof in interpolation_info.nfacedofs:-1:1 # always reverse (YOLO) - for d in 1:dh.field_dims[fi] - reuse_dof = startdof + (d-1) + (facedof-1)*dh.field_dims[fi] - Ferrite.@debug println(" reusing dof#$(reuse_dof)") - push!(dh.cell_dofs, reuse_dof) - end - end - else # distribute new dofs - Base._setindex!(facedicts[fi], nextdof, sface, -token)# facedicts[fi][sface] = nextdof, store the first dof for this face - for facedof in 1:interpolation_info.nfacedofs - for d in 1:dh.field_dims[fi] - Ferrite.@debug println(" adding dof#$nextdof") - push!(dh.cell_dofs, nextdof) - nextdof += 1 - end - end - end - end # face loop - end - if interpolation_info.ncelldofs > 0 # always distribute new dofs for cell - Ferrite.@debug println(" cell#$ci") - for celldof in 1:interpolation_info.ncelldofs - for d in 1:dh.field_dims[fi] - Ferrite.@debug println(" adding dof#$nextdof") - push!(dh.cell_dofs, nextdof) - nextdof += 1 - end - end # cell loop - end - end # field loop - # push! the first index of the next cell to the offset vector - push!(dh.cell_dofs_offset, length(dh.cell_dofs)+1) - end # cell loop - dh.ndofs[] = maximum(dh.cell_dofs) - dh.closed[] = true - - return dh, vertexdicts, edgedicts, facedicts -end - -# TODO this is copy pasta from DofHandler.jl -function Ferrite.reshape_to_nodes(dh::DistributedDofHandler, u::Vector{T}, fieldname::Symbol) where T - # make sure the field exists - fieldname ∈ getfieldnames(dh) || error("Field $fieldname not found.") - - field_idx = findfirst(i->i==fieldname, getfieldnames(dh)) - offset = field_offset(dh, fieldname) - field_dim = getfielddim(dh, field_idx) - - space_dim = field_dim == 2 ? 3 : field_dim - data = fill(zero(T), space_dim, getnnodes(getgrid(dh))) - - Ferrite.reshape_field_data!(data, dh, u, offset, field_dim) - - return data -end - function WriteVTK.vtk_grid(filename::AbstractString, dh::DistributedDofHandler; compress::Bool=true) vtk_grid(filename, getglobalgrid(dh); compress=compress) end diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index 2e0d1a99f2..1efc851457 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -255,8 +255,10 @@ function close!(dh::DofHandler) end # close the DofHandler and distribute all the dofs -function __close!(dh::DofHandler{dim}) where {dim} +function __close!(dh::AbstractDofHandler) @assert !isclosed(dh) + + dim = getdim(dh) # `vertexdict` keeps track of the visited vertices. 
We store the global vertex # number and the first dof we added to that vertex. @@ -541,7 +543,7 @@ Reshape the entries of the dof-vector `u` which correspond to the field `fieldna Return a matrix with a column for every node and a row for every dimension of the field. For superparametric fields only the entries corresponding to nodes of the grid will be returned. Do not use this function for subparametric approximations. """ -function reshape_to_nodes(dh::DofHandler, u::Vector{T}, fieldname::Symbol) where T +function reshape_to_nodes(dh::AbstractDofHandler, u::Vector{T}, fieldname::Symbol) where T # make sure the field exists fieldname ∈ Ferrite.getfieldnames(dh) || error("Field $fieldname not found.") From d41e787dc69cb1723a11c55f689f5365f4f160a2 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Fri, 17 Feb 2023 16:08:45 +0100 Subject: [PATCH 099/124] readability. --- ext/FerritePartitionedArrays/DistributedDofHandler.jl | 10 ++++++---- ext/FerritePartitionedArrays/assembler.jl | 7 ++++--- ext/FerritePartitionedArrays/grid.jl | 2 ++ 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/ext/FerritePartitionedArrays/DistributedDofHandler.jl b/ext/FerritePartitionedArrays/DistributedDofHandler.jl index ce65fe104c..791da685fb 100644 --- a/ext/FerritePartitionedArrays/DistributedDofHandler.jl +++ b/ext/FerritePartitionedArrays/DistributedDofHandler.jl @@ -77,7 +77,7 @@ Ferrite.renumber!(dh::DistributedDofHandler, perm::AbstractVector{<:Integer}) = function compute_dof_ownership(dh) dgrid = getglobalgrid(dh) - my_rank = MPI.Comm_rank(global_comm(dgrid))+1 + my_rank = global_rank(dgrid) dof_owner = Vector{Int}(undef,ndofs(dh)) fill!(dof_owner, my_rank) @@ -115,7 +115,7 @@ end """ Compute the number of dofs owned by the current process. """ -num_local_true_dofs(dh::DistributedDofHandler) = sum(dh.ldof_to_rank .== (MPI.Comm_rank(global_comm(dh.grid))+1)) +num_local_true_dofs(dh::DistributedDofHandler) = sum(dh.ldof_to_rank .== global_rank(getglobalgrid(dh))) """ Compute the number of dofs visible to the current process. @@ -125,7 +125,7 @@ num_local_dofs(dh::DistributedDofHandler) = length(dh.ldof_to_gdof) """ Compute the number of dofs in the global system. """ -num_global_dofs(dh::DistributedDofHandler) = MPI.Allreduce(num_local_true_dofs(dh), MPI.SUM, global_comm(dh.grid)) +num_global_dofs(dh::DistributedDofHandler) = MPI.Allreduce(num_local_true_dofs(dh), MPI.SUM, global_comm(getglobalgrid(dh))) """ Renumber the dofs in local ordering to their corresponding global numbering. @@ -136,7 +136,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) dgrid = getglobalgrid(dh) dim = getdim(dgrid) # MPI rank starting with 1 to match Julia's index convention - my_rank = MPI.Comm_rank(global_comm(dgrid))+1 + my_rank = global_rank(dgrid) local_to_global = Vector{Int}(undef,ndofs(dh)) fill!(local_to_global,0) # 0 is the invalid index! 
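# A minimal sketch of the three dof counts used in this file, with an illustrative
# ownership vector standing in for `ldof_to_rank`; a single `MPI.Allreduce` yields the
# global count from the locally owned ones.
using MPI

MPI.Init()
comm = MPI.COMM_WORLD
my_rank = MPI.Comm_rank(comm) + 1                          # 1-based, as above

ldof_to_rank = [my_rank, my_rank, 1]                       # illustrative ownership of 3 visible dofs

n_local_true = sum(ldof_to_rank .== my_rank)               # dofs owned by this process
n_local      = length(ldof_to_rank)                        # dofs visible to this process
n_global     = MPI.Allreduce(n_local_true, MPI.SUM, comm)  # dofs in the whole system

MPI.Finalize()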
@@ -601,6 +601,8 @@ function Ferrite.close!(dh::DistributedDofHandler) append!(dh.ldof_to_gdof, local_to_global_numbering(dh)) append!(dh.ldof_to_rank, compute_dof_ownership(dh)) dh.ndofs.x = num_local_dofs(dh) + # Reorder to make local dofs continuous + sum(dh.ldof_to_rank.==) end function WriteVTK.vtk_grid(filename::AbstractString, dh::DistributedDofHandler; compress::Bool=true) diff --git a/ext/FerritePartitionedArrays/assembler.jl b/ext/FerritePartitionedArrays/assembler.jl index 8542abd33a..3ae2770be6 100644 --- a/ext/FerritePartitionedArrays/assembler.jl +++ b/ext/FerritePartitionedArrays/assembler.jl @@ -37,7 +37,7 @@ struct COOAssembler{T} # distributed grid, which can efficiently precompute some of the values below. comm = global_comm(dgrid) np = MPI.Comm_size(comm) - my_rank = MPI.Comm_rank(comm)+1 + my_rank = global_rank(dgrid) @debug println("starting assembly... (R$my_rank)") @@ -295,9 +295,10 @@ end end function Ferrite.end_assemble(assembler::COOAssembler{T}) where {T} - comm = global_comm(getglobalgrid(assembler.dh)) + dgrid = getglobalgrid(assembler.dh) + comm = global_comm(dgrid) np = MPI.Comm_size(comm) - my_rank = MPI.Comm_rank(comm)+1 + my_rank = global_rank(dgrid) # --------------------- Add ghost entries in IJ 👻 -------------------- I = map(i->assembler.dh.ldof_to_gdof[i], assembler.I) diff --git a/ext/FerritePartitionedArrays/grid.jl b/ext/FerritePartitionedArrays/grid.jl index 92eb208c94..88600974ab 100644 --- a/ext/FerritePartitionedArrays/grid.jl +++ b/ext/FerritePartitionedArrays/grid.jl @@ -57,6 +57,8 @@ returned by @global_comm . """ @inline vertex_comm(dgrid::DistributedGrid) = dgrid.interface_comm +@inline global_rank(dgrid::DistributedGrid) = MPI.Comm_rank(global_comm(dgrid))+1 + """ """ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}; grid_comm::MPI.Comm = MPI.COMM_WORLD, partition_alg = :RECURSIVE) where {dim,C,T} From 573dfd86595a39695721132dee251a8add3a2077 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Fri, 17 Feb 2023 18:43:58 +0100 Subject: [PATCH 100/124] Error messages. --- ext/FerritePartitionedArrays/assembler.jl | 42 ++++++++++----------- ext/FerritePartitionedArrays/constraints.jl | 10 ++--- ext/FerritePartitionedArrays/grid.jl | 6 +-- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/ext/FerritePartitionedArrays/assembler.jl b/ext/FerritePartitionedArrays/assembler.jl index 3ae2770be6..dadbbb6050 100644 --- a/ext/FerritePartitionedArrays/assembler.jl +++ b/ext/FerritePartitionedArrays/assembler.jl @@ -39,7 +39,7 @@ struct COOAssembler{T} np = MPI.Comm_size(comm) my_rank = global_rank(dgrid) - @debug println("starting assembly... (R$my_rank)") + Ferrite.@debug println("starting assembly... (R$my_rank)") # Neighborhood graph # @TODO cleanup old code below and use graph primitives instead. 
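# A minimal sketch of the bookkeeping set up below: the locally visible dofs are split
# into truly owned dofs and "ghost" dofs owned by a neighbor, grouped by owner rank so
# that each neighbor can be told which of its rows will receive remote contributions.
# The input vectors are illustrative stand-ins for `ldof_to_gdof` and `ldof_to_rank`.
function group_ghost_dofs(ldof_to_gdof, ldof_to_rank, my_rank)
    ghosts = Dict{Int,Vector{Int}}()              # owner rank => global dofs we only mirror
    for (ldof, owner) in enumerate(ldof_to_rank)
        owner == my_rank && continue
        push!(get!(ghosts, owner, Int[]), ldof_to_gdof[ldof])
    end
    return ghosts
end

group_ghost_dofs([5, 6, 9, 10], [1, 1, 2, 2], 1)  # Dict(2 => [9, 10])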
@@ -52,7 +52,7 @@ struct COOAssembler{T} sources .+= 1 destinations .+= 1 - @debug println("Neighborhood | $sources | $destinations (R$my_rank)") + Ferrite.@debug println("Neighborhood | $sources | $destinations (R$my_rank)") # Invert the relations to clarify the code source_index = Dict{Cint, Int}() @@ -71,9 +71,9 @@ struct COOAssembler{T} ltdof_indices = ldof_to_rank.==my_rank ltdof_to_gdof = ldof_to_gdof[ltdof_indices] - @debug println("ltdof_to_gdof $ltdof_to_gdof (R$my_rank)") - @debug println("ldof_to_gdof $ldof_to_gdof (R$my_rank)") - @debug println("ldof_to_rank $ldof_to_rank (R$my_rank)") + Ferrite.@debug println("ltdof_to_gdof $ltdof_to_gdof (R$my_rank)") + Ferrite.@debug println("ldof_to_gdof $ldof_to_gdof (R$my_rank)") + Ferrite.@debug println("ldof_to_rank $ldof_to_rank (R$my_rank)") # Process owns rows of owned dofs. The process also may write to some remote dofs, # which correspond to non-owned share entities. Here we construct the rows for the @@ -87,7 +87,7 @@ struct COOAssembler{T} row_exchanger = Exchanger(row_data) rows = PRange(ngdofs,row_data,row_exchanger) - @debug println("rows done (R$my_rank)") + Ferrite.@debug println("rows done (R$my_rank)") # For the locally visible columns we also have to take into account that remote # processes will write their data in some of these, because their remotely @@ -113,7 +113,7 @@ struct COOAssembler{T} if my_rank != pivot_vertex_owner_rank sender_slot = destination_index[pivot_vertex_owner_rank] - @debug println("$pivot_vertex may require synchronization (R$my_rank)") + Ferrite.@debug println("$pivot_vertex may require synchronization (R$my_rank)") # Note: We have to send ALL dofs on the element to the remote. cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] @@ -123,7 +123,7 @@ struct COOAssembler{T} pivot_vertex_dofs = Ferrite.vertex_dofs(dh, field_idx, pivot_vertex) for d ∈ 1:dh.field_dims[field_idx] - @debug println(" adding dof $(pivot_vertex_dofs[d]) to ghost sync synchronization on slot $sender_slot (R$my_rank)") + Ferrite.@debug println(" adding dof $(pivot_vertex_dofs[d]) to ghost sync synchronization on slot $sender_slot (R$my_rank)") # Extract dofs belonging to the current field #cell_field_dofs = cell_dofs[dof_range(dh, field_name)] @@ -148,7 +148,7 @@ struct COOAssembler{T} if my_rank != pivot_face_owner_rank sender_slot = destination_index[pivot_face_owner_rank] - @debug println("$pivot_face may require synchronization (R$my_rank)") + Ferrite.@debug println("$pivot_face may require synchronization (R$my_rank)") # Note: We have to send ALL dofs on the element to the remote. cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? 
length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] @@ -158,7 +158,7 @@ struct COOAssembler{T} pivot_face_dofs = Ferrite.face_dofs(dh, field_idx, pivot_face) for d ∈ 1:dh.field_dims[field_idx] - @debug println(" adding dof $(pivot_face_dofs[d]) to ghost sync synchronization on slot $sender_slot (R$my_rank)") + Ferrite.@debug println(" adding dof $(pivot_face_dofs[d]) to ghost sync synchronization on slot $sender_slot (R$my_rank)") # Extract dofs belonging to the current field #cell_field_dofs = cell_dofs[dof_range(dh, field_name)] @@ -184,7 +184,7 @@ struct COOAssembler{T} if my_rank != pivot_edge_owner_rank sender_slot = destination_index[pivot_edge_owner_rank] - @debug println("$pivot_edge may require synchronization (R$my_rank)") + Ferrite.@debug println("$pivot_edge may require synchronization (R$my_rank)") # Note: We have to send ALL dofs on the element to the remote. cell_dofs_upper_bound = (pivot_cell_idx == getncells(dh.grid)) ? length(dh.cell_dofs) : dh.cell_dofs_offset[pivot_cell_idx+1] cell_dofs = dh.cell_dofs[dh.cell_dofs_offset[pivot_cell_idx]:cell_dofs_upper_bound] @@ -194,7 +194,7 @@ struct COOAssembler{T} pivot_edge_dofs = Ferrite.edge_dofs(dh, field_idx, pivot_edge) for d ∈ 1:dh.field_dims[field_idx] - @debug println(" adding dof $(pivot_edge_dofs[d]) to ghost sync synchronization on slot $sender_slot (R$my_rank)") + Ferrite.@debug println(" adding dof $(pivot_edge_dofs[d]) to ghost sync synchronization on slot $sender_slot (R$my_rank)") # Extract dofs belonging to the current field #cell_field_dofs = cell_dofs[dof_range(dh, field_name)] #for cell_field_dof ∈ cell_field_dofs @@ -213,7 +213,7 @@ struct COOAssembler{T} ghost_send_buffer_lengths = Int[length(i) for i ∈ ghost_dof_to_send] ghost_recv_buffer_lengths = zeros(Int, destination_len) MPI.Neighbor_alltoall!(UBuffer(ghost_send_buffer_lengths,1), UBuffer(ghost_recv_buffer_lengths,1), vertex_comm(dgrid)); - @debug for (i,ghost_recv_buffer_length) ∈ enumerate(ghost_recv_buffer_lengths) + Ferrite.@debug for (i,ghost_recv_buffer_length) ∈ enumerate(ghost_recv_buffer_lengths) println("receiving $ghost_recv_buffer_length ghosts from $(sources[i]) (R$my_rank)") end @@ -241,7 +241,7 @@ struct COOAssembler{T} append!(ghost_recv_buffer_source_ranks, ones(recv_len)*sources[source_idx]) end - @debug println("received $ghost_recv_buffer_dofs with owners $ghost_recv_buffer_ranks (R$my_rank)") + Ferrite.@debug println("received $ghost_recv_buffer_dofs with owners $ghost_recv_buffer_ranks (R$my_rank)") unique_ghosts_dr = sort(unique(first,zip(ghost_recv_buffer_dofs,ghost_recv_buffer_ranks))) # unzip manually and make sure we do not add duplicate entries to our columns @@ -255,8 +255,8 @@ struct COOAssembler{T} # ------------- Construct rows and cols of distributed matrix -------- all_local_cols = Int[ldof_to_gdof; ghost_dof_to_global] all_local_col_ranks = Int32[ldof_to_rank; ghost_dof_rank] - @debug println("all_local_cols $all_local_cols (R$my_rank)") - @debug println("all_local_col_ranks $all_local_col_ranks (R$my_rank)") + Ferrite.@debug println("all_local_cols $all_local_cols (R$my_rank)") + Ferrite.@debug println("all_local_col_ranks $all_local_col_ranks (R$my_rank)") col_indices = PartitionedArrays.IndexSet(my_rank, all_local_cols, all_local_col_ranks) #FIXME: This below must be fixed before we can assemble to HYPRE IJ. Problem seems to be that rows and cols must be continuously assigned. 
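# A minimal sketch of the reordering hinted at by the FIXME above (and added later in
# this series as `close_hypre!`/`hypre_reorder!`): one possible permutation moves all
# locally owned dofs to the front so that the owned block forms a single contiguous
# index range, as the HYPRE IJ interface expects. The ownership vector is illustrative.
function owned_first_permutation(ldof_to_rank, my_rank)
    owned    = findall(==(my_rank), ldof_to_rank)
    borrowed = findall(!=(my_rank), ldof_to_rank)
    return vcat(owned, borrowed)                  # maps new position => old local dof index
end

owned_first_permutation([2, 1, 1, 2], 1)          # [2, 3, 1, 4]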
@@ -265,12 +265,12 @@ struct COOAssembler{T} col_exchanger = Exchanger(col_data) cols = PRange(ngdofs,col_data,col_exchanger) - @debug println("cols and rows constructed (R$my_rank)") + Ferrite.@debug println("cols and rows constructed (R$my_rank)") f = PartitionedArrays.PVector(0.0,rows) - @debug println("f constructed (R$my_rank)") + Ferrite.@debug println("f constructed (R$my_rank)") 👻remotes = zip(ghost_recv_buffer_dofs_piv, ghost_recv_buffer_dofs, ghost_recv_buffer_ranks,ghost_recv_buffer_fields) - @debug println("👻remotes $👻remotes (R$my_rank)") + Ferrite.@debug println("👻remotes $👻remotes (R$my_rank)") return new(I, J, V, cols, rows, f, 👻remotes, dh) end @@ -318,8 +318,8 @@ function Ferrite.end_assemble(assembler::COOAssembler{T}) where {T} end end - @debug println("I=$(I) (R$my_rank)") - @debug println("J=$(J) (R$my_rank)") + Ferrite.@debug println("I=$(I) (R$my_rank)") + Ferrite.@debug println("J=$(J) (R$my_rank)") K = PartitionedArrays.PSparseMatrix( MPIData(I, comm, (np,)), MPIData(J, comm, (np,)), diff --git a/ext/FerritePartitionedArrays/constraints.jl b/ext/FerritePartitionedArrays/constraints.jl index 7b07239efc..388b1c8b29 100644 --- a/ext/FerritePartitionedArrays/constraints.jl +++ b/ext/FerritePartitionedArrays/constraints.jl @@ -81,25 +81,25 @@ function Ferrite.apply!(K::PartitionedArrays.PSparseMatrix, f::PartitionedArrays buffer_sizes_send[part] += 1 end MPI.Alltoall!(UBuffer(buffer_sizes_send, 1), UBuffer(buffer_sizes_recv, 1), comm) - @debug println("Got $buffer_sizes_recv (R$my_rank)") + Ferrite.@debug println("Got $buffer_sizes_recv (R$my_rank)") remote_ghosts_recv = Vector{Int}(undef, sum(buffer_sizes_recv)) MPI.Alltoallv!(VBuffer(remote_ghost_gdofs.part, buffer_sizes_send), VBuffer(remote_ghosts_recv, buffer_sizes_recv), comm) - @debug println("Got $remote_ghosts_recv (R$my_rank)") + Ferrite.@debug println("Got $remote_ghosts_recv (R$my_rank)") # Step 2: Union with all locally constrained dofs - @debug println("$my_rank : Step 2....") + Ferrite.@debug println("$my_rank : Step 2....") remote_ghosts_constrained_send = copy(remote_ghosts_recv) for (i, remote_ghost_dof) ∈ enumerate(remote_ghosts_recv) remote_ghosts_constrained_send[i] = remote_ghost_dof ∈ K.cols.partition.part.lid_to_gid[ch.prescribed_dofs] end # Step 3: Send trash back - @debug println("$my_rank : Step 3....") + Ferrite.@debug println("$my_rank : Step 3....") remote_ghosts_constrained_recv = Vector{Int}(undef, sum(buffer_sizes_send)) MPI.Alltoallv!(VBuffer(remote_ghosts_constrained_send, buffer_sizes_recv), VBuffer(remote_ghosts_constrained_recv, buffer_sizes_send), comm) - @debug println("$my_rank : remote constraints on $(remote_ghost_gdofs.part[remote_ghosts_constrained_recv .== 1])") + Ferrite.@debug println("$my_rank : remote constraints on $(remote_ghost_gdofs.part[remote_ghosts_constrained_recv .== 1])") # Step 4: Constrain remaining columns map_parts(local_view(K, K.rows, K.cols), K.cols.partition) do K_local, partition diff --git a/ext/FerritePartitionedArrays/grid.jl b/ext/FerritePartitionedArrays/grid.jl index 88600974ab..4be334f7da 100644 --- a/ext/FerritePartitionedArrays/grid.jl +++ b/ext/FerritePartitionedArrays/grid.jl @@ -246,7 +246,7 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu if !haskey(remote_vertices,other_rank) remote_vertices[other_rank] = Vector(undef,0) end - @debug println("Detected shared vertex $cell_vertex neighbor $other_vertex (R$my_rank)") + Ferrite.@debug println("Detected shared vertex $cell_vertex neighbor $other_vertex 
(R$my_rank)") push!(remote_vertices[other_rank], VertexIndex(global_to_local_cell_map[other_rank][global_cell_neighbor_idx], j)) end end @@ -271,7 +271,7 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu if !haskey(remote_faces,other_rank) remote_faces[other_rank] = Vector(undef,0) end - @debug println("Detected shared face $cell_face neighbor $other_face (R$my_rank)") + Ferrite.@debug println("Detected shared face $cell_face neighbor $other_face (R$my_rank)") push!(remote_faces[other_rank], FaceIndex(global_to_local_cell_map[other_rank][global_cell_neighbor_idx], j)) end end @@ -297,7 +297,7 @@ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::Exclu if !haskey(remote_edges,other_edge) remote_edges[other_rank] = Vector(undef,0) end - @debug println("Detected shared edge $cell_edge neighbor $other_edge (R$my_rank)") + Ferrite.@debug println("Detected shared edge $cell_edge neighbor $other_edge (R$my_rank)") push!(remote_edges[other_rank], EdgeIndex(global_to_local_cell_map[other_rank][global_cell_neighbor_idx], j)) end end From c16a7f5bfd3ab90e992d87e4c6ac068021753eda Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Fri, 17 Feb 2023 18:45:00 +0100 Subject: [PATCH 101/124] I think this is the correct close to make hypre functional. --- .../DistributedDofHandler.jl | 36 +++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/ext/FerritePartitionedArrays/DistributedDofHandler.jl b/ext/FerritePartitionedArrays/DistributedDofHandler.jl index 791da685fb..1f7859725f 100644 --- a/ext/FerritePartitionedArrays/DistributedDofHandler.jl +++ b/ext/FerritePartitionedArrays/DistributedDofHandler.jl @@ -598,11 +598,43 @@ end function Ferrite.close!(dh::DistributedDofHandler) Ferrite.__close!(dh) + append!(dh.ldof_to_rank, compute_dof_ownership(dh)) append!(dh.ldof_to_gdof, local_to_global_numbering(dh)) + return dh +end + + +function close_hypre!(dh::DistributedDofHandler) + Ferrite.__close!(dh) + # Compute the owners of the dofs append!(dh.ldof_to_rank, compute_dof_ownership(dh)) - dh.ndofs.x = num_local_dofs(dh) + # Reorder to make local dofs continuous - sum(dh.ldof_to_rank.==) + next_local_dof = 1 + next_nonlocal_dof = num_local_true_dofs(dh)+1 + my_rank = global_rank(getglobalgrid(dh)) + permutation = Vector{Int}(undef, dh.ndofs.x) + for i ∈ 1:dh.ndofs.x + if dh.ldof_to_rank[i] == my_rank + permutation[next_local_dof] = i + next_local_dof += 1 + else + permutation[next_nonlocal_dof] = i + next_nonlocal_dof += 1 + end + end + cell_dofs = dh.cell_dofs + for i in eachindex(cell_dofs) + cell_dofs[i] = permutation[cell_dofs[i]] + end + + dh.ldof_to_rank .= dh.ldof_to_rank[permutation] + # dh.ldof_to_gdof .= dh.ldof_to_gdof[permutation] + + # Communicate the numbering to make it global + append!(dh.ldof_to_gdof, local_to_global_numbering(dh)) + + return dh end function WriteVTK.vtk_grid(filename::AbstractString, dh::DistributedDofHandler; compress::Bool=true) From 9de31310b107c77e5f4afd8bbdb2ac7a2749930b Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Fri, 17 Feb 2023 19:44:19 +0100 Subject: [PATCH 102/124] Typo. 
--- src/iterators.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/iterators.jl b/src/iterators.jl index bb040a4b79..19faff7b2b 100644 --- a/src/iterators.jl +++ b/src/iterators.jl @@ -163,7 +163,7 @@ function CellIterator(gridordh::Union{Grid,AbstractDofHandler}, if gridordh isa MixedDofHandler # TODO: Since the CellCache is resizeable this is not really necessary to check # here, but might be useful to catch slow code paths? - _check_same_celltype(grid, set) + _check_same_celltype(getgrid(gridordh), set) end return CellIterator(CellCache(gridordh, flags), set) end From 1bb93bd583245eed5a12257f93600213f898965a Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Fri, 17 Feb 2023 19:54:58 +0100 Subject: [PATCH 103/124] Add missing method. --- src/Dofs/DofHandler.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index e2a0df0607..6c96d8bf08 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -171,6 +171,11 @@ function field_offset(dh::AbstractDofHandler, field_name::Symbol) return field_offset(dh,field_idx) end +function getfielddim(dh::AbstractDofHandler, field_name::Symbol) + field_idx = findfirst(i->i == field_name, getfieldnames(dh)) + field_idx === nothing && error("did not find field $field_name") + return getfielddim(dh, field_idx) +end """ """ From 6e1c52d5552952356d59033915054e3bc1972feb Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Fri, 17 Feb 2023 20:42:22 +0100 Subject: [PATCH 104/124] Hypre gets into shape. Needs more testing and utils. --- docs/LocalPreferences.toml | 6 - docs/Manifest.toml | 24 +- docs/Project.toml | 1 + docs/src/literate/distributed_assembly.jl | 4 +- .../literate/distributed_assembly_hypre.jl | 240 ++++++++++++++++++ .../DistributedDofHandler.jl | 17 +- 6 files changed, 268 insertions(+), 24 deletions(-) delete mode 100644 docs/LocalPreferences.toml create mode 100644 docs/src/literate/distributed_assembly_hypre.jl diff --git a/docs/LocalPreferences.toml b/docs/LocalPreferences.toml deleted file mode 100644 index d88be56147..0000000000 --- a/docs/LocalPreferences.toml +++ /dev/null @@ -1,6 +0,0 @@ -[MPIPreferences] -_format = "1.0" -abi = "OpenMPI" -binary = "system" -libmpi = "libmpi" -mpiexec = "mpiexec" diff --git a/docs/Manifest.toml b/docs/Manifest.toml index 4ea98e6a82..b9ac6627c6 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -2,7 +2,7 @@ julia_version = "1.9.0-beta4" manifest_format = "2.0" -project_hash = "496d72ebf77bfce22849d2f38c0edb3979829e55" +project_hash = "27e3c9a6b7b6a6a2a2a32de6779b15088202d916" [[deps.ANSIColoredPrinters]] git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c" @@ -522,6 +522,18 @@ git-tree-sha1 = "37e4657cd56b11abe3d10cd4a1ec5fbdb4180263" uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" version = "1.7.4" +[[deps.HYPRE]] +deps = ["CEnum", "HYPRE_jll", "Libdl", "MPI", "PartitionedArrays", "SparseArrays", "SparseMatricesCSR"] +git-tree-sha1 = "b45b676f271aff979a680d47bc0b364b6919b52b" +uuid = "b5ffcf37-a2bd-41ab-a3da-4bd9bc8ad771" +version = "1.4.0" + +[[deps.HYPRE_jll]] +deps = ["Artifacts", "JLLWrappers", "LAPACK_jll", "LazyArtifacts", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenBLAS_jll", "OpenMPI_jll", "Pkg", "TOML"] +git-tree-sha1 = "b77d3eca75f8442e034ccf415c87405a49e77985" +uuid = "0a602bbd-b08b-5d75-8d32-0de6eef44785" +version = "2.23.1+1" + [[deps.HarfBuzz_jll]] deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", 
"Graphite2_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg"] git-tree-sha1 = "129acf094d168394e80ee1dc4bc06ec835e510a3" @@ -623,6 +635,12 @@ git-tree-sha1 = "f6250b16881adf048549549fba48b1161acdac8c" uuid = "c1c5ebd0-6772-5130-a774-d5fcae4a789d" version = "3.100.1+0" +[[deps.LAPACK_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg", "libblastrampoline_jll"] +git-tree-sha1 = "a539affa8228208f5a3396037165b04bff9a2ba6" +uuid = "51474c39-65e3-53ba-86ba-03b1b862ec14" +version = "3.10.0+1" + [[deps.LERC_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "bf36f528eec6634efc60d7ec062008f171071434" @@ -765,13 +783,11 @@ deps = ["ArrayInterfaceCore", "DocStringExtensions", "FastLapackInterface", "GPU git-tree-sha1 = "ed97c2b4e46d02d4c866d3ccfae039a6c09568b1" uuid = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" version = "1.35.0" +weakdeps = ["HYPRE"] [deps.LinearSolve.extensions] LinearSolveHYPRE = "HYPRE" - [deps.LinearSolve.weakdeps] - HYPRE = "b5ffcf37-a2bd-41ab-a3da-4bd9bc8ad771" - [[deps.Literate]] deps = ["Base64", "IOCapture", "JSON", "REPL"] git-tree-sha1 = "1c4418beaa6664041e0f9b48f0710f57bff2fcbe" diff --git a/docs/Project.toml b/docs/Project.toml index 2b86e56fae..12eec41f15 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -6,6 +6,7 @@ FerriteGmsh = "4f95f4f8-b27c-4ae5-9a39-ea55e634e36b" FerriteMeshParser = "0f8c756f-80dd-4a75-85c6-b0a5ab9d4620" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Gmsh = "705231aa-382f-11e9-3f0c-b7cb4346fdeb" +HYPRE = "b5ffcf37-a2bd-41ab-a3da-4bd9bc8ad771" IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 16c653bfd5..68d998310d 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -16,7 +16,7 @@ # First we load Ferrite, and some other packages we need using Ferrite, MPI using IterativeSolvers #, HYPRE -using PartitionedArrays #src +using PartitionedArrays, Metis #src FerritePartitionedArrays = Base.get_extension(Ferrite, :FerritePartitionedArrays) @@ -47,7 +47,7 @@ cellvalues = CellScalarValues(qr, ip, ip_geo); # To handle the dofs correctly we now utilize the `DistributedDofHandle` # instead of the `DofHandler`. For the user the interface is the same. dh = FerritePartitionedArrays.DistributedDofHandler(dgrid) -push!(dh, :u, 1, ip) +add!(dh, :u, 1, ip) close!(dh); # ### Boundary conditions diff --git a/docs/src/literate/distributed_assembly_hypre.jl b/docs/src/literate/distributed_assembly_hypre.jl new file mode 100644 index 0000000000..728e01aff8 --- /dev/null +++ b/docs/src/literate/distributed_assembly_hypre.jl @@ -0,0 +1,240 @@ +# # Distributed Assembly of Heat Equation +# +# ## Introduction +# +# Now we want to solve the heat problem in parallel. To be specific, this example shows +# how to utilize process parallelism to assemble finite element matrices in parallel. +# This example presumes that the reader is familiar with solving the heat problem in +# serial with Ferrite.jl, as presented in [the first example](@ref heat_example). +# +#- +# ## Commented Program +# +# Now we solve the problem in Ferrite. What follows is a program spliced with comments. +#md # The full program, without comments, can be found in the next [section](@ref heat_equation-plain-program). 
+# +# First we load Ferrite, and some other packages we need +using Ferrite, MPI +using HYPRE +using PartitionedArrays, Metis + +# Launch MPI +MPI.Init() +HYPRE.Init() + +function Ferrite.create_sparsity_pattern(::Type{<:HYPREMatrix}, dh::DofHandler, ch::Union{ConstraintHandler,Nothing}=nothing; kwargs...) + println("Ferrite.create_sparsity_pattern") + K = create_sparsity_pattern(dh, ch; kwargs...) + fill!(K.nzval, 1) + return HYPREMatrix(K) +end + +########################################### +## HYPREAssembler and associated methods ## +########################################### + +struct HYPREAssembler <: Ferrite.AbstractSparseAssembler + A::HYPRE.HYPREAssembler +end + +Ferrite.matrix_handle(a::HYPREAssembler) = a.A.A.A # :) +Ferrite.vector_handle(a::HYPREAssembler) = a.A.b.b # :) + +function Ferrite.start_assemble(K::HYPREMatrix, f::HYPREVector) + println("Ferrite.start_assemble") + return HYPREAssembler(HYPRE.start_assemble!(K, f)) +end + +function Ferrite.assemble!(a::HYPREAssembler, dofs::AbstractVector{<:Integer}, ke::AbstractMatrix, fe::AbstractVector) + println("Ferrite.assemble!") + HYPRE.assemble!(a.A, dofs, ke, fe) +end + + +## Methods for arrayutils.jl ## + +function Ferrite.addindex!(A::HYPREMatrix, v, i::Int, j::Int) + println("Ferrite.addindex!") + nrows = HYPRE_Int(1) + ncols = Ref{HYPRE_Int}(1) + rows = Ref{HYPRE_BigInt}(i) + cols = Ref{HYPRE_BigInt}(j) + values = Ref{HYPRE_Complex}(v) + HYPRE.@check HYPRE_IJMatrixAddToValues(A.ijmatrix, nrows, ncols, rows, cols, values) + return A +end + +function Ferrite.addindex!(b::HYPREVector, v, i::Int) + println("Ferrite.addindex!") + nvalues = HYPRE_Int(1) + indices = Ref{HYPRE_BigInt}(i) + values = Ref{HYPRE_Complex}(v) + HYPRE.@check HYPRE_IJVectorAddToValues(b.ijvector, nvalues, indices, values) + return b +end + +FerritePartitionedArrays = Base.get_extension(Ferrite, :FerritePartitionedArrays) + +# We start generating a simple grid with 20x20 quadrilateral elements +# and distribute it across our processors using `generate_distributed_grid`. +# dgrid = FerritePartitionedArrays.generate_distributed_grid(QuadraticQuadrilateral, (3, 1)); +# dgrid = FerritePartitionedArrays.generate_distributed_grid(Tetrahedron, (2, 2, 2)); +dgrid = FerritePartitionedArrays.generate_distributed_grid(Hexahedron, (2, 2, 2)); #src +# dgrid = FerritePartitionedArrays.generate_distributed_grid(Tetrahedron, (3, 3, 3)); #src + +# ### Trial and test functions +# Nothing changes here. +dim = 2 +dim = 3 #src +ref = RefCube +# ref = RefTetrahedron #src +ip = Lagrange{dim, ref, 1}() +ip = Lagrange{dim, ref, 2}() #src +ip_geo = Lagrange{dim, ref, 1}() +qr = QuadratureRule{dim, ref}(2) +qr = QuadratureRule{dim, ref}(4) #src +cellvalues = CellScalarValues(qr, ip, ip_geo); + +# ### Degrees of freedom +# To handle the dofs correctly we now utilize the `DistributedDofHandle` +# instead of the `DofHandler`. For the user the interface is the same. +dh = FerritePartitionedArrays.DistributedDofHandler(dgrid) +push!(dh, :u, 1, ip) +close!(dh); +# Hypre needs locally continuous indices. +FerritePartitionedArrays.hypre_reorder!(dh); + +# ### Boundary conditions +# Nothing has to be changed here either. 
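# (No distributed treatment of the constraints is needed at this point: the Dirichlet
# conditions are enforced element-locally via `apply_local!` during assembly further below.)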
+ch = ConstraintHandler(dh); +∂Ω = union(getfaceset.((dgrid, ), ["left", "right", "top", "bottom"])...); +∂Ω = union(getfaceset.((dgrid, ), ["left", "right", "top", "bottom", "front", "back"])...); #src +dbc = Dirichlet(:u, ∂Ω, (x, t) -> 0) +dbc_val = 0 #src +dbc = Dirichlet(:u, ∂Ω, (x, t) -> dbc_val) #src +add!(ch, dbc); +close!(ch) +# update!(ch, 0.0); + +my_rank = MPI.Comm_rank(MPI.COMM_WORLD) +# println("R$my_rank: prescribing $(ch.prescribed_dofs) on $∂Ω") + +# ### Assembling the linear system +# Assembling the system works also mostly analogue. +function doassemble(cellvalues::CellScalarValues{dim}, dh::FerritePartitionedArrays.DistributedDofHandler, ch::ConstraintHandler) where {dim} + n_basefuncs = getnbasefunctions(cellvalues) + Ke = zeros(n_basefuncs, n_basefuncs) + fe = zeros(n_basefuncs) + + # comm = global_comm(getglobalgrid(dh)) + comm = MPI.COMM_WORLD + ilower = dh.ldof_to_gdof[1] + iupper = dh.ldof_to_gdof[1]+FerritePartitionedArrays.num_local_true_dofs(dh)-1 + @show ilower, iupper + K = HYPREMatrix(comm, ilower, iupper) + f = HYPREVector(comm, ilower, iupper) + + # --------------------- Distributed assembly -------------------- + # The synchronization with the global sparse matrix is handled by + # an assembler again. You can choose from different backends, which + # are described in the docs and will be expaned over time. This call + # may trigger a large amount of communication. + # NOTE: At the time of writing the only backend available is a COO + # assembly via PartitionedArrays.jl . + assembler = start_assemble(K, f) + + # For the local assembly nothing changes + for cell in CellIterator(dh) + fill!(Ke, 0) + fill!(fe, 0) + + reinit!(cellvalues, cell) + coords = getcoordinates(cell) + + for q_point in 1:getnquadpoints(cellvalues) + dΩ = getdetJdV(cellvalues, q_point) + + for i in 1:n_basefuncs + v = shape_value(cellvalues, q_point, i) + ∇v = shape_gradient(cellvalues, q_point, i) + # Manufactured solution of Π cos(xᵢπ) + x = spatial_coordinate(cellvalues, q_point, coords) + fe[i] += (π/2)^2 * dim * prod(cos, x*π/2) * v * dΩ + + for j in 1:n_basefuncs + ∇u = shape_gradient(cellvalues, q_point, j) + Ke[i, j] += (∇v ⋅ ∇u) * dΩ + end + end + end + + apply_local!(Ke, fe, celldofs(cell), ch) + Ferrite.assemble!(assembler, dh.ldof_to_gdof[celldofs(cell)], fe, Ke) + end + + # Finally, for the `PartitionedArraysCOOAssembler` we have to call + # `end_assemble` to construct the global sparse matrix and the global + # right hand side vector. + HYPRE.finish_assemble!(assembler.A) + return K, f +end +#md nothing # hide + +# ### Solution of the system +# Again, we assemble our problem and apply the constraints as needed. +K, f = doassemble(cellvalues, dh, ch); + +precond = HYPRE.BoomerAMG() +solver = HYPRE.PCG(; Precond = precond) +xh = HYPRE.solve(solver, K, f) + +# Copy solution from HYPRE to Julia +x = Vector{Float64}(undef, FerritePartitionedArrays.num_local_true_dofs(dh)) +copy!(x, xh) + +# Collect to root rank +# if my_rank == 1 +# X = Vector{Float64}(undef, 45) +# counts = [27+9,27-9] +# MPI.Gatherv!(x, VBuffer(X, [counts]), MPI.COMM_WORLD) +# @show norm(X) +# else +# MPI.Gatherv!(x, nothing, MPI.COMM_WORLD) +# end +@show x + +# # ### Exporting via PVTK +# # To visualize the result we export the grid and our field `u` +# # to a VTK-file, which can be viewed in e.g. [ParaView](https://www.paraview.org/). 
+# vtk_grid("heat_equation_distributed", dh) do vtk +# vtk_point_data(vtk, dh, u) +# # For debugging purposes it can be helpful to enrich +# # the visualization with some meta information about +# # the grid and its partitioning +# FerritePartitionedArrays.vtk_shared_vertices(vtk, dgrid) +# FerritePartitionedArrays.vtk_shared_faces(vtk, dgrid) +# FerritePartitionedArrays.vtk_shared_edges(vtk, dgrid) #src +# FerritePartitionedArrays.vtk_partitioning(vtk, dgrid) +# end + +# ## Test the result against the manufactured solution #src +# using Test #src +# for cell in CellIterator(dh) #src +# reinit!(cellvalues, cell) #src +# n_basefuncs = getnbasefunctions(cellvalues) #src +# coords = getcoordinates(cell) #src +# map_parts(local_view(u, u.rows)) do u_local #src +# uₑ = u_local[celldofs(cell)] #src +# for q_point in 1:getnquadpoints(cellvalues) #src +# x = spatial_coordinate(cellvalues, q_point, coords) #src +# for i in 1:n_basefuncs #src +# uₐₙₐ = prod(cos, x*π/2)+dbc_val #src +# uₐₚₚᵣₒₓ = function_value(cellvalues, q_point, uₑ) #src +# @test isapprox(uₐₙₐ, uₐₚₚᵣₒₓ; atol=1e-1) #src +# end #src +# end #src +# end #src +# end #src + +# Finally, we gracefully shutdown MPI +# MPI.Finalize() diff --git a/ext/FerritePartitionedArrays/DistributedDofHandler.jl b/ext/FerritePartitionedArrays/DistributedDofHandler.jl index 1f7859725f..bd8c025649 100644 --- a/ext/FerritePartitionedArrays/DistributedDofHandler.jl +++ b/ext/FerritePartitionedArrays/DistributedDofHandler.jl @@ -604,11 +604,7 @@ function Ferrite.close!(dh::DistributedDofHandler) end -function close_hypre!(dh::DistributedDofHandler) - Ferrite.__close!(dh) - # Compute the owners of the dofs - append!(dh.ldof_to_rank, compute_dof_ownership(dh)) - +function hypre_reorder!(dh::DistributedDofHandler) # Reorder to make local dofs continuous next_local_dof = 1 next_nonlocal_dof = num_local_true_dofs(dh)+1 @@ -628,13 +624,10 @@ function close_hypre!(dh::DistributedDofHandler) cell_dofs[i] = permutation[cell_dofs[i]] end - dh.ldof_to_rank .= dh.ldof_to_rank[permutation] - # dh.ldof_to_gdof .= dh.ldof_to_gdof[permutation] - - # Communicate the numbering to make it global - append!(dh.ldof_to_gdof, local_to_global_numbering(dh)) - - return dh + dh.ldof_to_rank .= dh.ldof_to_rank[permutation] + Ferrite.@debug println("Updated dof ranks: $(dh.ldof_to_rank) (R$my_rank)") + dh.ldof_to_gdof .= dh.ldof_to_gdof[permutation] + Ferrite.@debug println("Updated local to global: $(dh.ldof_to_gdof) (R$my_rank)") end function WriteVTK.vtk_grid(filename::AbstractString, dh::DistributedDofHandler; compress::Bool=true) From 1c31a8b1ad519799d55e8d555139ce6ccaf92f1a Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Fri, 17 Feb 2023 20:57:37 +0100 Subject: [PATCH 105/124] Add missing overload for reinit./src/cellml2julia --- src/iterators.jl | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/iterators.jl b/src/iterators.jl index 19faff7b2b..94cc4b4cb2 100644 --- a/src/iterators.jl +++ b/src/iterators.jl @@ -69,7 +69,17 @@ function CellCache(dh::DH, flags::UpdateFlags=UpdateFlags()) where {DH<:Abstract return CellCache(flags, getgrid(dh), ScalarWrapper(-1), nodes, coords, dh, celldofs) end -# TODO: Can always resize and combine the two reinit! methods maybe? 
+function reinit!(cc::CellCache{<:Any,<:AbstractGrid,<:Nothing}, i::Int) + cc.cellid[] = i + if cc.flags.nodes + cellnodes!(cc.nodes, cc.grid, i) + end + if cc.flags.coords + cellcoords!(cc.coords, cc.grid, i) + end + return cc +end + function reinit!(cc::CellCache{<:Any,<:AbstractGrid,<:DofHandler}, i::Int) cc.cellid[] = i if cc.flags.nodes @@ -78,7 +88,7 @@ function reinit!(cc::CellCache{<:Any,<:AbstractGrid,<:DofHandler}, i::Int) if cc.flags.coords cellcoords!(cc.coords, cc.grid, i) end - if cc.dh !== nothing && cc.flags.dofs + if cc.flags.dofs celldofs!(cc.dofs, cc.dh, i) end return cc From a54cdf03e2d9be6943f868d13ddcfd61aba05e7b Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Fri, 17 Feb 2023 21:18:12 +0100 Subject: [PATCH 106/124] Fix exports. --- src/exports.jl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/exports.jl b/src/exports.jl index c26cf027dd..8e618a71a5 100644 --- a/src/exports.jl +++ b/src/exports.jl @@ -40,7 +40,6 @@ export # Grid Grid, - DistributedGrid, Node, Cell, Line, @@ -66,7 +65,6 @@ export getcells, getgrid, getlocalgrid, - getglobalgrid, getncells, getnodes, getnnodes, @@ -84,7 +82,6 @@ export getedgesets, getvertexsets, getdim, - vertex_comm, onboundary, nfaces, addnodeset!, @@ -114,9 +111,6 @@ export DofHandler, close!, ndofs, - num_local_true_dofs, - num_local_dofs, - num_global_dofs, ndofs_per_cell, celldofs!, celldofs, From 96dd397150dbc8b4b04bb7c1ca2b4ba391eab643 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Sun, 19 Feb 2023 01:07:52 +0100 Subject: [PATCH 107/124] Fix HYPRE example. --- .../literate/distributed_assembly_hypre.jl | 94 ++++++------ .../DistributedDofHandler.jl | 139 ++++++++++++++---- 2 files changed, 157 insertions(+), 76 deletions(-) diff --git a/docs/src/literate/distributed_assembly_hypre.jl b/docs/src/literate/distributed_assembly_hypre.jl index 728e01aff8..b486bfeeb5 100644 --- a/docs/src/literate/distributed_assembly_hypre.jl +++ b/docs/src/literate/distributed_assembly_hypre.jl @@ -79,7 +79,7 @@ FerritePartitionedArrays = Base.get_extension(Ferrite, :FerritePartitionedArrays # and distribute it across our processors using `generate_distributed_grid`. # dgrid = FerritePartitionedArrays.generate_distributed_grid(QuadraticQuadrilateral, (3, 1)); # dgrid = FerritePartitionedArrays.generate_distributed_grid(Tetrahedron, (2, 2, 2)); -dgrid = FerritePartitionedArrays.generate_distributed_grid(Hexahedron, (2, 2, 2)); #src +dgrid = FerritePartitionedArrays.generate_distributed_grid(Hexahedron, (10, 10, 10)); #src # dgrid = FerritePartitionedArrays.generate_distributed_grid(Tetrahedron, (3, 3, 3)); #src # ### Trial and test functions @@ -102,7 +102,8 @@ dh = FerritePartitionedArrays.DistributedDofHandler(dgrid) push!(dh, :u, 1, ip) close!(dh); # Hypre needs locally continuous indices. -FerritePartitionedArrays.hypre_reorder!(dh); +#TODO I think we can eliminate this one easily. +# FerritePartitionedArrays.hypre_reorder!(dh); # ### Boundary conditions # Nothing has to be changed here either. 
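For reference, the constraint setup touched by the next hunk is still the plain serial
pattern (a condensed sketch using the names from this example):

    ch = ConstraintHandler(dh)
    add!(ch, Dirichlet(:u, ∂Ω, (x, t) -> 0))
    close!(ch)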
@@ -116,7 +117,7 @@ add!(ch, dbc); close!(ch) # update!(ch, 0.0); -my_rank = MPI.Comm_rank(MPI.COMM_WORLD) +my_rank = MPI.Comm_rank(MPI.COMM_WORLD)+1 # println("R$my_rank: prescribing $(ch.prescribed_dofs) on $∂Ω") # ### Assembling the linear system @@ -128,8 +129,11 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::FerritePartitionedArr # comm = global_comm(getglobalgrid(dh)) comm = MPI.COMM_WORLD - ilower = dh.ldof_to_gdof[1] - iupper = dh.ldof_to_gdof[1]+FerritePartitionedArrays.num_local_true_dofs(dh)-1 + ltdofs = dh.ldof_to_gdof[dh.ldof_to_rank .== my_rank] + ilower = minimum(ltdofs) + iupper = maximum(ltdofs) + # ilower = dh.ldof_to_gdof[1] + # iupper = dh.ldof_to_gdof[1]+FerritePartitionedArrays.num_local_true_dofs(dh)-1 @show ilower, iupper K = HYPREMatrix(comm, ilower, iupper) f = HYPREVector(comm, ilower, iupper) @@ -186,55 +190,43 @@ K, f = doassemble(cellvalues, dh, ch); precond = HYPRE.BoomerAMG() solver = HYPRE.PCG(; Precond = precond) -xh = HYPRE.solve(solver, K, f) +uh = HYPRE.solve(solver, K, f) # Copy solution from HYPRE to Julia -x = Vector{Float64}(undef, FerritePartitionedArrays.num_local_true_dofs(dh)) -copy!(x, xh) - -# Collect to root rank -# if my_rank == 1 -# X = Vector{Float64}(undef, 45) -# counts = [27+9,27-9] -# MPI.Gatherv!(x, VBuffer(X, [counts]), MPI.COMM_WORLD) -# @show norm(X) -# else -# MPI.Gatherv!(x, nothing, MPI.COMM_WORLD) -# end -@show x +uj = Vector{Float64}(undef, FerritePartitionedArrays.num_local_true_dofs(dh)) +copy!(uj, uh) + +# And convert from HYPRE to Ferrite +u_local = Vector{Float64}(undef, FerritePartitionedArrays.num_local_dofs(dh)) +FerritePartitionedArrays.hypre_to_ferrite!(u_local, uj, dh) # # ### Exporting via PVTK # # To visualize the result we export the grid and our field `u` # # to a VTK-file, which can be viewed in e.g. [ParaView](https://www.paraview.org/). 
-# vtk_grid("heat_equation_distributed", dh) do vtk -# vtk_point_data(vtk, dh, u) -# # For debugging purposes it can be helpful to enrich -# # the visualization with some meta information about -# # the grid and its partitioning -# FerritePartitionedArrays.vtk_shared_vertices(vtk, dgrid) -# FerritePartitionedArrays.vtk_shared_faces(vtk, dgrid) -# FerritePartitionedArrays.vtk_shared_edges(vtk, dgrid) #src -# FerritePartitionedArrays.vtk_partitioning(vtk, dgrid) -# end - -# ## Test the result against the manufactured solution #src -# using Test #src -# for cell in CellIterator(dh) #src -# reinit!(cellvalues, cell) #src -# n_basefuncs = getnbasefunctions(cellvalues) #src -# coords = getcoordinates(cell) #src -# map_parts(local_view(u, u.rows)) do u_local #src -# uₑ = u_local[celldofs(cell)] #src -# for q_point in 1:getnquadpoints(cellvalues) #src -# x = spatial_coordinate(cellvalues, q_point, coords) #src -# for i in 1:n_basefuncs #src -# uₐₙₐ = prod(cos, x*π/2)+dbc_val #src -# uₐₚₚᵣₒₓ = function_value(cellvalues, q_point, uₑ) #src -# @test isapprox(uₐₙₐ, uₐₚₚᵣₒₓ; atol=1e-1) #src -# end #src -# end #src -# end #src -# end #src - -# Finally, we gracefully shutdown MPI -# MPI.Finalize() +vtk_grid("heat_equation_distributed", dh) do vtk + vtk_point_data(vtk, dh, u_local) + # For debugging purposes it can be helpful to enrich + # the visualization with some meta information about + # the grid and its partitioning + FerritePartitionedArrays.vtk_shared_vertices(vtk, dgrid) + FerritePartitionedArrays.vtk_shared_faces(vtk, dgrid) + FerritePartitionedArrays.vtk_shared_edges(vtk, dgrid) #src + FerritePartitionedArrays.vtk_partitioning(vtk, dgrid) +end + +## Test the result against the manufactured solution #src +using Test #src +for cell in CellIterator(dh) #src + reinit!(cellvalues, cell) #src + n_basefuncs = getnbasefunctions(cellvalues) #src + coords = getcoordinates(cell) #src + uₑ = u_local[celldofs(cell)] #src + for q_point in 1:getnquadpoints(cellvalues) #src + x = spatial_coordinate(cellvalues, q_point, coords) #src + for i in 1:n_basefuncs #src + uₐₙₐ = prod(cos, x*π/2)+dbc_val #src + uₐₚₚᵣₒₓ = function_value(cellvalues, q_point, uₑ) #src + @test isapprox(uₐₙₐ, uₐₚₚᵣₒₓ; atol=1e-1) #src + end #src + end #src +end #src diff --git a/ext/FerritePartitionedArrays/DistributedDofHandler.jl b/ext/FerritePartitionedArrays/DistributedDofHandler.jl index bd8c025649..b68800c6b1 100644 --- a/ext/FerritePartitionedArrays/DistributedDofHandler.jl +++ b/ext/FerritePartitionedArrays/DistributedDofHandler.jl @@ -85,8 +85,8 @@ function compute_dof_ownership(dh) for (lvi, sv) ∈ get_shared_vertices(dgrid) for field_idx in 1:num_fields(dh) if Ferrite.has_vertex_dofs(dh, field_idx, lvi) - local_dof_indices = Ferrite.vertex_dofs(dh, field_idx, lvi) - dof_owner[local_dof_indices] .= compute_owner(dgrid, sv) + local_dofs = Ferrite.vertex_dofs(dh, field_idx, lvi) + dof_owner[local_dofs] .= compute_owner(dgrid, sv) end end end @@ -94,8 +94,8 @@ function compute_dof_ownership(dh) for (lfi, sf) ∈ get_shared_faces(dgrid) for field_idx in 1:num_fields(dh) if Ferrite.has_face_dofs(dh, field_idx, lfi) - local_dof_indices = Ferrite.face_dofs(dh, field_idx, lfi) - dof_owner[local_dof_indices] .= compute_owner(dgrid, sf) + local_dofs = Ferrite.face_dofs(dh, field_idx, lfi) + dof_owner[local_dofs] .= compute_owner(dgrid, sf) end end end @@ -103,8 +103,8 @@ function compute_dof_ownership(dh) for (lei, se) ∈ get_shared_edges(dgrid) for field_idx in 1:num_fields(dh) if Ferrite.has_edge_dofs(dh, field_idx, lei) - local_dof_indices = 
Ferrite.edge_dofs(dh, field_idx, lei) - dof_owner[local_dof_indices] .= compute_owner(dgrid, se) + local_dofs = Ferrite.edge_dofs(dh, field_idx, lei) + dof_owner[local_dofs] .= compute_owner(dgrid, se) end end end @@ -604,32 +604,121 @@ function Ferrite.close!(dh::DistributedDofHandler) end -function hypre_reorder!(dh::DistributedDofHandler) - # Reorder to make local dofs continuous - next_local_dof = 1 - next_nonlocal_dof = num_local_true_dofs(dh)+1 +# Hypre to Ferrite vector +function hypre_to_ferrite!(u, x, dh) my_rank = global_rank(getglobalgrid(dh)) - permutation = Vector{Int}(undef, dh.ndofs.x) - for i ∈ 1:dh.ndofs.x - if dh.ldof_to_rank[i] == my_rank - permutation[next_local_dof] = i - next_local_dof += 1 - else - permutation[next_nonlocal_dof] = i - next_nonlocal_dof += 1 + + # Helper to gather which global dof and values have to be send to which process + gdof_value_send = [Dict{Int,Float64}() for i ∈ 1:MPI.Comm_size(MPI.COMM_WORLD)] + # Helper to get the global dof to local dof mapping + rank_recv_count = [0 for i∈1:MPI.Comm_size(MPI.COMM_WORLD)] + gdof_to_ldof = Dict{Int,Int}() + + next_dof = 1 + for (ldof,rank) ∈ enumerate(dh.ldof_to_rank) + if rank == my_rank + u[ldof] = x[next_dof] + next_dof += 1 + else + # We have to sync these later. + gdof_to_ldof[dh.ldof_to_gdof[ldof]] = ldof + rank_recv_count[rank] += 1 end end - cell_dofs = dh.cell_dofs - for i in eachindex(cell_dofs) - cell_dofs[i] = permutation[cell_dofs[i]] + + # TODO speed this up and better API + dgrid = FerritePartitionedArrays.getglobalgrid(dh) + for (lvi, sv) ∈ get_shared_vertices(dgrid) + my_rank != FerritePartitionedArrays.compute_owner(dgrid, sv) && continue + for field_idx in 1:num_fields(dh) + if Ferrite.has_vertex_dofs(dh, field_idx, lvi) + local_dofs = Ferrite.vertex_dofs(dh, field_idx, lvi) + global_dofs = dh.ldof_to_gdof[local_dofs] + for receiver_rank ∈ keys(FerritePartitionedArrays.remote_entities(sv)) + for i ∈ 1:length(global_dofs) + # Note that u already has the correct values for all locally owned dofs due to the loop above! + gdof_value_send[receiver_rank][global_dofs[i]] = u[local_dofs[i]] + end + end + end + end end - dh.ldof_to_rank .= dh.ldof_to_rank[permutation] - Ferrite.@debug println("Updated dof ranks: $(dh.ldof_to_rank) (R$my_rank)") - dh.ldof_to_gdof .= dh.ldof_to_gdof[permutation] - Ferrite.@debug println("Updated local to global: $(dh.ldof_to_gdof) (R$my_rank)") + for (lvi, se) ∈ get_shared_edges(dgrid) + my_rank != FerritePartitionedArrays.compute_owner(dgrid, se) && continue + for field_idx in 1:num_fields(dh) + if Ferrite.has_edge_dofs(dh, field_idx, lvi) + local_dofs = Ferrite.edge_dofs(dh, field_idx, lvi) + global_dofs = dh.ldof_to_gdof[local_dofs] + for receiver_rank ∈ keys(FerritePartitionedArrays.remote_entities(se)) + for i ∈ 1:length(global_dofs) + # Note that u already has the correct values for all locally owned dofs due to the loop above! + gdof_value_send[receiver_rank][global_dofs[i]] = u[local_dofs[i]] + end + end + end + end + end + + for (lvi, sf) ∈ get_shared_faces(dgrid) + my_rank != FerritePartitionedArrays.compute_owner(dgrid, sf) && continue + for field_idx in 1:num_fields(dh) + if Ferrite.has_face_dofs(dh, field_idx, lvi) + local_dofs = Ferrite.face_dofs(dh, field_idx, lvi) + global_dofs = dh.ldof_to_gdof[local_dofs] + for receiver_rank ∈ keys(FerritePartitionedArrays.remote_entities(sf)) + for i ∈ 1:length(global_dofs) + # Note that u already has the correct values for all locally owned dofs due to the loop above! 
+ gdof_value_send[receiver_rank][global_dofs[i]] = u[local_dofs[i]] + end + end + end + end + end + + Ferrite.@debug println("preparing to distribute $gdof_value_send (R$my_rank)") + + # TODO precompute graph at it is static + graph_source = Cint[my_rank-1] + graph_dest = Cint[] + for r ∈ 1:MPI.Comm_size(MPI.COMM_WORLD) + !isempty(gdof_value_send[r]) && push!(graph_dest, r-1) + end + + graph_degree = Cint[length(graph_dest)] + graph_comm = MPI.Dist_graph_create(MPI.COMM_WORLD, graph_source, graph_degree, graph_dest) + indegree, outdegree, _ = MPI.Dist_graph_neighbors_count(graph_comm) + + inranks = Vector{Cint}(undef, indegree) + outranks = Vector{Cint}(undef, outdegree) + MPI.Dist_graph_neighbors!(graph_comm, inranks, outranks) + + send_count = [length(gdof_value_send[outrank+1]) for outrank ∈ outranks] + recv_count = [rank_recv_count[inrank+1] for inrank ∈ inranks] + + send_gdof = Cint[] + for outrank ∈ outranks + append!(send_gdof, Cint.(keys(gdof_value_send[outrank+1]))) + end + recv_gdof = Vector{Cint}(undef, sum(recv_count)) + MPI.Neighbor_alltoallv!(VBuffer(send_gdof,send_count), VBuffer(recv_gdof,recv_count), graph_comm) + + send_val = Cdouble[] + for outrank ∈ outranks + append!(send_val, Cdouble.(values(gdof_value_send[outrank+1]))) + end + recv_val = Vector{Cdouble}(undef, sum(recv_count)) + MPI.Neighbor_alltoallv!(VBuffer(send_val,send_count), VBuffer(recv_val,recv_count), graph_comm) + + for (gdof, val) ∈ zip(recv_gdof, recv_val) + u[gdof_to_ldof[gdof]] = val + end + + return u end + + function WriteVTK.vtk_grid(filename::AbstractString, dh::DistributedDofHandler; compress::Bool=true) vtk_grid(filename, getglobalgrid(dh); compress=compress) end From 5b9a07ac7b62e610aed153ae6e4b75a2b6f7121b Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Sun, 19 Feb 2023 01:08:28 +0100 Subject: [PATCH 108/124] Fix deprecation. --- src/Dofs/DofHandler.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index 6c96d8bf08..e850faf85b 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -267,20 +267,20 @@ function __close!(dh::AbstractDofHandler) # `vertexdict` keeps track of the visited vertices. We store the global vertex # number and the first dof we added to that vertex. - vertexdicts = [Dict{Int,Int}() for _ in 1:nfields(dh)] + vertexdicts = [Dict{Int,Int}() for _ in 1:num_fields(dh)] # `edgedict` keeps track of the visited edges, this will only be used for a 3D problem # An edge is determined from two vertices, but we also need to store the direction # of the first edge we encounter and add dofs too. When we encounter the same edge # the next time we check if the direction is the same, otherwise we reuse the dofs # in the reverse order - edgedicts = [Dict{Tuple{Int,Int},Tuple{Int,Bool}}() for _ in 1:nfields(dh)] + edgedicts = [Dict{Tuple{Int,Int},Tuple{Int,Bool}}() for _ in 1:num_fields(dh)] # `facedict` keeps track of the visited faces. We only need to store the first dof we # added to the face; if we encounter the same face again we *always* reverse the order # In 2D a face (i.e. a line) is uniquely determined by 2 vertices, and in 3D a # face (i.e. a surface) is uniquely determined by 3 vertices. - facedicts = [Dict{NTuple{dim,Int},Int}() for _ in 1:nfields(dh)] + facedicts = [Dict{NTuple{dim,Int},Int}() for _ in 1:num_fields(dh)] # celldofs are never shared between different cells so there is no need # for a `celldict` to keep track of which cells we have added dofs too. 
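As a concrete picture of this bookkeeping (illustrative values only, for a single scalar
field): `vertexdicts[1]` maps a global vertex number to the first dof placed on it, and
`facedicts[1]` is keyed by the vertex tuple identifying the face, e.g.

    vertexdicts[1][12]       # -> 7, first dof added on global vertex 12
    facedicts[1][(3, 12)]    # -> 23, first dof added on the face spanned by vertices 3 and 12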
@@ -460,7 +460,7 @@ with stored values in the correct places. The keyword argument `coupling` can be used to specify how fields (or components) in the dof handler couple to each other. `coupling` should be a square matrix of booleans with -`nfields` (or `ncomponents`) rows/columns with `true` if fields are coupled and `false` if +`num_fields` (or `ncomponents`) rows/columns with `true` if fields are coupled and `false` if not. By default full coupling is assumed. See the [Sparsity Pattern](@ref) section of the manual. From 3a4fb82fbbfe479b0f5d5a365823632931e06e51 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Sun, 19 Feb 2023 12:18:16 +0100 Subject: [PATCH 109/124] Fix plasticity example. --- .../distributed_assembly_plasticity.jl | 87 +++---------------- .../DistributedDofHandler.jl | 3 + ext/FerritePartitionedArrays/assembler.jl | 4 - src/Dofs/DofHandler.jl | 2 +- 4 files changed, 16 insertions(+), 80 deletions(-) diff --git a/docs/src/literate/distributed_assembly_plasticity.jl b/docs/src/literate/distributed_assembly_plasticity.jl index 9bcf2b6fa6..6cc5f71bac 100644 --- a/docs/src/literate/distributed_assembly_plasticity.jl +++ b/docs/src/literate/distributed_assembly_plasticity.jl @@ -15,75 +15,15 @@ # # First we load Ferrite, and some other packages we need using Ferrite, MPI -using IterativeSolvers #, HYPRE -using PartitionedArrays #src +using IterativeSolvers +using PartitionedArrays, Metis using SparseArrays, BlockArrays - -function PartitionedArrays.matrix_exchanger(values,row_exchanger,row_lids,col_lids) - - part = PartitionedArrays.get_part_ids(row_lids) - parts_rcv = row_exchanger.parts_rcv - parts_snd = row_exchanger.parts_snd - - function setup_rcv(part,parts_rcv,row_lids,col_lids,values) - owner_to_i = Dict(( owner=>i for (i,owner) in enumerate(parts_rcv) )) - ptrs = zeros(Int32,length(parts_rcv)+1) - for (li,lj,v) in nziterator(values) - owner = row_lids.lid_to_part[li] - if owner != part - ptrs[owner_to_i[owner]+1] +=1 - end - end - length_to_ptrs!(ptrs) - k_rcv_data = zeros(Int,ptrs[end]-1) - gi_rcv_data = zeros(Int,ptrs[end]-1) - gj_rcv_data = zeros(Int,ptrs[end]-1) - for (k,(li,lj,v)) in enumerate(nziterator(values)) - owner = row_lids.lid_to_part[li] - if owner != part - p = ptrs[owner_to_i[owner]] - k_rcv_data[p] = k - gi_rcv_data[p] = row_lids.lid_to_gid[li] - gj_rcv_data[p] = col_lids.lid_to_gid[lj] - ptrs[owner_to_i[owner]] += 1 - end - end - rewind_ptrs!(ptrs) - k_rcv = Table(k_rcv_data,ptrs) - gi_rcv = Table(gi_rcv_data,ptrs) - gj_rcv = Table(gj_rcv_data,ptrs) - k_rcv, gi_rcv, gj_rcv - end - - k_rcv, gi_rcv, gj_rcv = PartitionedArrays.map_parts(setup_rcv,part,parts_rcv,row_lids,col_lids,values) - - gi_snd = PartitionedArrays.exchange(gi_rcv,parts_snd,parts_rcv) - gj_snd = PartitionedArrays.exchange(gj_rcv,parts_snd,parts_rcv) - - function setup_snd(part,row_lids,col_lids,gi_snd,gj_snd,values) - ptrs = gi_snd.ptrs - k_snd_data = zeros(Int,ptrs[end]-1) - for p in 1:length(gi_snd.data) - gi = gi_snd.data[p] - gj = gj_snd.data[p] - li = row_lids.gid_to_lid[gi] - lj = col_lids.gid_to_lid[gj] - k = nzindex(values,li,lj) - PartitionedArrays.@check k > 0 "The sparsity pattern of the ghost layer is inconsistent on part $part | local index ($li,$lj) | global index ($gi, $gj)" - k_snd_data[p] = k - end - k_snd = Table(k_snd_data,ptrs) - k_snd - end - - k_snd = map_parts(setup_snd,part,row_lids,col_lids,gi_snd,gj_snd,values) - - PartitionedArrays.Exchanger(parts_rcv,parts_snd,k_rcv,k_snd) - end +FerritePartitionedArrays = Base.get_extension(Ferrite, 
:FerritePartitionedArrays) # Launch MPI MPI.Init() + # First we generate a simple grid, specifying the 4 corners of Cooks membrane. function create_cook_grid(nx, ny) corners = [Vec{2}((0.0, 0.0)), @@ -94,7 +34,7 @@ function create_cook_grid(nx, ny) ## facesets for boundary conditions addfaceset!(grid, "clamped", x -> norm(x[1]) ≈ 0.0); addfaceset!(grid, "traction", x -> norm(x[1]) ≈ 48.0); - return DistributedGrid(grid) + return FerritePartitionedArrays.DistributedGrid(grid) end; # Next we define a function to set up our cell- and facevalues. @@ -120,7 +60,7 @@ end; # We create a DofHandler, with two fields, `:u` and `:p`, # with possibly different interpolations function create_dofhandler(grid, ipu, ipp) - dh = DistributedDofHandler(grid) + dh = FerritePartitionedArrays.DistributedDofHandler(grid) push!(dh, :u, 2, ipu) # displacement push!(dh, :p, 1, ipp) # pressure close!(dh) @@ -148,10 +88,10 @@ end # element matrix. Since Ferrite does not force us to use any particular matrix type we will # use a `PseudoBlockArray` from `BlockArrays.jl`. function doassemble(cellvalues_u::CellVectorValues{dim}, cellvalues_p::CellScalarValues{dim}, - facevalues_u::FaceVectorValues{dim}, grid::DistributedGrid, - dh::DistributedDofHandler, mp::LinearElasticity) where {dim} + facevalues_u::FaceVectorValues{dim}, grid::FerritePartitionedArrays.DistributedGrid, + dh::FerritePartitionedArrays.DistributedDofHandler, mp::LinearElasticity) where {dim} - assembler = PartitionedArraysCOOAssembler{Float64}(dh) + assembler = FerritePartitionedArrays.COOAssembler{Float64}(dh) nu = getnbasefunctions(cellvalues_u) np = getnbasefunctions(cellvalues_p) @@ -253,7 +193,7 @@ function solve(ν, interpolation_u, interpolation_p) dh = create_dofhandler(grid, interpolation_u, interpolation_p) dbc = create_bc(dh) vtk_grid("cook_dgrid", dh) do vtk - vtk_partitioning(vtk, grid) + FerritePartitionedArrays.vtk_partitioning(vtk, grid) end ## cellvalues cellvalues_u, cellvalues_p, facevalues_u = create_values(interpolation_u, interpolation_p) @@ -268,7 +208,7 @@ function solve(ν, interpolation_u, interpolation_p) "_linear" vtk_grid(filename, dh) do vtkfile vtk_point_data(vtkfile, dh, u) - vtk_partitioning(vtkfile, grid) + FerritePartitionedArrays.vtk_partitioning(vtkfile, grid) end return u end @@ -279,12 +219,9 @@ u1 = solve(0.4999999, linear, linear); u2 = solve(0.4999999, quadratic, linear); ## test the result #src -using Test #src +# using Test #src # @test norm(u2) ≈ 919.2122668839389 #src -# Finally, we gracefully shutdown MPI -MPI.Finalize() - #md # ## [Plain program](@id distributed-assembly-plain-program) #md # #md # Here follows a version of the program without any comments. diff --git a/ext/FerritePartitionedArrays/DistributedDofHandler.jl b/ext/FerritePartitionedArrays/DistributedDofHandler.jl index b68800c6b1..7d9e2ceaf8 100644 --- a/ext/FerritePartitionedArrays/DistributedDofHandler.jl +++ b/ext/FerritePartitionedArrays/DistributedDofHandler.jl @@ -75,6 +75,9 @@ end Ferrite.renumber!(dh::DistributedDofHandler, perm::AbstractVector{<:Integer}) = error("Not implemented.") +""" +TODO fix for shells +""" function compute_dof_ownership(dh) dgrid = getglobalgrid(dh) my_rank = global_rank(dgrid) diff --git a/ext/FerritePartitionedArrays/assembler.jl b/ext/FerritePartitionedArrays/assembler.jl index dadbbb6050..f17b203e1b 100644 --- a/ext/FerritePartitionedArrays/assembler.jl +++ b/ext/FerritePartitionedArrays/assembler.jl @@ -81,8 +81,6 @@ struct COOAssembler{T} # We decide for row (i.e. 
test function) ownership, because it the image of # SpMV is process local. row_indices = PartitionedArrays.IndexSet(my_rank, ldof_to_gdof, Int32.(ldof_to_rank)) - #FIXME: This below must be fixed before we can assemble to HYPRE IJ. Problem seems to be that rows and cols must be continuously assigned. - #row_indices = PartitionedArrays.IndexRange(my_rank, length(ltdof_indices), ltdof_to_gdof[1], ldof_to_gdof[.!ltdof_indices], Int32.(ldof_to_rank[.!ltdof_indices])) row_data = MPIData(row_indices, comm, (np,)) row_exchanger = Exchanger(row_data) rows = PRange(ngdofs,row_data,row_exchanger) @@ -259,8 +257,6 @@ struct COOAssembler{T} Ferrite.@debug println("all_local_col_ranks $all_local_col_ranks (R$my_rank)") col_indices = PartitionedArrays.IndexSet(my_rank, all_local_cols, all_local_col_ranks) - #FIXME: This below must be fixed before we can assemble to HYPRE IJ. Problem seems to be that rows and cols must be continuously assigned. - #col_indices = PartitionedArrays.IndexRange(my_rank, length(ltdof_indices), ltdof_to_gdof[1], all_local_cols[all_local_col_ranks .!= my_rank], Int32.(all_local_col_ranks[all_local_col_ranks .!= my_rank])) col_data = MPIData(col_indices, comm, (np,)) col_exchanger = Exchanger(col_data) cols = PRange(ngdofs,col_data,col_exchanger) diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index e850faf85b..6a2b74dc64 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -121,12 +121,12 @@ function face_dofs(dh::AbstractDofHandler, field_idx::Int, face::FaceIndex) fdim = getfielddim(dh, field_idx) cell,local_face_index = face cell_geo = getcells(getgrid(dh), cell) - nedges_on_cell = length(Ferrite.edges(cell_geo)) nfaces_on_cell = length(Ferrite.faces(cell_geo)) nvertices_on_cell = length(Ferrite.vertices(cell_geo)) nentitydofs = fdim*Ferrite.nfacedofs(ip)*nfaces_on_cell offset = fdim*nvdofs*nvertices_on_cell if dim > 2 + nedges_on_cell = length(Ferrite.edges(cell_geo)) nedofs = Ferrite.nedgedofs(ip) offset += fdim*nedofs*nedges_on_cell end From f125b3caf23d8d17b049c66aa336c80f7de2f47b Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Mon, 20 Feb 2023 15:16:25 +0100 Subject: [PATCH 110/124] Prepare extension split. --- docs/src/literate/distributed_assembly.jl | 21 +-------- .../literate/distributed_assembly_hypre.jl | 46 ++++++++----------- .../DistributedDofHandler.jl | 39 +++++++--------- ext/FerritePartitionedArrays/assembler.jl | 19 ++++---- ext/FerritePartitionedArrays/grid.jl | 5 +- src/Dofs/DofHandler.jl | 4 +- src/Grid/grid.jl | 4 +- 7 files changed, 54 insertions(+), 84 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 68d998310d..ffd11c8e2d 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -46,7 +46,7 @@ cellvalues = CellScalarValues(qr, ip, ip_geo); # ### Degrees of freedom # To handle the dofs correctly we now utilize the `DistributedDofHandle` # instead of the `DofHandler`. For the user the interface is the same. -dh = FerritePartitionedArrays.DistributedDofHandler(dgrid) +dh = DofHandler(dgrid) add!(dh, :u, 1, ip) close!(dh); @@ -63,7 +63,6 @@ close!(ch) update!(ch, 0.0); my_rank = MPI.Comm_rank(MPI.COMM_WORLD) -println("R$my_rank: prescribing $(ch.prescribed_dofs) on $∂Ω") # ### Assembling the linear system # Assembling the system works also mostly analogue. @@ -79,7 +78,7 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::FerritePartitionedArr # may trigger a large amount of communication. 
# NOTE: At the time of writing the only backend available is a COO # assembly via PartitionedArrays.jl . - assembler = FerritePartitionedArrays.COOAssembler{Float64}(dh) + assembler = start_assemble(dh, MPIBackend()) # For the local assembly nothing changes for cell in CellIterator(dh) @@ -128,22 +127,6 @@ apply!(K, f, ch) # partly due to unimplemented multiplication operators for the matrix data type. u = cg(K, f) -#FIXME #src -# Compute the solution with HYPRE (needs the hotfix in https://github.com/fredrikekre/HYPRE.jl/pull/4 to function partially) #src -# u_ = HYPRE.solve( #src -# HYPRE.PCG( #src -# global_comm(dgrid); #src -# Precond = HYPRE.BoomerAMG() #src -# ), #src -# HYPRE.HYPREMatrix(K), #src -# HYPRE.HYPREVector(f) #src -# ) - -# Convert back to PartitionedArrays vector #src -# u = PVector(0.0, K.cols) #src -# copy!(u, u_) #src -# PartitionedArrays.assemble!(u) #src - # ### Exporting via PVTK # To visualize the result we export the grid and our field `u` # to a VTK-file, which can be viewed in e.g. [ParaView](https://www.paraview.org/). diff --git a/docs/src/literate/distributed_assembly_hypre.jl b/docs/src/literate/distributed_assembly_hypre.jl index b486bfeeb5..ff2933f8c3 100644 --- a/docs/src/literate/distributed_assembly_hypre.jl +++ b/docs/src/literate/distributed_assembly_hypre.jl @@ -23,7 +23,6 @@ MPI.Init() HYPRE.Init() function Ferrite.create_sparsity_pattern(::Type{<:HYPREMatrix}, dh::DofHandler, ch::Union{ConstraintHandler,Nothing}=nothing; kwargs...) - println("Ferrite.create_sparsity_pattern") K = create_sparsity_pattern(dh, ch; kwargs...) fill!(K.nzval, 1) return HYPREMatrix(K) @@ -41,20 +40,20 @@ Ferrite.matrix_handle(a::HYPREAssembler) = a.A.A.A # :) Ferrite.vector_handle(a::HYPREAssembler) = a.A.b.b # :) function Ferrite.start_assemble(K::HYPREMatrix, f::HYPREVector) - println("Ferrite.start_assemble") return HYPREAssembler(HYPRE.start_assemble!(K, f)) end function Ferrite.assemble!(a::HYPREAssembler, dofs::AbstractVector{<:Integer}, ke::AbstractMatrix, fe::AbstractVector) - println("Ferrite.assemble!") HYPRE.assemble!(a.A, dofs, ke, fe) end +function Ferrite.end_assemble(a::HYPREAssembler) + HYPRE.finish_assemble!(a.A) +end ## Methods for arrayutils.jl ## function Ferrite.addindex!(A::HYPREMatrix, v, i::Int, j::Int) - println("Ferrite.addindex!") nrows = HYPRE_Int(1) ncols = Ref{HYPRE_Int}(1) rows = Ref{HYPRE_BigInt}(i) @@ -65,7 +64,6 @@ function Ferrite.addindex!(A::HYPREMatrix, v, i::Int, j::Int) end function Ferrite.addindex!(b::HYPREVector, v, i::Int) - println("Ferrite.addindex!") nvalues = HYPRE_Int(1) indices = Ref{HYPRE_BigInt}(i) values = Ref{HYPRE_Complex}(v) @@ -98,12 +96,9 @@ cellvalues = CellScalarValues(qr, ip, ip_geo); # ### Degrees of freedom # To handle the dofs correctly we now utilize the `DistributedDofHandle` # instead of the `DofHandler`. For the user the interface is the same. -dh = FerritePartitionedArrays.DistributedDofHandler(dgrid) +dh = DofHandler(dgrid) push!(dh, :u, 1, ip) close!(dh); -# Hypre needs locally continuous indices. -#TODO I think we can eliminate this one easily. -# FerritePartitionedArrays.hypre_reorder!(dh); # ### Boundary conditions # Nothing has to be changed here either. @@ -115,10 +110,6 @@ dbc_val = 0 #src dbc = Dirichlet(:u, ∂Ω, (x, t) -> dbc_val) #src add!(ch, dbc); close!(ch) -# update!(ch, 0.0); - -my_rank = MPI.Comm_rank(MPI.COMM_WORLD)+1 -# println("R$my_rank: prescribing $(ch.prescribed_dofs) on $∂Ω") # ### Assembling the linear system # Assembling the system works also mostly analogue. 
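The backend-agnostic pattern these changes converge towards can be summarised roughly as
follows (a sketch; `MPIBackend`, `start_assemble(dh, backend)` and `end_assemble` are the
entry points used in this patch series, and the element routine is elided):

    assembler = start_assemble(dh, MPIBackend())
    for cell in CellIterator(dh)
        # ... fill Ke and fe for this cell ...
        assemble!(assembler, celldofs(cell), fe, Ke)
    end
    K, f = end_assemble(assembler)    # the COO backend hands back the assembled system here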
@@ -127,24 +118,19 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::FerritePartitionedArr Ke = zeros(n_basefuncs, n_basefuncs) fe = zeros(n_basefuncs) - # comm = global_comm(getglobalgrid(dh)) - comm = MPI.COMM_WORLD - ltdofs = dh.ldof_to_gdof[dh.ldof_to_rank .== my_rank] - ilower = minimum(ltdofs) - iupper = maximum(ltdofs) - # ilower = dh.ldof_to_gdof[1] - # iupper = dh.ldof_to_gdof[1]+FerritePartitionedArrays.num_local_true_dofs(dh)-1 - @show ilower, iupper - K = HYPREMatrix(comm, ilower, iupper) - f = HYPREVector(comm, ilower, iupper) - # --------------------- Distributed assembly -------------------- # The synchronization with the global sparse matrix is handled by # an assembler again. You can choose from different backends, which # are described in the docs and will be expaned over time. This call # may trigger a large amount of communication. - # NOTE: At the time of writing the only backend available is a COO - # assembly via PartitionedArrays.jl . + + # TODO how to put this into an interface. + dgrid = FerritePartitionedArrays.getglobalgrid(dh) + comm = FerritePartitionedArrays.global_comm(dgrid) + ldofrange = FerritePartitionedArrays.local_dof_range(dh) + K = HYPREMatrix(comm, first(ldofrange), last(ldofrange)) + f = HYPREVector(comm, first(ldofrange), last(ldofrange)) + assembler = start_assemble(K, f) # For the local assembly nothing changes @@ -173,13 +159,16 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::FerritePartitionedArr end apply_local!(Ke, fe, celldofs(cell), ch) + + # TODO how to put this into an interface. Ferrite.assemble!(assembler, dh.ldof_to_gdof[celldofs(cell)], fe, Ke) end - # Finally, for the `PartitionedArraysCOOAssembler` we have to call + # Finally, for the `HYPREAssembler` we have to call # `end_assemble` to construct the global sparse matrix and the global # right hand side vector. - HYPRE.finish_assemble!(assembler.A) + end_assemble(assembler) + return K, f end #md nothing # hide @@ -192,6 +181,7 @@ precond = HYPRE.BoomerAMG() solver = HYPRE.PCG(; Precond = precond) uh = HYPRE.solve(solver, K, f) +# TODO how to put this into an interface. # Copy solution from HYPRE to Julia uj = Vector{Float64}(undef, FerritePartitionedArrays.num_local_true_dofs(dh)) copy!(uj, uh) diff --git a/ext/FerritePartitionedArrays/DistributedDofHandler.jl b/ext/FerritePartitionedArrays/DistributedDofHandler.jl index 7d9e2ceaf8..f327ea4a6b 100644 --- a/ext/FerritePartitionedArrays/DistributedDofHandler.jl +++ b/ext/FerritePartitionedArrays/DistributedDofHandler.jl @@ -26,7 +26,19 @@ struct DistributedDofHandler{dim,T,G<:Ferrite.AbstractDistributedGrid{dim}} <: F ldof_to_rank::Vector{Int32} end -function DistributedDofHandler(grid::Ferrite.AbstractDistributedGrid{dim}) where {dim} +""" +Compute the global dof range of the dofs owned by the calling process. It is guaranteed to be continuous. +""" +function local_dof_range(dh::DistributedDofHandler) + my_rank = global_rank(getglobalgrid(dh)) + ltdofs = dh.ldof_to_gdof[dh.ldof_to_rank .== my_rank] + return minimum(ltdofs):maximum(ltdofs) +end + +""" +Construct the correct distributed dof handler from a given distributed grid. +""" +function Ferrite.DofHandler(grid::Ferrite.AbstractDistributedGrid{dim}) where {dim} isconcretetype(getcelltype(grid)) || error("Grid includes different celltypes. 
DistributedMixedDofHandler not implemented yet.") DistributedDofHandler(Symbol[], Int[], Interpolation[], Ferrite.BCValues{Float64}[], Int[], Int[], Ferrite.ScalarWrapper(false), grid, Ferrite.ScalarWrapper(-1), Int[], Int32[]) end @@ -53,32 +65,14 @@ getglobalgrid(dh::DistributedDofHandler) = dh.grid # Compat layer against serial code Ferrite.getgrid(dh::DistributedDofHandler) = getlocalgrid(dh) -# TODO this is copy pasta from DofHandler.jl -function Ferrite.celldofs!(global_dofs::Vector{Int}, dh::DistributedDofHandler, i::Int) - @assert Ferrite.isclosed(dh) - @assert length(global_dofs) == ndofs_per_cell(dh, i) - unsafe_copyto!(global_dofs, 1, dh.cell_dofs, dh.cell_dofs_offset[i], length(global_dofs)) - return global_dofs -end - -# TODO this is copy pasta from DofHandler.jl -Ferrite.cellcoords!(global_coords::Vector{<:Vec}, dh::DistributedDofHandler, i::Int) = cellcoords!(global_coords, getgrid(dh), i) - -# TODO this is copy pasta from DofHandler.jl -function Ferrite.celldofs(dh::DistributedDofHandler, i::Int) - @assert Ferrite.isclosed(dh) - n = ndofs_per_cell(dh, i) - global_dofs = zeros(Int, n) - unsafe_copyto!(global_dofs, 1, dh.cell_dofs, dh.cell_dofs_offset[i], n) - return global_dofs -end - +# TODO problem here is that the reorder has to be synchronized. We also cannot arbitrary reorder dofs, +# because some distributed matrix data structures have strict requirements on the orderings. Ferrite.renumber!(dh::DistributedDofHandler, perm::AbstractVector{<:Integer}) = error("Not implemented.") """ TODO fix for shells """ -function compute_dof_ownership(dh) +function compute_dof_ownership(dh::DistributedDofHandler) dgrid = getglobalgrid(dh) my_rank = global_rank(dgrid) @@ -600,6 +594,7 @@ function local_to_global_numbering(dh::DistributedDofHandler) end function Ferrite.close!(dh::DistributedDofHandler) + # We could merge these functions into an optimized one if we want. Ferrite.__close!(dh) append!(dh.ldof_to_rank, compute_dof_ownership(dh)) append!(dh.ldof_to_gdof, local_to_global_numbering(dh)) diff --git a/ext/FerritePartitionedArrays/assembler.jl b/ext/FerritePartitionedArrays/assembler.jl index f17b203e1b..e8daacad38 100644 --- a/ext/FerritePartitionedArrays/assembler.jl +++ b/ext/FerritePartitionedArrays/assembler.jl @@ -14,9 +14,6 @@ struct COOAssembler{T} 👻remotes dh - # TODO PartitionedArrays backend as additional input arg - COOAssembler(dh::DistributedDofHandler) = COOAssembler{Float64}(dh) - # TODO PartitionedArrays backend as additional input arg function COOAssembler{T}(dh::DistributedDofHandler) where {T} ldof_to_gdof = dh.ldof_to_gdof @@ -43,10 +40,10 @@ struct COOAssembler{T} # Neighborhood graph # @TODO cleanup old code below and use graph primitives instead. 
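    # (What follows queries the MPI distributed-graph communicator attached to the
    #  distributed grid: first the number of neighbouring ranks, then the actual
    #  source/destination ranks, which are shifted by one further down to match the
    #  1-based rank numbering used throughout the extension.)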
- (source_len, destination_len, _) = MPI.Dist_graph_neighbors_count(vertex_comm(dgrid)) + (source_len, destination_len, _) = MPI.Dist_graph_neighbors_count(interface_comm(dgrid)) sources = Vector{Cint}(undef, source_len) destinations = Vector{Cint}(undef, destination_len) - MPI.Dist_graph_neighbors!(vertex_comm(dgrid), sources, destinations) + MPI.Dist_graph_neighbors!(interface_comm(dgrid), sources, destinations) # Adjust to Julia index convention sources .+= 1 @@ -210,7 +207,7 @@ struct COOAssembler{T} ghost_send_buffer_lengths = Int[length(i) for i ∈ ghost_dof_to_send] ghost_recv_buffer_lengths = zeros(Int, destination_len) - MPI.Neighbor_alltoall!(UBuffer(ghost_send_buffer_lengths,1), UBuffer(ghost_recv_buffer_lengths,1), vertex_comm(dgrid)); + MPI.Neighbor_alltoall!(UBuffer(ghost_send_buffer_lengths,1), UBuffer(ghost_recv_buffer_lengths,1), interface_comm(dgrid)); Ferrite.@debug for (i,ghost_recv_buffer_length) ∈ enumerate(ghost_recv_buffer_lengths) println("receiving $ghost_recv_buffer_length ghosts from $(sources[i]) (R$my_rank)") end @@ -219,19 +216,19 @@ struct COOAssembler{T} # @TODO coalesce communication ghost_send_buffer_dofs = vcat(ghost_dof_to_send...) ghost_recv_buffer_dofs = zeros(Int, sum(ghost_recv_buffer_lengths)) - MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_dofs,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_dofs,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_dofs,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_dofs,ghost_recv_buffer_lengths), interface_comm(dgrid)) ghost_send_buffer_fields = vcat(ghost_dof_field_index_to_send...) ghost_recv_buffer_fields = zeros(Int, sum(ghost_recv_buffer_lengths)) - MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_fields,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_fields,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_fields,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_fields,ghost_recv_buffer_lengths), interface_comm(dgrid)) ghost_send_buffer_ranks = vcat(ghost_rank_to_send...) ghost_recv_buffer_ranks = zeros(Int, sum(ghost_recv_buffer_lengths)) - MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_ranks,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_ranks,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_ranks,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_ranks,ghost_recv_buffer_lengths), interface_comm(dgrid)) ghost_send_buffer_dofs_piv = vcat(ghost_dof_pivot_to_send...) 
ghost_recv_buffer_dofs_piv = zeros(Int, sum(ghost_recv_buffer_lengths)) - MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_dofs_piv,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_dofs_piv,ghost_recv_buffer_lengths), vertex_comm(dgrid)) + MPI.Neighbor_alltoallv!(VBuffer(ghost_send_buffer_dofs_piv,ghost_send_buffer_lengths), VBuffer(ghost_recv_buffer_dofs_piv,ghost_recv_buffer_lengths), interface_comm(dgrid)) # Reconstruct source ranks ghost_recv_buffer_source_ranks = Int[] @@ -272,6 +269,8 @@ struct COOAssembler{T} end end +Ferrite.start_assemble(dh::DistributedDofHandler, _::MPIBackend) = COOAssembler{Float64}(dh) + @propagate_inbounds function Ferrite.assemble!(a::COOAssembler{T}, edof::AbstractVector{Int}, Ke::AbstractMatrix{T}) where {T} n_dofs = length(edof) append!(a.V, Ke) diff --git a/ext/FerritePartitionedArrays/grid.jl b/ext/FerritePartitionedArrays/grid.jl index 4be334f7da..e252c48bce 100644 --- a/ext/FerritePartitionedArrays/grid.jl +++ b/ext/FerritePartitionedArrays/grid.jl @@ -55,8 +55,11 @@ Global dense communicator of the distributed grid. Graph communicator for shared vertices. Guaranteed to be derived from the communicator returned by @global_comm . """ -@inline vertex_comm(dgrid::DistributedGrid) = dgrid.interface_comm +@inline interface_comm(dgrid::DistributedGrid) = dgrid.interface_comm +""" +Get the rank on the global communicator of the distributed grid. +""" @inline global_rank(dgrid::DistributedGrid) = MPI.Comm_rank(global_comm(dgrid))+1 """ diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index 6a2b74dc64..f560200542 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -402,14 +402,14 @@ function __close!(dh::AbstractDofHandler) return dh, vertexdicts, edgedicts, facedicts end -function celldofs!(global_dofs::Vector{Int}, dh::DofHandler, i::Int) +function celldofs!(global_dofs::Vector{Int}, dh::AbstractDofHandler, i::Int) @assert isclosed(dh) @assert length(global_dofs) == ndofs_per_cell(dh, i) unsafe_copyto!(global_dofs, 1, dh.cell_dofs, dh.cell_dofs_offset[i], length(global_dofs)) return global_dofs end -function celldofs(dh::DofHandler, i::Int) +function celldofs(dh::AbstractDofHandler, i::Int) @assert isclosed(dh) n = ndofs_per_cell(dh, i) global_dofs = zeros(Int, n) diff --git a/src/Grid/grid.jl b/src/Grid/grid.jl index bcaba19c0e..34acc496d0 100644 --- a/src/Grid/grid.jl +++ b/src/Grid/grid.jl @@ -466,9 +466,9 @@ to a Node. @inline getnodes(grid::AbstractGrid, v::Union{Int, Vector{Int}}) = grid.nodes[v] @inline getnodes(grid::AbstractGrid, setname::String) = grid.nodes[collect(getnodeset(grid,setname))] "Returns the number of nodes in the grid." -@inline getnnodes(grid::AbstractGrid) = length(grid.nodes) +@inline getnnodes(grid::AbstractGrid) = length(getnodes(grid)) "Returns the number of nodes of the `i`-th cell." -@inline nnodes_per_cell(grid::AbstractGrid, i::Int=1) = nnodes(grid.cells[i]) +@inline nnodes_per_cell(grid::AbstractGrid, i::Int=1) = nnodes(getcells(grid, i)) "Return the number type of the nodal coordinates." @inline get_coordinate_eltype(grid::AbstractGrid) = get_coordinate_eltype(first(getnodes(grid))) From 071c7df0a61bf7f843ee921ce196dbd7e041634e Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Mon, 20 Feb 2023 15:39:02 +0100 Subject: [PATCH 111/124] Widen interface to new extensions. 
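
With the VTK debug helpers and the assembler entry points dispatching through stubs in
the Ferrite namespace, user code no longer needs to reach into the extension module by
hand. The intended usage after this change reads roughly as follows (a sketch based on
the updated examples in this patch):

    dh = DofHandler(dgrid)                      # resolves to the distributed handler
    assembler = start_assemble(dh, MPIBackend())
    # ...
    vtk_grid("solution", dh) do vtk
        vtk_point_data(vtk, dh, u)
        vtk_shared_vertices(vtk, dgrid)         # now reachable without the module prefix
        vtk_partitioning(vtk, dgrid)
    end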
--- docs/src/literate/distributed_assembly.jl | 10 +++++----- docs/src/literate/distributed_assembly_hypre.jl | 8 ++++---- .../literate/distributed_assembly_plasticity.jl | 8 ++++---- ext/FerritePartitionedArrays.jl | 14 -------------- ext/FerritePartitionedArrays/vtk-export.jl | 8 ++++---- src/Export/VTK.jl | 5 +++++ src/exports.jl | 8 ++++---- 7 files changed, 26 insertions(+), 35 deletions(-) diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index ffd11c8e2d..57711e5c06 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -65,7 +65,7 @@ update!(ch, 0.0); my_rank = MPI.Comm_rank(MPI.COMM_WORLD) # ### Assembling the linear system -# Assembling the system works also mostly analogue. +# Assembling the system works also mostly analogue. Note that the dof handler type changed. function doassemble(cellvalues::CellScalarValues{dim}, dh::FerritePartitionedArrays.DistributedDofHandler) where {dim} n_basefuncs = getnbasefunctions(cellvalues) Ke = zeros(n_basefuncs, n_basefuncs) @@ -135,10 +135,10 @@ vtk_grid("heat_equation_distributed", dh) do vtk # For debugging purposes it can be helpful to enrich # the visualization with some meta information about # the grid and its partitioning - FerritePartitionedArrays.vtk_shared_vertices(vtk, dgrid) - FerritePartitionedArrays.vtk_shared_faces(vtk, dgrid) - FerritePartitionedArrays.vtk_shared_edges(vtk, dgrid) #src - FerritePartitionedArrays.vtk_partitioning(vtk, dgrid) + vtk_shared_vertices(vtk, dgrid) + vtk_shared_faces(vtk, dgrid) + vtk_shared_edges(vtk, dgrid) #src + vtk_partitioning(vtk, dgrid) end map_parts(local_view(u, u.rows)) do u_local diff --git a/docs/src/literate/distributed_assembly_hypre.jl b/docs/src/literate/distributed_assembly_hypre.jl index ff2933f8c3..cfe6bf0ae5 100644 --- a/docs/src/literate/distributed_assembly_hypre.jl +++ b/docs/src/literate/distributed_assembly_hypre.jl @@ -198,10 +198,10 @@ vtk_grid("heat_equation_distributed", dh) do vtk # For debugging purposes it can be helpful to enrich # the visualization with some meta information about # the grid and its partitioning - FerritePartitionedArrays.vtk_shared_vertices(vtk, dgrid) - FerritePartitionedArrays.vtk_shared_faces(vtk, dgrid) - FerritePartitionedArrays.vtk_shared_edges(vtk, dgrid) #src - FerritePartitionedArrays.vtk_partitioning(vtk, dgrid) + vtk_shared_vertices(vtk, dgrid) + vtk_shared_faces(vtk, dgrid) + vtk_shared_edges(vtk, dgrid) #src + vtk_partitioning(vtk, dgrid) end ## Test the result against the manufactured solution #src diff --git a/docs/src/literate/distributed_assembly_plasticity.jl b/docs/src/literate/distributed_assembly_plasticity.jl index 6cc5f71bac..7ffef3514f 100644 --- a/docs/src/literate/distributed_assembly_plasticity.jl +++ b/docs/src/literate/distributed_assembly_plasticity.jl @@ -60,7 +60,7 @@ end; # We create a DofHandler, with two fields, `:u` and `:p`, # with possibly different interpolations function create_dofhandler(grid, ipu, ipp) - dh = FerritePartitionedArrays.DistributedDofHandler(grid) + dh = DofHandler(grid) push!(dh, :u, 2, ipu) # displacement push!(dh, :p, 1, ipp) # pressure close!(dh) @@ -91,7 +91,7 @@ function doassemble(cellvalues_u::CellVectorValues{dim}, cellvalues_p::CellScala facevalues_u::FaceVectorValues{dim}, grid::FerritePartitionedArrays.DistributedGrid, dh::FerritePartitionedArrays.DistributedDofHandler, mp::LinearElasticity) where {dim} - assembler = FerritePartitionedArrays.COOAssembler{Float64}(dh) + assembler = 
start_assemble(dh, MPIBackend()) nu = getnbasefunctions(cellvalues_u) np = getnbasefunctions(cellvalues_p) @@ -193,7 +193,7 @@ function solve(ν, interpolation_u, interpolation_p) dh = create_dofhandler(grid, interpolation_u, interpolation_p) dbc = create_bc(dh) vtk_grid("cook_dgrid", dh) do vtk - FerritePartitionedArrays.vtk_partitioning(vtk, grid) + vtk_partitioning(vtk, grid) end ## cellvalues cellvalues_u, cellvalues_p, facevalues_u = create_values(interpolation_u, interpolation_p) @@ -208,7 +208,7 @@ function solve(ν, interpolation_u, interpolation_p) "_linear" vtk_grid(filename, dh) do vtkfile vtk_point_data(vtkfile, dh, u) - FerritePartitionedArrays.vtk_partitioning(vtkfile, grid) + vtk_partitioning(vtkfile, grid) end return u end diff --git a/ext/FerritePartitionedArrays.jl b/ext/FerritePartitionedArrays.jl index 7094ff0132..d5e2812777 100644 --- a/ext/FerritePartitionedArrays.jl +++ b/ext/FerritePartitionedArrays.jl @@ -15,19 +15,5 @@ include("FerritePartitionedArrays/iterators.jl") include("FerritePartitionedArrays/assembler.jl") include("FerritePartitionedArrays/constraints.jl") include("FerritePartitionedArrays/vtk-export.jl") - -export - # assembler - COOAssembler, - # dofhandler - DistributedDofHandler, - # grid - DistributedGrid, - generate_distributed_grid, - # vtk-export - vtk_shared_vertices, - vtk_shared_faces, - vtk_shared_edges, - vtk_partitioning end # module FerritePartitionedArrays diff --git a/ext/FerritePartitionedArrays/vtk-export.jl b/ext/FerritePartitionedArrays/vtk-export.jl index e6cd873178..afcc308d2b 100644 --- a/ext/FerritePartitionedArrays/vtk-export.jl +++ b/ext/FerritePartitionedArrays/vtk-export.jl @@ -23,7 +23,7 @@ end """ Enrich the VTK file with meta information about shared vertices. """ -function vtk_shared_vertices(vtk, dgrid::DistributedGrid) +function Ferrite.vtk_shared_vertices(vtk, dgrid::DistributedGrid) u = Vector{Float64}(undef, getnnodes(dgrid)) my_rank = MPI.Comm_rank(global_comm(dgrid))+1 for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) @@ -43,7 +43,7 @@ end """ Enrich the VTK file with meta information about shared faces. """ -function vtk_shared_faces(vtk, dgrid::DistributedGrid) +function Ferrite.vtk_shared_faces(vtk, dgrid::DistributedGrid) u = Vector{Float64}(undef, getnnodes(dgrid)) my_rank = MPI.Comm_rank(global_comm(dgrid))+1 for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) @@ -64,7 +64,7 @@ end """ Enrich the VTK file with meta information about shared edges. """ -function vtk_shared_edges(vtk, dgrid::DistributedGrid) +function Ferrite.vtk_shared_edges(vtk, dgrid::DistributedGrid) u = Vector{Float64}(undef, getnnodes(dgrid)) my_rank = MPI.Comm_rank(global_comm(dgrid))+1 for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) @@ -84,7 +84,7 @@ end """ Enrich the VTK file with partitioning meta information. 
""" -function vtk_partitioning(vtk, dgrid::DistributedGrid) +function Ferrite.vtk_partitioning(vtk, dgrid::DistributedGrid) u = Vector{Float64}(undef, getncells(dgrid)) u .= MPI.Comm_rank(global_comm(dgrid))+1 vtk_cell_data(Ferrite.pvtkwrapper(vtk), u, "partitioning") diff --git a/src/Export/VTK.jl b/src/Export/VTK.jl index f0f12ba8c3..b2552d50e7 100644 --- a/src/Export/VTK.jl +++ b/src/Export/VTK.jl @@ -130,3 +130,8 @@ function WriteVTK.vtk_point_data(vtkfile, dh::AbstractDofHandler, u::Vector, suf return vtkfile end + +vtk_shared_vertices(vtk, ::AbstractDistributedGrid) = error("Not implemented.") +vtk_shared_faces(vtk, ::AbstractDistributedGrid) = error("Not implemented.") +vtk_shared_edges(vtk, ::AbstractDistributedGrid) = error("Not implemented.") +vtk_partitioning(vtk, ::AbstractDistributedGrid) = error("Not implemented.") diff --git a/src/exports.jl b/src/exports.jl index 8e618a71a5..1019f12150 100644 --- a/src/exports.jl +++ b/src/exports.jl @@ -169,10 +169,10 @@ export vtk_nodeset, vtk_cellset, vtk_save, - # vtk_shared_vertices, - # vtk_shared_faces, - # vtk_shared_edges, - # vtk_partitioning, + vtk_shared_vertices, + vtk_shared_faces, + vtk_shared_edges, + vtk_partitioning, # L2 Projection project, From fa6f3dfe4e2a2f18a318fcd8cb27fbd15fd5d808 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Mon, 20 Feb 2023 16:22:58 +0100 Subject: [PATCH 112/124] Acutally split up packages. --- Project.toml | 4 ++++ docs/src/literate/distributed_assembly.jl | 12 +++++----- .../literate/distributed_assembly_hypre.jl | 24 +++++++++---------- .../distributed_assembly_plasticity.jl | 8 +++---- ext/FerriteMPI.jl | 16 +++++++++++++ .../DistributedDofHandler.jl | 0 .../grid.jl | 0 .../iterators.jl | 0 .../vtk-export.jl | 0 ext/FerritePartitionedArrays.jl | 5 ---- ext/FerritePartitionedArrays/assembler.jl | 8 +++++-- 11 files changed, 48 insertions(+), 29 deletions(-) create mode 100644 ext/FerriteMPI.jl rename ext/{FerritePartitionedArrays => FerriteMPI}/DistributedDofHandler.jl (100%) rename ext/{FerritePartitionedArrays => FerriteMPI}/grid.jl (100%) rename ext/{FerritePartitionedArrays => FerriteMPI}/iterators.jl (100%) rename ext/{FerritePartitionedArrays => FerriteMPI}/vtk-export.jl (100%) diff --git a/Project.toml b/Project.toml index 3f60b6163f..faf7e784cb 100644 --- a/Project.toml +++ b/Project.toml @@ -15,18 +15,22 @@ WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192" [weakdeps] BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" +HYPRE = "b5ffcf37-a2bd-41ab-a3da-4bd9bc8ad771" Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9" [extensions] FerriteBlockArrays = "BlockArrays" +FerriteHYPRE = ["MPI", "Metis", "HYPRE"] FerriteMetis = "Metis" +FerriteMPI = ["MPI", "Metis"] FerritePartitionedArrays = ["MPI", "Metis", "PartitionedArrays"] [compat] BlockArrays = "0.16" EnumX = "1" +HYPRE = "^1.4.0" Metis = "1.3" MPI = "^0.20.2" NearestNeighbors = "0.4" diff --git a/docs/src/literate/distributed_assembly.jl b/docs/src/literate/distributed_assembly.jl index 57711e5c06..c32b279720 100644 --- a/docs/src/literate/distributed_assembly.jl +++ b/docs/src/literate/distributed_assembly.jl @@ -18,17 +18,17 @@ using Ferrite, MPI using IterativeSolvers #, HYPRE using PartitionedArrays, Metis #src -FerritePartitionedArrays = Base.get_extension(Ferrite, :FerritePartitionedArrays) +FerriteMPI = Base.get_extension(Ferrite, :FerriteMPI) # Launch MPI MPI.Init() # We start generating a simple grid with 20x20 
quadrilateral elements # and distribute it across our processors using `generate_distributed_grid`. -# dgrid = FerritePartitionedArrays.generate_distributed_grid(QuadraticQuadrilateral, (3, 1)); -# dgrid = FerritePartitionedArrays.generate_distributed_grid(Tetrahedron, (2, 2, 2)); -dgrid = FerritePartitionedArrays.generate_distributed_grid(Hexahedron, (2, 2, 2)); #src -# dgrid = FerritePartitionedArrays.generate_distributed_grid(Tetrahedron, (3, 3, 3)); #src +# dgrid = FerriteMPI.generate_distributed_grid(QuadraticQuadrilateral, (3, 1)); +# dgrid = FerriteMPI.generate_distributed_grid(Tetrahedron, (2, 2, 2)); +dgrid = FerriteMPI.generate_distributed_grid(Hexahedron, (2, 2, 2)); #src +# dgrid = FerriteMPI.generate_distributed_grid(Tetrahedron, (3, 3, 3)); #src # ### Trial and test functions # Nothing changes here. @@ -66,7 +66,7 @@ my_rank = MPI.Comm_rank(MPI.COMM_WORLD) # ### Assembling the linear system # Assembling the system works also mostly analogue. Note that the dof handler type changed. -function doassemble(cellvalues::CellScalarValues{dim}, dh::FerritePartitionedArrays.DistributedDofHandler) where {dim} +function doassemble(cellvalues::CellScalarValues{dim}, dh::FerriteMPI.DistributedDofHandler) where {dim} n_basefuncs = getnbasefunctions(cellvalues) Ke = zeros(n_basefuncs, n_basefuncs) fe = zeros(n_basefuncs) diff --git a/docs/src/literate/distributed_assembly_hypre.jl b/docs/src/literate/distributed_assembly_hypre.jl index cfe6bf0ae5..dfc4e8e5fb 100644 --- a/docs/src/literate/distributed_assembly_hypre.jl +++ b/docs/src/literate/distributed_assembly_hypre.jl @@ -71,14 +71,14 @@ function Ferrite.addindex!(b::HYPREVector, v, i::Int) return b end -FerritePartitionedArrays = Base.get_extension(Ferrite, :FerritePartitionedArrays) +FerriteMPI = Base.get_extension(Ferrite, :FerriteMPI) # We start generating a simple grid with 20x20 quadrilateral elements # and distribute it across our processors using `generate_distributed_grid`. -# dgrid = FerritePartitionedArrays.generate_distributed_grid(QuadraticQuadrilateral, (3, 1)); -# dgrid = FerritePartitionedArrays.generate_distributed_grid(Tetrahedron, (2, 2, 2)); -dgrid = FerritePartitionedArrays.generate_distributed_grid(Hexahedron, (10, 10, 10)); #src -# dgrid = FerritePartitionedArrays.generate_distributed_grid(Tetrahedron, (3, 3, 3)); #src +# dgrid = FerriteMPI.generate_distributed_grid(QuadraticQuadrilateral, (3, 1)); +# dgrid = FerriteMPI.generate_distributed_grid(Tetrahedron, (2, 2, 2)); +dgrid = FerriteMPI.generate_distributed_grid(Hexahedron, (10, 10, 10)); #src +# dgrid = FerriteMPI.generate_distributed_grid(Tetrahedron, (3, 3, 3)); #src # ### Trial and test functions # Nothing changes here. @@ -113,7 +113,7 @@ close!(ch) # ### Assembling the linear system # Assembling the system works also mostly analogue. -function doassemble(cellvalues::CellScalarValues{dim}, dh::FerritePartitionedArrays.DistributedDofHandler, ch::ConstraintHandler) where {dim} +function doassemble(cellvalues::CellScalarValues{dim}, dh::FerriteMPI.DistributedDofHandler, ch::ConstraintHandler) where {dim} n_basefuncs = getnbasefunctions(cellvalues) Ke = zeros(n_basefuncs, n_basefuncs) fe = zeros(n_basefuncs) @@ -125,9 +125,9 @@ function doassemble(cellvalues::CellScalarValues{dim}, dh::FerritePartitionedArr # may trigger a large amount of communication. # TODO how to put this into an interface. 
- dgrid = FerritePartitionedArrays.getglobalgrid(dh) - comm = FerritePartitionedArrays.global_comm(dgrid) - ldofrange = FerritePartitionedArrays.local_dof_range(dh) + dgrid = FerriteMPI.getglobalgrid(dh) + comm = FerriteMPI.global_comm(dgrid) + ldofrange = FerriteMPI.local_dof_range(dh) K = HYPREMatrix(comm, first(ldofrange), last(ldofrange)) f = HYPREVector(comm, first(ldofrange), last(ldofrange)) @@ -183,12 +183,12 @@ uh = HYPRE.solve(solver, K, f) # TODO how to put this into an interface. # Copy solution from HYPRE to Julia -uj = Vector{Float64}(undef, FerritePartitionedArrays.num_local_true_dofs(dh)) +uj = Vector{Float64}(undef, FerriteMPI.num_local_true_dofs(dh)) copy!(uj, uh) # And convert from HYPRE to Ferrite -u_local = Vector{Float64}(undef, FerritePartitionedArrays.num_local_dofs(dh)) -FerritePartitionedArrays.hypre_to_ferrite!(u_local, uj, dh) +u_local = Vector{Float64}(undef, FerriteMPI.num_local_dofs(dh)) +FerriteMPI.hypre_to_ferrite!(u_local, uj, dh) # # ### Exporting via PVTK # # To visualize the result we export the grid and our field `u` diff --git a/docs/src/literate/distributed_assembly_plasticity.jl b/docs/src/literate/distributed_assembly_plasticity.jl index 7ffef3514f..21df76f1bb 100644 --- a/docs/src/literate/distributed_assembly_plasticity.jl +++ b/docs/src/literate/distributed_assembly_plasticity.jl @@ -19,7 +19,7 @@ using IterativeSolvers using PartitionedArrays, Metis using SparseArrays, BlockArrays -FerritePartitionedArrays = Base.get_extension(Ferrite, :FerritePartitionedArrays) +FerriteMPI = Base.get_extension(Ferrite, :FerriteMPI) # Launch MPI MPI.Init() @@ -34,7 +34,7 @@ function create_cook_grid(nx, ny) ## facesets for boundary conditions addfaceset!(grid, "clamped", x -> norm(x[1]) ≈ 0.0); addfaceset!(grid, "traction", x -> norm(x[1]) ≈ 48.0); - return FerritePartitionedArrays.DistributedGrid(grid) + return FerriteMPI.DistributedGrid(grid) end; # Next we define a function to set up our cell- and facevalues. @@ -88,8 +88,8 @@ end # element matrix. Since Ferrite does not force us to use any particular matrix type we will # use a `PseudoBlockArray` from `BlockArrays.jl`. function doassemble(cellvalues_u::CellVectorValues{dim}, cellvalues_p::CellScalarValues{dim}, - facevalues_u::FaceVectorValues{dim}, grid::FerritePartitionedArrays.DistributedGrid, - dh::FerritePartitionedArrays.DistributedDofHandler, mp::LinearElasticity) where {dim} + facevalues_u::FaceVectorValues{dim}, grid::FerriteMPI.DistributedGrid, + dh::FerriteMPI.DistributedDofHandler, mp::LinearElasticity) where {dim} assembler = start_assemble(dh, MPIBackend()) nu = getnbasefunctions(cellvalues_u) diff --git a/ext/FerriteMPI.jl b/ext/FerriteMPI.jl new file mode 100644 index 0000000000..93a29d7b11 --- /dev/null +++ b/ext/FerriteMPI.jl @@ -0,0 +1,16 @@ +""" +Module containing the code for a non-overlapping grid and the corresponding dof management via MPI. 
+""" +module FerriteMPI + +using Ferrite +using Metis +using MPI +using Base: @propagate_inbounds + +include("FerriteMPI/grid.jl") +include("FerriteMPI/DistributedDofHandler.jl") +include("FerriteMPI/iterators.jl") +include("FerriteMPI/vtk-export.jl") + +end # module FerriteMPI diff --git a/ext/FerritePartitionedArrays/DistributedDofHandler.jl b/ext/FerriteMPI/DistributedDofHandler.jl similarity index 100% rename from ext/FerritePartitionedArrays/DistributedDofHandler.jl rename to ext/FerriteMPI/DistributedDofHandler.jl diff --git a/ext/FerritePartitionedArrays/grid.jl b/ext/FerriteMPI/grid.jl similarity index 100% rename from ext/FerritePartitionedArrays/grid.jl rename to ext/FerriteMPI/grid.jl diff --git a/ext/FerritePartitionedArrays/iterators.jl b/ext/FerriteMPI/iterators.jl similarity index 100% rename from ext/FerritePartitionedArrays/iterators.jl rename to ext/FerriteMPI/iterators.jl diff --git a/ext/FerritePartitionedArrays/vtk-export.jl b/ext/FerriteMPI/vtk-export.jl similarity index 100% rename from ext/FerritePartitionedArrays/vtk-export.jl rename to ext/FerriteMPI/vtk-export.jl diff --git a/ext/FerritePartitionedArrays.jl b/ext/FerritePartitionedArrays.jl index d5e2812777..3942f7e3a5 100644 --- a/ext/FerritePartitionedArrays.jl +++ b/ext/FerritePartitionedArrays.jl @@ -4,16 +4,11 @@ Module containing the code for distributed assembly via PartitionedArrays.jl module FerritePartitionedArrays using Ferrite -using Metis using MPI using PartitionedArrays using Base: @propagate_inbounds -include("FerritePartitionedArrays/grid.jl") -include("FerritePartitionedArrays/DistributedDofHandler.jl") -include("FerritePartitionedArrays/iterators.jl") include("FerritePartitionedArrays/assembler.jl") include("FerritePartitionedArrays/constraints.jl") -include("FerritePartitionedArrays/vtk-export.jl") end # module FerritePartitionedArrays diff --git a/ext/FerritePartitionedArrays/assembler.jl b/ext/FerritePartitionedArrays/assembler.jl index e8daacad38..7bdb3aaf3c 100644 --- a/ext/FerritePartitionedArrays/assembler.jl +++ b/ext/FerritePartitionedArrays/assembler.jl @@ -15,7 +15,9 @@ struct COOAssembler{T} dh # TODO PartitionedArrays backend as additional input arg - function COOAssembler{T}(dh::DistributedDofHandler) where {T} + # TODO fix type + #function COOAssembler{T}(dh::FerriteMPI.DistributedDofHandler) where {T} + function COOAssembler{T}(dh) where {T} ldof_to_gdof = dh.ldof_to_gdof ldof_to_rank = dh.ldof_to_rank nldofs = num_local_dofs(dh) @@ -269,7 +271,9 @@ struct COOAssembler{T} end end -Ferrite.start_assemble(dh::DistributedDofHandler, _::MPIBackend) = COOAssembler{Float64}(dh) +# TODO fix type +# Ferrite.start_assemble(dh::FerriteMPI.DistributedDofHandler, _::MPIBackend) = COOAssembler{Float64}(dh) +Ferrite.start_assemble(dh, _::MPIBackend) = COOAssembler{Float64}(dh) @propagate_inbounds function Ferrite.assemble!(a::COOAssembler{T}, edof::AbstractVector{Int}, Ke::AbstractMatrix{T}) where {T} n_dofs = length(edof) From 93eeac40c1753e768365ebb8004f3c27fa87d00f Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Mon, 20 Feb 2023 16:57:08 +0100 Subject: [PATCH 113/124] First attempt at splitting extensions with transient deps. 
--- ext/FerriteMPI.jl | 2 ++ ext/FerriteMPI/DistributedDofHandler.jl | 30 +++++++++------------- ext/FerriteMPI/grid.jl | 8 +++--- ext/FerriteMPI/vtk-export.jl | 15 +++++------ ext/FerritePartitionedArrays.jl | 3 +++ ext/FerritePartitionedArrays/export-vtk.jl | 7 +++++ src/Dofs/DofHandler.jl | 7 +++++ src/Grid/DistributedGrid.jl | 7 +++++ 8 files changed, 49 insertions(+), 30 deletions(-) create mode 100644 ext/FerritePartitionedArrays/export-vtk.jl diff --git a/ext/FerriteMPI.jl b/ext/FerriteMPI.jl index 93a29d7b11..4bcabcecd8 100644 --- a/ext/FerriteMPI.jl +++ b/ext/FerriteMPI.jl @@ -4,6 +4,8 @@ Module containing the code for a non-overlapping grid and the corresponding dof module FerriteMPI using Ferrite +# TODO remove me. These are merely hotfixes to split the extensions trasiently via an internal API. +import Ferrite: getglobalgrid, num_global_dofs, num_global_dofs, num_local_dofs, num_local_true_dofs, global_comm, interface_comm, global_rank, compute_owner, local_dof_range using Metis using MPI using Base: @propagate_inbounds diff --git a/ext/FerriteMPI/DistributedDofHandler.jl b/ext/FerriteMPI/DistributedDofHandler.jl index f327ea4a6b..cc95fd90a4 100644 --- a/ext/FerriteMPI/DistributedDofHandler.jl +++ b/ext/FerriteMPI/DistributedDofHandler.jl @@ -29,7 +29,7 @@ end """ Compute the global dof range of the dofs owned by the calling process. It is guaranteed to be continuous. """ -function local_dof_range(dh::DistributedDofHandler) +function Ferrite.local_dof_range(dh::DistributedDofHandler) my_rank = global_rank(getglobalgrid(dh)) ltdofs = dh.ldof_to_gdof[dh.ldof_to_rank .== my_rank] return minimum(ltdofs):maximum(ltdofs) @@ -60,7 +60,7 @@ end Ferrite.getdim(dh::DistributedDofHandler{dim}) where {dim} = dim Ferrite.getlocalgrid(dh::DistributedDofHandler) = Ferrite.getlocalgrid(dh.grid) -getglobalgrid(dh::DistributedDofHandler) = dh.grid +Ferrite.getglobalgrid(dh::DistributedDofHandler) = dh.grid # Compat layer against serial code Ferrite.getgrid(dh::DistributedDofHandler) = getlocalgrid(dh) @@ -112,17 +112,17 @@ end """ Compute the number of dofs owned by the current process. """ -num_local_true_dofs(dh::DistributedDofHandler) = sum(dh.ldof_to_rank .== global_rank(getglobalgrid(dh))) +Ferrite.num_local_true_dofs(dh::DistributedDofHandler) = sum(dh.ldof_to_rank .== global_rank(getglobalgrid(dh))) """ Compute the number of dofs visible to the current process. """ -num_local_dofs(dh::DistributedDofHandler) = length(dh.ldof_to_gdof) +Ferrite.num_local_dofs(dh::DistributedDofHandler) = length(dh.ldof_to_gdof) """ Compute the number of dofs in the global system. """ -num_global_dofs(dh::DistributedDofHandler) = MPI.Allreduce(num_local_true_dofs(dh), MPI.SUM, global_comm(getglobalgrid(dh))) +Ferrite.num_global_dofs(dh::DistributedDofHandler) = MPI.Allreduce(num_local_true_dofs(dh), MPI.SUM, global_comm(getglobalgrid(dh))) """ Renumber the dofs in local ordering to their corresponding global numbering. 
@@ -625,14 +625,14 @@ function hypre_to_ferrite!(u, x, dh) end # TODO speed this up and better API - dgrid = FerritePartitionedArrays.getglobalgrid(dh) + dgrid = getglobalgrid(dh) for (lvi, sv) ∈ get_shared_vertices(dgrid) - my_rank != FerritePartitionedArrays.compute_owner(dgrid, sv) && continue + my_rank != compute_owner(dgrid, sv) && continue for field_idx in 1:num_fields(dh) if Ferrite.has_vertex_dofs(dh, field_idx, lvi) local_dofs = Ferrite.vertex_dofs(dh, field_idx, lvi) global_dofs = dh.ldof_to_gdof[local_dofs] - for receiver_rank ∈ keys(FerritePartitionedArrays.remote_entities(sv)) + for receiver_rank ∈ keys(remote_entities(sv)) for i ∈ 1:length(global_dofs) # Note that u already has the correct values for all locally owned dofs due to the loop above! gdof_value_send[receiver_rank][global_dofs[i]] = u[local_dofs[i]] @@ -643,12 +643,12 @@ function hypre_to_ferrite!(u, x, dh) end for (lvi, se) ∈ get_shared_edges(dgrid) - my_rank != FerritePartitionedArrays.compute_owner(dgrid, se) && continue + my_rank != compute_owner(dgrid, se) && continue for field_idx in 1:num_fields(dh) if Ferrite.has_edge_dofs(dh, field_idx, lvi) local_dofs = Ferrite.edge_dofs(dh, field_idx, lvi) global_dofs = dh.ldof_to_gdof[local_dofs] - for receiver_rank ∈ keys(FerritePartitionedArrays.remote_entities(se)) + for receiver_rank ∈ keys(remote_entities(se)) for i ∈ 1:length(global_dofs) # Note that u already has the correct values for all locally owned dofs due to the loop above! gdof_value_send[receiver_rank][global_dofs[i]] = u[local_dofs[i]] @@ -659,12 +659,12 @@ function hypre_to_ferrite!(u, x, dh) end for (lvi, sf) ∈ get_shared_faces(dgrid) - my_rank != FerritePartitionedArrays.compute_owner(dgrid, sf) && continue + my_rank != compute_owner(dgrid, sf) && continue for field_idx in 1:num_fields(dh) if Ferrite.has_face_dofs(dh, field_idx, lvi) local_dofs = Ferrite.face_dofs(dh, field_idx, lvi) global_dofs = dh.ldof_to_gdof[local_dofs] - for receiver_rank ∈ keys(FerritePartitionedArrays.remote_entities(sf)) + for receiver_rank ∈ keys(remote_entities(sf)) for i ∈ 1:length(global_dofs) # Note that u already has the correct values for all locally owned dofs due to the loop above! gdof_value_send[receiver_rank][global_dofs[i]] = u[local_dofs[i]] @@ -714,9 +714,3 @@ function hypre_to_ferrite!(u, x, dh) return u end - - - -function WriteVTK.vtk_grid(filename::AbstractString, dh::DistributedDofHandler; compress::Bool=true) - vtk_grid(filename, getglobalgrid(dh); compress=compress) -end diff --git a/ext/FerriteMPI/grid.jl b/ext/FerriteMPI/grid.jl index e252c48bce..f48ed9be8a 100644 --- a/ext/FerriteMPI/grid.jl +++ b/ext/FerriteMPI/grid.jl @@ -49,18 +49,18 @@ end """ Global dense communicator of the distributed grid. """ -@inline global_comm(dgrid::DistributedGrid) = dgrid.grid_comm +@inline Ferrite.global_comm(dgrid::DistributedGrid) = dgrid.grid_comm """ Graph communicator for shared vertices. Guaranteed to be derived from the communicator returned by @global_comm . """ -@inline interface_comm(dgrid::DistributedGrid) = dgrid.interface_comm +@inline Ferrite.interface_comm(dgrid::DistributedGrid) = dgrid.interface_comm """ Get the rank on the global communicator of the distributed grid. """ -@inline global_rank(dgrid::DistributedGrid) = MPI.Comm_rank(global_comm(dgrid))+1 +@inline Ferrite.global_rank(dgrid::DistributedGrid) = MPI.Comm_rank(global_comm(dgrid))+1 """ """ @@ -332,7 +332,7 @@ end # Here we define the entity ownership by the process sharing an entity with lowest rank in the grid communicator. 
-function compute_owner(dgrid::Ferrite.AbstractDistributedGrid, shared_entity::Ferrite.SharedEntity)::Int32 +function Ferrite.compute_owner(dgrid::Ferrite.AbstractDistributedGrid, shared_entity::Ferrite.SharedEntity)::Int32 my_rank = MPI.Comm_rank(global_comm(dgrid))+1 # Shift rank up by 1 to match Julia's indexing convention return minimum([my_rank; [remote_rank for (remote_rank, _) ∈ remote_entities(shared_entity)]]) end diff --git a/ext/FerriteMPI/vtk-export.jl b/ext/FerriteMPI/vtk-export.jl index afcc308d2b..b887d65e68 100644 --- a/ext/FerriteMPI/vtk-export.jl +++ b/ext/FerriteMPI/vtk-export.jl @@ -1,3 +1,10 @@ +""" +""" +function WriteVTK.vtk_grid(filename::AbstractString, dh::DistributedDofHandler; compress::Bool=true) + vtk_grid(filename, getglobalgrid(dh); compress=compress) +end + + """ """ function WriteVTK.vtk_grid(filename::AbstractString, dgrid::DistributedGrid{dim,C,T}; compress::Bool=true) where {dim,C,T} @@ -12,14 +19,6 @@ function WriteVTK.vtk_grid(filename::AbstractString, dgrid::DistributedGrid{dim, return pvtk_grid(filename, coords, cls; part=part, nparts=nparts, compress=compress) end -""" -""" -function WriteVTK.vtk_point_data(vtk, dh::Ferrite.AbstractDofHandler, u::PVector) - map_parts(local_view(u, u.rows)) do u_local - vtk_point_data(Ferrite.pvtkwrapper(vtk), dh, u_local) - end -end - """ Enrich the VTK file with meta information about shared vertices. """ diff --git a/ext/FerritePartitionedArrays.jl b/ext/FerritePartitionedArrays.jl index 3942f7e3a5..8a397ba0ef 100644 --- a/ext/FerritePartitionedArrays.jl +++ b/ext/FerritePartitionedArrays.jl @@ -4,11 +4,14 @@ Module containing the code for distributed assembly via PartitionedArrays.jl module FerritePartitionedArrays using Ferrite +# TODO remove me. These are merely hotfixes to split the extensions trasiently via an internal API. 
+import Ferrite: getglobalgrid, num_global_dofs, num_global_dofs, num_local_true_dofs, num_local_dofs, global_comm, interface_comm, global_rank, compute_owner using MPI using PartitionedArrays using Base: @propagate_inbounds include("FerritePartitionedArrays/assembler.jl") include("FerritePartitionedArrays/constraints.jl") +include("FerritePartitionedArrays/export-vtk.jl") end # module FerritePartitionedArrays diff --git a/ext/FerritePartitionedArrays/export-vtk.jl b/ext/FerritePartitionedArrays/export-vtk.jl new file mode 100644 index 0000000000..4daa7fcd58 --- /dev/null +++ b/ext/FerritePartitionedArrays/export-vtk.jl @@ -0,0 +1,7 @@ +""" +""" +function WriteVTK.vtk_point_data(vtk, dh::Ferrite.AbstractDofHandler, u::PVector) + map_parts(local_view(u, u.rows)) do u_local + vtk_point_data(Ferrite.pvtkwrapper(vtk), dh, u_local) + end +end \ No newline at end of file diff --git a/src/Dofs/DofHandler.jl b/src/Dofs/DofHandler.jl index f560200542..8c3aedd644 100644 --- a/src/Dofs/DofHandler.jl +++ b/src/Dofs/DofHandler.jl @@ -150,6 +150,13 @@ getfielddim(dh::AbstractDofHandler, field_idx::Int) = dh.field_dims[field_idx] getbcvalue(dh::AbstractDofHandler, field_idx::Int) = dh.bc_values[field_idx] getgrid(dh::AbstractDofHandler) = dh.grid +# INTERNAL API - MIGHT BE MOVED IN THE FUTURE +num_local_true_dofs(dh::AbstractDofHandler) = dh.ndofs +num_local_dofs(dh::AbstractDofHandler) = dh.ndofs +num_global_dofs(dh::AbstractDofHandler) = error("Not implemented.") +getglobalgrid(dh::AbstractDofHandler) = error("Not implemented.") +local_dof_range(dh::AbstractDofHandler) = error("Not implemented.") + function find_field(dh::AbstractDofHandler, field_name::Symbol) j = findfirst(i->i == field_name, dh.field_names) j === nothing && error("could not find field :$field_name in DofHandler (existing fields: $(getfieldnames(dh)))") diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index 17bca38120..3c2253f420 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -2,6 +2,13 @@ """ abstract type AbstractDistributedGrid{sdim} <: AbstractGrid{sdim} end +#TODO remove. This is a temporary workaround to make the transient extensions work. +global_comm(::AbstractDistributedGrid) = error("Not implemented.") +interface_comm(::AbstractDistributedGrid) = error("Not implemented.") +global_rank(::AbstractDistributedGrid) = error("Not implemented.") +compute_owner(::AbstractDistributedGrid) = error("Not implemented.") + + """ """ abstract type SharedEntity end From cacebad0b2675733de3ef37e685948c9ed4252b9 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Mon, 20 Feb 2023 19:51:59 +0100 Subject: [PATCH 114/124] Finalize extension split. 
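With this change the HYPRE-specific glue (the assembler wrappers and the hypre_to_ferrite! conversion) lives in its own extension, so the distributed HYPRE workflow is selected purely by which weak dependencies are loaded. A sketch of the resulting user-facing flow, mirroring the updated literate example (assumes MPI, Metis and HYPRE are installed; dof handler setup, assembly and the HYPRE solve are elided):

    using Ferrite
    using MPI, Metis     # loading these activates the FerriteMPI extension
    using HYPRE          # additionally activates the FerriteHYPRE extension

    MPI.Init()
    HYPRE.Init()

    FerriteMPI   = Base.get_extension(Ferrite, :FerriteMPI)
    FerriteHYPRE = Base.get_extension(Ferrite, :FerriteHYPRE)

    dgrid = FerriteMPI.generate_distributed_grid(Hexahedron, (2, 2, 2))
    # ... build dh::DistributedDofHandler, assemble K::HYPREMatrix and f::HYPREVector,
    #     uh = HYPRE.solve(solver, K, f) as in the example ...
    u_local = Vector{Float64}(undef, FerriteMPI.num_local_dofs(dh))
    FerriteHYPRE.hypre_to_ferrite!(u_local, uh, dh)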
--- .../literate/distributed_assembly_hypre.jl | 63 +--------- ext/FerriteHYPRE.jl | 20 +++ ext/FerriteHYPRE/assembler.jl | 48 ++++++++ ext/FerriteHYPRE/conversion.jl | 116 ++++++++++++++++++ ext/FerriteMPI.jl | 8 +- ext/FerriteMPI/DistributedDofHandler.jl | 114 ----------------- ext/FerriteMPI/grid.jl | 6 +- ext/FerritePartitionedArrays.jl | 8 +- ext/FerritePartitionedArrays/assembler.jl | 3 + src/Grid/DistributedGrid.jl | 8 +- 10 files changed, 212 insertions(+), 182 deletions(-) create mode 100644 ext/FerriteHYPRE.jl create mode 100644 ext/FerriteHYPRE/assembler.jl create mode 100644 ext/FerriteHYPRE/conversion.jl diff --git a/docs/src/literate/distributed_assembly_hypre.jl b/docs/src/literate/distributed_assembly_hypre.jl index dfc4e8e5fb..d08f1c466f 100644 --- a/docs/src/literate/distributed_assembly_hypre.jl +++ b/docs/src/literate/distributed_assembly_hypre.jl @@ -14,64 +14,16 @@ #md # The full program, without comments, can be found in the next [section](@ref heat_equation-plain-program). # # First we load Ferrite, and some other packages we need -using Ferrite, MPI -using HYPRE -using PartitionedArrays, Metis +using Ferrite +using MPI, Metis # Loads FerriteMPI +using HYPRE # Loads FerriteHYPRE # Launch MPI MPI.Init() HYPRE.Init() -function Ferrite.create_sparsity_pattern(::Type{<:HYPREMatrix}, dh::DofHandler, ch::Union{ConstraintHandler,Nothing}=nothing; kwargs...) - K = create_sparsity_pattern(dh, ch; kwargs...) - fill!(K.nzval, 1) - return HYPREMatrix(K) -end - -########################################### -## HYPREAssembler and associated methods ## -########################################### - -struct HYPREAssembler <: Ferrite.AbstractSparseAssembler - A::HYPRE.HYPREAssembler -end - -Ferrite.matrix_handle(a::HYPREAssembler) = a.A.A.A # :) -Ferrite.vector_handle(a::HYPREAssembler) = a.A.b.b # :) - -function Ferrite.start_assemble(K::HYPREMatrix, f::HYPREVector) - return HYPREAssembler(HYPRE.start_assemble!(K, f)) -end - -function Ferrite.assemble!(a::HYPREAssembler, dofs::AbstractVector{<:Integer}, ke::AbstractMatrix, fe::AbstractVector) - HYPRE.assemble!(a.A, dofs, ke, fe) -end - -function Ferrite.end_assemble(a::HYPREAssembler) - HYPRE.finish_assemble!(a.A) -end - -## Methods for arrayutils.jl ## - -function Ferrite.addindex!(A::HYPREMatrix, v, i::Int, j::Int) - nrows = HYPRE_Int(1) - ncols = Ref{HYPRE_Int}(1) - rows = Ref{HYPRE_BigInt}(i) - cols = Ref{HYPRE_BigInt}(j) - values = Ref{HYPRE_Complex}(v) - HYPRE.@check HYPRE_IJMatrixAddToValues(A.ijmatrix, nrows, ncols, rows, cols, values) - return A -end - -function Ferrite.addindex!(b::HYPREVector, v, i::Int) - nvalues = HYPRE_Int(1) - indices = Ref{HYPRE_BigInt}(i) - values = Ref{HYPRE_Complex}(v) - HYPRE.@check HYPRE_IJVectorAddToValues(b.ijvector, nvalues, indices, values) - return b -end - FerriteMPI = Base.get_extension(Ferrite, :FerriteMPI) +FerriteHYPRE = Base.get_extension(Ferrite, :FerriteHYPRE) # We start generating a simple grid with 20x20 quadrilateral elements # and distribute it across our processors using `generate_distributed_grid`. @@ -181,14 +133,9 @@ precond = HYPRE.BoomerAMG() solver = HYPRE.PCG(; Precond = precond) uh = HYPRE.solve(solver, K, f) -# TODO how to put this into an interface. 
-# Copy solution from HYPRE to Julia -uj = Vector{Float64}(undef, FerriteMPI.num_local_true_dofs(dh)) -copy!(uj, uh) - # And convert from HYPRE to Ferrite u_local = Vector{Float64}(undef, FerriteMPI.num_local_dofs(dh)) -FerriteMPI.hypre_to_ferrite!(u_local, uj, dh) +FerriteHYPRE.hypre_to_ferrite!(u_local, uh, dh) # # ### Exporting via PVTK # # To visualize the result we export the grid and our field `u` diff --git a/ext/FerriteHYPRE.jl b/ext/FerriteHYPRE.jl new file mode 100644 index 0000000000..e3a12514b8 --- /dev/null +++ b/ext/FerriteHYPRE.jl @@ -0,0 +1,20 @@ +""" +Module containing the code for distributed assembly via HYPRE.jl +""" +module FerriteHYPRE + +using Ferrite +# TODO remove me. These are merely hotfixes to split the extensions trasiently via an internal API. +import Ferrite: getglobalgrid, num_global_dofs, num_global_dofs, num_local_true_dofs, num_local_dofs, global_comm, interface_comm, global_rank, compute_owner, remote_entities +using MPI +using HYPRE +using Base: @propagate_inbounds + +include("FerriteHYPRE/assembler.jl") +include("FerriteHYPRE/conversion.jl") + +function __init__() + @info "FerriteHYPRE extension loaded." +end + +end # module FerriteHYPRE diff --git a/ext/FerriteHYPRE/assembler.jl b/ext/FerriteHYPRE/assembler.jl new file mode 100644 index 0000000000..711c74cebe --- /dev/null +++ b/ext/FerriteHYPRE/assembler.jl @@ -0,0 +1,48 @@ +function Ferrite.create_sparsity_pattern(::Type{<:HYPREMatrix}, dh::Ferrite.AbstractDofHandler, ch::Union{ConstraintHandler,Nothing}=nothing; kwargs...) + K = create_sparsity_pattern(dh, ch; kwargs...) + fill!(K.nzval, 1) + return HYPREMatrix(K) +end + +########################################### +## HYPREAssembler and associated methods ## +########################################### + +struct HYPREAssembler <: Ferrite.AbstractSparseAssembler + A::HYPRE.HYPREAssembler +end + +Ferrite.matrix_handle(a::HYPREAssembler) = a.A.A.A # :) +Ferrite.vector_handle(a::HYPREAssembler) = a.A.b.b # :) + +function Ferrite.start_assemble(K::HYPREMatrix, f::HYPREVector) + return HYPREAssembler(HYPRE.start_assemble!(K, f)) +end + +function Ferrite.assemble!(a::HYPREAssembler, dofs::AbstractVector{<:Integer}, ke::AbstractMatrix, fe::AbstractVector) + HYPRE.assemble!(a.A, dofs, ke, fe) +end + +function Ferrite.end_assemble(a::HYPREAssembler) + HYPRE.finish_assemble!(a.A) +end + +## Methods for arrayutils.jl ## + +function Ferrite.addindex!(A::HYPREMatrix, v, i::Int, j::Int) + nrows = HYPRE_Int(1) + ncols = Ref{HYPRE_Int}(1) + rows = Ref{HYPRE_BigInt}(i) + cols = Ref{HYPRE_BigInt}(j) + values = Ref{HYPRE_Complex}(v) + HYPRE.@check HYPRE_IJMatrixAddToValues(A.ijmatrix, nrows, ncols, rows, cols, values) + return A +end + +function Ferrite.addindex!(b::HYPREVector, v, i::Int) + nvalues = HYPRE_Int(1) + indices = Ref{HYPRE_BigInt}(i) + values = Ref{HYPRE_Complex}(v) + HYPRE.@check HYPRE_IJVectorAddToValues(b.ijvector, nvalues, indices, values) + return b +end \ No newline at end of file diff --git a/ext/FerriteHYPRE/conversion.jl b/ext/FerriteHYPRE/conversion.jl new file mode 100644 index 0000000000..43d5c97495 --- /dev/null +++ b/ext/FerriteHYPRE/conversion.jl @@ -0,0 +1,116 @@ +# Hypre to Ferrite vector +function hypre_to_ferrite!(u::Vector{T}, uh::HYPREVector, dh::Ferrite.AbstractDofHandler) where {T} + # Copy solution from HYPRE to Julia + uj = Vector{Float64}(undef, num_local_true_dofs(dh)) + copy!(uj, uh) + + my_rank = global_rank(getglobalgrid(dh)) + + # Helper to gather which global dof and values have to be send to which process + 
gdof_value_send = [Dict{Int,Float64}() for i ∈ 1:MPI.Comm_size(MPI.COMM_WORLD)] + # Helper to get the global dof to local dof mapping + rank_recv_count = [0 for i∈1:MPI.Comm_size(MPI.COMM_WORLD)] + gdof_to_ldof = Dict{Int,Int}() + + next_dof = 1 + for (ldof,rank) ∈ enumerate(dh.ldof_to_rank) + if rank == my_rank + u[ldof] = uj[next_dof] + next_dof += 1 + else + # We have to sync these later. + gdof_to_ldof[dh.ldof_to_gdof[ldof]] = ldof + rank_recv_count[rank] += 1 + end + end + + # TODO speed this up and better API + dgrid = getglobalgrid(dh) + for (lvi, sv) ∈ get_shared_vertices(dgrid) + my_rank != compute_owner(dgrid, sv) && continue + for field_idx in 1:num_fields(dh) + if Ferrite.has_vertex_dofs(dh, field_idx, lvi) + local_dofs = Ferrite.vertex_dofs(dh, field_idx, lvi) + global_dofs = dh.ldof_to_gdof[local_dofs] + for receiver_rank ∈ keys(remote_entities(sv)) + for i ∈ 1:length(global_dofs) + # Note that u already has the correct values for all locally owned dofs due to the loop above! + gdof_value_send[receiver_rank][global_dofs[i]] = u[local_dofs[i]] + end + end + end + end + end + + for (lvi, se) ∈ get_shared_edges(dgrid) + my_rank != compute_owner(dgrid, se) && continue + for field_idx in 1:num_fields(dh) + if Ferrite.has_edge_dofs(dh, field_idx, lvi) + local_dofs = Ferrite.edge_dofs(dh, field_idx, lvi) + global_dofs = dh.ldof_to_gdof[local_dofs] + for receiver_rank ∈ keys(remote_entities(se)) + for i ∈ 1:length(global_dofs) + # Note that u already has the correct values for all locally owned dofs due to the loop above! + gdof_value_send[receiver_rank][global_dofs[i]] = u[local_dofs[i]] + end + end + end + end + end + + for (lvi, sf) ∈ get_shared_faces(dgrid) + my_rank != compute_owner(dgrid, sf) && continue + for field_idx in 1:num_fields(dh) + if Ferrite.has_face_dofs(dh, field_idx, lvi) + local_dofs = Ferrite.face_dofs(dh, field_idx, lvi) + global_dofs = dh.ldof_to_gdof[local_dofs] + for receiver_rank ∈ keys(remote_entities(sf)) + for i ∈ 1:length(global_dofs) + # Note that u already has the correct values for all locally owned dofs due to the loop above! 
+ gdof_value_send[receiver_rank][global_dofs[i]] = u[local_dofs[i]] + end + end + end + end + end + + Ferrite.@debug println("preparing to distribute $gdof_value_send (R$my_rank)") + + # TODO precompute graph at it is static + graph_source = Cint[my_rank-1] + graph_dest = Cint[] + for r ∈ 1:MPI.Comm_size(MPI.COMM_WORLD) + !isempty(gdof_value_send[r]) && push!(graph_dest, r-1) + end + + graph_degree = Cint[length(graph_dest)] + graph_comm = MPI.Dist_graph_create(MPI.COMM_WORLD, graph_source, graph_degree, graph_dest) + indegree, outdegree, _ = MPI.Dist_graph_neighbors_count(graph_comm) + + inranks = Vector{Cint}(undef, indegree) + outranks = Vector{Cint}(undef, outdegree) + MPI.Dist_graph_neighbors!(graph_comm, inranks, outranks) + + send_count = [length(gdof_value_send[outrank+1]) for outrank ∈ outranks] + recv_count = [rank_recv_count[inrank+1] for inrank ∈ inranks] + + send_gdof = Cint[] + for outrank ∈ outranks + append!(send_gdof, Cint.(keys(gdof_value_send[outrank+1]))) + end + recv_gdof = Vector{Cint}(undef, sum(recv_count)) + MPI.Neighbor_alltoallv!(VBuffer(send_gdof,send_count), VBuffer(recv_gdof,recv_count), graph_comm) + + send_val = Cdouble[] + for outrank ∈ outranks + append!(send_val, Cdouble.(values(gdof_value_send[outrank+1]))) + end + recv_val = Vector{Cdouble}(undef, sum(recv_count)) + MPI.Neighbor_alltoallv!(VBuffer(send_val,send_count), VBuffer(recv_val,recv_count), graph_comm) + + for (gdof, val) ∈ zip(recv_gdof, recv_val) + u[gdof_to_ldof[gdof]] = val + end + + return u +end diff --git a/ext/FerriteMPI.jl b/ext/FerriteMPI.jl index 4bcabcecd8..8f8acc0f94 100644 --- a/ext/FerriteMPI.jl +++ b/ext/FerriteMPI.jl @@ -5,7 +5,7 @@ module FerriteMPI using Ferrite # TODO remove me. These are merely hotfixes to split the extensions trasiently via an internal API. -import Ferrite: getglobalgrid, num_global_dofs, num_global_dofs, num_local_dofs, num_local_true_dofs, global_comm, interface_comm, global_rank, compute_owner, local_dof_range +import Ferrite: getglobalgrid, num_global_dofs, num_global_dofs, num_local_dofs, num_local_true_dofs, global_comm, interface_comm, global_rank, compute_owner, local_dof_range, remote_entities using Metis using MPI using Base: @propagate_inbounds @@ -14,5 +14,9 @@ include("FerriteMPI/grid.jl") include("FerriteMPI/DistributedDofHandler.jl") include("FerriteMPI/iterators.jl") include("FerriteMPI/vtk-export.jl") - + +function __init__() + @info "FerriteMPI extension loaded." +end + end # module FerriteMPI diff --git a/ext/FerriteMPI/DistributedDofHandler.jl b/ext/FerriteMPI/DistributedDofHandler.jl index cc95fd90a4..4cc01e766a 100644 --- a/ext/FerriteMPI/DistributedDofHandler.jl +++ b/ext/FerriteMPI/DistributedDofHandler.jl @@ -600,117 +600,3 @@ function Ferrite.close!(dh::DistributedDofHandler) append!(dh.ldof_to_gdof, local_to_global_numbering(dh)) return dh end - - -# Hypre to Ferrite vector -function hypre_to_ferrite!(u, x, dh) - my_rank = global_rank(getglobalgrid(dh)) - - # Helper to gather which global dof and values have to be send to which process - gdof_value_send = [Dict{Int,Float64}() for i ∈ 1:MPI.Comm_size(MPI.COMM_WORLD)] - # Helper to get the global dof to local dof mapping - rank_recv_count = [0 for i∈1:MPI.Comm_size(MPI.COMM_WORLD)] - gdof_to_ldof = Dict{Int,Int}() - - next_dof = 1 - for (ldof,rank) ∈ enumerate(dh.ldof_to_rank) - if rank == my_rank - u[ldof] = x[next_dof] - next_dof += 1 - else - # We have to sync these later. 
- gdof_to_ldof[dh.ldof_to_gdof[ldof]] = ldof - rank_recv_count[rank] += 1 - end - end - - # TODO speed this up and better API - dgrid = getglobalgrid(dh) - for (lvi, sv) ∈ get_shared_vertices(dgrid) - my_rank != compute_owner(dgrid, sv) && continue - for field_idx in 1:num_fields(dh) - if Ferrite.has_vertex_dofs(dh, field_idx, lvi) - local_dofs = Ferrite.vertex_dofs(dh, field_idx, lvi) - global_dofs = dh.ldof_to_gdof[local_dofs] - for receiver_rank ∈ keys(remote_entities(sv)) - for i ∈ 1:length(global_dofs) - # Note that u already has the correct values for all locally owned dofs due to the loop above! - gdof_value_send[receiver_rank][global_dofs[i]] = u[local_dofs[i]] - end - end - end - end - end - - for (lvi, se) ∈ get_shared_edges(dgrid) - my_rank != compute_owner(dgrid, se) && continue - for field_idx in 1:num_fields(dh) - if Ferrite.has_edge_dofs(dh, field_idx, lvi) - local_dofs = Ferrite.edge_dofs(dh, field_idx, lvi) - global_dofs = dh.ldof_to_gdof[local_dofs] - for receiver_rank ∈ keys(remote_entities(se)) - for i ∈ 1:length(global_dofs) - # Note that u already has the correct values for all locally owned dofs due to the loop above! - gdof_value_send[receiver_rank][global_dofs[i]] = u[local_dofs[i]] - end - end - end - end - end - - for (lvi, sf) ∈ get_shared_faces(dgrid) - my_rank != compute_owner(dgrid, sf) && continue - for field_idx in 1:num_fields(dh) - if Ferrite.has_face_dofs(dh, field_idx, lvi) - local_dofs = Ferrite.face_dofs(dh, field_idx, lvi) - global_dofs = dh.ldof_to_gdof[local_dofs] - for receiver_rank ∈ keys(remote_entities(sf)) - for i ∈ 1:length(global_dofs) - # Note that u already has the correct values for all locally owned dofs due to the loop above! - gdof_value_send[receiver_rank][global_dofs[i]] = u[local_dofs[i]] - end - end - end - end - end - - Ferrite.@debug println("preparing to distribute $gdof_value_send (R$my_rank)") - - # TODO precompute graph at it is static - graph_source = Cint[my_rank-1] - graph_dest = Cint[] - for r ∈ 1:MPI.Comm_size(MPI.COMM_WORLD) - !isempty(gdof_value_send[r]) && push!(graph_dest, r-1) - end - - graph_degree = Cint[length(graph_dest)] - graph_comm = MPI.Dist_graph_create(MPI.COMM_WORLD, graph_source, graph_degree, graph_dest) - indegree, outdegree, _ = MPI.Dist_graph_neighbors_count(graph_comm) - - inranks = Vector{Cint}(undef, indegree) - outranks = Vector{Cint}(undef, outdegree) - MPI.Dist_graph_neighbors!(graph_comm, inranks, outranks) - - send_count = [length(gdof_value_send[outrank+1]) for outrank ∈ outranks] - recv_count = [rank_recv_count[inrank+1] for inrank ∈ inranks] - - send_gdof = Cint[] - for outrank ∈ outranks - append!(send_gdof, Cint.(keys(gdof_value_send[outrank+1]))) - end - recv_gdof = Vector{Cint}(undef, sum(recv_count)) - MPI.Neighbor_alltoallv!(VBuffer(send_gdof,send_count), VBuffer(recv_gdof,recv_count), graph_comm) - - send_val = Cdouble[] - for outrank ∈ outranks - append!(send_val, Cdouble.(values(gdof_value_send[outrank+1]))) - end - recv_val = Vector{Cdouble}(undef, sum(recv_count)) - MPI.Neighbor_alltoallv!(VBuffer(send_val,send_count), VBuffer(recv_val,recv_count), graph_comm) - - for (gdof, val) ∈ zip(recv_gdof, recv_val) - u[gdof_to_ldof[gdof]] = val - end - - return u -end diff --git a/ext/FerriteMPI/grid.jl b/ext/FerriteMPI/grid.jl index f48ed9be8a..655dfcba12 100644 --- a/ext/FerriteMPI/grid.jl +++ b/ext/FerriteMPI/grid.jl @@ -7,7 +7,7 @@ struct SharedVertex <: Ferrite.SharedEntity remote_vertices::Dict{Int,Vector{VertexIndex}} end -@inline remote_entities(sv::SharedVertex) = 
sv.remote_vertices +@inline Ferrite.remote_entities(sv::SharedVertex) = sv.remote_vertices """ """ @@ -16,7 +16,7 @@ struct SharedFace <: Ferrite.SharedEntity remote_faces::Dict{Int,Vector{FaceIndex}} end -@inline remote_entities(sf::SharedFace) = sf.remote_faces +@inline Ferrite.remote_entities(sf::SharedFace) = sf.remote_faces """ """ @@ -25,7 +25,7 @@ struct SharedEdge <: Ferrite.SharedEntity remote_edges::Dict{Int,Vector{EdgeIndex}} end -@inline remote_entities(se::SharedEdge) = se.remote_edges +@inline Ferrite.remote_entities(se::SharedEdge) = se.remote_edges """ @TODO docs diff --git a/ext/FerritePartitionedArrays.jl b/ext/FerritePartitionedArrays.jl index 8a397ba0ef..32af7f1159 100644 --- a/ext/FerritePartitionedArrays.jl +++ b/ext/FerritePartitionedArrays.jl @@ -5,7 +5,7 @@ module FerritePartitionedArrays using Ferrite # TODO remove me. These are merely hotfixes to split the extensions trasiently via an internal API. -import Ferrite: getglobalgrid, num_global_dofs, num_global_dofs, num_local_true_dofs, num_local_dofs, global_comm, interface_comm, global_rank, compute_owner +import Ferrite: getglobalgrid, num_global_dofs, num_global_dofs, num_local_true_dofs, num_local_dofs, global_comm, interface_comm, global_rank, compute_owner, remote_entities using MPI using PartitionedArrays using Base: @propagate_inbounds @@ -13,5 +13,9 @@ using Base: @propagate_inbounds include("FerritePartitionedArrays/assembler.jl") include("FerritePartitionedArrays/constraints.jl") include("FerritePartitionedArrays/export-vtk.jl") - + +function __init__() + @info "FerritePartitionedArrays extension loaded." +end + end # module FerritePartitionedArrays diff --git a/ext/FerritePartitionedArrays/assembler.jl b/ext/FerritePartitionedArrays/assembler.jl index 7bdb3aaf3c..d2016f90fc 100644 --- a/ext/FerritePartitionedArrays/assembler.jl +++ b/ext/FerritePartitionedArrays/assembler.jl @@ -1,3 +1,6 @@ +function Ferrite.create_sparsity_pattern(::Type{<:PSparseMatrix}, dh::Ferrite.AbstractDofHandler, ch::Union{ConstraintHandler,Nothing}=nothing; kwargs...) + error("Not implemented.") +end """ Simplest partitioned assembler in COO format to obtain a PSparseMatrix and a PVector. diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index 3c2253f420..2592644703 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -2,17 +2,19 @@ """ abstract type AbstractDistributedGrid{sdim} <: AbstractGrid{sdim} end +""" +""" +abstract type SharedEntity end + #TODO remove. This is a temporary workaround to make the transient extensions work. global_comm(::AbstractDistributedGrid) = error("Not implemented.") interface_comm(::AbstractDistributedGrid) = error("Not implemented.") global_rank(::AbstractDistributedGrid) = error("Not implemented.") compute_owner(::AbstractDistributedGrid) = error("Not implemented.") - +remote_entities(::SharedEntity) = error("Not implemented.") """ """ -abstract type SharedEntity end - @inline get_shared_vertices(dgrid::AbstractDistributedGrid) = dgrid.shared_vertices @inline get_shared_edges(dgrid::AbstractDistributedGrid) = dgrid.shared_edges @inline get_shared_faces(dgrid::AbstractDistributedGrid) = dgrid.shared_faces From 325a75c7687182159441870785aa2cd9c4f7709d Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Mon, 20 Feb 2023 20:13:44 +0100 Subject: [PATCH 115/124] Try to fix CI. 
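Project.toml now lists HYPRE, MPI and PartitionedArrays as regular dependencies and as test extras, presumably so CI can resolve and load the extensions, and the docs manifest is regenerated to match. A hypothetical smoke test of that wiring (not part of this patch; Julia 1.9 or newer):

    using Test
    using Ferrite, MPI, Metis, HYPRE, PartitionedArrays

    # Base.get_extension returns the extension module once all of its
    # (weak) dependencies are loaded, and nothing otherwise.
    @test Base.get_extension(Ferrite, :FerriteMPI) !== nothing
    @test Base.get_extension(Ferrite, :FerriteHYPRE) !== nothing
    @test Base.get_extension(Ferrite, :FerritePartitionedArrays) !== nothing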
--- Project.toml | 14 +-- docs/Manifest.toml | 209 ++++++++++++++++++++++++--------------------- 2 files changed, 119 insertions(+), 104 deletions(-) diff --git a/Project.toml b/Project.toml index faf7e784cb..597f6cb8d8 100644 --- a/Project.toml +++ b/Project.toml @@ -4,9 +4,12 @@ version = "0.3.11" [deps] EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56" +HYPRE = "b5ffcf37-a2bd-41ab-a3da-4bd9bc8ad771" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce" +PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9" Preferences = "21216c6a-2e73-6563-6e65-726566657250" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" @@ -16,23 +19,23 @@ WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192" [weakdeps] BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" HYPRE = "b5ffcf37-a2bd-41ab-a3da-4bd9bc8ad771" -Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9" [extensions] FerriteBlockArrays = "BlockArrays" FerriteHYPRE = ["MPI", "Metis", "HYPRE"] -FerriteMetis = "Metis" FerriteMPI = ["MPI", "Metis"] +FerriteMetis = "Metis" FerritePartitionedArrays = ["MPI", "Metis", "PartitionedArrays"] [compat] BlockArrays = "0.16" EnumX = "1" HYPRE = "^1.4.0" -Metis = "1.3" MPI = "^0.20.2" +Metis = "1.3" NearestNeighbors = "0.4" PartitionedArrays = "0.2.15" Preferences = "1" @@ -47,7 +50,9 @@ Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" FerriteGmsh = "4f95f4f8-b27c-4ae5-9a39-ea55e634e36b" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Gmsh = "705231aa-382f-11e9-3f0c-b7cb4346fdeb" +HYPRE = "b5ffcf37-a2bd-41ab-a3da-4bd9bc8ad771" IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" +Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" NBInclude = "0db19996-df87-5ea3-a455-e3a50d440464" @@ -58,7 +63,6 @@ SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" [targets] -test = ["BlockArrays", "Downloads", "FerriteGmsh", "ForwardDiff", "Gmsh", "IterativeSolvers", "Metis", "NBInclude", "ProgressMeter", "Random", "SHA", "StableRNGs", "Test", "TimerOutputs", "Logging"] +test = ["BlockArrays", "Downloads", "FerriteGmsh", "ForwardDiff", "Gmsh", "HYPRE", "IterativeSolvers", "Metis", "MPI", "NBInclude", "PartitionedArrays", "ProgressMeter", "Random", "SHA", "StableRNGs", "Test", "TimerOutputs", "Logging"] diff --git a/docs/Manifest.toml b/docs/Manifest.toml index b9ac6627c6..3bdbd9148b 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -26,10 +26,26 @@ uuid = "ec485272-7323-5ecc-a04f-4719b315124d" version = "0.2.0" [[deps.ArrayInterface]] -deps = ["ArrayInterfaceCore", "Compat", "IfElse", "LinearAlgebra", "SnoopPrecompile", "Static"] -git-tree-sha1 = "dedc16cbdd1d32bead4617d27572f582216ccf23" +deps = ["Adapt", "LinearAlgebra", "Requires", "SnoopPrecompile", "SparseArrays", "SuiteSparse"] +git-tree-sha1 = "4d9946e51e24f5e509779e3e2c06281a733914c2" uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" -version = "6.0.25" +version = "7.1.0" + + [deps.ArrayInterface.extensions] + 
ArrayInterfaceBandedMatricesExt = "BandedMatrices" + ArrayInterfaceBlockBandedMatricesExt = "BlockBandedMatrices" + ArrayInterfaceCUDAExt = "CUDA" + ArrayInterfaceGPUArraysCoreExt = "GPUArraysCore" + ArrayInterfaceStaticArraysCoreExt = "StaticArraysCore" + ArrayInterfaceTrackerExt = "Tracker" + + [deps.ArrayInterface.weakdeps] + BandedMatrices = "aae01518-5342-5314-be14-df237901396f" + BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" + StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" [[deps.ArrayInterfaceCore]] deps = ["LinearAlgebra", "SnoopPrecompile", "SparseArrays", "SuiteSparse"] @@ -37,30 +53,6 @@ git-tree-sha1 = "e5f08b5689b1aad068e01751889f2f615c7db36d" uuid = "30b0a656-2188-435a-8636-2ec0e6a096e2" version = "0.1.29" -[[deps.ArrayInterfaceGPUArrays]] -deps = ["Adapt", "ArrayInterfaceCore", "GPUArraysCore", "LinearAlgebra"] -git-tree-sha1 = "fc114f550b93d4c79632c2ada2924635aabfa5ed" -uuid = "6ba088a2-8465-4c0a-af30-387133b534db" -version = "0.2.2" - -[[deps.ArrayInterfaceOffsetArrays]] -deps = ["ArrayInterface", "OffsetArrays", "Static"] -git-tree-sha1 = "3d1a9a01976971063b3930d1aed1d9c4af0817f8" -uuid = "015c0d05-e682-4f19-8f0a-679ce4c54826" -version = "0.1.7" - -[[deps.ArrayInterfaceStaticArrays]] -deps = ["Adapt", "ArrayInterface", "ArrayInterfaceCore", "ArrayInterfaceStaticArraysCore", "LinearAlgebra", "Static", "StaticArrays"] -git-tree-sha1 = "f12dc65aef03d0a49650b20b2fdaf184928fd886" -uuid = "b0d46f97-bff5-4637-a19a-dd75974142cd" -version = "0.1.5" - -[[deps.ArrayInterfaceStaticArraysCore]] -deps = ["Adapt", "ArrayInterfaceCore", "LinearAlgebra", "StaticArraysCore"] -git-tree-sha1 = "93c8ba53d8d26e124a5a8d4ec914c3a16e6a0970" -uuid = "dd5226c6-a4d4-4bc7-8575-46859f9c95b9" -version = "0.1.3" - [[deps.ArrayLayouts]] deps = ["FillArrays", "LinearAlgebra", "SparseArrays"] git-tree-sha1 = "4aff5fa660eb95c2e0deb6bcdabe4d9a96bc4667" @@ -120,10 +112,10 @@ uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" version = "1.15.7" [[deps.CloseOpenIntervals]] -deps = ["ArrayInterface", "Static"] -git-tree-sha1 = "d61300b9895f129f4bd684b2aff97cf319b6c493" +deps = ["Static", "StaticArrayInterface"] +git-tree-sha1 = "70232f82ffaab9dc52585e0dd043b5e0c6b714f1" uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9" -version = "0.1.11" +version = "0.1.12" [[deps.CodecZlib]] deps = ["TranscodingStreams", "Zlib_jll"] @@ -221,10 +213,10 @@ uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" version = "1.9.1" [[deps.DiffEqBase]] -deps = ["ArrayInterfaceCore", "ChainRulesCore", "DataStructures", "DocStringExtensions", "FastBroadcast", "ForwardDiff", "FunctionWrappers", "FunctionWrappersWrappers", "LinearAlgebra", "Logging", "MuladdMacro", "Parameters", "PreallocationTools", "Printf", "RecursiveArrayTools", "Reexport", "Requires", "SciMLBase", "Setfield", "SparseArrays", "Static", "StaticArrays", "Statistics", "Tricks", "ZygoteRules"] -git-tree-sha1 = "b91fef836ef8c2c4480bce90bb8981e1ac21b444" +deps = ["ArrayInterface", "ChainRulesCore", "DataStructures", "DocStringExtensions", "EnumX", "FastBroadcast", "ForwardDiff", "FunctionWrappers", "FunctionWrappersWrappers", "LinearAlgebra", "Logging", "Markdown", "MuladdMacro", "Parameters", "PreallocationTools", "Printf", "RecursiveArrayTools", "Reexport", "Requires", "SciMLBase", "Setfield", "SparseArrays", "Static", "StaticArraysCore", "Statistics", "Tricks", "ZygoteRules"] +git-tree-sha1 = 
"9441053d50b00cd5fe54ed13fd7081cf9feb2ce5" uuid = "2b5f629d-d688-5b77-993f-72d75c75574e" -version = "6.117.0" +version = "6.120.0" [deps.DiffEqBase.extensions] DiffEqBaseDistributionsExt = "Distributions" @@ -299,10 +291,10 @@ uuid = "2e619515-83b5-522b-bb60-26c02a35a201" version = "2.4.8+0" [[deps.ExponentialUtilities]] -deps = ["Adapt", "ArrayInterfaceCore", "ArrayInterfaceGPUArrays", "GPUArraysCore", "GenericSchur", "LinearAlgebra", "Printf", "SnoopPrecompile", "SparseArrays", "libblastrampoline_jll"] -git-tree-sha1 = "1c06afe6eb356a6148a2e5f07eddaf30f018bd5b" +deps = ["Adapt", "ArrayInterface", "GPUArraysCore", "GenericSchur", "LinearAlgebra", "Printf", "SnoopPrecompile", "SparseArrays", "libblastrampoline_jll"] +git-tree-sha1 = "fb7dbef7d2631e2d02c49e2750f7447648b0ec9b" uuid = "d4d017d3-3776-5f7e-afef-a10c40355c18" -version = "1.22.1" +version = "1.24.0" [[deps.ExprTools]] git-tree-sha1 = "56559bbef6ca5ea0c0818fa5c90320398a6fbf8d" @@ -328,10 +320,10 @@ uuid = "4fce6fc7-ba6a-5f4c-898f-77e99806d6f8" version = "1.3.8+0" [[deps.FastBroadcast]] -deps = ["ArrayInterface", "ArrayInterfaceCore", "LinearAlgebra", "Polyester", "Static", "StrideArraysCore"] -git-tree-sha1 = "4bef892787c972913d4d84e7255400759bb650e5" +deps = ["ArrayInterface", "LinearAlgebra", "Polyester", "Static", "StaticArrayInterface", "StrideArraysCore"] +git-tree-sha1 = "d1248fceea0b26493fd33e8e9e8c553270da03bd" uuid = "7034ab61-46d4-4ed7-9d0f-46aef9175898" -version = "0.2.4" +version = "0.2.5" [[deps.FastClosures]] git-tree-sha1 = "acebe244d53ee1b461970f8910c235b259e772ef" @@ -340,19 +332,21 @@ version = "0.3.2" [[deps.FastLapackInterface]] deps = ["LinearAlgebra"] -git-tree-sha1 = "7fbaf9f73cd4c8561702ea9b16acf3f99d913fe4" +git-tree-sha1 = "c1293a93193f0ae94be7cf338d33e162c39d8788" uuid = "29a986be-02c6-4525-aec4-84b980013641" -version = "1.2.8" +version = "1.2.9" [[deps.Ferrite]] deps = ["EnumX", "LinearAlgebra", "NearestNeighbors", "Preferences", "Reexport", "SparseArrays", "Tensors", "WriteVTK"] path = ".." 
uuid = "c061ca5d-56c9-439f-9c0e-210fe06d3992" version = "0.3.11" -weakdeps = ["BlockArrays", "MPI", "Metis", "PartitionedArrays"] +weakdeps = ["BlockArrays", "HYPRE", "MPI", "Metis", "PartitionedArrays"] [deps.Ferrite.extensions] FerriteBlockArrays = "BlockArrays" + FerriteHYPRE = ["MPI", "Metis", "HYPRE"] + FerriteMPI = ["MPI", "Metis"] FerriteMetis = "Metis" FerritePartitionedArrays = ["MPI", "Metis", "PartitionedArrays"] @@ -378,10 +372,10 @@ uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" version = "0.13.7" [[deps.FiniteDiff]] -deps = ["ArrayInterfaceCore", "LinearAlgebra", "Requires", "Setfield", "SparseArrays", "StaticArrays"] -git-tree-sha1 = "04ed1f0029b6b3af88343e439b995141cb0d0b8d" +deps = ["ArrayInterface", "LinearAlgebra", "Requires", "Setfield", "SparseArrays", "StaticArrays"] +git-tree-sha1 = "ed1b56934a2f7a65035976985da71b6a65b4f2cf" uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" -version = "2.17.0" +version = "2.18.0" [[deps.FixedPointNumbers]] deps = ["Statistics"] @@ -606,10 +600,10 @@ uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" version = "0.21.3" [[deps.JpegTurbo_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "b53380851c6e6664204efb2e62cd24fa5c47e4ba" +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "6f2675ef130a300a112286de91973805fcc5ffbc" uuid = "aacddb02-875f-59d6-b918-886e6ef4fbf8" -version = "2.1.2+0" +version = "2.1.91+0" [[deps.KLU]] deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse_jll"] @@ -671,10 +665,10 @@ uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" version = "0.15.18" [[deps.LayoutPointers]] -deps = ["ArrayInterface", "ArrayInterfaceOffsetArrays", "ArrayInterfaceStaticArrays", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static"] -git-tree-sha1 = "0ad6f0c51ce004dadc24a28a0dfecfb568e52242" +deps = ["ArrayInterface", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface"] +git-tree-sha1 = "88b8f66b604da079a627b6fb2860d3704a6729a1" uuid = "10f19ff3-798f-405d-979b-55457f8fc047" -version = "0.1.13" +version = "0.1.14" [[deps.Lazy]] deps = ["MacroTools"] @@ -779,10 +773,10 @@ uuid = "18c40d15-f7cd-5a6d-bc92-87468d86c5db" version = "5.0.0+0" [[deps.LinearSolve]] -deps = ["ArrayInterfaceCore", "DocStringExtensions", "FastLapackInterface", "GPUArraysCore", "IterativeSolvers", "KLU", "Krylov", "KrylovKit", "LinearAlgebra", "Preferences", "RecursiveFactorization", "Reexport", "SciMLBase", "SciMLOperators", "Setfield", "SnoopPrecompile", "SparseArrays", "Sparspak", "SuiteSparse", "UnPack"] -git-tree-sha1 = "ed97c2b4e46d02d4c866d3ccfae039a6c09568b1" +deps = ["ArrayInterface", "DocStringExtensions", "FastLapackInterface", "GPUArraysCore", "IterativeSolvers", "KLU", "Krylov", "KrylovKit", "LinearAlgebra", "Preferences", "RecursiveFactorization", "Reexport", "SciMLBase", "SciMLOperators", "Setfield", "SnoopPrecompile", "SparseArrays", "Sparspak", "SuiteSparse", "UnPack"] +git-tree-sha1 = "d1fce810e9a4213607f0182cf25ffd6ce13e19b6" uuid = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" -version = "1.35.0" +version = "1.37.0" weakdeps = ["HYPRE"] [deps.LinearSolve.extensions] @@ -802,14 +796,14 @@ version = "1.1.1" [[deps.LogExpFunctions]] deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"] -git-tree-sha1 = "680e733c3a0a9cea9e935c8c2184aea6a63fa0b5" +git-tree-sha1 = "0a1b7c2863e44523180fdb3146534e265a91870b" uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" -version = "0.3.21" +version = "0.3.23" [deps.LogExpFunctions.extensions] - ChainRulesCoreExt = "ChainRulesCore" - ChangesOfVariablesExt = 
"ChangesOfVariables" - InverseFunctionsExt = "InverseFunctions" + LogExpFunctionsChainRulesCoreExt = "ChainRulesCore" + LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables" + LogExpFunctionsInverseFunctionsExt = "InverseFunctions" [deps.LogExpFunctions.weakdeps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" @@ -826,10 +820,10 @@ uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" version = "1.0.0" [[deps.LoopVectorization]] -deps = ["ArrayInterface", "ArrayInterfaceCore", "ArrayInterfaceOffsetArrays", "ArrayInterfaceStaticArrays", "CPUSummary", "CloseOpenIntervals", "DocStringExtensions", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "SIMDTypes", "SLEEFPirates", "SnoopPrecompile", "Static", "ThreadingUtilities", "UnPack", "VectorizationBase"] -git-tree-sha1 = "9696a80c21a56b937e3fd89e972f8db5db3186e2" +deps = ["ArrayInterface", "ArrayInterfaceCore", "CPUSummary", "CloseOpenIntervals", "DocStringExtensions", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "SIMDTypes", "SLEEFPirates", "SnoopPrecompile", "Static", "StaticArrayInterface", "ThreadingUtilities", "UnPack", "VectorizationBase"] +git-tree-sha1 = "d407ea0d7c354f5765914d0982c233328523c82f" uuid = "bdcacae8-1622-11e9-2a5c-532679323890" -version = "0.12.150" +version = "0.12.151" weakdeps = ["ChainRulesCore", "ForwardDiff", "SpecialFunctions"] [deps.LoopVectorization.extensions] @@ -977,10 +971,10 @@ uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" version = "1.2.0" [[deps.NonlinearSolve]] -deps = ["ArrayInterfaceCore", "DiffEqBase", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "LinearSolve", "RecursiveArrayTools", "Reexport", "SciMLBase", "SimpleNonlinearSolve", "SnoopPrecompile", "SparseArrays", "SparseDiffTools", "StaticArraysCore", "UnPack"] -git-tree-sha1 = "e2b063236a3103a3640ff1f2e3945ca387281cbe" +deps = ["ArrayInterface", "DiffEqBase", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "LinearSolve", "RecursiveArrayTools", "Reexport", "SciMLBase", "SimpleNonlinearSolve", "SnoopPrecompile", "SparseArrays", "SparseDiffTools", "StaticArraysCore", "UnPack"] +git-tree-sha1 = "536aa8b33b2c3a10df8ce89bdb0b0affef93d393" uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec" -version = "1.3.0" +version = "1.4.0" [[deps.OCCT_jll]] deps = ["Artifacts", "FreeType2_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll", "Xorg_libXfixes_jll", "Xorg_libXft_jll", "Xorg_libXinerama_jll", "Xorg_libXrender_jll"] @@ -1052,10 +1046,10 @@ uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" version = "1.4.1" [[deps.OrdinaryDiffEq]] -deps = ["Adapt", "ArrayInterface", "ArrayInterfaceCore", "ArrayInterfaceGPUArrays", "ArrayInterfaceStaticArrays", "ArrayInterfaceStaticArraysCore", "DataStructures", "DiffEqBase", "DocStringExtensions", "ExponentialUtilities", "FastBroadcast", "FastClosures", "FiniteDiff", "ForwardDiff", "FunctionWrappersWrappers", "LinearAlgebra", "LinearSolve", "Logging", "LoopVectorization", "MacroTools", "MuladdMacro", "NLsolve", "NonlinearSolve", "Polyester", "PreallocationTools", "Preferences", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLNLSolve", "SimpleNonlinearSolve", "SnoopPrecompile", "SparseArrays", "SparseDiffTools", "StaticArrays", "UnPack"] -git-tree-sha1 = "9e846d9c0f66fed04e7617a8bc380918a1ffe7ff" +deps = ["Adapt", "ArrayInterface", "DataStructures", "DiffEqBase", "DocStringExtensions", "ExponentialUtilities", "FastBroadcast", "FastClosures", "FiniteDiff", "ForwardDiff", 
"FunctionWrappersWrappers", "IfElse", "LinearAlgebra", "LinearSolve", "Logging", "LoopVectorization", "MacroTools", "MuladdMacro", "NLsolve", "NonlinearSolve", "Polyester", "PreallocationTools", "Preferences", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLNLSolve", "SimpleNonlinearSolve", "SnoopPrecompile", "SparseArrays", "SparseDiffTools", "StaticArrays", "UnPack"] +git-tree-sha1 = "3b98b39987fecc8c8c94f58b51d67190097b0b64" uuid = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" -version = "6.44.0" +version = "6.46.0" [[deps.PCRE2_jll]] deps = ["Artifacts", "Libdl"] @@ -1115,10 +1109,10 @@ uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" version = "1.38.5" [[deps.Polyester]] -deps = ["ArrayInterface", "BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "ManualMemory", "PolyesterWeave", "Requires", "Static", "StrideArraysCore", "ThreadingUtilities"] -git-tree-sha1 = "e8e0fabcff4df8686c4267503887202a783d498e" +deps = ["ArrayInterface", "BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "ManualMemory", "PolyesterWeave", "Requires", "Static", "StaticArrayInterface", "StrideArraysCore", "ThreadingUtilities"] +git-tree-sha1 = "0fe4e7c4d8ff4c70bfa507f0dd96fa161b115777" uuid = "f517fe37-dbe3-4b94-8317-1923a5111588" -version = "0.7.2" +version = "0.7.3" [[deps.PolyesterWeave]] deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"] @@ -1133,10 +1127,10 @@ uuid = "85a6dd25-e78a-55b7-8502-1745935b8125" version = "0.2.4" [[deps.PreallocationTools]] -deps = ["Adapt", "ArrayInterfaceCore", "ForwardDiff", "Requires"] -git-tree-sha1 = "2c7658dd593e3adc118b00429e1048829f1abb8c" +deps = ["Adapt", "ArrayInterface", "ForwardDiff", "Requires"] +git-tree-sha1 = "f739b1b3cc7b9949af3b35089931f2b58c289163" uuid = "d236fae5-4411-538c-8e31-a6e3d9e00b46" -version = "0.4.11" +version = "0.4.12" [deps.PreallocationTools.extensions] PreallocationToolsReverseDiffExt = "ReverseDiff" @@ -1187,10 +1181,10 @@ uuid = "01d81517-befc-4cb6-b9ec-a95719d0359c" version = "0.6.11" [[deps.RecursiveArrayTools]] -deps = ["Adapt", "ArrayInterfaceCore", "ArrayInterfaceStaticArraysCore", "ChainRulesCore", "DocStringExtensions", "FillArrays", "GPUArraysCore", "IteratorInterfaceExtensions", "LinearAlgebra", "RecipesBase", "Requires", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface", "Tables", "ZygoteRules"] -git-tree-sha1 = "54e055256bbd41fd10566880bc4baa5316bca6fe" +deps = ["Adapt", "ArrayInterface", "ChainRulesCore", "DocStringExtensions", "FillArrays", "GPUArraysCore", "IteratorInterfaceExtensions", "LinearAlgebra", "RecipesBase", "Requires", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface", "Tables", "ZygoteRules"] +git-tree-sha1 = "3dcb2a98436389c0aac964428a5fa099118944de" uuid = "731186ca-8d62-57ce-b412-fbd966d074cd" -version = "2.37.0" +version = "2.38.0" [deps.RecursiveArrayTools.extensions] RecursiveArrayToolsTrackerExt = "Tracker" @@ -1255,10 +1249,10 @@ uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" version = "0.6.38" [[deps.SciMLBase]] -deps = ["ArrayInterfaceCore", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "EnumX", "FunctionWrappersWrappers", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "Markdown", "Preferences", "RecipesBase", "RecursiveArrayTools", "Reexport", "RuntimeGeneratedFunctions", "SciMLOperators", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface", "Tables"] -git-tree-sha1 = "76eec814289c4a249ee3747ceeea0d83defbeb8d" +deps = ["ArrayInterface", "CommonSolve", "ConstructionBase", 
"Distributed", "DocStringExtensions", "EnumX", "FunctionWrappersWrappers", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "Markdown", "Preferences", "RecipesBase", "RecursiveArrayTools", "Reexport", "RuntimeGeneratedFunctions", "SciMLOperators", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface", "Tables"] +git-tree-sha1 = "fd2a15854af0ba1542b89efa24512b0377e7e37d" uuid = "0bca4576-84f4-4d90-8ffe-ffa030f20462" -version = "1.84.1" +version = "1.86.1" [[deps.SciMLNLSolve]] deps = ["DiffEqBase", "LineSearches", "NLsolve", "Reexport", "SciMLBase"] @@ -1267,10 +1261,10 @@ uuid = "e9a6253c-8580-4d32-9898-8661bb511710" version = "0.1.3" [[deps.SciMLOperators]] -deps = ["ArrayInterfaceCore", "DocStringExtensions", "Lazy", "LinearAlgebra", "Setfield", "SparseArrays", "StaticArraysCore", "Tricks"] -git-tree-sha1 = "c737d575c18bdf9aba0a3c7071d5249d09f45dd8" +deps = ["ArrayInterface", "DocStringExtensions", "Lazy", "LinearAlgebra", "Setfield", "SparseArrays", "StaticArraysCore", "Tricks"] +git-tree-sha1 = "8419114acbba861ac49e1ab2750bae5c5eda35c4" uuid = "c0aeaf25-5076-4817-a8d5-81caf7dfa961" -version = "0.1.21" +version = "0.1.22" [[deps.Scratch]] deps = ["Dates"] @@ -1303,10 +1297,16 @@ uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7" version = "1.1.0" [[deps.SimpleNonlinearSolve]] -deps = ["ArrayInterfaceCore", "DiffEqBase", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "Reexport", "SciMLBase", "SnoopPrecompile", "StaticArraysCore"] -git-tree-sha1 = "3f558105e8ef4aac1e22bf30bd1f1e95698bfc95" +deps = ["ArrayInterface", "DiffEqBase", "FiniteDiff", "ForwardDiff", "LinearAlgebra", "Reexport", "Requires", "SciMLBase", "SnoopPrecompile", "StaticArraysCore"] +git-tree-sha1 = "326789bbaa1b65b809bd4596b74e4fc3be5af6ac" uuid = "727e6d20-b764-4bd8-a329-72de5adea6c7" -version = "0.1.10" +version = "0.1.13" + + [deps.SimpleNonlinearSolve.extensions] + SimpleBatchedNonlinearSolveExt = "NNlib" + + [deps.SimpleNonlinearSolve.weakdeps] + NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" [[deps.SimpleTraits]] deps = ["InteractiveUtils", "MacroTools"] @@ -1334,10 +1334,10 @@ deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [[deps.SparseDiffTools]] -deps = ["Adapt", "ArrayInterfaceCore", "ArrayInterfaceStaticArrays", "Compat", "DataStructures", "FiniteDiff", "ForwardDiff", "Graphs", "LinearAlgebra", "Requires", "SparseArrays", "StaticArrays", "VertexSafeGraphs"] -git-tree-sha1 = "4245283bee733122a9cb4545748d64e0c63337c0" +deps = ["Adapt", "ArrayInterface", "Compat", "DataStructures", "FiniteDiff", "ForwardDiff", "Graphs", "LinearAlgebra", "Requires", "SparseArrays", "StaticArrays", "VertexSafeGraphs"] +git-tree-sha1 = "e19ac47477c9a8fcca06dab5e5471417d5d9d723" uuid = "47a9eef4-7e08-11e9-0b38-333d64bd3804" -version = "1.30.0" +version = "1.31.0" [[deps.SparseMatricesCSR]] deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] @@ -1359,15 +1359,26 @@ version = "2.1.7" [[deps.Static]] deps = ["IfElse"] -git-tree-sha1 = "c35b107b61e7f34fa3f124026f2a9be97dea9e1c" +git-tree-sha1 = "d0435ba43ab5ad1cbb5f0d286ca4ba67029ed3ee" uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" -version = "0.8.3" +version = "0.8.4" + +[[deps.StaticArrayInterface]] +deps = ["ArrayInterface", "Compat", "IfElse", "LinearAlgebra", "Requires", "SnoopPrecompile", "SparseArrays", "Static", "SuiteSparse"] +git-tree-sha1 = "5589ab073f8a244d2530b36478f53806f9106002" +uuid = "0d7ed370-da01-4f52-bd93-41d350b8b718" +version = "1.2.1" +weakdeps = ["OffsetArrays", 
"StaticArrays"] + + [deps.StaticArrayInterface.extensions] + StaticArrayInterfaceOffsetArraysExt = "OffsetArrays" + StaticArrayInterfaceStaticArraysExt = "StaticArrays" [[deps.StaticArrays]] deps = ["LinearAlgebra", "Random", "StaticArraysCore", "Statistics"] -git-tree-sha1 = "67d3e75e8af8089ea34ce96974d5468d4a008ca6" +git-tree-sha1 = "2d7d9e1ddadc8407ffd460e24218e37ef52dd9a3" uuid = "90137ffa-7385-5640-81b9-e52037218182" -version = "1.5.15" +version = "1.5.16" [[deps.StaticArraysCore]] git-tree-sha1 = "6b7ba252635a5eff6a0b0664a41ee140a1c9e72a" @@ -1392,10 +1403,10 @@ uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" version = "0.33.21" [[deps.StrideArraysCore]] -deps = ["ArrayInterface", "CloseOpenIntervals", "IfElse", "LayoutPointers", "ManualMemory", "SIMDTypes", "Static", "ThreadingUtilities"] -git-tree-sha1 = "8114ba9c3694827838d45ea3c9f6b9ccb4182cf2" +deps = ["ArrayInterface", "CloseOpenIntervals", "IfElse", "LayoutPointers", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface", "ThreadingUtilities"] +git-tree-sha1 = "2842f1dbd12d59f2728ba79f4002cd6b61808f8b" uuid = "7792a7ef-975c-4747-a70f-980b88e8d1da" -version = "0.4.7" +version = "0.4.8" [[deps.SuiteSparse]] deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] @@ -1508,10 +1519,10 @@ uuid = "41fe7b60-77ed-43a1-b4f0-825fd5a5650d" version = "0.2.0" [[deps.VectorizationBase]] -deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static"] -git-tree-sha1 = "4c59c2df8d2676c4691a39fa70495a6db0c5d290" +deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static", "StaticArrayInterface"] +git-tree-sha1 = "7bdcd1b36993026f91e61c3cc671c7127770be84" uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" -version = "0.21.58" +version = "0.21.59" [[deps.VertexSafeGraphs]] deps = ["Graphs"] From 921fb9a6aabcdfc2cc3d9bb197ae6a489a51534b Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Mon, 20 Feb 2023 20:29:05 +0100 Subject: [PATCH 116/124] Rename file. --- ext/FerriteMPI.jl | 2 +- ext/FerriteMPI/{grid.jl => DistributedGrid.jl} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename ext/FerriteMPI/{grid.jl => DistributedGrid.jl} (100%) diff --git a/ext/FerriteMPI.jl b/ext/FerriteMPI.jl index 8f8acc0f94..49d6f2e204 100644 --- a/ext/FerriteMPI.jl +++ b/ext/FerriteMPI.jl @@ -10,7 +10,7 @@ using Metis using MPI using Base: @propagate_inbounds -include("FerriteMPI/grid.jl") +include("FerriteMPI/DistributedGrid.jl") include("FerriteMPI/DistributedDofHandler.jl") include("FerriteMPI/iterators.jl") include("FerriteMPI/vtk-export.jl") diff --git a/ext/FerriteMPI/grid.jl b/ext/FerriteMPI/DistributedGrid.jl similarity index 100% rename from ext/FerriteMPI/grid.jl rename to ext/FerriteMPI/DistributedGrid.jl From 1d219876b85f8b1472dfaab72be70015362322a5 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Tue, 21 Feb 2023 15:17:15 +0100 Subject: [PATCH 117/124] Introduce new topology. 
--- src/Grid/grid.jl | 201 ++++++++++++++++++++++++++++++++++------------- src/exports.jl | 1 + 2 files changed, 148 insertions(+), 54 deletions(-) diff --git a/src/Grid/grid.jl b/src/Grid/grid.jl index 0174f3106d..9ecebccd08 100644 --- a/src/Grid/grid.jl +++ b/src/Grid/grid.jl @@ -16,6 +16,7 @@ Node(x::NTuple{dim,T}) where {dim,T} = Node(Vec{dim,T}(x)) getcoordinates(n::Node) = n.x get_coordinate_eltype(::Node{dim,T}) where {dim,T} = T +abstract type AbstractGrid{dim} end abstract type AbstractCell{dim,N,M} end """ Cell{dim,N,M} <: AbstractCell{dim,N,M} @@ -115,6 +116,11 @@ function Base.show(io::IO, ::MIME"text/plain", n::EntityNeighborhood) end end +getcells(neighbor::EntityNeighborhood{T}) where T <: BoundaryIndex = first.(neighbor.neighbor_info) +getcells(neighbor::EntityNeighborhood{CellIndex}) = getproperty.(neighbor.neighbor_info, :idx) +getcells(neighbors::Vector{T}) where T <: EntityNeighborhood = reduce(vcat, getcells.(neighbors)) +getcells(neighbors::Vector{T}) where T <: BoundaryIndex = getindex.(neighbors,1) + """ face_npoints(::AbstractCell{dim,N,M) Specifies for each subtype of AbstractCell how many nodes form a face @@ -199,17 +205,17 @@ function ExclusiveTopology(cells::Vector{C}) where C <: AbstractCell cell_local_ids = findall(x->x in cell_vertices_table[neighbor], cell_vertices_table[cellid]) # vertex neighbor if length(cell_local_ids) == 1 - _vertex_neighbor!(V_vertex, I_vertex, J_vertex, cellid, cell, neighbor_local_ids, neighbor, cells[neighbor]) + _vertex_neighbor!(V_vertex, I_vertex, J_vertex, cellid, cell, neighbor_local_ids, neighbor, cells[neighbor]) # face neighbor elseif length(cell_local_ids) == face_npoints(cell) - _face_neighbor!(V_face, I_face, J_face, cellid, cell, neighbor_local_ids, neighbor, cells[neighbor]) + _face_neighbor!(V_face, I_face, J_face, cellid, cell, neighbor_local_ids, neighbor, cells[neighbor]) # edge neighbor elseif getdim(cell) > 2 && length(cell_local_ids) == edge_npoints(cell) - _edge_neighbor!(V_edge, I_edge, J_edge, cellid, cell, neighbor_local_ids, neighbor, cells[neighbor]) + _edge_neighbor!(V_edge, I_edge, J_edge, cellid, cell, neighbor_local_ids, neighbor, cells[neighbor]) end end end - + celltype = eltype(cells) if isconcretetype(celltype) dim = getdim(cells[1]) @@ -287,7 +293,9 @@ function ExclusiveTopology(cells::Vector{C}) where C <: AbstractCell return ExclusiveTopology(vertex_cell_table,cell_neighbor_table,face_neighbor,vertex_neighbor,edge_neighbor,vertex_vertex_table,face_skeleton_local) end -function _vertex_neighbor!(V_vertex, I_vertex, J_vertex, cellid, cell, neighbor, neighborid, neighbor_cell) +ExclusiveTopology(grid::AbstractGrid) = ExclusiveTopology(getcells(grid)) + +function _vertex_neighbor!(V_vertex::Vector{EntityNeighborhood}, I_vertex::Vector{Int}, J_vertex::Vector{Int}, cellid::Int, cell::AbstractCell, neighbor::Vector{Int}, neighborid::Int, neighbor_cell::AbstractCell) vertex_neighbor = VertexIndex((neighborid, neighbor[1])) cell_vertex_id = findfirst(x->x==neighbor_cell.nodes[neighbor[1]], cell.nodes) push!(V_vertex,EntityNeighborhood(vertex_neighbor)) @@ -295,29 +303,32 @@ function _vertex_neighbor!(V_vertex, I_vertex, J_vertex, cellid, cell, neighbor, push!(J_vertex,cell_vertex_id) end -function _edge_neighbor!(V_edge, I_edge, J_edge, cellid, cell, neighbor, neighborid, neighbor_cell) - neighbor_edge = neighbor_cell.nodes[neighbor] +function _edge_neighbor!(V_edge::Vector{EntityNeighborhood}, I_edge::Vector{Int}, J_edge::Vector{Int}, cellid::Int, cell::AbstractCell, neighbor::Vector{Int}, 
neighborid::Int, neighbor_cell::AbstractCell) + neighbor_edge_nodes = neighbor_cell.nodes[neighbor] if getdim(neighbor_cell) < 3 - neighbor_edge_id = findfirst(x->issubset(x,neighbor_edge), faces(neighbor_cell)) + neighbor_edge_id = findfirst(x->issubset(x,neighbor_edge_nodes), faces(neighbor_cell)) edge_neighbor = FaceIndex((neighborid, neighbor_edge_id)) else - neighbor_edge_id = findfirst(x->issubset(x,neighbor_edge), edges(neighbor_cell)) + neighbor_edge_id = findfirst(x->issubset(x,neighbor_edge_nodes), edges(neighbor_cell)) edge_neighbor = EdgeIndex((neighborid, neighbor_edge_id)) end - cell_edge_id = findfirst(x->issubset(x,neighbor_edge),edges(cell)) + cell_edge_id = findfirst(x->issubset(x,neighbor_edge_nodes), edges(cell)) push!(V_edge, EntityNeighborhood(edge_neighbor)) push!(I_edge, cellid) push!(J_edge, cell_edge_id) end -function _face_neighbor!(V_face, I_face, J_face, cellid, cell, neighbor, neighborid, neighbor_cell) +function _face_neighbor!(V_face::Vector{EntityNeighborhood}, I_face::Vector{Int}, J_face::Vector{Int}, cellid::Int, cell::AbstractCell, neighbor::Vector{Int}, neighborid::Int, neighbor_cell::AbstractCell) neighbor_face = neighbor_cell.nodes[neighbor] if getdim(neighbor_cell) == getdim(cell) neighbor_face_id = findfirst(x->issubset(x,neighbor_face), faces(neighbor_cell)) face_neighbor = FaceIndex((neighborid, neighbor_face_id)) - else + elseif getdim(neighbor_cell) == 2 && getdim(cell) == 3 neighbor_face_id = findfirst(x->issubset(x,neighbor_face), edges(neighbor_cell)) face_neighbor = EdgeIndex((neighborid, neighbor_face_id)) + elseif getdim(neighbor_cell) == 1 && getdim(cell) == 3 + neighbor_face_id = findfirst(x->issubset(x,neighbor_face), edges(neighbor_cell)) + face_neighbor = VertexIndex((neighborid, neighbor_face_id)) end cell_face_id = findfirst(x->issubset(x,neighbor_face),faces(cell)) push!(V_face, EntityNeighborhood(face_neighbor)) @@ -325,14 +336,114 @@ function _face_neighbor!(V_face, I_face, J_face, cellid, cell, neighbor, neighbo push!(J_face, cell_face_id) end -getcells(neighbor::EntityNeighborhood{T}) where T <: BoundaryIndex = first.(neighbor.neighbor_info) -getcells(neighbor::EntityNeighborhood{CellIndex}) = getproperty.(neighbor.neighbor_info, :idx) -getcells(neighbors::Vector{T}) where T <: EntityNeighborhood = reduce(vcat, getcells.(neighbors)) -getcells(neighbors::Vector{T}) where T <: BoundaryIndex = getindex.(neighbors,1) +""" + CoverTopology(cells::Vector{C}) where C <: AbstractCell -abstract type AbstractGrid{dim} end +`CoverTopology` stores the intuitive neighborhood information of a grid. Here the +neighborhood is a set of similar entities which fully cover each other. 
+""" +struct CoverTopology <: AbstractTopology + # maps a global vertex id to all cells containing the vertex + vertex_to_cell::Dict{Int,Vector{Int}} + # index of the vector = cell id -> all other connected cells + cell_neighbor::Vector{EntityNeighborhood{CellIndex}} + # face_neighbor[cellid,local_face_id] -> exclusive connected entities (not restricted to one entity) + face_neighbor::SparseMatrixCSC{EntityNeighborhood,Int} + # vertex_neighbor[cellid,local_vertex_id] -> exclusive connected entities to the given vertex + vertex_neighbor::SparseMatrixCSC{EntityNeighborhood,Int} + # edge_neighbor[cellid,local_edge_id] -> exclusive connected entities of the given edge + edge_neighbor::SparseMatrixCSC{EntityNeighborhood,Int} + # list of unique faces in the grid given as FaceIndex + face_skeleton::Vector{FaceIndex} +end -ExclusiveTopology(grid::AbstractGrid) = ExclusiveTopology(getcells(grid)) +""" +""" +function CoverTopology(cells::Vector{C}) where C <: AbstractCell + cell_vertices_table = vertices.(cells) #needs generic interface for <: AbstractCell + vertex_cell_table = Dict{Int,Vector{Int}}() + + for (cellid, cell_nodes) in enumerate(cell_vertices_table) + for node in cell_nodes + if haskey(vertex_cell_table, node) + push!(vertex_cell_table[node], cellid) + else + vertex_cell_table[node] = [cellid] + end + end + end + + I_face = Int[]; J_face = Int[]; V_face = EntityNeighborhood[] + I_edge = Int[]; J_edge = Int[]; V_edge = EntityNeighborhood[] + I_vertex = Int[]; J_vertex = Int[]; V_vertex = EntityNeighborhood[] + cell_neighbor_table = Vector{EntityNeighborhood{CellIndex}}(undef, length(cells)) + + for (cellid, cell) in enumerate(cells) + #cell neighborhood + cell_neighbors = getindex.((vertex_cell_table,), cell_vertices_table[cellid]) # cell -> vertex -> cell + cell_neighbors = unique(reduce(vcat,cell_neighbors)) # non unique list initially + filter!(x->x!=cellid, cell_neighbors) # get rid of self neighborhood + cell_neighbor_table[cellid] = EntityNeighborhood(CellIndex.(cell_neighbors)) + + for neighbor in cell_neighbors + neighbor_local_ids = findall(x->x in cell.nodes, cells[neighbor].nodes) + cell_local_ids = findall(x->x in cells[neighbor].nodes, cell.nodes) + # cells are connected via exactly one vertex + if length(cell_local_ids) == 1 + _vertex_neighbor!(V_vertex, I_vertex, J_vertex, cellid, cell, neighbor_local_ids, neighbor, cells[neighbor]) + # cells are only connected via exactly one face + elseif length(cell_local_ids) == face_npoints(cell) + _face_neighbor!(V_face, I_face, J_face, cellid, cell, neighbor_local_ids, neighbor, cells[neighbor]) + # Add edges on face + if getdim(cell) > 2 + for cell_edge_nodes ∈ edges(cell) + neighbor_edge_local_ids = findall(x->x ∈ cell_edge_nodes, cells[neighbor].nodes) + if length(neighbor_edge_local_ids) == edge_npoints(cell) + _edge_neighbor!(V_edge, I_edge, J_edge, cellid, cell, neighbor_edge_local_ids, neighbor, cells[neighbor]) + end + end + end + # Add vertices on face + for cell_vertex_node ∈ vertices(cell) + neighbor_vertex_local_id = findall(x->x == cell_vertex_node, cells[neighbor].nodes) + if length(neighbor_vertex_local_id) == 1 + _vertex_neighbor!(V_vertex, I_vertex, J_vertex, cellid, cell, neighbor_vertex_local_id, neighbor, cells[neighbor]) + end + end + # cells are only connected via exactly one edge + elseif getdim(cell) > 2 && length(cell_local_ids) == edge_npoints(cell) + _edge_neighbor!(V_edge, I_edge, J_edge, cellid, cell, neighbor_local_ids, neighbor, cells[neighbor]) + # Add vertices on edge + for cell_vertex_nodes ∈ 
vertices(cell) + neighbor_vertex_local_id = findall(x->x == cell_vertex_nodes, cells[neighbor].nodes) + if length(neighbor_vertex_local_id) == 1 + _vertex_neighbor!(V_vertex, I_vertex, J_vertex, cellid, cell, neighbor_vertex_local_id, neighbor, cells[neighbor]) + end + end + end + end + end + + face_neighbor = sparse(I_face,J_face,V_face) + vertex_neighbor = sparse(I_vertex,J_vertex,V_vertex) + edge_neighbor = sparse(I_edge,J_edge,V_edge) + + # Face Skeleton + face_skeleton_global = Set{NTuple}() + face_skeleton_local = Vector{FaceIndex}() + fs_length = length(face_skeleton_global) + for (cellid,cell) in enumerate(cells) + for (local_face_id,face) in enumerate(faces(cell)) + push!(face_skeleton_global, sortface(face)) + fs_length_new = length(face_skeleton_global) + if fs_length != fs_length_new + push!(face_skeleton_local, FaceIndex(cellid,local_face_id)) + fs_length = fs_length_new + end + end + end + return CoverTopology(vertex_cell_table,cell_neighbor_table,face_neighbor,vertex_neighbor,edge_neighbor,face_skeleton_local) +end """ Grid{dim, C<:AbstractCell, T<:Real} <: AbstractGrid} @@ -379,10 +490,10 @@ end # Grid utility functions # ########################## """ - getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, cellidx::CellIndex, include_self=false) - getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, faceidx::FaceIndex, include_self=false) - getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, vertexidx::VertexIndex, include_self=false) - getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, edgeidx::EdgeIndex, include_self=false) + getneighborhood(top::CoverTopology, grid::AbstractGrid, cellidx::CellIndex, include_self=false) + getneighborhood(top::CoverTopology, grid::AbstractGrid, faceidx::FaceIndex, include_self=false) + getneighborhood(top::CoverTopology, grid::AbstractGrid, vertexidx::VertexIndex, include_self=false) + getneighborhood(top::CoverTopology, grid::AbstractGrid, edgeidx::EdgeIndex, include_self=false) Returns all directly connected entities of the same type, i.e. calling the function with a `VertexIndex` will return a list of directly connected vertices (connected via face/edge). If `include_self` is true, the given `*Index` is included @@ -391,7 +502,7 @@ in the returned list. !!! warning This feature is highly experimental and very likely subjected to interface changes in the future. """ -function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, cellidx::CellIndex, include_self=false) +function getneighborhood(top::CoverTopology, grid::AbstractGrid, cellidx::CellIndex, include_self=false) patch = getcells(top.cell_neighbor[cellidx.idx]) if include_self return [patch; cellidx.idx] @@ -400,8 +511,7 @@ function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, cellidx::Ce end end -function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, faceidx::FaceIndex, include_self=false) - # TODO cleaner solution... +function getneighborhood(top::CoverTopology, grid::AbstractGrid, faceidx::FaceIndex, include_self=false) data = faceidx[2] <= size(top.face_neighbor, 2) ? 
top.face_neighbor[faceidx[1],faceidx[2]].neighbor_info : [] if include_self return [data; faceidx] @@ -410,49 +520,32 @@ function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, faceidx::Fa end end -function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, vertexidx::VertexIndex, include_self=false) - cellid, local_vertexid = vertexidx[1], vertexidx[2] - cell_vertices = vertices(getcells(grid,cellid)) - global_vertexid = cell_vertices[local_vertexid] +function getneighborhood(top::CoverTopology, grid::AbstractGrid, vertexidx::VertexIndex, include_self=false) if include_self - vertex_to_cell = top.vertex_to_cell[global_vertexid] - self_reference_local = Vector{VertexIndex}(undef,length(vertex_to_cell)) - for (i,cellid) in enumerate(vertex_to_cell) - local_vertex = VertexIndex(cellid,findfirst(x->x==global_vertexid,vertices(getcells(grid,cellid)))) - self_reference_local[i] = local_vertex - end - return [top.vertex_vertex_neighbor[global_vertexid].neighbor_info; self_reference_local] + return [top.vertex_neighbor[vertexidx[1], vertexidx[2]].neighbor_info; vertexidx] else - return top.vertex_vertex_neighbor[global_vertexid].neighbor_info + return top.vertex_neighbor[vertexidx[1], vertexidx[2]].neighbor_info end end -function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid{3}, edgeidx::EdgeIndex, include_self=false) - cellid, local_edgeidx = edgeidx[1], edgeidx[2] - cell_edges = edges(getcells(grid,cellid)) - nonlocal_edgeid = cell_edges[local_edgeidx] - cell_neighbors = getneighborhood(top,grid,CellIndex(cellid)) - self_reference_local = EdgeIndex[] - for cellid in cell_neighbors - local_neighbor_edgeid = findfirst(x->issubset(x,nonlocal_edgeid),edges(getcells(grid,cellid))) - local_neighbor_edgeid === nothing && continue - local_edge = EdgeIndex(cellid,local_neighbor_edgeid) - push!(self_reference_local, local_edge) - end - if include_self - return unique([top.edge_neighbor[cellid, local_edgeidx].neighbor_info; self_reference_local; edgeidx]) +function getneighborhood(top::CoverTopology, grid::AbstractGrid{3}, edgeidx::EdgeIndex, include_self=false) + if include_self + return [top.edge_neighbor[edgeidx[1], edgeidx[2]].neighbor_info; edgeidx] else - return unique([top.edge_neighbor[cellid, local_edgeidx].neighbor_info; self_reference_local]) + return top.edge_neighbor[edgeidx[1], edgeidx[2]].neighbor_info end end +CoverTopology(grid::AbstractGrid) = CoverTopology(getcells(grid)) + + """ - faceskeleton(grid) -> Vector{FaceIndex} + faceskeleton(topology, grid) -> Vector{FaceIndex} Returns an iterateable face skeleton. The skeleton consists of `FaceIndex` that can be used to `reinit` `FaceValues`. """ -faceskeleton(top::ExclusiveTopology, grid::AbstractGrid) = top.face_skeleton +faceskeleton(top::AbstractTopology, grid::AbstractGrid) = top.face_skeleton """ toglobal(grid::AbstractGrid, vertexidx::VertexIndex) -> Int diff --git a/src/exports.jl b/src/exports.jl index 1019f12150..123e03f257 100644 --- a/src/exports.jl +++ b/src/exports.jl @@ -59,6 +59,7 @@ export FaceIndex, EdgeIndex, VertexIndex, + CoverTopology, ExclusiveTopology, getneighborhood, faceskeleton, From 1edc6570c4275418676a059e962e1949a87e6d2f Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Tue, 21 Feb 2023 15:19:56 +0100 Subject: [PATCH 118/124] Improve distribute mesh api. 
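For orientation, a minimal serial sketch of how the topology queries added in the previous patch might be used. Illustrative only: the grid size, the entity indices and the variable names are arbitrary, and `getneighborhood` is flagged as experimental in its docstring.

    using Ferrite

    # Build a small serial grid and the new cover topology for it.
    grid = generate_grid(Quadrilateral, (2, 2))
    topo = CoverTopology(grid)

    # All cells connected to cell 1, optionally including cell 1 itself.
    cell_patch = getneighborhood(topo, grid, CellIndex(1), true)

    # Entities connected to a local face / vertex of cell 1.
    face_nbs   = getneighborhood(topo, grid, FaceIndex(1, 2))
    vertex_nbs = getneighborhood(topo, grid, VertexIndex(1, 3))

    # Unique faces of the grid, e.g. to reinit FaceValues on the skeleton.
    skeleton = faceskeleton(topo, grid)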
--- ext/FerriteHYPRE/conversion.jl | 17 ++++++++++------- ext/FerriteMPI/DistributedDofHandler.jl | 9 ++++++--- ext/FerriteMPI/DistributedGrid.jl | 10 +++++----- ext/FerriteMPI/vtk-export.jl | 6 +++--- src/Grid/DistributedGrid.jl | 6 +++--- 5 files changed, 27 insertions(+), 21 deletions(-) diff --git a/ext/FerriteHYPRE/conversion.jl b/ext/FerriteHYPRE/conversion.jl index 43d5c97495..ffe4cf15af 100644 --- a/ext/FerriteHYPRE/conversion.jl +++ b/ext/FerriteHYPRE/conversion.jl @@ -26,7 +26,8 @@ function hypre_to_ferrite!(u::Vector{T}, uh::HYPREVector, dh::Ferrite.AbstractDo # TODO speed this up and better API dgrid = getglobalgrid(dh) - for (lvi, sv) ∈ get_shared_vertices(dgrid) + for sv ∈ get_shared_vertices(dgrid) + lvi = sv.local_idx my_rank != compute_owner(dgrid, sv) && continue for field_idx in 1:num_fields(dh) if Ferrite.has_vertex_dofs(dh, field_idx, lvi) @@ -42,11 +43,12 @@ function hypre_to_ferrite!(u::Vector{T}, uh::HYPREVector, dh::Ferrite.AbstractDo end end - for (lvi, se) ∈ get_shared_edges(dgrid) + for se ∈ get_shared_edges(dgrid) + lei = se.local_idx my_rank != compute_owner(dgrid, se) && continue for field_idx in 1:num_fields(dh) - if Ferrite.has_edge_dofs(dh, field_idx, lvi) - local_dofs = Ferrite.edge_dofs(dh, field_idx, lvi) + if Ferrite.has_edge_dofs(dh, field_idx, lei) + local_dofs = Ferrite.edge_dofs(dh, field_idx, lei) global_dofs = dh.ldof_to_gdof[local_dofs] for receiver_rank ∈ keys(remote_entities(se)) for i ∈ 1:length(global_dofs) @@ -58,11 +60,12 @@ function hypre_to_ferrite!(u::Vector{T}, uh::HYPREVector, dh::Ferrite.AbstractDo end end - for (lvi, sf) ∈ get_shared_faces(dgrid) + for sf ∈ get_shared_faces(dgrid) + lfi = sf.local_idx my_rank != compute_owner(dgrid, sf) && continue for field_idx in 1:num_fields(dh) - if Ferrite.has_face_dofs(dh, field_idx, lvi) - local_dofs = Ferrite.face_dofs(dh, field_idx, lvi) + if Ferrite.has_face_dofs(dh, field_idx, lfi) + local_dofs = Ferrite.face_dofs(dh, field_idx, lfi) global_dofs = dh.ldof_to_gdof[local_dofs] for receiver_rank ∈ keys(remote_entities(sf)) for i ∈ 1:length(global_dofs) diff --git a/ext/FerriteMPI/DistributedDofHandler.jl b/ext/FerriteMPI/DistributedDofHandler.jl index 4cc01e766a..94c9525b67 100644 --- a/ext/FerriteMPI/DistributedDofHandler.jl +++ b/ext/FerriteMPI/DistributedDofHandler.jl @@ -79,7 +79,8 @@ function compute_dof_ownership(dh::DistributedDofHandler) dof_owner = Vector{Int}(undef,ndofs(dh)) fill!(dof_owner, my_rank) - for (lvi, sv) ∈ get_shared_vertices(dgrid) + for sv ∈ get_shared_vertices(dgrid) + lvi = sv.local_idx for field_idx in 1:num_fields(dh) if Ferrite.has_vertex_dofs(dh, field_idx, lvi) local_dofs = Ferrite.vertex_dofs(dh, field_idx, lvi) @@ -88,7 +89,8 @@ function compute_dof_ownership(dh::DistributedDofHandler) end end - for (lfi, sf) ∈ get_shared_faces(dgrid) + for sf ∈ get_shared_faces(dgrid) + lfi = sf.local_idx for field_idx in 1:num_fields(dh) if Ferrite.has_face_dofs(dh, field_idx, lfi) local_dofs = Ferrite.face_dofs(dh, field_idx, lfi) @@ -97,7 +99,8 @@ function compute_dof_ownership(dh::DistributedDofHandler) end end - for (lei, se) ∈ get_shared_edges(dgrid) + for se ∈ get_shared_edges(dgrid) + lei = se.local_idx for field_idx in 1:num_fields(dh) if Ferrite.has_edge_dofs(dh, field_idx, lei) local_dofs = Ferrite.edge_dofs(dh, field_idx, lei) diff --git a/ext/FerriteMPI/DistributedGrid.jl b/ext/FerriteMPI/DistributedGrid.jl index 655dfcba12..485ec3da94 100644 --- a/ext/FerriteMPI/DistributedGrid.jl +++ b/ext/FerriteMPI/DistributedGrid.jl @@ -65,11 +65,11 @@ Get the rank on the 
global communicator of the distributed grid. """ """ function DistributedGrid(grid_to_distribute::Grid{dim,C,T}; grid_comm::MPI.Comm = MPI.COMM_WORLD, partition_alg = :RECURSIVE) where {dim,C,T} - grid_topology = ExclusiveTopology(grid_to_distribute) + grid_topology = CoverTopology(grid_to_distribute) return DistributedGrid(grid_to_distribute, grid_topology, grid_comm; partition_alg=partition_alg) end -function create_partitioning(grid::Grid{dim,C,T}, grid_topology::ExclusiveTopology, n_partitions, partition_alg) where {dim,C,T} +function create_partitioning(grid::Grid{dim,C,T}, grid_topology::CoverTopology, n_partitions, partition_alg) where {dim,C,T} n_cells_global = getncells(grid) @assert n_cells_global > 0 @@ -104,18 +104,18 @@ end """ """ -function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm; partition_alg = :RECURSIVE) where {dim,C,T} +function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::CoverTopology, grid_comm::MPI.Comm; partition_alg = :RECURSIVE) where {dim,C,T} n_cells_global = getncells(grid_to_distribute) @assert n_cells_global > 0 parts = create_partitioning(grid_to_distribute, grid_topology, MPI.Comm_size(grid_comm), partition_alg) - DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm, parts) + DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::CoverTopology, grid_comm::MPI.Comm, parts) end """ """ -function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::ExclusiveTopology, grid_comm::MPI.Comm, parts::Vector{Int32}) where {dim,C,T} +function DistributedGrid(grid_to_distribute::Grid{dim,C,T}, grid_topology::CoverTopology, grid_comm::MPI.Comm, parts::Vector{Int32}) where {dim,C,T} n_cells_global = getncells(grid_to_distribute) @assert n_cells_global > 0 # Empty input mesh... 
diff --git a/ext/FerriteMPI/vtk-export.jl b/ext/FerriteMPI/vtk-export.jl index b887d65e68..fde55b0557 100644 --- a/ext/FerriteMPI/vtk-export.jl +++ b/ext/FerriteMPI/vtk-export.jl @@ -27,7 +27,7 @@ function Ferrite.vtk_shared_vertices(vtk, dgrid::DistributedGrid) my_rank = MPI.Comm_rank(global_comm(dgrid))+1 for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) fill!(u, 0.0) - for sv ∈ values(get_shared_vertices(dgrid)) + for sv ∈ get_shared_vertices(dgrid) if haskey(sv.remote_vertices, rank) (cellidx, i) = sv.local_idx cell = getcells(dgrid, cellidx) @@ -47,7 +47,7 @@ function Ferrite.vtk_shared_faces(vtk, dgrid::DistributedGrid) my_rank = MPI.Comm_rank(global_comm(dgrid))+1 for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) fill!(u, 0.0) - for sf ∈ values(get_shared_faces(dgrid)) + for sf ∈ get_shared_faces(dgrid) if haskey(sf.remote_faces, rank) (cellidx, i) = sf.local_idx cell = getcells(dgrid, cellidx) @@ -68,7 +68,7 @@ function Ferrite.vtk_shared_edges(vtk, dgrid::DistributedGrid) my_rank = MPI.Comm_rank(global_comm(dgrid))+1 for rank ∈ 1:MPI.Comm_size(global_comm(dgrid)) fill!(u, 0.0) - for se ∈ values(get_shared_edges(dgrid)) + for se ∈ get_shared_edges(dgrid) if haskey(se.remote_edges, rank) (cellidx, i) = se.local_idx cell = getcells(dgrid, cellidx) diff --git a/src/Grid/DistributedGrid.jl b/src/Grid/DistributedGrid.jl index 2592644703..5327e64192 100644 --- a/src/Grid/DistributedGrid.jl +++ b/src/Grid/DistributedGrid.jl @@ -15,9 +15,9 @@ remote_entities(::SharedEntity) = error("Not implemented.") """ """ -@inline get_shared_vertices(dgrid::AbstractDistributedGrid) = dgrid.shared_vertices -@inline get_shared_edges(dgrid::AbstractDistributedGrid) = dgrid.shared_edges -@inline get_shared_faces(dgrid::AbstractDistributedGrid) = dgrid.shared_faces +@inline get_shared_vertices(dgrid::AbstractDistributedGrid) = values(dgrid.shared_vertices) +@inline get_shared_edges(dgrid::AbstractDistributedGrid) = values(dgrid.shared_edges) +@inline get_shared_faces(dgrid::AbstractDistributedGrid) = values(dgrid.shared_faces) @inline get_shared_vertex(dgrid::AbstractDistributedGrid, vi::VertexIndex) = dgrid.shared_vertices[vi] @inline get_shared_edge(dgrid::AbstractDistributedGrid, ei::EdgeIndex) = dgrid.shared_edges[ei] From 92b8e059a9453a61ec452d7e9c8df2b917502429 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Tue, 21 Feb 2023 15:20:44 +0100 Subject: [PATCH 119/124] Add test battery for new distributed modules. 
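The adjusted shared-entity accessors above now yield the shared entities directly instead of `index => entity` pairs, so the local index is read from the entity itself. A hedged sketch of the pattern (assumes an MPI session with the FerriteMPI extension loaded; the grid and partitioning details are illustrative):

    using Ferrite, MPI, Metis

    MPI.Init()
    FerriteMPI = Base.get_extension(Ferrite, :FerriteMPI)

    grid  = generate_grid(Hexahedron, (2, 1, 1))
    dgrid = FerriteMPI.DistributedGrid(grid; grid_comm = MPI.COMM_WORLD)

    for sv in Ferrite.get_shared_vertices(dgrid)
        lvi   = sv.local_idx                       # local VertexIndex on this rank
        ranks = keys(Ferrite.remote_entities(sv))  # other ranks that also see this vertex
        # ... exchange dofs/values associated with (lvi, ranks) here ...
    end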
--- test/runtests.jl | 1 + test/test_distributed.jl | 23 +++++ test/test_distributed_impl_2.jl | 54 +++++++++++ test/test_distributed_impl_3.jl | 160 ++++++++++++++++++++++++++++++++ test/test_distributed_impl_5.jl | 46 +++++++++ test/test_mpi_distributed.jl | 67 ------------- 6 files changed, 284 insertions(+), 67 deletions(-) create mode 100644 test/test_distributed.jl create mode 100644 test/test_distributed_impl_2.jl create mode 100644 test/test_distributed_impl_3.jl create mode 100644 test/test_distributed_impl_5.jl delete mode 100644 test/test_mpi_distributed.jl diff --git a/test/runtests.jl b/test/runtests.jl index 5c0e223d21..2b33e185b4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -34,5 +34,6 @@ include("test_pointevaluation.jl") include("test_apply_rhs.jl") include("test_apply_analytical.jl") HAS_EXTENSIONS && include("blockarrays.jl") +HAS_EXTENSIONS && include("test_distributed.jl") include("test_examples.jl") @test all(x -> isdefined(Ferrite, x), names(Ferrite)) # Test that all exported symbols are defined diff --git a/test/test_distributed.jl b/test/test_distributed.jl new file mode 100644 index 0000000000..f6a4232428 --- /dev/null +++ b/test/test_distributed.jl @@ -0,0 +1,23 @@ +using MPI +using Test + +@testset "FerriteMPI n=2" begin + n = 2 # number of processes + mpiexec() do exe # MPI wrapper + run(`$exe -n $n $(Base.julia_cmd()) test_distributed_impl_2.jl`) + end +end + +@testset "FerriteMPI n=3" begin + n = 3 # number of processes + mpiexec() do exe # MPI wrapper + run(`$exe -n $n $(Base.julia_cmd()) test_distributed_impl_3.jl`) + end +end + +@testset "FerriteMPI n=5" begin + n = 5 # number of processes + mpiexec() do exe # MPI wrapper + run(`$exe -n $n $(Base.julia_cmd()) test_distributed_impl_5.jl`) + end +end diff --git a/test/test_distributed_impl_2.jl b/test/test_distributed_impl_2.jl new file mode 100644 index 0000000000..4300da7d7a --- /dev/null +++ b/test/test_distributed_impl_2.jl @@ -0,0 +1,54 @@ +using Ferrite, MPI, Metis +using Test + +MPI.Init() +@testset "setup check 2" begin + @test MPI.Comm_size(MPI.COMM_WORLD) == 2 +end + +FerriteMPI = Base.get_extension(Ferrite, :FerriteMPI) + +@testset "distributed grid construction 2" begin + # We do not cover subcommunicators for now. 
+ comm = MPI.COMM_WORLD + + global_grid = generate_grid(Hexahedron, (2, 1, 1)) + global_topology = CoverTopology(global_grid) + dgrid = FerriteMPI.DistributedGrid(global_grid, global_topology, comm, Int32[2, 1]) + my_rank = Ferrite.global_rank(dgrid) + if my_rank == 1 + # Edges + @test length(Ferrite.get_shared_edges(dgrid)) == 4 + function check_edge_correctly_shared_1(idx_local, idx_nonlocal) + se = Ferrite.get_shared_edge(dgrid, idx_local) + @test Ferrite.remote_entities(se) == Dict(2 => [idx_nonlocal]) + end + check_edge_correctly_shared_1(EdgeIndex(1,4), EdgeIndex(1,2)) + check_edge_correctly_shared_1(EdgeIndex(1,12), EdgeIndex(1,10)) + check_edge_correctly_shared_1(EdgeIndex(1,8), EdgeIndex(1,7)) + check_edge_correctly_shared_1(EdgeIndex(1,5), EdgeIndex(1,6)) + + # Faces + @test length(Ferrite.get_shared_faces(dgrid)) == 1 + sf = Ferrite.get_shared_face(dgrid, FaceIndex(1,5)) + @test Ferrite.remote_entities(sf) == Dict(2 => [FaceIndex(1,3)]) + elseif my_rank == 2 + # Edges + @test length(Ferrite.get_shared_edges(dgrid)) == 4 + function check_edge_correctly_shared_2(idx_nonlocal, idx_local) + se = Ferrite.get_shared_edge(dgrid, idx_local) + @test Ferrite.remote_entities(se) == Dict(1 => [idx_nonlocal]) + end + check_edge_correctly_shared_2(EdgeIndex(1,4), EdgeIndex(1,2)) + check_edge_correctly_shared_2(EdgeIndex(1,12), EdgeIndex(1,10)) + check_edge_correctly_shared_2(EdgeIndex(1,8), EdgeIndex(1,7)) + check_edge_correctly_shared_2(EdgeIndex(1,5), EdgeIndex(1,6)) + + # Faces + @test length(Ferrite.get_shared_faces(dgrid)) == 1 + sf = Ferrite.get_shared_face(dgrid, FaceIndex(1,3)) + @test Ferrite.remote_entities(sf) == Dict(1 => [FaceIndex(1,5)]) + end + MPI.Finalize() +end + diff --git a/test/test_distributed_impl_3.jl b/test/test_distributed_impl_3.jl new file mode 100644 index 0000000000..1225c6378c --- /dev/null +++ b/test/test_distributed_impl_3.jl @@ -0,0 +1,160 @@ +using Ferrite, MPI, Metis +using Test + +MPI.Init() +@testset "setup check 2" begin + @test MPI.Comm_size(MPI.COMM_WORLD) == 3 +end + + +FerriteMPI = Base.get_extension(Ferrite, :FerriteMPI) + +# In this test battery we check for several invariants +# 1. Elements are not rotated or flipped during distribution +# 2. Element index order is preserved during distribution +# 3. Shared entities are setup correctly +# Note that there is no strict ordering requirement on the shared entities! +@testset "distributed grid construction 3" begin + # We do not cover subcommunicators for now. 
+ comm = MPI.COMM_WORLD + + # y + # ^ + # | + # | + # ----> x + # + # Global grid: + # +------+------+ + # | 3 | 4 | + # +------+------+ + # | 1 | 2 | + # +------+------+ + # + # Distributed grid: + # +------+------+ + # | 2[3] | 1[2] | + # +------+------+ + # | 1[1] | 1[3] | + # +------+------+ + # + # With the notation "a[b]" where + # - a denotes the local element index + # - b denotes the rank + # + @testset "Quadrilateral" begin + grid = generate_grid(Quadrilateral, (2,2)) + topo = CoverTopology(grid) + dgrid = FerriteMPI.DistributedGrid(grid, topo, comm, Int32[1,3,3,2]) + + my_rank = Ferrite.global_rank(dgrid) + if my_rank == 1 + lgrid = getlocalgrid(dgrid) + @test getncells(lgrid) == 1 + non_shared_vertices = [VertexIndex(1,1)] + non_shared_faces = [FaceIndex(1,1), FaceIndex(1,4)] + elseif my_rank == 2 + lgrid = getlocalgrid(dgrid) + @test getncells(lgrid) == 1 + non_shared_vertices = [VertexIndex(1,3)] + non_shared_faces = [FaceIndex(1,2), FaceIndex(1,3)] + elseif my_rank == 3 + lgrid = getlocalgrid(dgrid) + @test getncells(lgrid) == 2 + non_shared_vertices = [VertexIndex(1,2), VertexIndex(2,4)] + non_shared_faces = [FaceIndex(1,1), FaceIndex(1,2), FaceIndex(2,3), FaceIndex(2,4)] + else + # Abstract machine or memory corruption during exectution above. + @test false + end + + for sv ∈ get_shared_vertices(dgrid) + @test sv.local_idx ∉ Set(non_shared_vertices) + end + for v ∈ non_shared_vertices + @test !is_shared_vertex(dgrid, v) + end + for sf ∈ get_shared_faces(dgrid) + @test sf.local_idx ∉ Set(non_shared_faces) + end + for f ∈ non_shared_faces + @test !is_shared_face(dgrid, f) + end + end + + # y + # ^ z + # | / + # |/ + # ----> x + # Global grid: + # +------+------+ + # /| 3 | 4 | + # + +------+------+ + # |/| 1 | 2 | + # + +------+------+ + # |/ / / + # +------+------+ + # + # Distributed grid: + # +------+------+ + # /| 2[3] | 1[2] | + # + +------+------+ + # |/| 1[1] | 1[3] | + # + +------+------+ + # |/ / / + # +------+------+ + # + # With the notation "a[b]" where + # - a denotes the local element index + # - b denotes the rank + # + @testset "Hexahedron" begin + grid = generate_grid(Hexahedron, (2,2,1)) + topo = CoverTopology(grid) + dgrid = FerriteMPI.DistributedGrid(grid, topo, comm, Int32[1,3,3,2]) + + my_rank = Ferrite.global_rank(dgrid) + if my_rank == 1 + lgrid = getlocalgrid(dgrid) + @test getncells(lgrid) == 1 + non_shared_vertices = [VertexIndex(1,1), VertexIndex(1,5)] + non_shared_faces = [FaceIndex(1,1), FaceIndex(1,6), FaceIndex(1,2), FaceIndex(1,5)] + non_shared_edges = [EdgeIndex(1,1), EdgeIndex(1,4), EdgeIndex(1,5), EdgeIndex(1,9), EdgeIndex(1,12)] + elseif my_rank == 2 + lgrid = getlocalgrid(dgrid) + @test getncells(lgrid) == 1 + non_shared_vertices = [VertexIndex(1,3), VertexIndex(1,7)] + non_shared_faces = [FaceIndex(1,1), FaceIndex(1,6), FaceIndex(1,3), FaceIndex(1,4)] + non_shared_edges = [EdgeIndex(1,2), EdgeIndex(1,10), EdgeIndex(1,3), EdgeIndex(1,11), EdgeIndex(1,7)] + elseif my_rank == 3 + lgrid = getlocalgrid(dgrid) + @test getncells(lgrid) == 2 + non_shared_vertices = [VertexIndex(1,2), VertexIndex(1,6), VertexIndex(2,4), VertexIndex(2,8)] + non_shared_faces = [FaceIndex(1,1), FaceIndex(1,6), FaceIndex(1,2), FaceIndex(1,3), FaceIndex(2,1), FaceIndex(2,6), FaceIndex(2,5), FaceIndex(2,4)] + non_shared_edges = [EdgeIndex(1,1), EdgeIndex(1,9), EdgeIndex(1,6), EdgeIndex(1,2), EdgeIndex(1,10), EdgeIndex(2,4), EdgeIndex(2,12), EdgeIndex(2,8), EdgeIndex(2,3), EdgeIndex(2,11)] + else + # Abstract machine or memory corruption during exectution above. 
+ @test false + end + + for sv ∈ get_shared_vertices(dgrid) + @test sv.local_idx ∉ Set(non_shared_vertices) + end + for v ∈ non_shared_vertices + @test !is_shared_vertex(dgrid, v) + end + for sf ∈ get_shared_faces(dgrid) + @test sf.local_idx ∉ Set(non_shared_faces) + end + for f ∈ non_shared_faces + @test !is_shared_face(dgrid, f) + end + for se ∈ get_shared_edges(dgrid) + @test se.local_idx ∉ Set(non_shared_edges) + end + for e ∈ non_shared_edges + @test !is_shared_edge(dgrid, e) + end + end +end diff --git a/test/test_distributed_impl_5.jl b/test/test_distributed_impl_5.jl new file mode 100644 index 0000000000..9d24ba5e53 --- /dev/null +++ b/test/test_distributed_impl_5.jl @@ -0,0 +1,46 @@ +using Ferrite, MPI, Metis +using Test + +MPI.Init() +@testset "setup check 2" begin + @test MPI.Comm_size(MPI.COMM_WORLD) == 5 +end + +FerriteMPI = Base.get_extension(Ferrite, :FerriteMPI) + +# We arbitrarily test a quite hard case with many corner +# cases in 2D to catch regressions. +@testset "distributed dof distribution 5" begin + # We do not cover subcommunicators for now. + comm = MPI.COMM_WORLD + + dim = 2 + ref = RefCube + ip = Lagrange{dim, ref, 1}() + global_grid = generate_grid(Quadrilateral, (3, 3)) + global_topology = CoverTopology(global_grid) + dgrid = FerriteMPI.DistributedGrid(global_grid, global_topology, comm, Int32[3,3,4,2,5,4,1,2,5]) + my_rank = Ferrite.global_rank(dgrid) + + dh = DofHandler(dgrid) + push!(dh, :u, 1, ip) + close!(dh); + + @test length(dh.ldof_to_gdof) == length(dh.ldof_to_rank) + if my_rank == 1 + @test dh.ldof_to_gdof == [1,2,3,4] + @test dh.ldof_to_rank == [1,1,1,1] + elseif my_rank == 2 + @test dh.ldof_to_gdof == [5,6,2,1,7,8,3] + @test dh.ldof_to_rank == [2,2,1,1,2,2,1] + elseif my_rank == 3 + @test dh.ldof_to_gdof == [9,10, 6, 5,11,12] + @test dh.ldof_to_rank == [3, 3, 2, 2, 3, 3] + elseif my_rank == 4 + @test dh.ldof_to_gdof == [11,13,14,12,15, 7] + @test dh.ldof_to_rank == [ 3, 4, 4, 3, 4, 2] + elseif my_rank == 5 + @test dh.ldof_to_gdof == [6,12, 7, 2,15,16, 8] + @test dh.ldof_to_rank == [2, 3, 2, 1, 4, 5, 2] + end +end diff --git a/test/test_mpi_distributed.jl b/test/test_mpi_distributed.jl deleted file mode 100644 index 89a3bb7a6f..0000000000 --- a/test/test_mpi_distributed.jl +++ /dev/null @@ -1,67 +0,0 @@ -using Test, Ferrite, MPI - -# @testset "dof distribution" begin -# MPI.Init() -# my_rank = MPI.Comm_rank(MPI.COMM_WORLD)+1 - -# dim = 2 -# ref = RefCube -# ip = Lagrange{dim, ref, 1}() -# global_grid = generate_grid(Quadrilateral, (3, 3)) -# global_topology = ExclusiveTopology(global_grid) -# dgrid = DistributedGrid(global_grid, global_topology, MPI.COMM_WORLD, Int32[3,3,4,2,5,4,1,2,5]) - -# dh = DistributedDofHandler(dgrid) -# push!(dh, :u, 1, ip) -# close!(dh); - -# @test length(dh.ldof_to_gdof) == length(dh.ldof_to_rank) -# if my_rank == 1 -# @test dh.ldof_to_gdof == [1,2,3,4] -# @test dh.ldof_to_rank == [1,1,1,1] -# elseif my_rank == 2 -# @test dh.ldof_to_gdof == [5,6,2,1,7,8,3] -# @test dh.ldof_to_rank == [2,2,1,1,2,2,1] -# elseif my_rank == 3 -# @test dh.ldof_to_gdof == [9,10, 6, 5,11,12] -# @test dh.ldof_to_rank == [3, 3, 2, 2, 3, 3] -# elseif my_rank == 4 -# @test dh.ldof_to_gdof == [11,13,14,12,15, 7] -# @test dh.ldof_to_rank == [ 3, 4, 4, 3, 4, 2] -# elseif my_rank == 5 -# @test dh.ldof_to_gdof == [6,12, 7, 2,15,16, 8] -# @test dh.ldof_to_rank == [2, 3, 2, 1, 4, 5, 2] -# end -# MPI.Finalize() -# end - -@testset "distributed grid generation" begin - MPI.Init() - my_rank = MPI.Comm_rank(MPI.COMM_WORLD)+1 - - global_grid = 
generate_grid(Hexahedron, (2, 1, 1)) - global_topology = ExclusiveTopology(global_grid) - dgrid = DistributedGrid(global_grid, global_topology, MPI.COMM_WORLD, Int32[2, 1]) - if my_rank == 1 - @test length(Ferrite.get_shared_edges(dgrid)) == 4 - function check_edge_correctly_shared_1(idx_local, idx_nonlocal) - se = Ferrite.get_shared_edge(dgrid, idx_local) - @test Ferrite.remote_entities(se) == Dict(2 => [idx_nonlocal]) - end - check_edge_correctly_shared_1(EdgeIndex(1,4), EdgeIndex(1,2)) - check_edge_correctly_shared_1(EdgeIndex(1,9), EdgeIndex(1,10)) - check_edge_correctly_shared_1(EdgeIndex(1,12), EdgeIndex(1,11)) - check_edge_correctly_shared_1(EdgeIndex(1,8), EdgeIndex(1,6)) - elseif my_rank == 2 - @test length(Ferrite.get_shared_edges(dgrid)) == 4 - function check_edge_correctly_shared_2(idx_nonlocal, idx_local) - se = Ferrite.get_shared_edge(dgrid, idx_local) - @test Ferrite.remote_entities(se) == Dict(1 => [idx_nonlocal]) - end - check_edge_correctly_shared_2(EdgeIndex(1,4), EdgeIndex(1,2)) - check_edge_correctly_shared_2(EdgeIndex(1,9), EdgeIndex(1,10)) - check_edge_correctly_shared_2(EdgeIndex(1,12), EdgeIndex(1,11)) - check_edge_correctly_shared_2(EdgeIndex(1,8), EdgeIndex(1,6)) - end - MPI.Finalize() -end From 491f0ea4520b889927c33feab9615e4c6be110a8 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Tue, 21 Feb 2023 15:55:46 +0100 Subject: [PATCH 120/124] Fix oopsie. --- src/Grid/grid.jl | 64 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/src/Grid/grid.jl b/src/Grid/grid.jl index 9ecebccd08..bd78c9392c 100644 --- a/src/Grid/grid.jl +++ b/src/Grid/grid.jl @@ -489,6 +489,70 @@ end ########################## # Grid utility functions # ########################## +""" + getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, cellidx::CellIndex, include_self=false) + getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, faceidx::FaceIndex, include_self=false) + getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, vertexidx::VertexIndex, include_self=false) + getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, edgeidx::EdgeIndex, include_self=false) +Returns all directly connected entities of the same type, i.e. calling the function with a `VertexIndex` will return +a list of directly connected vertices (connected via face/edge). If `include_self` is true, the given `*Index` is included +in the returned list. +!!! warning + This feature is highly experimental and very likely subjected to interface changes in the future. 
+""" +function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, cellidx::CellIndex, include_self=false) + patch = getcells(top.cell_neighbor[cellidx.idx]) + if include_self + return [patch; cellidx.idx] + else + return patch + end +end + +function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, faceidx::FaceIndex, include_self=false) + if include_self + return [top.face_neighbor[faceidx[1],faceidx[2]].neighbor_info; faceidx] + else + return top.face_neighbor[faceidx[1],faceidx[2]].neighbor_info + end +end + +function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid, vertexidx::VertexIndex, include_self=false) + cellid, local_vertexid = vertexidx[1], vertexidx[2] + cell_vertices = vertices(getcells(grid,cellid)) + global_vertexid = cell_vertices[local_vertexid] + if include_self + vertex_to_cell = top.vertex_to_cell[global_vertexid] + self_reference_local = Vector{VertexIndex}(undef,length(vertex_to_cell)) + for (i,cellid) in enumerate(vertex_to_cell) + local_vertex = VertexIndex(cellid,findfirst(x->x==global_vertexid,vertices(getcells(grid,cellid)))) + self_reference_local[i] = local_vertex + end + return [top.vertex_vertex_neighbor[global_vertexid].neighbor_info; self_reference_local] + else + return top.vertex_vertex_neighbor[global_vertexid].neighbor_info + end +end + +function getneighborhood(top::ExclusiveTopology, grid::AbstractGrid{3}, edgeidx::EdgeIndex, include_self=false) + cellid, local_edgeidx = edgeidx[1], edgeidx[2] + cell_edges = edges(getcells(grid,cellid)) + nonlocal_edgeid = cell_edges[local_edgeidx] + cell_neighbors = getneighborhood(top,grid,CellIndex(cellid)) + self_reference_local = EdgeIndex[] + for cellid in cell_neighbors + local_neighbor_edgeid = findfirst(x->issubset(x,nonlocal_edgeid),edges(getcells(grid,cellid))) + local_neighbor_edgeid === nothing && continue + local_edge = EdgeIndex(cellid,local_neighbor_edgeid) + push!(self_reference_local, local_edge) + end + if include_self + return unique([top.edge_neighbor[cellid, local_edgeidx].neighbor_info; self_reference_local; edgeidx]) + else + return unique([top.edge_neighbor[cellid, local_edgeidx].neighbor_info; self_reference_local]) + end +end + """ getneighborhood(top::CoverTopology, grid::AbstractGrid, cellidx::CellIndex, include_self=false) getneighborhood(top::CoverTopology, grid::AbstractGrid, faceidx::FaceIndex, include_self=false) From 0b4c7e7cdb8ad214dc48cac73eecefe410566d39 Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Tue, 21 Feb 2023 16:34:06 +0100 Subject: [PATCH 121/124] Revert changes to face neighborhood construction helper. 
--- src/Grid/grid.jl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Grid/grid.jl b/src/Grid/grid.jl index bd78c9392c..392e208c2b 100644 --- a/src/Grid/grid.jl +++ b/src/Grid/grid.jl @@ -323,12 +323,9 @@ function _face_neighbor!(V_face::Vector{EntityNeighborhood}, I_face::Vector{Int} if getdim(neighbor_cell) == getdim(cell) neighbor_face_id = findfirst(x->issubset(x,neighbor_face), faces(neighbor_cell)) face_neighbor = FaceIndex((neighborid, neighbor_face_id)) - elseif getdim(neighbor_cell) == 2 && getdim(cell) == 3 + else neighbor_face_id = findfirst(x->issubset(x,neighbor_face), edges(neighbor_cell)) face_neighbor = EdgeIndex((neighborid, neighbor_face_id)) - elseif getdim(neighbor_cell) == 1 && getdim(cell) == 3 - neighbor_face_id = findfirst(x->issubset(x,neighbor_face), edges(neighbor_cell)) - face_neighbor = VertexIndex((neighborid, neighbor_face_id)) end cell_face_id = findfirst(x->issubset(x,neighbor_face),faces(cell)) push!(V_face, EntityNeighborhood(face_neighbor)) From 27bfeb29e56172c3ca3eb7781e715832a334ac2e Mon Sep 17 00:00:00 2001 From: Dennis Ogiermann Date: Tue, 21 Feb 2023 16:34:43 +0100 Subject: [PATCH 122/124] Fix remaining inconsistency between geometry and algebra in hexahedral elements. --- src/Grid/grid.jl | 44 +++++++++++++-- src/interpolations.jl | 98 +++++++++++++++++++++++++++++---- test/test_distributed_impl_2.jl | 12 ++-- test/test_distributed_impl_3.jl | 6 +- test/test_distributed_impl_5.jl | 2 +- 5 files changed, 138 insertions(+), 24 deletions(-) diff --git a/src/Grid/grid.jl b/src/Grid/grid.jl index 392e208c2b..e9efa909d6 100644 --- a/src/Grid/grid.jl +++ b/src/Grid/grid.jl @@ -971,6 +971,7 @@ end # Functions to uniquely identify vertices, edges and faces, used when distributing # dofs over a mesh. For this we can ignore the nodes on edged, faces and inside cells, # we only need to use the nodes that are vertices. +# NOTE: These are required to be consistent with the corresponding geometric default interpolation. 
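# A sketch of the consistency requirement stated in the note above: for a cell's default
# geometric interpolation, the k-th local edge/face of the interpolation should pick out
# exactly the nodes returned by the k-th edge/face of the cell. Illustrative only; the
# check below is not part of this patch, and the entity accessors are qualified since
# their export status is not assumed here.
using Ferrite
cell = Ferrite.getcells(generate_grid(Hexahedron, (1, 1, 1)), 1)
ip   = Lagrange{3, RefCube, 1}()
all(Ferrite.edges(cell)[k] == map(i -> cell.nodes[i], Ferrite.edges(ip)[k]) for k in 1:12)  # expected: true
all(Ferrite.faces(cell)[k] == map(i -> cell.nodes[i], Ferrite.faces(ip)[k]) for k in 1:6)   # expected: true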
# 1D: vertices faces(c::Union{Line,QuadraticLine}) = (c.nodes[1], c.nodes[2]) vertices(c::Union{Line,Line2D,Line3D,QuadraticLine}) = (c.nodes[1], c.nodes[2]) @@ -983,11 +984,46 @@ faces(c::Union{Quadrilateral,QuadraticQuadrilateral}) = ((c.nodes[1],c.nodes[2]) # 3D: vertices, edges, faces edges(c::Line3D) = ((c.nodes[1],c.nodes[2]),) vertices(c::Union{Tetrahedron,QuadraticTetrahedron}) = (c.nodes[1], c.nodes[2], c.nodes[3], c.nodes[4]) -edges(c::Union{Tetrahedron,QuadraticTetrahedron}) = ((c.nodes[1],c.nodes[2]), (c.nodes[2],c.nodes[3]), (c.nodes[3],c.nodes[1]), (c.nodes[1],c.nodes[4]), (c.nodes[2],c.nodes[4]), (c.nodes[3],c.nodes[4])) -faces(c::Union{Tetrahedron,QuadraticTetrahedron}) = ((c.nodes[1],c.nodes[3],c.nodes[2]), (c.nodes[1],c.nodes[2],c.nodes[4]), (c.nodes[2],c.nodes[3],c.nodes[4]), (c.nodes[1],c.nodes[4],c.nodes[3])) +edges(c::Union{Tetrahedron,QuadraticTetrahedron}) = ( + (c.nodes[1],c.nodes[2]), # Local edge index 1 + (c.nodes[2],c.nodes[3]), + (c.nodes[3],c.nodes[1]), + (c.nodes[1],c.nodes[4]), + (c.nodes[2],c.nodes[4]), + (c.nodes[3],c.nodes[4]) # Local edge index 6 +) +faces(c::Union{Tetrahedron,QuadraticTetrahedron}) = ( + (c.nodes[1],c.nodes[3],c.nodes[2]), # Local face index 1 + (c.nodes[1],c.nodes[2],c.nodes[4]), + (c.nodes[2],c.nodes[3],c.nodes[4]), + (c.nodes[1],c.nodes[4],c.nodes[3]) # Local face index 4 +) + + vertices(c::Union{Hexahedron,Cell{3,20,6}}) = (c.nodes[1], c.nodes[2], c.nodes[3], c.nodes[4], c.nodes[5], c.nodes[6], c.nodes[7], c.nodes[8]) -edges(c::Union{Hexahedron,Cell{3,20,6}}) = ((c.nodes[1],c.nodes[2]), (c.nodes[2],c.nodes[3]), (c.nodes[3],c.nodes[4]), (c.nodes[4],c.nodes[1]), (c.nodes[5],c.nodes[6]), (c.nodes[6],c.nodes[7]), (c.nodes[7],c.nodes[8]), (c.nodes[8],c.nodes[5]), (c.nodes[1],c.nodes[5]), (c.nodes[2],c.nodes[6]), (c.nodes[3],c.nodes[7]), (c.nodes[4],c.nodes[8])) -faces(c::Union{Hexahedron,Cell{3,20,6}}) = ((c.nodes[1],c.nodes[4],c.nodes[3],c.nodes[2]), (c.nodes[1],c.nodes[2],c.nodes[6],c.nodes[5]), (c.nodes[2],c.nodes[3],c.nodes[7],c.nodes[6]), (c.nodes[3],c.nodes[4],c.nodes[8],c.nodes[7]), (c.nodes[1],c.nodes[5],c.nodes[8],c.nodes[4]), (c.nodes[5],c.nodes[6],c.nodes[7],c.nodes[8])) +edges(c::Union{Hexahedron,Cell{3,20,6}}) = ( + (c.nodes[1],c.nodes[2]), # Local edge index 1 + (c.nodes[2],c.nodes[3]), + (c.nodes[3],c.nodes[4]), + (c.nodes[4],c.nodes[1]), + (c.nodes[5],c.nodes[6]), + (c.nodes[6],c.nodes[7]), + (c.nodes[7],c.nodes[8]), + (c.nodes[8],c.nodes[5]), + (c.nodes[1],c.nodes[5]), + (c.nodes[2],c.nodes[6]), + (c.nodes[3],c.nodes[7]), + (c.nodes[4],c.nodes[8]) # Local edge index 12 +) +faces(c::Union{Hexahedron,Cell{3,20,6}}) = ( + (c.nodes[1],c.nodes[4],c.nodes[3],c.nodes[2]), # Local face index 1 + (c.nodes[1],c.nodes[2],c.nodes[6],c.nodes[5]), + (c.nodes[2],c.nodes[3],c.nodes[7],c.nodes[6]), + (c.nodes[3],c.nodes[4],c.nodes[8],c.nodes[7]), + (c.nodes[1],c.nodes[5],c.nodes[8],c.nodes[4]), + (c.nodes[5],c.nodes[6],c.nodes[7],c.nodes[8]) # Local face index 6 +) + edges(c::Union{Quadrilateral3D}) = ((c.nodes[1],c.nodes[2]), (c.nodes[2],c.nodes[3]), (c.nodes[3],c.nodes[4]), (c.nodes[4],c.nodes[1])) faces(c::Union{Quadrilateral3D}) = ((c.nodes[1],c.nodes[2],c.nodes[3],c.nodes[4]),) diff --git a/src/interpolations.jl b/src/interpolations.jl index 94568d13dd..bd5479d2d2 100644 --- a/src/interpolations.jl +++ b/src/interpolations.jl @@ -437,8 +437,19 @@ end getnbasefunctions(::Lagrange{3,RefTetrahedron,1}) = 4 nvertexdofs(::Lagrange{3,RefTetrahedron,1}) = 1 -faces(::Lagrange{3,RefTetrahedron,1}) = ((1,3,2), (1,2,4), (2,3,4), (1,4,3)) 
-edges(::Lagrange{3,RefTetrahedron,1}) = ((1,2), (2,3), (3,1), (1,4), (2,4), (3,4)) +faces(::Lagrange{3,RefTetrahedron,1}) = ( + (1,3,2), + (1,2,4), + (2,3,4), + (1,4,3)) +edges(::Lagrange{3,RefTetrahedron,1}) = ( + (1,2), + (2,3), + (3,1), + (1,4), + (2,4), + (3,4) +) function reference_coordinates(::Lagrange{3,RefTetrahedron,1}) return [Vec{3, Float64}((0.0, 0.0, 0.0)), @@ -465,8 +476,20 @@ getnbasefunctions(::Lagrange{3,RefTetrahedron,2}) = 10 nvertexdofs(::Lagrange{3,RefTetrahedron,2}) = 1 nedgedofs(::Lagrange{3,RefTetrahedron,2}) = 1 -faces(::Lagrange{3,RefTetrahedron,2}) = ((1,3,2,7,6,5), (1,2,4,5,9,8), (2,3,4,6,10,9), (1,4,3,8,10,7)) -edges(::Lagrange{3,RefTetrahedron,2}) = ((1,2,5), (2,3,6), (3,1,7), (1,4,8), (2,4,9), (3,4,10)) +faces(::Lagrange{3,RefTetrahedron,2}) = ( + (1,3,2, 7, 6,5), + (1,2,4, 5, 9,8), + (2,3,4, 6,10,9), + (1,4,3, 8,10,7) +) +edges(::Lagrange{3,RefTetrahedron,2}) = ( + (1,2, 5), + (2,3, 6), + (3,1, 7), + (1,4, 8), + (2,4, 9), + (3,4, 10) +) function reference_coordinates(::Lagrange{3,RefTetrahedron,2}) return [Vec{3, Float64}((0.0, 0.0, 0.0)), @@ -506,8 +529,29 @@ end getnbasefunctions(::Lagrange{3,RefCube,1}) = 8 nvertexdofs(::Lagrange{3,RefCube,1}) = 1 -faces(::Lagrange{3,RefCube,1}) = ((1,4,3,2), (1,2,6,5), (2,3,7,6), (3,4,8,7), (1,5,8,4), (5,6,7,8)) -edges(::Lagrange{3,RefCube,1}) = ((1,2), (2,3), (3,4), (4,1), (1,5), (2,6), (3,7), (4,8), (5,6), (6,7), (7,8), (8,5)) +faces(::Lagrange{3,RefCube,1}) = ( + (1,4,3,2), # Local face index 1 + (1,2,6,5), + (2,3,7,6), + (3,4,8,7), + (1,5,8,4), + (5,6,7,8) # Local face index 6 +) + +edges(::Lagrange{3,RefCube,1}) = ( + (1,2), # Local edge index 1 + (2,3), + (3,4), + (4,1), + (5,6), + (6,7), + (7,8), + (8,5), + (1,5), + (2,6), + (3,7), + (4,8) # Local edge index 12 +) function reference_coordinates(::Lagrange{3,RefCube,1}) return [Vec{3, Float64}((-1.0, -1.0, -1.0)), @@ -547,14 +591,27 @@ nfacedofs(::Lagrange{3,RefCube,2}) = 1 ncelldofs(::Lagrange{3,RefCube,2}) = 1 faces(::Lagrange{3,RefCube,2}) = ( - (1,4,3,2, 12,11,10,9, 21), + (1,4,3,2, 12,11,10,9, 21), # Local face index 1 (1,2,6,5, 9,18,13,17, 22), (2,3,7,6, 10,19,14,18, 23), (3,4,8,7, 11,20,15,19, 24), (1,5,8,4, 17,16,20,12, 25), - (5,6,7,8, 13,14,15,16, 26), + (5,6,7,8, 13,14,15,16, 26), # Local face index 6 +) +edges(::Lagrange{3,RefCube,2}) = ( + (1,2, 9), # Local edge index 1 + (2,3, 10), + (3,4, 11), + (4,1, 12), + (5,6, 13), + (6,7, 14), + (7,8, 15), + (8,5, 16), + (1,5, 17), + (2,6, 18), + (3,7, 19), + (4,8, 20) # Local edge index 12 ) -edges(::Lagrange{3,RefCube,2}) = ((1,2, 9), (2,3, 10), (3,4, 11), (4,1, 12), (1,5, 17), (2,6, 18), (3,7, 19), (4,8, 20), (5,6, 13), (6,7, 14), (7,8, 15), (8,5, 16)) function reference_coordinates(::Lagrange{3,RefCube,2}) # vertex @@ -716,7 +773,28 @@ getlowerorder(::Serendipity{3,RefCube,2}) = Lagrange{3,RefCube,1}() nvertexdofs(::Serendipity{3,RefCube,2}) = 1 nedgedofs(::Serendipity{3,RefCube,2}) = 1 -faces(::Serendipity{3,RefCube,2}) = ((1,4,3,2,12,11,10,9), (1,2,6,5,9,18,13,17), (2,3,7,6,10,19,14,18), (3,4,8,7,11,20,15,19), (1,5,8,4,17,16,20,12), (5,6,7,8,13,14,15,16)) +faces(::Serendipity{3,RefCube,2}) = ( + (1,4,3,2, 12,11,10, 9), + (1,2,6,5, 9,18,13,17), + (2,3,7,6, 10,19,14,18), + (3,4,8,7, 11,20,15,19), + (1,5,8,4, 17,16,20,12), + (5,6,7,8, 13,14,15,16) +) +edges(::Serendipity{3,RefCube,2}) = ( + (1,2, 9), # Local edge index 1 + (2,3, 10), + (3,4, 11), + (4,1, 12), + (5,6, 13), + (6,7, 14), + (7,8, 15), + (8,5, 16), + (1,5, 17), + (2,6, 18), + (3,7, 19), + (4,8, 20) # Local edge index 12 +) function 
reference_coordinates(::Serendipity{3,RefCube,2}) return [Vec{3, Float64}((-1.0, -1.0, -1.0)), diff --git a/test/test_distributed_impl_2.jl b/test/test_distributed_impl_2.jl index 4300da7d7a..5f2beeb8f5 100644 --- a/test/test_distributed_impl_2.jl +++ b/test/test_distributed_impl_2.jl @@ -24,9 +24,9 @@ FerriteMPI = Base.get_extension(Ferrite, :FerriteMPI) @test Ferrite.remote_entities(se) == Dict(2 => [idx_nonlocal]) end check_edge_correctly_shared_1(EdgeIndex(1,4), EdgeIndex(1,2)) - check_edge_correctly_shared_1(EdgeIndex(1,12), EdgeIndex(1,10)) - check_edge_correctly_shared_1(EdgeIndex(1,8), EdgeIndex(1,7)) - check_edge_correctly_shared_1(EdgeIndex(1,5), EdgeIndex(1,6)) + check_edge_correctly_shared_1(EdgeIndex(1,12), EdgeIndex(1,11)) + check_edge_correctly_shared_1(EdgeIndex(1,9), EdgeIndex(1,10)) + check_edge_correctly_shared_1(EdgeIndex(1,8), EdgeIndex(1,6)) # Faces @test length(Ferrite.get_shared_faces(dgrid)) == 1 @@ -40,9 +40,9 @@ FerriteMPI = Base.get_extension(Ferrite, :FerriteMPI) @test Ferrite.remote_entities(se) == Dict(1 => [idx_nonlocal]) end check_edge_correctly_shared_2(EdgeIndex(1,4), EdgeIndex(1,2)) - check_edge_correctly_shared_2(EdgeIndex(1,12), EdgeIndex(1,10)) - check_edge_correctly_shared_2(EdgeIndex(1,8), EdgeIndex(1,7)) - check_edge_correctly_shared_2(EdgeIndex(1,5), EdgeIndex(1,6)) + check_edge_correctly_shared_2(EdgeIndex(1,12), EdgeIndex(1,11)) + check_edge_correctly_shared_2(EdgeIndex(1,9), EdgeIndex(1,10)) + check_edge_correctly_shared_2(EdgeIndex(1,8), EdgeIndex(1,6)) # Faces @test length(Ferrite.get_shared_faces(dgrid)) == 1 diff --git a/test/test_distributed_impl_3.jl b/test/test_distributed_impl_3.jl index 1225c6378c..dcbb275d7b 100644 --- a/test/test_distributed_impl_3.jl +++ b/test/test_distributed_impl_3.jl @@ -120,19 +120,19 @@ FerriteMPI = Base.get_extension(Ferrite, :FerriteMPI) @test getncells(lgrid) == 1 non_shared_vertices = [VertexIndex(1,1), VertexIndex(1,5)] non_shared_faces = [FaceIndex(1,1), FaceIndex(1,6), FaceIndex(1,2), FaceIndex(1,5)] - non_shared_edges = [EdgeIndex(1,1), EdgeIndex(1,4), EdgeIndex(1,5), EdgeIndex(1,9), EdgeIndex(1,12)] + non_shared_edges = [EdgeIndex(1,1), EdgeIndex(1,4), EdgeIndex(1,5), EdgeIndex(1,8), EdgeIndex(1,9)] elseif my_rank == 2 lgrid = getlocalgrid(dgrid) @test getncells(lgrid) == 1 non_shared_vertices = [VertexIndex(1,3), VertexIndex(1,7)] non_shared_faces = [FaceIndex(1,1), FaceIndex(1,6), FaceIndex(1,3), FaceIndex(1,4)] - non_shared_edges = [EdgeIndex(1,2), EdgeIndex(1,10), EdgeIndex(1,3), EdgeIndex(1,11), EdgeIndex(1,7)] + non_shared_edges = [EdgeIndex(1,2), EdgeIndex(1,6), EdgeIndex(1,3), EdgeIndex(1,7), EdgeIndex(1,11)] elseif my_rank == 3 lgrid = getlocalgrid(dgrid) @test getncells(lgrid) == 2 non_shared_vertices = [VertexIndex(1,2), VertexIndex(1,6), VertexIndex(2,4), VertexIndex(2,8)] non_shared_faces = [FaceIndex(1,1), FaceIndex(1,6), FaceIndex(1,2), FaceIndex(1,3), FaceIndex(2,1), FaceIndex(2,6), FaceIndex(2,5), FaceIndex(2,4)] - non_shared_edges = [EdgeIndex(1,1), EdgeIndex(1,9), EdgeIndex(1,6), EdgeIndex(1,2), EdgeIndex(1,10), EdgeIndex(2,4), EdgeIndex(2,12), EdgeIndex(2,8), EdgeIndex(2,3), EdgeIndex(2,11)] + non_shared_edges = [EdgeIndex(1,1), EdgeIndex(1,5), EdgeIndex(1,2), EdgeIndex(1,6), EdgeIndex(1,10), EdgeIndex(2,4), EdgeIndex(2,8), EdgeIndex(2,3), EdgeIndex(2,7), EdgeIndex(2,12)] else # Abstract machine or memory corruption during exectution above. 
diff --git a/test/test_distributed_impl_3.jl b/test/test_distributed_impl_3.jl
index 1225c6378c..dcbb275d7b 100644
--- a/test/test_distributed_impl_3.jl
+++ b/test/test_distributed_impl_3.jl
@@ -120,19 +120,19 @@ FerriteMPI = Base.get_extension(Ferrite, :FerriteMPI)
         @test getncells(lgrid) == 1
         non_shared_vertices = [VertexIndex(1,1), VertexIndex(1,5)]
         non_shared_faces = [FaceIndex(1,1), FaceIndex(1,6), FaceIndex(1,2), FaceIndex(1,5)]
-        non_shared_edges = [EdgeIndex(1,1), EdgeIndex(1,4), EdgeIndex(1,5), EdgeIndex(1,9), EdgeIndex(1,12)]
+        non_shared_edges = [EdgeIndex(1,1), EdgeIndex(1,4), EdgeIndex(1,5), EdgeIndex(1,8), EdgeIndex(1,9)]
     elseif my_rank == 2
         lgrid = getlocalgrid(dgrid)
         @test getncells(lgrid) == 1
         non_shared_vertices = [VertexIndex(1,3), VertexIndex(1,7)]
         non_shared_faces = [FaceIndex(1,1), FaceIndex(1,6), FaceIndex(1,3), FaceIndex(1,4)]
-        non_shared_edges = [EdgeIndex(1,2), EdgeIndex(1,10), EdgeIndex(1,3), EdgeIndex(1,11), EdgeIndex(1,7)]
+        non_shared_edges = [EdgeIndex(1,2), EdgeIndex(1,6), EdgeIndex(1,3), EdgeIndex(1,7), EdgeIndex(1,11)]
     elseif my_rank == 3
         lgrid = getlocalgrid(dgrid)
         @test getncells(lgrid) == 2
         non_shared_vertices = [VertexIndex(1,2), VertexIndex(1,6), VertexIndex(2,4), VertexIndex(2,8)]
         non_shared_faces = [FaceIndex(1,1), FaceIndex(1,6), FaceIndex(1,2), FaceIndex(1,3), FaceIndex(2,1), FaceIndex(2,6), FaceIndex(2,5), FaceIndex(2,4)]
-        non_shared_edges = [EdgeIndex(1,1), EdgeIndex(1,9), EdgeIndex(1,6), EdgeIndex(1,2), EdgeIndex(1,10), EdgeIndex(2,4), EdgeIndex(2,12), EdgeIndex(2,8), EdgeIndex(2,3), EdgeIndex(2,11)]
+        non_shared_edges = [EdgeIndex(1,1), EdgeIndex(1,5), EdgeIndex(1,2), EdgeIndex(1,6), EdgeIndex(1,10), EdgeIndex(2,4), EdgeIndex(2,8), EdgeIndex(2,3), EdgeIndex(2,7), EdgeIndex(2,12)]
     else
         # Abstract machine or memory corruption during execution above.
         @test false
diff --git a/test/test_distributed_impl_5.jl b/test/test_distributed_impl_5.jl
index 9d24ba5e53..53a735711b 100644
--- a/test/test_distributed_impl_5.jl
+++ b/test/test_distributed_impl_5.jl
@@ -23,7 +23,7 @@ FerriteMPI = Base.get_extension(Ferrite, :FerriteMPI)
     my_rank = Ferrite.global_rank(dgrid)

     dh = DofHandler(dgrid)
-    push!(dh, :u, 1, ip)
+    add!(dh, :u, 1, ip)
     close!(dh);

     @test length(dh.ldof_to_gdof) == length(dh.ldof_to_rank)

From 26ac8655039ef155669ff270f0237b085ad7ac52 Mon Sep 17 00:00:00 2001
From: Dennis Ogiermann
Date: Tue, 21 Feb 2023 16:41:36 +0100
Subject: [PATCH 123/124] Fix deprecation warnings.

---
 src/Dofs/ConstraintHandler.jl    | 2 +-
 test/test_grid_dofhandler_vtk.jl | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/Dofs/ConstraintHandler.jl b/src/Dofs/ConstraintHandler.jl
index 1c1081af96..9b93a9ee25 100644
--- a/src/Dofs/ConstraintHandler.jl
+++ b/src/Dofs/ConstraintHandler.jl
@@ -513,7 +513,7 @@ function WriteVTK.vtk_point_data(vtkfile, ch::ConstraintHandler)
     unique!(unique_fields)

     for field in unique_fields
-        nd = ndim(ch.dh, field)
+        nd = getfielddim(ch.dh, field)
         data = zeros(Float64, nd, getnnodes(getgrid(ch.dh)))
         for dbc in ch.dbcs
             dbc.field_name != field && continue
diff --git a/test/test_grid_dofhandler_vtk.jl b/test/test_grid_dofhandler_vtk.jl
index a36b4ee520..7a32dad0c0 100644
--- a/test/test_grid_dofhandler_vtk.jl
+++ b/test/test_grid_dofhandler_vtk.jl
@@ -484,10 +484,10 @@ end
     # Consistency check for dof computation.
     grid = generate_grid(Hexahedron, (2, 2, 2))
     dh = DofHandler(grid)
-    push!(dh, :u, 3, Lagrange{3,RefCube,2}())
-    push!(dh, :v, 1, Lagrange{3,RefCube,2}())
-    push!(dh, :w, 3, Lagrange{3,RefCube,1}())
-    push!(dh, :x, 3, Lagrange{3,RefCube,2}())
+    add!(dh, :u, 3, Lagrange{3,RefCube,2}())
+    add!(dh, :v, 1, Lagrange{3,RefCube,2}())
+    add!(dh, :w, 3, Lagrange{3,RefCube,1}())
+    add!(dh, :x, 3, Lagrange{3,RefCube,2}())
     _, vertexdicts, edgedicts, facedicts = Ferrite.__close!(dh)
     @test Ferrite.find_field(dh, :u) == 1
     @test Ferrite.find_field(dh, :v) == 2
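Besides the renumbering, the deprecation fixes settle on `add!` for registering fields on a `DofHandler` (replacing the deprecated `push!`) and on `getfielddim` instead of `ndim` for querying a field's dimension. A short usage sketch mirroring the test setup above; the scalar `:p` field is purely illustrative:

```julia
using Ferrite

grid = generate_grid(Hexahedron, (2, 2, 2))
dh = DofHandler(grid)
add!(dh, :u, 3, Lagrange{3,RefCube,2}())   # 3-component field, quadratic interpolation
add!(dh, :p, 1, Lagrange{3,RefCube,1}())   # illustrative scalar field, linear interpolation
close!(dh)

Ferrite.getfielddim(dh, :u)                # 3, the query now used by the VTK export path
```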
From 3696fdfedb9a30d5e11745e7d308a73c7bd444a8 Mon Sep 17 00:00:00 2001
From: Dennis Ogiermann
Date: Tue, 21 Feb 2023 16:56:22 +0100
Subject: [PATCH 124/124] Remove Pkg dep from docs.

---
 docs/Manifest.toml | 12 ++++++------
 docs/Project.toml  |  1 -
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/docs/Manifest.toml b/docs/Manifest.toml
index 3bdbd9148b..508f5d9924 100644
--- a/docs/Manifest.toml
+++ b/docs/Manifest.toml
@@ -2,7 +2,7 @@

 julia_version = "1.9.0-beta4"
 manifest_format = "2.0"
-project_hash = "27e3c9a6b7b6a6a2a2a32de6779b15088202d916"
+project_hash = "afd7489977add81b3c289383a98aa47a3a1e7ea2"

 [[deps.ANSIColoredPrinters]]
 git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c"
@@ -1046,10 +1046,10 @@ uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 version = "1.4.1"

 [[deps.OrdinaryDiffEq]]
-deps = ["Adapt", "ArrayInterface", "DataStructures", "DiffEqBase", "DocStringExtensions", "ExponentialUtilities", "FastBroadcast", "FastClosures", "FiniteDiff", "ForwardDiff", "FunctionWrappersWrappers", "IfElse", "LinearAlgebra", "LinearSolve", "Logging", "LoopVectorization", "MacroTools", "MuladdMacro", "NLsolve", "NonlinearSolve", "Polyester", "PreallocationTools", "Preferences", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLNLSolve", "SimpleNonlinearSolve", "SnoopPrecompile", "SparseArrays", "SparseDiffTools", "StaticArrays", "UnPack"]
-git-tree-sha1 = "3b98b39987fecc8c8c94f58b51d67190097b0b64"
+deps = ["Adapt", "ArrayInterface", "DataStructures", "DiffEqBase", "DocStringExtensions", "ExponentialUtilities", "FastBroadcast", "FastClosures", "FiniteDiff", "ForwardDiff", "FunctionWrappersWrappers", "IfElse", "LinearAlgebra", "LinearSolve", "Logging", "LoopVectorization", "MacroTools", "MuladdMacro", "NLsolve", "NonlinearSolve", "Polyester", "PreallocationTools", "Preferences", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLNLSolve", "SimpleNonlinearSolve", "SnoopPrecompile", "SparseArrays", "SparseDiffTools", "StaticArrayInterface", "StaticArrays", "UnPack"]
+git-tree-sha1 = "a364df19a43c4a9520eeca693aa2e77b679a2b0c"
 uuid = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
-version = "6.46.0"
+version = "6.47.0"

 [[deps.PCRE2_jll]]
 deps = ["Artifacts", "Libdl"]
@@ -1250,9 +1250,9 @@ version = "0.6.38"

 [[deps.SciMLBase]]
 deps = ["ArrayInterface", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "EnumX", "FunctionWrappersWrappers", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "Markdown", "Preferences", "RecipesBase", "RecursiveArrayTools", "Reexport", "RuntimeGeneratedFunctions", "SciMLOperators", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface", "Tables"]
-git-tree-sha1 = "fd2a15854af0ba1542b89efa24512b0377e7e37d"
+git-tree-sha1 = "33f031423eedc1f9e43f6112da6f13d5b49ea7da"
 uuid = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
-version = "1.86.1"
+version = "1.86.2"

 [[deps.SciMLNLSolve]]
 deps = ["DiffEqBase", "LineSearches", "NLsolve", "Reexport", "SciMLBase"]
diff --git a/docs/Project.toml b/docs/Project.toml
index 12eec41f15..c177d3b9d6 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -18,7 +18,6 @@ Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b"
 Optim = "429524aa-4258-5aef-a3af-852621145aeb"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9"
-Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Tensors = "48a634ad-e948-5137-8d70-aa71f2a747f4"