diff --git a/PartitionedSolvers/src/PartitionedSolvers.jl b/PartitionedSolvers/src/PartitionedSolvers.jl
index bf08d25a..768adf7f 100644
--- a/PartitionedSolvers/src/PartitionedSolvers.jl
+++ b/PartitionedSolvers/src/PartitionedSolvers.jl
@@ -29,6 +29,7 @@ export smoothed_aggregation
 export v_cycle
 export w_cycle
 export amg_level_params
+export amg_level_params_linear_elasticity
 export amg_fine_params
 export amg_coarse_params
 export amg_statistics
diff --git a/PartitionedSolvers/src/amg.jl b/PartitionedSolvers/src/amg.jl
index 499a5230..83a43f36 100644
--- a/PartitionedSolvers/src/amg.jl
+++ b/PartitionedSolvers/src/amg.jl
@@ -145,7 +145,6 @@ function constant_prolongator(node_to_aggregate,aggregates,n_nullspace_vecs)
     nnodes = length(node_to_aggregate)
     pending = typeof_aggregate(0)
     isolated = typeof_aggregate(-1)
-    nnodes = length(node_to_aggregate)
     naggregates = length(aggregates)
     aggregate_to_nodes_ptrs = zeros(Int,naggregates+1)
     for node in 1:nnodes
@@ -198,6 +197,53 @@ function constant_prolongator(node_to_aggregate::PVector,aggregates::PRange,n_nu
     P0
 end
 
+function collect_nodes_in_aggregate(node_to_aggregate::PVector,aggregates::PRange)
+    aggregate_to_nodes_partition = map(own_values(node_to_aggregate), partition(aggregates)) do own_node_to_global_aggregate, my_aggregates
+        # convert to local ids
+        global_to_local_aggregate = global_to_local(my_aggregates)
+        local_aggregates = global_to_local_aggregate[my_aggregates]
+        @assert all(local_aggregates .!= 0)
+        n_local_aggregates = length(local_aggregates)
+        own_node_to_local_aggregate = map(own_node_to_global_aggregate) do global_aggregate
+            global_to_local_aggregate[global_aggregate]
+        end
+        local_aggregate_to_local_nodes = collect_nodes_in_aggregate(own_node_to_local_aggregate, 1:n_local_aggregates)
+        local_aggregate_to_local_nodes
+    end
+    PVector(aggregate_to_nodes_partition, partition(aggregates))
+end
+
+function collect_nodes_in_aggregate(node_to_aggregate,aggregates)
+    typeof_aggregate = eltype(node_to_aggregate)
+    nnodes = length(node_to_aggregate)
+    pending = typeof_aggregate(0)
+    isolated = typeof_aggregate(-1)
+    naggregates = length(aggregates)
+    aggregate_to_nodes_ptrs = zeros(Int,naggregates+1)
+    for node in 1:nnodes
+        agg = node_to_aggregate[node]
+        if agg == pending || agg == isolated
+            continue
+        end
+        aggregate_to_nodes_ptrs[agg+1] += 1
+    end
+    length_to_ptrs!(aggregate_to_nodes_ptrs)
+    ndata = aggregate_to_nodes_ptrs[end]-1
+    aggregate_to_nodes_data = zeros(Int,ndata)
+    for node in 1:nnodes
+        agg = node_to_aggregate[node]
+        if agg == pending || agg == isolated
+            continue
+        end
+        p = aggregate_to_nodes_ptrs[agg]
+        aggregate_to_nodes_data[p] = node
+        aggregate_to_nodes_ptrs[agg] += 1
+    end
+    rewind_ptrs!(aggregate_to_nodes_ptrs)
+    aggregate_to_nodes = jagged_array(aggregate_to_nodes_data,aggregate_to_nodes_ptrs)
+    aggregate_to_nodes
+end
+
 function tentative_prolongator_for_laplace(P0,B)
     n_nullspace_vecs = length(B)
     if n_nullspace_vecs != 1
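[review note] collect_nodes_in_aggregate inverts node_to_aggregate with a two-pass counting pattern: count aggregate sizes, prefix-sum into pointers, fill the data array, then rewind the pointers. A minimal sketch of the invariant with plain-Julia stand-ins for the PartitionedArrays helpers length_to_ptrs!, rewind_ptrs! and jagged_array (toy data; aggregate ids 0 = pending and -1 = isolated are skipped):

    node_to_aggregate = [1, 2, 1, 0, 2, -1, 1]
    naggregates = 2
    counts = zeros(Int, naggregates)
    for agg in node_to_aggregate
        agg in (0, -1) && continue
        counts[agg] += 1
    end
    ptrs = [1; 1 .+ cumsum(counts)]    # what length_to_ptrs! builds in place
    data = zeros(Int, ptrs[end] - 1)
    next = copy(ptrs)                  # the fill pass mutates the pointers; rewind_ptrs! restores them
    for (node, agg) in enumerate(node_to_aggregate)
        agg in (0, -1) && continue
        data[next[agg]] = node
        next[agg] += 1
    end
    @assert data == [1, 3, 7, 2, 5]    # nodes of aggregate 1, then nodes of aggregate 2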
@@ -207,6 +253,175 @@ function tentative_prolongator_for_laplace(P0,B)
     P0,Bc
 end
 
+function tentative_prolongator_with_block_size(aggregate_to_nodes::PVector,B, block_size)
+    own_values_B = map(own_values, B)
+    n_B = length(B)
+    global_aggregate_to_owner = global_to_owner(aggregate_to_nodes.index_partition)
+    P0_partition, Bc_partition, coarse_dof_to_Bc... = map(aggregate_to_nodes.index_partition, B[1].index_partition, local_values(aggregate_to_nodes), own_values_B...) do local_aggregate_local_indices, local_dof_local_indices, local_aggregate_to_local_nodes, own_dof_to_b...
+        P0_own_own, local_coarse_dof_to_Bc = tentative_prolongator_with_block_size(local_aggregate_to_local_nodes, own_dof_to_b, block_size)
+        # Create P0 partition
+        n_global_aggregates = global_length(local_aggregate_local_indices)
+        n_own_aggregates = own_length(local_aggregate_local_indices)
+        n_local_aggregates = local_length(local_aggregate_local_indices)
+        n_ghost_aggregates = ghost_length(local_aggregate_local_indices)
+        n_own_dofs = length(own_dof_to_b[1])
+        n_own_coarse_dofs = n_own_aggregates * n_B
+        n_global_coarse_dofs = n_global_aggregates * n_B
+        @assert n_own_aggregates == n_local_aggregates
+        @assert n_ghost_aggregates == 0
+        @assert size(P0_own_own, 1) == n_own_dofs
+        @assert size(P0_own_own, 2) == n_own_coarse_dofs
+        @assert length(local_coarse_dof_to_Bc) == n_B
+        @assert length(local_coarse_dof_to_Bc[1]) == n_own_coarse_dofs
+        P0_own_ghost = sparse(Int[], Int[], eltype(P0_own_own)[], n_own_aggregates, n_ghost_aggregates)
+        P0_ghost_own = sparse(Int[], Int[], eltype(P0_own_own)[], n_ghost_aggregates, n_own_aggregates)
+        P0_ghost_ghost = sparse(Int[], Int[], eltype(P0_own_own)[], n_ghost_aggregates, n_ghost_aggregates)
+        blocks = PartitionedArrays.split_matrix_blocks(P0_own_own, P0_own_ghost, P0_ghost_own, P0_ghost_ghost)
+        perm_aggs = PartitionedArrays.local_permutation(local_aggregate_local_indices)
+        # Turn permutation of aggregates to permutation of null vectors
+        perm_cols = zeros(Int, size(P0_own_own,2))
+        for (i, agg) in enumerate(perm_aggs)
+            cols = (agg - 1) * n_B + 1 : agg * n_B
+            inds = (i - 1) * n_B + 1 : i * n_B
+            perm_cols[inds] = cols
+        end
+        perm_rows_with_ghost = PartitionedArrays.local_permutation(local_dof_local_indices)
+        # Filter only own rows
+        perm_rows = zeros(Int, own_length(local_dof_local_indices))
+        ptr = 1
+        for ind in perm_rows_with_ghost
+            if ind in own_to_local(local_dof_local_indices)
+                perm_rows[ptr] = ind
+                ptr += 1
+            end
+        end
+        P0_partition = PartitionedArrays.split_matrix(blocks, perm_rows, perm_cols)
+        # Partition of Bc
+        own_to_global_coarse_dofs = zeros(Int, n_own_coarse_dofs)
+        for (i, agg) in enumerate(local_to_global(local_aggregate_local_indices))
+            inds = (i-1) * n_B + 1 : i * n_B
+            global_coarse_dofs = (agg-1) * n_B + 1 : agg * n_B
+            own_to_global_coarse_dofs[inds] = global_coarse_dofs
+        end
+        global_coarse_dofs_to_owner = zeros(Int, n_global_coarse_dofs)
+        for global_agg in 1:n_global_aggregates
+            inds = (global_agg-1) * n_B + 1 : global_agg * n_B
+            global_coarse_dofs_to_owner[inds] .= global_aggregate_to_owner[global_agg]
+        end
+        own_coarse_dofs = OwnIndices(n_global_coarse_dofs, part_id(local_aggregate_local_indices), own_to_global_coarse_dofs) #todo
+        ghost_coarse_dofs = GhostIndices(n_global_coarse_dofs, Int[], Int32[])
+        Bc_partition = OwnAndGhostIndices(own_coarse_dofs, ghost_coarse_dofs, global_coarse_dofs_to_owner)
+        P0_partition, Bc_partition, local_coarse_dof_to_Bc...
+    end |> tuple_of_arrays
+    P0 = PSparseMatrix(P0_partition, B[1].index_partition, Bc_partition, true)
+    Bc = [PVector(b, Bc_partition) for b in coarse_dof_to_Bc]
+    P0, Bc
+end
+
+
+function tentative_prolongator_with_block_size(aggregate_to_nodes::JaggedArray,B, block_size)
+    # Algorithm 7 in https://mediatum.ub.tum.de/download/1229321/1229321.pdf
+
+    if length(B) < 1
+        error("Null space must contain at least one null vector.")
+    end
+
+    n_aggregates = length(aggregate_to_nodes.ptrs)-1
+    n_B = length(B)
+    n_dofs = length(aggregate_to_nodes.data)*block_size
+    n_dofs_c = n_aggregates * n_B
+    Bc = [Vector{Float64}(undef,n_dofs_c) for _ in 1:n_B]
+
+    # Build P0 colptr
+    P0_colptr = zeros(Int, n_dofs_c + 1)
+    P0_colptr[1] = 1
+    for i_agg in 1:n_aggregates
+        pini = aggregate_to_nodes.ptrs[i_agg]
+        pend = aggregate_to_nodes.ptrs[i_agg+1]-1
+        n_nodes = length(pini:pend)
+        for b in 1:n_B
+            col = (i_agg - 1) * n_B + b
+            P0_colptr[col+1] += P0_colptr[col] + n_nodes * block_size
+        end
+    end
+
+    # Build P0 rowvals
+    nnz = length(aggregate_to_nodes.data) * block_size * n_B
+    P0_rowval = zeros(Int, nnz)
+    for i_agg in 1:n_aggregates
+        pini = aggregate_to_nodes.ptrs[i_agg]
+        pend = aggregate_to_nodes.ptrs[i_agg+1]-1
+        i_nodes = aggregate_to_nodes.data[pini:pend]
+        for b in 1:n_B
+            rval_ini = P0_colptr[(i_agg-1)*n_B+b]
+            for i_node in i_nodes
+                rval_end = rval_ini+block_size-1
+                P0_rowval[rval_ini:rval_end] = node_to_dofs(i_node, block_size)
+                rval_ini = rval_end + 1
+            end
+        end
+    end
+
+    P0 = SparseMatrixCSC(
+        n_dofs,
+        n_dofs_c,
+        P0_colptr,
+        P0_rowval,
+        ones(nnz))
+
+    # Fill with nullspace vectors
+    for i_agg in 1:n_aggregates
+        for b in 1:n_B
+            col = (i_agg-1) * n_B + b
+            pini = P0.colptr[col]
+            pend = P0.colptr[col+1]-1
+            rowids = P0.rowval[pini:pend]
+            P0.nzval[pini:pend] = B[b][rowids]
+        end
+    end
+
+    # Compute QR decomposition for nullspace in each aggregate
+    for i_agg in 1:n_aggregates
+        # Copy values to Bi
+        pini = aggregate_to_nodes.ptrs[i_agg]
+        pend = aggregate_to_nodes.ptrs[i_agg+1]-1
+        n_i = length(pini:pend) * block_size
+
+        if n_i < n_B
+            error("Singleton node aggregate: Case not implemented.")
+        end
+
+        Bi = zeros(n_i, n_B)
+        P0cols = (i_agg-1)*n_B+1 : i_agg*n_B
+        for (b, col) in enumerate(P0cols)
+            pini = P0.colptr[col]
+            pend = P0.colptr[col+1]-1
+            Bi[:,b] = P0.nzval[pini:pend]
+        end
+
+        # Compute thin QR decomposition
+        F = qr(Bi)
+        Qi = F.Q * Matrix(I,(n_i, n_B))
+        Qi = Qi[:,1:n_B]
+        Ri = F.R
+
+        # Build global tentative prolongator
+        for (b, col) in enumerate(P0cols)
+            pini = P0.colptr[col]
+            pend = P0.colptr[col+1]-1
+            P0.nzval[pini:pend] = Qi[:,b]
+        end
+
+        # Build coarse null space
+        Bcrows = P0cols
+        for b in 1:n_B
+            Bc[b][Bcrows] = Ri[:, b]
+        end
+    end
+
+    P0, Bc
+end
+
 function generic_tentative_prolongator(P0::SparseArrays.AbstractSparseMatrixCSC,B)
     error("not implemented yet")
     # A draft for the scalar case is commented below
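[review note] The per-aggregate QR step above (Algorithm 7 in the cited thesis) factors the null space restricted to one aggregate as Bi = Qi*Ri: the thin Qi becomes that aggregate's column block of P0 and Ri the corresponding rows of the coarse null space Bc, which is exactly why P0 * Bc reproduces B. A self-contained toy check (hypothetical sizes):

    using LinearAlgebra

    n_i, n_B = 6, 3                  # one aggregate: 2 nodes, block_size = 3, 3 null vectors
    Bi = rand(n_i, n_B)              # null space restricted to the aggregate's dofs
    F = qr(Bi)
    Qi = F.Q * Matrix(I, n_i, n_B)   # thin Q factor, built as in the code above
    Ri = F.R
    @assert Qi * Ri ≈ Bi             # hence P0 * Bc ≈ B, re-checked in the tests below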
@@ -247,19 +462,53 @@ end
 function smoothed_prolongator(A,P0,diagA=dense_diag(A);approximate_omega)
     # TODO the performance of this one can be improved
     invDiagA = 1 ./ diagA
-    omega = approximate_omega(invDiagA,A)
     Dinv = PartitionedArrays.sparse_diag_matrix(invDiagA,(axes(A,1),axes(A,1)))
-    P = (I-omega*Dinv*A)*P0
+    DinvA = Dinv*A
+    omega = approximate_omega(invDiagA,A,DinvA)
+    P = (I-omega*DinvA) * P0
     P
 end
 
-function omega_for_1d_laplace(invD,A)
+function omega_for_1d_laplace(invD,A,DinvA)
     # in general this will be (4/3)/ρ(D^-1*A)
     # with a cheap approximation (e.g., power method) of the spectral radius ρ(D^-1*A)
    # ρ(D^-1*A) == 2 for 1d Laplace problem
     2/3
 end
 
+function lambda_generic(invD,A,DinvA::AbstractMatrix)
+    ω = 4/3
+    # Estimate the spectral radius ρ(D^-1*A) with a few power iterations
+    # (Remark 3.5.2. in https://mediatum.ub.tum.de/download/1229321/1229321.pdf)
+    x0 = rand(size(DinvA,2))
+    ρ, x = spectral_radius(DinvA, x0, 20)
+    ω/ρ
+end
+
+function lambda_generic(invD,A,DinvA::PSparseMatrix)
+    ω = 4/3
+    # Estimate the spectral radius ρ(D^-1*A) with a few power iterations
+    # (Remark 3.5.2. in https://mediatum.ub.tum.de/download/1229321/1229321.pdf)
+    x0 = prand(partition(axes(DinvA,2)))
+    ρ, x = spectral_radius(DinvA, x0, 20)
+    ω/ρ
+end
+
+function spectral_radius(A, x, num_iterations::Int)
+    # x is the caller-provided initial guess
+    y = similar(x)
+    # Power iteration
+    for _ in 1:num_iterations
+        mul!(y, A, x)
+        y_norm = norm(y)
+        x = y/y_norm
+    end
+    # Estimate the spectral radius with the Rayleigh quotient
+    y = mul!(y, A, x)
+    ρ = (y' * x) / (x' * x)
+    abs(ρ), x
+end
+
 function enhance_coarse_partition(A,Ac,Bc,R,P,cache,repartition_threshold)
     Ac,Bc,R,P,cache,repartition_threshold
 end
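[review note] lambda_generic returns ω = (4/3)/ρ(D^-1*A) with ρ estimated by the power iterations in spectral_radius. A toy check of that logic, using the fact the hunk itself states: for the 1d Laplacian ρ(D^-1*A) approaches 2, so the estimate should land near the hard-coded 2/3 of omega_for_1d_laplace:

    using LinearAlgebra

    function rho_power(DinvA, x, iters)
        for _ in 1:iters             # power iteration, as in spectral_radius
            x = DinvA * x
            x /= norm(x)
        end
        abs((DinvA * x)' * x / (x' * x))   # Rayleigh quotient
    end

    n = 50
    A = SymTridiagonal(fill(2.0, n), fill(-1.0, n - 1))  # 1d Laplace stencil
    ρ = rho_power(Matrix(A) / 2.0, rand(n), 20)          # D == 2I for this matrix
    @assert isapprox((4/3) / ρ, 2/3; rtol = 0.1)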
@@ -306,6 +555,206 @@ function smoothed_aggregation(;
     (coarsen, coarsen!)
 end
 
+function smoothed_aggregation_with_block_size(;
+    epsilon = 0,
+    approximate_omega = lambda_generic,
+    tentative_prolongator = tentative_prolongator_with_block_size,
+    repartition_threshold = 2000,
+    block_size = 1,
+    )
+    function coarsen(A,B)
+        # build strength graph
+        G = strength_graph(A, block_size; epsilon)
+        diagG = dense_diag(G)
+        node_to_aggregate, node_aggregates = aggregate(G,diagG;epsilon)
+        aggregate_to_nodes = collect_nodes_in_aggregate(node_to_aggregate, node_aggregates)
+        Pc,Bc = tentative_prolongator(aggregate_to_nodes,B, block_size)
+        diagA = dense_diag(A)
+        P = smoothed_prolongator(A, Pc, diagA; approximate_omega)
+        R = transpose(P)
+        Ac,cache = rap(R,A,P;reuse=true)
+        Ac,Bc,R,P,cache,repartition_threshold = enhance_coarse_partition(A,Ac,Bc,R,P,cache,repartition_threshold)
+        Ac,Bc,R,P,cache
+    end
+    function coarsen!(A,Ac,R,P,cache)
+        rap!(Ac,R,A,P,cache)
+        Ac,R,P,cache
+    end
+    (coarsen, coarsen!)
+end
+
+function getblock!(B,A,ids_i,ids_j)
+    for (j,J) in enumerate(ids_j)
+        for (i,I) in enumerate(ids_i)
+            B[i,j] = A[I,J]
+        end
+    end
+end
+
+function strength_graph(A::PSparseMatrix, block_size::Integer; epsilon)
+    dof_partition = partition(axes(A,1))
+    node_partition = map(dof_partition) do my_dofs
+        n_local_dofs = length(my_dofs)
+        @assert n_local_dofs % block_size == 0
+        @assert own_length(my_dofs) == n_local_dofs
+        n_own_nodes = div(n_local_dofs, block_size)
+        own_to_global_node = zeros(Int, n_own_nodes)
+        for local_node in 1:n_own_nodes
+            local_dof = (local_node - 1) * block_size + 1
+            global_dof = my_dofs[local_dof]
+            global_node = div(global_dof - 1, block_size) + 1
+            own_to_global_node[local_node] = global_node
+        end
+        n_global_dofs = global_length(my_dofs)
+        n_global_nodes = div(n_global_dofs, block_size)
+        @assert n_global_dofs % block_size == 0
+        own_nodes = OwnIndices(n_global_nodes, part_id(my_dofs), own_to_global_node)
+        ghost_nodes = GhostIndices(n_global_nodes, Int[], Int32[])
+        my_nodes = OwnAndGhostIndices(own_nodes, ghost_nodes)
+        my_nodes
+    end
+
+    G_partition = map(own_own_values(A), node_partition) do A_own_own, my_nodes
+        G_own_own = strength_graph(A_own_own, block_size, epsilon=epsilon)
+        n_own_nodes = own_length(my_nodes)
+        n_ghost_nodes = ghost_length(my_nodes)
+        @assert n_ghost_nodes == 0
+        @assert size(G_own_own, 1) == n_own_nodes
+        @assert size(G_own_own, 2) == size(G_own_own, 1)
+        G_own_ghost = sparse(Int[], Int[], eltype(G_own_own)[], n_own_nodes, n_ghost_nodes)
+        G_ghost_own = sparse(Int[], Int[], eltype(G_own_own)[], n_ghost_nodes, n_own_nodes)
+        G_ghost_ghost = sparse(Int[], Int[], eltype(G_own_own)[], n_ghost_nodes, n_ghost_nodes)
+        blocks = PartitionedArrays.split_matrix_blocks(G_own_own, G_own_ghost, G_ghost_own, G_ghost_ghost)
+        perm = PartitionedArrays.local_permutation(my_nodes)
+        PartitionedArrays.split_matrix(blocks, perm, perm)
+    end
+
+    G = PSparseMatrix(G_partition, node_partition, node_partition, true)
+    G
+end
+
+function strength_graph(A::AbstractSparseMatrix, block_size::Integer; epsilon)
+
+    if block_size < 1
+        error("Block size must be equal to or larger than 1.")
+    end
+
+    if size(A,1) != size(A,2)
+        error("Matrix must be square.")
+    end
+
+    if size(A,1) % block_size != 0
+        error("Matrix size must be multiple of block size.")
+    end
+
+    if epsilon < 0
+        error("Expected epsilon >= 0.")
+    end
+
+    if block_size == 1
+        return A
+    end
+
+    n_dofs = size(A,1)
+    nnodes = div(n_dofs, block_size)
+    B = zeros(block_size,block_size)
+    diag_norms = zeros(nnodes)
+
+    # Count nonzero values
+    nnz = 0
+
+    # If epsilon <= 1, all diagonal entries are in the graph
+    if epsilon <= 1
+        nnz += nnodes
+    end
+
+    # Calculate norms of diagonal values first
+    for i_node in 1:nnodes
+        i_dofs = node_to_dofs(i_node, block_size)
+        getblock!(B,A,i_dofs,i_dofs)
+        diag_norms[i_node] = norm(B)
+    end
+
+    for j_node in 1:nnodes
+        for i_node in 1:nnodes
+            if j_node != i_node
+                i_dofs = node_to_dofs(i_node, block_size)
+                j_dofs = node_to_dofs(j_node, block_size)
+                getblock!(B,A,i_dofs,j_dofs)
+                norm_B = norm(B)
+                if norm_B == 0
+                    continue
+                end
+                # Calculate strength according to https://github.com/pyamg/pyamg/blob/e1fe54c93be1029c02ddcf84c2338a607b088703/pyamg/strength.py#L275
+                if norm_B >= epsilon * sqrt(diag_norms[i_node] * diag_norms[j_node])
+                    nnz += 1
+                end
+            end
+        end
+    end
+
+    # Allocate data structures for sparse matrix
+    I = zeros(Int, nnz)
+    J = zeros(Int, nnz)
+    V = zeros(nnz)
+
+    i_nz = 1
+
+    for j_node in 1:nnodes
+        for i_node in 1:nnodes
+            if j_node == i_node
+                # Diagonal entries are always in graph if epsilon <= 1
+                if epsilon <= 1
+                    I[i_nz] = i_node
+                    J[i_nz] = j_node
+                    V[i_nz] = 1.0
+                    i_nz += 1
+                end
+            else
+                i_dofs = node_to_dofs(i_node, block_size)
+                j_dofs = node_to_dofs(j_node, block_size)
+                getblock!(B,A,i_dofs,j_dofs)
+                norm_B = norm(B)
+                if norm_B == 0
+                    continue
+                end
+                # Calculate strength according to https://github.com/pyamg/pyamg/blob/e1fe54c93be1029c02ddcf84c2338a607b088703/pyamg/strength.py#L275
+                if norm_B >= epsilon * sqrt(diag_norms[i_node] * diag_norms[j_node])
+                    I[i_nz] = i_node
+                    J[i_nz] = j_node
+                    V[i_nz] = 1.0
+                    i_nz += 1
+                end
+            end
+        end
+    end
+
+    G = sparse(I, J, V, nnodes, nnodes)
+    G
+end
+
+function node_to_dofs(i_node, block_size)
+    # Convert single node in graph to range of dofs
+    ((i_node-1)*block_size+1) : i_node*block_size
+end
+
+function dofs_to_node(dofs, block_size)
+    # Convert range of dofs to single node in graph
+    div(dofs[end], block_size)
+end
+
+function amg_level_params_linear_elasticity(block_size;
+    pre_smoother = additive_schwarz(gauss_seidel(;iters=1);iters=1),
+    coarsening = smoothed_aggregation_with_block_size(;approximate_omega = lambda_generic,
+        tentative_prolongator = tentative_prolongator_with_block_size, block_size = block_size),
+    cycle = v_cycle,
+    pos_smoother = pre_smoother,
+    )
+
+    level_params = (;pre_smoother,pos_smoother,coarsening,cycle)
+    level_params
+end
+
 function amg_level_params(;
     pre_smoother = additive_schwarz(gauss_seidel(;iters=1);iters=1),
     coarsening = smoothed_aggregation(;),
     cycle = v_cycle,
@@ -490,6 +939,4 @@ function amg_finalize!(setup)
     end
     coarse_solver_setup = setup.coarse_level.coarse_solver_setup
     finalize!(coarse_solver_setup)
-    nothing
-end
-
+end
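[review note] The coupling test in strength_graph follows the PyAMG criterion linked in the code: nodes i and j are connected iff ||A_ij||_F >= epsilon * sqrt(||A_ii||_F * ||A_jj||_F), where A_ij is the block_size x block_size block coupling the two nodes. A toy check with hypothetical 2x2 blocks, one strong coupling and one negligible one:

    using LinearAlgebra, SparseArrays

    A = sparse(Float64[ 4 0     -1  0;
                        0 4      0 -1e-8;
                       -1 0      4  0;
                        0 -1e-8  0  4])
    bs, ε = 2, 0.02
    dofs(node) = (node - 1) * bs + 1 : node * bs       # same mapping as node_to_dofs
    blocknorm(i, j) = norm(Matrix(A[dofs(i), dofs(j)]))

    # The (1,2) block has norm ≈ 1 and passes; a large enough ε would drop the edge.
    @assert blocknorm(1, 2) >= ε * sqrt(blocknorm(1, 1) * blocknorm(2, 2))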
diff --git a/PartitionedSolvers/test/amg_tests.jl b/PartitionedSolvers/test/amg_tests.jl
index 659ca455..a7328c8c 100644
--- a/PartitionedSolvers/test/amg_tests.jl
+++ b/PartitionedSolvers/test/amg_tests.jl
@@ -1,16 +1,220 @@
 module AMGTests
 
 using PartitionedArrays
-using PartitionedArrays: laplace_matrix
 using PartitionedSolvers
 using LinearAlgebra
-using Test
+using IterativeSolvers
 using IterativeSolvers: cg!
-import IterativeSolvers
+using Test
+using SparseArrays
+
+
+# Test strength graph computation
+# First with PSparseMatrix and known solution
+ndofs = 18
+nnodes = 9
+nrows = 3
+p = 4
+ranks = DebugArray(LinearIndices((p,)))
+row_partition = uniform_partition(ranks, ndofs)
+
+IJV = map(row_partition) do row_indices
+    I, J, V = Int[], Int[], Float64[]
+    for i in local_to_global(row_indices)
+        push!(V, 9)
+        push!(I, i)
+        push!(J, i)
+
+        node = 0
+        if (i % 2) == 1
+            # 1st dimension
+            push!(V, -1)
+            push!(I, i)
+            push!(J, i+1)
+            node = div((i+1),2)
+        else
+            # 2nd dimension
+            push!(V, -1)
+            push!(I, i)
+            push!(J, i-1)
+            node = div(i,2)
+        end
+        north = node - nrows
+        south = node + nrows
+        east = node - 1
+        west = node + 1
+        if north >= 1
+            push!(V, -1)
+            push!(I, i)
+            push!(J, 2*north - 1)
+            push!(V, -1)
+            push!(I, i)
+            push!(J, 2*north)
+        end
+        if south <= nnodes
+            push!(V, -1)
+            push!(I, i)
+            push!(J, 2*south - 1)
+            push!(V, -1)
+            push!(I, i)
+            push!(J, 2*south)
+        end
+        if node % nrows != 1
+            push!(V, -1)
+            push!(I, i)
+            push!(J, 2*east-1)
+            push!(V, -1)
+            push!(I, i)
+            push!(J, 2*east)
+        end
+        if node % nrows != 0
+            push!(V, -1)
+            push!(I, i)
+            push!(J, 2*west-1)
+            push!(V, -1)
+            push!(I, i)
+            push!(J, 2*west)
+        end
+    end
+    I,J,V
+end
+
+I,J,V = tuple_of_arrays(IJV)
+A = psparse(I,J,V,row_partition, row_partition) |> fetch
+
+# build solution
+I = [1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5,
+     6, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9]
+J = [1, 2, 4,
+     1, 2, 3, 5,
+     2, 3, 6,
+     1, 4, 5, 7,
+     2, 4, 5, 6, 8,
+     3, 5, 6, 9,
+     4, 7, 8,
+     5, 7, 8, 9,
+     6, 8, 9]
+V = ones(length(I))
+solution = sparse(I, J, V, nnodes, nnodes)
+
+epsilon = 0.02
+
+# Test with CSC sparse matrix
+# Test sample matrix with known solution
+A_seq = centralize(A)
+G_seq = PartitionedSolvers.strength_graph(A_seq, 2, epsilon=epsilon)
+@test solution ≈ G_seq
+
+# Another test with block size 3
+M = rand([-2.0, -1, 1, 2], (3, 3))
+M = sparse(M)
+A = blockdiag(M, M, M)
+solution = sparse([1, 2, 3], [1, 2, 3], fill(1.0, 3), 3, 3)
+G = PartitionedSolvers.strength_graph(A, 3, epsilon=epsilon)
+@test solution ≈ G
+
+# Test with minimal matrix size
+G = PartitionedSolvers.strength_graph(M, 3, epsilon=epsilon)
+solution = sparse([1], [1], 1.0, 1, 1)
+@test solution ≈ G
+
+# Create test matrix for linear elasticity
+parts_per_dir = (2,2,2)
+block_size = length(parts_per_dir)
+p = prod(parts_per_dir)
+ranks = DebugArray(LinearIndices((p,)))
+nodes_per_dir = map(i->block_size*i,parts_per_dir)
+args = PartitionedArrays.linear_elasticity_fem(nodes_per_dir,parts_per_dir,ranks)
+A_dist = psparse(args...) |> fetch
+A_seq = centralize(A_dist)
+
+# Test strength graph with sequential and parallel linear elasticity matrix
+G_seq = PartitionedSolvers.strength_graph(A_seq, block_size, epsilon=epsilon)
+G_dist = PartitionedSolvers.strength_graph(A_dist, block_size, epsilon=epsilon)
+G_dist_centralized = centralize(G_dist)
+diff = G_seq - G_dist_centralized
+diff_nnz = nnz(diff)
+println("Test strength graph: \n$(round(100*diff_nnz/(G_seq.m^2); digits=3))% of total entries are different between sequential and parallel G.")
+println("G_dist has $(round(100*(1-nnz(G_dist_centralized)/nnz(G_seq)); digits=2))% fewer nnz entries than G_seq.")
+println("(G_dist nnz entries: $(nnz(G_dist_centralized)), G_seq nnz entries: $(nnz(G_seq)))")
+@test isa(G_dist, PSparseMatrix)
+
+# Test sequential collect_nodes_in_aggregate
+diagG = dense_diag(centralize(G_dist))
+node_to_aggregate_seq, node_aggregates_seq = PartitionedSolvers.aggregate(centralize(G_dist),diagG;epsilon)
+aggregate_to_nodes_seq = PartitionedSolvers.collect_nodes_in_aggregate(node_to_aggregate_seq, node_aggregates_seq)
+@test length(aggregate_to_nodes_seq.data) == length(node_to_aggregate_seq)
+for i_agg in node_aggregates_seq
+    pini = aggregate_to_nodes_seq.ptrs[i_agg]
+    pend = aggregate_to_nodes_seq.ptrs[i_agg+1]-1
+    nodes = aggregate_to_nodes_seq.data[pini:pend]
+    @test all(node_to_aggregate_seq[nodes] .== i_agg)
+end
+
+# Test parallel collect_nodes_in_aggregate
+diagG = dense_diag(G_dist)
+node_to_aggregate_dist, node_aggregates_dist = PartitionedSolvers.aggregate(G_dist,diagG;epsilon)
+aggregate_to_nodes_dist = PartitionedSolvers.collect_nodes_in_aggregate(node_to_aggregate_dist, node_aggregates_dist)
+map(partition(aggregate_to_nodes_dist), partition(node_to_aggregate_dist), partition(node_aggregates_dist)) do my_aggregate_to_nodes, my_node_to_aggregate, my_aggregates
+    @test length(my_aggregate_to_nodes.data) == length(my_node_to_aggregate)
+    global_to_local_aggregate = global_to_local(my_aggregates)
+    local_aggregates = global_to_local_aggregate[my_aggregates]
+    own_node_to_local_aggregate = map(my_node_to_aggregate) do global_aggregate
+        global_to_local_aggregate[global_aggregate]
+    end
+    for i_agg in local_aggregates
+        pini = my_aggregate_to_nodes.ptrs[i_agg]
+        pend = my_aggregate_to_nodes.ptrs[i_agg+1]-1
+        nodes = my_aggregate_to_nodes.data[pini:pend]
+        @test all(own_node_to_local_aggregate[nodes] .== i_agg)
+    end
+end
+
+# Create nullspace vectors for tentative prolongator
+x_dist = node_coordinates_unit_cube(nodes_per_dir,parts_per_dir,ranks)
+B_dist = nullspace_linear_elasticity(x_dist, partition(axes(A_dist,2)))
+B_seq = [collect(b) for b in B_dist]
+
+# Test tentative prolongator with sequential linear elasticity matrix
+Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_nodes_seq,B_seq, block_size)
+@test Pc * stack(Bc) ≈ stack(B_seq)
+
+# Test tentative prolongator with parallel matrix
+Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_nodes_dist,B_dist, block_size)
+n_B = length(B_dist)
+for i in 1:n_B
+    @test isa(Bc[i], PVector)
+end
+Bc_matrix = zeros(size(Pc,2), length(Bc))
+for (i,b) in enumerate(Bc)
+    Bc_matrix[:,i] = collect(b)
+end
+B_matrix = zeros(size(Pc,1), length(Bc))
+for (i,b) in enumerate(B_dist)
+    B_matrix[:,i] = collect(b)
+end
+@test centralize(Pc) * Bc_matrix ≈ B_matrix
+
+
+# Test spectral radius, sequential and parallel
+diagA = dense_diag(A_dist)
+invD = 1 ./ diagA
+Dinv = PartitionedArrays.sparse_diag_matrix(invD,(axes(A_dist,1),axes(A_dist,1)))
+M = Dinv * A_dist
+ρ_exact = eigmax(Array(centralize(M)))
+cols = axes(M, 2)
+x0 = prand(partition(cols))
+x0_seq = collect(x0)
+l_dist, x = PartitionedSolvers.spectral_radius(M, x0, 10)
+l_seq, x = PartitionedSolvers.spectral_radius(centralize(M), x0_seq, 10)
+@test l_dist ≈ l_seq
+@test abs((l_dist-ρ_exact)/ρ_exact) < 0.2
 
-# First with a sequential matrix
+
+# Test amg
+# first with a sequential matrix
 nodes_per_dir = (100,100)
-A = laplace_matrix(nodes_per_dir)
+A = PartitionedArrays.laplace_matrix(nodes_per_dir)
 using Random
 Random.seed!(1)
 x = rand(size(A,2))
@@ -30,7 +234,7 @@ amg_statistics(S) |> display
 # Non-default options
 
 level_params = amg_level_params(;
-    pre_smoother = jacobi(;iters=10,omega=2/3),
+    pre_smoother = PartitionedSolvers.jacobi(;iters=10,omega=2/3),
     cycle = v_cycle,
 )
 
@@ -57,7 +261,7 @@ finalize!(S)
 # Now as a preconditioner
 
 level_params = amg_level_params(;
-    pre_smoother = gauss_seidel(;iters=1),
+    pre_smoother = PartitionedSolvers.gauss_seidel(;iters=1),
 )
 
 fine_params = amg_fine_params(;level_params)
@@ -72,6 +276,38 @@ solve!(y,S,b)
 update!(S,2*A)
 solve!(y,S,b)
 
+
+
+# Now for linear elasticity (sequential)
+
+parts_per_dir = (2,2,2)
+block_size = length(parts_per_dir)
+p = prod(parts_per_dir)
+ranks = DebugArray(LinearIndices((p,)))
+nodes_per_dir = map(i->block_size*i,parts_per_dir)
+args = PartitionedArrays.linear_elasticity_fem(nodes_per_dir,parts_per_dir,ranks)
+A_dist = psparse(args...) |> fetch
+A = centralize(A_dist)
+
+x = node_coordinates_unit_cube(nodes_per_dir,parts_per_dir, ranks)
+B_dist = nullspace_linear_elasticity(x, partition(axes(A_dist,2)))
+B = [collect(b) for b in B_dist]
+x_exact = rand(size(A,2))
+b = A*x_exact
+y = similar(x_exact)
+y .= 0
+
+level_params = amg_level_params_linear_elasticity(block_size)
+fine_params = amg_fine_params(;level_params)
+solver = amg(;fine_params)
+
+Pl = setup(solver,y,A,b;nullspace=B)
+cg!(y,A,b;Pl,verbose=true)
+println("Linear elasticity norm of error: $(norm(y-x_exact))")
+@test y ≈ x_exact
+
+
+
 # Now in parallel
 
 parts_per_dir = (2,2)
@@ -79,7 +315,7 @@ parts_per_dir = (2,2)
 np = prod(parts_per_dir)
 parts = DebugArray(LinearIndices((np,)))
 
 nodes_per_dir = (100,100)
-A = laplace_matrix(nodes_per_dir,parts_per_dir,parts)
+A = PartitionedArrays.laplace_matrix(nodes_per_dir,parts_per_dir,parts)
 x = pones(partition(axes(A,2)))
 b = A*x
@@ -105,7 +341,7 @@ solve!(y,S,b)
 finalize!(S)
 
 level_params = amg_level_params(;
-    pre_smoother = jacobi(;iters=1,omega=2/3),
+    pre_smoother = PartitionedSolvers.jacobi(;iters=1,omega=2/3),
     coarsening = smoothed_aggregation(;repartition_threshold=10000000)
 )
 
@@ -121,9 +357,9 @@ cg!(y,A,b;Pl,verbose=true)
 
 nodes_per_dir = (40,40,40)
 parts_per_dir = (2,2,1)
-nparts = prod(parts_per_dir)
-parts = LinearIndices((nparts,))
-A = laplace_matrix(nodes_per_dir,parts_per_dir,parts)
+np = prod(parts_per_dir)
+parts = LinearIndices((np,))
+A = PartitionedArrays.laplace_matrix(nodes_per_dir,parts_per_dir,parts)
 x_exact = pones(partition(axes(A,2)))
 b = A*x_exact
 x = similar(b,axes(A,2))
@@ -132,4 +368,29 @@ Pl = setup(amg(),x,A,b)
 _, history = cg!(x,A,b;Pl,log=true)
 display(history)
 
-end
+
+# Now for linear elasticity (parallel)
+
+parts_per_dir = (2,2,2)
+block_size = length(parts_per_dir)
+p = prod(parts_per_dir)
+ranks = DebugArray(LinearIndices((p,)))
+nodes_per_dir = map(i->block_size*i,parts_per_dir)
+args = linear_elasticity_fem(nodes_per_dir,parts_per_dir,ranks)
+A = psparse(args...) |> fetch
+x_exact = pones(partition(axes(A,2)))
+x = node_coordinates_unit_cube(nodes_per_dir,parts_per_dir, ranks)
+B = nullspace_linear_elasticity(x, partition(axes(A,2)))
+b = A*x_exact
+y = similar(b,axes(A,2))
+y .= 0
+
+level_params = amg_level_params_linear_elasticity(block_size)
+fine_params = amg_fine_params(;level_params)
+solver = amg(;fine_params)
+
+Pl = setup(solver,y,A,b;nullspace=B)
+cg!(y,A,b;Pl,verbose=true)
+@test y ≈ x_exact
+
+end
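[review note] The elasticity tests rely on nullspace_linear_elasticity providing the near-null space B of 3d elasticity, i.e. the rigid-body modes (three translations, three rotations) evaluated at the node coordinates. A toy, hand-rolled version for a few hypothetical nodes, just to make the structure of B explicit:

    using LinearAlgebra

    coords = [rand(3) for _ in 1:4]      # 4 nodes, 12 dofs
    B = [zeros(12) for _ in 1:6]
    for (node, (x, y, z)) in enumerate(coords)
        d = 3 * (node - 1)
        B[1][d+1] = 1.0                  # translation in x
        B[2][d+2] = 1.0                  # translation in y
        B[3][d+3] = 1.0                  # translation in z
        B[4][d+1], B[4][d+2] = -y, x     # rotation about z
        B[5][d+2], B[5][d+3] = -z, y     # rotation about x
        B[6][d+1], B[6][d+3] = z, -x     # rotation about y
    end
    @assert rank(stack(B)) == 6          # six independent modes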