From fe2cf93a43eb32a91be20324809d0e66db4ecba2 Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Mon, 1 Jul 2024 09:40:41 +0200 Subject: [PATCH 01/18] Add sequential strength graph computation --- PartitionedSolvers/src/amg.jl | 99 ++++++++++++++++++++++ PartitionedSolvers/test/amg_tests.jl | 118 ++++++++++++++++++++++++++- 2 files changed, 215 insertions(+), 2 deletions(-) diff --git a/PartitionedSolvers/src/amg.jl b/PartitionedSolvers/src/amg.jl index 499a5230..981db5bf 100644 --- a/PartitionedSolvers/src/amg.jl +++ b/PartitionedSolvers/src/amg.jl @@ -306,6 +306,105 @@ function smoothed_aggregation(; (coarsen, coarsen!) end +function smoothed_aggregation_with_block_size(; + epsilon = 0, + approximate_omega = omega_for_1d_laplace, + tentative_prolongator = tentative_prolongator_for_laplace, + repartition_threshold = 2000, + block_size = 1, + ) + function coarsen(A,B) + # build strength graph + G = strength_graph(A, block_size=block_size, theta=epsilon) + diagG = dense_diag(G) + node_to_aggregate, node_aggregates = aggregate(G,diagG;epsilon) + # compute dof_to_aggregate, dof_aggregates + n_nullspace_vecs = length(B) + P0 = constant_prolongator(dof_to_aggregate, dof_aggregates,n_nullspace_vecs) + P0,Bc = tentative_prolongator(P0,B) # changes needed here + P = smoothed_prolongator(A,P0,diagG;approximate_omega) #changes needed here + R = transpose(P) + Ac,cache = rap(R,A,P;reuse=true) + Ac,Bc,R,P,cache,repartition_threshold = enhance_coarse_partition(A,Ac,Bc,R,P,cache,repartition_threshold)#maybe changes here + Ac,Bc,R,P,cache + end + function coarsen!(A,Ac,R,P,cache) + rap!(Ac,R,A,P,cache) + Ac,R,P,cache + end + (coarsen, coarsen!) +end + +function getblock!(B,A,ids_i,ids_j) + for (j,J) in enumerate(ids_j) + for (i,I) in enumerate(ids_i) + B[i,j] = A[I,J] + end + end +end + +function strength_graph(A::AbstractSparseMatrix, block_size::Integer; theta = 0) + + if block_size < 1 + error("Block size must be equal to or larger than 1.") + end + + if A.n != A.m + error("Matrix must be square.") + end + + if A.n % block_size != 0 + error("Matrix size must be multiple of block size.") + end + + if block_size == 1 + return A + end + + if theta < 0 + error("Expected a positive theta.") + end + + n_dofs = A.m + nnodes = Integer(n_dofs/block_size) + B = zeros(block_size,block_size) + diag_norms = zeros(nnodes) + + I = Int[] + J = Int[] + V = Float64[] + + for i_node in 1:nnodes + i_dofs = ((i_node-1)*block_size+1) : i_node*block_size + getblock!(B,A,i_dofs,i_dofs) + diag_norms[i_node] = norm(B) + if theta <= 1 + push!(I, i_node) + push!(J, i_node) + push!(V, 1.0) + end + end + + for j_node in 1:nnodes + for i_node in 1:nnodes + if j_node != i_node + i_dofs = ((i_node-1)*block_size+1) : i_node*block_size + j_dofs = ((j_node-1)*block_size+1) : j_node*block_size + getblock!(B,A,i_dofs,j_dofs) + # Calculate strength according to https://github.com/pyamg/pyamg/blob/e1fe54c93be1029c02ddcf84c2338a607b088703/pyamg/strength.py#L275 + if norm(B) >= theta * sqrt(diag_norms[i_node] * diag_norms[j_node]) + push!(I, i_node) + push!(J, j_node) + push!(V, 1.0) + end + end + end + end + + G = sparse(I, J, V, nnodes, nnodes) + G +end + function amg_level_params(; pre_smoother = additive_schwarz(gauss_seidel(;iters=1);iters=1), coarsening = smoothed_aggregation(;), diff --git a/PartitionedSolvers/test/amg_tests.jl b/PartitionedSolvers/test/amg_tests.jl index 125a584b..fbd0c626 100644 --- a/PartitionedSolvers/test/amg_tests.jl +++ b/PartitionedSolvers/test/amg_tests.jl @@ -6,6 +6,121 @@ using PartitionedSolvers using 
LinearAlgebra using Test using IterativeSolvers: cg! +using SparseArrays + +# Test strength graph computation +# First with Psparse matrix +nrows = 18 +ngrid = 9 +nrowgrid = 3 +ncols = nrows +p = 4 +ranks = DebugArray(LinearIndices((p,))) +row_partition = uniform_partition(ranks, nrows) + +IJV = map(row_partition) do row_indices + I, J, V = Int[], Int[], Float64[] + for i in local_to_global(row_indices) + push!(V, 9) + push!(I, i) + push!(J, i) + + grid_point = 0 + if (i % 2) == 1 + # 1st dimension + push!(V, -1) + push!(I, i) + push!(J, i+1) + grid_point = div((i+1),2) + else + # 2nd dimension + push!(V, -1) + push!(I, i) + push!(J, i-1) + grid_point = div(i,2) + end + north = grid_point - nrowgrid + south = grid_point + nrowgrid + east = grid_point - 1 + west = grid_point + 1 + if north >= 1 + push!(V, -1) + push!(I, i) + push!(J, 2*north - 1) + push!(V, -1) + push!(I, i) + push!(J, 2*north) + end + if south <= ngrid + push!(V, -1) + push!(I, i) + push!(J, 2*south -1) + push!(V, -1) + push!(I, i) + push!(J, 2*south) + end + if grid_point % nrowgrid != 1 + push!(V, -1) + push!(I, i) + push!(J, 2*east-1) + push!(V, -1) + push!(I, i) + push!(J, 2*east) + end + if grid_point % nrowgrid != 0 + push!(V, -1) + push!(I, i) + push!(J, 2*west-1) + push!(V, -1) + push!(I, i) + push!(J, 2*west) + end + end + I,J,V +end + +I,J,V = tuple_of_arrays(IJV) +col_partition = row_partition +A = psparse(I,J,V,row_partition, col_partition) |> fetch +# TODO: implement and test psparse matrix +#R = PartitionedSolvers.strength_graph(A, 2) + +# Now with CSC sparse matrix +# Test sample matrix with block size 2 +theta = 0.02 +A = centralize(A) + +# build solution +I = [1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9] +J = [1, 2, 4, + 1, 2, 3, 5, + 2, 3, 6, + 1, 4, 5, 7, + 2, 4, 5, 6, 8, + 3, 5, 6, 9, + 4, 7, 8, + 5, 7, 8, 9, + 6, 8, 9] +V = ones(length(I)) + +solution = sparse(I, J, V, ngrid, ngrid) +R = PartitionedSolvers.strength_graph(A, 2, theta=theta) +@test solution ≈ R + +# Another test with 3 dims +M = rand([-2.0, -1, 1, 2], (3, 3)) +M = sparse(M) +A = blockdiag(M, M, M) +solution = sparse([1, 2, 3], [1, 2, 3], fill(1.0, 3), 3, 3) +R = PartitionedSolvers.strength_graph(A, 3, theta=theta) +@test solution ≈ R + +# Test with minimal matrix size +R = PartitionedSolvers.strength_graph(M, 3, theta=theta) +solution = sparse([1], [1], 1.0, 1, 1) +@test solution ≈ R + # First with a sequential matrix nodes_per_dir = (100,100) @@ -124,5 +239,4 @@ x .= 0 Pl = setup(amg(),x,A,b) _, history = cg!(x,A,b;Pl,log=true) display(history) - -end +end \ No newline at end of file From 9d93e310d7a3ad629c722528ef3cbfed532060d8 Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Fri, 19 Jul 2024 08:51:10 +0200 Subject: [PATCH 02/18] Add strength graph and tentative prolongator with block size --- PartitionedSolvers/src/amg.jl | 308 ++++++++++++++++++++++++--- PartitionedSolvers/test/amg_tests.jl | 131 +++++++++--- 2 files changed, 381 insertions(+), 58 deletions(-) diff --git a/PartitionedSolvers/src/amg.jl b/PartitionedSolvers/src/amg.jl index 981db5bf..6f848413 100644 --- a/PartitionedSolvers/src/amg.jl +++ b/PartitionedSolvers/src/amg.jl @@ -145,7 +145,6 @@ function constant_prolongator(node_to_aggregate,aggregates,n_nullspace_vecs) nnodes = length(node_to_aggregate) pending = typeof_aggregate(0) isolated = typeof_aggregate(-1) - nnodes = length(node_to_aggregate) naggregates = length(aggregates) aggregate_to_nodes_ptrs = zeros(Int,naggregates+1) for node in 1:nnodes @@ -198,6 
+197,79 @@ function constant_prolongator(node_to_aggregate::PVector,aggregates::PRange,n_nu P0 end +function collect_nodes_in_aggregate(node_to_aggregate,aggregates) + typeof_aggregate = eltype(node_to_aggregate) + nnodes = length(node_to_aggregate) + pending = typeof_aggregate(0) + isolated = typeof_aggregate(-1) + nnodes = length(node_to_aggregate) + naggregates = length(aggregates) + aggregate_to_nodes_ptrs = zeros(Int,naggregates+1) + for node in 1:nnodes + agg = node_to_aggregate[node] + if agg == pending || agg == isolated + continue + end + aggregate_to_nodes_ptrs[agg+1] += 1 + end + length_to_ptrs!(aggregate_to_nodes_ptrs) + ndata = aggregate_to_nodes_ptrs[end]-1 + aggregate_to_nodes_data = zeros(Int,ndata) + for node in 1:nnodes + agg = node_to_aggregate[node] + if agg == pending || agg == isolated + continue + end + p = aggregate_to_nodes_ptrs[agg] + aggregate_to_nodes_data[p] = node + aggregate_to_nodes_ptrs[agg] += 1 + end + rewind_ptrs!(aggregate_to_nodes_ptrs) + aggregate_to_nodes = jagged_array(aggregate_to_nodes_data,aggregate_to_nodes_ptrs) + aggregate_to_nodes +end + +function remove_singleton_aggregates(aggregate_to_nodes_old) + typeof_nodes = eltype(aggregate_to_nodes_old[1]) + n_aggregates_old = length(aggregate_to_nodes_old) + n_aggregates = 0 + n_data = 0 + for i in 1:n_aggregates_old + nnodes = aggregate_to_nodes_old.ptrs[i+1] - aggregate_to_nodes_old.ptrs[i] + if nnodes < 2 + continue + end + n_aggregates += 1 + n_data += nnodes + end + + # If there are no singleton aggregates, stop and return old array + if n_aggregates == n_aggregates_old + return aggregate_to_nodes_old + end + + # Else copy non-singleton aggregates to new array + aggregate_to_nodes_ptrs = zeros(Int, n_aggregates+1) + aggregate_to_nodes_data = zeros(typeof_nodes, n_data) + aggregate_to_nodes_ptrs[1] = 1 + agg = 1 + for i in 1:n_aggregates_old + nnodes = aggregate_to_nodes_old.ptrs[i+1] - aggregate_to_nodes_old.ptrs[i] + if nnodes < 2 + continue + end + aggregate_to_nodes_ptrs[agg+1] = aggregate_to_nodes_ptrs[agg] + nnodes + pini = aggregate_to_nodes_ptrs[agg] + pend = aggregate_to_nodes_ptrs[agg+1]-1 + pini_old = aggregate_to_nodes_old.ptrs[i] + pend_old = aggregate_to_nodes_old.ptrs[i+1]-1 + aggregate_to_nodes_data[pini:pend] = aggregate_to_nodes_old.data[pini_old:pend_old] + agg += 1 + end + aggregate_to_nodes = jagged_array(aggregate_to_nodes_data,aggregate_to_nodes_ptrs) + aggregate_to_nodes +end + function tentative_prolongator_for_laplace(P0,B) n_nullspace_vecs = length(B) if n_nullspace_vecs != 1 @@ -207,6 +279,107 @@ function tentative_prolongator_for_laplace(P0,B) P0,Bc end +function tentative_prolongator_with_block_size(aggregate_to_nodes::JaggedArray,B, block_size) + # A draft for the scalar case is commented below + ## TODO assumes CSC + # Algorithm 7 in https://mediatum.ub.tum.de/download/1229321/1229321.pdf + + if length(B) < 1 + error("Null space must contain at least one null vector.") + end + + n_aggregates = length(aggregate_to_nodes.ptrs)-1 + n_B = length(B) + n_dofs = length(B[1]) + n_dofs_c = n_aggregates * n_B + Bc = [Vector{Float64}(undef,n_dofs_c) for _ in 1:n_B] + + # Build P0 colptr + P0_colptr = zeros(Int, n_dofs_c + 1) + P0_colptr[1] = 1 + for i_agg in 1:n_aggregates + pini = aggregate_to_nodes.ptrs[i_agg] + pend = aggregate_to_nodes.ptrs[i_agg+1]-1 + n_nodes = length(pini:pend) + for b in 1:n_B + col = (i_agg - 1) * n_B + b + P0_colptr[col+1] += P0_colptr[col] + n_nodes * block_size + end + end + + # Build P0 rowvals + nnz = length(aggregate_to_nodes.data) * 
block_size * n_B + P0_rowval = zeros(Int, nnz) + for i_agg in 1:n_aggregates + pini = aggregate_to_nodes.ptrs[i_agg] + pend = aggregate_to_nodes.ptrs[i_agg+1]-1 + i_nodes = aggregate_to_nodes.data[pini:pend] + for b in 1:n_B + rval_ini = P0_colptr[(i_agg-1)*n_B+b] + for i_node in i_nodes + rval_end = rval_ini+block_size-1 + P0_rowval[rval_ini:rval_end] = node_to_dofs(i_node, block_size) + rval_ini = rval_end + 1 + end + end + end + + P0 = SparseMatrixCSC( + n_dofs, + n_dofs_c, + P0_colptr, + P0_rowval, + ones(nnz)) + + # Fill with nullspace vectors + for i_agg in 1:n_aggregates + for b in 1:n_B + col = (i_agg-1) * n_B + b + pini = P0.colptr[col] + pend = P0.colptr[col+1]-1 + rowids = P0.rowval[pini:pend] + P0.nzval[pini:pend] = B[b][rowids] + end + end + + # Compute QR decomposition for nullspace in each aggregate + for i_agg in 1:n_aggregates + # Copy values to Bi + pini = aggregate_to_nodes.ptrs[i_agg] + pend = aggregate_to_nodes.ptrs[i_agg+1]-1 + n_i = length(pini:pend) * block_size + Bi = zeros(n_i, n_B) + P0cols = (i_agg-1)*n_B+1 : i_agg*n_B + for (b, col) in enumerate(P0cols) + pini = P0.colptr[col] + pend = P0.colptr[col+1]-1 + Bi[:,b] = P0.nzval[pini:pend] + end + + # Compute thin QR decomposition + # TODO: check if QR decomposition is possible if n_i >= n_B, raise an error + F= qr(Bi) + Qi = F.Q * Matrix(I,(n_i, n_B)) + Qi = Qi[:,1:n_B] + Ri = F.R + + # Build global tentative prolongator + for (b, col) in enumerate(P0cols) + pini = P0.colptr[col] + pend = P0.colptr[col+1]-1 + P0.nzval[pini:pend] = Qi[:,b] + end + + # Build coarse null space + Bcrows = P0cols + for b in 1:n_B + Bc[b][Bcrows] = Ri[:, b] + end + end + + P0, Bc +end + function generic_tentative_prolongator(P0::SparseArrays.AbstractSparseMatrixCSC,B) error("not implemented yet") # A draft for the scalar case is commented below @@ -247,19 +420,42 @@ end function smoothed_prolongator(A,P0,diagA=dense_diag(A);approximate_omega) # TODO the performance of this one can be improved invDiagA = 1 ./ diagA - omega = approximate_omega(invDiagA,A) Dinv = PartitionedArrays.sparse_diag_matrix(invDiagA,(axes(A,1),axes(A,1))) + omega = approximate_omega(invDiagA,A,Dinv*A) P = (I-omega*Dinv*A)*P0 P end -function omega_for_1d_laplace(invD,A) +function omega_for_1d_laplace(invD,A, DinvA) # in general this will be (4/3)/ρ(D^-1*A) # with a cheap approximation (e.g., power method) of the spectral radius ρ(D^-1*A) # ρ(D^-1*A) == 2 for 1d Laplace problem 2/3 end +function lambda_generic(invD,A,DinvA) + ω = 4/3 + # Perform a few iterations of Power method to estimate lambda + # (Remark 3.5.2. 
in https://mediatum.ub.tum.de/download/1229321/1229321.pdf) + ρ = spectral_radius(DinvA, 20) + ω/ρ +end + +function spectral_radius(A, x, num_iterations:: Int) + # Choose diagonal vector as initial guess + y = zeros(size(A,1)) + # Power iteration + for i in 1:num_iterations + mul!(y, A, x) + y_norm = norm(y) + x = y/y_norm + end + # Compute spectral radius using Rayleigh coefficient + y = mul!(y, A, x) + ρ = (y' * x) / (x' * x) + abs(ρ) +end + function enhance_coarse_partition(A,Ac,Bc,R,P,cache,repartition_threshold) Ac,Bc,R,P,cache,repartition_threshold end @@ -308,21 +504,19 @@ end function smoothed_aggregation_with_block_size(; epsilon = 0, - approximate_omega = omega_for_1d_laplace, - tentative_prolongator = tentative_prolongator_for_laplace, + approximate_omega = lambda_generic, + tentative_prolongator = tentative_prolongator_with_block_size, repartition_threshold = 2000, block_size = 1, ) function coarsen(A,B) # build strength graph - G = strength_graph(A, block_size=block_size, theta=epsilon) + G = strength_graph(A, block_size=block_size, epsilon=epsilon) diagG = dense_diag(G) node_to_aggregate, node_aggregates = aggregate(G,diagG;epsilon) - # compute dof_to_aggregate, dof_aggregates - n_nullspace_vecs = length(B) - P0 = constant_prolongator(dof_to_aggregate, dof_aggregates,n_nullspace_vecs) - P0,Bc = tentative_prolongator(P0,B) # changes needed here - P = smoothed_prolongator(A,P0,diagG;approximate_omega) #changes needed here + aggregate_to_nodes = collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) + Pc,Bc,perm = tentative_prolongator(aggregate_to_nodes,B, block_size) + P = smoothed_prolongator(A,Pc,diagG;approximate_omega) R = transpose(P) Ac,cache = rap(R,A,P;reuse=true) Ac,Bc,R,P,cache,repartition_threshold = enhance_coarse_partition(A,Ac,Bc,R,P,cache,repartition_threshold)#maybe changes here @@ -343,7 +537,7 @@ function getblock!(B,A,ids_i,ids_j) end end -function strength_graph(A::AbstractSparseMatrix, block_size::Integer; theta = 0) +function strength_graph(A::AbstractSparseMatrix, block_size::Integer; epsilon = 0) if block_size < 1 error("Block size must be equal to or larger than 1.") @@ -361,50 +555,102 @@ function strength_graph(A::AbstractSparseMatrix, block_size::Integer; theta = 0) return A end - if theta < 0 - error("Expected a positive theta.") + if epsilon < 0 + error("Expected a positive epsilon.") end n_dofs = A.m - nnodes = Integer(n_dofs/block_size) + nnodes = div(n_dofs, block_size) B = zeros(block_size,block_size) diag_norms = zeros(nnodes) + + # Count nonzero values + nnz = 0 - I = Int[] - J = Int[] - V = Float64[] + # If epsilon <= 1, all diagonal entries are in the graph + if epsilon <= 1 + nnz += nnodes + end + # Calculate norms of diagonal values first for i_node in 1:nnodes - i_dofs = ((i_node-1)*block_size+1) : i_node*block_size + i_dofs = node_to_dofs(i_node, block_size) getblock!(B,A,i_dofs,i_dofs) diag_norms[i_node] = norm(B) - if theta <= 1 - push!(I, i_node) - push!(J, i_node) - push!(V, 1.0) - end end for j_node in 1:nnodes for i_node in 1:nnodes if j_node != i_node - i_dofs = ((i_node-1)*block_size+1) : i_node*block_size - j_dofs = ((j_node-1)*block_size+1) : j_node*block_size + i_dofs = node_to_dofs(i_node, block_size) + j_dofs = node_to_dofs(j_node, block_size) getblock!(B,A,i_dofs,j_dofs) # Calculate strength according to https://github.com/pyamg/pyamg/blob/e1fe54c93be1029c02ddcf84c2338a607b088703/pyamg/strength.py#L275 - if norm(B) >= theta * sqrt(diag_norms[i_node] * diag_norms[j_node]) - push!(I, i_node) - push!(J, j_node) - 
push!(V, 1.0) + if norm(B) >= epsilon * sqrt(diag_norms[i_node] * diag_norms[j_node]) + nnz += 1 end end end end - + + # Allocate data structures for sparsematrix + I = zeros(Int, nnz) + J = zeros(Int, nnz) + V = zeros(nnz) + + i_nz = 1 + + for j_node in 1:nnodes + for i_node in 1:nnodes + if j_node == i_node + # Diagonal entries are always in graph if epsilon <= 1 + if epsilon <= 1 + I[i_nz] = i_node + J[i_nz] = j_node + V[i_nz] = 1.0 + i_nz += 1 + end + else + i_dofs = node_to_dofs(i_node, block_size) + j_dofs = node_to_dofs(j_node, block_size) + getblock!(B,A,i_dofs,j_dofs) + # Calculate strength according to https://github.com/pyamg/pyamg/blob/e1fe54c93be1029c02ddcf84c2338a607b088703/pyamg/strength.py#L275 + if norm(B) >= epsilon * sqrt(diag_norms[i_node] * diag_norms[j_node]) + I[i_nz] = i_node + J[i_nz] = j_node + V[i_nz] = 1.0 + i_nz += 1 + end + end + end + end + G = sparse(I, J, V, nnodes, nnodes) G end +function node_to_dofs(i_node, block_size) + # Convert single node in graph to range of dofs + ((i_node-1)*block_size+1) : i_node*block_size +end + +function dofs_to_node(dofs, block_size) + # Convert range of dofs to single node in graph + div(dofs[end], block_size) +end + +function amg_level_params_linear_elasticity(; + pre_smoother = additive_schwarz(gauss_seidel(;iters=1);iters=1), + coarsening = smoothed_aggregation(approximate_omega = lambda_generic, + tentative_prolongator = tentative_prolongator_with_block_size), + cycle = v_cycle, + pos_smoother = pre_smoother, + ) + + level_params = (;pre_smoother,pos_smoother,coarsening,cycle) + level_params +end + function amg_level_params(; pre_smoother = additive_schwarz(gauss_seidel(;iters=1);iters=1), coarsening = smoothed_aggregation(;), diff --git a/PartitionedSolvers/test/amg_tests.jl b/PartitionedSolvers/test/amg_tests.jl index fbd0c626..1ed77203 100644 --- a/PartitionedSolvers/test/amg_tests.jl +++ b/PartitionedSolvers/test/amg_tests.jl @@ -1,5 +1,8 @@ module AMGTests +# just for debugging +using Pkg +Pkg.activate("PartitionedSolvers") using PartitionedArrays using PartitionedArrays: laplace_matrix using PartitionedSolvers @@ -10,13 +13,12 @@ using SparseArrays # Test strength graph computation # First with Psparse matrix -nrows = 18 -ngrid = 9 -nrowgrid = 3 -ncols = nrows +ndofs = 18 +nnodes = 9 +nrows = 3 p = 4 ranks = DebugArray(LinearIndices((p,))) -row_partition = uniform_partition(ranks, nrows) +row_partition = uniform_partition(ranks, ndofs) IJV = map(row_partition) do row_indices I, J, V = Int[], Int[], Float64[] @@ -25,24 +27,24 @@ IJV = map(row_partition) do row_indices push!(I, i) push!(J, i) - grid_point = 0 + node = 0 if (i % 2) == 1 # 1st dimension push!(V, -1) push!(I, i) push!(J, i+1) - grid_point = div((i+1),2) + node = div((i+1),2) else # 2nd dimension push!(V, -1) push!(I, i) push!(J, i-1) - grid_point = div(i,2) + node = div(i,2) end - north = grid_point - nrowgrid - south = grid_point + nrowgrid - east = grid_point - 1 - west = grid_point + 1 + north = node - nrows + south = node + nrows + east = node - 1 + west = node + 1 if north >= 1 push!(V, -1) push!(I, i) @@ -51,7 +53,7 @@ IJV = map(row_partition) do row_indices push!(I, i) push!(J, 2*north) end - if south <= ngrid + if south <= nnodes push!(V, -1) push!(I, i) push!(J, 2*south -1) @@ -59,7 +61,7 @@ IJV = map(row_partition) do row_indices push!(I, i) push!(J, 2*south) end - if grid_point % nrowgrid != 1 + if node % nrows != 1 push!(V, -1) push!(I, i) push!(J, 2*east-1) @@ -67,7 +69,7 @@ IJV = map(row_partition) do row_indices push!(I, i) push!(J, 
2*east) end - if grid_point % nrowgrid != 0 + if node % nrows != 0 push!(V, -1) push!(I, i) push!(J, 2*west-1) @@ -83,12 +85,12 @@ I,J,V = tuple_of_arrays(IJV) col_partition = row_partition A = psparse(I,J,V,row_partition, col_partition) |> fetch # TODO: implement and test psparse matrix -#R = PartitionedSolvers.strength_graph(A, 2) + # Now with CSC sparse matrix # Test sample matrix with block size 2 -theta = 0.02 -A = centralize(A) +epsilon = 0.02 +A_test = centralize(A) # build solution I = [1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, @@ -104,26 +106,101 @@ J = [1, 2, 4, 6, 8, 9] V = ones(length(I)) -solution = sparse(I, J, V, ngrid, ngrid) -R = PartitionedSolvers.strength_graph(A, 2, theta=theta) -@test solution ≈ R +solution = sparse(I, J, V, nnodes, nnodes) +G_test = PartitionedSolvers.strength_graph(A_test, 2, epsilon=epsilon) +@test solution ≈ G_test # Another test with 3 dims M = rand([-2.0, -1, 1, 2], (3, 3)) M = sparse(M) A = blockdiag(M, M, M) solution = sparse([1, 2, 3], [1, 2, 3], fill(1.0, 3), 3, 3) -R = PartitionedSolvers.strength_graph(A, 3, theta=theta) -@test solution ≈ R +G = PartitionedSolvers.strength_graph(A, 3, epsilon=epsilon) +@test solution ≈ G # Test with minimal matrix size -R = PartitionedSolvers.strength_graph(M, 3, theta=theta) +G = PartitionedSolvers.strength_graph(M, 3, epsilon=epsilon) solution = sparse([1], [1], 1.0, 1, 1) -@test solution ≈ R +@test solution ≈ G + +# Test tentative prolongator +function random_nullspace(ndofs, n_B) + B = Array{Array{Float64, 1}, 1}(undef, n_B) + for i = 1:n_B + B[i] = rand(ndofs) + end + B +end +# Test tentative prolongator with laplace matrix +nodes_per_dir = (90,90) +block_size = 3 +A = laplace_matrix(nodes_per_dir) +G = PartitionedSolvers.strength_graph(A, block_size, epsilon=epsilon) +diagG = dense_diag(G) +B = random_nullspace(size(A, 1), block_size) +node_to_aggregate, node_aggregates = PartitionedSolvers.aggregate(G,diagG;epsilon) +aggregate_to_nodes_old = PartitionedSolvers.collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) +aggregate_to_nodes = PartitionedSolvers.remove_singleton_aggregates(aggregate_to_nodes_old) +# Assert there are no singleton aggregates +@assert length(aggregate_to_nodes_old) == length(aggregate_to_nodes) +Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_nodes,B, block_size) +@test Pc * stack(Bc) ≈ stack(B) + + +# Test spectral radius estimation +n_its = [2, 4, 6, 8, 10, 15, 20, 30, 40, 50, 60, 70, 80, 90, 100] +n_trials = 10 +errors_powm = zeros(n_trials, length(n_its)) +time_powm = zeros(n_trials,length(n_its)) +errors_spectrad = zeros(n_trials,length(n_its)) +time_spectrad = zeros(n_trials,length(n_its)) +using IterativeSolvers +using Plots +using LinearMaps +using Statistics +A = laplace_matrix((100,100)) +diagA = dense_diag(A) +invD = 1 ./ diagA +x0 = rand(size(A,2)) +M = invD .* A +Fmap = LinearMap(M) +exp = 2.0 +for (i, n_it) in enumerate(n_its) + for t in 1:n_trials + # Power method + tic = time_ns() + λ, x = powm!(Fmap, x0, maxiter = n_it) + toc = time_ns() + time_powm[t,i] = toc - tic + errors_powm[t,i] = abs((λ-exp)/exp) + + # own implementation + tic = time_ns() + ρ = PartitionedSolvers.spectral_radius(M,x0, n_it) + toc = time_ns() + time_spectrad[t,i] = toc-tic + errors_spectrad[t,i] = abs((ρ-exp)/exp) + end +end +avg_time_powm = median(time_powm, dims=1) +avg_time_spectrad = median(time_spectrad, dims=1) +avg_error_powm = median(errors_powm, dims=1) +avg_error_spectrad = median(errors_spectrad, dims=1) +avg_time_powm = 
avg_time_powm ./ 10^9 +avg_time_spectrad = avg_time_spectrad ./ 10^9 +p1 = plot(n_its, [avg_error_powm' avg_error_spectrad'], label = ["powm" "spectrad"], + marker=[:c :x], ms=2) +plot!(p1, xlabel="#iterations", ylabel="avg rel error", xticks = n_its) +p2 = plot(n_its, [avg_time_powm' avg_time_spectrad'], label = ["powm" "spectrad"], + marker=[:c :x], ms=2) +plot!(p2, xlabel="#iterations", ylabel="avg runtime (s)", xticks = n_its) +p = plot(p1, p2, layout=(2,1), suptitle="Convergence of powm and spectral_radius") +savefig(p, "C:/Users/gelie/Home/ComputationalScience/GSoC/powm_convergence.png") +display(p) # First with a sequential matrix -nodes_per_dir = (100,100) +#= nodes_per_dir = (100,100) A = laplace_matrix(nodes_per_dir) using Random Random.seed!(1) @@ -238,5 +315,5 @@ x = similar(b,axes(A,2)) x .= 0 Pl = setup(amg(),x,A,b) _, history = cg!(x,A,b;Pl,log=true) -display(history) +display(history) =# end \ No newline at end of file From e7ea591a15d87daeccda33e10e9f497d2a43b4b1 Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Fri, 19 Jul 2024 08:52:12 +0200 Subject: [PATCH 03/18] Add IterativeSolvers dependency --- PartitionedSolvers/src/PartitionedSolvers.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/PartitionedSolvers/src/PartitionedSolvers.jl b/PartitionedSolvers/src/PartitionedSolvers.jl index 1c7899dc..bf08d25a 100644 --- a/PartitionedSolvers/src/PartitionedSolvers.jl +++ b/PartitionedSolvers/src/PartitionedSolvers.jl @@ -3,6 +3,7 @@ module PartitionedSolvers using PartitionedArrays using SparseArrays using LinearAlgebra +using IterativeSolvers export setup export solve! From ae89c8d6405cc86fba38c6c3c92ec912a550c2f5 Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Wed, 24 Jul 2024 10:06:09 +0200 Subject: [PATCH 04/18] Add spectral radius estimation --- PartitionedSolvers/src/amg.jl | 18 ++-- PartitionedSolvers/test/amg_tests.jl | 137 +++++++++++++++++---------- 2 files changed, 101 insertions(+), 54 deletions(-) diff --git a/PartitionedSolvers/src/amg.jl b/PartitionedSolvers/src/amg.jl index 6f848413..a47b333a 100644 --- a/PartitionedSolvers/src/amg.jl +++ b/PartitionedSolvers/src/amg.jl @@ -348,6 +348,11 @@ function tentative_prolongator_with_block_size(aggregate_to_nodes::JaggedArray,B pini = aggregate_to_nodes.ptrs[i_agg] pend = aggregate_to_nodes.ptrs[i_agg+1]-1 n_i = length(pini:pend) * block_size + + if n_i < n_B + error("Singleton node aggregate: Case not implemented.") + end + Bi = zeros(n_i, n_B) P0cols = (i_agg-1)*n_B+1 : i_agg*n_B for (b, col) in enumerate(P0cols) @@ -357,7 +362,6 @@ function tentative_prolongator_with_block_size(aggregate_to_nodes::JaggedArray,B end # Compute thin QR decomposition - # TODO: check if QR decomposition is possible if n_i >= n_B, raise an error F= qr(Bi) Qi = F.Q * Matrix(I,(n_i, n_B)) Qi = Qi[:,1:n_B] @@ -437,15 +441,16 @@ function lambda_generic(invD,A,DinvA) ω = 4/3 # Perform a few iterations of Power method to estimate lambda # (Remark 3.5.2. 
in https://mediatum.ub.tum.de/download/1229321/1229321.pdf) - ρ = spectral_radius(DinvA, 20) + x0 = rand(size(DinvA,2)) + ρ = spectral_radius(DinvA, x0, maxiter=20) ω/ρ end function spectral_radius(A, x, num_iterations:: Int) # Choose diagonal vector as initial guess - y = zeros(size(A,1)) + y = similar(x) # Power iteration - for i in 1:num_iterations + for _ in 1:num_iterations mul!(y, A, x) y_norm = norm(y) x = y/y_norm @@ -453,7 +458,7 @@ function spectral_radius(A, x, num_iterations:: Int) # Compute spectral radius using Rayleigh coefficient y = mul!(y, A, x) ρ = (y' * x) / (x' * x) - abs(ρ) + abs(ρ), x end function enhance_coarse_partition(A,Ac,Bc,R,P,cache,repartition_threshold) @@ -516,7 +521,8 @@ function smoothed_aggregation_with_block_size(; node_to_aggregate, node_aggregates = aggregate(G,diagG;epsilon) aggregate_to_nodes = collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) Pc,Bc,perm = tentative_prolongator(aggregate_to_nodes,B, block_size) - P = smoothed_prolongator(A,Pc,diagG;approximate_omega) + diagA = dense_diag(A) + P = smoothed_prolongator(A,Pc,diagA;approximate_omega) R = transpose(P) Ac,cache = rap(R,A,P;reuse=true) Ac,Bc,R,P,cache,repartition_threshold = enhance_coarse_partition(A,Ac,Bc,R,P,cache,repartition_threshold)#maybe changes here diff --git a/PartitionedSolvers/test/amg_tests.jl b/PartitionedSolvers/test/amg_tests.jl index c4fea87c..c4d46524 100644 --- a/PartitionedSolvers/test/amg_tests.jl +++ b/PartitionedSolvers/test/amg_tests.jl @@ -7,8 +7,11 @@ using PartitionedArrays using PartitionedArrays: laplace_matrix using PartitionedSolvers using LinearAlgebra -using Test +using IterativeSolvers using IterativeSolvers: cg! +using Plots +using Statistics +using Test using SparseArrays # Test strength graph computation @@ -133,71 +136,109 @@ function random_nullspace(ndofs, n_B) end # Test tentative prolongator with laplace matrix -nodes_per_dir = (90,90) block_size = 3 -A = laplace_matrix(nodes_per_dir) +parts_per_dir = (2,2,2) +p = prod(parts_per_dir) +ranks = DebugArray(LinearIndices((p,))) +nodes_per_dir = map(i->block_size*i,parts_per_dir) +args = laplacian_fdm(nodes_per_dir,parts_per_dir,ranks) +A_dist = psparse(args...) 
|> fetch +A = centralize(A_dist) +println("dims A: $(size(A))") G = PartitionedSolvers.strength_graph(A, block_size, epsilon=epsilon) diagG = dense_diag(G) B = random_nullspace(size(A, 1), block_size) node_to_aggregate, node_aggregates = PartitionedSolvers.aggregate(G,diagG;epsilon) -aggregate_to_nodes_old = PartitionedSolvers.collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) -aggregate_to_nodes = PartitionedSolvers.remove_singleton_aggregates(aggregate_to_nodes_old) -# Assert there are no singleton aggregates -@assert length(aggregate_to_nodes_old) == length(aggregate_to_nodes) +aggregate_to_nodes = PartitionedSolvers.collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_nodes,B, block_size) @test Pc * stack(Bc) ≈ stack(B) +# Test spectral radius sequential & parallel +diagA = dense_diag(A_dist) +invD = 1 ./ diagA +Dinv = PartitionedArrays.sparse_diag_matrix(invD,(axes(A_dist,1),axes(A_dist,1))) +M = Dinv * A_dist +exp = eigmax(Array(centralize(M))) +cols = axes(M, 2) +x0 = prand(partition(cols)) +x0_seq = collect(x0) +l, x = PartitionedSolvers.spectral_radius(M, x0, 10) +lseq, x = PartitionedSolvers.spectral_radius(centralize(M), x0_seq, 10) +@test l ≈ lseq +@test abs((l-exp)/exp) < 2*10^-1 # Test spectral radius estimation -n_its = [2, 4, 6, 8, 10, 15, 20, 30, 40, 50, 60, 70, 80, 90, 100] -n_trials = 10 -errors_powm = zeros(n_trials, length(n_its)) -time_powm = zeros(n_trials,length(n_its)) -errors_spectrad = zeros(n_trials,length(n_its)) -time_spectrad = zeros(n_trials,length(n_its)) -using IterativeSolvers -using Plots -using LinearMaps -using Statistics -A = laplace_matrix((100,100)) -diagA = dense_diag(A) -invD = 1 ./ diagA -x0 = rand(size(A,2)) -M = invD .* A -Fmap = LinearMap(M) -exp = 2.0 -for (i, n_it) in enumerate(n_its) +#= maxiter = 5000 +nnodes = [8, 16, 32] +exp = [1.9396926207859075, 1.9829730996838997, 1.9954719225730886] +msizes = zeros(Int, length(nnodes)) +parts_per_dir = (2,2,2) +p = prod(parts_per_dir) +ranks = DebugArray(LinearIndices((p,))) +n_trials = 3 +A_dims = zeros(length(nnodes)) +errors_powm = zeros(length(nnodes), n_trials, maxiter) +time_powm = zeros(length(nnodes), n_trials) +errors_spectrad = zeros(length(nnodes), n_trials, maxiter) +time_spectrad = zeros(length(nnodes), n_trials) + +for (n_i, n) in enumerate(nnodes) + nodes_per_dir = (n, n, n) + args = laplacian_fdm(nodes_per_dir,parts_per_dir,ranks) + A = psparse(args...) 
|> fetch |> centralize + println("dims A: $(size(A))") + msizes[n_i] = size(A,1) + diagA = dense_diag(A) + invD = 1 ./ diagA + M = invD .* A + #M_dense = Array(M) + #exp[n_i] = max(abs(eigmax(M_dense)), abs(eigmin(M_dense))) for t in 1:n_trials - # Power method + x0 = rand(size(A,2)) + lprev, x = powm!(M, x0, maxiter=1) tic = time_ns() - λ, x = powm!(Fmap, x0, maxiter = n_it) + for i in 1:maxiter + # Power method + l, x = powm!(M, x, maxiter = 1) + errors_powm[n_i, t, i] = abs(l-lprev) + lprev = l + end toc = time_ns() - time_powm[t,i] = toc - tic - errors_powm[t,i] = abs((λ-exp)/exp) - - # own implementation + time_powm[n_i,t] = (toc - tic)/10^9 + ρprev, x = PartitionedSolvers.spectral_radius(M,x0, 1) tic = time_ns() - ρ = PartitionedSolvers.spectral_radius(M,x0, n_it) + for i in 1:maxiter + # own implementation + ρ, x = PartitionedSolvers.spectral_radius(M,x, 1) + errors_spectrad[n_i, t, i] = abs(ρ-ρprev) + ρprev = ρ + end toc = time_ns() - time_spectrad[t,i] = toc-tic - errors_spectrad[t,i] = abs((ρ-exp)/exp) + time_spectrad[n_i, t] = (toc-tic)/10^9 end end -avg_time_powm = median(time_powm, dims=1) -avg_time_spectrad = median(time_spectrad, dims=1) -avg_error_powm = median(errors_powm, dims=1) -avg_error_spectrad = median(errors_spectrad, dims=1) -avg_time_powm = avg_time_powm ./ 10^9 -avg_time_spectrad = avg_time_spectrad ./ 10^9 -p1 = plot(n_its, [avg_error_powm' avg_error_spectrad'], label = ["powm" "spectrad"], - marker=[:c :x], ms=2) -plot!(p1, xlabel="#iterations", ylabel="avg rel error", xticks = n_its) -p2 = plot(n_its, [avg_time_powm' avg_time_spectrad'], label = ["powm" "spectrad"], - marker=[:c :x], ms=2) -plot!(p2, xlabel="#iterations", ylabel="avg runtime (s)", xticks = n_its) +avg_time_powm = median(time_powm, dims=2) +avg_time_spectrad = median(time_spectrad, dims=2) +avg_error_powm = median(errors_powm, dims=2) +avg_error_spectrad = median(errors_spectrad, dims=2) + +p1 = plot() +plot!(p1, [1], [0], color="black", label="powm") +plot!(p1, [1], [0], color="black", ls=:dash, label="spectrad") +for (n_i, n) in enumerate(msizes) + plot!(p1, 1:maxiter, avg_error_powm[n_i,:,:]', label = "($(n),$(n))", color= n_i) + plot!(p1, 1:maxiter, avg_error_spectrad[n_i,:,:]', label = "", color=n_i, ls=:dash) +end + +p2 = plot(msizes, [avg_time_powm avg_time_spectrad], label = ["powm" "spectrad"], color="black", marker=[:c :x]) +yticks=[10^-16, 10^-14, 10^-12, 10^-10, 10^-8, 10^-6, 10^-4, 10^-2, 10^0] +xticks=[10^0, 10^1, 10^2, 10^3] +plot!(p1, xlabel="#iterations (k)", ylabel="|λ(k) - exp|", legend=:outertopleft, xscale=:log, + xticks=xticks, yscale=:log, ylim=(10^-16, 1), yticks=yticks) +plot!(p2, xlabel="size of matrix", ylabel="avg runtime (s)", xscale=:log10) p = plot(p1, p2, layout=(2,1), suptitle="Convergence of powm and spectral_radius") -savefig(p, "C:/Users/gelie/Home/ComputationalScience/GSoC/powm_convergence.png") -display(p) +savefig(p, "C:/Users/gelie/Home/ComputationalScience/GSoC/powm_l-lprev_k$(maxiter)_m$(msizes[end]).png") +display(p) =# # First with a sequential matrix #= nodes_per_dir = (100,100) From b8c4470d4e503396cae398678de90e139e39540f Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Fri, 26 Jul 2024 11:36:10 +0200 Subject: [PATCH 05/18] Add parallel implementation of strength_graph and collect_nodes_in_aggregates --- PartitionedSolvers/src/amg.jl | 85 ++++++++++++++++++++++++++-- PartitionedSolvers/test/amg_tests.jl | 74 +++++++++++++++++++----- 2 files changed, 141 insertions(+), 18 deletions(-) diff --git a/PartitionedSolvers/src/amg.jl 
b/PartitionedSolvers/src/amg.jl index a47b333a..69366971 100644 --- a/PartitionedSolvers/src/amg.jl +++ b/PartitionedSolvers/src/amg.jl @@ -197,6 +197,22 @@ function constant_prolongator(node_to_aggregate::PVector,aggregates::PRange,n_nu P0 end +function collect_nodes_in_aggregate(node_to_aggregate::PVector,aggregates::PRange) + aggregate_to_nodes_partition = map(own_values(node_to_aggregate), partition(aggregates)) do own_node_to_global_aggregate, my_aggregates + # convert to local ids + global_to_local_aggregate = global_to_local(my_aggregates) + local_aggregates = global_to_local_aggregate[my_aggregates] + @assert all(local_aggregates .!= 0) + n_local_aggregates = length(local_aggregates) + own_node_to_local_aggregate = map(own_node_to_global_aggregate) do global_aggregate + global_to_local_aggregate[global_aggregate] + end + local_aggregate_to_local_nodes = collect_nodes_in_aggregate(own_node_to_local_aggregate, 1:n_local_aggregates) + local_aggregate_to_local_nodes + end + PVector(aggregate_to_nodes_partition, partition(aggregates)) +end + function collect_nodes_in_aggregate(node_to_aggregate,aggregates) typeof_aggregate = eltype(node_to_aggregate) nnodes = length(node_to_aggregate) @@ -279,6 +295,25 @@ function tentative_prolongator_for_laplace(P0,B) P0,Bc end +function tentative_prolongator_with_block_size(aggregate_to_nodes::PVector,B, block_size) + # B vector of pvectors + own_values_B = map(own_values, B) + P0_partition, Bc_partition = map(own_values(aggregate_to_nodes), own_values_B...) do own_aggregate_to_local_nodes, local_dof_to_b... + P0_own_own, local_coarse_dof_to_Bc = tentative_prolongator_with_block_size(own_aggregate_to_local_nodes, local_dof_to_b, block_size) + # create P0 sparse matrix as in strength graph + # partition in B + # return tuple here + end |> tuple_of_arrays + # partition for the coarse dofs + P0 = PSparseMatrix(P0_partition, dof_partition, coarse_dof_partition) + Bc = map(Bc_partition) do bic_partition + + PVector(bic_partition, coarse_dof_partition) + end + P0, Bc +end + + function tentative_prolongator_with_block_size(aggregate_to_nodes::JaggedArray,B, block_size) # A draft for the scalar case is commented below ## TODO assumes CSC @@ -516,13 +551,13 @@ function smoothed_aggregation_with_block_size(; ) function coarsen(A,B) # build strength graph - G = strength_graph(A, block_size=block_size, epsilon=epsilon) + G = strength_graph(A, block_size=block_size, epsilon=epsilon) # TODO parallel diagG = dense_diag(G) node_to_aggregate, node_aggregates = aggregate(G,diagG;epsilon) - aggregate_to_nodes = collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) - Pc,Bc,perm = tentative_prolongator(aggregate_to_nodes,B, block_size) + aggregate_to_nodes = collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) # TODO: provide parallel version + Pc,Bc,perm = tentative_prolongator(aggregate_to_nodes,B, block_size) # TODO parallel diagA = dense_diag(A) - P = smoothed_prolongator(A,Pc,diagA;approximate_omega) + P = smoothed_prolongator(A,Pc,diagA;approximate_omega) # the given approximate omega should work in parallel R = transpose(P) Ac,cache = rap(R,A,P;reuse=true) Ac,Bc,R,P,cache,repartition_threshold = enhance_coarse_partition(A,Ac,Bc,R,P,cache,repartition_threshold)#maybe changes here @@ -543,6 +578,48 @@ function getblock!(B,A,ids_i,ids_j) end end +function strength_graph(A::PSparseMatrix, block_size::Integer; epsilon = 0) + dof_partition = partition(axes(A,1)) + node_partition = map(dof_partition) do my_dofs + n_local_dofs = length(my_dofs) + 
@assert n_local_dofs % block_size == 0 + @assert own_length(my_dofs) == n_local_dofs + n_own_nodes = div(n_local_dofs, block_size) + own_to_global_node = zeros(Int, n_own_nodes) + for local_node in 1:n_own_nodes + local_dof = (local_node - 1) * block_size + 1 + global_dof = my_dofs[local_dof] + global_node = div(global_dof -1, block_size) +1 + own_to_global_node[local_node] = global_node + end + n_global_dofs = global_length(my_dofs) + n_global_nodes = div(n_global_dofs, block_size) + @assert n_global_dofs % block_size == 0 + own_nodes = OwnIndices(n_global_nodes, part_id(my_dofs), own_to_global_node) + ghost_nodes = GhostIndices(n_global_nodes, Int[], Int32[]) + my_nodes = OwnAndGhostIndices(own_nodes, ghost_nodes) + my_nodes + end + + G_partition = map(own_own_values(A), node_partition) do A_own_own, my_nodes + G_own_own = strength_graph(A_own_own, block_size, epsilon=epsilon) + n_own_nodes = own_length(my_nodes) + n_ghost_nodes = ghost_length(my_nodes) + @assert n_ghost_nodes == 0 + @assert size(G_own_own, 1) == n_own_nodes + @assert size(G_own_own, 2) == size(G_own_own, 1) + G_own_ghost = sparse(Int[], Int[], eltype(G_own_own)[], n_own_nodes, n_ghost_nodes) + G_ghost_own = sparse(Int[], Int[], eltype(G_own_own)[], n_ghost_nodes, n_own_nodes) + G_ghost_ghost = sparse(Int[], Int[], eltype(G_own_own)[], n_ghost_nodes, n_ghost_nodes) + blocks = PartitionedArrays.split_matrix_blocks(G_own_own, G_own_ghost, G_ghost_own, G_ghost_ghost) + perm = PartitionedArrays.local_permutation(my_nodes) + PartitionedArrays.split_matrix(blocks, perm, perm) + end + + G = PSparseMatrix(G_partition, node_partition, node_partition, true) + G +end + function strength_graph(A::AbstractSparseMatrix, block_size::Integer; epsilon = 0) if block_size < 1 diff --git a/PartitionedSolvers/test/amg_tests.jl b/PartitionedSolvers/test/amg_tests.jl index c4d46524..47d7cf99 100644 --- a/PartitionedSolvers/test/amg_tests.jl +++ b/PartitionedSolvers/test/amg_tests.jl @@ -87,13 +87,6 @@ end I,J,V = tuple_of_arrays(IJV) col_partition = row_partition A = psparse(I,J,V,row_partition, col_partition) |> fetch -# TODO: implement and test psparse matrix - - -# Now with CSC sparse matrix -# Test sample matrix with block size 2 -epsilon = 0.02 -A_test = centralize(A) # build solution I = [1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, @@ -108,10 +101,15 @@ J = [1, 2, 4, 5, 7, 8, 9, 6, 8, 9] V = ones(length(I)) - solution = sparse(I, J, V, nnodes, nnodes) -G_test = PartitionedSolvers.strength_graph(A_test, 2, epsilon=epsilon) -@test solution ≈ G_test + +epsilon = 0.02 + +# Test with CSC sparse matrix +# Test sample matrix with block size 2 +A_seq = centralize(A) +G_seq = PartitionedSolvers.strength_graph(A_seq, 2, epsilon=epsilon) +@test solution ≈ G_seq # Another test with 3 dims M = rand([-2.0, -1, 1, 2], (3, 3)) @@ -126,6 +124,52 @@ G = PartitionedSolvers.strength_graph(M, 3, epsilon=epsilon) solution = sparse([1], [1], 1.0, 1, 1) @test solution ≈ G +# Test strength_graph with psparse matrix +block_size = 3 +parts_per_dir = (2,2,2) +p = prod(parts_per_dir) +ranks = DebugArray(LinearIndices((p,))) +nodes_per_dir = map(i->block_size*i,parts_per_dir) +args = laplacian_fdm(nodes_per_dir,parts_per_dir,ranks) +A_dist = psparse(args...) 
|> fetch +A_seq = centralize(A_dist) +G_seq = PartitionedSolvers.strength_graph(A_seq, block_size, epsilon=epsilon) +G_dist = PartitionedSolvers.strength_graph(A_dist, block_size, epsilon=epsilon) +diff = G_seq - centralize(G_dist) +println("Difference between sequential and parallel strength_graph:") +display(diff) + +# Test sequential collect nodes in aggregate +diagG = dense_diag(centralize(G_dist)) +node_to_aggregate_seq, node_aggregates_seq = PartitionedSolvers.aggregate(centralize(G_dist),diagG;epsilon) +aggregate_to_nodes_seq = PartitionedSolvers.collect_nodes_in_aggregate(node_to_aggregate_seq, node_aggregates_seq) +@test length(aggregate_to_nodes_seq.data) == length(node_to_aggregate_seq) +for i_agg in node_aggregates_seq + pini = aggregate_to_nodes_seq.ptrs[i_agg] + pend = aggregate_to_nodes_seq.ptrs[i_agg+1]-1 + nodes = aggregate_to_nodes_seq.data[pini:pend] + @test all(node_to_aggregate_seq[nodes] .== i_agg) +end + +# Test parallel collect_nodes_in_aggregate +diagG = dense_diag(G_dist) +node_to_aggregate_dist, node_aggregates_dist = PartitionedSolvers.aggregate(G_dist,diagG;epsilon) +aggregate_to_nodes_dist = PartitionedSolvers.collect_nodes_in_aggregate(node_to_aggregate_dist, node_aggregates_dist) +map(partition(aggregate_to_nodes_dist), partition(node_to_aggregate_dist), partition(node_aggregates_dist)) do my_aggregate_to_nodes, my_node_to_aggregate, my_aggregates + @test length(my_aggregate_to_nodes.data) == length(my_node_to_aggregate) + global_to_local_aggregate = global_to_local(my_aggregates) + local_aggregates = global_to_local_aggregate[my_aggregates] + own_node_to_local_aggregate = map(my_node_to_aggregate) do global_aggregate + global_to_local_aggregate[global_aggregate] + end + for i_agg in local_aggregates + pini = my_aggregate_to_nodes.ptrs[i_agg] + pend = my_aggregate_to_nodes.ptrs[i_agg+1]-1 + nodes = my_aggregate_to_nodes.data[pini:pend] + @test all(own_node_to_local_aggregate[nodes] .== i_agg) + end +end + # Test tentative prolongator function random_nullspace(ndofs, n_B) B = Array{Array{Float64, 1}, 1}(undef, n_B) @@ -144,7 +188,6 @@ nodes_per_dir = map(i->block_size*i,parts_per_dir) args = laplacian_fdm(nodes_per_dir,parts_per_dir,ranks) A_dist = psparse(args...) 
|> fetch A = centralize(A_dist) -println("dims A: $(size(A))") G = PartitionedSolvers.strength_graph(A, block_size, epsilon=epsilon) diagG = dense_diag(G) B = random_nullspace(size(A, 1), block_size) @@ -153,6 +196,9 @@ aggregate_to_nodes = PartitionedSolvers.collect_nodes_in_aggregate(node_to_aggre Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_nodes,B, block_size) @test Pc * stack(Bc) ≈ stack(B) +# Test tentative prolongator with parallel matrix + + # Test spectral radius sequential & parallel diagA = dense_diag(A_dist) invD = 1 ./ diagA @@ -233,13 +279,13 @@ end p2 = plot(msizes, [avg_time_powm avg_time_spectrad], label = ["powm" "spectrad"], color="black", marker=[:c :x]) yticks=[10^-16, 10^-14, 10^-12, 10^-10, 10^-8, 10^-6, 10^-4, 10^-2, 10^0] xticks=[10^0, 10^1, 10^2, 10^3] -plot!(p1, xlabel="#iterations (k)", ylabel="|λ(k) - exp|", legend=:outertopleft, xscale=:log, +plot!(p1, xlabel="#iterations (k)", ylabel="|λ(k) - λ(k-1)|", legend=:outertopleft, xscale=:log, xticks=xticks, yscale=:log, ylim=(10^-16, 1), yticks=yticks) plot!(p2, xlabel="size of matrix", ylabel="avg runtime (s)", xscale=:log10) p = plot(p1, p2, layout=(2,1), suptitle="Convergence of powm and spectral_radius") savefig(p, "C:/Users/gelie/Home/ComputationalScience/GSoC/powm_l-lprev_k$(maxiter)_m$(msizes[end]).png") -display(p) =# - +display(p) + =# # First with a sequential matrix #= nodes_per_dir = (100,100) A = laplace_matrix(nodes_per_dir) From d2299465bc6c6937cb9bb6e833d44a270f645055 Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Wed, 7 Aug 2024 14:32:50 +0200 Subject: [PATCH 06/18] Add parallel implementation of tentative prolongator --- PartitionedSolvers/src/amg.jl | 63 +++++++++++++++++++++------- PartitionedSolvers/test/amg_tests.jl | 32 +++++++++++--- 2 files changed, 76 insertions(+), 19 deletions(-) diff --git a/PartitionedSolvers/src/amg.jl b/PartitionedSolvers/src/amg.jl index 69366971..03522187 100644 --- a/PartitionedSolvers/src/amg.jl +++ b/PartitionedSolvers/src/amg.jl @@ -298,19 +298,56 @@ end function tentative_prolongator_with_block_size(aggregate_to_nodes::PVector,B, block_size) # B vector of pvectors own_values_B = map(own_values, B) - P0_partition, Bc_partition = map(own_values(aggregate_to_nodes), own_values_B...) do own_aggregate_to_local_nodes, local_dof_to_b... - P0_own_own, local_coarse_dof_to_Bc = tentative_prolongator_with_block_size(own_aggregate_to_local_nodes, local_dof_to_b, block_size) - # create P0 sparse matrix as in strength graph - # partition in B - # return tuple here + n_B = length(B) + P0_partition, Bc_partition, coarse_dof_to_Bc... = map(aggregate_to_nodes.index_partition, B[1].index_partition, local_values(aggregate_to_nodes), own_values_B...) do local_aggregate_local_indices, local_dof_local_indices, local_aggregate_to_local_nodes, own_dof_to_b... 
+ P0_own_own, local_coarse_dof_to_Bc = tentative_prolongator_with_block_size(local_aggregate_to_local_nodes, own_dof_to_b, block_size) + # Create P0 partition + n_global_aggregates = global_length(local_aggregate_local_indices) + n_own_aggregates = own_length(local_aggregate_local_indices) + n_local_aggregates = local_length(local_aggregate_local_indices) + n_ghost_aggregates = ghost_length(local_aggregate_local_indices) + n_own_dofs = length(own_dof_to_b[1]) + n_own_coarse_dofs = n_own_aggregates * n_B + n_global_coarse_dofs = n_global_aggregates * n_B + @assert n_own_aggregates == n_local_aggregates + @assert n_ghost_aggregates == 0 + @assert size(P0_own_own, 1) == n_own_dofs + @assert size(P0_own_own, 2) == n_own_coarse_dofs + @assert length(local_coarse_dof_to_Bc) == n_B + @assert length(local_coarse_dof_to_Bc[1]) == n_own_coarse_dofs + P0_own_ghost = sparse(Int[], Int[], eltype(P0_own_own)[], n_own_aggregates, n_ghost_aggregates) + P0_ghost_own = sparse(Int[], Int[], eltype(P0_own_own)[], n_ghost_aggregates, n_own_aggregates) + P0_ghost_ghost = sparse(Int[], Int[], eltype(P0_own_own)[], n_ghost_aggregates, n_ghost_aggregates) + blocks = PartitionedArrays.split_matrix_blocks(P0_own_own, P0_own_ghost, P0_ghost_own, P0_ghost_ghost) + perm_aggs = PartitionedArrays.local_permutation(local_aggregate_local_indices) + # Turn permutation of aggregates to permutation of null vectors + perm_cols = zeros(Int, size(P0_own_own,2)) + for (i, agg) in enumerate(perm_aggs) + cols = (agg - 1) * n_B + 1 : agg * n_B + inds = (i - 1) * n_B + 1 : i * n_B + perm_cols[inds] = cols + end + perm_rows = PartitionedArrays.local_permutation(local_dof_local_indices) + P0_partition = PartitionedArrays.split_matrix(blocks, perm_rows, perm_cols) + # Partition of Bc + own_to_global_coarse_dofs = zeros(Int, n_own_coarse_dofs) + for (i, agg) in enumerate(local_to_global(local_aggregate_local_indices)) + inds = (i-1) * n_B + 1 : i * n_B + global_coarse_dofs = (agg-1) * n_B + 1 : agg * n_B + own_to_global_coarse_dofs[inds] = global_coarse_dofs + end + own_coarse_dofs = OwnIndices(n_global_coarse_dofs, part_id(local_dof_local_indices), own_to_global_coarse_dofs) + ghost_coarse_dofs = GhostIndices(n_global_coarse_dofs, Int[], Int32[]) + my_coarse_dofs = OwnAndGhostIndices(own_coarse_dofs, ghost_coarse_dofs) + P0_partition, my_coarse_dofs, local_coarse_dof_to_Bc... 
end |> tuple_of_arrays # partition for the coarse dofs - P0 = PSparseMatrix(P0_partition, dof_partition, coarse_dof_partition) - Bc = map(Bc_partition) do bic_partition - - PVector(bic_partition, coarse_dof_partition) - end - P0, Bc + P0 = PSparseMatrix(P0_partition, B[1].index_partition, Bc_partition, true) + Bc = Array{PVector}(undef, n_B) + for (i,b) in enumerate(coarse_dof_to_Bc) + Bc[i] = PVector(b, Bc_partition) + end + P0, Bc end @@ -918,6 +955,4 @@ function amg_finalize!(setup) end coarse_solver_setup = setup.coarse_level.coarse_solver_setup finalize!(coarse_solver_setup) - nothing -end - +end \ No newline at end of file diff --git a/PartitionedSolvers/test/amg_tests.jl b/PartitionedSolvers/test/amg_tests.jl index 47d7cf99..a7444b64 100644 --- a/PartitionedSolvers/test/amg_tests.jl +++ b/PartitionedSolvers/test/amg_tests.jl @@ -136,8 +136,6 @@ A_seq = centralize(A_dist) G_seq = PartitionedSolvers.strength_graph(A_seq, block_size, epsilon=epsilon) G_dist = PartitionedSolvers.strength_graph(A_dist, block_size, epsilon=epsilon) diff = G_seq - centralize(G_dist) -println("Difference between sequential and parallel strength_graph:") -display(diff) # Test sequential collect nodes in aggregate diagG = dense_diag(centralize(G_dist)) @@ -171,7 +169,7 @@ map(partition(aggregate_to_nodes_dist), partition(node_to_aggregate_dist), parti end # Test tentative prolongator -function random_nullspace(ndofs, n_B) +function random_nullspace(ndofs::Int, n_B) B = Array{Array{Float64, 1}, 1}(undef, n_B) for i = 1:n_B B[i] = rand(ndofs) @@ -179,6 +177,14 @@ function random_nullspace(ndofs, n_B) B end +function random_nullspace(index_partition::AbstractArray, n_B) + B = Array{PVector}(undef, n_B) + for i = 1:n_B + B[i] = prand(index_partition) + end + B +end + # Test tentative prolongator with laplace matrix block_size = 3 parts_per_dir = (2,2,2) @@ -197,6 +203,21 @@ Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_n @test Pc * stack(Bc) ≈ stack(B) # Test tentative prolongator with parallel matrix +G_dist = PartitionedSolvers.strength_graph(A_dist, block_size, epsilon=epsilon) +diagG = dense_diag(G_dist) +node_to_aggregate, node_aggregates = PartitionedSolvers.aggregate(G_dist,diagG;epsilon) +aggregate_to_nodes = PartitionedSolvers.collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) +B_dist = random_nullspace(partition(axes(A_dist,1)), block_size) +Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_nodes,B_dist, block_size) +Bc_matrix = zeros(size(Pc,2), length(Bc)) +for (i,b) in enumerate(Bc) + Bc_matrix[:,i] = collect(b) +end +B_matrix = zeros(size(Pc,1), length(Bc)) +for (i,b) in enumerate(B) + B_matrix[:,i] = collect(b) +end +@test centralize(Pc) * Bc_matrix ≈ B_matrix # TODO: test fails # Test spectral radius sequential & parallel @@ -285,9 +306,10 @@ plot!(p2, xlabel="size of matrix", ylabel="avg runtime (s)", xscale=:log10) p = plot(p1, p2, layout=(2,1), suptitle="Convergence of powm and spectral_radius") savefig(p, "C:/Users/gelie/Home/ComputationalScience/GSoC/powm_l-lprev_k$(maxiter)_m$(msizes[end]).png") display(p) - =# + + # First with a sequential matrix -#= nodes_per_dir = (100,100) +nodes_per_dir = (100,100) A = laplace_matrix(nodes_per_dir) using Random Random.seed!(1) From 7cf1469400bbb58aae83a5d5138b703d923acc49 Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Fri, 9 Aug 2024 09:50:15 +0200 Subject: [PATCH 07/18] Add tests for parallel tentative prolongator --- PartitionedSolvers/src/amg.jl | 4 ++-- 
PartitionedSolvers/test/amg_tests.jl | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/PartitionedSolvers/src/amg.jl b/PartitionedSolvers/src/amg.jl index 03522187..9ff28005 100644 --- a/PartitionedSolvers/src/amg.jl +++ b/PartitionedSolvers/src/amg.jl @@ -338,8 +338,8 @@ function tentative_prolongator_with_block_size(aggregate_to_nodes::PVector,B, bl end own_coarse_dofs = OwnIndices(n_global_coarse_dofs, part_id(local_dof_local_indices), own_to_global_coarse_dofs) ghost_coarse_dofs = GhostIndices(n_global_coarse_dofs, Int[], Int32[]) - my_coarse_dofs = OwnAndGhostIndices(own_coarse_dofs, ghost_coarse_dofs) - P0_partition, my_coarse_dofs, local_coarse_dof_to_Bc... + Bc_partition = OwnAndGhostIndices(own_coarse_dofs, ghost_coarse_dofs) + P0_partition, Bc_partition, local_coarse_dof_to_Bc... end |> tuple_of_arrays # partition for the coarse dofs P0 = PSparseMatrix(P0_partition, B[1].index_partition, Bc_partition, true) diff --git a/PartitionedSolvers/test/amg_tests.jl b/PartitionedSolvers/test/amg_tests.jl index a7444b64..2ea00328 100644 --- a/PartitionedSolvers/test/amg_tests.jl +++ b/PartitionedSolvers/test/amg_tests.jl @@ -205,19 +205,23 @@ Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_n # Test tentative prolongator with parallel matrix G_dist = PartitionedSolvers.strength_graph(A_dist, block_size, epsilon=epsilon) diagG = dense_diag(G_dist) +n_B = block_size node_to_aggregate, node_aggregates = PartitionedSolvers.aggregate(G_dist,diagG;epsilon) aggregate_to_nodes = PartitionedSolvers.collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) -B_dist = random_nullspace(partition(axes(A_dist,1)), block_size) +B_dist = random_nullspace(partition(axes(A_dist,1)), n_B) Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_nodes,B_dist, block_size) +for i in 1:n_B + @test isa(Bc[i], PVector) +end Bc_matrix = zeros(size(Pc,2), length(Bc)) for (i,b) in enumerate(Bc) Bc_matrix[:,i] = collect(b) end B_matrix = zeros(size(Pc,1), length(Bc)) -for (i,b) in enumerate(B) +for (i,b) in enumerate(B_dist) B_matrix[:,i] = collect(b) end -@test centralize(Pc) * Bc_matrix ≈ B_matrix # TODO: test fails +@test centralize(Pc) * Bc_matrix ≈ B_matrix # Test spectral radius sequential & parallel From e06c74e1d8970e0dcc5d561aa96655e3a8df56e3 Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Fri, 9 Aug 2024 10:12:50 +0200 Subject: [PATCH 08/18] Remove plots of power method convergence --- PartitionedSolvers/src/amg.jl | 2 - PartitionedSolvers/test/amg_tests.jl | 83 ++-------------------------- 2 files changed, 4 insertions(+), 81 deletions(-) diff --git a/PartitionedSolvers/src/amg.jl b/PartitionedSolvers/src/amg.jl index 9ff28005..6ca6cc85 100644 --- a/PartitionedSolvers/src/amg.jl +++ b/PartitionedSolvers/src/amg.jl @@ -296,7 +296,6 @@ function tentative_prolongator_for_laplace(P0,B) end function tentative_prolongator_with_block_size(aggregate_to_nodes::PVector,B, block_size) - # B vector of pvectors own_values_B = map(own_values, B) n_B = length(B) P0_partition, Bc_partition, coarse_dof_to_Bc... = map(aggregate_to_nodes.index_partition, B[1].index_partition, local_values(aggregate_to_nodes), own_values_B...) do local_aggregate_local_indices, local_dof_local_indices, local_aggregate_to_local_nodes, own_dof_to_b... 
@@ -341,7 +340,6 @@ function tentative_prolongator_with_block_size(aggregate_to_nodes::PVector,B, bl Bc_partition = OwnAndGhostIndices(own_coarse_dofs, ghost_coarse_dofs) P0_partition, Bc_partition, local_coarse_dof_to_Bc... end |> tuple_of_arrays - # partition for the coarse dofs P0 = PSparseMatrix(P0_partition, B[1].index_partition, Bc_partition, true) Bc = Array{PVector}(undef, n_B) for (i,b) in enumerate(coarse_dof_to_Bc) diff --git a/PartitionedSolvers/test/amg_tests.jl b/PartitionedSolvers/test/amg_tests.jl index 2ea00328..558ffec5 100644 --- a/PartitionedSolvers/test/amg_tests.jl +++ b/PartitionedSolvers/test/amg_tests.jl @@ -9,7 +9,6 @@ using PartitionedSolvers using LinearAlgebra using IterativeSolvers using IterativeSolvers: cg! -using Plots using Statistics using Test using SparseArrays @@ -223,7 +222,6 @@ for (i,b) in enumerate(B_dist) end @test centralize(Pc) * Bc_matrix ≈ B_matrix - # Test spectral radius sequential & parallel diagA = dense_diag(A_dist) invD = 1 ./ diagA @@ -238,79 +236,6 @@ lseq, x = PartitionedSolvers.spectral_radius(centralize(M), x0_seq, 10) @test l ≈ lseq @test abs((l-exp)/exp) < 2*10^-1 -# Test spectral radius estimation -#= maxiter = 5000 -nnodes = [8, 16, 32] -exp = [1.9396926207859075, 1.9829730996838997, 1.9954719225730886] -msizes = zeros(Int, length(nnodes)) -parts_per_dir = (2,2,2) -p = prod(parts_per_dir) -ranks = DebugArray(LinearIndices((p,))) -n_trials = 3 -A_dims = zeros(length(nnodes)) -errors_powm = zeros(length(nnodes), n_trials, maxiter) -time_powm = zeros(length(nnodes), n_trials) -errors_spectrad = zeros(length(nnodes), n_trials, maxiter) -time_spectrad = zeros(length(nnodes), n_trials) - -for (n_i, n) in enumerate(nnodes) - nodes_per_dir = (n, n, n) - args = laplacian_fdm(nodes_per_dir,parts_per_dir,ranks) - A = psparse(args...) 
|> fetch |> centralize - println("dims A: $(size(A))") - msizes[n_i] = size(A,1) - diagA = dense_diag(A) - invD = 1 ./ diagA - M = invD .* A - #M_dense = Array(M) - #exp[n_i] = max(abs(eigmax(M_dense)), abs(eigmin(M_dense))) - for t in 1:n_trials - x0 = rand(size(A,2)) - lprev, x = powm!(M, x0, maxiter=1) - tic = time_ns() - for i in 1:maxiter - # Power method - l, x = powm!(M, x, maxiter = 1) - errors_powm[n_i, t, i] = abs(l-lprev) - lprev = l - end - toc = time_ns() - time_powm[n_i,t] = (toc - tic)/10^9 - ρprev, x = PartitionedSolvers.spectral_radius(M,x0, 1) - tic = time_ns() - for i in 1:maxiter - # own implementation - ρ, x = PartitionedSolvers.spectral_radius(M,x, 1) - errors_spectrad[n_i, t, i] = abs(ρ-ρprev) - ρprev = ρ - end - toc = time_ns() - time_spectrad[n_i, t] = (toc-tic)/10^9 - end -end -avg_time_powm = median(time_powm, dims=2) -avg_time_spectrad = median(time_spectrad, dims=2) -avg_error_powm = median(errors_powm, dims=2) -avg_error_spectrad = median(errors_spectrad, dims=2) - -p1 = plot() -plot!(p1, [1], [0], color="black", label="powm") -plot!(p1, [1], [0], color="black", ls=:dash, label="spectrad") -for (n_i, n) in enumerate(msizes) - plot!(p1, 1:maxiter, avg_error_powm[n_i,:,:]', label = "($(n),$(n))", color= n_i) - plot!(p1, 1:maxiter, avg_error_spectrad[n_i,:,:]', label = "", color=n_i, ls=:dash) -end - -p2 = plot(msizes, [avg_time_powm avg_time_spectrad], label = ["powm" "spectrad"], color="black", marker=[:c :x]) -yticks=[10^-16, 10^-14, 10^-12, 10^-10, 10^-8, 10^-6, 10^-4, 10^-2, 10^0] -xticks=[10^0, 10^1, 10^2, 10^3] -plot!(p1, xlabel="#iterations (k)", ylabel="|λ(k) - λ(k-1)|", legend=:outertopleft, xscale=:log, - xticks=xticks, yscale=:log, ylim=(10^-16, 1), yticks=yticks) -plot!(p2, xlabel="size of matrix", ylabel="avg runtime (s)", xscale=:log10) -p = plot(p1, p2, layout=(2,1), suptitle="Convergence of powm and spectral_radius") -savefig(p, "C:/Users/gelie/Home/ComputationalScience/GSoC/powm_l-lprev_k$(maxiter)_m$(msizes[end]).png") -display(p) - # First with a sequential matrix nodes_per_dir = (100,100) @@ -334,7 +259,7 @@ amg_statistics(S) |> display # Non-default options level_params = amg_level_params(; - pre_smoother = jacobi(;iters=10,omega=2/3), + pre_smoother = PartitionedSolvers.jacobi(;iters=10,omega=2/3), cycle = v_cycle, ) @@ -361,7 +286,7 @@ finalize!(S) # Now as a preconditioner level_params = amg_level_params(; - pre_smoother = gauss_seidel(;iters=1), + pre_smoother = PartitionedSolvers.gauss_seidel(;iters=1), ) fine_params = amg_fine_params(;level_params) @@ -409,7 +334,7 @@ solve!(y,S,b) finalize!(S) level_params = amg_level_params(; - pre_smoother = jacobi(;iters=1,omega=2/3), + pre_smoother = PartitionedSolvers.jacobi(;iters=1,omega=2/3), coarsening = smoothed_aggregation(;repartition_threshold=10000000) ) @@ -434,5 +359,5 @@ x = similar(b,axes(A,2)) x .= 0 Pl = setup(amg(),x,A,b) _, history = cg!(x,A,b;Pl,log=true) -display(history) =# +display(history) end \ No newline at end of file From e2a8e86dcf344d53adf7917cafafc7dc8eec4fba Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Tue, 13 Aug 2024 09:11:28 +0200 Subject: [PATCH 09/18] Use linear elasticity matrix for testing --- PartitionedSolvers/test/amg_tests.jl | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/PartitionedSolvers/test/amg_tests.jl b/PartitionedSolvers/test/amg_tests.jl index 558ffec5..8208eaa9 100644 --- a/PartitionedSolvers/test/amg_tests.jl +++ b/PartitionedSolvers/test/amg_tests.jl @@ -3,7 +3,7 @@ module AMGTests # just for 
debugging using Pkg Pkg.activate("PartitionedSolvers") -using PartitionedArrays +using PartitionedArrays using PartitionedArrays: laplace_matrix using PartitionedSolvers using LinearAlgebra @@ -123,18 +123,22 @@ G = PartitionedSolvers.strength_graph(M, 3, epsilon=epsilon) solution = sparse([1], [1], 1.0, 1, 1) @test solution ≈ G -# Test strength_graph with psparse matrix +# Create Psparse Test matrix (Linear Elasticity) block_size = 3 parts_per_dir = (2,2,2) p = prod(parts_per_dir) ranks = DebugArray(LinearIndices((p,))) nodes_per_dir = map(i->block_size*i,parts_per_dir) -args = laplacian_fdm(nodes_per_dir,parts_per_dir,ranks) -A_dist = psparse(args...) |> fetch +args = PartitionedArrays.linear_elasticity_fem(nodes_per_dir,parts_per_dir,ranks) +A_dist = psparse(args...) |> fetch A_seq = centralize(A_dist) +display(A_seq) + +# Test strength graph with sequential and parallel linear elasticity matrix G_seq = PartitionedSolvers.strength_graph(A_seq, block_size, epsilon=epsilon) G_dist = PartitionedSolvers.strength_graph(A_dist, block_size, epsilon=epsilon) diff = G_seq - centralize(G_dist) +display(diff) # Test sequential collect nodes in aggregate diagG = dense_diag(centralize(G_dist)) @@ -185,18 +189,10 @@ function random_nullspace(index_partition::AbstractArray, n_B) end # Test tentative prolongator with laplace matrix -block_size = 3 -parts_per_dir = (2,2,2) -p = prod(parts_per_dir) -ranks = DebugArray(LinearIndices((p,))) -nodes_per_dir = map(i->block_size*i,parts_per_dir) -args = laplacian_fdm(nodes_per_dir,parts_per_dir,ranks) -A_dist = psparse(args...) |> fetch -A = centralize(A_dist) -G = PartitionedSolvers.strength_graph(A, block_size, epsilon=epsilon) -diagG = dense_diag(G) -B = random_nullspace(size(A, 1), block_size) -node_to_aggregate, node_aggregates = PartitionedSolvers.aggregate(G,diagG;epsilon) +G_seq = PartitionedSolvers.strength_graph(A_seq, block_size, epsilon=epsilon) +diagG = dense_diag(G_seq) +B = random_nullspace(size(A_seq, 1), block_size) +node_to_aggregate, node_aggregates = PartitionedSolvers.aggregate(G_seq,diagG;epsilon) aggregate_to_nodes = PartitionedSolvers.collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_nodes,B, block_size) @test Pc * stack(Bc) ≈ stack(B) From 2008787b8d5bc25cabf0c88ca9f933e825b374e0 Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Tue, 13 Aug 2024 11:36:39 +0200 Subject: [PATCH 10/18] Use near nullspace for linear elasticity for testing --- PartitionedSolvers/test/amg_tests.jl | 71 ++++++++++++---------------- 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/PartitionedSolvers/test/amg_tests.jl b/PartitionedSolvers/test/amg_tests.jl index 8208eaa9..c3c960a6 100644 --- a/PartitionedSolvers/test/amg_tests.jl +++ b/PartitionedSolvers/test/amg_tests.jl @@ -3,8 +3,7 @@ module AMGTests # just for debugging using Pkg Pkg.activate("PartitionedSolvers") -using PartitionedArrays -using PartitionedArrays: laplace_matrix +using PartitionedArrays using PartitionedSolvers using LinearAlgebra using IterativeSolvers @@ -123,22 +122,26 @@ G = PartitionedSolvers.strength_graph(M, 3, epsilon=epsilon) solution = sparse([1], [1], 1.0, 1, 1) @test solution ≈ G -# Create Psparse Test matrix (Linear Elasticity) -block_size = 3 +# Create Test matrices (Linear Elasticity) parts_per_dir = (2,2,2) +block_size = length(parts_per_dir) p = prod(parts_per_dir) ranks = DebugArray(LinearIndices((p,))) nodes_per_dir = map(i->block_size*i,parts_per_dir) args = 
PartitionedArrays.linear_elasticity_fem(nodes_per_dir,parts_per_dir,ranks) A_dist = psparse(args...) |> fetch A_seq = centralize(A_dist) -display(A_seq) # Test strength graph with sequential and parallel linear elasticity matrix G_seq = PartitionedSolvers.strength_graph(A_seq, block_size, epsilon=epsilon) G_dist = PartitionedSolvers.strength_graph(A_dist, block_size, epsilon=epsilon) -diff = G_seq - centralize(G_dist) -display(diff) +G_dist_centralized = centralize(G_dist) +diff = G_seq - G_dist_centralized +diff_nnz = nnz(diff) +println("$(round(diff_nnz/(G_seq.m^2); digits=3))% of total entries are different between sequential and parallel G.") +println("G_dist has $(round(1-nnz(G_dist_centralized)/nnz(G_seq);digits=2))% fewer nnz entries than G_seq.") +println("(G_dist nnz entries: $(nnz(G_dist_centralized)), G_seq nnz entries: $(nnz(G_seq)))") +@test isa(G_dist, PSparseMatrix) # Test sequential collect nodes in aggregate diagG = dense_diag(centralize(G_dist)) @@ -171,40 +174,25 @@ map(partition(aggregate_to_nodes_dist), partition(node_to_aggregate_dist), parti end end -# Test tentative prolongator -function random_nullspace(ndofs::Int, n_B) - B = Array{Array{Float64, 1}, 1}(undef, n_B) - for i = 1:n_B - B[i] = rand(ndofs) - end - B -end - -function random_nullspace(index_partition::AbstractArray, n_B) - B = Array{PVector}(undef, n_B) - for i = 1:n_B - B[i] = prand(index_partition) - end - B -end +# Create nullspace vectors for tentative prolongator +x_dist = node_coordinates_unit_cube(nodes_per_dir,parts_per_dir,ranks) +B_dist = near_nullspace_linear_elasticity(x_dist, partition(axes(A_dist,2))) +B_seq = [collect(b) for b in B_dist] -# Test tentative prolongator with laplace matrix +# Test tentative prolongator with sequential linear elasticity matrix G_seq = PartitionedSolvers.strength_graph(A_seq, block_size, epsilon=epsilon) -diagG = dense_diag(G_seq) -B = random_nullspace(size(A_seq, 1), block_size) -node_to_aggregate, node_aggregates = PartitionedSolvers.aggregate(G_seq,diagG;epsilon) +node_to_aggregate, node_aggregates = PartitionedSolvers.aggregate(G_seq, dense_diag(G_seq);epsilon) aggregate_to_nodes = PartitionedSolvers.collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) -Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_nodes,B, block_size) -@test Pc * stack(Bc) ≈ stack(B) +Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_nodes,B_seq, block_size) +@test Pc * stack(Bc) ≈ stack(B_seq) + # Test tentative prolongator with parallel matrix G_dist = PartitionedSolvers.strength_graph(A_dist, block_size, epsilon=epsilon) -diagG = dense_diag(G_dist) -n_B = block_size -node_to_aggregate, node_aggregates = PartitionedSolvers.aggregate(G_dist,diagG;epsilon) +node_to_aggregate, node_aggregates = PartitionedSolvers.aggregate(G_dist,dense_diag(G_dist);epsilon) aggregate_to_nodes = PartitionedSolvers.collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) -B_dist = random_nullspace(partition(axes(A_dist,1)), n_B) Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_nodes,B_dist, block_size) +n_B = length(B_dist) for i in 1:n_B @test isa(Bc[i], PVector) end @@ -218,6 +206,7 @@ for (i,b) in enumerate(B_dist) end @test centralize(Pc) * Bc_matrix ≈ B_matrix + # Test spectral radius sequential & parallel diagA = dense_diag(A_dist) invD = 1 ./ diagA @@ -232,10 +221,11 @@ lseq, x = PartitionedSolvers.spectral_radius(centralize(M), x0_seq, 10) @test l ≈ lseq @test abs((l-exp)/exp) < 2*10^-1 - -# 
First with a sequential matrix +#= +# Test amg +# first with a sequential matrix nodes_per_dir = (100,100) -A = laplace_matrix(nodes_per_dir) +A = PartitionedArrays.laplace_matrix(nodes_per_dir) using Random Random.seed!(1) x = rand(size(A,2)) @@ -304,7 +294,7 @@ np = prod(parts_per_dir) parts = DebugArray(LinearIndices((np,))) nodes_per_dir = (100,100) -A = laplace_matrix(nodes_per_dir,parts_per_dir,parts) +A = PartitionedArrays.laplace_matrix(nodes_per_dir,parts_per_dir,parts) x = pones(partition(axes(A,2))) b = A*x @@ -346,9 +336,9 @@ cg!(y,A,b;Pl,verbose=true) nodes_per_dir = (40,40,40) parts_per_dir = (2,2,1) -nparts = prod(parts_per_dir) -parts = LinearIndices((nparts,)) -A = laplace_matrix(nodes_per_dir,parts_per_dir,parts) +np = prod(parts_per_dir) +parts = LinearIndices((np,)) +A = PartitionedArrays.laplace_matrix(nodes_per_dir,parts_per_dir,parts) x_exact = pones(partition(axes(A,2))) b = A*x_exact x = similar(b,axes(A,2)) @@ -356,4 +346,5 @@ x .= 0 Pl = setup(amg(),x,A,b) _, history = cg!(x,A,b;Pl,log=true) display(history) +=# end \ No newline at end of file From b2ee910b8d64f97f0e2621b617957d7568c1dc28 Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Tue, 13 Aug 2024 11:37:27 +0200 Subject: [PATCH 11/18] Fix typo in node_coordinates_unit_cube --- src/PartitionedArrays.jl | 2 +- src/gallery.jl | 2 +- test/gallery_tests.jl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/PartitionedArrays.jl b/src/PartitionedArrays.jl index 5575613c..6c7d1542 100644 --- a/src/PartitionedArrays.jl +++ b/src/PartitionedArrays.jl @@ -180,7 +180,7 @@ include("p_timer.jl") export laplacian_fdm export laplacian_fem export linear_elasticity_fem -export node_coorinates_unit_cube +export node_coordinates_unit_cube export near_nullspace_linear_elasticity include("gallery.jl") diff --git a/src/gallery.jl b/src/gallery.jl index 231dcce1..59a9c51d 100644 --- a/src/gallery.jl +++ b/src/gallery.jl @@ -415,7 +415,7 @@ function node_to_dof_partition(node_partition,D) dof_partition end -function node_coorinates_unit_cube( +function node_coordinates_unit_cube( nodes_per_dir, # free (== interior) nodes parts_per_dir, parts, diff --git a/test/gallery_tests.jl b/test/gallery_tests.jl index 07fb9a13..dbd393bb 100644 --- a/test/gallery_tests.jl +++ b/test/gallery_tests.jl @@ -41,7 +41,7 @@ function gallery_tests(distribute,parts_per_dir) Y = A*pones(axes(A,2)) @test isa(y,PVector) - x = node_coorinates_unit_cube(nodes_per_dir,parts_per_dir,ranks) + x = node_coordinates_unit_cube(nodes_per_dir,parts_per_dir,ranks) B = near_nullspace_linear_elasticity(x) @test isa(B[1],PVector) B = near_nullspace_linear_elasticity(x,partition(axes(A,2))) From e80f15ce062b0a1323f0720847dc94b8604d3e45 Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Wed, 14 Aug 2024 10:01:28 +0200 Subject: [PATCH 12/18] Delete remove_singleton_aggregates --- PartitionedSolvers/src/amg.jl | 46 +--------------------------- PartitionedSolvers/test/amg_tests.jl | 32 ++++++++----------- 2 files changed, 13 insertions(+), 65 deletions(-) diff --git a/PartitionedSolvers/src/amg.jl b/PartitionedSolvers/src/amg.jl index 6ca6cc85..008443a3 100644 --- a/PartitionedSolvers/src/amg.jl +++ b/PartitionedSolvers/src/amg.jl @@ -245,47 +245,6 @@ function collect_nodes_in_aggregate(node_to_aggregate,aggregates) aggregate_to_nodes end -function remove_singleton_aggregates(aggregate_to_nodes_old) - typeof_nodes = eltype(aggregate_to_nodes_old[1]) - n_aggregates_old = length(aggregate_to_nodes_old) - n_aggregates = 0 - n_data = 0 - for i in 
1:n_aggregates_old - nnodes = aggregate_to_nodes_old.ptrs[i+1] - aggregate_to_nodes_old.ptrs[i] - if nnodes < 2 - continue - end - n_aggregates += 1 - n_data += nnodes - end - - # If there are no singleton aggregates, stop and return old array - if n_aggregates == n_aggregates_old - return aggregate_to_nodes_old - end - - # Else copy non-singleton aggregates to new array - aggregate_to_nodes_ptrs = zeros(Int, n_aggregates+1) - aggregate_to_nodes_data = zeros(typeof_nodes, n_data) - aggregate_to_nodes_ptrs[1] = 1 - agg = 1 - for i in 1:n_aggregates_old - nnodes = aggregate_to_nodes_old.ptrs[i+1] - aggregate_to_nodes_old.ptrs[i] - if nnodes < 2 - continue - end - aggregate_to_nodes_ptrs[agg+1] = aggregate_to_nodes_ptrs[agg] + nnodes - pini = aggregate_to_nodes_ptrs[agg] - pend = aggregate_to_nodes_ptrs[agg+1]-1 - pini_old = aggregate_to_nodes_old.ptrs[i] - pend_old = aggregate_to_nodes_old.ptrs[i+1]-1 - aggregate_to_nodes_data[pini:pend] = aggregate_to_nodes_old.data[pini_old:pend_old] - agg += 1 - end - aggregate_to_nodes = jagged_array(aggregate_to_nodes_data,aggregate_to_nodes_ptrs) - aggregate_to_nodes -end - function tentative_prolongator_for_laplace(P0,B) n_nullspace_vecs = length(B) if n_nullspace_vecs != 1 @@ -341,10 +300,7 @@ function tentative_prolongator_with_block_size(aggregate_to_nodes::PVector,B, bl P0_partition, Bc_partition, local_coarse_dof_to_Bc... end |> tuple_of_arrays P0 = PSparseMatrix(P0_partition, B[1].index_partition, Bc_partition, true) - Bc = Array{PVector}(undef, n_B) - for (i,b) in enumerate(coarse_dof_to_Bc) - Bc[i] = PVector(b, Bc_partition) - end + Bc = [PVector(b, Bc_partition) for b in coarse_dof_to_Bc] P0, Bc end diff --git a/PartitionedSolvers/test/amg_tests.jl b/PartitionedSolvers/test/amg_tests.jl index c3c960a6..7f91238b 100644 --- a/PartitionedSolvers/test/amg_tests.jl +++ b/PartitionedSolvers/test/amg_tests.jl @@ -13,7 +13,7 @@ using Test using SparseArrays # Test strength graph computation -# First with Psparse matrix +# First with Psparse matrix and known solution ndofs = 18 nnodes = 9 nrows = 3 @@ -83,8 +83,7 @@ IJV = map(row_partition) do row_indices end I,J,V = tuple_of_arrays(IJV) -col_partition = row_partition -A = psparse(I,J,V,row_partition, col_partition) |> fetch +A = psparse(I,J,V,row_partition, row_partition) |> fetch # build solution I = [1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, @@ -104,12 +103,12 @@ solution = sparse(I, J, V, nnodes, nnodes) epsilon = 0.02 # Test with CSC sparse matrix -# Test sample matrix with block size 2 +# Test sample matrix with known solution A_seq = centralize(A) G_seq = PartitionedSolvers.strength_graph(A_seq, 2, epsilon=epsilon) @test solution ≈ G_seq -# Another test with 3 dims +# Another test with blocksize 3 M = rand([-2.0, -1, 1, 2], (3, 3)) M = sparse(M) A = blockdiag(M, M, M) @@ -122,7 +121,7 @@ G = PartitionedSolvers.strength_graph(M, 3, epsilon=epsilon) solution = sparse([1], [1], 1.0, 1, 1) @test solution ≈ G -# Create Test matrices (Linear Elasticity) +# Create Test matrix for Linear Elasticity parts_per_dir = (2,2,2) block_size = length(parts_per_dir) p = prod(parts_per_dir) @@ -138,7 +137,7 @@ G_dist = PartitionedSolvers.strength_graph(A_dist, block_size, epsilon=epsilon) G_dist_centralized = centralize(G_dist) diff = G_seq - G_dist_centralized diff_nnz = nnz(diff) -println("$(round(diff_nnz/(G_seq.m^2); digits=3))% of total entries are different between sequential and parallel G.") +println("Test strength graph: \n$(round(diff_nnz/(G_seq.m^2); digits=3))% of total entries are 
different between sequential and parallel G.") println("G_dist has $(round(1-nnz(G_dist_centralized)/nnz(G_seq);digits=2))% fewer nnz entries than G_seq.") println("(G_dist nnz entries: $(nnz(G_dist_centralized)), G_seq nnz entries: $(nnz(G_seq)))") @test isa(G_dist, PSparseMatrix) @@ -180,18 +179,11 @@ B_dist = near_nullspace_linear_elasticity(x_dist, partition(axes(A_dist,2))) B_seq = [collect(b) for b in B_dist] # Test tentative prolongator with sequential linear elasticity matrix -G_seq = PartitionedSolvers.strength_graph(A_seq, block_size, epsilon=epsilon) -node_to_aggregate, node_aggregates = PartitionedSolvers.aggregate(G_seq, dense_diag(G_seq);epsilon) -aggregate_to_nodes = PartitionedSolvers.collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) -Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_nodes,B_seq, block_size) +Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_nodes_seq,B_seq, block_size) @test Pc * stack(Bc) ≈ stack(B_seq) - # Test tentative prolongator with parallel matrix -G_dist = PartitionedSolvers.strength_graph(A_dist, block_size, epsilon=epsilon) -node_to_aggregate, node_aggregates = PartitionedSolvers.aggregate(G_dist,dense_diag(G_dist);epsilon) -aggregate_to_nodes = PartitionedSolvers.collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) -Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_nodes,B_dist, block_size) +Pc, Bc = PartitionedSolvers.tentative_prolongator_with_block_size(aggregate_to_nodes_dist,B_dist, block_size) n_B = length(B_dist) for i in 1:n_B @test isa(Bc[i], PVector) @@ -216,10 +208,10 @@ exp = eigmax(Array(centralize(M))) cols = axes(M, 2) x0 = prand(partition(cols)) x0_seq = collect(x0) -l, x = PartitionedSolvers.spectral_radius(M, x0, 10) -lseq, x = PartitionedSolvers.spectral_radius(centralize(M), x0_seq, 10) -@test l ≈ lseq -@test abs((l-exp)/exp) < 2*10^-1 +l_dist, x = PartitionedSolvers.spectral_radius(M, x0, 10) +l_seq, x = PartitionedSolvers.spectral_radius(centralize(M), x0_seq, 10) +@test l_dist ≈ l_seq +@test abs((l_dist-exp)/exp) < 2*10^-1 #= # Test amg From 5fc4e956dee26394f1cad136a78307e3d98314b1 Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Wed, 14 Aug 2024 15:33:47 +0200 Subject: [PATCH 13/18] Test amg for sequential linear elasticity --- PartitionedSolvers/src/PartitionedSolvers.jl | 1 + PartitionedSolvers/src/amg.jl | 10 ++-- PartitionedSolvers/test/amg_tests.jl | 62 +++++++++++++++++++- 3 files changed, 66 insertions(+), 7 deletions(-) diff --git a/PartitionedSolvers/src/PartitionedSolvers.jl b/PartitionedSolvers/src/PartitionedSolvers.jl index bf08d25a..768adf7f 100644 --- a/PartitionedSolvers/src/PartitionedSolvers.jl +++ b/PartitionedSolvers/src/PartitionedSolvers.jl @@ -29,6 +29,7 @@ export smoothed_aggregation export v_cycle export w_cycle export amg_level_params +export amg_level_params_linear_elasticity export amg_fine_params export amg_coarse_params export amg_statistics diff --git a/PartitionedSolvers/src/amg.jl b/PartitionedSolvers/src/amg.jl index 008443a3..84866844 100644 --- a/PartitionedSolvers/src/amg.jl +++ b/PartitionedSolvers/src/amg.jl @@ -468,7 +468,7 @@ function lambda_generic(invD,A,DinvA) # Perform a few iterations of Power method to estimate lambda # (Remark 3.5.2. 
in https://mediatum.ub.tum.de/download/1229321/1229321.pdf) x0 = rand(size(DinvA,2)) - ρ = spectral_radius(DinvA, x0, maxiter=20) + ρ, x = spectral_radius(DinvA, x0, 20) ω/ρ end @@ -542,11 +542,11 @@ function smoothed_aggregation_with_block_size(; ) function coarsen(A,B) # build strength graph - G = strength_graph(A, block_size=block_size, epsilon=epsilon) # TODO parallel + G = strength_graph(A, block_size, epsilon=epsilon) # TODO parallel diagG = dense_diag(G) node_to_aggregate, node_aggregates = aggregate(G,diagG;epsilon) - aggregate_to_nodes = collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) # TODO: provide parallel version - Pc,Bc,perm = tentative_prolongator(aggregate_to_nodes,B, block_size) # TODO parallel + aggregate_to_nodes = collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) + Pc,Bc = tentative_prolongator(aggregate_to_nodes,B, block_size) diagA = dense_diag(A) P = smoothed_prolongator(A,Pc,diagA;approximate_omega) # the given approximate omega should work in parallel R = transpose(P) @@ -715,7 +715,7 @@ end function amg_level_params_linear_elasticity(; pre_smoother = additive_schwarz(gauss_seidel(;iters=1);iters=1), - coarsening = smoothed_aggregation(approximate_omega = lambda_generic, + coarsening = smoothed_aggregation_with_block_size(approximate_omega = lambda_generic, tentative_prolongator = tentative_prolongator_with_block_size), cycle = v_cycle, pos_smoother = pre_smoother, diff --git a/PartitionedSolvers/test/amg_tests.jl b/PartitionedSolvers/test/amg_tests.jl index 7f91238b..7efad6db 100644 --- a/PartitionedSolvers/test/amg_tests.jl +++ b/PartitionedSolvers/test/amg_tests.jl @@ -213,7 +213,7 @@ l_seq, x = PartitionedSolvers.spectral_radius(centralize(M), x0_seq, 10) @test l_dist ≈ l_seq @test abs((l_dist-exp)/exp) < 2*10^-1 -#= + # Test amg # first with a sequential matrix nodes_per_dir = (100,100) @@ -279,6 +279,37 @@ solve!(y,S,b) update!(S,2*A) solve!(y,S,b) +# Now for linear elasticity (sequential) + + +level_params = amg_level_params_linear_elasticity() +fine_params = amg_fine_params(;level_params) +solver = amg(;fine_params) + +parts_per_dir = (2,2,2) +block_size = length(parts_per_dir) +p = prod(parts_per_dir) +ranks = DebugArray(LinearIndices((p,))) +nodes_per_dir = map(i->block_size*i,parts_per_dir) +args = PartitionedArrays.linear_elasticity_fem(nodes_per_dir,parts_per_dir,ranks) +A_dist = psparse(args...) |> fetch +A = centralize(A_dist) + +x = node_coordinates_unit_cube(nodes_per_dir,parts_per_dir, ranks) +B_dist = near_nullspace_linear_elasticity(x, partition(axes(A_dist,2))) +B = [collect(b) for b in 1:length(B_dist)] +x_exact = rand(size(A,2)) +b = A*x_exact +y = similar(x_exact) +y .= 0 + +Pl = setup(solver,y,A,b) +cg!(y,A,b;Pl,verbose=true) +println("Linear Elasticity norm of error: $(norm(y-x_exact))") +@test y ≈ x_exact + + + # Now in parallel parts_per_dir = (2,2) @@ -338,5 +369,32 @@ x .= 0 Pl = setup(amg(),x,A,b) _, history = cg!(x,A,b;Pl,log=true) display(history) -=# + +# Now for linear elasticity + +level_params = amg_level_params_linear_elasticity() +fine_params = amg_fine_params(;level_params) +solver = amg(;fine_params) + +parts_per_dir = (2,2,2) +block_size = length(parts_per_dir) +p = prod(parts_per_dir) +ranks = DebugArray(LinearIndices((p,))) +nodes_per_dir = map(i->block_size*i,parts_per_dir) +args = PartitionedArrays.linear_elasticity_fem(nodes_per_dir,parts_per_dir,ranks) +A = psparse(args...) 
|> fetch + +x = node_coordinates_unit_cube(nodes_per_dir,parts_per_dir, ranks) +B = near_nullspace_linear_elasticity(x, partition(axes(A,2))) +x_exact = pones(partition(axes(A,2))) +b = A*x_exact # TODO: bounds error +x = similar(b,axes(A,2)) +x .= 0 + +Pl = setup(solver,x,A,b) +cg!(x,A,b;Pl,verbose=true) +diff = collect(x) - collect(x_exact) +display(diff) +@test x ≈ x_exact + end \ No newline at end of file From 706885dc53f9155973fbbda6608829d999c0346b Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Wed, 21 Aug 2024 09:10:50 +0200 Subject: [PATCH 14/18] Fix bug in tentative_prolongator_with_block_size --- PartitionedSolvers/src/amg.jl | 48 ++++++++++++++++++-------- PartitionedSolvers/test/amg_tests.jl | 50 ++++++++++++++-------------- 2 files changed, 60 insertions(+), 38 deletions(-) diff --git a/PartitionedSolvers/src/amg.jl b/PartitionedSolvers/src/amg.jl index 84866844..75ad080d 100644 --- a/PartitionedSolvers/src/amg.jl +++ b/PartitionedSolvers/src/amg.jl @@ -257,6 +257,7 @@ end function tentative_prolongator_with_block_size(aggregate_to_nodes::PVector,B, block_size) own_values_B = map(own_values, B) n_B = length(B) + global_aggregate_to_owner = global_to_owner(aggregate_to_nodes.index_partition) P0_partition, Bc_partition, coarse_dof_to_Bc... = map(aggregate_to_nodes.index_partition, B[1].index_partition, local_values(aggregate_to_nodes), own_values_B...) do local_aggregate_local_indices, local_dof_local_indices, local_aggregate_to_local_nodes, own_dof_to_b... P0_own_own, local_coarse_dof_to_Bc = tentative_prolongator_with_block_size(local_aggregate_to_local_nodes, own_dof_to_b, block_size) # Create P0 partition @@ -285,7 +286,16 @@ function tentative_prolongator_with_block_size(aggregate_to_nodes::PVector,B, bl inds = (i - 1) * n_B + 1 : i * n_B perm_cols[inds] = cols end - perm_rows = PartitionedArrays.local_permutation(local_dof_local_indices) + perm_rows_with_ghost = PartitionedArrays.local_permutation(local_dof_local_indices) + # Filter only own rows + perm_rows = zeros(Int, own_length(local_dof_local_indices)) + ptr = 1 + for ind in perm_rows_with_ghost + if ind in own_to_local(local_dof_local_indices) + perm_rows[ptr] = ind + ptr += 1 + end + end P0_partition = PartitionedArrays.split_matrix(blocks, perm_rows, perm_cols) # Partition of Bc own_to_global_coarse_dofs = zeros(Int, n_own_coarse_dofs) @@ -294,9 +304,14 @@ function tentative_prolongator_with_block_size(aggregate_to_nodes::PVector,B, bl global_coarse_dofs = (agg-1) * n_B + 1 : agg * n_B own_to_global_coarse_dofs[inds] = global_coarse_dofs end - own_coarse_dofs = OwnIndices(n_global_coarse_dofs, part_id(local_dof_local_indices), own_to_global_coarse_dofs) + global_coarse_dofs_to_owner = zeros(Int, n_global_coarse_dofs) + for global_agg in 1:n_global_aggregates + inds = (global_agg-1) * n_B + 1 : global_agg * n_B + global_coarse_dofs_to_owner[inds] .= global_aggregate_to_owner[global_agg] + end + own_coarse_dofs = OwnIndices(n_global_coarse_dofs, part_id(local_aggregate_local_indices), own_to_global_coarse_dofs) #todo ghost_coarse_dofs = GhostIndices(n_global_coarse_dofs, Int[], Int32[]) - Bc_partition = OwnAndGhostIndices(own_coarse_dofs, ghost_coarse_dofs) + Bc_partition = OwnAndGhostIndices(own_coarse_dofs, ghost_coarse_dofs, global_coarse_dofs_to_owner) P0_partition, Bc_partition, local_coarse_dof_to_Bc... 
end |> tuple_of_arrays P0 = PSparseMatrix(P0_partition, B[1].index_partition, Bc_partition, true) @@ -306,8 +321,6 @@ end function tentative_prolongator_with_block_size(aggregate_to_nodes::JaggedArray,B, block_size) - # A draft for the scalar case is commented below - ## TODO assumes CSC # Algorithm 7 in https://mediatum.ub.tum.de/download/1229321/1229321.pdf if length(B) < 1 @@ -316,7 +329,7 @@ function tentative_prolongator_with_block_size(aggregate_to_nodes::JaggedArray,B n_aggregates = length(aggregate_to_nodes.ptrs)-1 n_B = length(B) - n_dofs = length(B[1]) + n_dofs = length(aggregate_to_nodes.data)*block_size n_dofs_c = n_aggregates * n_B Bc = [Vector{Float64}(undef,n_dofs_c) for _ in 1:n_B] @@ -364,7 +377,7 @@ function tentative_prolongator_with_block_size(aggregate_to_nodes::JaggedArray,B pini = P0.colptr[col] pend = P0.colptr[col+1]-1 rowids = P0.rowval[pini:pend] - P0.nzval[pini:pend] = B[b][rowids] + P0.nzval[pini:pend] = B[b][rowids] # fails end end @@ -452,7 +465,7 @@ function smoothed_prolongator(A,P0,diagA=dense_diag(A);approximate_omega) invDiagA = 1 ./ diagA Dinv = PartitionedArrays.sparse_diag_matrix(invDiagA,(axes(A,1),axes(A,1))) omega = approximate_omega(invDiagA,A,Dinv*A) - P = (I-omega*Dinv*A)*P0 + P = (I-omega*Dinv*A) * P0 #TODO: this gives an error P end @@ -463,7 +476,7 @@ function omega_for_1d_laplace(invD,A, DinvA) 2/3 end -function lambda_generic(invD,A,DinvA) +function lambda_generic(invD,A,DinvA::AbstractMatrix) ω = 4/3 # Perform a few iterations of Power method to estimate lambda # (Remark 3.5.2. in https://mediatum.ub.tum.de/download/1229321/1229321.pdf) @@ -472,6 +485,15 @@ function lambda_generic(invD,A,DinvA) ω/ρ end +function lambda_generic(invD,A,DinvA::PSparseMatrix) + ω = 4/3 + # Perform a few iterations of Power method to estimate lambda + # (Remark 3.5.2. 
in https://mediatum.ub.tum.de/download/1229321/1229321.pdf) + x0 = prand(partition(axes(DinvA,2))) + ρ, x = spectral_radius(DinvA, x0, 20) + ω/ρ +end + function spectral_radius(A, x, num_iterations:: Int) # Choose diagonal vector as initial guess y = similar(x) @@ -542,13 +564,13 @@ function smoothed_aggregation_with_block_size(; ) function coarsen(A,B) # build strength graph - G = strength_graph(A, block_size, epsilon=epsilon) # TODO parallel + G = strength_graph(A, block_size, epsilon=epsilon) diagG = dense_diag(G) node_to_aggregate, node_aggregates = aggregate(G,diagG;epsilon) aggregate_to_nodes = collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) Pc,Bc = tentative_prolongator(aggregate_to_nodes,B, block_size) diagA = dense_diag(A) - P = smoothed_prolongator(A,Pc,diagA;approximate_omega) # the given approximate omega should work in parallel + P = smoothed_prolongator(A, Pc, diagA; approximate_omega) R = transpose(P) Ac,cache = rap(R,A,P;reuse=true) Ac,Bc,R,P,cache,repartition_threshold = enhance_coarse_partition(A,Ac,Bc,R,P,cache,repartition_threshold)#maybe changes here @@ -713,10 +735,10 @@ function dofs_to_node(dofs, block_size) div(dofs[end], block_size) end -function amg_level_params_linear_elasticity(; +function amg_level_params_linear_elasticity(block_size; pre_smoother = additive_schwarz(gauss_seidel(;iters=1);iters=1), coarsening = smoothed_aggregation_with_block_size(approximate_omega = lambda_generic, - tentative_prolongator = tentative_prolongator_with_block_size), + tentative_prolongator = tentative_prolongator_with_block_size, block_size = block_size), cycle = v_cycle, pos_smoother = pre_smoother, ) diff --git a/PartitionedSolvers/test/amg_tests.jl b/PartitionedSolvers/test/amg_tests.jl index 7efad6db..6851548f 100644 --- a/PartitionedSolvers/test/amg_tests.jl +++ b/PartitionedSolvers/test/amg_tests.jl @@ -12,6 +12,7 @@ using Statistics using Test using SparseArrays + # Test strength graph computation # First with Psparse matrix and known solution ndofs = 18 @@ -175,7 +176,7 @@ end # Create nullspace vectors for tentative prolongator x_dist = node_coordinates_unit_cube(nodes_per_dir,parts_per_dir,ranks) -B_dist = near_nullspace_linear_elasticity(x_dist, partition(axes(A_dist,2))) +B_dist = nullspace_linear_elasticity(x_dist, partition(axes(A_dist,2))) B_seq = [collect(b) for b in B_dist] # Test tentative prolongator with sequential linear elasticity matrix @@ -279,12 +280,9 @@ solve!(y,S,b) update!(S,2*A) solve!(y,S,b) -# Now for linear elasticity (sequential) -level_params = amg_level_params_linear_elasticity() -fine_params = amg_fine_params(;level_params) -solver = amg(;fine_params) +# Now for linear elasticity (sequential) parts_per_dir = (2,2,2) block_size = length(parts_per_dir) @@ -296,14 +294,18 @@ A_dist = psparse(args...) 
|> fetch A = centralize(A_dist) x = node_coordinates_unit_cube(nodes_per_dir,parts_per_dir, ranks) -B_dist = near_nullspace_linear_elasticity(x, partition(axes(A_dist,2))) -B = [collect(b) for b in 1:length(B_dist)] +B_dist = nullspace_linear_elasticity(x, partition(axes(A_dist,2))) +B = [collect(b) for b in B_dist] x_exact = rand(size(A,2)) b = A*x_exact y = similar(x_exact) y .= 0 -Pl = setup(solver,y,A,b) +level_params = amg_level_params_linear_elasticity(block_size) +fine_params = amg_fine_params(;level_params) +solver = amg(;fine_params) + +Pl = setup(solver,y,A,b;nullspace=B) cg!(y,A,b;Pl,verbose=true) println("Linear Elasticity norm of error: $(norm(y-x_exact))") @test y ≈ x_exact @@ -370,31 +372,29 @@ Pl = setup(amg(),x,A,b) _, history = cg!(x,A,b;Pl,log=true) display(history) -# Now for linear elasticity -level_params = amg_level_params_linear_elasticity() -fine_params = amg_fine_params(;level_params) -solver = amg(;fine_params) +# Now for linear elasticity parts_per_dir = (2,2,2) block_size = length(parts_per_dir) p = prod(parts_per_dir) ranks = DebugArray(LinearIndices((p,))) nodes_per_dir = map(i->block_size*i,parts_per_dir) -args = PartitionedArrays.linear_elasticity_fem(nodes_per_dir,parts_per_dir,ranks) -A = psparse(args...) |> fetch - -x = node_coordinates_unit_cube(nodes_per_dir,parts_per_dir, ranks) -B = near_nullspace_linear_elasticity(x, partition(axes(A,2))) +args = linear_elasticity_fem(nodes_per_dir,parts_per_dir,ranks) +A = psparse(args...) |> fetch x_exact = pones(partition(axes(A,2))) -b = A*x_exact # TODO: bounds error -x = similar(b,axes(A,2)) -x .= 0 +x = node_coordinates_unit_cube(nodes_per_dir,parts_per_dir, ranks) +B = nullspace_linear_elasticity(x, partition(axes(A,2))) +b = A*x_exact +y = similar(b,axes(A,2)) +y .= 0 + +level_params = amg_level_params_linear_elasticity(block_size) +fine_params = amg_fine_params(;level_params) +solver = amg(;fine_params) -Pl = setup(solver,x,A,b) -cg!(x,A,b;Pl,verbose=true) -diff = collect(x) - collect(x_exact) -display(diff) -@test x ≈ x_exact +Pl = setup(solver,y,A,b;nullspace=B) +cg!(y,A,b;Pl,verbose=true) +@test y ≈ x_exact end \ No newline at end of file From 2946d79bd5dadaf35ddef0159646b2abbf0290e7 Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Wed, 21 Aug 2024 09:40:54 +0200 Subject: [PATCH 15/18] Remove Pkg dependency --- PartitionedSolvers/test/amg_tests.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/PartitionedSolvers/test/amg_tests.jl b/PartitionedSolvers/test/amg_tests.jl index 6851548f..0d3c4b0e 100644 --- a/PartitionedSolvers/test/amg_tests.jl +++ b/PartitionedSolvers/test/amg_tests.jl @@ -1,8 +1,5 @@ module AMGTests -# just for debugging -using Pkg -Pkg.activate("PartitionedSolvers") using PartitionedArrays using PartitionedSolvers using LinearAlgebra From a0d569627135498ce4703e8c2f082212a9693057 Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Wed, 21 Aug 2024 09:45:58 +0200 Subject: [PATCH 16/18] Remove Statistics dependency --- PartitionedSolvers/test/amg_tests.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/PartitionedSolvers/test/amg_tests.jl b/PartitionedSolvers/test/amg_tests.jl index 0d3c4b0e..a7328c8c 100644 --- a/PartitionedSolvers/test/amg_tests.jl +++ b/PartitionedSolvers/test/amg_tests.jl @@ -5,7 +5,6 @@ using PartitionedSolvers using LinearAlgebra using IterativeSolvers using IterativeSolvers: cg! 
-using Statistics using Test using SparseArrays From f2588744343867437753bf9f627b68a410acdbec Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Thu, 22 Aug 2024 17:09:43 +0200 Subject: [PATCH 17/18] Set default epsilon to 0.01 --- PartitionedSolvers/src/amg.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/PartitionedSolvers/src/amg.jl b/PartitionedSolvers/src/amg.jl index 75ad080d..37c40682 100644 --- a/PartitionedSolvers/src/amg.jl +++ b/PartitionedSolvers/src/amg.jl @@ -465,7 +465,7 @@ function smoothed_prolongator(A,P0,diagA=dense_diag(A);approximate_omega) invDiagA = 1 ./ diagA Dinv = PartitionedArrays.sparse_diag_matrix(invDiagA,(axes(A,1),axes(A,1))) omega = approximate_omega(invDiagA,A,Dinv*A) - P = (I-omega*Dinv*A) * P0 #TODO: this gives an error + P = (I-omega*Dinv*A) * P0 P end @@ -556,7 +556,7 @@ function smoothed_aggregation(; end function smoothed_aggregation_with_block_size(; - epsilon = 0, + epsilon = 0.01, approximate_omega = lambda_generic, tentative_prolongator = tentative_prolongator_with_block_size, repartition_threshold = 2000, @@ -564,7 +564,7 @@ function smoothed_aggregation_with_block_size(; ) function coarsen(A,B) # build strength graph - G = strength_graph(A, block_size, epsilon=epsilon) + G = strength_graph(A, block_size; epsilon) diagG = dense_diag(G) node_to_aggregate, node_aggregates = aggregate(G,diagG;epsilon) aggregate_to_nodes = collect_nodes_in_aggregate(node_to_aggregate, node_aggregates) @@ -591,7 +591,7 @@ function getblock!(B,A,ids_i,ids_j) end end -function strength_graph(A::PSparseMatrix, block_size::Integer; epsilon = 0) +function strength_graph(A::PSparseMatrix, block_size::Integer; epsilon) dof_partition = partition(axes(A,1)) node_partition = map(dof_partition) do my_dofs n_local_dofs = length(my_dofs) @@ -633,7 +633,7 @@ function strength_graph(A::PSparseMatrix, block_size::Integer; epsilon = 0) G end -function strength_graph(A::AbstractSparseMatrix, block_size::Integer; epsilon = 0) +function strength_graph(A::AbstractSparseMatrix, block_size::Integer; epsilon) if block_size < 1 error("Block size must be equal to or larger than 1.") @@ -651,8 +651,8 @@ function strength_graph(A::AbstractSparseMatrix, block_size::Integer; epsilon = return A end - if epsilon < 0 - error("Expected a positive epsilon.") + if epsilon <= 0 + error("Expected epsilon > 0.") end n_dofs = A.m @@ -737,7 +737,7 @@ end function amg_level_params_linear_elasticity(block_size; pre_smoother = additive_schwarz(gauss_seidel(;iters=1);iters=1), - coarsening = smoothed_aggregation_with_block_size(approximate_omega = lambda_generic, + coarsening = smoothed_aggregation_with_block_size(;approximate_omega = lambda_generic, tentative_prolongator = tentative_prolongator_with_block_size, block_size = block_size), cycle = v_cycle, pos_smoother = pre_smoother, From 6d9cad869dd407fdbea8ea55a20fffac03201679 Mon Sep 17 00:00:00 2001 From: Gelieza K Date: Fri, 23 Aug 2024 09:01:16 +0200 Subject: [PATCH 18/18] Fix bug in strength_graph --- PartitionedSolvers/src/amg.jl | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/PartitionedSolvers/src/amg.jl b/PartitionedSolvers/src/amg.jl index 37c40682..83a43f36 100644 --- a/PartitionedSolvers/src/amg.jl +++ b/PartitionedSolvers/src/amg.jl @@ -556,7 +556,7 @@ function smoothed_aggregation(; end function smoothed_aggregation_with_block_size(; - epsilon = 0.01, + epsilon = 0, approximate_omega = lambda_generic, tentative_prolongator = tentative_prolongator_with_block_size, 
repartition_threshold = 2000, @@ -651,8 +651,8 @@ function strength_graph(A::AbstractSparseMatrix, block_size::Integer; epsilon) return A end - if epsilon <= 0 - error("Expected epsilon > 0.") + if epsilon < 0 + error("Expected epsilon >= 0.") end n_dofs = A.m @@ -681,8 +681,12 @@ function strength_graph(A::AbstractSparseMatrix, block_size::Integer; epsilon) i_dofs = node_to_dofs(i_node, block_size) j_dofs = node_to_dofs(j_node, block_size) getblock!(B,A,i_dofs,j_dofs) + norm_B = norm(B) + if norm_B == 0 + continue + end # Calculate strength according to https://github.com/pyamg/pyamg/blob/e1fe54c93be1029c02ddcf84c2338a607b088703/pyamg/strength.py#L275 - if norm(B) >= epsilon * sqrt(diag_norms[i_node] * diag_norms[j_node]) + if norm_B >= epsilon * sqrt(diag_norms[i_node] * diag_norms[j_node]) nnz += 1 end end @@ -710,8 +714,12 @@ function strength_graph(A::AbstractSparseMatrix, block_size::Integer; epsilon) i_dofs = node_to_dofs(i_node, block_size) j_dofs = node_to_dofs(j_node, block_size) getblock!(B,A,i_dofs,j_dofs) + norm_B = norm(B) + if norm_B == 0 + continue + end # Calculate strength according to https://github.com/pyamg/pyamg/blob/e1fe54c93be1029c02ddcf84c2338a607b088703/pyamg/strength.py#L275 - if norm(B) >= epsilon * sqrt(diag_norms[i_node] * diag_norms[j_node]) + if norm_B >= epsilon * sqrt(diag_norms[i_node] * diag_norms[j_node]) I[i_nz] = i_node J[i_nz] = j_node V[i_nz] = 1.0
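The last hunks above apply the PyAMG-style block strength-of-connection test with the newly added zero-block guard. Below is a minimal standalone sketch of that criterion, assuming only LinearAlgebra; the function name and arguments are illustrative and are not part of amg.jl.

using LinearAlgebra

# Nodes i and j count as strongly connected when the norm of the coupling block
# A_ij reaches epsilon times the geometric mean of the two diagonal block norms.
# A zero coupling block is never connected, which is what the final patch enforces.
function strongly_connected(Aij::AbstractMatrix, Aii_norm::Real, Ajj_norm::Real, epsilon::Real)
    norm_B = norm(Aij)
    norm_B == 0 && return false
    norm_B >= epsilon * sqrt(Aii_norm * Ajj_norm)
end

strongly_connected([0.5 0.0; 0.0 0.5], 1.0, 1.0, 0.02) # true
strongly_connected(zeros(2, 2), 1.0, 1.0, 0.0)         # false even for epsilon = 0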