From 046fdcbeddf3d9baa29a7329e81fdd931d6d8c76 Mon Sep 17 00:00:00 2001 From: Jeremiah Corrado Date: Mon, 30 Sep 2024 16:49:43 -0500 Subject: [PATCH 01/10] move makeSparseArray to SymArrayDmap module. Use Arkouda's global controls for whether domains are distributed. Use same makeSparseArray routine in SparseSymArray and randSparseMatrix helper proc Signed-off-by: Jeremiah Corrado --- src/MultiTypeSymEntry.chpl | 7 ------ src/SparseMatrix.chpl | 51 ++++++-------------------------------- src/SparseMatrixMsg.chpl | 6 ++--- src/SymArrayDmap.chpl | 32 ++++++++++++++++++++++++ 4 files changed, 42 insertions(+), 54 deletions(-) diff --git a/src/MultiTypeSymEntry.chpl b/src/MultiTypeSymEntry.chpl index 4718a15743..c75c6340f2 100644 --- a/src/MultiTypeSymEntry.chpl +++ b/src/MultiTypeSymEntry.chpl @@ -727,13 +727,6 @@ module MultiTypeSymEntry return new shared SparseSymEntry(a, size, matLayout, eltType); } - proc makeSparseArray(size, type eltType, param matLayout) { - const dom = {1..size, 1..size}; - var spsDom: sparse subdomain(dom) dmapped new dmap(new CS(compressRows=(matLayout==layout.CSR))); - var A: [spsDom] eltType; - return A; - } - class GeneratorSymEntry:AbstractSymEntry { type etype; diff --git a/src/SparseMatrix.chpl b/src/SparseMatrix.chpl index 0a308d4788..54313adad2 100644 --- a/src/SparseMatrix.chpl +++ b/src/SparseMatrix.chpl @@ -455,14 +455,16 @@ module SparseMatrix { return C; } - proc randSparseMatrix(size, density, param layout, param distributed=false, type eltType) { - const Dom = {1..size, 1..size}; + proc randSparseMatrix(size, density, param layout, type eltType) { + import SymArrayDmap.makeSparseDomain; + var (SD, dense) = makeSparseDomain(size, layout); - // compute some random sparse index patterns for the matrices - // - const AD = randSparseDomain(Dom, density, layout, distributed); + // randomly include index pairs based on provided density + for (i,j) in dense do + if rands.next() <= density then + SD += (i,j); - var A: [AD] eltType; + var 
A: [SD] eltType; return A; } @@ -500,43 +502,6 @@ module SparseMatrix { } - // create a local random sparse matrix within the space of 'Dom' of - // the given density and layout. If distributed is true, this will - // be a block-distributed sparse matrix, otherwise it'll be local. - // - proc randSparseDomain(parentDom, density, param matLayout, param distributed) - where distributed == false { - - var SD: sparse subdomain(parentDom) dmapped new dmap(new CS(compressRows=(matLayout==layout.CSR))); - - for (i,j) in parentDom do - if rands.next() <= density then - SD += (i,j); - - return SD; - } - - proc randSparseDomain(parentDom, density, param matLayout, param distributed) - where distributed == true { - const locsPerDim = sqrt(numLocales:real): int, - grid = {0.. Date: Tue, 1 Oct 2024 13:11:31 -0500 Subject: [PATCH 02/10] fill sparse matrices in global row-major or column-major order, instead of 1 locale at a time Signed-off-by: Jeremiah Corrado --- src/SparseMatrix.chpl | 45 ++++++++++++++++++++++++++++++++-------- src/SparseMatrixMsg.chpl | 4 ++-- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/src/SparseMatrix.chpl b/src/SparseMatrix.chpl index 54313adad2..764c7bdec3 100644 --- a/src/SparseMatrix.chpl +++ b/src/SparseMatrix.chpl @@ -6,7 +6,7 @@ module SparseMatrix { use CommAggregation; // Quick and dirty, not permanent - proc fillSparseMatrix(ref spsMat, const A: [?D] ?eltType) throws { + proc fillSparseMatrix(ref spsMat, const A: [?D] ?eltType, param l: layout) throws { if A.rank != 1 then throw getErrorWithContext( msg="fill vals requires a 1D array; got a %iD array".format(A.rank), @@ -31,8 +31,35 @@ module SparseMatrix { moduleName=getModuleName(), errorClass="IllegalArgumentError" ); - for((i,j), idx) in zip(spsMat.domain,A.domain) { - spsMat[i,j] = A[idx]; + + // Note: this simplified loop cannot be used because iteration over spsMat.domain + // occurs one locale at a time (i.e., the first 
spsMat.domain.parDom.localSubdomain(Locales[0]).size + // values from 'A' are deposited on locale 0, and so on), rather than depositing + // them row-major or column-major globally + // for ((i,j), idx) in zip(spsMat.domain,A.domain) { + // spsMat[i,j] = A[idx]; + // } + + if l == layout.CSR { + var idx = 0; + for i in spsMat.domain.parentDom.dim(0) { + for j in spsMat.domain.parentDom.dim(1) { + if spsMat.domain.contains((i, j)) { + spsMat[i,j] = A[idx]; + idx += 1; + } + } + } + } else { + var idx = 0; + for j in spsMat.domain.parentDom.dim(1) { + for i in spsMat.domain.parentDom.dim(0) { + if spsMat.domain.contains((i, j)) { + spsMat[i,j] = A[idx]; + idx += 1; + } + } + } } } @@ -368,14 +395,14 @@ module SparseMatrix { // sparse, outer, matrix-matrix multiplication algorithm; A is assumed // CSC and B CSR - // proc sparseMatMatMult(A, B) { - // var spsData: sparseMatDat; + proc sparseMatMatMult(A, B) { + var spsData: sparseMatDat; - // sparseMatMatMult(A, B, spsData); + sparseMatMatMult(A, B, spsData); - // var C = makeSparseMat(A.domain.parentDom, spsData); - // return C; - // } + var C = makeSparseMat(A.domain.parentDom, spsData); + return C; + } // This version forms the guts of the above and permits a running set // of nonzeroes to be passed in and updated rather than assuming that diff --git a/src/SparseMatrixMsg.chpl b/src/SparseMatrixMsg.chpl index dc83eadffb..7d6fac6a92 100644 --- a/src/SparseMatrixMsg.chpl +++ b/src/SparseMatrixMsg.chpl @@ -125,11 +125,11 @@ module SparseMatrixMsg { if gEnt.layoutStr=="CSC" { // Hardcode for int right now var sparrayEntry = gEnt.toSparseSymEntry(int, dimensions=2, layout.CSC); - fillSparseMatrix(sparrayEntry.a, vals.a); + fillSparseMatrix(sparrayEntry.a, vals.a, layout.CSC); } else if gEnt.layoutStr=="CSR" { // Hardcode for int right now var sparrayEntry = gEnt.toSparseSymEntry(int, dimensions=2, layout.CSR); - fillSparseMatrix(sparrayEntry.a, vals.a); + fillSparseMatrix(sparrayEntry.a, vals.a, layout.CSR); } else { 
throw getErrorWithContext( msg="unsupported layout for sparse matrix: %s".format(gEnt.layoutStr), From 7c4d9eb4b26023d7428991bfa561d8e32efd01f7 Mon Sep 17 00:00:00 2001 From: Jeremiah Corrado Date: Tue, 1 Oct 2024 13:12:55 -0500 Subject: [PATCH 03/10] debugging sparseMatToPdarray Signed-off-by: Jeremiah Corrado --- src/SparseMatrix.chpl | 116 ++++++++++++++++++++++++++++-------------- tests/sparse_test.py | 14 +++-- 2 files changed, 89 insertions(+), 41 deletions(-) diff --git a/src/SparseMatrix.chpl b/src/SparseMatrix.chpl index 764c7bdec3..a3194c0c60 100644 --- a/src/SparseMatrix.chpl +++ b/src/SparseMatrix.chpl @@ -70,41 +70,64 @@ module SparseMatrix { proc sparseMatToPdarrayCSR(const ref spsMat, ref rows, ref cols, ref vals) { // // serial algorithm (for reference): // for((i,j), idx) in zip(spsMat.domain,0..) { - // rows[idx] = i; - // cols[idx] = j; - // vals[idx] = spsMat[i, j]; + // rows[idx] = i; + // cols[idx] = j; + // vals[idx] = spsMat[i, j]; // } + for i in spsMat.domain.parentDom.dim(0) { + for j in spsMat.domain.parentDom.dim(1) { + if spsMat.domain.contains((i, j)) { + writeln("(",i,",",j,")\t", spsMat[i,j]); + } + } + } + // matrix shape const m = spsMat.shape[0], n = spsMat.shape[1]; + writeln("(", m, ", ", n, ")"); + // info about matrix block distribution across a 2D grid of locales const grid = spsMat.domain.targetLocales(), - nRowBlocks = grid.domain.dim(0).size, - nColBlocks = grid.domain.dim(1).size; + nRowBlocks = grid.domain.dim(0).size, // 2 + nColBlocks = grid.domain.dim(1).size; // 2 + + writeln("grid: ", grid, " \t ", grid.domain); // number of non-zeros in each row, for each column-block of the matrix // TODO: use zero-based indexing for SparseSymEntry const nnzDom = blockDist.createDomain({1..m, 0.. 
", iStart, "..", iEnd, "\t", jStart, "..", jEnd, "\t", spsMat.domain.parentDom.localSubdomain()); + // TODO: there is probably a much smarter way to compute this information using the // underlying CSR data structures for i in iStart..iEnd { @@ -112,9 +135,9 @@ module SparseMatrix { if spsMat.domain.contains((i,j)) { // TODO: this localAccess assumes that the `rg*nRowsPerGroup` math lines up perfectly // with the matrix's block distribution; this is (probably) not guaranteed - // - should use the actual dense block distribution to compute the local indices - // - will need to store that as a field in SparseSymEntry - nnzPerColBlock.localAccess[i,colBlockIdx] += 1; + // - should use the parentDom to compute actual local indices + // nnzPerColBlock.localAccess[i,colBlockIdx] += 1; + nnzPerColBlock[i, colBlockIdx] += 1; } } } @@ -122,8 +145,12 @@ module SparseMatrix { } } + writeln("nnzPerColBlock: ", nnzPerColBlock); + // scan operation to find the starting index (in the 1D output arrays) for each column-block of each row - const colBlockStartOffsets = flattenedExScanCSR(nnzPerColBlock, nRowGroups, nTasksPerRowBlock, nRowsPerGroup); + const colBlockStartOffsets = flattenedExScanCSR(nnzPerColBlock, nRowGroups, nRowGroupsPerBlock, nRowsPerGroup); + + writeln("colBlockStartOffsets: ", colBlockStartOffsets); // store the non-zero indices and values in the output arrays coforall colBlockIdx in 0.. 
", iStart, "..", iEnd, "\t", jStart, "..", jEnd); + for i in iStart..iEnd { var idx = colBlockStartOffsets[i, colBlockIdx]; + writeln("\t\t(", colBlockIdx, ", ", rg, " : ", rowBlockIdx, ") -> row: ", i, "\t ", idx); for j in jStart..jEnd { if spsMat.domain.contains((i,j)) { - // rows[idx] = i; - // cols[idx] = j; - // vals[idx] = spsMat[i, j]; - idxAgg.copy(rows[idx], i); - idxAgg.copy(cols[idx], j); - valAgg.copy(vals[idx], spsMat[i, j]); // TODO: (see above note about localAccess) + rows[idx] = i; + cols[idx] = j; + vals[idx] = spsMat[i, j]; + // idxAgg.copy(cols[idx], j); + // idxAgg.copy(rows[idx], i); + // valAgg.copy(vals[idx], spsMat[i, j]); // TODO: (see above note about localAccess) idx += 1; } } @@ -157,6 +187,10 @@ module SparseMatrix { } } } + + writeln("rows: ", rows); + writeln("cols: ", cols); + writeln("vals: ", vals); } /* @@ -187,7 +221,7 @@ module SparseMatrix { // details for splitting columns into groups for task-level parallelism const nTasksPerColBlock = here.maxTaskPar, - nColGroups = nColBlocks * nTasksPerColBlock, + nColGroups = min(nColBlocks * nTasksPerColBlock, n), nColsPerGroup = n / nColGroups, nRowsPerBlock = m / nRowBlocks; @@ -209,9 +243,9 @@ module SparseMatrix { if spsMat.domain.contains((i,j)) { // TODO: this localAccess assumes that the `cg*nColsPerGroup` math lines up perfectly // with the matrix's block distribution; this is (probably) not guaranteed - // - should use the actual dense block distribution to compute the local indices - // - will need to store that as a field in SparseSymEntry - nnzPerRowBlock.localAccess[rowBlockIdx,j] += 1; + // - should use the parentDom to compute actual local indices + // nnzPerRowBlock.localAccess[rowBlockIdx,j] += 1; + nnzPerRowBlock[rowBlockIdx,j] += 1; } } } @@ -241,12 +275,12 @@ module SparseMatrix { var idx = rowBlockStartOffsets[rowBlockIdx, j]; for i in iStart..iEnd { if spsMat.domain.contains((i,j)) { - // rows[idx] = i; - // cols[idx] = j; - // vals[idx] = spsMat[i, j]; - 
idxAgg.copy(rows[idx], i); - idxAgg.copy(cols[idx], j); - valAgg.copy(vals[idx], spsMat[i, j]); // TODO: (see above note about localAccess) + rows[idx] = i; + cols[idx] = j; + vals[idx] = spsMat[i, j]; + // idxAgg.copy(rows[idx], i); + // idxAgg.copy(cols[idx], j); + // valAgg.copy(vals[idx], spsMat[i, j]); // TODO: (see above note about localAccess) idx += 1; } } @@ -258,7 +292,7 @@ module SparseMatrix { // helper function for sparseMatToPdarrayCSR // computes a row-major flattened scan of a distributed 2D array in parallel - proc flattenedExScanCSR(in nnzPerColBlock: [?d] int, nRowGroups: int, nTasksPerRowBlock: int, nRowsPerGroup: int) { + proc flattenedExScanCSR(in nnzPerColBlock: [?d] int, nRowGroups: int, nRowGroupsPerBlock: int, nRowsPerGroup: int) { const nColBlocks = d.dim(1).size, m = d.dim(0).size, grid = d.targetLocales(), @@ -283,9 +317,11 @@ module SparseMatrix { ); var intermediate: [interDom] int; + writeln("interDom: ", interDom); + // compute an exclusive scan within each row group coforall rg in 0.. 
Date: Wed, 2 Oct 2024 11:51:02 -0500 Subject: [PATCH 04/10] fix ordering bug in flattened-scan helper procs Signed-off-by: Jeremiah Corrado --- src/SparseMatrix.chpl | 56 +++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/src/SparseMatrix.chpl b/src/SparseMatrix.chpl index a3194c0c60..2e62846262 100644 --- a/src/SparseMatrix.chpl +++ b/src/SparseMatrix.chpl @@ -78,7 +78,7 @@ module SparseMatrix { for i in spsMat.domain.parentDom.dim(0) { for j in spsMat.domain.parentDom.dim(1) { if spsMat.domain.contains((i, j)) { - writeln("(",i,",",j,")\t", spsMat[i,j]); + writeln("(",i,",",j,")\t", spsMat[i,j], "\t", spsMat[i,j].locale); } } } @@ -116,22 +116,32 @@ module SparseMatrix { writeln("nRowGroupsPerBlock", nRowGroupsPerBlock); writeln("nColsPerBlock: ", nColsPerBlock); + writeln("--------------------"); + writeln(spsMat.domain.parentDom); + coforall loc in grid do on loc { + writeln(loc, "\t", spsMat.domain.parentDom.localSubdomain()); + } + writeln("--------------------"); + // compute the number of non-zeros in each column-section of each row coforall colBlockIdx in 0.. 
", iStart, "..", iEnd, "\t", jStart, "..", jEnd, "\t", spsMat.domain.parentDom.localSubdomain()); + writeln("\t(", colBlockIdx, ", ", rg, " : ", rowBlockIdx, ") -> ", iStart, "..", iEnd, "\t", jRange, "\t", spsMat.domain.parentDom.localSubdomain()); // TODO: there is probably a much smarter way to compute this information using the // underlying CSR data structures for i in iStart..iEnd { - for j in jStart..jEnd { + for j in jRange { if spsMat.domain.contains((i,j)) { // TODO: this localAccess assumes that the `rg*nRowsPerGroup` math lines up perfectly // with the matrix's block distribution; this is (probably) not guaranteed @@ -167,11 +177,9 @@ module SparseMatrix { var idxAgg = newSrcAggregator(int), valAgg = newSrcAggregator(spsMat.eltType); - writeln("\t(", colBlockIdx, ", ", rg, " : ", rowBlockIdx, ") -> ", iStart, "..", iEnd, "\t", jStart, "..", jEnd); - for i in iStart..iEnd { var idx = colBlockStartOffsets[i, colBlockIdx]; - writeln("\t\t(", colBlockIdx, ", ", rg, " : ", rowBlockIdx, ") -> row: ", i, "\t ", idx); + writeln("\t\t(", colBlockIdx, ", ", rg, " : ", rowBlockIdx, ") -> row: ", i, "\t", jStart, "..", jEnd, "\t", idx); for j in jStart..jEnd { if spsMat.domain.contains((i,j)) { rows[idx] = i; @@ -300,14 +308,16 @@ module SparseMatrix { var colBlockStartOffsets: [d] int; - // // serial algorithm (for reference): - // var sum = 0; - // for i in 1..m { - // for colBlockIdx in 0.. 
Date: Wed, 2 Oct 2024 15:12:42 -0500 Subject: [PATCH 05/10] sparseMatToPdarrayCSR behaving correctly in distributed setting Signed-off-by: Jeremiah Corrado --- src/SparseMatrix.chpl | 214 +++++++++++++++--------------------------- 1 file changed, 76 insertions(+), 138 deletions(-) diff --git a/src/SparseMatrix.chpl b/src/SparseMatrix.chpl index 2e62846262..03adf60ba5 100644 --- a/src/SparseMatrix.chpl +++ b/src/SparseMatrix.chpl @@ -75,79 +75,36 @@ module SparseMatrix { // vals[idx] = spsMat[i, j]; // } - for i in spsMat.domain.parentDom.dim(0) { - for j in spsMat.domain.parentDom.dim(1) { - if spsMat.domain.contains((i, j)) { - writeln("(",i,",",j,")\t", spsMat[i,j], "\t", spsMat[i,j].locale); - } - } - } - - // matrix shape - const m = spsMat.shape[0], - n = spsMat.shape[1]; - - writeln("(", m, ", ", n, ")"); - // info about matrix block distribution across a 2D grid of locales const grid = spsMat.domain.targetLocales(), nRowBlocks = grid.domain.dim(0).size, // 2 nColBlocks = grid.domain.dim(1).size; // 2 - writeln("grid: ", grid, " \t ", grid.domain); - // number of non-zeros in each row, for each column-block of the matrix - // TODO: use zero-based indexing for SparseSymEntry - const nnzDom = blockDist.createDomain({1..m, 0.. ", iStart, "..", iEnd, "\t", jRange, "\t", spsMat.domain.parentDom.localSubdomain()); + coforall rt in 0.. row: ", i, "\t", jStart, "..", jEnd, "\t", idx); - for j in jStart..jEnd { - if spsMat.domain.contains((i,j)) { - rows[idx] = i; - cols[idx] = j; - vals[idx] = spsMat[i, j]; - // idxAgg.copy(cols[idx], j); - // idxAgg.copy(rows[idx], i); - // valAgg.copy(vals[idx], spsMat[i, j]); // TODO: (see above note about localAccess) - idx += 1; + const nRowTasks = min(here.maxTaskPar, iRange.size), + nRowsPerTask = iRange.size / nRowTasks; + + coforall rt in 0.. 
Date: Wed, 2 Oct 2024 15:26:29 -0600 Subject: [PATCH 06/10] fix non-dist compiler errors and re-implement sparseMatToPdarrayCSC Signed-off-by: Jeremiah Corrado --- src/SparseMatrix.chpl | 244 ++++++++++++++++++++++-------------------- tests/sparse_test.py | 14 +-- 2 files changed, 128 insertions(+), 130 deletions(-) diff --git a/src/SparseMatrix.chpl b/src/SparseMatrix.chpl index 03adf60ba5..13d9d8f1b7 100644 --- a/src/SparseMatrix.chpl +++ b/src/SparseMatrix.chpl @@ -63,6 +63,15 @@ module SparseMatrix { } } + proc getGridInfo(const ref spsMat) where spsMat.chpl_isNonDistributedArray() { + return (reshape([here,], {0..<1, 0..<1}), 1, 1); + } + + proc getGridInfo(const ref spsMat) where !spsMat.chpl_isNonDistributedArray() { + const grid = spsMat.domain.targetLocales(); + return (grid, grid.dim(0).size, grid.dim(1).size); + } + /* Fill the rows, cols, and vals arrays with the non-zero indices and values from the sparse matrix in row-major order. @@ -76,9 +85,7 @@ module SparseMatrix { // } // info about matrix block distribution across a 2D grid of locales - const grid = spsMat.domain.targetLocales(), - nRowBlocks = grid.domain.dim(0).size, // 2 - nColBlocks = grid.domain.dim(1).size; // 2 + const (grid, nRowBlocks, nColBlocks) = getGridInfo(spsMat); // number of non-zeros in each row, for each column-block of the matrix // TODO: make this a sparse array ('spsMat.shape[0]' could be very large) @@ -93,11 +100,11 @@ module SparseMatrix { iRange = lsd.dim(0), jRange = lsd.dim(1); - // TODO: do all this with a single task if iRange.size < here.maxTaskPar (instead of spawning one task per row) + // TODO: use a single task if iRange.size < here.maxTaskPar (instead of spawning one task per row) const nRowTasks = min(here.maxTaskPar, iRange.size), nRowsPerTask = iRange.size / nRowTasks; - coforall rt in 0.. 
Date: Wed, 2 Oct 2024 16:58:37 -0500 Subject: [PATCH 07/10] fix indexing bug in colMajorExScan Signed-off-by: Jeremiah Corrado --- src/SparseMatrix.chpl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/SparseMatrix.chpl b/src/SparseMatrix.chpl index 13d9d8f1b7..996148adf3 100644 --- a/src/SparseMatrix.chpl +++ b/src/SparseMatrix.chpl @@ -175,7 +175,7 @@ module SparseMatrix { const (grid, nRowBlocks, nColBlocks) = getGridInfo(spsMat); // number of non-zeros in each column, for each row-block of the matrix - // TODO: make this a sparse array ('spsMat.shape[0]' could be very large) + // TODO: make this a sparse array ('spsMat.shape[1]' could be very large) const nnzDom = blockDist.createDomain({0.. Date: Mon, 7 Oct 2024 12:05:36 -0600 Subject: [PATCH 08/10] re-add a few TODO's about improving performance using lower-level accesses to CSR/CSC data structures Signed-off-by: Jeremiah Corrado --- src/SparseMatrix.chpl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/SparseMatrix.chpl b/src/SparseMatrix.chpl index 996148adf3..a925c7f685 100644 --- a/src/SparseMatrix.chpl +++ b/src/SparseMatrix.chpl @@ -108,6 +108,8 @@ module SparseMatrix { const iStart = rt * nRowsPerTask + iRange.low, iStop = if rt == nRowTasks-1 then iRange.last else (rt+1) * nRowsPerTask + iRange.low - 1; + // TODO: try to avoid repeated 'contains' checks here (e.g, by directly querying the + // local subdomain's array of non-zero indices) for i in iStart..iStop { for j in jRange { if spsMat.domain.contains((i,j)) @@ -142,6 +144,8 @@ module SparseMatrix { var idxAgg = newDstAggregator(int), valAgg = newDstAggregator(spsMat.eltType); + // TODO: try to avoid repeated 'contains' checks here (e.g, by directly iterating + // over the local subdomain's array of non-zero indices) for i in iStart..iStop { var idx = colBlockStartOffsets.localAccess[i, colBlockIdx]; for j in jRange { @@ -195,6 +199,8 @@ module SparseMatrix { const jStart = ct * nColsPerTask + jRange.low, 
jStop = if ct == nColTasks-1 then jRange.last else (ct+1) * nColsPerTask + jRange.low - 1; + // TODO: try to avoid repeated 'contains' checks here (e.g, by directly querying the + // local subdomain's array of non-zero indices) for i in iRange { for j in jStart..jStop { if spsMat.domain.contains((i,j)) @@ -229,6 +235,8 @@ module SparseMatrix { var idxAgg = newDstAggregator(int), valAgg = newDstAggregator(spsMat.eltType); + // TODO: try to avoid repeated 'contains' checks here (e.g, by directly iterating + // over the local subdomain's array of non-zero indices) for j in jStart..jStop { var idx = rowBlockStartOffsets.localAccess[rowBlockIdx, j]; for i in iRange { From 117d3e9fdacd00ce601b6445164ed23358035ab7 Mon Sep 17 00:00:00 2001 From: Jeremiah Corrado Date: Tue, 8 Oct 2024 10:39:24 -0600 Subject: [PATCH 09/10] use CSR/CSC 'startIdx' array to compute number of non-zeros in each row/column. Use 'rows/colsAndVals' iterators to avoid use of 'contains' query when populating 1D arrays Signed-off-by: Jeremiah Corrado --- src/SparseMatrix.chpl | 246 ++++++++++++++++-------------------------- 1 file changed, 95 insertions(+), 151 deletions(-) diff --git a/src/SparseMatrix.chpl b/src/SparseMatrix.chpl index a925c7f685..9592c6141f 100644 --- a/src/SparseMatrix.chpl +++ b/src/SparseMatrix.chpl @@ -63,13 +63,20 @@ module SparseMatrix { } } - proc getGridInfo(const ref spsMat) where spsMat.chpl_isNonDistributedArray() { - return (reshape([here,], {0..<1, 0..<1}), 1, 1); + proc getGrid(const ref spsMat) where spsMat.chpl_isNonDistributedArray() { + return reshape([here,], {0..<1, 0..<1}); } - proc getGridInfo(const ref spsMat) where !spsMat.chpl_isNonDistributedArray() { - const grid = spsMat.domain.targetLocales(); - return (grid, grid.dim(0).size, grid.dim(1).size); + proc getGrid(const ref spsMat) where !spsMat.chpl_isNonDistributedArray() { + return spsMat.domain.targetLocales(); + } + + proc getLSD(const ref spsMat) where spsMat.chpl_isNonDistributedArray() { + return 
spsMat.domain; + } + + proc getLSD(const ref spsMat) where !spsMat.chpl_isNonDistributedArray() { + return spsMat.domain.localSubdomain(); } /* @@ -78,85 +85,53 @@ module SparseMatrix { */ proc sparseMatToPdarrayCSR(const ref spsMat, ref rows, ref cols, ref vals) { // // serial algorithm (for reference): - // for((i,j), idx) in zip(spsMat.domain,0..) { - // rows[idx] = i; - // cols[idx] = j; - // vals[idx] = spsMat[i, j]; + // var idx = 0; + // for i in spsMat.domain.parentDom.dim(0) { + // for j in spsMat.domain.parentDom.dim(1) { + // if spsMat.domain.contains((i, j)) { + // rows[idx] = i; + // cols[idx] = j; + // vals[idx] = spsMat[i, j]; + // idx += 1; + // } + // } // } - // info about matrix block distribution across a 2D grid of locales - const (grid, nRowBlocks, nColBlocks) = getGridInfo(spsMat); + const grid = getGrid(spsMat); // number of non-zeros in each row, for each column-block of the matrix // TODO: make this a sparse array ('spsMat.shape[0]' could be very large) - const nnzDom = blockDist.createDomain({1..spsMat.shape[0], 0.. 
Date: Tue, 8 Oct 2024 12:12:18 -0500 Subject: [PATCH 10/10] add 'getLocalSubarray' queries to fix multi-locale build errors Signed-off-by: Jeremiah Corrado --- src/SparseMatrix.chpl | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/SparseMatrix.chpl b/src/SparseMatrix.chpl index 9592c6141f..e42cbe810b 100644 --- a/src/SparseMatrix.chpl +++ b/src/SparseMatrix.chpl @@ -79,6 +79,18 @@ module SparseMatrix { return spsMat.domain.localSubdomain(); } + proc getLSA(const ref spsMat, rowBlockIdx: int, colBlockIdx: int) const ref + where spsMat.chpl_isNonDistributedArray() + { + return spsMat; + } + + proc getLSA(const ref spsMat, rowBlockIdx: int, colBlockIdx: int) const ref + where !spsMat.chpl_isNonDistributedArray() + { + return spsMat.getLocalSubarray(rowBlockIdx, colBlockIdx); + } + /* Fill the rows, cols, and vals arrays with the non-zero indices and values from the sparse matrix in row-major order. @@ -123,11 +135,13 @@ module SparseMatrix { coforall (rowBlockIdx, colBlockIdx) in grid.domain with (ref rows, ref cols, ref vals) { on grid[rowBlockIdx, colBlockIdx] { const lsd = getLSD(spsMat); + const ref lsa = getLSA(spsMat, rowBlockIdx, colBlockIdx); forall i in lsd.rows() with (var idxAgg = newDstAggregator(int), - var valAgg = newDstAggregator(spsMat.eltType)) { + var valAgg = newDstAggregator(spsMat.eltType), + const ref lsa) { var idx = colBlockStartOffsets.localAccess[i, colBlockIdx]; - for (j, v) in spsMat.colsAndVals(i) { + for (j, v) in lsa.colsAndVals(i) { idxAgg.copy(rows[idx], i); idxAgg.copy(cols[idx], j); valAgg.copy(vals[idx], v); @@ -182,12 +196,14 @@ module SparseMatrix { coforall (rowBlockIdx, colBlockIdx) in grid.domain with (ref rows, ref cols, ref vals) { on grid[rowBlockIdx, colBlockIdx] { const lsd = getLSD(spsMat); + const ref lsa = getLSA(spsMat, rowBlockIdx, colBlockIdx); forall j in lsd.cols() with (var idxAgg = newDstAggregator(int), - var valAgg = newDstAggregator(spsMat.eltType)) { + var 
valAgg = newDstAggregator(spsMat.eltType), + const ref lsa) { var idx = rowBlockStartOffsets.localAccess[rowBlockIdx, j]; - for (i, v) in spsMat.rowsAndVals(j) { + for (i, v) in lsa.rowsAndVals(j) { idxAgg.copy(rows[idx], i); idxAgg.copy(cols[idx], j); valAgg.copy(vals[idx], v);