Skip to content

Commit

Permalink
Partitioning compiles, tested, produces valid reordering
Browse files Browse the repository at this point in the history
  • Loading branch information
brian-kelley committed Sep 30, 2019
1 parent 164c874 commit 742aa7c
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 36 deletions.
2 changes: 1 addition & 1 deletion perf_test/sparse/KokkosSparse_gs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ void runGS(string matrixPath, string devName, bool symmetric)
else
{
//this constructor is for cluster (block) coloring
kh.create_gs_handle(clusterSize);
kh.create_gs_handle(KokkosSparse::CLUSTER_SHUFFLE, clusterSize);
}
//zero out LHS initially
KokkosBlas::fill(x, 0);
Expand Down
2 changes: 1 addition & 1 deletion src/common/KokkosKernels_Handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,7 @@ class KokkosKernelsHandle
this->is_owner_of_the_gs_handle = true;
this->gsHandle = new GaussSeidelHandleType(gs_algorithm);
}
void create_gs_handle(ClusteringAlgorithm clusterAlgo, nnz_lno_t verts_per_cluster) {
void create_gs_handle(KokkosSparse::ClusteringAlgorithm clusterAlgo, nnz_lno_t verts_per_cluster) {
this->destroy_gs_handle();
this->is_owner_of_the_gs_handle = true;
this->gsHandle = new GaussSeidelHandleType(clusterAlgo, verts_per_cluster);
Expand Down
3 changes: 2 additions & 1 deletion src/sparse/KokkosSparse_gauss_seidel_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ namespace KokkosSparse{

enum GSAlgorithm{GS_DEFAULT, GS_PERMUTED, GS_TEAM, GS_CLUSTER};

enum ClusteringAlgorithm{CLUSTER_DEFAULT, CLUSTER_RCM, CLUSTER_SHUFFLE, CLUSTER_GRADIENT};
enum ClusteringAlgorithm{CLUSTER_DEFAULT, CLUSTER_RCM, CLUSTER_SHUFFLE};

template <class size_type_, class lno_t_, class scalar_t_,
class ExecutionSpace,
Expand Down Expand Up @@ -209,6 +209,7 @@ namespace KokkosSparse{

//getters
GSAlgorithm get_algorithm_type() const {return this->algorithm_type;}
ClusteringAlgorithm get_clustering_algo() const {return this->cluster_algo;}
bool is_owner_of_coloring() const {return this->owner_of_coloring;}

nnz_lno_persistent_work_host_view_t get_color_xadj() {
Expand Down
3 changes: 2 additions & 1 deletion src/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1163,14 +1163,15 @@ namespace KokkosSparse{
nnz_lno_persistent_work_view_t clusterOrder;
auto clusterSize = gsHandler->get_cluster_size();
nnz_lno_t numClusters = (num_rows + clusterSize - 1) / clusterSize;
switch(cluster_algo)
switch(gsHandler->get_clustering_algo())
{
case CLUSTER_RCM:
{
RCM<HandleType, rowmap_t, colinds_t> rcm(num_rows, xadj, adj);
clusterOrder = rcm.rcm();
break;
}
case CLUSTER_DEFAULT:
case CLUSTER_SHUFFLE:
{
ShuffleReorder<HandleType, rowmap_t, colinds_t> shuf(num_rows, xadj, adj);
Expand Down
54 changes: 24 additions & 30 deletions src/sparse/impl/KokkosSparse_partitioning_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,6 @@ struct RCM
typedef Kokkos::TeamPolicy<MyExecSpace> team_policy_t ;
typedef typename team_policy_t::member_type team_member_t ;

typedef Kokkos::MinLoc<nnz_lno_t, nnz_lno_t, MyTempMemorySpace> MinLocReducer;
typedef Kokkos::MaxLoc<nnz_lno_t, nnz_lno_t, MyTempMemorySpace> MaxLocReducer;
typedef Kokkos::ValLocScalar<nnz_lno_t, nnz_lno_t> ValLoc;

typedef nnz_lno_t LO;

RCM(size_type numRows_, lno_row_view_t& rowmap_, lno_nnz_view_t& colinds_)
Expand Down Expand Up @@ -629,7 +625,7 @@ struct ShuffleReorder
typedef typename team_policy_t::member_type team_member_t ;

ShuffleReorder(size_type numRows_, lno_row_view_t& rowmap_, lno_nnz_view_t& colinds_)
: numRows(numRows_), rowmap(rowmap_), colinds(colinds_)
: numRows(numRows_), rowmap(rowmap_), colinds(colinds_), randPool(0xDEADBEEF)
{}

nnz_lno_t numRows;
Expand All @@ -641,16 +637,16 @@ struct ShuffleReorder

struct ShuffleFunctor
{
ShuffleFunctor(nnz_view_t& order_, const_lno_nnz_view_t& row_map_, const_lno_nnz_view_t& col_inds_, nnz_lno_t clusterSize_, RandPool& randPool_)
: order(order_), row_map(row_map_), col_inds(col_inds_), clusterSize(clusterSize_), numRows(row_map.extent(0) - 1), nodeLocks(numRows)
ShuffleFunctor(nnz_view_t& order_, const_lno_row_view_t& row_map_, const_lno_nnz_view_t& col_inds_, nnz_lno_t clusterSize_, RandPool& randPool_)
: order(order_), row_map(row_map_), col_inds(col_inds_), clusterSize(clusterSize_), numRows(row_map.extent(0) - 1), randPool(randPool_), nodeLocks(numRows)
{}

KOKKOS_INLINE_FUNCTION nnz_lno_t origToCluster(nnz_lno_t orig)
KOKKOS_INLINE_FUNCTION nnz_lno_t origToCluster(nnz_lno_t orig) const
{
return origToPermuted(orig) / clusterSize;
}

KOKKOS_INLINE_FUNCTION nnz_lno_t origToPermuted(nnz_lno_t orig)
KOKKOS_INLINE_FUNCTION nnz_lno_t origToPermuted(nnz_lno_t orig) const
{
return order(orig);
}
Expand All @@ -665,9 +661,9 @@ struct ShuffleReorder

KOKKOS_INLINE_FUNCTION void operator()(const team_member_t t) const
{
SharedData* wholeTeamShared = t.team_shmem().get_shmem(t.team_size() * sizeof(SharedData)));
SharedData* wholeTeamShared = (SharedData*) t.team_shmem().get_shmem(t.team_size() * sizeof(SharedData));
SharedData& sh = wholeTeamShared[t.team_rank()];
for(int i = 0; i < 50)
for(int iter = 0; iter < 50; iter++)
{
//Each thread first chooses one row randomly
Kokkos::single(Kokkos::PerThread(t),
Expand Down Expand Up @@ -701,15 +697,15 @@ struct ShuffleReorder
nnz_lno_t nei = col_inds(row_start1 + i);
if(origToCluster(nei) != cluster1)
lnumOutside++;
}, numOutsideNeighbors);
if(numOutsideNeighbors == 0)
}, sh.numOutsideNeighbors);
if(sh.numOutsideNeighbors == 0)
{
//Can't profit by swapping this row with anything,
//since swapping two nodes in the same cluster doesn't change the cluster graph.
Kokkos::single(Kokkos::PerThread(t),
[&]()
{
nodeLocks.clear(sh.node1);
nodeLocks.reset(sh.node1);
});
continue;
}
Expand All @@ -724,9 +720,8 @@ struct ShuffleReorder
lnode2 = nei;
else
{
nnz_lno_t chosen = state.rand64(numRows);
auto state = randPool.get_state();
if(state.rand(numOutsideNeighbors) == 0)
if(state.rand(sh.numOutsideNeighbors) == 0)
lnode2 = nei;
randPool.free_state(state);
}
Expand All @@ -739,7 +734,7 @@ struct ShuffleReorder
if(nodeLocks.set(sh.node2))
sh.lockedBoth = true;
if(!sh.lockedBoth)
nodeLocks.clear(sh.node1);
nodeLocks.reset(sh.node1);
});
if(!sh.lockedBoth)
{
Expand All @@ -750,7 +745,7 @@ struct ShuffleReorder
//Count the neighbors of node1 and node2 in their own clusters, and in each other's clusters.
//node1's number of self-cluster neighbors is already available.
nnz_lno_t cluster2 = origToCluster(sh.node2);
nnz_lno_t node1Self = degree1 - 1 - numOutsideNeighbors;
nnz_lno_t node1Self = degree1 - 1 - sh.numOutsideNeighbors;
nnz_lno_t node1Cross = 0;
nnz_lno_t node2Self = 0;
nnz_lno_t node2Cross = 0;
Expand Down Expand Up @@ -790,8 +785,8 @@ struct ShuffleReorder
order(sh.node2) = tmp;
}
//in any case, can now unlock both nodes
nodeLocks.clear(sh.node1);
nodeLocks.clear(sh.node2);
nodeLocks.reset(sh.node1);
nodeLocks.reset(sh.node2);
});
}
}
Expand All @@ -802,9 +797,9 @@ struct ShuffleReorder
}

nnz_view_t order;
const_lno_nnz_view_t row_map;
const_lno_row_view_t row_map;
const_lno_nnz_view_t col_inds;
nnz_lno_t numClusters;
nnz_lno_t clusterSize;
nnz_lno_t numRows;
RandPool randPool;
bitset_t nodeLocks;
Expand Down Expand Up @@ -832,22 +827,21 @@ struct ShuffleReorder
//nothing to do, all in the same cluster
return order;
}
RandPool randPool(0xDEADBEEF);
ShuffleFunctor shuf(order, rowmap, entries, clusterSize, randPool);
ShuffleFunctor shuf(order, rowmap, colinds, clusterSize, randPool);
int team_size = 0;
int vector_size = 0;
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
int max_allowed_team_size = team_policy::team_size_max(shuf);
get_suggested_vector_team_size<nnz_lno_t, MyExecSpace>(
int max_allowed_team_size = team_policy_t::team_size_max(shuf);
KokkosKernels::Impl::get_suggested_vector_team_size<nnz_lno_t, MyExecSpace>(
max_allowed_team_size,
vector_size,
team_size,
numRows, entries.extent(0));
numRows, colinds.extent(0));
#else
get_suggested_vector_size<nnz_lno_t, MyExecSpace>(
KokkosKernels::Impl::get_suggested_vector_size<nnz_lno_t, MyExecSpace>(
vector_size,
numRows, entries.extent(0));
team_size = get_suggested_team_size<team_policy>(shuf, vector_size);
numRows, colinds.extent(0));
team_size = get_suggested_team_size<team_policy_t>(shuf, vector_size);
#endif
Kokkos::parallel_for(team_policy_t(256, team_size, vector_size), shuf);
return order;
Expand Down
36 changes: 34 additions & 2 deletions unit_test/sparse/Test_Sparse_gauss_seidel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
#include <iostream>
#include <complex>
#include "KokkosSparse_gauss_seidel.hpp"
#include "KokkosSparse_rcm_impl.hpp"
#include "KokkosSparse_partitioning_impl.hpp"

#ifndef kokkos_complex_double
#define kokkos_complex_double Kokkos::complex<double>
Expand Down Expand Up @@ -287,7 +287,7 @@ void test_cluster_sgs(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t row_s
output << "Testing cluster size = " << clusterSize << '\n';
#endif
KernelHandle kh;
kh.create_gs_handle(clusterSize);
kh.create_gs_handle(KokkosSparse::CLUSTER_SHUFFLE, clusterSize);
//only need to do G-S setup (symbolic/numeric) once
Kokkos::Impl::Timer timer;
KokkosSparse::Experimental::gauss_seidel_symbolic<KernelHandle, lno_view_t, lno_nnz_view_t>
Expand Down Expand Up @@ -351,13 +351,45 @@ void test_rcm(lno_t numRows, size_type nnzPerRow, lno_t bandwidth)
//make a new CRS graph based on permuting the rows and columns of mat
}

// Checks that ShuffleReorder::shuffledClusterOrder() hands back one distinct
// entry per row.
//
// Steps:
//   1. Generate a numRows x numRows sparse matrix with kk_generate_sparse_matrix
//      (nnzPerRow * numRows total entries requested, variance nnzPerRow / 4,
//      and the given bandwidth).
//   2. Run ShuffleReorder on the matrix graph with a cluster size of 8.
//   3. Copy the resulting order to the host and collect its first numRows
//      entries into a std::set.
//   4. Expect the set to contain exactly numRows elements, i.e. no row ID was
//      duplicated.
//
// A mismatch is reported through a gtest expectation so that the enclosing
// TEST_F actually fails; printing to stderr alone would leave the test green.
//
// NOTE(review): only uniqueness is verified here. Whether every entry also lies
// in [0, numRows) is not checked - confirm against shuffledClusterOrder() before
// adding a range assertion.
template <typename scalar_t, typename lno_t, typename size_type, typename device>
void test_greedy_partition(lno_t numRows, size_type nnzPerRow, lno_t bandwidth)
{
  using namespace Test;
  typedef typename KokkosSparse::CrsMatrix<scalar_t, lno_t, device, void, size_type> crsMat_t;
  typedef typename crsMat_t::StaticCrsGraphType graph_t;
  typedef typename graph_t::row_map_type lno_row_view_t;
  typedef typename graph_t::entries_type lno_nnz_view_t;
  typedef KokkosKernelsHandle
    <size_type, lno_t, scalar_t,
    typename device::execution_space, typename device::memory_space,typename device::memory_space> KernelHandle;
  //fixed seed; presumably makes the generated matrix reproducible (verify against the generator)
  srand(245);
  size_type nnzTotal = nnzPerRow * numRows;
  lno_t nnzVariance = nnzPerRow / 4;
  crsMat_t A = KokkosKernels::Impl::kk_generate_sparse_matrix<crsMat_t>(numRows, numRows, nnzTotal, nnzVariance, bandwidth);
  KokkosSparse::Impl::ShuffleReorder<KernelHandle, lno_row_view_t, lno_nnz_view_t> shuf(numRows, A.graph.row_map, A.graph.entries);
  auto order = shuf.shuffledClusterOrder(8);
  //bring the ordering to the host so it can be inspected serially
  auto orderHost = Kokkos::create_mirror_view(order);
  Kokkos::deep_copy(orderHost, order);
  std::set<lno_t> rowSet;
  for(lno_t i = 0; i < numRows; i++)
    rowSet.insert(orderHost(i));
  //every row must appear exactly once; fail the test (not just log) otherwise
  EXPECT_EQ(static_cast<lno_t>(rowSet.size()), numRows)
    << "Only got back " << rowSet.size() << " unique row IDs!\n";
}

// Declares the gtest cases of this file for one (SCALAR, ORDINAL, OFFSET, DEVICE)
// combination. Four TEST_F cases are generated - gauss_seidel, rcm,
// greedy_partition and cluster_sgs - each named by token-pasting the test kind
// with the four macro arguments, and each forwarding those arguments as the
// template parameters of the corresponding test_* function.
// Keep comments out of the macro body: with the trailing line continuations a
// '//' comment would swallow the remainder of the definition.
#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \
TEST_F( TestCategory, sparse ## _ ## gauss_seidel ## _ ## SCALAR ## _ ## ORDINAL ## _ ## OFFSET ## _ ## DEVICE ) { \
test_gauss_seidel<SCALAR,ORDINAL,OFFSET,DEVICE>(10000, 10000 * 30, 200, 10); \
} \
TEST_F( TestCategory, sparse ## _ ## rcm ## _ ## SCALAR ## _ ## ORDINAL ## _ ## OFFSET ## _ ## DEVICE ) { \
test_rcm<SCALAR,ORDINAL,OFFSET,DEVICE>(10000, 50, 2000); \
} \
TEST_F( TestCategory, sparse ## _ ## greedy_partition ## _ ## SCALAR ## _ ## ORDINAL ## _ ## OFFSET ## _ ## DEVICE ) { \
test_greedy_partition<SCALAR,ORDINAL,OFFSET,DEVICE>(10000, 50, 2000); \
} \
TEST_F( TestCategory, sparse ## _ ## cluster_sgs ## _ ## SCALAR ## _ ## ORDINAL ## _ ## OFFSET ## _ ## DEVICE ) { \
test_cluster_sgs<SCALAR,ORDINAL,OFFSET,DEVICE>(10000, 10000 * 30, 200, 10); \
}
Expand Down

0 comments on commit 742aa7c

Please sign in to comment.