Skip to content

Commit

Permalink
Merge pull request #971 from brian-kelley/cherrypick949
Browse files Browse the repository at this point in the history
Cherry pick #949: fix CrsMatrix raw ptr ctor
  • Loading branch information
ndellingwood authored May 12, 2021
2 parents 86991c1 + 6722134 commit e21a2c8
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 60 deletions.
86 changes: 31 additions & 55 deletions src/sparse/KokkosSparse_CrsMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,37 @@ class CrsMatrix {
OrdinalType* rowmap,
OrdinalType* cols)
{
ctor_impl (label, nrows, ncols, annz, val, rowmap, cols);
using Kokkos::Unmanaged;
using HostRowmap = Kokkos::View<SizeType*, Kokkos::HostSpace>;
using UnmanagedRowmap = Kokkos::View<const SizeType*, Kokkos::HostSpace, Kokkos::MemoryTraits<Unmanaged>>;
using UnmanagedEntries = Kokkos::View<const OrdinalType*, Kokkos::HostSpace, Kokkos::MemoryTraits<Unmanaged>>;
using UnmanagedValues = Kokkos::View<const ScalarType*, Kokkos::HostSpace, Kokkos::MemoryTraits<Unmanaged>>;
//Allocate device rowmap, entries, values views
typename row_map_type::non_const_type rowmapDevice(Kokkos::ViewAllocateWithoutInitializing("rowmap"), nrows + 1);
index_type entriesDevice(Kokkos::ViewAllocateWithoutInitializing("entries"), annz);
//given rowmap in ordinal_type, so may need to convert to size_type explicitly
HostRowmap rowmapConverted;
UnmanagedRowmap rowmapRaw;
if(!std::is_same<OrdinalType, SizeType>::value)
{
rowmapConverted = HostRowmap(Kokkos::ViewAllocateWithoutInitializing("rowmap raw"), nrows + 1);
for(OrdinalType i = 0; i <= nrows; i++)
rowmapConverted(i) = rowmap[i];
rowmapRaw = rowmapConverted;
}
else
{
rowmapRaw = UnmanagedRowmap((const SizeType*) rowmap, nrows + 1);
}
Kokkos::deep_copy(rowmapDevice, rowmapRaw);
UnmanagedEntries entriesRaw(cols, annz);
Kokkos::deep_copy(entriesDevice, entriesRaw);
//Construct graph and populate all members
this->numCols_ = ncols;
this->graph = StaticCrsGraphType(entriesDevice, rowmapDevice);
this->values = values_type(Kokkos::ViewAllocateWithoutInitializing("values"), annz);
UnmanagedValues valuesRaw(val, annz);
Kokkos::deep_copy(this->values, valuesRaw);

// FIXME (mfh 09 Aug 2013) Specialize this on the Device type.
// Only use cuSPARSE for the Cuda Device.
Expand Down Expand Up @@ -646,15 +676,6 @@ class CrsMatrix {
#endif // KOKKOS_USE_CUSPARSE
}

/// \brief Fill this matrix from raw arrays in compressed-row form.
///
/// Declaration only; the definition appears after the class body.
///
/// \param label [in] Base label used to name the allocated views.
/// \param nrows [in] Number of rows; \c rows is read at indices 0..nrows.
/// \param ncols [in] Number of columns; stored in \c numCols_.
/// \param annz  [in] Number of stored entries; \c cols (and \c val, if
///   non-null) are read at indices 0..annz-1.
/// \param val   [in] Entry values; may be null, in which case no values
///   are copied from it.
/// \param rows  [in] Row offsets, given as OrdinalType; only the
///   differences rows[i+1] - rows[i] (the row lengths) are used.
/// \param cols  [in] Column index of each stored entry.
void
ctor_impl (const std::string &label,
const OrdinalType nrows,
const OrdinalType ncols,
const size_type annz,
ScalarType* val,
OrdinalType* rows,
OrdinalType* cols);

KOKKOS_INLINE_FUNCTION
OrdinalType
sumIntoValues (const OrdinalType rowi,
Expand Down Expand Up @@ -883,50 +904,5 @@ class CrsMatrix {
ordinal_type numCols_;
};

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

/// \brief Fill this matrix from raw arrays in compressed-row form.
///
/// Allocates \c values with \c annz entries, builds \c graph from the
/// per-row lengths rows[i+1] - rows[i], and copies \c val and \c cols
/// into \c values and \c graph.entries through host mirror views.
///
/// \param label [in] Base label; the views are labelled
///   "<label>.values" and "<label>.graph".
/// \param nrows [in] Number of rows; \c rows is read at indices 0..nrows.
/// \param ncols [in] Number of columns; stored in \c numCols_.
/// \param annz  [in] Number of stored entries; \c cols (and \c val, if
///   non-null) are read at indices 0..annz-1.
/// \param val   [in] Entry values; may be null, in which case nothing is
///   written to the host mirror of \c values before the deep copy.
/// \param rows  [in] Row offsets, given as OrdinalType.
/// \param cols  [in] Column index of each stored entry.
template< typename ScalarType , typename OrdinalType, class Device, class MemoryTraits, typename SizeType >
void
CrsMatrix<ScalarType , OrdinalType, Device, MemoryTraits, SizeType >::
ctor_impl (const std::string &label,
           const OrdinalType nrows,
           const OrdinalType ncols,
           const size_type annz,
           ScalarType* val,
           OrdinalType* rows,
           OrdinalType* cols)
{
  // Derive every view label from the caller's label. Appending suffixes to
  // one shared std::string in place would accumulate them, producing
  // "<label>.values.graph" for the graph.
  values = values_type (label + ".values", annz);

  numCols_ = ncols;

  // FIXME (09 Aug 2013) CrsArray only takes std::vector for now.
  // We'll need to fix that.
  std::vector<int> row_lengths (nrows, 0);

  // FIXME (mfh 21 Jun 2013) This calls for a parallel_for kernel.
  for (OrdinalType i = 0; i < nrows; ++i) {
    row_lengths[i] = rows[i + 1] - rows[i];
  }

  graph = Kokkos::create_staticcrsgraph<staticcrsgraph_type> (label + ".graph", row_lengths);
  typename values_type::HostMirror h_values = Kokkos::create_mirror_view (values);
  typename index_type::HostMirror h_entries = Kokkos::create_mirror_view (graph.entries);

  // FIXME (mfh 21 Jun 2013) This needs to be a parallel copy.
  // Furthermore, why are the arrays copied twice? -- once here, to a
  // host view, and once below, in the deep copy?
  //
  // Whether val is null does not change per entry, so test it once
  // instead of on every iteration.
  if (val) {
    for (size_type i = 0; i < annz; ++i) {
      h_values(i) = val[i];
    }
  }
  for (size_type i = 0; i < annz; ++i) {
    h_entries(i) = cols[i];
  }

  Kokkos::deep_copy (values, h_values);
  Kokkos::deep_copy (graph.entries, h_entries);
}
}
#endif
49 changes: 44 additions & 5 deletions unit_test/sparse/Test_Sparse_CrsMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,15 @@
#include <Kokkos_Core.hpp>
#include <stdexcept>
#include "KokkosSparse_CrsMatrix.hpp"
#include "Kokkos_ArithTraits.hpp"

#ifndef kokkos_complex_double
#define kokkos_complex_double Kokkos::complex<double>
#define kokkos_complex_float Kokkos::complex<float>
#endif
// #ifndef kokkos_complex_double
// #define kokkos_complex_double Kokkos::complex<double>
// #define kokkos_complex_float Kokkos::complex<float>
// #endif

typedef Kokkos::complex<double> kokkos_complex_double;
typedef Kokkos::complex<float> kokkos_complex_float;

namespace Test{ // anonymous

Expand Down Expand Up @@ -189,6 +193,40 @@ testCrsMatrix ()
//printf ("A is %d by %d\n", A.numRows (), A.numCols ());
}

template <typename scalar_t, typename lno_t, typename size_type, typename device>
void
testCrsMatrixRawConstructor()
{
int nrows = 5;
//note: last 2 columns will be empty.
//This makes sure the ncols provided to constructor is preserved.
int ncols = 7;
int nnz = 9;
//NOTE: this is not a mistake, the raw ptr constructor takes rowmap as ordinal.
std::vector<lno_t> rowmap = {0, 0, 2, 5, 6, 9};
std::vector<lno_t> entries = {3, 4, 0, 1, 2, 2, 0, 3, 4};
std::vector<scalar_t> values;
for(int i = 0; i < nnz; i++)
values.push_back(Kokkos::ArithTraits<scalar_t>::one() * (1.0 * rand() / RAND_MAX));
KokkosSparse::CrsMatrix<scalar_t, lno_t, device, void, size_type> A(
"A", nrows, ncols, nnz, values.data(), rowmap.data(), entries.data());
EXPECT_EQ(A.numRows(), nrows);
EXPECT_EQ(A.numCols(), ncols);
EXPECT_EQ(A.nnz(), nnz);
//verify rowmap, entries, values: should all be identical to original raw arrays
//(except the rowmap elements are now size_type)
auto checkRowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map);
auto checkEntries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries);
auto checkValues = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.values);
for(int i = 0; i < nrows + 1; i++)
EXPECT_EQ(checkRowmap(i), (size_type) rowmap[i]);
for(int i = 0; i < nnz; i++)
{
EXPECT_EQ(checkEntries(i), entries[i]);
EXPECT_EQ(checkValues(i), values[i]);
}
}

template <typename scalar_t, typename lno_t, typename size_type, typename device>
void
testCrsMatrixHostMirror ()
Expand Down Expand Up @@ -226,6 +264,7 @@ testCrsMatrixHostMirror ()
#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \
TEST_F( TestCategory, sparse ## _ ## crsmatrix ## _ ## SCALAR ## _ ## ORDINAL ## _ ## OFFSET ## _ ## DEVICE ) { \
testCrsMatrix<SCALAR, ORDINAL, OFFSET, DEVICE> (); \
testCrsMatrixRawConstructor<SCALAR, ORDINAL, OFFSET, DEVICE> (); \
} \
TEST_F( TestCategory, sparse ## _ ## crsmatrix_host_mirror ## _ ## SCALAR ## _ ## ORDINAL ## _ ## OFFSET ## _ ## DEVICE ) { \
testCrsMatrixHostMirror<SCALAR, ORDINAL, OFFSET, DEVICE> (); \
Expand Down Expand Up @@ -329,4 +368,4 @@ TEST_F( TestCategory, sparse ## _ ## crsmatrix_host_mirror ## _ ## SCALAR ## _ #
EXECUTE_TEST(kokkos_complex_float, int64_t, size_t, TestExecSpace)
#endif


#undef EXECUTE_TEST

0 comments on commit e21a2c8

Please sign in to comment.