Skip to content

Commit

Permalink
Add some OpenMP SIMD directives for the sparse matrix code.
Browse files Browse the repository at this point in the history
  • Loading branch information
LTLA committed Jul 26, 2024
1 parent b9587b5 commit 80f7ba4
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 13 deletions.
22 changes: 14 additions & 8 deletions include/tatami/sparse/CompressedSparseMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,14 @@ class PrimaryMyopicFullDense : public MyopicDenseExtractor<Value_, Index_> {

const Value_* fetch(Index_ i, Value_* buffer) {
auto offset = my_pointers[i];
auto vIt = my_values.begin() + offset;
auto iIt = my_indices.begin() + offset;
size_t delta = my_pointers[i+1] - my_pointers[i];

std::fill_n(buffer, my_secondary, static_cast<Value_>(0));
for (size_t x = 0; x < delta; ++x, ++vIt, ++iIt) {
buffer[*iIt] = *vIt;
#ifdef _OPENMP
#pragma omp simd
#endif
for (size_t x = 0; x < delta; ++x) {
auto cur_offset = offset + x;
buffer[my_indices[cur_offset]] = my_values[cur_offset];
}
return buffer;
}
Expand Down Expand Up @@ -116,11 +117,16 @@ class PrimaryMyopicBlockDense : public MyopicDenseExtractor<Value_, Index_> {
auto iStart = my_indices.begin() + my_pointers[i];
auto iEnd = my_indices.begin() + my_pointers[i + 1];
sparse_utils::refine_primary_block_limits(iStart, iEnd, my_secondary, my_block_start, my_block_length);
size_t offset = (iStart - my_indices.begin());
size_t number = iEnd - iStart;

std::fill_n(buffer, my_block_length, static_cast<Value_>(0));
auto vIt = my_values.begin() + (iStart - my_indices.begin());
for (; iStart != iEnd; ++iStart, ++vIt) {
buffer[*iStart - my_block_start] = *vIt;
#ifdef _OPENMP
#pragma omp simd
#endif
for (size_t i = 0; i < number; ++i) {
auto cur_offset = offset + i;
buffer[my_indices[cur_offset] - my_block_start] = my_values[cur_offset];
}
return buffer;
}
Expand Down
19 changes: 15 additions & 4 deletions include/tatami/sparse/FragmentedSparseMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,11 @@ class PrimaryMyopicFullDense : public MyopicDenseExtractor<Value_, Index_> {
const auto& curi = my_indices[i];

std::fill_n(buffer, my_secondary, static_cast<Value_>(0));
for (size_t x = 0, end = curv.size(); x < end; ++x) {
size_t end = curv.size();
#ifdef _OPENMP
#pragma omp simd
#endif
for (size_t x = 0; x < end; ++x) {
buffer[curi[x]] = curv[x];
}
return buffer;
Expand Down Expand Up @@ -91,14 +95,21 @@ class PrimaryMyopicBlockDense : public MyopicDenseExtractor<Value_, Index_> {

const Value_* fetch(Index_ i, Value_* buffer) {
const auto& curi = my_indices[i];
const auto& curv = my_values[i];

auto iStart = curi.begin();
auto iEnd = curi.end();
sparse_utils::refine_primary_block_limits(iStart, iEnd, my_secondary, my_block_start, my_block_length);
size_t offset = (iStart - curi.begin());
size_t number = iEnd - iStart;

std::fill_n(buffer, my_block_length, static_cast<Value_>(0));
auto vIt = my_values[i].begin() + (iStart - curi.begin());
for (; iStart != iEnd; ++iStart, ++vIt) {
buffer[*iStart - my_block_start] = *vIt;
#ifdef _OPENMP
#pragma omp simd
#endif
for (size_t i = 0; i < number; ++i) {
auto cur_offset = offset + i;
buffer[curi[cur_offset] - my_block_start] = curv[cur_offset];
}
return buffer;
}
Expand Down
12 changes: 11 additions & 1 deletion include/tatami/sparse/convert_to_compressed_sparse.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,11 @@ void count_compressed_sparse_non_zeros_consistent(const tatami::Matrix<Value_, I
auto wrk = consecutive_extractor<false>(matrix, row, start, length);
for (Index_ p = start, pe = start + length; p < pe; ++p) {
auto ptr = wrk->fetch(buffer_v.data());
size_t count = 0;
Count_ count = 0;
#ifdef _OPENMP
// Reduction is okay as we're dealing with an integer anyway.
#pragma omp simd reduction(+:count)
#endif
for (Index_ s = 0; s < secondary; ++s) {
count += (ptr[s] != 0);
}
Expand Down Expand Up @@ -73,6 +77,9 @@ void count_compressed_sparse_non_zeros_inconsistent(const tatami::Matrix<Value_,

for (Index_ x = 0; x < length; ++x) {
auto range = wrk->fetch(NULL, buffer_i.data());
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < range.number; ++i) {
++my_counts[range.index[i]];
}
Expand All @@ -87,6 +94,9 @@ void count_compressed_sparse_non_zeros_inconsistent(const tatami::Matrix<Value_,

for (Index_ x = 0; x < length; ++x) {
auto ptr = wrk->fetch(buffer_v.data());
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ p = 0; p < primary; ++p) {
my_counts[p] += (ptr[p] != 0);
}
Expand Down
8 changes: 8 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -162,4 +162,12 @@ if(OpenMP_FOUND)

create_isometric_binary_test(omp_isometric_binary_test)
target_link_libraries(omp_isometric_binary_test OpenMP::OpenMP_CXX)

add_executable(
omp_sparse_test
src/sparse/convert_to_compressed_sparse.cpp
src/sparse/CompressedSparseMatrix.cpp
src/sparse/FragmentedSparseMatrix.cpp
)
decorate_executable(omp_sparse_test)
endif()

0 comments on commit 80f7ba4

Please sign in to comment.