From 80f7ba46cc60dd77281544d4a48033551595d833 Mon Sep 17 00:00:00 2001 From: LTLA Date: Thu, 25 Jul 2024 23:28:30 -0700 Subject: [PATCH] Add some OpenMP SIMD directives for the sparse matrix code. --- .../tatami/sparse/CompressedSparseMatrix.hpp | 22 ++++++++++++------- .../tatami/sparse/FragmentedSparseMatrix.hpp | 19 ++++++++++++---- .../sparse/convert_to_compressed_sparse.hpp | 12 +++++++++- tests/CMakeLists.txt | 8 +++++++ 4 files changed, 48 insertions(+), 13 deletions(-) diff --git a/include/tatami/sparse/CompressedSparseMatrix.hpp b/include/tatami/sparse/CompressedSparseMatrix.hpp index 34c9afea..356ce09a 100644 --- a/include/tatami/sparse/CompressedSparseMatrix.hpp +++ b/include/tatami/sparse/CompressedSparseMatrix.hpp @@ -44,13 +44,14 @@ class PrimaryMyopicFullDense : public MyopicDenseExtractor { const Value_* fetch(Index_ i, Value_* buffer) { auto offset = my_pointers[i]; - auto vIt = my_values.begin() + offset; - auto iIt = my_indices.begin() + offset; size_t delta = my_pointers[i+1] - my_pointers[i]; - std::fill_n(buffer, my_secondary, static_cast(0)); - for (size_t x = 0; x < delta; ++x, ++vIt, ++iIt) { - buffer[*iIt] = *vIt; +#ifdef _OPENMP + #pragma omp simd +#endif + for (size_t x = 0; x < delta; ++x) { + auto cur_offset = offset + x; + buffer[my_indices[cur_offset]] = my_values[cur_offset]; } return buffer; } @@ -116,11 +117,16 @@ class PrimaryMyopicBlockDense : public MyopicDenseExtractor { auto iStart = my_indices.begin() + my_pointers[i]; auto iEnd = my_indices.begin() + my_pointers[i + 1]; sparse_utils::refine_primary_block_limits(iStart, iEnd, my_secondary, my_block_start, my_block_length); + size_t offset = (iStart - my_indices.begin()); + size_t number = iEnd - iStart; std::fill_n(buffer, my_block_length, static_cast(0)); - auto vIt = my_values.begin() + (iStart - my_indices.begin()); - for (; iStart != iEnd; ++iStart, ++vIt) { - buffer[*iStart - my_block_start] = *vIt; +#ifdef _OPENMP + #pragma omp simd +#endif + for (size_t i = 0; i < number; ++i) { + auto cur_offset = offset + i; + buffer[my_indices[cur_offset] - my_block_start] = my_values[cur_offset]; } return buffer; } diff --git a/include/tatami/sparse/FragmentedSparseMatrix.hpp b/include/tatami/sparse/FragmentedSparseMatrix.hpp index ffbb2b89..b953798a 100644 --- a/include/tatami/sparse/FragmentedSparseMatrix.hpp +++ b/include/tatami/sparse/FragmentedSparseMatrix.hpp @@ -41,7 +41,11 @@ class PrimaryMyopicFullDense : public MyopicDenseExtractor { const auto& curi = my_indices[i]; std::fill_n(buffer, my_secondary, static_cast(0)); - for (size_t x = 0, end = curv.size(); x < end; ++x) { + size_t end = curv.size(); +#ifdef _OPENMP + #pragma omp simd +#endif + for (size_t x = 0; x < end; ++x) { buffer[curi[x]] = curv[x]; } return buffer; @@ -91,14 +95,21 @@ class PrimaryMyopicBlockDense : public MyopicDenseExtractor { const Value_* fetch(Index_ i, Value_* buffer) { const auto& curi = my_indices[i]; + const auto& curv = my_values[i]; + auto iStart = curi.begin(); auto iEnd = curi.end(); sparse_utils::refine_primary_block_limits(iStart, iEnd, my_secondary, my_block_start, my_block_length); + size_t offset = (iStart - curi.begin()); + size_t number = iEnd - iStart; std::fill_n(buffer, my_block_length, static_cast(0)); - auto vIt = my_values[i].begin() + (iStart - curi.begin()); - for (; iStart != iEnd; ++iStart, ++vIt) { - buffer[*iStart - my_block_start] = *vIt; +#ifdef _OPENMP + #pragma omp simd +#endif + for (size_t i = 0; i < number; ++i) { + auto cur_offset = offset + i; + buffer[curi[cur_offset] - my_block_start] = curv[cur_offset]; } return buffer; } diff --git a/include/tatami/sparse/convert_to_compressed_sparse.hpp b/include/tatami/sparse/convert_to_compressed_sparse.hpp index fcb01a76..ef33a491 100644 --- a/include/tatami/sparse/convert_to_compressed_sparse.hpp +++ b/include/tatami/sparse/convert_to_compressed_sparse.hpp @@ -44,7 +44,11 @@ void count_compressed_sparse_non_zeros_consistent(const tatami::Matrix(matrix, row, start, length); for (Index_ p = start, pe = start + length; p < pe; ++p) { auto ptr = wrk->fetch(buffer_v.data()); - size_t count = 0; + Count_ count = 0; +#ifdef _OPENMP + // Reduction is okay as we're dealing with an integer anyway. + #pragma omp simd reduction(+:count) +#endif for (Index_ s = 0; s < secondary; ++s) { count += (ptr[s] != 0); } @@ -73,6 +77,9 @@ void count_compressed_sparse_non_zeros_inconsistent(const tatami::Matrixfetch(NULL, buffer_i.data()); +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < range.number; ++i) { ++my_counts[range.index[i]]; } @@ -87,6 +94,9 @@ void count_compressed_sparse_non_zeros_inconsistent(const tatami::Matrixfetch(buffer_v.data()); +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ p = 0; p < primary; ++p) { my_counts[p] += (ptr[p] != 0); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 266ace10..353ed8b5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -162,4 +162,12 @@ if(OpenMP_FOUND) create_isometric_binary_test(omp_isometric_binary_test) target_link_libraries(omp_isometric_binary_test OpenMP::OpenMP_CXX) + + add_executable( + omp_sparse_test + src/sparse/convert_to_compressed_sparse.cpp + src/sparse/CompressedSparseMatrix.cpp + src/sparse/FragmentedSparseMatrix.cpp + ) + decorate_executable(omp_sparse_test) endif()