Skip to content

Commit

Permalink
Sprinkle in OpenMP's simd pragma for isometric ops. (#108)
Browse files Browse the repository at this point in the history
This allows us to exploit SIMD for obviously parallel along-vector operations
without the hassle of an external library or using intrinsics directly.
  • Loading branch information
LTLA authored Jul 26, 2024
1 parent 78aadde commit b9587b5
Show file tree
Hide file tree
Showing 14 changed files with 247 additions and 29 deletions.
3 changes: 3 additions & 0 deletions include/tatami/isometric/arithmetic_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ enum class ArithmeticOperation : char {
/**
* @cond
*/
#ifdef _OPENMP
#pragma omp declare simd
#endif
// We deliberately use an auto type so as to defer a decision on what the output
// type should be; an appropriate coercion is left to the caller classes.
template<ArithmeticOperation op_, bool right_, typename Value_, typename Scalar_>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,9 @@ class DenseExpandedFull : public DenseExtractor<oracle_, OutputValue_, Index_> {
std::fill_n(buffer, my_extent, my_operation.template fill<OutputValue_, InputValue_>(my_row, i));
}

#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ j = 0; j < num; ++j) {
buffer[my_output_ibuffer[j]] = my_output_vbuffer[j];
}
Expand Down Expand Up @@ -299,6 +302,9 @@ class DenseExpandedBlock : public DenseExtractor<oracle_, OutputValue_, Index_>
std::fill_n(buffer, my_block_length, my_operation.template fill<OutputValue_, InputValue_>(my_row, i));
}

#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ j = 0; j < num; ++j) {
buffer[my_output_ibuffer[j] - my_block_start] = my_output_vbuffer[j];
}
Expand Down Expand Up @@ -341,6 +347,10 @@ class DenseExpandedIndex : public DenseExtractor<oracle_, OutputValue_, Index_>
if (my_extent) {
my_remapping_offset = indices.front();
my_remapping.resize(indices.back() - my_remapping_offset + 1);

#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < my_extent; ++i) {
my_remapping[indices[i] - my_remapping_offset] = i;
}
Expand Down Expand Up @@ -373,6 +383,9 @@ class DenseExpandedIndex : public DenseExtractor<oracle_, OutputValue_, Index_>
std::fill_n(buffer, my_extent, my_operation.template fill<OutputValue_, InputValue_>(my_row, i));
}

#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ j = 0; j < num; ++j) {
buffer[my_remapping[my_output_ibuffer[j] - my_remapping_offset]] = my_output_vbuffer[j];
}
Expand Down
9 changes: 8 additions & 1 deletion include/tatami/isometric/binary/arithmetic_helpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ class DelayedBinaryIsometricArithmetic {
*/
template<typename Index_, typename InputValue_, typename OutputValue_>
void dense(bool, Index_, Index_, Index_ length, const InputValue_* left_buffer, const InputValue_* right_buffer, OutputValue_* output_buffer) const {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < length; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output_buffer[i];
Expand All @@ -54,7 +57,11 @@ class DelayedBinaryIsometricArithmetic {

template<typename Index_, typename InputValue_, typename OutputValue_>
void dense(bool, Index_, const std::vector<Index_>& indices, const InputValue_* left_buffer, const InputValue_* right_buffer, OutputValue_* output_buffer) const {
for (Index_ i = 0, length = indices.size(); i < length; ++i) {
Index_ length = indices.size();
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < length; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output_buffer[i];
val = delayed_arithmetic<op_, true>(val, right_buffer[i]);
Expand Down
9 changes: 8 additions & 1 deletion include/tatami/isometric/binary/boolean_helpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ struct DelayedBinaryIsometricBoolean {
*/
template<typename Index_, typename InputValue_, typename OutputValue_>
void dense(bool, Index_, Index_, Index_ length, const InputValue_* left_buffer, const InputValue_* right_buffer, OutputValue_* output_buffer) const {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < length; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output_buffer[i];
Expand All @@ -51,7 +54,11 @@ struct DelayedBinaryIsometricBoolean {

template<typename Index_, typename InputValue_, typename OutputValue_>
void dense(bool, Index_, const std::vector<Index_>& indices, const InputValue_* left_buffer, const InputValue_* right_buffer, OutputValue_* output_buffer) const {
for (Index_ i = 0, length = indices.size(); i < length; ++i) {
Index_ length = indices.size();
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < length; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output_buffer[i];
val = delayed_boolean<op_>(val, right_buffer[i]);
Expand Down
9 changes: 8 additions & 1 deletion include/tatami/isometric/binary/compare_helpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ struct DelayedBinaryIsometricCompare {
*/
template<typename Index_, typename InputValue_, typename OutputValue_>
void dense(bool, Index_, Index_, Index_ length, const InputValue_* left_buffer, const InputValue_* right_buffer, OutputValue_* output_buffer) const {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < length; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output_buffer[i];
Expand All @@ -53,7 +56,11 @@ struct DelayedBinaryIsometricCompare {

template<typename Index_, typename InputValue_, typename OutputValue_>
void dense(bool, Index_, const std::vector<Index_>& indices, const InputValue_* left_buffer, const InputValue_* right_buffer, OutputValue_* output_buffer) const {
for (Index_ i = 0, length = indices.size(); i < length; ++i) {
Index_ length = indices.size();
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < length; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output_buffer[i];
val = delayed_compare<op_>(val, right_buffer[i]);
Expand Down
3 changes: 3 additions & 0 deletions include/tatami/isometric/boolean_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ enum class BooleanOperation : char {
/**
* @cond
*/
#ifdef _OPENMP
#pragma omp declare simd
#endif
template<BooleanOperation op_>
bool delayed_boolean(bool val, bool scalar) {
if constexpr(op_ == BooleanOperation::AND) {
Expand Down
6 changes: 6 additions & 0 deletions include/tatami/isometric/compare_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ enum class CompareOperation : char {
/**
* @cond
*/
#ifdef _OPENMP
#pragma omp declare simd
#endif
template<CompareOperation op_, typename Value_>
bool delayed_compare(Value_ val, Value_ scalar) {
if constexpr(op_ == CompareOperation::EQUAL) {
Expand Down Expand Up @@ -58,6 +61,9 @@ enum class SpecialCompareOperation : char {
/**
* @cond
*/
#ifdef _OPENMP
#pragma omp declare simd
#endif
template<SpecialCompareOperation op_, bool pass_, typename Value_>
bool delayed_special_compare(Value_ val) {
if constexpr(op_ == SpecialCompareOperation::ISNAN) {
Expand Down
21 changes: 21 additions & 0 deletions include/tatami/isometric/unary/DelayedUnaryIsometricOperation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,16 @@ class DenseExpandedFull : public DenseExtractor<oracle_, OutputValue_, Index_> {
}

if constexpr(same_value) {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < range.number; ++i) {
buffer[range.index[i]] = vbuffer[i];
}
} else {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < range.number; ++i) {
buffer[range.index[i]] = my_result_vbuffer[i];
}
Expand Down Expand Up @@ -320,10 +326,16 @@ class DenseExpandedBlock : public DenseExtractor<oracle_, OutputValue_, Index_>
}

if constexpr(same_value) {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < range.number; ++i) {
buffer[range.index[i] - my_block_start] = vbuffer[i];
}
} else {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < range.number; ++i) {
buffer[range.index[i] - my_block_start] = my_result_vbuffer[i];
}
Expand Down Expand Up @@ -361,6 +373,9 @@ class DenseExpandedIndex : public DenseExtractor<oracle_, OutputValue_, Index_>
if (my_extent) {
my_remapping_offset = indices.front();
my_remapping.resize(indices.back() - my_remapping_offset + 1);
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < my_extent; ++i) {
my_remapping[indices[i] - my_remapping_offset] = i;
}
Expand Down Expand Up @@ -409,10 +424,16 @@ class DenseExpandedIndex : public DenseExtractor<oracle_, OutputValue_, Index_>
}

if constexpr(same_value) {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < range.number; ++i) {
buffer[my_remapping[range.index[i] - my_remapping_offset]] = vbuffer[i];
}
} else {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < range.number; ++i) {
buffer[my_remapping[range.index[i] - my_remapping_offset]] = my_result_vbuffer[i];
}
Expand Down
15 changes: 14 additions & 1 deletion include/tatami/isometric/unary/arithmetic_helpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ namespace tatami {
*/
template<ArithmeticOperation op_, bool right_, typename InputValue_, typename Index_, typename Scalar_, typename OutputValue_>
void delayed_arithmetic_run_simple(const InputValue_* input, Index_ length, Scalar_ scalar, OutputValue_* output) {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < length; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output[i];
Expand Down Expand Up @@ -222,6 +225,9 @@ class DelayedUnaryIsometricArithmeticVector {
if (row == my_by_row) {
delayed_arithmetic_run_simple<op_, right_>(input, length, my_vector[idx], output);
} else {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < length; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output[i];
Expand All @@ -238,7 +244,11 @@ class DelayedUnaryIsometricArithmeticVector {
if (row == my_by_row) {
delayed_arithmetic_run_simple<op_, right_>(input, static_cast<Index_>(indices.size()), my_vector[idx], output);
} else {
for (Index_ i = 0, length = indices.size(); i < length; ++i) {
Index_ length = indices.size();
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < length; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output[i];
val = delayed_arithmetic<op_, right_>(val, my_vector[indices[i]]);
Expand All @@ -254,6 +264,9 @@ class DelayedUnaryIsometricArithmeticVector {
if (row == my_by_row) {
delayed_arithmetic_run_simple<op_, right_>(input_value, number, my_vector[idx], output_value);
} else {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < number; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output_value[i];
Expand Down
18 changes: 17 additions & 1 deletion include/tatami/isometric/unary/boolean_helpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ namespace tatami {
*/
template<typename InputValue_, typename Index_, typename OutputValue_>
void delayed_boolean_cast(const InputValue_* input, Index_ length, OutputValue_* output) {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < length; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output[i];
Expand All @@ -30,6 +33,9 @@ void delayed_boolean_cast(const InputValue_* input, Index_ length, OutputValue_*

template<typename InputValue_, typename Index_, typename OutputValue_>
void delayed_boolean_not(const InputValue_* input, Index_ length, OutputValue_* output) {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < length; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output[i];
Expand Down Expand Up @@ -316,6 +322,9 @@ class DelayedUnaryIsometricBooleanVector {
if (row == my_by_row) {
delayed_boolean_run_simple<op_>(input, length, my_vector[idx], output);
} else {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < length; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output[i];
Expand All @@ -332,7 +341,11 @@ class DelayedUnaryIsometricBooleanVector {
if (row == my_by_row) {
delayed_boolean_run_simple<op_>(input, static_cast<Index_>(indices.size()), my_vector[idx], output);
} else {
for (Index_ i = 0, length = indices.size(); i < length; ++i) {
Index_ length = indices.size();
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < length; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output[i];
val = delayed_boolean<op_>(val, my_vector[indices[i]]);
Expand All @@ -348,6 +361,9 @@ class DelayedUnaryIsometricBooleanVector {
if (row == my_by_row) {
delayed_boolean_run_simple<op_>(input_value, number, my_vector[idx], output_value);
} else {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < number; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output_value[i];
Expand Down
15 changes: 14 additions & 1 deletion include/tatami/isometric/unary/compare_helpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ namespace tatami {
*/
template<CompareOperation op_, typename InputValue_, typename Index_, typename OutputValue_>
void delayed_compare_run_simple(const InputValue_* input, Index_ length, InputValue_ scalar, OutputValue_* output) {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < length; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output[i];
Expand Down Expand Up @@ -176,6 +179,9 @@ class DelayedUnaryIsometricCompareVector {
if (row == my_by_row) {
delayed_compare_run_simple<op_, InputValue_>(input, length, my_vector[idx], output);
} else {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < length; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output[i];
Expand All @@ -192,7 +198,11 @@ class DelayedUnaryIsometricCompareVector {
if (row == my_by_row) {
delayed_compare_run_simple<op_, InputValue_>(input, static_cast<Index_>(indices.size()), my_vector[idx], output);
} else {
for (Index_ i = 0, length = indices.size(); i < length; ++i) {
Index_ length = indices.size();
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < length; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output[i];
val = delayed_compare<op_, InputValue_>(val, my_vector[indices[i]]);
Expand All @@ -208,6 +218,9 @@ class DelayedUnaryIsometricCompareVector {
if (row == my_by_row) {
delayed_compare_run_simple<op_, InputValue_>(input, number, my_vector[idx], output);
} else {
#ifdef _OPENMP
#pragma omp simd
#endif
for (Index_ i = 0; i < number; ++i) {
if constexpr(std::is_same<InputValue_, OutputValue_>::value) {
auto& val = output[i];
Expand Down
Loading

0 comments on commit b9587b5

Please sign in to comment.