diff --git a/include/tatami/isometric/arithmetic_utils.hpp b/include/tatami/isometric/arithmetic_utils.hpp index e54d51be..83b6d367 100644 --- a/include/tatami/isometric/arithmetic_utils.hpp +++ b/include/tatami/isometric/arithmetic_utils.hpp @@ -33,6 +33,9 @@ enum class ArithmeticOperation : char { /** * @cond */ +#ifdef _OPENMP +#pragma omp declare simd +#endif // We deliberately use an auto type so as to defer a decision on what the output // type should be; an appropriate coercion is left to the caller classes. template diff --git a/include/tatami/isometric/binary/DelayedBinaryIsometricOperation.hpp b/include/tatami/isometric/binary/DelayedBinaryIsometricOperation.hpp index cb5bf4f1..3999efd6 100644 --- a/include/tatami/isometric/binary/DelayedBinaryIsometricOperation.hpp +++ b/include/tatami/isometric/binary/DelayedBinaryIsometricOperation.hpp @@ -235,6 +235,9 @@ class DenseExpandedFull : public DenseExtractor { std::fill_n(buffer, my_extent, my_operation.template fill(my_row, i)); } +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ j = 0; j < num; ++j) { buffer[my_output_ibuffer[j]] = my_output_vbuffer[j]; } @@ -299,6 +302,9 @@ class DenseExpandedBlock : public DenseExtractor std::fill_n(buffer, my_block_length, my_operation.template fill(my_row, i)); } +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ j = 0; j < num; ++j) { buffer[my_output_ibuffer[j] - my_block_start] = my_output_vbuffer[j]; } @@ -341,6 +347,10 @@ class DenseExpandedIndex : public DenseExtractor if (my_extent) { my_remapping_offset = indices.front(); my_remapping.resize(indices.back() - my_remapping_offset + 1); + +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < my_extent; ++i) { my_remapping[indices[i] - my_remapping_offset] = i; } @@ -373,6 +383,9 @@ class DenseExpandedIndex : public DenseExtractor std::fill_n(buffer, my_extent, my_operation.template fill(my_row, i)); } +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ j = 0; j < num; ++j) { buffer[my_remapping[my_output_ibuffer[j] - my_remapping_offset]] = my_output_vbuffer[j]; } diff --git a/include/tatami/isometric/binary/arithmetic_helpers.hpp b/include/tatami/isometric/binary/arithmetic_helpers.hpp index f55e5234..8760702f 100644 --- a/include/tatami/isometric/binary/arithmetic_helpers.hpp +++ b/include/tatami/isometric/binary/arithmetic_helpers.hpp @@ -42,6 +42,9 @@ class DelayedBinaryIsometricArithmetic { */ template void dense(bool, Index_, Index_, Index_ length, const InputValue_* left_buffer, const InputValue_* right_buffer, OutputValue_* output_buffer) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output_buffer[i]; @@ -54,7 +57,11 @@ class DelayedBinaryIsometricArithmetic { template void dense(bool, Index_, const std::vector& indices, const InputValue_* left_buffer, const InputValue_* right_buffer, OutputValue_* output_buffer) const { - for (Index_ i = 0, length = indices.size(); i < length; ++i) { + Index_ length = indices.size(); +#ifdef _OPENMP + #pragma omp simd +#endif + for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output_buffer[i]; val = delayed_arithmetic(val, right_buffer[i]); diff --git a/include/tatami/isometric/binary/boolean_helpers.hpp b/include/tatami/isometric/binary/boolean_helpers.hpp index 830a7848..05f591f7 100644 --- a/include/tatami/isometric/binary/boolean_helpers.hpp +++ b/include/tatami/isometric/binary/boolean_helpers.hpp @@ -39,6 +39,9 @@ struct DelayedBinaryIsometricBoolean { */ template void dense(bool, Index_, Index_, Index_ length, const InputValue_* left_buffer, const InputValue_* right_buffer, OutputValue_* output_buffer) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output_buffer[i]; @@ -51,7 +54,11 @@ struct DelayedBinaryIsometricBoolean { template void dense(bool, Index_, const std::vector& indices, const InputValue_* left_buffer, const InputValue_* right_buffer, OutputValue_* output_buffer) const { - for (Index_ i = 0, length = indices.size(); i < length; ++i) { + Index_ length = indices.size(); +#ifdef _OPENMP + #pragma omp simd +#endif + for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output_buffer[i]; val = delayed_boolean(val, right_buffer[i]); diff --git a/include/tatami/isometric/binary/compare_helpers.hpp b/include/tatami/isometric/binary/compare_helpers.hpp index 79d92791..23b661a6 100644 --- a/include/tatami/isometric/binary/compare_helpers.hpp +++ b/include/tatami/isometric/binary/compare_helpers.hpp @@ -41,6 +41,9 @@ struct DelayedBinaryIsometricCompare { */ template void dense(bool, Index_, Index_, Index_ length, const InputValue_* left_buffer, const InputValue_* right_buffer, OutputValue_* output_buffer) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output_buffer[i]; @@ -53,7 +56,11 @@ struct DelayedBinaryIsometricCompare { template void dense(bool, Index_, const std::vector& indices, const InputValue_* left_buffer, const InputValue_* right_buffer, OutputValue_* output_buffer) const { - for (Index_ i = 0, length = indices.size(); i < length; ++i) { + Index_ length = indices.size(); +#ifdef _OPENMP + #pragma omp simd +#endif + for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output_buffer[i]; val = delayed_compare(val, right_buffer[i]); diff --git a/include/tatami/isometric/boolean_utils.hpp b/include/tatami/isometric/boolean_utils.hpp index 693fac16..d19e51fe 100644 --- a/include/tatami/isometric/boolean_utils.hpp +++ b/include/tatami/isometric/boolean_utils.hpp @@ -22,6 +22,9 @@ enum class BooleanOperation : char { /** * @cond */ +#ifdef _OPENMP +#pragma omp declare simd +#endif template bool delayed_boolean(bool val, bool scalar) { if constexpr(op_ == BooleanOperation::AND) { diff --git a/include/tatami/isometric/compare_utils.hpp b/include/tatami/isometric/compare_utils.hpp index 18a2a58e..5954e3e4 100644 --- a/include/tatami/isometric/compare_utils.hpp +++ b/include/tatami/isometric/compare_utils.hpp @@ -26,6 +26,9 @@ enum class CompareOperation : char { /** * @cond */ +#ifdef _OPENMP +#pragma omp declare simd +#endif template bool delayed_compare(Value_ val, Value_ scalar) { if constexpr(op_ == CompareOperation::EQUAL) { @@ -58,6 +61,9 @@ enum class SpecialCompareOperation : char { /** * @cond */ +#ifdef _OPENMP +#pragma omp declare simd +#endif template bool delayed_special_compare(Value_ val) { if constexpr(op_ == SpecialCompareOperation::ISNAN) { diff --git a/include/tatami/isometric/unary/DelayedUnaryIsometricOperation.hpp b/include/tatami/isometric/unary/DelayedUnaryIsometricOperation.hpp index 5d92afcb..61ad229e 100644 --- a/include/tatami/isometric/unary/DelayedUnaryIsometricOperation.hpp +++ b/include/tatami/isometric/unary/DelayedUnaryIsometricOperation.hpp @@ -246,10 +246,16 @@ class DenseExpandedFull : public DenseExtractor { } if constexpr(same_value) { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < range.number; ++i) { buffer[range.index[i]] = vbuffer[i]; } } else { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < range.number; ++i) { buffer[range.index[i]] = my_result_vbuffer[i]; } @@ -320,10 +326,16 @@ class DenseExpandedBlock : public DenseExtractor } if constexpr(same_value) { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < range.number; ++i) { buffer[range.index[i] - my_block_start] = vbuffer[i]; } } else { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < range.number; ++i) { buffer[range.index[i] - my_block_start] = my_result_vbuffer[i]; } @@ -361,6 +373,9 @@ class DenseExpandedIndex : public DenseExtractor if (my_extent) { my_remapping_offset = indices.front(); my_remapping.resize(indices.back() - my_remapping_offset + 1); +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < my_extent; ++i) { my_remapping[indices[i] - my_remapping_offset] = i; } @@ -409,10 +424,16 @@ class DenseExpandedIndex : public DenseExtractor } if constexpr(same_value) { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < range.number; ++i) { buffer[my_remapping[range.index[i] - my_remapping_offset]] = vbuffer[i]; } } else { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < range.number; ++i) { buffer[my_remapping[range.index[i] - my_remapping_offset]] = my_result_vbuffer[i]; } diff --git a/include/tatami/isometric/unary/arithmetic_helpers.hpp b/include/tatami/isometric/unary/arithmetic_helpers.hpp index 87a9a8af..2b8eb1ea 100644 --- a/include/tatami/isometric/unary/arithmetic_helpers.hpp +++ b/include/tatami/isometric/unary/arithmetic_helpers.hpp @@ -19,6 +19,9 @@ namespace tatami { */ template void delayed_arithmetic_run_simple(const InputValue_* input, Index_ length, Scalar_ scalar, OutputValue_* output) { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -222,6 +225,9 @@ class DelayedUnaryIsometricArithmeticVector { if (row == my_by_row) { delayed_arithmetic_run_simple(input, length, my_vector[idx], output); } else { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -238,7 +244,11 @@ class DelayedUnaryIsometricArithmeticVector { if (row == my_by_row) { delayed_arithmetic_run_simple(input, static_cast(indices.size()), my_vector[idx], output); } else { - for (Index_ i = 0, length = indices.size(); i < length; ++i) { + Index_ length = indices.size(); +#ifdef _OPENMP + #pragma omp simd +#endif + for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; val = delayed_arithmetic(val, my_vector[indices[i]]); @@ -254,6 +264,9 @@ class DelayedUnaryIsometricArithmeticVector { if (row == my_by_row) { delayed_arithmetic_run_simple(input_value, number, my_vector[idx], output_value); } else { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < number; ++i) { if constexpr(std::is_same::value) { auto& val = output_value[i]; diff --git a/include/tatami/isometric/unary/boolean_helpers.hpp b/include/tatami/isometric/unary/boolean_helpers.hpp index b4d3ea31..6e6b5b9a 100644 --- a/include/tatami/isometric/unary/boolean_helpers.hpp +++ b/include/tatami/isometric/unary/boolean_helpers.hpp @@ -18,6 +18,9 @@ namespace tatami { */ template void delayed_boolean_cast(const InputValue_* input, Index_ length, OutputValue_* output) { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -30,6 +33,9 @@ void delayed_boolean_cast(const InputValue_* input, Index_ length, OutputValue_* template void delayed_boolean_not(const InputValue_* input, Index_ length, OutputValue_* output) { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -316,6 +322,9 @@ class DelayedUnaryIsometricBooleanVector { if (row == my_by_row) { delayed_boolean_run_simple(input, length, my_vector[idx], output); } else { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -332,7 +341,11 @@ class DelayedUnaryIsometricBooleanVector { if (row == my_by_row) { delayed_boolean_run_simple(input, static_cast(indices.size()), my_vector[idx], output); } else { - for (Index_ i = 0, length = indices.size(); i < length; ++i) { + Index_ length = indices.size(); +#ifdef _OPENMP + #pragma omp simd +#endif + for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; val = delayed_boolean(val, my_vector[indices[i]]); @@ -348,6 +361,9 @@ class DelayedUnaryIsometricBooleanVector { if (row == my_by_row) { delayed_boolean_run_simple(input_value, number, my_vector[idx], output_value); } else { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < number; ++i) { if constexpr(std::is_same::value) { auto& val = output_value[i]; diff --git a/include/tatami/isometric/unary/compare_helpers.hpp b/include/tatami/isometric/unary/compare_helpers.hpp index 67bd321b..891f9595 100644 --- a/include/tatami/isometric/unary/compare_helpers.hpp +++ b/include/tatami/isometric/unary/compare_helpers.hpp @@ -18,6 +18,9 @@ namespace tatami { */ template void delayed_compare_run_simple(const InputValue_* input, Index_ length, InputValue_ scalar, OutputValue_* output) { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -176,6 +179,9 @@ class DelayedUnaryIsometricCompareVector { if (row == my_by_row) { delayed_compare_run_simple(input, length, my_vector[idx], output); } else { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -192,7 +198,11 @@ class DelayedUnaryIsometricCompareVector { if (row == my_by_row) { delayed_compare_run_simple(input, static_cast(indices.size()), my_vector[idx], output); } else { - for (Index_ i = 0, length = indices.size(); i < length; ++i) { + Index_ length = indices.size(); +#ifdef _OPENMP + #pragma omp simd +#endif + for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; val = delayed_compare(val, my_vector[indices[i]]); @@ -208,6 +218,9 @@ class DelayedUnaryIsometricCompareVector { if (row == my_by_row) { delayed_compare_run_simple(input, number, my_vector[idx], output); } else { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < number; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; diff --git a/include/tatami/isometric/unary/math_helpers.hpp b/include/tatami/isometric/unary/math_helpers.hpp index 9ac15616..8e421f24 100644 --- a/include/tatami/isometric/unary/math_helpers.hpp +++ b/include/tatami/isometric/unary/math_helpers.hpp @@ -39,6 +39,9 @@ class DelayedUnaryIsometricAbs { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -106,6 +109,9 @@ class DelayedUnaryIsometricSign { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -194,6 +200,9 @@ class DelayedUnaryIsometricLog { template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -260,6 +269,9 @@ class DelayedUnaryIsometricSqrt { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -325,6 +337,9 @@ class DelayedUnaryIsometricCeiling { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -390,6 +405,9 @@ class DelayedUnaryIsometricFloor { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -455,6 +473,9 @@ class DelayedUnaryIsometricTrunc { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -532,6 +553,9 @@ class DelayedUnaryIsometricLog1p { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -599,6 +623,9 @@ class DelayedUnaryIsometricRound { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -664,6 +691,9 @@ class DelayedUnaryIsometricExp { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -729,6 +759,9 @@ class DelayedUnaryIsometricExpm1 { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -794,6 +827,9 @@ class DelayedUnaryIsometricAcos { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -860,6 +896,9 @@ class DelayedUnaryIsometricAcosh { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -926,6 +965,9 @@ class DelayedUnaryIsometricAsin { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -991,6 +1033,9 @@ class DelayedUnaryIsometricAsinh { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -1056,6 +1101,9 @@ class DelayedUnaryIsometricAtan { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -1121,6 +1169,9 @@ class DelayedUnaryIsometricAtanh { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -1186,6 +1237,9 @@ class DelayedUnaryIsometricCos { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -1251,6 +1305,9 @@ class DelayedUnaryIsometricCosh { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -1316,6 +1373,9 @@ class DelayedUnaryIsometricSin { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -1381,6 +1441,9 @@ class DelayedUnaryIsometricSinh { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -1446,6 +1509,9 @@ class DelayedUnaryIsometricTan { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -1511,6 +1577,9 @@ class DelayedUnaryIsometricTanh { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -1576,6 +1645,9 @@ class DelayedUnaryIsometricGamma { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; @@ -1642,6 +1714,9 @@ class DelayedUnaryIsometricLgamma { private: template void core(const InputValue_* input, Index_ length, OutputValue_* output) const { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { if constexpr(std::is_same::value) { auto& val = output[i]; diff --git a/include/tatami/isometric/unary/substitute_helpers.hpp b/include/tatami/isometric/unary/substitute_helpers.hpp index 3b8ca8fa..adbcc73a 100644 --- a/include/tatami/isometric/unary/substitute_helpers.hpp +++ b/include/tatami/isometric/unary/substitute_helpers.hpp @@ -21,6 +21,9 @@ bool delayed_substitute_is_sparse(Value_ compared, Value_ substitute) { return !delayed_compare(0, compared) || substitute == 0; } +#ifdef _OPENMP +#pragma omp declare simd +#endif template void delayed_substitute_run(Value_& val, Value_ compared, Value_ substitute) { if (delayed_compare(val, compared)) { @@ -30,6 +33,9 @@ void delayed_substitute_run(Value_& val, Value_ compared, Value_ substitute) { template void delayed_substitute_run_simple(Value_* buffer, Index_ length, Value_ compared, Value_ substitute) { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { delayed_substitute_run(buffer[i], compared, substitute); } @@ -188,6 +194,9 @@ class DelayedUnaryIsometricSubstituteVector { if (row == my_by_row) { delayed_substitute_run_simple(output, length, my_compared[idx], my_substitute[idx]); } else { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { Index_ is = i + start; delayed_substitute_run(output[i], my_compared[is], my_substitute[is]); @@ -200,7 +209,11 @@ class DelayedUnaryIsometricSubstituteVector { if (row == my_by_row) { delayed_substitute_run_simple(output, static_cast(indices.size()), my_compared[idx], my_substitute[idx]); } else { - for (Index_ i = 0, length = indices.size(); i < length; ++i) { + Index_ length = indices.size(); +#ifdef _OPENMP + #pragma omp simd +#endif + for (Index_ i = 0; i < length; ++i) { auto ii = indices[i]; delayed_substitute_run(output[i], my_compared[ii], my_substitute[ii]); } @@ -212,6 +225,9 @@ class DelayedUnaryIsometricSubstituteVector { if (row == my_by_row) { delayed_substitute_run_simple(output_value, number, my_compared[idx], my_substitute[idx]); } else { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < number; ++i) { auto ii = indices[i]; delayed_substitute_run(output_value[i], my_compared[ii], my_substitute[ii]); @@ -427,6 +443,9 @@ bool delayed_special_substitute_is_sparse(Value_ substitute) { return !delayed_special_compare(0) || substitute == 0; } +#ifdef _OPENMP +#pragma omp declare simd +#endif template void delayed_special_substitute_run(Value_& val, Value_ substitute) { if (delayed_special_compare(val)) { @@ -436,6 +455,9 @@ void delayed_special_substitute_run(Value_& val, Value_ substitute) { template void delayed_special_substitute_run_simple(Value_* buffer, Index_ length, Value_ substitute) { +#ifdef _OPENMP + #pragma omp simd +#endif for (Index_ i = 0; i < length; ++i) { delayed_special_substitute_run(buffer[i], substitute); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 16c2db3f..266ace10 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -66,20 +66,23 @@ add_executable( ) decorate_executable(subset_test) -add_executable( - isometric_unary_test - src/isometric/unary/DelayedUnaryIsometricOperation.cpp - src/isometric/unary/arithmetic_vector_helpers.cpp - src/isometric/unary/arithmetic_scalar_helpers.cpp - src/isometric/unary/math_helpers.cpp - src/isometric/unary/compare_scalar_helpers.cpp - src/isometric/unary/compare_vector_helpers.cpp - src/isometric/unary/boolean_scalar_helpers.cpp - src/isometric/unary/boolean_vector_helpers.cpp - src/isometric/unary/substitute_scalar_helpers.cpp - src/isometric/unary/substitute_vector_helpers.cpp -) -decorate_executable(isometric_unary_test) +macro(create_isometric_unary_test target) + add_executable( + ${target} + src/isometric/unary/DelayedUnaryIsometricOperation.cpp + src/isometric/unary/arithmetic_vector_helpers.cpp + src/isometric/unary/arithmetic_scalar_helpers.cpp + src/isometric/unary/math_helpers.cpp + src/isometric/unary/compare_scalar_helpers.cpp + src/isometric/unary/compare_vector_helpers.cpp + src/isometric/unary/boolean_scalar_helpers.cpp + src/isometric/unary/boolean_vector_helpers.cpp + src/isometric/unary/substitute_scalar_helpers.cpp + src/isometric/unary/substitute_vector_helpers.cpp + ) + decorate_executable(${target}) +endmacro() +create_isometric_unary_test(isometric_unary_test) add_executable( isometric_other_test @@ -87,14 +90,17 @@ add_executable( ) decorate_executable(isometric_other_test) -add_executable( - isometric_binary_test - src/isometric/binary/DelayedBinaryIsometricOperation.cpp - src/isometric/binary/arithmetic_helpers.cpp - src/isometric/binary/compare_helpers.cpp - src/isometric/binary/boolean_helpers.cpp -) -decorate_executable(isometric_binary_test) +macro(create_isometric_binary_test target) + add_executable( + ${target} + src/isometric/binary/DelayedBinaryIsometricOperation.cpp + src/isometric/binary/arithmetic_helpers.cpp + src/isometric/binary/compare_helpers.cpp + src/isometric/binary/boolean_helpers.cpp + ) + decorate_executable(${target}) +endmacro() +create_isometric_binary_test(isometric_binary_test) add_executable( sparse_test @@ -150,4 +156,10 @@ find_package(OpenMP) if(OpenMP_FOUND) create_partest(omp_test) target_link_libraries(omp_test OpenMP::OpenMP_CXX) + + create_isometric_unary_test(omp_isometric_unary_test) + target_link_libraries(omp_isometric_unary_test OpenMP::OpenMP_CXX) + + create_isometric_binary_test(omp_isometric_binary_test) + target_link_libraries(omp_isometric_binary_test OpenMP::OpenMP_CXX) endif()