From ea718e34859e4d4f2c4e13122fba4eedc32509bd Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:35:32 +0100 Subject: [PATCH] Properly set properties of group_load/group_store to striped (#2238) PR proposes to pass `striped` property into `group_load` and `group_store` calls. It will help to fully replicate the legacy behavior of `sub_group::load`and `sub_group::store`. --- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 23 +++++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 7d1bdb738ff..b27b60609b9 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -41,8 +41,11 @@ using dpctl::tensor::kernels::alignment_utils::is_aligned; using dpctl::tensor::kernels::alignment_utils::required_alignment; -using sycl::ext::oneapi::experimental::group_load; -using sycl::ext::oneapi::experimental::group_store; +namespace syclex = sycl::ext::oneapi::experimental; +using syclex::group_load; +using syclex::group_store; + +constexpr auto striped = syclex::properties{syclex::data_placement_striped}; template constexpr T dispatch_erf_op(T elem) @@ -529,8 +532,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap) sycl::vec<_DataType_input1, vec_sz> x1{}; \ sycl::vec<_DataType_input2, vec_sz> x2{}; \ \ - group_load(sg, input1_multi_ptr, x1); \ - group_load(sg, input2_multi_ptr, x2); \ + group_load(sg, input1_multi_ptr, x1, striped); \ + group_load(sg, input2_multi_ptr, x2, striped); \ \ res_vec = __vec_operation__; \ } \ @@ -540,8 +543,10 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap) sycl::vec<_DataType_input1, vec_sz> tmp_x1{}; \ sycl::vec<_DataType_input2, vec_sz> tmp_x2{}; \ \ - group_load(sg, input1_multi_ptr, tmp_x1); \ - group_load(sg, input2_multi_ptr, tmp_x2); \ + group_load(sg, input1_multi_ptr, tmp_x1, \ + striped); \ + group_load(sg, input2_multi_ptr, tmp_x2, \ + striped); \ \ sycl::vec<_DataType_output, vec_sz> x1 = \ dpnp_vec_cast<_DataType_output, \ @@ -559,8 +564,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap) sycl::vec<_DataType_input1, vec_sz> x1{}; \ sycl::vec<_DataType_input2, vec_sz> x2{}; \ \ - group_load(sg, input1_multi_ptr, x1); \ - group_load(sg, input2_multi_ptr, x2); \ + group_load(sg, input1_multi_ptr, x1, striped); \ + group_load(sg, input2_multi_ptr, x2, striped); \ \ for (size_t k = 0; k < vec_sz; ++k) { \ const _DataType_output input1_elem = x1[k]; \ @@ -568,7 +573,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap) res_vec[k] = __operation__; \ } \ } \ - group_store(sg, res_vec, result_multi_ptr); \ + group_store(sg, res_vec, result_multi_ptr, striped); \ } \ else { \ for (size_t k = start + sg.get_local_id()[0]; \