From 4ea370c172f0461e53bb4790ace9fee8d6e46cd7 Mon Sep 17 00:00:00 2001 From: hyunback kim Date: Wed, 27 Sep 2023 10:39:16 +0900 Subject: [PATCH] [GPU] Support bfwzyx format in strided_slice. (#20027) * [GPU] Support bfwzyx format in strided_slice. GroundingDino model needs 6dims format. Signed-off-by: hyunback --- .../src/graph/impls/ocl/strided_slice.cpp | 3 +- .../cl_kernels/strided_slice_ref.cl | 121 ++++++-- .../strided_slice_kernel_ref.cpp | 58 ++-- .../dynamic/strided_slice.cpp | 52 ++++ .../test_cases/strided_slice_gpu_test.cpp | 263 ++++++++++++++++++ 5 files changed, 453 insertions(+), 44 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp index e64498f73f709a..ab10c1ff106f9d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp @@ -202,7 +202,8 @@ attach_strided_slice_impl::attach_strided_slice_impl() { auto formats = { format::bfyx, - format::bfzyx + format::bfzyx, + format::bfwzyx, }; implementation_map::add(impl_types::ocl, diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl index 23344ae06222ad..49e554703b8b74 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl @@ -8,7 +8,7 @@ inline void FUNC(get_slice_step)(OPTIONAL_SHAPE_INFO_ARG const __global STRIDE_TYPE* stride, int* step_batch, int* step_feature, - int* step_z, int* step_y, int* step_x) + int* step_w, int* step_z, int* step_y, int* step_x) { const uint batch_index = 0; const uint feature_index = 1; @@ -19,13 +19,23 @@ inline void FUNC(get_slice_step)(OPTIONAL_SHAPE_INFO_ARG const uint z_index = 2; const uint y_index = 3; const uint x_index = 4; +#elif OUTPUT_LAYOUT_BFWZYX + const uint w_index = 2; + const uint z_index = 3; + const uint y_index = 4; + const uint x_index = 5; #endif *step_batch = batch_index < STRIDE_DIMS ? stride[batch_index] : 1; *step_feature = feature_index < STRIDE_DIMS ? stride[feature_index] : 1; #ifdef OUTPUT_LAYOUT_BFYX + *step_w = 0; *step_z = 0; #elif OUTPUT_LAYOUT_BFZYX + *step_w = 0; + *step_z = z_index < STRIDE_DIMS ? stride[z_index] : 1; +#elif OUTPUT_LAYOUT_BFWZYX + *step_w = w_index < STRIDE_DIMS ? stride[w_index] : 1; *step_z = z_index < STRIDE_DIMS ? stride[z_index] : 1; #endif *step_y = y_index < STRIDE_DIMS ? stride[y_index] : 1; @@ -50,10 +60,11 @@ inline int FUNC(check_end_bound)(const end_num, inline void FUNC(get_slice_end)(OPTIONAL_SHAPE_INFO_ARG const __global END_TYPE* end, int* end_batch, int* end_feature, - int* end_z, int* end_y, int* end_x) + int* end_w, int* end_z, int* end_y, int* end_x) { const uint out_batch_num = INPUT0_BATCH_NUM; const uint out_feature_num = INPUT0_FEATURE_NUM; + const uint out_w_num = INPUT0_SIZE_W; const uint out_z_num = INPUT0_SIZE_Z; const uint out_y_num = INPUT0_SIZE_Y; const uint out_x_num = INPUT0_SIZE_X; @@ -66,10 +77,18 @@ inline void FUNC(get_slice_end)(OPTIONAL_SHAPE_INFO_ARG const uint z_index = 2; const uint y_index = 3; const uint x_index = 4; +#elif OUTPUT_LAYOUT_BFWZYX + const uint w_index = 2; + const uint z_index = 3; + const uint y_index = 4; + const uint x_index = 5; #endif END_TYPE batch = batch_index < END_DIMS ? end[batch_index] : 0; END_TYPE feature = feature_index < END_DIMS ? end[feature_index] : 0; -#ifdef OUTPUT_LAYOUT_BFZYX +#ifdef OUTPUT_LAYOUT_BFWZYX + END_TYPE w = w_index < END_DIMS ? end[w_index] : 0; + END_TYPE z = z_index < END_DIMS ? end[z_index] : 0; +#elif OUTPUT_LAYOUT_BFZYX END_TYPE z = z_index < END_DIMS ? end[z_index] : 0; #endif END_TYPE y = y_index < END_DIMS ? end[y_index] : 0; @@ -77,7 +96,10 @@ inline void FUNC(get_slice_end)(OPTIONAL_SHAPE_INFO_ARG batch = (END_BATCH == 0) ? batch : TO_END_TYPE(out_batch_num); feature = (END_FEATURE == 0) ? feature : TO_END_TYPE(out_feature_num); -#ifdef OUTPUT_LAYOUT_BFZYX +#ifdef OUTPUT_LAYOUT_BFWZYX + w = (END_W == 0) ? w: TO_END_TYPE(out_w_num); + z = (END_Z == 0) ? z: TO_END_TYPE(out_z_num); +#elif OUTPUT_LAYOUT_BFZYX z = (END_Z == 0) ? z: TO_END_TYPE(out_z_num); #endif y = (END_Y == 0) ? y : TO_END_TYPE(out_y_num); @@ -86,9 +108,13 @@ inline void FUNC(get_slice_end)(OPTIONAL_SHAPE_INFO_ARG *end_batch = FUNC_CALL(check_end_bound)(batch, out_batch_num); *end_feature = FUNC_CALL(check_end_bound)(feature, out_feature_num); #ifdef OUTPUT_LAYOUT_BFYX + *end_w = 0; *end_z = 0; #elif OUTPUT_LAYOUT_BFZYX *end_z = FUNC_CALL(check_end_bound)(z, out_z_num); +#elif OUTPUT_LAYOUT_BFWZYX + *end_w = FUNC_CALL(check_end_bound)(w, out_w_num); + *end_z = FUNC_CALL(check_end_bound)(z, out_z_num); #endif *end_y = FUNC_CALL(check_end_bound)(y, out_y_num); *end_x = FUNC_CALL(check_end_bound)(x, out_x_num); @@ -97,16 +123,17 @@ inline void FUNC(get_slice_end)(OPTIONAL_SHAPE_INFO_ARG inline void FUNC(check_negative_stride)(OPTIONAL_SHAPE_INFO_ARG const __global END_TYPE* end, const int steps_batch, const int steps_feature, - const int steps_z, const int steps_y, const int steps_x, + const int steps_w, const int steps_z, const int steps_y, const int steps_x, int* begin_batch, int* begin_feature, - int* begin_z, int* begin_y, int* begin_x) + int* begin_w, int* begin_z, int* begin_y, int* begin_x) { - bool is_negative = (steps_batch < 0) || (steps_feature < 0) || (steps_z < 0) || (steps_y < 0) || (steps_x < 0); + bool is_negative = (steps_batch < 0) || (steps_feature < 0) || (steps_w < 0) || (steps_z < 0) || (steps_y < 0) || (steps_x < 0); if (is_negative) { - int end_batch, end_feature, end_z, end_y, end_x; - FUNC_CALL(get_slice_end)(OPTIONAL_SHAPE_INFO_TENSOR end, &end_batch, &end_feature, &end_z, &end_y, &end_x); + int end_batch, end_feature, end_w, end_z, end_y, end_x; + FUNC_CALL(get_slice_end)(OPTIONAL_SHAPE_INFO_TENSOR end, &end_batch, &end_feature, &end_w, &end_z, &end_y, &end_x); const int slice_end_batch = end_batch; const int slice_end_feature = end_feature; + const int slice_end_w = end_w; const int slice_end_z = end_z; const int slice_end_y = end_y; const int slice_end_x = end_x; @@ -115,6 +142,8 @@ inline void FUNC(check_negative_stride)(OPTIONAL_SHAPE_INFO_ARG *begin_batch = slice_end_batch - 1; if ((steps_feature < 0) && (*begin_feature <= slice_end_feature)) *begin_feature = slice_end_feature - 1; + if ((steps_w < 0) && (*begin_w <= slice_end_w)) + *begin_w = slice_end_w - 1; if ((steps_z < 0) && (*begin_z <= slice_end_z)) *begin_z = slice_end_z - 1; if ((steps_y < 0) && (*begin_y <= slice_end_y)) @@ -125,12 +154,13 @@ inline void FUNC(check_negative_stride)(OPTIONAL_SHAPE_INFO_ARG } #else // END_TYPE inline void FUNC(check_negative_stride)(const int steps_batch, const int steps_feature, - const int steps_z, const int steps_y, const int steps_x, + const int steps_w, const int steps_z, const int steps_y, const int steps_x, int* begin_batch, int* begin_feature, - int* begin_z, int* begin_y, int* begin_x) + int* begin_w, int* begin_z, int* begin_y, int* begin_x) { const int slice_end_batch = SLICE_END_BATCH; const int slice_end_feature = SLICE_END_FEATURE; + const int slice_end_w = SLICE_END_W; const int slice_end_z = SLICE_END_Z; const int slice_end_y = SLICE_END_Y; const int slice_end_x = SLICE_END_X; @@ -139,6 +169,8 @@ inline void FUNC(check_negative_stride)(const int steps_batch, const int steps_f *begin_batch = slice_end_batch - 1; if ((steps_feature < 0) && (*begin_feature <= slice_end_feature)) *begin_feature = slice_end_feature - 1; + if ((steps_w < 0) && (*begin_w <= slice_end_w)) + *begin_w = slice_end_w - 1; if ((steps_z < 0) && (*begin_z <= slice_end_z)) *begin_z = slice_end_z - 1; if ((steps_y < 0) && (*begin_y <= slice_end_y)) @@ -165,10 +197,11 @@ inline int FUNC(check_begin_bound)(BEGIN_TYPE begin_num, inline void FUNC(get_slice_begin)(OPTIONAL_SHAPE_INFO_ARG const __global BEGIN_TYPE* begin, int* begin_batch, int* begin_feature, - int* begin_z, int* begin_y, int* begin_x) + int* begin_w, int* begin_z, int* begin_y, int* begin_x) { const uint out_batch_num = INPUT0_BATCH_NUM; const uint out_feature_num = INPUT0_FEATURE_NUM; + const uint out_w_num = INPUT0_SIZE_W; const uint out_z_num = INPUT0_SIZE_Z; const uint out_y_num = INPUT0_SIZE_Y; const uint out_x_num = INPUT0_SIZE_X; @@ -181,10 +214,18 @@ inline void FUNC(get_slice_begin)(OPTIONAL_SHAPE_INFO_ARG const uint z_index = 2; const uint y_index = 3; const uint x_index = 4; +#elif OUTPUT_LAYOUT_BFWZYX + const uint w_index = 2; + const uint z_index = 3; + const uint y_index = 4; + const uint x_index = 5; #endif BEGIN_TYPE batch = batch_index < BEGIN_DIMS ? begin[batch_index] : 0; BEGIN_TYPE feature = feature_index < BEGIN_DIMS ? begin[feature_index] : 0; -#ifdef OUTPUT_LAYOUT_BFZYX +#ifdef OUTPUT_LAYOUT_BFWZYX + BEGIN_TYPE w = w_index < BEGIN_DIMS ? begin[w_index] : 0; + BEGIN_TYPE z = z_index < BEGIN_DIMS ? begin[z_index] : 0; +#elif OUTPUT_LAYOUT_BFZYX BEGIN_TYPE z = z_index < BEGIN_DIMS ? begin[z_index] : 0; #endif BEGIN_TYPE y = y_index < BEGIN_DIMS ? begin[y_index] : 0; @@ -192,7 +233,10 @@ inline void FUNC(get_slice_begin)(OPTIONAL_SHAPE_INFO_ARG batch = (BEGIN_BATCH == 0) ? batch : 0; feature = (BEGIN_FEATURE == 0) ? feature : 0; -#ifdef OUTPUT_LAYOUT_BFZYX +#ifdef OUTPUT_LAYOUT_BFWZYX + w = (BEGIN_W == 0) ? w: 0; + z = (BEGIN_Z == 0) ? z: 0; +#elif OUTPUT_LAYOUT_BFZYX z = (BEGIN_Z == 0) ? z: 0; #endif y = (BEGIN_Y == 0) ? y : 0; @@ -201,8 +245,13 @@ inline void FUNC(get_slice_begin)(OPTIONAL_SHAPE_INFO_ARG *begin_batch = FUNC_CALL(check_begin_bound)(batch, out_batch_num); *begin_feature = FUNC_CALL(check_begin_bound)(feature, out_feature_num); #ifdef OUTPUT_LAYOUT_BFYX + *begin_w = 0; *begin_z = 0; #elif OUTPUT_LAYOUT_BFZYX + *begin_w = 0; + *begin_z = FUNC_CALL(check_begin_bound)(z, out_z_num); +#elif OUTPUT_LAYOUT_BFWZYX + *begin_w = FUNC_CALL(check_begin_bound)(w, out_w_num); *begin_z = FUNC_CALL(check_begin_bound)(z, out_z_num); #endif *begin_y = FUNC_CALL(check_begin_bound)(y, out_y_num); @@ -226,36 +275,40 @@ KERNEL(strided_slice_ref)(OPTIONAL_SHAPE_INFO_ARG const uint batch = get_global_id(0); const uint feature = get_global_id(1); #ifdef STRIDE_TYPE - int step_batch, step_feature, step_z, step_y, step_x; - FUNC_CALL(get_slice_step)(OPTIONAL_SHAPE_INFO_TENSOR stride, &step_batch, &step_feature, &step_z, &step_y, &step_x); + int step_batch, step_feature, step_w, step_z, step_y, step_x; + FUNC_CALL(get_slice_step)(OPTIONAL_SHAPE_INFO_TENSOR stride, &step_batch, &step_feature, &step_w, &step_z, &step_y, &step_x); const int slice_steps_batch = step_batch; const int slice_steps_feature = step_feature; + const int slice_steps_w = step_w; const int slice_steps_z = step_z; const int slice_steps_y = step_y; const int slice_steps_x = step_x; #else // STRIDE_TYPE const int slice_steps_batch = SLICE_STEPS_BATCH; const int slice_steps_feature = SLICE_STEPS_FEATURE; + const int slice_steps_w = SLICE_STEPS_W; const int slice_steps_z = SLICE_STEPS_Z; const int slice_steps_y = SLICE_STEPS_Y; const int slice_steps_x = SLICE_STEPS_X; #endif // STRIDE_TYPE #ifdef BEGIN_TYPE - int begin_batch, begin_feature, begin_z, begin_y, begin_x; - FUNC_CALL(get_slice_begin)(OPTIONAL_SHAPE_INFO_TENSOR begin, &begin_batch, &begin_feature, &begin_z, &begin_y, &begin_x); + int begin_batch, begin_feature, begin_w, begin_z, begin_y, begin_x; + FUNC_CALL(get_slice_begin)(OPTIONAL_SHAPE_INFO_TENSOR begin, &begin_batch, &begin_feature, &begin_w, &begin_z, &begin_y, &begin_x); #ifdef END_TYPE - FUNC_CALL(check_negative_stride)(OPTIONAL_SHAPE_INFO_TENSOR end, slice_steps_batch, slice_steps_feature, slice_steps_z, slice_steps_y, slice_steps_x, &begin_batch, &begin_feature, &begin_z, &begin_y, &begin_x); + FUNC_CALL(check_negative_stride)(OPTIONAL_SHAPE_INFO_TENSOR end, slice_steps_batch, slice_steps_feature, slice_steps_w, slice_steps_z, slice_steps_y, slice_steps_x, &begin_batch, &begin_feature, &begin_w, &begin_z, &begin_y, &begin_x); #else // END_TYPE - FUNC_CALL(check_negative_stride)(slice_steps_batch, slice_steps_feature, slice_steps_z, slice_steps_y, slice_steps_x, &begin_batch, &begin_feature, &begin_z, &begin_y, &begin_x); + FUNC_CALL(check_negative_stride)(slice_steps_batch, slice_steps_feature, slice_steps_w, slice_steps_z, slice_steps_y, slice_steps_x, &begin_batch, &begin_feature, &begin_w, &begin_z, &begin_y, &begin_x); #endif // END_TYPE const int slice_begin_batch = begin_batch; const int slice_begin_feature = begin_feature; + const int slice_begin_w = begin_w; const int slice_begin_z = begin_z; const int slice_begin_y = begin_y; const int slice_begin_x = begin_x; #else // BEGIN_TYPE const int slice_begin_batch = SLICE_BEGIN_BATCH; const int slice_begin_feature = SLICE_BEGIN_FEATURE; + const int slice_begin_w = SLICE_BEGIN_W; const int slice_begin_z = SLICE_BEGIN_Z; const int slice_begin_y = SLICE_BEGIN_Y; const int slice_begin_x = SLICE_BEGIN_X; @@ -264,32 +317,51 @@ KERNEL(strided_slice_ref)(OPTIONAL_SHAPE_INFO_ARG #if NEW_AXIS_MODE // If NEW_AXIS_MODE that just copy input to output #ifdef OUTPUT_LAYOUT_BFYX + const uint w_input = 0; const uint z_input = 0; const uint y_input = (uint)get_global_id(2) / INPUT0_SIZE_X; const uint x_input = (uint)get_global_id(2) % INPUT0_SIZE_X; #elif OUTPUT_LAYOUT_BFZYX + const uint w_input = 0; const uint yx_input = (uint)get_global_id(2) % (INPUT0_SIZE_X * INPUT0_SIZE_Y); const uint z_input = (uint)get_global_id(2) / (INPUT0_SIZE_X * INPUT0_SIZE_Y); const uint y_input = yx_input / INPUT0_SIZE_X; const uint x_input = yx_input % INPUT0_SIZE_X; +#elif OUTPUT_LAYOUT_BFWZYX + const uint zyx_input = (uint)get_global_id(2) % (INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z); + const uint w_input = (uint)get_global_id(2) / (INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z); + const uint z_input = zyx_input / (INPUT0_SIZE_X * INPUT0_SIZE_Y); + const uint yx_input = zyx_input % (INPUT0_SIZE_X * INPUT0_SIZE_Y); + const uint y_input = yx_input / INPUT0_SIZE_X; + const uint x_input = yx_input % INPUT0_SIZE_X; #endif const uint input_index = INPUT0_OFFSET + batch * INPUT0_BATCH_PITCH + feature * INPUT0_FEATURE_PITCH + + w_input * INPUT0_W_PITCH + z_input * INPUT0_Z_PITCH + y_input * INPUT0_Y_PITCH + x_input * INPUT0_X_PITCH; output[input_index] = input[input_index]; #else // NEW_AXIS_MODE #ifdef OUTPUT_LAYOUT_BFYX + const uint w = 0; const uint z = 0; const uint y = get_global_id(2) / OUTPUT_SIZE_X; const uint x = get_global_id(2) % OUTPUT_SIZE_X; #elif OUTPUT_LAYOUT_BFZYX + const uint w = 0; const uint yx = get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); const uint z = get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); const uint y = yx / OUTPUT_SIZE_X; const uint x = yx % OUTPUT_SIZE_X; +#elif OUTPUT_LAYOUT_BFWZYX + const uint zyx = (uint)get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); + const uint w = (uint)get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); + const uint z = zyx / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const uint yx = zyx % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const uint y = yx / OUTPUT_SIZE_X; + const uint x = yx % OUTPUT_SIZE_X; #endif #if SHRINK_MODE @@ -297,7 +369,12 @@ KERNEL(strided_slice_ref)(OPTIONAL_SHAPE_INFO_ARG const uint input_index = INPUT0_OFFSET + (slice_begin_batch + in_indices[0] * slice_steps_batch) * INPUT0_BATCH_PITCH + (slice_begin_feature + in_indices[1] * slice_steps_feature) * INPUT0_FEATURE_PITCH + - #if INPUT0_LAYOUT_BFZYX + #if INPUT0_LAYOUT_BFWZYX + (slice_begin_w + in_indices[2] * slice_steps_w) * INPUT0_W_PITCH + + (slice_begin_z + in_indices[3] * slice_steps_z) * INPUT0_Z_PITCH + + (slice_begin_y + in_indices[4] * slice_steps_y) * INPUT0_Y_PITCH + + (slice_begin_x + in_indices[5] * slice_steps_x) * INPUT0_X_PITCH; + #elif INPUT0_LAYOUT_BFZYX (slice_begin_z + in_indices[2] * slice_steps_z) * INPUT0_Z_PITCH + (slice_begin_y + in_indices[3] * slice_steps_y) * INPUT0_Y_PITCH + (slice_begin_x + in_indices[4] * slice_steps_x) * INPUT0_X_PITCH; @@ -309,6 +386,7 @@ KERNEL(strided_slice_ref)(OPTIONAL_SHAPE_INFO_ARG const uint input_index = INPUT0_OFFSET + (slice_begin_batch + batch * slice_steps_batch) * INPUT0_BATCH_PITCH + (slice_begin_feature + feature * slice_steps_feature) * INPUT0_FEATURE_PITCH + + (slice_begin_w + w * slice_steps_w) * INPUT0_W_PITCH + (slice_begin_z + z * slice_steps_z) * INPUT0_Z_PITCH + (slice_begin_y + y * slice_steps_y) * INPUT0_Y_PITCH + (slice_begin_x + x * slice_steps_x) * INPUT0_X_PITCH; @@ -317,6 +395,7 @@ KERNEL(strided_slice_ref)(OPTIONAL_SHAPE_INFO_ARG const uint output_index = OUTPUT_OFFSET + batch * OUTPUT_BATCH_PITCH + feature * OUTPUT_FEATURE_PITCH + + w * OUTPUT_W_PITCH + z * OUTPUT_Z_PITCH + y * OUTPUT_Y_PITCH + x * OUTPUT_X_PITCH; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.cpp index 8c64249302c85f..88e80f0c05851b 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.cpp @@ -14,11 +14,18 @@ static void makeJitConstForParam(JitConstants& jit, const std::string name, cons jit.AddConstant(MakeJitConstant(name + "_SIZES", vec)); jit.AddConstant(MakeJitConstant(name + "_BATCH", vec[0])); jit.AddConstant(MakeJitConstant(name + "_FEATURE", vec[1])); - if (vec.size() == 5) { // BFZYX + if (vec.size() == 6) { // BFWZYX + jit.AddConstant(MakeJitConstant(name + "_W", vec[2])); + jit.AddConstant(MakeJitConstant(name + "_Z", vec[3])); + jit.AddConstant(MakeJitConstant(name + "_Y", vec[4])); + jit.AddConstant(MakeJitConstant(name + "_X", vec[5])); + } else if (vec.size() == 5) { // BFZYX + jit.AddConstant(MakeJitConstant(name + "_W", 0)); jit.AddConstant(MakeJitConstant(name + "_Z", vec[2])); jit.AddConstant(MakeJitConstant(name + "_Y", vec[3])); jit.AddConstant(MakeJitConstant(name + "_X", vec[4])); } else { // BFYX + jit.AddConstant(MakeJitConstant(name + "_W", 0)); jit.AddConstant(MakeJitConstant(name + "_Z", 0)); jit.AddConstant(MakeJitConstant(name + "_Y", vec[2])); jit.AddConstant(MakeJitConstant(name + "_X", vec[3])); @@ -27,7 +34,7 @@ static void makeJitConstForParam(JitConstants& jit, const std::string name, cons static size_t GetUsedOutDimsCount(const strided_slice_params& params) { auto dims = params.outputs[0].GetDims(); - size_t first_non_unit_dim = 0; // order is xy(z)fb, so by default consider that we use all dims + size_t first_non_unit_dim = 0; // order is xy(zw)fb, so by default consider that we use all dims for (size_t i = 0; i < dims.size(); i++) { if (dims[i].v != 1) { break; @@ -70,7 +77,7 @@ bool StridedSliceKernelRef::Validate(const Params& p, const optional_params& o) if (params.inputs.empty()) return false; - if (params.outputs[0].Dimentions() > 5 || params.inputs[0].Dimentions() > 5) + if (params.outputs[0].Dimentions() > 6 || params.inputs[0].Dimentions() > 6) return false; for (auto& fused_op : params.fused_ops) { @@ -86,7 +93,7 @@ bool StridedSliceKernelRef::Validate(const Params& p, const optional_params& o) size_t used_out_dims = GetUsedOutDimsCount(params); // Count of actual output dims + count of shrinked axes shouldn't exceed 5 to be able to find input index correctly - if (used_out_dims + shrinked_axes > 5) { + if (used_out_dims + shrinked_axes > 6) { return false; } } @@ -99,14 +106,15 @@ CommonDispatchData StridedSliceKernelRef::SetDefault(const strided_slice_params& auto out_layout = params.outputs[0].GetLayout(); std::vector> dims_by_gws = {{ Tensor::DataChannelName::BATCH }, { Tensor::DataChannelName::FEATURE }, - { Tensor::DataChannelName::X, Tensor::DataChannelName::Y, Tensor::DataChannelName::Z }}; + { Tensor::DataChannelName::X, Tensor::DataChannelName::Y, + Tensor::DataChannelName::Z, Tensor::DataChannelName::W }}; // If the new_axis_mask is set, then begin, end, and stride are ignored // and a new length 1 dimension is adding. Input data just copying to output // TODO: remove data copying in case where only shape size changing dispatchData.gws = { params.outputs[0].Batch().v, params.outputs[0].Feature().v, - params.outputs[0].Z().v * params.outputs[0].Y().v * params.outputs[0].X().v }; + params.outputs[0].W().v * params.outputs[0].Z().v * params.outputs[0].Y().v * params.outputs[0].X().v }; dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo, in_layout, out_layout, dims_by_gws); @@ -161,33 +169,37 @@ JitConstants StridedSliceKernelRef::GetJitConstants(const strided_slice_params& if (shrink_mode) { jit.AddConstant(MakeJitConstant("SHRINK_MODE", true)); makeJitConstForParam(jit, "SHRINK", params.shrink_axis_mask); - std::vector bfzyx_in_order; - if (params.outputs[0].Dimentions() == 5) - bfzyx_in_order = {"batch", "feature", "z", "y", "x"}; + std::vector bfwzyx_in_order; + if (params.outputs[0].Dimentions() == 6) + bfwzyx_in_order = {"batch", "feature", "w", "z", "y", "x"}; + else if (params.outputs[0].Dimentions() == 5) + bfwzyx_in_order = {"batch", "feature", "z", "y", "x"}; else - bfzyx_in_order = {"batch", "feature", "y", "x"}; + bfwzyx_in_order = {"batch", "feature", "y", "x"}; // Insert zeroes to indices order for shinked axes for (size_t i = 0; i < params.shrink_axis_mask.size(); i++) { if (params.shrink_axis_mask[i] == 1) { - bfzyx_in_order.insert(bfzyx_in_order.begin() + i, "0"); + bfwzyx_in_order.insert(bfwzyx_in_order.begin() + i, "0"); } } - auto get_input_idx_order = [&](std::vector bfzyx_in_order) -> std::string { - return bfzyx_in_order[0] + "," + - bfzyx_in_order[1] + "," + - bfzyx_in_order[2] + "," + - bfzyx_in_order[3] + "," + - bfzyx_in_order[4]; + auto get_input_idx_order = [&](std::vector bfwzyx_in_order) -> std::string { + std::string order = bfwzyx_in_order[0] + "," + + bfwzyx_in_order[1] + "," + + bfwzyx_in_order[2] + "," + + bfwzyx_in_order[3] + "," + + bfwzyx_in_order[4]; + if (bfwzyx_in_order.size() == 6) order += "," + bfwzyx_in_order[5]; + return order; }; - // Erase indices that exceeds 5d tensor. It should be safe, because we check in Validate method that + // Erase indices that exceeds 6d tensor. It should be safe, because we check in Validate method that // shrinked axes don't result in too big dims count - while (bfzyx_in_order.size() > 5) { - bfzyx_in_order.pop_back(); + while (bfwzyx_in_order.size() > 6) { + bfwzyx_in_order.pop_back(); } - jit.AddConstant(MakeJitConstant("INPUT_INDICES_ORDER", get_input_idx_order(bfzyx_in_order))); + jit.AddConstant(MakeJitConstant("INPUT_INDICES_ORDER", get_input_idx_order(bfwzyx_in_order))); } return jit; @@ -211,7 +223,9 @@ KernelsData StridedSliceKernelRef::GetKernelsData(const Params& params, const op if (!newParams.fused_ops.empty()) { std::vector idx_order; - if (input.Dimentions() == 5) { + if (input.Dimentions() == 6) { + idx_order = {"b", "f", "w", "z", "y", "x"}; + } else if (input.Dimentions() == 5) { idx_order = {"b", "f", "z", "y", "x"}; } else if (input.Dimentions() == 4) { idx_order = {"b", "f", "y", "x"}; diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/strided_slice.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/strided_slice.cpp index d039d644f7f4a6..052d6574d682d7 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/strided_slice.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/strided_slice.cpp @@ -271,5 +271,57 @@ INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_4D, StridedSliceLa ::testing::Values(emptyAdditionalConfig)), StridedSliceLayerGPUTest::getTestCaseName); + +const std::vector testCasesCommon5D = { + StridedSliceParams{ { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { }, { }, { } }, + StridedSliceParams{ { 0, 0, 10, 20 }, { 1, 5, 28, 26 }, { 1, 1, 1, 2 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { }, { }, { } }, + StridedSliceParams{ { 0, 0, 0, 20 }, { 1, 2, 30, 30 }, { 1, 1, 2, 1 }, { 0, 0, 0, 1 }, { 0, 1, 0, 1 }, { }, { }, { } }, + StridedSliceParams{ { 0, 1, 2, 10 }, { 1, 5, 32, 18 }, { 1, 1, 1, 2 }, { 0, 0, 1, 0 }, { 0, 0, 0, 1 }, { }, { }, { } }, + StridedSliceParams{ { 0, 0, 2, 10 }, { 1, 8, 32, 18 }, { 1, 2, 1, 2 }, { 0, 0, 1, 0 }, { 0, 0, 0, 1 }, { }, { }, { } }, +}; + +const std::vector inputShapesDynamic5D = { + {{-1, -1, -1, -1, -1}, + {{ 1, 5, 32, 32, 32 }, { 2, 5, 32, 32, 32 }, { 1, 5, 64, 64, 64 }}}, + + {{1, 64, -1, -1, -1}, + {{ 1, 64, 1, 16, 32 }, { 1, 64, 1, 32, 64 }, { 1, 64, 1, 64, 64 }}}, + + {{1, -1, 16, 32, -1}, + {{ 1, 16, 16, 32, 1 }, { 1, 32, 16, 32, 1 }, { 1, 64, 16, 32, 1 }}}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_5D, StridedSliceLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(inputShapesDynamic5D), + ::testing::ValuesIn(testCasesCommon5D), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(restInputTypes), + ::testing::Values(emptyAdditionalConfig)), + StridedSliceLayerGPUTest::getTestCaseName); + + +const std::vector testCasesCommon6D = { + StridedSliceParams{ { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { }, { }, { } }, + StridedSliceParams{ { 0, 0, 10, 20 }, { 1, 5, 28, 26 }, { 1, 1, 1, 2 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { }, { }, { } }, +}; + +const std::vector inputShapesDynamic6D = { + {{-1, -1, -1, -1, -1, -1}, + {{ 1, 5, 5, 32, 32, 32 }, { 2, 5, 7, 32, 32, 64 }, { 1, 3, 5, 64, 64, 64 }}}, + + {{1, -1, 16, 32, -1, -1}, + {{ 1, 16, 16, 32, 1, 32 }, { 1, 32, 16, 32, 32, 64 }, { 1, 64, 16, 32, 32, 64 }}}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_6D, StridedSliceLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(inputShapesDynamic6D), + ::testing::ValuesIn(testCasesCommon6D), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(restInputTypes), + ::testing::Values(emptyAdditionalConfig)), + StridedSliceLayerGPUTest::getTestCaseName); + } // namespace } // namespace GPULayerTestsDefinitions diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/strided_slice_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/strided_slice_gpu_test.cpp index 71e193b47abde1..7a0280380b766c 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/strided_slice_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/strided_slice_gpu_test.cpp @@ -118,6 +118,120 @@ class strided_slice_gpu: public ::testing::Test { } } + void test_2x2x2x2x2_full(bool is_caching_test) { + // Input (BFZYX): 2x2x2x2x2 + // Begin (BFZYX): 0x0x0x0x0 + // End (BFZYX): 2x2x2x2x2 + // Stride (BFZYX): 1x1x1x1x1 + // Output (BFZYX): 2x2x2x2x2 + + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2, 2, 2 }, data_types::f32, format::bfzyx }); + + set_values(input, { + 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, + 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, + 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, + }); + std::vector begin_data = { 0, 0, 0, 0, 0 }; + std::vector end_data = { 2, 2, 2, 2, 2 }; + std::vector strides_data = { 1, 1, 1, 1, 1 }; + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 2, 2, 2, 2})); + + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); + + network->set_input_data("input", input); + + auto outputs = network->execute(); + + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "strided_slice"); + + auto output = outputs.at("strided_slice").get_memory(); + + std::vector answers = { + 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, + 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, + 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, + }; + + cldnn::mem_lock output_ptr(output, get_test_stream()); + + ASSERT_EQ(output_ptr.size(), answers.size()); + for (size_t i = 0; i < answers.size(); ++i) + { + ASSERT_TRUE(are_equal(answers[i], output_ptr[i])); + } + } + + + void test_2x2x2x2x2x2_full(bool is_caching_test) { + // Input (BFWZYX): 2x2x2x2x2x2 + // Begin (BFWZYX): 0x0x0x0x0x0 + // End (BFWZYX): 2x2x2x2x2x2 + // Stride (BFWZYX): 1x1x1x1x1x1 + // Output (BFWZYX): 2x2x2x2x2x2 + + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2, 2, 2, 2 }, data_types::f32, format::bfwzyx }); + + set_values(input, { + 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, + 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, + 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, + 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, + 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, + 48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, + 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f, + }); + std::vector begin_data = { 0, 0, 0, 0, 0, 0 }; + std::vector end_data = { 2, 2, 2, 2, 2, 2 }; + std::vector strides_data = { 1, 1, 1, 1, 1, 1 }; + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 2, 2, 2, 2, 2})); + + auto config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"strided_slice", {format::bfwzyx, "", impl_types::ocl}} })); + + cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); + + network->set_input_data("input", input); + + auto outputs = network->execute(); + + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "strided_slice"); + + auto output = outputs.at("strided_slice").get_memory(); + + std::vector answers = { + 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, + 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, + 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, + 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, + 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, + 48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, + 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f, + }; + + cldnn::mem_lock output_ptr(output, get_test_stream()); + + ASSERT_EQ(output_ptr.size(), answers.size()); + for (size_t i = 0; i < answers.size(); ++i) + { + ASSERT_TRUE(are_equal(answers[i], output_ptr[i])); + } + } + void test_2x2x2x2_full_pad(bool is_caching_test) { // Input (BFYX): 2x2x2x2 // Begin (BFYX): 0x0x0x0 @@ -601,6 +715,94 @@ class strided_slice_gpu: public ::testing::Test { } } + void test_2x2x2x2x1x1(bool is_caching_test) { + // Input (BFWZYX): 2x2x2x2x1x1 + // Output (BFWZYX): 1x2x2x2x1x1 + + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx, { 2, 2, 1, 1, 2, 2 }}); + + set_values(input, { + 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, + 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + }); + std::vector begin_data = { 0, 0, 0, 0, 0, 0 }; + std::vector end_data = { 1, 2, 2, 2, 1, 1 }; + std::vector strides_data = { 1, 1, 1, 1, 1, 1 }; + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {1, 2, 2, 2, 1, 1})); + + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); + + network->set_input_data("input", input); + + auto outputs = network->execute(); + + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "strided_slice"); + + auto output = outputs.at("strided_slice").get_memory(); + + std::vector answers = { + 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, + }; + + cldnn::mem_lock output_ptr(output, get_test_stream()); + + for (size_t i = 0; i < answers.size(); ++i) + { + ASSERT_TRUE(are_equal(answers[i], output_ptr[i])); + } + } + + void test_2x2x2x2x1x1_2(bool is_caching_test, impl_types impl_type = impl_types::any) { + // Input (BFWZYX): 2x2x2x2x1x1 + // Output (BFWZYX): 2x1x1x1x1x1 + + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx, { 2, 2, 1, 1, 2, 2 } }); + + set_values(input, { + 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, + 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + }); + std::vector begin_data = { 0, 0, 0, 0 }; + std::vector end_data = { 2, 2, 2, 2 }; + std::vector strides_data = { 1, 2, 2, 2 }; + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 1, 1, 1, 1})); + + auto config = get_test_default_config(engine); + if (impl_type != impl_types::any) + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"strided_slice", {format::bfwzyx, "", impl_types::cpu}} })); + + cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); + + network->set_input_data("input", input); + + auto outputs = network->execute(); + + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "strided_slice"); + + auto output = outputs.at("strided_slice").get_memory(); + + std::vector answers = { + 0.0f, 8.0f, + }; + + cldnn::mem_lock output_ptr(output, get_test_stream()); + + for (size_t i = 0; i < answers.size(); ++i) + { + ASSERT_TRUE(are_equal(answers[i], output_ptr[i])); + } + } + void test_2x2x2x2_full_negative_stride(bool is_caching_test) { // Input (BFYX): 2x2x2x2 // Begin (BFYX): 0x0x0x0 @@ -1933,6 +2135,47 @@ class strided_slice_gpu_i8: public ::testing::Test { ASSERT_TRUE(are_equal(answers[i], output_ptr[i])); } } + + void test_2x2x2x2x1x1(bool is_caching_test) { + // Input (BFWZYX): 2x2x2x2x1x1 + // Output (BFWZYX): 1x2x2x2x1x1 + + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::i8, format::bfwzyx, { 2, 2, 1, 1, 2, 2 } }); + + set_values(input, { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + }); + std::vector begin_data = { 0, 0, 0 }; + std::vector end_data = { 1, 2, 2 }; + std::vector strides_data = { 1, 1, 1 }; + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {1, 2, 2, 2, 1, 1})); + + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); + + network->set_input_data("input", input); + + auto outputs = network->execute(); + + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "strided_slice"); + + auto output = outputs.at("strided_slice").get_memory(); + + std::vector answers = { + 0, 1, 2, 3, 4, 5, 6, 7, + }; + + cldnn::mem_lock output_ptr(output, get_test_stream()); + + for (size_t i = 0; i < answers.size(); ++i) { + ASSERT_TRUE(are_equal(answers[i], output_ptr[i])); + } + } }; class strided_slice_gpu_f32_i32: public ::testing::Test { @@ -1999,6 +2242,14 @@ TEST_F(strided_slice_gpu_constants, test_2x2x2x2_full) { this->test_2x2x2x2_full(false); } +TEST_F(strided_slice_gpu, test_2x2x2x2x2_full) { + this->test_2x2x2x2x2_full(false); +} + +TEST_F(strided_slice_gpu, test_2x2x2x2x2x2_full) { + this->test_2x2x2x2x2x2_full(false); +} + TEST_F(strided_slice_gpu, test_2x2x2x2_full_pad) { this->test_2x2x2x2_full_pad(false); } @@ -2087,6 +2338,18 @@ TEST_F(strided_slice_gpu_constants, test_2x2x2x1x1_2) { this->test_2x2x2x1x1_2(false); } +TEST_F(strided_slice_gpu, test_2x2x2x2x1x1) { + this->test_2x2x2x2x1x1(false); +} + +TEST_F(strided_slice_gpu, test_2x2x2x2x1x1_2) { + this->test_2x2x2x2x1x1_2(false); +} + +TEST_F(strided_slice_gpu_i8, test_2x2x2x2x1x1) { + this->test_2x2x2x2x1x1(false); +} + TEST_F(strided_slice_gpu_f32_i32, test_1x1x1x8x1_new_axis_5d) { this->test_1x1x1x8x1_new_axis_5d(false); }