From 4ea370c172f0461e53bb4790ace9fee8d6e46cd7 Mon Sep 17 00:00:00 2001
From: hyunback kim <hyunback.kim@intel.com>
Date: Wed, 27 Sep 2023 10:39:16 +0900
Subject: [PATCH] [GPU] Support bfwzyx format in strided_slice. (#20027)

* [GPU] Support bfwzyx format in strided_slice.

GroundingDino model needs 6dims format.

Signed-off-by: hyunback <hyunback.kim@intel.com>
---
 .../src/graph/impls/ocl/strided_slice.cpp     |   3 +-
 .../cl_kernels/strided_slice_ref.cl           | 121 ++++++--
 .../strided_slice_kernel_ref.cpp              |  58 ++--
 .../dynamic/strided_slice.cpp                 |  52 ++++
 .../test_cases/strided_slice_gpu_test.cpp     | 263 ++++++++++++++++++
 5 files changed, 453 insertions(+), 44 deletions(-)

diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp
index e64498f73f709a..ab10c1ff106f9d 100644
--- a/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp
@@ -202,7 +202,8 @@ attach_strided_slice_impl::attach_strided_slice_impl() {
 
     auto formats = {
         format::bfyx,
-        format::bfzyx
+        format::bfzyx,
+        format::bfwzyx,
     };
 
     implementation_map<strided_slice>::add(impl_types::ocl,
diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl
index 23344ae06222ad..49e554703b8b74 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl
+++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl
@@ -8,7 +8,7 @@
 inline void FUNC(get_slice_step)(OPTIONAL_SHAPE_INFO_ARG
                                  const __global STRIDE_TYPE* stride,
                                  int* step_batch, int* step_feature,
-                                 int* step_z, int* step_y, int* step_x)
+                                 int* step_w, int* step_z, int* step_y, int* step_x)
 {
     const uint batch_index = 0;
     const uint feature_index = 1;
@@ -19,13 +19,23 @@ inline void FUNC(get_slice_step)(OPTIONAL_SHAPE_INFO_ARG
     const uint z_index = 2;
     const uint y_index = 3;
     const uint x_index = 4;
+#elif OUTPUT_LAYOUT_BFWZYX
+    const uint w_index = 2;
+    const uint z_index = 3;
+    const uint y_index = 4;
+    const uint x_index = 5;
 #endif
 
     *step_batch = batch_index < STRIDE_DIMS ? stride[batch_index] : 1;
     *step_feature = feature_index < STRIDE_DIMS ? stride[feature_index] : 1;
 #ifdef OUTPUT_LAYOUT_BFYX
+    *step_w = 0;
     *step_z = 0;
 #elif OUTPUT_LAYOUT_BFZYX
+    *step_w = 0;
+    *step_z = z_index < STRIDE_DIMS ? stride[z_index] : 1;
+#elif OUTPUT_LAYOUT_BFWZYX
+    *step_w = w_index < STRIDE_DIMS ? stride[w_index] : 1;
     *step_z = z_index < STRIDE_DIMS ? stride[z_index] : 1;
 #endif
     *step_y = y_index < STRIDE_DIMS ? stride[y_index] : 1;
@@ -50,10 +60,11 @@ inline int FUNC(check_end_bound)(const end_num,
 inline void FUNC(get_slice_end)(OPTIONAL_SHAPE_INFO_ARG
                                 const __global END_TYPE* end,
                                 int* end_batch, int* end_feature,
-                                int* end_z, int* end_y, int* end_x)
+                                int* end_w, int* end_z, int* end_y, int* end_x)
 {
     const uint out_batch_num = INPUT0_BATCH_NUM;
     const uint out_feature_num = INPUT0_FEATURE_NUM;
+    const uint out_w_num = INPUT0_SIZE_W;
     const uint out_z_num = INPUT0_SIZE_Z;
     const uint out_y_num = INPUT0_SIZE_Y;
     const uint out_x_num = INPUT0_SIZE_X;
@@ -66,10 +77,18 @@ inline void FUNC(get_slice_end)(OPTIONAL_SHAPE_INFO_ARG
     const uint z_index = 2;
     const uint y_index = 3;
     const uint x_index = 4;
+#elif OUTPUT_LAYOUT_BFWZYX
+    const uint w_index = 2;
+    const uint z_index = 3;
+    const uint y_index = 4;
+    const uint x_index = 5;
 #endif
     END_TYPE batch = batch_index < END_DIMS ? end[batch_index] : 0;
     END_TYPE feature = feature_index < END_DIMS ? end[feature_index] : 0;
-#ifdef OUTPUT_LAYOUT_BFZYX
+#ifdef OUTPUT_LAYOUT_BFWZYX
+    END_TYPE w = w_index < END_DIMS ? end[w_index] : 0;
+    END_TYPE z = z_index < END_DIMS ? end[z_index] : 0;
+#elif OUTPUT_LAYOUT_BFZYX
     END_TYPE z = z_index < END_DIMS ? end[z_index] : 0;
 #endif
     END_TYPE y = y_index < END_DIMS ? end[y_index] : 0;
@@ -77,7 +96,10 @@ inline void FUNC(get_slice_end)(OPTIONAL_SHAPE_INFO_ARG
 
     batch = (END_BATCH == 0) ? batch : TO_END_TYPE(out_batch_num);
     feature = (END_FEATURE == 0) ? feature : TO_END_TYPE(out_feature_num);
-#ifdef OUTPUT_LAYOUT_BFZYX
+#ifdef OUTPUT_LAYOUT_BFWZYX
+    w = (END_W == 0) ? w: TO_END_TYPE(out_w_num);
+    z = (END_Z == 0) ? z: TO_END_TYPE(out_z_num);
+#elif OUTPUT_LAYOUT_BFZYX
     z = (END_Z == 0) ? z: TO_END_TYPE(out_z_num);
 #endif
     y = (END_Y == 0) ? y : TO_END_TYPE(out_y_num);
@@ -86,9 +108,13 @@ inline void FUNC(get_slice_end)(OPTIONAL_SHAPE_INFO_ARG
     *end_batch = FUNC_CALL(check_end_bound)(batch, out_batch_num);
     *end_feature = FUNC_CALL(check_end_bound)(feature, out_feature_num);
 #ifdef OUTPUT_LAYOUT_BFYX
+    *end_w = 0;
     *end_z = 0;
 #elif OUTPUT_LAYOUT_BFZYX
     *end_z = FUNC_CALL(check_end_bound)(z, out_z_num);
+#elif OUTPUT_LAYOUT_BFWZYX
+    *end_w = FUNC_CALL(check_end_bound)(w, out_w_num);
+    *end_z = FUNC_CALL(check_end_bound)(z, out_z_num);
 #endif
     *end_y = FUNC_CALL(check_end_bound)(y, out_y_num);
     *end_x = FUNC_CALL(check_end_bound)(x, out_x_num);
@@ -97,16 +123,17 @@ inline void FUNC(get_slice_end)(OPTIONAL_SHAPE_INFO_ARG
 inline void FUNC(check_negative_stride)(OPTIONAL_SHAPE_INFO_ARG
                                         const __global END_TYPE* end,
                                         const int steps_batch, const int steps_feature,
-                                        const int steps_z, const int steps_y, const int steps_x,
+                                        const int steps_w, const int steps_z, const int steps_y, const int steps_x,
                                         int* begin_batch, int* begin_feature,
-                                        int* begin_z, int* begin_y, int* begin_x)
+                                        int* begin_w, int* begin_z, int* begin_y, int* begin_x)
 {
-    bool is_negative = (steps_batch < 0) || (steps_feature < 0) || (steps_z < 0) || (steps_y < 0) || (steps_x < 0);
+    bool is_negative = (steps_batch < 0) || (steps_feature < 0) || (steps_w < 0) || (steps_z < 0) || (steps_y < 0) || (steps_x < 0);
     if (is_negative) {
-        int end_batch, end_feature, end_z, end_y, end_x;
-        FUNC_CALL(get_slice_end)(OPTIONAL_SHAPE_INFO_TENSOR end, &end_batch, &end_feature, &end_z, &end_y, &end_x);
+        int end_batch, end_feature, end_w, end_z, end_y, end_x;
+        FUNC_CALL(get_slice_end)(OPTIONAL_SHAPE_INFO_TENSOR end, &end_batch, &end_feature, &end_w, &end_z, &end_y, &end_x);
         const int slice_end_batch = end_batch;
         const int slice_end_feature = end_feature;
+        const int slice_end_w = end_w;
         const int slice_end_z = end_z;
         const int slice_end_y = end_y;
         const int slice_end_x = end_x;
@@ -115,6 +142,8 @@ inline void FUNC(check_negative_stride)(OPTIONAL_SHAPE_INFO_ARG
             *begin_batch = slice_end_batch - 1;
         if ((steps_feature < 0) && (*begin_feature <= slice_end_feature))
             *begin_feature = slice_end_feature - 1;
+        if ((steps_w < 0) && (*begin_w <= slice_end_w))
+            *begin_w = slice_end_w - 1;
         if ((steps_z < 0) && (*begin_z <= slice_end_z))
             *begin_z = slice_end_z - 1;
         if ((steps_y < 0) && (*begin_y <= slice_end_y))
@@ -125,12 +154,13 @@ inline void FUNC(check_negative_stride)(OPTIONAL_SHAPE_INFO_ARG
 }
 #else // END_TYPE
 inline void FUNC(check_negative_stride)(const int steps_batch, const int steps_feature,
-                                        const int steps_z, const int steps_y, const int steps_x,
+                                        const int steps_w, const int steps_z, const int steps_y, const int steps_x,
                                         int* begin_batch, int* begin_feature,
-                                        int* begin_z, int* begin_y, int* begin_x)
+                                        int* begin_w, int* begin_z, int* begin_y, int* begin_x)
 {
     const int slice_end_batch = SLICE_END_BATCH;
     const int slice_end_feature = SLICE_END_FEATURE;
+    const int slice_end_w = SLICE_END_W;
     const int slice_end_z = SLICE_END_Z;
     const int slice_end_y = SLICE_END_Y;
     const int slice_end_x = SLICE_END_X;
@@ -139,6 +169,8 @@ inline void FUNC(check_negative_stride)(const int steps_batch, const int steps_f
         *begin_batch = slice_end_batch - 1;
     if ((steps_feature < 0) && (*begin_feature <= slice_end_feature))
         *begin_feature = slice_end_feature - 1;
+    if ((steps_w < 0) && (*begin_w <= slice_end_w))
+        *begin_w = slice_end_w - 1;
     if ((steps_z < 0) && (*begin_z <= slice_end_z))
         *begin_z = slice_end_z - 1;
     if ((steps_y < 0) && (*begin_y <= slice_end_y))
@@ -165,10 +197,11 @@ inline int FUNC(check_begin_bound)(BEGIN_TYPE begin_num,
 inline void FUNC(get_slice_begin)(OPTIONAL_SHAPE_INFO_ARG
                                   const __global BEGIN_TYPE* begin,
                                   int* begin_batch, int* begin_feature,
-                                  int* begin_z, int* begin_y, int* begin_x)
+                                  int* begin_w, int* begin_z, int* begin_y, int* begin_x)
 {
     const uint out_batch_num = INPUT0_BATCH_NUM;
     const uint out_feature_num = INPUT0_FEATURE_NUM;
+    const uint out_w_num = INPUT0_SIZE_W;
     const uint out_z_num = INPUT0_SIZE_Z;
     const uint out_y_num = INPUT0_SIZE_Y;
     const uint out_x_num = INPUT0_SIZE_X;
@@ -181,10 +214,18 @@ inline void FUNC(get_slice_begin)(OPTIONAL_SHAPE_INFO_ARG
     const uint z_index = 2;
     const uint y_index = 3;
     const uint x_index = 4;
+#elif OUTPUT_LAYOUT_BFWZYX
+    const uint w_index = 2;
+    const uint z_index = 3;
+    const uint y_index = 4;
+    const uint x_index = 5;
 #endif
     BEGIN_TYPE batch = batch_index < BEGIN_DIMS ? begin[batch_index] : 0;
     BEGIN_TYPE feature = feature_index < BEGIN_DIMS ? begin[feature_index] : 0;
-#ifdef OUTPUT_LAYOUT_BFZYX
+#ifdef OUTPUT_LAYOUT_BFWZYX
+    BEGIN_TYPE w = w_index < BEGIN_DIMS ? begin[w_index] : 0;
+    BEGIN_TYPE z = z_index < BEGIN_DIMS ? begin[z_index] : 0;
+#elif OUTPUT_LAYOUT_BFZYX
     BEGIN_TYPE z = z_index < BEGIN_DIMS ? begin[z_index] : 0;
 #endif
     BEGIN_TYPE y = y_index < BEGIN_DIMS ? begin[y_index] : 0;
@@ -192,7 +233,10 @@ inline void FUNC(get_slice_begin)(OPTIONAL_SHAPE_INFO_ARG
 
     batch = (BEGIN_BATCH == 0) ? batch : 0;
     feature = (BEGIN_FEATURE == 0) ? feature : 0;
-#ifdef OUTPUT_LAYOUT_BFZYX
+#ifdef OUTPUT_LAYOUT_BFWZYX
+    w = (BEGIN_W == 0) ? w: 0;
+    z = (BEGIN_Z == 0) ? z: 0;
+#elif OUTPUT_LAYOUT_BFZYX
     z = (BEGIN_Z == 0) ? z: 0;
 #endif
     y = (BEGIN_Y == 0) ? y : 0;
@@ -201,8 +245,13 @@ inline void FUNC(get_slice_begin)(OPTIONAL_SHAPE_INFO_ARG
     *begin_batch = FUNC_CALL(check_begin_bound)(batch, out_batch_num);
     *begin_feature = FUNC_CALL(check_begin_bound)(feature, out_feature_num);
 #ifdef OUTPUT_LAYOUT_BFYX
+    *begin_w = 0;
     *begin_z = 0;
 #elif OUTPUT_LAYOUT_BFZYX
+    *begin_w = 0;
+    *begin_z = FUNC_CALL(check_begin_bound)(z, out_z_num);
+#elif OUTPUT_LAYOUT_BFWZYX
+    *begin_w = FUNC_CALL(check_begin_bound)(w, out_w_num);
     *begin_z = FUNC_CALL(check_begin_bound)(z, out_z_num);
 #endif
     *begin_y = FUNC_CALL(check_begin_bound)(y, out_y_num);
@@ -226,36 +275,40 @@ KERNEL(strided_slice_ref)(OPTIONAL_SHAPE_INFO_ARG
     const uint batch = get_global_id(0);
     const uint feature = get_global_id(1);
 #ifdef STRIDE_TYPE
-    int step_batch, step_feature, step_z, step_y, step_x;
-    FUNC_CALL(get_slice_step)(OPTIONAL_SHAPE_INFO_TENSOR stride, &step_batch, &step_feature, &step_z, &step_y, &step_x);
+    int step_batch, step_feature, step_w, step_z, step_y, step_x;
+    FUNC_CALL(get_slice_step)(OPTIONAL_SHAPE_INFO_TENSOR stride, &step_batch, &step_feature, &step_w, &step_z, &step_y, &step_x);
     const int slice_steps_batch = step_batch;
     const int slice_steps_feature = step_feature;
+    const int slice_steps_w = step_w;
     const int slice_steps_z = step_z;
     const int slice_steps_y = step_y;
     const int slice_steps_x = step_x;
 #else // STRIDE_TYPE
     const int slice_steps_batch = SLICE_STEPS_BATCH;
     const int slice_steps_feature = SLICE_STEPS_FEATURE;
+    const int slice_steps_w = SLICE_STEPS_W;
     const int slice_steps_z = SLICE_STEPS_Z;
     const int slice_steps_y = SLICE_STEPS_Y;
     const int slice_steps_x = SLICE_STEPS_X;
 #endif // STRIDE_TYPE
 #ifdef BEGIN_TYPE
-    int begin_batch, begin_feature, begin_z, begin_y, begin_x;
-    FUNC_CALL(get_slice_begin)(OPTIONAL_SHAPE_INFO_TENSOR begin, &begin_batch, &begin_feature, &begin_z, &begin_y, &begin_x);
+    int begin_batch, begin_feature, begin_w, begin_z, begin_y, begin_x;
+    FUNC_CALL(get_slice_begin)(OPTIONAL_SHAPE_INFO_TENSOR begin, &begin_batch, &begin_feature, &begin_w, &begin_z, &begin_y, &begin_x);
 #ifdef END_TYPE
-    FUNC_CALL(check_negative_stride)(OPTIONAL_SHAPE_INFO_TENSOR end, slice_steps_batch, slice_steps_feature, slice_steps_z, slice_steps_y, slice_steps_x, &begin_batch, &begin_feature, &begin_z, &begin_y, &begin_x);
+    FUNC_CALL(check_negative_stride)(OPTIONAL_SHAPE_INFO_TENSOR end, slice_steps_batch, slice_steps_feature, slice_steps_w, slice_steps_z, slice_steps_y, slice_steps_x, &begin_batch, &begin_feature, &begin_w, &begin_z, &begin_y, &begin_x);
 #else // END_TYPE
-    FUNC_CALL(check_negative_stride)(slice_steps_batch, slice_steps_feature, slice_steps_z, slice_steps_y, slice_steps_x, &begin_batch, &begin_feature, &begin_z, &begin_y, &begin_x);
+    FUNC_CALL(check_negative_stride)(slice_steps_batch, slice_steps_feature, slice_steps_w, slice_steps_z, slice_steps_y, slice_steps_x, &begin_batch, &begin_feature, &begin_w, &begin_z, &begin_y, &begin_x);
 #endif // END_TYPE
     const int slice_begin_batch = begin_batch;
     const int slice_begin_feature = begin_feature;
+    const int slice_begin_w = begin_w;
     const int slice_begin_z = begin_z;
     const int slice_begin_y = begin_y;
     const int slice_begin_x = begin_x;
 #else // BEGIN_TYPE
     const int slice_begin_batch = SLICE_BEGIN_BATCH;
     const int slice_begin_feature = SLICE_BEGIN_FEATURE;
+    const int slice_begin_w = SLICE_BEGIN_W;
     const int slice_begin_z = SLICE_BEGIN_Z;
     const int slice_begin_y = SLICE_BEGIN_Y;
     const int slice_begin_x = SLICE_BEGIN_X;
@@ -264,32 +317,51 @@ KERNEL(strided_slice_ref)(OPTIONAL_SHAPE_INFO_ARG
 #if NEW_AXIS_MODE
     // If NEW_AXIS_MODE that just copy input to output
 #ifdef OUTPUT_LAYOUT_BFYX
+    const uint w_input = 0;
     const uint z_input = 0;
     const uint y_input = (uint)get_global_id(2) / INPUT0_SIZE_X;
     const uint x_input = (uint)get_global_id(2) % INPUT0_SIZE_X;
 #elif OUTPUT_LAYOUT_BFZYX
+    const uint w_input = 0;
     const uint yx_input = (uint)get_global_id(2) % (INPUT0_SIZE_X * INPUT0_SIZE_Y);
     const uint z_input = (uint)get_global_id(2) / (INPUT0_SIZE_X * INPUT0_SIZE_Y);
     const uint y_input = yx_input / INPUT0_SIZE_X;
     const uint x_input = yx_input % INPUT0_SIZE_X;
+#elif OUTPUT_LAYOUT_BFWZYX
+    const uint zyx_input = (uint)get_global_id(2) % (INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z);
+    const uint w_input = (uint)get_global_id(2) / (INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z);
+    const uint z_input = zyx_input / (INPUT0_SIZE_X * INPUT0_SIZE_Y);
+    const uint yx_input = zyx_input % (INPUT0_SIZE_X * INPUT0_SIZE_Y);
+    const uint y_input = yx_input / INPUT0_SIZE_X;
+    const uint x_input = yx_input % INPUT0_SIZE_X;
 #endif
     const uint input_index = INPUT0_OFFSET +
         batch * INPUT0_BATCH_PITCH +
         feature * INPUT0_FEATURE_PITCH +
+        w_input * INPUT0_W_PITCH +
         z_input * INPUT0_Z_PITCH +
         y_input * INPUT0_Y_PITCH +
         x_input * INPUT0_X_PITCH;
     output[input_index] = input[input_index];
 #else // NEW_AXIS_MODE
 #ifdef OUTPUT_LAYOUT_BFYX
+    const uint w = 0;
     const uint z = 0;
     const uint y = get_global_id(2) / OUTPUT_SIZE_X;
     const uint x = get_global_id(2) % OUTPUT_SIZE_X;
 #elif OUTPUT_LAYOUT_BFZYX
+    const uint w = 0;
     const uint yx = get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
     const uint z = get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
     const uint y = yx / OUTPUT_SIZE_X;
     const uint x = yx % OUTPUT_SIZE_X;
+#elif OUTPUT_LAYOUT_BFWZYX
+    const uint zyx = (uint)get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z);
+    const uint w = (uint)get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z);
+    const uint z = zyx / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+    const uint yx = zyx % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+    const uint y = yx / OUTPUT_SIZE_X;
+    const uint x = yx % OUTPUT_SIZE_X;
 #endif
 
 #if SHRINK_MODE
@@ -297,7 +369,12 @@ KERNEL(strided_slice_ref)(OPTIONAL_SHAPE_INFO_ARG
     const uint input_index = INPUT0_OFFSET +
         (slice_begin_batch + in_indices[0] * slice_steps_batch) * INPUT0_BATCH_PITCH +
         (slice_begin_feature + in_indices[1] * slice_steps_feature) * INPUT0_FEATURE_PITCH +
-    #if INPUT0_LAYOUT_BFZYX
+    #if INPUT0_LAYOUT_BFWZYX
+        (slice_begin_w + in_indices[2] * slice_steps_w) * INPUT0_W_PITCH +
+        (slice_begin_z + in_indices[3] * slice_steps_z) * INPUT0_Z_PITCH +
+        (slice_begin_y + in_indices[4] * slice_steps_y) * INPUT0_Y_PITCH +
+        (slice_begin_x + in_indices[5] * slice_steps_x) * INPUT0_X_PITCH;
+    #elif INPUT0_LAYOUT_BFZYX
         (slice_begin_z + in_indices[2] * slice_steps_z) * INPUT0_Z_PITCH +
         (slice_begin_y + in_indices[3] * slice_steps_y) * INPUT0_Y_PITCH +
         (slice_begin_x + in_indices[4] * slice_steps_x) * INPUT0_X_PITCH;
@@ -309,6 +386,7 @@ KERNEL(strided_slice_ref)(OPTIONAL_SHAPE_INFO_ARG
     const uint input_index = INPUT0_OFFSET +
             (slice_begin_batch + batch * slice_steps_batch) * INPUT0_BATCH_PITCH +
             (slice_begin_feature + feature * slice_steps_feature) * INPUT0_FEATURE_PITCH +
+            (slice_begin_w + w * slice_steps_w) * INPUT0_W_PITCH +
             (slice_begin_z + z * slice_steps_z) * INPUT0_Z_PITCH +
             (slice_begin_y + y * slice_steps_y) * INPUT0_Y_PITCH +
             (slice_begin_x + x * slice_steps_x) * INPUT0_X_PITCH;
@@ -317,6 +395,7 @@ KERNEL(strided_slice_ref)(OPTIONAL_SHAPE_INFO_ARG
     const uint output_index = OUTPUT_OFFSET +
             batch * OUTPUT_BATCH_PITCH +
             feature * OUTPUT_FEATURE_PITCH +
+            w * OUTPUT_W_PITCH +
             z * OUTPUT_Z_PITCH +
             y * OUTPUT_Y_PITCH +
             x * OUTPUT_X_PITCH;
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.cpp
index 8c64249302c85f..88e80f0c05851b 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.cpp
@@ -14,11 +14,18 @@ static void makeJitConstForParam(JitConstants& jit, const std::string name, cons
     jit.AddConstant(MakeJitConstant(name + "_SIZES", vec));
     jit.AddConstant(MakeJitConstant(name + "_BATCH", vec[0]));
     jit.AddConstant(MakeJitConstant(name + "_FEATURE", vec[1]));
-    if (vec.size() == 5) {  // BFZYX
+    if (vec.size() == 6) {  // BFWZYX
+        jit.AddConstant(MakeJitConstant(name + "_W", vec[2]));
+        jit.AddConstant(MakeJitConstant(name + "_Z", vec[3]));
+        jit.AddConstant(MakeJitConstant(name + "_Y", vec[4]));
+        jit.AddConstant(MakeJitConstant(name + "_X", vec[5]));
+    } else if (vec.size() == 5) {  // BFZYX
+        jit.AddConstant(MakeJitConstant(name + "_W", 0));
         jit.AddConstant(MakeJitConstant(name + "_Z", vec[2]));
         jit.AddConstant(MakeJitConstant(name + "_Y", vec[3]));
         jit.AddConstant(MakeJitConstant(name + "_X", vec[4]));
     } else {  // BFYX
+        jit.AddConstant(MakeJitConstant(name + "_W", 0));
         jit.AddConstant(MakeJitConstant(name + "_Z", 0));
         jit.AddConstant(MakeJitConstant(name + "_Y", vec[2]));
         jit.AddConstant(MakeJitConstant(name + "_X", vec[3]));
@@ -27,7 +34,7 @@ static void makeJitConstForParam(JitConstants& jit, const std::string name, cons
 
 static size_t GetUsedOutDimsCount(const strided_slice_params& params) {
     auto dims = params.outputs[0].GetDims();
-    size_t first_non_unit_dim = 0; // order is xy(z)fb, so by default consider that we use all dims
+    size_t first_non_unit_dim = 0; // order is xy(zw)fb, so by default consider that we use all dims
     for (size_t i = 0; i < dims.size(); i++) {
         if (dims[i].v != 1) {
             break;
@@ -70,7 +77,7 @@ bool StridedSliceKernelRef::Validate(const Params& p, const optional_params& o)
     if (params.inputs.empty())
         return false;
 
-    if (params.outputs[0].Dimentions() > 5 || params.inputs[0].Dimentions() > 5)
+    if (params.outputs[0].Dimentions() > 6 || params.inputs[0].Dimentions() > 6)
         return false;
 
     for (auto& fused_op : params.fused_ops) {
@@ -86,7 +93,7 @@ bool StridedSliceKernelRef::Validate(const Params& p, const optional_params& o)
         size_t used_out_dims = GetUsedOutDimsCount(params);
 
         // Count of actual output dims + count of shrinked axes shouldn't exceed 5 to be able to find input index correctly
-        if (used_out_dims + shrinked_axes > 5) {
+        if (used_out_dims + shrinked_axes > 6) {
             return false;
         }
     }
@@ -99,14 +106,15 @@ CommonDispatchData StridedSliceKernelRef::SetDefault(const strided_slice_params&
     auto out_layout = params.outputs[0].GetLayout();
     std::vector<std::vector<Tensor::DataChannelName>> dims_by_gws = {{ Tensor::DataChannelName::BATCH },
                                                                      { Tensor::DataChannelName::FEATURE },
-                                                                     { Tensor::DataChannelName::X, Tensor::DataChannelName::Y, Tensor::DataChannelName::Z }};
+                                                                     { Tensor::DataChannelName::X, Tensor::DataChannelName::Y,
+                                                                       Tensor::DataChannelName::Z, Tensor::DataChannelName::W }};
 
     // If the new_axis_mask is set, then begin, end, and stride are ignored
     // and a new length 1 dimension is adding. Input data just copying to output
     // TODO: remove data copying in case where only shape size changing
     dispatchData.gws = { params.outputs[0].Batch().v,
                          params.outputs[0].Feature().v,
-                         params.outputs[0].Z().v * params.outputs[0].Y().v * params.outputs[0].X().v };
+                         params.outputs[0].W().v * params.outputs[0].Z().v * params.outputs[0].Y().v * params.outputs[0].X().v };
 
     dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo, in_layout, out_layout, dims_by_gws);
 
@@ -161,33 +169,37 @@ JitConstants StridedSliceKernelRef::GetJitConstants(const strided_slice_params&
     if (shrink_mode) {
         jit.AddConstant(MakeJitConstant("SHRINK_MODE", true));
         makeJitConstForParam(jit, "SHRINK", params.shrink_axis_mask);
-        std::vector<std::string> bfzyx_in_order;
-        if (params.outputs[0].Dimentions() == 5)
-            bfzyx_in_order = {"batch", "feature", "z", "y", "x"};
+        std::vector<std::string> bfwzyx_in_order;
+        if (params.outputs[0].Dimentions() == 6)
+            bfwzyx_in_order = {"batch", "feature", "w", "z", "y", "x"};
+        else if (params.outputs[0].Dimentions() == 5)
+            bfwzyx_in_order = {"batch", "feature", "z", "y", "x"};
         else
-            bfzyx_in_order = {"batch", "feature", "y", "x"};
+            bfwzyx_in_order = {"batch", "feature", "y", "x"};
 
         // Insert zeroes to indices order for shinked axes
         for (size_t i = 0; i < params.shrink_axis_mask.size(); i++) {
             if (params.shrink_axis_mask[i] == 1) {
-                bfzyx_in_order.insert(bfzyx_in_order.begin() + i, "0");
+                bfwzyx_in_order.insert(bfwzyx_in_order.begin() + i, "0");
             }
         }
 
-        auto get_input_idx_order = [&](std::vector<std::string> bfzyx_in_order) -> std::string {
-            return bfzyx_in_order[0] + "," +
-                   bfzyx_in_order[1] + "," +
-                   bfzyx_in_order[2] + "," +
-                   bfzyx_in_order[3] + "," +
-                   bfzyx_in_order[4];
+        auto get_input_idx_order = [&](std::vector<std::string> bfwzyx_in_order) -> std::string {
+            std::string order = bfwzyx_in_order[0] + "," +
+                                bfwzyx_in_order[1] + "," +
+                                bfwzyx_in_order[2] + "," +
+                                bfwzyx_in_order[3] + "," +
+                                bfwzyx_in_order[4];
+            if (bfwzyx_in_order.size() == 6) order += "," + bfwzyx_in_order[5];
+            return order;
         };
-        // Erase indices that exceeds 5d tensor. It should be safe, because we check in Validate method that
+        // Erase indices that exceeds 6d tensor. It should be safe, because we check in Validate method that
         // shrinked axes don't result in too big dims count
-        while (bfzyx_in_order.size() > 5) {
-            bfzyx_in_order.pop_back();
+        while (bfwzyx_in_order.size() > 6) {
+            bfwzyx_in_order.pop_back();
         }
 
-        jit.AddConstant(MakeJitConstant("INPUT_INDICES_ORDER", get_input_idx_order(bfzyx_in_order)));
+        jit.AddConstant(MakeJitConstant("INPUT_INDICES_ORDER", get_input_idx_order(bfwzyx_in_order)));
     }
 
     return jit;
@@ -211,7 +223,9 @@ KernelsData StridedSliceKernelRef::GetKernelsData(const Params& params, const op
 
     if (!newParams.fused_ops.empty()) {
         std::vector<std::string> idx_order;
-        if (input.Dimentions() == 5) {
+        if (input.Dimentions() == 6) {
+            idx_order = {"b", "f", "w", "z", "y", "x"};
+        } else if (input.Dimentions() == 5) {
             idx_order = {"b", "f", "z", "y", "x"};
         } else if (input.Dimentions() == 4) {
             idx_order = {"b", "f", "y", "x"};
diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/strided_slice.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/strided_slice.cpp
index d039d644f7f4a6..052d6574d682d7 100644
--- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/strided_slice.cpp
+++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/strided_slice.cpp
@@ -271,5 +271,57 @@ INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_4D, StridedSliceLa
                              ::testing::Values(emptyAdditionalConfig)),
                          StridedSliceLayerGPUTest::getTestCaseName);
 
+
+const std::vector<StridedSliceParams> testCasesCommon5D = {
+        StridedSliceParams{ { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 },  { },  { },  { } },
+        StridedSliceParams{ { 0, 0, 10, 20 }, { 1, 5, 28, 26 }, { 1, 1, 1, 2 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 },  { },  { },  { } },
+        StridedSliceParams{ { 0, 0, 0, 20 }, { 1, 2, 30, 30 }, { 1, 1, 2, 1 }, { 0, 0, 0, 1 }, { 0, 1, 0, 1 },  { },  { },  { } },
+        StridedSliceParams{ { 0, 1, 2, 10 }, { 1, 5, 32, 18 }, { 1, 1, 1, 2 }, { 0, 0, 1, 0 }, { 0, 0, 0, 1 },  { },  { },  { } },
+        StridedSliceParams{ { 0, 0, 2, 10 }, { 1, 8, 32, 18 }, { 1, 2, 1, 2 },  { 0, 0, 1, 0 }, { 0, 0, 0, 1 },  { },  { },  { } },
+};
+
+const std::vector<InputShape> inputShapesDynamic5D = {
+        {{-1, -1, -1, -1, -1},
+         {{ 1, 5, 32, 32, 32 }, { 2, 5, 32, 32, 32 }, { 1, 5, 64, 64, 64 }}},
+
+        {{1, 64, -1, -1, -1},
+         {{ 1, 64, 1, 16, 32 }, { 1, 64, 1, 32, 64 }, { 1, 64, 1, 64, 64 }}},
+
+        {{1, -1, 16, 32, -1},
+        {{ 1, 16, 16, 32, 1 }, { 1, 32, 16, 32, 1 }, { 1, 64, 16, 32, 1 }}},
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_5D, StridedSliceLayerGPUTest,
+                         ::testing::Combine(
+                             ::testing::ValuesIn(inputShapesDynamic5D),
+                             ::testing::ValuesIn(testCasesCommon5D),
+                             ::testing::ValuesIn(inputPrecisions),
+                             ::testing::ValuesIn(restInputTypes),
+                             ::testing::Values(emptyAdditionalConfig)),
+                         StridedSliceLayerGPUTest::getTestCaseName);
+
+
+const std::vector<StridedSliceParams> testCasesCommon6D = {
+        StridedSliceParams{ { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 },  { },  { },  { } },
+        StridedSliceParams{ { 0, 0, 10, 20 }, { 1, 5, 28, 26 }, { 1, 1, 1, 2 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 },  { },  { },  { } },
+};
+
+const std::vector<InputShape> inputShapesDynamic6D = {
+        {{-1, -1, -1, -1, -1, -1},
+         {{ 1, 5, 5, 32, 32, 32 }, { 2, 5, 7, 32, 32, 64 }, { 1, 3, 5, 64, 64, 64 }}},
+
+        {{1, -1, 16, 32, -1, -1},
+        {{ 1, 16, 16, 32, 1, 32 }, { 1, 32, 16, 32, 32, 64 }, { 1, 64, 16, 32, 32, 64 }}},
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_6D, StridedSliceLayerGPUTest,
+                         ::testing::Combine(
+                             ::testing::ValuesIn(inputShapesDynamic6D),
+                             ::testing::ValuesIn(testCasesCommon6D),
+                             ::testing::ValuesIn(inputPrecisions),
+                             ::testing::ValuesIn(restInputTypes),
+                             ::testing::Values(emptyAdditionalConfig)),
+                         StridedSliceLayerGPUTest::getTestCaseName);
+
 } // namespace
 } // namespace GPULayerTestsDefinitions
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/strided_slice_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/strided_slice_gpu_test.cpp
index 71e193b47abde1..7a0280380b766c 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/strided_slice_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/strided_slice_gpu_test.cpp
@@ -118,6 +118,120 @@ class strided_slice_gpu: public ::testing::Test {
         }
     }
 
+    void test_2x2x2x2x2_full(bool is_caching_test) {
+        // Input (BFZYX): 2x2x2x2x2
+        // Begin (BFZYX): 0x0x0x0x0
+        // End (BFZYX): 2x2x2x2x2
+        // Stride (BFZYX): 1x1x1x1x1
+        // Output (BFZYX): 2x2x2x2x2
+
+        auto& engine = get_test_engine();
+        auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2, 2, 2 }, data_types::f32, format::bfzyx });
+
+        set_values(input, {
+                0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+                9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
+                16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f,
+                23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f,
+        });
+        std::vector<int64_t> begin_data = { 0, 0, 0, 0, 0 };
+        std::vector<int64_t> end_data = { 2, 2, 2, 2, 2 };
+        std::vector<int64_t> strides_data = { 1, 1, 1, 1, 1 };
+
+        topology topology;
+        topology.add(input_layout("input", input->get_layout()));
+        topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 2, 2, 2, 2}));
+
+        cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
+
+        network->set_input_data("input", input);
+
+        auto outputs = network->execute();
+
+        ASSERT_EQ(outputs.size(), size_t(1));
+        ASSERT_EQ(outputs.begin()->first, "strided_slice");
+
+        auto output = outputs.at("strided_slice").get_memory();
+
+        std::vector<float> answers = {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+            9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
+            16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f,
+            23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f,
+        };
+
+        cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+        ASSERT_EQ(output_ptr.size(), answers.size());
+        for (size_t i = 0; i < answers.size(); ++i)
+        {
+            ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
+        }
+    }
+
+
+    void test_2x2x2x2x2x2_full(bool is_caching_test) {
+        // Input (BFWZYX): 2x2x2x2x2x2
+        // Begin (BFWZYX): 0x0x0x0x0x0
+        // End (BFWZYX): 2x2x2x2x2x2
+        // Stride (BFWZYX): 1x1x1x1x1x1
+        // Output (BFWZYX): 2x2x2x2x2x2
+
+        auto& engine = get_test_engine();
+        auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2, 2, 2, 2 }, data_types::f32, format::bfwzyx });
+
+        set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f,
+            8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
+            16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f,
+            24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f,
+            32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f,
+            40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f,
+            48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f,
+            56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f,
+        });
+        std::vector<int64_t> begin_data = { 0, 0, 0, 0, 0, 0 };
+        std::vector<int64_t> end_data = { 2, 2, 2, 2, 2, 2 };
+        std::vector<int64_t> strides_data = { 1, 1, 1, 1, 1, 1 };
+
+        topology topology;
+        topology.add(input_layout("input", input->get_layout()));
+        topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 2, 2, 2, 2, 2}));
+
+        auto config = get_test_default_config(engine);
+        config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"strided_slice", {format::bfwzyx, "", impl_types::ocl}} }));
+
+        cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
+
+        network->set_input_data("input", input);
+
+        auto outputs = network->execute();
+
+        ASSERT_EQ(outputs.size(), size_t(1));
+        ASSERT_EQ(outputs.begin()->first, "strided_slice");
+
+        auto output = outputs.at("strided_slice").get_memory();
+
+        std::vector<float> answers = {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f,
+            8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
+            16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f,
+            24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f,
+            32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f,
+            40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f,
+            48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f,
+            56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f,
+        };
+
+        cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+        ASSERT_EQ(output_ptr.size(), answers.size());
+        for (size_t i = 0; i < answers.size(); ++i)
+        {
+            ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
+        }
+    }
+
     void test_2x2x2x2_full_pad(bool is_caching_test) {
         // Input (BFYX): 2x2x2x2
         // Begin (BFYX): 0x0x0x0
@@ -601,6 +715,94 @@ class strided_slice_gpu: public ::testing::Test {
         }
     }
 
+    void test_2x2x2x2x1x1(bool is_caching_test) {
+        // Input (BFWZYX): 2x2x2x2x1x1
+        // Output (BFWZYX): 1x2x2x2x1x1
+
+        auto& engine = get_test_engine();
+        auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx, { 2, 2, 1, 1, 2, 2 }});
+
+        set_values(input, {
+                0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f,
+                8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
+        });
+        std::vector<int64_t> begin_data = { 0, 0, 0, 0, 0, 0 };
+        std::vector<int64_t> end_data = { 1, 2, 2, 2, 1, 1 };
+        std::vector<int64_t> strides_data = { 1, 1, 1, 1, 1, 1 };
+
+        topology topology;
+        topology.add(input_layout("input", input->get_layout()));
+        topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {1, 2, 2, 2, 1, 1}));
+
+        cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
+
+        network->set_input_data("input", input);
+
+        auto outputs = network->execute();
+
+        ASSERT_EQ(outputs.size(), size_t(1));
+        ASSERT_EQ(outputs.begin()->first, "strided_slice");
+
+        auto output = outputs.at("strided_slice").get_memory();
+
+        std::vector<float> answers = {
+                0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f,
+        };
+
+        cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+        for (size_t i = 0; i < answers.size(); ++i)
+        {
+            ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
+        }
+    }
+
+    void test_2x2x2x2x1x1_2(bool is_caching_test, impl_types impl_type = impl_types::any) {
+        // Input (BFWZYX): 2x2x2x2x1x1
+        // Output (BFWZYX): 2x1x1x1x1x1
+
+        auto& engine = get_test_engine();
+        auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx, { 2, 2, 1, 1, 2, 2 } });
+
+        set_values(input, {
+                0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f,
+                8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
+        });
+        std::vector<int64_t> begin_data = { 0, 0, 0, 0 };
+        std::vector<int64_t> end_data = { 2, 2, 2, 2 };
+        std::vector<int64_t> strides_data = { 1, 2, 2, 2 };
+
+        topology topology;
+        topology.add(input_layout("input", input->get_layout()));
+        topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 1, 1, 1, 1}));
+
+        auto config = get_test_default_config(engine);
+        if (impl_type != impl_types::any)
+            config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"strided_slice", {format::bfwzyx, "", impl_types::cpu}} }));
+
+        cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
+
+        network->set_input_data("input", input);
+
+        auto outputs = network->execute();
+
+        ASSERT_EQ(outputs.size(), size_t(1));
+        ASSERT_EQ(outputs.begin()->first, "strided_slice");
+
+        auto output = outputs.at("strided_slice").get_memory();
+
+        std::vector<float> answers = {
+                0.0f, 8.0f,
+        };
+
+        cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+        for (size_t i = 0; i < answers.size(); ++i)
+        {
+            ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
+        }
+    }
+
     void test_2x2x2x2_full_negative_stride(bool is_caching_test) {
         // Input (BFYX): 2x2x2x2
         // Begin (BFYX): 0x0x0x0
@@ -1933,6 +2135,47 @@ class strided_slice_gpu_i8: public ::testing::Test {
             ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
         }
     }
+
+    void test_2x2x2x2x1x1(bool is_caching_test) {
+        // Input (BFWZYX): 2x2x2x2x1x1
+        // Output (BFWZYX): 1x2x2x2x1x1
+
+        auto& engine = get_test_engine();
+        auto input = engine.allocate_memory({ data_types::i8, format::bfwzyx, { 2, 2, 1, 1, 2, 2 } });
+
+        set_values<int8_t>(input, {
+                0, 1, 2, 3, 4, 5, 6, 7,
+                8, 9, 10, 11, 12, 13, 14, 15,
+        });
+        std::vector<int64_t> begin_data = { 0, 0, 0 };
+        std::vector<int64_t> end_data = { 1, 2, 2 };
+        std::vector<int64_t> strides_data = { 1, 1, 1 };
+
+        topology topology;
+        topology.add(input_layout("input", input->get_layout()));
+        topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {1, 2, 2, 2, 1, 1}));
+
+        cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
+
+        network->set_input_data("input", input);
+
+        auto outputs = network->execute();
+
+        ASSERT_EQ(outputs.size(), size_t(1));
+        ASSERT_EQ(outputs.begin()->first, "strided_slice");
+
+        auto output = outputs.at("strided_slice").get_memory();
+
+        std::vector<int8_t> answers = {
+                0, 1, 2, 3, 4, 5, 6, 7,
+        };
+
+        cldnn::mem_lock<int8_t> output_ptr(output, get_test_stream());
+
+        for (size_t i = 0; i < answers.size(); ++i) {
+            ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
+        }
+    }
 };
 
 class strided_slice_gpu_f32_i32: public ::testing::Test {
@@ -1999,6 +2242,14 @@ TEST_F(strided_slice_gpu_constants, test_2x2x2x2_full) {
     this->test_2x2x2x2_full(false);
 }
 
+TEST_F(strided_slice_gpu, test_2x2x2x2x2_full) {
+    this->test_2x2x2x2x2_full(false);
+}
+
+TEST_F(strided_slice_gpu, test_2x2x2x2x2x2_full) {
+    this->test_2x2x2x2x2x2_full(false);
+}
+
 TEST_F(strided_slice_gpu, test_2x2x2x2_full_pad) {
     this->test_2x2x2x2_full_pad(false);
 }
@@ -2087,6 +2338,18 @@ TEST_F(strided_slice_gpu_constants, test_2x2x2x1x1_2) {
     this->test_2x2x2x1x1_2(false);
 }
 
+TEST_F(strided_slice_gpu, test_2x2x2x2x1x1) {
+    this->test_2x2x2x2x1x1(false);
+}
+
+TEST_F(strided_slice_gpu, test_2x2x2x2x1x1_2) {
+    this->test_2x2x2x2x1x1_2(false);
+}
+
+TEST_F(strided_slice_gpu_i8, test_2x2x2x2x1x1) {
+    this->test_2x2x2x2x1x1(false);
+}
+
 TEST_F(strided_slice_gpu_f32_i32, test_1x1x1x8x1_new_axis_5d) {
     this->test_1x1x1x8x1_new_axis_5d(false);
 }