From 841da05801a03facb0c6fcddaa60dcd7042c48c8 Mon Sep 17 00:00:00 2001
From: liuyuang <liuyuang@baidu.com>
Date: Tue, 29 Nov 2022 15:48:32 +0800
Subject: [PATCH] NPU and MLU: support 0D index for gather, and 0D index and
 0D updates for scatter.

---
 backends/mlu/kernels/gather_kernel.cc         | 12 ++---
 .../unittests/test_zero_dim_tensor_mlu.py     | 49 +++++++++++++++++++
 backends/npu/kernels/gather_kernel.cc         |  5 +-
 backends/npu/kernels/scatter_kernel.cc        |  5 +-
 .../unittests/test_zero_dim_tensor_npu.py     | 49 +++++++++++++++++++
 5 files changed, 110 insertions(+), 10 deletions(-)

diff --git a/backends/mlu/kernels/gather_kernel.cc b/backends/mlu/kernels/gather_kernel.cc
index 760e4f2223..8149fcea40 100644
--- a/backends/mlu/kernels/gather_kernel.cc
+++ b/backends/mlu/kernels/gather_kernel.cc
@@ -35,10 +35,10 @@ void GatherKernel(const Context& dev_ctx,
             index_dims[1]));
   } else {
     PADDLE_ENFORCE_EQ(
-        index_dims.size(),
-        1,
+        index_dims.size() == 1 || index_dims.size() == 0,
+        true,
         phi::errors::InvalidArgument(
-            "The index should be 1D, when it is not 2D, but we get %d",
+            "The index should be 0D or 1D, when it is not 2D, but we get %d",
             index_dims.size()));
   }
 
@@ -80,10 +80,10 @@ void GatherGradKernel(const Context& dev_ctx,
             index_dims[1]));
   } else {
     PADDLE_ENFORCE_EQ(
-        index_dims.size(),
-        1,
+        index_dims.size() == 1 || index_dims.size() == 0,
+        true,
         phi::errors::InvalidArgument(
-            "The index should be 1D, when it is not 2D, but we get %d",
+            "The index should be 0D or 1D, when it is not 2D, but we get %d",
             index_dims.size()));
   }
 
diff --git a/backends/mlu/tests/unittests/test_zero_dim_tensor_mlu.py b/backends/mlu/tests/unittests/test_zero_dim_tensor_mlu.py
index 476546ad24..937a202e48 100644
--- a/backends/mlu/tests/unittests/test_zero_dim_tensor_mlu.py
+++ b/backends/mlu/tests/unittests/test_zero_dim_tensor_mlu.py
@@ -436,6 +436,55 @@ def test_searchsorted(self):
         self.assertEqual(out.shape, [])
         self.assertEqual(out.numpy(), 0)
 
+    def test_gather_1D(self):  # gather from a 1D x using a 0D index
+        x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False)
+        index = paddle.full([], 2, 'int64')  # 0D index holding the value 2
+        out = paddle.gather(x, index)
+        out.backward()
+
+        self.assertEqual(out.shape, [])  # expects a 0D result
+        self.assertEqual(out.numpy(), 5)  # x[2]
+        self.assertEqual(out.grad.shape, [])  # grad has the same 0D shape
+
+    def test_gather_xD_axis_0(self):  # 2D x, 0D index, no axis argument
+        x = paddle.to_tensor(
+            [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], stop_gradient=False
+        )
+        index = paddle.full([], 1, 'int64')  # 0D index holding the value 1
+        out = paddle.gather(x, index)
+        out.backward()
+
+        self.assertEqual(out.shape, [3])  # indexed dim is dropped
+        for i in range(3):
+            self.assertEqual(out.numpy()[i], x.numpy()[1][i])  # out == x[1]
+        self.assertEqual(out.grad.shape, [3])  # grad matches out's shape
+
+    def test_gather_xD_axis_1(self):  # 2D x, 0D index, axis=1; forward only
+        x = paddle.to_tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+        index = paddle.full([], 1, 'int64')  # 0D index holding the value 1
+        out = paddle.gather(x, index, axis=1)
+
+        self.assertEqual(out.shape, [2])  # indexed dim is dropped
+        for i in range(2):
+            self.assertEqual(out.numpy()[i], x.numpy()[i][1])  # out == x[:, 1]
+
+    def test_scatter_0D(self):  # 1D x with a 0D index and a 0D update
+        x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0])
+        index = paddle.full([], 2, 'int64')  # 0D index holding the value 2
+        updates = paddle.full([], 4.0)  # 0D update holding the value 4.0
+        out = paddle.scatter(x, index, updates)
+
+        self.assertEqual(out.numpy()[2], 4)  # position 2 takes the update
+
+    def test_scatter_XD(self):  # 2D x with a 0D index and a 1D update
+        x = paddle.to_tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+        index = paddle.full([], 1, 'int64')  # 0D index holding the value 1
+        updates = paddle.to_tensor([1.0, 2.0, 3.0])
+        out = paddle.scatter(x, index, updates)
+
+        for i in range(3):  # row 1 of out must equal updates
+            self.assertEqual(out.numpy()[1][i], updates.numpy()[i])
+
 
 # Use to test API whose zero-dim input tensors don't have grad and not need to test backward in OpTest.
 class TestNoBackwardAPI(unittest.TestCase):
diff --git a/backends/npu/kernels/gather_kernel.cc b/backends/npu/kernels/gather_kernel.cc
index a61765b73f..897a8d6529 100644
--- a/backends/npu/kernels/gather_kernel.cc
+++ b/backends/npu/kernels/gather_kernel.cc
@@ -48,8 +48,9 @@ void GatherGradKernel(const Context& dev_ctx,
   // step1: Unsqueeze index
   phi::DenseTensor tmp_tensor(index);
   const auto index_dims = index.dims();
-  if (index_dims.size() == 1) {
-    std::vector<int64_t> new_dim = {index_dims[0], 1};
+  if (index_dims.size() == 1 || index_dims.size() == 0) {
+    std::vector<int64_t> new_dim =
+        {index_dims.size() == 0 ? 1 : index_dims[0], 1};
     tmp_tensor.Resize(phi::make_ddim(new_dim));
     p_index = &tmp_tensor;
   }
diff --git a/backends/npu/kernels/scatter_kernel.cc b/backends/npu/kernels/scatter_kernel.cc
index bc8e08a0e9..3a62e3c1e6 100644
--- a/backends/npu/kernels/scatter_kernel.cc
+++ b/backends/npu/kernels/scatter_kernel.cc
@@ -29,8 +29,9 @@ void ScatterKernel(const Context& dev_ctx,
 
   phi::DenseTensor tmp_tensor(index);
   const auto index_dims = index.dims();
-  if (index_dims.size() == 1) {
-    std::vector<int64_t> new_dim = {index_dims[0], 1};
+  if (index_dims.size() == 1 || index_dims.size() == 0) {
+    std::vector<int64_t> new_dim =
+        {index_dims.size() == 0 ? 1 : index_dims[0], 1};
     tmp_tensor.Resize(phi::make_ddim(new_dim));
   }
 
diff --git a/backends/npu/tests/unittests/test_zero_dim_tensor_npu.py b/backends/npu/tests/unittests/test_zero_dim_tensor_npu.py
index b5a26f5013..2ac9b4872c 100644
--- a/backends/npu/tests/unittests/test_zero_dim_tensor_npu.py
+++ b/backends/npu/tests/unittests/test_zero_dim_tensor_npu.py
@@ -436,6 +436,55 @@ def test_searchsorted(self):
         self.assertEqual(out.shape, [])
         self.assertEqual(out.numpy(), 0)
 
+    def test_gather_1D(self):  # gather from a 1D x using a 0D index
+        x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False)
+        index = paddle.full([], 2, 'int64')  # 0D index holding the value 2
+        out = paddle.gather(x, index)
+        out.backward()
+
+        self.assertEqual(out.shape, [])  # expects a 0D result
+        self.assertEqual(out.numpy(), 5)  # x[2]
+        self.assertEqual(out.grad.shape, [])  # grad has the same 0D shape
+
+    def test_gather_xD_axis_0(self):  # 2D x, 0D index, no axis argument
+        x = paddle.to_tensor(
+            [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], stop_gradient=False
+        )
+        index = paddle.full([], 1, 'int64')  # 0D index holding the value 1
+        out = paddle.gather(x, index)
+        out.backward()
+
+        self.assertEqual(out.shape, [3])  # indexed dim is dropped
+        for i in range(3):
+            self.assertEqual(out.numpy()[i], x.numpy()[1][i])  # out == x[1]
+        self.assertEqual(out.grad.shape, [3])  # grad matches out's shape
+
+    def test_gather_xD_axis_1(self):  # 2D x, 0D index, axis=1; forward only
+        x = paddle.to_tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+        index = paddle.full([], 1, 'int64')  # 0D index holding the value 1
+        out = paddle.gather(x, index, axis=1)
+
+        self.assertEqual(out.shape, [2])  # indexed dim is dropped
+        for i in range(2):
+            self.assertEqual(out.numpy()[i], x.numpy()[i][1])  # out == x[:, 1]
+
+    def test_scatter_0D(self):  # 1D x with a 0D index and a 0D update
+        x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0])
+        index = paddle.full([], 2, 'int64')  # 0D index holding the value 2
+        updates = paddle.full([], 4.0)  # 0D update holding the value 4.0
+        out = paddle.scatter(x, index, updates)
+
+        self.assertEqual(out.numpy()[2], 4)  # position 2 takes the update
+
+    def test_scatter_XD(self):  # 2D x with a 0D index and a 1D update
+        x = paddle.to_tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+        index = paddle.full([], 1, 'int64')  # 0D index holding the value 1
+        updates = paddle.to_tensor([1.0, 2.0, 3.0])
+        out = paddle.scatter(x, index, updates)
+
+        for i in range(3):  # row 1 of out must equal updates
+            self.assertEqual(out.numpy()[1][i], updates.numpy()[i])
+
 # Use to test API whose zero-dim input tensors don't have grad and not need to test backward in OpTest.
 class TestNoBackwardAPI(unittest.TestCase):
     def setUp(self):