diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc
index db44df1e6fde4..8852f3a419adc 100644
--- a/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#ifdef PADDLE_WITH_ASCEND_CL
 #include <memory>
 #include <string>
 
@@ -61,13 +60,13 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel<T> {
     auto place = ctx.GetPlace();
 
     auto stream =
-       ctx.template device_context<paddle::platform::NPUDeviceContext>()
-           .stream();
+        ctx.template device_context<paddle::platform::NPUDeviceContext>()
+            .stream();
 
     Tensor y_power(y->type());
     y_power.mutable_data<T>(y->dims(), place);
-    auto y_power_runner = NpuOpRunner("Power", {*y},
-             {y_power}, {{"power", static_cast<float>(-1)}});
+    auto y_power_runner = NpuOpRunner("Power", {*y}, {y_power},
+                                      {{"power", static_cast<float>(-1)}});
     y_power_runner.Run(stream);
 
     if (dx) {
@@ -75,32 +74,33 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel<T> {
 
       Tensor tensor_zeros(x->type());
       tensor_zeros.mutable_data<T>(x->dims(), place);
-      auto tensor_zeros_runner = NpuOpRunner("ZerosLike", {*x},
-               {tensor_zeros}, {});
+      auto tensor_zeros_runner =
+          NpuOpRunner("ZerosLike", {*x}, {tensor_zeros}, {});
       tensor_zeros_runner.Run(stream);
 
       Tensor x_zero(paddle::framework::proto::VarType::BOOL);
       x_zero.mutable_data<bool>(x->dims(), place);
-      auto x_zero_runner = NpuOpRunner("Equal", {*x, tensor_zeros},
-              {x_zero}, {});
+      auto x_zero_runner =
+          NpuOpRunner("Equal", {*x, tensor_zeros}, {x_zero}, {});
       x_zero_runner.Run(stream);
 
       Tensor x_nozero(paddle::framework::proto::VarType::BOOL);
       x_nozero.mutable_data<bool>(x->dims(), place);
-      auto x_nozero_runner = NpuOpRunner("LogicalNot", {x_zero},
-              {x_nozero}, {});
+      auto x_nozero_runner =
+          NpuOpRunner("LogicalNot", {x_zero}, {x_nozero}, {});
       x_nozero_runner.Run(stream);
 
       Tensor x_nozero_f(x->type());
       x_nozero_f.mutable_data<T>(x->dims(), place);
-      auto x_nozero_f_runner = NpuOpRunner("Cast", {x_nozero},
-               {x_nozero_f}, {{"dst_type", static_cast<int32_t>(0)}});
+      auto x_nozero_f_runner =
+          NpuOpRunner("Cast", {x_nozero}, {x_nozero_f},
+                      {{"dst_type", static_cast<int32_t>(0)}});
       x_nozero_f_runner.Run(stream);
 
       Tensor x_grad_w(x->type());
       x_grad_w.mutable_data<T>(x->dims(), place);
-      auto x_grad_w_runner = NpuOpRunner("Mul", {x_nozero_f, y_power},
-               {x_grad_w}, {});
+      auto x_grad_w_runner =
+          NpuOpRunner("Mul", {x_nozero_f, y_power}, {x_grad_w}, {});
       x_grad_w_runner.Run(stream);
 
       auto x_grad_runner = NpuOpRunner("Mul", {x_grad_w, *dout}, {*dx}, {});
@@ -112,14 +112,12 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel<T> {
 
       Tensor neg_out(y->type());
       neg_out.mutable_data<T>(y->dims(), place);
-      auto neg_out_runner = NpuOpRunner("Neg", {*out},
-              {neg_out}, {});
+      auto neg_out_runner = NpuOpRunner("Neg", {*out}, {neg_out}, {});
       neg_out_runner.Run(stream);
 
       Tensor y_grad_w(y->type());
       y_grad_w.mutable_data<T>(y->dims(), place);
-      auto y_grad_w_runner = NpuOpRunner("Div", {neg_out, *y},
-              {y_grad_w}, {});
+      auto y_grad_w_runner = NpuOpRunner("Div", {neg_out, *y}, {y_grad_w}, {});
       y_grad_w_runner.Run(stream);
 
       auto y_grad_runner = NpuOpRunner("Mul", {y_grad_w, *dout}, {*dy}, {});
@@ -143,4 +141,3 @@ REGISTER_OP_NPU_KERNEL(
     ops::ElementwiseDivGradNPUKernel<paddle::platform::NPUDeviceContext, float>,
     ops::ElementwiseDivGradNPUKernel<paddle::platform::NPUDeviceContext,
                                      paddle::platform::float16>);
-#endif
diff --git a/paddle/fluid/operators/elementwise/elementwise_floordiv_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_floordiv_op_npu.cc
new file mode 100644
index 0000000000000..da0116114747f
--- /dev/null
+++ b/paddle/fluid/operators/elementwise/elementwise_floordiv_op_npu.cc
@@ -0,0 +1,52 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <memory>
+#include <string>
+
+#include "paddle/fluid/operators/elementwise/elementwise_div_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+template <typename T>
+class ElementwiseFloorDivNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<Tensor>("X");
+    auto* y = ctx.Input<Tensor>("Y");
+    auto* out = ctx.Output<Tensor>("Out");
+
+    out->mutable_data<T>(ctx.GetPlace());
+
+    auto stream =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>()
+            .stream();
+
+    auto runner = NpuOpRunner("FloorDiv", {*x, *y}, {*out}, {});
+    runner.Run(stream);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OP_NPU_KERNEL(elementwise_floordiv,
+                       ops::ElementwiseFloorDivNPUKernel<int>,
+                       ops::ElementwiseFloorDivNPUKernel<int64_t>);
diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_floordiv_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_floordiv_op_npu.py
new file mode 100644
index 0000000000000..93538e938670f
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_floordiv_op_npu.py
@@ -0,0 +1,67 @@
+#  Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import unittest
+import sys
+sys.path.append("..")
+from op_test import OpTest
+import paddle
+
+paddle.enable_static()
+
+
+@unittest.skipIf(not paddle.is_compiled_with_npu(),
+                 "core is not compiled with NPU")
+class TestElementwiseFloorDiv(OpTest):
+    def setUp(self):
+        self.op_type = "elementwise_floordiv"
+        self.set_npu()
+        self.init_dtype()
+        self.init_input_output()
+
+        self.inputs = {
+            'X': OpTest.np_dtype_to_fluid_dtype(self.x),
+            'Y': OpTest.np_dtype_to_fluid_dtype(self.y)
+        }
+        self.attrs = {}
+        self.outputs = {'Out': self.out}
+
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.place = paddle.NPUPlace(0)
+
+    def init_input_output(self):
+        self.x = np.random.uniform(1, 1000, [10, 10]).astype(self.dtype)
+        self.y = np.random.uniform(1, 1000, [10, 10]).astype(self.dtype)
+        self.out = np.floor_divide(self.x, self.y)
+
+    def init_dtype(self):
+        self.dtype = "int64"
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place, check_dygraph=False)
+
+
+@unittest.skipIf(not paddle.is_compiled_with_npu(),
+                 "core is not compiled with NPU")
+class TestElementwiseFloorDiv2(TestElementwiseFloorDiv):
+    def init_dtype(self):
+        self.dtype = "int32"
+
+
+if __name__ == '__main__':
+    unittest.main()