diff --git a/paddle/fluid/framework/dlpack_tensor.cc b/paddle/fluid/framework/dlpack_tensor.cc
index 37a129297f07e..f80f2359cf701 100644
--- a/paddle/fluid/framework/dlpack_tensor.cc
+++ b/paddle/fluid/framework/dlpack_tensor.cc
@@ -89,11 +89,6 @@ struct DLDeviceVisitor
         platform::errors::Unimplemented("platform::XPUPlace is not supported"));
   }
 
-  inline ::DLDevice operator()(const platform::NPUPlace &place) const {
-    PADDLE_THROW(
-        platform::errors::Unimplemented("platform::NPUPlace is not supported"));
-  }
-
   inline ::DLDevice operator()(const platform::NPUPinnedPlace &place) const {
     PADDLE_THROW(platform::errors::Unimplemented(
         "platform::NPUPinnedPlace is not supported"));
diff --git a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
index fa570394d80f3..01eb5dfbd9f02 100644
--- a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
+++ b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
@@ -55,8 +55,6 @@ static phi::Backend ConvertPlaceToBackend(const phi::Place& place) {
       return phi::Backend::GPU;
     case phi::AllocationType::XPU:
       return phi::Backend::XPU;
-    case phi::AllocationType::NPU:
-      return phi::Backend::NPU;
     default:
       PADDLE_THROW(platform::errors::InvalidArgument(
           "Cannot convert place(%d).", static_cast<int>(place.GetType())));
diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h
index 941e8ea050e08..1d2a372684339 100644
--- a/paddle/fluid/framework/op_registry.h
+++ b/paddle/fluid/framework/op_registry.h
@@ -374,9 +374,6 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
 #define REGISTER_OP_XPU_KERNEL(op_type, ...) \
   REGISTER_OP_KERNEL(op_type, XPU, ::paddle::platform::XPUPlace, __VA_ARGS__)
 
-#define REGISTER_OP_NPU_KERNEL(op_type, ...) \
-  REGISTER_OP_KERNEL(op_type, NPU, ::paddle::platform::NPUPlace, __VA_ARGS__)
-
 #define REGISTER_OP_KERNEL_EX(op_type, library_type, place_class,  \
                               customized_name,                     \
                               customized_type_value,               \
@@ -413,12 +410,6 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
       ::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
       __VA_ARGS__)
 
-#define REGISTER_OP_NPU_KERNEL_FUNCTOR(op_type, ...)                  \
-  REGISTER_OP_KERNEL_EX(                                              \
-      op_type, NPU, ::paddle::platform::NPUPlace, DEFAULT_TYPE,       \
-      ::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
-      __VA_ARGS__)
-
 #define REGISTER_OP_IPU_KERNEL_FUNCTOR(op_type, ...)                  \
   REGISTER_OP_KERNEL_EX(                                              \
       op_type, IPU, ::paddle::platform::IPUPlace, DEFAULT_TYPE,       \
diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index 3820667116bc6..8937d58f8465e 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -1327,8 +1327,6 @@ void ParallelExecutor::InitExecutorPrivateMemberInfo(
     device_name = "CPU";
   } else if (member_->use_device_ == p::kCUDA) {
     device_name = "CUDA";
-  } else if (member_->use_device_ == p::kNPU) {
-    device_name = "NPU";
   } else if (member_->use_device_ == p::kXPU) {
     device_name = "XPU";
   } else {
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 928153dab5e62..466eee7d9414f 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -138,8 +138,6 @@ phi::Backend ConvertBackend(paddle_infer::PlaceType backend) {
     case paddle_infer::PlaceType::kGPU:
       // NOTE: phi also support phi::Backend::GPUDNN.
       return phi::Backend::GPU;
-    case paddle_infer::PlaceType::kNPU:
-      return phi::Backend::NPU;
     case paddle_infer::PlaceType::kXPU:
       return phi::Backend::XPU;
     case paddle_infer::PlaceType::kCPU:
diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc
index 0d5c8f98020a8..28353150c265c 100644
--- a/paddle/fluid/inference/api/api_impl.cc
+++ b/paddle/fluid/inference/api/api_impl.cc
@@ -82,8 +82,6 @@ bool NativePaddlePredictor::Init(
     place_ = paddle::platform::CUDAPlace(config_.device);
   } else if (config_.use_xpu) {
     place_ = paddle::platform::XPUPlace(config_.device);
-  } else if (config_.use_npu) {
-    place_ = paddle::platform::NPUPlace(config_.device);
   } else {
     place_ = paddle::platform::CPUPlace();
   }
diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
index 9fd94fa5d57e2..f5a9f4f02f8bc 100644
--- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -124,9 +124,6 @@ T *Tensor::mutable_data(PlaceType place) {
     case static_cast<int>(PlaceType::kXPU): {
       return tensor->mutable_data<T>(paddle::platform::XPUPlace(device_));
     }
-    case static_cast<int>(PlaceType::kNPU): {
-      return tensor->mutable_data<T>(paddle::platform::NPUPlace(device_));
-    }
     case static_cast<int>(PlaceType::kCUSTOM): {
       return tensor->mutable_data<T>(
           paddle::platform::CustomPlace(device_type_, device_));
diff --git a/paddle/fluid/inference/api/paddle_tensor.h b/paddle/fluid/inference/api/paddle_tensor.h
index 3838bc1c829c4..25b5ba8be7a21 100644
--- a/paddle/fluid/inference/api/paddle_tensor.h
+++ b/paddle/fluid/inference/api/paddle_tensor.h
@@ -67,7 +67,9 @@ enum DataType {
   // TODO(Inference): support more data types if needed.
 };
 
-enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU, kNPU, kIPU, kCUSTOM };
+// NOTE: kNPU (formerly 3) was removed; kIPU and kCUSTOM keep their previous
+// numeric values so binaries built against older headers stay compatible.
+enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU, kIPU = 4, kCUSTOM };
 
 enum class DataLayout { kUNK = -1, kAny, kNHWC, kNCHW };
 
diff --git a/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc b/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc
index 4a0160affefcb..2b34747fd124d 100644
--- a/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc
+++ b/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc
@@ -53,6 +53,3 @@ namespace plat = paddle::platform;
 REGISTER_OP_WITHOUT_GRADIENT(c_sync_comm_stream,
                              ops::CSyncCommStreamOp,
                              ops::CSyncCommStreamOpMaker);
-
-REGISTER_OP_NPU_KERNEL(c_sync_comm_stream,
-                       ops::CSyncCommStreamKernel<float, plat::NPUPlace>);
diff --git a/paddle/fluid/operators/fill_constant_op.cc b/paddle/fluid/operators/fill_constant_op.cc
index 4b2ee8763cd65..8af460259a37b 100644
--- a/paddle/fluid/operators/fill_constant_op.cc
+++ b/paddle/fluid/operators/fill_constant_op.cc
@@ -95,9 +95,6 @@ class FillConstantOp : public framework::OperatorWithKernel {
         case 3:
           kt.set_backend(phi::Backend::XPU);
           break;
-        case 4:
-          kt.set_backend(phi::Backend::NPU);
-          break;
         default:
           PADDLE_THROW(platform::errors::Unimplemented(
               "Could NOT determine the place of variable, place_type = %d .",
@@ -161,8 +158,7 @@ class FillConstantOpMaker : public framework::OpProtoAndCheckerMaker {
                  "0: CPUPlace. "
                  "1: CUDAPlace. "
                  "2: CUDAPinnedPlace. "
-                 "3: XPUPlace. "
-                 "4: NPUPlace. ")
+                 "3: XPUPlace. ")
         .SetDefault(-1);
     AddOutput("Out",
               "(Tensor) Tensor of specified shape will be filled "
diff --git a/paddle/fluid/operators/memcpy_d2h_op.cc b/paddle/fluid/operators/memcpy_d2h_op.cc
index 60d7a6ee14ba7..4b73dfd0353e1 100644
--- a/paddle/fluid/operators/memcpy_d2h_op.cc
+++ b/paddle/fluid/operators/memcpy_d2h_op.cc
@@ -86,16 +86,15 @@ class MemcpyD2HOpProtoMaker : public framework::OpProtoAndCheckerMaker {
     AddOutput("Out",
               "(phi::DenseTensor) The type of output "
               "is the same as input X.");
-    AddAttr<int>(
-        "dst_place_type",
-        "Determine the dst place of tensor copy. "
-        "By Now it ONLY support XPU/NPUPlace/CUDAPlace <-> CUDAPinnedPlace/CPU"
-        "Other place type is Unimplemented and will cause ERROR."
-        "0: dst is on CPUPlace. "
-        "1: dst is on CUDAPinnedPlace. ");
+    AddAttr<int>("dst_place_type",
+                 "Determine the dst place of tensor copy. By Now it ONLY "
+                 "support XPU/CUDAPlace <-> CUDAPinnedPlace/CPU. "
+                 "Other place type is Unimplemented and will cause ERROR. "
+                 "0: dst is on CPUPlace. "
+                 "1: dst is on CUDAPinnedPlace. ");
     AddComment(R"DOC(
     MemcpyD2H Operator.
-    By now, it ONLY supports the memcopy between NPUPlace/CUDAPlace <-> CUDAPinnedPlace/CPU.
+    By now, it ONLY supports the memcopy between CUDAPlace <-> CUDAPinnedPlace/CPU.
     You would have to update it if you want other more capacities.
 Out = X,  when type in [phi::DenseTensor]
 raise error if the type is not listed above.
diff --git a/paddle/fluid/operators/memcpy_h2d_op.cc b/paddle/fluid/operators/memcpy_h2d_op.cc
index 8997205af193a..d90243f8f9ec3 100644
--- a/paddle/fluid/operators/memcpy_h2d_op.cc
+++ b/paddle/fluid/operators/memcpy_h2d_op.cc
@@ -91,13 +91,12 @@ class MemcpyH2DOpProtoMaker : public framework::OpProtoAndCheckerMaker {
                  "Determine the dst place of tensor copy. "
                  "By Now it support:"
                  "0. CUDAPinnedPlace/CPU <->CUDAPlace"
-                 "1. NPUPinnedPlace/CPU <-> NPUPlace"
-                 "2. CPU <->XPUPlace"
-                 "3. CPU <->IPUPlace"
+                 " 1. CPU <->XPUPlace"
+                 " 2. CPU <->IPUPlace. "
                  "Other place type is Unimplemented and will cause ERROR.");
     AddComment(R"DOC(
     MemcpyD2H Operator.
-    By now, it ONLY supports the memcopy between CUDAPinnedPlace/CPU <-> NPUPlace/CUDAPlace.
+    By now, it ONLY supports the memcopy between CUDAPinnedPlace/CPU <-> CUDAPlace.
     You would have to update it if you want other more capacities.
 Out = X,  when type in [phi::DenseTensor]
 raise error if the type is not listed above.
diff --git a/paddle/fluid/operators/memcpy_op.cc b/paddle/fluid/operators/memcpy_op.cc
index 6d6baf1e5295c..c1ee17789ac46 100644
--- a/paddle/fluid/operators/memcpy_op.cc
+++ b/paddle/fluid/operators/memcpy_op.cc
@@ -105,20 +105,17 @@ class MemcpyOpProtoMaker : public framework::OpProtoAndCheckerMaker {
               "is the same as input X.");
     AddAttr<int>("dst_place_type",
                  "Determine the dst place of tensor copy. "
-                 "By Now it ONLY support CUDAPlace <-> CUDAPinnedPlace or "
-                 "NPUPlace <-> CPUPlace. "
+                 "By Now it ONLY support CUDAPlace <-> CUDAPinnedPlace. "
                  "Other place type is Unimplemented and will cause ERROR."
                  "0: dst is on CPUPlace. "
                  "1: dst is on CUDAPlace. "
                  "2: dst is on CUDAPinnedPlace. "
                  "3: dst is on XPUPlace. "
-                 "4: dst is on NPUPlace. "
-                 "5: dst is on NPUPinnerPlace. "
-                 "6: dst is on CustomDevicePlace");
+                 "4: dst is on NPUPinnedPlace. "
+                 "5: dst is on CustomDevicePlace");
     AddComment(R"DOC(
     Memcpy Operator.
-    By now, it ONLY supports the memcopy between CUDAPinnedPlace <-> CUDAPlace or
-    NPUPlace <-> CPUPlace, and used as an internal op by Recompute-Offload.
+    By now, it ONLY supports the memcopy between CUDAPinnedPlace <-> CUDAPlace, and used as an internal op by Recompute-Offload.
     You would have to update it if you want other more capacities.
 
 Out = X,  when type in [phi::DenseTensor]
diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index 96ddef2c60efe..f6cdb19f8d1f8 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -106,7 +106,6 @@ DeviceType Place2DeviceType(const platform::Place& place);
 constexpr DeviceType kCPU = DeviceType::CPU;
 constexpr DeviceType kCUDA = DeviceType::CUDA;
 constexpr DeviceType kXPU = DeviceType::XPU;
-constexpr DeviceType kNPU = DeviceType::NPU;
 constexpr DeviceType kIPU = DeviceType::IPU;
 constexpr DeviceType kCUSTOM_DEVICE = DeviceType::CUSTOM_DEVICE;
 
diff --git a/paddle/fluid/platform/device_event.h b/paddle/fluid/platform/device_event.h
index dc40fbe186e88..402974b89e5c9 100644
--- a/paddle/fluid/platform/device_event.h
+++ b/paddle/fluid/platform/device_event.h
@@ -26,7 +26,6 @@
 using ::paddle::platform::kCPU;
 using ::paddle::platform::kCUDA;
 using ::paddle::platform::kCUSTOM_DEVICE;
-using ::paddle::platform::kNPU;
 using ::paddle::platform::kXPU;
 
 USE_EVENT(kCPU)
diff --git a/paddle/fluid/platform/place.cc b/paddle/fluid/platform/place.cc
index ba5a25aa21a73..b8452a594e358 100644
--- a/paddle/fluid/platform/place.cc
+++ b/paddle/fluid/platform/place.cc
@@ -102,8 +102,6 @@ Place PlaceHelper::CreatePlace(const std::string &dev_type, size_t dev_id) {
     return platform::CPUPlace();
   } else if (dev_type == "gpu") {
     return platform::CUDAPlace(dev_id);
-  } else if (dev_type == "npu") {
-    return platform::NPUPlace(dev_id);
   } else if (dev_type == "xpu") {
     return platform::XPUPlace(dev_id);
   } else {
diff --git a/paddle/fluid/platform/place.h b/paddle/fluid/platform/place.h
index 3ea1dcded2c62..fe20429ae829e 100644
--- a/paddle/fluid/platform/place.h
+++ b/paddle/fluid/platform/place.h
@@ -28,7 +28,6 @@ using Place = phi::Place;
 using CPUPlace = phi::CPUPlace;
 using CUDAPlace = phi::GPUPlace;
 using CUDAPinnedPlace = phi::GPUPinnedPlace;
-using NPUPlace = phi::NPUPlace;
 using NPUPinnedPlace = phi::NPUPinnedPlace;
 using XPUPlace = phi::XPUPlace;
 using IPUPlace = phi::IPUPlace;
@@ -88,11 +87,6 @@ typename Visitor::result_type VisitPlace(const Place &place,
       return typename Visitor::result_type();
 #endif
     }
-    case phi::AllocationType::NPU: {
-      PADDLE_THROW(platform::errors::Unavailable(
-          "Paddle is not compiled with NPU. Cannot visit npu_pinned"));
-      return typename Visitor::result_type();
-    }
     case phi::AllocationType::NPUPINNED: {
       PADDLE_THROW(platform::errors::Unavailable(
           "Paddle is not compiled with NPU. Cannot visit npu_pinned"));
diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc
index d69417a6c0a73..3029cee9e0d31 100644
--- a/paddle/fluid/pybind/eager.cc
+++ b/paddle/fluid/pybind/eager.cc
@@ -159,7 +159,7 @@ void InitTensorWithNumpyValue(TensorObject* self,
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "Place should be one of "
-        "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/CustomPlace"));
+        "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/CustomPlace"));
   }
 }
 
diff --git a/paddle/fluid/pybind/eager_math_op_patch.cc b/paddle/fluid/pybind/eager_math_op_patch.cc
index d764aedd8a1d6..69d0465bf7cdd 100644
--- a/paddle/fluid/pybind/eager_math_op_patch.cc
+++ b/paddle/fluid/pybind/eager_math_op_patch.cc
@@ -108,7 +108,7 @@ void InitTensorWithNumpyValue(const py::object& array,
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "Place should be one of "
-        "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/CustomPlace"));
+        "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/CustomPlace"));
   }
 }
 
diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc
index 0312ad8d96041..59bc745aedc03 100644
--- a/paddle/fluid/pybind/eager_utils.cc
+++ b/paddle/fluid/pybind/eager_utils.cc
@@ -52,7 +52,6 @@ extern PyTypeObject* g_place_pytype;
 extern PyTypeObject* g_cudaplace_pytype;
 extern PyTypeObject* g_cpuplace_pytype;
 extern PyTypeObject* g_xpuplace_pytype;
-extern PyTypeObject* g_npuplace_pytype;
 extern PyTypeObject* g_cudapinnedplace_pytype;
 extern PyTypeObject* g_customplace_pytype;
 extern PyTypeObject* g_framework_tensor_pytype;
@@ -529,9 +528,6 @@ platform::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos) {
   } else if (PyObject_IsInstance(
                  obj, reinterpret_cast<PyObject*>(g_xpuplace_pytype))) {
     place = ::pybind11::handle(obj).cast<platform::XPUPlace>();
-  } else if (PyObject_IsInstance(
-                 obj, reinterpret_cast<PyObject*>(g_npuplace_pytype))) {
-    place = ::pybind11::handle(obj).cast<platform::NPUPlace>();
   } else if (PyObject_IsInstance(
                  obj, reinterpret_cast<PyObject*>(g_cudapinnedplace_pytype))) {
     place = ::pybind11::handle(obj).cast<platform::CUDAPinnedPlace>();
@@ -542,7 +538,7 @@ platform::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos) {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "argument (position %d) must be "
         "one "
-        "of(Place,CUDAPlace,CPUPlace,XPUPlace,NPUPlace,CUDAPinnedPlace,"
+        "of(Place,CUDAPlace,CPUPlace,XPUPlace,CUDAPinnedPlace,"
         "CustomPlace), "
         "but got %s",
         arg_pos + 1,
diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc
index 8d5bd524a1c14..677f9bfd98026 100644
--- a/paddle/fluid/pybind/imperative.cc
+++ b/paddle/fluid/pybind/imperative.cc
@@ -144,8 +144,6 @@ static const platform::Place PyObjectToPlace(const py::object &place_obj) {
     return place_obj.cast<platform::XPUPlace>();
   } else if (py::isinstance<platform::CUDAPinnedPlace>(place_obj)) {
     return place_obj.cast<platform::CUDAPinnedPlace>();
-  } else if (py::isinstance<platform::NPUPlace>(place_obj)) {
-    return place_obj.cast<platform::NPUPlace>();
   } else if (py::isinstance<platform::IPUPlace>(place_obj)) {
     return place_obj.cast<platform::IPUPlace>();
   } else if (py::isinstance<platform::Place>(place_obj)) {
@@ -155,7 +153,7 @@ static const platform::Place PyObjectToPlace(const py::object &place_obj) {
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "Place should be one of "
-        "Place/CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/IPUPlace/"
+        "Place/CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/IPUPlace/"
         "CustomPlace"));
   }
 }
@@ -208,7 +206,7 @@ static void InitVarBaseAndTensor(imperative::VarBase *self,
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "Place should be one of "
-        "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/IPUPlace/"));
+        "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/IPUPlace"));
   }
   self->SetDataType(framework::TransToProtoVarType(tensor->dtype()));
 }
@@ -711,14 +709,6 @@ void BindImperative(py::module *m_ptr) {
            py::arg("zero_copy") = false,
            py::arg("name") = "",
            py::arg("stop_gradient") = -1)
-      .def("__init__",
-           &InitVarBaseFromNumpyWithArg<platform::NPUPlace>,
-           py::arg("value"),
-           py::arg("place"),
-           py::arg("persistable") = false,
-           py::arg("zero_copy") = false,
-           py::arg("name") = "",
-           py::arg("stop_gradient") = -1)
       .def("__init__",
            &InitVarBaseFromNumpyWithArg<platform::CustomPlace>,
            py::arg("value"),
@@ -752,11 +742,6 @@ void BindImperative(py::module *m_ptr) {
            py::arg("tensor"),
            py::arg("place"),
            py::arg("name") = "")
-      .def("__init__",
-           &InitVarBaseFromTensorWithArg<platform::NPUPlace>,
-           py::arg("tensor"),
-           py::arg("place"),
-           py::arg("name") = "")
       .def("__init__",
            &InitVarBaseFromTensorWithArg<platform::CustomPlace>,
            py::arg("tensor"),
@@ -1860,18 +1845,6 @@ void BindImperative(py::module *m_ptr) {
             return new_var;
           },
           py::return_value_policy::copy)
-      .def(
-          "_copy_to",
-          [](const std::shared_ptr<imperative::VarBase> &self,
-             const platform::NPUPlace &place,
-             bool blocking) {
-            auto new_var = self->NewVarBase(place, blocking);
-            if (!blocking) {
-              IncreaseVarbaseReferenceCountUntilCopyComplete(self, place);
-            }
-            return new_var;
-          },
-          py::return_value_policy::copy)
       .def(
           "_copy_to",
           [](const std::shared_ptr<imperative::VarBase> &self,
@@ -2202,11 +2175,6 @@ void BindImperative(py::module *m_ptr) {
               self.SetExpectedPlace(*p);
               VLOG(4) << "Tracer(" << &self << ")"
                       << " set expected place " << *p;
-            } else if (py::isinstance<platform::NPUPlace>(obj)) {
-              auto p = obj.cast<platform::NPUPlace *>();
-              self.SetExpectedPlace(*p);
-              VLOG(4) << "Tracer(" << &self << ")"
-                      << " set expected place " << *p;
             } else if (py::isinstance<platform::IPUPlace>(obj)) {
               auto p = obj.cast<platform::IPUPlace *>();
               self.SetExpectedPlace(*p);
@@ -2225,7 +2193,7 @@ void BindImperative(py::module *m_ptr) {
             } else {
               PADDLE_THROW(platform::errors::InvalidArgument(
                   "Incompatible Place Type: supports XPUPlace, CUDAPlace, "
-                  "CPUPlace, NPUPlace, IPUPlace"
+                  "CPUPlace, IPUPlace "
                   "and CUDAPinnedPlace, "
                   "but got Unknown Type!"));
             }
@@ -2358,28 +2326,6 @@ void BindImperative(py::module *m_ptr) {
                                                  inplace_map);
              }
            })
-      .def("trace",
-           [](imperative::Tracer &self,
-              const std::string &type,
-              const PyNameVarBaseMap &ins,
-              const PyNameVarBaseMap &outs,
-              framework::AttributeMap attrs,
-              const platform::NPUPlace &place,
-              bool trace_backward,
-              const std::map<std::string, std::string> &inplace_map = {}) {
-             auto ins_map = ConvertToNameVarBaseMap(ins);
-             auto outs_map = ConvertToNameVarBaseMap(outs);
-             {
-               py::gil_scoped_release release;
-               self.TraceOp<imperative::VarBase>(type,
-                                                 std::move(ins_map),
-                                                 std::move(outs_map),
-                                                 std::move(attrs),
-                                                 place,
-                                                 trace_backward,
-                                                 inplace_map);
-             }
-           })
       .def("trace",
            [](imperative::Tracer &self,
               const std::string &type,
@@ -2471,7 +2417,6 @@ void BindImperative(py::module *m_ptr) {
   m.def("varbase_copy", &VarBaseCopy<platform::CUDAPlace>);
   m.def("varbase_copy", &VarBaseCopy<platform::XPUPlace>);
   m.def("varbase_copy", &VarBaseCopy<platform::CUDAPinnedPlace>);
-  m.def("varbase_copy", &VarBaseCopy<platform::NPUPlace>);
   m.def("varbase_copy", &VarBaseCopy<platform::CustomPlace>);
 
   m.def(
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index e00c22423eb28..a8bfd5a9917e7 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -637,7 +637,6 @@ void BindPaddlePlace(py::module *m) {
       .value("CPU", PaddlePlace::kCPU)
       .value("GPU", PaddlePlace::kGPU)
       .value("XPU", PaddlePlace::kXPU)
-      .value("NPU", PaddlePlace::kNPU)
       .value("CUSTOM", PaddlePlace::kCUSTOM);
 }
 
diff --git a/paddle/fluid/pybind/place.cc b/paddle/fluid/pybind/place.cc
index 59ed791c4522c..ae25b00c93465 100644
--- a/paddle/fluid/pybind/place.cc
+++ b/paddle/fluid/pybind/place.cc
@@ -189,7 +189,6 @@ PyTypeObject *g_customplace_pytype = nullptr;
 PyTypeObject *g_cudaplace_pytype = nullptr;
 PyTypeObject *g_cpuplace_pytype = nullptr;
 PyTypeObject *g_xpuplace_pytype = nullptr;
-PyTypeObject *g_npuplace_pytype = nullptr;
 PyTypeObject *g_cudapinnedplace_pytype = nullptr;
 PyTypeObject *g_ipuplace_pytype = nullptr;
 
@@ -366,7 +365,6 @@ void BindPlace(pybind11::module &m) {  // NOLINT
       .def("_equals", &IsSamePlace<platform::CUDAPlace, platform::CUDAPlace>)
       .def("_equals", &IsSamePlace<platform::CUDAPlace, platform::CPUPlace>)
       .def("_equals", &IsSamePlace<platform::CUDAPlace, platform::XPUPlace>)
-      .def("_equals", &IsSamePlace<platform::CUDAPlace, platform::NPUPlace>)
       .def("_equals",
            &IsSamePlace<platform::CUDAPlace, platform::CUDAPinnedPlace>)
       .def("_get_device_id",
@@ -495,7 +493,6 @@ void BindPlace(pybind11::module &m) {  // NOLINT
       .def("_type", &PlaceIndex<platform::CPUPlace>)
       .def("_equals", &IsSamePlace<platform::CPUPlace, platform::Place>)
       .def("_equals", &IsSamePlace<platform::CPUPlace, platform::XPUPlace>)
-      .def("_equals", &IsSamePlace<platform::CPUPlace, platform::NPUPlace>)
       .def("_equals", &IsSamePlace<platform::CPUPlace, platform::CUDAPlace>)
       .def("_equals", &IsSamePlace<platform::CPUPlace, platform::CPUPlace>)
       .def("_equals",
@@ -548,8 +545,6 @@ void BindPlace(pybind11::module &m) {  // NOLINT
            &IsSamePlace<platform::CUDAPinnedPlace, platform::CUDAPlace>)
       .def("_equals",
            &IsSamePlace<platform::CUDAPinnedPlace, platform::XPUPlace>)
-      .def("_equals",
-           &IsSamePlace<platform::CUDAPinnedPlace, platform::NPUPlace>)
       .def("_equals",
            &IsSamePlace<platform::CUDAPinnedPlace, platform::CPUPlace>)
       .def("_equals",
@@ -557,30 +552,6 @@ void BindPlace(pybind11::module &m) {  // NOLINT
       .def("__repr__", string::to_string<const platform::CUDAPinnedPlace &>)
       .def("__str__", string::to_string<const platform::CUDAPinnedPlace &>);
 
-  // NPUPlace
-  py::class_<platform::NPUPlace> npuplace(m, "NPUPlace", R"DOC(
-    NPUPlace is a descriptor of a device.
-    It represents a NPU device on which a tensor will be allocated and a model will run.
-    Examples:
-        .. code-block:: python
-          # required: npu
-          import paddle
-          place = paddle.NPUPlace(0)
-        )DOC");
-  g_npuplace_pytype = reinterpret_cast<PyTypeObject *>(npuplace.ptr());
-  npuplace.def("__init__", [](platform::NPUPlace &self, int dev_id) {})
-      .def("_type", &PlaceIndex<platform::NPUPlace>)
-      .def("_equals", &IsSamePlace<platform::NPUPlace, platform::Place>)
-      .def("_equals", &IsSamePlace<platform::NPUPlace, platform::CUDAPlace>)
-      .def("_equals", &IsSamePlace<platform::NPUPlace, platform::CPUPlace>)
-      .def("_equals", &IsSamePlace<platform::NPUPlace, platform::XPUPlace>)
-      .def("_equals", &IsSamePlace<platform::NPUPlace, platform::NPUPlace>)
-      .def("_equals",
-           &IsSamePlace<platform::NPUPlace, platform::CUDAPinnedPlace>)
-      .def("get_device_id",
-           [](const platform::NPUPlace &self) { return self.GetDeviceId(); })
-      .def("__str__", string::to_string<const platform::NPUPlace &>);
-
   // IPUPlace
   py::class_<platform::IPUPlace> ipuplace(m, "IPUPlace", R"DOC(
     IPUPlace is a descriptor of a device.
@@ -625,7 +596,6 @@ void BindPlace(pybind11::module &m) {  // NOLINT
       .def("_equals", &IsSamePlace<platform::IPUPlace, platform::CUDAPlace>)
       .def("_equals", &IsSamePlace<platform::IPUPlace, platform::CPUPlace>)
       .def("_equals", &IsSamePlace<platform::IPUPlace, platform::XPUPlace>)
-      .def("_equals", &IsSamePlace<platform::IPUPlace, platform::NPUPlace>)
       .def("_equals", &IsSamePlace<platform::IPUPlace, platform::IPUPlace>)
       .def("_equals",
            &IsSamePlace<platform::IPUPlace, platform::CUDAPinnedPlace>)
@@ -639,7 +609,6 @@ void BindPlace(pybind11::module &m) {  // NOLINT
       .def("_equals", &IsSamePlace<platform::Place, platform::CUDAPlace>)
       .def("_equals", &IsSamePlace<platform::Place, platform::CPUPlace>)
       .def("_equals", &IsSamePlace<platform::Place, platform::XPUPlace>)
-      .def("_equals", &IsSamePlace<platform::Place, platform::NPUPlace>)
       .def("_equals", &IsSamePlace<platform::Place, platform::IPUPlace>)
       .def("_equals", &IsSamePlace<platform::Place, platform::CUDAPinnedPlace>)
       .def("_equals", &IsSamePlace<platform::Place, platform::CustomPlace>)
@@ -685,10 +654,6 @@ void BindPlace(pybind11::module &m) {  // NOLINT
               const platform::CUDAPinnedPlace &cuda_pinned_place) {
              self = cuda_pinned_place;
            })
-      .def("set_place",
-           [](platform::Place &self, const platform::NPUPlace &npu_place) {
-             self = npu_place;
-           })
       .def("set_place",
            [](platform::Place &self, const platform::IPUPlace &ipu_place) {
              self = ipu_place;
diff --git a/paddle/fluid/pybind/tensor.cc b/paddle/fluid/pybind/tensor.cc
index 799e48b1594de..bdd2a50d96e01 100644
--- a/paddle/fluid/pybind/tensor.cc
+++ b/paddle/fluid/pybind/tensor.cc
@@ -245,10 +245,6 @@ void BindTensor(pybind11::module &m) {  // NOLINT
            [](phi::DenseTensor &self, paddle::platform::CPUPlace &place) {
              self.mutable_data<float>(place);
            })
-      .def("_alloc_float",
-           [](phi::DenseTensor &self, paddle::platform::NPUPlace &place) {
-             self.mutable_data<float>(place);
-           })
       .def("_alloc_double",
            [](phi::DenseTensor &self, paddle::platform::CPUPlace &place) {
              self.mutable_data<double>(place);
@@ -315,13 +311,6 @@ void BindTensor(pybind11::module &m) {  // NOLINT
                  self.mutable_data(place, framework::TransToPhiDataType(type)));
            })
       .def("_clear", &phi::DenseTensor::clear)
-      .def("_mutable_data",
-           [](phi::DenseTensor &self,
-              paddle::platform::NPUPlace &place,
-              paddle::framework::proto::VarType::Type type) {
-             return reinterpret_cast<uintptr_t>(
-                 self.mutable_data(place, framework::TransToPhiDataType(type)));
-           })
       .def("_copy_from",
            &TensorCopyFrom<paddle::platform::CPUPlace>,
            py::arg("tensor"),
@@ -342,11 +331,6 @@ void BindTensor(pybind11::module &m) {  // NOLINT
            py::arg("tensor"),
            py::arg("place"),
            py::arg("batch_size") = -1)
-      .def("_copy_from",
-           &TensorCopyFrom<paddle::platform::NPUPlace>,
-           py::arg("tensor"),
-           py::arg("place"),
-           py::arg("batch_size") = -1)
       .def("_copy_from",
            &TensorCopyFrom<paddle::platform::CUDAPinnedPlace>,
            py::arg("tensor"),
@@ -382,11 +366,6 @@ void BindTensor(pybind11::module &m) {  // NOLINT
            py::arg("array"),
            py::arg("place"),
            py::arg("zero_copy") = false)
-      .def("set",
-           SetTensorFromPyArray<paddle::platform::NPUPlace>,
-           py::arg("array"),
-           py::arg("place"),
-           py::arg("zero_copy") = false)
       .def("set",
            SetTensorFromPyArray<paddle::platform::IPUPlace>,
            py::arg("array"),
@@ -402,7 +381,7 @@ void BindTensor(pybind11::module &m) {  // NOLINT
 
         Args:
           lod (numpy.ndarray): The data to set.
-          place (CPUPlace|CUDAPlace|XPUPlace|IPUPlace|CUDAPinnedPlace|NPUPlace): The place where the
+          place (CPUPlace|CUDAPlace|XPUPlace|IPUPlace|CUDAPinnedPlace): The place where the
           Tensor is to be set.
           zero_copy (bool, optional): Whether to share memory with the input numpy array.
           This parameter only works with CPUPlace. Default: False.
diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h
index 24bcc63dbd278..603d12c4f7750 100644
--- a/paddle/phi/api/include/tensor.h
+++ b/paddle/phi/api/include/tensor.h
@@ -629,7 +629,7 @@ class PADDLE_API Tensor final {
    * unified to Tensor, but Tensor itself is heterogeneous.
    *
    * Tensor can generally be represented by void* and size_t, place.
-   * This is suitable for most scenarios including CPU, GPU, HIP, NPU, etc.,
+   * This is suitable for most scenarios including CPU, GPU, HIP, etc.,
    * but there are a few cases where this definition cannot be described,
    * such as the Tensor representation in third-party lib such as Metal,
    * OpenCL, etc., as well as some special Tensor implementations, including
diff --git a/paddle/phi/api/profiler/device_tracer.cc b/paddle/phi/api/profiler/device_tracer.cc
index 7bd09ff8413b0..ae026683f1509 100644
--- a/paddle/phi/api/profiler/device_tracer.cc
+++ b/paddle/phi/api/profiler/device_tracer.cc
@@ -696,8 +696,6 @@ class DeviceTracerImpl : public DeviceTracer {
           event->set_device_id(r.place.GetDeviceId());
         } else if (r.place.GetType() == phi::AllocationType::GPUPINNED) {
           event->set_place(proto::MemEvent::CUDAPinnedPlace);
-        } else if (r.place.GetType() == phi::AllocationType::NPU) {
-          event->set_place(proto::MemEvent::NPUPlace);
         } else {
           PADDLE_THROW(
               errors::Unimplemented("The current place is not supported."));
diff --git a/paddle/phi/common/backend.h b/paddle/phi/common/backend.h
index b7f30797ca78e..dd76eccca7af2 100644
--- a/paddle/phi/common/backend.h
+++ b/paddle/phi/common/backend.h
@@ -91,9 +91,6 @@ inline std::ostream& operator<<(std::ostream& os, Backend backend) {
     case Backend::XPU:
       os << "XPU";
       break;
-    case Backend::NPU:
-      os << "NPU";
-      break;
     case Backend::ONEDNN:
       os << "ONEDNN";
       break;
@@ -137,8 +134,6 @@ inline Backend StringToBackend(const char* backend_cstr) {
     return Backend::GPU;
   } else if (s == std::string("XPU")) {
     return Backend::XPU;
-  } else if (s == std::string("NPU")) {
-    return Backend::NPU;
   } else if (s == std::string("OneDNN")) {
     return Backend::ONEDNN;
   } else if (s == std::string("GPUDNN")) {
@@ -173,8 +168,6 @@ inline std::string BackendToString(const Backend& backend) {
       return "GPU";
     case Backend::XPU:
       return "XPU";
-    case Backend::NPU:
-      return "NPU";
     case Backend::ONEDNN:
       return "ONEDNN";
     case Backend::GPUDNN:
diff --git a/paddle/phi/common/place.cc b/paddle/phi/common/place.cc
index bbe30cacfcf3f..d97f3f4fd7598 100644
--- a/paddle/phi/common/place.cc
+++ b/paddle/phi/common/place.cc
@@ -35,8 +35,6 @@ const char *AllocationTypeStr(AllocationType type) {
       return "gpu_pinned";
     case AllocationType::XPU:
       return "xpu";
-    case AllocationType::NPU:
-      return "npu";
     case AllocationType::NPUPINNED:
       return "npu_pinned";
     case AllocationType::IPU:
@@ -76,8 +74,6 @@ Place GetPinnedPlace(const Place &place) {
     case AllocationType::GPU:
       return phi::GPUPinnedPlace();
       break;
-    case AllocationType::NPU:
-      return phi::NPUPinnedPlace();
     default:
       return place;
   }
diff --git a/paddle/phi/common/place.h b/paddle/phi/common/place.h
index 543a79977eb7e..fcd9634a1892d 100644
--- a/paddle/phi/common/place.h
+++ b/paddle/phi/common/place.h
@@ -163,16 +163,6 @@ class XPUPlace : public Place {
       : Place(AllocationType::XPU, place.GetDeviceId()) {}
 };
 
-class NPUPlace : public Place {
- public:
-  NPUPlace() : Place(AllocationType::NPU, 0) {}
-  explicit NPUPlace(int device_id) : Place(AllocationType::NPU, device_id) {}
-
-  NPUPlace(const NPUPlace&) = default;
-  NPUPlace(const Place& place)  // NOLINT
-      : Place(AllocationType::NPU, place.GetDeviceId()) {}
-};
-
 class NPUPinnedPlace : public Place {
  public:
   NPUPinnedPlace() : Place(AllocationType::NPUPINNED) {}
@@ -220,7 +210,6 @@ namespace experimental {
 using AllocationType = phi::AllocationType;
 using GPUPinnedPlace = phi::GPUPinnedPlace;
 using XPUPlace = phi::XPUPlace;
-using NPUPlace = phi::NPUPlace;
 }  // namespace experimental
 
 using AllocationType = phi::AllocationType;
diff --git a/paddle/phi/core/compat/convert_utils.cc b/paddle/phi/core/compat/convert_utils.cc
index d41047a32166a..947c7fb45c5fc 100644
--- a/paddle/phi/core/compat/convert_utils.cc
+++ b/paddle/phi/core/compat/convert_utils.cc
@@ -37,8 +37,6 @@ Backend TransToPhiBackend(const phi::Place& place) {
       return Backend::GPU;
     case AllocationType::XPU:
       return Backend::XPU;
-    case AllocationType::NPU:
-      return Backend::NPU;
     case AllocationType::IPU:
       return Backend::IPU;
     case AllocationType::CUSTOM:
diff --git a/paddle/phi/kernels/funcs/math_function.cc b/paddle/phi/kernels/funcs/math_function.cc
index 6e54718afb542..d13ce8a2a90b5 100644
--- a/paddle/phi/kernels/funcs/math_function.cc
+++ b/paddle/phi/kernels/funcs/math_function.cc
@@ -161,13 +161,6 @@ void set_constant_with_place<phi::XPUPlace>(const phi::DeviceContext& context,
 #endif
 }
 
-template <>
-void set_constant_with_place<phi::NPUPlace>(const phi::DeviceContext& context,
-                                            phi::DenseTensor* tensor,
-                                            float value) {
-  PADDLE_THROW(phi::errors::Unimplemented("NPUPlace is not supported"));
-}
-
 template <>
 void set_constant_with_place<phi::NPUPinnedPlace>(
     const phi::DeviceContext& context, phi::DenseTensor* tensor, float value) {
diff --git a/python/paddle/amp/auto_cast.py b/python/paddle/amp/auto_cast.py
index d1eaf0dbd13ae..2cb0af3530eab 100644
--- a/python/paddle/amp/auto_cast.py
+++ b/python/paddle/amp/auto_cast.py
@@ -348,7 +348,7 @@ def amp_guard(
         or tracer._expected_place.is_custom_place()
     ):
         warnings.warn(
-            'amp_guard can only be enabled on CUDAPlace, XPUPlace, NPUPlace, and CustomPlace, current place is %s, so it makes no effect.'
+            'amp_guard can only be enabled on CUDAPlace, XPUPlace, and CustomPlace, current place is %s, so it makes no effect.'
             % tracer._expected_place
         )
         enable = False
diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py
index 3c6338e345ddb..66456d48253f8 100755
--- a/python/paddle/framework/__init__.py
+++ b/python/paddle/framework/__init__.py
@@ -24,7 +24,6 @@
 from ..fluid.core import IPUPlace  # noqa: F401
 from ..fluid.core import CUDAPlace  # noqa: F401
 from ..fluid.core import CUDAPinnedPlace  # noqa: F401
-from ..fluid.core import NPUPlace  # noqa: F401
 from ..fluid.core import CustomPlace  # noqa: F401
 
 from ..fluid import core  # noqa: F401
diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py
index db1006b0b1af0..6b384af34ba6c 100644
--- a/python/paddle/tensor/creation.py
+++ b/python/paddle/tensor/creation.py
@@ -2209,7 +2209,7 @@ def _memcpy(input, place=None, output=None):
     """
 
     The OP copies the :attr:`input` to the :attr:`output`.
-    NOTE: currently, only support CUDAPlace <-> CUDAPinnedPlace or NPUPlace <-> CPUPlace.
+    NOTE: currently, only support CUDAPlace <-> CUDAPinnedPlace.
 
     Parameters:
         input (Tensor): A tensor. Its data type supports float16, float32, float64, int32, int64, and bool.
diff --git a/tools/timeline.py b/tools/timeline.py
index 814d51d5f3943..0ef4a886f4623 100644
--- a/tools/timeline.py
+++ b/tools/timeline.py
@@ -194,14 +194,6 @@ def _allocate_pids(self):
                             % (k, mevent.device_id),
                             pid,
                         )
-                elif mevent.place == profiler_pb2.MemEvent.NPUPlace:
-                    if (k, mevent.device_id, "NPU") not in self._mem_devices:
-                        pid = self._allocate_pid()
-                        self._mem_devices[(k, mevent.device_id, "NPU")] = pid
-                        self._chrome_trace.emit_pid(
-                            "memory usage on %s:npu:%d" % (k, mevent.device_id),
-                            pid,
-                        )
                 if (k, 0, "CPU") not in self._mem_devices:
                     pid = self._allocate_pid()
                     self._mem_devices[(k, 0, "CPU")] = pid
@@ -259,7 +251,6 @@ def _allocate_memory_event(self):
             profiler_pb2.MemEvent.CPUPlace: "CPU",
             profiler_pb2.MemEvent.CUDAPlace: "GPU",
             profiler_pb2.MemEvent.CUDAPinnedPlace: "CUDAPinnedPlace",
-            profiler_pb2.MemEvent.NPUPlace: "NPU",
         }
         for k, profile_pb in self._profile_dict.items():
             mem_list = []