PaddlePaddle · chenwhql · Oct 26, 2022 · Oct 12, 2022 · Oct 12, 2022 · Oct 13, 2022
diff --git a/paddle/fluid/framework/data_device_transform_test.cu b/paddle/fluid/framework/data_device_transform_test.cu
@@ -153,7 +153,7 @@ TEST(Operator, CPUtoGPU) {
   gpu_op->Run(scope, cuda_place);
   VLOG(3) << "after gpu_op run";
 
-  // auto* output2_ptr = output2->Get<LoDTensor>().data<float>();
+  // auto* output2_ptr = output2->Get<phi::DenseTensor>().data<float>();
   paddle::platform::DeviceContextPool& pool =
       paddle::platform::DeviceContextPool::Instance();
   auto dev_ctx = pool.Get(cuda_place);

diff --git a/paddle/fluid/framework/data_feed.cc b/paddle/fluid/framework/data_feed.cc
@@ -188,7 +188,7 @@ void DataFeed::AddFeedVar(Variable* var, const std::string& name) {
       if (var == nullptr) {
         feed_vec_[i] = nullptr;
       } else {
-        feed_vec_[i] = var->GetMutable<LoDTensor>();
+        feed_vec_[i] = var->GetMutable<phi::DenseTensor>();
       }
     }
   }
@@ -257,7 +257,7 @@ void DataFeed::CheckStart() {
 void DataFeed::AssignFeedVar(const Scope& scope) {
   CheckInit();
   for (size_t i = 0; i < use_slots_.size(); ++i) {
-    feed_vec_[i] = scope.FindVar(use_slots_[i])->GetMutable<LoDTensor>();
+    feed_vec_[i] = scope.FindVar(use_slots_[i])->GetMutable<phi::DenseTensor>();
   }
 }
 
@@ -1889,7 +1889,8 @@ void PaddleBoxDataFeed::AssignFeedVar(const Scope& scope) {
   // set rank offset memory
   int phase = GetCurrentPhase();  // join: 1, update: 0
   if (enable_pv_merge_ && phase == 1) {
-    rank_offset_ = scope.FindVar(rank_offset_name_)->GetMutable<LoDTensor>();
+    rank_offset_ =
+        scope.FindVar(rank_offset_name_)->GetMutable<phi::DenseTensor>();
   }
 }
 
@@ -2478,7 +2479,7 @@ void SlotRecordInMemoryDataFeed::AssignFeedVar(const Scope& scope) {
   CheckInit();
   for (int i = 0; i < use_slot_size_; ++i) {
     feed_vec_[i] =
-        scope.FindVar(used_slots_info_[i].slot)->GetMutable<LoDTensor>();
+        scope.FindVar(used_slots_info_[i].slot)->GetMutable<phi::DenseTensor>();
   }
 }
 
@@ -2717,8 +2718,8 @@ void SlotRecordInMemoryDataFeed::BuildSlotBatchGPU(const int ins_num) {
   // alloc gpu memory
   pack_->resize_tensor();
 
-  LoDTensor& float_tensor = pack_->float_tensor();
-  LoDTensor& uint64_tensor = pack_->uint64_tensor();
+  phi::DenseTensor& float_tensor = pack_->float_tensor();
+  phi::DenseTensor& uint64_tensor = pack_->uint64_tensor();
 
   int64_t float_offset = 0;
   int64_t uint64_offset = 0;

diff --git a/paddle/fluid/framework/data_feed.cu b/paddle/fluid/framework/data_feed.cu
@@ -219,7 +219,7 @@ int GraphDataGenerator::AcquireInstance(BufState *state) {
   return 0;
 }
 
-// TODO opt
+// TODO(fengdanlei): opt
 __global__ void GraphFillFeatureKernel(uint64_t *id_tensor,
                                        int *fill_ins_num,
                                        uint64_t *walk,
@@ -319,7 +319,7 @@ __global__ void GraphFillSlotKernel(uint64_t *id_tensor,
   CUDA_KERNEL_LOOP(idx, len) {
     int slot_idx = idx / total_ins;
     int ins_idx = idx % total_ins;
-    ((uint64_t *)(id_tensor[slot_idx]))[ins_idx] =
+    ((uint64_t *)(id_tensor[slot_idx]))[ins_idx] =  // NOLINT
         feature_buf[ins_idx * slot_num + slot_idx];
   }
 }
@@ -330,7 +330,7 @@ __global__ void GraphFillSlotLodKernelOpt(uint64_t *id_tensor,
   CUDA_KERNEL_LOOP(idx, len) {
     int slot_idx = idx / total_ins;
     int ins_idx = idx % total_ins;
-    ((uint64_t *)(id_tensor[slot_idx]))[ins_idx] = ins_idx;
+    ((uint64_t *)(id_tensor[slot_idx]))[ins_idx] = ins_idx;  // NOLINT
   }
 }
 
@@ -365,12 +365,12 @@ int GraphDataGenerator::FillInsBuf() {
       FillFeatureBuf(d_walk_, d_feature_);
       if (debug_mode_) {
         int len = buf_size_ > 5000 ? 5000 : buf_size_;
-        uint64_t h_walk[len];
+        uint64_t h_walk[len];  // NOLINT
         cudaMemcpy(h_walk,
                    d_walk_->ptr(),
                    len * sizeof(uint64_t),
                    cudaMemcpyDeviceToHost);
-        uint64_t h_feature[len * slot_num_];
+        uint64_t h_feature[len * slot_num_];  // NOLINT
         cudaMemcpy(h_feature,
                    d_feature_->ptr(),
                    len * slot_num_ * sizeof(uint64_t),
@@ -431,7 +431,7 @@ int GraphDataGenerator::FillInsBuf() {
   ins_buf_pair_len_ += h_pair_num;
 
   if (debug_mode_) {
-    uint64_t h_ins_buf[ins_buf_pair_len_ * 2];
+    uint64_t h_ins_buf[ins_buf_pair_len_ * 2];  // NOLINT
     cudaMemcpy(h_ins_buf,
                ins_buf,
                2 * ins_buf_pair_len_ * sizeof(uint64_t),
@@ -446,7 +446,7 @@ int GraphDataGenerator::FillInsBuf() {
     if (!FLAGS_enable_opt_get_features && slot_num_ > 0) {
       uint64_t *feature_buf =
           reinterpret_cast<uint64_t *>(d_feature_buf_->ptr());
-      uint64_t h_feature_buf[(batch_size_ * 2 * 2) * slot_num_];
+      uint64_t h_feature_buf[(batch_size_ * 2 * 2) * slot_num_];  // NOLINT
       cudaMemcpy(h_feature_buf,
                  feature_buf,
                  (batch_size_ * 2 * 2) * slot_num_ * sizeof(uint64_t),
@@ -574,7 +574,7 @@ int GraphDataGenerator::GenerateBatch() {
                          0,
                          stream_>>>(clk_tensor_ptr_, total_instance);
   } else {
-    ins_cursor = (uint64_t *)id_tensor_ptr_;
+    ins_cursor = (uint64_t *)id_tensor_ptr_;  // NOLINT
   }
 
   if (slot_num_ > 0) {
@@ -583,12 +583,12 @@ int GraphDataGenerator::GenerateBatch() {
       FillFeatureBuf(ins_cursor, feature_buf, total_instance);
       // FillFeatureBuf(id_tensor_ptr_, feature_buf, total_instance);
       if (debug_mode_) {
-        uint64_t h_walk[total_instance];
+        uint64_t h_walk[total_instance];  // NOLINT
         cudaMemcpy(h_walk,
                    ins_cursor,
                    total_instance * sizeof(uint64_t),
                    cudaMemcpyDeviceToHost);
-        uint64_t h_feature[total_instance * slot_num_];
+        uint64_t h_feature[total_instance * slot_num_];  // NOLINT
         cudaMemcpy(h_feature,
                    feature_buf,
                    total_instance * slot_num_ * sizeof(uint64_t),
@@ -608,16 +608,17 @@ int GraphDataGenerator::GenerateBatch() {
       GraphFillSlotKernel<<<GET_BLOCKS(total_instance * slot_num_),
                             CUDA_NUM_THREADS,
                             0,
-                            stream_>>>((uint64_t *)d_slot_tensor_ptr_->ptr(),
-                                       feature_buf,
-                                       total_instance * slot_num_,
-                                       total_instance,
-                                       slot_num_);
+                            stream_>>>(
+          (uint64_t *)d_slot_tensor_ptr_->ptr(),  // NOLINT
+          feature_buf,
+          total_instance * slot_num_,
+          total_instance,
+          slot_num_);
       GraphFillSlotLodKernelOpt<<<GET_BLOCKS((total_instance + 1) * slot_num_),
                                   CUDA_NUM_THREADS,
                                   0,
                                   stream_>>>(
-          (uint64_t *)d_slot_lod_tensor_ptr_->ptr(),
+          (uint64_t *)d_slot_lod_tensor_ptr_->ptr(),  // NOLINT
           (total_instance + 1) * slot_num_,
           total_instance + 1);
     } else {
@@ -828,7 +829,7 @@ void GraphDataGenerator::FillOneStep(uint64_t *d_start_ids,
     int *h_prefix_sum = new int[len + 1];
     int *h_actual_size = new int[len];
     int *h_offset2idx = new int[once_max_sample_keynum];
-    uint64_t h_sample_keys[once_max_sample_keynum];
+    uint64_t h_sample_keys[once_max_sample_keynum];  // NOLINT
     cudaMemcpy(h_offset2idx,
                d_tmp_sampleidx2row,
                once_max_sample_keynum * sizeof(int),
@@ -870,11 +871,12 @@ int GraphDataGenerator::FillFeatureBuf(
   platform::CUDADeviceGuard guard(gpuid_);
 
   auto gpu_graph_ptr = GraphGpuWrapper::GetInstance();
-  int ret = gpu_graph_ptr->get_feature_of_nodes(gpuid_,
-                                                (uint64_t *)d_walk->ptr(),
-                                                (uint64_t *)d_feature->ptr(),
-                                                buf_size_,
-                                                slot_num_);
+  int ret = gpu_graph_ptr->get_feature_of_nodes(
+      gpuid_,
+      (uint64_t *)d_walk->ptr(),     // NOLINT
+      (uint64_t *)d_feature->ptr(),  // NOLINT
+      buf_size_,
+      slot_num_);
   return ret;
 }
 
@@ -1044,8 +1046,9 @@ int GraphDataGenerator::FillWalkBuf(std::shared_ptr<phi::Allocation> d_walk) {
   return total_row != 0;
 }
 
-void GraphDataGenerator::AllocResource(const paddle::platform::Place &place,
-                                       std::vector<LoDTensor *> feed_vec) {
+void GraphDataGenerator::AllocResource(
+    const paddle::platform::Place &place,
+    std::vector<phi::DenseTensor *> feed_vec) {
   place_ = place;
   gpuid_ = place_.GetDeviceId();
   VLOG(3) << "gpuid " << gpuid_;
@@ -1195,7 +1198,7 @@ void GraphDataGenerator::SetConfig(
       meta_path_[i].push_back(iter->second);
     }
   }
-};
+}
 
 }  // namespace framework
 }  // namespace paddle

diff --git a/paddle/fluid/framework/data_feed.h b/paddle/fluid/framework/data_feed.h
@@ -402,7 +402,7 @@ class CustomParser {
                             const char* str,
                             std::vector<Record>* instances) {
     return 0;
-  };
+  }
   virtual bool ParseOneInstance(
       const std::string& line,
       std::function<void(std::vector<SlotRecord>&, int)>
@@ -562,8 +562,8 @@ class MiniBatchGpuPack {
       }
     }
   }
-  LoDTensor& float_tensor(void) { return float_tensor_; }
-  LoDTensor& uint64_tensor(void) { return uint64_tensor_; }
+  phi::DenseTensor& float_tensor(void) { return float_tensor_; }
+  phi::DenseTensor& uint64_tensor(void) { return uint64_tensor_; }
 
   HostBuffer<size_t>& offsets(void) { return offsets_; }
   HostBuffer<void*>& h_tensor_ptrs(void) { return h_tensor_ptrs_; }
@@ -628,9 +628,9 @@ class MiniBatchGpuPack {
   const SlotRecord* batch_ins_ = nullptr;
 
   // uint64 tensor
-  LoDTensor uint64_tensor_;
+  phi::DenseTensor uint64_tensor_;
   // float tensor
-  LoDTensor float_tensor_;
+  phi::DenseTensor float_tensor_;
   // batch
   HostBuffer<size_t> offsets_;
   HostBuffer<void*> h_tensor_ptrs_;
@@ -892,11 +892,11 @@ struct BufState {
 
 class GraphDataGenerator {
  public:
-  GraphDataGenerator(){};
-  virtual ~GraphDataGenerator(){};
+  GraphDataGenerator() {}
+  virtual ~GraphDataGenerator() {}
   void SetConfig(const paddle::framework::DataFeedDesc& data_feed_desc);
   void AllocResource(const paddle::platform::Place& place,
-                     std::vector<LoDTensor*> feed_vec);
+                     std::vector<phi::DenseTensor*> feed_vec);
   int AcquireInstance(BufState* state);
   int GenerateBatch();
   int FillWalkBuf(std::shared_ptr<phi::Allocation> d_walk);
@@ -906,7 +906,7 @@ class GraphDataGenerator {
   void FillOneStep(uint64_t* start_ids,
                    uint64_t* walk,
                    int len,
-                   NeighborSampleResult& sample_res,
+                   NeighborSampleResult& sample_res,  // NOLINT
                    int cur_degree,
                    int step,
                    int* len_per_row);
@@ -935,7 +935,7 @@ class GraphDataGenerator {
   int64_t* clk_tensor_ptr_;
   cudaStream_t stream_;
   paddle::platform::Place place_;
-  std::vector<LoDTensor*> feed_vec_;
+  std::vector<phi::DenseTensor*> feed_vec_;
   std::vector<size_t> offset_;
   std::shared_ptr<phi::Allocation> d_prefix_sum_;
   std::vector<std::shared_ptr<phi::Allocation>> d_device_keys_;
@@ -1106,9 +1106,9 @@ class DataFeed {
       use_slots_index_;  // -1: not used; >=0: the index of use_slots_
 
   // The data read by DataFeed will be stored here
-  std::vector<LoDTensor*> feed_vec_;
+  std::vector<phi::DenseTensor*> feed_vec_;
 
-  LoDTensor* rank_offset_;
+  phi::DenseTensor* rank_offset_;
 
   // the batch size defined by user
   int default_batch_size_;
@@ -1599,7 +1599,7 @@ class MultiSlotInMemoryDataFeed : public InMemoryDataFeed<Record> {
   virtual bool ParseOneInstanceFromPipe(Record* instance);
   virtual void ParseOneInstanceFromSo(const char* str,
                                       Record* instance,
-                                      CustomParser* parser){};
+                                      CustomParser* parser) {}
   virtual int ParseInstanceFromSo(int len,
                                   const char* str,
                                   std::vector<Record>* instances,

diff --git a/paddle/fluid/framework/data_transform.cc b/paddle/fluid/framework/data_transform.cc
@@ -122,9 +122,9 @@ void TransformData(const OpKernelType &expected_kernel_type,
 void SetTensorToVariable(const Variable &in_var,
                          const phi::DenseTensor &tensor,
                          Variable *out_var) {
-  if (in_var.IsType<LoDTensor>()) {
-    auto &in_lod_tensor = in_var.Get<LoDTensor>();
-    auto *tran_lod_tensor = out_var->GetMutable<LoDTensor>();
+  if (in_var.IsType<phi::DenseTensor>()) {
+    auto &in_lod_tensor = in_var.Get<phi::DenseTensor>();
+    auto *tran_lod_tensor = out_var->GetMutable<phi::DenseTensor>();
     tran_lod_tensor->set_lod(in_lod_tensor.lod());
     tran_lod_tensor->set_layout(in_lod_tensor.layout());
 #ifdef PADDLE_WITH_MKLDNN
@@ -139,7 +139,8 @@ void SetTensorToVariable(const Variable &in_var,
     trans_selected_rows->mutable_value()->ShareDataWith(tensor);
   } else {
     PADDLE_THROW(platform::errors::Unavailable(
-        "Unsupported variable type, only supports LoDTensor or SelectedRows, "
+        "Unsupported variable type, only supports phi::DenseTensor or "
+        "SelectedRows, "
         "but the input variable type is %s.",
         ToTypeName(in_var.Type())));
   }

diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc
@@ -131,7 +131,7 @@ void AllReduceOpHandle::AllReduceImpl(
         var,
         platform::errors::NotFound("Variable %s is not found in local scope.",
                                    in_var_handles[i]->name()));
-    auto &lod_tensor = var->Get<LoDTensor>();
+    auto &lod_tensor = var->Get<phi::DenseTensor>();
 
     if (i == 0) {
       numel = static_cast<int64_t>(lod_tensor.numel());
@@ -246,7 +246,7 @@ void AllReduceOpHandle::AllReduceFunc(
   } else {  // Special handle CPU only Operator's gradient. Like CRF
     auto &trg = *local_exec_scopes_[0]
                      ->FindVar(out_var_names[0])
-                     ->GetMutable<LoDTensor>();
+                     ->GetMutable<phi::DenseTensor>();
 
     // Reduce All phi::DenseTensor to trg in CPU
     ReduceBufferData func(lod_tensor_data, trg.data(), numel);

diff --git a/paddle/fluid/framework/details/async_ssa_graph_executor.cc b/paddle/fluid/framework/details/async_ssa_graph_executor.cc
@@ -177,17 +177,18 @@ FetchResultType AsyncSSAGraphExecutor::Run(
   auto &val = PADDLE_GET(FetchList, fetch_data);
   for (size_t fetch_idx = 0; fetch_idx < fetch_tensors.size(); ++fetch_idx) {
     if (data_is_lod_tensor(val.at(fetch_idx))) {
-      std::vector<const LoDTensor *> lodtensor_ptrs;
-      lodtensor_ptrs.push_back(&(PADDLE_GET(LoDTensor, val.at(fetch_idx))));
-      LoDTensor var;
+      std::vector<const phi::DenseTensor *> lodtensor_ptrs;
+      lodtensor_ptrs.push_back(
+          &(PADDLE_GET(phi::DenseTensor, val.at(fetch_idx))));
+      phi::DenseTensor var;
       MergeLoDTensor(&var, lodtensor_ptrs, platform::CPUPlace());
       ret.emplace_back(var);
     } else {
       auto array = PADDLE_GET(LoDTensorArray, val.at(fetch_idx));
       LoDTensorArray item_array;
       item_array.reserve(array.size());
       for (size_t i = 0; i < array.size(); ++i) {
-        std::vector<const LoDTensor *> lodtensor_ptrs;
+        std::vector<const phi::DenseTensor *> lodtensor_ptrs;
         lodtensor_ptrs.push_back(&array[i]);
         item_array.emplace_back();
         MergeLoDTensor(

diff --git a/paddle/fluid/framework/details/eager_deletion_op_handle.cc b/paddle/fluid/framework/details/eager_deletion_op_handle.cc
@@ -150,8 +150,9 @@ void EagerDeletionOpHandle::RunImpl() {
 
     Variable *var = vars_[i];
 
-    if (var->IsType<LoDTensor>()) {
-      garbages.emplace_back(var->GetMutable<LoDTensor>()->MoveMemoryHolder());
+    if (var->IsType<phi::DenseTensor>()) {
+      garbages.emplace_back(
+          var->GetMutable<phi::DenseTensor>()->MoveMemoryHolder());
     } else if (var->IsType<phi::SelectedRows>()) {
       garbages.emplace_back(var->GetMutable<phi::SelectedRows>()
                                 ->mutable_value()