Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove the declaration of using LoDTensor in framework/lod_tensor.h (Part2) #46953

Merged
merged 10 commits into from
Oct 26, 2022
2 changes: 1 addition & 1 deletion paddle/fluid/framework/data_device_transform_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ TEST(Operator, CPUtoGPU) {
gpu_op->Run(scope, cuda_place);
VLOG(3) << "after gpu_op run";

// auto* output2_ptr = output2->Get<LoDTensor>().data<float>();
// auto* output2_ptr = output2->Get<phi::DenseTensor>().data<float>();
paddle::platform::DeviceContextPool& pool =
paddle::platform::DeviceContextPool::Instance();
auto dev_ctx = pool.Get(cuda_place);
Expand Down
13 changes: 7 additions & 6 deletions paddle/fluid/framework/data_feed.cc
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ void DataFeed::AddFeedVar(Variable* var, const std::string& name) {
if (var == nullptr) {
feed_vec_[i] = nullptr;
} else {
feed_vec_[i] = var->GetMutable<LoDTensor>();
feed_vec_[i] = var->GetMutable<phi::DenseTensor>();
}
}
}
Expand Down Expand Up @@ -257,7 +257,7 @@ void DataFeed::CheckStart() {
void DataFeed::AssignFeedVar(const Scope& scope) {
CheckInit();
for (size_t i = 0; i < use_slots_.size(); ++i) {
feed_vec_[i] = scope.FindVar(use_slots_[i])->GetMutable<LoDTensor>();
feed_vec_[i] = scope.FindVar(use_slots_[i])->GetMutable<phi::DenseTensor>();
}
}

Expand Down Expand Up @@ -1889,7 +1889,8 @@ void PaddleBoxDataFeed::AssignFeedVar(const Scope& scope) {
// set rank offset memory
int phase = GetCurrentPhase(); // join: 1, update: 0
if (enable_pv_merge_ && phase == 1) {
rank_offset_ = scope.FindVar(rank_offset_name_)->GetMutable<LoDTensor>();
rank_offset_ =
scope.FindVar(rank_offset_name_)->GetMutable<phi::DenseTensor>();
}
}

Expand Down Expand Up @@ -2478,7 +2479,7 @@ void SlotRecordInMemoryDataFeed::AssignFeedVar(const Scope& scope) {
CheckInit();
for (int i = 0; i < use_slot_size_; ++i) {
feed_vec_[i] =
scope.FindVar(used_slots_info_[i].slot)->GetMutable<LoDTensor>();
scope.FindVar(used_slots_info_[i].slot)->GetMutable<phi::DenseTensor>();
}
}

Expand Down Expand Up @@ -2717,8 +2718,8 @@ void SlotRecordInMemoryDataFeed::BuildSlotBatchGPU(const int ins_num) {
// alloc gpu memory
pack_->resize_tensor();

LoDTensor& float_tensor = pack_->float_tensor();
LoDTensor& uint64_tensor = pack_->uint64_tensor();
phi::DenseTensor& float_tensor = pack_->float_tensor();
phi::DenseTensor& uint64_tensor = pack_->uint64_tensor();

int64_t float_offset = 0;
int64_t uint64_offset = 0;
Expand Down
53 changes: 28 additions & 25 deletions paddle/fluid/framework/data_feed.cu
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ int GraphDataGenerator::AcquireInstance(BufState *state) {
return 0;
}

// TODO opt
// TODO(fengdanlei): opt
__global__ void GraphFillFeatureKernel(uint64_t *id_tensor,
int *fill_ins_num,
uint64_t *walk,
Expand Down Expand Up @@ -319,7 +319,7 @@ __global__ void GraphFillSlotKernel(uint64_t *id_tensor,
CUDA_KERNEL_LOOP(idx, len) {
int slot_idx = idx / total_ins;
int ins_idx = idx % total_ins;
((uint64_t *)(id_tensor[slot_idx]))[ins_idx] =
((uint64_t *)(id_tensor[slot_idx]))[ins_idx] = // NOLINT
feature_buf[ins_idx * slot_num + slot_idx];
}
}
Expand All @@ -330,7 +330,7 @@ __global__ void GraphFillSlotLodKernelOpt(uint64_t *id_tensor,
CUDA_KERNEL_LOOP(idx, len) {
int slot_idx = idx / total_ins;
int ins_idx = idx % total_ins;
((uint64_t *)(id_tensor[slot_idx]))[ins_idx] = ins_idx;
((uint64_t *)(id_tensor[slot_idx]))[ins_idx] = ins_idx; // NOLINT
}
}

Expand Down Expand Up @@ -365,12 +365,12 @@ int GraphDataGenerator::FillInsBuf() {
FillFeatureBuf(d_walk_, d_feature_);
if (debug_mode_) {
int len = buf_size_ > 5000 ? 5000 : buf_size_;
uint64_t h_walk[len];
uint64_t h_walk[len]; // NOLINT
cudaMemcpy(h_walk,
d_walk_->ptr(),
len * sizeof(uint64_t),
cudaMemcpyDeviceToHost);
uint64_t h_feature[len * slot_num_];
uint64_t h_feature[len * slot_num_]; // NOLINT
cudaMemcpy(h_feature,
d_feature_->ptr(),
len * slot_num_ * sizeof(uint64_t),
Expand Down Expand Up @@ -431,7 +431,7 @@ int GraphDataGenerator::FillInsBuf() {
ins_buf_pair_len_ += h_pair_num;

if (debug_mode_) {
uint64_t h_ins_buf[ins_buf_pair_len_ * 2];
uint64_t h_ins_buf[ins_buf_pair_len_ * 2]; // NOLINT
cudaMemcpy(h_ins_buf,
ins_buf,
2 * ins_buf_pair_len_ * sizeof(uint64_t),
Expand All @@ -446,7 +446,7 @@ int GraphDataGenerator::FillInsBuf() {
if (!FLAGS_enable_opt_get_features && slot_num_ > 0) {
uint64_t *feature_buf =
reinterpret_cast<uint64_t *>(d_feature_buf_->ptr());
uint64_t h_feature_buf[(batch_size_ * 2 * 2) * slot_num_];
uint64_t h_feature_buf[(batch_size_ * 2 * 2) * slot_num_]; // NOLINT
cudaMemcpy(h_feature_buf,
feature_buf,
(batch_size_ * 2 * 2) * slot_num_ * sizeof(uint64_t),
Expand Down Expand Up @@ -574,7 +574,7 @@ int GraphDataGenerator::GenerateBatch() {
0,
stream_>>>(clk_tensor_ptr_, total_instance);
} else {
ins_cursor = (uint64_t *)id_tensor_ptr_;
ins_cursor = (uint64_t *)id_tensor_ptr_; // NOLINT
}

if (slot_num_ > 0) {
Expand All @@ -583,12 +583,12 @@ int GraphDataGenerator::GenerateBatch() {
FillFeatureBuf(ins_cursor, feature_buf, total_instance);
// FillFeatureBuf(id_tensor_ptr_, feature_buf, total_instance);
if (debug_mode_) {
uint64_t h_walk[total_instance];
uint64_t h_walk[total_instance]; // NOLINT
cudaMemcpy(h_walk,
ins_cursor,
total_instance * sizeof(uint64_t),
cudaMemcpyDeviceToHost);
uint64_t h_feature[total_instance * slot_num_];
uint64_t h_feature[total_instance * slot_num_]; // NOLINT
cudaMemcpy(h_feature,
feature_buf,
total_instance * slot_num_ * sizeof(uint64_t),
Expand All @@ -608,16 +608,17 @@ int GraphDataGenerator::GenerateBatch() {
GraphFillSlotKernel<<<GET_BLOCKS(total_instance * slot_num_),
CUDA_NUM_THREADS,
0,
stream_>>>((uint64_t *)d_slot_tensor_ptr_->ptr(),
feature_buf,
total_instance * slot_num_,
total_instance,
slot_num_);
stream_>>>(
(uint64_t *)d_slot_tensor_ptr_->ptr(), // NOLINT
feature_buf,
total_instance * slot_num_,
total_instance,
slot_num_);
GraphFillSlotLodKernelOpt<<<GET_BLOCKS((total_instance + 1) * slot_num_),
CUDA_NUM_THREADS,
0,
stream_>>>(
(uint64_t *)d_slot_lod_tensor_ptr_->ptr(),
(uint64_t *)d_slot_lod_tensor_ptr_->ptr(), // NOLINT
(total_instance + 1) * slot_num_,
total_instance + 1);
} else {
Expand Down Expand Up @@ -828,7 +829,7 @@ void GraphDataGenerator::FillOneStep(uint64_t *d_start_ids,
int *h_prefix_sum = new int[len + 1];
int *h_actual_size = new int[len];
int *h_offset2idx = new int[once_max_sample_keynum];
uint64_t h_sample_keys[once_max_sample_keynum];
uint64_t h_sample_keys[once_max_sample_keynum]; // NOLINT
cudaMemcpy(h_offset2idx,
d_tmp_sampleidx2row,
once_max_sample_keynum * sizeof(int),
Expand Down Expand Up @@ -870,11 +871,12 @@ int GraphDataGenerator::FillFeatureBuf(
platform::CUDADeviceGuard guard(gpuid_);

auto gpu_graph_ptr = GraphGpuWrapper::GetInstance();
int ret = gpu_graph_ptr->get_feature_of_nodes(gpuid_,
(uint64_t *)d_walk->ptr(),
(uint64_t *)d_feature->ptr(),
buf_size_,
slot_num_);
int ret = gpu_graph_ptr->get_feature_of_nodes(
gpuid_,
(uint64_t *)d_walk->ptr(), // NOLINT
(uint64_t *)d_feature->ptr(), // NOLINT
buf_size_,
slot_num_);
return ret;
}

Expand Down Expand Up @@ -1044,8 +1046,9 @@ int GraphDataGenerator::FillWalkBuf(std::shared_ptr<phi::Allocation> d_walk) {
return total_row != 0;
}

void GraphDataGenerator::AllocResource(const paddle::platform::Place &place,
std::vector<LoDTensor *> feed_vec) {
void GraphDataGenerator::AllocResource(
const paddle::platform::Place &place,
std::vector<phi::DenseTensor *> feed_vec) {
place_ = place;
gpuid_ = place_.GetDeviceId();
VLOG(3) << "gpuid " << gpuid_;
Expand Down Expand Up @@ -1195,7 +1198,7 @@ void GraphDataGenerator::SetConfig(
meta_path_[i].push_back(iter->second);
}
}
};
}

} // namespace framework
} // namespace paddle
Expand Down
26 changes: 13 additions & 13 deletions paddle/fluid/framework/data_feed.h
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ class CustomParser {
const char* str,
std::vector<Record>* instances) {
return 0;
};
}
virtual bool ParseOneInstance(
const std::string& line,
std::function<void(std::vector<SlotRecord>&, int)>
Expand Down Expand Up @@ -562,8 +562,8 @@ class MiniBatchGpuPack {
}
}
}
LoDTensor& float_tensor(void) { return float_tensor_; }
LoDTensor& uint64_tensor(void) { return uint64_tensor_; }
phi::DenseTensor& float_tensor(void) { return float_tensor_; }
phi::DenseTensor& uint64_tensor(void) { return uint64_tensor_; }

HostBuffer<size_t>& offsets(void) { return offsets_; }
HostBuffer<void*>& h_tensor_ptrs(void) { return h_tensor_ptrs_; }
Expand Down Expand Up @@ -628,9 +628,9 @@ class MiniBatchGpuPack {
const SlotRecord* batch_ins_ = nullptr;

// uint64 tensor
LoDTensor uint64_tensor_;
phi::DenseTensor uint64_tensor_;
// float tensor
LoDTensor float_tensor_;
phi::DenseTensor float_tensor_;
// batch
HostBuffer<size_t> offsets_;
HostBuffer<void*> h_tensor_ptrs_;
Expand Down Expand Up @@ -892,11 +892,11 @@ struct BufState {

class GraphDataGenerator {
public:
GraphDataGenerator(){};
virtual ~GraphDataGenerator(){};
GraphDataGenerator() {}
virtual ~GraphDataGenerator() {}
void SetConfig(const paddle::framework::DataFeedDesc& data_feed_desc);
void AllocResource(const paddle::platform::Place& place,
std::vector<LoDTensor*> feed_vec);
std::vector<phi::DenseTensor*> feed_vec);
int AcquireInstance(BufState* state);
int GenerateBatch();
int FillWalkBuf(std::shared_ptr<phi::Allocation> d_walk);
Expand All @@ -906,7 +906,7 @@ class GraphDataGenerator {
void FillOneStep(uint64_t* start_ids,
uint64_t* walk,
int len,
NeighborSampleResult& sample_res,
NeighborSampleResult& sample_res, // NOLINT
int cur_degree,
int step,
int* len_per_row);
Expand Down Expand Up @@ -935,7 +935,7 @@ class GraphDataGenerator {
int64_t* clk_tensor_ptr_;
cudaStream_t stream_;
paddle::platform::Place place_;
std::vector<LoDTensor*> feed_vec_;
std::vector<phi::DenseTensor*> feed_vec_;
std::vector<size_t> offset_;
std::shared_ptr<phi::Allocation> d_prefix_sum_;
std::vector<std::shared_ptr<phi::Allocation>> d_device_keys_;
Expand Down Expand Up @@ -1106,9 +1106,9 @@ class DataFeed {
use_slots_index_; // -1: not used; >=0: the index of use_slots_

// The data read by DataFeed will be stored here
std::vector<LoDTensor*> feed_vec_;
std::vector<phi::DenseTensor*> feed_vec_;

LoDTensor* rank_offset_;
phi::DenseTensor* rank_offset_;

// the batch size defined by user
int default_batch_size_;
Expand Down Expand Up @@ -1599,7 +1599,7 @@ class MultiSlotInMemoryDataFeed : public InMemoryDataFeed<Record> {
virtual bool ParseOneInstanceFromPipe(Record* instance);
virtual void ParseOneInstanceFromSo(const char* str,
Record* instance,
CustomParser* parser){};
CustomParser* parser) {}
virtual int ParseInstanceFromSo(int len,
const char* str,
std::vector<Record>* instances,
Expand Down
9 changes: 5 additions & 4 deletions paddle/fluid/framework/data_transform.cc
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,9 @@ void TransformData(const OpKernelType &expected_kernel_type,
void SetTensorToVariable(const Variable &in_var,
const phi::DenseTensor &tensor,
Variable *out_var) {
if (in_var.IsType<LoDTensor>()) {
auto &in_lod_tensor = in_var.Get<LoDTensor>();
auto *tran_lod_tensor = out_var->GetMutable<LoDTensor>();
if (in_var.IsType<phi::DenseTensor>()) {
auto &in_lod_tensor = in_var.Get<phi::DenseTensor>();
auto *tran_lod_tensor = out_var->GetMutable<phi::DenseTensor>();
tran_lod_tensor->set_lod(in_lod_tensor.lod());
tran_lod_tensor->set_layout(in_lod_tensor.layout());
#ifdef PADDLE_WITH_MKLDNN
Expand All @@ -139,7 +139,8 @@ void SetTensorToVariable(const Variable &in_var,
trans_selected_rows->mutable_value()->ShareDataWith(tensor);
} else {
PADDLE_THROW(platform::errors::Unavailable(
"Unsupported variable type, only supports LoDTensor or SelectedRows, "
"Unsupported variable type, only supports phi::DenseTensor or "
"SelectedRows, "
"but the input variable type is %s.",
ToTypeName(in_var.Type())));
}
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/framework/details/all_reduce_op_handle.cc
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ void AllReduceOpHandle::AllReduceImpl(
var,
platform::errors::NotFound("Variable %s is not found in local scope.",
in_var_handles[i]->name()));
auto &lod_tensor = var->Get<LoDTensor>();
auto &lod_tensor = var->Get<phi::DenseTensor>();

if (i == 0) {
numel = static_cast<int64_t>(lod_tensor.numel());
Expand Down Expand Up @@ -246,7 +246,7 @@ void AllReduceOpHandle::AllReduceFunc(
} else { // Special handle CPU only Operator's gradient. Like CRF
auto &trg = *local_exec_scopes_[0]
->FindVar(out_var_names[0])
->GetMutable<LoDTensor>();
->GetMutable<phi::DenseTensor>();

// Reduce All phi::DenseTensor to trg in CPU
ReduceBufferData func(lod_tensor_data, trg.data(), numel);
Expand Down
9 changes: 5 additions & 4 deletions paddle/fluid/framework/details/async_ssa_graph_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -177,17 +177,18 @@ FetchResultType AsyncSSAGraphExecutor::Run(
auto &val = PADDLE_GET(FetchList, fetch_data);
for (size_t fetch_idx = 0; fetch_idx < fetch_tensors.size(); ++fetch_idx) {
if (data_is_lod_tensor(val.at(fetch_idx))) {
std::vector<const LoDTensor *> lodtensor_ptrs;
lodtensor_ptrs.push_back(&(PADDLE_GET(LoDTensor, val.at(fetch_idx))));
LoDTensor var;
std::vector<const phi::DenseTensor *> lodtensor_ptrs;
lodtensor_ptrs.push_back(
&(PADDLE_GET(phi::DenseTensor, val.at(fetch_idx))));
phi::DenseTensor var;
MergeLoDTensor(&var, lodtensor_ptrs, platform::CPUPlace());
ret.emplace_back(var);
} else {
auto array = PADDLE_GET(LoDTensorArray, val.at(fetch_idx));
LoDTensorArray item_array;
item_array.reserve(array.size());
for (size_t i = 0; i < array.size(); ++i) {
std::vector<const LoDTensor *> lodtensor_ptrs;
std::vector<const phi::DenseTensor *> lodtensor_ptrs;
lodtensor_ptrs.push_back(&array[i]);
item_array.emplace_back();
MergeLoDTensor(
Expand Down
5 changes: 3 additions & 2 deletions paddle/fluid/framework/details/eager_deletion_op_handle.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,9 @@ void EagerDeletionOpHandle::RunImpl() {

Variable *var = vars_[i];

if (var->IsType<LoDTensor>()) {
garbages.emplace_back(var->GetMutable<LoDTensor>()->MoveMemoryHolder());
if (var->IsType<phi::DenseTensor>()) {
garbages.emplace_back(
var->GetMutable<phi::DenseTensor>()->MoveMemoryHolder());
} else if (var->IsType<phi::SelectedRows>()) {
garbages.emplace_back(var->GetMutable<phi::SelectedRows>()
->mutable_value()
Expand Down
Loading