[Eager] Supported Eager Dygraph Forward & AutoGrad functions #37323

Merged · 13 commits · Nov 26, 2021 · Changes from all commits
4 changes: 2 additions & 2 deletions paddle/fluid/eager/CMakeLists.txt
@@ -1,8 +1,8 @@
-add_subdirectory(accumulation)
 add_subdirectory(api)
+add_subdirectory(accumulation)
 add_subdirectory(tests)

-cc_library(grad_node_info SRCS grad_node_info.cc DEPS pten pten_api)
 cc_library(autograd_meta SRCS autograd_meta.cc DEPS pten pten_api)
+cc_library(grad_node_info SRCS grad_node_info.cc DEPS pten pten_api)
 cc_library(grad_tensor_holder SRCS grad_tensor_holder.cc DEPS grad_node_info gradient_accumulation)
 cc_library(utils SRCS utils.cc DEPS pten pten_api global_utils layer proto_desc operator op_registry variable_helper memcpy scale_op autograd_meta)
3 changes: 3 additions & 0 deletions paddle/fluid/eager/api/CMakeLists.txt
@@ -1 +1,4 @@
add_subdirectory(utils)
add_subdirectory(generated)

cc_library(eager_api SRCS all.cc DEPS global_utils eager_scale)
1 change: 1 addition & 0 deletions paddle/fluid/eager/api/all.h
@@ -14,4 +14,5 @@
//
#pragma once

#include "paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
1 change: 1 addition & 0 deletions paddle/fluid/eager/api/generated/CMakeLists.txt
@@ -0,0 +1 @@
add_subdirectory(eager_generated)
2 changes: 2 additions & 0 deletions paddle/fluid/eager/api/generated/eager_generated/CMakeLists.txt
@@ -0,0 +1,2 @@
add_subdirectory(backwards)
add_subdirectory(forwards)
1 change: 1 addition & 0 deletions paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt
@@ -0,0 +1 @@
cc_library(scale_node SRCS scale_node.cc DEPS global_utils pten pten_api grad_node_info)
172 changes: 172 additions & 0 deletions paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc
@@ -0,0 +1,172 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/eager_tensor.h"

#include "paddle/pten/api/all.h"

#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/errors.h"

#include "glog/logging.h"

namespace egr {

template <typename DeviceContext>
static void ScaleDeviceDispatch(const pten::DenseTensor& dense_tensor,
const DeviceContext& dev_ctx, float scale,
float bias, bool bias_after_scale,
pten::DenseTensor* dense_out) {
switch (dense_tensor.dtype()) {
case pten::DataType::FLOAT64: {
pten::Scale<double>(dev_ctx, dense_tensor /* tensor */, scale /* scale */,
bias /* bias */,
bias_after_scale /* bias_after_scale */,
dense_out /* out tensor */);
break;
}
case pten::DataType::FLOAT32: {
pten::Scale<float>(dev_ctx, dense_tensor /* tensor */, scale /* scale */,
bias /* bias */,
bias_after_scale /* bias_after_scale */,
dense_out /* out tensor */);
break;
}
case pten::DataType::INT64: {
pten::Scale<int64_t>(dev_ctx, dense_tensor /* tensor */,
scale /* scale */, bias /* bias */,
bias_after_scale /* bias_after_scale */,
dense_out /* out tensor */);
break;
}
case pten::DataType::INT32: {
pten::Scale<int32_t>(dev_ctx, dense_tensor /* tensor */,
scale /* scale */, bias /* bias */,
bias_after_scale /* bias_after_scale */,
dense_out /* out tensor */);
break;
}
default: {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Detected unsupported data type."
"Only Float64, Float32, Int64, Int32 are supported for now."));
break;
}
}
}

void ScaleAPI(const egr::EagerTensor& x, float scale, float bias,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

eager API 后续这块计划是怎样的?暂时均以手写封装推进吗?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

还是说在pten api解决目前的性能问题之前,均会以这种方式独立编写?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

没有,这里主要是因为之前所有测试都是以Scale为基础写的,但是Pten暂时没迁Scale
如果后面Scale迁移了我会替换成Pten版的

bool bias_after_scale, egr::EagerTensor* out) {
  // TODO(jiabin): Support multiple tensors here; creating a DenseTensor is
  // not a proper way to demo it
// Run Forward Function
auto dense_tensor = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
// Init output tensor
auto tensor_meta = pten::DenseTensorMeta(
dense_tensor->dtype(), dense_tensor->dims(), dense_tensor->layout());
auto place = dense_tensor->place();
size_t bytes_size = paddle::framework::product(dense_tensor->dims()) *
SizeOf(dense_tensor->dtype());
auto dense_out = std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>(
paddle::memory::Alloc(place, bytes_size), 0),
std::move(tensor_meta));
// Handle Device Context
const paddle::platform::Place& expected_kernel_place =
Controller::Instance().GetExpectedPlace();
paddle::platform::DeviceContextPool& pool =
paddle::platform::DeviceContextPool::Instance();

if (expected_kernel_place == paddle::platform::CPUPlace()) {
auto* dev_ctx = dynamic_cast<paddle::platform::CPUDeviceContext*>(
pool.Get(expected_kernel_place));
if (!dev_ctx) {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Cannot convert device_context to CPUDeviceContext."
"This indicates backend mismatch."
"Pleas double check your expected place"));
}
ScaleDeviceDispatch<paddle::platform::CPUDeviceContext>(
*dense_tensor.get(), *dev_ctx, scale, bias, bias_after_scale,
dense_out.get());

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
} else if (expected_kernel_place == paddle::platform::CUDAPlace()) {
auto* dev_ctx = dynamic_cast<paddle::platform::CUDADeviceContext*>(
pool.Get(expected_kernel_place));
if (!dev_ctx) {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Cannot convert device_context to CUDADeviceContext."
"This indicates backend mismatch."
"Pleas double check your expected place"));
}
ScaleDeviceDispatch<paddle::platform::CUDADeviceContext>(
*dense_tensor.get(), *dev_ctx, scale, bias, bias_after_scale,
dense_out.get());
#endif
} else {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Detected unsupported backend."
"Only CPU and CUDA Backend are supported for now."
"Please double check if your backend falls into the above two "
"categories."));
}

out->set_impl(dense_out);
}

void GradNodeScale::SetTensorWrappers_X(
const std::vector<egr::EagerTensor>& tensors) {
// Does nothing for scale
}

void GradNodeScale::SetAttributes_scale(float scale) { scale_ = scale; }

std::vector<std::vector<egr::EagerTensor>> GradNodeScale::operator()(
const std::vector<std::vector<egr::EagerTensor>>& grads) {
// 1. Check Output Size
PADDLE_ENFORCE(
((grads.size() == 1) && (grads[0].size() == 1)),
paddle::platform::errors::Fatal(
"ScaleGradNode takes exactly 1 grad tensor."
"However received: %d",
"This indicates an issue with Eager Dygraph Backward logic",
grads.size()));
std::vector<std::vector<egr::EagerTensor>> outs;
  // 2. Create the needed output pattern
egr::EagerTensor out;
// Apply Gradient Hooks
if (GradientHooksRegistered()) {
// TODO(jiabin): Shall we apply hook slot by slot here or accept
// vector<vector<pten::tensor>> to apply all hooks?
std::vector<std::vector<egr::EagerTensor>> hooked_grads =
ApplyGradientHooks(grads);
ScaleAPI(/* slot by slot set */ hooked_grads[0][0], scale_, 0.0 /* bias */,
true /* bias_after_scale */, &out);
} else {
ScaleAPI(grads[0][0], scale_, 0.0 /* bias */, true /* bias_after_scale */,
&out);
}

// Apply Reduce Hooks
if (ReduceHooksRegistered()) {
ApplyReduceHooks();
}
return {{out}};
}

} // namespace egr
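For orientation, here is a minimal sketch of how this backward functor could be exercised by hand, mirroring what the eager backward engine does for a single-slot node. The grad_out tensor and its construction are assumed, not part of this diff:

// Hedged sketch: manually driving GradNodeScale, assuming `grad_out` is an
// egr::EagerTensor that already holds a dense gradient.
egr::GradNodeScale node(/* bwd_in_slot_num */ 1, /* bwd_out_slot_num */ 1);
node.SetAttributes_scale(5.0f);

// grads is indexed as [slot][tensor within slot]; GradNodeScale expects
// exactly one slot holding exactly one tensor.
std::vector<std::vector<egr::EagerTensor>> grads = {{grad_out}};

// Returns {{grad_out * 5.0f}}: the functor applies ScaleAPI with
// bias = 0.0 and bias_after_scale = true, as in the body above.
std::vector<std::vector<egr::EagerTensor>> grad_ins = node(grads);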
54 changes: 54 additions & 0 deletions paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h
@@ -0,0 +1,54 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/tensor_wrapper.h"

/*
 Each operation has a specific GradNode inherited from GradNodeBase.
 A specific GradNode defines:
 1. its input tensors;
 2. an operator() override that performs the actual backward computation.

TODO: Generate GradNode via auto-code-generation
*/
namespace egr {

void ScaleAPI(const egr::EagerTensor& x, float scale, float bias,
bool bias_after_scale, egr::EagerTensor* out);

class GradNodeScale : public GradNodeBase {
public:
// Constructor: configure fwd input tensors to grad node
GradNodeScale(size_t bwd_in_slot_num, size_t bwd_out_slot_num)
: GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {}
~GradNodeScale() override = default;

// Functor: perform backward computations
virtual std::vector<std::vector<egr::EagerTensor>> operator()(
const std::vector<std::vector<egr::EagerTensor>>& grads) override;

void SetTensorWrappers_X(const std::vector<egr::EagerTensor>& tensors);

void SetAttributes_scale(float scale);

// Members: define fwd input tensors
// For Scale there is no fwd input tensor needed
private:
float scale_{1.0};
};

} // namespace egr
1 change: 1 addition & 0 deletions paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt
@@ -0,0 +1 @@
cc_library(eager_scale SRCS scale.cc DEPS pten_api pten autograd_meta scale_node)
100 changes: 100 additions & 0 deletions paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc
@@ -0,0 +1,100 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * This file should be automatically generated by the code auto-generator.
 * All ops' C++ autograd logic is defined here; in the Python-C extension
 * API system we try to avoid any autograd-related code and move it all
 * here.
 *
 * Currently, we just do some fwd autograd manually here, and will replace
 * it with the auto code generator later.
 **/

#include "paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h"
#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/utils.h"

#include "paddle/pten/api/all.h"
#include "paddle/pten/include/core.h"

namespace egr {

egr::EagerTensor scale(const egr::EagerTensor& x, float scale, float bias,
bool bias_after_scale, bool trace_backward) {
// 1. Run Forward
// 1.1 Create outputs
egr::EagerTensor out;
  // 1.2 Needed by the original op; we assemble ins, outs, attrs here

// 1.3 Call forward C++ api
ScaleAPI(x, scale, bias, bias_after_scale, &out);

// 2. Build Backward Depends
// 2.1 Get AutogradMetas for all ins and outs
auto p_autograd_in = EagerUtils::unsafe_autograd_meta(x);
  // NOTE: Call EagerUtils::multi_autograd_meta when we have a vector of outputs
auto p_autograd_out = EagerUtils::autograd_meta(&out);

// 2.2 Add GradNode
// 2.2.1 ComputeRequireGrad
// TODO(jiabin) : make this function accept different kinds of input
// TODO(zhanlve): which one is more efficient:
// 1. construct a vector of pointers
// 2. call "ComputeRequireGrad" multiple times
bool require_any_grad =
EagerUtils::ComputeRequireGrad(trace_backward, p_autograd_in);
if (require_any_grad) {
EagerUtils::PassStopGradient(false /*generate_grad*/, p_autograd_out);

    // 2.2.2 Set OutRankInfo for outputs; this needs to be the same as the
    // Edge's input_rank_
    /** Note:
     * 1. We provide EagerUtils::SetMultiOutRank(vector<AutogradMeta*>),
     *    since some operators have several slot names with duplicate
     *    outputs.
     * 2. We call AutogradMeta's SetSingleOutRankWithSlot only when we have
     *    a single output with a single slot name.
     **/
p_autograd_out->SetSingleOutRankWithSlot(0, 0);

// Init GradNode
auto scale_node = std::make_shared<GradNodeScale>(/* fwd_in_slot_num */ 1,
/* bwd_in_slot_num */ 1);

// Pass Attributes to GradNode
scale_node->SetAttributes_scale(scale);

// Set Next Edges
scale_node->AddEdges(*p_autograd_in, /*slot id*/ 0);

// Set TensorWrappers
scale_node->SetTensorWrappers_X({x});

    // Set grad out rank the same as the fwd input, and pass stop gradient to bwd
    scale_node->SetGradOutMeta(*p_autograd_in, /*slot id*/ 0);
    // Set grad in rank the same as the fwd output
    scale_node->SetGradInMeta(*p_autograd_out, /*slot id*/ 0);

    // Set history for the output: record the current GradNode on it
EagerUtils::SetHistory(p_autograd_out, scale_node);
}

return out;
}

} // namespace egr
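As a quick usage illustration of the tracing switch, a hedged sketch follows; the tensor x is assumed to be a valid, initialized EagerTensor and is not part of this diff:

// With trace_backward = true, each call creates a GradNodeScale, wires it to
// its input's AutogradMeta via AddEdges/SetHistory, and a later backward pass
// can traverse out2 -> out1 -> x.
egr::EagerTensor out1 = egr::scale(x, 2.0f, 0.0f, true, true);
egr::EagerTensor out2 = egr::scale(out1, 3.0f, 0.0f, true, true);

// With trace_backward = false, ComputeRequireGrad returns false, so no grad
// node is recorded for this call.
egr::EagerTensor out3 = egr::scale(out2, 4.0f, 0.0f, true, false);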
23 changes: 23 additions & 0 deletions paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h
@@ -0,0 +1,23 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/eager/eager_tensor.h"
namespace egr {

egr::EagerTensor scale(const egr::EagerTensor& x, float scale, float bias,
bool bias_after_scale, bool trace_backward);

} // namespace egr
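To tie the pieces together, a hedged end-to-end sketch: it builds a CPU float32 input the same way ScaleAPI builds its output above, then calls the new forward API. EagerTensor::set_impl and the SharedStorage construction follow this diff; the dims/layout literals, the spellings paddle::framework::make_ddim, pten::DataLayout::NCHW, and pten::SizeOf, and the assumption that the expected place defaults to CPU are illustrative, not confirmed by this PR:

#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/pten/api/all.h"

egr::EagerTensor MakeScaledTensor() {
  paddle::platform::CPUPlace place;
  auto meta = pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                    paddle::framework::make_ddim({2, 3}),
                                    pten::DataLayout::NCHW);
  size_t bytes = 2 * 3 * pten::SizeOf(pten::DataType::FLOAT32);
  auto dense = std::make_shared<pten::DenseTensor>(
      pten::make_intrusive<paddle::experimental::SharedStorage>(
          paddle::memory::Alloc(place, bytes), 0),
      std::move(meta));
  // NOTE: the buffer is uninitialized here; a real caller would fill it.
  egr::EagerTensor x;
  x.set_impl(dense);
  // out = x * 2.0 + 1.0, with autograd recording enabled.
  return egr::scale(x, 2.0f, 1.0f, /*bias_after_scale=*/true,
                    /*trace_backward=*/true);
}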
4 changes: 3 additions & 1 deletion paddle/fluid/eager/tests/CMakeLists.txt
@@ -1,3 +1,5 @@
-set(eager_deps pten pten_api utils tensor_utils global_utils pten_tensor autograd_meta grad_node_info grad_tensor_holder gradient_accumulation accumulation_node)
+set(eager_deps pten pten_api tensor_utils utils global_utils pten_tensor autograd_meta grad_node_info grad_tensor_holder gradient_accumulation accumulation_node)
+set(fluid_deps tracer layer proto_desc operator op_registry variable_helper memcpy)
+
 add_subdirectory(data_structure_tests)
 add_subdirectory(task_tests)
1 change: 1 addition & 0 deletions paddle/fluid/eager/tests/task_tests/CMakeLists.txt
@@ -1,2 +1,3 @@
cc_test(test_egr_task_tensor_utils SRCS tensor_utils_test.cc DEPS ${eager_deps})
cc_test(test_egr_task_eager_utils SRCS eager_utils_test.cc DEPS ${eager_deps})
cc_test(test_egr_task_forward_autograd SRCS forward_autograd_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)