[Eager Hook] Support GradientHook and ReduceHook, expose related interface to python #39893

Merged (2 commits) on Feb 26, 2022
6 changes: 3 additions & 3 deletions paddle/fluid/eager/accumulation/accumulation_node.cc
@@ -76,13 +76,13 @@ operator()(
}

void GradNodeAccumulation::RegisterReduceHook(
const std::function<void(void)>& hook) {
reduce_hooks_.emplace_back(hook);
std::shared_ptr<TensorVoidHook>&& hook) {
reduce_hooks_.emplace_back(std::move(hook));
}

void GradNodeAccumulation::ApplyReduceHooks() {
for (auto& hook : reduce_hooks_) {
hook();
(*hook)();
}
}
} // namespace egr
5 changes: 3 additions & 2 deletions paddle/fluid/eager/accumulation/accumulation_node.h
@@ -16,6 +16,7 @@

#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/hooks.h"

namespace egr {

@@ -39,7 +40,7 @@ class GradNodeAccumulation : public GradNodeBase {
/**
* Register ReduceHook
* **/
void RegisterReduceHook(const std::function<void(void)>& hook);
void RegisterReduceHook(std::shared_ptr<TensorVoidHook>&& hook);

/**
* Apply ReduceHook here
@@ -54,7 +55,7 @@ class GradNodeAccumulation : public GradNodeBase {
const paddle::experimental::Tensor&)>
retain_grad_hook_;

std::vector<std::function<void(void)>> reduce_hooks_;
std::vector<std::shared_ptr<TensorVoidHook>> reduce_hooks_;
};

} // namespace egr
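A minimal usage sketch for the new ReduceHook path (assuming the headers introduced by this PR; the helper name and flag are illustrative, not part of the change): a plain lambda is wrapped in egr::CppTensorVoidHook, the accumulation node stores it as a std::shared_ptr<TensorVoidHook>, and ApplyReduceHooks() later invokes it through (*hook)().

#include <memory>

#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/hooks.h"

// Illustrative helper: register a void hook that flips a flag when reduce
// hooks are applied on the leaf node.
void RegisterExampleReduceHook(egr::GradNodeAccumulation* node, bool* fired) {
  node->RegisterReduceHook(
      std::make_shared<egr::CppTensorVoidHook>([fired]() { *fired = true; }));
}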
51 changes: 25 additions & 26 deletions paddle/fluid/eager/api/utils/hook_utils.cc
@@ -22,19 +22,19 @@
namespace egr {
namespace egr_utils_api {

void RegisterGradientHookForTensor(
int64_t RegisterGradientHookForTensor(
const paddle::experimental::Tensor& tensor,
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>& hook) {
std::shared_ptr<egr::TensorHook>&& hook) {
// Find grad_node and out_rank from AutogradMeta
std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(tensor);
auto rank_info = EagerUtils::unsafe_autograd_meta(tensor)->OutRankInfo();

grad_node->RegisterGradientHook(rank_info.first, rank_info.second, hook);
return grad_node->RegisterGradientHook(rank_info.first, rank_info.second,
std::move(hook));
}

void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor,
const std::function<void(void)>& hook) {
std::shared_ptr<egr::TensorVoidHook>&& hook) {
if (IsLeafTensor(tensor)) {
VLOG(6) << "Register ReduceHook for leaf tensor";
std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(tensor);
@@ -45,7 +45,7 @@ void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor,
"with type: GradNodeAccumulation"));
auto accumulation_grad_node =
std::dynamic_pointer_cast<GradNodeAccumulation>(grad_node);
accumulation_grad_node->RegisterReduceHook(hook);
accumulation_grad_node->RegisterReduceHook(std::move(hook));
} else {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Only can register reduce hook for leaf Tensor."));
@@ -65,28 +65,27 @@ static void RetainGradForRegularNode(
meta->WeakGrad();

// Define Hook
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>
hook = [weak_grad_tensor](const paddle::experimental::Tensor& t) {
if (!weak_grad_tensor.expired()) {
auto grad_tensor = weak_grad_tensor.lock();
if (t.defined()) {
VLOG(7) << "Set impl for RetainGrad Hook for tensor: " << t.name();
// Simply Copy impl() to grad_tensor
grad_tensor->set_impl(t.impl());
return *grad_tensor.get();
} else {
VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
return paddle::experimental::Tensor();
}
} else {
VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
return paddle::experimental::Tensor();
}
};
auto hook = [weak_grad_tensor](const paddle::experimental::Tensor& t) {
if (!weak_grad_tensor.expired()) {
auto grad_tensor = weak_grad_tensor.lock();
if (t.defined()) {
VLOG(7) << "Set impl for RetainGrad Hook for tensor: " << t.name();
// Simply Copy impl() to grad_tensor
grad_tensor->set_impl(t.impl());
return *grad_tensor.get();
} else {
VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
return paddle::experimental::Tensor();
}
} else {
VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
return paddle::experimental::Tensor();
}
};

// Append to GradientHooks
RegisterGradientHookForTensor(tensor, hook);
RegisterGradientHookForTensor(tensor,
std::make_shared<egr::CppTensorHook>(hook));
}

void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
8 changes: 4 additions & 4 deletions paddle/fluid/eager/api/utils/hook_utils.h
@@ -16,17 +16,17 @@

#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/phi/api/all.h"
namespace egr {
namespace egr_utils_api {

void RegisterGradientHookForTensor(
int64_t RegisterGradientHookForTensor(
const paddle::experimental::Tensor& tensor,
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>& hook);
std::shared_ptr<egr::TensorHook>&& hook);

void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor,
const std::function<void(void)>& hook);
std::shared_ptr<egr::TensorVoidHook>&& hook);
void RetainGradForTensor(const paddle::experimental::Tensor& tensor);

} // namespace egr_utils_api
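A hedged sketch of the updated egr_utils_api entry point (the helper name is hypothetical): RegisterGradientHookForTensor now takes a std::shared_ptr<egr::TensorHook> by rvalue reference and returns the int64_t id assigned by the owning grad node, which can later be passed to GradNodeBase::RemoveGradientHook.

#include <memory>

#include "paddle/fluid/eager/api/utils/hook_utils.h"
#include "paddle/fluid/eager/hooks.h"

// Illustrative helper: attach an identity gradient hook to a tensor and
// hand back the hook id for later removal.
int64_t AttachIdentityGradHook(const paddle::experimental::Tensor& t) {
  return egr::egr_utils_api::RegisterGradientHookForTensor(
      t, std::make_shared<egr::CppTensorHook>(
             [](const paddle::experimental::Tensor& grad) { return grad; }));
}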
7 changes: 4 additions & 3 deletions paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -2040,12 +2040,13 @@ static std::string GenerateGradNodeCCContents(

const char* BWD_RETURN_TEMPLATE =
" std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads = "
"egr::GradNodeBase::ApplyGradientHooks(grads);\n"
"GradNode%s::ApplyGradientHooks(grads);\n"
" std::vector<std::vector<paddle::experimental::Tensor>> outputs(%d);\n"
" %s\n"
" return outputs;\n";
generated_grad_function_body = paddle::string::Sprintf(
BWD_RETURN_TEMPLATE, in_vars.size(), generated_grad_function_body);
generated_grad_function_body =
paddle::string::Sprintf(BWD_RETURN_TEMPLATE, fwd_op_type, in_vars.size(),
generated_grad_function_body);

// [Generation] Get Full Grad Function
const char* GRAD_FUNCTION_TEMPLATE =
25 changes: 12 additions & 13 deletions paddle/fluid/eager/grad_node_info.cc
@@ -210,22 +210,22 @@ const std::vector<std::vector<Edge>>& GradNodeBase::GetEdges() const {
return adj_edges_;
}

void GradNodeBase::RegisterGradientHook(
size_t slot_id, size_t rank,
const std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>& hook) {
gradient_hooks_.emplace_back(std::make_tuple(slot_id, rank, hook));
int64_t GradNodeBase::RegisterGradientHook(
size_t slot_id, size_t rank, std::shared_ptr<egr::TensorHook>&& hook) {
gradient_hooks_.emplace(next_hook_id_,
std::make_tuple(slot_id, rank, std::move(hook)));
return next_hook_id_++;
}

std::vector<std::vector<paddle::experimental::Tensor>>
GradNodeBase::ApplyGradientHooks(
const std::vector<std::vector<paddle::experimental::Tensor>>& tensors) {
std::vector<std::vector<paddle::experimental::Tensor>> outs(tensors.size());
for (auto& tuple : gradient_hooks_) {
size_t slot_id = std::get<0>(tuple);
size_t rank = std::get<1>(tuple);
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>& hook = std::get<2>(tuple);
for (auto& hook_pair : gradient_hooks_) {
size_t slot_id = std::get<0>(hook_pair.second);
size_t rank = std::get<1>(hook_pair.second);

auto hook = std::get<2>(hook_pair.second);

PADDLE_ENFORCE(slot_id < tensors.size(),
paddle::platform::errors::Fatal(
@@ -242,12 +242,11 @@ GradNodeBase::ApplyGradientHooks(
slot_out.resize(tensors[slot_id].size());
paddle::experimental::Tensor& out = slot_out[rank];
if (!out.defined() || !out.initialized()) {
VLOG(8) << "Run Hook for tensor: " << tensors[slot_id][rank].name();
out = hook(tensors[slot_id][rank]);
out = (*hook)(tensors[slot_id][rank]);
} else {
// If more than one hook is registered, the input to the next hook func
// should be the output of the previous hook
out = hook(out);
out = (*hook)(out);
}
}

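A sketch of the composition behaviour the rewritten loop preserves (the helper name is illustrative): when several hooks are registered for the same (slot_id, rank), ApplyGradientHooks feeds the output of the earlier hook into the later one, so their effects compose in registration order.

#include <memory>

#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/hooks.h"

// Illustrative: two pass-through hooks on slot 0 / rank 0; when the node's
// ApplyGradientHooks runs, the second hook receives the tensor returned by
// the first one.
void RegisterComposedHooks(egr::GradNodeBase* node) {
  auto pass_through = [](const paddle::experimental::Tensor& g) { return g; };
  node->RegisterGradientHook(/*slot_id=*/0, /*rank=*/0,
                             std::make_shared<egr::CppTensorHook>(pass_through));
  node->RegisterGradientHook(/*slot_id=*/0, /*rank=*/0,
                             std::make_shared<egr::CppTensorHook>(pass_through));
}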
31 changes: 22 additions & 9 deletions paddle/fluid/eager/grad_node_info.h
@@ -15,6 +15,7 @@
#pragma once

#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/phi/api/all.h"

namespace egr {
@@ -135,14 +136,24 @@ class GradNodeBase {
/**
* Register GradientHook
* **/
void RegisterGradientHook(size_t slot_id, size_t rank,
const std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>& hook);
int64_t RegisterGradientHook(size_t slot_id, size_t rank,
std::shared_ptr<egr::TensorHook>&& hook);

/**
* Remove GradientHook
* **/
bool RemoveGradientHook(const int64_t& hook_id) {
auto remove_cnt = gradient_hooks_.erase(hook_id);
if (remove_cnt == 0) {
return false;
}
return true;
}

/**
* Apply GradientHook
* **/
inline bool GradientHooksRegistered() { return gradient_hooks_.size() != 0; }
inline bool GradientHooksRegistered() { return !gradient_hooks_.empty(); }

std::vector<std::vector<paddle::experimental::Tensor>> ApplyGradientHooks(
const std::vector<std::vector<paddle::experimental::Tensor>>& tensors);
@@ -166,12 +177,14 @@ class GradNodeBase {
// Gradient Hooks
// Customer may register a list of hooks which will be called in order during
// backward
// Each entry consists one pair of <out_rank, std::function>
std::vector<std::tuple<
/* slot id */ size_t, /* rank */ size_t,
/* hook */ std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>>>
// Each entry consists one pair of
// <hook_id, <out_rank, std::shared_ptr<TensorHook>>>
std::map<int64_t, std::tuple<
/* slot id */ size_t, /* rank */ size_t,
/* hook */ std::shared_ptr<TensorHook>>>
gradient_hooks_;

int64_t next_hook_id_{0};
};

class Edge {
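Since hooks are now keyed by an int64_t id in a std::map, an individual hook can be dropped after registration. A minimal sketch, assuming a concrete GradNodeBase subclass is available (the helper name is hypothetical):

#include <memory>

#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/hooks.h"

// Illustrative helper: register a gradient hook on slot 0 / rank 0, then
// remove it again through the id returned by RegisterGradientHook.
bool RegisterAndRemoveHook(egr::GradNodeBase* node) {
  int64_t id = node->RegisterGradientHook(
      /*slot_id=*/0, /*rank=*/0,
      std::make_shared<egr::CppTensorHook>(
          [](const paddle::experimental::Tensor& g) { return g; }));
  return node->RemoveGradientHook(id);  // true iff the id was registered
}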
63 changes: 63 additions & 0 deletions paddle/fluid/eager/hooks.h
@@ -0,0 +1,63 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <functional>
#include <memory>
#include <utility>
#include <vector>
#include "paddle/phi/api/include/tensor.h"
namespace egr {

class TensorHook {
public:
virtual ~TensorHook() = default;
virtual paddle::experimental::Tensor operator()(
const paddle::experimental::Tensor& var) = 0;
};

class TensorVoidHook {
public:
virtual ~TensorVoidHook() = default;
virtual void operator()() = 0;
};

class CppTensorHook : public TensorHook {
[Inline review comment from a Contributor on this line: "check name"]
public:
explicit CppTensorHook(std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>&& fn)
: fn_(std::move(fn)) {}

paddle::experimental::Tensor operator()(
const paddle::experimental::Tensor& var) override {
return fn_(var);
}

private:
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>
fn_;
};

class CppTensorVoidHook : public TensorVoidHook {
public:
explicit CppTensorVoidHook(std::function<void()>&& fn) : fn_(std::move(fn)) {}

void operator()() override { return fn_(); }

private:
std::function<void()> fn_;
};
} // namespace egr
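The new hooks.h gives hooks a small virtual-call interface, with CppTensorHook / CppTensorVoidHook adapting plain std::function objects (e.g. lambdas) to it. A minimal sketch of constructing both wrappers (the factory names are illustrative only):

#include <memory>

#include "paddle/fluid/eager/hooks.h"

// Illustrative factories: wrap lambdas into the TensorHook / TensorVoidHook
// interfaces so they can be handed to RegisterGradientHook / RegisterReduceHook.
std::shared_ptr<egr::TensorHook> MakeIdentityHook() {
  return std::make_shared<egr::CppTensorHook>(
      [](const paddle::experimental::Tensor& t) { return t; });
}

std::shared_ptr<egr::TensorVoidHook> MakeNoopVoidHook() {
  return std::make_shared<egr::CppTensorVoidHook>([]() {});
}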
@@ -23,6 +23,7 @@
#include "paddle/fluid/eager/grad_tensor_holder.h"
#include "paddle/fluid/eager/utils.h"

#include "paddle/fluid/eager/hooks.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/kernel_registry.h"

@@ -116,7 +117,8 @@ TEST(AccumulationNode, Tensor) {
VLOG(6) << "Running Reduce Hook";
};

node->RegisterReduceHook(reduce_hook_1);
node->RegisterReduceHook(
std::make_shared<egr::CppTensorVoidHook>(reduce_hook_1));

// operator()
paddle::experimental::Tensor _ret = node->operator()({{et0}})[0][0];
@@ -141,7 +143,8 @@ TEST(AccumulationNode, Tensor) {
ret_et0_ptr[0] = 100.0; // set to 100.0
VLOG(6) << "Running Reduce Hook";
};
node->RegisterReduceHook(reduce_hook_2);
node->RegisterReduceHook(
std::make_shared<egr::CppTensorVoidHook>(reduce_hook_2));
node->ApplyReduceHooks();

// Check ApplyReduceHooks result