【Hackathon 6th Fundable Projects 4 No.5】remove scale_loss_grad_op_handle -part (#66293)

* remove scale_loss_grad_op_handle

* fix ci
ccsuzzh authored Jul 22, 2024
1 parent d50af66 commit a486468
Showing 10 changed files with 7 additions and 478 deletions.
1 change: 0 additions & 1 deletion paddle/fluid/framework/compiled_program.cc
@@ -24,7 +24,6 @@ limitations under the License. */
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h"
1 change: 0 additions & 1 deletion paddle/fluid/framework/details/CMakeLists.txt
@@ -2,7 +2,6 @@ set(op_handle_srcs
   nan_inf_utils_detail.cc
   var_handle.cc
   op_handle_base.cc
-  scale_loss_grad_op_handle.cc
   share_tensor_buffer_functor.cc
   computation_op_handle.cc
   share_tensor_buffer_op_handle.cc
11 changes: 0 additions & 11 deletions paddle/fluid/framework/details/build_strategy.h
@@ -74,16 +74,7 @@ struct BuildStrategy {
   // `FLAGS_cpu_deterministic=true` to env.
   enum class ReduceStrategy { kAllReduce = 0, kReduce = 1, kNoReduce = 2 };
 
-  enum class GradientScaleStrategy {
-    kCoeffNumDevice = 0,
-    kOne = 1,
-    // user can customize gradient scale to use, and just feed
-    // it into exe.run().
-    kCustomized = 2,
-  };
-
   ReduceStrategy reduce_{ReduceStrategy::kAllReduce};
-  GradientScaleStrategy gradient_scale_{GradientScaleStrategy::kCoeffNumDevice};
 
   std::string debug_graphviz_path_{""};
 
@@ -238,8 +229,6 @@ inline std::ostream &operator<<(std::ostream &os,
                                 const BuildStrategy &strategy) {
   os << "BuildStrategy: " << &strategy << std::endl;
   os << "reduce_: " << static_cast<int>(strategy.reduce_) << std::endl;
-  os << "gradient_scale_: " << static_cast<int>(strategy.gradient_scale_)
-     << std::endl;
   os << "debug_graphviz_path_: " << strategy.debug_graphviz_path_ << std::endl;
   os << "enable_backward_optimizer_op_deps_: "
      << strategy.enable_backward_optimizer_op_deps_ << std::endl;
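For context, the removed GradientScaleStrategy chose the constant that the old multi-device executor wrote into the loss gradient (loss@GRAD) before the backward pass ran. Below is a minimal sketch of that policy, assuming the usual meaning of the three enum values; it is illustrative code, not anything from this repository, and ChooseLossGradCoeff/num_devices are hypothetical names.

#include <optional>

// Mirror of the deleted enum, reproduced for illustration only.
enum class GradientScaleStrategy { kCoeffNumDevice = 0, kOne = 1, kCustomized = 2 };

// Coefficient to fill into loss@GRAD, or nullopt when the user feeds a
// custom scale tensor into exe.run() themselves (kCustomized).
std::optional<float> ChooseLossGradCoeff(GradientScaleStrategy s, int num_devices) {
  switch (s) {
    case GradientScaleStrategy::kCoeffNumDevice:
      return 1.0f / static_cast<float>(num_devices);  // average across replicas
    case GradientScaleStrategy::kOne:
      return 1.0f;  // plain sum of per-device gradients
    case GradientScaleStrategy::kCustomized:
    default:
      return std::nullopt;  // caller supplies the scale
  }
}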
127 changes: 0 additions & 127 deletions paddle/fluid/framework/details/scale_loss_grad_op_handle.cc

This file was deleted.

73 changes: 0 additions & 73 deletions paddle/fluid/framework/details/scale_loss_grad_op_handle.h

This file was deleted.
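The deleted handle's runtime job was small: on each device, fill the one-element loss@GRAD tensor with the coefficient described above. A rough stand-in for that behavior, assuming a scalar loss replicated across N devices; Tensor here is a hypothetical minimal type, not Paddle's real tensor class.

#include <vector>

struct Tensor {
  std::vector<float> data;  // hypothetical stand-in for a device tensor
};

// Write coeff = 1/N into every device's loss@GRAD (shape [1]) so that the
// summed gradients come out averaged across the N replicas.
void ScaleLossGrad(std::vector<Tensor*>& loss_grads_per_device) {
  const float coeff = 1.0f / static_cast<float>(loss_grads_per_device.size());
  for (Tensor* t : loss_grads_per_device) {
    t->data.assign(1, coeff);
  }
}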

48 changes: 6 additions & 42 deletions paddle/fluid/framework/ir/graph_helper.cc
@@ -18,7 +18,6 @@ limitations under the License. */
 #include <stack>
 
 #include "paddle/fluid/framework/details/multi_devices_helper.h"
-#include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h"
 #include "paddle/fluid/framework/ir/pass.h"
 #include "paddle/fluid/framework/op_proto_maker.h"
 #include "paddle/fluid/framework/program_utils.h"
@@ -474,37 +473,6 @@ void RemoveControlDepInputAndOuput(OpDesc *op_desc) {
   op_desc->Flush();
 }
 
-static OpDesc *ReplaceScaleLossGradOp(const Node &node, OpDesc *desc) {
-  desc->SetType("fill_constant");
-  desc->SetAttr("shape", std::vector<int64_t>({1}));
-  desc->SetAttr("value", 1.0f);
-
-  if (node.IsWrappedBy<details::OpHandleBase>()) {
-    details::OpHandleBase &op_hander =
-        const_cast<Node *>(&node)->Wrapper<details::OpHandleBase>();
-    desc->SetAttr(
-        "dtype",
-        dynamic_cast<details::ScaleLossGradOpHandle *>(&op_hander)->DType());
-    desc->SetAttr(
-        "value",
-        dynamic_cast<details::ScaleLossGradOpHandle *>(&op_hander)->Coeff());
-  }
-
-  desc->SetAttr("force_cpu", false);
-  desc->SetAttr(
-      OpProtoAndCheckerMaker::OpRoleAttrName(),
-      (static_cast<int>(OpRole::kBackward) | static_cast<int>(OpRole::kLoss)));
-  // TODO(Ruibiao) : Set OpDeviceAttrName when needed
-
-  std::vector<std::string> output_names;
-  output_names.reserve(node.outputs.size());
-  for (auto out : node.outputs) {
-    output_names.emplace_back(out->Name());
-  }
-  desc->SetOutput("Out", output_names);
-  return desc;
-}
-
 void ReplaceAllReduceOp(const Node &node,
                         proto::BlockDesc *block,
                         std::vector<OpDesc> *ops) {
@@ -662,21 +630,17 @@ static void GetGraphOpDesc(const std::vector<Node *> &nodes,
   for (Node *n : nodes) {
     // if node is not Op, skip
     if (!n->IsOp()) continue;
-    // create fill_constant op
-    if (n->Name() == "scale_loss_grad") {
-      VLOG(4) << "convert op node scale_loss_grad to desc fill_constant";
-      ops->emplace_back();
-      auto &desc = ops->back();
-      ReplaceScaleLossGradOp(*n, &desc);
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-    } else if ((n->Name() == "allreduce" || n->Name() == "fused_all_reduce") &&
-               dynamic_cast<details::NCCLOpHandleBase *>(
-                   &(n->Wrapper<details::OpHandleBase>())) != nullptr) {
+    if ((n->Name() == "allreduce" || n->Name() == "fused_all_reduce") &&
+        dynamic_cast<details::NCCLOpHandleBase *>(
+            &(n->Wrapper<details::OpHandleBase>())) != nullptr) {
       VLOG(4) << "convert op node " << n->Name() << " to desc c_allreduce_sum";
       ReplaceAllReduceOp(*n, block, ops);
       VLOG(4) << n->ToString();
+      continue;
+    }
 #endif
-    } else if (n->Op()) {
+    if (n->Op()) {
       VLOG(4) << "convert op node to desc " << n->Op()->Type();
       if (is_fused_opt(n)) {
         OpDesc depend_desc(n->Op()->Block());
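One detail in the hunk above: the old code chained the NCCL branch into the scale_loss_grad else-if ladder, so a matched allreduce node could never also reach the n->Op() branch. With the first arm gone, the branch becomes a standalone if, and the newly added continue; is what now prevents that fall-through. A minimal sketch of the same pattern, with illustrative names only:

#include <iostream>
#include <string>

// After the refactor, each branch is an independent `if`, so a handled node
// must bail out explicitly instead of relying on else-if exclusivity.
void Convert(const std::string& name) {
  if (name == "allreduce") {
    std::cout << "rewrite as c_allreduce_sum\n";
    return;  // plays the role of the loop's `continue;`
  }
  if (!name.empty()) {  // stands in for the `n->Op()` check
    std::cout << "generic conversion for " << name << "\n";
  }
}

int main() {
  Convert("allreduce");  // only the rewrite line prints
  Convert("relu");       // only the generic line prints
  return 0;
}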
75 changes: 0 additions & 75 deletions paddle/fluid/framework/ir/graph_to_program_pass_test.cc
@@ -410,81 +410,6 @@ TEST(GraphToProgramPass, MultiBlock) {
   FLAGS_convert_all_blocks = flag_temp;
 }
 
-void BuildProgramWithScaleLossGrad(Graph* g) {
-  OpDesc op1;
-  op1.SetType("op1");
-  OpDesc op2;
-  op2.SetType("op2");
-  OpDesc op3;
-  op3.SetType("op3");
-  OpDesc op4;
-  op4.SetType("op4");
-  VarDesc var1("var1");
-  VarDesc var2("var2");
-
-  ir::Node* o1 = g->CreateOpNode(&op1);
-  ir::Node* o2 = g->CreateOpNode(&op2);
-  ir::Node* o3 =
-      g->CreateEmptyNode("scale_loss_grad", ir::Node::Type::kOperation);
-  ir::Node* o4 =
-      g->CreateEmptyNode("scale_loss_grad", ir::Node::Type::kOperation);
-  ir::Node* v1 = g->CreateVarNode(&var1);
-  ir::Node* v2 = g->CreateVarNode(&var2);
-
-  // o1->v1->o2
-  o1->outputs.push_back(v1);
-  o2->inputs.push_back(v1);
-  v1->inputs.push_back(o1);
-  v1->outputs.push_back(o2);
-  // o3->v1
-  o3->outputs.push_back(v1);
-  v1->inputs.push_back(o1);
-  v1->inputs.push_back(o3);
-  // o4->v2
-  o4->outputs.push_back(v2);
-  v2->inputs.push_back(o4);
-}
-
-TEST(GraphToProgramPass, ReplaceScaleLossGrad) {
-  // Step1: Build a program with multi block
-  ProgramDesc before_prog;
-  Graph before_graph(before_prog);
-  BuildProgramWithScaleLossGrad(&before_graph);
-
-  // Step2 : Convert graph back to program
-  auto pass = paddle::framework::ir::PassRegistry::Instance().Get(
-      "graph_to_program_pass");
-
-  ProgramDesc after_prog;
-  pass->SetNotOwned<paddle::framework::ProgramDesc>("program", &after_prog);
-  pass->Apply(&before_graph);
-
-  // Step3 : statistics scale_loss_grad and fill_constant number
-  int scale_node_num = 0, fill_node_num = 0;
-  const auto& before_nodes_set = before_graph.Nodes();
-  for (const auto& n : before_nodes_set) {
-    if (n->Name() == "scale_loss_grad") {
-      ++scale_node_num;
-    } else if (n->Name() == "fill_constant") {
-      ++fill_node_num;
-    }
-  }
-
-  int scale_op_num = 0, fill_op_num = 0;
-  const auto& block = after_prog.Block(0);
-  for (const auto& op : block.AllOps()) {
-    if (op->Type() == "fill_constant") {
-      ++fill_op_num;
-    } else if (op->Type() == "scale_loss_grad") {
-      ++scale_op_num;
-    }
-  }
-
-  // Check pass OK
-  EXPECT_EQ(scale_op_num, 0);
-  EXPECT_EQ(scale_node_num + fill_node_num, fill_op_num);
-}
-
 } // namespace ir
 } // namespace framework
 } // namespace paddle
