Skip to content

Commit

Permalink
merge develop
Browse files Browse the repository at this point in the history
  • Loading branch information
seiriosPlus committed Aug 28, 2020
2 parents 1bc093a + edf5f31 commit c2ebb07
Show file tree
Hide file tree
Showing 73 changed files with 4,182 additions and 472 deletions.
14 changes: 13 additions & 1 deletion paddle/fluid/framework/op_version_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ struct OpUpdateRecord {
kModifyAttr,
kNewAttr,
kNewInput,
kNewOutput
kNewOutput,
kBugfixWithBehaviorChanged,
};
Type type_;
std::string remark_;
Expand Down Expand Up @@ -82,6 +83,11 @@ struct NewOutput : OpUpdateRecord {
std::string name_;
};

// Update record tagged with Type::kBugfixWithBehaviorChanged. It carries only
// a free-form remark and declares no additional members of its own.
struct BugfixWithBehaviorChanged : OpUpdateRecord {
// `remark` is forwarded to the OpUpdateRecord base together with the type tag.
explicit BugfixWithBehaviorChanged(const std::string& remark)
: OpUpdateRecord({Type::kBugfixWithBehaviorChanged, remark}) {}
};

class OpVersionDesc {
public:
OpVersionDesc& ModifyAttr(const std::string& name, const std::string& remark,
Expand Down Expand Up @@ -110,6 +116,12 @@ class OpVersionDesc {
return *this;
}

// Appends a compatible::BugfixWithBehaviorChanged record carrying `remark` to
// infos_ and returns *this so that further calls can be chained.
OpVersionDesc& BugfixWithBehaviorChanged(const std::string& remark) {
  // The compatible:: qualifier is needed because, inside this class, the
  // unqualified name refers to this member function rather than the record
  // struct. make_shared avoids a naked `new`; the resulting
  // shared_ptr<Derived> converts implicitly to shared_ptr<OpUpdateRecord>.
  infos_.push_back(
      std::make_shared<compatible::BugfixWithBehaviorChanged>(remark));
  return *this;
}

private:
std::vector<std::shared_ptr<OpUpdateRecord>> infos_;
};
Expand Down
4 changes: 4 additions & 0 deletions paddle/fluid/framework/op_version_registry_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ namespace compatible {

TEST(test_operator_version, test_operator_version) {
REGISTER_OP_VERSION(test__)
.AddCheckpoint(
R"ROC(Fix the bug of reshape op, support the case of axis < 0)ROC",
framework::compatible::OpVersionDesc().BugfixWithBehaviorChanged(
"Support the case of axis < 0"))
.AddCheckpoint(
R"ROC(
Upgrade reshape, modified one attribute [axis] and add a new attribute [size].
Expand Down
18 changes: 14 additions & 4 deletions paddle/fluid/framework/tensor_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -913,10 +913,20 @@ std::ostream& print_tensor(std::ostream& os, const framework::Tensor& tensor) {
auto element_num = tensor.numel();

os << " - data: [";
if (element_num > 0) {
os << inspect[0];
for (int j = 1; j < element_num; ++j) {
os << " " << inspect[j];
// Note: int8_t and uint8_t are typically typedefs of char types, so ostream
// would print them as characters; cast them to an integer to print numbers.
if (typeid(int8_t) == typeid(T) || typeid(uint8_t) == typeid(T)) {
if (element_num > 0) {
os << signed(inspect[0]);
for (int j = 1; j < element_num; ++j) {
os << " " << signed(inspect[j]);
}
}
} else {
if (element_num > 0) {
os << inspect[0];
for (int j = 1; j < element_num; ++j) {
os << " " << inspect[j];
}
}
}
os << "]";
Expand Down
33 changes: 0 additions & 33 deletions paddle/fluid/imperative/backward_strategy.h

This file was deleted.

9 changes: 5 additions & 4 deletions paddle/fluid/imperative/basic_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,13 @@
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/profiler.h"

DECLARE_bool(sort_sum_gradient);

namespace paddle {
namespace imperative {

void BasicEngine::Init(VarBase* var, const detail::BackwardStrategy& strategy,
bool retain_graph) {
backward_strategy_ = strategy;
void BasicEngine::Init(VarBase* var, bool retain_graph) {
sorted_sum_gradient_ = FLAGS_sort_sum_gradient;
retain_graph_ = retain_graph;
init_node_ = var->GradVarBase()->GradNode();
var->GradVarBase()->ClearGradNode();
Expand Down Expand Up @@ -105,7 +106,7 @@ void BasicEngine::PrepareGradAccumulators(const OpBase& op) {

auto& accumulator = accumulators_[var.get()];
if (!accumulator) {
if (backward_strategy_.sorted_sum_gradient_) {
if (sorted_sum_gradient_) {
accumulator.reset(new SortedGradientAccumulator(var.get()));
} else {
accumulator.reset(new EagerGradientAccumulator(var.get()));
Expand Down
6 changes: 2 additions & 4 deletions paddle/fluid/imperative/basic_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/imperative/backward_strategy.h"
#include "paddle/fluid/imperative/engine.h"
#include "paddle/fluid/imperative/gradient_accumulator.h"

Expand All @@ -30,8 +29,7 @@ class OpBase;

class BasicEngine : public Engine {
public:
void Init(VarBase* var, const detail::BackwardStrategy& strategy,
bool retain_graph = false);
void Init(VarBase* var, bool retain_graph = false);

void Execute() override;

Expand All @@ -46,7 +44,7 @@ class BasicEngine : public Engine {

private:
std::shared_ptr<GradOpNode> init_node_;
detail::BackwardStrategy backward_strategy_;
bool sorted_sum_gradient_;
std::unordered_map<GradOpNode*, size_t> node_deps_;
std::unordered_map<VariableWrapper*, std::unique_ptr<GradientAccumulator>>
accumulators_;
Expand Down
22 changes: 11 additions & 11 deletions paddle/fluid/imperative/partial_grad_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/string/string_helper.h"

DECLARE_bool(sort_sum_gradient);

namespace paddle {
namespace imperative {

Expand Down Expand Up @@ -529,8 +531,7 @@ class PartialGradTask {
const std::vector<std::shared_ptr<VarBase>> &output_targets,
const std::vector<std::shared_ptr<VarBase>> &output_grads,
const std::vector<std::shared_ptr<VarBase>> &no_grad_vars,
const platform::Place &place,
const detail::BackwardStrategy &strategy, bool create_graph,
const platform::Place &place, bool create_graph,
bool retain_graph, bool allow_unused, bool only_inputs);

std::vector<std::shared_ptr<VarBase>> Run();
Expand Down Expand Up @@ -577,23 +578,22 @@ class PartialGradTask {
bool retain_graph_;
bool allow_unused_;
bool only_inputs_;
detail::BackwardStrategy strategy_;
bool sorted_sum_gradient_{FLAGS_sort_sum_gradient};
};

PartialGradTask::PartialGradTask(
const std::vector<std::shared_ptr<VarBase>> &input_targets,
const std::vector<std::shared_ptr<VarBase>> &output_targets,
const std::vector<std::shared_ptr<VarBase>> &output_grads,
const std::vector<std::shared_ptr<VarBase>> &no_grad_vars,
const platform::Place &place, const detail::BackwardStrategy &strategy,
bool create_graph, bool retain_graph, bool allow_unused, bool only_inputs) {
const platform::Place &place, bool create_graph, bool retain_graph,
bool allow_unused, bool only_inputs) {
input_targets_ = input_targets;
place_ = place;
create_graph_ = create_graph;
retain_graph_ = retain_graph;
allow_unused_ = allow_unused;
only_inputs_ = only_inputs;
strategy_ = strategy;

PADDLE_ENFORCE_EQ(only_inputs_, true,
platform::errors::Unimplemented(
Expand Down Expand Up @@ -981,7 +981,7 @@ void PartialGradTask::PrepareInitialGradientAccumulators(const OpBase *op) {

if (!accumulator) {
accumulator.reset(new GradientAccumulationInfo(
var, strategy_.sorted_sum_gradient_, create_graph_));
var, sorted_sum_gradient_, create_graph_));
}

accumulator->IncreaseTotalRefCnt();
Expand Down Expand Up @@ -1033,11 +1033,11 @@ PartialGradEngine::PartialGradEngine(
const std::vector<std::shared_ptr<VarBase>> &output_targets,
const std::vector<std::shared_ptr<VarBase>> &output_grads,
const std::vector<std::shared_ptr<VarBase>> &no_grad_vars,
const platform::Place &place, const detail::BackwardStrategy &strategy,
bool create_graph, bool retain_graph, bool allow_unused, bool only_inputs)
const platform::Place &place, bool create_graph, bool retain_graph,
bool allow_unused, bool only_inputs)
: task_(new PartialGradTask(input_targets, output_targets, output_grads,
no_grad_vars, place, strategy, create_graph,
retain_graph, allow_unused, only_inputs)) {}
no_grad_vars, place, create_graph, retain_graph,
allow_unused, only_inputs)) {}

// Destructor: calls Clear() when the engine is destroyed.
PartialGradEngine::~PartialGradEngine() { Clear(); }

Expand Down
4 changes: 1 addition & 3 deletions paddle/fluid/imperative/partial_grad_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#include <memory>
#include <vector>
#include "paddle/fluid/imperative/backward_strategy.h"
#include "paddle/fluid/imperative/engine.h"
#include "paddle/fluid/platform/place.h"

Expand All @@ -33,8 +32,7 @@ class PartialGradEngine : public Engine {
const std::vector<std::shared_ptr<VarBase>> &output_targets,
const std::vector<std::shared_ptr<VarBase>> &output_grads,
const std::vector<std::shared_ptr<VarBase>> &no_grad_vars,
const platform::Place &place,
const detail::BackwardStrategy &strategy, bool create_graph,
const platform::Place &place, bool create_graph,
bool retain_graph, bool allow_unused, bool only_inputs);

~PartialGradEngine();
Expand Down
6 changes: 2 additions & 4 deletions paddle/fluid/imperative/tests/test_tracer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,8 @@ TEST(test_tracer, test_trace_op_with_multi_device_inputs) {
framework::AttributeMap reduce_attr_map;
tracer.TraceOp("reduce_sum", reduce_in, reduce_out, reduce_attr_map,
gpu_place, true);
detail::BackwardStrategy back_st;
imperative::BasicEngine engine;
engine.Init(reduce_sum_out.get(), back_st);
engine.Init(reduce_sum_out.get());
engine.Execute();

framework::LoDTensor rlt;
Expand Down Expand Up @@ -356,9 +355,8 @@ TEST(test_tracer, test_var_without_grad_var) {
ASSERT_EQ(y_in->GradVarBase()->GradOpNum(), 0UL);
ASSERT_EQ(vout->GradVarBase()->GradOpNum(), 1UL);

detail::BackwardStrategy back_st;
imperative::BasicEngine engine;
engine.Init(vout.get(), back_st);
engine.Init(vout.get());
engine.Execute();

// check the grad
Expand Down
17 changes: 11 additions & 6 deletions paddle/fluid/operators/elementwise/elementwise_pow_op.h
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,20 @@ namespace operators {
template <typename T>
struct PowFunctor {
inline HOSTDEVICE T operator()(T a, T b) const {
#ifdef __CUDA_ARCH__
// On CUDAPlace, std::pow(3, 1) calls pow(float, float), and
// it may return a float number like 2.99..., which is truncated to 2
// when cast to int by default, and that is wrong.
// Use llrint to round it to the nearest integer, which is 3.
// TODO(wujionghao): A potential speed improvement is supporting different
// types in C++.
// #ifdef __CUDA_ARCH__
// // On CUDAPlace, std::pow(3, 1) calls pow(float, float), and
// // it will return a float number like 2.99... , which floor to 2
// // when cast to int by default and it is wrong.
// // Use llrint to cast it to the nearest integer, which is 3.
// if (std::is_integral<T>::value) {
// return std::llrint(std::pow(a, b));
// }
// #endif
if (std::is_integral<T>::value) {
return std::llrint(std::pow(a, b));
}
#endif
return std::pow(a, b);
}
};
Expand Down
13 changes: 13 additions & 0 deletions paddle/fluid/platform/flags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -508,3 +508,16 @@ DEFINE_int32(
"summary will be shown."
"If FLAGS_call_stack_level == 2, the python stack, c++ stack, and "
"error message summary will be shown.");

/**
* Debug related FLAG
* Name: sort_sum_gradient
* Since Version: 2.0.0
* Value Range: bool, default=false
* Example:
* Note: If true, gradients are summed in the reverse order of
* the forward execution sequence.
*/
DEFINE_bool(sort_sum_gradient, false,
"Sum gradients by the reverse order of "
"the forward execution sequence.");
3 changes: 2 additions & 1 deletion paddle/fluid/pybind/global_value_getter_setter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ DECLARE_bool(enable_rpc_profiler);
DECLARE_int32(multiple_of_cupti_buffer_size);
DECLARE_bool(reader_queue_speed_test_mode);
DECLARE_int32(call_stack_level);
DECLARE_bool(sort_sum_gradient);
// device management
DECLARE_int32(paddle_num_threads);
// executor
Expand Down Expand Up @@ -340,7 +341,7 @@ static void RegisterGlobalVarGetterSetter() {
REGISTER_PUBLIC_GLOBAL_VAR(
FLAGS_eager_delete_tensor_gb, FLAGS_enable_parallel_graph,
FLAGS_allocator_strategy, FLAGS_use_system_allocator, FLAGS_check_nan_inf,
FLAGS_call_stack_level, FLAGS_cpu_deterministic,
FLAGS_call_stack_level, FLAGS_sort_sum_gradient, FLAGS_cpu_deterministic,
FLAGS_enable_rpc_profiler, FLAGS_multiple_of_cupti_buffer_size,
FLAGS_reader_queue_speed_test_mode, FLAGS_pe_profile_fname,
FLAGS_print_sub_graph_dir, FLAGS_fraction_of_cpu_memory_to_use,
Expand Down
Loading

0 comments on commit c2ebb07

Please sign in to comment.