diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e7ffe72b5fb8..6bb0e5f51f4be 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,8 @@ message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: " "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: " "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}") +message(STATUS "AR tools: ${CMAKE_AR}") + if(WIN32) set(CMAKE_SUPPRESS_REGENERATION ON) set(CMAKE_STATIC_LIBRARY_PREFIX lib) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 056463205ab44..15040858042a7 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -12,7 +12,7 @@ paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], va paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '0ef753f5cec69fef9ae6ad8b867b33a2')) paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f5369953dd0c443961cf79f7a00e1a03')) -paddle.fluid.Executor.run (ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)), ('document', 'aba8093edebf2d5c869b735b92811e45')) +paddle.fluid.Executor.run (ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)), ('document', 'f482e93b38b4018796969a2e1dde479d')) paddle.fluid.global_scope (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'e148d3ab1ed8edf3e928212a375959c0')) paddle.fluid.scope_guard (ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None), ('document', 'b94d1f6bcc29c4fb58fc0058561250c2')) paddle.fluid.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) @@ -277,7 +277,7 @@ paddle.fluid.layers.DynamicRNN.block (ArgSpec(args=['self'], varargs=None, keywo paddle.fluid.layers.DynamicRNN.memory (ArgSpec(args=['self', 'init', 'shape', 'value', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, False, 'float32')), ('document', 'b9174d4e91505b0c8ecc193eb51e248d')) paddle.fluid.layers.DynamicRNN.output (ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None), ('document', 'b439a176a3328de8a75bdc5c08eece4a')) paddle.fluid.layers.DynamicRNN.static_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', 'f29ad2478b6b2ad4f413d2936a331ea0')) -paddle.fluid.layers.DynamicRNN.step_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '169d694d2224f62b4f3afdc3dbc19e95')) +paddle.fluid.layers.DynamicRNN.step_input (ArgSpec(args=['self', 'x', 'level'], varargs=None, keywords=None, defaults=(0,)), ('document', '7568c5ac7622a10288d3307a94134655')) paddle.fluid.layers.DynamicRNN.update_memory (ArgSpec(args=['self', 'ex_mem', 'new_mem'], varargs=None, keywords=None, defaults=None), ('document', '5d83987da13b98363d6a807a52d8024f')) paddle.fluid.layers.StaticRNN.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.layers.StaticRNN.memory (ArgSpec(args=['self', 'init', 'shape', 'batch_ref', 'init_value', 'init_batch_dim_idx', 'ref_batch_dim_idx'], varargs=None, keywords=None, defaults=(None, None, None, 0.0, 0, 1)), ('document', 'c24e368e23afac1ed91a78a639d7a9c7')) @@ -393,9 +393,9 @@ paddle.fluid.contrib.MagnitudePruner.__init__ (ArgSpec(args=['self', 'threshold' paddle.fluid.contrib.MagnitudePruner.prune (ArgSpec(args=['self', 'param', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.RatioPruner.__init__ (ArgSpec(args=['self', 'ratios'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e7a81a325b296a9ca502ee5adb4fc85d')) paddle.fluid.contrib.RatioPruner.prune (ArgSpec(args=['self', 'param', 'ratio'], varargs=None, keywords=None, defaults=(None,)), ('document', '358cbf2978c91028fb96a195a9884645')) -paddle.fluid.contrib.load_persistables_for_increment (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None), ('document', '11fbf7e8dd2289805de291b453a33ee7')) -paddle.fluid.contrib.load_persistables_for_inference (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None), ('document', '5b5577bb3d24070da819674255d16196')) -paddle.fluid.contrib.convert_dist_to_sparse_program (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '4efbd93876832d4d35497cdbc7a1e6d8')) +paddle.fluid.contrib.load_persistables_for_increment (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None), ('document', '2ab36d4f7a564f5f65e455807ad06c67')) +paddle.fluid.contrib.load_persistables_for_inference (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None), ('document', '59066bac9db0ac6ce414d05780b7333f')) +paddle.fluid.contrib.convert_dist_to_sparse_program (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '74c39c595dc70d6be2f16d8e462d282b')) paddle.fluid.contrib.HDFSClient.__init__ (ArgSpec(args=['self', 'hadoop_home', 'configs'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.HDFSClient.delete (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None), ('document', 'c3721aa2d4d9ef5a857dd47b2681c03e')) paddle.fluid.contrib.HDFSClient.download (ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'unzip'], varargs=None, keywords=None, defaults=(False, False)), ('document', 'ca55bde92184d3fd0f9f5c963b25e634')) @@ -494,7 +494,7 @@ paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinne paddle.fluid.ParamAttr.__init__ (ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.WeightNormParamAttr.__init__ (ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', '0eed2f198dc73c08a41b61edbc755753')) +paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'f8f3df23c5633c614db781a91b81fb62')) paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', '459e316301279dfd82001b46f0b8ffca')) paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '543863d1f9d4853758adb613b8659e85')) paddle.fluid.clip.ErrorClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) @@ -518,11 +518,11 @@ paddle.reader.compose (ArgSpec(args=[], varargs='readers', keywords='kwargs', de paddle.reader.chain (ArgSpec(args=[], varargs='readers', keywords=None, defaults=None), ('document', 'd22c34e379a53901ae67a6bca7f4def4')) paddle.reader.shuffle (ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None), ('document', 'e42ea6fee23ce26b23cb142cd1d6522d')) paddle.reader.firstn (ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None), ('document', 'c5bb8f7dd4f917f1569a368aab5b8aad')) -paddle.reader.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '283bc0b8a0e26ae186b8b9bee4aec560')) +paddle.reader.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '9c804a42f8a4dbaa76b3c98e0ab7f796')) paddle.reader.PipeReader.__init__ (ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.reader.PipeReader.get_line (ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n')), ('document', '5f80a7ed70052f01665e4c74acccfa69')) +paddle.reader.PipeReader.get_line (ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n')), ('document', '9621ae612e595b6c34eb3bb5f3eb1a45')) paddle.reader.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '7d8b3a96e592107c893d5d51ce968ba0')) paddle.reader.Fake.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.reader.creator.np_array (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '28d457fbc9a71efa4ac91a3be179cada')) -paddle.reader.creator.text_file (ArgSpec(args=['path'], varargs=None, keywords=None, defaults=None), ('document', '44fe286ab6175a5464d3a961a68c266a')) -paddle.reader.creator.recordio (ArgSpec(args=['paths', 'buf_size'], varargs=None, keywords=None, defaults=(100,)), ('document', '11b3704ea42cfd537953387a7e58dae8')) +paddle.reader.creator.text_file (ArgSpec(args=['path'], varargs=None, keywords=None, defaults=None), ('document', 'f45fcb7add066c8e042c6774fc7c3db2')) +paddle.reader.creator.recordio (ArgSpec(args=['paths', 'buf_size'], varargs=None, keywords=None, defaults=(100,)), ('document', 'b4a94ee0e2cefb495619275c2f8c61d2')) diff --git a/paddle/fluid/framework/details/graph_test_base.h b/paddle/fluid/framework/details/graph_test_base.h index 126959bcd80a4..d139f8488309e 100644 --- a/paddle/fluid/framework/details/graph_test_base.h +++ b/paddle/fluid/framework/details/graph_test_base.h @@ -68,11 +68,11 @@ class SplitOpMaker : public OpProtoAndCheckerMaker { class DummyVarTypeInference : public VarTypeInference { public: - void operator()(const OpDesc& op_desc, BlockDesc* block) const override { - auto& inputs = op_desc.Input("X"); - auto type = block->Var(inputs.front())->GetType(); - auto out_var_name = op_desc.Output("Out").front(); - block->Var(out_var_name)->SetType(type); + void operator()(framework::InferVarTypeContext* ctx) const override { + auto& inputs = ctx->Input("X"); + auto type = ctx->GetType(inputs.front()); + auto out_var_name = ctx->Output("Out").front(); + ctx->SetType(out_var_name, type); } }; diff --git a/paddle/fluid/framework/details/op_registry.h b/paddle/fluid/framework/details/op_registry.h index 0901e59f9786b..e13ff99f3fdb5 100644 --- a/paddle/fluid/framework/details/op_registry.h +++ b/paddle/fluid/framework/details/op_registry.h @@ -16,6 +16,8 @@ limitations under the License. */ #include #include +#include +#include #include #include "paddle/fluid/framework/grad_op_desc_maker.h" #include "paddle/fluid/framework/inplace_op_inference.h" @@ -127,9 +129,9 @@ struct OpInfoFiller { template struct OpInfoFiller { void operator()(const char* op_type, OpInfo* info) const { - info->infer_var_type_ = [](const OpDesc& fwd_op, BlockDesc* block) { + info->infer_var_type_ = [](InferVarTypeContext* context) { T inference; - inference(fwd_op, block); + inference(context); }; } }; diff --git a/paddle/fluid/framework/grad_op_desc_maker.h b/paddle/fluid/framework/grad_op_desc_maker.h index 9bccb1a32bf63..f2f4c53eea215 100644 --- a/paddle/fluid/framework/grad_op_desc_maker.h +++ b/paddle/fluid/framework/grad_op_desc_maker.h @@ -14,7 +14,9 @@ limitations under the License. */ #pragma once #include +#include #include +#include #include #include #include "paddle/fluid/framework/op_desc.h" @@ -55,11 +57,11 @@ class GradOpDescMakerBase { std::back_inserter(ret_val), [this](const std::string& fwd_var_name) -> std::string { auto g_name = GradVarName(fwd_var_name); - if (no_grad_set_.count(g_name)) { - return kEmptyVarName; - } else { + if (no_grad_set_.empty() || !no_grad_set_.count(g_name)) { (*this->grad_to_var_)[g_name] = fwd_var_name; return g_name; + } else { + return kEmptyVarName; } }); if (!drop_empty_grad) { diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 3808dd5fbaeaa..8d2cc5adec1fe 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -46,6 +46,7 @@ cc_library(fuse_pass_base SRCS fuse_pass_base.cc DEPS pass) pass_library(graph_to_program_pass base) pass_library(graph_viz_pass base) pass_library(lock_free_optimize_pass base) +pass_library(cpu_quantize_placement_pass base) pass_library(cpu_quantize_pass inference) pass_library(cpu_quantize_squash_pass inference) pass_library(fc_fuse_pass inference) @@ -103,6 +104,7 @@ cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS g cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto) cc_test(test_seqpool_concat_fuse_pass SRCS seqpool_concat_fuse_pass_tester.cc DEPS seqpool_concat_fuse_pass framework_proto) cc_test(test_is_test_pass SRCS is_test_pass_tester.cc DEPS is_test_pass) +cc_test(test_cpu_quantize_placement_pass SRCS cpu_quantize_placement_pass_tester.cc DEPS cpu_quantize_placement_pass) cc_test(test_cpu_quantize_pass SRCS cpu_quantize_pass_tester.cc DEPS cpu_quantize_pass naive_executor) cc_test(test_cpu_quantize_squash_pass SRCS cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor) if(NOT WIN32) diff --git a/paddle/fluid/framework/ir/cpu_quantize_placement_pass.cc b/paddle/fluid/framework/ir/cpu_quantize_placement_pass.cc new file mode 100644 index 0000000000000..50bbe4915b350 --- /dev/null +++ b/paddle/fluid/framework/ir/cpu_quantize_placement_pass.cc @@ -0,0 +1,58 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/framework/ir/cpu_quantize_placement_pass.h" +#include +#include + +namespace paddle { +namespace framework { +namespace ir { + +std::unique_ptr CPUQuantizePlacementPass::ApplyImpl( + std::unique_ptr graph) const { + VLOG(3) << "Marks operators which are to be quantized."; + const auto& excluded_ids_list = + Get>("quantize_excluded_op_ids"); + const auto& op_types_list = + Get>("quantize_enabled_op_types"); + for (const Node* n : graph->Nodes()) { + if (n->IsOp()) { + if (std::find(excluded_ids_list.begin(), excluded_ids_list.end(), + n->id()) != excluded_ids_list.end()) + continue; + auto* op = n->Op(); + if (op->HasAttr("use_quantizer") || op->HasProtoAttr("use_quantizer")) { + if (op_types_list.empty()) { + op->SetAttr("use_quantizer", true); + } else if (std::find(op_types_list.begin(), op_types_list.end(), + n->Name()) != op_types_list.end()) { + op->SetAttr("use_quantizer", true); + } + } + } + } + return graph; +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(cpu_quantize_placement_pass, + paddle::framework::ir::CPUQuantizePlacementPass) + // a vector of operator type names to be quantized ("conv2d" etc.) + .RequirePassAttr("quantize_enabled_op_types") + // a vector of operator ids that are to be excluded from quantization + .RequirePassAttr("quantize_excluded_op_ids"); diff --git a/paddle/fluid/framework/ir/cpu_quantize_placement_pass.h b/paddle/fluid/framework/ir/cpu_quantize_placement_pass.h new file mode 100644 index 0000000000000..ef3861b2493b5 --- /dev/null +++ b/paddle/fluid/framework/ir/cpu_quantize_placement_pass.h @@ -0,0 +1,34 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/fluid/framework/ir/pass.h" + +namespace paddle { +namespace framework { +namespace ir { +/* + * Specifies which operators should be quantized. + */ +class CPUQuantizePlacementPass : public Pass { + protected: + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/cpu_quantize_placement_pass_tester.cc b/paddle/fluid/framework/ir/cpu_quantize_placement_pass_tester.cc new file mode 100644 index 0000000000000..5a4d622645a43 --- /dev/null +++ b/paddle/fluid/framework/ir/cpu_quantize_placement_pass_tester.cc @@ -0,0 +1,129 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/ir/cpu_quantize_placement_pass.h" + +#include +#include + +namespace paddle { +namespace framework { +namespace ir { + +void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, + const std::vector& inputs, + const std::vector& outputs, + boost::tribool use_quantizer) { + auto* op = prog->MutableBlock(0)->AppendOp(); + + op->SetType(type); + + if (!boost::indeterminate(use_quantizer)) + op->SetAttr("use_quantizer", use_quantizer); + + if (type == "conv2d") { + op->SetAttr("name", name); + op->SetInput("Input", {inputs[0]}); + op->SetInput("Filter", {inputs[1]}); + op->SetInput("Bias", {inputs[2]}); + } else if (type == "relu") { + op->SetInput("X", inputs); + } else if (type == "concat") { + op->SetAttr("axis", 1); + op->SetInput("X", {inputs[0], inputs[1]}); + } else if (type == "pool2d") { + op->SetInput("X", {inputs[0]}); + } else { + FAIL() << "Unexpected operator type."; + } + op->SetOutput("Out", {outputs[0]}); +} + +// operator use_quantizer +// --------------------------------------- +// (a,b)->concat->c none +// (c,weights,bias)->conv->f false +// f->relu->g none +// g->pool->h false +// (h,weights2,bias2)->conv->k false +// k->pool->l false +ProgramDesc BuildProgramDesc() { + ProgramDesc prog; + + for (auto& v : + std::vector({"a", "b", "c", "weights", "bias", "f", "g", + "h", "weights2", "bias2", "k", "l"})) { + auto* var = prog.MutableBlock(0)->Var(v); + var->SetType(proto::VarType::SELECTED_ROWS); + if (v == "weights" || v == "bias") { + var->SetPersistable(true); + } + } + + SetOp(&prog, "concat", "concat1", {"a", "b"}, {"c"}, boost::indeterminate); + SetOp(&prog, "conv2d", "conv1", {"c", "weights", "bias"}, {"f"}, false); + SetOp(&prog, "relu", "relu1", {"f"}, {"g"}, boost::indeterminate); + SetOp(&prog, "pool2d", "pool1", {"g"}, {"h"}, false); + SetOp(&prog, "conv2d", "conv2", {"h", "weights2", "bias2"}, {"k"}, false); + SetOp(&prog, "pool2d", "pool2", {"k"}, {"l"}, false); + + return prog; +} + +void MainTest(std::initializer_list quantize_enabled_op_types, + std::initializer_list quantize_excluded_op_ids, + unsigned expected_use_quantizer_true_count) { + auto prog = BuildProgramDesc(); + + std::unique_ptr graph(new ir::Graph(prog)); + + auto pass = PassRegistry::Instance().Get("cpu_quantize_placement_pass"); + pass->Set("quantize_enabled_op_types", + new std::unordered_set(quantize_enabled_op_types)); + pass->Set("quantize_excluded_op_ids", + new std::unordered_set(quantize_excluded_op_ids)); + + graph = pass->Apply(std::move(graph)); + + unsigned use_quantizer_true_count = 0; + + for (auto* node : graph->Nodes()) { + if (node->IsOp()) { + auto* op = node->Op(); + if (op->HasAttr("use_quantizer") && + boost::get(op->GetAttr("use_quantizer"))) { + ++use_quantizer_true_count; + } + } + } + + EXPECT_EQ(use_quantizer_true_count, expected_use_quantizer_true_count); +} + +TEST(QuantizerPlacementPass, enabled_pool) { MainTest({"pool2d"}, {}, 2); } + +TEST(QuantizerPlacementPass, enabled_conv_excluded_one) { + MainTest({"conv2d"}, {4}, 1); +} + +TEST(QuantizerPlacementPass, excluded_none) { + // 2 conv + 2 pool + MainTest({}, {}, 4); +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +USE_PASS(cpu_quantize_placement_pass); diff --git a/paddle/fluid/framework/ir/graph_test.cc b/paddle/fluid/framework/ir/graph_test.cc index 7ed2f96eb2423..a95588a57b434 100644 --- a/paddle/fluid/framework/ir/graph_test.cc +++ b/paddle/fluid/framework/ir/graph_test.cc @@ -43,20 +43,20 @@ class SumOpMaker : public OpProtoAndCheckerMaker { class SumOpVarTypeInference : public VarTypeInference { public: - void operator()(const OpDesc &op_desc, BlockDesc *block) const override { - auto &inputs = op_desc.Input("X"); + void operator()(InferVarTypeContext *ctx) const override { + auto &inputs = ctx->Input("X"); auto default_var_type = proto::VarType::SELECTED_ROWS; bool any_input_is_lod_tensor = std::any_of( - inputs.begin(), inputs.end(), [block](const std::string &name) { - return block->Var(name)->GetType() == proto::VarType::LOD_TENSOR; + inputs.begin(), inputs.end(), [&ctx](const std::string &name) { + return ctx->GetType(name) == proto::VarType::LOD_TENSOR; }); if (any_input_is_lod_tensor) { default_var_type = proto::VarType::LOD_TENSOR; } - auto out_var_name = op_desc.Output("Out").front(); - block->Var(out_var_name)->SetType(default_var_type); + auto out_var_name = ctx->Output("Out").front(); + ctx->SetType(out_var_name, default_var_type); } }; @@ -71,7 +71,7 @@ class DummyOpMaker : public OpProtoAndCheckerMaker { class DummyOpVarTypeInference : public VarTypeInference { public: - void operator()(const OpDesc &op_desc, BlockDesc *block) const override {} + void operator()(framework::InferVarTypeContext *ctx) const override {} }; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index 0e7b0cbeb98f3..8f9c6cb5e924a 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -24,6 +24,7 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/shape_inference.h" +#include "paddle/fluid/framework/var_type_inference.h" namespace paddle { namespace framework { @@ -677,7 +678,8 @@ void OpDesc::InferVarType(BlockDesc *block) const { // var type inference. Hence, we don't do any "default" setting here. auto &info = OpInfoMap::Instance().Get(this->Type()); if (info.infer_var_type_) { - info.infer_var_type_(*this, block); + InferVarTypeContext context(this, block); + info.infer_var_type_(&context); } } diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index a7f09df491753..5f21dae60586e 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -44,6 +44,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, << dst_place; return; } +#ifdef PADDLE_WITH_MKLDNN + if (src.layout() == DataLayout::kMKLDNN) { + dst->set_mkldnn_prim_desc(src.get_mkldnn_prim_desc()); + } +#endif memory::Copy(boost::get(dst_place), dst_ptr, boost::get(src_place), src_ptr, size); } diff --git a/paddle/fluid/framework/type_defs.h b/paddle/fluid/framework/type_defs.h index d02c699b979d7..f55520901c53f 100644 --- a/paddle/fluid/framework/type_defs.h +++ b/paddle/fluid/framework/type_defs.h @@ -27,6 +27,7 @@ namespace framework { class OperatorBase; class OpDesc; class InferShapeContext; +class InferVarTypeContext; class BlockDesc; class Variable; @@ -53,7 +54,7 @@ using GradOpMakerFN = std::function>( const std::vector& grad_block)>; using InferVarTypeFN = - std::function; + std::function; using InferShapeFN = std::function; diff --git a/paddle/fluid/framework/var_type_inference.h b/paddle/fluid/framework/var_type_inference.h index 64236b78d2e39..2e9c64d3e6854 100644 --- a/paddle/fluid/framework/var_type_inference.h +++ b/paddle/fluid/framework/var_type_inference.h @@ -14,6 +14,8 @@ limitations under the License. */ #pragma once #include +#include +#include #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/type_defs.h" @@ -21,26 +23,123 @@ limitations under the License. */ namespace paddle { namespace framework { +class OpDesc; +class BlockDesc; +// default infer var type context +class InferVarTypeContext { + public: + InferVarTypeContext(const OpDesc* op, BlockDesc* block) + : op_(op), block_(block) {} + + virtual ~InferVarTypeContext() {} + + virtual Attribute GetAttr(const std::string& name) const { + PADDLE_ENFORCE_NOT_NULL(op_); + return op_->GetAttr(name); + } + + virtual bool HasVar(const std::string& name) const { + PADDLE_ENFORCE_NOT_NULL(block_); + return block_->FindVarRecursive(name) != nullptr; + } + + virtual bool HasInput(const std::string& name) const { + PADDLE_ENFORCE_NOT_NULL(op_); + return op_->Inputs().count(name) > 0; + } + + virtual bool HasOutput(const std::string& name) const { + PADDLE_ENFORCE_NOT_NULL(op_); + return op_->Outputs().count(name) > 0; + } + + virtual const std::vector& Input(const std::string& name) const { + PADDLE_ENFORCE_NOT_NULL(op_); + return op_->Input(name); + } + + virtual const std::vector& Output( + const std::string& name) const { + PADDLE_ENFORCE_NOT_NULL(op_); + return op_->Output(name); + } + + virtual proto::VarType::Type GetType(const std::string& name) const { + PADDLE_ENFORCE_NOT_NULL(block_); + return block_->FindRecursiveOrCreateVar(name).GetType(); + } + + virtual void SetType(const std::string& name, proto::VarType::Type type) { + PADDLE_ENFORCE_NOT_NULL(block_); + block_->FindRecursiveOrCreateVar(name).SetType(type); + } + + virtual proto::VarType::Type GetDataType(const std::string& name) const { + PADDLE_ENFORCE_NOT_NULL(block_); + return block_->FindRecursiveOrCreateVar(name).GetDataType(); + } + + virtual void SetDataType(const std::string& name, proto::VarType::Type type) { + PADDLE_ENFORCE_NOT_NULL(block_); + block_->FindRecursiveOrCreateVar(name).SetDataType(type); + } + + virtual std::vector GetDataTypes( + const std::string& name) const { + PADDLE_ENFORCE_NOT_NULL(block_); + return block_->FindRecursiveOrCreateVar(name).GetDataTypes(); + } + + virtual void SetDataTypes( + const std::string& name, + const std::vector& multiple_data_type) { + PADDLE_ENFORCE_NOT_NULL(block_); + block_->FindRecursiveOrCreateVar(name).SetDataTypes(multiple_data_type); + } + + virtual std::vector GetShape(const std::string& name) const { + PADDLE_ENFORCE_NOT_NULL(block_); + return block_->FindRecursiveOrCreateVar(name).GetShape(); + } + + virtual void SetShape(const std::string& name, + const std::vector& dims) { + PADDLE_ENFORCE_NOT_NULL(block_); + block_->FindRecursiveOrCreateVar(name).SetShape(dims); + } + + virtual int32_t GetLoDLevel(const std::string& name) const { + PADDLE_ENFORCE_NOT_NULL(block_); + return block_->FindRecursiveOrCreateVar(name).GetLoDLevel(); + } + + virtual void SetLoDLevel(const std::string& name, int32_t lod_level) { + PADDLE_ENFORCE_NOT_NULL(block_); + block_->FindRecursiveOrCreateVar(name).SetLoDLevel(lod_level); + } + + protected: + const OpDesc* op_; + BlockDesc* block_; +}; + class VarTypeInference { public: virtual ~VarTypeInference() {} - virtual void operator()(const OpDesc& op_desc, BlockDesc* block) const = 0; + virtual void operator()(InferVarTypeContext* context) const = 0; // NOLINT }; class PassInDtypeAndVarTypeToOutput : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc& op_desc, - framework::BlockDesc* block) const final { + void operator()(framework::InferVarTypeContext* ctx) const final { // NOLINT auto in_out_var_names = this->GetInputOutputWithSameType(); for (auto& i_o_n : in_out_var_names) { - auto& x_name = op_desc.Input(i_o_n.first).at(0); - auto& out_name = op_desc.Output(i_o_n.second).at(0); + auto& x_name = ctx->Input(i_o_n.first).at(0); + auto& out_name = ctx->Output(i_o_n.second).at(0); - auto& x = block->FindRecursiveOrCreateVar(x_name); - auto& out = block->FindRecursiveOrCreateVar(out_name); - out.SetType(x.GetType()); - out.SetDataType(x.GetDataType()); + ctx->SetType(out_name, ctx->GetType(x_name)); + ctx->SetDataType(out_name, ctx->GetDataType(x_name)); } } diff --git a/paddle/fluid/framework/var_type_inference_test.cc b/paddle/fluid/framework/var_type_inference_test.cc index 2a75394fca719..6bbb25a573d07 100644 --- a/paddle/fluid/framework/var_type_inference_test.cc +++ b/paddle/fluid/framework/var_type_inference_test.cc @@ -44,20 +44,20 @@ class SumOpMaker : public OpProtoAndCheckerMaker { class SumOpVarTypeInference : public VarTypeInference { public: - void operator()(const OpDesc &op_desc, BlockDesc *block) const override { - auto &inputs = op_desc.Input("X"); + void operator()(framework::InferVarTypeContext *ctx) const override { + auto &inputs = ctx->Input("X"); auto default_var_type = proto::VarType::SELECTED_ROWS; bool any_input_is_lod_tensor = std::any_of( - inputs.begin(), inputs.end(), [block](const std::string &name) { - return block->Var(name)->GetType() == proto::VarType::LOD_TENSOR; + inputs.begin(), inputs.end(), [&ctx](const std::string &name) { + return ctx->GetType(name) == proto::VarType::LOD_TENSOR; }); if (any_input_is_lod_tensor) { default_var_type = proto::VarType::LOD_TENSOR; } - auto out_var_name = op_desc.Output("Out").front(); - block->Var(out_var_name)->SetType(default_var_type); + auto out_var_name = ctx->Output("Out").front(); + ctx->SetType(out_var_name, default_var_type); } }; } // namespace framework diff --git a/paddle/fluid/imperative/CMakeLists.txt b/paddle/fluid/imperative/CMakeLists.txt index ec8dedd605235..0d116a6495477 100644 --- a/paddle/fluid/imperative/CMakeLists.txt +++ b/paddle/fluid/imperative/CMakeLists.txt @@ -2,4 +2,5 @@ if(WITH_PYTHON) cc_library(layer SRCS layer.cc DEPS proto_desc operator device_context blas pybind) cc_library(tracer SRCS tracer.cc DEPS proto_desc device_context pybind) cc_library(engine SRCS engine.cc) +cc_library(imperative_profiler SRCS profiler.cc) endif() diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index 5530823b90f65..3d1de95f58ded 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -214,13 +214,11 @@ framework::LoDTensor& VarBase::GradValue() { } std::map> OpBase::ApplyGrad() { - if (grad_op_descs_.empty() && backward_id_ <= 0) { - VLOG(3) << "op with no grad: " << Type(); - return {}; - } + PADDLE_ENFORCE(!grad_op_descs_.empty() || backward_id_ > 0, + "%s has no backward implementation", Type()); VLOG(3) << "apply op grad: " << Type(); - std::vector tmp_grad_outputs; + std::vector tmp_grad_outputs; if (backward_id_ > 0) { VLOG(3) << "py_layer_grad"; tmp_grad_outputs.resize(1); @@ -239,30 +237,66 @@ std::map> OpBase::ApplyGrad() { VLOG(3) << "apply grad op " << grad_op_desc->Type(); // Allocate tmp grad output variable - for (auto it : grad_output_variable_map) { + for (const auto& it : grad_output_variable_map) { auto& outputs = tmp_grad_outputs[k][it.first]; outputs.reserve(it.second.size()); for (size_t i = 0; i < it.second.size(); ++i) { + VarBase* origin_grad_var_base = it.second[i]; + // Allocate a new variable - Variable* tmp_var = new framework::Variable(); - tmp_var->GetMutable(); - outputs.emplace_back(tmp_var); + VarBase* tmp_grad_var_base = new VarBase( + string::Sprintf("%s@IGrad", origin_grad_var_base->Name()), + origin_grad_var_base->DataType(), origin_grad_var_base->Dims(), + place_, true, false); + outputs.emplace_back(tmp_grad_var_base); } } - // Run grad op - framework::RuntimeContext ctx(grad_input_vars_[k], tmp_grad_outputs[k]); - // No need to do compile time infer shape here. // grad_op_desc_->InferShape(*block_); // grad_op_desc->InferVarType(block_); std::unique_ptr opbase = framework::OpRegistry::CreateOp(*grad_op_desc); + + auto& info = framework::OpInfoMap::Instance().Get(grad_op_desc->Type()); + if (info.infer_var_type_) { + RuntimeInferVarTypeContext infer_var_type_ctx( + &grad_input_vars_[k], &tmp_grad_outputs[k], &attrs_); + info.infer_var_type_(&infer_var_type_ctx); + } + framework::OperatorWithKernel* op_kernel = dynamic_cast(opbase.get()); PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel"); + // Run grad op + framework::VariableValueMap grad_invars_map; + framework::VariableValueMap grad_outvars_map; + + for (const auto& it : grad_input_vars_[k]) { + auto& grad_invars = grad_invars_map[it.first]; + grad_invars.reserve(it.second.size()); + for (const VarBase* grad_inp : it.second) { + PADDLE_ENFORCE_NOT_NULL(grad_inp->var_, "op %s input %s nullptr", + grad_op_desc->Type(), grad_inp->Name()); + + grad_invars.emplace_back(grad_inp->var_); + } + } + + for (const auto& it : tmp_grad_outputs[k]) { + auto& grad_outvars = grad_outvars_map[it.first]; + grad_outvars.reserve(it.second.size()); + for (VarBase* grad_out : it.second) { + PADDLE_ENFORCE_NOT_NULL(grad_out->var_, "op %s output %s nullptr", + grad_op_desc->Type(), grad_out->Name()); + + grad_outvars.emplace_back(grad_out->var_); + } + } + + framework::RuntimeContext ctx(grad_invars_map, grad_outvars_map); framework::Scope scope; PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place_); p.op.RuntimeInferShape(scope, place_, ctx); @@ -273,14 +307,14 @@ std::map> OpBase::ApplyGrad() { // Add tmp grad outputs to original grad vars for (size_t k = 0; k < grad_output_vars_.size(); ++k) { - for (auto it : grad_output_vars_[k]) { + for (const auto& it : grad_output_vars_[k]) { auto& outputs = tmp_grad_outputs[k][it.first]; - auto& origin_outputs = it.second; + const auto& origin_outputs = it.second; PADDLE_ENFORCE_EQ(outputs.size(), origin_outputs.size()); for (size_t i = 0; i < outputs.size(); ++i) { - framework::Variable* grad = outputs[i]; - framework::Variable* orig_grad = origin_outputs[i]; + framework::Variable* grad = outputs[i]->var_; + framework::Variable* orig_grad = origin_outputs[i]->var_; AddTo(grad, orig_grad, place_); delete grad; } @@ -328,28 +362,35 @@ void PyLayer::RegisterFunc(int func_id, const py::object& py_func) { int PyLayer::NumFuncs() { return py_funcs_.size(); } -std::vector PyLayer::Apply(int func_id, - const std::vector& inputs) { - std::vector invars; - for (const VarBase* in : inputs) { - invars.push_back(in->var_); - } +std::vector PyLayer::Apply( + int func_id, const std::vector& inputs) { PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end()); - return CallPythonFunc(py_funcs_[func_id], invars); + return CallPythonFunc(py_funcs_[func_id], inputs); } -std::vector PyLayer::ApplyGrad( - int func_id, const std::vector& inputs) { +std::vector PyLayer::ApplyGrad(int func_id, + const std::vector& inputs) { PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end()); - return CallPythonFunc(py_funcs_[func_id], inputs); + auto rets = CallPythonFunc(py_funcs_[func_id], inputs); + + std::vector outs; + outs.reserve(rets.size()); + for (size_t i = 0U; i != rets.size(); ++i) { + outs.emplace_back(new VarBase( + string::Sprintf("%s_out_%d", framework::GradVarName(PyLayer::kFwdOut), + i), + rets[i], nullptr, true)); + } + + return outs; } std::vector PyLayer::CallPythonFunc( - const py::object& callable, const std::vector& ins) { + const py::object& callable, const std::vector& ins) { py::gil_scoped_acquire guard; py::tuple in_args(ins.size()); for (size_t i = 0; i < ins.size(); ++i) { - const framework::LoDTensor& t = ins[i]->Get(); + const framework::LoDTensor& t = ins[i]->var_->Get(); in_args[i] = t.IsInitialized() ? py::cast(t) : py::cast(nullptr); } VLOG(3) << "pyfunc in " << py::len(in_args); @@ -359,6 +400,7 @@ std::vector PyLayer::CallPythonFunc( auto ret_tuple = py::cast(ret); size_t ret_num = py::len(ret_tuple); std::vector outs; + outs.reserve(ret_num); VLOG(3) << "pyfunc out " << ret_num; for (size_t i = 0; i < ret_num; ++i) { try { @@ -369,7 +411,7 @@ std::vector PyLayer::CallPythonFunc( auto* tensor = var->GetMutable(); tensor->ShareDataWith(*py_out_tensor); tensor->set_lod(py_out_tensor->lod()); - outs.push_back(var); + outs.emplace_back(var); } catch (py::cast_error&) { PADDLE_THROW("The %d-th output must be LoDTensor", i); } diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h index 618a5b7a03295..72c548d5e92de 100644 --- a/paddle/fluid/imperative/layer.h +++ b/paddle/fluid/imperative/layer.h @@ -18,14 +18,16 @@ #include "paddle/fluid/framework/python_headers.h" // clang-format on -#include // NOLINT -#include // NOLINT -#include // NOLINT -#include // NOLINT +#include // NOLINT +#include // NOLINT +#include // NOLINT +#include // NOLINT +#include // NOLINT #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/var_desc.h" +#include "paddle/fluid/framework/var_type_inference.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/operators/math/math_function.h" @@ -135,13 +137,13 @@ class VarBase { persistable) {} private: + // TODO(minqiyang): need support SelectedRows VarBase(const std::string& name, framework::proto::VarType::Type dtype, const framework::DDim& shape, const platform::Place& place, framework::Variable* var, VarBase* grad, bool stop_gradient, bool persistable) : name_(name), - dtype_(dtype), - place_(place), + type_(framework::proto::VarType::LOD_TENSOR), var_(var), grads_(grad), stop_gradient_(stop_gradient), @@ -151,10 +153,12 @@ class VarBase { pre_op_out_idx_(-1) { if (!var_) { var_ = new framework::Variable(); - auto tensor = var_->GetMutable(); - tensor->Resize(shape); - tensor->mutable_data(place_, dtype_); } + auto tensor = var_->GetMutable(); + tensor->Resize(shape); + tensor->mutable_data(place, dtype); + VLOG(10) << "create varbase: " << name_ << " type: " << dtype + << " place: " << place; } public: @@ -184,7 +188,23 @@ class VarBase { } } - inline framework::proto::VarType::Type DType() const { return dtype_; } + inline framework::DDim Dims() const { + return var_->Get().dims(); + } + + // data type. e.g.. FP32 + inline void SetDataType(framework::proto::VarType::Type type) { + auto tensor = var_->GetMutable(); + tensor->mutable_data(tensor->place(), type); + } + inline framework::proto::VarType::Type DataType() const { + auto tensor = var_->Get(); + return tensor.type(); + } + + // tensor type. e.g.. LoDTensor + inline void SetType(framework::proto::VarType::Type type) { type_ = type; } + inline framework::proto::VarType::Type Type() const { return type_; } inline void SetStopGradient(bool stop_gradient) { stop_gradient_ = stop_gradient; @@ -238,7 +258,7 @@ class VarBase { } std::string name_; - framework::proto::VarType::Type dtype_; + framework::proto::VarType::Type type_; platform::Place place_; framework::Variable* var_; @@ -294,17 +314,23 @@ class PYBIND11_HIDDEN OpBase { void InvokeBackwardHooks(); - void TrackPreOp(const VarBase* inp_var, const std::string& inp_name) { - if (inp_var->PreOp() && !inp_var->IsStopGradient()) { - VLOG(3) << "add pre op " << inp_var->PreOp()->Type() << " in slot " - << inp_name; - pre_ops_[inp_name].push_back(inp_var->PreOp()); - pre_ops_out_idx_[inp_name].push_back(inp_var->PreOpOutIdx()); - } else { - VLOG(3) << "no pre op in slot " << inp_name - << " input var stop_gradient: " << inp_var->IsStopGradient(); - pre_ops_[inp_name].push_back(nullptr); - // pre_ops_out_idx_[inp_name].push_back(-1); + void TrackPreOp(const std::string& inp_name, + const std::vector& inputs) { + auto& pre_ops_list = pre_ops_[inp_name]; + pre_ops_list.reserve(inputs.size()); + auto& pre_ops_out_idx_list = pre_ops_out_idx_[inp_name]; + for (VarBase* inp_var : inputs) { + if (inp_var->PreOp() && !inp_var->IsStopGradient()) { + VLOG(3) << "add pre op " << inp_var->PreOp()->Type() << " in slot " + << inp_name; + pre_ops_list.emplace_back(inp_var->PreOp()); + pre_ops_out_idx_list.push_back(inp_var->PreOpOutIdx()); + } else { + VLOG(3) << "no pre op in slot " << inp_name + << " input var stop_gradient: " << inp_var->IsStopGradient(); + pre_ops_list.emplace_back(nullptr); + // pre_ops_out_idx_list.push_back(-1); + } } } @@ -328,11 +354,13 @@ class PYBIND11_HIDDEN OpBase { std::map> pre_ops_out_idx_; // Inputs to a vector of bwd ops. - std::vector grad_input_vars_; + std::vector grad_input_vars_; // Outputs to a vector of bwd ops. - std::vector grad_output_vars_; + std::vector grad_output_vars_; std::vector backward_hooks_; + + framework::AttributeMap attrs_; }; class Layer { @@ -359,12 +387,131 @@ class PyLayer { static std::vector Apply( int func_id, const std::vector& inputs); - static std::vector ApplyGrad( - int func_id, const std::vector& inputs); + static std::vector ApplyGrad(int func_id, + const std::vector& inputs); private: static std::vector CallPythonFunc( - const py::object& callable, const std::vector& ins); + const py::object& callable, const std::vector& ins); +}; + +// infer var type context for imperative mode +class PYBIND11_HIDDEN RuntimeInferVarTypeContext + : public framework::InferVarTypeContext { + public: + RuntimeInferVarTypeContext(const imperative::VarBasePtrMap* inputs, + imperative::VarBasePtrMap* outputs, + const framework::AttributeMap* attrs_map) + : InferVarTypeContext(nullptr, nullptr), + inputs_(inputs), + outputs_(outputs), + attrs_(attrs_map), + input_names_(), + output_names_(), + var_set_() { + input_names_.reserve(inputs_->size()); + for (auto& it : *inputs_) { + for (imperative::VarBase* var : it.second) { + input_names_[it.first].emplace_back(var->Name()); + var_set_[var->Name()] = var; + } + } + + output_names_.reserve(outputs_->size()); + for (auto& it : *outputs_) { + for (imperative::VarBase* var : it.second) { + output_names_[it.first].emplace_back(var->Name()); + var_set_[var->Name()] = var; + } + } + } + + virtual ~RuntimeInferVarTypeContext() {} + + framework::Attribute GetAttr(const std::string& name) const override { + PADDLE_ENFORCE_NOT_NULL(attrs_); + return attrs_->at(name); + } + + bool HasVar(const std::string& name) const override { + return var_set_.count(name) > 0; + } + + bool HasInput(const std::string& name) const override { + PADDLE_ENFORCE_NOT_NULL(inputs_); + return inputs_->count(name) > 0; + } + + bool HasOutput(const std::string& name) const override { + PADDLE_ENFORCE_NOT_NULL(outputs_); + return outputs_->count(name) > 0; + } + + const std::vector& Input( + const std::string& name) const override { + return input_names_.at(name); + } + + const std::vector& Output( + const std::string& name) const override { + return output_names_.at(name); + } + + framework::proto::VarType::Type GetType( + const std::string& name) const override { + return var_set_.at(name)->Type(); + } + + void SetType(const std::string& name, + framework::proto::VarType::Type type) override { + var_set_[name]->SetType(type); + } + + framework::proto::VarType::Type GetDataType( + const std::string& name) const override { + return var_set_.at(name)->DataType(); + } + + void SetDataType(const std::string& name, + framework::proto::VarType::Type type) override { + var_set_[name]->SetDataType(type); + } + + std::vector GetDataTypes( + const std::string& name) const override { + PADDLE_THROW("GetDataTypes is not supported in runtime InferVarType"); + } + + void SetDataTypes(const std::string& name, + const std::vector& + multiple_data_type) override { + PADDLE_THROW("SetDataTypes is not supported in runtime InferVarType"); + } + + std::vector GetShape(const std::string& name) const override { + PADDLE_THROW("Do not handle Shape in runtime InferVarType"); + } + + void SetShape(const std::string& name, + const std::vector& dims) override { + PADDLE_THROW("Do not handle Shape in runtime InferVarType"); + } + + int32_t GetLoDLevel(const std::string& name) const override { + PADDLE_THROW("Do not handle LoDLevel in runtime InferVarType"); + } + + void SetLoDLevel(const std::string& name, int32_t lod_level) override { + PADDLE_THROW("Do not handle LoDLevel in runtime InferVarType"); + } + + private: + const imperative::VarBasePtrMap* inputs_; + imperative::VarBasePtrMap* outputs_; + const framework::AttributeMap* attrs_; + std::unordered_map> input_names_; + std::unordered_map> output_names_; + std::unordered_map var_set_; }; } // namespace imperative diff --git a/paddle/fluid/imperative/profiler.cc b/paddle/fluid/imperative/profiler.cc new file mode 100644 index 0000000000000..34570b3a60ec8 --- /dev/null +++ b/paddle/fluid/imperative/profiler.cc @@ -0,0 +1,62 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/imperative/profiler.h" + +#ifdef WITH_GPERFTOOLS +#include "gperftools/profiler.h" +#endif +#include +#include +#include // NOLINT +#include // NOLINT + +DEFINE_string( + tracer_profile_fname, "xxgperf", + "Profiler filename for imperative tracer, which generated by gperftools." + "Only valid when compiled `WITH_PROFILER=ON`. Empty if disable."); + +namespace paddle { +namespace imperative { + +static std::once_flag gTracerProfileOnce; +#ifdef WITH_GPERFTOOLS +static bool gTracerProfilerStarted = false; +#endif + +void StartProfile() { + if (!FLAGS_tracer_profile_fname.empty()) { + std::call_once(gTracerProfileOnce, [] { +#ifdef WITH_GPERFTOOLS + ProfilerStart(FLAGS_tracer_profile_fname.c_str()); + gTracerProfilerStarted = true; +#else + LOG(WARNING) << "Paddle is not compiled with gperftools. " + "FLAGS_tracer_profile_fname will be ignored"; +#endif + }); + } +} + +void StopProfile() { +#ifdef WITH_GPERFTOOLS + ProfilerFlush(); +#else + LOG(WARNING) << "Paddle is not compiled with gperftools. " + "FLAGS_tracer_profile_fname will be ignored"; +#endif +} + +} // namespace imperative +} // namespace paddle diff --git a/paddle/fluid/imperative/profiler.h b/paddle/fluid/imperative/profiler.h new file mode 100644 index 0000000000000..d52aeed4e8175 --- /dev/null +++ b/paddle/fluid/imperative/profiler.h @@ -0,0 +1,25 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +namespace paddle { +namespace imperative { + +extern void StartProfile(); + +extern void StopProfile(); + +} // namespace imperative +} // namespace paddle diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc index 7ee92b4d8c46d..0cfdea030eb4e 100644 --- a/paddle/fluid/imperative/tracer.cc +++ b/paddle/fluid/imperative/tracer.cc @@ -19,38 +19,26 @@ #include #include +#include "paddle/fluid/framework/var_type_inference.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" -#ifdef WITH_GPERFTOOLS -#include "gperftools/profiler.h" -#endif - -DEFINE_string( - tracer_profile_fname, "", - "Profiler filename for imperative tracer, which generated by gperftools." - "Only valid when compiled `WITH_PROFILER=ON`. Empty if disable."); - namespace paddle { namespace imperative { -static std::once_flag gTracerProfileOnce; -#ifdef WITH_GPERFTOOLS -static bool gTracerProfilerStarted = false; -#endif - void CreateGradOp(const framework::OpDesc& op_desc, const std::unordered_set& no_grad_set, const std::vector& grad_sub_block, std::vector* grad_op_descs, std::unordered_map* grad_to_var) { PADDLE_ENFORCE(grad_op_descs->empty()); - std::vector> descs = - framework::OpInfoMap::Instance() - .Get(op_desc.Type()) - .GradOpMaker()(op_desc, no_grad_set, grad_to_var, grad_sub_block); + const framework::OpInfo& op_info = + framework::OpInfoMap::Instance().Get(op_desc.Type()); + if (!op_info.grad_op_maker_) return; + std::vector> descs = + op_info.GradOpMaker()(op_desc, no_grad_set, grad_to_var, grad_sub_block); for (auto& desc : descs) { grad_op_descs->emplace_back(desc.release()); } @@ -145,31 +133,13 @@ framework::VariableNameMap CreateOutputVarNameMap( return result; } -Tracer::Tracer(framework::BlockDesc* root_block) : root_block_(root_block) { - if (!FLAGS_tracer_profile_fname.empty()) { - std::call_once(gTracerProfileOnce, [] { -#ifdef WITH_GPERFTOOLS - ProfilerStart(FLAGS_tracer_profile_fname.c_str()); - gTracerProfilerStarted = true; -#else - LOG(WARNING) << "Paddle is not compiled with gperftools. " - "FLAGS_tracer_profile_fname will be ignored"; -#endif - }); - } -} +Tracer::Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {} std::set Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, - const VarBasePtrMap& outputs, + VarBasePtrMap* outputs, framework::AttributeMap attrs_map, const platform::Place expected_place, const bool stop_gradient) { -#ifdef WITH_GPERFTOOLS - if (gTracerProfilerStarted) { - ProfilerFlush(); - } -#endif - framework::VariableValueMap invars_map; framework::VariableValueMap outvars_map; @@ -184,7 +154,6 @@ std::set Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, inp->Name()); invars.emplace_back(inp->var_); - op->TrackPreOp(inp, it.first); if (!stop_gradient) { current_vars_map[inp->Name()] = inp; } @@ -192,9 +161,10 @@ std::set Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, << " inited: " << inp->var_->IsInitialized() << " stop_grad: " << inp->IsStopGradient(); } + op->TrackPreOp(it.first, it.second); } - op->output_vars_ = outputs; + op->output_vars_ = *outputs; for (auto it : op->output_vars_) { auto& outvars = outvars_map[it.first]; const std::vector& outputs = it.second; @@ -217,7 +187,7 @@ std::set Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, framework::VariableNameMap invars_name_map = CreateInputVarNameMap(op, inputs); framework::VariableNameMap outvars_name_map = - CreateOutputVarNameMap(op, outputs); + CreateOutputVarNameMap(op, *outputs); auto& info = framework::OpInfoMap::Instance().Get(op->Type()); if (info.Checker() != nullptr) { @@ -228,6 +198,11 @@ std::set Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, framework::OpRegistry::CreateOp(op->Type(), invars_name_map, outvars_name_map, attrs_map); + if (info.infer_var_type_) { + RuntimeInferVarTypeContext infer_var_type_ctx(&inputs, outputs, &attrs_map); + info.infer_var_type_(&infer_var_type_ctx); + } + // TODO(minqiyang): Support infer var type in imperative mode // Run forward op VLOG(3) << "tracer running " << op->Type(); @@ -252,6 +227,7 @@ std::set Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, VLOG(5) << "start construct backward op"; // construct grad op descs + op->attrs_ = attrs_map; std::unique_ptr fwd_op_desc(new framework::OpDesc( op->Type(), invars_name_map, outvars_name_map, attrs_map)); std::unique_ptr> grad_to_var( @@ -278,12 +254,12 @@ std::set Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, auto fwd_var_it = current_vars_map.find(grad_invar); PADDLE_ENFORCE(fwd_var_it != current_vars_map.end()); // Forward inputs or outputs. - grad_in_vars.emplace_back(fwd_var_it->second->var_); + grad_in_vars.emplace_back(fwd_var_it->second); } else { VarBase* var = current_vars_map[var_it->second]; InitGrad(var, prepared_op.GetDeviceContext()); // Douts. - grad_in_vars.emplace_back(var->grads_->var_); + grad_in_vars.emplace_back(var->grads_); } vars_saved_for_backward.insert(it.first); @@ -300,7 +276,7 @@ std::set Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, op->Type()); VarBase* var = current_vars_map[var_it->second]; InitGrad(var, prepared_op.GetDeviceContext()); - grad_out_vars.push_back(var->grads_->var_); + grad_out_vars.push_back(var->grads_); } } } @@ -319,9 +295,7 @@ std::vector Tracer::PyTrace(OpBase* op, std::vector ret_vars = PyLayer::Apply(op->forward_id_, inputs); - for (VarBase* inp : inputs) { - op->TrackPreOp(inp, PyLayer::kFwdInp); - } + op->TrackPreOp(PyLayer::kFwdInp, inputs); std::vector& outputs = op->output_vars_[PyLayer::kFwdOut]; outputs.reserve(ret_vars.size()); @@ -342,23 +316,23 @@ std::vector Tracer::PyTrace(OpBase* op, auto& grad_output_vars = op->grad_output_vars_[0][framework::GradVarName(PyLayer::kFwdOut)]; - for (const VarBase* inp : inputs) { - grad_input_vars.push_back(inp->var_); + for (VarBase* inp : inputs) { + grad_input_vars.push_back(inp); } for (VarBase* out : outputs) { - grad_input_vars.push_back(out->var_); + grad_input_vars.push_back(out); } // TODO(minqiyang): Add GPU support for PyLayer, only support CPU now platform::CPUPlace place; for (VarBase* out : outputs) { InitGrad(out, platform::DeviceContextPool::Instance().Get(place)); - grad_input_vars.push_back(out->grads_->var_); + grad_input_vars.push_back(out->grads_); } for (VarBase* inp : inputs) { InitGrad(inp, platform::DeviceContextPool::Instance().Get(place)); - grad_output_vars.push_back(inp->grads_->var_); + grad_output_vars.push_back(inp->grads_); } } return outputs; diff --git a/paddle/fluid/imperative/tracer.h b/paddle/fluid/imperative/tracer.h index 7b65d55e9eff1..a87f3b8009dd5 100644 --- a/paddle/fluid/imperative/tracer.h +++ b/paddle/fluid/imperative/tracer.h @@ -48,7 +48,7 @@ class Tracer { virtual ~Tracer() {} std::set Trace(OpBase* op, const VarBasePtrMap& inputs, - const VarBasePtrMap& outputs, + VarBasePtrMap* outputs, // NOLINT framework::AttributeMap attrs_map, const platform::Place expected_place, const bool stop_gradient = false); diff --git a/paddle/fluid/imperative/type_defs.h b/paddle/fluid/imperative/type_defs.h index fc9e42f8d0e99..c51ce931defbc 100644 --- a/paddle/fluid/imperative/type_defs.h +++ b/paddle/fluid/imperative/type_defs.h @@ -25,6 +25,7 @@ class VarBase; class OpBase; typedef std::map> VarBasePtrMap; +typedef std::map> ConstVarBasePtrMap; typedef std::map> OpBasePtrMap; } // namespace imperative diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 762640d6d1ce1..d27ef8fe3c33f 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -91,5 +91,5 @@ if(WITH_TESTING) add_subdirectory(tests/book) if(WITH_INFERENCE_API_TEST) add_subdirectory(tests/api) - endif() + endif() endif() diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 8f7b6f31dec72..d9ac73b0638ad 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -110,7 +110,7 @@ set(TRANSFORMER_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/transformer") download_model_and_data(${TRANSFORMER_INSTALL_DIR} "temp%2Ftransformer_model.tar.gz" "temp%2Ftransformer_data.txt.tar.gz") inference_analysis_test(test_analyzer_transformer SRCS analyzer_transformer_tester.cc EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt --batch_size=8) + ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt --batch_size=8 SERIAL) # ocr set(OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/ocr") diff --git a/paddle/fluid/inference/tests/api/analyzer_transformer_tester.cc b/paddle/fluid/inference/tests/api/analyzer_transformer_tester.cc index 9d17f38ab7641..f765f55611291 100644 --- a/paddle/fluid/inference/tests/api/analyzer_transformer_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_transformer_tester.cc @@ -183,10 +183,13 @@ void SetInput(std::vector> *inputs) { } // Easy for profiling independently. -TEST(Analyzer_Transformer, profile) { +void profile(bool use_mkldnn = false) { AnalysisConfig cfg; SetConfig(&cfg); std::vector outputs; + if (use_mkldnn) { + cfg.EnableMKLDNN(); + } std::vector> input_slots_all; SetInput(&input_slots_all); @@ -194,6 +197,11 @@ TEST(Analyzer_Transformer, profile) { input_slots_all, &outputs, FLAGS_num_threads); } +TEST(Analyzer_Transformer, profile) { profile(); } +#ifdef PADDLE_WITH_MKLDNN +TEST(Analyzer_Transformer, profile_mkldnn) { profile(true); } +#endif + // Check the fuse status TEST(Analyzer_Transformer, fuse_statis) { AnalysisConfig cfg; @@ -206,9 +214,12 @@ TEST(Analyzer_Transformer, fuse_statis) { } // Compare result of NativeConfig and AnalysisConfig -TEST(Analyzer_Transformer, compare) { +void compare(bool use_mkldnn = false) { AnalysisConfig cfg; SetConfig(&cfg); + if (use_mkldnn) { + cfg.EnableMKLDNN(); + } std::vector> input_slots_all; SetInput(&input_slots_all); @@ -216,5 +227,10 @@ TEST(Analyzer_Transformer, compare) { reinterpret_cast(&cfg), input_slots_all); } +TEST(Analyzer_Transformer, compare) { compare(); } +#ifdef PADDLE_WITH_MKLDNN +TEST(Analyzer_Transformer, compare_mkldnn) { compare(true /* use_mkldnn */); } +#endif + } // namespace inference } // namespace paddle diff --git a/paddle/fluid/operators/beam_search_decode_op.cc b/paddle/fluid/operators/beam_search_decode_op.cc index cf78c83297a87..4cef49280dfb5 100644 --- a/paddle/fluid/operators/beam_search_decode_op.cc +++ b/paddle/fluid/operators/beam_search_decode_op.cc @@ -178,10 +178,10 @@ Beam Search Decode Operator. This Operator constructs the full hypotheses for each source sentence by walking back along the LoDTensorArray Input(ids) whose lods can be used to restore the path in the beam search tree. -The Output(SentenceIds) and Output(SentenceScores) separately contain the -generated id sequences and the corresponding scores. The shapes and lods of the -two LodTensor are same. The lod level is 2 and the two levels separately -indicate how many hypotheses each source sentence has and how many ids each +The Output(SentenceIds) and Output(SentenceScores) separately contain the +generated id sequences and the corresponding scores. The shapes and lods of the +two LodTensor are same. The lod level is 2 and the two levels separately +indicate how many hypotheses each source sentence has and how many ids each hypothesis has. )DOC"); } @@ -203,15 +203,12 @@ class BeamSearchDecodeInferShape : public framework::InferShapeBase { class BeamSearchDecodeInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc& op_desc, - framework::BlockDesc* block) const override { - for (auto& o : op_desc.Output("SentenceIds")) { - auto& sentence_ids = block->FindRecursiveOrCreateVar(o); - sentence_ids.SetType(framework::proto::VarType::LOD_TENSOR); + void operator()(framework::InferVarTypeContext* ctx) const override { + for (auto& o : ctx->Output("SentenceIds")) { + ctx->SetType(o, framework::proto::VarType::LOD_TENSOR); } - for (auto& o : op_desc.Output("SentenceScores")) { - auto& sentence_scores = block->FindRecursiveOrCreateVar(o); - sentence_scores.SetType(framework::proto::VarType::LOD_TENSOR); + for (auto& o : ctx->Output("SentenceScores")) { + ctx->SetType(o, framework::proto::VarType::LOD_TENSOR); } } }; diff --git a/paddle/fluid/operators/beam_search_op.cc b/paddle/fluid/operators/beam_search_op.cc index fa6b09b4e7ec5..a6aa35e056936 100644 --- a/paddle/fluid/operators/beam_search_op.cc +++ b/paddle/fluid/operators/beam_search_op.cc @@ -65,7 +65,7 @@ class BeamSearchOpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault(true); AddComment(R"DOC( -This operator does the search in beams for one time step. +This operator does the search in beams for one time step. Specifically, it selects the top-K candidate word ids of current step from Input(ids) according to their Input(scores) for all source sentences, where K is Attr(beam_size) and Input(ids), Input(scores) are predicted results @@ -120,15 +120,12 @@ class BeamSearchOp : public framework::OperatorWithKernel { class BeamSearchInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - for (auto &o : op_desc.Output("selected_ids")) { - auto &selected_ids = block->FindRecursiveOrCreateVar(o); - selected_ids.SetType(framework::proto::VarType::LOD_TENSOR); + void operator()(framework::InferVarTypeContext *ctx) const override { + for (auto &o : ctx->Output("selected_ids")) { + ctx->SetType(o, framework::proto::VarType::LOD_TENSOR); } - for (auto &o : op_desc.Output("selected_scores")) { - auto &selected_scores = block->FindRecursiveOrCreateVar(o); - selected_scores.SetType(framework::proto::VarType::LOD_TENSOR); + for (auto &o : ctx->Output("selected_scores")) { + ctx->SetType(o, framework::proto::VarType::LOD_TENSOR); } } }; diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc index 194f9cf5033a3..5d5ad9e711a97 100644 --- a/paddle/fluid/operators/concat_op.cc +++ b/paddle/fluid/operators/concat_op.cc @@ -50,9 +50,19 @@ class ConcatOp : public framework::OperatorWithKernel { if (j == axis) { out_dims[axis] += ins[i][j]; } else { - PADDLE_ENFORCE_EQ(out_dims[j], ins[i][j], - "Input tensors should have the same " - "elements except the specify axis."); + if (ctx->IsRuntime()) { + // check all shape in run time + PADDLE_ENFORCE_EQ(out_dims[j], ins[i][j], + "Input tensors should have the same " + "elements except the specify axis."); + } else { + // not check -1 with other in compile time + if (out_dims[j] != -1 && ins[i][j] != -1) { + PADDLE_ENFORCE_EQ(out_dims[j], ins[i][j], + "Input tensors should have the same " + "elements except the specify axis."); + } + } } } } diff --git a/paddle/fluid/operators/controlflow/get_places_op.cc b/paddle/fluid/operators/controlflow/get_places_op.cc index 1a157688f3d02..fa77f97419b6d 100644 --- a/paddle/fluid/operators/controlflow/get_places_op.cc +++ b/paddle/fluid/operators/controlflow/get_places_op.cc @@ -93,11 +93,9 @@ execution. class GetPlacesInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - for (auto &o_name : op_desc.Output("Out")) { - block->FindRecursiveOrCreateVar(o_name).SetType( - framework::proto::VarType::PLACE_LIST); + void operator()(framework::InferVarTypeContext *ctx) const override { + for (auto &o_name : ctx->Output("Out")) { + ctx->SetType(o_name, framework::proto::VarType::PLACE_LIST); } } }; diff --git a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc index fa18ade3234ed..45f18ac9255bd 100644 --- a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc +++ b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc @@ -100,16 +100,13 @@ class WriteToArrayInferShape : public framework::InferShapeBase { class WriteToArrayInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - auto x_name = op_desc.Input("X")[0]; - auto out_name = op_desc.Output("Out")[0]; + void operator()(framework::InferVarTypeContext *ctx) const override { + auto x_name = ctx->Input("X")[0]; + auto out_name = ctx->Output("Out")[0]; VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY"; - auto &out = block->FindRecursiveOrCreateVar(out_name); - out.SetType(framework::proto::VarType::LOD_TENSOR_ARRAY); - auto *x = block->FindVarRecursive(x_name); - if (x != nullptr) { - out.SetDataType(x->GetDataType()); + ctx->SetType(out_name, framework::proto::VarType::LOD_TENSOR_ARRAY); + if (ctx->HasVar(x_name)) { + ctx->SetDataType(out_name, ctx->GetDataType(x_name)); } } }; diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index 8352ba4f2b846..deb8ec3bb2d56 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -365,19 +365,16 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker { class WhileGradOpVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - auto p_names = op_desc.Input(kX); - auto pg_ig_names = op_desc.Output(framework::GradVarName(kX)); + void operator()(framework::InferVarTypeContext *ctx) const override { + auto p_names = ctx->Input(kX); + auto pg_ig_names = ctx->Output(framework::GradVarName(kX)); for (size_t i = 0; i < p_names.size(); ++i) { - auto &p_var = detail::Ref(block->FindVarRecursive(p_names[i])); - auto *g_var = block->FindVarRecursive(pg_ig_names[i]); - if (g_var != nullptr) { // Gradient could be @EMPTY@ + if (ctx->HasVar(pg_ig_names[i])) { VLOG(5) << "Setting " << pg_ig_names[i] << " following " << p_names[i] - << " type: " << p_var.GetType(); - g_var->SetType(p_var.GetType()); - g_var->SetDataType(p_var.GetDataType()); + << " type: " << ctx->GetType(p_names[i]); + ctx->SetType(pg_ig_names[i], ctx->GetType(p_names[i])); + ctx->SetDataType(pg_ig_names[i], ctx->GetDataType(p_names[i])); } } } diff --git a/paddle/fluid/operators/distributed_ops/fake_init_op.cc b/paddle/fluid/operators/distributed_ops/fake_init_op.cc index 28ebdcb03ea83..5ee35e0458a64 100644 --- a/paddle/fluid/operators/distributed_ops/fake_init_op.cc +++ b/paddle/fluid/operators/distributed_ops/fake_init_op.cc @@ -56,8 +56,7 @@ class FakeInitOp : public framework::OperatorBase { class FakeInitOpVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override {} + void operator()(framework::InferVarTypeContext *ctx) const override {} }; class FakeInitOpMaker : public framework::OpProtoAndCheckerMaker { diff --git a/paddle/fluid/operators/distributed_ops/merge_ids_op.cc b/paddle/fluid/operators/distributed_ops/merge_ids_op.cc index da0185b8c492e..1b0b4dd316933 100644 --- a/paddle/fluid/operators/distributed_ops/merge_ids_op.cc +++ b/paddle/fluid/operators/distributed_ops/merge_ids_op.cc @@ -114,11 +114,10 @@ class MergeIdsOp : public framework::OperatorWithKernel { class MergeIdsOpInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - auto *input_var = block->Var(op_desc.Input("Ids")[0]); - for (auto &out_var : op_desc.Output("Out")) { - block->Var(out_var)->SetType(input_var->GetType()); + void operator()(framework::InferVarTypeContext *ctx) const override { + auto input_type = ctx->GetType(ctx->Input("Ids")[0]); + for (auto &out_var : ctx->Output("Out")) { + ctx->SetType(out_var, input_type); } } }; diff --git a/paddle/fluid/operators/distributed_ops/split_ids_op.cc b/paddle/fluid/operators/distributed_ops/split_ids_op.cc index f61d387fbef63..191ca1efe8ca5 100644 --- a/paddle/fluid/operators/distributed_ops/split_ids_op.cc +++ b/paddle/fluid/operators/distributed_ops/split_ids_op.cc @@ -14,6 +14,8 @@ limitations under the License. */ #include "paddle/fluid/operators/distributed_ops/split_ids_op.h" +#include + namespace paddle { namespace operators { @@ -71,11 +73,10 @@ class SplitIdsOp : public framework::OperatorWithKernel { class SplitIdsOpInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - auto *input_var = block->Var(op_desc.Input("Ids")[0]); - for (auto &out_var : op_desc.Output("Out")) { - block->Var(out_var)->SetType(input_var->GetType()); + void operator()(framework::InferVarTypeContext *ctx) const override { + auto input_type = ctx->GetType(ctx->Input("Ids")[0]); + for (auto &out_var : ctx->Output("Out")) { + ctx->SetType(out_var, input_type); } } }; diff --git a/paddle/fluid/operators/fc_op.cc b/paddle/fluid/operators/fc_op.cc index eb4617a935935..242f5390b8067 100644 --- a/paddle/fluid/operators/fc_op.cc +++ b/paddle/fluid/operators/fc_op.cc @@ -55,17 +55,8 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const { "The input tensor Input's rank of FCOp should be larger than " "in_num_col_dims."); - auto in_mat_dims = framework::flatten_to_2d(in_dims, in_num_col_dims); - PADDLE_ENFORCE_EQ( - in_mat_dims[1], w_dims[0], - "Fully Connected input and weigth size do not match. %s, %s"); - std::vector output_dims; - output_dims.reserve(static_cast(in_num_col_dims + 1)); - for (int i = 0; i < in_num_col_dims; ++i) { - output_dims.push_back(in_dims[i]); - } - output_dims.push_back(w_dims[1]); + FCOutputSize(in_dims, w_dims, output_dims, in_num_col_dims); ctx->SetOutputDim("Out", framework::make_ddim(output_dims)); ctx->ShareLoD("Input", "Out"); @@ -128,6 +119,9 @@ void FCOpMaker::Make() { AddAttr("use_mkldnn", "(bool, default false) Only used in mkldnn kernel") .SetDefault(false); + AddAttr(framework::kAllKernelsMustComputeRuntimeShape, + "Skip calling InferShape() function in the runtime.") + .SetDefault(true); AddComment(R"DOC( Fully Connected Operator. @@ -142,13 +136,20 @@ class FCOpKernel : public framework::OpKernel { void Compute(const paddle::framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()), "It must use CPUPlace."); - auto input = ctx.Input("Input"); + auto input = ctx.Input("Input"); auto w = ctx.Input("W"); auto bias = ctx.Input("Bias"); - auto output = ctx.Output("Out"); + auto output = ctx.Output("Out"); + int in_num_col_dims = ctx.Attr("in_num_col_dims"); auto w_dims = w->dims(); + + std::vector output_dims; + FCOutputSize(input->dims(), w_dims, output_dims, in_num_col_dims); + output->Resize(framework::make_ddim(output_dims)); + output->set_lod(input->lod()); + auto out_dims = output->dims(); - int M = framework::product(out_dims) / out_dims[out_dims.size() - 1]; + int M = framework::product(out_dims) / w_dims[1]; const T* input_data = input->data(); const T* w_data = w->data(); diff --git a/paddle/fluid/operators/fc_op.h b/paddle/fluid/operators/fc_op.h index e1b780fc0c401..b82a63cd830b5 100644 --- a/paddle/fluid/operators/fc_op.h +++ b/paddle/fluid/operators/fc_op.h @@ -48,5 +48,21 @@ class FCOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override; }; +inline void FCOutputSize(const framework::DDim& in_dims, + const framework::DDim& w_dims, + std::vector& out_dims, // NOLINT + int in_num_col_dims) { + auto in_mat_dims = framework::flatten_to_2d(in_dims, in_num_col_dims); + PADDLE_ENFORCE_EQ( + in_mat_dims[1], w_dims[0], + "Fully Connected input and weigth size do not match. %s, %s"); + + out_dims.reserve(static_cast(in_num_col_dims + 1)); + for (int i = 0; i < in_num_col_dims; ++i) { + out_dims.push_back(in_dims[i]); + } + out_dims.push_back(w_dims[1]); +} + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/fill_constant_op.cc b/paddle/fluid/operators/fill_constant_op.cc index c86430524e182..cf2f4776cf2ae 100644 --- a/paddle/fluid/operators/fill_constant_op.cc +++ b/paddle/fluid/operators/fill_constant_op.cc @@ -39,12 +39,11 @@ class FillConstantOp : public framework::OperatorWithKernel { class FillConstantOpVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc& op_desc, - framework::BlockDesc* block) const override { + void operator()(framework::InferVarTypeContext* ctx) const override { auto data_type = static_cast( - boost::get(op_desc.GetAttr("dtype"))); - auto& out_var_name = op_desc.Output("Out").front(); - block->Var(out_var_name)->SetDataType(data_type); + boost::get(ctx->GetAttr("dtype"))); + auto& out_var_name = ctx->Output("Out").front(); + ctx->SetDataType(out_var_name, data_type); } }; diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc index a0026427e2514..9cc94ab88d59d 100644 --- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc +++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc @@ -88,7 +88,8 @@ class FusedEmbeddingSeqPoolOpMaker : public framework::OpProtoAndCheckerMaker { "(boolean, default false) " "Sparse update.") .SetDefault(false); - AddAttr(framework::kAllKernelsMustComputeRuntimeShape, "") + AddAttr(framework::kAllKernelsMustComputeRuntimeShape, + "Skip calling InferShape() function in the runtime.") .SetDefault(true); AddComment(R"DOC( FusedEmbeddingSeqPool Operator. @@ -137,22 +138,20 @@ class FusedEmbeddingSeqPoolOpGrad : public framework::OperatorWithKernel { class FusedEmbeddingSeqPoolOpGradVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc& op_desc, - framework::BlockDesc* block) const override { - auto out_var_name = op_desc.Output(framework::GradVarName("W")).front(); - auto attr = op_desc.GetAttr("is_sparse"); + void operator()(framework::InferVarTypeContext* ctx) const override { + auto out_var_name = ctx->Output(framework::GradVarName("W")).front(); + auto attr = ctx->GetAttr("is_sparse"); bool is_sparse = boost::get(attr); if (is_sparse) { VLOG(3) << "fused_embedding_seq_pool_grad op " << framework::GradVarName("W") << " is set to SelectedRows"; - block->Var(out_var_name) - ->SetType(framework::proto::VarType::SELECTED_ROWS); + ctx->SetType(out_var_name, framework::proto::VarType::SELECTED_ROWS); } else { VLOG(3) << "fused_embedding_seq_pool_grad op " << framework::GradVarName("W") << " is set to LoDTensor"; - block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR); + ctx->SetType(out_var_name, framework::proto::VarType::LOD_TENSOR); } - block->Var(out_var_name)->SetDataType(block->Var("W")->GetDataType()); + ctx->SetDataType(out_var_name, ctx->GetDataType(ctx->Input("W")[0])); } }; diff --git a/paddle/fluid/operators/get_tensor_from_selected_rows_op.cc b/paddle/fluid/operators/get_tensor_from_selected_rows_op.cc index a4ae19d9c1e3b..c0893359af2f4 100644 --- a/paddle/fluid/operators/get_tensor_from_selected_rows_op.cc +++ b/paddle/fluid/operators/get_tensor_from_selected_rows_op.cc @@ -81,15 +81,12 @@ GetTensorFromSelectedRows is used to get the tensor from SelectedRows. class GetTensorFromSelectedRowsOpVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const final { - auto out_var_name = op_desc.Output("Out").front(); - auto in_var_name = op_desc.Input("X").front(); - - auto out_var = block->FindRecursiveOrCreateVar(out_var_name); - auto in_var = block->FindRecursiveOrCreateVar(in_var_name); - out_var.SetType(framework::proto::VarType::LOD_TENSOR); - out_var.SetDataType(in_var.GetDataType()); + void operator()(framework::InferVarTypeContext *ctx) const { // NOLINT + auto out_var_name = ctx->Output("Out").front(); + auto in_var_name = ctx->Input("X").front(); + + ctx->SetType(out_var_name, framework::proto::VarType::LOD_TENSOR); + ctx->SetDataType(out_var_name, ctx->GetDataType(in_var_name)); } }; diff --git a/paddle/fluid/operators/hash_op.cc b/paddle/fluid/operators/hash_op.cc index f6395fb32feac..82222d0a7e739 100644 --- a/paddle/fluid/operators/hash_op.cc +++ b/paddle/fluid/operators/hash_op.cc @@ -54,7 +54,8 @@ class HashOpMaker : public framework::OpProtoAndCheckerMaker { )DOC"); AddAttr("num_hash", "").SetDefault(1); AddAttr("mod_by", "").SetDefault(100000); - AddAttr(framework::kAllKernelsMustComputeRuntimeShape, "") + AddAttr(framework::kAllKernelsMustComputeRuntimeShape, + "Skip calling InferShape() function in the runtime.") .SetDefault(true); } }; diff --git a/paddle/fluid/operators/hierarchical_sigmoid_op.cc b/paddle/fluid/operators/hierarchical_sigmoid_op.cc index 6ca6f0bc04aa6..d0e1057c4357e 100644 --- a/paddle/fluid/operators/hierarchical_sigmoid_op.cc +++ b/paddle/fluid/operators/hierarchical_sigmoid_op.cc @@ -197,38 +197,32 @@ class HierarchicalSigmoidGradOp : public framework::OperatorWithKernel { class HierarchicalSigmoidGradOpGradVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc& op_desc, - framework::BlockDesc* block) const override { - auto w_grad_var_name = op_desc.Output(framework::GradVarName("W")).front(); - auto bias_grad_var_name_vec = - op_desc.Output(framework::GradVarName("Bias")); + void operator()(framework::InferVarTypeContext* ctx) const override { + auto w_grad_var_name = ctx->Output(framework::GradVarName("W")).front(); + auto bias_grad_var_name_vec = ctx->Output(framework::GradVarName("Bias")); std::string bias_grad_var_name; bool hasBias = false; if (bias_grad_var_name_vec.size()) { hasBias = true; - bias_grad_var_name = - op_desc.Output(framework::GradVarName("Bias")).front(); + bias_grad_var_name = ctx->Output(framework::GradVarName("Bias")).front(); } - auto attr = op_desc.GetAttr("is_sparse"); + auto attr = ctx->GetAttr("is_sparse"); bool is_sparse = boost::get(attr); if (is_sparse) { VLOG(30) << "hierarchical_sigmoid_grad op " << framework::GradVarName("W") << " is set to SelectedRows"; - block->Var(w_grad_var_name) - ->SetType(framework::proto::VarType::SELECTED_ROWS); + ctx->SetType(w_grad_var_name, framework::proto::VarType::SELECTED_ROWS); } else { VLOG(30) << "hierarchical_sigmoid_grad op " << framework::GradVarName("W") << " is set to LoDTensor"; - block->Var(w_grad_var_name) - ->SetType(framework::proto::VarType::LOD_TENSOR); + ctx->SetType(w_grad_var_name, framework::proto::VarType::LOD_TENSOR); } if (hasBias) { VLOG(30) << "hierarchical_sigmoid_grad op " << framework::GradVarName("Bias") << " is set to LoDTensor"; - block->Var(bias_grad_var_name) - ->SetType(framework::proto::VarType::LOD_TENSOR); + ctx->SetType(bias_grad_var_name, framework::proto::VarType::LOD_TENSOR); } - block->Var(w_grad_var_name)->SetDataType(block->Var("W")->GetDataType()); + ctx->SetDataType(w_grad_var_name, ctx->GetDataType(ctx->Input("W")[0])); } }; diff --git a/paddle/fluid/operators/lod_rank_table_op.cc b/paddle/fluid/operators/lod_rank_table_op.cc index 166952fe23192..0a43ac0c52f9b 100644 --- a/paddle/fluid/operators/lod_rank_table_op.cc +++ b/paddle/fluid/operators/lod_rank_table_op.cc @@ -64,11 +64,9 @@ class LoDRankTableInferShape : public framework::InferShapeBase { class LoDRankTableInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - for (auto &o : op_desc.Output("Out")) { - block->FindRecursiveOrCreateVar(o).SetType( - framework::proto::VarType::LOD_RANK_TABLE); + void operator()(framework::InferVarTypeContext *ctx) const override { + for (auto &o : ctx->Output("Out")) { + ctx->SetType(o, framework::proto::VarType::LOD_RANK_TABLE); } } }; diff --git a/paddle/fluid/operators/lod_tensor_to_array_op.cc b/paddle/fluid/operators/lod_tensor_to_array_op.cc index 9b91cf5260163..61e3427370456 100644 --- a/paddle/fluid/operators/lod_tensor_to_array_op.cc +++ b/paddle/fluid/operators/lod_tensor_to_array_op.cc @@ -201,10 +201,9 @@ class LoDTensorToArrayInferShape : public framework::InferShapeBase { class LoDTensorToArrayInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - for (auto &out_var : op_desc.Output("Out")) { - block->Var(out_var)->SetType(framework::proto::VarType::LOD_TENSOR_ARRAY); + void operator()(framework::InferVarTypeContext *ctx) const override { + for (auto &out_var : ctx->Output("Out")) { + ctx->SetType(out_var, framework::proto::VarType::LOD_TENSOR_ARRAY); } } }; diff --git a/paddle/fluid/operators/lookup_table_op.cc b/paddle/fluid/operators/lookup_table_op.cc index 0029932bc068c..8d1ebe6b1ce33 100644 --- a/paddle/fluid/operators/lookup_table_op.cc +++ b/paddle/fluid/operators/lookup_table_op.cc @@ -147,22 +147,20 @@ class LookupTableOpGrad : public framework::OperatorWithKernel { class LookupTableOpGradVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc& op_desc, - framework::BlockDesc* block) const override { - auto out_var_name = op_desc.Output(framework::GradVarName("W")).front(); - auto attr = op_desc.GetAttr("is_sparse"); + void operator()(framework::InferVarTypeContext* ctx) const override { + auto out_var_name = ctx->Output(framework::GradVarName("W")).front(); + auto attr = ctx->GetAttr("is_sparse"); bool is_sparse = boost::get(attr); if (is_sparse) { VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W") << " is set to SelectedRows"; - block->Var(out_var_name) - ->SetType(framework::proto::VarType::SELECTED_ROWS); + ctx->SetType(out_var_name, framework::proto::VarType::SELECTED_ROWS); } else { VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W") << " is set to LoDTensor"; - block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR); + ctx->SetType(out_var_name, framework::proto::VarType::LOD_TENSOR); } - block->Var(out_var_name)->SetDataType(block->Var("W")->GetDataType()); + ctx->SetDataType(out_var_name, ctx->GetDataType(ctx->Input("W")[0])); } }; diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc index 3a926a716f54a..69c0486eb6347 100644 --- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc @@ -123,7 +123,7 @@ class FCMKLDNNOpKernel : public paddle::framework::OpKernel { auto& dev_ctx = ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); - auto input = ctx.Input("Input"); + auto input = ctx.Input("Input"); auto w = ctx.Input("W"); auto bias = ctx.Input("Bias"); @@ -151,7 +151,13 @@ class FCMKLDNNOpKernel : public paddle::framework::OpKernel { const T* input_data = input->data(); const T* w_data = w->data(); - auto output = ctx.Output("Out"); + auto output = ctx.Output("Out"); + int in_num_col_dims = ctx.Attr("in_num_col_dims"); + std::vector output_dims; + FCOutputSize(input->dims(), w->dims(), output_dims, in_num_col_dims); + output->Resize(framework::make_ddim(output_dims)); + output->set_lod(input->lod()); + T* output_data = output->mutable_data(ctx.GetPlace()); auto dst_memory = mem.dst(output_data); @@ -204,19 +210,21 @@ class FCMKLDNNGradOpKernel : public paddle::framework::OpKernel { Tensor* input_grad = ctx.Output(framework::GradVarName("Input")); Tensor* w_grad = ctx.Output(framework::GradVarName("W")); + const Tensor* input = ctx.Input("Input"); + const T* input_data = input->data(); + + const Tensor* w = ctx.Input("W"); + const T* w_data = w->data(); + if (input_grad) { + input_grad->Resize(input->dims()); input_grad_data = input_grad->mutable_data(ctx.GetPlace()); } if (w_grad) { + w_grad->Resize(w->dims()); w_grad_data = w_grad->mutable_data(ctx.GetPlace()); } - const Tensor* input = ctx.Input("Input"); - const T* input_data = input->data(); - - const Tensor* w = ctx.Input("W"); - const T* w_data = w->data(); - const Tensor* out_grad = ctx.Input(framework::GradVarName("Out")); const T* out_grad_data = out_grad->data(); diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc index e41bfb80dfc04..4debc7ca5ec90 100644 --- a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc @@ -73,6 +73,29 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel { } }; +template +class TransposeINT8MKLDNNOpKernel : public paddle::framework::OpKernel { + public: + void Compute(const paddle::framework::ExecutionContext& ctx) const override { + std::vector axis = ctx.Attr>("axis"); + std::vector axis_int8 = {0, 2, 3, 1}; + if (axis.size() != 1) { + PADDLE_ENFORCE_EQ(axis.size(), axis_int8.size()); + for (size_t i = 0; i < axis.size(); i++) { + PADDLE_ENFORCE_EQ(axis[i], axis_int8[i], + "Current INT8 MKLDNN Transpose kernel only surpport " + "axis with [0, 2, 3, 1] due to MKL-DNN kernel " + "implementation."); + } + } + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); + output->ShareDataWith(*input); + output->set_layout(DataLayout::kMKLDNN); + output->set_format(input->format()); + } +}; + template class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel { public: @@ -140,7 +163,10 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_KERNEL(transpose2, MKLDNN, ::paddle::platform::CPUPlace, - ops::TransposeMKLDNNOpKernel); + ops::TransposeMKLDNNOpKernel, + ops::TransposeINT8MKLDNNOpKernel, + ops::TransposeINT8MKLDNNOpKernel); + REGISTER_OP_KERNEL(transpose, MKLDNN, ::paddle::platform::CPUPlace, ops::TransposeMKLDNNOpKernel); diff --git a/paddle/fluid/operators/nccl/nccl_op.cc b/paddle/fluid/operators/nccl/nccl_op.cc index 0018139cb06fe..6a0ae0dede695 100644 --- a/paddle/fluid/operators/nccl/nccl_op.cc +++ b/paddle/fluid/operators/nccl/nccl_op.cc @@ -60,12 +60,9 @@ class NCCLInitOp : public framework::OperatorBase { class NCCLInitOpVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - auto out_var_name = op_desc.Output("Communicator").front(); - auto &out_var = block->FindRecursiveOrCreateVar(out_var_name); - auto var_type = framework::proto::VarType::RAW; - out_var.SetType(var_type); + void operator()(framework::InferVarTypeContext *ctx) const override { + auto out_var_name = ctx->Output("Communicator").front(); + ctx->SetType(out_var_name, framework::proto::VarType::RAW); } }; diff --git a/paddle/fluid/operators/nce_op.cc b/paddle/fluid/operators/nce_op.cc index 256da34912560..fa7cc58c08455 100644 --- a/paddle/fluid/operators/nce_op.cc +++ b/paddle/fluid/operators/nce_op.cc @@ -237,23 +237,21 @@ class NCEOpGrad : public framework::OperatorWithKernel { class NCEOpGradVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - auto weight_grad = op_desc.Output(framework::GradVarName("Weight")).front(); + void operator()(framework::InferVarTypeContext *ctx) const override { + auto weight_grad = ctx->Output(framework::GradVarName("Weight")).front(); - auto attr = op_desc.GetAttr("is_sparse"); + auto attr = ctx->GetAttr("is_sparse"); bool is_sparse = boost::get(attr); if (is_sparse) { VLOG(3) << "nce_op_grad op " << weight_grad << " and " << " is set to SelectedRows"; - block->Var(weight_grad) - ->SetType(framework::proto::VarType::SELECTED_ROWS); + ctx->SetType(weight_grad, framework::proto::VarType::SELECTED_ROWS); } else { VLOG(3) << "nce_op_grad op " << weight_grad << " and " << " is set to LoDTensor"; - block->Var(weight_grad)->SetType(framework::proto::VarType::LOD_TENSOR); + ctx->SetType(weight_grad, framework::proto::VarType::LOD_TENSOR); } - block->Var(weight_grad)->SetDataType(block->Var("Input")->GetDataType()); + ctx->SetDataType(weight_grad, ctx->GetDataType(ctx->Input("Input")[0])); } }; diff --git a/paddle/fluid/operators/ngraph/ngraph_engine_op.cc b/paddle/fluid/operators/ngraph/ngraph_engine_op.cc index f941f917c82b3..479c95ba08c31 100644 --- a/paddle/fluid/operators/ngraph/ngraph_engine_op.cc +++ b/paddle/fluid/operators/ngraph/ngraph_engine_op.cc @@ -37,8 +37,7 @@ class NgraphEngineOpMaker : public framework::OpProtoAndCheckerMaker { class NgraphEngineInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override {} + void operator()(framework::InferVarTypeContext *ctx) const override {} }; } // namespace operators diff --git a/paddle/fluid/operators/optimizers/adam_op.h b/paddle/fluid/operators/optimizers/adam_op.h index 09255f60e6953..6262ef0c2d380 100644 --- a/paddle/fluid/operators/optimizers/adam_op.h +++ b/paddle/fluid/operators/optimizers/adam_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include // for sqrt in CPU and CUDA #include +#include #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/threadpool.h" @@ -311,17 +312,17 @@ struct SparseAdamFunctor { T beta1_pow = *beta1_pow_; T beta2_pow = *beta2_pow_; lr *= sqrt(1 - beta2_pow) / (1 - beta1_pow); - size_t row_count = numel / row_numel_; + int64_t row_count = static_cast(numel / row_numel_); - for (size_t i = 0U, j = 0U; i != row_count; ++i) { + for (int64_t i = 0, j = 0; i != row_count; ++i) { if (i == *(rows_ + j)) { - for (size_t k = 0U; k != row_numel_; ++k) { + for (int64_t k = 0; k != row_numel_; ++k) { T g = grad_[j * row_numel_ + k]; adam_update(i * row_numel_ + k, g); } ++j; } else { - for (size_t k = 0U; k != row_numel_; ++k) { + for (int64_t k = 0; k != row_numel_; ++k) { T mom1 = moment1_[i * row_numel_ + k]; T mom2 = moment2_[i * row_numel_ + k]; T p = param_[i * row_numel_ + k]; @@ -427,43 +428,23 @@ class AdamOpKernel : public framework::OpKernel { } } - framework::SelectedRows cpu_grad_merge; + framework::SelectedRows tmp_grad_merge; const framework::SelectedRows* grad_merge_ptr; if (is_strict_sorted) { grad_merge_ptr = &grad; } else { // merge duplicated rows if any. // The rows of grad_merge have been sorted inside MergeAdd functor - framework::SelectedRows* grad_merge_var; scatter::MergeAdd merge_func; - if (platform::is_cpu_place(ctx.GetPlace())) { - grad_merge_var = &cpu_grad_merge; - } else { - // FIXME(qiao): GPU also need to fix this - grad_merge_var = const_cast(ctx.scope()) - .Var() - ->GetMutable(); - } merge_func(ctx.template device_context(), grad, - grad_merge_var, true); - grad_merge_ptr = grad_merge_var; + &tmp_grad_merge, true); + grad_merge_ptr = &tmp_grad_merge; } auto& grad_merge = *grad_merge_ptr; auto& grad_tensor = grad_merge.value(); const T* grad_data = grad_tensor.template data(); - const int64_t* rows = nullptr; -// When compiled without CUDA, the CUDAData() interface should not be -// provided. -#if defined(PADDLE_WITH_CUDA) - if (platform::is_gpu_place(ctx.GetPlace())) { - rows = grad_merge.rows().CUDAData(ctx.GetPlace()); - } else { -#endif - rows = grad_merge.rows().data(); -#if defined(PADDLE_WITH_CUDA) - } -#endif + const int64_t* rows = grad_merge.rows().Data(ctx.GetPlace()); auto row_numel = grad_tensor.numel() / grad_merge.rows().size(); if (platform::is_cpu_place(ctx.GetPlace())) { @@ -488,7 +469,7 @@ class AdamOpKernel : public framework::OpKernel { } } #ifndef _WIN32 - else if (FLAGS_inner_op_parallelism > 1 && + else if (FLAGS_inner_op_parallelism > 1 && // NOLINT min_row_size_to_use_multithread > 0 && param.dims()[0] > min_row_size_to_use_multithread) { VLOG(3) << "use multi thread, inner_op_parallelism=" @@ -516,11 +497,11 @@ class AdamOpKernel : public framework::OpKernel { for (int i = 0; i < FLAGS_inner_op_parallelism; ++i) { int64_t start = i * line_in_each_thread; int64_t end = (i + 1) * line_in_each_thread; - if (start >= param_row_count) { + if (start >= static_cast(param_row_count)) { break; } - if (end > param_row_count) { - end = param_row_count; + if (end > static_cast(param_row_count)) { + end = static_cast(param_row_count); } fs.push_back( framework::Async([&functor, &row_id_to_grad_row_offset, @@ -545,8 +526,8 @@ class AdamOpKernel : public framework::OpKernel { } for (size_t i = 0; i < fs.size(); ++i) fs[i].wait(); } -#endif // !_WIN32 - else { +#endif // !_WIN32 + else { // NOLINT functor(param.numel()); } } else if (platform::is_gpu_place(ctx.GetPlace())) { diff --git a/paddle/fluid/operators/optimizers/lars_momentum_op.cc b/paddle/fluid/operators/optimizers/lars_momentum_op.cc index 574a03680b669..126b665dd4d93 100644 --- a/paddle/fluid/operators/optimizers/lars_momentum_op.cc +++ b/paddle/fluid/operators/optimizers/lars_momentum_op.cc @@ -56,9 +56,9 @@ This optimizer use LARS (https://arxiv.org/abs/1708.03888) to optimize each weight using a local learning rate: $$ -local\_lr = \eta * +local\_lr = \eta * \frac{\left \| param \right \|}{\left \| grad \right \| + \beta *\left \| param \right \|} \\ -velocity = mu * velocity + +velocity = mu * velocity + local\_lr * (grad + \beta * param) \\ param = param - velocity. \\ $$ @@ -72,8 +72,7 @@ use L2 regularizers in case of using LARS. class LarsMomentumOpVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override {} + void operator()(framework::InferVarTypeContext* ctx) const override {} }; } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/optimizers/momentum_op.cc b/paddle/fluid/operators/optimizers/momentum_op.cc index cde238c076b69..7cf218c20f4c8 100644 --- a/paddle/fluid/operators/optimizers/momentum_op.cc +++ b/paddle/fluid/operators/optimizers/momentum_op.cc @@ -21,18 +21,14 @@ using Tensor = framework::Tensor; class MomentumOpInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc& op_desc, - framework::BlockDesc* block) const override { - auto input_var = op_desc.Input("Param")[0]; - for (auto& out_var : op_desc.Output("ParamOut")) { - if (block->FindRecursiveOrCreateVar(input_var).GetType() == - framework::proto::VarType::SELECTED_ROWS) { - block->FindRecursiveOrCreateVar(out_var).SetType( - framework::proto::VarType::SELECTED_ROWS); - } else if (block->FindRecursiveOrCreateVar(input_var).GetType() == + void operator()(framework::InferVarTypeContext* ctx) const override { + auto& input_var = ctx->Input("Param")[0]; + for (auto& out_var : ctx->Output("ParamOut")) { + if (ctx->GetType(input_var) == framework::proto::VarType::SELECTED_ROWS) { + ctx->SetType(out_var, framework::proto::VarType::SELECTED_ROWS); + } else if (ctx->GetType(input_var) == framework::proto::VarType::LOD_TENSOR) { - block->FindRecursiveOrCreateVar(out_var).SetType( - framework::proto::VarType::LOD_TENSOR); + ctx->SetType(out_var, framework::proto::VarType::LOD_TENSOR); } else { PADDLE_THROW( "Only support LodTensor and SelectedRows, Unexpected Input Type."); diff --git a/paddle/fluid/operators/optimizers/momentum_op.h b/paddle/fluid/operators/optimizers/momentum_op.h index 3ed1bff5ff499..29a2ae6755aa6 100644 --- a/paddle/fluid/operators/optimizers/momentum_op.h +++ b/paddle/fluid/operators/optimizers/momentum_op.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" @@ -69,6 +70,7 @@ class MomentumOp : public framework::OperatorWithKernel { ctx->SetOutputDim("ParamOut", param_dim); ctx->SetOutputDim("VelocityOut", param_dim); } + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { auto input_data_type = framework::GetDataTypeOfVar(ctx.InputVar("Param")); @@ -351,23 +353,14 @@ class MomentumOpKernel : public framework::OpKernel { VLOG(3) << "Grad SelectedRows contains no data!"; return; } - auto* merged_grad = const_cast(ctx.scope()) - .Var() - ->GetMutable(); + + framework::SelectedRows tmp_merged_grad; + framework::SelectedRows* merged_grad = &tmp_merged_grad; math::scatter::MergeAdd merge_func; merge_func(ctx.template device_context(), *grad, merged_grad); - const int64_t* rows = nullptr; -#ifdef PADDLE_WITH_CUDA - if (platform::is_gpu_place(ctx.GetPlace())) { - rows = merged_grad->rows().CUDAData(ctx.GetPlace()); - } else { -#endif - rows = merged_grad->rows().data(); -#ifdef PADDLE_WITH_CUDA - } -#endif + const int64_t* rows = merged_grad->rows().Data(ctx.GetPlace()); int64_t row_numel = merged_grad->value().numel() / merged_grad->rows().size(); platform::ForRange for_range( diff --git a/paddle/fluid/operators/optimizers/rmsprop_op.h b/paddle/fluid/operators/optimizers/rmsprop_op.h index 389c84d246409..4550052b2d614 100644 --- a/paddle/fluid/operators/optimizers/rmsprop_op.h +++ b/paddle/fluid/operators/optimizers/rmsprop_op.h @@ -216,24 +216,14 @@ class RmspropOpKernel : public framework::OpKernel { } } else if (grad_var->IsType()) { auto &grad = grad_var->Get(); - auto *merged_grad = const_cast(ctx.scope()) - .Var() - ->GetMutable(); - + framework::SelectedRows tmp_merged_grad; + framework::SelectedRows *merged_grad = &tmp_merged_grad; math::scatter::MergeAdd merge_func; merge_func(dev_ctx, grad, merged_grad); platform::ForRange for_range(dev_ctx, limit); - const int64_t *rows; -#ifdef PADDLE_WITH_CUDA - if (platform::is_gpu_place(ctx.GetPlace())) { - rows = merged_grad->rows().CUDAData(ctx.GetPlace()); - } else { -#endif - rows = merged_grad->rows().data(); -#ifdef PADDLE_WITH_CUDA - } -#endif + const int64_t *rows = merged_grad->rows().Data(ctx.GetPlace()); + auto &merged_tensor = merged_grad->value(); int64_t row_count = merged_grad->rows().size(); int64_t row_numel = merged_tensor.numel() / row_count; diff --git a/paddle/fluid/operators/optimizers/sgd_op.cc b/paddle/fluid/operators/optimizers/sgd_op.cc index 690381a67f89d..34e99a14ff77c 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.cc +++ b/paddle/fluid/operators/optimizers/sgd_op.cc @@ -50,20 +50,18 @@ class SGDOp : public framework::OperatorWithKernel { class SGDOpInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - auto input_var_n = op_desc.Input("Param")[0]; - auto in_var_type = block->FindRecursiveOrCreateVar(input_var_n).GetType(); + void operator()(framework::InferVarTypeContext *ctx) const override { + auto &input_var_n = ctx->Input("Param")[0]; + auto in_var_type = ctx->GetType(input_var_n); PADDLE_ENFORCE(in_var_type == framework::proto::VarType::SELECTED_ROWS || in_var_type == framework::proto::VarType::LOD_TENSOR, "The input Var's type should be LoDtensor or SelectedRows," " but the received var(%s)'s type is %s", input_var_n, in_var_type); - for (auto &out_var_n : op_desc.Output("ParamOut")) { - auto &out_var = block->FindRecursiveOrCreateVar(out_var_n); - if (out_var.GetType() != in_var_type) { - out_var.SetType(in_var_type); + for (auto &out_var_n : ctx->Output("ParamOut")) { + if (ctx->GetType(out_var_n) != in_var_type) { + ctx->SetType(out_var_n, in_var_type); } } } diff --git a/paddle/fluid/operators/py_func_op.cc b/paddle/fluid/operators/py_func_op.cc index 53eff2de3e386..5300e807472d3 100644 --- a/paddle/fluid/operators/py_func_op.cc +++ b/paddle/fluid/operators/py_func_op.cc @@ -14,8 +14,11 @@ #include "paddle/fluid/operators/py_func_op.h" +#include #include #include +#include +#include #include #include "paddle/fluid/framework/op_registry.h" @@ -91,15 +94,12 @@ static void CallPythonFunc(py::object *callable, } } -class PyFuncOpVarTypInference : public framework::VarTypeInference { +class PyFuncOpVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op, - framework::BlockDesc *block) const override { - auto &outs = op.Outputs(); - bool has_out = (outs.count("Out") > 0 && !outs.at("Out").empty()); + void operator()(framework::InferVarTypeContext *ctx) const override { + bool has_out = (ctx->HasOutput("Out") && !ctx->Output("Out").empty()); - auto &ins = op.Inputs(); - bool has_in = (ins.count("X") > 0 && !ins.at("X").empty()); + bool has_in = (ctx->HasInput("X") && !ctx->Input("X").empty()); /** * X or Out can be empty, so that py_func can be more flexible @@ -107,8 +107,8 @@ class PyFuncOpVarTypInference : public framework::VarTypeInference { */ PADDLE_ENFORCE(has_in || has_out, "Input(X) or Output(Out) must exist"); - PADDLE_ENFORCE_GE(boost::get(op.GetAttr(kForwardPythonCallableId)), 0, - "Function id cannot be less than 0"); + PADDLE_ENFORCE_GE(boost::get(ctx->GetAttr(kForwardPythonCallableId)), + 0, "Function id cannot be less than 0"); if (!has_out) return; @@ -118,7 +118,7 @@ class PyFuncOpVarTypInference : public framework::VarTypeInference { * the corresponding forward variable */ const std::string kGradVarSuffix = framework::kGradVarSuffix; - auto &out_var_names = outs.at("Out"); + auto &out_var_names = ctx->Output("Out"); for (auto &out_var_name : out_var_names) { if (out_var_name == framework::kEmptyVarName || out_var_name.size() < kGradVarSuffix.size()) { @@ -128,18 +128,17 @@ class PyFuncOpVarTypInference : public framework::VarTypeInference { size_t len = out_var_name.size() - kGradVarSuffix.size(); if (out_var_name.substr(len) == kGradVarSuffix) { auto fwd_var_name = out_var_name.substr(0, len); - auto *out_var_desc = block->FindVarRecursive(out_var_name); - auto *fwd_var_desc = block->FindVarRecursive(fwd_var_name); - PADDLE_ENFORCE_NOT_NULL(out_var_desc, "Backward variable %s not found", - out_var_name); - PADDLE_ENFORCE_NOT_NULL(fwd_var_desc, "Forward variable %s not found", - fwd_var_name); + PADDLE_ENFORCE(ctx->HasVar(out_var_name), + "Backward variable %s not found", out_var_name); + PADDLE_ENFORCE(ctx->HasVar(fwd_var_name), + "Backward variable %s not found", fwd_var_name); VLOG(10) << "Infer var_desc of Output(" << out_var_name << ") as Input(" << fwd_var_name << ")"; - out_var_desc->SetShape(fwd_var_desc->GetShape()); - out_var_desc->SetDataType(fwd_var_desc->GetDataType()); - out_var_desc->SetLoDLevel(fwd_var_desc->GetLoDLevel()); - out_var_desc->SetType(fwd_var_desc->GetType()); + + ctx->SetShape(out_var_name, ctx->GetShape(fwd_var_name)); + ctx->SetDataType(out_var_name, ctx->GetDataType(fwd_var_name)); + ctx->SetLoDLevel(out_var_name, ctx->GetLoDLevel(fwd_var_name)); + ctx->SetType(out_var_name, ctx->GetType(fwd_var_name)); } } } @@ -309,5 +308,5 @@ class PyFuncOp : public framework::OperatorBase { namespace ops = paddle::operators; REGISTER_OPERATOR(py_func, ops::PyFuncOp, ops::PyFuncOpMaker, - ops::PyFuncOpVarTypInference, ops::PyFuncOpShapeInference, + ops::PyFuncOpVarTypeInference, ops::PyFuncOpShapeInference, ops::PyFuncOpGradDescMaker); diff --git a/paddle/fluid/operators/reader/create_custom_reader_op.cc b/paddle/fluid/operators/reader/create_custom_reader_op.cc index 85394b336fc96..fdc7b0f6a0e8d 100644 --- a/paddle/fluid/operators/reader/create_custom_reader_op.cc +++ b/paddle/fluid/operators/reader/create_custom_reader_op.cc @@ -85,10 +85,10 @@ class CreateCustomReaderOpMaker : public DecoratedReaderMakerBase { AddComment(R"DOC( CreateCustomReader Operator - A custom reader can be used for input data preprocessing. - A custom reader holds its own sub-block, which will be executed in CPU - in its 'ReadNext()' function. Users can configurate their own - preprocessing pipelines by inserting operators into custom reader's + A custom reader can be used for input data preprocessing. + A custom reader holds its own sub-block, which will be executed in CPU + in its 'ReadNext()' function. Users can configurate their own + preprocessing pipelines by inserting operators into custom reader's sub-block. )DOC"); } @@ -123,23 +123,22 @@ class CustomReaderInferShape : public framework::InferShapeBase { class CustomReaderInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc& op_desc, - framework::BlockDesc* block) const override { - framework::VarDesc* out_reader = block->FindVar(op_desc.Output("Out")[0]); - PADDLE_ENFORCE_NOT_NULL(out_reader); - out_reader->SetType(framework::proto::VarType::READER); + void operator()(framework::InferVarTypeContext* ctx) const override { + auto& out_var_name = ctx->Output("Out")[0]; + PADDLE_ENFORCE(ctx->HasVar(out_var_name)); + ctx->SetType(out_var_name, framework::proto::VarType::READER); auto sink_var_names = - boost::get>(op_desc.GetAttr("sink_var_names")); + boost::get>(ctx->GetAttr("sink_var_names")); const auto* sub_block = - boost::get(op_desc.GetAttr("sub_block")); + boost::get(ctx->GetAttr("sub_block")); std::vector res_data_types; for (const std::string& var_name : sink_var_names) { framework::VarDesc* var = sub_block->FindVar(var_name); PADDLE_ENFORCE_NOT_NULL(var); res_data_types.emplace_back(var->GetDataType()); } - out_reader->SetDataTypes(res_data_types); + ctx->SetDataTypes(out_var_name, res_data_types); } }; diff --git a/paddle/fluid/operators/reader/read_op.cc b/paddle/fluid/operators/reader/read_op.cc index 846b2ed77e46d..33a69ad5fec2b 100644 --- a/paddle/fluid/operators/reader/read_op.cc +++ b/paddle/fluid/operators/reader/read_op.cc @@ -51,19 +51,16 @@ class ReadInferShape : public framework::InferShapeBase { class ReadInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc& op_desc, - framework::BlockDesc* block) const override { - bool infer_out = boost::get(op_desc.GetAttr("infer_out")); + void operator()(framework::InferVarTypeContext* ctx) const override { + bool infer_out = boost::get(ctx->GetAttr("infer_out")); if (infer_out) { - std::string reader_name = op_desc.Input("Reader")[0]; - std::vector out_names = op_desc.Output("Out"); - framework::VarDesc* reader = block->FindVarRecursive(reader_name); - auto dtypes = reader->GetDataTypes(); + std::string reader_name = ctx->Input("Reader")[0]; + std::vector out_names = ctx->Output("Out"); + auto dtypes = ctx->GetDataTypes(reader_name); PADDLE_ENFORCE_EQ(dtypes.size(), out_names.size()); for (size_t i = 0; i < dtypes.size(); ++i) { - framework::VarDesc& out = block->FindRecursiveOrCreateVar(out_names[i]); - out.SetType(framework::proto::VarType::LOD_TENSOR); - out.SetDataType(dtypes[i]); + ctx->SetType(out_names[i], framework::proto::VarType::LOD_TENSOR); + ctx->SetDataType(out_names[i], dtypes[i]); } } } diff --git a/paddle/fluid/operators/reader/reader_op_registry.cc b/paddle/fluid/operators/reader/reader_op_registry.cc index 3921eedf94abb..64a1f6b68702f 100644 --- a/paddle/fluid/operators/reader/reader_op_registry.cc +++ b/paddle/fluid/operators/reader/reader_op_registry.cc @@ -98,11 +98,10 @@ void FileReaderInferShape::operator()(framework::InferShapeContext* ctx) const { } } -void FileReaderInferVarType::operator()(const framework::OpDesc& op_desc, - framework::BlockDesc* block) const { - std::string reader_name = op_desc.Output("Out")[0]; - framework::VarDesc* reader = block->FindVarRecursive(reader_name); - reader->SetType(framework::proto::VarType::READER); +void FileReaderInferVarType::operator()( + framework::InferVarTypeContext* ctx) const { + std::string reader_name = ctx->Output("Out")[0]; + ctx->SetType(reader_name, framework::proto::VarType::READER); } void DecoratedReaderInferShape::operator()( @@ -125,13 +124,11 @@ void DecoratedReaderInferShape::operator()( } void DecoratedReaderInferVarType::operator()( - const framework::OpDesc& op_desc, framework::BlockDesc* block) const { - std::string in_reader_name = op_desc.Input("UnderlyingReader")[0]; - framework::VarDesc* in_reader = block->FindVarRecursive(in_reader_name); - std::string out_reader_name = op_desc.Output("Out")[0]; - framework::VarDesc* out_reader = block->FindVarRecursive(out_reader_name); - out_reader->SetType(framework::proto::VarType::READER); - out_reader->SetDataTypes(in_reader->GetDataTypes()); + framework::InferVarTypeContext* ctx) const { + const std::string& in_reader_name = ctx->Input("UnderlyingReader")[0]; + const std::string& out_reader_name = ctx->Output("Out")[0]; + ctx->SetType(out_reader_name, framework::proto::VarType::READER); + ctx->SetDataTypes(out_reader_name, ctx->GetDataTypes(in_reader_name)); } void DecoratedReaderMakerBase::Make() { diff --git a/paddle/fluid/operators/reader/reader_op_registry.h b/paddle/fluid/operators/reader/reader_op_registry.h index 25c3e7d77b788..795a5806050ef 100644 --- a/paddle/fluid/operators/reader/reader_op_registry.h +++ b/paddle/fluid/operators/reader/reader_op_registry.h @@ -14,7 +14,9 @@ #pragma once +#include #include +#include #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/reader.h" @@ -59,8 +61,7 @@ class FileReaderInferShape : public framework::InferShapeBase { class FileReaderInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc& op_desc, - framework::BlockDesc* block) const override; + void operator()(framework::InferVarTypeContext* ctx) const override; }; // general infershape for decorated reader @@ -72,8 +73,7 @@ class DecoratedReaderInferShape : public framework::InferShapeBase { // general var type inference for decorated reader class DecoratedReaderInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc& op_desc, - framework::BlockDesc* block) const override; + void operator()(framework::InferVarTypeContext* ctx) const override; }; class DecoratedReaderMakerBase : public framework::OpProtoAndCheckerMaker { diff --git a/paddle/fluid/operators/save_op.cc b/paddle/fluid/operators/save_op.cc index fcc598f4f1613..b02c098099625 100644 --- a/paddle/fluid/operators/save_op.cc +++ b/paddle/fluid/operators/save_op.cc @@ -159,12 +159,9 @@ This operator will serialize and write LoDTensor / SelectedRows variable to file class SaveOpVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - auto out_var_name = op_desc.Output(LOOKUP_TABLE_PATH).front(); - auto &out_var = block->FindRecursiveOrCreateVar(out_var_name); - auto var_type = framework::proto::VarType::RAW; - out_var.SetType(var_type); + void operator()(framework::InferVarTypeContext *ctx) const override { + auto out_var_name = ctx->Output(LOOKUP_TABLE_PATH).front(); + ctx->SetType(out_var_name, framework::proto::VarType::RAW); } }; diff --git a/paddle/fluid/operators/scale_op.cc b/paddle/fluid/operators/scale_op.cc index 4ea77ed30db21..4e4a015e18305 100644 --- a/paddle/fluid/operators/scale_op.cc +++ b/paddle/fluid/operators/scale_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/scale_op.h" +#include #include #include "paddle/fluid/operators/detail/safe_ref.h" @@ -69,17 +70,13 @@ if bias_after_scale=True: class ScaleOpVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - auto &in_var_name = op_desc.Input("X").front(); - auto &in_var = detail::Ref(block->FindVarRecursive(in_var_name)); - - auto out_var_name = op_desc.Output("Out").front(); - auto *out_var = block->FindVarRecursive(out_var_name); + void operator()(framework::InferVarTypeContext *ctx) const override { + auto &in_var_name = ctx->Input("X").front(); + auto out_var_name = ctx->Output("Out").front(); if (in_var_name != out_var_name) { - out_var->SetType(in_var.GetType()); - out_var->SetDataType(in_var.GetDataType()); + ctx->SetType(out_var_name, ctx->GetType(in_var_name)); + ctx->SetDataType(out_var_name, ctx->GetDataType(in_var_name)); } } }; diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc index f357c9c08d042..cc4eedbf4de22 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc @@ -30,13 +30,6 @@ class SequenceEnumerateOp : public framework::OperatorWithKernel { "Output(X) of SequenceEnumerate operator should not be null."); const auto x_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ( - x_dims.size(), 2, - "Input(X) of SequenceEnumerate operator's rank should be 2."); - PADDLE_ENFORCE_EQ(x_dims[1], 1, - "Input(X) of SequenceEnumerate operator's 2nd " - "dimension should be 1."); - const auto win_size = ctx->Attrs().Get("win_size"); ctx->SetOutputDim("Out", {x_dims[0], win_size}); ctx->ShareLoD("X", "Out"); @@ -59,7 +52,8 @@ class SequenceEnumerateOpMaker : public framework::OpProtoAndCheckerMaker { }); AddAttr("pad_value", "(int) The enumerate sequence padding value.") .SetDefault(0); - AddAttr(framework::kAllKernelsMustComputeRuntimeShape, "") + AddAttr(framework::kAllKernelsMustComputeRuntimeShape, + "Skip calling InferShape() function in the runtime.") .SetDefault(true); AddComment(R"DOC( Sequence Enumerate Operator. diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h index 18da69993b2ad..6a1eb6e625b69 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h @@ -27,30 +27,47 @@ class SequenceEnumerateKernel : public framework::OpKernel { auto* in = context.Input("X"); auto* out = context.Output("Out"); int win_size = context.Attr("win_size"); - int pad_value = context.Attr("pad_value"); + auto pad_value = static_cast(context.Attr("pad_value")); auto in_dims = in->dims(); - auto in_lod = in->lod(); - + auto lod0 = in->lod()[0]; PADDLE_ENFORCE_EQ( - static_cast(in_dims[0]), in_lod[0].back(), + static_cast(in_dims[0]), lod0.back(), "The actual input data's size mismatched with LoD information."); + PADDLE_ENFORCE_EQ( + in_dims.size(), 2UL, + "Input(X) of SequenceEnumerate operator's rank should be 2."); + PADDLE_ENFORCE_EQ(in_dims[1], 1, + "Input(X) of SequenceEnumerate operator's 2nd " + "dimension should be 1."); // Generate enumerate sequence set - auto lod0 = in_lod[0]; auto in_data = in->data(); out->Resize({in_dims[0], win_size}); + out->set_lod(in->lod()); auto out_data = out->mutable_data(context.GetPlace()); for (size_t i = 0; i < lod0.size() - 1; ++i) { - for (size_t idx = lod0[i]; idx < lod0[i + 1]; ++idx) { - for (int word_idx = 0; word_idx < win_size; ++word_idx) { - size_t word_pos = idx + word_idx; - out_data[win_size * idx + word_idx] = - word_pos < lod0[i + 1] ? in_data[word_pos] : pad_value; + int start = lod0[i]; + int end = lod0[i + 1]; + int copy_size = win_size < end - start + 1 ? win_size : end - start + 1; + int mid = end + 1 - copy_size; + int pad_num = win_size - copy_size; + copy_size *= sizeof(T); + for (int idx = start; idx < mid; ++idx) { + std::memcpy(out_data, in_data + idx, copy_size); + out_data += win_size; + } + for (int idx = mid; idx < end; ++idx) { + copy_size -= sizeof(T); + pad_num++; + std::memcpy(out_data, in_data + idx, copy_size); + T* pdata = out_data + copy_size / sizeof(T); + for (int i = 0; i < pad_num; ++i) { + pdata[i] = pad_value; } + out_data += win_size; } } - out->set_lod(in->lod()); } }; diff --git a/paddle/fluid/operators/slice_op.cu b/paddle/fluid/operators/slice_op.cu index 1af57b89a3506..24a564f9ef9d6 100644 --- a/paddle/fluid/operators/slice_op.cu +++ b/paddle/fluid/operators/slice_op.cu @@ -31,18 +31,18 @@ __global__ void Padding(const paddle::platform::float16* d_out, paddle::platform::float16* d_in) { int64_t out_idx = threadIdx.x + blockDim.x * blockIdx.x; if (out_idx < n) { + int64_t out_idx_tmp = out_idx; int coords[D] = {0}; for (int i = D - 1; i >= 0; --i) { - coords[i] = out_idx % out_dims[i]; - out_idx /= out_dims[i]; + coords[i] = out_idx_tmp % out_dims[i]; + out_idx_tmp /= out_dims[i]; coords[i] += offsets[i]; } int64_t in_idx = 0; - for (int i = 0; i < D - 1; ++i) { - in_idx += coords[i] * in_dims[i + 1]; + for (int i = 0; i < D; ++i) { + in_idx = in_idx * in_dims[i] + coords[i]; } - in_idx += coords[D - 1]; d_in[in_idx] = d_out[out_idx]; } @@ -80,8 +80,8 @@ class SliceGradKernel(0)); int64_t numel = d_out->numel(); - dim3 blocks((numel - 1) / PADDLE_CUDA_NUM_THREADS + 1, 1, 1); - dim3 threads(PADDLE_CUDA_NUM_THREADS, 1, 1); + dim3 blocks((numel - 1) / PADDLE_CUDA_NUM_THREADS + 1); + dim3 threads(PADDLE_CUDA_NUM_THREADS); auto stream = ctx.cuda_device_context().stream(); auto out_shape = framework::vectorize2int(out_dims); diff --git a/paddle/fluid/operators/split_selected_rows_op.cc b/paddle/fluid/operators/split_selected_rows_op.cc index 0e7b1463d1ba8..88dfebc0cff0d 100644 --- a/paddle/fluid/operators/split_selected_rows_op.cc +++ b/paddle/fluid/operators/split_selected_rows_op.cc @@ -14,6 +14,8 @@ limitations under the License. */ #include "paddle/fluid/operators/split_selected_rows_op.h" +#include + namespace paddle { namespace operators { @@ -60,10 +62,9 @@ class SplitSelectedRowsOp : public framework::OperatorWithKernel { class SplitSelectedRowsOpInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - for (auto &out_var : op_desc.Output("Out")) { - block->Var(out_var)->SetType(framework::proto::VarType::SELECTED_ROWS); + void operator()(framework::InferVarTypeContext *ctx) const override { + for (auto &out_var : ctx->Output("Out")) { + ctx->SetType(out_var, framework::proto::VarType::SELECTED_ROWS); } } }; diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc index 7abfbbd3cb5e5..1391148ccf5d1 100644 --- a/paddle/fluid/operators/sum_op.cc +++ b/paddle/fluid/operators/sum_op.cc @@ -12,6 +12,7 @@ limitations under the License. */ #include "paddle/fluid/operators/sum_op.h" #include +#include #include #include @@ -159,24 +160,20 @@ the LoD information with the first input. class SumOpVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc& op_desc, - framework::BlockDesc* block) const override { - auto& inputs = op_desc.Input("X"); + void operator()(framework::InferVarTypeContext* ctx) const override { + auto& inputs = ctx->Input("X"); auto var_type = framework::proto::VarType::SELECTED_ROWS; - for (auto& name : op_desc.Input("X")) { - VLOG(10) << name << " " - << block->FindRecursiveOrCreateVar(name).GetType(); + for (auto& name : ctx->Input("X")) { + VLOG(10) << name << " " << ctx->GetType(name); } bool any_input_is_lod_tensor = std::any_of( - inputs.begin(), inputs.end(), [block](const std::string& name) { - return block->FindRecursiveOrCreateVar(name).GetType() == - framework::proto::VarType::LOD_TENSOR; + inputs.begin(), inputs.end(), [ctx](const std::string& name) { + return ctx->GetType(name) == framework::proto::VarType::LOD_TENSOR; }); - auto is_tensor_array = [block](const std::string& name) { - return block->FindRecursiveOrCreateVar(name).GetType() == - framework::proto::VarType::LOD_TENSOR_ARRAY; + auto is_tensor_array = [ctx](const std::string& name) { + return ctx->GetType(name) == framework::proto::VarType::LOD_TENSOR_ARRAY; }; bool any_input_is_tensor_array = @@ -188,8 +185,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference { if (!all_inputs_are_tensor_array) { std::ostringstream os; for (auto& each : inputs) { - os << " " << each << " type is " - << block->FindRecursiveOrCreateVar(each).GetType() << "\n"; + os << " " << each << " type is " << ctx->GetType(each) << "\n"; } PADDLE_ENFORCE(all_inputs_are_tensor_array, "Not all inputs are tensor array:\n%s", os.str()); @@ -199,11 +195,9 @@ class SumOpVarTypeInference : public framework::VarTypeInference { var_type = framework::proto::VarType::LOD_TENSOR; } - auto out_var_name = op_desc.Output("Out").front(); - auto& out_var = block->FindRecursiveOrCreateVar(out_var_name); - out_var.SetType(var_type); - auto& in_var = detail::Ref(block->FindVarRecursive(inputs.front())); - out_var.SetDataType(in_var.GetDataType()); + auto out_var_name = ctx->Output("Out").front(); + ctx->SetType(out_var_name, var_type); + ctx->SetDataType(out_var_name, ctx->GetDataType(inputs.front())); } }; diff --git a/paddle/fluid/operators/tensor_array_to_tensor_op.cc b/paddle/fluid/operators/tensor_array_to_tensor_op.cc index 58a74ec2c104f..2b83c42f205c6 100644 --- a/paddle/fluid/operators/tensor_array_to_tensor_op.cc +++ b/paddle/fluid/operators/tensor_array_to_tensor_op.cc @@ -177,10 +177,9 @@ class LoDTensorArray2TensorGradInferShape : public framework::InferShapeBase { class LoDTensorArray2TensorGradInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - for (auto &out_var : op_desc.Output(framework::GradVarName("X"))) { - block->Var(out_var)->SetType(framework::proto::VarType::LOD_TENSOR_ARRAY); + void operator()(framework::InferVarTypeContext *ctx) const override { + for (auto &out_var : ctx->Output(framework::GradVarName("X"))) { + ctx->SetType(out_var, framework::proto::VarType::LOD_TENSOR_ARRAY); } } }; diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc index a8c86de9f9a1a..6cf3e65e00ff6 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc @@ -46,8 +46,7 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker { class TensorRTEngineInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override {} + void operator()(framework::InferVarTypeContext *ctx) const override {} }; } // namespace operators diff --git a/paddle/fluid/operators/uniform_random_op.cc b/paddle/fluid/operators/uniform_random_op.cc index e3132ae76f624..bb6a1c5b16569 100644 --- a/paddle/fluid/operators/uniform_random_op.cc +++ b/paddle/fluid/operators/uniform_random_op.cc @@ -112,17 +112,16 @@ uniform distribution. The random result is in set [min, max]. class UniformRandomOpVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - auto out_var_name = op_desc.Output("Out").front(); + void operator()(framework::InferVarTypeContext *ctx) const override { + auto out_var_name = ctx->Output("Out").front(); auto var_data_type = static_cast( - boost::get(op_desc.GetAttr("dtype"))); + boost::get(ctx->GetAttr("dtype"))); - auto out_var = block->FindRecursiveOrCreateVar(out_var_name); - if (out_var.GetType() != framework::proto::VarType::SELECTED_ROWS) { - out_var.SetType(framework::proto::VarType::LOD_TENSOR); + if (ctx->GetType(out_var_name) != + framework::proto::VarType::SELECTED_ROWS) { + ctx->SetType(out_var_name, framework::proto::VarType::LOD_TENSOR); } - out_var.SetDataType(var_data_type); + ctx->SetDataType(out_var_name, var_data_type); } }; diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 4ac5b83c56b11..f1385f57184ec 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -1,6 +1,6 @@ set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler layer scope_pool - tracer analysis_predictor) + tracer analysis_predictor imperative_profiler) if(WITH_PYTHON) list(APPEND PYBIND_DEPS py_func_op) diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 6bbda69297a48..e9ed4e16443eb 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -38,20 +38,22 @@ void BindTracer(pybind11::module* m) { .def("trace", [](imperative::Tracer& self, imperative::OpBase* op, const imperative::VarBasePtrMap& inputs, - const imperative::VarBasePtrMap& outputs, + imperative::VarBasePtrMap* outputs, framework::AttributeMap attrs_map, const platform::CPUPlace expected_place, const bool stop_gradient = false) { + pybind11::gil_scoped_release release; return self.Trace(op, inputs, outputs, attrs_map, expected_place, stop_gradient); }) .def("trace", [](imperative::Tracer& self, imperative::OpBase* op, const imperative::VarBasePtrMap& inputs, - const imperative::VarBasePtrMap& outputs, + imperative::VarBasePtrMap* outputs, framework::AttributeMap attrs_map, const platform::CUDAPlace expected_place, const bool stop_gradient = false) { + pybind11::gil_scoped_release release; return self.Trace(op, inputs, outputs, attrs_map, expected_place, stop_gradient); }) diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 5a753d0a78099..691b437ab0cf4 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -36,6 +36,7 @@ limitations under the License. */ #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/version.h" #include "paddle/fluid/imperative/layer.h" +#include "paddle/fluid/imperative/profiler.h" #include "paddle/fluid/memory/allocation/allocator_strategy.h" #include "paddle/fluid/memory/allocation/legacy_allocator.h" #include "paddle/fluid/operators/activation_op.h" @@ -156,6 +157,11 @@ PYBIND11_MODULE(core, m) { m.def("print_mem_usage", []() { return memory::allocation::GPUMemMonitor.PrintMemUsage(); }); + m.def("start_imperative_gperf_profiler", + []() { imperative::StartProfile(); }); + + m.def("stop_imperative_gperf_profiler", []() { imperative::StopProfile(); }); + py::class_(m, "VarBase", R"DOC()DOC") .def( py::init`_ (e.g., numpy 1d -array of float32, int, list of int) +Item should be of supported type (e.g., numpy array or list/tuple of float +or int). An example implementation for single item data reader creator: @@ -62,8 +61,6 @@ def reader(): yield numpy.random.uniform(-1, 1, size=width*height), label return reader - -TODO(yuyang18): Should we add whole design doc here? """ import paddle.reader.decorator diff --git a/python/paddle/reader/creator.py b/python/paddle/reader/creator.py index c861020225fb6..353aca92f42d8 100644 --- a/python/paddle/reader/creator.py +++ b/python/paddle/reader/creator.py @@ -44,8 +44,11 @@ def text_file(path): Creates a data reader that outputs text line by line from given text file. Trailing new line ('\\\\n') of each line will be removed. - :path: path of the text file. - :returns: data reader of text file + Args: + path (str): path of the text file. + + Returns: + callable: data reader of text file. """ def reader(): @@ -59,10 +62,15 @@ def reader(): def recordio(paths, buf_size=100): """ - Creates a data reader from given RecordIO file paths separated by ",", - glob pattern is supported. - :path: path of recordio files, can be a string or a string list. - :returns: data reader of recordio files. + Creates a data reader from given RecordIO file paths separated + by ",", glob pattern is supported. + + Args: + paths (str|list(str)): path of recordio files. + buf_size (int): prefetched buffer size. + + Returns: + callable: data reader of recordio files. """ import recordio as rec diff --git a/python/paddle/reader/decorator.py b/python/paddle/reader/decorator.py index b2ef9f7580900..685d08b9e0b21 100644 --- a/python/paddle/reader/decorator.py +++ b/python/paddle/reader/decorator.py @@ -242,20 +242,18 @@ class XmapEndSignal(): def xmap_readers(mapper, reader, process_num, buffer_size, order=False): """ - Use multiprocess to map samples from reader by a mapper defined by user. - And this function contains a buffered decorator. - :param mapper: a function to map sample. - :type mapper: callable - :param reader: the data reader to read from - :type reader: callable - :param process_num: process number to handle original sample - :type process_num: int - :param buffer_size: max buffer size - :type buffer_size: int - :param order: keep the order of reader - :type order: bool - :return: the decarated reader - :rtype: callable + Use multi-threads to map samples from reader by a mapper defined by user. + + Args: + mapper (callable): a function to map the data from reader. + reader (callable): a data reader which yields the data. + process_num (int): thread number to handle original sample. + buffer_size (int): size of the queue to read data in. + order (bool): whether to keep the data order from original reader. + Default False. + + Returns: + callable: a decorated reader with data mapping. """ end = XmapEndSignal() @@ -477,7 +475,7 @@ def get_line(self, cut_lines=True, line_break="\n"): """ :param cut_lines: cut buffer to lines :type cut_lines: bool - :param line_break: line break of the file, like \n or \r + :param line_break: line break of the file, like '\\\\n' or '\\\\r' :type line_break: string :return: one line or a buffer of bytes diff --git a/tools/manylinux1/build_scripts/build.sh b/tools/manylinux1/build_scripts/build.sh index 1b0059a8c69fc..3be94a42d530b 100644 --- a/tools/manylinux1/build_scripts/build.sh +++ b/tools/manylinux1/build_scripts/build.sh @@ -153,3 +153,9 @@ done # Restore LD_LIBRARY_PATH LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}" + +# According to ar issues: https://lists.gnu.org/archive/html/bug-binutils/2016-05/msg00211.html +# we should install new version ar with 64-bit supported here +wget https://ftp.gnu.org/gnu/binutils/binutils-2.27.tar.gz +tar xzf binutils-2.27.tar.gz && cd binutils-2.27 +./configure --prefix=/opt/rh/devtoolset-2/root/usr/ --enable-64-bit-archive && make -j `nproc` && make install