From 6cfb9a326251dcf9fe7aeef14ac4f3ff56d23111 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 31 Jul 2017 20:21:04 +0800 Subject: [PATCH 1/3] Refine InferShape for recurrent_network_op. * the tensor only contains shape and does not hold memory when inferring shape. --- paddle/operators/recurrent_network_op.cc | 147 +++++++----------- paddle/operators/recurrent_network_op.h | 16 +- paddle/operators/recurrent_network_op_test.cc | 33 ++-- 3 files changed, 84 insertions(+), 112 deletions(-) diff --git a/paddle/operators/recurrent_network_op.cc b/paddle/operators/recurrent_network_op.cc index 1a101d6ddf149..b21a21c6e9146 100644 --- a/paddle/operators/recurrent_network_op.cc +++ b/paddle/operators/recurrent_network_op.cc @@ -29,7 +29,8 @@ namespace rnn { void SegmentInputs(std::vector>& step_scopes, const std::vector& inlinks, - const size_t seq_len) { + const size_t seq_len, + bool infer_shape) { PADDLE_ENFORCE(!inlinks.empty(), "no in links are provided."); for (size_t i = 0; i < inlinks.size(); ++i) { Tensor* input = @@ -42,7 +43,9 @@ void SegmentInputs(std::vector>& step_scopes, Tensor* step_input = step_scopes[j] ->CreateVariable(inlinks[i].internal) ->GetMutable(); - *step_input = input->Slice(j, j + 1); + if (!infer_shape) { + *step_input = input->Slice(j, j + 1); + } step_input->Resize(step_dims); } } @@ -50,20 +53,23 @@ void SegmentInputs(std::vector>& step_scopes, void ConcatOutputs(std::vector>& step_scopes, const std::vector& outlinks, - const size_t seq_len) { + const size_t seq_len, + bool infer_shape) { for (size_t i = 0; i < outlinks.size(); i++) { Tensor* output = step_scopes[0]->GetVariable(outlinks[i].external)->GetMutable(); - // TODO(qingiqng) remove following code after adding - // InferShape in RecurrentGradientOp - DDim step_dims = step_scopes[0] - ->GetVariable(outlinks[i].internal) - ->GetMutable() - ->dims(); - std::vector dims_vec = vectorize(step_dims); - dims_vec.insert(dims_vec.begin(), seq_len); - output->mutable_data(make_ddim(dims_vec), platform::CPUPlace()); + if (infer_shape) { + DDim step_dims = step_scopes[0] + ->GetVariable(outlinks[i].internal) + ->GetMutable() + ->dims(); + std::vector dims_vec = vectorize(step_dims); + dims_vec.insert(dims_vec.begin(), seq_len); + output->Resize(make_ddim(dims_vec)); + } else { + output->mutable_data(platform::CPUPlace()); + } for (size_t j = 0; j < seq_len; j++) { Tensor* step_output = step_scopes[j] @@ -79,8 +85,9 @@ void ConcatOutputs(std::vector>& step_scopes, void LinkMemories(std::vector>& scopes, const std::vector& memories, - size_t step_id, - int offset) { + const size_t step_id, + const int offset, + bool infer_shape) { PADDLE_ENFORCE(step_id < scopes.size(), "step [%d] is out of range of step scopes' size [%d]", step_id, @@ -97,18 +104,14 @@ void LinkMemories(std::vector>& scopes, std::shared_ptr scope = scopes[step_id]; std::shared_ptr linked_scope = scopes[step_id + offset]; for (auto& attr : memories) { - auto mem = scope->CreateVariable(attr.pre_var)->GetMutable(); + auto mem = scope->GetVariable(attr.pre_var)->GetMutable(); // maybe share variable is better? auto linked_mem = linked_scope->GetVariable(attr.var)->GetMutable(); - mem->ShareDataWith(*linked_mem); - - // TODO(qingqing) remove following code - // the memory of current step should be allocated in step net - auto m = scope->CreateVariable(attr.var)->GetMutable(); - // for unit test, as addOp and mulOp are null currently, if not - // mutable_data, mem.data() in output will be error. 
We will - // remove this line after merge the correct addOp and mulOp. - m->mutable_data(mem->dims(), platform::CPUPlace()); + if (infer_shape) { + mem->Resize(linked_mem->dims()); + } else { + mem->ShareDataWith(*linked_mem); + } } } @@ -176,61 +179,43 @@ void RecurrentAlgorithm::InferShape(const std::shared_ptr& scope) const { ->GetMutable() ->dims()[0]; CreateScopes(scope); - auto step_scopes = GetStepScopes(scope); - // SegmentInputs is called in InferShape. The input must hold memory in - // SegmentInputs. But the other op only set dimension for the output in - // InferShape. That's a problem. Wether the RNN op needs InferShape or not? - // Wether the following functions (SegmentInputs, InitMemories, ...) need - // to rewrite for RNN op? - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_); + auto step_scopes = GetStepScopes(scope); + rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, true); - InitMemories(step_scopes[0]); + InitMemories(step_scopes[0], true); PADDLE_ENFORCE(scope->HasVariable(arg_->step_net), "stepnet [%s] is not in scope.", arg_->step_net); Variable* net = scope->GetVariable(arg_->step_net); PADDLE_ENFORCE(net != nullptr, "failed to get step net"); - // If the InferShape is called in OperatorBase's run function, - // the rnn op only needs to do InferShape for the first time step for (size_t i = 0; i < seq_len_; i++) { if (i > 0) { - rnn::LinkMemories(step_scopes, arg_->memories, i, -1); + rnn::LinkMemories(step_scopes, arg_->memories, i, -1, true); } net->GetMutable()->InferShape(step_scopes[i]); } - - auto outlinks = arg_->outlinks; - for (size_t i = 0; i < outlinks.size(); i++) { - DDim step_dims = step_scopes[0] - ->GetVariable(outlinks[i].internal) - ->GetMutable() - ->dims(); - std::vector dims_vec = vectorize(step_dims); - // now only support fixed length - dims_vec.insert(dims_vec.begin(), seq_len_); - Tensor* output = - step_scopes[0]->GetVariable(outlinks[i].external)->GetMutable(); - output->Resize(make_ddim(dims_vec)); - } + rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, true); } void RecurrentAlgorithm::Run(const std::shared_ptr& scope, const platform::DeviceContext& dev_ctx) const { auto step_scopes = GetStepScopes(scope); + rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, false); + + InitMemories(step_scopes[0], false); + Variable* net = scope->GetVariable(arg_->step_net); for (size_t step_id = 0; step_id < seq_len_; step_id++) { - // the link memory is done in InferShape - // maybe remove following code after testing if (step_id > 0) { - rnn::LinkMemories(step_scopes, arg_->memories, step_id, -1); + rnn::LinkMemories(step_scopes, arg_->memories, step_id, -1, false); } net->GetMutable()->Run(step_scopes[step_id], dev_ctx); } - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_); + rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, false); } void RecurrentAlgorithm::CreateScopes(std::shared_ptr scope) const { @@ -246,6 +231,7 @@ void RecurrentAlgorithm::CreateScopes(std::shared_ptr scope) const { // Now all variables in scope must be created outside of op. 
auto net_op = scope->GetVariable(arg_->step_net)->GetMutable(); for (auto& input : net_op->inputs_) { + // the weight are located in parent scope step_scope->CreateVariable(input); } for (auto& output : net_op->outputs_) { @@ -257,7 +243,8 @@ void RecurrentAlgorithm::CreateScopes(std::shared_ptr scope) const { } } -void RecurrentAlgorithm::InitMemories(std::shared_ptr step_scope) const { +void RecurrentAlgorithm::InitMemories(std::shared_ptr step_scope, + bool infer_shape) const { for (auto& attr : arg_->memories) { Tensor* pre_mem = step_scope->CreateVariable(attr.pre_var)->GetMutable(); @@ -267,14 +254,11 @@ void RecurrentAlgorithm::InitMemories(std::shared_ptr step_scope) const { attr.boot_var); Tensor* boot_mem = step_scope->GetVariable(attr.boot_var)->GetMutable(); - pre_mem->ShareDataWith(*boot_mem); - - // TODO(qingqing) remove following code - // the memory of current step should be allocated in step net - // here for unit test - auto cur_step_mem = - step_scope->CreateVariable(attr.var)->GetMutable(); - cur_step_mem->mutable_data(boot_mem->dims(), platform::CPUPlace()); + if (infer_shape) { + pre_mem->Resize(boot_mem->dims()); + } else { + pre_mem->ShareDataWith(*boot_mem); + } } } @@ -336,35 +320,37 @@ void RecurrentGradientAlgorithm::Run( const std::shared_ptr& scope, const platform::DeviceContext& dev_ctx) const { auto step_scopes = GetStepScopes(scope); - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_); + rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, false); PADDLE_ENFORCE(scope->HasVariable(arg_->step_net), "step net is not in scope."); Variable* net = scope->GetVariable(arg_->step_net); PADDLE_ENFORCE(net != nullptr, "failed to get step net"); for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) { if (static_cast(step_id) != seq_len_ - 1) { - rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1); + rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1, false); } net->GetMutable()->Run(step_scopes[step_id], dev_ctx); } - LinkBootMemoryGradients(step_scopes[0]); - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_); + LinkBootMemoryGradients(step_scopes[0], false); + rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, false); } void RecurrentGradientAlgorithm::LinkBootMemoryGradients( - std::shared_ptr step_scope) const { + std::shared_ptr step_scope, bool infer_shape) const { for (auto& attr : arg_->memories) { Tensor* mem_grad = step_scope->CreateVariable(attr.var)->GetMutable(); - PADDLE_ENFORCE(mem_grad != nullptr, - "boot_tensor should be retrieved before"); PADDLE_ENFORCE(step_scope->HasVariable(attr.boot_var), "memory [%s]'s boot variable [%s] not exists", attr.var, attr.boot_var); Tensor* boot_mem_grad = step_scope->CreateVariable(attr.boot_var)->GetMutable(); - boot_mem_grad->ShareDataWith(*mem_grad); + if (infer_shape) { + boot_mem_grad->Resize(mem_grad->dims()); + } else { + boot_mem_grad->ShareDataWith(*mem_grad); + } } } @@ -374,7 +360,7 @@ void RecurrentGradientAlgorithm::InferShape( ->GetMutable() ->dims()[0]; auto step_scopes = GetStepScopes(scope); - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_); + rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, true); PADDLE_ENFORCE(scope->HasVariable(arg_->step_net), "step net is not in scope."); @@ -383,25 +369,12 @@ void RecurrentGradientAlgorithm::InferShape( for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) { if (static_cast(step_id) != seq_len_ - 1) { - rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1); + rnn::LinkMemories(step_scopes, 
arg_->memories, step_id, 1, true); } net->GetMutable()->InferShape(step_scopes[step_id]); } - - auto outlinks = arg_->outlinks; - for (size_t i = 0; i < outlinks.size(); i++) { - DDim step_dims = step_scopes[0] - ->GetVariable(outlinks[i].internal) - ->GetMutable() - ->dims(); - std::vector dims_vec = vectorize(step_dims); - // now only support fixed length - dims_vec.insert(dims_vec.begin(), seq_len_); - Tensor* output = - step_scopes[0]->GetVariable(outlinks[i].external)->GetMutable(); - output->Resize(make_ddim(dims_vec)); - } - LinkBootMemoryGradients(step_scopes[0]); + rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, true); + LinkBootMemoryGradients(step_scopes[0], true); } void RecurrentGradientOp::Init() { diff --git a/paddle/operators/recurrent_network_op.h b/paddle/operators/recurrent_network_op.h index 8946c8ce38117..87a997b82e465 100644 --- a/paddle/operators/recurrent_network_op.h +++ b/paddle/operators/recurrent_network_op.h @@ -72,19 +72,22 @@ struct ArgumentName { */ void SegmentInputs(std::vector>& step_scopes, const std::vector& inlinks, - const size_t seq_len); + const size_t seq_len, + bool infer_shape); /** * Process outputs of step nets and merge to variables. */ void ConcatOutputs(std::vector>& step_scopes, const std::vector& outlinks, - const size_t seq_len); + const size_t seq_len, + bool infer_shape); void LinkMemories(std::vector>& step_scopes, const std::vector& memories, - size_t step_id, - int offset); + const size_t step_id, + const int offset, + bool infer_shape); void InitArgument(const ArgumentName& name, Argument* arg); @@ -125,7 +128,7 @@ class RecurrentAlgorithm { ->GetMutable>>(); } - void InitMemories(std::shared_ptr step_scopes) const; + void InitMemories(std::shared_ptr step_scopes, bool infer_shape) const; private: std::unique_ptr arg_; @@ -149,7 +152,8 @@ class RecurrentGradientAlgorithm { void Run(const std::shared_ptr& scope, const platform::DeviceContext& dev_ctx) const; - void LinkBootMemoryGradients(std::shared_ptr step_scopes) const; + void LinkBootMemoryGradients(std::shared_ptr step_scopes, + bool infer_shape) const; /** * InferShape must be called before Run. 
diff --git a/paddle/operators/recurrent_network_op_test.cc b/paddle/operators/recurrent_network_op_test.cc index 6784ac6001ad1..86588a969c8bd 100644 --- a/paddle/operators/recurrent_network_op_test.cc +++ b/paddle/operators/recurrent_network_op_test.cc @@ -56,7 +56,7 @@ class RecurrentOpTest : public ::testing::Test { w->GetMutable()->mutable_data( make_ddim(std::vector{30, 30}), platform::CPUPlace()); - for (auto boot : std::vector{"x_boot", "h_boot"}) { + for (auto boot : std::vector{"h_boot"}) { LOG(INFO) << "create global variable " << boot; Variable* h_boot = scope_->CreateVariable(boot); h_boot->GetMutable()->mutable_data( @@ -80,7 +80,6 @@ class RecurrentOpTest : public ::testing::Test { op_desc.add_inputs("x0"); op_desc.add_inputs("x1"); // boot_memories 3 - op_desc.add_inputs("x_boot"); op_desc.add_inputs("h_boot"); // step net 5 op_desc.add_inputs("step_net"); @@ -92,7 +91,7 @@ class RecurrentOpTest : public ::testing::Test { auto _input_format = std::vector{ 0, // in_link 3, // memories - 5 // step_net + 4 // step_net }; auto input_format = op_desc.add_attrs(); input_format->set_name("input_format"); @@ -130,12 +129,11 @@ class RecurrentOpTest : public ::testing::Test { inlink_alias->add_strings(item); } // pre memories - for (const auto& item : - std::vector{"rnn/x@pre", "rnn/h@pre"}) { + for (const auto& item : std::vector{"rnn/h@pre"}) { pre_memories->add_strings(item); } // memories - for (const auto& item : std::vector{"rnn/x", "rnn/h"}) { + for (const auto& item : std::vector{"rnn/h"}) { memories->add_strings(item); } // output alias @@ -152,14 +150,11 @@ class RecurrentOpTest : public ::testing::Test { LOG(INFO) << "create variable step_net"; Variable* var = scope_->CreateVariable("step_net"); auto net = var->GetMutable(); - // rnn/s is net's input or output? - net->inputs_ = {"rnn/h@pre", "rnn/w", "rnn/x"}; - net->inputs_ = {"rnn/s", "rnn/h"}; net->AddOp( OpRegistry::CreateOp("mul", {"rnn/h@pre", "rnn/w"}, {"rnn/s"}, {})); net->AddOp( - OpRegistry::CreateOp("add_two", {"rnn/x", "rnn/s"}, {"rnn/h"}, {})); + OpRegistry::CreateOp("add_two", {"x@alias", "rnn/s"}, {"rnn/h"}, {})); net->CompleteAddOp(); } @@ -303,7 +298,7 @@ class RecurrentGradientAlgorithmTest : public ::testing::Test { std::vector>* step_scopes = scope_->GetVariable("step_scopes") ->GetMutable>>(); - rnn::SegmentInputs(*step_scopes, std::vector{inlink}, 10); + rnn::SegmentInputs(*step_scopes, std::vector{inlink}, 10, true); } void LinkeMemories() { @@ -318,7 +313,7 @@ class RecurrentGradientAlgorithmTest : public ::testing::Test { scope_->GetVariable("step_scopes") ->GetMutable>>(); for (int i = 1; i < 10; ++i) { - rnn::LinkMemories(*step_scopes, memories, i, -1); + rnn::LinkMemories(*step_scopes, memories, i, -1, true); } } @@ -347,7 +342,7 @@ TEST(RecurrentOp, LinkMemories) { scope->CreateVariable("pre_h"); auto tensor = scope->CreateVariable("h")->GetMutable(); float* data = tensor->mutable_data(make_ddim({15, 20}), CPUPlace()); - for (int i = 0; i < 15 * 20; ++i) { + for (int j = 0; j < 15 * 20; ++j) { data[i] = rand() * (1. 
/ (double)RAND_MAX); } step_scopes.push_back(scope); @@ -362,7 +357,7 @@ TEST(RecurrentOp, LinkMemories) { memories.push_back(mem_attr); for (int i = 1; i < len; ++i) { - rnn::LinkMemories(step_scopes, memories, i, -1); + rnn::LinkMemories(step_scopes, memories, i, -1, false); } // check for (int i = 0; i < len - 1; ++i) { @@ -372,13 +367,13 @@ TEST(RecurrentOp, LinkMemories) { ->GetVariable("pre_h") ->GetMutable() ->data(); - for (size_t i = 0; i < 15 * 20; ++i) { - ASSERT_FLOAT_EQ(a[i], b[i]); + for (size_t j = 0; j < 15 * 20; ++j) { + ASSERT_FLOAT_EQ(a[j], b[j]); } } for (int i = len - 2; i >= 0; --i) { - rnn::LinkMemories(step_scopes, memories, i, 1); + rnn::LinkMemories(step_scopes, memories, i, 1, false); } // check for (int i = len - 2; i >= 0; --i) { @@ -390,8 +385,8 @@ TEST(RecurrentOp, LinkMemories) { ->GetVariable("h") ->GetMutable() ->data(); - for (size_t i = 0; i < 15 * 20; ++i) { - ASSERT_FLOAT_EQ(a[i], b[i]); + for (size_t j = 0; j < 15 * 20; ++j) { + ASSERT_FLOAT_EQ(a[j], b[j]); } } } From 8925295a4b63dd6dc95b95b909be0ef4e2c5f4b0 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Tue, 1 Aug 2017 16:34:59 +0800 Subject: [PATCH 2/3] follow comments. --- paddle/operators/recurrent_network_op.cc | 111 +++++++++--------- paddle/operators/recurrent_network_op.h | 11 +- paddle/operators/recurrent_network_op_test.cc | 14 ++- 3 files changed, 69 insertions(+), 67 deletions(-) diff --git a/paddle/operators/recurrent_network_op.cc b/paddle/operators/recurrent_network_op.cc index b21a21c6e9146..dcb1ac19d2cb1 100644 --- a/paddle/operators/recurrent_network_op.cc +++ b/paddle/operators/recurrent_network_op.cc @@ -30,11 +30,14 @@ namespace rnn { void SegmentInputs(std::vector>& step_scopes, const std::vector& inlinks, const size_t seq_len, - bool infer_shape) { + bool infer_shape_mode) { PADDLE_ENFORCE(!inlinks.empty(), "no in links are provided."); for (size_t i = 0; i < inlinks.size(); ++i) { - Tensor* input = - step_scopes[0]->GetVariable(inlinks[i].external)->GetMutable(); + auto input_var = step_scopes[0]->GetVariable(inlinks[i].external); + PADDLE_ENFORCE(input_var != nullptr, + "input link [%s] is not in scope.", + inlinks[i].external); + Tensor* input = input_var->GetMutable(); DDim dims = input->dims(); PADDLE_ENFORCE(static_cast(dims[0]) == seq_len, "all the inlinks must have same length"); @@ -43,7 +46,7 @@ void SegmentInputs(std::vector>& step_scopes, Tensor* step_input = step_scopes[j] ->CreateVariable(inlinks[i].internal) ->GetMutable(); - if (!infer_shape) { + if (!infer_shape_mode) { *step_input = input->Slice(j, j + 1); } step_input->Resize(step_dims); @@ -54,12 +57,14 @@ void SegmentInputs(std::vector>& step_scopes, void ConcatOutputs(std::vector>& step_scopes, const std::vector& outlinks, const size_t seq_len, - bool infer_shape) { + bool infer_shape_mode) { for (size_t i = 0; i < outlinks.size(); i++) { + PADDLE_ENFORCE(step_scopes[0]->HasVariable(outlinks[i].external), + "output link [%s] is not in scope.", + outlinks[i].external); Tensor* output = step_scopes[0]->GetVariable(outlinks[i].external)->GetMutable(); - - if (infer_shape) { + if (infer_shape_mode) { DDim step_dims = step_scopes[0] ->GetVariable(outlinks[i].internal) ->GetMutable() @@ -69,16 +74,15 @@ void ConcatOutputs(std::vector>& step_scopes, output->Resize(make_ddim(dims_vec)); } else { output->mutable_data(platform::CPUPlace()); - } - - for (size_t j = 0; j < seq_len; j++) { - Tensor* step_output = step_scopes[j] - ->GetVariable(outlinks[i].internal) - ->GetMutable(); - // TODO(luotao02) data type and 
platform::DeviceContext() should set - // correctly - (output->Slice(j, j + 1)) - .CopyFrom(*step_output, platform::CPUPlace()); + for (size_t j = 0; j < seq_len; j++) { + Tensor* step_output = step_scopes[j] + ->GetVariable(outlinks[i].internal) + ->GetMutable(); + // TODO(luotao02) data type and platform::DeviceContext() should set + // correctly + (output->Slice(j, j + 1)) + .CopyFrom(*step_output, platform::CPUPlace()); + } } } } @@ -87,7 +91,7 @@ void LinkMemories(std::vector>& scopes, const std::vector& memories, const size_t step_id, const int offset, - bool infer_shape) { + bool infer_shape_mode) { PADDLE_ENFORCE(step_id < scopes.size(), "step [%d] is out of range of step scopes' size [%d]", step_id, @@ -107,7 +111,7 @@ void LinkMemories(std::vector>& scopes, auto mem = scope->GetVariable(attr.pre_var)->GetMutable(); // maybe share variable is better? auto linked_mem = linked_scope->GetVariable(attr.var)->GetMutable(); - if (infer_shape) { + if (infer_shape_mode) { mem->Resize(linked_mem->dims()); } else { mem->ShareDataWith(*linked_mem); @@ -179,43 +183,39 @@ void RecurrentAlgorithm::InferShape(const std::shared_ptr& scope) const { ->GetMutable() ->dims()[0]; CreateScopes(scope); - auto step_scopes = GetStepScopes(scope); - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, true); - - InitMemories(step_scopes[0], true); - - PADDLE_ENFORCE(scope->HasVariable(arg_->step_net), - "stepnet [%s] is not in scope.", - arg_->step_net); + rnn::SegmentInputs( + step_scopes, arg_->inlinks, seq_len_, true /*infer_shape_mode*/); + InitMemories(step_scopes[0], true /*infer_shape_mode*/); Variable* net = scope->GetVariable(arg_->step_net); PADDLE_ENFORCE(net != nullptr, "failed to get step net"); for (size_t i = 0; i < seq_len_; i++) { if (i > 0) { - rnn::LinkMemories(step_scopes, arg_->memories, i, -1, true); + rnn::LinkMemories( + step_scopes, arg_->memories, i, -1, true /*infer_shape_mode*/); } net->GetMutable()->InferShape(step_scopes[i]); } - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, true); + rnn::ConcatOutputs( + step_scopes, arg_->outlinks, seq_len_, true /*infer_shape_mode*/); } void RecurrentAlgorithm::Run(const std::shared_ptr& scope, const platform::DeviceContext& dev_ctx) const { auto step_scopes = GetStepScopes(scope); - - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, false); - - InitMemories(step_scopes[0], false); - + rnn::SegmentInputs( + step_scopes, arg_->inlinks, seq_len_, false /*infer_shape_mode*/); + InitMemories(step_scopes[0], false /*infer_shape_mode*/); Variable* net = scope->GetVariable(arg_->step_net); for (size_t step_id = 0; step_id < seq_len_; step_id++) { if (step_id > 0) { - rnn::LinkMemories(step_scopes, arg_->memories, step_id, -1, false); + rnn::LinkMemories( + step_scopes, arg_->memories, step_id, -1, false /*infer_shape_mode*/); } net->GetMutable()->Run(step_scopes[step_id], dev_ctx); } - - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, false); + rnn::ConcatOutputs( + step_scopes, arg_->outlinks, seq_len_, false /*infer_shape_mode*/); } void RecurrentAlgorithm::CreateScopes(std::shared_ptr scope) const { @@ -227,7 +227,6 @@ void RecurrentAlgorithm::CreateScopes(std::shared_ptr scope) const { if (seq_len_ > step_scopes->size()) { for (size_t i = step_scopes->size(); i < seq_len_; ++i) { std::shared_ptr step_scope = std::make_shared(scope); - // Now all variables in scope must be created outside of op. 
auto net_op = scope->GetVariable(arg_->step_net)->GetMutable(); for (auto& input : net_op->inputs_) { @@ -237,14 +236,13 @@ void RecurrentAlgorithm::CreateScopes(std::shared_ptr scope) const { for (auto& output : net_op->outputs_) { step_scope->CreateVariable(output); } - step_scopes->push_back(std::make_shared(step_scope)); } } } void RecurrentAlgorithm::InitMemories(std::shared_ptr step_scope, - bool infer_shape) const { + bool infer_shape_mode) const { for (auto& attr : arg_->memories) { Tensor* pre_mem = step_scope->CreateVariable(attr.pre_var)->GetMutable(); @@ -254,7 +252,7 @@ void RecurrentAlgorithm::InitMemories(std::shared_ptr step_scope, attr.boot_var); Tensor* boot_mem = step_scope->GetVariable(attr.boot_var)->GetMutable(); - if (infer_shape) { + if (infer_shape_mode) { pre_mem->Resize(boot_mem->dims()); } else { pre_mem->ShareDataWith(*boot_mem); @@ -320,23 +318,23 @@ void RecurrentGradientAlgorithm::Run( const std::shared_ptr& scope, const platform::DeviceContext& dev_ctx) const { auto step_scopes = GetStepScopes(scope); - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, false); - PADDLE_ENFORCE(scope->HasVariable(arg_->step_net), - "step net is not in scope."); + rnn::SegmentInputs( + step_scopes, arg_->inlinks, seq_len_, false /*infer_shape_mode*/); Variable* net = scope->GetVariable(arg_->step_net); - PADDLE_ENFORCE(net != nullptr, "failed to get step net"); for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) { if (static_cast(step_id) != seq_len_ - 1) { - rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1, false); + rnn::LinkMemories( + step_scopes, arg_->memories, step_id, 1, false /*infer_shape_mode*/); } net->GetMutable()->Run(step_scopes[step_id], dev_ctx); } LinkBootMemoryGradients(step_scopes[0], false); - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, false); + rnn::ConcatOutputs( + step_scopes, arg_->outlinks, seq_len_, false /*infer_shape_mode*/); } void RecurrentGradientAlgorithm::LinkBootMemoryGradients( - std::shared_ptr step_scope, bool infer_shape) const { + std::shared_ptr step_scope, bool infer_shape_mode) const { for (auto& attr : arg_->memories) { Tensor* mem_grad = step_scope->CreateVariable(attr.var)->GetMutable(); @@ -346,7 +344,7 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients( attr.boot_var); Tensor* boot_mem_grad = step_scope->CreateVariable(attr.boot_var)->GetMutable(); - if (infer_shape) { + if (infer_shape_mode) { boot_mem_grad->Resize(mem_grad->dims()); } else { boot_mem_grad->ShareDataWith(*mem_grad); @@ -360,21 +358,20 @@ void RecurrentGradientAlgorithm::InferShape( ->GetMutable() ->dims()[0]; auto step_scopes = GetStepScopes(scope); - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, true); - - PADDLE_ENFORCE(scope->HasVariable(arg_->step_net), - "step net is not in scope."); + rnn::SegmentInputs( + step_scopes, arg_->inlinks, seq_len_, true /*infer_shape_mode*/); Variable* net = scope->GetVariable(arg_->step_net); PADDLE_ENFORCE(net != nullptr, "failed to get step net"); - for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) { if (static_cast(step_id) != seq_len_ - 1) { - rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1, true); + rnn::LinkMemories( + step_scopes, arg_->memories, step_id, 1, true /*infer_shape_mode*/); } net->GetMutable()->InferShape(step_scopes[step_id]); } - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, true); - LinkBootMemoryGradients(step_scopes[0], true); + rnn::ConcatOutputs( + step_scopes, arg_->outlinks, seq_len_, true /*infer_shape_mode*/); 
+ LinkBootMemoryGradients(step_scopes[0], true /*infer_shape_mode*/); } void RecurrentGradientOp::Init() { diff --git a/paddle/operators/recurrent_network_op.h b/paddle/operators/recurrent_network_op.h index 87a997b82e465..3f722d5608962 100644 --- a/paddle/operators/recurrent_network_op.h +++ b/paddle/operators/recurrent_network_op.h @@ -73,7 +73,7 @@ struct ArgumentName { void SegmentInputs(std::vector>& step_scopes, const std::vector& inlinks, const size_t seq_len, - bool infer_shape); + bool infer_shape_mode); /** * Process outputs of step nets and merge to variables. @@ -81,13 +81,13 @@ void SegmentInputs(std::vector>& step_scopes, void ConcatOutputs(std::vector>& step_scopes, const std::vector& outlinks, const size_t seq_len, - bool infer_shape); + bool infer_shape_mode); void LinkMemories(std::vector>& step_scopes, const std::vector& memories, const size_t step_id, const int offset, - bool infer_shape); + bool infer_shape_mode); void InitArgument(const ArgumentName& name, Argument* arg); @@ -128,7 +128,8 @@ class RecurrentAlgorithm { ->GetMutable>>(); } - void InitMemories(std::shared_ptr step_scopes, bool infer_shape) const; + void InitMemories(std::shared_ptr step_scopes, + bool infer_shape_mode) const; private: std::unique_ptr arg_; @@ -153,7 +154,7 @@ class RecurrentGradientAlgorithm { const platform::DeviceContext& dev_ctx) const; void LinkBootMemoryGradients(std::shared_ptr step_scopes, - bool infer_shape) const; + bool infer_shape_mode) const; /** * InferShape must be called before Run. diff --git a/paddle/operators/recurrent_network_op_test.cc b/paddle/operators/recurrent_network_op_test.cc index 86588a969c8bd..635c2fe038167 100644 --- a/paddle/operators/recurrent_network_op_test.cc +++ b/paddle/operators/recurrent_network_op_test.cc @@ -298,7 +298,10 @@ class RecurrentGradientAlgorithmTest : public ::testing::Test { std::vector>* step_scopes = scope_->GetVariable("step_scopes") ->GetMutable>>(); - rnn::SegmentInputs(*step_scopes, std::vector{inlink}, 10, true); + rnn::SegmentInputs(*step_scopes, + std::vector{inlink}, + 10, + true /*infer_shape_mode*/); } void LinkeMemories() { @@ -313,7 +316,8 @@ class RecurrentGradientAlgorithmTest : public ::testing::Test { scope_->GetVariable("step_scopes") ->GetMutable>>(); for (int i = 1; i < 10; ++i) { - rnn::LinkMemories(*step_scopes, memories, i, -1, true); + rnn::LinkMemories( + *step_scopes, memories, i, -1, true /*infer_shape_mode*/); } } @@ -343,7 +347,7 @@ TEST(RecurrentOp, LinkMemories) { auto tensor = scope->CreateVariable("h")->GetMutable(); float* data = tensor->mutable_data(make_ddim({15, 20}), CPUPlace()); for (int j = 0; j < 15 * 20; ++j) { - data[i] = rand() * (1. / (double)RAND_MAX); + data[j] = rand() * (1. / (double)RAND_MAX); } step_scopes.push_back(scope); } @@ -357,7 +361,7 @@ TEST(RecurrentOp, LinkMemories) { memories.push_back(mem_attr); for (int i = 1; i < len; ++i) { - rnn::LinkMemories(step_scopes, memories, i, -1, false); + rnn::LinkMemories(step_scopes, memories, i, -1, false /*infer_shape_mode*/); } // check for (int i = 0; i < len - 1; ++i) { @@ -373,7 +377,7 @@ TEST(RecurrentOp, LinkMemories) { } for (int i = len - 2; i >= 0; --i) { - rnn::LinkMemories(step_scopes, memories, i, 1, false); + rnn::LinkMemories(step_scopes, memories, i, 1, false /*infer_shape_mode*/); } // check for (int i = len - 2; i >= 0; --i) { From b94584cf4b70dc9074779b512f8e4eb14ad032e0 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Tue, 1 Aug 2017 17:18:09 +0800 Subject: [PATCH 3/3] Rename recurrent_network_op recurrent_op. 
--- paddle/operators/CMakeLists.txt | 6 ++---- .../{recurrent_network_op.cc => recurrent_op.cc} | 13 +++++++++---- .../{recurrent_network_op.h => recurrent_op.h} | 0 ...rent_network_op_test.cc => recurrent_op_test.cc} | 2 +- paddle/pybind/CMakeLists.txt | 2 +- 5 files changed, 13 insertions(+), 10 deletions(-) rename paddle/operators/{recurrent_network_op.cc => recurrent_op.cc} (97%) rename paddle/operators/{recurrent_network_op.h => recurrent_op.h} (100%) rename paddle/operators/{recurrent_network_op_test.cc => recurrent_op_test.cc} (99%) diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 5085e1b92555e..9d28404f687dc 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -55,7 +55,5 @@ op_library(fc_op SRCS fc_op.cc DEPS mul_op rowwise_add_op sigmoid_op op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) -op_library(recurrent_network_op SRCS recurrent_network_op.cc DEPS op_desc -tensor op_registry operator net) -cc_test(recurrent_network_op_test SRCS recurrent_network_op_test.cc DEPS -recurrent_network_op gtest mul_op add_op) +op_library(recurrent_op SRCS recurrent_op.cc DEPS op_desc tensor op_registry operator net) +cc_test(recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op) diff --git a/paddle/operators/recurrent_network_op.cc b/paddle/operators/recurrent_op.cc similarity index 97% rename from paddle/operators/recurrent_network_op.cc rename to paddle/operators/recurrent_op.cc index dcb1ac19d2cb1..b3132c2020d33 100644 --- a/paddle/operators/recurrent_network_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/recurrent_network_op.h" +#include "paddle/operators/recurrent_op.h" #include #include @@ -108,8 +108,13 @@ void LinkMemories(std::vector>& scopes, std::shared_ptr scope = scopes[step_id]; std::shared_ptr linked_scope = scopes[step_id + offset]; for (auto& attr : memories) { + PADDLE_ENFORCE(scope->HasVariable(attr.pre_var), + "the pre-memory [%s] is not in scope.", + attr.pre_var); + PADDLE_ENFORCE(linked_scope->HasVariable(attr.var), + "the memory [%s] is not in linked scope.", + attr.var); auto mem = scope->GetVariable(attr.pre_var)->GetMutable(); - // maybe share variable is better? 
auto linked_mem = linked_scope->GetVariable(attr.var)->GetMutable(); if (infer_shape_mode) { mem->Resize(linked_mem->dims()); @@ -295,12 +300,12 @@ class RecurrentAlgorithmProtoAndCheckerMaker : public OpProtoAndCheckerMaker { const auto& name = RecurrentOp::kArgName; // inputs and outputs stored in proto AddInputs(name.inlinks, - "the input that need to be segmented for each step."); + "the inputs that need to be segmented for each step."); AddInputs(name.boot_memories, "variables to initialize memories."); AddInput(name.step_net, "network shared by all steps."); AddOutputs(name.outlinks, - "the output that need to concated for all steps."); + "the outputs that need to concated for all steps."); AddOutput(name.step_scopes, "step scopes"); // Attributes stored in AttributeMap diff --git a/paddle/operators/recurrent_network_op.h b/paddle/operators/recurrent_op.h similarity index 100% rename from paddle/operators/recurrent_network_op.h rename to paddle/operators/recurrent_op.h diff --git a/paddle/operators/recurrent_network_op_test.cc b/paddle/operators/recurrent_op_test.cc similarity index 99% rename from paddle/operators/recurrent_network_op_test.cc rename to paddle/operators/recurrent_op_test.cc index 635c2fe038167..4bff8a0ed6335 100644 --- a/paddle/operators/recurrent_network_op_test.cc +++ b/paddle/operators/recurrent_op_test.cc @@ -18,7 +18,7 @@ #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" #include "paddle/framework/tensor.h" -#include "paddle/operators/recurrent_network_op.h" +#include "paddle/operators/recurrent_op.h" namespace paddle { namespace operators { diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index 7d0e68a8f30de..43d8e17ec1c94 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -1,2 +1,2 @@ cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python - add_op fc_op sgd_op cross_entropy_op recurrent_network_op) + add_op fc_op sgd_op cross_entropy_op recurrent_op)
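
The intent of the first commit — that a tensor only carries shape during InferShape and only binds memory during Run — can be illustrated with a minimal standalone sketch. The FakeTensor and LinkMemory names below are hypothetical stand-ins, not Paddle's actual Tensor/Scope API; they only mirror the Resize-versus-ShareDataWith switch on infer_shape_mode that the patches thread through SegmentInputs, ConcatOutputs, LinkMemories, InitMemories, and LinkBootMemoryGradients.

    // Standalone sketch (hypothetical types, not Paddle's real API) of the
    // infer_shape_mode convention: shape-inference mode records dimensions
    // only, while run mode shares the linked tensor's buffer.
    #include <cassert>
    #include <memory>
    #include <vector>

    struct FakeTensor {
      std::vector<int> dims;                     // shape only
      std::shared_ptr<std::vector<float>> data;  // buffer, absent until run time

      void Resize(const std::vector<int>& d) { dims = d; }
      void ShareDataWith(const FakeTensor& other) {
        dims = other.dims;
        data = other.data;                       // no allocation, no copy
      }
    };

    // Mirrors the branch rnn::LinkMemories takes on infer_shape_mode.
    void LinkMemory(FakeTensor& pre_mem, const FakeTensor& linked_mem,
                    bool infer_shape_mode) {
      if (infer_shape_mode) {
        pre_mem.Resize(linked_mem.dims);         // propagate shape only
      } else {
        pre_mem.ShareDataWith(linked_mem);       // reuse memory at run time
      }
    }

    int main() {
      FakeTensor boot;
      boot.dims = {20, 30};
      boot.data = std::make_shared<std::vector<float>>(20 * 30, 1.0f);

      FakeTensor pre_mem;
      LinkMemory(pre_mem, boot, /*infer_shape_mode=*/true);
      assert(pre_mem.data == nullptr);           // InferShape holds no memory

      LinkMemory(pre_mem, boot, /*infer_shape_mode=*/false);
      assert(pre_mem.data == boot.data);         // Run shares the buffer
      return 0;
    }

Keeping shape propagation allocation-free is what lets InferShape run before any memory exists, which is why the patches move all mutable_data and ShareDataWith calls into the false /*infer_shape_mode*/ branches used by Run.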