From 9298a9ec0d291aa919d59f57ce6a8562d781bd85 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Fri, 24 Mar 2017 14:35:39 +0800
Subject: [PATCH 1/5] stride pooling for seqlastin and seqfirstin

---
 .../layers/SequenceLastInstanceLayer.cpp      | 23 ++++----
 paddle/gserver/layers/SequencePoolLayer.cpp   | 11 +++-
 paddle/gserver/layers/SequencePoolLayer.h     |  7 +++
 paddle/gserver/tests/test_LayerGrad.cpp       | 16 ++++--
 paddle/parameter/Argument.cpp                 | 40 ++++++++++++++
 paddle/parameter/Argument.h                   |  8 +++
 paddle/parameter/tests/CMakeLists.txt         |  1 +
 paddle/parameter/tests/test_argument.cpp      | 52 +++++++++++++++++++
 proto/ModelConfig.proto                       |  5 ++
 python/paddle/trainer/config_parser.py        | 18 +++++--
 .../paddle/trainer_config_helpers/layers.py   | 21 ++++++++
 .../tests/configs/last_first_seq.py           |  3 ++
 .../configs/protostr/last_first_seq.protostr  | 33 ++++++++++++
 .../configs/protostr/shared_gru.protostr      |  2 +
 .../configs/protostr/shared_lstm.protostr     |  2 +
 .../protostr/simple_rnn_layers.protostr       |  6 +++
 .../configs/protostr/test_rnn_group.protostr  |  6 +++
 17 files changed, 235 insertions(+), 19 deletions(-)
 create mode 100644 paddle/parameter/tests/test_argument.cpp

diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
index 7a13cd7ad0fec..7ac087663a6c6 100644
--- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
+++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
@@ -25,6 +25,9 @@ namespace paddle {
  * Input: a sequence
  * If SequenceLevel = kNonseq:
  *   Output: a sequence containing only the last instance of the input sequence
+ *   If stride_ > 0:
+ *     Output: a shorten sequence containing several last instances of the
+ *             input sequence with stride window.
  * If SequenceLevel = kSeq:
  *   Check input sequence must has sub-sequence
  *   Output: a sequence containing only the last instance of each sub-sequence
@@ -37,6 +40,8 @@ class SequenceLastInstanceLayer : public SequencePoolLayer {
 protected:
   MatrixPtr tmpSrc_;
   MatrixPtr tmpDest_;
+  bool select_first_;
+  std::vector<int> insId_;

 public:
   explicit SequenceLastInstanceLayer(const LayerConfig& config)
@@ -54,6 +59,7 @@ REGISTER_LAYER(seqlastins, SequenceLastInstanceLayer);
 bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
                                      const ParameterMap& parameterMap) {
   SequencePoolLayer::init(layerMap, parameterMap);
+  select_first_ = config_.select_first();

   tmpSrc_ =
       Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_);
@@ -74,9 +80,13 @@ void SequenceLastInstanceLayer::forward(PassType passType) {
     AsyncGpuBlock asyncGpuBlock;
     REGISTER_TIMER_INFO("SequenceLastInstanceLayerForward", getName().c_str());

+    insId_.clear();
     for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
-      int insId =
-          config_.select_first() ? starts[seqId] : starts[seqId + 1] - 1;
+      int insId = (stride_ > 0)
+                      ? (select_first_ ? stridePositions_[seqId]
+                                       : stridePositions_[seqId + 1] - 1)
+                      : (select_first_ ? starts[seqId] : starts[seqId + 1] - 1);
+      insId_.push_back(insId);

       outputValue->subMatrix(seqId, 1, tmpDest_)
           ->assign(*(inputValue->subMatrix(insId, 1, tmpSrc_)));
@@ -96,18 +106,13 @@ void SequenceLastInstanceLayer::backward(const UpdateCallback& callback) {
   MatrixPtr inputGrad = getInputGrad(0);
   MatrixPtr outputGrad = getOutputGrad();
-  const int* starts = startPositions_->getData(false);
-  size_t numSequences = startPositions_->getSize() - 1;

   if (inputGrad) {
     AsyncGpuBlock asyncGpuBlock;
     REGISTER_TIMER_INFO("SequenceLastInstanceLayerBackward", getName().c_str());

-    for (size_t seqId = 0; seqId < numSequences; ++seqId) {
-      int insId =
-          config_.select_first() ? starts[seqId] : starts[seqId + 1] - 1;
-
-      inputGrad->subMatrix(insId, 1, tmpDest_)
+    for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
+      inputGrad->subMatrix(insId_[seqId], 1, tmpDest_)
           ->add(*(outputGrad->subMatrix(seqId, 1, tmpSrc_)));
     }
   }
diff --git a/paddle/gserver/layers/SequencePoolLayer.cpp b/paddle/gserver/layers/SequencePoolLayer.cpp
index 5807c4249620d..2bf180a043b98 100644
--- a/paddle/gserver/layers/SequencePoolLayer.cpp
+++ b/paddle/gserver/layers/SequencePoolLayer.cpp
@@ -37,6 +37,7 @@ bool SequencePoolLayer::init(const LayerMap& layerMap,
   } else {
     LOG(FATAL) << "Unknown trans_type: " << config_.trans_type();
   }
+  stride_ = config_.seq_pool_stride();
   setNeedSequenceInfo(false);
   return true;
 }
@@ -55,8 +56,6 @@ void SequencePoolLayer::forward(PassType passType) {
   CHECK_EQ(starts->getData()[newBatchSize_], input.getBatchSize());
   CHECK_EQ(newBatchSize_, starts->getSize() - 1);

-  resetOutput(newBatchSize_, dim);
-
   /* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
    * thus, in this case, output_ has no sequenceStartPositions.
    * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
@@ -67,6 +66,14 @@ void SequencePoolLayer::forward(PassType passType) {
         << "when trans_type = seq, input must hasSubseq";
     output_.degradeSequence(input);
   }
+  if (stride_ > 0) {
+    CHECK_EQ(input.hasSubseq(), 0UL)
+        << "sequence stride pooling is not suitable for hasSubseq now";
+    output_.poolSequenceWithStride(input, stride_, &stridePositions_);
+    newBatchSize_ = stridePositions_.size() - 1;
+  }
+
+  resetOutput(newBatchSize_, dim);
 }

 void SequencePoolLayer::backward(const UpdateCallback& callback) {
diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h
index 85b51ccd1dc7e..5ca1c1a82b913 100644
--- a/paddle/gserver/layers/SequencePoolLayer.h
+++ b/paddle/gserver/layers/SequencePoolLayer.h
@@ -26,6 +26,10 @@ namespace paddle {
  *   Output: output size is the number of input sequences (NOT input instances)
  *           output[i] = seqlastin/average/max_{for each instance in this
  *           sequence}{input[i]}
+ *   If stride_ > 0:
+ *     Check input sequence must don't have sub-sequence
+ *     Output: a shorten sequence, pooling is performed upon a small local
+ *             area
  * If SequenceLevel = kSeq:
  *   Check input sequence must has sub-sequence
  *   Output: output size is the number of input sub-sequences
@@ -42,6 +46,9 @@ class SequencePoolLayer : public Layer {
   enum SequenceLevel { kNonSeq = 0, kSeq = 1 };
   size_t newBatchSize_;
   ICpuGpuVectorPtr startPositions_;
+  int stride_;
+  // store the start position of each stride window
+  std::vector<int> stridePositions_;

 public:
   explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {}
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 5f8a7b79a06e0..ce83531416f07 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -804,10 +804,14 @@ TEST(Layer, ExpandLayer) {
   testExpandLayer("seq", true);    // seq expand to hasSubseq
 }

-void testDegradeLayer(bool hasSubseq, string layer_type, string trans_type) {
+void testDegradeLayer(bool hasSubseq,
+                      string layer_type,
+                      string trans_type,
+                      int stride = -1) {
   TestConfig config;
   config.layerConfig.set_type(layer_type);
   config.layerConfig.set_size(10);
+  config.layerConfig.set_seq_pool_stride(stride);
   config.biasSize = 0;

   config.inputDefs.push_back(
@@ -827,12 +831,14 @@ void testDegradeLayer(bool hasSubseq, string layer_type, string trans_type) {
   if (layer_type == "average") {
     for (auto strategy : {"average", "sum", "squarerootn"}) {
       LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type
-                << " average_strategy=" << strategy;
+                << " average_strategy=" << strategy
+                << " seq_pool_stride=" << stride;
       config.layerConfig.set_average_strategy(strategy);
       testDegradeLayerGrad(config, layer_type);
     }
   } else {
-    LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type;
+    LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type
+              << " seq_pool_stride=" << stride;
     testDegradeLayerGrad(config, layer_type);
   }
 }
@@ -847,6 +853,10 @@ TEST(Layer, SequenceLastInstanceLayer) {
   testDegradeLayer(false,
                    "seqlastins",
                    "non-seq");  // seq seqlastins to non-seq
+  testDegradeLayer(false,
+                   "seqlastins",
+                   "non-seq",
+                   5);  // seq seqlastins to a shorten seq, stride window = 5
   testDegradeLayer(true,
                    "seqlastins",
                    "non-seq");  // hasSubseq seqlastins to non-seq
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 4139f59a2c8e6..2657c00ebb3c7 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -559,6 +559,46 @@ void Argument::degradeSequence(const Argument& input) {
   tgtBuf[numSequences] = numSubSequences;
 }

+void Argument::poolSequenceWithStride(const Argument& input,
+                                      size_t stride,
+                                      std::vector<int>* stridePostions) {
+  /*
+   * If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
+   * then sequenceStartPositions = [0, 2, 3, 4, 7],
+   * and stridePostions = [0, 5, 9, 14, 17, 22, 27, 30]
+   */
+  CHECK(input.sequenceStartPositions);
+  CHECK_EQ(input.hasSubseq(), 0UL);
+  CHECK_GT(stride, 0) << "stride must larger than 0";
+  size_t numSequences = input.getNumSequences();
+  ICpuGpuVector::resizeOrCreate(
+      sequenceStartPositions, numSequences + 1, false);
+  const int* starts = input.sequenceStartPositions->getData(false);
+  int* tgtBuf = sequenceStartPositions->getMutableData(false);
+  // first index of target sequence and stride positions are both 0
+  tgtBuf[0] = 0;
+  (*stridePostions).clear();
+  for (size_t seqId = 0; seqId < numSequences; ++seqId) {
+    size_t seqLength = starts[seqId + 1] - starts[seqId];
+    (*stridePostions).emplace_back(starts[seqId]);
+    if (seqLength == 0) {
+      // empty sequence
+      tgtBuf[seqId + 1] = tgtBuf[seqId];
+    } else if (seqLength < stride) {
+      tgtBuf[seqId + 1] = tgtBuf[seqId] + 1;
+    } else {
+      tgtBuf[seqId + 1] = tgtBuf[seqId] + ceil((float)seqLength / stride);
+      int size =
+          (seqLength % stride) ? seqLength / stride : seqLength / stride - 1;
+      for (int i = 0; i < size; i++) {
+        (*stridePostions).emplace_back((*stridePostions).back() + stride);
+      }
+    }
+  }
+  (*stridePostions).emplace_back(starts[numSequences]);
+  CHECK_EQ((*stridePostions).size() - 1, tgtBuf[numSequences]);
+}
+
 void Argument::getValueString(
     std::unordered_map<std::string, std::string>* out) const {
   if (value) {
diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index 9fd84bc4b7e0a..760029c2fe6ba 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -291,6 +291,14 @@ struct Argument {
   */
   void degradeSequence(const Argument& input);

+  /*
+   After pooling with stride n (n is smaller than sequence length),
+   a long sequence will be shorten.
+   This function is not suitable for sequence with sub-sequence now.
+  */
+  void poolSequenceWithStride(const Argument& input,
+                              size_t stride,
+                              std::vector<int>* stridePositions);
   /**
    * @brief getValueString will return the argument's output in string. There
    * are several kinds of output. The keys of output dictionary are 'value',
diff --git a/paddle/parameter/tests/CMakeLists.txt b/paddle/parameter/tests/CMakeLists.txt
index cab264db8e500..181ccdc1f099e 100644
--- a/paddle/parameter/tests/CMakeLists.txt
+++ b/paddle/parameter/tests/CMakeLists.txt
@@ -1 +1,2 @@
 add_simple_unittest(test_common)
+add_simple_unittest(test_argument)
diff --git a/paddle/parameter/tests/test_argument.cpp b/paddle/parameter/tests/test_argument.cpp
new file mode 100644
index 0000000000000..ba17e8a298e80
--- /dev/null
+++ b/paddle/parameter/tests/test_argument.cpp
@@ -0,0 +1,52 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include <paddle/parameter/Argument.h>
+
+using namespace paddle;  // NOLINT
+
+TEST(Argument, poolSequenceWithStride) {
+  Argument input, output;
+  ICpuGpuVector::resizeOrCreate(input.sequenceStartPositions, 5, false);
+  int* inStart = input.sequenceStartPositions->getMutableData(false);
+  inStart[0] = 0;
+  inStart[1] = 9;
+  inStart[2] = 14;
+  inStart[3] = 17;
+  inStart[4] = 30;
+
+  std::vector<int> stridePositions;
+  stridePositions.clear();
+  output.poolSequenceWithStride(input, 5 /* stride */, &stridePositions);
+
+  const int* outStart = output.sequenceStartPositions->getData(false);
+  CHECK_EQ(outStart[0], 0);
+  CHECK_EQ(outStart[1], 2);
+  CHECK_EQ(outStart[2], 3);
+  CHECK_EQ(outStart[3], 4);
+  CHECK_EQ(outStart[4], 7);
+
+  CHECK_EQ(stridePositions.size(), 8);
+  int strideResult[] = {0, 5, 9, 14, 17, 22, 27, 30};
+  for (int i = 0; i < 8; i++) {
+    CHECK_EQ(stridePositions[i], strideResult[i]);
+  }
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  initMain(argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index 65d5d50277b66..4f9b53d6f6553 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -441,6 +441,11 @@ message LayerConfig {

   // blank label used in ctc loss
   optional uint32 blank = 52 [default = 0];
+
+  // stride parameter for seqlastins layer, AverageLayer, MaxLayer, which
+  // controls the scope of pooling operation. can be set > 0.
+  // leave empty or set to -1 to disable this stride pooling.
+  optional int32 seq_pool_stride = 53 [default = -1];
 }

 message EvaluatorConfig {
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 1394773b4ff12..bfe71501758e3 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2480,6 +2480,7 @@ def __init__(self,
                  active_type='linear',
                  trans_type='non-seq',
                  bias=False,
+                 stride=-1,
                  **xargs):
         super(SequenceLastInstanceLayer, self).__init__(
             name,
@@ -2490,10 +2491,11 @@ def __init__(self,
             **xargs)
         config_assert(
             len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input')
+        if trans_type == 'seq':
+            config_assert(stride == -1, 'subseq do not support stride window')
         self.config.trans_type = trans_type
-        for input_index in xrange(len(self.inputs)):
-            input_layer = self.get_input_layer(input_index)
-            self.set_layer_size(input_layer.size)
+        self.config.seq_pool_stride = stride
+        self.set_layer_size(self.get_input_layer(0).size)
         self.create_bias_parameter(bias, self.config.size)

@@ -2505,10 +2507,16 @@ def __init__(self,
                  active_type='linear',
                  trans_type='non-seq',
                  bias=False,
+                 stride=-1,
                  **xargs):
         super(SequenceFirstInstanceLayer, self).__init__(
-            name, inputs=inputs, active_type=active_type, bias=bias, **xargs)
-        self.config.trans_type = trans_type
+            name,
+            inputs=inputs,
+            active_type=active_type,
+            trans_type=trans_type,
+            bias=bias,
+            stride=stride,
+            **xargs)
         self.config.select_first = True
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index b006eb46d99fd..9e4ca5794b08e 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -1301,10 +1301,15 @@ def grumemory(input,
 def last_seq(input,
              name=None,
              agg_level=AggregateLevel.EACH_TIMESTEP,
+             stride=-1,
              layer_attr=None):
     """
     Get Last Timestamp Activation of a sequence.

+    If stride > 0, get last timestamp upon a stride window of sequence.
+    And a long sequence will be shorten. Note that for sequence with
+    sub-sequence, stride is default -1 now.
+
     The simple usage is:

     .. code-block:: python
@@ -1316,6 +1321,8 @@ def last_seq(input,
     :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
+    :param stride: parameter of stride window.
+    :type stride: Int
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.
     :return: LayerOutput object.
@@ -1327,11 +1334,15 @@ def last_seq(input,
                     " series information at all. Maybe you want to use"
                     " first_seq instead.")

+    if agg_level == AggregateLevel.EACH_SEQUENCE:
+        assert stride == -1
+
     Layer(
         name=name,
         type=LayerType.SEQUENCE_LAST_INSTANCE,
         inputs=[input.name],
         trans_type=agg_level,
+        stride=stride,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
     return LayerOutput(
         name,
@@ -1345,10 +1356,16 @@ def last_seq(input,
 def first_seq(input,
               name=None,
               agg_level=AggregateLevel.EACH_TIMESTEP,
+              stride=-1,
               layer_attr=None):
     """
     Get First Timestamp Activation of a sequence.

+    If stride > 0, get first timestamp upon a stride window of sequence,
+    and a long sequence will be shorten. Note that for sequence with
+    sub-sequence, stride is default -1 now.
+
+
     The simple usage is:

     .. code-block:: python
@@ -1372,11 +1389,15 @@ def first_seq(input,
                     ' time series information at all. Maybe you want to use'
                     ' last_seq instead.')

+    if agg_level == AggregateLevel.EACH_SEQUENCE:
+        assert stride == -1
+
     Layer(
         name=name,
         type=LayerType.SEQUENCE_FIRST_INSTANCE,
         inputs=[input.name],
         trans_type=agg_level,
+        stride=stride,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
     return LayerOutput(
         name,
diff --git a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
index 3a1a0132b64bb..3c6dbc95e5489 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
@@ -14,4 +14,7 @@ for al in agg_level:
     opts.append(op(input=din, agg_level=al))

+for op in seq_op:
+    opts.append(op(input=din, agg_level=AggregateLevel.EACH_TIMESTEP, stride=5))
+
 outputs(opts)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
index 7b2911f8e367e..12b2255f3a411 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
@@ -15,6 +15,7 @@ layers {
   }
   select_first: true
   trans_type: "seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__first_seq_1__"
@@ -26,6 +27,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_0__"
@@ -36,6 +38,7 @@ layers {
     input_layer_name: "data"
   }
   trans_type: "seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_1__"
@@ -46,12 +49,38 @@ layers {
     input_layer_name: "data"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
+}
+layers {
+  name: "__first_seq_2__"
+  type: "seqlastins"
+  size: 30
+  active_type: "linear"
+  inputs {
+    input_layer_name: "data"
+  }
+  select_first: true
+  trans_type: "non-seq"
+  seq_pool_stride: 5
+}
+layers {
+  name: "__last_seq_2__"
+  type: "seqlastins"
+  size: 30
+  active_type: "linear"
+  inputs {
+    input_layer_name: "data"
+  }
+  trans_type: "non-seq"
+  seq_pool_stride: 5
 }
 input_layer_names: "data"
 output_layer_names: "__first_seq_0__"
 output_layer_names: "__first_seq_1__"
 output_layer_names: "__last_seq_0__"
 output_layer_names: "__last_seq_1__"
+output_layer_names: "__first_seq_2__"
+output_layer_names: "__last_seq_2__"
 sub_models {
   name: "root"
   layer_names: "data"
@@ -59,11 +88,15 @@ sub_models {
   layer_names: "__first_seq_1__"
   layer_names: "__last_seq_0__"
   layer_names: "__last_seq_1__"
+  layer_names: "__first_seq_2__"
+  layer_names: "__last_seq_2__"
   input_layer_names: "data"
   output_layer_names: "__first_seq_0__"
   output_layer_names: "__first_seq_1__"
   output_layer_names: "__last_seq_0__"
   output_layer_names: "__last_seq_1__"
+  output_layer_names: "__first_seq_2__"
+  output_layer_names: "__last_seq_2__"
   is_recurrent_layer_group: false
 }

diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
index b6905824f0cb0..64530146a1458 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
@@ -128,6 +128,7 @@ layers {
     input_layer_name: "__simple_gru_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_1__"
@@ -138,6 +139,7 @@ layers {
     input_layer_name: "__simple_gru_1__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__fc_layer_0__"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
index 0a83499b72480..79fa4c74f081a 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
@@ -210,6 +210,7 @@ layers {
     input_layer_name: "__lstm_group_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_1__"
@@ -220,6 +221,7 @@ layers {
     input_layer_name: "__lstm_group_1__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__fc_layer_0__"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
index dacb40185f863..68fa881b4f140 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
@@ -143,6 +143,7 @@ layers {
     input_layer_name: "__recurrent_layer_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__first_seq_0__"
@@ -154,6 +155,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_1__"
@@ -164,6 +166,7 @@ layers {
     input_layer_name: "__lstmemory_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__first_seq_1__"
@@ -175,6 +178,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_2__"
@@ -185,6 +189,7 @@ layers {
     input_layer_name: "__gru_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__first_seq_2__"
@@ -196,6 +201,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 parameters {
   name: "___fc_layer_0__.w0"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
index a0fb729e062bd..77b447aa9db2a 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
@@ -96,6 +96,7 @@ layers {
     input_layer_name: "rnn_forward"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__recurrent_group_1__"
@@ -145,6 +146,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__recurrent_group_2__"
@@ -193,6 +195,7 @@ layers {
     input_layer_name: "rnn_subseq_forward"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__lstm_group_0___recurrent_group"
@@ -282,6 +285,7 @@ layers {
     input_layer_name: "__lstm_group_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__gru_group_0___recurrent_group"
@@ -330,6 +334,7 @@ layers {
     input_layer_name: "__gru_group_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__recurrent_group_3__"
@@ -378,6 +383,7 @@ layers {
     input_layer_name: "__fc_layer_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 parameters {
   name: "___mixed_0__.w0"

From cbbec595472189ac252c742cdc6d5f2b435235bd Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Fri, 24 Mar 2017 17:05:04 +0800
Subject: [PATCH 2/5] adjust poolSequenceWithStride interface for average and
 max

---
 .../gserver/layers/SequenceLastInstanceLayer.cpp |  8 +++-----
 paddle/gserver/layers/SequencePoolLayer.cpp      |  2 +-
 paddle/gserver/layers/SequencePoolLayer.h        |  2 +-
 paddle/parameter/Argument.cpp                    | 15 +++++++++------
 paddle/parameter/Argument.h                      |  2 +-
 paddle/parameter/tests/test_argument.cpp         |  7 +++----
 python/paddle/trainer_config_helpers/layers.py   |  3 ++-
 7 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
index 7ac087663a6c6..c70c2b7421181 100644
--- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
+++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
@@ -72,7 +72,8 @@ bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
 void SequenceLastInstanceLayer::forward(PassType passType) {
   SequencePoolLayer::forward(passType);

-  const int* starts = startPositions_->getData(false);
+  auto starts = (stride_ > 0) ? stridePositions_->getData()
+                              : startPositions_->getData(false);
   MatrixPtr inputValue = getInputValue(0);
   MatrixPtr outputValue = getOutputValue();

@@ -82,10 +83,7 @@ void SequenceLastInstanceLayer::forward(PassType passType) {

     insId_.clear();
     for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
-      int insId = (stride_ > 0)
-                      ? (select_first_ ? stridePositions_[seqId]
-                                       : stridePositions_[seqId + 1] - 1)
-                      : (select_first_ ? starts[seqId] : starts[seqId + 1] - 1);
+      int insId = select_first_ ? starts[seqId] : starts[seqId + 1] - 1;
       insId_.push_back(insId);

       outputValue->subMatrix(seqId, 1, tmpDest_)
diff --git a/paddle/gserver/layers/SequencePoolLayer.cpp b/paddle/gserver/layers/SequencePoolLayer.cpp
index 2bf180a043b98..f853905103a0e 100644
--- a/paddle/gserver/layers/SequencePoolLayer.cpp
+++ b/paddle/gserver/layers/SequencePoolLayer.cpp
@@ -70,7 +70,7 @@ void SequencePoolLayer::forward(PassType passType) {
     CHECK_EQ(input.hasSubseq(), 0UL)
         << "sequence stride pooling is not suitable for hasSubseq now";
     output_.poolSequenceWithStride(input, stride_, &stridePositions_);
-    newBatchSize_ = stridePositions_.size() - 1;
+    newBatchSize_ = stridePositions_->getSize() - 1;
   }

   resetOutput(newBatchSize_, dim);
diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h
index 5ca1c1a82b913..92d7a841f0c73 100644
--- a/paddle/gserver/layers/SequencePoolLayer.h
+++ b/paddle/gserver/layers/SequencePoolLayer.h
@@ -48,7 +48,7 @@ class SequencePoolLayer : public Layer {
   ICpuGpuVectorPtr startPositions_;
   int stride_;
   // store the start position of each stride window
-  std::vector<int> stridePositions_;
+  IVectorPtr stridePositions_;

 public:
   explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {}
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 2657c00ebb3c7..3cc637587bc28 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -561,7 +561,7 @@ void Argument::degradeSequence(const Argument& input) {

 void Argument::poolSequenceWithStride(const Argument& input,
                                       size_t stride,
-                                      std::vector<int>* stridePostions) {
+                                      IVectorPtr* stridePostions) {
   /*
    * If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
    * then sequenceStartPositions = [0, 2, 3, 4, 7],
@@ -577,10 +577,10 @@ void Argument::poolSequenceWithStride(const Argument& input,
   int* tgtBuf = sequenceStartPositions->getMutableData(false);
   // first index of target sequence and stride positions are both 0
   tgtBuf[0] = 0;
-  (*stridePostions).clear();
+  std::vector<int> stridePos;
   for (size_t seqId = 0; seqId < numSequences; ++seqId) {
     size_t seqLength = starts[seqId + 1] - starts[seqId];
-    (*stridePostions).emplace_back(starts[seqId]);
+    stridePos.emplace_back(starts[seqId]);
     if (seqLength == 0) {
       // empty sequence
       tgtBuf[seqId + 1] = tgtBuf[seqId];
@@ -591,12 +591,15 @@ void Argument::poolSequenceWithStride(const Argument& input,
       int size =
           (seqLength % stride) ? seqLength / stride : seqLength / stride - 1;
       for (int i = 0; i < size; i++) {
-        (*stridePostions).emplace_back((*stridePostions).back() + stride);
+        stridePos.emplace_back(stridePos.back() + stride);
       }
     }
   }
-  (*stridePostions).emplace_back(starts[numSequences]);
-  CHECK_EQ((*stridePostions).size() - 1, tgtBuf[numSequences]);
+  stridePos.emplace_back(starts[numSequences]);
+  int size = stridePos.size();
+  CHECK_EQ(size - 1, tgtBuf[numSequences]);
+  IVector::resizeOrCreate(*stridePostions, size, false);
+  (*stridePostions)->copyFrom(stridePos.data(), size);
 }

 void Argument::getValueString(
diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index 760029c2fe6ba..95ea90ffc2a60 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -298,7 +298,7 @@ struct Argument {
   */
   void poolSequenceWithStride(const Argument& input,
                               size_t stride,
-                              std::vector<int>* stridePositions);
+                              IVectorPtr* stridePositions);
   /**
    * @brief getValueString will return the argument's output in string. There
    * are several kinds of output. The keys of output dictionary are 'value',
diff --git a/paddle/parameter/tests/test_argument.cpp b/paddle/parameter/tests/test_argument.cpp
index ba17e8a298e80..692bbada10d03 100644
--- a/paddle/parameter/tests/test_argument.cpp
+++ b/paddle/parameter/tests/test_argument.cpp
@@ -27,8 +27,7 @@ TEST(Argument, poolSequenceWithStride) {
   inStart[3] = 17;
   inStart[4] = 30;

-  std::vector<int> stridePositions;
-  stridePositions.clear();
+  IVectorPtr stridePositions;
   output.poolSequenceWithStride(input, 5 /* stride */, &stridePositions);

   const int* outStart = output.sequenceStartPositions->getData(false);
@@ -38,10 +37,10 @@ TEST(Argument, poolSequenceWithStride) {
   CHECK_EQ(outStart[3], 4);
   CHECK_EQ(outStart[4], 7);

-  CHECK_EQ(stridePositions.size(), 8);
+  CHECK_EQ(stridePositions->getSize(), 8);
   int strideResult[] = {0, 5, 9, 14, 17, 22, 27, 30};
   for (int i = 0; i < 8; i++) {
-    CHECK_EQ(stridePositions[i], strideResult[i]);
+    CHECK_EQ(stridePositions->getData()[i], strideResult[i]);
   }
 }

diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 542bbbe086ec0..5f3250e7224ff 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -1406,7 +1406,6 @@ def first_seq(input,
     and a long sequence will be shorten. Note that for sequence with
     sub-sequence, stride is default -1 now.

-
     The simple usage is:

     .. code-block:: python
@@ -1418,6 +1417,8 @@ def first_seq(input,
     :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
+    :param stride: parameter of stride window.
+    :type stride: Int
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.
     :return: LayerOutput object.

From d369577fed4b85dfeb40610fb21974893ac6dcb9 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Fri, 31 Mar 2017 17:34:00 +0800
Subject: [PATCH 3/5] add reversed poolSequenceWithStride

---
 .../layers/SequenceLastInstanceLayer.cpp |  5 ++-
 paddle/gserver/layers/SequencePoolLayer.cpp |  5 +--
 paddle/gserver/layers/SequencePoolLayer.h   |  2 ++
 paddle/parameter/Argument.cpp               | 21 ++++++------
 paddle/parameter/Argument.h                 |  3 +-
 paddle/parameter/tests/test_argument.cpp    | 32 +++++++++++--------
 python/paddle/trainer/config_parser.py      |  2 +-
 7 files changed, 40 insertions(+), 30 deletions(-)

diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
index c70c2b7421181..d29e981ad66a5 100644
--- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
+++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
@@ -40,7 +40,6 @@ class SequenceLastInstanceLayer : public SequencePoolLayer {
 protected:
   MatrixPtr tmpSrc_;
   MatrixPtr tmpDest_;
-  bool select_first_;
   std::vector<int> insId_;

 public:
@@ -59,7 +58,7 @@ REGISTER_LAYER(seqlastins, SequenceLastInstanceLayer);
 bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
                                      const ParameterMap& parameterMap) {
   SequencePoolLayer::init(layerMap, parameterMap);
-  select_first_ = config_.select_first();
+  reversed_ = config_.select_first();

   tmpSrc_ =
       Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_);
@@ -83,7 +82,7 @@ void SequenceLastInstanceLayer::forward(PassType passType) {

     insId_.clear();
     for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
-      int insId = select_first_ ? starts[seqId] : starts[seqId + 1] - 1;
+      int insId = reversed_ ? starts[seqId] : starts[seqId + 1] - 1;
       insId_.push_back(insId);

       outputValue->subMatrix(seqId, 1, tmpDest_)
diff --git a/paddle/gserver/layers/SequencePoolLayer.cpp b/paddle/gserver/layers/SequencePoolLayer.cpp
index f853905103a0e..8c49502011582 100644
--- a/paddle/gserver/layers/SequencePoolLayer.cpp
+++ b/paddle/gserver/layers/SequencePoolLayer.cpp
@@ -68,8 +68,9 @@ void SequencePoolLayer::forward(PassType passType) {
   }
   if (stride_ > 0) {
     CHECK_EQ(input.hasSubseq(), 0UL)
-        << "sequence stride pooling is not suitable for hasSubseq now";
-    output_.poolSequenceWithStride(input, stride_, &stridePositions_);
+        << "sequence stride pooling is invalid for hasSubseq now";
+    output_.poolSequenceWithStride(
+        input, stride_, &stridePositions_, reversed_);
     newBatchSize_ = stridePositions_->getSize() - 1;
   }

diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h
index 92d7a841f0c73..ff67c0ccadd20 100644
--- a/paddle/gserver/layers/SequencePoolLayer.h
+++ b/paddle/gserver/layers/SequencePoolLayer.h
@@ -49,6 +49,8 @@ class SequencePoolLayer : public Layer {
   int stride_;
   // store the start position of each stride window
   IVectorPtr stridePositions_;
+  // Whether it is reversed sequence
+  bool reversed_ = false;

 public:
   explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {}
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 3cc637587bc28..afbda8bdc403f 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -561,11 +561,13 @@ void Argument::degradeSequence(const Argument& input) {

 void Argument::poolSequenceWithStride(const Argument& input,
                                       size_t stride,
-                                      IVectorPtr* stridePostions) {
+                                      IVectorPtr* stridePostions,
+                                      bool reversed) {
   /*
    * If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
-   * then sequenceStartPositions = [0, 2, 3, 4, 7],
-   * and stridePostions = [0, 5, 9, 14, 17, 22, 27, 30]
+   * then sequenceStartPositions = [0, 2, 3, 4, 7].
+   * If reversed = false, stridePostions = [0, 5, 9, 14, 17, 22, 27, 30];
+   * else reversed = true, stridePostions = [0, 4, 9, 14, 17, 20, 25, 30]
    */
   CHECK(input.sequenceStartPositions);
   CHECK_EQ(input.hasSubseq(), 0UL);
@@ -584,14 +586,13 @@ void Argument::poolSequenceWithStride(const Argument& input,
     if (seqLength == 0) {
       // empty sequence
       tgtBuf[seqId + 1] = tgtBuf[seqId];
-    } else if (seqLength < stride) {
-      tgtBuf[seqId + 1] = tgtBuf[seqId] + 1;
     } else {
-      tgtBuf[seqId + 1] = tgtBuf[seqId] + ceil((float)seqLength / stride);
-      int size =
-          (seqLength % stride) ? seqLength / stride : seqLength / stride - 1;
-      for (int i = 0; i < size; i++) {
-        stridePos.emplace_back(stridePos.back() + stride);
+      int size = ceil((float)seqLength / stride);
+      tgtBuf[seqId + 1] = tgtBuf[seqId] + size;
+      for (int i = 0; i < size - 1; i++) {
+        int cur = reversed ? starts[seqId + 1] - (size - 1 - i) * stride
+                           : stridePos.back() + stride;
+        stridePos.emplace_back(cur);
       }
     }
   }
diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index 95ea90ffc2a60..49a0660ccf155 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -298,7 +298,8 @@ struct Argument {
   */
   void poolSequenceWithStride(const Argument& input,
                               size_t stride,
-                              IVectorPtr* stridePositions);
+                              IVectorPtr* stridePositions,
+                              bool reversed = false);
   /**
    * @brief getValueString will return the argument's output in string. There
    * are several kinds of output. The keys of output dictionary are 'value',
diff --git a/paddle/parameter/tests/test_argument.cpp b/paddle/parameter/tests/test_argument.cpp
index 692bbada10d03..81fe4ee397351 100644
--- a/paddle/parameter/tests/test_argument.cpp
+++ b/paddle/parameter/tests/test_argument.cpp
@@ -27,20 +27,26 @@ TEST(Argument, poolSequenceWithStride) {
   inStart[3] = 17;
   inStart[4] = 30;

-  IVectorPtr stridePositions;
-  output.poolSequenceWithStride(input, 5 /* stride */, &stridePositions);
-
-  const int* outStart = output.sequenceStartPositions->getData(false);
-  CHECK_EQ(outStart[0], 0);
-  CHECK_EQ(outStart[1], 2);
-  CHECK_EQ(outStart[2], 3);
-  CHECK_EQ(outStart[3], 4);
-  CHECK_EQ(outStart[4], 7);
-
-  CHECK_EQ(stridePositions->getSize(), 8);
   int strideResult[] = {0, 5, 9, 14, 17, 22, 27, 30};
-  for (int i = 0; i < 8; i++) {
-    CHECK_EQ(stridePositions->getData()[i], strideResult[i]);
+  int strideResultReversed[] = {0, 4, 9, 14, 17, 20, 25, 30};
+
+  for (auto reversed : {false, true}) {
+    IVectorPtr stridePositions;
+    output.poolSequenceWithStride(
+        input, 5 /* stride */, &stridePositions, reversed);
+
+    const int* outStart = output.sequenceStartPositions->getData(false);
+    CHECK_EQ(outStart[0], 0);
+    CHECK_EQ(outStart[1], 2);
+    CHECK_EQ(outStart[2], 3);
+    CHECK_EQ(outStart[3], 4);
+    CHECK_EQ(outStart[4], 7);
+
+    CHECK_EQ(stridePositions->getSize(), 8);
+    auto result = reversed ? strideResultReversed : strideResult;
+    for (int i = 0; i < 8; i++) {
+      CHECK_EQ(stridePositions->getData()[i], result[i]);
+    }
   }
 }

diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 1a6d1c512d393..dc89419c40f8d 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2497,7 +2497,7 @@ def __init__(self,
         config_assert(
             len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input')
         if trans_type == 'seq':
-            config_assert(stride == -1, 'subseq do not support stride window')
+            config_assert(stride == -1, 'subseq does not support stride window')
         self.config.trans_type = trans_type
         self.config.seq_pool_stride = stride
         self.set_layer_size(self.get_input_layer(0).size)

From dd613047b1b37e2698437582b7e128b08efe6b5a Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Fri, 7 Apr 2017 11:00:58 +0800
Subject: [PATCH 4/5] refine annotation

---
 .../layers/SequenceLastInstanceLayer.cpp       |  6 ++++--
 paddle/gserver/layers/SequencePoolLayer.h      |  8 ++++----
 paddle/parameter/Argument.cpp                  |  2 +-
 paddle/parameter/Argument.h                    |  2 +-
 python/paddle/trainer_config_helpers/layers.py | 18 ++++++++++--------
 5 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
index d29e981ad66a5..c631c5ef3a9eb 100644
--- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
+++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
@@ -26,8 +26,10 @@ namespace paddle {
  * If SequenceLevel = kNonseq:
  *   Output: a sequence containing only the last instance of the input sequence
  *   If stride_ > 0:
- *     Output: a shorten sequence containing several last instances of the
- *             input sequence with stride window.
+ *     Output: a shorten sequence. The operation of getting last instance of a
+ *             sequence is independently performed on every slice of the input
+ *             sequence, which is obtained by sliding a window with the window
+ *             size set to stride_.
  * If SequenceLevel = kSeq:
  *   Check input sequence must has sub-sequence
  *   Output: a sequence containing only the last instance of each sub-sequence
diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h
index ff67c0ccadd20..8e183ecda80a1 100644
--- a/paddle/gserver/layers/SequencePoolLayer.h
+++ b/paddle/gserver/layers/SequencePoolLayer.h
@@ -27,9 +27,9 @@ namespace paddle {
  *           output[i] = seqlastin/average/max_{for each instance in this
  *           sequence}{input[i]}
  *   If stride_ > 0:
- *     Check input sequence must don't have sub-sequence
+ *     Check input sequence must not have sub-sequence
  *     Output: a shorten sequence, pooling is performed upon a small local
- *             area
+ *            area
  * If SequenceLevel = kSeq:
  *   Check input sequence must has sub-sequence
  *   Output: output size is the number of input sub-sequences
@@ -47,9 +47,9 @@ class SequencePoolLayer : public Layer {
   size_t newBatchSize_;
   ICpuGpuVectorPtr startPositions_;
   int stride_;
-  // store the start position of each stride window
+  // store the start position of each window
   IVectorPtr stridePositions_;
-  // Whether it is reversed sequence
+  // Whether the input sequence is reversed or not
   bool reversed_ = false;

 public:
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index afbda8bdc403f..3fa1e50d1e798 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -589,7 +589,7 @@ void Argument::poolSequenceWithStride(const Argument& input,
     } else {
       int size = ceil((float)seqLength / stride);
       tgtBuf[seqId + 1] = tgtBuf[seqId] + size;
-      for (int i = 0; i < size - 1; i++) {
+      for (int i = 0; i < size - 1; ++i) {
         int cur = reversed ? starts[seqId + 1] - (size - 1 - i) * stride
                            : stridePos.back() + stride;
         stridePos.emplace_back(cur);
diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index 49a0660ccf155..91aca98e186ae 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -294,7 +294,7 @@ struct Argument {
   /*
    After pooling with stride n (n is smaller than sequence length),
    a long sequence will be shorten.
-   This function is not suitable for sequence with sub-sequence now.
+   This function is invalid for sequence having sub-sequence.
   */
   void poolSequenceWithStride(const Argument& input,
                               size_t stride,
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 5f3250e7224ff..e98b1dfc8f9ad 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -1347,9 +1347,10 @@ def last_seq(input,
     """
     Get Last Timestamp Activation of a sequence.

-    If stride > 0, get last timestamp upon a stride window of sequence.
-    And a long sequence will be shorten. Note that for sequence with
-    sub-sequence, stride is default -1 now.
+    If stride > 0, this layer slides a window whose size is determined by stride,
+    and return the last value of the window as the output. Thus, a long sequence
+    will be shorten. Note that for sequence with sub-sequence, the default value
+    of stride is -1.

     The simple usage is:

@@ -1362,7 +1363,7 @@ def last_seq(input,
     :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
-    :param stride: parameter of stride window.
+    :param stride: window size.
     :type stride: Int
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.
@@ -1402,9 +1403,10 @@ def first_seq(input,
     """
     Get First Timestamp Activation of a sequence.

-    If stride > 0, get first timestamp upon a stride window of sequence,
-    and a long sequence will be shorten. Note that for sequence with
-    sub-sequence, stride is default -1 now.
+    If stride > 0, this layer slides a window whose size is determined by stride,
+    and return the first value of the window as the output. Thus, a long sequence
+    will be shorten. Note that for sequence with sub-sequence, the default value
+    of stride is -1.

     The simple usage is:

@@ -1417,7 +1419,7 @@ def first_seq(input,
     :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
-    :param stride: parameter of stride window.
+    :param stride: window size.
     :type stride: Int
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.

From e6366e349671126692a7711df47866009dc18f16 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Tue, 11 Apr 2017 17:02:17 +0800
Subject: [PATCH 5/5] update with comments

---
 .../layers/SequenceLastInstanceLayer.cpp |  8 +++----
 paddle/gserver/layers/SequencePoolLayer.h   |  4 ++--
 paddle/gserver/tests/test_LayerGrad.cpp     | 24 +++++++++++--------
 paddle/parameter/Argument.cpp               | 11 ++++-----
 4 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
index c631c5ef3a9eb..944c7051668dc 100644
--- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
+++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
@@ -42,7 +42,7 @@ class SequenceLastInstanceLayer : public SequencePoolLayer {
 protected:
   MatrixPtr tmpSrc_;
   MatrixPtr tmpDest_;
-  std::vector<int> insId_;
+  std::vector<int> instanceIds_;

 public:
   explicit SequenceLastInstanceLayer(const LayerConfig& config)
@@ -82,10 +82,10 @@ void SequenceLastInstanceLayer::forward(PassType passType) {
     AsyncGpuBlock asyncGpuBlock;
     REGISTER_TIMER_INFO("SequenceLastInstanceLayerForward", getName().c_str());

-    insId_.clear();
+    instanceIds_.clear();
     for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
       int insId = reversed_ ? starts[seqId] : starts[seqId + 1] - 1;
-      insId_.push_back(insId);
+      instanceIds_.push_back(insId);

       outputValue->subMatrix(seqId, 1, tmpDest_)
           ->assign(*(inputValue->subMatrix(insId, 1, tmpSrc_)));
@@ -111,7 +111,7 @@ void SequenceLastInstanceLayer::backward(const UpdateCallback& callback) {
     REGISTER_TIMER_INFO("SequenceLastInstanceLayerBackward", getName().c_str());

     for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
-      inputGrad->subMatrix(insId_[seqId], 1, tmpDest_)
+      inputGrad->subMatrix(instanceIds_[seqId], 1, tmpDest_)
           ->add(*(outputGrad->subMatrix(seqId, 1, tmpSrc_)));
     }
   }
diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h
index 8e183ecda80a1..293d1bf27823f 100644
--- a/paddle/gserver/layers/SequencePoolLayer.h
+++ b/paddle/gserver/layers/SequencePoolLayer.h
@@ -47,9 +47,9 @@ class SequencePoolLayer : public Layer {
   size_t newBatchSize_;
   ICpuGpuVectorPtr startPositions_;
   int stride_;
-  // store the start position of each window
+  // Store the start position of each window.
   IVectorPtr stridePositions_;
-  // Whether the input sequence is reversed or not
+  // Whether the input sequence is reversed or not.
   bool reversed_ = false;

 public:
   explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {}
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index f2763842c25c3..193b876c31626 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -807,7 +807,7 @@ TEST(Layer, ExpandLayer) {
 void testDegradeLayer(bool hasSubseq,
                       string layer_type,
                       string trans_type,
-                      int stride = -1) {
+                      int stride) {
   TestConfig config;
   config.layerConfig.set_type(layer_type);
   config.layerConfig.set_size(10);
@@ -844,29 +844,33 @@ void testDegradeLayer(bool hasSubseq,
 }

 TEST(Layer, MaxLayer) {
-  testDegradeLayer(false, "max", "non-seq");  // seq max to non-seq
-  testDegradeLayer(true, "max", "non-seq");   // hasSubseq max to non-seq
-  testDegradeLayer(true, "max", "seq");       // hasSubseq max to seq
+  testDegradeLayer(false, "max", "non-seq", -1);  // seq max to non-seq
+  testDegradeLayer(true, "max", "non-seq", -1);   // hasSubseq max to non-seq
+  testDegradeLayer(true, "max", "seq", -1);       // hasSubseq max to seq
 }

 TEST(Layer, SequenceLastInstanceLayer) {
   testDegradeLayer(false,
                    "seqlastins",
-                   "non-seq");  // seq seqlastins to non-seq
+                   "non-seq",
+                   -1);  // seq seqlastins to non-seq
   testDegradeLayer(false,
                    "seqlastins",
                    "non-seq",
                    5);  // seq seqlastins to a shorten seq, stride window = 5
   testDegradeLayer(true,
                    "seqlastins",
-                   "non-seq");  // hasSubseq seqlastins to non-seq
-  testDegradeLayer(true, "seqlastins", "seq");  // hasSubseq seqlastins to seq
+                   "non-seq",
+                   -1);  // hasSubseq seqlastins to non-seq
+  testDegradeLayer(
+      true, "seqlastins", "seq", -1);  // hasSubseq seqlastins to seq
 }

 TEST(Layer, AverageLayer) {
-  testDegradeLayer(false, "average", "non-seq");  // seq average to non-seq
-  testDegradeLayer(true, "average", "non-seq");   // hasSubseq average to non-seq
-  testDegradeLayer(true, "average", "seq");       // hasSubseq average to seq
+  testDegradeLayer(false, "average", "non-seq", -1);  // seq average to non-seq
+  testDegradeLayer(
+      true, "average", "non-seq", -1);  // hasSubseq average to non-seq
+  testDegradeLayer(true, "average", "seq", -1);  // hasSubseq average to seq
 }

 TEST(Layer, SequenceConcatLayer) {
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 3fa1e50d1e798..645bf73799063 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -563,12 +563,11 @@ void Argument::poolSequenceWithStride(const Argument& input,
                                       size_t stride,
                                       IVectorPtr* stridePostions,
                                       bool reversed) {
-  /*
-   * If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
-   * then sequenceStartPositions = [0, 2, 3, 4, 7].
-   * If reversed = false, stridePostions = [0, 5, 9, 14, 17, 22, 27, 30];
-   * else reversed = true, stridePostions = [0, 4, 9, 14, 17, 20, 25, 30]
-   */
+  // If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
+  // then sequenceStartPositions = [0, 2, 3, 4, 7].
+  // If reversed = false, stridePostions = [0, 5, 9, 14, 17, 22, 27, 30];
+  // else reversed = true, stridePostions = [0, 4, 9, 14, 17, 20, 25, 30]
+
   CHECK(input.sequenceStartPositions);
   CHECK_EQ(input.hasSubseq(), 0UL);
   CHECK_GT(stride, 0) << "stride must larger than 0";
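
For readers tracing the patch series: the index arithmetic that Argument::poolSequenceWithStride ends up implementing (in its PATCH 3 form, with the reversed flag) can be sketched in a few lines of Python. This is an illustrative re-derivation checked against the vectors in test_argument.cpp, not code that ships with the PR:

    import math

    def pool_sequence_with_stride(starts, stride, reversed_=False):
        # starts: sequence start positions, e.g. [0, 9, 14, 17, 30]
        # returns (pooled sequenceStartPositions, stridePositions)
        tgt = [0]        # pooled start positions; tgt[0] is always 0
        stride_pos = []  # first index of every stride window
        for seq_id in range(len(starts) - 1):
            seq_len = starts[seq_id + 1] - starts[seq_id]
            stride_pos.append(starts[seq_id])
            if seq_len == 0:  # empty sequence contributes no windows
                tgt.append(tgt[-1])
            else:
                size = math.ceil(seq_len / stride)  # windows in this sequence
                tgt.append(tgt[-1] + size)
                for i in range(size - 1):
                    # forward windows are anchored at the sequence start;
                    # reversed windows are anchored at the sequence end
                    stride_pos.append(
                        starts[seq_id + 1] - (size - 1 - i) * stride
                        if reversed_ else stride_pos[-1] + stride)
        stride_pos.append(starts[-1])
        assert len(stride_pos) - 1 == tgt[-1]
        return tgt, stride_pos

    # The vectors from test_argument.cpp:
    assert pool_sequence_with_stride([0, 9, 14, 17, 30], 5) == \
        ([0, 2, 3, 4, 7], [0, 5, 9, 14, 17, 22, 27, 30])
    assert pool_sequence_with_stride([0, 9, 14, 17, 30], 5, reversed_=True) == \
        ([0, 2, 3, 4, 7], [0, 4, 9, 14, 17, 20, 25, 30])

This also explains the select_first -> reversed_ mapping in SequenceLastInstanceLayer: first_seq with a stride takes the first instance of end-anchored windows, while last_seq takes the last instance of start-anchored ones, so every pooled value is at most stride steps from its window boundary.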