From 84627bb934ed6b4c7213eeebc0fe59e5fbe7a84b Mon Sep 17 00:00:00 2001
From: caoying03 <caoying03@baidu.com>
Date: Mon, 7 Aug 2017 14:03:13 +0800
Subject: [PATCH 1/6] add config helper for sequence slice layer.

---
 doc/api/v2/config/layer.rst                   |  5 ++
 python/paddle/trainer/config_parser.py        | 45 +++++++++++
 .../paddle/trainer_config_helpers/layers.py   | 68 ++++++++++++++++
 .../tests/configs/file_list.sh                |  3 +-
 .../protostr/test_seq_slice_layer.protostr    | 79 +++++++++++++++++++
 .../tests/configs/test_seq_slice_layer.py     | 13 +++
 6 files changed, 212 insertions(+), 1 deletion(-)
 create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr
 create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py

diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst
index 372272a53c12c..232ea6b49b3a3 100644
--- a/doc/api/v2/config/layer.rst
+++ b/doc/api/v2/config/layer.rst
@@ -257,6 +257,11 @@ seq_concat
 ..  autoclass:: paddle.v2.layer.seq_concat
     :noindex:
 
+seq_slice
+---------
+..  autoclass:: paddle.v2.layer.seq_slice
+    :noindex:
+
 Reshaping Layers
 ================
 
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 9ea69fc5e5763..11e54ba42039d 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2657,6 +2657,51 @@ def __init__(self, name, inputs, bias=False, **xargs):
         self.create_bias_parameter(bias, size)
 
 
+@config_layer('seq_slice')
+class SeqSliceLayer(LayerBase):
+    def __init__(self, name, inputs, starts, ends, bias=False, **xargs):
+        if isinstance(inputs, list):
+            assert len(inputs) == 1, ('the first input of sequence slice layer '
+                                      'is a single sequence input.')
+        else:
+            inputs = [inputs]
+
+        if starts is not None:
+            if isinstance(starts, list):
+                assert len(starts) == 1, (
+                    'the start indices for sequence slice layer cannot '
+                    'be a list having more than one element.')
+                starts = starts[0]
+            inputs.append(starts)
+
+        if ends is not None:
+            if isinstance(ends, list):
+                assert len(ends) == 1, (
+                    'the end indices for sequence slice layer cannot '
+                    'be a list having more than one element.')
+                ends = ends[0]
+            inputs.append(ends)
+        assert len(inputs) >= 2, (
+            'the sequence slice layer has at least two inputs.')
+
+        super(SeqSliceLayer, self).__init__(
+            name, 'seq_slice', 0, inputs=inputs, **xargs)
+        input_layer0 = self.get_input_layer(0)
+        size = input_layer0.size
+        self.set_layer_size(size)
+
+        if len(inputs) == 3:
+            assert (
+                self.get_input_layer(1).size == self.get_input_layer(2).size), (
+                    'If start and end indices are both given to'
+                    'sequence slice layer, they should have the same width.')
+        elif len(inputs) == 2:
+            if starts is not None:
+                self.config.select_first = True
+            else:
+                self.config.select_first = False
+
+
 @config_layer('out_prod')
 class OuterProdLayer(LayerBase):
     def __init__(self, name, inputs, device=None):
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index ea5fdcc50f6ab..15636b14429d1 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -131,6 +131,7 @@
     'crop_layer',
     'clip_layer',
     'slice_projection',
+    'seq_slice_layer',
 ]
 
 
@@ -225,6 +226,7 @@ class LayerType(object):
     PRELU = 'prelu'
     CROP_LAYER = 'crop'
     CLIP_LAYER = 'clip'
+    SEQ_SLICE = 'seq_slice'
 
     @staticmethod
     def is_layer_type(type_name):
@@ -6119,3 +6121,69 @@ def clip_layer(input, min, max, name=None):
         max=max)
     return LayerOutput(
         name, LayerType.CLIP_LAYER, parents=[input], size=input.size)
+
+
+@wrap_name_default()
+def seq_slice_layer(input, starts, ends, name=None):
+    """
+    seq_slice_layer will return one or several sub-sequences from the
+    input sequence layer given start and end indices.
+
+        - If only start indices are given, and end indices are set to None,
+          this layer slices the input sequence from the given start indices
+          to its end.
+        - If only end indices are given, and start indices are set to None,
+          this layer slices the input sequence from its beginning to the
+          given end indices.
+        - If start and end indices are both given, they should have the same
+          number of elements.
+
+    If start or end indices contains more than one elements, the input sequence
+    will be sliced for multiple times.
+
+
+    .. code-block:: python
+
+        seq_silce = seq_slice_layer(input=input_seq,
+                                    starts=start_pos, ends=end_pos)
+
+    :param name: name of this layer.
+    :type name: basestring
+    :param input: input for this layer, it should be a sequence.
+    :type input: LayerOutput
+    :param starts: start indices to slice the input sequence.
+    :type starts: LayerOutput|None
+    :param ends: end indices to slice the input sequence.
+    :type ends: LayerOutput|None
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+
+    """
+
+    assert isinstance(input, LayerOutput), (
+        'The first input of seq_slice layer must be a PaddlePaddle layer.')
+
+    if starts is not None:
+        assert isinstance(starts, LayerOutput), (
+            'The start indices for seq_slice layer '
+            'must be a PaddlePaddle layer.')
+    if ends is not None:
+        assert isinstance(ends, LayerOutput), (
+            'The end indices for seq_slice layer must be a PaddlePaddle layer.')
+    assert starts is not None or ends is not None, (
+        'start and end indices '
+        'cannot be set to None at the same time, at least one of '
+        'them should be given.')
+    if starts is not None and ends is not None:
+        assert starts.size == ends.size, (
+            'If start and end indices are both given to seq_slice_layer, '
+            'they should have the same width.')
+
+    Layer(
+        name=name,
+        type=LayerType.SEQ_SLICE,
+        inputs=input.name,
+        starts=starts.name if starts is not None else None,
+        ends=ends.name if ends is not None else None)
+    return LayerOutput(
+        name, LayerType.SEQ_SLICE, parents=[input], size=input.size)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
index 0ffa58bc1e208..1ce865ceace9e 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@@ -7,6 +7,7 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
 test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
 test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
 test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
-test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer)
+test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
+test_seq_slice_layer)
 
 export whole_configs=(test_split_datasource)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr
new file mode 100644
index 0000000000000..5b73d614fe862
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr
@@ -0,0 +1,79 @@
+type: "nn"
+layers {
+  name: "word"
+  type: "data"
+  size: 128
+  active_type: ""
+}
+layers {
+  name: "starts"
+  type: "data"
+  size: 5
+  active_type: ""
+}
+layers {
+  name: "ends"
+  type: "data"
+  size: 5
+  active_type: ""
+}
+layers {
+  name: "__seq_slice_layer_0__"
+  type: "seq_slice"
+  size: 128
+  active_type: ""
+  inputs {
+    input_layer_name: "word"
+  }
+  inputs {
+    input_layer_name: "starts"
+  }
+  inputs {
+    input_layer_name: "ends"
+  }
+}
+layers {
+  name: "__seq_slice_layer_1__"
+  type: "seq_slice"
+  size: 128
+  active_type: ""
+  inputs {
+    input_layer_name: "word"
+  }
+  inputs {
+    input_layer_name: "starts"
+  }
+  select_first: true
+}
+layers {
+  name: "__seq_slice_layer_2__"
+  type: "seq_slice"
+  size: 128
+  active_type: ""
+  inputs {
+    input_layer_name: "word"
+  }
+  inputs {
+    input_layer_name: "ends"
+  }
+  select_first: false
+}
+input_layer_names: "word"
+output_layer_names: "__seq_slice_layer_0__"
+output_layer_names: "__seq_slice_layer_1__"
+output_layer_names: "__seq_slice_layer_2__"
+sub_models {
+  name: "root"
+  layer_names: "word"
+  layer_names: "starts"
+  layer_names: "ends"
+  layer_names: "__seq_slice_layer_0__"
+  layer_names: "__seq_slice_layer_1__"
+  layer_names: "__seq_slice_layer_2__"
+  input_layer_names: "word"
+  output_layer_names: "__seq_slice_layer_0__"
+  output_layer_names: "__seq_slice_layer_1__"
+  output_layer_names: "__seq_slice_layer_2__"
+  is_recurrent_layer_group: false
+}
+
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py
new file mode 100644
index 0000000000000..510ad3220893f
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+#coding=utf-8
+from paddle.trainer_config_helpers import *
+
+input_seq = data_layer("word", size=128)
+starts = data_layer("starts", size=5)
+ends = data_layer("ends", size=5)
+
+seq_slice1 = seq_slice_layer(input=input_seq, starts=starts, ends=ends)
+seq_slice2 = seq_slice_layer(input=input_seq, starts=starts, ends=None)
+seq_slice3 = seq_slice_layer(input=input_seq, starts=None, ends=ends)
+
+outputs(seq_slice1, seq_slice2, seq_slice3)

From 2988a58ef01a56e84cff02463972e0150bc6ab13 Mon Sep 17 00:00:00 2001
From: caoying03 <caoying03@baidu.com>
Date: Tue, 8 Aug 2017 08:52:05 +0800
Subject: [PATCH 2/6] add unittest.

---
 paddle/gserver/tests/CMakeLists.txt           |   6 +
 .../gserver/tests/test_SeqSliceLayerGrad.cpp  | 214 ++++++++++++++++++
 2 files changed, 220 insertions(+)
 create mode 100644 paddle/gserver/tests/test_SeqSliceLayerGrad.cpp

diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt
index 4546d12a90308..9fdb148864e18 100644
--- a/paddle/gserver/tests/CMakeLists.txt
+++ b/paddle/gserver/tests/CMakeLists.txt
@@ -30,6 +30,12 @@ add_unittest_without_exec(test_CRFLayerGrad
 add_test(NAME test_CRFLayerGrad
     COMMAND test_CRFLayerGrad)
 
+################ test_SeqSliceLayerGrad ####################
+add_unittest_without_exec(test_SeqSliceLayerGrad
+    test_SeqSliceLayerGrad.cpp
+    LayerGradUtil.cpp)
+add_test(NAME test_SeqSliceLayerGrad
+    COMMAND test_SeqSliceLayerGrad)
 
 add_unittest_without_exec(test_ActivationGrad
     test_ActivationGrad.cpp
diff --git a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
new file mode 100644
index 0000000000000..e456dd5db7f45
--- /dev/null
+++ b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
@@ -0,0 +1,214 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "ModelConfig.pb.h"
+#include "paddle/gserver/layers/DataLayer.h"
+#include "paddle/trainer/Trainer.h"
+
+#include "LayerGradUtil.h"
+#include "paddle/testing/TestUtil.h"
+
+using namespace paddle;  // NOLINT
+using namespace std;     // NOLINT
+
+DECLARE_int32(gpu_id);
+DECLARE_bool(thread_local_rand_use_global_seed);
+
+const int MAX_SEQ_NUM = 5;
+const int MAX_SEQ_LEN = 5;
+const int MAX_BEAM_SIZE = 3;
+
+vector<real> randSampling(real range, int n) {
+  CHECK_GE(range, n);
+  vector<real> num(range);
+  iota(begin(num), end(num), 0.);
+  if (range == n) return num;
+
+  random_shuffle(begin(num), end(num));
+  num.resize(n);
+  sort(begin(num), end(num));
+  return num;
+}
+
+void genSeqInfo(vector<int>& seqStartPos, vector<int>& subSeqStartPos) {
+  seqStartPos.resize(1, 0);
+  subSeqStartPos.resize(1, 0);
+
+  // srand((size_t)(time(NULL)));
+  srand(1);
+  int seqNum = 1 + (rand() % MAX_SEQ_NUM);
+  for (int i = 0; i < seqNum; ++i) {
+    int subSeqNum = 1 + (rand() % MAX_SEQ_NUM);
+    for (int j = 0; j < subSeqNum; ++j)
+      subSeqStartPos.push_back(subSeqStartPos.back() +
+                               (1 + (rand() % MAX_SEQ_LEN)));
+    seqStartPos.push_back(subSeqStartPos.back());
+  }
+}
+
+/*
+  generate start indices according to sequence start positions.
+ */
+void genStarts(vector<int>& seqStartPos,
+               vector<vector<real>>& starts,
+               size_t beamSize) {
+  starts.clear();
+  starts.resize(seqStartPos.size() - 1, vector<real>(beamSize, -1.));
+
+  for (size_t i = 0; i < seqStartPos.size() - 1; ++i) {
+    int seqLen = seqStartPos[i + 1] - seqStartPos[i];
+    vector<real> randStarts =
+        randSampling(seqLen, min(seqLen, static_cast<int>(beamSize)));
+    copy(begin(randStarts), end(randStarts), begin(starts[i]));
+  }
+}
+
+/*
+  generate end indices according to sequence start positions and start indices.
+ */
+void genEnds(vector<int>& seqStartPos,
+             vector<vector<real>>& starts,
+             vector<vector<real>>& ends,
+             size_t beamSize) {
+  CHECK_EQ(seqStartPos.size() - 1, starts.size());
+  ends.clear();
+  ends.resize(seqStartPos.size() - 1, vector<real>(beamSize, -1.));
+
+  for (size_t i = 0; i < starts.size(); ++i) {
+    for (size_t j = 0; j < starts[i].size(); ++j) {
+      int seqLen = seqStartPos[i + 1] - seqStartPos[i];
+      CHECK_GE(seqLen - 1, starts[i][j]);
+      if (starts[i][j] == -1.) break;
+      if (starts[i][j] == (seqLen - 1)) {
+        ends[i][j] = starts[i][j];
+      } else {
+        ends[i][j] = starts[i][j] + randSampling(seqLen - starts[i][j], 1)[0];
+      }
+    }
+  }
+}
+
+void genTestData(vector<int>& seqStartPos,
+                 vector<int>& subSeqStartPos,
+                 vector<vector<real>>& starts,
+                 vector<vector<real>>& ends,
+                 bool hasSubseq) {
+  size_t beamSize = MAX_BEAM_SIZE;
+  genSeqInfo(seqStartPos, subSeqStartPos);
+
+  genStarts(hasSubseq ? subSeqStartPos : seqStartPos, starts, beamSize);
+  genEnds(hasSubseq ? subSeqStartPos : seqStartPos, starts, ends, beamSize);
+}
+
+template <typename T>
+void flatten2dVector(vector<vector<T>>& inVec, vector<T>& outVec) {
+  size_t totalSize{0};
+  for (auto const& items : inVec) totalSize += items.size();
+  outVec.reserve(totalSize);
+
+  for (auto& items : inVec)
+    move(items.begin(), items.end(), back_inserter(outVec));
+}
+
+void testSeqSliceLayer(bool hasSubseq,
+                       bool useGpu,
+                       vector<int>& seqStartPos,
+                       vector<int>& subSeqStartPos,
+                       vector<vector<real>>& starts,
+                       vector<vector<real>>& ends) {
+  // layer size is not crutial for this layer,
+  // so here use a small layer size in the unittest.
+  const size_t layerSize{4};
+  TestConfig config;
+  config.layerConfig.set_type("seq_slice");
+  config.layerConfig.set_size(layerSize);
+
+  // add the first input
+  MatrixPtr seqInputPtr =
+      Matrix::create(hasSubseq ? subSeqStartPos.back() : seqStartPos.back(),
+                     layerSize,
+                     false,
+                     false);
+  seqInputPtr->randomizeUniform();
+
+  if (hasSubseq) {
+    config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA,
+                                "seq_input",
+                                seqInputPtr,
+                                seqStartPos,
+                                subSeqStartPos});
+  } else {
+    config.inputDefs.push_back(
+        {INPUT_SELF_DEFINE_DATA, "seq_input", seqInputPtr, seqStartPos});
+  }
+  config.layerConfig.add_inputs();
+
+  // add start indices
+  if (starts.size()) {
+    vector<real> startsToVec;
+    flatten2dVector(starts, startsToVec);
+
+    MatrixPtr startMatrixPtr =
+        Matrix::create(starts.size(), starts[0].size(), false, false);
+    startMatrixPtr->copyFrom(startsToVec.data(), startsToVec.size());
+
+    config.inputDefs.push_back(
+        {INPUT_SELF_DEFINE_DATA, "starts", startMatrixPtr});
+    config.layerConfig.add_inputs();
+  }
+
+  // add end indices
+  if (ends.size()) {
+    vector<real> endsToVec;
+    flatten2dVector(ends, endsToVec);
+    MatrixPtr endMatrixPtr =
+        Matrix::create(ends.size(), ends[0].size(), false, false);
+    config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "ends", endMatrixPtr});
+    config.layerConfig.add_inputs();
+  }
+
+  testLayerGrad(config, "seq_slice", /*batchSize*/ 100, false, useGpu, false);
+}
+
+TEST(Layer, SeqSliceLayer) {
+  vector<int> seqStartPos;
+  vector<int> subSeqStartPos;
+  vector<vector<real>> starts;
+  vector<vector<real>> ends;
+
+  genSeqInfo(seqStartPos, subSeqStartPos);
+  for (bool hasSubseq : {false, true}) {
+    genTestData(seqStartPos, subSeqStartPos, starts, ends, hasSubseq);
+    for (bool useGpu : {false, true}) {
+      vector<vector<real>> tmp;
+      testSeqSliceLayer(
+          hasSubseq, useGpu, seqStartPos, subSeqStartPos, tmp, ends);
+      testSeqSliceLayer(
+          hasSubseq, useGpu, seqStartPos, subSeqStartPos, starts, tmp);
+      testSeqSliceLayer(
+          hasSubseq, useGpu, seqStartPos, subSeqStartPos, starts, ends);
+    }
+  }
+}
+
+int main(int argc, char** argv) {
+  initMain(argc, argv);
+  hl_start();
+  hl_init(FLAGS_gpu_id);
+  FLAGS_thread_local_rand_use_global_seed = true;
+  srand(1);
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}

From 26bc5b12596c945956f7a6b003712805e579a36d Mon Sep 17 00:00:00 2001
From: caoying03 <caoying03@baidu.com>
Date: Tue, 8 Aug 2017 18:48:11 +0800
Subject: [PATCH 3/6] add implementations.

---
 paddle/gserver/layers/KmaxSeqScoreLayer.cpp   |   5 +
 paddle/gserver/layers/SequenceSliceLayer.cpp  | 228 ++++++++++++++++++
 .../gserver/layers/SubNestedSequenceLayer.cpp |  16 +-
 .../gserver/tests/test_SeqSliceLayerGrad.cpp  |  25 +-
 paddle/parameter/Argument.cpp                 |  27 ++-
 5 files changed, 278 insertions(+), 23 deletions(-)
 create mode 100644 paddle/gserver/layers/SequenceSliceLayer.cpp

diff --git a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
index 8ce591d476246..e96fd61fc1e96 100644
--- a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
+++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
@@ -97,6 +97,11 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
     scores_ = inputScore;
   }
 
+  // TODO(caoying)
+  // Here selSubSeqIdx is automatically converted from real to int
+  // This is very dangerous if user fill this matrix himself, invalid data may
+  // occur. The selected indices should be stored in
+  // CpuSparseMatrix with SparseValueType set to NO_VALUE.
   Matrix::resizeOrCreate(
       output_.value,
       input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(),
diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp
new file mode 100644
index 0000000000000..410aba663e005
--- /dev/null
+++ b/paddle/gserver/layers/SequenceSliceLayer.cpp
@@ -0,0 +1,228 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "Layer.h"
+#include "paddle/math/Matrix.h"
+#include "paddle/math/Vector.h"
+#include "paddle/utils/Logging.h"
+#include "paddle/utils/Stat.h"
+
+namespace paddle {
+
+class SequenceSliceLayer : public Layer {
+public:
+  explicit SequenceSliceLayer(const LayerConfig& config) : Layer(config) {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+
+  void forward(PassType passType) override;
+  void backward(const UpdateCallback& callback = nullptr) override;
+
+private:
+  // TODO(caoying)
+  // Here selSubSeqIdx is automatically converted from real to int
+  // This is very dangerous if user fill this matrix himself, invalid data
+  // may occur. The selected indices should be stored in CpuSparseMatrix
+  // with SparseValueType set to NO_VALUE.
+  MatrixPtr startIdsOnCpu_;
+  MatrixPtr endIdsOnCpu_;
+
+  std::vector<int> selectedRows_;
+  IVectorPtr rowIndice_;
+  std::vector<std::vector<int>> inputSeqInfoVec_;
+  std::vector<int> outSubSeqStartPos_;
+  std::vector<int> outSeqStartPos_;
+
+  void checkInputs();
+  void copySliceIdsToCpu();
+  void calSelectedRows(const MatrixPtr starts, const MatrixPtr ends);
+};
+
+REGISTER_LAYER(seq_slice, SequenceSliceLayer);
+
+bool SequenceSliceLayer::init(const LayerMap& layerMap,
+                              const ParameterMap& parameterMap) {
+  /* Initialize the basic parent class */
+  Layer::init(layerMap, parameterMap);
+  CHECK_GE(inputLayers_.size(), 2U);
+  CHECK_LE(inputLayers_.size(), 3U);
+
+  setNeedSequenceInfo(false);
+  return true;
+}
+
+void SequenceSliceLayer::checkInputs() {
+  const Argument& inputSeq = getInput(0);
+  CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer "
+                           << "must be a sequence.";
+  // Check inputs
+  const MatrixPtr indices1 = getInputValue(1);
+  CHECK_EQ(indices1->getHeight(),
+           inputSeq.hasSubseq() ? inputSeq.getNumSubSequences()
+                                : inputSeq.getNumSequences())
+      << "Height of the second input should be equal to number of sequence "
+      << "in the first input.";
+  if (inputLayers_.size() == 3) {
+    const MatrixPtr indices2 = getInputValue(2);
+    CHECK_EQ(indices2->getHeight(), indices1->getHeight())
+        << "start indices and end indices should have the same height.";
+    CHECK_EQ(indices2->getWidth(), indices1->getWidth())
+        << "start indices and end indices should have the same Width.";
+  }
+}
+
+void SequenceSliceLayer::copySliceIdsToCpu() {
+  if (!useGpu_) {
+    if (inputLayers_.size() == 2U) {
+      if (config_.select_first()) {
+        startIdsOnCpu_ = getInputValue(1);
+        endIdsOnCpu_ = nullptr;
+      } else {
+        startIdsOnCpu_ = nullptr;
+        endIdsOnCpu_ = getInputValue(1);
+      }
+    } else if (inputLayers_.size() == 3U) {
+      startIdsOnCpu_ = getInputValue(1);
+      endIdsOnCpu_ = getInputValue(2);
+    }
+    return;
+  }
+
+  const MatrixPtr indices1 = getInputValue(1);
+  if (inputLayers_.size() == 2U) {
+    if (config_.select_first()) {
+      Matrix::resizeOrCreate(startIdsOnCpu_,
+                             indices1->getHeight(),
+                             indices1->getWidth(),
+                             false /* trans */,
+                             false /* useGpu */);
+      startIdsOnCpu_->copyFrom(*indices1);
+      endIdsOnCpu_ = nullptr;
+    } else {
+      Matrix::resizeOrCreate(endIdsOnCpu_,
+                             indices1->getHeight(),
+                             indices1->getWidth(),
+                             false /* trans */,
+                             false /* useGpu */);
+      endIdsOnCpu_->copyFrom(*indices1);
+      startIdsOnCpu_ = nullptr;
+    }
+  } else if (inputLayers_.size() == 3U) {
+    Matrix::resizeOrCreate(startIdsOnCpu_,
+                           indices1->getHeight(),
+                           indices1->getWidth(),
+                           false /* trans */,
+                           false /* useGpu */);
+    startIdsOnCpu_->copyFrom(*indices1);
+
+    const MatrixPtr indices2 = getInputValue(2);
+    Matrix::resizeOrCreate(endIdsOnCpu_,
+                           indices2->getHeight(),
+                           indices2->getWidth(),
+                           false /* trans */,
+                           false /* useGpu */);
+    endIdsOnCpu_->copyFrom(*indices2);
+  }
+}
+
+void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts,
+                                         const MatrixPtr ends) {
+  outSeqStartPos_.resize(1, 0);
+  outSubSeqStartPos_.resize(1, 0);
+  selectedRows_.clear();
+
+  size_t beamSize = starts ? starts->getWidth() : ends->getWidth();
+  // iterate over sequence
+  size_t rowIdx = 0;
+  for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) {
+    // iterate over sub-sequence in a sequence
+    for (size_t j = 0; j < inputSeqInfoVec_[i].size() - 1; ++j) {
+      // iterate over each index for slicing.
+      for (size_t k = 0; k < beamSize; ++k) {
+        if (starts) {
+          if (starts->getElement(rowIdx, k) == -1.) break;
+        } else if (ends->getElement(rowIdx, k) == -1.)
+          break;
+
+        int begPos = inputSeqInfoVec_[i][j];
+        if (starts) begPos += starts->getElement(rowIdx, k);
+
+        int endPos = inputSeqInfoVec_[i][j + 1] - 1;
+        if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k);
+
+        int seqLen = endPos - begPos + 1;
+        CHECK(seqLen);
+        for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m);
+        inputSeqInfoVec_.size() > 1
+            ? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen)
+            : outSeqStartPos_.push_back(outSeqStartPos_.back() + seqLen);
+      }
+      rowIdx++;
+    }
+    if (inputSeqInfoVec_.size() > 1)
+      outSeqStartPos_.push_back(outSubSeqStartPos_.back());
+  }
+
+  if (useGpu_) {
+    rowIndice_ = IVector::create(selectedRows_.size(), useGpu_);
+    rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size());
+  } else {
+    rowIndice_ =
+        IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_);
+  }
+
+  // create the sequence information for the output.
+  ICpuGpuVector::resizeOrCreate(
+      output_.sequenceStartPositions, outSeqStartPos_.size(), false);
+  output_.sequenceStartPositions->copyFrom(
+      outSeqStartPos_.data(), outSeqStartPos_.size(), false);
+
+  if (inputSeqInfoVec_.size() > 1) {
+    ICpuGpuVector::resizeOrCreate(
+        output_.subSequenceStartPositions, outSubSeqStartPos_.size(), false);
+    output_.subSequenceStartPositions->copyFrom(
+        outSubSeqStartPos_.data(), outSubSeqStartPos_.size(), false);
+  }
+}
+
+void SequenceSliceLayer::forward(PassType passType) {
+  Layer::forward(passType);
+  checkInputs();
+
+  const Argument& inputSeq = getInput(0);
+  inputSeqInfoVec_.clear();
+  Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
+                              inputSeq.subSequenceStartPositions,
+                              inputSeqInfoVec_);
+  copySliceIdsToCpu();
+
+  // calculate the selected row indices in a batch,
+  // and build the output sequence information.
+  calSelectedRows(startIdsOnCpu_ ? startIdsOnCpu_ : nullptr,
+                  endIdsOnCpu_ ? endIdsOnCpu_ : nullptr);
+
+  resetOutput(selectedRows_.size(), getSize());
+
+  getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);
+}
+
+void SequenceSliceLayer::backward(const UpdateCallback& callback) {
+  MatrixPtr inputSeqGrad = getInputGrad(0);
+  MatrixPtr outputGrad = getOutputGrad();
+
+  outputGrad->addToRows(*inputSeqGrad, *rowIndice_);
+}
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp
index 76f587fff760d..0db03002702ca 100644
--- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp
+++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp
@@ -52,11 +52,10 @@ class SubNestedSequenceLayer : public Layer {
    *   ]
    *
    * ths output is saved to private member rowIndice_;
-   * [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
-   *  16,17,18,19,20,21,22,23,24,25,26,27]
+   * [0,1,2,3,4,5,6,7,8,9,15,16,17,18,19,20,21,23,24,25,26,27]
    */
 
-  void calSelectedCols(const MatrixPtr selectedIndices,
+  void calSelectedRows(const MatrixPtr selectedIndices,
                        const std::vector<std::vector<int>>& inputSeqInfo);
 
   // if the second input of this layer is on GPU memory, copy it to CPU memory.
@@ -67,7 +66,7 @@ class SubNestedSequenceLayer : public Layer {
   std::vector<std::vector<int>> inputSeqInfoVec_;
 
   // the final selected row indices in a batch,
-  // rowIdx_ and selectedRows_ actually share a same memory.
+  // rowIndice_ and selectedRows_ actually share a same memory.
   IVectorPtr rowIndice_;
   std::vector<int> selectedRows_;
 };
@@ -83,7 +82,7 @@ bool SubNestedSequenceLayer::init(const LayerMap& layerMap,
   return true;
 }
 
-void SubNestedSequenceLayer::calSelectedCols(
+void SubNestedSequenceLayer::calSelectedRows(
     const MatrixPtr selectedIndices,
     const std::vector<std::vector<int>>& inputSeqInfo) {
   selectedRows_.clear();
@@ -96,6 +95,11 @@ void SubNestedSequenceLayer::calSelectedCols(
   for (size_t i = 0; i < seqNum; ++i) {
     for (size_t j = 0; j < beamSize; ++j) {
       if (selectedIndices->getElement(i, j) == -1.) break;
+      // TODO(caoying)
+      // Here selSubSeqIdx is automatically converted from real to int
+      // This is very dangerous if user fill this matrix himself, invalid data
+      // may occur. The selected indices should be stored in
+      // CpuSparseMatrix with SparseValueType set to NO_VALUE.
       int selSubSeqIdx = selectedIndices->getElement(i, j);
       CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx);
 
@@ -160,7 +164,7 @@ void SubNestedSequenceLayer::forward(PassType passType) {
   Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
                               inputSeq.subSequenceStartPositions,
                               inputSeqInfoVec_);
-  calSelectedCols(selIdsCpu_, inputSeqInfoVec_);
+  calSelectedRows(selIdsCpu_, inputSeqInfoVec_);
 
   resetOutput(selectedRows_.size(), getSize());
   getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);
diff --git a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
index e456dd5db7f45..d560ca650bc5b 100644
--- a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
+++ b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
@@ -26,9 +26,9 @@ using namespace std;     // NOLINT
 DECLARE_int32(gpu_id);
 DECLARE_bool(thread_local_rand_use_global_seed);
 
-const int MAX_SEQ_NUM = 5;
-const int MAX_SEQ_LEN = 5;
-const int MAX_BEAM_SIZE = 3;
+const int MAX_SEQ_NUM = 17;
+const int MAX_SEQ_LEN = 23;
+const int MAX_BEAM_SIZE = 13;
 
 vector<real> randSampling(real range, int n) {
   CHECK_GE(range, n);
@@ -46,8 +46,7 @@ void genSeqInfo(vector<int>& seqStartPos, vector<int>& subSeqStartPos) {
   seqStartPos.resize(1, 0);
   subSeqStartPos.resize(1, 0);
 
-  // srand((size_t)(time(NULL)));
-  srand(1);
+  srand((size_t)(time(NULL)));
   int seqNum = 1 + (rand() % MAX_SEQ_NUM);
   for (int i = 0; i < seqNum; ++i) {
     int subSeqNum = 1 + (rand() % MAX_SEQ_NUM);
@@ -105,7 +104,7 @@ void genTestData(vector<int>& seqStartPos,
                  vector<vector<real>>& starts,
                  vector<vector<real>>& ends,
                  bool hasSubseq) {
-  size_t beamSize = MAX_BEAM_SIZE;
+  size_t beamSize = 1 + (rand() % MAX_BEAM_SIZE);
   genSeqInfo(seqStartPos, subSeqStartPos);
 
   genStarts(hasSubseq ? subSeqStartPos : seqStartPos, starts, beamSize);
@@ -167,16 +166,21 @@ void testSeqSliceLayer(bool hasSubseq,
     config.inputDefs.push_back(
         {INPUT_SELF_DEFINE_DATA, "starts", startMatrixPtr});
     config.layerConfig.add_inputs();
+    config.layerConfig.set_select_first(true);
   }
 
   // add end indices
   if (ends.size()) {
     vector<real> endsToVec;
     flatten2dVector(ends, endsToVec);
+
     MatrixPtr endMatrixPtr =
         Matrix::create(ends.size(), ends[0].size(), false, false);
+    endMatrixPtr->copyFrom(endsToVec.data(), endsToVec.size());
+
     config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "ends", endMatrixPtr});
     config.layerConfig.add_inputs();
+    config.layerConfig.set_select_first(false);
   }
 
   testLayerGrad(config, "seq_slice", /*batchSize*/ 100, false, useGpu, false);
@@ -188,10 +192,15 @@ TEST(Layer, SeqSliceLayer) {
   vector<vector<real>> starts;
   vector<vector<real>> ends;
 
+  std::vector<bool> mode = {false};
+#ifndef PADDLE_ONLY_CPU
+  mode.push_back(true);
+#endif
   genSeqInfo(seqStartPos, subSeqStartPos);
-  for (bool hasSubseq : {false, true}) {
+  for (bool hasSubseq : {true, false}) {
+    LOG(INFO) << "hasSubSeq : " << hasSubseq;
     genTestData(seqStartPos, subSeqStartPos, starts, ends, hasSubseq);
-    for (bool useGpu : {false, true}) {
+    for (bool useGpu : mode) {
       vector<vector<real>> tmp;
       testSeqSliceLayer(
           hasSubseq, useGpu, seqStartPos, subSeqStartPos, tmp, ends);
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 0547ac93cd183..06f7e5245fbc3 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -670,19 +670,28 @@ void Argument::reorganizeSeqInfo(
     const ICpuGpuVectorPtr seqStartPos,
     const ICpuGpuVectorPtr subSeqStartPos,
     std::vector<std::vector<int>>& reorganizedSeqInfo) {
-  int* seqStarts = seqStartPos->getMutableData(false);
-  int* subSeqStarts = subSeqStartPos->getMutableData(false);
+  CHECK(seqStartPos);
 
   int seqNum = seqStartPos->getSize() - 1;
-  reorganizedSeqInfo.resize(seqNum, std::vector<int>());
-  int seqIdx = 0;
-  for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) {
-    reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
-    if (subSeqStarts[i] == seqStarts[seqIdx + 1]) {
-      seqIdx++;
-      if (seqIdx == seqNum) return;
+  int* seqStarts = seqStartPos->getMutableData(false);
+
+  if (subSeqStartPos) {
+    int* subSeqStarts = subSeqStartPos->getMutableData(false);
+    reorganizedSeqInfo.resize(seqNum, std::vector<int>());
+    int seqIdx = 0;
+    for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) {
       reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
+      if (subSeqStarts[i] == seqStarts[seqIdx + 1]) {
+        seqIdx++;
+        if (seqIdx == seqNum) return;
+        reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
+      }
     }
+  } else {
+    reorganizedSeqInfo.resize(1, std::vector<int>(seqNum + 1, 0));
+    memcpy(reorganizedSeqInfo[0].data(),
+           seqStarts,
+           sizeof(int) * seqStartPos->getSize());
   }
 }
 

From b97f020f9c34da04e093deb4691f6286f4017e62 Mon Sep 17 00:00:00 2001
From: caoying03 <caoying03@baidu.com>
Date: Thu, 10 Aug 2017 10:37:07 +0800
Subject: [PATCH 4/6] fix unittest error.

---
 paddle/gserver/layers/SequenceSliceLayer.cpp    |  3 +--
 python/paddle/trainer_config_helpers/layers.py  |  1 +
 .../protostr/test_kmax_seq_socre_layer.protostr | 17 +++++------------
 .../tests/configs/test_kmax_seq_socre_layer.py  |  4 +---
 4 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp
index 424f898553993..165ee6311a62d 100644
--- a/paddle/gserver/layers/SequenceSliceLayer.cpp
+++ b/paddle/gserver/layers/SequenceSliceLayer.cpp
@@ -70,9 +70,8 @@ void SequenceSliceLayer::checkInputs() {
   const Argument& inputSeq = getInput(0);
   CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer "
                            << "must be a sequence.";
-  // Check inputs
   const MatrixPtr indices1 = getInputValue(1);
-  CHECK_EQ(indices1->getHeight(),
+  CHECK_EQ(static_cast<size_t>(indices1->getHeight()),
            inputSeq.hasSubseq() ? inputSeq.getNumSubSequences()
                                 : inputSeq.getNumSequences())
       << "Height of the second input should be equal to number of sequence "
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index e51332da0d06d..79d24cfe5bd54 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -6242,6 +6242,7 @@ def seq_slice_layer(input, starts, ends, name=None):
         name, LayerType.SEQ_SLICE, parents=[input], size=input.size)
 
 
+@wrap_name_default()
 @layer_support()
 def kmax_sequence_score_layer(input, name=None, beam_size=1):
     """
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr
index 81bd71f68eb3f..3d32220bfbf5f 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr
@@ -1,12 +1,6 @@
 type: "nn"
 layers {
-  name: "input"
-  type: "data"
-  size: 300
-  active_type: ""
-}
-layers {
-  name: "data"
+  name: "input_seq"
   type: "data"
   size: 128
   active_type: ""
@@ -17,7 +11,7 @@ layers {
   size: 1
   active_type: "exponential"
   inputs {
-    input_layer_name: "data"
+    input_layer_name: "input_seq"
     input_parameter_name: "___fc_layer_0__.w0"
   }
   bias_parameter_name: "___fc_layer_0__.wbias"
@@ -51,15 +45,14 @@ parameters {
   initial_strategy: 0
   initial_smart: false
 }
-input_layer_names: "data"
+input_layer_names: "input_seq"
 output_layer_names: "__kmax_sequence_score_layer_0__"
 sub_models {
   name: "root"
-  layer_names: "input"
-  layer_names: "data"
+  layer_names: "input_seq"
   layer_names: "__fc_layer_0__"
   layer_names: "__kmax_sequence_score_layer_0__"
-  input_layer_names: "data"
+  input_layer_names: "input_seq"
   output_layer_names: "__kmax_sequence_score_layer_0__"
   is_recurrent_layer_group: false
 }
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py
index d245c5a41c793..48d0cd55da248 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py
@@ -2,9 +2,7 @@
 #coding=utf-8
 from paddle.trainer_config_helpers import *
 
-data = data_layer(name='input', size=300)
-
-data = data_layer(name="data", size=128)
+data = data_layer(name="input_seq", size=128)
 scores = fc_layer(input=data, size=1, act=ExpActivation())
 kmax_seq_id = kmax_sequence_score_layer(input=scores, beam_size=5)
 

From 82e4fab4e31d730d2d9d4df7e223881e9db693a9 Mon Sep 17 00:00:00 2001
From: caoying03 <caoying03@baidu.com>
Date: Wed, 23 Aug 2017 14:07:53 +0800
Subject: [PATCH 5/6] follow comments.

---
 paddle/gserver/layers/KmaxSeqScoreLayer.cpp   | 26 ++++----
 paddle/gserver/layers/SequenceSliceLayer.cpp  | 63 ++++++++-----------
 .../gserver/layers/SubNestedSequenceLayer.cpp | 29 +++++----
 python/paddle/trainer/config_parser.py        |  5 +-
 4 files changed, 58 insertions(+), 65 deletions(-)

diff --git a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
index 3b5060e3ce980..d5407555b248d 100644
--- a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
+++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
@@ -80,13 +80,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
       << "input of " << getName()
       << " must be a sequence or a nested sequence.";
   CHECK_EQ(input.value->getWidth(), 1UL)
-      << "input of " << getName()
-      << " is score over a sequence or a nested sequence, so its width "
-      << " must be 1.";
+      << "input of " << getName() << " are scores over a sequence or "
+      << "a nested sequence, so its width must be 1.";
 
   if (useGpu_) {
-    // this Layer runs only in CPU, if the model is runing on GPU,
-    // then copy the input to this layer from GPU to CPU.
+    /*
+     * currently, this Layer only runs in CPU, if the other part of the model is
+     * runing on GPU, then copy the input to this layer from GPU to CPU.
+     */
     Matrix::resizeOrCreate(scores_,
                            inputScore->getHeight(),
                            1,
@@ -97,13 +98,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
     scores_ = inputScore;
   }
 
-  // TODO(caoying)
-  // In PaddlePaddle, the currently available matrixes all a have real-typed
-  // data field, but the selected indices information are actually int-typed
-  // (with -1 as a special token). Storing indices information in real-typed
-  // Matrix leads to converting real to int. This is very dangerous if a user
-  // fills this matrix himself, invalid data may occur.
-  // The selected indices should be stored in an int-typed matrix.
+  /*
+   * TODO(caoying)
+   * In PaddePaddle, currently all matrices are real number types,
+   * but output of this layer which is some selected indices of the give
+   * sequence are actually filled with int types so that storing int types
+   * information in a real number matrix is dangerous, since real numbers will
+   * be convered to int types.
+   */
   Matrix::resizeOrCreate(
       output_.value,
       input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(),
diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp
index 165ee6311a62d..4da65ade0b423 100644
--- a/paddle/gserver/layers/SequenceSliceLayer.cpp
+++ b/paddle/gserver/layers/SequenceSliceLayer.cpp
@@ -31,13 +31,15 @@ class SequenceSliceLayer : public Layer {
   void backward(const UpdateCallback& callback = nullptr) override;
 
 private:
-  // TODO(caoying)
-  // In PaddlePaddle, the currently available matrixes all a have real-typed
-  // data field, but the selected indices information are actually int-typed
-  // (with -1 as a special token). Storing indices information in real-typed
-  // Matrix leads to converting real to int. This is very dangerous if a user
-  // fills this matrix himself, invalid data may occur.
-  // The selected indices should be stored in an int-typed matrix.
+  /*
+   * TODO(caoying)
+   * In PaddePaddle, currently all matrices are real number types,
+   * but the second and the (optional) third input which are some
+   * selected indices of the give sequence to trim the sequence, are actually
+   * filled with int types so that storing int types information in real number
+   * matrices is very dangerous, since real numbers will be convered to int
+   * types. If a user fills this matrix himself, invalid data may occor.
+   */
 
   MatrixPtr startIdsOnCpu_;
   MatrixPtr endIdsOnCpu_;
@@ -68,7 +70,7 @@ bool SequenceSliceLayer::init(const LayerMap& layerMap,
 
 void SequenceSliceLayer::checkInputs() {
   const Argument& inputSeq = getInput(0);
-  CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer "
+  CHECK(inputSeq.hasSeq()) << "The first input of sequence slice layer "
                            << "must be a sequence.";
   const MatrixPtr indices1 = getInputValue(1);
   CHECK_EQ(static_cast<size_t>(indices1->getHeight()),
@@ -86,22 +88,6 @@ void SequenceSliceLayer::checkInputs() {
 }
 
 void SequenceSliceLayer::copySliceIdsToCpu() {
-  if (!useGpu_) {
-    if (inputLayers_.size() == 2U) {
-      if (config_.select_first()) {
-        startIdsOnCpu_ = getInputValue(1);
-        endIdsOnCpu_ = nullptr;
-      } else {
-        startIdsOnCpu_ = nullptr;
-        endIdsOnCpu_ = getInputValue(1);
-      }
-    } else if (inputLayers_.size() == 3U) {
-      startIdsOnCpu_ = getInputValue(1);
-      endIdsOnCpu_ = getInputValue(2);
-    }
-    return;
-  }
-
   const MatrixPtr indices1 = getInputValue(1);
   if (inputLayers_.size() == 2U) {
     if (config_.select_first()) {
@@ -141,22 +127,19 @@ void SequenceSliceLayer::copySliceIdsToCpu() {
 
 void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts,
                                          const MatrixPtr ends) {
+  CHECK(starts && ends);
+
   outSeqStartPos_.resize(1, 0);
   outSubSeqStartPos_.resize(1, 0);
   selectedRows_.clear();
 
   size_t beamSize = starts ? starts->getWidth() : ends->getWidth();
-  // iterate over sequence
   size_t rowIdx = 0;
   for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) {
-    // iterate over sub-sequence in a sequence
     for (size_t j = 0; j < inputSeqInfoVec_[i].size() - 1; ++j) {
-      // iterate over each index for slicing.
       for (size_t k = 0; k < beamSize; ++k) {
-        if (starts) {
-          if (starts->getElement(rowIdx, k) == -1.) break;
-        } else if (ends->getElement(rowIdx, k) == -1.)
-          break;
+        if (starts && starts->getElement(rowIdx, k) == -1.) break;
+        if (ends && ends->getElement(rowIdx, k) == -1.) break;
 
         int begPos = inputSeqInfoVec_[i][j];
         if (starts) begPos += starts->getElement(rowIdx, k);
@@ -165,7 +148,7 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts,
         if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k);
 
         int seqLen = endPos - begPos + 1;
-        CHECK(seqLen);
+        CHECK_LT(seqLen, 0U);
         for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m);
         inputSeqInfoVec_.size() > 1
             ? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen)
@@ -208,7 +191,16 @@ void SequenceSliceLayer::forward(PassType passType) {
   Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
                               inputSeq.subSequenceStartPositions,
                               inputSeqInfoVec_);
-  copySliceIdsToCpu();
+  if (!useGpu_) {
+    if (inputLayers_.size() == 2U) {
+      startIdsOnCpu_ = config_.select_first() ? getInputValue(1) : nullptr;
+      endIdsOnCpu_ = config_.select_first() ? nullptr : getInputValue(1);
+    } else if (inputLayers_.size() == 3U) {
+      startIdsOnCpu_ = getInputValue(1);
+      endIdsOnCpu_ = getInputValue(2);
+    }
+  } else
+    copySliceIdsToCpu();
 
   // calculate the selected row indices in a batch,
   // and build the output sequence information.
@@ -221,10 +213,7 @@ void SequenceSliceLayer::forward(PassType passType) {
 }
 
 void SequenceSliceLayer::backward(const UpdateCallback& callback) {
-  MatrixPtr inputSeqGrad = getInputGrad(0);
-  MatrixPtr outputGrad = getOutputGrad();
-
-  outputGrad->addToRows(*inputSeqGrad, *rowIndice_);
+  getOutputGrad()->addToRows(*getInputGrad(0), *rowIndice_);
 }
 
 }  // namespace paddle
diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp
index c8607d50f5878..e9bee77212065 100644
--- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp
+++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp
@@ -58,23 +58,28 @@ class SubNestedSequenceLayer : public Layer {
   void calSelectedRows(const MatrixPtr selectedIndices,
                        const std::vector<std::vector<int>>& inputSeqInfo);
 
-  // if the second input of this layer is on GPU memory, copy it to CPU memory.
-  // TODO(caoying)
-  // In PaddlePaddle, the currently available matrixes all a have real-typed
-  // data field, but the selected indices information are actually int-typed
-  // (with -1 as a special token). Storing indices information in real-typed
-  // Matrix leads to converting real to int. This is very dangerous if a user
-  // fills this matrix himself, invalid data may occur.
-  // The selected indices should be stored in an int-typed matrix.
+  /*
+   * TODO(caoying)
+   * In PaddePaddle, currently all matrices are real number types,
+   * but the second is some selected indices of the give sequence to trim
+   * the nested sequence, are actually filled with int types so that storing
+   * int types information in real number matrices is very dangerous, since
+   * real numbers will be convered to int types. If a user fills this matrix
+   * himself, invalid data may occor.
+   *
+   * if the second input of this layer is on GPU memory, copy it to CPU memory.
+   */
   MatrixPtr selIdsCpu_;
 
-  // reorganized sequenceStartPositions and subSequenceStartPositions
-  // into a 2d vector to facilitate the sequence selection process.
+  /*
+   * reorganize sequenceStartPositions and subSequenceStartPositions
+   * into a 2d vector to facilitate the sequence selection process.
+   */
   std::vector<std::vector<int>> inputSeqInfoVec_;
 
-  // the final selected row indices in a batch,
-  // rowIndice_ and selectedRows_ actually share a same memory.
+  /* store the final selected row indices in a batch */
   IVectorPtr rowIndice_;
+  /* rowIndice_ and selectedRows_ actually share a same memory. */
   std::vector<int> selectedRows_;
 };
 
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index af14007de603c..2fcccc69482cf 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2717,10 +2717,7 @@ def __init__(self, name, inputs, starts, ends, bias=False, **xargs):
                     'If start and end indices are both given to'
                     'sequence slice layer, they should have the same width.')
         elif len(inputs) == 2:
-            if starts is not None:
-                self.config.select_first = True
-            else:
-                self.config.select_first = False
+            self.config.select_first = (starts is not None)
 
 
 @config_layer('sub_nested_seq')

From 377401fb0cc7947d09b007a2c52cb679905cf2b5 Mon Sep 17 00:00:00 2001
From: caoying03 <caoying03@baidu.com>
Date: Wed, 23 Aug 2017 14:13:51 +0800
Subject: [PATCH 6/6] fix a bug.

---
 paddle/gserver/layers/SequenceSliceLayer.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp
index 4da65ade0b423..5d72d373047fd 100644
--- a/paddle/gserver/layers/SequenceSliceLayer.cpp
+++ b/paddle/gserver/layers/SequenceSliceLayer.cpp
@@ -127,7 +127,8 @@ void SequenceSliceLayer::copySliceIdsToCpu() {
 
 void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts,
                                          const MatrixPtr ends) {
-  CHECK(starts && ends);
+  CHECK(starts || ends) << "At least one of the start or end indices "
+                        << "should be given.";
 
   outSeqStartPos_.resize(1, 0);
   outSubSeqStartPos_.resize(1, 0);
@@ -148,7 +149,7 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts,
         if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k);
 
         int seqLen = endPos - begPos + 1;
-        CHECK_LT(seqLen, 0U);
+        CHECK_GT(seqLen, 0U);
         for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m);
         inputSeqInfoVec_.size() > 1
             ? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen)