From 84627bb934ed6b4c7213eeebc0fe59e5fbe7a84b Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 7 Aug 2017 14:03:13 +0800 Subject: [PATCH 1/6] add config helper for sequence slice layer. --- doc/api/v2/config/layer.rst | 5 ++ python/paddle/trainer/config_parser.py | 45 +++++++++++ .../paddle/trainer_config_helpers/layers.py | 68 ++++++++++++++++ .../tests/configs/file_list.sh | 3 +- .../protostr/test_seq_slice_layer.protostr | 79 +++++++++++++++++++ .../tests/configs/test_seq_slice_layer.py | 13 +++ 6 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index 372272a53c12c..232ea6b49b3a3 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -257,6 +257,11 @@ seq_concat .. autoclass:: paddle.v2.layer.seq_concat :noindex: +seq_slice +--------- +.. 
autoclass:: paddle.v2.layer.seq_slice + :noindex: + Reshaping Layers ================ diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 9ea69fc5e5763..11e54ba42039d 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2657,6 +2657,51 @@ def __init__(self, name, inputs, bias=False, **xargs): self.create_bias_parameter(bias, size) +@config_layer('seq_slice') +class SeqSliceLayer(LayerBase): + def __init__(self, name, inputs, starts, ends, bias=False, **xargs): + if isinstance(inputs, list): + assert len(inputs) == 1, ('the first input of sequence slice layer ' + 'is a single sequence input.') + else: + inputs = [inputs] + + if starts is not None: + if isinstance(starts, list): + assert len(starts) == 1, ( + 'the start indices for sequence slice layer cannot ' + 'be a list having more than one element.') + starts = starts[0] + inputs.append(starts) + + if ends is not None: + if isinstance(ends, list): + assert len(ends) == 1, ( + 'the end indices for sequence slice layer cannot ' + 'be a list having more than one element.') + ends = ends[0] + inputs.append(ends) + assert len(inputs) >= 2, ( + 'the sequence slice layer has at least two inputs.') + + super(SeqSliceLayer, self).__init__( + name, 'seq_slice', 0, inputs=inputs, **xargs) + input_layer0 = self.get_input_layer(0) + size = input_layer0.size + self.set_layer_size(size) + + if len(inputs) == 3: + assert ( + self.get_input_layer(1).size == self.get_input_layer(2).size), ( + 'If start and end indices are both given to' + 'sequence slice layer, they should have the same width.') + elif len(inputs) == 2: + if starts is not None: + self.config.select_first = True + else: + self.config.select_first = False + + @config_layer('out_prod') class OuterProdLayer(LayerBase): def __init__(self, name, inputs, device=None): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py 
index ea5fdcc50f6ab..15636b14429d1 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -131,6 +131,7 @@ 'crop_layer', 'clip_layer', 'slice_projection', + 'seq_slice_layer', ] @@ -225,6 +226,7 @@ class LayerType(object): PRELU = 'prelu' CROP_LAYER = 'crop' CLIP_LAYER = 'clip' + SEQ_SLICE = 'seq_slice' @staticmethod def is_layer_type(type_name): @@ -6119,3 +6121,69 @@ def clip_layer(input, min, max, name=None): max=max) return LayerOutput( name, LayerType.CLIP_LAYER, parents=[input], size=input.size) + + +@wrap_name_default() +def seq_slice_layer(input, starts, ends, name=None): + """ + seq_slice_layer will return one or several sub-sequences from the + input sequence layer given start and end indices. + + - If only start indices are given, and end indices are set to None, + this layer slices the input sequence from the given start indices + to its end. + - If only end indices are given, and start indices are set to None, + this layer slices the input sequence from its beginning to the + given end indices. + - If start and end indices are both given, they should have the same + number of elements. + + If start or end indices contain more than one element, the input sequence + will be sliced multiple times. + + + .. code-block:: python + + seq_slice = seq_slice_layer(input=input_seq, + starts=start_pos, ends=end_pos) + + :param name: name of this layer. + :type name: basestring + :param input: input for this layer, it should be a sequence. + :type input: LayerOutput + :param starts: start indices to slice the input sequence. + :type starts: LayerOutput|None + :param ends: end indices to slice the input sequence. + :type ends: LayerOutput|None + :return: LayerOutput object. 
+ :rtype: LayerOutput + + """ + + assert isinstance(input, LayerOutput), ( + 'The first input of seq_slice layer must be a PaddlePaddle layer.') + + if starts is not None: + assert isinstance(starts, LayerOutput), ( + 'The start indices for seq_slice layer ' + 'must be a PaddlePaddle layer.') + if ends is not None: + assert isinstance(ends, LayerOutput), ( + 'The end indices for seq_slice layer must be a PaddlePaddle layer.') + assert starts is not None or ends is not None, ( + 'start and end indices ' + 'cannot be set to None at the same time, at least one of ' + 'them should be given.') + if starts is not None and ends is not None: + assert starts.size == ends.size, ( + 'If start and end indices are both given to seq_slice_layer, ' + 'they should have the same width.') + + Layer( + name=name, + type=LayerType.SEQ_SLICE, + inputs=input.name, + starts=starts.name if starts is not None else None, + ends=ends.name if ends is not None else None) + return LayerOutput( + name, LayerType.SEQ_SLICE, parents=[input], size=input.size) diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index 0ffa58bc1e208..1ce865ceace9e 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -7,6 +7,7 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer -test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer) +test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer +test_seq_slice_layer) export whole_configs=(test_split_datasource) diff --git 
a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr new file mode 100644 index 0000000000000..5b73d614fe862 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr @@ -0,0 +1,79 @@ +type: "nn" +layers { + name: "word" + type: "data" + size: 128 + active_type: "" +} +layers { + name: "starts" + type: "data" + size: 5 + active_type: "" +} +layers { + name: "ends" + type: "data" + size: 5 + active_type: "" +} +layers { + name: "__seq_slice_layer_0__" + type: "seq_slice" + size: 128 + active_type: "" + inputs { + input_layer_name: "word" + } + inputs { + input_layer_name: "starts" + } + inputs { + input_layer_name: "ends" + } +} +layers { + name: "__seq_slice_layer_1__" + type: "seq_slice" + size: 128 + active_type: "" + inputs { + input_layer_name: "word" + } + inputs { + input_layer_name: "starts" + } + select_first: true +} +layers { + name: "__seq_slice_layer_2__" + type: "seq_slice" + size: 128 + active_type: "" + inputs { + input_layer_name: "word" + } + inputs { + input_layer_name: "ends" + } + select_first: false +} +input_layer_names: "word" +output_layer_names: "__seq_slice_layer_0__" +output_layer_names: "__seq_slice_layer_1__" +output_layer_names: "__seq_slice_layer_2__" +sub_models { + name: "root" + layer_names: "word" + layer_names: "starts" + layer_names: "ends" + layer_names: "__seq_slice_layer_0__" + layer_names: "__seq_slice_layer_1__" + layer_names: "__seq_slice_layer_2__" + input_layer_names: "word" + output_layer_names: "__seq_slice_layer_0__" + output_layer_names: "__seq_slice_layer_1__" + output_layer_names: "__seq_slice_layer_2__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py new file mode 100644 index 
0000000000000..510ad3220893f --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python +#coding=utf-8 +from paddle.trainer_config_helpers import * + +input_seq = data_layer("word", size=128) +starts = data_layer("starts", size=5) +ends = data_layer("ends", size=5) + +seq_slice1 = seq_slice_layer(input=input_seq, starts=starts, ends=ends) +seq_slice2 = seq_slice_layer(input=input_seq, starts=starts, ends=None) +seq_slice3 = seq_slice_layer(input=input_seq, starts=None, ends=ends) + +outputs(seq_slice1, seq_slice2, seq_slice3) From 2988a58ef01a56e84cff02463972e0150bc6ab13 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 8 Aug 2017 08:52:05 +0800 Subject: [PATCH 2/6] add unittest. --- paddle/gserver/tests/CMakeLists.txt | 6 + .../gserver/tests/test_SeqSliceLayerGrad.cpp | 214 ++++++++++++++++++ 2 files changed, 220 insertions(+) create mode 100644 paddle/gserver/tests/test_SeqSliceLayerGrad.cpp diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 4546d12a90308..9fdb148864e18 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -30,6 +30,12 @@ add_unittest_without_exec(test_CRFLayerGrad add_test(NAME test_CRFLayerGrad COMMAND test_CRFLayerGrad) +################ test_SeqSliceLayerGrad #################### +add_unittest_without_exec(test_SeqSliceLayerGrad + test_SeqSliceLayerGrad.cpp + LayerGradUtil.cpp) +add_test(NAME test_SeqSliceLayerGrad + COMMAND test_SeqSliceLayerGrad) add_unittest_without_exec(test_ActivationGrad test_ActivationGrad.cpp diff --git a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp new file mode 100644 index 0000000000000..e456dd5db7f45 --- /dev/null +++ b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp @@ -0,0 +1,214 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "ModelConfig.pb.h" +#include "paddle/gserver/layers/DataLayer.h" +#include "paddle/trainer/Trainer.h" + +#include "LayerGradUtil.h" +#include "paddle/testing/TestUtil.h" + +using namespace paddle; // NOLINT +using namespace std; // NOLINT + +DECLARE_int32(gpu_id); +DECLARE_bool(thread_local_rand_use_global_seed); + +const int MAX_SEQ_NUM = 5; +const int MAX_SEQ_LEN = 5; +const int MAX_BEAM_SIZE = 3; + +vector randSampling(real range, int n) { + CHECK_GE(range, n); + vector num(range); + iota(begin(num), end(num), 0.); + if (range == n) return num; + + random_shuffle(begin(num), end(num)); + num.resize(n); + sort(begin(num), end(num)); + return num; +} + +void genSeqInfo(vector& seqStartPos, vector& subSeqStartPos) { + seqStartPos.resize(1, 0); + subSeqStartPos.resize(1, 0); + + // srand((size_t)(time(NULL))); + srand(1); + int seqNum = 1 + (rand() % MAX_SEQ_NUM); + for (int i = 0; i < seqNum; ++i) { + int subSeqNum = 1 + (rand() % MAX_SEQ_NUM); + for (int j = 0; j < subSeqNum; ++j) + subSeqStartPos.push_back(subSeqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + seqStartPos.push_back(subSeqStartPos.back()); + } +} + +/* + generate start indices according to sequence start positions. 
+ */ +void genStarts(vector& seqStartPos, + vector>& starts, + size_t beamSize) { + starts.clear(); + starts.resize(seqStartPos.size() - 1, vector(beamSize, -1.)); + + for (size_t i = 0; i < seqStartPos.size() - 1; ++i) { + int seqLen = seqStartPos[i + 1] - seqStartPos[i]; + vector randStarts = + randSampling(seqLen, min(seqLen, static_cast(beamSize))); + copy(begin(randStarts), end(randStarts), begin(starts[i])); + } +} + +/* + generate end indices according to sequence start positions and start indices. + */ +void genEnds(vector& seqStartPos, + vector>& starts, + vector>& ends, + size_t beamSize) { + CHECK_EQ(seqStartPos.size() - 1, starts.size()); + ends.clear(); + ends.resize(seqStartPos.size() - 1, vector(beamSize, -1.)); + + for (size_t i = 0; i < starts.size(); ++i) { + for (size_t j = 0; j < starts[i].size(); ++j) { + int seqLen = seqStartPos[i + 1] - seqStartPos[i]; + CHECK_GE(seqLen - 1, starts[i][j]); + if (starts[i][j] == -1.) break; + if (starts[i][j] == (seqLen - 1)) { + ends[i][j] = starts[i][j]; + } else { + ends[i][j] = starts[i][j] + randSampling(seqLen - starts[i][j], 1)[0]; + } + } + } +} + +void genTestData(vector& seqStartPos, + vector& subSeqStartPos, + vector>& starts, + vector>& ends, + bool hasSubseq) { + size_t beamSize = MAX_BEAM_SIZE; + genSeqInfo(seqStartPos, subSeqStartPos); + + genStarts(hasSubseq ? subSeqStartPos : seqStartPos, starts, beamSize); + genEnds(hasSubseq ? subSeqStartPos : seqStartPos, starts, ends, beamSize); +} + +template +void flatten2dVector(vector>& inVec, vector& outVec) { + size_t totalSize{0}; + for (auto const& items : inVec) totalSize += items.size(); + outVec.reserve(totalSize); + + for (auto& items : inVec) + move(items.begin(), items.end(), back_inserter(outVec)); +} + +void testSeqSliceLayer(bool hasSubseq, + bool useGpu, + vector& seqStartPos, + vector& subSeqStartPos, + vector>& starts, + vector>& ends) { + // layer size is not crucial for this layer, + // so here use a small layer size in the unittest. 
+ const size_t layerSize{4}; + TestConfig config; + config.layerConfig.set_type("seq_slice"); + config.layerConfig.set_size(layerSize); + + // add the first input + MatrixPtr seqInputPtr = + Matrix::create(hasSubseq ? subSeqStartPos.back() : seqStartPos.back(), + layerSize, + false, + false); + seqInputPtr->randomizeUniform(); + + if (hasSubseq) { + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + "seq_input", + seqInputPtr, + seqStartPos, + subSeqStartPos}); + } else { + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "seq_input", seqInputPtr, seqStartPos}); + } + config.layerConfig.add_inputs(); + + // add start indices + if (starts.size()) { + vector startsToVec; + flatten2dVector(starts, startsToVec); + + MatrixPtr startMatrixPtr = + Matrix::create(starts.size(), starts[0].size(), false, false); + startMatrixPtr->copyFrom(startsToVec.data(), startsToVec.size()); + + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "starts", startMatrixPtr}); + config.layerConfig.add_inputs(); + } + + // add end indices + if (ends.size()) { + vector endsToVec; + flatten2dVector(ends, endsToVec); + MatrixPtr endMatrixPtr = + Matrix::create(ends.size(), ends[0].size(), false, false); + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "ends", endMatrixPtr}); + config.layerConfig.add_inputs(); + } + + testLayerGrad(config, "seq_slice", /*batchSize*/ 100, false, useGpu, false); +} + +TEST(Layer, SeqSliceLayer) { + vector seqStartPos; + vector subSeqStartPos; + vector> starts; + vector> ends; + + genSeqInfo(seqStartPos, subSeqStartPos); + for (bool hasSubseq : {false, true}) { + genTestData(seqStartPos, subSeqStartPos, starts, ends, hasSubseq); + for (bool useGpu : {false, true}) { + vector> tmp; + testSeqSliceLayer( + hasSubseq, useGpu, seqStartPos, subSeqStartPos, tmp, ends); + testSeqSliceLayer( + hasSubseq, useGpu, seqStartPos, subSeqStartPos, starts, tmp); + testSeqSliceLayer( + hasSubseq, useGpu, seqStartPos, subSeqStartPos, starts, ends); + } + } +} + +int 
main(int argc, char** argv) { + initMain(argc, argv); + hl_start(); + hl_init(FLAGS_gpu_id); + FLAGS_thread_local_rand_use_global_seed = true; + srand(1); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From 26bc5b12596c945956f7a6b003712805e579a36d Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 8 Aug 2017 18:48:11 +0800 Subject: [PATCH 3/6] add implementations. --- paddle/gserver/layers/KmaxSeqScoreLayer.cpp | 5 + paddle/gserver/layers/SequenceSliceLayer.cpp | 228 ++++++++++++++++++ .../gserver/layers/SubNestedSequenceLayer.cpp | 16 +- .../gserver/tests/test_SeqSliceLayerGrad.cpp | 25 +- paddle/parameter/Argument.cpp | 27 ++- 5 files changed, 278 insertions(+), 23 deletions(-) create mode 100644 paddle/gserver/layers/SequenceSliceLayer.cpp diff --git a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp index 8ce591d476246..e96fd61fc1e96 100644 --- a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp +++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp @@ -97,6 +97,11 @@ void KmaxSeqScoreLayer::forward(PassType passType) { scores_ = inputScore; } + // TODO(caoying) + // Here selSubSeqIdx is automatically converted from real to int + // This is very dangerous if user fill this matrix himself, invalid data may + // occur. The selected indices should be stored in + // CpuSparseMatrix with SparseValueType set to NO_VALUE. Matrix::resizeOrCreate( output_.value, input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(), diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp new file mode 100644 index 0000000000000..410aba663e005 --- /dev/null +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -0,0 +1,228 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "Layer.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/Vector.h" +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +class SequenceSliceLayer : public Layer { +public: + explicit SequenceSliceLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + +private: + // TODO(caoying) + // Here selSubSeqIdx is automatically converted from real to int + // This is very dangerous if user fill this matrix himself, invalid data + // may occur. The selected indices should be stored in CpuSparseMatrix + // with SparseValueType set to NO_VALUE. 
+ MatrixPtr startIdsOnCpu_; + MatrixPtr endIdsOnCpu_; + + std::vector selectedRows_; + IVectorPtr rowIndice_; + std::vector> inputSeqInfoVec_; + std::vector outSubSeqStartPos_; + std::vector outSeqStartPos_; + + void checkInputs(); + void copySliceIdsToCpu(); + void calSelectedRows(const MatrixPtr starts, const MatrixPtr ends); +}; + +REGISTER_LAYER(seq_slice, SequenceSliceLayer); + +bool SequenceSliceLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + CHECK_GE(inputLayers_.size(), 2U); + CHECK_LE(inputLayers_.size(), 3U); + + setNeedSequenceInfo(false); + return true; +} + +void SequenceSliceLayer::checkInputs() { + const Argument& inputSeq = getInput(0); + CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer " + << "must be a sequence."; + // Check inputs + const MatrixPtr indices1 = getInputValue(1); + CHECK_EQ(indices1->getHeight(), + inputSeq.hasSubseq() ? inputSeq.getNumSubSequences() + : inputSeq.getNumSequences()) + << "Height of the second input should be equal to number of sequence " + << "in the first input."; + if (inputLayers_.size() == 3) { + const MatrixPtr indices2 = getInputValue(2); + CHECK_EQ(indices2->getHeight(), indices1->getHeight()) + << "start indices and end indices should have the same height."; + CHECK_EQ(indices2->getWidth(), indices1->getWidth()) + << "start indices and end indices should have the same Width."; + } +} + +void SequenceSliceLayer::copySliceIdsToCpu() { + if (!useGpu_) { + if (inputLayers_.size() == 2U) { + if (config_.select_first()) { + startIdsOnCpu_ = getInputValue(1); + endIdsOnCpu_ = nullptr; + } else { + startIdsOnCpu_ = nullptr; + endIdsOnCpu_ = getInputValue(1); + } + } else if (inputLayers_.size() == 3U) { + startIdsOnCpu_ = getInputValue(1); + endIdsOnCpu_ = getInputValue(2); + } + return; + } + + const MatrixPtr indices1 = getInputValue(1); + if (inputLayers_.size() == 2U) { + if 
(config_.select_first()) { + Matrix::resizeOrCreate(startIdsOnCpu_, + indices1->getHeight(), + indices1->getWidth(), + false /* trans */, + false /* useGpu */); + startIdsOnCpu_->copyFrom(*indices1); + endIdsOnCpu_ = nullptr; + } else { + Matrix::resizeOrCreate(endIdsOnCpu_, + indices1->getHeight(), + indices1->getWidth(), + false /* trans */, + false /* useGpu */); + endIdsOnCpu_->copyFrom(*indices1); + startIdsOnCpu_ = nullptr; + } + } else if (inputLayers_.size() == 3U) { + Matrix::resizeOrCreate(startIdsOnCpu_, + indices1->getHeight(), + indices1->getWidth(), + false /* trans */, + false /* useGpu */); + startIdsOnCpu_->copyFrom(*indices1); + + const MatrixPtr indices2 = getInputValue(2); + Matrix::resizeOrCreate(endIdsOnCpu_, + indices2->getHeight(), + indices2->getWidth(), + false /* trans */, + false /* useGpu */); + endIdsOnCpu_->copyFrom(*indices2); + } +} + +void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, + const MatrixPtr ends) { + outSeqStartPos_.resize(1, 0); + outSubSeqStartPos_.resize(1, 0); + selectedRows_.clear(); + + size_t beamSize = starts ? starts->getWidth() : ends->getWidth(); + // iterate over sequence + size_t rowIdx = 0; + for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) { + // iterate over sub-sequence in a sequence + for (size_t j = 0; j < inputSeqInfoVec_[i].size() - 1; ++j) { + // iterate over each index for slicing. + for (size_t k = 0; k < beamSize; ++k) { + if (starts) { + if (starts->getElement(rowIdx, k) == -1.) break; + } else if (ends->getElement(rowIdx, k) == -1.) + break; + + int begPos = inputSeqInfoVec_[i][j]; + if (starts) begPos += starts->getElement(rowIdx, k); + + int endPos = inputSeqInfoVec_[i][j + 1] - 1; + if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k); + + int seqLen = endPos - begPos + 1; + CHECK(seqLen); + for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m); + inputSeqInfoVec_.size() > 1 + ? 
outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen) + : outSeqStartPos_.push_back(outSeqStartPos_.back() + seqLen); + } + rowIdx++; + } + if (inputSeqInfoVec_.size() > 1) + outSeqStartPos_.push_back(outSubSeqStartPos_.back()); + } + + if (useGpu_) { + rowIndice_ = IVector::create(selectedRows_.size(), useGpu_); + rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size()); + } else { + rowIndice_ = + IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_); + } + + // create the sequence information for the output. + ICpuGpuVector::resizeOrCreate( + output_.sequenceStartPositions, outSeqStartPos_.size(), false); + output_.sequenceStartPositions->copyFrom( + outSeqStartPos_.data(), outSeqStartPos_.size(), false); + + if (inputSeqInfoVec_.size() > 1) { + ICpuGpuVector::resizeOrCreate( + output_.subSequenceStartPositions, outSubSeqStartPos_.size(), false); + output_.subSequenceStartPositions->copyFrom( + outSubSeqStartPos_.data(), outSubSeqStartPos_.size(), false); + } +} + +void SequenceSliceLayer::forward(PassType passType) { + Layer::forward(passType); + checkInputs(); + + const Argument& inputSeq = getInput(0); + inputSeqInfoVec_.clear(); + Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions, + inputSeq.subSequenceStartPositions, + inputSeqInfoVec_); + copySliceIdsToCpu(); + + // calculate the selected row indices in a batch, + // and build the output sequence information. + calSelectedRows(startIdsOnCpu_ ? startIdsOnCpu_ : nullptr, + endIdsOnCpu_ ? 
endIdsOnCpu_ : nullptr); + + resetOutput(selectedRows_.size(), getSize()); + + getOutputValue()->selectRows(*getInputValue(0), *rowIndice_); +} + +void SequenceSliceLayer::backward(const UpdateCallback& callback) { + MatrixPtr inputSeqGrad = getInputGrad(0); + MatrixPtr outputGrad = getOutputGrad(); + + outputGrad->addToRows(*inputSeqGrad, *rowIndice_); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp index 76f587fff760d..0db03002702ca 100644 --- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp +++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp @@ -52,11 +52,10 @@ class SubNestedSequenceLayer : public Layer { * ] * * ths output is saved to private member rowIndice_; - * [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, - * 16,17,18,19,20,21,22,23,24,25,26,27] + * [0,1,2,3,4,5,6,7,8,9,15,16,17,18,19,20,21,23,24,25,26,27] */ - void calSelectedCols(const MatrixPtr selectedIndices, + void calSelectedRows(const MatrixPtr selectedIndices, const std::vector>& inputSeqInfo); // if the second input of this layer is on GPU memory, copy it to CPU memory. @@ -67,7 +66,7 @@ class SubNestedSequenceLayer : public Layer { std::vector> inputSeqInfoVec_; // the final selected row indices in a batch, - // rowIdx_ and selectedRows_ actually share a same memory. + // rowIndice_ and selectedRows_ actually share a same memory. IVectorPtr rowIndice_; std::vector selectedRows_; }; @@ -83,7 +82,7 @@ bool SubNestedSequenceLayer::init(const LayerMap& layerMap, return true; } -void SubNestedSequenceLayer::calSelectedCols( +void SubNestedSequenceLayer::calSelectedRows( const MatrixPtr selectedIndices, const std::vector>& inputSeqInfo) { selectedRows_.clear(); @@ -96,6 +95,11 @@ void SubNestedSequenceLayer::calSelectedCols( for (size_t i = 0; i < seqNum; ++i) { for (size_t j = 0; j < beamSize; ++j) { if (selectedIndices->getElement(i, j) == -1.) 
break; + // TODO(caoying) + // Here selSubSeqIdx is automatically converted from real to int + // This is very dangerous if user fill this matrix himself, invalid data + // may occur. The selected indices should be stored in + // CpuSparseMatrix with SparseValueType set to NO_VALUE. int selSubSeqIdx = selectedIndices->getElement(i, j); CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx); @@ -160,7 +164,7 @@ void SubNestedSequenceLayer::forward(PassType passType) { Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions, inputSeq.subSequenceStartPositions, inputSeqInfoVec_); - calSelectedCols(selIdsCpu_, inputSeqInfoVec_); + calSelectedRows(selIdsCpu_, inputSeqInfoVec_); resetOutput(selectedRows_.size(), getSize()); getOutputValue()->selectRows(*getInputValue(0), *rowIndice_); diff --git a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp index e456dd5db7f45..d560ca650bc5b 100644 --- a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp +++ b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp @@ -26,9 +26,9 @@ using namespace std; // NOLINT DECLARE_int32(gpu_id); DECLARE_bool(thread_local_rand_use_global_seed); -const int MAX_SEQ_NUM = 5; -const int MAX_SEQ_LEN = 5; -const int MAX_BEAM_SIZE = 3; +const int MAX_SEQ_NUM = 17; +const int MAX_SEQ_LEN = 23; +const int MAX_BEAM_SIZE = 13; vector randSampling(real range, int n) { CHECK_GE(range, n); @@ -46,8 +46,7 @@ void genSeqInfo(vector& seqStartPos, vector& subSeqStartPos) { seqStartPos.resize(1, 0); subSeqStartPos.resize(1, 0); - // srand((size_t)(time(NULL))); - srand(1); + srand((size_t)(time(NULL))); int seqNum = 1 + (rand() % MAX_SEQ_NUM); for (int i = 0; i < seqNum; ++i) { int subSeqNum = 1 + (rand() % MAX_SEQ_NUM); @@ -105,7 +104,7 @@ void genTestData(vector& seqStartPos, vector>& starts, vector>& ends, bool hasSubseq) { - size_t beamSize = MAX_BEAM_SIZE; + size_t beamSize = 1 + (rand() % MAX_BEAM_SIZE); genSeqInfo(seqStartPos, subSeqStartPos); genStarts(hasSubseq 
? subSeqStartPos : seqStartPos, starts, beamSize); @@ -167,16 +166,21 @@ void testSeqSliceLayer(bool hasSubseq, config.inputDefs.push_back( {INPUT_SELF_DEFINE_DATA, "starts", startMatrixPtr}); config.layerConfig.add_inputs(); + config.layerConfig.set_select_first(true); } // add end indices if (ends.size()) { vector endsToVec; flatten2dVector(ends, endsToVec); + MatrixPtr endMatrixPtr = Matrix::create(ends.size(), ends[0].size(), false, false); + endMatrixPtr->copyFrom(endsToVec.data(), endsToVec.size()); + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "ends", endMatrixPtr}); config.layerConfig.add_inputs(); + config.layerConfig.set_select_first(false); } testLayerGrad(config, "seq_slice", /*batchSize*/ 100, false, useGpu, false); @@ -188,10 +192,15 @@ TEST(Layer, SeqSliceLayer) { vector> starts; vector> ends; + std::vector mode = {false}; +#ifndef PADDLE_ONLY_CPU + mode.push_back(true); +#endif genSeqInfo(seqStartPos, subSeqStartPos); - for (bool hasSubseq : {false, true}) { + for (bool hasSubseq : {true, false}) { + LOG(INFO) << "hasSubSeq : " << hasSubseq; genTestData(seqStartPos, subSeqStartPos, starts, ends, hasSubseq); - for (bool useGpu : {false, true}) { + for (bool useGpu : mode) { vector> tmp; testSeqSliceLayer( hasSubseq, useGpu, seqStartPos, subSeqStartPos, tmp, ends); diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index 0547ac93cd183..06f7e5245fbc3 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -670,19 +670,28 @@ void Argument::reorganizeSeqInfo( const ICpuGpuVectorPtr seqStartPos, const ICpuGpuVectorPtr subSeqStartPos, std::vector>& reorganizedSeqInfo) { - int* seqStarts = seqStartPos->getMutableData(false); - int* subSeqStarts = subSeqStartPos->getMutableData(false); + CHECK(seqStartPos); int seqNum = seqStartPos->getSize() - 1; - reorganizedSeqInfo.resize(seqNum, std::vector()); - int seqIdx = 0; - for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) { - 
reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); - if (subSeqStarts[i] == seqStarts[seqIdx + 1]) { - seqIdx++; - if (seqIdx == seqNum) return; + int* seqStarts = seqStartPos->getMutableData(false); + + if (subSeqStartPos) { + int* subSeqStarts = subSeqStartPos->getMutableData(false); + reorganizedSeqInfo.resize(seqNum, std::vector()); + int seqIdx = 0; + for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) { reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); + if (subSeqStarts[i] == seqStarts[seqIdx + 1]) { + seqIdx++; + if (seqIdx == seqNum) return; + reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); + } } + } else { + reorganizedSeqInfo.resize(1, std::vector(seqNum + 1, 0)); + memcpy(reorganizedSeqInfo[0].data(), + seqStarts, + sizeof(int) * seqStartPos->getSize()); } } From b97f020f9c34da04e093deb4691f6286f4017e62 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Thu, 10 Aug 2017 10:37:07 +0800 Subject: [PATCH 4/6] fix unittest error. --- paddle/gserver/layers/SequenceSliceLayer.cpp | 3 +-- python/paddle/trainer_config_helpers/layers.py | 1 + .../protostr/test_kmax_seq_socre_layer.protostr | 17 +++++------------ .../tests/configs/test_kmax_seq_socre_layer.py | 4 +--- 4 files changed, 8 insertions(+), 17 deletions(-) diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp index 424f898553993..165ee6311a62d 100644 --- a/paddle/gserver/layers/SequenceSliceLayer.cpp +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -70,9 +70,8 @@ void SequenceSliceLayer::checkInputs() { const Argument& inputSeq = getInput(0); CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer " << "must be a sequence."; - // Check inputs const MatrixPtr indices1 = getInputValue(1); - CHECK_EQ(indices1->getHeight(), + CHECK_EQ(static_cast(indices1->getHeight()), inputSeq.hasSubseq() ? 
inputSeq.getNumSubSequences() : inputSeq.getNumSequences()) << "Height of the second input should be equal to number of sequence " diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index e51332da0d06d..79d24cfe5bd54 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -6242,6 +6242,7 @@ def seq_slice_layer(input, starts, ends, name=None): name, LayerType.SEQ_SLICE, parents=[input], size=input.size) +@wrap_name_default() @layer_support() def kmax_sequence_score_layer(input, name=None, beam_size=1): """ diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr index 81bd71f68eb3f..3d32220bfbf5f 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr @@ -1,12 +1,6 @@ type: "nn" layers { - name: "input" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "data" + name: "input_seq" type: "data" size: 128 active_type: "" @@ -17,7 +11,7 @@ layers { size: 1 active_type: "exponential" inputs { - input_layer_name: "data" + input_layer_name: "input_seq" input_parameter_name: "___fc_layer_0__.w0" } bias_parameter_name: "___fc_layer_0__.wbias" @@ -51,15 +45,14 @@ parameters { initial_strategy: 0 initial_smart: false } -input_layer_names: "data" +input_layer_names: "input_seq" output_layer_names: "__kmax_sequence_score_layer_0__" sub_models { name: "root" - layer_names: "input" - layer_names: "data" + layer_names: "input_seq" layer_names: "__fc_layer_0__" layer_names: "__kmax_sequence_score_layer_0__" - input_layer_names: "data" + input_layer_names: "input_seq" output_layer_names: "__kmax_sequence_score_layer_0__" is_recurrent_layer_group: false } 
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py index d245c5a41c793..48d0cd55da248 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py @@ -2,9 +2,7 @@ #coding=utf-8 from paddle.trainer_config_helpers import * -data = data_layer(name='input', size=300) - -data = data_layer(name="data", size=128) +data = data_layer(name="input_seq", size=128) scores = fc_layer(input=data, size=1, act=ExpActivation()) kmax_seq_id = kmax_sequence_score_layer(input=scores, beam_size=5) From 82e4fab4e31d730d2d9d4df7e223881e9db693a9 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 23 Aug 2017 14:07:53 +0800 Subject: [PATCH 5/6] follow comments. --- paddle/gserver/layers/KmaxSeqScoreLayer.cpp | 26 ++++---- paddle/gserver/layers/SequenceSliceLayer.cpp | 63 ++++++++----------- .../gserver/layers/SubNestedSequenceLayer.cpp | 29 +++++---- python/paddle/trainer/config_parser.py | 5 +- 4 files changed, 58 insertions(+), 65 deletions(-) diff --git a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp index 3b5060e3ce980..d5407555b248d 100644 --- a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp +++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp @@ -80,13 +80,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) { << "input of " << getName() << " must be a sequence or a nested sequence."; CHECK_EQ(input.value->getWidth(), 1UL) - << "input of " << getName() - << " is score over a sequence or a nested sequence, so its width " - << " must be 1."; + << "input of " << getName() << " are scores over a sequence or " + << "a nested sequence, so its width must be 1."; if (useGpu_) { - // this Layer runs only in CPU, if the model is runing on GPU, - // then copy the input to this layer from GPU to CPU. 
+ /* + * currently, this Layer only runs in CPU, if the other part of the model is + * runing on GPU, then copy the input to this layer from GPU to CPU. + */ Matrix::resizeOrCreate(scores_, inputScore->getHeight(), 1, @@ -97,13 +98,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) { scores_ = inputScore; } - // TODO(caoying) - // In PaddlePaddle, the currently available matrixes all a have real-typed - // data field, but the selected indices information are actually int-typed - // (with -1 as a special token). Storing indices information in real-typed - // Matrix leads to converting real to int. This is very dangerous if a user - // fills this matrix himself, invalid data may occur. - // The selected indices should be stored in an int-typed matrix. + /* + * TODO(caoying) + * In PaddePaddle, currently all matrices are real number types, + * but output of this layer which is some selected indices of the give + * sequence are actually filled with int types so that storing int types + * information in a real number matrix is dangerous, since real numbers will + * be convered to int types. + */ Matrix::resizeOrCreate( output_.value, input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(), diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp index 165ee6311a62d..4da65ade0b423 100644 --- a/paddle/gserver/layers/SequenceSliceLayer.cpp +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -31,13 +31,15 @@ class SequenceSliceLayer : public Layer { void backward(const UpdateCallback& callback = nullptr) override; private: - // TODO(caoying) - // In PaddlePaddle, the currently available matrixes all a have real-typed - // data field, but the selected indices information are actually int-typed - // (with -1 as a special token). Storing indices information in real-typed - // Matrix leads to converting real to int. This is very dangerous if a user - // fills this matrix himself, invalid data may occur. 
- // The selected indices should be stored in an int-typed matrix. + /* + * TODO(caoying) + * In PaddePaddle, currently all matrices are real number types, + * but the second and the (optional) third input which are some + * selected indices of the give sequence to trim the sequence, are actually + * filled with int types so that storing int types information in real number + * matrices is very dangerous, since real numbers will be convered to int + * types. If a user fills this matrix himself, invalid data may occor. + */ MatrixPtr startIdsOnCpu_; MatrixPtr endIdsOnCpu_; @@ -68,7 +70,7 @@ bool SequenceSliceLayer::init(const LayerMap& layerMap, void SequenceSliceLayer::checkInputs() { const Argument& inputSeq = getInput(0); - CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer " + CHECK(inputSeq.hasSeq()) << "The first input of sequence slice layer " << "must be a sequence."; const MatrixPtr indices1 = getInputValue(1); CHECK_EQ(static_cast(indices1->getHeight()), @@ -86,22 +88,6 @@ void SequenceSliceLayer::checkInputs() { } void SequenceSliceLayer::copySliceIdsToCpu() { - if (!useGpu_) { - if (inputLayers_.size() == 2U) { - if (config_.select_first()) { - startIdsOnCpu_ = getInputValue(1); - endIdsOnCpu_ = nullptr; - } else { - startIdsOnCpu_ = nullptr; - endIdsOnCpu_ = getInputValue(1); - } - } else if (inputLayers_.size() == 3U) { - startIdsOnCpu_ = getInputValue(1); - endIdsOnCpu_ = getInputValue(2); - } - return; - } - const MatrixPtr indices1 = getInputValue(1); if (inputLayers_.size() == 2U) { if (config_.select_first()) { @@ -141,22 +127,19 @@ void SequenceSliceLayer::copySliceIdsToCpu() { void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, const MatrixPtr ends) { + CHECK(starts && ends); + outSeqStartPos_.resize(1, 0); outSubSeqStartPos_.resize(1, 0); selectedRows_.clear(); size_t beamSize = starts ? 
starts->getWidth() : ends->getWidth(); - // iterate over sequence size_t rowIdx = 0; for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) { - // iterate over sub-sequence in a sequence for (size_t j = 0; j < inputSeqInfoVec_[i].size() - 1; ++j) { - // iterate over each index for slicing. for (size_t k = 0; k < beamSize; ++k) { - if (starts) { - if (starts->getElement(rowIdx, k) == -1.) break; - } else if (ends->getElement(rowIdx, k) == -1.) - break; + if (starts && starts->getElement(rowIdx, k) == -1.) break; + if (ends && ends->getElement(rowIdx, k) == -1.) break; int begPos = inputSeqInfoVec_[i][j]; if (starts) begPos += starts->getElement(rowIdx, k); @@ -165,7 +148,7 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k); int seqLen = endPos - begPos + 1; - CHECK(seqLen); + CHECK_LT(seqLen, 0U); for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m); inputSeqInfoVec_.size() > 1 ? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen) @@ -208,7 +191,16 @@ void SequenceSliceLayer::forward(PassType passType) { Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions, inputSeq.subSequenceStartPositions, inputSeqInfoVec_); - copySliceIdsToCpu(); + if (!useGpu_) { + if (inputLayers_.size() == 2U) { + startIdsOnCpu_ = config_.select_first() ? getInputValue(1) : nullptr; + endIdsOnCpu_ = config_.select_first() ? nullptr : getInputValue(1); + } else if (inputLayers_.size() == 3U) { + startIdsOnCpu_ = getInputValue(1); + endIdsOnCpu_ = getInputValue(2); + } + } else + copySliceIdsToCpu(); // calculate the selected row indices in a batch, // and build the output sequence information. 
@@ -221,10 +213,7 @@ void SequenceSliceLayer::forward(PassType passType) { } void SequenceSliceLayer::backward(const UpdateCallback& callback) { - MatrixPtr inputSeqGrad = getInputGrad(0); - MatrixPtr outputGrad = getOutputGrad(); - - outputGrad->addToRows(*inputSeqGrad, *rowIndice_); + getOutputGrad()->addToRows(*getInputGrad(0), *rowIndice_); } } // namespace paddle diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp index c8607d50f5878..e9bee77212065 100644 --- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp +++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp @@ -58,23 +58,28 @@ class SubNestedSequenceLayer : public Layer { void calSelectedRows(const MatrixPtr selectedIndices, const std::vector>& inputSeqInfo); - // if the second input of this layer is on GPU memory, copy it to CPU memory. - // TODO(caoying) - // In PaddlePaddle, the currently available matrixes all a have real-typed - // data field, but the selected indices information are actually int-typed - // (with -1 as a special token). Storing indices information in real-typed - // Matrix leads to converting real to int. This is very dangerous if a user - // fills this matrix himself, invalid data may occur. - // The selected indices should be stored in an int-typed matrix. + /* + * TODO(caoying) + * In PaddePaddle, currently all matrices are real number types, + * but the second is some selected indices of the give sequence to trim + * the nested sequence, are actually filled with int types so that storing + * int types information in real number matrices is very dangerous, since + * real numbers will be convered to int types. If a user fills this matrix + * himself, invalid data may occor. + * + * if the second input of this layer is on GPU memory, copy it to CPU memory. + */ MatrixPtr selIdsCpu_; - // reorganized sequenceStartPositions and subSequenceStartPositions - // into a 2d vector to facilitate the sequence selection process. 
+ /* + * reorganize sequenceStartPositions and subSequenceStartPositions + * into a 2d vector to facilitate the sequence selection process. + */ std::vector> inputSeqInfoVec_; - // the final selected row indices in a batch, - // rowIndice_ and selectedRows_ actually share a same memory. + /* store the final selected row indices in a batch */ IVectorPtr rowIndice_; + /* rowIndice_ and selectedRows_ actually share a same memory. */ std::vector selectedRows_; }; diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index af14007de603c..2fcccc69482cf 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2717,10 +2717,7 @@ def __init__(self, name, inputs, starts, ends, bias=False, **xargs): 'If start and end indices are both given to' 'sequence slice layer, they should have the same width.') elif len(inputs) == 2: - if starts is not None: - self.config.select_first = True - else: - self.config.select_first = False + self.config.select_first = (starts is not None) @config_layer('sub_nested_seq') From 377401fb0cc7947d09b007a2c52cb679905cf2b5 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 23 Aug 2017 14:13:51 +0800 Subject: [PATCH 6/6] fix a bug. 
--- paddle/gserver/layers/SequenceSliceLayer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp index 4da65ade0b423..5d72d373047fd 100644 --- a/paddle/gserver/layers/SequenceSliceLayer.cpp +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -127,7 +127,8 @@ void SequenceSliceLayer::copySliceIdsToCpu() { void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, const MatrixPtr ends) { - CHECK(starts && ends); + CHECK(starts || ends) << "At least one of the start or end indices " + << "should be given."; outSeqStartPos_.resize(1, 0); outSubSeqStartPos_.resize(1, 0); @@ -148,7 +149,7 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k); int seqLen = endPos - begPos + 1; - CHECK_LT(seqLen, 0U); + CHECK_GT(seqLen, 0U); for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m); inputSeqInfoVec_.size() > 1 ? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen)