From 9298a9ec0d291aa919d59f57ce6a8562d781bd85 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Fri, 24 Mar 2017 14:35:39 +0800
Subject: [PATCH 1/5] stride pooling for seqlastin and seqfirstin

---
 .../layers/SequenceLastInstanceLayer.cpp      | 23 ++++----
 paddle/gserver/layers/SequencePoolLayer.cpp   | 11 +++-
 paddle/gserver/layers/SequencePoolLayer.h     |  7 +++
 paddle/gserver/tests/test_LayerGrad.cpp       | 16 ++++--
 paddle/parameter/Argument.cpp                 | 40 ++++++++++++++
 paddle/parameter/Argument.h                   |  8 +++
 paddle/parameter/tests/CMakeLists.txt         |  1 +
 paddle/parameter/tests/test_argument.cpp      | 52 +++++++++++++++++++
 proto/ModelConfig.proto                       |  5 ++
 python/paddle/trainer/config_parser.py        | 18 +++++--
 .../paddle/trainer_config_helpers/layers.py   | 21 ++++++++
 .../tests/configs/last_first_seq.py           |  3 ++
 .../configs/protostr/last_first_seq.protostr  | 33 ++++++++++++
 .../configs/protostr/shared_gru.protostr      |  2 +
 .../configs/protostr/shared_lstm.protostr     |  2 +
 .../protostr/simple_rnn_layers.protostr       |  6 +++
 .../configs/protostr/test_rnn_group.protostr  |  6 +++
 17 files changed, 235 insertions(+), 19 deletions(-)
 create mode 100644 paddle/parameter/tests/test_argument.cpp

diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
index 7a13cd7ad0fec..7ac087663a6c6 100644
--- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
+++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
@@ -25,6 +25,9 @@ namespace paddle {
  * Input: a sequence
  * If SequenceLevel = kNonseq:
  *   Output: a sequence containing only the last instance of the input sequence
+ *   If stride_ > 0:
+ *     Output: a shorten sequence containing several last instances of the
+ *             input sequence with stride window.
  * If SequenceLevel = kSeq:
  *   Check input sequence must has sub-sequence
  *   Output: a sequence containing only the last instance of each sub-sequence
@@ -37,6 +40,8 @@ class SequenceLastInstanceLayer : public SequencePoolLayer {
 protected:
   MatrixPtr tmpSrc_;
   MatrixPtr tmpDest_;
+  bool select_first_;
+  std::vector<int> insId_;

 public:
   explicit SequenceLastInstanceLayer(const LayerConfig& config)
@@ -54,6 +59,7 @@ REGISTER_LAYER(seqlastins, SequenceLastInstanceLayer);
 bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
                                      const ParameterMap& parameterMap) {
   SequencePoolLayer::init(layerMap, parameterMap);
+  select_first_ = config_.select_first();

   tmpSrc_ =
       Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_);
@@ -74,9 +80,13 @@ void SequenceLastInstanceLayer::forward(PassType passType) {
     AsyncGpuBlock asyncGpuBlock;
     REGISTER_TIMER_INFO("SequenceLastInstanceLayerForward", getName().c_str());

+    insId_.clear();
     for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
-      int insId =
-          config_.select_first() ? starts[seqId] : starts[seqId + 1] - 1;
+      int insId = (stride_ > 0)
+                      ? (select_first_ ? stridePositions_[seqId]
+                                       : stridePositions_[seqId + 1] - 1)
+                      : (select_first_ ? starts[seqId] : starts[seqId + 1] - 1);
+      insId_.push_back(insId);

       outputValue->subMatrix(seqId, 1, tmpDest_)
           ->assign(*(inputValue->subMatrix(insId, 1, tmpSrc_)));
@@ -96,18 +106,13 @@ void SequenceLastInstanceLayer::backward(const UpdateCallback& callback) {
   MatrixPtr inputGrad = getInputGrad(0);
   MatrixPtr outputGrad = getOutputGrad();
-  const int* starts = startPositions_->getData(false);
-  size_t numSequences = startPositions_->getSize() - 1;

   if (inputGrad) {
     AsyncGpuBlock asyncGpuBlock;
     REGISTER_TIMER_INFO("SequenceLastInstanceLayerBackward", getName().c_str());

-    for (size_t seqId = 0; seqId < numSequences; ++seqId) {
-      int insId =
-          config_.select_first() ? starts[seqId] : starts[seqId + 1] - 1;
-
-      inputGrad->subMatrix(insId, 1, tmpDest_)
+    for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
+      inputGrad->subMatrix(insId_[seqId], 1, tmpDest_)
           ->add(*(outputGrad->subMatrix(seqId, 1, tmpSrc_)));
     }
   }
diff --git a/paddle/gserver/layers/SequencePoolLayer.cpp b/paddle/gserver/layers/SequencePoolLayer.cpp
index 5807c4249620d..2bf180a043b98 100644
--- a/paddle/gserver/layers/SequencePoolLayer.cpp
+++ b/paddle/gserver/layers/SequencePoolLayer.cpp
@@ -37,6 +37,7 @@ bool SequencePoolLayer::init(const LayerMap& layerMap,
   } else {
     LOG(FATAL) << "Unknown trans_type: " << config_.trans_type();
   }
+  stride_ = config_.seq_pool_stride();
   setNeedSequenceInfo(false);
   return true;
 }
@@ -55,8 +56,6 @@ void SequencePoolLayer::forward(PassType passType) {
   CHECK_EQ(starts->getData()[newBatchSize_], input.getBatchSize());
   CHECK_EQ(newBatchSize_, starts->getSize() - 1);

-  resetOutput(newBatchSize_, dim);
-
   /* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
    * thus, in this case, output_ has no sequenceStartPositions.
    * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
@@ -67,6 +66,14 @@ void SequencePoolLayer::forward(PassType passType) {
         << "when trans_type = seq, input must hasSubseq";
     output_.degradeSequence(input);
   }
+  if (stride_ > 0) {
+    CHECK_EQ(input.hasSubseq(), 0UL)
+        << "sequence stride pooling is not suitable for hasSubseq now";
+    output_.poolSequenceWithStride(input, stride_, &stridePositions_);
+    newBatchSize_ = stridePositions_.size() - 1;
+  }
+
+  resetOutput(newBatchSize_, dim);
 }

 void SequencePoolLayer::backward(const UpdateCallback& callback) {
diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h
index 85b51ccd1dc7e..5ca1c1a82b913 100644
--- a/paddle/gserver/layers/SequencePoolLayer.h
+++ b/paddle/gserver/layers/SequencePoolLayer.h
@@ -26,6 +26,10 @@ namespace paddle {
  *   Output: output size is the number of input sequences (NOT input instances)
  *           output[i] = seqlastin/average/max_{for each instance in this
  *           sequence}{input[i]}
+ *   If stride_ > 0:
+ *     Check input sequence must don't have sub-sequence
+ *     Output: a shorten sequence, pooling is performed upon a small local
+ *             area
  * If SequenceLevel = kSeq:
  *   Check input sequence must has sub-sequence
  *   Output: output size is the number of input sub-sequences
@@ -42,6 +46,9 @@ class SequencePoolLayer : public Layer {
   enum SequenceLevel { kNonSeq = 0, kSeq = 1 };
   size_t newBatchSize_;
   ICpuGpuVectorPtr startPositions_;
+  int stride_;
+  // store the start position of each stride window
+  std::vector<int> stridePositions_;

 public:
   explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {}
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 5f8a7b79a06e0..ce83531416f07 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -804,10 +804,14 @@ TEST(Layer, ExpandLayer) {
   testExpandLayer("seq", true);    // seq expand to hasSubseq
 }

-void testDegradeLayer(bool hasSubseq, string layer_type, string trans_type) {
+void testDegradeLayer(bool hasSubseq,
+                      string layer_type,
+                      string trans_type,
+                      int stride = -1) {
   TestConfig config;
   config.layerConfig.set_type(layer_type);
   config.layerConfig.set_size(10);
+  config.layerConfig.set_seq_pool_stride(stride);
   config.biasSize = 0;

   config.inputDefs.push_back(
@@ -827,12 +831,14 @@ void testDegradeLayer(bool hasSubseq, string layer_type, string trans_type) {
   if (layer_type == "average") {
     for (auto strategy : {"average", "sum", "squarerootn"}) {
       LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type
-                << " average_strategy=" << strategy;
+                << " average_strategy=" << strategy
+                << " seq_pool_stride=" << stride;
       config.layerConfig.set_average_strategy(strategy);
       testDegradeLayerGrad(config, layer_type);
     }
   } else {
-    LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type;
+    LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type
+              << " seq_pool_stride=" << stride;
     testDegradeLayerGrad(config, layer_type);
   }
 }
@@ -847,6 +853,10 @@ TEST(Layer, SequenceLastInstanceLayer) {
   testDegradeLayer(false,
                    "seqlastins",
                    "non-seq");  // seq seqlastins to non-seq
+  testDegradeLayer(false,
+                   "seqlastins",
+                   "non-seq",
+                   5);  // seq seqlastins to a shorten seq, stride window = 5
   testDegradeLayer(true,
                    "seqlastins",
                    "non-seq");  // hasSubseq seqlastins to non-seq
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 4139f59a2c8e6..2657c00ebb3c7 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -559,6 +559,46 @@ void Argument::degradeSequence(const Argument& input) {
   tgtBuf[numSequences] = numSubSequences;
 }

+void Argument::poolSequenceWithStride(const Argument& input,
+                                      size_t stride,
+                                      std::vector<int>* stridePostions) {
+  /*
+   * If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
+   * then sequenceStartPositions = [0, 2, 3, 4, 7],
+   * and stridePostions = [0, 5, 9, 14, 17, 22, 27, 30]
+   */
+  CHECK(input.sequenceStartPositions);
+  CHECK_EQ(input.hasSubseq(), 0UL);
+  CHECK_GT(stride, 0) << "stride must larger than 0";
+  size_t numSequences = input.getNumSequences();
+  ICpuGpuVector::resizeOrCreate(
+      sequenceStartPositions, numSequences + 1, false);
+  const int* starts = input.sequenceStartPositions->getData(false);
+  int* tgtBuf = sequenceStartPositions->getMutableData(false);
+  // first index of target sequence and stride positions are both 0
+  tgtBuf[0] = 0;
+  (*stridePostions).clear();
+  for (size_t seqId = 0; seqId < numSequences; ++seqId) {
+    size_t seqLength = starts[seqId + 1] - starts[seqId];
+    (*stridePostions).emplace_back(starts[seqId]);
+    if (seqLength == 0) {
+      // empty sequence
+      tgtBuf[seqId + 1] = tgtBuf[seqId];
+    } else if (seqLength < stride) {
+      tgtBuf[seqId + 1] = tgtBuf[seqId] + 1;
+    } else {
+      tgtBuf[seqId + 1] = tgtBuf[seqId] + ceil((float)seqLength / stride);
+      int size =
+          (seqLength % stride) ? seqLength / stride : seqLength / stride - 1;
+      for (int i = 0; i < size; i++) {
+        (*stridePostions).emplace_back((*stridePostions).back() + stride);
+      }
+    }
+  }
+  (*stridePostions).emplace_back(starts[numSequences]);
+  CHECK_EQ((*stridePostions).size() - 1, tgtBuf[numSequences]);
+}
+
 void Argument::getValueString(
     std::unordered_map<std::string, std::string>* out) const {
   if (value) {
diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index 9fd84bc4b7e0a..760029c2fe6ba 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -291,6 +291,14 @@ struct Argument {
   */
   void degradeSequence(const Argument& input);

+  /*
+   After pooling with stride n (n is smaller than sequence length),
+   a long sequence will be shorten.
+   This function is not suitable for sequence with sub-sequence now.
+  */
+  void poolSequenceWithStride(const Argument& input,
+                              size_t stride,
+                              std::vector<int>* stridePositions);
   /**
    * @brief getValueString will return the argument's output in string. There
    * are several kinds of output. The keys of output dictionary are 'value',
diff --git a/paddle/parameter/tests/CMakeLists.txt b/paddle/parameter/tests/CMakeLists.txt
index cab264db8e500..181ccdc1f099e 100644
--- a/paddle/parameter/tests/CMakeLists.txt
+++ b/paddle/parameter/tests/CMakeLists.txt
@@ -1 +1,2 @@
 add_simple_unittest(test_common)
+add_simple_unittest(test_argument)
diff --git a/paddle/parameter/tests/test_argument.cpp b/paddle/parameter/tests/test_argument.cpp
new file mode 100644
index 0000000000000..ba17e8a298e80
--- /dev/null
+++ b/paddle/parameter/tests/test_argument.cpp
@@ -0,0 +1,52 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include <paddle/parameter/Argument.h>
+
+using namespace paddle;  // NOLINT
+
+TEST(Argument, poolSequenceWithStride) {
+  Argument input, output;
+  ICpuGpuVector::resizeOrCreate(input.sequenceStartPositions, 5, false);
+  int* inStart = input.sequenceStartPositions->getMutableData(false);
+  inStart[0] = 0;
+  inStart[1] = 9;
+  inStart[2] = 14;
+  inStart[3] = 17;
+  inStart[4] = 30;
+
+  std::vector<int> stridePositions;
+  stridePositions.clear();
+  output.poolSequenceWithStride(input, 5 /* stride */, &stridePositions);
+
+  const int* outStart = output.sequenceStartPositions->getData(false);
+  CHECK_EQ(outStart[0], 0);
+  CHECK_EQ(outStart[1], 2);
+  CHECK_EQ(outStart[2], 3);
+  CHECK_EQ(outStart[3], 4);
+  CHECK_EQ(outStart[4], 7);
+
+  CHECK_EQ(stridePositions.size(), 8);
+  int strideResult[] = {0, 5, 9, 14, 17, 22, 27, 30};
+  for (int i = 0; i < 8; i++) {
+    CHECK_EQ(stridePositions[i], strideResult[i]);
+  }
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  initMain(argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index 65d5d50277b66..4f9b53d6f6553 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -441,6 +441,11 @@ message LayerConfig {

   // blank label used in ctc loss
   optional uint32 blank = 52 [default = 0];
+
+  // stride parameter for seqlastins layer, AverageLayer, MaxLayer, which
+  // controls the scope of pooling operation. can be set > 0.
+  // leave empty or set to -1 to disable this stride pooling.
+  optional int32 seq_pool_stride = 53 [default = -1];
 }

 message EvaluatorConfig {
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 1394773b4ff12..bfe71501758e3 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2480,6 +2480,7 @@ def __init__(self,
                  active_type='linear',
                  trans_type='non-seq',
                  bias=False,
+                 stride=-1,
                  **xargs):
         super(SequenceLastInstanceLayer, self).__init__(
             name,
@@ -2490,10 +2491,11 @@ def __init__(self,
             **xargs)
         config_assert(
             len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input')
+        if trans_type == 'seq':
+            config_assert(stride == -1, 'subseq do not support stride window')
         self.config.trans_type = trans_type
-        for input_index in xrange(len(self.inputs)):
-            input_layer = self.get_input_layer(input_index)
-            self.set_layer_size(input_layer.size)
+        self.config.seq_pool_stride = stride
+        self.set_layer_size(self.get_input_layer(0).size)
         self.create_bias_parameter(bias, self.config.size)

@@ -2505,10 +2507,16 @@ def __init__(self,
                  active_type='linear',
                  trans_type='non-seq',
                  bias=False,
+                 stride=-1,
                  **xargs):
         super(SequenceFirstInstanceLayer, self).__init__(
-            name, inputs=inputs, active_type=active_type, bias=bias, **xargs)
-        self.config.trans_type = trans_type
+            name,
+            inputs=inputs,
+            active_type=active_type,
+            trans_type=trans_type,
+            bias=bias,
+            stride=stride,
+            **xargs)
         self.config.select_first = True
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index b006eb46d99fd..9e4ca5794b08e 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -1301,10 +1301,15 @@ def grumemory(input,
 def last_seq(input,
              name=None,
              agg_level=AggregateLevel.EACH_TIMESTEP,
+             stride=-1,
              layer_attr=None):
     """
     Get Last Timestamp Activation of a sequence.

+    If stride > 0, get last timestamp upon a stride window of sequence.
+    And a long sequence will be shorten. Note that for sequence with
+    sub-sequence, stride is default -1 now.
+
     The simple usage is:

     .. code-block:: python
@@ -1316,6 +1321,8 @@ def last_seq(input,
     :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
+    :param stride: parameter of stride window.
+    :type stride: Int
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.
     :return: LayerOutput object.
@@ -1327,11 +1334,15 @@ def last_seq(input,
                     " series information at all. Maybe you want to use"
                     " first_seq instead.")

+    if agg_level == AggregateLevel.EACH_SEQUENCE:
+        assert stride == -1
+
     Layer(
         name=name,
         type=LayerType.SEQUENCE_LAST_INSTANCE,
         inputs=[input.name],
         trans_type=agg_level,
+        stride=stride,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
     return LayerOutput(
         name,
@@ -1345,10 +1356,16 @@ def last_seq(input,
 def first_seq(input,
               name=None,
               agg_level=AggregateLevel.EACH_TIMESTEP,
+              stride=-1,
               layer_attr=None):
     """
     Get First Timestamp Activation of a sequence.

+    If stride > 0, get first timestamp upon a stride window of sequence,
+    and a long sequence will be shorten. Note that for sequence with
+    sub-sequence, stride is default -1 now.
+
+
     The simple usage is:

     .. code-block:: python
@@ -1372,11 +1389,15 @@ def first_seq(input,
                     ' time series information at all. Maybe you want to use'
                     ' last_seq instead.')

+    if agg_level == AggregateLevel.EACH_SEQUENCE:
+        assert stride == -1
+
     Layer(
         name=name,
         type=LayerType.SEQUENCE_FIRST_INSTANCE,
         inputs=[input.name],
         trans_type=agg_level,
+        stride=stride,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
     return LayerOutput(
         name,
diff --git a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
index 3a1a0132b64bb..3c6dbc95e5489 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
@@ -14,4 +14,7 @@ for al in agg_level:
     opts.append(op(input=din, agg_level=al))

+for op in seq_op:
+    opts.append(op(input=din, agg_level=AggregateLevel.EACH_TIMESTEP, stride=5))
+
 outputs(opts)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
index 7b2911f8e367e..12b2255f3a411 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
@@ -15,6 +15,7 @@ layers {
   }
   select_first: true
   trans_type: "seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__first_seq_1__"
@@ -26,6 +27,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_0__"
@@ -36,6 +38,7 @@ layers {
     input_layer_name: "data"
   }
   trans_type: "seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_1__"
@@ -46,12 +49,38 @@ layers {
     input_layer_name: "data"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
+}
+layers {
+  name: "__first_seq_2__"
+  type: "seqlastins"
+  size: 30
+  active_type: "linear"
+  inputs {
+    input_layer_name: "data"
+  }
+  select_first: true
+  trans_type: "non-seq"
+  seq_pool_stride: 5
+}
+layers {
+  name: "__last_seq_2__"
+  type: "seqlastins"
+  size: 30
+  active_type: "linear"
+  inputs {
+    input_layer_name: "data"
+  }
+  trans_type: "non-seq"
+  seq_pool_stride: 5
 }
 input_layer_names: "data"
 output_layer_names: "__first_seq_0__"
 output_layer_names: "__first_seq_1__"
 output_layer_names: "__last_seq_0__"
 output_layer_names: "__last_seq_1__"
+output_layer_names: "__first_seq_2__"
+output_layer_names: "__last_seq_2__"
 sub_models {
   name: "root"
   layer_names: "data"
@@ -59,11 +88,15 @@ sub_models {
   layer_names: "__first_seq_1__"
   layer_names: "__last_seq_0__"
   layer_names: "__last_seq_1__"
+  layer_names: "__first_seq_2__"
+  layer_names: "__last_seq_2__"
   input_layer_names: "data"
   output_layer_names: "__first_seq_0__"
   output_layer_names: "__first_seq_1__"
   output_layer_names: "__last_seq_0__"
   output_layer_names: "__last_seq_1__"
+  output_layer_names: "__first_seq_2__"
+  output_layer_names: "__last_seq_2__"
   is_recurrent_layer_group: false
 }

diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
index b6905824f0cb0..64530146a1458 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
@@ -128,6 +128,7 @@ layers {
     input_layer_name: "__simple_gru_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_1__"
@@ -138,6 +139,7 @@ layers {
     input_layer_name: "__simple_gru_1__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__fc_layer_0__"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
index 0a83499b72480..79fa4c74f081a 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
@@ -210,6 +210,7 @@ layers {
     input_layer_name: "__lstm_group_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_1__"
@@ -220,6 +221,7 @@ layers {
     input_layer_name: "__lstm_group_1__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__fc_layer_0__"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
index dacb40185f863..68fa881b4f140 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
@@ -143,6 +143,7 @@ layers {
     input_layer_name: "__recurrent_layer_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__first_seq_0__"
@@ -154,6 +155,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_1__"
@@ -164,6 +166,7 @@ layers {
     input_layer_name: "__lstmemory_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__first_seq_1__"
@@ -175,6 +178,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_2__"
@@ -185,6 +189,7 @@ layers {
     input_layer_name: "__gru_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__first_seq_2__"
@@ -196,6 +201,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 parameters {
   name: "___fc_layer_0__.w0"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
index a0fb729e062bd..77b447aa9db2a 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
@@ -96,6 +96,7 @@ layers {
     input_layer_name: "rnn_forward"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__recurrent_group_1__"
@@ -145,6 +146,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__recurrent_group_2__"
@@ -193,6 +195,7 @@ layers {
     input_layer_name: "rnn_subseq_forward"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__lstm_group_0___recurrent_group"
@@ -282,6 +285,7 @@ layers {
     input_layer_name: "__lstm_group_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__gru_group_0___recurrent_group"
@@ -330,6 +334,7 @@ layers {
     input_layer_name: "__gru_group_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__recurrent_group_3__"
@@ -378,6 +383,7 @@ layers {
     input_layer_name: "__fc_layer_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 parameters {
   name: "___mixed_0__.w0"

From cbbec595472189ac252c742cdc6d5f2b435235bd Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Fri, 24 Mar 2017 17:05:04 +0800
Subject: [PATCH 2/5] adjust poolSequenceWithStride interface for average and
 max

---
 .../gserver/layers/SequenceLastInstanceLayer.cpp |  8 +++-----
 paddle/gserver/layers/SequencePoolLayer.cpp      |  2 +-
 paddle/gserver/layers/SequencePoolLayer.h        |  2 +-
 paddle/parameter/Argument.cpp                    | 15 +++++++++------
 paddle/parameter/Argument.h                      |  2 +-
 paddle/parameter/tests/test_argument.cpp         |  7 +++----
 python/paddle/trainer_config_helpers/layers.py   |  3 ++-
 7 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
index 7ac087663a6c6..c70c2b7421181 100644
--- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
+++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
@@ -72,7 +72,8 @@ bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
 void SequenceLastInstanceLayer::forward(PassType passType) {
   SequencePoolLayer::forward(passType);

-  const int* starts = startPositions_->getData(false);
+  auto starts = (stride_ > 0) ? stridePositions_->getData()
+                              : startPositions_->getData(false);
   MatrixPtr inputValue = getInputValue(0);
   MatrixPtr outputValue = getOutputValue();

@@ -82,10 +83,7 @@ void SequenceLastInstanceLayer::forward(PassType passType) {

     insId_.clear();
     for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
-      int insId = (stride_ > 0)
-                      ? (select_first_ ? stridePositions_[seqId]
-                                       : stridePositions_[seqId + 1] - 1)
-                      : (select_first_ ? starts[seqId] : starts[seqId + 1] - 1);
+      int insId = select_first_ ? starts[seqId] : starts[seqId + 1] - 1;
       insId_.push_back(insId);

       outputValue->subMatrix(seqId, 1, tmpDest_)
diff --git a/paddle/gserver/layers/SequencePoolLayer.cpp b/paddle/gserver/layers/SequencePoolLayer.cpp
index 2bf180a043b98..f853905103a0e 100644
--- a/paddle/gserver/layers/SequencePoolLayer.cpp
+++ b/paddle/gserver/layers/SequencePoolLayer.cpp
@@ -70,7 +70,7 @@ void SequencePoolLayer::forward(PassType passType) {
     CHECK_EQ(input.hasSubseq(), 0UL)
         << "sequence stride pooling is not suitable for hasSubseq now";
     output_.poolSequenceWithStride(input, stride_, &stridePositions_);
-    newBatchSize_ = stridePositions_.size() - 1;
+    newBatchSize_ = stridePositions_->getSize() - 1;
   }

   resetOutput(newBatchSize_, dim);
diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h
index 5ca1c1a82b913..92d7a841f0c73 100644
--- a/paddle/gserver/layers/SequencePoolLayer.h
+++ b/paddle/gserver/layers/SequencePoolLayer.h
@@ -48,7 +48,7 @@ class SequencePoolLayer : public Layer {
   ICpuGpuVectorPtr startPositions_;
   int stride_;
   // store the start position of each stride window
-  std::vector<int> stridePositions_;
+  IVectorPtr stridePositions_;

 public:
   explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {}
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 2657c00ebb3c7..3cc637587bc28 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -561,7 +561,7 @@ void Argument::degradeSequence(const Argument& input) {

 void Argument::poolSequenceWithStride(const Argument& input,
                                       size_t stride,
-                                      std::vector<int>* stridePostions) {
+                                      IVectorPtr* stridePostions) {
   /*
    * If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
    * then sequenceStartPositions = [0, 2, 3, 4, 7],
@@ -577,10 +577,10 @@ void Argument::poolSequenceWithStride(const Argument& input,
   int* tgtBuf = sequenceStartPositions->getMutableData(false);
   // first index of target sequence and stride positions are both 0
   tgtBuf[0] = 0;
-  (*stridePostions).clear();
+  std::vector<int> stridePos;
   for (size_t seqId = 0; seqId < numSequences; ++seqId) {
     size_t seqLength = starts[seqId + 1] - starts[seqId];
-    (*stridePostions).emplace_back(starts[seqId]);
+    stridePos.emplace_back(starts[seqId]);
     if (seqLength == 0) {
       // empty sequence
       tgtBuf[seqId + 1] = tgtBuf[seqId];
@@ -591,12 +591,15 @@ void Argument::poolSequenceWithStride(const Argument& input,
       int size =
           (seqLength % stride) ? seqLength / stride : seqLength / stride - 1;
       for (int i = 0; i < size; i++) {
-        (*stridePostions).emplace_back((*stridePostions).back() + stride);
+        stridePos.emplace_back(stridePos.back() + stride);
       }
     }
   }
-  (*stridePostions).emplace_back(starts[numSequences]);
-  CHECK_EQ((*stridePostions).size() - 1, tgtBuf[numSequences]);
+  stridePos.emplace_back(starts[numSequences]);
+  int size = stridePos.size();
+  CHECK_EQ(size - 1, tgtBuf[numSequences]);
+  IVector::resizeOrCreate(*stridePostions, size, false);
+  (*stridePostions)->copyFrom(stridePos.data(), size);
 }

 void Argument::getValueString(
diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index 760029c2fe6ba..95ea90ffc2a60 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -298,7 +298,7 @@ struct Argument {
   */
   void poolSequenceWithStride(const Argument& input,
                               size_t stride,
-                              std::vector<int>* stridePositions);
+                              IVectorPtr* stridePositions);
   /**
    * @brief getValueString will return the argument's output in string. There
    * are several kinds of output. The keys of output dictionary are 'value',
diff --git a/paddle/parameter/tests/test_argument.cpp b/paddle/parameter/tests/test_argument.cpp
index ba17e8a298e80..692bbada10d03 100644
--- a/paddle/parameter/tests/test_argument.cpp
+++ b/paddle/parameter/tests/test_argument.cpp
@@ -27,8 +27,7 @@ TEST(Argument, poolSequenceWithStride) {
   inStart[3] = 17;
   inStart[4] = 30;

-  std::vector<int> stridePositions;
-  stridePositions.clear();
+  IVectorPtr stridePositions;
   output.poolSequenceWithStride(input, 5 /* stride */, &stridePositions);

   const int* outStart = output.sequenceStartPositions->getData(false);
@@ -38,10 +37,10 @@ TEST(Argument, poolSequenceWithStride) {
   CHECK_EQ(outStart[3], 4);
   CHECK_EQ(outStart[4], 7);

-  CHECK_EQ(stridePositions.size(), 8);
+  CHECK_EQ(stridePositions->getSize(), 8);
   int strideResult[] = {0, 5, 9, 14, 17, 22, 27, 30};
   for (int i = 0; i < 8; i++) {
-    CHECK_EQ(stridePositions[i], strideResult[i]);
+    CHECK_EQ(stridePositions->getData()[i], strideResult[i]);
   }
 }

diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 542bbbe086ec0..5f3250e7224ff 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -1406,7 +1406,6 @@ def first_seq(input,
     and a long sequence will be shorten. Note that for sequence with
     sub-sequence, stride is default -1 now.

-
     The simple usage is:

     .. code-block:: python
@@ -1418,6 +1417,8 @@ def first_seq(input,
     :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
+    :param stride: parameter of stride window.
+    :type stride: Int
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.
     :return: LayerOutput object.

From d369577fed4b85dfeb40610fb21974893ac6dcb9 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Fri, 31 Mar 2017 17:34:00 +0800
Subject: [PATCH 3/5] add reversed poolSequenceWithStride

---
 .../layers/SequenceLastInstanceLayer.cpp |  5 ++-
 paddle/gserver/layers/SequencePoolLayer.cpp |  5 +--
 paddle/gserver/layers/SequencePoolLayer.h   |  2 ++
 paddle/parameter/Argument.cpp               | 21 ++++++------
 paddle/parameter/Argument.h                 |  3 +-
 paddle/parameter/tests/test_argument.cpp    | 32 +++++++++++--------
 python/paddle/trainer/config_parser.py      |  2 +-
 7 files changed, 40 insertions(+), 30 deletions(-)

diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
index c70c2b7421181..d29e981ad66a5 100644
--- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
+++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
@@ -40,7 +40,6 @@ class SequenceLastInstanceLayer : public SequencePoolLayer {
 protected:
   MatrixPtr tmpSrc_;
   MatrixPtr tmpDest_;
-  bool select_first_;
   std::vector<int> insId_;

 public:
@@ -59,7 +58,7 @@ REGISTER_LAYER(seqlastins, SequenceLastInstanceLayer);
 bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
                                      const ParameterMap& parameterMap) {
   SequencePoolLayer::init(layerMap, parameterMap);
-  select_first_ = config_.select_first();
+  reversed_ = config_.select_first();

   tmpSrc_ =
       Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_);
@@ -83,7 +82,7 @@ void SequenceLastInstanceLayer::forward(PassType passType) {

     insId_.clear();
     for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
-      int insId = select_first_ ? starts[seqId] : starts[seqId + 1] - 1;
+      int insId = reversed_ ? starts[seqId] : starts[seqId + 1] - 1;
       insId_.push_back(insId);

       outputValue->subMatrix(seqId, 1, tmpDest_)
diff --git a/paddle/gserver/layers/SequencePoolLayer.cpp b/paddle/gserver/layers/SequencePoolLayer.cpp
index f853905103a0e..8c49502011582 100644
--- a/paddle/gserver/layers/SequencePoolLayer.cpp
+++ b/paddle/gserver/layers/SequencePoolLayer.cpp
@@ -68,8 +68,9 @@ void SequencePoolLayer::forward(PassType passType) {
   }
   if (stride_ > 0) {
     CHECK_EQ(input.hasSubseq(), 0UL)
-        << "sequence stride pooling is not suitable for hasSubseq now";
-    output_.poolSequenceWithStride(input, stride_, &stridePositions_);
+        << "sequence stride pooling is invalid for hasSubseq now";
+    output_.poolSequenceWithStride(
+        input, stride_, &stridePositions_, reversed_);
     newBatchSize_ = stridePositions_->getSize() - 1;
   }

diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h
index 92d7a841f0c73..ff67c0ccadd20 100644
--- a/paddle/gserver/layers/SequencePoolLayer.h
+++ b/paddle/gserver/layers/SequencePoolLayer.h
@@ -49,6 +49,8 @@ class SequencePoolLayer : public Layer {
   int stride_;
   // store the start position of each stride window
   IVectorPtr stridePositions_;
+  // Whether it is reversed sequence
+  bool reversed_ = false;

 public:
   explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {}
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 3cc637587bc28..afbda8bdc403f 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -561,11 +561,13 @@ void Argument::degradeSequence(const Argument& input) {

 void Argument::poolSequenceWithStride(const Argument& input,
                                       size_t stride,
-                                      IVectorPtr* stridePostions) {
+                                      IVectorPtr* stridePostions,
+                                      bool reversed) {
   /*
    * If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
-   * then sequenceStartPositions = [0, 2, 3, 4, 7],
-   * and stridePostions = [0, 5, 9, 14, 17, 22, 27, 30]
+   * then sequenceStartPositions = [0, 2, 3, 4, 7].
+   * If reversed = false, stridePostions = [0, 5, 9, 14, 17, 22, 27, 30];
+   * else reversed = true, stridePostions = [0, 4, 9, 14, 17, 20, 25, 30]
    */
   CHECK(input.sequenceStartPositions);
   CHECK_EQ(input.hasSubseq(), 0UL);
@@ -584,14 +586,13 @@ void Argument::poolSequenceWithStride(const Argument& input,
     if (seqLength == 0) {
       // empty sequence
       tgtBuf[seqId + 1] = tgtBuf[seqId];
-    } else if (seqLength < stride) {
-      tgtBuf[seqId + 1] = tgtBuf[seqId] + 1;
     } else {
-      tgtBuf[seqId + 1] = tgtBuf[seqId] + ceil((float)seqLength / stride);
-      int size =
-          (seqLength % stride) ? seqLength / stride : seqLength / stride - 1;
-      for (int i = 0; i < size; i++) {
-        stridePos.emplace_back(stridePos.back() + stride);
+      int size = ceil((float)seqLength / stride);
+      tgtBuf[seqId + 1] = tgtBuf[seqId] + size;
+      for (int i = 0; i < size - 1; i++) {
+        int cur = reversed ? starts[seqId + 1] - (size - 1 - i) * stride
+                           : stridePos.back() + stride;
+        stridePos.emplace_back(cur);
       }
     }
   }
diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index 95ea90ffc2a60..49a0660ccf155 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -298,7 +298,8 @@ struct Argument {
   */
   void poolSequenceWithStride(const Argument& input,
                               size_t stride,
-                              IVectorPtr* stridePositions);
+                              IVectorPtr* stridePositions,
+                              bool reversed = false);
   /**
    * @brief getValueString will return the argument's output in string. There
    * are several kinds of output. The keys of output dictionary are 'value',
diff --git a/paddle/parameter/tests/test_argument.cpp b/paddle/parameter/tests/test_argument.cpp
index 692bbada10d03..81fe4ee397351 100644
--- a/paddle/parameter/tests/test_argument.cpp
+++ b/paddle/parameter/tests/test_argument.cpp
@@ -27,20 +27,26 @@ TEST(Argument, poolSequenceWithStride) {
   inStart[3] = 17;
   inStart[4] = 30;

-  IVectorPtr stridePositions;
-  output.poolSequenceWithStride(input, 5 /* stride */, &stridePositions);
-
-  const int* outStart = output.sequenceStartPositions->getData(false);
-  CHECK_EQ(outStart[0], 0);
-  CHECK_EQ(outStart[1], 2);
-  CHECK_EQ(outStart[2], 3);
-  CHECK_EQ(outStart[3], 4);
-  CHECK_EQ(outStart[4], 7);
-
-  CHECK_EQ(stridePositions->getSize(), 8);
   int strideResult[] = {0, 5, 9, 14, 17, 22, 27, 30};
-  for (int i = 0; i < 8; i++) {
-    CHECK_EQ(stridePositions->getData()[i], strideResult[i]);
+  int strideResultReversed[] = {0, 4, 9, 14, 17, 20, 25, 30};
+
+  for (auto reversed : {false, true}) {
+    IVectorPtr stridePositions;
+    output.poolSequenceWithStride(
+        input, 5 /* stride */, &stridePositions, reversed);
+
+    const int* outStart = output.sequenceStartPositions->getData(false);
+    CHECK_EQ(outStart[0], 0);
+    CHECK_EQ(outStart[1], 2);
+    CHECK_EQ(outStart[2], 3);
+    CHECK_EQ(outStart[3], 4);
+    CHECK_EQ(outStart[4], 7);
+
+    CHECK_EQ(stridePositions->getSize(), 8);
+    auto result = reversed ? strideResultReversed : strideResult;
+    for (int i = 0; i < 8; i++) {
+      CHECK_EQ(stridePositions->getData()[i], result[i]);
+    }
   }
 }

diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 1a6d1c512d393..dc89419c40f8d 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2497,7 +2497,7 @@ def __init__(self,
         config_assert(
             len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input')
         if trans_type == 'seq':
-            config_assert(stride == -1, 'subseq do not support stride window')
+            config_assert(stride == -1, 'subseq does not support stride window')
         self.config.trans_type = trans_type
         self.config.seq_pool_stride = stride
         self.set_layer_size(self.get_input_layer(0).size)

From dd613047b1b37e2698437582b7e128b08efe6b5a Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Fri, 7 Apr 2017 11:00:58 +0800
Subject: [PATCH 4/5] refine annotation

---
 .../layers/SequenceLastInstanceLayer.cpp       |  6 ++++--
 paddle/gserver/layers/SequencePoolLayer.h      |  8 ++++----
 paddle/parameter/Argument.cpp                  |  2 +-
 paddle/parameter/Argument.h                    |  2 +-
 python/paddle/trainer_config_helpers/layers.py | 18 ++++++++++--------
 5 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
index d29e981ad66a5..c631c5ef3a9eb 100644
--- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
+++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
@@ -26,8 +26,10 @@ namespace paddle {
  * If SequenceLevel = kNonseq:
  *   Output: a sequence containing only the last instance of the input sequence
  *   If stride_ > 0:
- *     Output: a shorten sequence containing several last instances of the
- *             input sequence with stride window.
+ *     Output: a shorten sequence. The operation of getting last instance of a
+ *             sequence is independently performed on every slice of the input
+ *             sequence, which is obtained by sliding a window with the window
+ *             size set to stride_.
  * If SequenceLevel = kSeq:
  *   Check input sequence must has sub-sequence
  *   Output: a sequence containing only the last instance of each sub-sequence
diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h
index ff67c0ccadd20..8e183ecda80a1 100644
--- a/paddle/gserver/layers/SequencePoolLayer.h
+++ b/paddle/gserver/layers/SequencePoolLayer.h
@@ -27,9 +27,9 @@ namespace paddle {
  *           output[i] = seqlastin/average/max_{for each instance in this
  *           sequence}{input[i]}
  *   If stride_ > 0:
- *     Check input sequence must don't have sub-sequence
+ *     Check input sequence must not have sub-sequence
  *     Output: a shorten sequence, pooling is performed upon a small local
- *             area
+ *            area
  * If SequenceLevel = kSeq:
  *   Check input sequence must has sub-sequence
  *   Output: output size is the number of input sub-sequences
@@ -47,9 +47,9 @@ class SequencePoolLayer : public Layer {
   size_t newBatchSize_;
   ICpuGpuVectorPtr startPositions_;
   int stride_;
-  // store the start position of each stride window
+  // store the start position of each window
   IVectorPtr stridePositions_;
-  // Whether it is reversed sequence
+  // Whether the input sequence is reversed or not
   bool reversed_ = false;

 public:
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index afbda8bdc403f..3fa1e50d1e798 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -589,7 +589,7 @@ void Argument::poolSequenceWithStride(const Argument& input,
     } else {
       int size = ceil((float)seqLength / stride);
       tgtBuf[seqId + 1] = tgtBuf[seqId] + size;
-      for (int i = 0; i < size - 1; i++) {
+      for (int i = 0; i < size - 1; ++i) {
         int cur = reversed ? starts[seqId + 1] - (size - 1 - i) * stride
                            : stridePos.back() + stride;
         stridePos.emplace_back(cur);
diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index 49a0660ccf155..91aca98e186ae 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -294,7 +294,7 @@ struct Argument {
   /*
    After pooling with stride n (n is smaller than sequence length),
    a long sequence will be shorten.
-   This function is not suitable for sequence with sub-sequence now.
+   This function is invalid for sequence having sub-sequence.
   */
   void poolSequenceWithStride(const Argument& input,
                               size_t stride,
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 5f3250e7224ff..e98b1dfc8f9ad 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -1347,9 +1347,10 @@ def last_seq(input,
     """
     Get Last Timestamp Activation of a sequence.

-    If stride > 0, get last timestamp upon a stride window of sequence.
-    And a long sequence will be shorten. Note that for sequence with
-    sub-sequence, stride is default -1 now.
+    If stride > 0, this layer slides a window whose size is determined by stride,
+    and return the last value of the window as the output. Thus, a long sequence
+    will be shorten. Note that for sequence with sub-sequence, the default value
+    of stride is -1.

     The simple usage is:

@@ -1362,7 +1363,7 @@ def last_seq(input,
     :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
-    :param stride: parameter of stride window.
+    :param stride: window size.
     :type stride: Int
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.
@@ -1402,9 +1403,10 @@ def first_seq(input,
     """
     Get First Timestamp Activation of a sequence.

-    If stride > 0, get first timestamp upon a stride window of sequence,
-    and a long sequence will be shorten. Note that for sequence with
-    sub-sequence, stride is default -1 now.
+    If stride > 0, this layer slides a window whose size is determined by stride,
+    and return the first value of the window as the output. Thus, a long sequence
+    will be shorten. Note that for sequence with sub-sequence, the default value
+    of stride is -1.

     The simple usage is:

@@ -1417,7 +1419,7 @@ def first_seq(input,
     :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
-    :param stride: parameter of stride window.
+    :param stride: window size.
     :type stride: Int
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.

From e6366e349671126692a7711df47866009dc18f16 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Tue, 11 Apr 2017 17:02:17 +0800
Subject: [PATCH 5/5] update with comments

---
 .../layers/SequenceLastInstanceLayer.cpp |  8 +++----
 paddle/gserver/layers/SequencePoolLayer.h   |  4 ++--
 paddle/gserver/tests/test_LayerGrad.cpp     | 24 +++++++++++--------
 paddle/parameter/Argument.cpp               | 11 ++++-----
 4 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
index c631c5ef3a9eb..944c7051668dc 100644
--- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
+++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
@@ -42,7 +42,7 @@ class SequenceLastInstanceLayer : public SequencePoolLayer {
 protected:
   MatrixPtr tmpSrc_;
   MatrixPtr tmpDest_;
-  std::vector<int> insId_;
+  std::vector<int> instanceIds_;

 public:
   explicit SequenceLastInstanceLayer(const LayerConfig& config)
@@ -82,10 +82,10 @@ void SequenceLastInstanceLayer::forward(PassType passType) {
     AsyncGpuBlock asyncGpuBlock;
     REGISTER_TIMER_INFO("SequenceLastInstanceLayerForward", getName().c_str());

-    insId_.clear();
+    instanceIds_.clear();
     for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
       int insId = reversed_ ? starts[seqId] : starts[seqId + 1] - 1;
-      insId_.push_back(insId);
+      instanceIds_.push_back(insId);

       outputValue->subMatrix(seqId, 1, tmpDest_)
           ->assign(*(inputValue->subMatrix(insId, 1, tmpSrc_)));
@@ -111,7 +111,7 @@ void SequenceLastInstanceLayer::backward(const UpdateCallback& callback) {
     REGISTER_TIMER_INFO("SequenceLastInstanceLayerBackward", getName().c_str());

     for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
-      inputGrad->subMatrix(insId_[seqId], 1, tmpDest_)
+      inputGrad->subMatrix(instanceIds_[seqId], 1, tmpDest_)
           ->add(*(outputGrad->subMatrix(seqId, 1, tmpSrc_)));
     }
   }
diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h
index 8e183ecda80a1..293d1bf27823f 100644
--- a/paddle/gserver/layers/SequencePoolLayer.h
+++ b/paddle/gserver/layers/SequencePoolLayer.h
@@ -47,9 +47,9 @@ class SequencePoolLayer : public Layer {
   size_t newBatchSize_;
   ICpuGpuVectorPtr startPositions_;
   int stride_;
-  // store the start position of each window
+  // Store the start position of each window.
   IVectorPtr stridePositions_;
-  // Whether the input sequence is reversed or not
+  // Whether the input sequence is reversed or not.
   bool reversed_ = false;

 public:
   explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {}
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index f2763842c25c3..193b876c31626 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -807,7 +807,7 @@ TEST(Layer, ExpandLayer) {
 void testDegradeLayer(bool hasSubseq,
                       string layer_type,
                       string trans_type,
-                      int stride = -1) {
+                      int stride) {
   TestConfig config;
   config.layerConfig.set_type(layer_type);
   config.layerConfig.set_size(10);
@@ -844,29 +844,33 @@ void testDegradeLayer(bool hasSubseq,
 }

 TEST(Layer, MaxLayer) {
-  testDegradeLayer(false, "max", "non-seq");  // seq max to non-seq
-  testDegradeLayer(true, "max", "non-seq");   // hasSubseq max to non-seq
-  testDegradeLayer(true, "max", "seq");       // hasSubseq max to seq
+  testDegradeLayer(false, "max", "non-seq", -1);  // seq max to non-seq
+  testDegradeLayer(true, "max", "non-seq", -1);   // hasSubseq max to non-seq
+  testDegradeLayer(true, "max", "seq", -1);       // hasSubseq max to seq
 }

 TEST(Layer, SequenceLastInstanceLayer) {
   testDegradeLayer(false,
                    "seqlastins",
-                   "non-seq");  // seq seqlastins to non-seq
+                   "non-seq",
+                   -1);  // seq seqlastins to non-seq
   testDegradeLayer(false,
                    "seqlastins",
                    "non-seq",
                    5);  // seq seqlastins to a shorten seq, stride window = 5
   testDegradeLayer(true,
                    "seqlastins",
-                   "non-seq");  // hasSubseq seqlastins to non-seq
-  testDegradeLayer(true, "seqlastins", "seq");  // hasSubseq seqlastins to seq
+                   "non-seq",
+                   -1);  // hasSubseq seqlastins to non-seq
+  testDegradeLayer(
+      true, "seqlastins", "seq", -1);  // hasSubseq seqlastins to seq
 }

 TEST(Layer, AverageLayer) {
-  testDegradeLayer(false, "average", "non-seq");  // seq average to non-seq
-  testDegradeLayer(true, "average", "non-seq");   // hasSubseq average to non-seq
-  testDegradeLayer(true, "average", "seq");       // hasSubseq average to seq
+  testDegradeLayer(false, "average", "non-seq", -1);  // seq average to non-seq
+  testDegradeLayer(
+      true, "average", "non-seq", -1);  // hasSubseq average to non-seq
+  testDegradeLayer(true, "average", "seq", -1);  // hasSubseq average to seq
 }

 TEST(Layer, SequenceConcatLayer) {
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 3fa1e50d1e798..645bf73799063 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -563,12 +563,11 @@ void Argument::poolSequenceWithStride(const Argument& input,
                                       size_t stride,
                                       IVectorPtr* stridePostions,
                                       bool reversed) {
-  /*
-   * If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
-   * then sequenceStartPositions = [0, 2, 3, 4, 7].
-   * If reversed = false, stridePostions = [0, 5, 9, 14, 17, 22, 27, 30];
-   * else reversed = true, stridePostions = [0, 4, 9, 14, 17, 20, 25, 30]
-   */
+  // If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
+  // then sequenceStartPositions = [0, 2, 3, 4, 7].
+  // If reversed = false, stridePostions = [0, 5, 9, 14, 17, 22, 27, 30];
+  // else reversed = true, stridePostions = [0, 4, 9, 14, 17, 20, 25, 30]
+
   CHECK(input.sequenceStartPositions);
   CHECK_EQ(input.hasSubseq(), 0UL);
   CHECK_GT(stride, 0) << "stride must larger than 0";
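
For readers tracing the patch series: the index arithmetic that Argument::poolSequenceWithStride ends up implementing (in its PATCH 3 form, with the reversed flag) can be sketched in a few lines of Python. This is an illustrative re-derivation checked against the vectors in test_argument.cpp, not code that ships with the PR:

    import math

    def pool_sequence_with_stride(starts, stride, reversed_=False):
        # starts: sequence start positions, e.g. [0, 9, 14, 17, 30]
        # returns (pooled sequenceStartPositions, stridePositions)
        tgt = [0]        # pooled start positions; tgt[0] is always 0
        stride_pos = []  # first index of every stride window
        for seq_id in range(len(starts) - 1):
            seq_len = starts[seq_id + 1] - starts[seq_id]
            stride_pos.append(starts[seq_id])
            if seq_len == 0:  # empty sequence contributes no windows
                tgt.append(tgt[-1])
            else:
                size = math.ceil(seq_len / stride)  # windows in this sequence
                tgt.append(tgt[-1] + size)
                for i in range(size - 1):
                    # forward windows are anchored at the sequence start;
                    # reversed windows are anchored at the sequence end
                    stride_pos.append(
                        starts[seq_id + 1] - (size - 1 - i) * stride
                        if reversed_ else stride_pos[-1] + stride)
        stride_pos.append(starts[-1])
        assert len(stride_pos) - 1 == tgt[-1]
        return tgt, stride_pos

    # The vectors from test_argument.cpp:
    assert pool_sequence_with_stride([0, 9, 14, 17, 30], 5) == \
        ([0, 2, 3, 4, 7], [0, 5, 9, 14, 17, 22, 27, 30])
    assert pool_sequence_with_stride([0, 9, 14, 17, 30], 5, reversed_=True) == \
        ([0, 2, 3, 4, 7], [0, 4, 9, 14, 17, 20, 25, 30])

This also explains the select_first -> reversed_ mapping in SequenceLastInstanceLayer: first_seq with a stride takes the first instance of end-anchored windows, while last_seq takes the last instance of start-anchored ones, so every pooled value is at most stride steps from its window boundary.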