Merge pull request #76 from emailweixu/fix_RecurrentGradientMachine
Further fix for memory of RecurrentGradientMachine
Haonan authored Sep 16, 2016
2 parents a9d327b + 9d12ca9 commit aeb2d84
Showing 12 changed files with 233 additions and 129 deletions.
1 change: 1 addition & 0 deletions paddle/cuda/src/hl_cuda_matrix.cu
@@ -19,6 +19,7 @@ limitations under the License. */
 #include "hl_matrix_apply.cuh"
 #include "hl_sequence.h"
 #include "paddle/utils/Logging.h"
+#include "hl_device_functions.cuh"
 
 DEFINE_MATRIX_UNARY_OP(Zero, a = 0);
 DEFINE_MATRIX_TERNARY_PARAMETER_OP(_add, TWO_PARAMETER, c = p1*a + p2*b);
115 changes: 59 additions & 56 deletions paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
@@ -434,23 +434,25 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
     }
   }
 
-  seqLengthAndStart_.clear();
   info_.clear();
   info_.resize(inFrameLines_.size());
-  seqLengthAndStart_.resize(inFrameLines_.size());
+
+  seqInfos_.clear();
+  seqInfos_.resize(inFrameLines_.size());
 
   {
     AsyncGpuBlock asyncGpuBlock;
     // if shareInlinkInfo, only calculate info of the first inlink
     // else, calculate info for each inlink
     if (shareInlinkInfo) {
-      input.getSeqLengthAndStart(&seqLengthAndStart_[0], &maxSequenceLength_);
+      input.getSeqInfo(&seqInfos_[0]);
+      maxSequenceLength_ = seqInfos_[0][0].topLevelLength;
       createInFrameInfo(0, input, passType);
     } else {
       for (size_t i = 0; i < inFrameLines_.size(); i++) {
         const Argument& input1 = inFrameLines_[i].inLayer->getOutput();
-        input1.getSeqLengthAndStart(&seqLengthAndStart_[i],
-                                    &maxSequenceLength_);
+        input1.getSeqInfo(&seqInfos_[i]);
+        maxSequenceLength_ = seqInfos_[i][0].topLevelLength;
         createInFrameInfo(i, input1, passType);
       }
     }
@@ -614,74 +616,75 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
  * for all realLayer of inFrameLines one time.
  */
 
-void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
+void RecurrentGradientMachine::createInFrameInfo(int inlinkId,
                                                  const Argument& input,
                                                  PassType passType) {
   bool hasSubseq = input.hasSubseq();
   // numSequences: # samples(sequences) in a batch
   size_t numSequences = input.getNumSequences();
   std::vector<int> allIds;
 
+  auto& seqInfo = seqInfos_[inlinkId];
+
   numSeqs_.clear();
-  Info* inlink_info = &info_[inlinks_id];
-  inlink_info->idIndex.clear();
-  inlink_info->idIndex.push_back(0);  // first idIndex = 0
-  if (hasSubseq) {  // for sequenceScatterAgentLayer
-    // numSubSequences : all sentences within all samples(batch)
-    size_t numSubSequences = input.getNumSubSequences();
-    std::vector<int> sequenceStartPositions;
-    inlink_info->seqStartPosIndex.clear();
-    inlink_info->seqStartPosIndex.push_back(0);  // first seqStartPosIndex = 0
-    // maxSequenceLength_: max number of sentences(subseq) in allsamples
-    for (int i = 0; i < maxSequenceLength_; ++i) {
-      sequenceStartPositions.push_back(0);  // first element = 0
-      int numSeqs = 0;
-      for (size_t j = 0; j < numSubSequences; ++j) {  // for each sentence
-        // seqLengthAndStart_[inlinks_id][j]:
-        // a 4-tuple including <subseqlen, subseqstart, seqid, subseqid>
-        if (std::get<3>(seqLengthAndStart_[inlinks_id][j]) == i) {
-          ++numSeqs;
-          // subseqstart: the cpuSubSequenceStartPositions of this subseq
-          int subSeqStart = std::get<1>(seqLengthAndStart_[inlinks_id][j]);
-          int subSeqLength = std::get<0>(seqLengthAndStart_[inlinks_id][j]);
-          for (int k = subSeqStart; k < subSeqStart + subSeqLength; ++k) {
-            allIds.push_back(k);
-          }
-          sequenceStartPositions.push_back(sequenceStartPositions.back() +
-                                           subSeqLength);
-        }
-      }
-      inlink_info->idIndex.push_back(allIds.size());
-      inlink_info->seqStartPosIndex.push_back(sequenceStartPositions.size());
-      numSeqs_.push_back(numSeqs);
-    }
-    // inFrameLine create sequenceStartPositions one time
-    CHECK_EQ(sequenceStartPositions.size(),
-             maxSequenceLength_ + numSubSequences);
-    CHECK_EQ(inlink_info->seqStartPosIndex.size(),
-             static_cast<size_t>(maxSequenceLength_ + 1));
-    createSeqPos(sequenceStartPositions, &inlink_info->sequenceStartPositions);
-  } else {  // for scatterAgentLayer
-    for (int i = 0; i < maxSequenceLength_; ++i) {
-      int numSeqs = 0;
-      for (size_t j = 0; j < numSequences; ++j) {
-        int seqLength = std::get<0>(seqLengthAndStart_[inlinks_id][j]);
-        if (i >= seqLength) {
-          break;
-        }
-        ++numSeqs;
-        int seqStart = std::get<1>(seqLengthAndStart_[inlinks_id][j]);
-        allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i)
-                                   : (seqStart + i));
-      }
-      inlink_info->idIndex.push_back(allIds.size());
-      numSeqs_.push_back(numSeqs);
-    }
-  }
+  Info* inlinkInfo = &info_[inlinkId];
+  inlinkInfo->idIndex.clear();
+  inlinkInfo->idIndex.push_back(0);  // first idIndex = 0
+
+  std::vector<int> sequenceStartPositions;
+  const int* subSequenceStartPositions = nullptr;
+
+  if (hasSubseq) {  // for sequenceScatterAgentLayer
+    subSequenceStartPositions =
+        input.subSequenceStartPositions->getData(false);
+    inlinkInfo->seqStartPosIndex.clear();
+    inlinkInfo->seqStartPosIndex.push_back(0);  // first seqStartPosIndex = 0
+  }
+  // maxSequenceLength_: max topLevelLength in allsamples
+  for (int i = 0; i < maxSequenceLength_; ++i) {
+    if (hasSubseq) {
+      sequenceStartPositions.push_back(0);  // first element = 0
+    }
+    int numSeqs = 0;
+    for (size_t j = 0; j < numSequences; ++j) {
+      int seqLength = seqInfo[j].topLevelLength;
+      if (i >= seqLength) {
+        break;
+      }
+      ++numSeqs;
+      if (hasSubseq) {
+        int subSeqStart = subSequenceStartPositions[seqInfo[j].subSeqStart + i];
+        int subSeqEnd =
+            subSequenceStartPositions[seqInfo[j].subSeqStart + i + 1];
+        for (int k = subSeqStart; k < subSeqEnd; ++k) {
+          allIds.push_back(k);
+        }
+        sequenceStartPositions.push_back(sequenceStartPositions.back() +
+                                         subSeqEnd - subSeqStart);
+      } else {
+        int seqStart = seqInfo[j].seqStart;
+        allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i)
+                                   : (seqStart + i));
+      }
+    }
+    inlinkInfo->idIndex.push_back(allIds.size());
+    numSeqs_.push_back(numSeqs);
+    if (hasSubseq) {
+      inlinkInfo->seqStartPosIndex.push_back(sequenceStartPositions.size());
+    }
+  }
+  if (hasSubseq) {
+    // inFrameLine create sequenceStartPositions one time
+    CHECK_EQ(sequenceStartPositions.size(),
+             maxSequenceLength_ + input.getNumSubSequences());
+    CHECK_EQ(inlinkInfo->seqStartPosIndex.size(),
+             static_cast<size_t>(maxSequenceLength_ + 1));
+    createSeqPos(sequenceStartPositions, &inlinkInfo->sequenceStartPositions);
+  }
 
   // copy and check scatterId
-  copyScattedId(allIds, &inlink_info->allIds, input.getBatchSize());
-  CHECK_EQ(inlink_info->idIndex.size(),
+  copyScattedId(allIds, &inlinkInfo->allIds, input.getBatchSize());
+  CHECK_EQ(inlinkInfo->idIndex.size(),
            static_cast<size_t>(maxSequenceLength_ + 1));
 }

@@ -701,7 +704,7 @@ void RecurrentGradientMachine::createMemoryFrameInfo(
     const int* starts = input.sequenceStartPositions->getData(false);
     for (size_t i = 0; i < numSequences; ++i) {
       // memory info adopt info of inlinks[0]
-      int seqId = std::get<2>(seqLengthAndStart_[0][i]);
+      int seqId = seqInfos_[0][i].seqId;
       for (int k = starts[seqId]; k < starts[seqId + 1]; ++k) {
         allIds.push_back(k);
       }
@@ -713,7 +716,7 @@
 
   } else {  // for scatterAgentLayer
     for (size_t i = 0; i < numSequences; ++i) {
-      allIds.push_back(std::get<2>(seqLengthAndStart_[0][i]));
+      allIds.push_back(seqInfos_[0][i].seqId);
     }
   }
   // copy and check scatterId
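A note on the bookkeeping change above: forward() now derives maxSequenceLength_ from seqInfos_[i][0].topLevelLength, which is only correct if Argument::getSeqInfo() returns its entries ordered by decreasing top-level length. A minimal C++ sketch of that assumed contract (illustration only, not code from this commit):

// Assumed contract of Argument::getSeqInfo(): entries come back sorted by
// decreasing topLevelLength, so entry 0 carries the longest top-level
// sequence length in the batch.
std::vector<Argument::SeqInfo> infos;
input.getSeqInfo(&infos);
int maxLen = infos[0].topLevelLength;  // maximum length under that sorting assumption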
6 changes: 1 addition & 5 deletions paddle/gserver/gradientmachines/RecurrentGradientMachine.h
@@ -337,11 +337,7 @@ class RecurrentGradientMachine : public NeuralNetwork {
   // data) or has more than i subsequences (for subsequence data)
   std::vector<int> numSeqs_;
 
-  // each inlinks has a "std::vector<std::tuple<int, int, int, int>>" denotes
-  // its sequence info:
-  // if hasSubSeq, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
-  // else, tuple of (seqLength, seqStart, seqIndex, seqIndex)
-  std::vector<std::vector<std::tuple<int, int, int, int>>> seqLengthAndStart_;
+  std::vector<std::vector<Argument::SeqInfo>> seqInfos_;
 
   // the id of inlink which share info with outlinks
   int targetInfoInlinkId_;
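The five-line tuple comment and the tuple member are replaced by a single vector of Argument::SeqInfo per inlink. Judging only from the fields the new .cpp code reads (topLevelLength, seqStart, seqId, subSeqStart), the struct presumably looks roughly like the sketch below; this is inferred from usage in this diff, so see paddle/parameter/Argument.h for the real definition:

// Inferred sketch of Argument::SeqInfo, reconstructed from how this diff uses it.
struct SeqInfo {
  int topLevelLength;  // sequence length, or number of subsequences when nested
  int seqStart;        // start offset of the sequence within the batch
  int seqId;           // original position of the sequence in the input batch
  int subSeqStart;     // index of the sequence's first subsequence start entry
};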
58 changes: 58 additions & 0 deletions paddle/gserver/layers/PrintLayer.cpp
@@ -0,0 +1,58 @@
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "Layer.h"

namespace paddle {

class PrintLayer : public Layer {
public:
  explicit PrintLayer(const LayerConfig& config)
      : Layer(config) {}
  void forward(PassType passType);
  void backward(const UpdateCallback& callback) {}
};

void PrintLayer::forward(PassType passType) {
  Layer::forward(passType);
  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    const auto& argu = getInput(i);
    const std::string& name = inputLayers_[i]->getName();
    if (argu.value) {
      std::ostringstream os;
      argu.value->print(os);
      LOG(INFO) << "layer=" << name << " value matrix:\n" << os.str();
    }
    if (argu.ids) {
      std::ostringstream os;
      argu.ids->print(os, argu.ids->getSize());
      LOG(INFO) << "layer=" << name << " ids vector:\n" << os.str();
    }
    if (auto startPos = argu.sequenceStartPositions) {
      std::ostringstream os;
      startPos->getVector(false)->print(os, startPos->getSize());
      LOG(INFO) << "layer=" << name << " sequence pos vector:\n" << os.str();
    }
    if (auto subStartPos = argu.subSequenceStartPositions) {
      std::ostringstream os;
      subStartPos->getVector(false)->print(os, subStartPos->getSize());
      LOG(INFO) << "layer=" << name << " sub-sequence pos vector:\n"
                << os.str();
    }
  }
}

REGISTER_LAYER(print, PrintLayer);

} // namespace paddle
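REGISTER_LAYER(print, PrintLayer) registers the class under the type name "print", so a layer whose config type is "print" gets instantiated as a PrintLayer. The snippet below is only a hedged illustration of that idea; the layer name "debug_print" is hypothetical, and it assumes Layer::create(const LayerConfig&) is the factory entry point:

// Hypothetical illustration: build a LayerConfig of type "print" and let the
// layer factory construct a PrintLayer from it.
LayerConfig cfg;
cfg.set_type("print");
cfg.set_name("debug_print");
LayerPtr printLayer = Layer::create(cfg);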
7 changes: 4 additions & 3 deletions paddle/gserver/tests/sequence_nest_rnn.conf
@@ -42,14 +42,16 @@ def outer_step(x):
         inner_mem = memory(name="inner_rnn_state",
                            size=hidden_dim,
                            boot_layer=outer_mem)
-        return fc_layer(input=[y, inner_mem],
+        out = fc_layer(input=[y, inner_mem],
                         size=hidden_dim,
                         act=TanhActivation(),
                         bias_attr=True,
                         name="inner_rnn_state")
+        return out
 
     inner_rnn_output = recurrent_group(
         step=inner_step,
+        name="inner",
         input=x)
     last = last_seq(input=inner_rnn_output, name="outer_rnn_state")
 
@@ -60,11 +62,10 @@ def outer_step(x):
     return inner_rnn_output
 
 out = recurrent_group(
+    name="outer",
     step=outer_step,
     input=SubsequenceInput(emb))
 
-value_printer_evaluator(input=out)
-
 rep = last_seq(input=out)
 prob = fc_layer(size=label_dim,
                 input=rep,
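For context on the config above: inside a recurrent_group step function, memory(name="inner_rnn_state", ...) reads the previous-step output of the layer carrying that same name, which is why the fc_layer keeps name="inner_rnn_state". The functional edits in this file are naming the inner and outer recurrent_group instances and dropping the value_printer_evaluator call; binding the fc_layer result to out before returning it is purely a style change.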
6 changes: 3 additions & 3 deletions paddle/gserver/tests/sequence_rnn.conf
@@ -35,18 +35,18 @@ emb = embedding_layer(input=data, size=word_dim)
 
 def step(y):
     mem = memory(name="rnn_state", size=hidden_dim)
-    return fc_layer(input=[y, mem],
+    out = fc_layer(input=[y, mem],
                     size=hidden_dim,
                     act=TanhActivation(),
                     bias_attr=True,
                     name="rnn_state")
+    return out
 
 out = recurrent_group(
+    name="rnn",
     step=step,
     input=emb)
 
-value_printer_evaluator(input=out)
-
 rep = last_seq(input=out)
 prob = fc_layer(size=label_dim,
                 input=rep,
13 changes: 8 additions & 5 deletions paddle/gserver/tests/test_RecurrentGradientMachine.cpp
@@ -92,7 +92,7 @@ void CalCost(const string& conf, const string& dir, real* cost,
   rmDir(dir.c_str());
 }
 
-void test(const string& conf1, const string& conf2) {
+void test(const string& conf1, const string& conf2, double eps) {
   int num_passes = 5;
   real* cost1 = new real[num_passes];
   const string dir1 = "gserver/tests/t1";
@@ -104,21 +104,24 @@
 
   for (int i = 0; i < num_passes; i++) {
     LOG(INFO) << "num_passes: " << i << ", cost1=" << cost1[i]
-              << ", cost2=" << cost2[i];
-    ASSERT_NEAR(cost1[i], cost2[i], 1e-3);
+              << ", cost2=" << cost2[i]
+              << ", diff=" << std::abs(cost1[i] - cost2[i]);
+    ASSERT_NEAR(cost1[i], cost2[i], eps);
   }
   delete[] cost1;
   delete[] cost2;
 }
 
 TEST(RecurrentGradientMachine, HasSubSequence) {
   test("gserver/tests/sequence_layer_group.conf",
-       "gserver/tests/sequence_nest_layer_group.conf");
+       "gserver/tests/sequence_nest_layer_group.conf",
+       1e-5);
 }
 
 TEST(RecurrentGradientMachine, rnn) {
   test("gserver/tests/sequence_rnn.conf",
-       "gserver/tests/sequence_nest_rnn.conf");
+       "gserver/tests/sequence_nest_rnn.conf",
+       0);
 }
 
 
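Since ASSERT_NEAR(a, b, eps) checks |a - b| <= eps, passing eps = 0 in the rnn test demands bit-identical costs from the plain and nested configurations, while the layer-group test tolerates differences up to 1e-5.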
(Diffs for the remaining 5 changed files are not shown.)

2 comments on commit aeb2d84

@F0REacH (Contributor) commented on aeb2d84, Sep 16, 2016

Hi @yu239. Failing to build with this commit - error: comparison between signed and unsigned integer expressions
-- The CXX compiler identification is GNU 4.9.4
-- The C compiler identification is GNU 4.9.4

In file included from /home/foreach/SOFT/BAIDU/PADDLE/Paddle/paddle/utils/Stat.h:26:0,
                 from /home/foreach/SOFT/BAIDU/PADDLE/Paddle/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp:15:
/home/foreach/SOFT/BAIDU/PADDLE/Paddle/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp: In member function ‘void paddle::RecurrentGradientMachine::createInFrameInfo(int, const paddle::Argument&, paddle::enumeration_wrapper::PassType)’:
/home/foreach/SOFT/BAIDU/PADDLE/Paddle/paddle/utils/Logging.h:136:45: error: comparison between signed and unsigned integer expressions [-Werror=sign-compare]
 #define CHECK_EQ(val1, val2) P_CHECK((val1) == (val2))
                                             ^
/home/foreach/SOFT/BAIDU/PADDLE/Paddle/paddle/utils/Logging.h:110:44: note: in definition of macro ‘PREDICT_FALSE’
 #define PREDICT_FALSE(x) (__builtin_expect(x, 0))
                                            ^
/home/foreach/SOFT/BAIDU/PADDLE/Paddle/paddle/utils/Logging.h:136:30: note: in expansion of macro ‘P_CHECK’
 #define CHECK_EQ(val1, val2) P_CHECK((val1) == (val2))
                              ^
/home/foreach/SOFT/BAIDU/PADDLE/Paddle/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp:678:5: note: in expansion of macro ‘CHECK_EQ’
     CHECK_EQ(sequenceStartPositions.size(),

@emailweixu (Collaborator) commented:

It's strange that somehow the continuous integration server's gcc 4.8.4 and my own gcc 5.4.0 can pass.

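For reference, this kind of -Werror=sign-compare failure in CHECK_EQ comes from comparing the unsigned sequenceStartPositions.size() with a right-hand side that is (apparently) signed on this compiler. The usual remedy is to make both sides the same type; the line below is a hedged sketch of that idea only, not the fix that was eventually merged:

// Illustration only: cast the right-hand side so both CHECK_EQ operands are size_t.
CHECK_EQ(sequenceStartPositions.size(),
         static_cast<size_t>(maxSequenceLength_ + input.getNumSubSequences()));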