From 7ed6463ee91e0b71e7beca313554eae36da1c4e4 Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Wed, 24 May 2017 13:55:58 +0800
Subject: [PATCH 1/3] fix bugs for CrossChannelNormLayer

---
 .../gserver/layers/CrossChannelNormLayer.cpp  | 32 ++++++++++++++-----
 paddle/gserver/layers/NormLayer.cpp           | 10 ------
 paddle/gserver/tests/LayerGradUtil.cpp        |  7 +++-
 paddle/gserver/tests/LayerGradUtil.h          |  6 ++++
 paddle/gserver/tests/test_LayerGrad.cpp       |  5 ++-
 5 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/paddle/gserver/layers/CrossChannelNormLayer.cpp b/paddle/gserver/layers/CrossChannelNormLayer.cpp
index 3fbccc11032ca..4dfe460561e71 100644
--- a/paddle/gserver/layers/CrossChannelNormLayer.cpp
+++ b/paddle/gserver/layers/CrossChannelNormLayer.cpp
@@ -36,6 +36,16 @@ MatrixPtr CrossChannelNormLayer::createSpatialMatrix(MatrixPtr data,
       data->getData() + iter * spatialDim, 1, spatialDim, false, useGpu_);
 }
 
+bool CrossChannelNormLayer::init(const LayerMap& layerMap,
+                                 const ParameterMap& parameterMap) {
+  Layer::init(layerMap, parameterMap);
+  CHECK(parameters_[0]);
+  const NormConfig& conf = config_.inputs(0).norm_conf();
+  channels_ = conf.channels();
+  scale_.reset(new Weight(channels_, 1, parameters_[0]));
+  return true;
+}
+
 void CrossChannelNormLayer::forward(PassType passType) {
   Layer::forward(passType);
   MatrixPtr inV = getInputValue(0);
@@ -63,6 +73,7 @@ void CrossChannelNormLayer::forward(PassType passType) {
 
     // compute norm.
     spatialBuffer_->sumCols(*dataTmp, 1, 0);
+    spatialBuffer_->add(*normTmp);
     spatialBuffer_->sqrt2(*spatialBuffer_);
     normTmp->copyFrom(*spatialBuffer_);
     outVTmp->copyFrom(*inVTmp);
@@ -82,6 +93,9 @@ void CrossChannelNormLayer::backward(const UpdateCallback& callback) {
   size_t dataDim = inG->getWidth();
   size_t spatialDim = dataDim / channels_;
 
+  MatrixPtr inGBuffer;
+  Matrix::resizeOrCreate(inGBuffer, channels_, spatialDim, false, useGpu_);
+
   dataBuffer_->dotMul(*outG, *outV);
   Matrix::resizeOrCreate(scaleDiff_, channels_, 1, false, useGpu_);
   Matrix::resizeOrCreate(channelBuffer_, channels_, 1, false, useGpu_);
@@ -100,22 +114,24 @@ void CrossChannelNormLayer::backward(const UpdateCallback& callback) {
     scaleDiff_->add(*channelBuffer_, 1.);
 
     sampleBuffer_->dotMul(*inVTmp, *outGTmp);
-    spatialBuffer_->sumCols(*sampleBuffer_, 1., 1.);
+    spatialBuffer_->sumCols(*sampleBuffer_, 1., 0.);
     // scale the grad
-    inGTmp->copyFrom(*inVTmp);
-    inGTmp->mulRowVector(*spatialBuffer_);
+    inGBuffer->copyFrom(*inVTmp);
+    inGBuffer->mulRowVector(*spatialBuffer_);
     // divide by square of norm
     spatialBuffer_->dotMul(*normTmp, *normTmp);
-    inGTmp->divRowVector(*spatialBuffer_);
+    inGBuffer->divRowVector(*spatialBuffer_);
     // subtract
-    inGTmp->add(*outGTmp, -1, 1);
+    inGBuffer->add(*outGTmp, -1, 1);
     // divide by norm
-    inGTmp->divRowVector(*normTmp);
+    inGBuffer->divRowVector(*normTmp);
     // scale the diff
-    inGTmp->mulColVector(*scale_->getW());
+    inGBuffer->mulColVector(*scale_->getW());
+
+    inGTmp->add(*inGBuffer);
   }
 
   // updata scale
-  if (scale_->getWGrad()) scale_->getWGrad()->copyFrom(*scaleDiff_);
+  if (scale_->getWGrad()) scale_->getWGrad()->add(*scaleDiff_);
   scale_->getParameterPtr()->incUpdate(callback);
 }
diff --git a/paddle/gserver/layers/NormLayer.cpp b/paddle/gserver/layers/NormLayer.cpp
index e094078bfe86e..caef7100929c7 100644
--- a/paddle/gserver/layers/NormLayer.cpp
+++ b/paddle/gserver/layers/NormLayer.cpp
@@ -56,14 +56,4 @@ bool ResponseNormLayer::init(const LayerMap& layerMap,
   return true;
 }
 
-bool CrossChannelNormLayer::init(const LayerMap& layerMap,
-                                 const ParameterMap& parameterMap) {
-  Layer::init(layerMap, parameterMap);
-  CHECK(parameters_[0]);
-  const NormConfig& conf = config_.inputs(0).norm_conf();
-  channels_ = conf.channels();
-  scale_.reset(new Weight(channels_, 1, parameters_[0]));
-  return true;
-}
-
 }  // namespace paddle
diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp
index e3591ba4df88f..66aafba844dd2 100644
--- a/paddle/gserver/tests/LayerGradUtil.cpp
+++ b/paddle/gserver/tests/LayerGradUtil.cpp
@@ -465,7 +465,6 @@ void initTestLayer(TestConfig testConf,
                            ParameterConfig paraConfig) {
     paraConfig.set_name(paraName);
     paraConfig.set_size(paraSize);
-    paraConfig.set_initial_std(1);
     paraConfig.set_is_static(isStatic);
     auto para =
         std::make_shared<Parameter>(paraConfig, FLAGS_use_gpu, initialize);
@@ -499,6 +498,12 @@ void initTestLayer(TestConfig testConf,
       paraConfig.add_dims((*layerMap)[input.input_layer_name()]->getSize());
       paraConfig.add_dims(testConf.layerConfig.size());
     }
+    if (testConf.hasParamInitialValue) {
+      paraConfig.set_initial_mean(testConf.paramInitialMean);
+      paraConfig.set_initial_std(testConf.paramInitialStd);
+    } else {
+      paraConfig.set_initial_std(1);
+    }
     initParameter(paraName, paraSize, inputDef.isStatic, false, paraConfig);
   }
 }
diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h
index 18a6525a145fb..5ea7ca0f24c2c 100644
--- a/paddle/gserver/tests/LayerGradUtil.h
+++ b/paddle/gserver/tests/LayerGradUtil.h
@@ -125,12 +125,18 @@ struct TestConfig {
   LayerConfig layerConfig;
   std::vector<InputDef> inputDefs;
   size_t biasSize;
+  real paramInitialMean;
+  real paramInitialStd;
+  bool hasParamInitialValue;
   bool testAccumulate;
   bool testState;
   bool staticBias;
   bool testBatchState;
   TestConfig()
       : biasSize(0),
+        paramInitialMean(0),
+        paramInitialStd(1),
+        hasParamInitialValue(false),
         testAccumulate(true),
         testState(false),
         staticBias(false),
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 59d1e9273d42d..6441e08b48d2a 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1661,6 +1661,9 @@ TEST(Layer, PadLayer) {
 
 TEST(Layer, CrossChannelNormLayer) {
   TestConfig config;
+  config.hasParamInitialValue = true;
+  config.paramInitialMean = 1.;
+  config.paramInitialStd = 0.;
   config.layerConfig.set_type("norm");
   config.layerConfig.set_size(100);
   LayerInputConfig* input = config.layerConfig.add_inputs();
@@ -1674,7 +1677,7 @@ TEST(Layer, CrossChannelNormLayer) {
   config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10});
 
   for (auto useGpu : {false, true}) {
-    testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false, 5);
+    testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false);
   }
 }
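Patch 1 fixes two accumulation bugs in backward(): the input gradient and the scale gradient were overwritten (inGTmp written in place, scaleDiff_ installed via copyFrom), clobbering contributions already accumulated there, and sumCols was called with a final scale factor of 1, so stale buffer contents leaked into each fresh dot product. The fix stages the new gradient in a scratch inGBuffer and adds it into inGTmp, switches the scale gradient to add, and zeroes the sumCols destination. Below is a minimal scalar sketch of the math being computed, for y[c][s] = w[c] * x[c][s] / norm[s] with norm[s] the cross-channel L2 norm at spatial position s; the names and raw-pointer layout are illustrative, not Paddle's Matrix API:

    #include <cstddef>

    // Backward of y = w * x / norm, accumulating into dx.
    void crossChannelNormBackward(const float* x, const float* dy,
                                  const float* norm,  // per-position L2 norm
                                  const float* w,     // per-channel scale
                                  float* dx,          // accumulated in place
                                  std::size_t channels,
                                  std::size_t spatialDim) {
      for (std::size_t s = 0; s < spatialDim; ++s) {
        // dot = sum_c x[c][s] * dy[c][s], computed fresh for each position
        // (the sumCols fix: do not sum into leftover buffer contents).
        float dot = 0.f;
        for (std::size_t c = 0; c < channels; ++c)
          dot += x[c * spatialDim + s] * dy[c * spatialDim + s];
        for (std::size_t c = 0; c < channels; ++c) {
          std::size_t i = c * spatialDim + s;
          float g = w[c] * (dy[i] - x[i] * dot / (norm[s] * norm[s])) / norm[s];
          dx[i] += g;  // accumulate; the old code overwrote dx here
        }
      }
    }
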
From 2bf4f1bbc1e4abc9c173b89aeb96c40b404e94f4 Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Wed, 24 May 2017 14:22:41 +0800
Subject: [PATCH 2/3] make adding eps more clear

---
 paddle/gserver/layers/CrossChannelNormLayer.cpp | 7 +++----
 paddle/gserver/tests/LayerGradUtil.h            | 4 ++--
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/paddle/gserver/layers/CrossChannelNormLayer.cpp b/paddle/gserver/layers/CrossChannelNormLayer.cpp
index 4dfe460561e71..d72503217f1c9 100644
--- a/paddle/gserver/layers/CrossChannelNormLayer.cpp
+++ b/paddle/gserver/layers/CrossChannelNormLayer.cpp
@@ -61,9 +61,7 @@ void CrossChannelNormLayer::forward(PassType passType) {
   Matrix::resizeOrCreate(dataBuffer_, batchSize, dataDim, false, useGpu_);
   Matrix::resizeOrCreate(spatialBuffer_, 1, spatialDim, false, useGpu_);
   Matrix::resizeOrCreate(normBuffer_, batchSize, spatialDim, false, useGpu_);
-  normBuffer_->zeroMem();
-  // add eps to avoid overflow
-  normBuffer_->addScalar(*normBuffer_, 1e-6);
+
   inV->square2(*dataBuffer_);
   for (size_t i = 0; i < batchSize; i++) {
     const MatrixPtr inVTmp = createSampleMatrix(inV, i, spatialDim);
@@ -73,7 +71,8 @@ void CrossChannelNormLayer::forward(PassType passType) {
 
     // compute norm.
     spatialBuffer_->sumCols(*dataTmp, 1, 0);
-    spatialBuffer_->add(*normTmp);
+    // add eps to avoid overflow
+    spatialBuffer_->add(1e-6);
     spatialBuffer_->sqrt2(*spatialBuffer_);
     normTmp->copyFrom(*spatialBuffer_);
     outVTmp->copyFrom(*inVTmp);
diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h
index 5ea7ca0f24c2c..9dbd202757879 100644
--- a/paddle/gserver/tests/LayerGradUtil.h
+++ b/paddle/gserver/tests/LayerGradUtil.h
@@ -134,8 +134,8 @@ struct TestConfig {
   bool testBatchState;
   TestConfig()
       : biasSize(0),
-        paramInitialMean(0),
-        paramInitialStd(1),
+        paramInitialMean(0.0),
+        paramInitialStd(1.0),
         hasParamInitialValue(false),
         testAccumulate(true),
         testState(false),
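Patch 2 makes the epsilon's purpose explicit: instead of pre-seeding normBuffer_ with 1e-6 and folding it in through spatialBuffer_->add(*normTmp), the constant is now added directly to the per-position sum of squares just before the square root, so the norm (and every later division by it) stays strictly positive even for an all-zero input column. A small sketch of the per-position computation, with illustrative names in place of the Matrix row views:

    #include <cmath>
    #include <cstddef>

    // norm[s] = sqrt(sum_c x[c][s]^2 + eps), per spatial position s.
    float positionNorm(const float* x, std::size_t channels,
                       std::size_t spatialDim, std::size_t s) {
      float sumSq = 0.f;
      for (std::size_t c = 0; c < channels; ++c) {
        float v = x[c * spatialDim + s];
        sumSq += v * v;
      }
      // eps under the sqrt keeps the norm away from zero
      return std::sqrt(sumSq + 1e-6f);
    }
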
From c37da0bd3ba14318198bfc6dd8f8ba5e13c1a269 Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Wed, 5 Jul 2017 18:36:47 +0800
Subject: [PATCH 3/3] Remove hasParamInitialValue flag.

---
 paddle/gserver/tests/LayerGradUtil.cpp  | 9 +++------
 paddle/gserver/tests/LayerGradUtil.h    | 2 --
 paddle/gserver/tests/test_LayerGrad.cpp | 1 -
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp
index 66aafba844dd2..15b8cedeb8316 100644
--- a/paddle/gserver/tests/LayerGradUtil.cpp
+++ b/paddle/gserver/tests/LayerGradUtil.cpp
@@ -498,12 +498,9 @@ void initTestLayer(TestConfig testConf,
       paraConfig.add_dims((*layerMap)[input.input_layer_name()]->getSize());
       paraConfig.add_dims(testConf.layerConfig.size());
     }
-    if (testConf.hasParamInitialValue) {
-      paraConfig.set_initial_mean(testConf.paramInitialMean);
-      paraConfig.set_initial_std(testConf.paramInitialStd);
-    } else {
-      paraConfig.set_initial_std(1);
-    }
+    CHECK_GE(testConf.paramInitialStd, 0);
+    paraConfig.set_initial_mean(testConf.paramInitialMean);
+    paraConfig.set_initial_std(testConf.paramInitialStd);
     initParameter(paraName, paraSize, inputDef.isStatic, false, paraConfig);
   }
 }
diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h
index 9dbd202757879..d299b4dd09418 100644
--- a/paddle/gserver/tests/LayerGradUtil.h
+++ b/paddle/gserver/tests/LayerGradUtil.h
@@ -127,7 +127,6 @@ struct TestConfig {
   size_t biasSize;
   real paramInitialMean;
   real paramInitialStd;
-  bool hasParamInitialValue;
   bool testAccumulate;
   bool testState;
   bool staticBias;
@@ -136,7 +135,6 @@ struct TestConfig {
       : biasSize(0),
         paramInitialMean(0.0),
         paramInitialStd(1.0),
-        hasParamInitialValue(false),
         testAccumulate(true),
         testState(false),
         staticBias(false),
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 6441e08b48d2a..bf0136a10f144 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1661,7 +1661,6 @@ TEST(Layer, PadLayer) {
 
 TEST(Layer, CrossChannelNormLayer) {
   TestConfig config;
-  config.hasParamInitialValue = true;
   config.paramInitialMean = 1.;
   config.paramInitialStd = 0.;
   config.layerConfig.set_type("norm");
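With paramInitialMean and paramInitialStd defaulting to 0.0 and 1.0 in TestConfig, copying them into ParameterConfig unconditionally reproduces the old default initialization, so the hasParamInitialValue flag is redundant; the CHECK_GE simply rejects a negative standard deviation. For the cross-channel-norm test, mean 1 and std 0 pin the scale weight at exactly 1, so the gradient check runs against a deterministic, pure L2 normalization. A usage sketch of the two surviving knobs:

    // Deterministic parameter init for a gradient-check test (the defaults
    // mean = 0.0, std = 1.0 apply when a test leaves these untouched).
    TestConfig config;
    config.paramInitialMean = 1.;  // scale weight centered at 1
    config.paramInitialStd = 0.;   // zero spread: the weight is exactly 1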