From 76f5896fded50285345d0066e9f623c24c77bd7c Mon Sep 17 00:00:00 2001
From: hedaoyuan
Date: Fri, 4 Nov 2016 17:42:34 +0800
Subject: [PATCH 1/4] fix floating-point overflow problem of tanh

---
 paddle/cuda/include/hl_base.h                |  9 ++
 paddle/cuda/src/hl_avx_functions.cc          |  2 +
 paddle/cuda/src/hl_cpu_functions.cc          |  4 +-
 paddle/gserver/tests/test_LayerGrad.cpp      |  2 +-
 paddle/gserver/tests/test_RecurrentLayer.cpp |  2 +-
 paddle/math/BaseMatrix.cu                    |  5 +-
 paddle/math/MathFunctions.cpp                |  5 +-
 paddle/math/tests/CMakeLists.txt             |  1 +
 paddle/math/tests/test_FPException.cpp       | 92 ++++++++++++++++++++
 9 files changed, 117 insertions(+), 5 deletions(-)
 create mode 100644 paddle/math/tests/test_FPException.cpp

diff --git a/paddle/cuda/include/hl_base.h b/paddle/cuda/include/hl_base.h
index 1fe2774cc5a29..02fa6bc3ace32 100644
--- a/paddle/cuda/include/hl_base.h
+++ b/paddle/cuda/include/hl_base.h
@@ -209,6 +209,15 @@ typedef struct {
 #define HL_FLOAT_MIN 2.2250738585072014e-308
 #endif
+
+/**
+ * The maximum input value for exp, used to avoid the overflow problem.
+ *
+ * Currently only used by the tanh function.
+ */
+#define EXP_MAX_INPUT 40.0
+
+
 /**
  * @brief DIVUP(x, y) is similar to ceil(x / y).
  * @note For CUDA, DIVUP will be used to specify
diff --git a/paddle/cuda/src/hl_avx_functions.cc b/paddle/cuda/src/hl_avx_functions.cc
index 2d471206f61f2..08976180fff5b 100644
--- a/paddle/cuda/src/hl_avx_functions.cc
+++ b/paddle/cuda/src/hl_avx_functions.cc
@@ -38,7 +38,9 @@ namespace hppl {
   }
 
   __m256 tanh(const __m256 a) {
+    __m256 max = _mm256_set1_ps(EXP_MAX_INPUT);
     __m256 tmp = _mm256_mul_ps(_mm256_set1_ps(-2.0f), a);
+    tmp = _mm256_min_ps(tmp, max);
     tmp = exp(tmp);
     return _mm256_sub_ps(
         _mm256_div_ps(_mm256_set1_ps(2.0f),
diff --git a/paddle/cuda/src/hl_cpu_functions.cc b/paddle/cuda/src/hl_cpu_functions.cc
index 3fd6b278d0537..5f45f4c9d53ed 100644
--- a/paddle/cuda/src/hl_cpu_functions.cc
+++ b/paddle/cuda/src/hl_cpu_functions.cc
@@ -30,7 +30,9 @@ namespace hppl {
   }
 
   real tanh(const real a) {
-    return (2.0 / (1.0 + exp(-2.0*a))) - 1.0;
+    real tmp = -2 * a;
+    tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
+    return (2.0 / (1.0 + exp(tmp))) - 1.0;
   }
 
   real linear(const real a) {
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index bf2c2e0499941..46f36db1a9177 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -996,7 +996,7 @@ TEST(Layer, LstmLayer) {
   TestConfig config;
   config.layerConfig.set_type("lstmemory");
   config.layerConfig.set_size(4);
-  config.layerConfig.set_active_type("sigmoid");
+  config.layerConfig.set_active_type("tanh");
   config.layerConfig.set_active_state_type("sigmoid");
   config.layerConfig.set_active_gate_type("sigmoid");
   config.biasSize = 28;
diff --git a/paddle/gserver/tests/test_RecurrentLayer.cpp b/paddle/gserver/tests/test_RecurrentLayer.cpp
index 9b933b153d158..1c8497e8c526f 100644
--- a/paddle/gserver/tests/test_RecurrentLayer.cpp
+++ b/paddle/gserver/tests/test_RecurrentLayer.cpp
@@ -369,7 +369,7 @@ TEST(Layer, LstmLayer) {
   LayerConfig layerConfig;
   layerConfig.set_type("lstmemory");
   layerConfig.set_active_type("relu");
-  layerConfig.set_active_state_type("sigmoid");
+  layerConfig.set_active_state_type("tanh");
   layerConfig.set_active_gate_type("sigmoid");
   layerConfig.add_inputs();
diff --git a/paddle/math/BaseMatrix.cu b/paddle/math/BaseMatrix.cu
index 8b888b1ee5e46..bc65edf39ce23 100644
--- a/paddle/math/BaseMatrix.cu
+++ b/paddle/math/BaseMatrix.cu
@@ -625,7 +625,10 @@ void BaseMatrixT<T>::squareDerivative(BaseMatrixT& b) {
   applyBinary(binary::SquareDerivative<T>(), b);
 }
 
-DEFINE_MATRIX_BINARY_OP(Tanh, b = 2.0 / (1.0 + exp(-2 * a)) - 1.0);
+DEFINE_MATRIX_BINARY_OP(Tanh,
+                        T tmp = -2 * a;
+                        tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
+                        b = 2.0 / (1.0 + std::exp(tmp)) - 1.0);
 template<>
 void BaseMatrixT<real>::tanh(BaseMatrixT& b) {
   applyBinary(binary::Tanh<real>(), b);
 }
diff --git a/paddle/math/MathFunctions.cpp b/paddle/math/MathFunctions.cpp
index da493379e3a37..6fa760de1fa6a 100644
--- a/paddle/math/MathFunctions.cpp
+++ b/paddle/math/MathFunctions.cpp
@@ -160,7 +160,10 @@ void vLog1p(const int n, const T* a, T* r) {
       binary::vLog1p<T>(), const_cast<T*>(a), r, 1, n, n, n);
 }
 
-DEFINE_MATRIX_BINARY_OP(vTanh, b = 2.0 / (1.0 + std::exp(-2 * a)) - 1.0);
+DEFINE_MATRIX_BINARY_OP(vTanh,
+                        T tmp = -2 * a;
+                        tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
+                        b = 2.0 / (1.0 + std::exp(tmp)) - 1.0);
 template <class T>
 void vTanh(const int n, const T* a, T* r) {
   hl_cpu_apply_binary_op<T, binary::vTanh<T>, 0, 0>(
diff --git a/paddle/math/tests/CMakeLists.txt b/paddle/math/tests/CMakeLists.txt
index eb72f11e1c653..247be983ba329 100644
--- a/paddle/math/tests/CMakeLists.txt
+++ b/paddle/math/tests/CMakeLists.txt
@@ -13,3 +13,4 @@ add_simple_unittest(test_sparseMatrixCompare)
 add_simple_unittest(test_perturbation)
 add_simple_unittest(test_CpuGpuVector)
 add_simple_unittest(test_Allocator)
+add_simple_unittest(test_FPException)
diff --git a/paddle/math/tests/test_FPException.cpp b/paddle/math/tests/test_FPException.cpp
new file mode 100644
index 0000000000000..322e69d7ee090
--- /dev/null
+++ b/paddle/math/tests/test_FPException.cpp
@@ -0,0 +1,92 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+
+/**
+ * This test is about floating point calculation exceptions.
+ * Paddle catches FE_INVALID, FE_DIVBYZERO and FE_OVERFLOW exceptions.
+ *
+ * Some exceptions occur in the middle of a set of formulas
+ * and can be circumvented by some tricks.
+ * For example, when calculating tanh
+ *   b = 2.0 / (1.0 + exp(-2 * a)) - 1.0
+ *
+ * if the result of (-2 * a) is too large,
+ * an FE_OVERFLOW exception occurs when calculating exp.
+ * But the result of tanh itself cannot overflow,
+ * so we can add a trick to prevent exp from computing an excessive value.
+ */
+#include <fenv.h>
+#include <gtest/gtest.h>
+#include "paddle/math/Matrix.h"
+
+using namespace paddle;  // NOLINT
+
+void SetTensorValue(Matrix& matrix, real value) {
+  int height = matrix.getHeight();
+  int width = matrix.getWidth();
+  int stride = matrix.getStride();
+  real* data = matrix.getData();
+  for (int i = 0; i < height; i++) {
+    int j = rand() % width;  // NOLINT
+    if (typeid(matrix) == typeid(CpuMatrix)) {
+      data[i * stride + j] = value;
+    } else if (typeid(matrix) == typeid(GpuMatrix)) {
+      hl_memcpy(&data[i * stride + j], &value, sizeof(real));
+    } else {
+    }
+  }
+}
+
+template <typename Matrix>
+void testTanh(real illegal) {
+  MatrixPtr A = std::make_shared<Matrix>(10, 10);
+  MatrixPtr B = std::make_shared<Matrix>(10, 10);
+  A->randomizeUniform();
+  B->randomizeUniform();
+
+  SetTensorValue(*A, illegal);
+
+  A->tanh(*B);
+}
+
+template <typename Matrix>
+void testSigmoid(real illegal) {
+  MatrixPtr A = std::make_shared<Matrix>(10, 10);
+  MatrixPtr B = std::make_shared<Matrix>(10, 10);
+  A->randomizeUniform();
+  B->randomizeUniform();
+
+  SetTensorValue(*A, illegal);
+
+  A->sigmoid(*B);
+}
+
+TEST(fp, overflow) {
+  for (auto illegal : {-90.0, 90.0}) {
+    LOG(INFO) << " illegal=" << illegal;
+    testTanh<CpuMatrix>(illegal);
+    testSigmoid<CpuMatrix>(illegal);
+  }
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  initMain(argc, argv);
+
+  feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
+  return RUN_ALL_TESTS();
+}

From 2a936928358c4b81dd4419e93b4ece68f01b6f5a Mon Sep 17 00:00:00 2001
From: hedaoyuan
Date: Fri, 4 Nov 2016 18:22:06 +0800
Subject: [PATCH 2/4] fix compile bug in osx

---
 paddle/cuda/src/hl_cpu_functions.cc    | 2 +-
 paddle/math/BaseMatrix.cu              | 2 +-
 paddle/math/MathFunctions.cpp          | 2 +-
 paddle/math/tests/test_FPException.cpp | 1 +
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/paddle/cuda/src/hl_cpu_functions.cc b/paddle/cuda/src/hl_cpu_functions.cc
index 5f45f4c9d53ed..b8352c2d537fb 100644
--- a/paddle/cuda/src/hl_cpu_functions.cc
+++ b/paddle/cuda/src/hl_cpu_functions.cc
@@ -30,7 +30,7 @@ namespace hppl {
   }
 
   real tanh(const real a) {
-    real tmp = -2 * a;
+    real tmp = -2.0 * a;
     tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
     return (2.0 / (1.0 + exp(tmp))) - 1.0;
   }
diff --git a/paddle/math/BaseMatrix.cu b/paddle/math/BaseMatrix.cu
index bc65edf39ce23..d81b99e544158 100644
--- a/paddle/math/BaseMatrix.cu
+++ b/paddle/math/BaseMatrix.cu
@@ -626,7 +626,7 @@ void BaseMatrixT<T>::squareDerivative(BaseMatrixT& b) {
 }
 
 DEFINE_MATRIX_BINARY_OP(Tanh,
-                        T tmp = -2 * a;
+                        T tmp = -2.0 * a;
                         tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
                         b = 2.0 / (1.0 + std::exp(tmp)) - 1.0);
 template<>
diff --git a/paddle/math/MathFunctions.cpp b/paddle/math/MathFunctions.cpp
index 6fa760de1fa6a..673e6221cb917 100644
--- a/paddle/math/MathFunctions.cpp
+++ b/paddle/math/MathFunctions.cpp
@@ -161,7 +161,7 @@ void vLog1p(const int n, const T* a, T* r) {
 }
 
 DEFINE_MATRIX_BINARY_OP(vTanh,
-                        T tmp = -2 * a;
+                        T tmp = -2.0 * a;
                         tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
                         b = 2.0 / (1.0 + std::exp(tmp)) - 1.0);
 template <class T>
diff --git a/paddle/math/tests/test_FPException.cpp b/paddle/math/tests/test_FPException.cpp
index 322e69d7ee090..b86b392ef5481 100644
--- a/paddle/math/tests/test_FPException.cpp
+++ b/paddle/math/tests/test_FPException.cpp
@@ -32,6 +32,7 @@ limitations under the License. */
 #include <fenv.h>
 #include <gtest/gtest.h>
 #include "paddle/math/Matrix.h"
+#include "paddle/utils/Excepts.h"

From 8c2ad8da22c2a778bd5c5516d2ebb3b9c2501db6 Mon Sep 17 00:00:00 2001
From: hedaoyuan
Date: Fri, 4 Nov 2016 19:29:35 +0800
Subject: [PATCH 3/4] remove errno, paddle trapping floating point exceptions
 with feenableexcept

---
 paddle/math/Matrix.cpp | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index aaeae98f0d28b..40bbad22df852 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -3406,9 +3406,7 @@ void CpuMatrix::tanh(Matrix& output) {
   size_t dim = getWidth();
   CHECK_EQ(output.getHeight(), numSamples);
   CHECK_EQ(output.getWidth(), dim);
-  errno = 0;
   vTanh(numSamples * dim, getData(), output.getData());
-  CHECK_EQ(errno, 0) << "vTanh error";
 }
 
 void CpuMatrix::tanhDerivative(Matrix& output) {
@@ -3430,10 +3428,8 @@ void CpuMatrix::softrelu(Matrix& output) {
       out[j] = x;
     }
   }
-  errno = 0;
   vExp(numSamples * dim, output.getData(), output.getData());
   vLog1p(numSamples * dim, output.getData(), output.getData());
-  CHECK_EQ(errno, 0) << "vExp+vLog1p error";
 }
 
 void CpuMatrix::softreluDerivative(Matrix& output) {
@@ -3448,9 +3444,7 @@ void CpuMatrix::softreluDerivative(Matrix& output) {
   MatrixPtr tmpMat = Matrix::create(numSamples, dim);
   real* tmp = tmpMat->getData();
 
-  errno = 0;
   vExp(size, output.getData(), tmpMat->getData());
-  CHECK_EQ(errno, 0) << "vExp error";
 
   for (size_t i = 0; i < size; ++i) {
     grad[i] *= (1.0 - 1.0 / tmp[i]);
@@ -3473,10 +3467,7 @@ void CpuMatrix::scaledTanh(Matrix& output, real p1, real p2) {
     out[i] = p2 * in[i];
   }
 
-  // out = tanh(out)
-  errno = 0;
   vTanh(numSamples * dim, out, out);
-  CHECK_EQ(errno, 0) << "vTanh error";
 
   // out = p1 * out
   for (size_t i = 0; i < numSamples * dim; ++i) {

From 250238598a73c58fd79fd501b08caf2aae0614ea Mon Sep 17 00:00:00 2001
From: hedaoyuan
Date: Tue, 8 Nov 2016 13:13:36 +0800
Subject: [PATCH 4/4] small fix

---
 paddle/math/tests/test_FPException.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/paddle/math/tests/test_FPException.cpp b/paddle/math/tests/test_FPException.cpp
index b86b392ef5481..174278c2aaac4 100644
--- a/paddle/math/tests/test_FPException.cpp
+++ b/paddle/math/tests/test_FPException.cpp
@@ -48,6 +48,7 @@ void SetTensorValue(Matrix& matrix, real value) {
     } else if (typeid(matrix) == typeid(GpuMatrix)) {
      hl_memcpy(&data[i * stride + j], &value, sizeof(real));
     } else {
+      LOG(FATAL) << "should not reach here";
     }
   }
 }
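
Note for reviewers: the failure mode and the clamp can be exercised outside Paddle with a short standalone program. The sketch below is illustrative only; it is not part of the patch series, and the file and function names (tanh_overflow_demo.cpp, naiveTanh, safeTanh) are made up. It assumes single-precision arithmetic, which is Paddle's default `real` and the case where exp(180) overflows; in double precision the same blow-up only appears for inputs beyond roughly +-355.

// tanh_overflow_demo.cpp -- standalone sketch, not part of the patch series.
// Reproduces the FE_OVERFLOW this series fixes and shows the EXP_MAX_INPUT
// clamp. Build with: g++ -std=c++11 -O0 tanh_overflow_demo.cpp && ./a.out
#include <cfenv>
#include <cmath>
#include <cstdio>

static const float kExpMaxInput = 40.0f;  // mirrors EXP_MAX_INPUT in hl_base.h

// Pre-patch formulation: for a = -90 the argument of exp is 180, and
// exp(180.0f) overflows a float, raising FE_OVERFLOW -- even though the
// final tanh value is a perfectly representable -1.
float naiveTanh(float a) {
  return 2.0f / (1.0f + std::exp(-2.0f * a)) - 1.0f;
}

// Post-patch formulation: clamp the exponent first. exp(40) is about 2.35e17,
// well within float range, and 2 / (1 + exp(40)) - 1 is already -1 to float
// precision, so the clamp does not change any representable result.
float safeTanh(float a) {
  float tmp = -2.0f * a;
  tmp = (tmp > kExpMaxInput) ? kExpMaxInput : tmp;
  return 2.0f / (1.0f + std::exp(tmp)) - 1.0f;
}

int main() {
  volatile float x = -90.0f;  // volatile keeps the compiler from folding exp()

  std::feclearexcept(FE_ALL_EXCEPT);
  float naive = naiveTanh(x);
  int naiveOverflow = std::fetestexcept(FE_OVERFLOW);

  std::feclearexcept(FE_ALL_EXCEPT);
  float safe = safeTanh(x);
  int safeOverflow = std::fetestexcept(FE_OVERFLOW);

  // Both calls return -1, but only the naive one raises FE_OVERFLOW. Under
  // feenableexcept(FE_OVERFLOW), as installed in test_FPException.cpp, the
  // naive version would instead die with SIGFPE.
  std::printf("naive = %f (FE_OVERFLOW: %s)\n", naive,
              naiveOverflow ? "raised" : "clear");
  std::printf("safe  = %f (FE_OVERFLOW: %s)\n", safe,
              safeOverflow ? "raised" : "clear");
  return 0;
}

Only the upper clamp is needed: for a large positive input, -2 * a is a large negative exponent and exp merely underflows toward zero, and FE_UNDERFLOW is not among the exceptions the test traps.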