fix floating-point overflow problem of tanh #355

Merged (4 commits, Nov 8, 2016)
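All of the tanh code paths compute 2.0 / (1.0 + exp(-2 * a)) - 1.0. When a is a large negative value, -2 * a becomes large and positive, exp overflows, and because Paddle enables the FE_OVERFLOW trap the process dies with SIGFPE, even though tanh(a) is simply -1 there. The fix clamps the exp argument at EXP_MAX_INPUT (40.0) in every implementation (CPU scalar, AVX, and the matrix ops). Below is a minimal standalone sketch of the failure mode and the guard; it is illustrative only (plain float, names not from the patch):

```cpp
#include <cmath>
#include <cstdio>

// Illustrative only: Paddle's code uses the `real` typedef and the
// EXP_MAX_INPUT macro from hl_base.h; this sketch uses plain float.
constexpr float kExpMaxInput = 40.0f;

float tanh_naive(float a) {
  // For a < ~-44.4 the argument of exp exceeds ~88.7 and overflows float,
  // raising FE_OVERFLOW even though tanh(a) is just -1 in that range.
  return 2.0f / (1.0f + std::exp(-2.0f * a)) - 1.0f;
}

float tanh_guarded(float a) {
  float tmp = -2.0f * a;
  if (tmp > kExpMaxInput) tmp = kExpMaxInput;  // the clamp added by this PR
  return 2.0f / (1.0f + std::exp(tmp)) - 1.0f;
}

int main() {
  float a = -90.0f;  // one of the "illegal" values used by test_FPException
  std::printf("exp overflowed: %d\n", std::isinf(std::exp(-2.0f * a)));
  std::printf("naive:   %g\n", tanh_naive(a));
  std::printf("guarded: %g\n", tanh_guarded(a));
  return 0;
}
```

Without the trap enabled, the naive version happens to return -1 anyway because the infinity propagates benignly; the problem only surfaces once FE_OVERFLOW is trapped, which is exactly what the new unit test exercises.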
9 changes: 9 additions & 0 deletions paddle/cuda/include/hl_base.h
@@ -209,6 +209,15 @@ typedef struct {
#define HL_FLOAT_MIN 2.2250738585072014e-308
#endif


/**
* The maximum input value for exp, used to avoid the overflow problem.
*
* Currently only used for the tanh function.
*/
#define EXP_MAX_INPUT 40.0


/**
* @brief DIVUP(x, y) is similar to ceil(x / y).
* @note For CUDA, DIVUP will be used to specify
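A note on the 40.0 threshold (my reasoning, not stated in the patch): exp(40) ≈ 2.35e17 is far below FLT_MAX ≈ 3.4e38, so the clamped call can never overflow even in single precision, and 2 / (1 + exp(40)) ≈ 8.5e-18 is far below the float epsilon (≈1.19e-7), so whenever the clamp kicks in the tanh result still rounds to exactly -1. A quick check:

```cpp
#include <cfloat>
#include <cmath>
#include <cstdio>

int main() {
  // exp(EXP_MAX_INPUT) stays comfortably inside float range...
  std::printf("exp(40) = %g  (FLT_MAX = %g)\n", std::exp(40.0f), FLT_MAX);
  // ...and the clamped tanh value is -1 to within float precision.
  std::printf("2/(1+exp(40)) - 1 = %.9g\n",
              2.0f / (1.0f + std::exp(40.0f)) - 1.0f);
  return 0;
}
```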
2 changes: 2 additions & 0 deletions paddle/cuda/src/hl_avx_functions.cc
@@ -38,7 +38,9 @@ namespace hppl {
}

__m256 tanh(const __m256 a) {
__m256 max = _mm256_set1_ps(EXP_MAX_INPUT);
__m256 tmp = _mm256_mul_ps(_mm256_set1_ps(-2.0f), a);
tmp = _mm256_min_ps(tmp, max);
tmp = exp(tmp);
return _mm256_sub_ps(
_mm256_div_ps(_mm256_set1_ps(2.0f),
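The AVX path applies the same guard lane-wise with _mm256_min_ps before calling hppl's vectorized exp. That exp is internal to Paddle, so the sketch below only demonstrates the clamp step on eight floats (assumes an AVX-capable build, e.g. compiled with -mavx):

```cpp
#include <immintrin.h>
#include <cstdio>

// Sketch of the lane-wise clamp only; it shows what _mm256_min_ps
// contributes to the guarded AVX tanh.
int main() {
  float in[8] = {-90.f, -50.f, -10.f, -1.f, 0.f, 1.f, 10.f, 90.f};
  float out[8];
  __m256 a   = _mm256_loadu_ps(in);
  __m256 max = _mm256_set1_ps(40.0f);                    // EXP_MAX_INPUT
  __m256 tmp = _mm256_mul_ps(_mm256_set1_ps(-2.0f), a);  // -2 * a
  tmp = _mm256_min_ps(tmp, max);                         // clamp every lane
  _mm256_storeu_ps(out, tmp);
  for (int i = 0; i < 8; ++i)
    std::printf("a=%6.1f  exp-arg=%6.1f\n", in[i], out[i]);
  return 0;
}
```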
4 changes: 3 additions & 1 deletion paddle/cuda/src/hl_cpu_functions.cc
@@ -30,7 +30,9 @@ namespace hppl {
}

real tanh(const real a) {
return (2.0 / (1.0 + exp(-2.0*a))) - 1.0;
real tmp = -2.0 * a;
tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
return (2.0 / (1.0 + exp(tmp))) - 1.0;
}

real linear(const real a) {
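Note that the guard is one-sided: only an upper clamp is needed. For large positive a, -2 * a is very negative and exp merely underflows toward zero, which is harmless (the result tends to +1, and underflow is flushed to zero rather than trapped, as discussed in the review thread below). A small check of both extremes with the clamped formula:

```cpp
#include <cmath>
#include <cstdio>

// Illustrative check of the one-sided clamp: positive inputs underflow
// harmlessly toward tanh = +1, negative inputs hit the clamp and give -1.
int main() {
  for (float a : {90.0f, 44.0f, -44.0f, -90.0f}) {
    float tmp = -2.0f * a;
    if (tmp > 40.0f) tmp = 40.0f;  // EXP_MAX_INPUT
    std::printf("a=%6.1f  tanh=%g\n", a, 2.0f / (1.0f + std::exp(tmp)) - 1.0f);
  }
  return 0;
}
```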
2 changes: 1 addition & 1 deletion paddle/gserver/tests/test_LayerGrad.cpp
@@ -996,7 +996,7 @@ TEST(Layer, LstmLayer) {
TestConfig config;
config.layerConfig.set_type("lstmemory");
config.layerConfig.set_size(4);
config.layerConfig.set_active_type("sigmoid");
config.layerConfig.set_active_type("tanh");
config.layerConfig.set_active_state_type("sigmoid");
config.layerConfig.set_active_gate_type("sigmoid");
config.biasSize = 28;
2 changes: 1 addition & 1 deletion paddle/gserver/tests/test_RecurrentLayer.cpp
@@ -369,7 +369,7 @@ TEST(Layer, LstmLayer) {
LayerConfig layerConfig;
layerConfig.set_type("lstmemory");
layerConfig.set_active_type("relu");
layerConfig.set_active_state_type("sigmoid");
layerConfig.set_active_state_type("tanh");
layerConfig.set_active_gate_type("sigmoid");

layerConfig.add_inputs();
5 changes: 4 additions & 1 deletion paddle/math/BaseMatrix.cu
@@ -625,7 +625,10 @@ void BaseMatrixT<T>::squareDerivative(BaseMatrixT& b) {
applyBinary(binary::SquareDerivative<T>(), b);
}

DEFINE_MATRIX_BINARY_OP(Tanh, b = 2.0 / (1.0 + exp(-2 * a)) - 1.0);
DEFINE_MATRIX_BINARY_OP(Tanh,
T tmp = -2.0 * a;
tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
b = 2.0 / (1.0 + std::exp(tmp)) - 1.0);
template<>
void BaseMatrixT<real>::tanh(BaseMatrixT& b) {
applyBinary(binary::Tanh<real>(), b);
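For readers unfamiliar with DEFINE_MATRIX_BINARY_OP: the macro is not expanded here, but assuming it generates an element-wise binary functor consumed by applyBinary, the guarded Tanh op is roughly equivalent to the hypothetical functor below (names illustrative, not Paddle's actual expansion):

```cpp
#include <cmath>

// Hypothetical stand-in for the macro expansion; the real functor lives in
// paddle/math and is applied element-wise by applyBinary.
template <class T>
struct TanhOp {
  // a: input element, b: output element
  void operator()(const T a, T& b) const {
    T tmp = -2.0 * a;
    if (tmp > 40.0) tmp = 40.0;  // EXP_MAX_INPUT
    b = 2.0 / (1.0 + std::exp(tmp)) - 1.0;
  }
};
```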
5 changes: 4 additions & 1 deletion paddle/math/MathFunctions.cpp
@@ -160,7 +160,10 @@ void vLog1p(const int n, const T* a, T* r) {
binary::vLog1p<T>(), const_cast<T*>(a), r, 1, n, n, n);
}

DEFINE_MATRIX_BINARY_OP(vTanh, b = 2.0 / (1.0 + std::exp(-2 * a)) - 1.0);
DEFINE_MATRIX_BINARY_OP(vTanh,
T tmp = -2.0 * a;
tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
b = 2.0 / (1.0 + std::exp(tmp)) - 1.0);
template<class T>
void vTanh(const int n, const T* a, T* r) {
hl_cpu_apply_binary_op<T, binary::vTanh<T>, 0, 0>(
9 changes: 0 additions & 9 deletions paddle/math/Matrix.cpp
@@ -3406,9 +3406,7 @@ void CpuMatrix::tanh(Matrix& output) {
size_t dim = getWidth();
CHECK_EQ(output.getHeight(), numSamples);
CHECK_EQ(output.getWidth(), dim);
errno = 0;
vTanh(numSamples * dim, getData(), output.getData());
CHECK_EQ(errno, 0) << "vTanh error";
}

Collaborator

Let's keep the CHECK. It will almost never fire, but if EXP_MAX_INPUT is ever changed by mistake it would still guard against a badly wrong result.

Contributor Author

Paddle only traps FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW. errno also reports FE_UNDERFLOW, which Paddle handles by flushing to zero, so this check should not be kept.

Collaborator

OK

void CpuMatrix::tanhDerivative(Matrix& output) {
@@ -3430,10 +3428,8 @@ void CpuMatrix::softrelu(Matrix& output) {
out[j] = x;
}
}
errno = 0;
vExp(numSamples * dim, output.getData(), output.getData());
vLog1p(numSamples * dim, output.getData(), output.getData());
CHECK_EQ(errno, 0) << "vExp+vLog1p error";
}

void CpuMatrix::softreluDerivative(Matrix& output) {
@@ -3448,9 +3444,7 @@ void CpuMatrix::softreluDerivative(Matrix& output) {
MatrixPtr tmpMat = Matrix::create(numSamples, dim);
real* tmp = tmpMat->getData();

errno = 0;
vExp(size, output.getData(), tmpMat->getData());
CHECK_EQ(errno, 0) << "vExp error";

for (size_t i = 0; i < size; ++i) {
grad[i] *= (1.0 - 1.0 / tmp[i]);
@@ -3473,10 +3467,7 @@ void CpuMatrix::scaledTanh(Matrix& output, real p1, real p2) {
out[i] = p2 * in[i];
}

// out = tanh(out)
errno = 0;
vTanh(numSamples * dim, out, out);
CHECK_EQ(errno, 0) << "vTanh error";

// out = p1 * out
for (size_t i = 0; i < numSamples * dim; ++i) {
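On the dropped errno CHECKs (see the thread above): Paddle deliberately traps only FE_INVALID, FE_DIVBYZERO and FE_OVERFLOW, while errno is also set on underflow, which Paddle treats as flush-to-zero. A glibc-specific sketch of that distinction (feenableexcept is a GNU extension, and whether errno is set on underflow is implementation-defined):

```cpp
#include <fenv.h>
#include <cerrno>
#include <cmath>
#include <cstdio>

int main() {
  // The trap set enabled by Paddle (see main() in the new test below).
  feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);

  errno = 0;
  feclearexcept(FE_ALL_EXCEPT);
  double r = std::exp(-1000.0);  // underflows to 0; no trap fires
  std::printf("exp(-1000) = %g, errno = %d, FE_UNDERFLOW raised = %d\n",
              r, errno, fetestexcept(FE_UNDERFLOW) != 0);
  // On glibc errno is typically ERANGE here, so a CHECK_EQ(errno, 0)
  // would flag a case Paddle considers perfectly fine.
  return 0;
}
```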
1 change: 1 addition & 0 deletions paddle/math/tests/CMakeLists.txt
@@ -13,3 +13,4 @@ add_simple_unittest(test_sparseMatrixCompare)
add_simple_unittest(test_perturbation)
add_simple_unittest(test_CpuGpuVector)
add_simple_unittest(test_Allocator)
add_simple_unittest(test_FPException)
94 changes: 94 additions & 0 deletions paddle/math/tests/test_FPException.cpp
@@ -0,0 +1,94 @@
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */


/**
 * This test covers floating-point exceptions.
 * Paddle traps the FE_INVALID, FE_DIVBYZERO and FE_OVERFLOW exceptions.
 *
 * Some exceptions occur in the middle of a chain of formulas
 * and can be avoided with a small trick.
 * For example, tanh is computed as
 *   b = 2.0 / (1.0 + exp(-2 * a)) - 1.0
 *
 * If (-2 * a) is too large, calculating exp raises FE_OVERFLOW,
 * even though the tanh result itself cannot overflow.
 * So we clamp the argument to keep exp from seeing an excessive value.
 */
#include <fenv.h>
#include <gtest/gtest.h>
#include "paddle/math/Matrix.h"
#include "paddle/utils/Excepts.h"

using namespace paddle; // NOLINT

void SetTensorValue(Matrix& matrix, real value) {
int height = matrix.getHeight();
int width = matrix.getWidth();
int stride = matrix.getStride();
real* data = matrix.getData();
for (int i = 0; i < height; i++) {
int j = rand() % width; // NOLINT
if (typeid(matrix) == typeid(CpuMatrix)) {
data[i * stride + j] = value;
} else if (typeid(matrix) == typeid(GpuMatrix)) {
hl_memcpy(&data[i * stride + j], &value, sizeof(real));
} else {
Collaborator

Add a LOG(FATAL) << "Unexpected branch" here.

LOG(FATAL) << "should not reach here";
}
}
}

template<typename Matrix>
void testTanh(real illegal) {
MatrixPtr A = std::make_shared<Matrix>(10, 10);
MatrixPtr B = std::make_shared<Matrix>(10, 10);
A->randomizeUniform();
B->randomizeUniform();

SetTensorValue(*A, illegal);

A->tanh(*B);
}

template<typename Matrix>
void testSigmoid(real illegal) {
MatrixPtr A = std::make_shared<Matrix>(10, 10);
MatrixPtr B = std::make_shared<Matrix>(10, 10);
A->randomizeUniform();
B->randomizeUniform();

SetTensorValue(*A, illegal);

A->sigmoid(*B);
}

TEST(fp, overflow) {
for (auto illegal : {-90.0, 90.0}) {
LOG(INFO) << " illegal=" << illegal;
testTanh<CpuMatrix>(illegal);
testSigmoid<CpuMatrix>(illegal);
}
}

int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);

feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
return RUN_ALL_TESTS();
}