refine Huber loss, add huber_regression_cost #3571

Merged: 12 commits, Aug 29, 2017
doc/api/v2/config/layer.rst (11 changes: 8 additions, 3 deletions)
@@ -419,9 +419,14 @@ multi_binary_label_cross_entropy_cost
.. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost
:noindex:

huber_cost
----------
.. autoclass:: paddle.v2.layer.huber_cost
huber_regression_cost
-------------------------
.. autoclass:: paddle.v2.layer.huber_regression_cost
:noindex:

huber_classification_cost
-------------------------
.. autoclass:: paddle.v2.layer.huber_classification_cost
:noindex:

lambda_cost
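As an aside to the documentation change above, here is a minimal usage sketch of the two layers it documents. This is illustrative only and not part of the PR: the network, layer names, and sizes are invented, and the keyword signatures are assumed from the v2 layer API.

import paddle.v2 as paddle

# A toy one-output regressor; every name here is made up for illustration.
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
pred = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear())

# Robust regression cost: quadratic for small errors, linear beyond delta.
reg_cost = paddle.layer.huber_regression_cost(input=pred, label=y, delta=1.0)

# Robust two-class classification cost: integer labels in {0, 1}.
lbl = paddle.layer.data(name='lbl', type=paddle.data_type.integer_value(2))
cls_cost = paddle.layer.huber_classification_cost(input=pred, label=lbl)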
paddle/gserver/layers/CostLayer.cpp (139 changes: 98 additions, 41 deletions)
@@ -572,13 +572,8 @@ void MultiBinaryLabelCrossEntropy::backwardImp(Matrix& output,
}
}

//
// Huber loss for robust 2-classes classification
//
REGISTER_LAYER(huber, HuberTwoClass);

bool HuberTwoClass::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
bool HuberCost::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
CostLayer::init(layerMap, parameterMap);
if (useGpu_) {
tmpCpuInput_.reserve(inputLayers_.size());
@@ -589,69 +584,131 @@ bool HuberTwoClass::init(const LayerMap& layerMap,
return true;
}

void HuberTwoClass::forwardImp(Matrix& output, Argument& label, Matrix& cost) {
void HuberCost::forwardImp(Matrix& output, Argument& label, Matrix& cost) {
if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
}
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
}
forwardImpIn(output, label, cost);
}

void HuberTwoClass::forwardImpIn(Matrix& output,
Argument& label,
Matrix& target) {
//
// Huber loss for robust regression.
//
REGISTER_LAYER(huber_regression, HuberRegressionLoss);

bool HuberRegressionLoss::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
HuberCost::init(layerMap, parameterMap);
delta_ = config_.delta();
return true;
}

void HuberRegressionLoss::forwardImp(Matrix& output,
Argument& label,
Matrix& target) {
HuberCost::forwardImp(output, label, target);
size_t numSamples = target.getHeight();
size_t dim = output.getWidth();
CHECK(label.value);
CHECK_EQ((*label.value).getHeight(), numSamples);
CHECK_EQ(output.getHeight(), numSamples);
CHECK_EQ(dim, (*label.value).getWidth());
CHECK_EQ(target.getWidth(), (size_t)1);

real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
real* lbl =
useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData();
std::vector<real> cost(numSamples, 0);
for (size_t i = 0; i < numSamples; ++i) {
for (size_t j = 0; j < dim; ++j) {
int index = i * dim + j;
real a = std::abs(lbl[index] - out[index]);
if (a <= delta_)
cost[i] += a * a / 2;
else
cost[i] += delta_ * (a - delta_ / 2);
}
}
target.copyFrom(cost.data(), numSamples);
}

void HuberRegressionLoss::backwardImp(Matrix& output,
Argument& label,
Matrix& outputG) {
size_t numSamples = output.getHeight();
size_t dim = output.getWidth();
real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
real* lbl =
useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData();
real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData();
for (size_t i = 0; i < numSamples; ++i) {
for (size_t j = 0; j < dim; ++j) {
int index = i * dim + j;
real a = lbl[index] - out[index];
if (std::abs(a) <= delta_)
grad[index] += -a;
else
grad[index] += a > 0 ? -delta_ : delta_;
}
}
if (useGpu_) outputG.copyFrom(grad, numSamples * dim);
}
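Aside: the forward/backward pair above can be cross-checked with a few lines of NumPy. This sketch is mine, not PR code; out, lbl, and delta are placeholder values, and it mirrors the C++ convention a = lbl - out.

import numpy as np

def huber_regression(out, lbl, delta=1.0):
    # Per-sample cost (summed over dim) and gradient w.r.t. out.
    a = lbl - out
    quad = np.abs(a) <= delta
    elem = np.where(quad, 0.5 * a * a, delta * (np.abs(a) - 0.5 * delta))
    grad = np.where(quad, -a, np.where(a > 0, -delta, delta))
    return elem.sum(axis=1), grad

out = np.array([[0.5, 2.0], [1.0, -3.0]])
lbl = np.array([[1.0, 0.0], [1.0, 1.0]])
cost, grad = huber_regression(out, lbl, delta=1.0)
# cost[0] = 0.5 * 0.5**2 + 1.0 * (2.0 - 0.5) = 1.625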

//
// Huber loss for robust 2-classes classification
//
REGISTER_LAYER(huber_classification, HuberTwoClassification);

bool HuberTwoClassification::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
return HuberCost::init(layerMap, parameterMap);
}

void HuberTwoClassification::forwardImp(Matrix& output,
Argument& label,
Matrix& target) {
HuberCost::forwardImp(output, label, target);
size_t numSamples = target.getHeight();
CHECK(label.ids);
CHECK_EQ((*label.ids).getSize(), numSamples);
CHECK_EQ(output.getHeight(), numSamples);
CHECK_EQ(output.getWidth(), (size_t)1);
CHECK_EQ(target.getWidth(), (size_t)1);

real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData();
std::vector<real> cost(numSamples);
std::vector<real> cost(numSamples, 0);
for (size_t i = 0; i < numSamples; ++i) {
int y = 2 * lbl[i] - 1;
if (out[i] * y < -1)
cost[i] = -4 * out[i] * y;
else if (out[i] * y < 1)
cost[i] = (1 - out[i] * y) * (1 - out[i] * y);
else
cost[i] = 0;
real a = out[i] * y;
if (a < -1)
cost[i] = -4 * a;
else if (a < 1)
cost[i] = (1 - a) * (1 - a);
}
target.copyFrom(cost.data(), numSamples);
}

void HuberTwoClass::backwardImp(Matrix& outputValue,
Argument& label,
Matrix& outputGrad) {
if (useGpu_) {
backwardImpIn(
*tmpCpuInput_[0].value, tmpCpuInput_[1], *tmpCpuInput_[0].grad);
outputGrad.copyFrom(*tmpCpuInput_[0].grad);
} else {
backwardImpIn(outputValue, label, outputGrad);
}
}

void HuberTwoClass::backwardImpIn(Matrix& output,
Argument& label,
Matrix& outputG) {
void HuberTwoClassification::backwardImp(Matrix& output,
Argument& label,
Matrix& outputG) {
size_t numSamples = output.getHeight();
real* out = output.getData();
real* grad = outputG.getData();
int* lbl = (*label.ids).getData();
real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData();
real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData();
for (size_t i = 0; i < numSamples; ++i) {
Contributor:
forwardImp is likely broken in the multi-dimensional case; test_LayerGrad probably passes only because forward and backward read the data in the same way.

Contributor Author:
Done

int y = 2 * lbl[i] - 1;
if (y * out[i] < -1)
real a = out[i] * y;
if (a < -1)
grad[i] += -4 * y;
else if (y * out[i] < 1)
grad[i] += -2 * (1 - y * out[i]) * y;
else if (a < 1)
grad[i] += -2 * (1 - a) * y;
}
if (useGpu_) outputG.copyFrom(grad, numSamples);
}
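Aside: the same kind of NumPy cross-check for the classification variant (again illustrative, not PR code; out and lbl are made-up values).

import numpy as np

def huber_classification(out, lbl):
    y = 2 * lbl - 1                  # map labels {0, 1} -> {-1, +1}
    a = out * y
    cost = np.where(a < -1, -4.0 * a, np.where(a < 1, (1 - a) ** 2, 0.0))
    grad = np.where(a < -1, -4.0 * y, np.where(a < 1, -2 * (1 - a) * y, 0.0))
    return cost, grad

out = np.array([-1.5, 0.2, 2.0])
lbl = np.array([1, 1, 1])
cost, grad = huber_classification(out, lbl)
# a = [-1.5, 0.2, 2.0] -> cost = [6.0, 0.64, 0.0], grad = [-4.0, -1.6, 0.0]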

/**
* This cost layer compute the sum of its input as loss.
* \f[
paddle/gserver/layers/CostLayer.h (65 changes: 48 additions, 17 deletions)
@@ -304,37 +304,68 @@ class MultiBinaryLabelCrossEntropy : public CostLayer {
Matrix& outputGrad) override;
};

/**
* Huber loss for robust 2-classes classification.
*
* For label={0, 1}, let y=2*label-1. Given output f, the loss is:
* \f[
* Loss =
* \left\{\begin{matrix}
* 4 * y * f & \textit{if} \ \ y* f < -1 \\
* (1 - y * f)^2 & \textit{if} \ \ -1 < y * f < 1 \\
* 0 & \textit{otherwise}
* \end{matrix}\right.
* \f]
/*
* A base layer for HuberRegressionLoss and HuberTwoClassification.
*/
class HuberTwoClass : public CostLayer {
class HuberCost : public CostLayer {
public:
std::vector<Argument> tmpCpuInput_;

public:
explicit HuberTwoClass(const LayerConfig& config) : CostLayer(config) {}
explicit HuberCost(const LayerConfig& config) : CostLayer(config) {}

bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;

void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

void forwardImpIn(Matrix& output, Argument& label, Matrix& cost);
void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {}
};

/**
* Huber loss for robust regression.
*
* Given output f(x), label y and delta, the loss is:
* Loss = 0.5 * (y - f)^2, if abs(y - f) <= delta \\
* Loss = delta * abs(y - f) - 0.5 * delta^2, otherwise
*/
class HuberRegressionLoss : public HuberCost {
public:
explicit HuberRegressionLoss(const LayerConfig& config) : HuberCost(config) {}

bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;

void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

void backwardImp(Matrix& outputValue,
Argument& label,
Matrix& outputGrad) override;

void backwardImpIn(Matrix& outputValue, Argument& label, Matrix& outputGrad);
protected:
real delta_;
};

/**
* Huber loss for robust 2-classes classification.
*
* For label={0, 1}, let y=2*label-1. Given output f(x), the loss is:
* Loss = -4 * y * f, if y * f < -1 \\
* Loss = (1 - y * f)^2, if -1 < y * f < 1 \\
* Loss = 0, otherwise
*/
class HuberTwoClassification : public HuberCost {
public:
explicit HuberTwoClassification(const LayerConfig& config)
: HuberCost(config) {}

bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;

void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

void backwardImp(Matrix& outputValue,
Argument& label,
Matrix& outputGrad) override;
};

typedef std::shared_ptr<CostLayer> CostLayerPtr;
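For reference, the gradients applied in the two backwardImp implementations follow from differentiating these piecewise definitions (my derivation, not text from the PR):

\frac{\partial L_{reg}}{\partial f} =
\begin{cases}
-(y - f) & \text{if } |y - f| \le \delta \\
-\delta \, \operatorname{sign}(y - f) & \text{otherwise}
\end{cases}
\qquad
\frac{\partial L_{cls}}{\partial f} =
\begin{cases}
-4y & \text{if } yf < -1 \\
-2(1 - yf)\, y & \text{if } -1 \le yf < 1 \\
0 & \text{otherwise}
\end{cases}

These are exactly the grad[index] += -a / \pm delta and grad[i] += -4 * y / -2 * (1 - a) * y updates in CostLayer.cpp.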
paddle/gserver/tests/test_LayerGrad.cpp (22 changes: 20 additions, 2 deletions)
@@ -850,9 +850,27 @@ TEST(Layer, square_error_weighted) {
}
}

TEST(Layer, huber_regression_loss) {
TestConfig config;
config.layerConfig.set_type("huber_regression");
config.biasSize = 0;

config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0});
config.layerConfig.add_inputs();
config.layerConfig.add_inputs();

for (auto useGpu : {false, true}) {
for (auto delta : {1, 3, 5}) {
config.layerConfig.set_delta(delta);
testLayerGrad(config, "huber_regression", 100, /* trans */ false, useGpu);
}
}
}

TEST(Layer, huber_two_class) {
TestConfig config;
config.layerConfig.set_type("huber");
config.layerConfig.set_type("huber_classification");
config.biasSize = 0;

config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0});
@@ -861,7 +879,7 @@ TEST(Layer, huber_two_class) {
config.layerConfig.add_inputs();

for (auto useGpu : {false, true}) {
testLayerGrad(config, "huber", 100, /* trans */ false, useGpu);
testLayerGrad(config, "huber_two_class", 100, /* trans */ false, useGpu);
}
}

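Aside: testLayerGrad compares analytic gradients against finite differences. A self-contained NumPy analogue of that check for the regression loss (illustrative only, not the actual test harness):

import numpy as np

def huber_cost(out, lbl, delta):
    a = np.abs(lbl - out)
    return np.where(a <= delta, 0.5 * a * a, delta * (a - 0.5 * delta)).sum()

rng = np.random.RandomState(0)
out, lbl = rng.randn(10), rng.randn(10)
delta, eps = 1.0, 1e-6
a = lbl - out
analytic = np.where(np.abs(a) <= delta, -a, np.where(a > 0, -delta, delta))
numeric = np.array([
    (huber_cost(out + eps * np.eye(10)[i], lbl, delta)
     - huber_cost(out - eps * np.eye(10)[i], lbl, delta)) / (2 * eps)
    for i in range(10)])
assert np.allclose(analytic, numeric, atol=1e-4)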
proto/ModelConfig.proto (3 changes: 3 additions, 0 deletions)
@@ -499,6 +499,9 @@ message LayerConfig {
optional int32 axis = 54 [ default = 2 ];
repeated uint32 offset = 55;
repeated uint32 shape = 56;

// for HuberRegressionLoss
optional double delta = 57 [ default = 1.0 ];
}

message EvaluatorConfig {
python/paddle/trainer/config_parser.py (13 changes: 12 additions, 1 deletion)
@@ -2274,7 +2274,7 @@ def init(cls, name, inputs, device=None, coeff=1.):
define_cost('SumOfSquaresCostLayer', 'square_error')
define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy')
define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy')
define_cost('HuberTwoClass', 'huber')
define_cost('HuberTwoClassification', 'huber_classification')
define_cost('SumCost', 'sum_cost')
define_cost('SmoothL1Cost', 'smooth_l1')

@@ -2336,6 +2336,17 @@ def __init__(self, name, inputs, NDCG_num=5, max_sort_size=-1, device=None):
self.config.max_sort_size = max_sort_size


@config_layer('huber_regression')
class HuberRegressionLoss(LayerBase):
def __init__(self, name, inputs, delta=1., coeff=1., device=None):
super(HuberRegressionLoss, self).__init__(
name, 'huber_regression', 1, inputs=inputs, device=device)
config_assert(
len(self.inputs) == 2, 'HuberRegression must have 2 inputs')
self.config.delta = delta
self.config.coeff = coeff


@config_layer('nce')
class NCELayer(LayerBase):
def __init__(self,