
refine Huber loss, add huber_regression_cost #3571

Merged: 12 commits, Aug 29, 2017
11 changes: 8 additions & 3 deletions doc/api/v2/config/layer.rst
@@ -419,9 +419,14 @@ multi_binary_label_cross_entropy_cost
.. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost
:noindex:

huber_cost
----------
.. autoclass:: paddle.v2.layer.huber_cost
huber_regression_cost
-------------------------
.. autoclass:: paddle.v2.layer.huber_regression_cost
:noindex:

huber_classification_cost
-------------------------
.. autoclass:: paddle.v2.layer.huber_classification_cost
:noindex:

lambda_cost
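For orientation, here is a minimal usage sketch of the two cost layers documented above. It was written for this review rather than taken from the PR; the keyword names (input, label, delta) and the data-layer setup are assumptions inferred from the config_parser changes below.

import paddle.v2 as paddle

paddle.init(use_gpu=False, trainer_count=1)

x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))

# Regression: dense real-valued target; delta controls where the loss
# switches from quadratic to linear.
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
pred = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear())
reg_cost = paddle.layer.huber_regression_cost(input=pred, label=y, delta=1.0)

# Binary classification: integer label in {0, 1}, one-dimensional score.
lbl = paddle.layer.data(name='lbl', type=paddle.data_type.integer_value(2))
score = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear())
cls_cost = paddle.layer.huber_classification_cost(input=score, label=lbl)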
150 changes: 107 additions & 43 deletions paddle/gserver/layers/CostLayer.cpp
@@ -572,13 +572,8 @@ void MultiBinaryLabelCrossEntropy::backwardImp(Matrix& output,
}
}

//
// Huber loss for robust 2-classes classification
//
REGISTER_LAYER(huber, HuberTwoClass);

bool HuberTwoClass::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
bool HuberCost::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
CostLayer::init(layerMap, parameterMap);
if (useGpu_) {
tmpCpuInput_.reserve(inputLayers_.size());
@@ -589,69 +584,138 @@ bool HuberTwoClass::init(const LayerMap& layerMap,
return true;
}

void HuberTwoClass::forwardImp(Matrix& output, Argument& label, Matrix& cost) {
void HuberCost::forwardImp(Matrix& output, Argument& label, Matrix& cost) {
if (useGpu_) {
for (size_t i = 0; i < inputLayers_.size(); i++) {
tmpCpuInput_[i].resizeAndCopyFrom(
getInput(i), false, HPPL_STREAM_DEFAULT);
}
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
}
forwardImpIn(output, label, cost);
}

void HuberTwoClass::forwardImpIn(Matrix& output,
Argument& label,
Matrix& target) {
//
// Huber loss for robust regression.
//
REGISTER_LAYER(huber_regression, HuberRegressionLoss);

bool HuberRegressionLoss::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
HuberCost::init(layerMap, parameterMap);
delta_ = config_.delta();
return true;
}

void HuberRegressionLoss::forwardImp(Matrix& output,
Argument& label,
Matrix& target) {
HuberCost::forwardImp(output, label, target);
size_t numSamples = target.getHeight();
CHECK_EQ((*label.ids).getSize(), numSamples);
size_t dim = output.getWidth();
CHECK(label.value);
CHECK_EQ((*label.value).getHeight(), numSamples);
CHECK_EQ(output.getHeight(), numSamples);
CHECK_EQ(output.getWidth(), (size_t)1);
CHECK_EQ(dim, (*label.value).getWidth());
CHECK_EQ(target.getWidth(), (size_t)1);

real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData();
std::vector<real> cost(numSamples);
real* lbl =
useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData();
std::vector<real> cost(numSamples, 0);
for (size_t i = 0; i < numSamples; ++i) {
int y = 2 * lbl[i] - 1;
if (out[i] * y < -1)
cost[i] = -4 * out[i] * y;
else if (out[i] * y < 1)
cost[i] = (1 - out[i] * y) * (1 - out[i] * y);
else
cost[i] = 0;
for (size_t j = 0; j < dim; ++j) {
int index = i * dim + j;
real a = std::abs(lbl[index] - out[index]);
if (a <= delta_)
cost[i] += a * a / 2;
else
cost[i] += delta_ * (a - delta_ / 2);
}
}
target.copyFrom(cost.data(), numSamples);
}

void HuberTwoClass::backwardImp(Matrix& outputValue,
Argument& label,
Matrix& outputGrad) {
if (useGpu_) {
backwardImpIn(
*tmpCpuInput_[0].value, tmpCpuInput_[1], *tmpCpuInput_[0].grad);
outputGrad.copyFrom(*tmpCpuInput_[0].grad);
} else {
backwardImpIn(outputValue, label, outputGrad);
void HuberRegressionLoss::backwardImp(Matrix& output,
Argument& label,
Matrix& outputG) {
size_t numSamples = output.getHeight();
size_t dim = output.getWidth();
real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
real* lbl =
useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData();
real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData();
for (size_t i = 0; i < numSamples; ++i) {
for (size_t j = 0; j < dim; ++j) {
int index = i * dim + j;
real a = lbl[index] - out[index];
if (std::abs(a) <= delta_)
grad[index] += -a;
else
grad[index] += a > 0 ? -delta_ : delta_;
}
}
if (useGpu_) outputG.copyFrom(grad, numSamples * dim);
}
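
As a cross-check of the C++ above, the same per-sample cost and gradient in NumPy; this is an illustrative sketch written for this review, not code from the PR.

import numpy as np

def huber_regression(out, lbl, delta=1.0):
    # out, lbl: (numSamples, dim). Mirrors forwardImp/backwardImp above.
    r = lbl - out                                   # residual, `a = lbl - out`
    a = np.abs(r)
    quad = a <= delta                               # quadratic region
    cost = np.where(quad, 0.5 * a * a, delta * (a - 0.5 * delta)).sum(axis=1)
    grad = np.where(quad, -r, -delta * np.sign(r))  # d(cost)/d(out)
    return cost, grad

out = np.array([[0.0, 2.0]])
lbl = np.array([[0.5, 5.0]])
cost, grad = huber_regression(out, lbl, delta=1.0)
# cost[0] = 0.5 * 0.5**2 + 1.0 * (3.0 - 0.5) = 2.625

Note the gradient magnitude is continuous at |r| = delta (both branches give delta there), which is what keeps the loss robust to outliers while staying smooth enough for gradient training.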

void HuberTwoClass::backwardImpIn(Matrix& output,
Argument& label,
Matrix& outputG) {
size_t numSamples = output.getHeight();
real* out = output.getData();
real* grad = outputG.getData();
int* lbl = (*label.ids).getData();
//
// Huber loss for robust 2-classes classification
//
REGISTER_LAYER(huber_classification, HuberTwoClassification);

bool HuberTwoClassification::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
return HuberCost::init(layerMap, parameterMap);
}

void HuberTwoClassification::forwardImp(Matrix& output,
Argument& label,
Matrix& target) {
HuberCost::forwardImp(output, label, target);
size_t numSamples = target.getHeight();
size_t dim = output.getWidth();
CHECK(label.ids);
CHECK_EQ((*label.ids).getSize(), numSamples);
CHECK_EQ(output.getHeight(), numSamples);
CHECK_EQ(target.getWidth(), (size_t)1);

real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData();
std::vector<real> cost(numSamples, 0);
for (size_t i = 0; i < numSamples; ++i) {
int y = 2 * lbl[i] - 1;
if (y * out[i] < -1)
grad[i] += -4 * y;
else if (y * out[i] < 1)
grad[i] += -2 * (1 - y * out[i]) * y;
for (size_t j = 0; j < dim; ++j) {
Contributor commented:
This variant of the Huber loss for classification should only have a one-dimensional output; a multi-dimensional output should not appear here. The classification case does not need to be changed to multiple dimensions.

Author replied:
Done, it has been changed to 1-D.

int index = i * dim + j;
real a = out[index] * y;
if (a < -1)
cost[i] += -4 * a;
else if (a < 1)
cost[i] += (1 - a) * (1 - a);
}
}
target.copyFrom(cost.data(), numSamples);
}

void HuberTwoClassification::backwardImp(Matrix& output,
Argument& label,
Matrix& outputG) {
size_t numSamples = output.getHeight();
size_t dim = output.getWidth();
real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData();
real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData();
for (size_t i = 0; i < numSamples; ++i) {
Contributor commented:
Like forwardImp, this will likely be wrong in the multi-dimensional case; test_LayerGrad probably passes only because forward and backward fetch the data in the same way.

Author replied:
Done

int y = 2 * lbl[i] - 1;
for (size_t j = 0; j < dim; ++j) {
int index = i * dim + j;
real a = out[index] * y;
if (a < -1)
grad[index] += -4 * y;
else if (a < 1)
grad[index] += -2 * (1 - a) * y;
}
}
if (useGpu_) outputG.copyFrom(grad, numSamples * dim);
}
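
Likewise for the classification variant, an illustrative NumPy sketch (again written for this review, not PR code), with the label mapped from {0, 1} to y in {-1, +1}:

import numpy as np

def huber_classification(out, lbl):
    # out: (numSamples,) raw scores; lbl: (numSamples,) labels in {0, 1}.
    y = 2 * lbl - 1                    # map {0, 1} -> {-1, +1}
    a = out * y                        # margin, `a = out[index] * y`
    cost = np.where(a < -1, -4.0 * a,
           np.where(a < 1, (1 - a) ** 2, 0.0))
    grad = np.where(a < -1, -4.0 * y,
           np.where(a < 1, -2 * (1 - a) * y, 0.0))
    return cost, grad

out = np.array([-2.0, 0.3, 2.0])
lbl = np.array([1, 1, 1])
cost, grad = huber_classification(out, lbl)
# cost = [8.0, 0.49, 0.0]: linear, quadratic, and zero-loss regions.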
/**
 * This cost layer computes the sum of its input as loss.
* \f[
65 changes: 48 additions & 17 deletions paddle/gserver/layers/CostLayer.h
@@ -304,37 +304,68 @@ class MultiBinaryLabelCrossEntropy : public CostLayer {
Matrix& outputGrad) override;
};

/**
* Huber loss for robust 2-classes classification.
*
* For label={0, 1}, let y=2*label-1. Given output f, the loss is:
* \f[
* Loss =
* \left\{\begin{matrix}
 * -4 * y * f & \textit{if} \ \ y * f < -1 \\
* (1 - y * f)^2 & \textit{if} \ \ -1 < y * f < 1 \\
* 0 & \textit{otherwise}
* \end{matrix}\right.
* \f]
/*
* A base layer for HuberRegressionLoss and HuberTwoClassification.
*/
class HuberTwoClass : public CostLayer {
class HuberCost : public CostLayer {
public:
std::vector<Argument> tmpCpuInput_;

public:
explicit HuberTwoClass(const LayerConfig& config) : CostLayer(config) {}
explicit HuberCost(const LayerConfig& config) : CostLayer(config) {}

bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;

void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

void forwardImpIn(Matrix& output, Argument& label, Matrix& cost);
void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {}
};

/**
* Huber loss for robust regression.
*
* Given output f(x), label y and delta, the loss is:
 * Loss = 0.5 * (y - f)^2, if abs(y - f) <= delta \\
* Loss = delta * abs(y - f) - 0.5 * delta^2, otherwise
*/
class HuberRegressionLoss : public HuberCost {
public:
explicit HuberRegressionLoss(const LayerConfig& config) : HuberCost(config) {}

bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;

void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

void backwardImp(Matrix& outputValue,
Argument& label,
Matrix& outputGrad) override;

void backwardImpIn(Matrix& outputValue, Argument& label, Matrix& outputGrad);
protected:
real delta_;
};

/**
* Huber loss for robust 2-classes classification.
*
* For label={0, 1}, let y=2*label-1. Given output f(x), the loss is:
 * Loss = -4 * y * f, if y * f < -1 \\
* Loss = (1 - y * f)^2, if -1 < y * f < 1 \\
* Loss = 0, otherwise
*/
class HuberTwoClassification : public HuberCost {
public:
explicit HuberTwoClassification(const LayerConfig& config)
: HuberCost(config) {}

bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;

void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

void backwardImp(Matrix& outputValue,
Argument& label,
Matrix& outputGrad) override;
};

typedef std::shared_ptr<CostLayer> CostLayerPtr;
22 changes: 20 additions & 2 deletions paddle/gserver/tests/test_LayerGrad.cpp
@@ -850,9 +850,27 @@ TEST(Layer, square_error_weighted) {
}
}

TEST(Layer, huber_regression_loss) {
TestConfig config;
config.layerConfig.set_type("huber_regression");
config.biasSize = 0;

config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0});
config.layerConfig.add_inputs();
config.layerConfig.add_inputs();

for (auto useGpu : {false, true}) {
for (auto delta : {1, 3, 5}) {
config.layerConfig.set_delta(delta);
testLayerGrad(config, "huber_regression", 100, /* trans */ false, useGpu);
}
}
}

TEST(Layer, huber_two_class) {
TestConfig config;
config.layerConfig.set_type("huber");
config.layerConfig.set_type("huber_classification");
config.biasSize = 0;

config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0});
@@ -861,7 +879,7 @@ TEST(Layer, huber_two_class) {
config.layerConfig.add_inputs();

for (auto useGpu : {false, true}) {
testLayerGrad(config, "huber", 100, /* trans */ false, useGpu);
testLayerGrad(config, "huber_two_class", 100, /* trans */ false, useGpu);
}
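
The two tests above rely on testLayerGrad's gradient check. The same idea in a few lines of NumPy, independent of the Paddle test harness; an illustrative sketch that assumes the regression formulas given earlier.

import numpy as np

def huber_cost(out, lbl, delta):
    a = np.abs(lbl - out)
    return np.where(a <= delta, 0.5 * a * a, delta * (a - 0.5 * delta)).sum()

rng = np.random.RandomState(0)
out, lbl, delta, eps = rng.randn(4, 10), rng.randn(4, 10), 3.0, 1e-5

analytic = np.where(np.abs(lbl - out) <= delta,
                    -(lbl - out), -delta * np.sign(lbl - out))
numeric = np.zeros_like(out)
for idx in np.ndindex(out.shape):          # central finite differences
    bump = np.zeros_like(out)
    bump[idx] = eps
    numeric[idx] = (huber_cost(out + bump, lbl, delta)
                    - huber_cost(out - bump, lbl, delta)) / (2 * eps)
assert np.allclose(analytic, numeric, atol=1e-4)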
}

3 changes: 3 additions & 0 deletions proto/ModelConfig.proto
@@ -499,6 +499,9 @@ message LayerConfig {
optional int32 axis = 54 [ default = 2 ];
repeated uint32 offset = 55;
repeated uint32 shape = 56;

// for HuberRegressionLoss
optional double delta = 57 [ default = 1.0 ];
}

message EvaluatorConfig {
13 changes: 12 additions & 1 deletion python/paddle/trainer/config_parser.py
@@ -2274,7 +2274,7 @@ def init(cls, name, inputs, device=None, coeff=1.):
define_cost('SumOfSquaresCostLayer', 'square_error')
define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy')
define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy')
define_cost('HuberTwoClass', 'huber')
define_cost('HuberTwoClassification', 'huber_classification')
define_cost('SumCost', 'sum_cost')
define_cost('SmoothL1Cost', 'smooth_l1')

@@ -2336,6 +2336,17 @@ def __init__(self, name, inputs, NDCG_num=5, max_sort_size=-1, device=None):
self.config.max_sort_size = max_sort_size


@config_layer('huber_regression')
class HuberRegressionLoss(LayerBase):
def __init__(self, name, inputs, delta=1., coeff=1., device=None):
super(HuberRegressionLoss, self).__init__(
name, 'huber_regression', 1, inputs=inputs, device=device)
config_assert(
len(self.inputs) == 2, 'HuberRegression must have 2 inputs')
self.config.delta = delta
self.config.coeff = coeff
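
Note that huber_classification is registered through the generic define_cost call above, while huber_regression gets this dedicated config class because it must copy the extra delta hyperparameter into the new LayerConfig.delta proto field (field 57 in ModelConfig.proto).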


@config_layer('nce')
class NCELayer(LayerBase):
def __init__(self,