Commit 610384f

Refactor AFT metric using GPU-CPU reducer

hcho3 committed May 27, 2020
1 parent 0d8d222 commit 610384f
Showing 6 changed files with 315 additions and 192 deletions.
6 changes: 3 additions & 3 deletions src/common/survival_util.h
@@ -116,7 +116,7 @@ struct AFTLoss {
z_u = (log_y_upper - y_pred) / sigma;
cdf_u = Distribution::CDF(z_u);
}
-if (std::isinf(y_lower)) { // left-censored
+if (y_lower <= 0.0) { // left-censored
cdf_l = 0;
} else { // right-censored or interval-censored
z_l = (log_y_lower - y_pred) / sigma;
@@ -157,7 +157,7 @@ struct AFTLoss {
pdf_u = Distribution::PDF(z_u);
cdf_u = Distribution::CDF(z_u);
}
-if (std::isinf(y_lower)) { // left-censored
+if (y_lower <= 0.0) { // left-censored
pdf_l = 0;
cdf_l = 0;
censor_type = CensoringType::kLeftCensored;
@@ -209,7 +209,7 @@ struct AFTLoss {
cdf_u = Distribution::CDF(z_u);
grad_pdf_u = Distribution::GradPDF(z_u);
}
-if (std::isinf(y_lower)) { // left-censored
+if (y_lower <= 0.0) { // left-censored
pdf_l = 0;
cdf_l = 0;
grad_pdf_l = 0;
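
The three hunks above change how left-censored labels are detected: a lower bound of zero (whose logarithm is -inf) now marks left-censoring directly, instead of testing the bound itself for infinity. The label conventions can be summarized in a small helper. This is an illustrative sketch, not code from the commit; DetectCensoringType is a hypothetical name, and only kLeftCensored is visible in the diff, so the other enum values are assumptions.

#include <cmath>

// Hypothetical sketch of the label conventions used by AFTLoss.
enum class CensoringType {
  kUncensored, kLeftCensored, kRightCensored, kIntervalCensored
};

inline CensoringType DetectCensoringType(double y_lower, double y_upper) {
  if (y_lower <= 0.0) {        // log(0) = -inf: left-censored
    return CensoringType::kLeftCensored;
  }
  if (std::isinf(y_upper)) {   // no finite upper bound: right-censored
    return CensoringType::kRightCensored;
  }
  if (y_lower == y_upper) {    // exact observation: uncensored
    return CensoringType::kUncensored;
  }
  return CensoringType::kIntervalCensored;
}
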
1 change: 1 addition & 0 deletions src/metric/metric.cc
@@ -80,6 +80,7 @@ namespace metric {
// List of files that will be force linked in static links.
DMLC_REGISTRY_LINK_TAG(elementwise_metric);
DMLC_REGISTRY_LINK_TAG(multiclass_metric);
+DMLC_REGISTRY_LINK_TAG(survival_metric);
DMLC_REGISTRY_LINK_TAG(rank_metric);
#ifdef XGBOOST_USE_CUDA
DMLC_REGISTRY_LINK_TAG(rank_metric_gpu);
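
The added DMLC_REGISTRY_LINK_TAG(survival_metric) pairs with the DMLC_REGISTRY_FILE_TAG(survival_metric) declared inside survival_metric.cc (visible further down). In a static build nothing else references the symbols in that translation unit, so the linker is free to drop it along with its metric registrations; referencing a tag symbol from metric.cc, which is always linked, keeps the object file alive. Roughly, the dmlc-core macros work like the simplified sketch below; the MY_* names are placeholders, not the real macro bodies.

// In the file that defines the registered objects (e.g. survival_metric.cc):
#define MY_REGISTRY_FILE_TAG(Tag) \
  int __registry_file_tag_##Tag##__() { return 0; }

// In a file that is always linked (e.g. metric.cc). Calling the tag
// function forces the linker to keep the whole object file, and with it
// the static registration side effects it contains.
#define MY_REGISTRY_LINK_TAG(Tag)            \
  int __registry_file_tag_##Tag##__();       \
  static int __registry_link_tag_##Tag##__ = \
      __registry_file_tag_##Tag##__();
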
187 changes: 5 additions & 182 deletions src/metric/survival_metric.cc
@@ -1,185 +1,8 @@
-/*!
- * Copyright 2019 by Contributors
- * \file survival_metric.cc
- * \brief Metrics for survival analysis
- * \author Avinash Barnwal, Hyunsu Cho and Toby Hocking
- */
+/*!
+ * Copyright 2020 XGBoost contributors
+ */
+// Dummy file to keep the CUDA conditional compile trick.

The rest of the hunk removes the old CPU-only implementation below; the only other additions are the three `+` lines at the very end, which pull in the new CUDA-capable file.

#include <rabit/rabit.h>
#include <xgboost/metric.h>
#include <xgboost/host_device_vector.h>
#include <dmlc/registry.h>
#include <cmath>
#include <memory>
#include <vector>
#include <limits>

#include "xgboost/json.h"

#include "../common/math.h"
#include "../common/survival_util.h"

using AFTParam = xgboost::common::AFTParam;
using ProbabilityDistributionType = xgboost::common::ProbabilityDistributionType;
template <typename Distribution>
using AFTLoss = xgboost::common::AFTLoss<Distribution>;

namespace xgboost {
namespace metric {
// Tag this file; it is force-linked into static builds via DMLC_REGISTRY_LINK_TAG.
DMLC_REGISTRY_FILE_TAG(survival_metric);

struct EvalIntervalRegressionAccuracy : public Metric {
public:
explicit EvalIntervalRegressionAccuracy(const char* param) {}

bst_float Eval(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
bool distributed) override {
CHECK_NE(info.labels_lower_bound_.Size(), 0U)
<< "y_lower cannot be empty";
CHECK_NE(info.labels_upper_bound_.Size(), 0U)
<< "y_higher cannot be empty";
CHECK_EQ(preds.Size(), info.labels_lower_bound_.Size());
CHECK_EQ(preds.Size(), info.labels_upper_bound_.Size());

const auto& yhat = preds.HostVector();
const auto& y_lower = info.labels_lower_bound_.HostVector();
const auto& y_upper = info.labels_upper_bound_.HostVector();
const auto& weights = info.weights_.HostVector();
const bool is_null_weight = weights.empty();
CHECK_LE(yhat.size(), static_cast<size_t>(std::numeric_limits<omp_ulong>::max()))
<< "yhat is too big";
const omp_ulong nsize = static_cast<omp_ulong>(yhat.size());

double acc_sum = 0.0;
double weight_sum = 0.0;
#pragma omp parallel for \
firstprivate(nsize, is_null_weight) shared(weights, y_lower, y_upper, yhat) \
reduction(+:acc_sum, weight_sum)
for (omp_ulong i = 0; i < nsize; ++i) {
const double pred = std::exp(yhat[i]);  // predictions are in log scale
const double w = is_null_weight ? 1.0 : weights[i];
if (pred >= y_lower[i] && pred <= y_upper[i]) {
  acc_sum += w;  // count hits with their weights, giving a weighted accuracy
}
weight_sum += w;
}

double dat[2]{acc_sum, weight_sum};
if (distributed) {
rabit::Allreduce<rabit::op::Sum>(dat, 2);
}
return static_cast<bst_float>(dat[0] / dat[1]);
}

const char* Name() const override {
return "interval-regression-accuracy";
}
};
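
As a worked example of the metric above (a self-contained sketch with made-up data, not part of the commit): predictions come out of the model in log scale, so each one is exponentiated before the interval test, and the result is the weighted fraction of predictions landing inside their label intervals.

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  // Hypothetical data: log-scale predictions and label intervals.
  std::vector<double> yhat{std::log(2.0), std::log(10.0), std::log(3.0)};
  std::vector<double> y_lower{1.0, 4.0, 5.0};
  std::vector<double> y_upper{3.0, 8.0, 6.0};
  std::vector<double> w{1.0, 2.0, 1.0};

  double acc_sum = 0.0, weight_sum = 0.0;
  for (size_t i = 0; i < yhat.size(); ++i) {
    const double pred = std::exp(yhat[i]);  // back to the original scale
    if (pred >= y_lower[i] && pred <= y_upper[i]) acc_sum += w[i];
    weight_sum += w[i];
  }
  // pred = {2, 10, 3}: only the first falls inside its interval,
  // so the weighted accuracy is 1 / (1 + 2 + 1) = 0.25.
  std::printf("interval-regression-accuracy = %g\n", acc_sum / weight_sum);
}
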

/*! \brief Negative log likelihood of Accelerated Failure Time model */
struct EvalAFT : public Metric {
public:
explicit EvalAFT(const char* param) {}

void Configure(const Args& args) override {
param_.UpdateAllowUnknown(args);
}

void SaveConfig(Json* p_out) const override {
auto& out = *p_out;
out["name"] = String(this->Name());
out["aft_loss_param"] = ToJson(param_);
}

void LoadConfig(Json const& in) override {
FromJson(in["aft_loss_param"], &param_);
}

template <typename Distribution>
inline void EvalImpl(
const std::vector<float>& weights, const std::vector<float>& y_lower,
const std::vector<float>& y_upper, const std::vector<float>& yhat,
omp_ulong nsize, bool is_null_weight, double aft_loss_distribution_scale,
double* out_nloglik_sum, double* out_weight_sum) {
double nloglik_sum = 0.0;
double weight_sum = 0.0;
#pragma omp parallel for \
shared(weights, y_lower, y_upper, yhat) reduction(+:nloglik_sum, weight_sum)
for (omp_ulong i = 0; i < nsize; ++i) {
// If weights are empty, data is unweighted so we use 1.0 everywhere
const double w = is_null_weight ? 1.0 : weights[i];
const double loss = AFTLoss<Distribution>::Loss(
    y_lower[i], y_upper[i], yhat[i], aft_loss_distribution_scale);
nloglik_sum += w * loss;  // weight each point's negative log likelihood
weight_sum += w;
}
*out_nloglik_sum = nloglik_sum;
*out_weight_sum = weight_sum;
}

bst_float Eval(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
bool distributed) override {
CHECK_NE(info.labels_lower_bound_.Size(), 0U)
<< "y_lower cannot be empty";
CHECK_NE(info.labels_upper_bound_.Size(), 0U)
<< "y_higher cannot be empty";
CHECK_EQ(preds.Size(), info.labels_lower_bound_.Size());
CHECK_EQ(preds.Size(), info.labels_upper_bound_.Size());

/* Compute negative log likelihood for each data point and compute weighted average */
const auto& yhat = preds.HostVector();
const auto& y_lower = info.labels_lower_bound_.HostVector();
const auto& y_upper = info.labels_upper_bound_.HostVector();
const auto& weights = info.weights_.HostVector();
const bool is_null_weight = weights.empty();
const float aft_loss_distribution_scale = param_.aft_loss_distribution_scale;
CHECK_LE(yhat.size(), static_cast<size_t>(std::numeric_limits<omp_ulong>::max()))
<< "yhat is too big";
const omp_ulong nsize = static_cast<omp_ulong>(yhat.size());
double nloglik_sum, weight_sum;
switch (param_.aft_loss_distribution) {
case ProbabilityDistributionType::kNormal:
EvalImpl<common::NormalDistribution>(weights, y_lower, y_upper, yhat, nsize, is_null_weight,
aft_loss_distribution_scale, &nloglik_sum, &weight_sum);
break;
case ProbabilityDistributionType::kLogistic:
EvalImpl<common::LogisticDistribution>(weights, y_lower, y_upper, yhat, nsize, is_null_weight,
aft_loss_distribution_scale, &nloglik_sum,
&weight_sum);
break;
case ProbabilityDistributionType::kExtreme:
EvalImpl<common::ExtremeDistribution>(weights, y_lower, y_upper, yhat, nsize, is_null_weight,
aft_loss_distribution_scale, &nloglik_sum, &weight_sum);
break;
default:
LOG(FATAL) << "Unrecognized probability distribution type";
}

double dat[2]{nloglik_sum, weight_sum};
if (distributed) {
rabit::Allreduce<rabit::op::Sum>(dat, 2);
}
return static_cast<bst_float>(dat[0] / dat[1]);
}

const char* Name() const override {
return "aft-nloglik";
}

private:
AFTParam param_;
};
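
The reported value is the weighted mean negative log likelihood, sum_i(w_i * loss_i) / sum_i(w_i), with a rabit::Allreduce over the two partial sums when running distributed; the metric reads the same aft_loss_distribution and aft_loss_distribution_scale parameters as the AFT objective. A minimal sketch of exercising it through the XGBoost C API is below; it assumes booster was trained on dtrain, whose label_lower_bound / label_upper_bound fields were set with XGDMatrixSetFloatInfo, and error codes are ignored for brevity.

#include <xgboost/c_api.h>
#include <cstdio>

void EvalAftNloglik(BoosterHandle booster, DMatrixHandle dtrain, int iter) {
  // Select the metric and the AFT loss configuration it evaluates under.
  XGBoosterSetParam(booster, "eval_metric", "aft-nloglik");
  XGBoosterSetParam(booster, "aft_loss_distribution", "normal");
  XGBoosterSetParam(booster, "aft_loss_distribution_scale", "1.0");

  DMatrixHandle dmats[1] = {dtrain};
  const char* names[1] = {"train"};
  const char* result = nullptr;
  XGBoosterEvalOneIter(booster, iter, dmats, names, 1, &result);
  std::printf("%s\n", result);  // e.g. "[0]\ttrain-aft-nloglik:..."
}
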

XGBOOST_REGISTER_METRIC(AFT, "aft-nloglik")
.describe("Negative log likelihood of Accelerated Failure Time model.")
.set_body([](const char* param) { return new EvalAFT(param); });

XGBOOST_REGISTER_METRIC(IntervalRegressionAccuracy, "interval-regression-accuracy")
.describe("")
.set_body([](const char* param) { return new EvalIntervalRegressionAccuracy(param); });

} // namespace metric
} // namespace xgboost
+#if !defined(XGBOOST_USE_CUDA)
+#include "survival_metric.cu"
+#endif  // !defined(XGBOOST_USE_CUDA)
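
The "CUDA conditional compile trick" named in the new header works as follows: the implementation now lives in survival_metric.cu. In a CUDA build, nvcc compiles the .cu file directly and survival_metric.cc contributes nothing; in a CPU-only build, the #include above pulls the .cu source into this .cc file, so the host compiler builds it as plain C++. A generic sketch of the pattern, with hypothetical file names:

// foo.cu -- single source shared by CPU and GPU builds. Device-specific
// code is guarded so the file is also valid plain C++:
#if defined(XGBOOST_USE_CUDA)
#include <thrust/reduce.h>
#endif

void FooImpl() {
#if defined(XGBOOST_USE_CUDA)
  // GPU path: device reductions, kernels, etc.
#else
  // CPU path: ordinary host code.
#endif
}

// foo.cc -- compiled in every build. In a CUDA build it is an empty
// translation unit (nvcc compiles foo.cu itself); otherwise it includes
// the .cu source so it is built as C++:
#if !defined(XGBOOST_USE_CUDA)
#include "foo.cu"
#endif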