Skip to content

Commit

Permalink
fix bug in missing="separate"
Browse files Browse the repository at this point in the history
  • Loading branch information
paulbkoch committed Dec 27, 2024
1 parent 4171091 commit d1d5a9b
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 30 deletions.
42 changes: 16 additions & 26 deletions shared/libebm/PartitionOneDimensionalBoosting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -896,37 +896,29 @@ template<bool bHessian, size_t cCompilerScores> class PartitionOneDimensionalBoo
const TreeNode<bHessian, GetArrayScores(cCompilerScores)>* pMissingValueTreeNode = nullptr;

const auto* aSumBins = aBins;
if(bNominal) {
if(TermBoostFlags_MissingCategory & flags) {
// Nothing to do. Treat missing like any other category.
} else {
if(bMissing) {
if(bMissing) {
if(bNominal) {
if(TermBoostFlags_MissingCategory & flags) {
// Nothing to do. Treat missing like any other category.
} else {
pMissingValueTreeNode = pRootTreeNode;
// Skip the missing bin in the pointer to pointer mapping since it will not be part of the continuous
// region.
pBin = IndexBin(pBin, cBytesPerBin);
}
}
} else {
if(TermBoostFlags_MissingLow & flags) {
if(bMissing) {
} else {
if(TermBoostFlags_MissingLow & flags) {
pMissingBin = pBin;
}
} else if(TermBoostFlags_MissingHigh & flags) {
if(bMissing) {
} else if(TermBoostFlags_MissingHigh & flags) {
pMissingBin = pBin;
pBin = IndexBin(pBin, cBytesPerBin);
}
} else if(TermBoostFlags_MissingSeparate & flags) {
cSamplesTotal -= aSumBins->GetCountSamples();
weightTotal -= aSumBins->GetWeight();
aSumBins = IndexBin(aSumBins, cBytesPerBin);
} else if(TermBoostFlags_MissingSeparate & flags) {
cSamplesTotal -= aSumBins->GetCountSamples();
weightTotal -= aSumBins->GetWeight();
aSumBins = IndexBin(aSumBins, cBytesPerBin);

if(bMissing) {
pBin = IndexBin(pBin, cBytesPerBin);
}
} else {
if(bMissing) {
} else {
pMissingValueTreeNode = pRootTreeNode;
// Skip the missing bin in the pointer to pointer mapping since it will not be part of the continuous
// region.
Expand All @@ -944,11 +936,9 @@ template<bool bHessian, size_t cCompilerScores> class PartitionOneDimensionalBoo
++ppBin;
} while(pBinsEnd != pBin);

if(TermBoostFlags_MissingHigh & flags) {
if(bMissing && !bNominal) {
*ppBin = aBins;
++ppBin;
}
if(bMissing && !bNominal && (TermBoostFlags_MissingHigh & flags)) {
*ppBin = aBins;
++ppBin;
}

if(bNominal) {
Expand Down
59 changes: 55 additions & 4 deletions shared/libebm/tests/boosting_unusual_inputs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2061,6 +2061,29 @@ TEST_CASE("lossguide, boosting, regression") {
CHECK_APPROX(termScore, 0.40592050000000002);
}

// Boosts a single continuous feature with TermBoostFlags_MissingSeparate when the training data
// contains no sample in bin 0, and checks the resulting validation metric and per-bin term scores.
// NOTE(review): the second FeatureTest argument is false here and true in the neighbouring
// "missing" tests, so it presumably declares whether the feature has missing values -- confirm
// against the FeatureTest constructor.
TEST_CASE("missing separate continuous with non-missing data, boosting, regression") {
TestBoost test = TestBoost(Task_Regression,
{FeatureTest(4, false, false, false)},
{{0}},
{
// training samples: only bins 1 and 2 are populated (presumably {bin index}, target -- confirm)
TestSample({1}, 20.0),
TestSample({2}, 30.0),
},
// single validation sample, identical to the first training sample
{TestSample({1}, 20.0)});

// boost continuous missing separate
// NOTE(review): the third argument (1.0) is presumably the learning rate -- confirm against TestBoost::Boost
double validationMetric = test.Boost(0, TermBoostFlags_MissingSeparate, 1.0).validationMetric;
// the validation sample is expected to be predicted exactly after this one boosting step
CHECK_APPROX(validationMetric, 0.0);

double termScore;
// bin 0 received no training samples; the test expects it to carry the same score as bin 1
termScore = test.GetCurrentTermScore(0, {0}, 0);
CHECK_APPROX(termScore, 20.0);
// bins 1 and 2 are expected to match the targets of their single training samples
termScore = test.GetCurrentTermScore(0, {1}, 0);
CHECK_APPROX(termScore, 20.0);
termScore = test.GetCurrentTermScore(0, {2}, 0);
CHECK_APPROX(termScore, 30.0);
}

TEST_CASE("missing separate continuous, boosting, regression") {
TestBoost test = TestBoost(Task_Regression,
{FeatureTest(4, true, false, false)},
Expand All @@ -2073,12 +2096,40 @@ TEST_CASE("missing separate continuous, boosting, regression") {
{TestSample({1}, 20.0)});

// boost continuous missing separate
double validationMetric = test.Boost(0, TermBoostFlags_MissingSeparate).validationMetric;
CHECK_APPROX(validationMetric, 392.04000000000002);
double validationMetric = test.Boost(0, TermBoostFlags_MissingSeparate, 1.0).validationMetric;
CHECK_APPROX(validationMetric, 0.0);

double termScore;
termScore = test.GetCurrentTermScore(0, {0}, 0);
CHECK_APPROX(termScore, 0.10000000000000001);
CHECK_APPROX(termScore, 10.0);
termScore = test.GetCurrentTermScore(0, {1}, 0);
CHECK_APPROX(termScore, 20.0);
termScore = test.GetCurrentTermScore(0, {2}, 0);
CHECK_APPROX(termScore, 30.0);
}

// Boosts a single feature with TermBoostFlags_MissingCategory, with one training sample in each of
// bins 0, 1 and 2, and checks that every bin ends up with its own sample's target as its term score.
// NOTE(review): the fourth FeatureTest argument is true only in this "nominal" test, so it
// presumably marks the feature as nominal -- confirm against the FeatureTest constructor.
TEST_CASE("missing category nominal, boosting, regression") {
TestBoost test = TestBoost(Task_Regression,
{FeatureTest(4, true, false, true)},
{{0}},
{
// training samples: one per bin, including bin 0 (presumably {bin index}, target -- confirm)
TestSample({0}, 10.0),
TestSample({1}, 20.0),
TestSample({2}, 30.0),
},
// single validation sample, identical to the second training sample
{TestSample({1}, 20.0)});

// boost nominal with the missing-category flag (TermBoostFlags_MissingCategory)
// NOTE(review): the third argument (1.0) is presumably the learning rate -- confirm against TestBoost::Boost
double validationMetric = test.Boost(0, TermBoostFlags_MissingCategory, 1.0).validationMetric;
// the validation sample is expected to be predicted exactly after this one boosting step
CHECK_APPROX(validationMetric, 0.0);

double termScore;
// each bin, bin 0 included, is expected to carry the target of its own training sample
termScore = test.GetCurrentTermScore(0, {0}, 0);
CHECK_APPROX(termScore, 10.0);
termScore = test.GetCurrentTermScore(0, {1}, 0);
CHECK_APPROX(termScore, 20.0);
termScore = test.GetCurrentTermScore(0, {2}, 0);
CHECK_APPROX(termScore, 30.0);
}

static double RandomizedTesting(const AccelerationFlags acceleration) {
Expand Down Expand Up @@ -2193,7 +2244,7 @@ static double RandomizedTesting(const AccelerationFlags acceleration) {
}

TEST_CASE("stress test, boosting") {
const double expected = 16554721767106.137;
const double expected = 15052328055998.955;

double validationMetricExact = RandomizedTesting(AccelerationFlags_NONE);
CHECK(validationMetricExact == expected);
Expand Down

0 comments on commit d1d5a9b

Please sign in to comment.