Skip to content

Commit

Permalink
make exact vs SIMD random test more maintainable
Browse files (browse the repository at this point in the history)
  • Loading branch information
paulbkoch committed Dec 24, 2024
1 parent aa03dbe commit 745c561
Showing 1 changed file with 15 additions and 79 deletions.
94 changes: 15 additions & 79 deletions shared/libebm/tests/boosting_unusual_inputs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2061,12 +2061,12 @@ TEST_CASE("lossguide, boosting, regression") {
CHECK_APPROX(termScore, 0.40592050000000002);
}

TEST_CASE("stress test, boosting") {
auto rng = MakeRng(0);
AccelerationFlags acceleration = AccelerationFlags_NONE;

static double RandomizedTesting(const AccelerationFlags acceleration) {
const IntEbm cTrainSamples = 211; // have some non-SIMD residuals
const IntEbm cValidationSamples = 101; // have some non-SIMD residuals
const size_t cRounds = 200;

auto rng = MakeRng(0);
const std::vector<FeatureTest> features = {
FeatureTest(10, false, false, false),
FeatureTest(10, false, false, true),
Expand All @@ -2089,7 +2089,7 @@ TEST_CASE("stress test, boosting") {
if(4 <= features.size()) {
// terms.push_back({0, 1, 2, 3}); // TODO: enable when fast enough
}
const size_t cRounds = 200;

std::vector<IntEbm> boostFlagsAny{TermBoostFlags_PurifyGain,
TermBoostFlags_DisableNewtonGain,
TermBoostFlags_DisableCategorical,
Expand All @@ -2104,7 +2104,6 @@ TEST_CASE("stress test, boosting") {
TermBoostFlags_MissingDrop};

double validationMetric = 1.0;

for(IntEbm classesCount = Task_Regression; classesCount < 5; ++classesCount) {
if(classesCount != Task_Regression && classesCount < 1) {
continue;
Expand Down Expand Up @@ -2164,86 +2163,23 @@ TEST_CASE("stress test, boosting") {
}
}
if(classesCount == 1) {
CHECK(std::numeric_limits<double>::infinity() == validationMetricIteration);
if(std::numeric_limits<double>::infinity() != validationMetricIteration) {
return -std::numeric_limits<double>::infinity();
}
} else {
validationMetric *= validationMetricIteration;
}
}
}
return validationMetric;
}

TEST_CASE("stress test, boosting") {
const double expected = 26838942758406.215;
CHECK(validationMetric == expected);

// Now redo the above, but using acceleration. Results will likely be slightly different but similar.
rng = MakeRng(0);
acceleration = AccelerationFlags_ALL;
validationMetric = 1.0;

for(IntEbm classesCount = Task_Regression; classesCount < 5; ++classesCount) {
if(classesCount != Task_Regression && classesCount < 1) {
continue;
}
const auto train = MakeRandomDataset(rng, classesCount, cTrainSamples, features);
const auto validation = MakeRandomDataset(rng, classesCount, cValidationSamples, features);
for(IntEbm innerBagCount = 0; innerBagCount < 3; ++innerBagCount) {
TestBoost test = TestBoost(classesCount,
features,
terms,
train,
validation,
innerBagCount,
k_testCreateBoosterFlags_Default,
acceleration);

double validationMetricIteration = 0.0;
for(size_t iRound = 0; iRound < cRounds; ++iRound) {
for(IntEbm iTerm = 0; iTerm < static_cast<IntEbm>(terms.size()); ++iTerm) {
const IntEbm cRealBins = features[terms[iTerm][0]].CountRealBins();
const IntEbm cDimensions = terms[iTerm].size();

const TermBoostFlags boostFlags =
static_cast<TermBoostFlags>(ChooseAny(rng, boostFlagsAny) | ChooseFrom(rng, boostFlagsChoose));

const double learningRate = 0.015625;
const IntEbm minSamplesLeaf = TestRand(rng, 5) + 1;
const double minHessian = 0 == TestRand(rng, 5) ? 0.015625 : 0.0;
const double regAlpha = 0 == TestRand(rng, 5) ? 0.015625 : 0.0;
const double regLambda = 0 == TestRand(rng, 5) ? 0.015625 : 0.0;
const double maxDeltaStep = 0 == TestRand(rng, 5) ? 1.0 : 0.0;
const double categoricalSmoothing = 10.0;
const IntEbm maxCategoricalThreshold = 1 + TestRand(rng, cRealBins + 1);
const double categoricalInclusionPercent = 0 == TestRand(rng, 2) ? 0.75 : 1.0;

// we allow 1 cut more than the number of bins to test excessive leaves.
const IntEbm cLeaves = 1 + TestRand(rng, cRealBins + 1);
const std::vector<IntEbm> leaves(cDimensions, cLeaves);
const MonotoneDirection direction =
0 == TestRand(rng, 5) ? static_cast<MonotoneDirection>(TestRand(rng, 2) * 2 - 1) : 0;
const std::vector<MonotoneDirection> monotonicity(cDimensions, direction);

validationMetricIteration = test.Boost(iTerm,
boostFlags,
learningRate,
minSamplesLeaf,
minHessian,
regAlpha,
regLambda,
maxDeltaStep,
categoricalSmoothing,
maxCategoricalThreshold,
categoricalInclusionPercent,
leaves,
monotonicity)
.validationMetric;
}
}
if(classesCount == 1) {
CHECK(std::numeric_limits<double>::infinity() == validationMetricIteration);
} else {
validationMetric *= validationMetricIteration;
}
}
}
double validationMetricExact = RandomizedTesting(AccelerationFlags_NONE);
CHECK(validationMetricExact == expected);

CHECK_APPROX_TOLERANCE(validationMetric, expected, 1e-2);
double validationMetricSIMD = RandomizedTesting(AccelerationFlags_ALL);
CHECK_APPROX_TOLERANCE(validationMetricSIMD, expected, 1e-2);
}

0 comments on commit 745c561

Please sign in to comment.