Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: make crypto.timingSafeEqual test less flaky #8456

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions test/fixtures/crypto-timing-safe-equal-benchmark-func.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
'use strict';

const assert = require('assert');
module.exports = (compareFunc, firstBufFill, secondBufFill, bufSize) => {
const firstBuffer = Buffer.alloc(bufSize, firstBufFill);
const secondBuffer = Buffer.alloc(bufSize, secondBufFill);

const startTime = process.hrtime();
const result = compareFunc(firstBuffer, secondBuffer);
const endTime = process.hrtime(startTime);

// Ensure that the result of the function call gets used, so it doesn't
// get discarded due to engine optimizations.
assert.strictEqual(result, firstBufFill === secondBufFill);

return endTime[0] * 1e9 + endTime[1];
};
123 changes: 123 additions & 0 deletions test/sequential/test-crypto-timing-safe-equal-benchmarks.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
'use strict';
const common = require('../common');
const assert = require('assert');

if (!common.hasCrypto) {
common.skip('missing crypto');
return;
}

if (!common.enoughTestMem) {
common.skip('memory-intensive test');
return;
}

const crypto = require('crypto');

const BENCHMARK_FUNC_PATH =
`${common.fixturesDir}/crypto-timing-safe-equal-benchmark-func`;
function runOneBenchmark(...args) {
const benchmarkFunc = require(BENCHMARK_FUNC_PATH);
const result = benchmarkFunc(...args);

// Don't let the comparison function get cached. This avoid a timing
// inconsistency due to V8 optimization where the function would take
// less time when called with a specific set of parameters.
delete require.cache[require.resolve(BENCHMARK_FUNC_PATH)];
return result;
}

function getTValue(compareFunc) {
const numTrials = 10000;
const bufSize = 10000;
// Perform benchmarks to verify that timingSafeEqual is actually timing-safe.

const rawEqualBenches = Array(numTrials);
const rawUnequalBenches = Array(numTrials);

for (let i = 0; i < numTrials; i++) {
if (Math.random() < 0.5) {
// First benchmark: comparing two equal buffers
rawEqualBenches[i] = runOneBenchmark(compareFunc, 'A', 'A', bufSize);
// Second benchmark: comparing two unequal buffers
rawUnequalBenches[i] = runOneBenchmark(compareFunc, 'B', 'C', bufSize);
} else {
// Flip the order of the benchmarks half of the time.
rawUnequalBenches[i] = runOneBenchmark(compareFunc, 'B', 'C', bufSize);
rawEqualBenches[i] = runOneBenchmark(compareFunc, 'A', 'A', bufSize);
}
}

const equalBenches = filterOutliers(rawEqualBenches);
const unequalBenches = filterOutliers(rawUnequalBenches);

// Use a two-sample t-test to determine whether the timing difference between
// the benchmarks is statistically significant.
// https://wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test

const equalMean = mean(equalBenches);
const unequalMean = mean(unequalBenches);

const equalLen = equalBenches.length;
const unequalLen = unequalBenches.length;

const combinedStd = combinedStandardDeviation(equalBenches, unequalBenches);
const standardErr = combinedStd * Math.sqrt(1 / equalLen + 1 / unequalLen);

return (equalMean - unequalMean) / standardErr;
}

// Returns the mean of an array
function mean(array) {
return array.reduce((sum, val) => sum + val, 0) / array.length;
}

// Returns the sample standard deviation of an array
function standardDeviation(array) {
const arrMean = mean(array);
const total = array.reduce((sum, val) => sum + Math.pow(val - arrMean, 2), 0);
return Math.sqrt(total / (array.length - 1));
}

// Returns the common standard deviation of two arrays
function combinedStandardDeviation(array1, array2) {
const sum1 = Math.pow(standardDeviation(array1), 2) * (array1.length - 1);
const sum2 = Math.pow(standardDeviation(array2), 2) * (array2.length - 1);
return Math.sqrt((sum1 + sum2) / (array1.length + array2.length - 2));
}

// Filter large outliers from an array. A 'large outlier' is a value that is at
// least 50 times larger than the mean. This prevents the tests from failing
// due to the standard deviation increase when a function unexpectedly takes
// a very long time to execute.
function filterOutliers(array) {
const arrMean = mean(array);
return array.filter((value) => value / arrMean < 50);
}

// t_(0.99995, ∞)
// i.e. If a given comparison function is indeed timing-safe, the t-test result
// has a 99.99% chance to be below this threshold. Unfortunately, this means
// that this test will be a bit flakey and will fail 0.01% of the time even if
// crypto.timingSafeEqual is working properly.
// t-table ref: http://www.sjsu.edu/faculty/gerstman/StatPrimer/t-table.pdf
// Note that in reality there are roughly `2 * numTrials - 2` degrees of
// freedom, not ∞. However, assuming `numTrials` is large, this doesn't
// significantly affect the threshold.
const T_THRESHOLD = 3.892;

const t = getTValue(crypto.timingSafeEqual);
assert(
Math.abs(t) < T_THRESHOLD,
`timingSafeEqual should not leak information from its execution time (t=${t})`
);

// As a sanity check to make sure the statistical tests are working, run the
// same benchmarks again, this time with an unsafe comparison function. In this
// case the t-value should be above the threshold.
const unsafeCompare = (bufA, bufB) => bufA.equals(bufB);
const t2 = getTValue(unsafeCompare);
assert(
Math.abs(t2) > T_THRESHOLD,
`Buffer#equals should leak information from its execution time (t=${t2})`
);
132 changes: 0 additions & 132 deletions test/sequential/test-crypto-timing-safe-equal.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,135 +32,3 @@ assert.throws(function() {
assert.throws(function() {
crypto.timingSafeEqual(Buffer.from([1, 2]), 'not a buffer');
}, 'should throw if the second argument is not a buffer');

function getTValue(compareFunc) {
const numTrials = 10000;
const testBufferSize = 10000;
// Perform benchmarks to verify that timingSafeEqual is actually timing-safe.

const rawEqualBenches = Array(numTrials);
const rawUnequalBenches = Array(numTrials);

for (let i = 0; i < numTrials; i++) {

// The `runEqualBenchmark` and `runUnequalBenchmark` functions are
// intentionally redefined on every iteration of this loop, to avoid
// timing inconsistency.
function runEqualBenchmark(compareFunc, bufferA, bufferB) {
const startTime = process.hrtime();
const result = compareFunc(bufferA, bufferB);
const endTime = process.hrtime(startTime);

// Ensure that the result of the function call gets used, so it doesn't
// get discarded due to engine optimizations.
assert.strictEqual(result, true);
return endTime[0] * 1e9 + endTime[1];
}

// This is almost the same as the runEqualBenchmark function, but it's
// duplicated to avoid timing issues with V8 optimization/inlining.
function runUnequalBenchmark(compareFunc, bufferA, bufferB) {
const startTime = process.hrtime();
const result = compareFunc(bufferA, bufferB);
const endTime = process.hrtime(startTime);

assert.strictEqual(result, false);
return endTime[0] * 1e9 + endTime[1];
}

if (i % 2) {
const bufferA1 = Buffer.alloc(testBufferSize, 'A');
const bufferB = Buffer.alloc(testBufferSize, 'B');
const bufferA2 = Buffer.alloc(testBufferSize, 'A');
const bufferC = Buffer.alloc(testBufferSize, 'C');

// First benchmark: comparing two equal buffers
rawEqualBenches[i] = runEqualBenchmark(compareFunc, bufferA1, bufferA2);

// Second benchmark: comparing two unequal buffers
rawUnequalBenches[i] = runUnequalBenchmark(compareFunc, bufferB, bufferC);
} else {
// Swap the order of the benchmarks every second iteration, to avoid any
// patterns caused by memory usage.
const bufferB = Buffer.alloc(testBufferSize, 'B');
const bufferA1 = Buffer.alloc(testBufferSize, 'A');
const bufferC = Buffer.alloc(testBufferSize, 'C');
const bufferA2 = Buffer.alloc(testBufferSize, 'A');
rawUnequalBenches[i] = runUnequalBenchmark(compareFunc, bufferB, bufferC);
rawEqualBenches[i] = runEqualBenchmark(compareFunc, bufferA1, bufferA2);
}
}

const equalBenches = filterOutliers(rawEqualBenches);
const unequalBenches = filterOutliers(rawUnequalBenches);

// Use a two-sample t-test to determine whether the timing difference between
// the benchmarks is statistically significant.
// https://wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test

const equalMean = mean(equalBenches);
const unequalMean = mean(unequalBenches);

const equalLen = equalBenches.length;
const unequalLen = unequalBenches.length;

const combinedStd = combinedStandardDeviation(equalBenches, unequalBenches);
const standardErr = combinedStd * Math.sqrt(1 / equalLen + 1 / unequalLen);

return (equalMean - unequalMean) / standardErr;
}

// Returns the mean of an array
function mean(array) {
return array.reduce((sum, val) => sum + val, 0) / array.length;
}

// Returns the sample standard deviation of an array
function standardDeviation(array) {
const arrMean = mean(array);
const total = array.reduce((sum, val) => sum + Math.pow(val - arrMean, 2), 0);
return Math.sqrt(total / (array.length - 1));
}

// Returns the common standard deviation of two arrays
function combinedStandardDeviation(array1, array2) {
const sum1 = Math.pow(standardDeviation(array1), 2) * (array1.length - 1);
const sum2 = Math.pow(standardDeviation(array2), 2) * (array2.length - 1);
return Math.sqrt((sum1 + sum2) / (array1.length + array2.length - 2));
}

// Filter large outliers from an array. A 'large outlier' is a value that is at
// least 50 times larger than the mean. This prevents the tests from failing
// due to the standard deviation increase when a function unexpectedly takes
// a very long time to execute.
function filterOutliers(array) {
const arrMean = mean(array);
return array.filter((value) => value / arrMean < 50);
}

// t_(0.99995, ∞)
// i.e. If a given comparison function is indeed timing-safe, the t-test result
// has a 99.99% chance to be below this threshold. Unfortunately, this means
// that this test will be a bit flakey and will fail 0.01% of the time even if
// crypto.timingSafeEqual is working properly.
// t-table ref: http://www.sjsu.edu/faculty/gerstman/StatPrimer/t-table.pdf
// Note that in reality there are roughly `2 * numTrials - 2` degrees of
// freedom, not ∞. However, assuming `numTrials` is large, this doesn't
// significantly affect the threshold.
const T_THRESHOLD = 3.892;

const t = getTValue(crypto.timingSafeEqual);
assert(
Math.abs(t) < T_THRESHOLD,
`timingSafeEqual should not leak information from its execution time (t=${t})`
);

// As a sanity check to make sure the statistical tests are working, run the
// same benchmarks again, this time with an unsafe comparison function. In this
// case the t-value should be above the threshold.
const unsafeCompare = (bufA, bufB) => bufA.equals(bufB);
const t2 = getTValue(unsafeCompare);
assert(
Math.abs(t2) > T_THRESHOLD,
`Buffer#equals should leak information from its execution time (t=${t2})`
);