Skip to content

Commit

Permalink
Added multiple testing correction for the bootstrap
Browse files Browse the repository at this point in the history
  • Loading branch information
ddedik committed Jan 10, 2018
1 parent 7d3231e commit 2d918e2
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 7 deletions.
14 changes: 11 additions & 3 deletions expan/core/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ def delta(x, y, assume_normal=True, percentiles=[2.5, 97.5],
c_i = normal_sample_difference(x=_x, y=_y, percentiles=percentiles, relative=relative,
multi_test_correction=multi_test_correction, num_tests=num_tests)
else:
c_i, _ = bootstrap(x=_x, y=_y, percentiles=percentiles, nruns=nruns,
relative=relative)
c_i, _ = bootstrap(x=_x, y=_y, percentiles=percentiles, nruns=nruns, relative=relative,
multi_test_correction=multi_test_correction, num_tests=num_tests)

# Return the result structure
# return mu, c_i, ss_x, ss_y, np.nanmean(_x), np.nanmean(_y)
Expand Down Expand Up @@ -250,7 +250,8 @@ def alpha_to_percentiles(alpha):


def bootstrap(x, y, func=_delta_mean, nruns=10000, percentiles=[2.5, 97.5],
min_observations=20, return_bootstraps=False, relative=False):
min_observations=20, return_bootstraps=False, relative=False,
multi_test_correction=False, num_tests=1):
"""
Bootstraps the Confidence Intervals for a particular function comparing
two samples. NaNs are ignored (discarded before calculation).
Expand All @@ -273,6 +274,8 @@ def bootstrap(x, y, func=_delta_mean, nruns=10000, percentiles=[2.5, 97.5],
absolute values. In this case, the interval is mean-ret_val[0] to
mean+ret_val[1]. This is more useful in many situations because it
corresponds with the sem() and std() functions.
multi_test_correction (boolean): whether to apply the correction for multiple testing (Bonferroni adjustment of the percentiles).
num_tests (integer): number of tests or reported KPIs used for the multiple testing correction.
Returns:
tuple:
Expand All @@ -289,6 +292,11 @@ def bootstrap(x, y, func=_delta_mean, nruns=10000, percentiles=[2.5, 97.5],
ss_x = _x.size - np.isnan(_x).sum()
ss_y = _y.size - np.isnan(_y).sum()

# Adjusting percentiles, Bonferroni correction
if multi_test_correction:
percentiles = [float(p) / num_tests if p < 50.0
else 100 - (100 - float(p)) / num_tests if p > 50.0 else p for p in percentiles]

# Checking if enough observations are left after dropping NaNs
if min(ss_x, ss_y) < min_observations:
# Create nan percentile dictionary
Expand Down
9 changes: 5 additions & 4 deletions tests/tests_core/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,13 +409,14 @@ def test__bootstrap__computation(self):
# Checking if no bootstrap data was passed
self.assertIsNone(result3[1])

# Defining data and computing bootstrap
# Bootstrap with multiple testing correction
sample4 = self.samples.temperature[self.samples.gender == 1]
sample5 = self.samples.temperature[self.samples.gender == 2]
result4 = statx.bootstrap(sample4, sample5, percentiles=[0.1, 99.9])
# Checking if lower percentile of result3 is correct
result4 = statx.bootstrap(sample4, sample5, percentiles=[2.5, 97.5],
multi_test_correction=True, num_tests=25)
# Checking if lower percentile of result4 is correct
self.assertAlmostEqual(result4[0][0.1], -0.67078000000000748)
# Checking if upper percentile of result3 is correct
# Checking if upper percentile of result4 is correct
self.assertAlmostEqual(result4[0][99.9], 0.093849230769235695)


Expand Down

0 comments on commit 2d918e2

Please sign in to comment.