This repository has been archived by the owner on May 16, 2021. It is now read-only.

Commit cf5523b
refactor belief construction
add IDM method for belief construction
reineking committed Oct 9, 2014
1 parent 13abe40 commit cf5523b
Showing 3 changed files with 73 additions and 38 deletions.
10 changes: 10 additions & 0 deletions src/examples.py
@@ -20,6 +20,7 @@
Shows different use cases of the library.
"""

from __future__ import print_function
from pyds import MassFunction
from itertools import product

@@ -89,3 +90,12 @@
print('vacuous extension of m_1 to {1, 2} =', extended)
projected = extended.map(lambda h: (t[0] for t in h))
print('project m_1 back to its original frame =', projected)

print('\n=== construct belief from data ===')
hist = {'a':2, 'b':0, 'c':1}
print('histogram:', hist)
print('maximum likelihood:', MassFunction.from_samples(hist, 'bayesian', s=0))
print('Laplace smoothing:', MassFunction.from_samples(hist, 'bayesian', s=1))
print('IDM:', MassFunction.from_samples(hist, 'idm'))
print('MaxBel:', MassFunction.from_samples(hist, 'maxbel'))
print('MCD:', MassFunction.from_samples(hist, 'mcd'))
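
For reference, the numbers these new example lines print can be worked out by hand from the formulas introduced in this commit. A minimal sketch in plain Python (no pyds call; the exact print format of a MassFunction may differ):

# Recompute the masses for hist = {'a': 2, 'b': 0, 'c': 1} by hand.
hist = {'a': 2, 'b': 0, 'c': 1}
n = sum(hist.values())  # 3 samples
s = 1.0                 # IDM / smoothing parameter

# IDM (Walley): each singleton gets n_x / (n + s), the whole frame gets s / (n + s).
idm = {(k,): v / (n + s) for k, v in hist.items()}
idm[tuple(sorted(hist))] = s / (n + s)
print(idm)  # a: 0.5, b: 0.0, c: 0.25, frame {a, b, c}: 0.25

# 'bayesian': (n_x + s) / sum_x (n_x + s); s=0 is maximum likelihood, s=1 is Laplace smoothing.
ml = {(k,): v / float(n) for k, v in hist.items()}
laplace = {(k,): (v + s) / (n + s * len(hist)) for k, v in hist.items()}
print(ml)       # a: 0.667, b: 0.0, c: 0.333
print(laplace)  # a: 0.5, b: 0.167, c: 0.333
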
72 changes: 46 additions & 26 deletions src/pyds.py
@@ -894,41 +894,45 @@ def _confidence_intervals(histogram, alpha):
return p_lower, p_upper

@staticmethod
def from_samples(histogram, alpha=0.05, mode='default'):
def from_samples(histogram, method='idm', alpha=0.05, s=1.0):
"""
Generate a mass function from an empirical probability distribution that was obtained from a limited number of samples.
This makes the expected deviation of the empirical distribution from the true distribution explicit.
'histogram' represents the empirical distribution. It is a dictionary mapping each possible event to the respective
number of observations (represented as integers).
'mode' determines the algorithm used for generating the mass function.
Except for mode 'bayesian', all algorithms are based on the idea that the true probabilities lie within confidence intervals
'method' determines the algorithm used for generating the mass function.
Except for method 'bayesian', all algorithms are based on the idea that the true probabilities lie within confidence intervals
represented by the mass function with confidence level 1 - 'alpha'.
The following modes are supported:
'default': Maximize the total belief by solving a linear program. (Attention: this becomes very expensive computationally
'idm': Imprecise Dirichlet model. A small amount of mass (controlled by 's') is assigned to the entire frame.
For more information on 'idm', see:
P. Walley (1996), "Inferences from multinomial data: learning about a bag of marbles",
Journal of the Royal Statistical Society. Series B (Methodological), 3-57.
'maxbel': Maximize the total belief by solving a linear program. (Attention: this becomes very computationally expensive
for larger numbers of events.)
'ordered': Similar to 'default' except that the events are assumed to have a natural order (e.g., intervals), in which case
'maxbel-ordered': Similar to 'maxbel' except that the events are assumed to have a natural order (e.g., intervals), in which case
the mass function can be computed analytically and thus much faster.
For more information on 'default' and 'ordered', see:
For more information on 'maxbel' and 'maxbel-ordered', see:
T. Denoeux (2006), "Constructing belief functions from sample data using multinomial confidence regions",
International Journal of Approximate Reasoning 42, 228-252.
'consonant': Compute the least committed consonant mass function whose pignistic transformation lies within the confidence interval
induced by 'alpha'. Like 'default', it is based on solving a linear program and quickly becomes computationally expensive.
'mcd': Compute the least committed consonant mass function whose pignistic transformation lies within the confidence interval
induced by 'alpha'. Like 'maxbel', it is based on solving a linear program and quickly becomes computationally expensive.
'consonant-approximate': An approximation of 'consonant' that can be computed much more efficiently.
'mcd-approximate': An approximation of 'mcd' that can be computed much more efficiently.
For more information on these two modes, see:
For more information on these two methods, see:
A. Aregui, T. Denoeux (2008), "Constructing consonant belief functions from sample data using confidence sets of pignistic probabilities",
International Journal of Approximate Reasoning 49, 575-594.
'bayesian': Disregard the number of samples and assume the true probability distribution is equal to the empirical one.
'bayesian': Construct a Bayesian mass function based on the relative frequencies. In addition, additive smoothing is applied (controlled by 's').
If the number of samples is 0, a vacuous mass function is returned (or a uniform distribution in case of 'bayesian').
@@ -943,24 +947,40 @@ def from_samples(histogram, alpha=0.05, mode='default'):
return MassFunction()
if sum(histogram.values()) == 0: # return vacuous/uniform belief if there are no samples
vac = MassFunction({tuple(histogram.keys()):1})
if mode == 'bayesian':
if method == 'bayesian':
return vac.pignistic()
else:
return vac
if mode == 'bayesian':
return MassFunction({(h,):v for h, v in histogram.items()}).normalize()
elif mode == 'default':
return MassFunction._from_samples(histogram, alpha)
elif mode == 'ordered':
return MassFunction._from_samples(histogram, alpha, ordered=True)
elif mode == 'consonant':
return MassFunction._from_samples_consonant(histogram, alpha)
elif mode == 'consonant-approximate':
return MassFunction._from_samples_consonant(histogram, alpha, approximate=True)
raise ValueError('unknown mode: %s' % mode)
if method == 'bayesian':
return MassFunction({(h,):v + s for h, v in histogram.items()}).normalize()
elif method == 'idm':
return MassFunction._from_samples_idm(histogram, s)
elif method == 'maxbel':
return MassFunction._from_samples_maxbel(histogram, alpha)
elif method == 'maxbel-ordered':
return MassFunction._from_samples_maxbel(histogram, alpha, ordered=True)
elif method == 'mcd':
return MassFunction._from_samples_mcd(histogram, alpha)
elif method == 'mcd-approximate':
return MassFunction._from_samples_mcd(histogram, alpha, approximate=True)
raise ValueError('unknown method: %s' % method)

@staticmethod
def _from_samples_idm(histogram, s):
"""
Reference:
P. Walley (1996), "Inferences from multinomial data: learning about a bag of marbles",
Journal of the Royal Statistical Society. Series B (Methodological), 3-57.
"""
total = sum(histogram.values())
m = MassFunction()
for h, c in histogram.items():
m[(h,)] = float(c) / (total + s)
m[MassFunction._convert(histogram.keys())] = float(s) / (total + s)
return m

@staticmethod
def _from_samples(histogram, alpha, ordered=False):
def _from_samples_maxbel(histogram, alpha, ordered=False):
"""
Reference:
T. Denoeux (2006), "Constructing belief functions from sample data using multinomial confidence regions",
@@ -1011,7 +1031,7 @@ def p_lower_set(hs):
return MassFunction.from_array(m_optimal, H)

@staticmethod
def _from_samples_consonant(histogram, alpha, approximate=False):
def _from_samples_mcd(histogram, alpha, approximate=False):
"""
Reference:
A. Aregui, T. Denoeux (2008), "Constructing consonant belief functions from sample data using confidence
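
A note on why _from_samples_idm above matches Walley's imprecise Dirichlet model: with mass n_x / (n + s) on each singleton and s / (n + s) on the whole frame, the belief and plausibility of a singleton {x} come out as Walley's lower and upper probabilities n_x / (n + s) and (n_x + s) / (n + s). A small sketch of that interval arithmetic (plain Python, independent of pyds):

def idm_intervals(histogram, s=1.0):
    # bel({x}) = n_x / (n + s), pl({x}) = (n_x + s) / (n + s)
    n = float(sum(histogram.values()))
    return {x: (c / (n + s), (c + s) / (n + s)) for x, c in histogram.items()}

print(idm_intervals({'a': 2, 'b': 0, 'c': 1}))  # a: (0.5, 0.75), b: (0.0, 0.25), c: (0.25, 0.5)
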
29 changes: 17 additions & 12 deletions src/pyds_test.py
@@ -465,19 +465,19 @@ def test_confidence_intervals(self):

def test_from_samples(self):
"""
Example 1 (default) and example 7 (ordered) from:
Example 1 (maxbel) and example 7 (ordered) from:
T. Denoeux (2006), "Constructing belief functions from sample data using multinomial confidence regions",
International Journal of Approximate Reasoning 42, 228-252.
Example 6 (consonant) from:
Example 6 (mcd) from:
A. Aregui, T. Denoeux (2008), "Constructing consonant belief functions from sample data using confidence sets of pignistic probabilities",
International Journal of Approximate Reasoning 49, 575-594.
"""
precipitation_data = {1:48, 2:17, 3:19, 4:11, 5:6, 6:9}
failure_mode_data = {1:5, 2:11, 3:19, 4:30, 5:58, 6:67, 7:92, 8:118, 9:173, 10:297}
psych_data = {1:91, 2:49, 3:37, 4:43}
# default
m = MassFunction.from_samples(psych_data, 0.05, mode='default')
# maxbel
m = MassFunction.from_samples(psych_data, method='maxbel', alpha=0.05)
p_lower, p_upper = MassFunction._confidence_intervals(psych_data, 0.05)
def p_lower_set(hs):
l = u = 0
@@ -495,8 +495,8 @@ def p_lower_set(hs):
self.assertEqual(1, sum(m.values())) # constraint (25)
self.assertGreaterEqual(min(m.values()), 0) # constraint (26)
self.assertGreaterEqual(bel_sum, 6.23) # optimization criterion
# ordered
m = MassFunction.from_samples(precipitation_data, 0.05, mode='ordered')
# maxbel-ordered
m = MassFunction.from_samples(precipitation_data, method='maxbel-ordered', alpha=0.05)
self.assertAlmostEqual(0.32, m[(1,)], 2)
self.assertAlmostEqual(0.085, m[(2,)], 3)
self.assertAlmostEqual(0.098, m[(3,)], 3)
@@ -507,18 +507,23 @@ def p_lower_set(hs):
self.assertAlmostEqual(0.11, m[range(1, 6)], 2)
self.assertAlmostEqual(0.012, m[range(2, 6)], 2)
self.assertAlmostEqual(0.14, m[range(2, 7)], 2)
# consonant
# mcd
poss = {1: 0.171, 2: 0.258, 3: 0.353, 4: 0.462, 5: 0.688, 6: 0.735, 7: 0.804, 8: 0.867, 9: 0.935, 10: 1.0} # 8: 0.873
m = MassFunction.from_samples(failure_mode_data, 0.1, mode='consonant')
m = MassFunction.from_samples(failure_mode_data, method='mcd', alpha=0.1)
self._assert_equal_belief(MassFunction.from_possibility(poss), m, 1)
# consonant-approximate
m = MassFunction.from_samples(failure_mode_data, 0.1, mode='consonant-approximate')
# mcd-approximate
m = MassFunction.from_samples(failure_mode_data, method='mcd-approximate', alpha=0.1)
poss = {1: 0.171, 2: 0.258, 3: 0.353, 4: 0.462, 5: 0.688, 6: 0.747, 7: 0.875, 8: 0.973, 9: 1.0, 10: 1.0}
self._assert_equal_belief(MassFunction.from_possibility(poss), m, 2)
# bayesian
m = MassFunction.from_samples(precipitation_data, 0.05, mode='bayesian')
m = MassFunction.from_samples(precipitation_data, method='bayesian', s=0)
for e, n in precipitation_data.items():
self.assertEqual(n / float(sum(precipitation_data.values())), m[(e,)])
# idm
m = MassFunction.from_samples(precipitation_data, method='idm', s=1)
self.assertAlmostEqual(1. / float(sum(precipitation_data.values()) + 1), m[MassFunction._convert(precipitation_data.keys())])
for e, n in precipitation_data.items():
self.assertAlmostEqual(n / float(sum(precipitation_data.values()) + 1), m[(e,)])

def test_powerset(self):
s = range(2)
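
One behavior documented in from_samples but not exercised by the tests above is the empty-sample fallback: with zero observations, the interval-based methods return the vacuous mass function while 'bayesian' returns the uniform distribution. A minimal usage sketch with a hypothetical histogram, assuming only the API shown in the pyds.py diff above:

from pyds import MassFunction

empty = {'a': 0, 'b': 0, 'c': 0}  # hypothetical frame with no observations
print(MassFunction.from_samples(empty, method='idm'))       # all mass on the full frame {'a', 'b', 'c'}
print(MassFunction.from_samples(empty, method='bayesian'))  # uniform: 1/3 on each singleton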
