Skip to content

Commit

Permalink
Add Bhattacharyya test
Browse files Browse the repository at this point in the history
  • Loading branch information
AndrejaKovacic committed Oct 18, 2019
1 parent cb6c698 commit 93b0494
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 22 deletions.
20 changes: 17 additions & 3 deletions Orange/distance/distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import numpy as np
from scipy import stats
from scipy.spatial.distance import cdist
from scipy import sparse as sp
import sklearn.metrics as skl_metrics
from sklearn.utils.extmath import row_norms, safe_sparse_dot
Expand Down Expand Up @@ -645,20 +644,35 @@ class PearsonRAbsolute(CorrelationDistance):
def fit(self, _):
return PearsonModel(True, self.axis, self.impute)

def _prob_dist(a):
# Makes the vector sum to one, as to mimick probability distribution.
return a / np.sum(a)

def _bhattacharyya(a, b):
# not a real metric, does not obey triangle inequality
return -np.log(np.sum(np.sqrt(a*b)))
a = _prob_dist(a)
b = _prob_dist(b)
if sp.issparse(a):
return - np.log(np.sum(np.sqrt(a.multiply(b))))
return - np.log(np.sum(np.sqrt(a * b)))

class Bhattacharyya(Distance):
    """Bhattacharyya distance; fitting does not look at the data."""

    # Capability flags declared by this distance.
    supports_sparse = True
    supports_discrete = False

    def fit(self, data):
        """Return a model set up with this distance's axis and impute flag.

        ``data`` is accepted for interface compatibility and is not used.
        """
        model = BhattacharyyaModel(self.axis, self.impute)
        return model


class BhattacharyyaModel(DistanceModel):
    """Model that computes pairwise Bhattacharyya distances."""

    def compute_distances(self, x1, x2):
        """Return the matrix of Bhattacharyya distances between `x1` and `x2`.

        Distances are computed between rows when ``self.axis`` is 1 and
        between columns otherwise. If `x2` is None, `x1` is compared with
        itself.
        """
        if x2 is None:
            x2 = x1
        if self.axis != 1:
            # Column-wise distances: transpose so that columns become the
            # rows handed to the pairwise routine.
            x1, x2 = x1.T, x2.T
        # The callable metric is applied to each pair of rows.
        return skl_metrics.pairwise_distances(x1, x2, _bhattacharyya)


class Mahalanobis(Distance):
Expand Down
66 changes: 48 additions & 18 deletions Orange/tests/test_distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# pylint: disable=missing-docstring

from unittest import TestCase
import unittest
import pickle

import numpy as np
Expand All @@ -14,7 +15,8 @@
DiscreteVariable, StringVariable, Instance)
from Orange.distance import (Euclidean, SpearmanR, SpearmanRAbsolute,
PearsonR, PearsonRAbsolute, Manhattan, Cosine,
Jaccard, _preprocess, MahalanobisDistance)
Jaccard, _preprocess, MahalanobisDistance,
Bhattacharyya)
from Orange.distance.distance import _spearmanr2, _corrcoef2
from Orange.misc import DistMatrix
from Orange.tests import named_file, test_filename
Expand Down Expand Up @@ -91,20 +93,20 @@ def test_from_file(self):
self.assertEqual(m.axis, 1)

with named_file(
"""3 axis=1 symmetric
0.12 3.45 6.78
9.01 2.34 5.67
8.90""") as name:
"""3 axis=1 symmetric
0.12 3.45 6.78
9.01 2.34 5.67
8.90""") as name:
m = DistMatrix.from_file(name)
np.testing.assert_almost_equal(m, np.array([[0.12, 9.01, 8.90],
[9.01, 2.34, 0],
[8.90, 0, 0]]))

with named_file(
"""3 row_labels
starič 0.12 3.45 6.78
aleš 9.01 2.34 5.67
anže 8.90""", encoding="utf-8""") as name:
"""3 row_labels
starič 0.12 3.45 6.78
aleš 9.01 2.34 5.67
anže 8.90""", encoding="utf-8""") as name:
m = DistMatrix.from_file(name)
np.testing.assert_almost_equal(m, np.array([[0.12, 9.01, 8.90],
[9.01, 2.34, 0],
Expand Down Expand Up @@ -150,10 +152,10 @@ def assertErrorMsg(content, msg):

def test_save(self):
with named_file(
"""3 axis=1 row_labels
danny 0.12 3.45 6.78
eve 9.01 2.34 5.67
frank 8.90""") as name:
"""3 axis=1 row_labels
danny 0.12 3.45 6.78
eve 9.01 2.34 5.67
frank 8.90""") as name:
m = DistMatrix.from_file(name)
m.save(name)
m = DistMatrix.from_file(name)
Expand All @@ -167,11 +169,11 @@ def test_save(self):
self.assertEqual(m.axis, 1)

with named_file(
"""3 axis=0 asymmetric col_labels row_labels
ann bert chad
danny 0.12 3.45 6.78
eve 9.01 2.34 5.67
frank 8.90 1.23 4.56""") as name:
"""3 axis=0 asymmetric col_labels row_labels
ann bert chad
danny 0.12 3.45 6.78
eve 9.01 2.34 5.67
frank 8.90 1.23 4.56""") as name:
m = DistMatrix.from_file(name)
m.save(name)
m = DistMatrix.from_file(name)
Expand Down Expand Up @@ -943,6 +945,31 @@ def test_dimensions(self):
mah(xt[0], xt[1])


class TestBhattacharyya(TestCase):
    """Bhattacharyya distance on dense rows, sparse rows and columns."""

    @classmethod
    def setUpClass(cls):
        cls.dist = Bhattacharyya

    def test_dense_array(self):
        # Also checks normalization: the rows are passed in unscaled.
        d01 = 4.48049499e-04
        d02 = 2.07117086e-05
        d12 = 3.65052724e-04
        expected = np.array([[0, d01, d02],
                             [d01, 0, d12],
                             [d02, d12, 0]])
        iris = Table('iris')
        computed = self.dist(iris.X[:3])
        np.testing.assert_array_almost_equal(computed, expected)

    def test_sparse_array(self):
        rows = csr_matrix([[0.5, 0.5], [0, 0.5]])
        computed = self.dist(rows[0], rows[1])
        self.assertAlmostEqual(computed, 0.3465735902799726, delta=1e-5)

    def test_columns(self):
        off_diagonal = 0.05268025782891318
        expected = np.array([[0, off_diagonal],
                             [off_diagonal, 0]])
        columns = np.array([[0.5, 0.2], [0.5, 0.8]])
        computed = self.dist(columns, axis=0)
        np.testing.assert_array_almost_equal(computed, expected)


class TestDistances(TestCase):
@classmethod
def setUpClass(cls):
Expand Down Expand Up @@ -982,3 +1009,6 @@ def test_distance_to_instance(self):
iris = Table('iris')
inst = Instance(iris.domain, np.concatenate((iris[1].x, iris[1].y)))
self.assertEqual(Euclidean(iris[1], inst), 0)

# Run the test suite when this module is executed directly.
if __name__ == '__main__':
    unittest.main()
7 changes: 6 additions & 1 deletion Orange/widgets/unsupervised/owdistances.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from AnyQt.QtCore import Qt
from scipy.sparse import issparse
from numpy import min as _min
import bottleneck as bn

import Orange.data
Expand All @@ -21,7 +22,7 @@
("Absolute Spearman", distance.SpearmanRAbsolute),
("Pearson", distance.PearsonR),
("Absolute Pearson", distance.PearsonRAbsolute),
("Hamming2", distance.Hamming),
("Hamming", distance.Hamming),
('Bhattacharyya', distance.Bhattacharyya)
]

Expand Down Expand Up @@ -58,6 +59,7 @@ class Error(OWWidget.Error):
dense_metric_sparse_data = Msg("{} requires dense data.")
distances_memory_error = Msg("Not enough memory")
distances_value_error = Msg("Problem in calculation:\n{}")
# Shown when Bhattacharyya is selected and the data contains negative values.
negative_value_error = Msg("Only non-negative values allowed for Bhattacharyya.")

class Warning(OWWidget.Warning):
ignoring_discrete = Msg("Ignoring categorical features")
Expand Down Expand Up @@ -158,6 +160,9 @@ def _fix_missing():
_fix_discrete, _fix_missing, _fix_nonbinary):
if not check():
return None
if (METRICS[self.metric_idx][0] == 'Bhattacharyya') and _min(data.X) < 0:
self.Error.negative_value_error()
return None
try:
if metric.supports_normalization and self.normalized_dist:
return metric(data, axis=1 - self.axis, impute=True,
Expand Down
9 changes: 9 additions & 0 deletions Orange/widgets/unsupervised/tests/test_owdistances.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,12 @@ def test_too_big_array(self):
def test_migrates_normalized_dist(self):
w = self.create_widget(OWDistances, stored_settings={"metric_idx": 0})
self.assertFalse(w.normalized_dist)

def test_negative_values_bhattacharyya(self):
    """Negative input with the Bhattacharyya metric must raise the widget error."""
    # Select the metric by name so the test does not depend on its position.
    metric_names = [name for name, _ in METRICS]
    self.widget.metric_idx = metric_names.index("Bhattacharyya")

    # Flip one value to negative in place. The flip is undone in ``finally``
    # so a failing assertion cannot leave the modified table behind, in case
    # the table is shared with other tests.
    self.iris.X[0, 0] *= -1
    try:
        self.send_signal(self.widget.Inputs.data, self.iris)
        self.assertTrue(self.widget.Error.negative_value_error.is_shown())
    finally:
        self.iris.X[0, 0] *= -1

0 comments on commit 93b0494

Please sign in to comment.