Skip to content

Commit

Permalink
add rs.utils.weighted_pearsonr and tests (#189)
Browse files Browse the repository at this point in the history
* add `rs.utils.weighted_pearsonr` and tests

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* remove redundant division

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* clarify docstring

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* test against uniform weights other than 1

* test against previous results

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

Co-authored-by: Kevin Dalton <kmdalton@pop-os.localdomain>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Oct 19, 2022
1 parent 8d90ddc commit 28f6c93
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 1 deletion.
2 changes: 1 addition & 1 deletion reciprocalspaceship/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from reciprocalspaceship.utils.math import angle_between
from reciprocalspaceship.utils.phases import canonicalize_phases, get_phase_restrictions
from reciprocalspaceship.utils.rfree import add_rfree, copy_rfree
from reciprocalspaceship.utils.stats import compute_redundancy
from reciprocalspaceship.utils.stats import compute_redundancy, weighted_pearsonr
from reciprocalspaceship.utils.structurefactors import (
compute_structurefactor_multiplicity,
from_structurefactor,
Expand Down
38 changes: 38 additions & 0 deletions reciprocalspaceship/utils/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,41 @@ def compute_redundancy(

mult = mult.sort_index()
return mult.get_hkls(), mult["Count"].to_numpy(np.int32)


def weighted_pearsonr(x, y, w):
"""
Calculate a [weighted Pearson correlation coefficient](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#Weighted_correlation_coefficient).
Note
----
x, y, and w may have arbitrarily shaped leading dimensions. The correlation coefficient will always be computed pairwise along the last axis.
Parameters
----------
x : np.array(float)
An array of observations.
y : np.array(float)
An array of observations the same shape as x.
w : np.array(float)
An array of weights the same shape as x. These needn't be normalized.
Returns
-------
r : float
The Pearson correlation coefficient along the last dimension. This has shape {x,y,w}.shape[:-1].
"""
z = np.reciprocal(w.sum(-1))

mx = z * (w * x).sum(-1)
my = z * (w * y).sum(-1)

dx = x - np.expand_dims(mx, axis=-1)
dy = y - np.expand_dims(my, axis=-1)

cxy = z * (w * dx * dy).sum(-1)
cx = z * (w * dx * dx).sum(-1)
cy = z * (w * dy * dy).sum(-1)

r = cxy / np.sqrt(cx * cy)
return r
69 changes: 69 additions & 0 deletions tests/utils/test_weighted_pearsonr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from io import StringIO

import numpy as np
import pandas as pd
import pytest
from scipy.stats import pearsonr

import reciprocalspaceship as rs


def test_against_previous_result():
csv = """
,x,y,w
0,0.9051822979780747,0.2700784720702334,0.937803097548697
1,0.7653655064893381,0.9096902688106824,0.9022413175501339
2,0.6701438010457531,0.7643360435701648,0.8943602840313861
3,0.1524086489285047,0.9854887367590378,0.23724145891681891
4,0.8673578229262408,0.1660901563869679,0.6802234818049551
5,0.04749197327200072,0.4056733186535064,0.41411735570989516
6,0.5555482004198411,0.4273894191186294,0.36358917098272747
7,0.5463417645646479,0.5092920447904933,0.29441863366197596
8,0.31353494110452584,0.7666249814241163,0.7493823577932279
9,0.3923683608283065,0.18587807020463565,0.9318927399856036
"""
df = pd.read_csv(StringIO(csv))
x, y, w = df.x.to_numpy(), df.y.to_numpy(), df.w.to_numpy()
expected_r = -0.1478766135438829

r = rs.utils.stats.weighted_pearsonr(x, y, w)
assert np.isclose(r, expected_r)


def test_weighted_pearsonr():
n = 100

x, y, sigx, sigy = np.random.random((4, n))
w = np.sqrt(sigx * sigx + sigy * sigy)

# Test execution
rs.utils.stats.weighted_pearsonr(x, y, w)

# Test against scipy pearsonr
w = np.ones(n)
r = rs.utils.stats.weighted_pearsonr(x, y, w)
expected_r = pearsonr(x, y)[0]
assert np.isclose(r, expected_r)

# Test against scipy with another uniform weight value
w = np.ones(n) * 42.0
r = rs.utils.stats.weighted_pearsonr(x, y, w)
expected_r = pearsonr(x, y)[0]
assert np.isclose(r, expected_r)


def test_weighted_pearsonr_batched():
# Test batch execution
a, b, n = 2, 3, 100
x, y, sigx, sigy = np.random.random((4, a, b, n))
w = np.sqrt(sigx * sigx + sigy * sigy)
r = rs.utils.stats.weighted_pearsonr(x, y, w)
assert np.all(np.array(r.shape) == np.array([a, b]))

# Test against scipy pearsonr
w = np.ones((a, b, n))
r = rs.utils.stats.weighted_pearsonr(x, y, w)
for i in range(a):
for j in range(b):
expected_r = pearsonr(x[i, j], y[i, j])[0]
assert np.isclose(r[i, j], expected_r)

0 comments on commit 28f6c93

Please sign in to comment.