Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chevychase #48

Merged
merged 12 commits into from
Feb 2, 2021
45 changes: 31 additions & 14 deletions reciprocalspaceship/algorithms/scale_merged_intensities.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
from scipy.special import logsumexp
from scipy.stats import gamma,norm

def _french_wilson_posterior_quad(Iobs, SigIobs, Sigma, centric, npoints=200):

def _french_wilson_posterior_quad(Iobs, SigIobs, Sigma, centric, npoints=100):
"""
Compute the mean and std deviation of the French-Wilson posterior using
Gauss-Legendre quadrature.
Chebyshev-Gauss quadrature.

Parameters
----------
Expand Down Expand Up @@ -39,29 +40,45 @@ def _french_wilson_posterior_quad(Iobs, SigIobs, Sigma, centric, npoints=200):
SigIobs = np.array(SigIobs, dtype=np.float64)
Sigma = np.array(Sigma, dtype=np.float64)

half_window_size = 20.
Jmin = np.maximum(Iobs - half_window_size*SigIobs, 0.)
Jmax = Jmin + 2.*half_window_size*SigIobs
grid,weights = np.polynomial.legendre.leggauss(npoints)

J = (Jmax - Jmin)[:,None] * grid / 2. + (Jmax + Jmin)[:,None] / 2.
log_prefactor = np.log(Jmax - Jmin) - np.log(2.)
#Integration window based on the normal likelihood distribution
window_size = 20. #In standard devs
Jmin = Iobs - window_size*SigIobs/2.
Jmin = np.maximum(0., Jmin)
Jmax = Jmin + window_size*SigIobs

logweights = np.log(weights)[None,:]
logJ = np.log(J)
#Prior distribution parameters
a = np.where(centric, 0.5, 1.)
scale = np.where(centric, 2.*Sigma, Sigma)

#Quadrature grid points
grid,weights = np.polynomial.chebyshev.chebgauss(npoints)

#Change of variables for generalizing chebgauss
# Equivalent to: logweights = log(sqrt(1-grid**2.)*w)
logweights = (0.5*(np.log(1-grid) + np.log(1+grid)) + np.log(weights))[None,:]
J = (Jmax - Jmin)[:,None] * grid / 2. + (Jmax + Jmin)[:,None] / 2.
logJ = np.nan_to_num(np.log(J), -np.inf)
log_prefactor = np.log(Jmax - Jmin) - np.log(2.0)

#Log prior (Wilson's prior for intensities)
logP = gamma.logpdf(J, np.expand_dims(a, axis=-1), scale=scale[:,None])

#Log likelihood (Normal about Iobs)
logL = norm.logpdf(J, loc=Iobs[:,None], scale=SigIobs[:,None])
logZ = log_prefactor + logsumexp(logweights + logP + logL, axis=1)

#Compute partition function
logZ = log_prefactor + logsumexp(logweights + logP + logL, axis=1)

#Compute expected value and variance of intensity
log_mean = log_prefactor + logsumexp(logweights + logJ + logP + logL - logZ[:,None], axis=1)
mean = np.exp(log_mean)
variance = np.exp(log_prefactor + logsumexp(logweights+2.*logJ+logP + logL - logZ[:,None], axis=1)) - mean**2

logF = 0.5*logJ
#Compute expected value and variance of structure factor amplitude
logF = 0.5*logJ #Change variables
log_mean_F = log_prefactor + logsumexp(logweights + logF + logP + logL - logZ[:,None], axis=1)
mean_F = np.exp(log_mean_F)
variance_F = np.exp(log_prefactor + logsumexp(logweights+2.*logF+logP + logL - logZ[:,None], axis=1)) - mean_F**2
variance_F = mean - mean_F**2
return mean, np.sqrt(variance), mean_F, np.sqrt(variance_F)

def mean_intensity_by_miller_index(I, H, bandwidth):
Expand Down
19 changes: 19 additions & 0 deletions tests/algorithms/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,22 @@ def data_fw1978_output(request):
elif request.param == "acentric structurefactors":
return data_fw1978_structurefactors(centric=False)

@pytest.fixture
def data_fw_cctbx():
    """
    Load French-Wilson test data from cctbx
    """
    # Resolve the reference CSV relative to this test module's directory.
    csv_path = join(dirname(__file__), "..", "data", "french_wilson", "fw_test_data.csv")
    return pd.read_csv(abspath(csv_path))

@pytest.fixture
def data_fw_mcmc():
    """
    Load French-Wilson test data from PyMC3
    """
    # Resolve the reference CSV relative to this test module's directory.
    csv_path = join(dirname(__file__), "..", "data", "french_wilson", "fw_mcmc_data.csv")
    return pd.read_csv(abspath(csv_path))


30 changes: 29 additions & 1 deletion tests/algorithms/test_scale_merged_intensities.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,4 +189,32 @@ def test_scale_merged_intensities_dropna(data_hewl_all, dropna):
assert (scaled["FW-I"].to_numpy() >= 0.).all()



@pytest.mark.parametrize("ref_method", ['mcmc', 'cctbx', 'mcmc vs cctbx'])
def test_fw_posterior_quad(ref_method, data_fw_cctbx, data_fw_mcmc):
    """
    Test the _french_wilson_posterior_quad function directly against cctbx output.
    The cctbx implementations can be found
    - `acentric <https://github.com/cctbx/cctbx_project/blob/8db5aedd7d0897bcea82b9c8dc2d21976437435f/cctbx/french_wilson.py#L92>`_
    - `centric <https://github.com/cctbx/cctbx_project/blob/8db5aedd7d0897bcea82b9c8dc2d21976437435f/cctbx/french_wilson.py#L128>`_
    """
    if ref_method == 'cctbx':
        I,SigI,Sigma,J,SigJ,F,SigF,Centric = data_fw_cctbx.to_numpy(np.float64).T
        # `np.bool` was deprecated in NumPy 1.20 and removed in 1.24; the
        # builtin `bool` is the documented replacement for astype().
        Centric = Centric.astype(bool)
        rtol = 0.06
        rs_J,rs_SigJ,rs_F,rs_SigF = _french_wilson_posterior_quad(I, SigI, Sigma, Centric)
    elif ref_method == 'mcmc':
        I,SigI,Sigma,J,SigJ,F,SigF,Centric = data_fw_mcmc.to_numpy(np.float64).T
        Centric = Centric.astype(bool)
        rtol = 0.025
        rs_J,rs_SigJ,rs_F,rs_SigF = _french_wilson_posterior_quad(I, SigI, Sigma, Centric)
    elif ref_method == 'mcmc vs cctbx':
        # Compare the two reference datasets directly; the quadrature routine
        # is not called in this branch, so no Centric conversion is needed.
        I,SigI,Sigma,J,SigJ,F,SigF,Centric = data_fw_mcmc.to_numpy(np.float64).T
        _,_,_,rs_J,rs_SigJ,rs_F,rs_SigF,_ = data_fw_cctbx.to_numpy(np.float64).T
        rtol = 0.06

    assert np.allclose(rs_J, J, rtol=rtol)
    assert np.allclose(rs_SigJ, SigJ, rtol=rtol)
    assert np.allclose(rs_F, F, rtol=rtol)
    assert np.allclose(rs_SigF, SigF, rtol=rtol)

80 changes: 80 additions & 0 deletions tests/data/french_wilson/diagnostics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from reciprocalspaceship.algorithms.scale_merged_intensities import _french_wilson_posterior_quad
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

# Diagnostic plots comparing the quadrature-based French-Wilson posterior
# (rs_*) against reference posterior moments sampled by MCMC (J, SigJ, F, SigF).
data_fw_mcmc = pd.read_csv("fw_mcmc_data.csv")
I,SigI,Sigma,J,SigJ,F,SigF,Centric = data_fw_mcmc.to_numpy(np.float64).T

# `np.bool` was deprecated in NumPy 1.20 and removed in 1.24; use the builtin.
Centric = Centric.astype(bool)
rs_J,rs_SigJ,rs_F,rs_SigF = _french_wilson_posterior_quad(I, SigI, Sigma, Centric)

#All the plots
plt.plot(J, rs_J, 'k.', label='Acentric')
plt.plot(J[Centric], rs_J[Centric], 'r.', label='Centric')
plt.xlabel("MCMC J")
plt.ylabel("rs J")
plt.legend()
plt.savefig("J.png")

plt.figure()
plt.plot(F, rs_F, 'k.', label='Acentric')
plt.plot(F[Centric], rs_F[Centric], 'r.', label='Centric')
plt.xlabel("MCMC F")
plt.ylabel("rs F")
plt.legend()
plt.savefig("F.png")

plt.figure()
plt.plot(I/SigI, 100.*(rs_F - F)/F, 'k.', alpha=0.1, label='Acentric')
plt.plot((I/SigI)[Centric], (100.*(rs_F - F)/F)[Centric], 'r.', alpha=0.1, label='Centric')
plt.legend()
plt.xlabel('Iobs/SigIobs')
plt.ylabel('Percent Error (F)')
plt.savefig("Ferr.png")

plt.figure()
plt.plot(I/SigI, 100.*(rs_J - J)/J, 'k.', alpha=0.1, label='Acentric')
plt.plot((I/SigI)[Centric], (100.*(rs_J - J)/J)[Centric], 'r.', alpha=0.1, label='Centric')
plt.legend()
plt.xlabel('Iobs/SigIobs')
plt.ylabel('Percent Error (J)')
plt.savefig("Jerr.png")

plt.figure()
plt.hist((100.*(rs_J - J)/J)[~Centric], 100, color='k', label='Acentric', alpha=0.4)
plt.hist((100.*(rs_J - J)/J)[Centric], 100, color='r', label='Centric', alpha=0.4)
plt.legend()
plt.xlabel('Percent Error (J)')
plt.savefig("J_hist.png")

plt.figure()
plt.hist((100.*(rs_F - F)/F)[~Centric], 100, color='k', label='Acentric', alpha=0.4)
plt.hist((100.*(rs_F - F)/F)[Centric], 100, color='r', label='Centric', alpha=0.4)
plt.legend()
plt.xlabel('Percent Error (F)')
plt.savefig("F_hist.png")

plt.figure()
plt.hist((100.*(rs_SigF - SigF)/SigF)[~Centric], 100, color='k', label='Acentric', alpha=0.4)
plt.hist((100.*(rs_SigF - SigF)/SigF)[Centric], 100, color='r', label='Centric', alpha=0.4)
plt.legend()
plt.xlabel('Percent Error (SigF)')
plt.savefig("SigF_hist.png")

plt.figure()
plt.hist((100.*(rs_SigJ - SigJ)/SigJ)[~Centric], 100, color='k', label='Acentric', alpha=0.4)
plt.hist((100.*(rs_SigJ - SigJ)/SigJ)[Centric], 100, color='r', label='Centric', alpha=0.4)
plt.legend()
plt.xlabel('Percent Error (SigJ)')
plt.savefig("SigJ_hist.png")

plt.figure()
plt.plot((I/SigI)[~Centric], (100.*(rs_SigF - SigF)/SigF)[~Centric], 'k.', label='Acentric', alpha=0.1)
plt.plot((I/SigI)[Centric], (100.*(rs_SigF - SigF)/SigF)[Centric], 'r.', label='Centric', alpha=0.1)
plt.legend()
plt.ylabel('Percent Error (SigF)')
plt.xlabel('I/Sigma')
# BUG FIX: this scatter plot was previously saved as "SigF_hist.png",
# clobbering the SigF histogram written above; give it a distinct name.
plt.savefig("SigF_scatter.png")

plt.show()
Loading