Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chevychase #48

Merged
merged 12 commits into from
Feb 2, 2021
45 changes: 31 additions & 14 deletions reciprocalspaceship/algorithms/scale_merged_intensities.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
from scipy.special import logsumexp
from scipy.stats import gamma,norm

def _french_wilson_posterior_quad(Iobs, SigIobs, Sigma, centric, npoints=200):

def _french_wilson_posterior_quad(Iobs, SigIobs, Sigma, centric, npoints=100):
"""
Compute the mean and std deviation of the French-Wilson posterior using
Gauss-Legendre quadrature.
Chebyshev-Gauss quadrature.

Parameters
----------
Expand Down Expand Up @@ -39,29 +40,45 @@ def _french_wilson_posterior_quad(Iobs, SigIobs, Sigma, centric, npoints=200):
SigIobs = np.array(SigIobs, dtype=np.float64)
Sigma = np.array(Sigma, dtype=np.float64)

half_window_size = 20.
Jmin = np.maximum(Iobs - half_window_size*SigIobs, 0.)
Jmax = Jmin + 2.*half_window_size*SigIobs
grid,weights = np.polynomial.legendre.leggauss(npoints)

J = (Jmax - Jmin)[:,None] * grid / 2. + (Jmax + Jmin)[:,None] / 2.
log_prefactor = np.log(Jmax - Jmin) - np.log(2.)
#Integration window based on the normal likelihood distribution
window_size = 20. #In standard devs
Jmin = Iobs - window_size*SigIobs/2.
Jmin = np.maximum(0., Jmin)
Jmax = Jmin + window_size*SigIobs

logweights = np.log(weights)[None,:]
logJ = np.log(J)
#Prior distribution parameters
a = np.where(centric, 0.5, 1.)
scale = np.where(centric, 2.*Sigma, Sigma)

#Quadrature grid points
grid,weights = np.polynomial.chebyshev.chebgauss(npoints)

#Change of variables for generalizing chebgauss
# Equivalent to: logweights = log(sqrt(1-grid**2.)*w)
logweights = (0.5*(np.log(1-grid) + np.log(1+grid)) + np.log(weights))[None,:]
J = (Jmax - Jmin)[:,None] * grid / 2. + (Jmax + Jmin)[:,None] / 2.
logJ = np.nan_to_num(np.log(J), -np.inf)
log_prefactor = np.log(Jmax - Jmin) - np.log(2.0)

#Log prior (Wilson's prior for intensities)
logP = gamma.logpdf(J, np.expand_dims(a, axis=-1), scale=scale[:,None])

#Log likelihood (Normal about Iobs)
logL = norm.logpdf(J, loc=Iobs[:,None], scale=SigIobs[:,None])
logZ = log_prefactor + logsumexp(logweights + logP + logL, axis=1)

#Compute partition function
logZ = log_prefactor + logsumexp(logweights + logP + logL, axis=1)

#Compute expected value and variance of intensity
log_mean = log_prefactor + logsumexp(logweights + logJ + logP + logL - logZ[:,None], axis=1)
mean = np.exp(log_mean)
variance = np.exp(log_prefactor + logsumexp(logweights+2.*logJ+logP + logL - logZ[:,None], axis=1)) - mean**2

logF = 0.5*logJ
#Compute expected value and variance of structure factor amplitude
logF = 0.5*logJ #Change variables
log_mean_F = log_prefactor + logsumexp(logweights + logF + logP + logL - logZ[:,None], axis=1)
mean_F = np.exp(log_mean_F)
variance_F = np.exp(log_prefactor + logsumexp(logweights+2.*logF+logP + logL - logZ[:,None], axis=1)) - mean_F**2
variance_F = mean - mean_F**2
return mean, np.sqrt(variance), mean_F, np.sqrt(variance_F)

def mean_intensity_by_miller_index(I, H, bandwidth):
Expand Down
19 changes: 19 additions & 0 deletions tests/algorithms/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,22 @@ def data_fw1978_output(request):
elif request.param == "acentric structurefactors":
return data_fw1978_structurefactors(centric=False)

@pytest.fixture
def data_fw_cctbx():
    """
    Load French-Wilson test data from cctbx
    """
    # Resolve the reference CSV relative to this test module's directory.
    csv_path = join(dirname(__file__), "..", "data", "french_wilson", "fw_test_data.csv")
    return pd.read_csv(abspath(csv_path))

@pytest.fixture
def data_fw_mcmc():
    """
    Load French-Wilson test data from PyMC3
    """
    # Resolve the reference CSV relative to this test module's directory.
    csv_path = join(dirname(__file__), "..", "data", "french_wilson", "fw_mcmc_data.csv")
    return pd.read_csv(abspath(csv_path))


30 changes: 29 additions & 1 deletion tests/algorithms/test_scale_merged_intensities.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,4 +189,32 @@ def test_scale_merged_intensities_dropna(data_hewl_all, dropna):
assert (scaled["FW-I"].to_numpy() >= 0.).all()



@pytest.mark.parametrize("ref_method", ['mcmc', 'cctbx', 'mcmc vs cctbx'])
def test_fw_posterior_quad(ref_method, data_fw_cctbx, data_fw_mcmc):
    """
    Test the _french_wilson_posterior_quad function directly against cctbx output.
    The cctbx implementations can be found
    - `acentric <https://github.com/cctbx/cctbx_project/blob/8db5aedd7d0897bcea82b9c8dc2d21976437435f/cctbx/french_wilson.py#L92>`_
    - `centric <https://github.com/cctbx/cctbx_project/blob/8db5aedd7d0897bcea82b9c8dc2d21976437435f/cctbx/french_wilson.py#L128>`_
    """
    if ref_method == 'cctbx':
        I,SigI,Sigma,J,SigJ,F,SigF,Centric = data_fw_cctbx.to_numpy(np.float64).T
        # `np.bool` was deprecated in NumPy 1.20 and removed in 1.24; the
        # builtin `bool` is the documented replacement for astype().
        Centric = Centric.astype(bool)
        rtol = 0.06
        rs_J,rs_SigJ,rs_F,rs_SigF = _french_wilson_posterior_quad(I, SigI, Sigma, Centric)
    elif ref_method == 'mcmc':
        I,SigI,Sigma,J,SigJ,F,SigF,Centric = data_fw_mcmc.to_numpy(np.float64).T
        Centric = Centric.astype(bool)
        rtol = 0.025
        rs_J,rs_SigJ,rs_F,rs_SigF = _french_wilson_posterior_quad(I, SigI, Sigma, Centric)
    elif ref_method == 'mcmc vs cctbx':
        # Compare the two reference datasets directly; the quadrature routine
        # is not called in this branch, so no Centric conversion is needed.
        I,SigI,Sigma,J,SigJ,F,SigF,Centric = data_fw_mcmc.to_numpy(np.float64).T
        _,_,_,rs_J,rs_SigJ,rs_F,rs_SigF,_ = data_fw_cctbx.to_numpy(np.float64).T
        rtol = 0.06

    assert np.allclose(rs_J, J, rtol=rtol)
    assert np.allclose(rs_SigJ, SigJ, rtol=rtol)
    assert np.allclose(rs_F, F, rtol=rtol)
    assert np.allclose(rs_SigF, SigF, rtol=rtol)

80 changes: 80 additions & 0 deletions tests/data/french_wilson/diagnostics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from reciprocalspaceship.algorithms.scale_merged_intensities import _french_wilson_posterior_quad
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

# Diagnostic plots comparing the quadrature-based French-Wilson posterior
# (rs_*) against reference posterior moments sampled by MCMC (J, SigJ, F, SigF).
data_fw_mcmc = pd.read_csv("fw_mcmc_data.csv")
I,SigI,Sigma,J,SigJ,F,SigF,Centric = data_fw_mcmc.to_numpy(np.float64).T

# `np.bool` was deprecated in NumPy 1.20 and removed in 1.24; use the builtin.
Centric = Centric.astype(bool)
rs_J,rs_SigJ,rs_F,rs_SigF = _french_wilson_posterior_quad(I, SigI, Sigma, Centric)

#All the plots
plt.plot(J, rs_J, 'k.', label='Acentric')
plt.plot(J[Centric], rs_J[Centric], 'r.', label='Centric')
plt.xlabel("MCMC J")
plt.ylabel("rs J")
plt.legend()
plt.savefig("J.png")

plt.figure()
plt.plot(F, rs_F, 'k.', label='Acentric')
plt.plot(F[Centric], rs_F[Centric], 'r.', label='Centric')
plt.xlabel("MCMC F")
plt.ylabel("rs F")
plt.legend()
plt.savefig("F.png")

plt.figure()
plt.plot(I/SigI, 100.*(rs_F - F)/F, 'k.', alpha=0.1, label='Acentric')
plt.plot((I/SigI)[Centric], (100.*(rs_F - F)/F)[Centric], 'r.', alpha=0.1, label='Centric')
plt.legend()
plt.xlabel('Iobs/SigIobs')
plt.ylabel('Percent Error (F)')
plt.savefig("Ferr.png")

plt.figure()
plt.plot(I/SigI, 100.*(rs_J - J)/J, 'k.', alpha=0.1, label='Acentric')
plt.plot((I/SigI)[Centric], (100.*(rs_J - J)/J)[Centric], 'r.', alpha=0.1, label='Centric')
plt.legend()
plt.xlabel('Iobs/SigIobs')
plt.ylabel('Percent Error (J)')
plt.savefig("Jerr.png")

plt.figure()
plt.hist((100.*(rs_J - J)/J)[~Centric], 100, color='k', label='Acentric', alpha=0.4)
plt.hist((100.*(rs_J - J)/J)[Centric], 100, color='r', label='Centric', alpha=0.4)
plt.legend()
plt.xlabel('Percent Error (J)')
plt.savefig("J_hist.png")

plt.figure()
plt.hist((100.*(rs_F - F)/F)[~Centric], 100, color='k', label='Acentric', alpha=0.4)
plt.hist((100.*(rs_F - F)/F)[Centric], 100, color='r', label='Centric', alpha=0.4)
plt.legend()
plt.xlabel('Percent Error (F)')
plt.savefig("F_hist.png")

plt.figure()
plt.hist((100.*(rs_SigF - SigF)/SigF)[~Centric], 100, color='k', label='Acentric', alpha=0.4)
plt.hist((100.*(rs_SigF - SigF)/SigF)[Centric], 100, color='r', label='Centric', alpha=0.4)
plt.legend()
plt.xlabel('Percent Error (SigF)')
plt.savefig("SigF_hist.png")

plt.figure()
plt.hist((100.*(rs_SigJ - SigJ)/SigJ)[~Centric], 100, color='k', label='Acentric', alpha=0.4)
plt.hist((100.*(rs_SigJ - SigJ)/SigJ)[Centric], 100, color='r', label='Centric', alpha=0.4)
plt.legend()
plt.xlabel('Percent Error (SigJ)')
plt.savefig("SigJ_hist.png")

plt.figure()
plt.plot((I/SigI)[~Centric], (100.*(rs_SigF - SigF)/SigF)[~Centric], 'k.', label='Acentric', alpha=0.1)
plt.plot((I/SigI)[Centric], (100.*(rs_SigF - SigF)/SigF)[Centric], 'r.', label='Centric', alpha=0.1)
plt.legend()
plt.ylabel('Percent Error (SigF)')
plt.xlabel('I/Sigma')
# BUG FIX: this scatter plot was previously saved as "SigF_hist.png",
# clobbering the SigF histogram written above; give it a distinct name.
plt.savefig("SigF_scatter.png")

plt.show()
Loading