Skip to content

Commit

Permalink
Merge pull request #105 from Hekstra-Lab/merge_anom_flag
Browse files Browse the repository at this point in the history
add anomalous flag to rs.algorightms.merge
JBGreisman authored Oct 19, 2021
2 parents f98b0dd + 9a3d636 commit c18ff26
Showing 2 changed files with 46 additions and 25 deletions.
45 changes: 25 additions & 20 deletions reciprocalspaceship/algorithms/merge.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
import numpy as np


def merge(dataset, intensity_key="I", sigma_key="SIGI", sort=False):
def merge(dataset, intensity_key="I", sigma_key="SIGI", anomalous=True, sort=False):
"""
Merge dataset using inverse-variance weights.
Parameters
----------
dataset : rs.DataSet
Unmerged DataSet containing scaled intensities and uncertainties
intensity_key : str
intensity_key : str (optional)
Column name for intensities
sigma_key : str
sigma_key : str (optional)
Column name for uncertainties
sort : bool
anomalous : bool (optional)
If True, do not merge Friedel mates
sort : bool (optional)
If True, index of returned DataSet will be sorted
Returns
@@ -28,7 +30,7 @@ def merge(dataset, intensity_key="I", sigma_key="SIGI", sort=False):
)

# Map observations to reciprocal ASU
ds = dataset.hkl_to_asu(anomalous=True)
ds = dataset.hkl_to_asu(anomalous=anomalous)
ds["w"] = ds[sigma_key] ** -2
ds["wI"] = ds[intensity_key] * ds["w"]
g = ds.groupby(["H", "K", "L"])
@@ -40,23 +42,26 @@ def merge(dataset, intensity_key="I", sigma_key="SIGI", sort=False):
result.merged = True

# Reshape anomalous data and use to compute IMEAN / SIGIMEAN
result = result.unstack_anomalous()
result.loc[:, ["N(+)", "N(-)"]] = result[["N(+)", "N(-)"]].fillna(0).astype("I")
result["IMEAN"] = result[["wI(+)", "wI(-)"]].sum(axis=1) / result[
["w(+)", "w(-)"]
].sum(axis=1).astype("Intensity")
result["SIGIMEAN"] = np.sqrt(1 / (result[["w(+)", "w(-)"]].sum(axis=1))).astype(
"Stddev"
)
if anomalous:
result = result.unstack_anomalous()
result.loc[:, ["N(+)", "N(-)"]] = result[["N(+)", "N(-)"]].fillna(0).astype("I")
result["IMEAN"] = result[["wI(+)", "wI(-)"]].sum(axis=1) / result[
["w(+)", "w(-)"]
].sum(axis=1).astype("Intensity")
result["SIGIMEAN"] = np.sqrt(1 / (result[["w(+)", "w(-)"]].sum(axis=1))).astype(
"Stddev"
)

# Adjust SIGIMEAN for centric reflections due to duplicated values in
# Friedel columns
centrics = result.label_centrics()["CENTRIC"]
result.loc[centrics, "SIGIMEAN"] *= np.sqrt(2)
# Adjust SIGIMEAN for centric reflections due to duplicated values in
# Friedel columns
centrics = result.label_centrics()["CENTRIC"]
result.loc[centrics, "SIGIMEAN"] *= np.sqrt(2)

result = result[
["IMEAN", "SIGIMEAN", "I(+)", "SIGI(+)", "I(-)", "SIGI(-)", "N(+)", "N(-)"]
]
result = result[
["IMEAN", "SIGIMEAN", "I(+)", "SIGI(+)", "I(-)", "SIGI(-)", "N(+)", "N(-)"]
]
else:
result = result[["I", "SIGI", "N"]]

if sort:
return result.sort_index()
26 changes: 21 additions & 5 deletions tests/algorithms/test_merge.py
Original file line number Diff line number Diff line change
@@ -22,24 +22,40 @@ def test_merge_valueerror(hewl_merged):
],
)
@pytest.mark.parametrize("sort", [True, False])
def test_merge(hewl_unmerged, hewl_merged, keys, sort):
@pytest.mark.parametrize("anomalous", [False, True])
def test_merge(hewl_unmerged, hewl_merged, keys, sort, anomalous):
"""Test rs.algorithms.merge() against AIMLESS output"""
if keys is None:
merged = rs.algorithms.merge(hewl_unmerged, sort=sort)
merged = rs.algorithms.merge(hewl_unmerged, sort=sort, anomalous=anomalous)
elif not (keys[0] in hewl_unmerged.columns and keys[1] in hewl_unmerged.columns):
with pytest.raises(KeyError):
merged = rs.algorithms.merge(hewl_unmerged, keys[0], keys[1], sort=sort)
merged = rs.algorithms.merge(
hewl_unmerged, keys[0], keys[1], sort=sort, anomalous=anomalous
)
return
else:
merged = rs.algorithms.merge(hewl_unmerged, keys[0], keys[1], sort=sort)
merged = rs.algorithms.merge(
hewl_unmerged, keys[0], keys[1], sort=sort, anomalous=anomalous
)

# Check DataSet attributes
assert merged.merged
assert merged.spacegroup.xhm() == hewl_merged.spacegroup.xhm()
assert merged.cell.a == hewl_merged.cell.a
assert merged.index.is_monotonic_increasing == sort

if sort:
assert merged.index.is_monotonic_increasing

# Note: AIMLESS zero-fills empty observations, whereas we use NaNs
if not anomalous:
hewl_merged["I"] = hewl_merged["IMEAN"]
hewl_merged["SIGI"] = hewl_merged["SIGIMEAN"]
hewl_merged["N"] = rs.DataSeries(
(hewl_merged["N(+)"] + hewl_merged["N(-)"])
/ (hewl_merged.label_centrics().CENTRIC + 1),
dtype="I",
)

for key in merged.columns:
assert np.allclose(
merged.loc[hewl_merged.index, key].fillna(0),

0 comments on commit c18ff26

Please sign in to comment.