Skip to content

Commit

Permalink
Merge pull request #102 from Hekstra-Lab/suffixes
Browse files Browse the repository at this point in the history
Allow custom suffix specification for `DataSet.stack_anomalous()`
  • Loading branch information
JBGreisman authored Oct 11, 2021
2 parents 5d7f876 + eb2958b commit f98b0dd
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 50 deletions.
53 changes: 40 additions & 13 deletions reciprocalspaceship/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,9 @@ def assign_resolution_bins(self, bins=20, inplace=False, return_labels=True):
else:
return self

def stack_anomalous(self, plus_labels=None, minus_labels=None):
def stack_anomalous(
self, plus_labels=None, minus_labels=None, suffixes=("(+)", "(-)")
):
"""
Convert data from two-column anomalous format to one-column
format. Intensities, structure factor amplitudes, or other data
Expand All @@ -814,10 +816,14 @@ def stack_anomalous(self, plus_labels=None, minus_labels=None):
----------
plus_labels: str or list-like
Column label or list of column labels of data associated with
Friedel-plus reflection (Defaults to columns suffixed with "(+)")
Friedel-plus reflections
minus_labels: str or list-like
Column label or list of column labels of data associated with
Friedel-minus reflection (Defaults to columns suffixed with "(-)")
Friedel-minus reflections
suffixes: list of strings
Suffixes to identify column labels associated with Friedel-plus
and Friedel-minus reflections. Only consulted if plus_labels and
minus_labels are None. Defaults to ("(+)", "(-)")
Returns
-------
Expand All @@ -832,16 +838,37 @@ def stack_anomalous(self, plus_labels=None, minus_labels=None):
"DataSet.stack_anomalous() cannot be called with unmerged data"
)

# Default behavior: Use labels suffixed with "(+)" or "(-)"
# Make sure suffixes are valid
if not isinstance(suffixes, (list, tuple)):
raise ValueError(
f"suffixes must have type tuple or list. supplied suffixes"
f"{suffixes} have type {type(suffixes)}"
)
if len(suffixes) != 2:
raise ValueError(
f"suffixes must be of length 2. Provided suffixes "
f"{suffixes} have length {len(suffixes)}."
)

# If no labels provided, use suffixes to create them
if plus_labels is None and minus_labels is None:
plus_labels = [l for l in self.columns if "(+)" in l]
minus_labels = [l for l in self.columns if "(-)" in l]
plus_labels = [l for l in self.columns if l.endswith(suffixes[0])]
minus_labels = [l for l in self.columns if l.endswith(suffixes[1])]

elif plus_labels is None or minus_labels is None:
raise ValueError(
f"plus_labels and minus_labels must either both be None"
f"or both not be None: plus_labels has type {type(plus_labels)}"
f"and minus_labels has type {type(minus_labels)}"
)

# Validate column labels
# Validate column labels (either default or created via suffixes)
if isinstance(plus_labels, str) and isinstance(minus_labels, str):
plus_labels = [plus_labels]
minus_labels = [minus_labels]
elif isinstance(plus_labels, list) and isinstance(minus_labels, list):
elif isinstance(plus_labels, (list, tuple)) and isinstance(
minus_labels, (list, tuple)
):
if len(plus_labels) != len(minus_labels):
raise ValueError(
f"plus_labels: {plus_labels} and minus_labels: "
Expand All @@ -850,8 +877,8 @@ def stack_anomalous(self, plus_labels=None, minus_labels=None):
else:
raise ValueError(
f"plus_labels and minus_labels must have same type "
f"and be str or list: plus_labels is type "
f"{type(plus_labels)} and minus_labe is type "
f"and be str, list, or tuple: plus_labels is type "
f"{type(plus_labels)} and minus_labels is type "
f"{type(minus_labels)}."
)

Expand All @@ -865,12 +892,12 @@ def stack_anomalous(self, plus_labels=None, minus_labels=None):

# Map Friedel reflections to +/- ASU
centrics = self.label_centrics()["CENTRIC"]
dataset_plus = self.drop(columns=minus_labels)
dataset_minus = self.loc[~centrics].drop(columns=plus_labels)
dataset_plus = self.drop(columns=list(minus_labels))
dataset_minus = self.loc[~centrics].drop(columns=list(plus_labels))
dataset_minus.apply_symop(gemmi.Op("-x,-y,-z"), inplace=True)

# Rename columns and update dtypes
new_labels = [l.rstrip("(+)") for l in plus_labels]
new_labels = [l.rstrip(suffixes[0]) for l in plus_labels]
column_mapping_plus = dict(zip(plus_labels, new_labels))
column_mapping_minus = dict(zip(minus_labels, new_labels))
dataset_plus.rename(columns=column_mapping_plus, inplace=True)
Expand Down
135 changes: 98 additions & 37 deletions tests/test_dataset_anomalous.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,56 +6,117 @@


@pytest.mark.parametrize(
"labels",
"plus_labels,minus_labels,suffixes",
[
(None, None),
("I(+)", "I(-)"),
(["I(+)", "SIGI(+)"], ["I(-)", "SIGI(-)"]),
(("I(+)", "SIGI(+)"), ("I(-)", "SIGI(-)")),
(None, "I(-)"),
(["I(+)", "SIGI(+)"], ["I(-)"]),
(["I(+)", "SIGI(+)"], ["SIGI(-)", "I(-)"]),
(None, None, None),
(None, None, ("(+)", "(-)")),
("I(+)", "I(-)", None),
(["I(+)"], ["I(-)"], None),
(["I(+)", "SIGI(+)"], ["I(-)", "SIGI(-)"], None),
(("I(+)", "SIGI(+)"), ("I(-)", "SIGI(-)"), None),
],
)
def test_stack_anomalous(data_merged, labels):
"""Test behavior of DataSet.stack_anomalous()"""
def test_stack_anomalous(data_merged, plus_labels, minus_labels, suffixes):
"""
Test behavior of DataSet.stack_anomalous()
"""
if suffixes is None:
result = data_merged.stack_anomalous(plus_labels, minus_labels)
else:
result = data_merged.stack_anomalous(plus_labels, minus_labels, suffixes)

if plus_labels is None:
if suffixes is not None:
plus_labels = [l for l in data_merged.columns if l.endswith(suffixes[0])]
else:
plus_labels = [l for l in data_merged.columns if l.endswith("(+)")]
elif isinstance(plus_labels, str):
plus_labels = [plus_labels]

plus_labels = labels[0]
minus_labels = labels[1]
assert len(result.columns) == (len(data_merged.columns) - len(plus_labels))

# Check input data
def check_ValueError(data, plus_labels, minus_labels):
with pytest.raises(ValueError):
result = data.stack_anomalous(plus_labels, minus_labels)
return
centrics = data_merged.label_centrics()["CENTRIC"]
assert len(result) == (2 * (~centrics).sum()) + centrics.sum()
assert result.spacegroup.xhm() == data_merged.spacegroup.xhm()

if plus_labels is None and minus_labels is None:
plus_labels = [l for l in data_merged.columns if "(+)" in l]
minus_labels = [l for l in data_merged.columns if "(-)" in l]

if isinstance(plus_labels, str) and isinstance(minus_labels, str):
plus_labels = [plus_labels]
minus_labels = [minus_labels]
elif isinstance(plus_labels, list) and isinstance(minus_labels, list):
if len(plus_labels) != len(minus_labels):
check_ValueError(data_merged, plus_labels, minus_labels)
return
else:
check_ValueError(data_merged, plus_labels, minus_labels)
return
@pytest.mark.parametrize(
    "plus_labels,minus_labels,suffixes",
    [
        # Invalid suffixes: not a list or tuple
        (None, None, None),
        (None, None, 5),
        (None, None, "(+)"),
        # Invalid suffixes: list/tuple of the wrong length
        (None, None, ("(+)",)),
        (None, None, ("(+)", "(-)", "(=)")),
        # Invalid labels. Valid suffixes are supplied on purpose: suffixes are
        # validated before the labels, so passing suffixes=None here would
        # raise on the suffixes and never reach the label checks.
        (None, "I(-)", ("(+)", "(-)")),
        (["I(+)", "SIGI(+)"], ["I(-)"], ("(+)", "(-)")),
        # NOTE(review): presumably rejected because the paired columns differ
        # in dtype -- that check is not visible in this diff; confirm.
        (["I(+)", "SIGI(+)"], ["SIGI(-)", "I(-)"], ("(+)", "(-)")),
    ],
)
def test_stack_anomalous_failure(data_merged, plus_labels, minus_labels, suffixes):
    """
    Test that DataSet.stack_anomalous() fails with improper arguments

    Each case is invalid in exactly one respect (either the suffixes or
    the plus/minus labels) so that the ValueError can only originate from
    the validation step being targeted.
    """
    with pytest.raises(ValueError):
        data_merged.stack_anomalous(plus_labels, minus_labels, suffixes)

for plus, minus in zip(plus_labels, minus_labels):
if data_merged[plus].dtype != data_merged[minus].dtype:
check_ValueError(data_merged, plus_labels, minus_labels)
return

result = data_merged.stack_anomalous(labels[0], labels[1])
centrics = data_merged.label_centrics()["CENTRIC"]
assert len(result.columns) == (len(data_merged.columns) - len(plus_labels))
@pytest.mark.parametrize(
    "label_dict,suffixes",
    [
        (
            {
                "I(+)": "Iplus",
                "SIGI(+)": "SIGIplus",
                "I(-)": "Iminus",
                "SIGI(-)": "SIGIminus",
            },
            ("plus", "minus"),
        ),
        (
            {
                "I(+)": "I+",
                "SIGI(+)": "SIGI+",
                "I(-)": "I-",
                "SIGI(-)": "SIGI-",
            },
            ("+", "-"),
        ),
    ],
)
def test_stack_anomalous_suffixes(data_merged, label_dict, suffixes):
    """
    Test DataSet.stack_anomalous() with custom suffixes

    The anomalous columns are renamed according to `label_dict`, then
    stacked using only the matching `suffixes` (no explicit labels).
    """
    renamed = data_merged.rename(columns=label_dict)
    stacked = renamed.stack_anomalous(suffixes=suffixes)

    # Expected row count: non-centric reflections counted twice,
    # centric reflections counted once.
    is_centric = renamed.label_centrics()["CENTRIC"]
    n_non_centric = (~is_centric).sum()
    n_centric = is_centric.sum()
    assert len(stacked) == (2 * n_non_centric) + n_centric

    # Stacking must leave the spacegroup unchanged
    expected_spacegroup = data_merged.spacegroup.xhm()
    assert stacked.spacegroup.xhm() == expected_spacegroup


@pytest.mark.parametrize(
    "label_dict,suffixes",
    [
        (
            {
                "I(+)": "I(+)_foo",
                "SIGI(+)": "SIGI(+)_foo",
                "I(-)": "I(-)_foo",
                "SIGI(-)": "SIGI(-)_foo",
            },
            ("+", "-"),
        ),
    ],
)
def test_stack_anomalous_non_suffixes(data_merged, label_dict, suffixes):
    """
    Test DataSet.stack_anomalous() does not strip non-suffixes

    The renamed labels contain "+" / "-" but do not end with them, so
    neither a stripped "I" nor "I_foo" column may appear in the output.
    """
    renamed = data_merged.rename(columns=label_dict)
    stacked = renamed.stack_anomalous(suffixes=suffixes)

    for unexpected in ("I", "I_foo"):
        assert unexpected not in stacked.columns


def test_stack_anomalous_unmerged(data_unmerged):
"""
Test DataSet.stack_anomalous() raises ValueError with unmerged data
Expand Down

0 comments on commit f98b0dd

Please sign in to comment.