cmelab · jennyfothergill · Apr 16, 2021 · Sep 21, 2020 · Sep 21, 2020 · Apr 5, 2021
diff --git a/cmeutils/__init__.py b/cmeutils/__init__.py
@@ -1,7 +1,4 @@
 from . import gsd_utils
 from .__version__ import __version__
 
-__all__ = [
-        "__version__",
-        "gsd_utils",
-        ]
+__all__ = ["__version__", "gsd_utils"]
diff --git a/cmeutils/gsd_utils.py b/cmeutils/gsd_utils.py
@@ -1,50 +1,50 @@
+import freud
 import gsd
 import gsd.hoomd
-import freud
+import numpy as np
 
 
-def get_type_position(type_name, gsd_file=None, snap=None, gsd_frame=-1):
+def get_type_position(typename, gsd_file=None, snap=None, gsd_frame=-1):
     """
     This function returns the  positions of a particular particle
     type from a frame of a gsd trajectory file or from a snapshot.
     Pass in either a gsd file or a snapshot, but not both.
 
     Parameters
     ----------
-    type_name : str,
-               name of particles of which to get the positions
-               (found in gsd.hoomd.Snapshot.particles.types)
-    gsd_file : str,
-              filename of the gsd trajectory (default = None)
-    snap : gsd.hoomd.Snapshot
-            Trajectory snapshot (default = None)
-    gsd_frame : int,
-            frame number to get positions from. Supports
-            negative indexing. (default = -1)
+    typename : str,
+        Name of particles of which to get the positions
+        (found in gsd.hoomd.Snapshot.particles.types)
+    gsd_file : str, default None
+        Filename of the gsd trajectory
+    snap : gsd.hoomd.Snapshot, default None
+        Trajectory snapshot
+    gsd_frame : int, default -1
+        Frame number to get positions from. Supports negative indexing.
 
     Returns
     -------
     numpy.ndarray
     """
     snap = _validate_inputs(gsd_file, snap, gsd_frame)
-    type_pos = snap.particles.position[
-            snap.particles.typeid == snap.particles.types.index(type_name)
-            ]
-    return type_pos
+    typepos = snap.particles.position[
+        snap.particles.typeid == snap.particles.types.index(typename)
+    ]
+    return typepos
+
 
 def get_all_types(gsd_file=None, snap=None, gsd_frame=-1):
     """
     Returns all particle types in a hoomd trajectory
 
     Parameters
     ----------
-    gsd_file : str,
-              filename of the gsd trajectory (default = None)
-    snap : gsd.hoomd.Snapshot
-            Trajectory snapshot (default = None)
-    gsd_frame : int,
-            frame number to get positions from. Supports
-            negative indexing. (default = -1)
+    gsd_file : str, default None
+        Filename of the gsd trajectory
+    snap : gsd.hoomd.Snapshot, default None
+        Trajectory snapshot
+    gsd_frame : int, default -1
+        Frame number to get positions from. Supports negative indexing.
 
     Returns
     -------
@@ -53,6 +53,7 @@ def get_all_types(gsd_file=None, snap=None, gsd_frame=-1):
     snap = _validate_inputs(gsd_file, snap, gsd_frame)
     return snap.particles.types
 
+
 def snap_molecule_cluster(gsd_file=None, snap=None, gsd_frame=-1):
     """Find molecule index for each particle.
 
@@ -62,12 +63,12 @@ def snap_molecule_cluster(gsd_file=None, snap=None, gsd_frame=-1):
 
     Parameters
     ----------
-    gsd_file : str,
-        Filename of the gsd trajectory (default = None)
-    snap : gsd.hoomd.Snapshot
-        Trajectory snapshot. (default = None)
-    gsd_frame : int,
-        Frame number of gsd_file to use in computing clusters. (default = -1)
+    gsd_file : str, default None
+        Filename of the gsd trajectory
+    snap : gsd.hoomd.Snapshot, default None
+        Trajectory snapshot.
+    gsd_frame : int, default -1
+        Frame number of gsd_file to use to compute clusters.
 
     Returns
     -------
@@ -86,7 +87,7 @@ def snap_molecule_cluster(gsd_file=None, snap=None, gsd_frame=-1):
     )
     cluster = freud.cluster.Cluster()
     cluster.compute(system=system, neighbors=nlist)
-    return cluster
+    return cluster.cluster_idx
 
 
 def _validate_inputs(gsd_file, snap, gsd_frame):
@@ -95,7 +96,7 @@ def _validate_inputs(gsd_file, snap, gsd_frame):
     if gsd_file:
         assert isinstance(gsd_frame, int)
         try:
-            with gsd.hoomd.open(name=gsd_file, mode='rb') as f:
+            with gsd.hoomd.open(name=gsd_file, mode="rb") as f:
                 snap = f[gsd_frame]
         except Exception as e:
             print("Unable to open the gsd_file")

diff --git a/cmeutils/structure.py b/cmeutils/structure.py
@@ -0,0 +1,105 @@
+from cmeutils import gsd_utils
+import freud
+import gsd
+import gsd.hoomd
+import numpy as np
+
+
+def gsd_rdf(
+    gsdfile,
+    A_name,
+    B_name,
+    start=0,
+    stop=None,
+    r_max=None,
+    r_min=0,
+    bins=100,
+    exclude_bonded=True,
+):
+    """Compute intermolecular RDF from a GSD file.
+
+    This function calculates the radial distribution function given a GSD file
+    and the names of the particle types. By default it will calculate the RDF
+    for the entire trajectory.
+
+    It is assumed that the bonding, number of particles, and simulation box do
+    not change during the simulation.
+
+    Parameters
+    ----------
+    gsdfile : str
+        Filename of the GSD trajectory.
+    A_name, B_name : str
+        Name(s) of particles between which to calculate the RDF (found in
+        gsd.hoomd.Snapshot.particles.types)
+    start : int
+        Starting frame index for accumulating the RDF. Negative numbers index
+        from the end. (default 0)
+    stop : int
+        Final frame index for accumulating the RDF. If None, the last frame
+        will be used. (default None)
+    r_max : float
+        Maximum radius of RDF. If None, half of the maximum box size is used.
+        (default None)
+    r_min : float
+        Minimum radius of RDF. (default 0)
+    bins : int
+        Number of bins to use when calculating the RDF. (default 100)
+    exclude_bonded : bool
+        Whether to remove particles in same molecule from the neighbor list.
+        (default True)
+
+    Returns
+    -------
+    (freud.density.RDF, float)
+    """
+    if not stop:
+        stop = -1
+
+    with gsd.hoomd.open(gsdfile, mode="rb") as trajectory:
+        snap = trajectory[0]
+
+        if r_max is None:
+            # Use a value just less than half the maximum box length.
+            r_max = np.nextafter(
+                np.max(snap.configuration.box[:3]) * 0.5, 0, dtype=np.float32
+            )
+
+        rdf = freud.density.RDF(bins=bins, r_max=r_max, r_min=r_min)
+
+        type_A = snap.particles.typeid == snap.particles.types.index(A_name)
+        type_B = snap.particles.typeid == snap.particles.types.index(B_name)
+
+        if exclude_bonded:
+            molecules = gsd_utils.snap_molecule_cluster(snap=snap)
+            molecules_A = molecules[type_A]
+            molecules_B = molecules[type_B]
+
+        for snap in trajectory[start:stop]:
+            A_pos = snap.particles.position[type_A]
+            if A_name == B_name:
+                B_pos = A_pos
+                exclude_ii = True
+            else:
+                B_pos = snap.particles.position[type_B]
+                exclude_ii = False
+
+            box = snap.configuration.box
+            system = (box, A_pos)
+            aq = freud.locality.AABBQuery.from_system(system)
+            nlist = aq.query(
+                B_pos, {"r_max": r_max, "exclude_ii": exclude_ii}
+            ).toNeighborList()
+
+            if exclude_bonded:
+                pre_filter = len(nlist)
+                nlist.filter(
+                    molecules_A[nlist.point_indices]
+                    != molecules_B[nlist.query_point_indices]
+                )
+                post_filter = len(nlist)
+
+            rdf.compute(aq, neighbors=nlist, reset=False)
+
+        normalization = post_filter / pre_filter if exclude_bonded else 1
+        return rdf, normalization
diff --git a/cmeutils/tests/base_test.py b/cmeutils/tests/base_test.py
@@ -8,40 +8,42 @@
 
 class BaseTest:
     @pytest.fixture
-    def test_gsd(self, tmp_path):
+    def gsdfile(self, tmp_path):
         filename = tmp_path / "test.gsd"
         create_gsd(filename)
         return filename
 
     @pytest.fixture
-    def test_gsd_bonded(self, tmp_path):
-        filename = tmp_path / "test.gsd"
+    def gsdfile_bond(self, tmp_path):
+        filename = tmp_path / "test_bond.gsd"
         create_gsd(filename, add_bonds=True)
         return filename
 
     @pytest.fixture
-    def test_snap(self, test_gsd):
-        with gsd.hoomd.open(name=test_gsd, mode="rb") as f:
+    def snap(self, gsdfile):
+        with gsd.hoomd.open(name=gsdfile, mode="rb") as f:
             snap = f[-1]
         return snap
 
+
 def create_frame(i, add_bonds, seed=42):
     np.random.seed(seed)
     s = gsd.hoomd.Snapshot()
     s.configuration.step = i
     s.particles.N = 5
-    s.particles.types = ['A', 'B']
-    s.particles.typeid = [0,0,1,1,1]
-    s.particles.position = np.random.random(size=(5,3))
+    s.particles.types = ["A", "B"]
+    s.particles.typeid = [0, 0, 1, 1, 1]
+    s.particles.position = np.random.random(size=(5, 3))
     s.configuration.box = [3, 3, 3, 0, 0, 0]
     if add_bonds:
         s.bonds.N = 2
-        s.bonds.types = ['AB']
+        s.bonds.types = ["AB"]
         s.bonds.typeid = [0, 0]
         s.bonds.group = [[0, 2], [1, 3]]
     s.validate()
     return s
 
+
 def create_gsd(filename, add_bonds=False):
-    with gsd.hoomd.open(name=filename, mode='wb') as f:
+    with gsd.hoomd.open(name=filename, mode="wb") as f:
         f.extend((create_frame(i, add_bonds=add_bonds) for i in range(10)))
diff --git a/cmeutils/tests/test_gsd.py b/cmeutils/tests/test_gsd.py
@@ -1,34 +1,38 @@
 import numpy as np
 import pytest
 
-from cmeutils import gsd_utils
 from base_test import BaseTest
 
 
 class TestGSD(BaseTest):
-
-    def test_get_type_position(self, test_gsd):
+    def test_get_type_position(self, gsdfile):
         from cmeutils.gsd_utils import get_type_position
 
-        pos_array = get_type_position(gsd_file = test_gsd, type_name = 'A')
+        pos_array = get_type_position(gsd_file=gsdfile, typename="A")
         assert type(pos_array) is type(np.array([]))
-        assert pos_array.shape == (2,3)
+        assert pos_array.shape == (2, 3)
+
+    def test_validate_inputs(self, gsdfile, snap):
+        from cmeutils.gsd_utils import _validate_inputs
 
-    def test_validate_inputs(self, test_gsd, test_snap):
-        # Catch error with both gsd_file and snap are passed
+        # Catch errors when both gsd_file and snap are passed
         with pytest.raises(ValueError):
-            snap = gsd_utils._validate_inputs(test_gsd, test_snap, 1)
+            _validate_inputs(gsdfile, snap, 1)
         with pytest.raises(AssertionError):
-            snap = gsd_utils._validate_inputs(test_gsd, None, 1.0)
+            _validate_inputs(gsdfile, None, 1.0)
         with pytest.raises(AssertionError):
-            snap = gsd_utils._validate_inputs(None, test_gsd, 1)
+            _validate_inputs(None, gsdfile, 1)
         with pytest.raises(OSError):
-            gsd_utils._validate_inputs("bad_gsd_file", None, 0)
+            _validate_inputs("bad_gsd_file", None, 0)
+
+    def test_get_all_types(self, gsdfile):
+        from cmeutils.gsd_utils import get_all_types
 
-    def test_get_all_types(self, test_gsd):
-        types = gsd_utils.get_all_types(test_gsd)
-        assert types == ['A', 'B']
+        types = get_all_types(gsdfile)
+        assert types == ["A", "B"]
 
-    def test_snap_molecule_cluster(self, test_gsd_bonded):
-        cluster = gsd_utils.snap_molecule_cluster(gsd_file=test_gsd_bonded)
+    def test_snap_molecule_cluster(self, gsdfile_bond):
+        from cmeutils.gsd_utils import snap_molecule_cluster
 
+        cluster = snap_molecule_cluster(gsd_file=gsdfile_bond)
+        assert np.array_equal(cluster, [0, 1, 0, 1, 2])
diff --git a/cmeutils/tests/test_imports.py b/cmeutils/tests/test_imports.py
@@ -1,4 +1,3 @@
-
 def test_import():
     import cmeutils
     from cmeutils import gsd_utils
diff --git a/cmeutils/tests/test_structure.py b/cmeutils/tests/test_structure.py
@@ -0,0 +1,21 @@
+import pytest
+
+from cmeutils.tests.base_test import BaseTest
+
+
+class TestStructure(BaseTest):
+    def test_gsd_rdf(self, gsdfile_bond):
+        from cmeutils.structure import gsd_rdf
+
+        rdf_ex, norm = gsd_rdf(gsdfile_bond, "A", "B")
+        rdf_noex, norm2 = gsd_rdf(gsdfile_bond, "A", "B", exclude_bonded=False)
+        assert norm2 == 1
+        assert rdf_noex != rdf_ex
+
+    def test_gsd_rdf_samename(self, gsdfile_bond):
+        from cmeutils.structure import gsd_rdf
+
+        rdf_ex, norm = gsd_rdf(gsdfile_bond, "A", "A")
+        rdf_noex, norm2 = gsd_rdf(gsdfile_bond, "A", "A", exclude_bonded=False)
+        assert norm2 == 1
+        assert rdf_noex != rdf_ex