Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add get_histogram utility, option to return histogram of bond lengths and angles #33

Merged
merged 14 commits into from
Feb 9, 2022
Merged
36 changes: 36 additions & 0 deletions cmeutils/plotting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import numpy as np


def get_histogram(data, normalize=False, bins="auto"):
"""Bins a 1-D array of data into a histogram using
the numpy.histogram method.

Parameters
----------
data : 1-D numpy.array, required
Array of data used to generate the histogram
normalize : boolean, default=False
If set to true, normalizes the histogram bin heights
by the sum of data so that the distribution adds
up to 1
bins : float, int, or str, default="auto"
Method used by numpy to determine bin borders.
Check the numpy.histogram docs for more details.

Returns
-------
bin_cetners : 1-D numpy.array
Array of the bin center values
bin_heights : 1-D numpy.array
Array of the bin height values

"""
bin_heights, bin_borders = np.histogram(data, bins=bins)
if normalize is True:
s = sum(bin_heights)
bin_heights = np.array(
[float(i)/s for i in bin_heights]
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
s = sum(bin_heights)
bin_heights = np.array(
[float(i)/s for i in bin_heights]
)
bin_heights /= sum(bin_heights)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This throws an error.

numpy.core._exceptions._UFuncOutputCastingError: Cannot cast ufunc 'true_divide' output from dtype('float64') to dtype('int64') with casting rule 'same_kind'

I did change it so it's a bit cleaner/faster though.

bin_widths = np.diff(bin_borders)
bin_centers = bin_borders[:-1] + bin_widths / 2
return bin_centers, bin_heights
68 changes: 60 additions & 8 deletions cmeutils/structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,19 @@

from cmeutils import gsd_utils
from cmeutils.geometry import get_plane_normal, angle_between_vectors
from cmeutils.plotting import get_histogram


def angle_distribution(
gsd_file, A_name, B_name, C_name, start=0, stop=-1
gsd_file,
A_name,
B_name,
C_name,
start=0,
stop=-1,
histogram=False,
normalize=False,
bins="auto"
):
"""Returns the bond angle distribution for a given triplet of particles

Expand All @@ -26,11 +35,24 @@ def angle_distribution(
Negative numbers index from the end. (default 0)
stop : int
Final frame index for accumulating bond lengths. (default -1)
histogram : bool, default=False
If set to True, places the resulting angles into a histogram
and retrums the histogram's bin centers and heights as
opposed to the actual calcualted angles.
normalize : bool, default=False
If set to True, normalizes the angle distribution by the
sum of the bin heights, so that the distribution adds up to 1.
bins : float, int, or str, default="auto"
The number of bins to use when finding the distribution
of bond angles. Using "auto" will set the number of
bins based on the ideal bin size for the data.
See the numpy.histogram docs for more details.

Returns
-------
1-D numpy.array
Array of bond angles in degrees
1-D numpy.array or 2-D numpy.array
If histogram is False, Array of actual bond angles in degrees
If histogram is True, returns a 2D array of bin centers and bin heights.

"""
angles = []
Expand Down Expand Up @@ -63,11 +85,23 @@ def angle_distribution(
v = pos3_unwrap - pos2_unwrap
angles.append(np.round(angle_between_vectors(u, v, False), 3))
trajectory.close()
return np.array(angles)

if histogram:
bin_centers, bin_heights = get_histogram(np.array(angles), bins=bins)
return np.stack((bin_centers, bin_heights)).T
else:
return np.array(angles)


def bond_distribution(
gsd_file, A_name, B_name, start=0, stop=-1
gsd_file,
A_name,
B_name,
start=0,
stop=-1,
histogram=False,
normalize=True,
bins=100
):
"""Returns the bond length distribution for a given bond pair

Expand All @@ -83,11 +117,24 @@ def bond_distribution(
Negative numbers index from the end. (default 0)
stop : int
Final frame index for accumulating bond lengths. (default -1)
histogram : bool, default=False
If set to True, places the resulting bonds into a histogram
and retrums the histogram's bin centers and heights as
opposed to the actual calcualted bonds.
normalize : bool, default=False
If set to True, normalizes the angle distribution by the
sum of the bin heights, so that the distribution adds up to 1.
bins : float, int, or str, default="auto"
The number of bins to use when finding the distribution
of bond angles. Using "auto" will set the number of
bins based on the ideal bin size for the data.
See the numpy.histogram docs for more details.

Returns
-------
1-D numpy array
Array of bond lengths
1-D numpy.array or 2-D numpy.array
If histogram is False, Array of actual bond angles in degrees
If histogram is True, returns a 2D array of bin centers and bin heights.

"""
trajectory = gsd.hoomd.open(gsd_file, mode="rb")
Expand All @@ -113,7 +160,12 @@ def bond_distribution(
np.round(np.linalg.norm(pos2_unwrap - pos1_unwrap), 3)
)
trajectory.close()
return np.array(bonds)

if histogram:
bin_centers, bin_heights = get_histogram(np.array(bonds), bins=bins)
return np.stack((bin_centers, bin_heights)).T
else:
return np.array(bonds)


def get_quaternions(n_views = 20):
Expand Down
18 changes: 18 additions & 0 deletions cmeutils/tests/test_plotting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import numpy as np
import pytest

from cmeutils.plotting import get_histogram
from base_test import BaseTest


class TestPlotting(BaseTest):
def test_histogram_bins(self):
sample = np.random.randn(100)
bin_c, bin_h = get_histogram(sample, bins=20)
assert len(bin_c) == len(bin_h) == 20

def test_histogram_normalize(self):
sample = np.random.randn(100)*-1
bin_c, bin_h = get_histogram(sample, normalize=True)
assert all(bin_h <= 1)

20 changes: 20 additions & 0 deletions cmeutils/tests/test_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,26 @@ def test_bond_not_found(self, p3ht_gsd):
with pytest.raises(ValueError):
bonds = bond_distribution(p3ht_gsd, "xx", "ss", start=0, stop=1)

def test_bond_histogram(self, p3ht_gsd):
bonds_hist = bond_distribution(
p3ht_gsd, "cc", "ss", start=0, stop=1, histogram=True
)
bonds_no_hist = bond_distribution(
p3ht_gsd, "cc", "ss", start=0, stop=1, histogram=False
)
assert bonds_hist.ndim == 2
assert bonds_no_hist.ndim == 1
jennyfothergill marked this conversation as resolved.
Show resolved Hide resolved

def test_angle_histogram(self, p3ht_gsd):
angles_hist = angle_distribution(
p3ht_gsd, "cc", "ss", "cc", start=0, stop=1, histogram=True
)
angles_no_hist = angle_distribution(
p3ht_gsd, "cc", "ss", "cc", start=0, stop=1, histogram=False
)
assert angles_hist.ndim == 2
jennyfothergill marked this conversation as resolved.
Show resolved Hide resolved
assert angles_no_hist.ndim == 1

def test_gsd_rdf(self, gsdfile_bond):
rdf_ex, norm = gsd_rdf(gsdfile_bond, "A", "B")
rdf_noex, norm2 = gsd_rdf(gsdfile_bond, "A", "B", exclude_bonded=False)
Expand Down