Skip to content

Commit

Permalink
Merge pull request #134 from knaaptime/localknox
Browse files Browse the repository at this point in the history
keep members of local knox hotspots
  • Loading branch information
knaaptime authored Mar 25, 2024
2 parents 85a77b8 + 2c32e7a commit e152ac8
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 10 deletions.
37 changes: 30 additions & 7 deletions pointpats/spacetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Methods for identifying space-time interaction in spatio-temporal event
data.
"""

__author__ = (
"Eli Knaap <eknaap@sdsu.edu>",
"Nicholas Malizia <nmalizia@asu.edu>",
Expand Down Expand Up @@ -29,6 +30,7 @@
import pandas
import scipy.stats as stats
from libpysal import cg
from libpysal.graph import Graph
from pandas.api.types import is_numeric_dtype
from scipy.spatial import KDTree
from scipy.stats import hypergeom, poisson
Expand Down Expand Up @@ -1297,7 +1299,7 @@ def from_dataframe(
ids=dataframe.index.values,
)

def hotspots(self, crit=0.05, inference="permutation"):
def hotspots(self, crit=0.05, inference="permutation", keep_neighbors=True):
"""Table of significant space-time clusters that define local hotspots.
Parameters
Expand All @@ -1307,6 +1309,12 @@ def hotspots(self, crit=0.05, inference="permutation"):
inference : str, optional
whether p-values should use permutation or analytical inference, by default
"permutation"
keep_neighbors: bool
whether to include nonsignificant members of hotspots. While these
observations are not themselves significant, they still define the spatial
extent of the cluster, and the focal observation cannot become
significant without their presence. If True, return all members of a
significant hotspot, else return only the significant locations
Returns
-------
Expand All @@ -1323,7 +1331,8 @@ def hotspots(self, crit=0.05, inference="permutation"):
warn(
"Pseudo-p values not availalable. Permutation-based p-values require "
"fitting the KnoxLocal class using `permutations` set to a large "
"number. Using analytic p-values instead"
"number. Using analytic p-values instead",
stacklevel=1,
)
col = "p_hypergeom"
else:
Expand All @@ -1336,11 +1345,27 @@ def hotspots(self, crit=0.05, inference="permutation"):
pdf_sig = self._gdf[self._gdf[col] <= crit][[col, "time"]].rename(
columns={col: "pvalue", "time": "focal_time"}
)

# if keep_neighbors, we want to include a 'cluster' column denoting which
# cluster nonsig observations belong to. Need to use a graph for that
temp_neighbors = self.adjlist[
(self.adjlist.focal.isin(pdf_sig.index.values))
| self.adjlist.neighbor.isin(pdf_sig.index.values)
]

pdf_sig = pdf_sig.merge(
self.adjlist, how="inner", left_index=True, right_on="focal"
temp_neighbors, how='outer', left_index=True, right_on="focal"
).reset_index(drop=True)

return pdf_sig.copy()
# significant focals can be neighbors of others (dupes)
pdf_sig = pdf_sig.groupby("focal").first().reset_index()
graph = Graph.from_adjacency(pdf_sig.assign(weight=1))
pdf_sig["cluster"] = graph.component_labels.values
if not keep_neighbors :
pdf_sig = pdf_sig.dropna(subset=['pvalue'])

return self._gdf[["geometry"]].merge(
pdf_sig.copy(), left_index=True, right_on="focal"
)

def plot(
self,
Expand Down Expand Up @@ -1418,7 +1443,6 @@ def plot(
g[g.color == colors["focal"]].plot(ax=m, color=colors["focal"], **point_kwargs)

if plot_edges:

# edges between hotspot and st-neighbors
ghs = self.hotspots(crit=crit, inference=inference)
ghs = ghs.dropna()
Expand Down Expand Up @@ -1519,7 +1543,6 @@ def explore(
)

if plot_edges:

# edges between hotspot and st-neighbors
g = g.set_index("index")
ghs = self.hotspots(crit=crit, inference=inference)
Expand Down
24 changes: 21 additions & 3 deletions pointpats/tests/test_spacetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def test_knox_local_from_gdf(self):
)

def test_explore(self):
gdf = self.gdf
gdf = self.gdf.copy()
gdf.crs = 21096
numpy.random.seed(12345)
m = KnoxLocal.from_dataframe(
Expand All @@ -337,15 +337,33 @@ def test_explore(self):
)
assert len(m.to_dict()["children"]) == 5


def test_hotspots_without_neighbors(self):
    """Check the hotspot table shape when ``keep_neighbors=False``.

    Fits ``KnoxLocal`` on a copy of the fixture frame (CRS set to 21096)
    with ``delta=20`` and ``tau=5``, requests analytic inference, and
    asserts that the resulting table has 3 rows and 7 columns.
    """
    numpy.random.seed(1)
    # work on a copy so the shared fixture is not modified by set_crs
    events = self.gdf.copy().set_crs(21096)
    model = KnoxLocal.from_dataframe(events, time_col="T", delta=20, tau=5)
    table = model.hotspots(keep_neighbors=False, inference="analytic")
    assert table.shape == (3, 7)

def test_hotspots_with_neighbors(self):
    """Check the hotspot table shape when ``keep_neighbors=True``.

    Fits ``KnoxLocal`` on a copy of the fixture frame (CRS set to 21096)
    with ``delta=20`` and ``tau=5``, requests analytic inference, and
    asserts that the resulting table has 4 rows and 7 columns.
    """
    # work on a copy so the shared fixture is not modified by set_crs
    events = self.gdf.copy().set_crs(21096)
    model = KnoxLocal.from_dataframe(events, time_col="T", delta=20, tau=5)
    table = model.hotspots(keep_neighbors=True, inference="analytic")
    assert table.shape == (4, 7)

@pytest.mark.mpl_image_compare
def test_plot(self):
gdf = self.gdf
gdf = self.gdf.copy()
gdf.crs = 21096
fig, ax2 = plt.subplots(figsize=(30,18))
lk = KnoxLocal.from_dataframe(
gdf, time_col="T", delta=20, tau=5, keep=True)
lk.plot(inference='analytic', ax=ax2)
return fig
assert fig



Expand Down

0 comments on commit e152ac8

Please sign in to comment.