Skip to content

Commit

Permalink
Merge pull request #676 from martinfleis/apply
Browse files Browse the repository at this point in the history
ENH: add Graph.apply, Graph.aggregate and allow callable as transformation in transform
  • Loading branch information
ljwolf authored Jan 17, 2024
2 parents f7db539 + 6cd6378 commit e70cae1
Show file tree
Hide file tree
Showing 2 changed files with 175 additions and 4 deletions.
64 changes: 60 additions & 4 deletions libpysal/graph/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
_sparse_to_arrays,
)

ALLOWED_TRANSFORMATIONS = ("O", "B", "R", "D", "V")
ALLOWED_TRANSFORMATIONS = ("O", "B", "R", "D", "V", "C")

# listed alphabetically
__author__ = """"
Expand Down Expand Up @@ -68,6 +68,7 @@ def __init__(self, adjacency, transformation="O", is_sorted=False):
- **R** -- Row-standardization (global sum :math:`=n`)
- **D** -- Double-standardization (global sum :math:`=1`)
- **V** -- Variance stabilizing
- **C** -- Custom
is_sorted : bool, default False
``adjacency`` capturing the graph needs to be canonically sorted to
initialize the class. The MultiIndex needs to be ordered i-->j
Expand Down Expand Up @@ -885,7 +886,7 @@ def transform(self, transformation):
Parameters
----------
transformation : str
transformation : str | callable
Transformation method. The following are
valid transformations.
Expand All @@ -894,6 +895,9 @@ def transform(self, transformation):
- **D** -- Double-standardization (global sum :math:`=1`)
- **V** -- Variance stabilizing
Alternatively, you can pass your own callable, which is then passed to
``self.adjacency.groupby(level=0).transform()``.
Returns
-------
Graph
Expand All @@ -904,7 +908,8 @@ def transform(self, transformation):
ValueError
Value error for unsupported transformation
"""
transformation = transformation.upper()
if isinstance(transformation, str):
transformation = transformation.upper()

if self.transformation == transformation:
return self.copy()
Expand All @@ -929,10 +934,14 @@ def transform(self, transformation):
n_q = self.n / s.sum()
standardized = (s * n_q).fillna(0).values # isolate comes as NaN -> 0

elif callable(transformation):
standardized = self._adjacency.groupby(level=0).transform(transformation)
transformation = "C"

else:
raise ValueError(
f"Transformation '{transformation}' is not supported. "
f"Use one of {ALLOWED_TRANSFORMATIONS[1:]}"
f"Use one of {ALLOWED_TRANSFORMATIONS[1:]} or pass a callable."
)

standardized_adjacency = pd.Series(
Expand Down Expand Up @@ -1455,6 +1464,53 @@ def assign_self_weight(self, weight=1):
)
return Graph(adj, is_sorted=True)

def apply(self, y, func, **kwargs):
    """Apply a reduction across the neighbor sets

    Groups the values of ``y`` by focal observation, so that each group holds
    the values belonging to that focal's neighbors, and reduces every group
    with ``func``.

    Parameters
    ----------
    y : array_like
        Values to be grouped. Can be 1-D or 2-D; anything that is not already
        a pandas ``Series`` or ``DataFrame`` is coerced to one. Rows are picked
        by position, so ``y`` is presumably expected to follow the order of the
        graph's ids -- confirm against the callers.
    func : function, str, list, dict or None
        Function used for aggregating the data, passed to pandas
        ``GroupBy.apply``.
    **kwargs
        Additional keyword arguments handed to ``GroupBy.apply`` together
        with ``func``.

    Returns
    -------
    Series | DataFrame
        pandas object indexed by ``unique_ids``
    """
    if not isinstance(y, (pd.Series, pd.DataFrame)):
        # only objects exposing ``ndim == 2`` become a DataFrame; everything
        # else (lists, 1-D arrays, ...) is wrapped in a Series
        is_two_dimensional = hasattr(y, "ndim") and y.ndim == 2
        y = pd.DataFrame(y) if is_two_dimensional else pd.Series(y)

    # integer codes of the (focal, neighbor) levels of the adjacency index
    codes = self._adjacency.index.codes

    # one row of ``y`` per edge (the neighbor's value), grouped by focal code
    neighbor_values = y.take(codes[1])
    reduced = neighbor_values.groupby(codes[0]).apply(func, **kwargs)

    # replace the integer group keys with the ids of the graph
    reduced.index = self.unique_ids
    if isinstance(reduced, pd.Series):
        reduced.name = None
    return reduced

def aggregate(self, func):
    """Aggregate weights within a neighbor set

    Reduces the weights attached to each focal geometry to a single value
    using a custom aggregation function.

    Parameters
    ----------
    func : callable
        A callable accepted by pandas ``groupby.agg`` method

    Returns
    -------
    pd.Series
        Aggregated weights

    Notes
    -----
    Groups are formed with ``groupby(level=0)`` on the adjacency, so the
    result follows the default pandas group ordering of the focal labels.
    """
    # level 0 of the adjacency index identifies the focal of every edge
    weights_by_focal = self._adjacency.groupby(level=0)
    return weights_by_focal.agg(func)


def _arrange_arrays(heads, tails, weights, ids=None):
"""
Expand Down
115 changes: 115 additions & 0 deletions libpysal/graph/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,6 +761,12 @@ def test_transform(self):
with pytest.raises(ValueError, match="Transformation 'X' is not"):
self.g_int.transform("x")

def test_transform_callable(self):
    """A callable transformation is applied and reported as custom ("C")."""

    def times_ten(weights):
        return weights * 10

    contiguity = graph.Graph.build_contiguity(self.nybb)
    scaled = contiguity.transform(times_ten)

    assert scaled.transformation == "C"
    assert scaled.adjacency.sum() == 100

def test_asymmetry(self):
neighbors = {
"a": ["b", "c", "d"],
Expand Down Expand Up @@ -982,3 +988,112 @@ def test_assign_self_weight(self):
diag_array._adjacency[(contig.unique_ids[i], contig.unique_ids[i])]
== val
)

def test_apply(self):
    """Check ``Graph.apply`` with several kinds of ``func`` and of ``y``."""
    contig = graph.Graph.build_contiguity(self.nybb)

    # every expected result is indexed by the focal ids in this order
    focal_index = pd.Index(
        ["Staten Island", "Queens", "Brooklyn", "Manhattan", "Bronx"],
        name="focal",
    )
    # sums of neighbor areas, expected for both Series and 1-D array input
    area_sums = [
        1.62382200e09,
        3.76087588e09,
        3.68168493e09,
        6.16961834e09,
        3.68168493e09,
    ]
    # per-focal totals, expected for both DataFrame and 2-D array input
    frame_sums = [
        [3.30470010e05, 1.62381982e09],
        [1.56477261e06, 3.76087473e09],
        [1.25564314e06, 3.68168433e09],
        [2.10181756e06, 6.16961599e09],
        [1.25564314e06, 3.68168433e09],
    ]

    # reduction named by a string understood by pandas
    expected = pd.Series(area_sums, index=focal_index)
    pd.testing.assert_series_equal(contig.apply(self.nybb.area, "sum"), expected)

    # reduction given as a numpy function
    expected = pd.Series(
        [1.62382200e09, 1.18692629e09, 1.84084247e09, 1.93747835e09, 1.84084247e09],
        index=focal_index,
    )
    observed = contig.apply(self.nybb.area, np.median)
    pd.testing.assert_series_equal(observed, expected)

    # custom lambda working on geometries
    expected = pd.Series(
        [2.06271959e09, 6.68788190e09, 7.57087991e09, 8.78957337e09, 7.57087991e09],
        index=focal_index,
    )
    observed = contig.apply(
        self.nybb.geometry, lambda x: x.unary_union.convex_hull.area
    )
    pd.testing.assert_series_equal(observed, expected)

    # reduction of a whole dataframe
    expected = pd.DataFrame(
        frame_sums,
        columns=["Shape_Leng", "Shape_Area"],
        index=focal_index,
    )
    observed = contig.apply(
        self.nybb, lambda x: x[["Shape_Leng", "Shape_Area"]].sum(axis=None)
    )
    pd.testing.assert_frame_equal(observed, expected)

    # plain 1-D numpy array as input
    expected = pd.Series(area_sums, index=focal_index)
    observed = contig.apply(self.nybb.area.values, "sum")
    pd.testing.assert_series_equal(observed, expected)

    # plain 2-D numpy array as input (no column labels are expected)
    expected = pd.DataFrame(frame_sums, index=focal_index)
    observed = contig.apply(
        self.nybb[["Shape_Leng", "Shape_Area"]].values,
        lambda x: x.sum(axis=None),
    )
    pd.testing.assert_frame_equal(observed, expected)

def test_aggregate(self):
    """Check ``Graph.aggregate`` with a custom reduction of the weights."""

    def exp_of_sum(weights):
        return np.exp(np.sum(weights))

    contig = graph.Graph.build_contiguity(self.nybb)

    focal_index = pd.Index(
        ["Bronx", "Brooklyn", "Manhattan", "Queens", "Staten Island"],
        name="focal",
    )
    expected = pd.Series(
        [7.3890561, 7.3890561, 20.08553692, 20.08553692, 1.0],
        index=focal_index,
        name="weight",
    )

    observed = contig.aggregate(exp_of_sum)
    pd.testing.assert_series_equal(observed, expected)

0 comments on commit e70cae1

Please sign in to comment.