From 531be29cecfc5dd49efd8510236e026dfa1ad613 Mon Sep 17 00:00:00 2001 From: James Gaboardi Date: Mon, 30 Oct 2023 21:20:51 -0400 Subject: [PATCH 1/4] lint graph/base.py --- libpysal/graph/base.py | 129 ++++++++++++++++++++--------------------- 1 file changed, 63 insertions(+), 66 deletions(-) diff --git a/libpysal/graph/base.py b/libpysal/graph/base.py index 1aee264ed..9f59fbbe6 100644 --- a/libpysal/graph/base.py +++ b/libpysal/graph/base.py @@ -36,7 +36,8 @@ class Graph(_Set_Mixin): """Graph class encoding spatial weights matrices - The :class:`Graph` is currently experimental and its API is incomplete and unstable. + The :class:`Graph` is currently experimental + and its API is incomplete and unstable. """ def __init__(self, adjacency, transformation="O"): @@ -45,8 +46,8 @@ def __init__(self, adjacency, transformation="O"): It is recommenced to use one of the ``from_*`` or ``build_*`` constructors rather than invoking ``__init__`` directly. - Each observation needs to be present in the focal, at least as a self-loop with - a weight 0. + Each observation needs to be present in the focal, + at least as a self-loop with a weight 0. 
Parameters ---------- @@ -63,7 +64,6 @@ def __init__(self, adjacency, transformation="O"): - **D** -- Double-standardization (global sum :math:`=1`) - **V** -- Variance stabilizing - """ if not isinstance(adjacency, pd.Series): raise TypeError( @@ -147,7 +147,7 @@ def adjacency(self): return self._adjacency.copy() @classmethod - def from_W(cls, w): + def from_W(cls, w): # noqa: N802 """Create an experimental Graph from libpysal.weights.W object Parameters @@ -161,7 +161,7 @@ def from_W(cls, w): """ return cls.from_weights_dict(dict(w)) - def to_W(self): + def to_W(self): # noqa: N802 """Convert Graph to a libpysal.weights.W object Returns @@ -223,12 +223,12 @@ def from_adjacency( libpysal.graph.Graph """ cols = dict( - zip( + zip( # noqa B905 [focal_col, neighbor_col, weight_col], ["focal_col", "neighbor_col", "weight_col"], ) ) - for col in cols.keys(): + for col in cols: assert col in adjacency.columns.tolist(), ( f'"{col}" was given for `{cols[col]}`, but the ' f"columns available in `adjacency` are: {adjacency.columns.tolist()}." @@ -307,10 +307,8 @@ def from_weights_dict(cls, weights_dict): Graph libpysal.graph.Graph based on weights dictionary of dictionaries """ - idx = {f: [k for k in neighbors] for f, neighbors in weights_dict.items()} - data = { - f: [k for k in neighbors.values()] for f, neighbors in weights_dict.items() - } + idx = {f: list(neighbors) for f, neighbors in weights_dict.items()} + data = {f: list(neighbors.values()) for f, neighbors in weights_dict.items()} return cls.from_dicts(idx, data) @classmethod @@ -339,9 +337,9 @@ def build_contiguity(cls, geometry, rook=True, by_perimeter=False, strict=False) """Generate Graph from geometry based on contiguity Contiguity builder assumes that all geometries are forming a coverage, i.e. - a non-overlapping mesh and neighbouring geometries share only points or segments - of their exterior boundaries. 
In practice, ``build_contiguity`` is capable of - creating a Graph of partially overlapping geometries when + a non-overlapping mesh and neighbouring geometries share only points or + segments of their exterior boundaries. In practice, ``build_contiguity`` is + capable of creating a Graph of partially overlapping geometries when ``strict=False, by_perimeter=False``, but that would not strictly follow the definition of queen or rook contiguity. @@ -352,9 +350,9 @@ def build_contiguity(cls, geometry, rook=True, by_perimeter=False, strict=False) resulting Graph is indexed by the original index. If an array of shapely.Geometry objects is passed, Graph will assume a RangeIndex. rook : bool, optional - Contiguity method. If True, two geometries are considered neighbours if they - share at least one edge. If False, two geometries are considered neighbours - if they share at least one vertex. By default True + Contiguity method. If True, two geometries are considered neighbours if + they share at least one edge. If False, two geometries are considered + neighbours if they share at least one vertex. By default True by_perimeter : bool, optional If True, ``weight`` represents the length of the shared boundary between adjacent units, by default False. For row-standardized version of perimeter @@ -362,12 +360,12 @@ def build_contiguity(cls, geometry, rook=True, by_perimeter=False, strict=False) ``Graph.build_contiguity(gdf, by_perimeter=True).transform("r")``. strict : bool, optional Use the strict topological method. If False, the contiguity is determined - based on shared coordinates or coordinate sequences representing edges. This - assumes geometry coverage that is topologically correct. This method is - faster but can miss some relations. If True, the contiguity is determined - based on geometric relations that do not require precise topology. This - method is slower but will result in correct contiguity even if the topology - of geometries is not optimal. 
By default False + based on shared coordinates or coordinate sequences representing edges. + This assumes geometry coverage that is topologically correct. This method + is faster but can miss some relations. If True, the contiguity is + determined based on geometric relations that do not require precise + topology. This method is slower but will result in correct contiguity + even if the topology of geometries is not optimal. By default False. Returns ------- @@ -448,12 +446,11 @@ def build_kernel( p : int (default: 2) parameter for minkowski metric, ignored if metric != "minkowski". coincident: str, optional (default "raise") - Method for handling coincident points when ``k`` is not None. Options include + Method for handling coincident points when ``k`` is not None. Options are ``'raise'`` (raising an exception when coincident points are present), - ``'jitter'`` (randomly displace coincident points to produce uniqueness), and + ``'jitter'`` (randomly displace coincident points to produce uniqueness), & ``'clique'`` (induce fully-connected sub cliques for coincident points). - Returns ------- Graph @@ -498,7 +495,7 @@ def build_knn(cls, data, k, metric="euclidean", p=2, coincident="raise"): coincident: str, optional (default "raise") Method for handling coincident points. Options include ``'raise'`` (raising an exception when coincident points are present), - ``'jitter'`` (randomly displace coincident points to produce uniqueness), and + ``'jitter'`` (randomly displace coincident points to produce uniqueness), & ``'clique'`` (induce fully-connected sub cliques for coincident points). @@ -583,7 +580,7 @@ def build_triangulation( coincident: str, optional (default "raise") Method for handling coincident points. 
Options include ``'raise'`` (raising an exception when coincident points are present), - ``'jitter'`` (randomly displace coincident points to produce uniqueness), and + ``'jitter'`` (randomly displace coincident points to produce uniqueness), & ``'clique'`` (induce fully-connected sub cliques for coincident points). Returns ------- @@ -737,15 +734,16 @@ def build_fuzzy_contiguity( ): """Generate Graph from fuzzy contiguity - Fuzzy contiguity relaxes the notion of contiguity neighbors for the case of - geometry collections that violate the condition of planar enforcement. It - handles three types of conditions present in such collections that would result - in missing links when using the regular contiguity methods. + Fuzzy contiguity relaxes the notion of contiguity neighbors + for the case of geometry collections that violate the condition + of planar enforcement. It handles three types of conditions present + in such collections that would result in missing links when using + the regular contiguity methods. - The first are edges for nearby polygons that should be shared, but are digitized - separately for the individual polygons and the resulting edges do not - coincide, but instead the edges intersect. This case can also be covered by - ``build_contiguty`` with the ``strict=False`` parameter. + The first are edges for nearby polygons that should be shared, but are + digitized separately for the individual polygons and the resulting edges + do not coincide, but instead the edges intersect. This case can also be + covered by ``build_contiguity`` with the ``strict=False`` parameter. The second case is similar to the first, only the resultant edges do not intersect but are "close". The optional buffering of geometry then closes the @@ -772,10 +770,10 @@ def build_fuzzy_contiguity( ``tolerance`` or ``buffer`` may be specified but not both. By default None. predicate : str, optional - The predicate to use for determination of neighbors. Default is 'intersects'. 
- If None is passed, neighbours are determined based on the intersection of - bounding boxes. See the documentation of ``geopandas.GeoSeries.sindex.query`` - for allowed predicates. + The predicate to use for determination of neighbors. Default is + 'intersects'. If None is passed, neighbours are determined based + on the intersection of bounding boxes. See the documentation of + ``geopandas.GeoSeries.sindex.query`` for allowed predicates. Returns ------- @@ -904,7 +902,7 @@ def transform(self, transformation): s = self._adjacency.groupby(level=0).transform( lambda group: group / math.sqrt((group**2).sum()) ) - nQ = self.n / s.sum() + nQ = self.n / s.sum() # noqa N806 standardized = (s * nQ).fillna(0).values # isolate comes as NaN -> 0 else: @@ -965,7 +963,8 @@ def cardinalities(self): def isolates(self): """Index of observations with no neighbors - Isolates are encoded as a self-loop with the weight == 0 in the adjacency table. + Isolates are encoded as a self-loop with + the weight == 0 in the adjacency table. Returns ------- @@ -1012,7 +1011,6 @@ def asymmetry(self, intrinsic=True): Parameters ---------- - intrinsic : bool, optional Default is ``True``. Intrinsic symmetry is defined as: @@ -1033,13 +1031,11 @@ def asymmetry(self, intrinsic=True): Returns ------- - pandas.Series A ``Series`` of ``(i,j)`` pairs of asymmetries sorted ascending by the focal observation (index value), where ``i`` is the focal and ``j`` is the neighbor. An empty ``Series`` is returned if no asymmetries are found. 
- """ if intrinsic: wd = self.sparse.transpose() - self.sparse @@ -1055,7 +1051,9 @@ def asymmetry(self, intrinsic=True): dtype=self._adjacency.index.dtypes["focal"], ) else: - i2id = dict(zip(np.arange(self.unique_ids.shape[0]), self.unique_ids)) + i2id = dict( + zip(np.arange(self.unique_ids.shape[0]), self.unique_ids, strict=True) + ) focal, neighbor = np.nonzero(wd) focal = focal.astype(self._adjacency.index.dtypes["focal"]) neighbor = neighbor.astype(self._adjacency.index.dtypes["focal"]) @@ -1070,9 +1068,9 @@ def asymmetry(self, intrinsic=True): def higher_order(self, k=2, shortest_path=True, diagonal=False, lower_order=False): """Contiguity weights object of order :math:`k`. - Proper higher order neighbors are returned such that :math:`i` and :math:`j` are - :math:`k`-order neighbors if the shortest path from :math:`i-j` is of length - :math:`k`. + Proper higher order neighbors are returned such that :math:`i` and :math:`j` + are :math:`k`-order neighbors if the shortest path from :math:`i-j` is of + length :math:`k`. 
Parameters ---------- @@ -1102,22 +1100,22 @@ def higher_order(self, k=2, shortest_path=True, diagonal=False, lower_order=Fals sp = sparse.csr_matrix(binary.sparse) if lower_order: - wk = sum(map(lambda x: sp**x, range(2, k + 1))) + wk = sum(sp**x for x in range(2, k + 1)) shortest_path = False else: wk = sp**k rk, ck = wk.nonzero() - sk = set(zip(rk, ck)) + sk = set(zip(rk, ck)) # noqa B905 if shortest_path: for j in range(1, k): wj = sp**j rj, cj = wj.nonzero() - sj = set(zip(rj, cj)) + sj = set(zip(rj, cj)) # noqa B905 sk.difference_update(sj) if not diagonal: - sk = set([(i, j) for i, j in sk if i != j]) + sk = {(i, j) for i, j in sk if i != j} return Graph.from_sparse( sparse.coo_array( @@ -1133,9 +1131,9 @@ def higher_order(self, k=2, shortest_path=True, diagonal=False, lower_order=Fals def lag(self, y): """Spatial lag operator - If weights are row standardized, returns the mean of each observation's neighbors; - if not, returns the weighted sum of each observation's neighbors. - + If weights are row standardized, returns the mean of each + observation's neighbors; if not, returns the weighted sum + of each observation's neighbors. 
Parameters ---------- @@ -1183,12 +1181,9 @@ def to_networkx(self): try: import networkx as nx except ImportError: - raise ImportError("NetworkX is required.") + raise ImportError("NetworkX is required.") from None - if self.asymmetry().empty: - graph_type = nx.Graph - else: - graph_type = nx.DiGraph + graph_type = nx.Graph if self.asymmetry().empty else nx.DiGraph return nx.from_pandas_edgelist( self._adjacency.reset_index(), @@ -1294,7 +1289,9 @@ def _arrange_arrays(heads, tails, weights, ids=None): if ids is None: ids = np.unique(np.hstack((heads, tails))) lookup = list(ids).index - input_df = pd.DataFrame.from_dict(dict(focal=heads, neighbor=tails, weight=weights)) + input_df = pd.DataFrame.from_dict( + {"focal": heads, "neighbor": tails, "weight": weights} + ) return ( input_df.set_index(["focal", "neighbor"]) .assign( @@ -1311,9 +1308,9 @@ def _arrange_arrays(heads, tails, weights, ids=None): def read_parquet(path, **kwargs): """Read Graph from a Apache Parquet - Read Graph serialized using `Graph.to_parquet()` back into the `Graph` object. The - Parquet file needs to contain adjacency table with a structure required by the `Graph` - constructor and optional metadata with the type of transformation. + Read Graph serialized using `Graph.to_parquet()` back into the `Graph` object. + The Parquet file needs to contain adjacency table with a structure required + by the `Graph` constructor and optional metadata with the type of transformation. 
Parameters ---------- From 7e00e19c4482e8cd0c37e32a63d2b622f500b50d Mon Sep 17 00:00:00 2001 From: James Gaboardi Date: Tue, 31 Oct 2023 08:43:11 -0400 Subject: [PATCH 2/4] Apply suggestions from code review Co-authored-by: Martin Fleischmann --- libpysal/graph/base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/libpysal/graph/base.py b/libpysal/graph/base.py index 9f59fbbe6..969e8409b 100644 --- a/libpysal/graph/base.py +++ b/libpysal/graph/base.py @@ -226,6 +226,7 @@ def from_adjacency( zip( # noqa B905 [focal_col, neighbor_col, weight_col], ["focal_col", "neighbor_col", "weight_col"], + strict=True, ) ) for col in cols: @@ -902,8 +903,8 @@ def transform(self, transformation): s = self._adjacency.groupby(level=0).transform( lambda group: group / math.sqrt((group**2).sum()) ) - nQ = self.n / s.sum() # noqa N806 - standardized = (s * nQ).fillna(0).values # isolate comes as NaN -> 0 + n_q = self.n / s.sum() + standardized = (s * n_q).fillna(0).values # isolate comes as NaN -> 0 else: raise ValueError( @@ -1106,13 +1107,13 @@ def higher_order(self, k=2, shortest_path=True, diagonal=False, lower_order=Fals wk = sp**k rk, ck = wk.nonzero() - sk = set(zip(rk, ck)) # noqa B905 + sk = set(zip(rk, ck, strict=True)) if shortest_path: for j in range(1, k): wj = sp**j rj, cj = wj.nonzero() - sj = set(zip(rj, cj)) # noqa B905 + sj = set(zip(rj, cj), strict=True) sk.difference_update(sj) if not diagonal: sk = {(i, j) for i, j in sk if i != j} From 9c94e762012c96debbdddfe734267f75638bbd3e Mon Sep 17 00:00:00 2001 From: James Gaboardi Date: Tue, 31 Oct 2023 08:43:35 -0400 Subject: [PATCH 3/4] Update libpysal/graph/base.py --- libpysal/graph/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpysal/graph/base.py b/libpysal/graph/base.py index 969e8409b..fefe4d768 100644 --- a/libpysal/graph/base.py +++ b/libpysal/graph/base.py @@ -223,7 +223,7 @@ def from_adjacency( libpysal.graph.Graph """ cols = dict( - zip( # noqa B905 + 
zip( [focal_col, neighbor_col, weight_col], ["focal_col", "neighbor_col", "weight_col"], strict=True, From 25001c5608d5a3d8508ed0743b5f583e37c17a2f Mon Sep 17 00:00:00 2001 From: James Gaboardi Date: Tue, 31 Oct 2023 08:52:46 -0400 Subject: [PATCH 4/4] fix strict keyword --- libpysal/graph/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpysal/graph/base.py b/libpysal/graph/base.py index fefe4d768..5f98fcdca 100644 --- a/libpysal/graph/base.py +++ b/libpysal/graph/base.py @@ -1113,7 +1113,7 @@ def higher_order(self, k=2, shortest_path=True, diagonal=False, lower_order=Fals for j in range(1, k): wj = sp**j rj, cj = wj.nonzero() - sj = set(zip(rj, cj), strict=True) + sj = set(zip(rj, cj, strict=True)) sk.difference_update(sj) if not diagonal: sk = {(i, j) for i, j in sk if i != j}