From 31f3b2077a3f6b8b3a83c23bbaea3e5b93880937 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Sat, 6 Apr 2024 23:43:29 +0200 Subject: [PATCH 1/7] GAL Graph IO --- libpysal/graph/base.py | 36 ++++++++++++++++++- libpysal/graph/io/_gal.py | 54 +++++++++++++++++++++++++++++ libpysal/graph/{ => io}/_parquet.py | 0 3 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 libpysal/graph/io/_gal.py rename libpysal/graph/{ => io}/_parquet.py (100%) diff --git a/libpysal/graph/base.py b/libpysal/graph/base.py index abbd0033d..ff029d07e 100644 --- a/libpysal/graph/base.py +++ b/libpysal/graph/base.py @@ -16,7 +16,6 @@ ) from ._kernel import _distance_band, _kernel from ._matching import _spatial_matching -from ._parquet import _read_parquet, _to_parquet from ._plotting import _explore_graph, _plot from ._set_ops import SetOpsMixin from ._spatial_lag import _lag_spatial @@ -27,6 +26,8 @@ _resolve_islands, _sparse_to_arrays, ) +from .io._gal import _read_gal, _to_gal +from .io._parquet import _read_parquet, _to_parquet ALLOWED_TRANSFORMATIONS = ("O", "B", "R", "D", "V", "C") @@ -1259,6 +1260,22 @@ def to_parquet(self, path, **kwargs): """ _to_parquet(self, path, **kwargs) + def to_gal(self, path): + """Save Graph to a GAL file + + Graph is serialized to the GAL file format. + + Parameters + ---------- + path : str + path to the GAL file + + See also + -------- + read_gal + """ + _to_gal(self, path) + def to_networkx(self): """Convert Graph to a ``networkx`` graph. @@ -1614,3 +1631,20 @@ def read_parquet(path, **kwargs): """ adjacency, transformation = _read_parquet(path, **kwargs) return Graph(adjacency, transformation, is_sorted=True) + + +def read_gal(path): + """Read Graph from a GAL file + + Parameters + ---------- + path : str + path to a file + + Returns + ------- + Graph + deserialized Graph + """ + neighbors = _read_gal(path) + return Graph.from_dicts(neighbors) diff --git a/libpysal/graph/io/_gal.py b/libpysal/graph/io/_gal.py new file mode 100644 index 000000000..e010ddb5d --- /dev/null +++ b/libpysal/graph/io/_gal.py @@ -0,0 +1,54 @@ +def _read_gal(path): + """Read GAL weights to Graph object + + Parameters + ---------- + path : str + path to GAL file + + Returns + ------- + dict + neighbors dict + """ + with open(path) as file: + neighbors = {} + + # handle case where more than n is specified in first line + header = file.readline().strip().split() + header_n = len(header) + n = int(header[0]) + + if header_n > 1: + n = int(header[1]) + + for _ in range(n): + id_, _ = file.readline().strip().split() + neighbors_i = file.readline().strip().split() + neighbors[id_] = neighbors_i + return neighbors + + +def _to_gal(graph_obj, path): + """Write GAL weights to Graph object + + Parameters + ---------- + path : str + path to GAL file + neighbors : dict + neighbors dict + """ + grouper = graph_obj._adjacency.groupby(level=0, sort=False) + + with open(path, "w") as file: + file.write(f"{graph_obj.n}\n") + + for ix, chunk in grouper: + if ix in graph_obj.isolates: + neighbors = [] + else: + neighbors = chunk.index.get_level_values("neighbor").tolist() + + file.write(f"{ix} {len(neighbors)}\n") + file.write(" ".join(neighbors) + "\n") diff --git a/libpysal/graph/_parquet.py b/libpysal/graph/io/_parquet.py similarity index 100% rename from libpysal/graph/_parquet.py rename to libpysal/graph/io/_parquet.py From fa74d6ef5af7e52c20bf98c55d33b4295655cfe8 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Sun, 7 Apr 2024 23:36:46 +0200 Subject: [PATCH 2/7] GWT --- libpysal/graph/__init__.py | 2 +- libpysal/graph/base.py | 34 ++++++++++++++++++++++++++++++++++ libpysal/graph/io/_gal.py | 4 ++-- libpysal/graph/io/_gwt.py | 38 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 3 deletions(-) create mode 100644 libpysal/graph/io/_gwt.py diff --git a/libpysal/graph/__init__.py b/libpysal/graph/__init__.py index 5e0c34c35..69bf59f0a 100644 --- a/libpysal/graph/__init__.py +++ b/libpysal/graph/__init__.py @@ -1 +1 @@ -from .base import Graph, read_parquet # noqa +from .base import Graph, read_parquet, read_gal, read_gwt # noqa diff --git a/libpysal/graph/base.py b/libpysal/graph/base.py index ff029d07e..dd3a55ab4 100644 --- a/libpysal/graph/base.py +++ b/libpysal/graph/base.py @@ -27,6 +27,7 @@ _sparse_to_arrays, ) from .io._gal import _read_gal, _to_gal +from .io._gwt import _read_gwt, _to_gwt from .io._parquet import _read_parquet, _to_parquet ALLOWED_TRANSFORMATIONS = ("O", "B", "R", "D", "V", "C") @@ -1276,6 +1277,22 @@ def to_gal(self, path): """ _to_gal(self, path) + def to_gwt(self, path): + """Save Graph to a GWT file + + Graph is serialized to the GWT file format. + + Parameters + ---------- + path : str + path to the GWT file + + See also + -------- + read_gwt + """ + _to_gwt(self, path) + def to_networkx(self): """Convert Graph to a ``networkx`` graph. @@ -1648,3 +1665,20 @@ def read_gal(path): """ neighbors = _read_gal(path) return Graph.from_dicts(neighbors) + + +def read_gwt(path): + """Read Graph from a GWT file + + Parameters + ---------- + path : str + path to a file + + Returns + ------- + Graph + deserialized Graph + """ + head, tail, weight = _read_gwt(path) + return Graph.from_arrays(head, tail, weight) diff --git a/libpysal/graph/io/_gal.py b/libpysal/graph/io/_gal.py index e010ddb5d..a0d43b688 100644 --- a/libpysal/graph/io/_gal.py +++ b/libpysal/graph/io/_gal.py @@ -34,10 +34,10 @@ def _to_gal(graph_obj, path): Parameters ---------- + graph_obj : Graph + Graph object path : str path to GAL file - neighbors : dict - neighbors dict """ grouper = graph_obj._adjacency.groupby(level=0, sort=False) diff --git a/libpysal/graph/io/_gwt.py b/libpysal/graph/io/_gwt.py new file mode 100644 index 000000000..8b47ee528 --- /dev/null +++ b/libpysal/graph/io/_gwt.py @@ -0,0 +1,38 @@ +import pandas as pd + + +def _read_gwt(path): + """ + Read GWT weights to Graph object + + Parameters + ---------- + path : str + path to GWT file + + Returns + ------- + tuple + focal, neighbor, weight arrays + """ + adjacency = pd.read_csv(path, sep=r"\s+", skiprows=1, header=None) + return adjacency[0].values, adjacency[1].values, adjacency[2].values + + +def _to_gwt(graph_obj, path): + """ + Write GWT weights to Graph object + + Parameters + ---------- + graph_obj : Graph + Graph object + path : str + path to GAL file + """ + adj = graph_obj._adjacency.reset_index() + adj["focal"] = adj["focal"].astype(str).str.replace(" ", "_") + adj["neighbor"] = adj["neighbor"].astype(str).str.replace(" ", "_") + with open(path, "w") as file: + file.write(f"0 {graph_obj.n} Unknown Unknown\n") + adj.to_csv(path, sep=" ", header=False, index=False, mode="a", float_format="%.7f") From e8db7751ee0188431a0c8a328a6cec7bb0fe52d2 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Mon, 20 May 2024 21:31:44 +0200 Subject: [PATCH 3/7] tests + necessary bugfixes --- libpysal/graph/_contiguity.py | 8 ++++++-- libpysal/graph/_utils.py | 18 ++++++++++-------- libpysal/graph/base.py | 3 +++ libpysal/graph/io/_gal.py | 12 +++++++++++- libpysal/graph/tests/test_base.py | 26 ++++++++++++++++++++++++++ 5 files changed, 56 insertions(+), 11 deletions(-) diff --git a/libpysal/graph/_contiguity.py b/libpysal/graph/_contiguity.py index c28b3104b..76bb83500 100644 --- a/libpysal/graph/_contiguity.py +++ b/libpysal/graph/_contiguity.py @@ -77,8 +77,12 @@ def _vertex_set_intersection(geoms, rook=True, ids=None, by_perimeter=False): nexus_names = {ids[ix] for ix in nexus} for geom_ix in nexus: gid = ids[geom_ix] - graph[gid] |= nexus_names - graph[gid].remove(gid) + graph[gid].update(nexus_names) + + for idx in ids: + graph[idx].remove(idx) + + # return graph heads, tails, weights = _neighbor_dict_to_edges(graph) diff --git a/libpysal/graph/_utils.py b/libpysal/graph/_utils.py index 8cab2f674..9d40685b7 100644 --- a/libpysal/graph/_utils.py +++ b/libpysal/graph/_utils.py @@ -109,13 +109,15 @@ def _neighbor_dict_to_edges(neighbors, weights=None): that the any self-loops have a weight of zero. """ idxs = pd.Series(neighbors).explode() - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - "Downcasting object dtype arrays on .fillna, .ffill, .bfill ", - FutureWarning, - ) - idxs = idxs.fillna(pd.Series(idxs.index, index=idxs.index)) # self-loops + isolates = idxs.isna() + if isolates.any(): + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "Downcasting object dtype arrays on .fillna, .ffill, .bfill ", + FutureWarning, + ) + idxs = idxs.fillna(pd.Series(idxs.index, index=idxs.index)) # self-loops heads, tails = idxs.index.values, idxs.values tails = tails.astype(heads.dtype) if weights is not None: @@ -130,7 +132,7 @@ def _neighbor_dict_to_edges(neighbors, weights=None): data_array = pd.to_numeric(data_array) else: data_array = np.ones(idxs.shape[0], dtype=int) - data_array[heads == tails] = 0 + data_array[isolates] = 0 return heads, tails, data_array diff --git a/libpysal/graph/base.py b/libpysal/graph/base.py index dd3a55ab4..6a8598039 100644 --- a/libpysal/graph/base.py +++ b/libpysal/graph/base.py @@ -1653,6 +1653,9 @@ def read_parquet(path, **kwargs): def read_gal(path): """Read Graph from a GAL file + The reader tries to infer the dtype of IDs. In case of unsuccessful + casting to int, it will fall back to string. + Parameters ---------- path : str diff --git a/libpysal/graph/io/_gal.py b/libpysal/graph/io/_gal.py index a0d43b688..0042de911 100644 --- a/libpysal/graph/io/_gal.py +++ b/libpysal/graph/io/_gal.py @@ -1,3 +1,6 @@ +import contextlib + + def _read_gal(path): """Read GAL weights to Graph object @@ -26,6 +29,11 @@ def _read_gal(path): id_, _ = file.readline().strip().split() neighbors_i = file.readline().strip().split() neighbors[id_] = neighbors_i + + # try casting to ints to ensure loss-less roundtrip of integer node ids + with contextlib.suppress(ValueError): + neighbors = {int(k): list(map(int, v)) for k, v in neighbors.items()} + return neighbors @@ -48,7 +56,9 @@ def _to_gal(graph_obj, path): if ix in graph_obj.isolates: neighbors = [] else: - neighbors = chunk.index.get_level_values("neighbor").tolist() + neighbors = ( + chunk.index.get_level_values("neighbor").astype(str).tolist() + ) file.write(f"{ix} {len(neighbors)}\n") file.write(" ".join(neighbors) + "\n") diff --git a/libpysal/graph/tests/test_base.py b/libpysal/graph/tests/test_base.py index f5b83724c..cd24885e6 100644 --- a/libpysal/graph/tests/test_base.py +++ b/libpysal/graph/tests/test_base.py @@ -824,6 +824,32 @@ def test_parquet(self): g_pandas = graph.read_parquet(path) assert self.g_str == g_pandas + def test_gal(self): + with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, "g_int.gal") + g_int = self.g_int.transform("b") + g_int.to_gal(path) + g_int_ = graph.read_gal(path) + assert g_int == g_int_ + + path = os.path.join(tmpdir, "g_str.gal") + g_str = self.g_str.transform("b") + g_str.to_gal(path) + g_str_ = graph.read_gal(path) + assert g_str == g_str_ + + def test_gwt(self): + with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, "g_int.gwt") + self.g_int.to_gwt(path) + g_int = graph.read_gwt(path) + assert self.g_int == g_int + + path = os.path.join(tmpdir, "g_str.gwt") + self.g_str.to_gwt(path) + g_str = graph.read_gwt(path) + assert self.g_str == g_str + def test_getitem(self): expected = pd.Series( [1, 0.5, 0.5], From 402ece500dc0d2d502fe7aa006a3105bde31c640 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Mon, 20 May 2024 21:34:02 +0200 Subject: [PATCH 4/7] allow full path imports --- libpysal/__init__.py | 2 +- libpysal/graph/base.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/libpysal/__init__.py b/libpysal/__init__.py index 72ddee53a..8b49fb0ec 100644 --- a/libpysal/__init__.py +++ b/libpysal/__init__.py @@ -24,7 +24,7 @@ import contextlib from importlib.metadata import PackageNotFoundError, version -from . import cg, examples, io, weights +from . import cg, examples, graph, io, weights with contextlib.suppress(PackageNotFoundError): __version__ = version("libpysal") diff --git a/libpysal/graph/base.py b/libpysal/graph/base.py index 6a8598039..c9d673b28 100644 --- a/libpysal/graph/base.py +++ b/libpysal/graph/base.py @@ -40,6 +40,13 @@ Levi John Wolf (levi.john.wolf@gmail.com) """ +__all__ = [ + "Graph", + "read_parquet", + "read_gal", + "read_gwt", +] + class Graph(SetOpsMixin): """Graph class encoding spatial weights matrices From e9d5da4ba23af96be10e43848e18ddfc4f2eb3e2 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Mon, 20 May 2024 21:53:45 +0200 Subject: [PATCH 5/7] compat --- libpysal/graph/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpysal/graph/_utils.py b/libpysal/graph/_utils.py index 9d40685b7..ea957fc80 100644 --- a/libpysal/graph/_utils.py +++ b/libpysal/graph/_utils.py @@ -132,7 +132,7 @@ def _neighbor_dict_to_edges(neighbors, weights=None): data_array = pd.to_numeric(data_array) else: data_array = np.ones(idxs.shape[0], dtype=int) - data_array[isolates] = 0 + data_array[isolates.values] = 0 return heads, tails, data_array From f014971544034f296324693974c5443b9cf79e25 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Mon, 20 May 2024 22:24:22 +0200 Subject: [PATCH 6/7] pull the latest tobler --- ci/312-dev.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/312-dev.yaml b/ci/312-dev.yaml index 5bf7585d5..5f0995470 100644 --- a/ci/312-dev.yaml +++ b/ci/312-dev.yaml @@ -10,7 +10,6 @@ dependencies: # testing - codecov - matplotlib - - tobler - h3-py - pytest - pytest-cov @@ -39,4 +38,5 @@ dependencies: - xarray - git+https://github.com/geopandas/geopandas.git@main - git+https://github.com/shapely/shapely.git@main + - git+https://github.com/pysal/tobler.git@main - pulp From ef6bfdca868d16f8a79d578af2ae89d87fa3ea2e Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Mon, 27 May 2024 16:15:42 +0200 Subject: [PATCH 7/7] cleanup --- libpysal/graph/_contiguity.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/libpysal/graph/_contiguity.py b/libpysal/graph/_contiguity.py index 76bb83500..deed456db 100644 --- a/libpysal/graph/_contiguity.py +++ b/libpysal/graph/_contiguity.py @@ -82,8 +82,6 @@ def _vertex_set_intersection(geoms, rook=True, ids=None, by_perimeter=False): for idx in ids: graph[idx].remove(idx) - # return graph - heads, tails, weights = _neighbor_dict_to_edges(graph) if by_perimeter: