Skip to content

Commit

Permalink
Fix error in latitude grouping when creating a unique postcode index (#…
Browse files Browse the repository at this point in the history
…62)

Co-authored-by: Roman Yurchak <rth.yurchak@gmail.com>
Closes #32
Closes #55
  • Loading branch information
hedi-guedidi authored Dec 13, 2022
1 parent fda2318 commit b0999b7
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 1 deletion.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@

- The minimum supported Python version is updated to Python 3.8
[#65](https://github.com/symerio/pgeocode/pull/65)
- Fix error in latitude grouping when creating a unique postcode index.
With this fix `Nominatim(.., unique=True)` correctly computes the average
latitude for each postcode (if multiple localities share the same postcode),
instead of taking the first latitude value.
[#62](https://github.com/symerio/pgeocode/pull/62)

- The default folder to store downloaded data is changed to `~/.cache/pgeocode/`.
This default can still be changed by setting the `PGEOCODE_DATA_DIR` environment variable.
[#51](https://github.com/symerio/pgeocode/pull/51)
Expand Down
2 changes: 1 addition & 1 deletion pgeocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ def _index_postal_codes(self) -> pd.DataFrame:
df_unique_cp_group = self._data.groupby("postal_code")
data_unique = df_unique_cp_group[["latitude", "longitude"]].mean()
valid_keys = set(DATA_FIELDS).difference(
["place_name", "lattitude", "longitude", "postal_code"]
["place_name", "latitude", "longitude", "postal_code"]
)
data_unique["place_name"] = df_unique_cp_group["place_name"].apply(
lambda x: ", ".join([str(el) for el in x])
Expand Down
57 changes: 57 additions & 0 deletions test_pgeocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,3 +261,60 @@ def test_first_url_fails(httpserver, monkeypatch, temp_dir):
with pytest.warns(UserWarning, match=msg):
Nominatim("ie")
httpserver.check_assertions()


def test_unique_index_pcode(tmp_path):
    """Verify the unique postcode index averages latitude as well as longitude.

    Two postcodes with two localities each are indexed; the expected result
    holds the mean coordinates per postcode, the place names joined with
    ", ", and the first value of every other column.

    Regression test for https://github.com/symerio/pgeocode/pull/62
    """

    class _BareNominatim(Nominatim):
        # Skip the parent constructor so the test can inject its own data.
        def __init__(self):
            pass

    letters = ["a", "b", "c", "d"]
    numbers = [1, 2, 3, 4]
    raw = pd.DataFrame(
        {
            "postal_code": ["1", "1", "2", "2"],
            "latitude": [1.0, 2.0, 3.0, 4],
            "longitude": [5.0, 6.0, 7.0, 8],
            "place_name": letters,
            "state_name": letters,
            "country_name": letters,
            "county_name": letters,
            "community_name": letters,
            "accuracy": numbers,
            "country_code": numbers,
            "county_code": numbers,
            "state_code": numbers,
            "community_code": numbers,
        }
    )

    geocoder = _BareNominatim()
    # NOTE(review): presumably _index_postal_codes uses this path for the
    # index file it produces -- confirm against the implementation.
    geocoder._data_path = str(tmp_path / "a.txt")
    geocoder._data = raw
    indexed = geocoder._index_postal_codes()

    # Every non-aggregated column is expected to keep the first row of each
    # postcode group, i.e. rows 0 and 2 of the input.
    first_letters = letters[::2]
    first_numbers = numbers[::2]
    wanted = pd.DataFrame(
        {
            "postal_code": ["1", "2"],
            "latitude": [1.5, 3.5],
            "longitude": [5.5, 7.5],
            "place_name": ["a, b", "c, d"],
            "state_name": first_letters,
            # NOTE: country_name is left out of the expectation; the index
            # does not seem to carry it (reason unclear).
            "county_name": first_letters,
            "community_name": first_letters,
            "accuracy": first_numbers,
            "country_code": first_numbers,
            "county_code": first_numbers,
            "state_code": first_numbers,
            "community_code": first_numbers,
        }
    )

    # Column order is irrelevant to the check, so normalise it on both sides.
    actual_sorted = indexed.sort_index(axis=1)
    wanted_sorted = wanted.sort_index(axis=1)
    pd.testing.assert_frame_equal(actual_sorted, wanted_sorted)

0 comments on commit b0999b7

Please sign in to comment.