Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: access the whole list when calling get_names without parameters #37

Merged
merged 3 commits into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,15 @@ For example to get the name and codes of all the departments in France you can r

pygadm.get_names(admin="FRA", content_level=2)

.. note::

You can also get the list of all the country names by omitting the ``admin`` and ``name`` parameters. If a level is not provided, the table will only show country names, but the other parameters remain available.

.. code-block:: python

pygadm.get_names()


Google Earth engine
-------------------

Expand Down
57 changes: 0 additions & 57 deletions ee_token.py

This file was deleted.

103 changes: 58 additions & 45 deletions pygadm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,57 +148,70 @@ def get_names(
# sanitary check on parameters
if name and admin:
raise ValueError('"name" and "id" cannot be set at the same time.')
elif not name and not admin:
raise ValueError('at least "name" or "admin" need to be set.')

# set the id we look for and tell the function if its a name or an admin
is_name = True if name else False
id = name if name else admin

# read the data and find if the element exist
# if a name or admin number is set, we need to filter the dataset accordingly
# if not we will simply consider the world dataset
df = pd.read_parquet(__gadm_data__)
column = "NAME_{}" if is_name else "GID_{}"
is_in = (
df.filter([column.format(i) for i in range(6)])
.apply(lambda col: col.str.lower())
.isin([id.lower()])
)

if not is_in.any().any():
# find the 5 closest names/id
columns = [df[column.format(i)].dropna().str.lower().values for i in range(6)]
ids = np.unique(np.concatenate(columns))
close_ids = get_close_matches(id.lower(), ids, n=5)
if is_name is True:
close_ids = [i.capitalize() for i in close_ids]
else:
close_ids = [i.upper() for i in close_ids]
raise ValueError(
f'The requested "{id}" is not part of GADM. The closest matches are: {", ".join(close_ids)}.'
if name or admin:
# set the id we look for and tell the function if its a name or an admin
is_name = True if name else False
id = name if name else admin

# read the data and find if the element exist
column = "NAME_{}" if is_name else "GID_{}"
is_in = (
df.filter([column.format(i) for i in range(6)])
.apply(lambda col: col.str.lower())
.isin([id.lower()])
)

# Get the iso_3 of the associated country of the identifed area and the associated level
line = is_in[~((~is_in).all(axis=1))].idxmax(1)
level = line.iloc[0][5 if is_name else 4] # GID_ or NAME_

# load the max_level available in the requested area
sub_df = df[df[column.format(level)].str.fullmatch(id, case=False)]
max_level = next(i for i in reversed(range(6)) if (sub_df[f"GID_{i}"] != "").any())

# get the request level from user
if content_level == -1:
content_level = level
elif content_level < int(level):
warnings.warn(
f"The requested level ({content_level}) is higher than the area ({level}). Fallback to {level}."
if not is_in.any().any():
# find the 5 closest names/id
columns = [
df[column.format(i)].dropna().str.lower().values for i in range(6)
]
ids = np.unique(np.concatenate(columns))
close_ids = get_close_matches(id.lower(), ids, n=5)
if is_name is True:
close_ids = [i.capitalize() for i in close_ids]
else:
close_ids = [i.upper() for i in close_ids]
raise ValueError(
f'The requested "{id}" is not part of GADM. '
f'The closest matches are: {", ".join(close_ids)}.'
)

# Get the iso_3 of the associated country of the identifed area and the associated level
line = is_in[~((~is_in).all(axis=1))].idxmax(1)
level = line.iloc[0][5 if is_name else 4] # GID_ or NAME_

# load the max_level available in the requested area
sub_df = df[df[column.format(level)].str.fullmatch(id, case=False)]
max_level = next(
i for i in reversed(range(6)) if (sub_df[f"GID_{i}"] != "").any()
)
content_level = level

if int(content_level) > max_level:
warnings.warn(
f"The requested level ({content_level}) is higher than the max level in this country ({max_level}). Fallback to {max_level}."
)
content_level = max_level
# get the request level from user
content_level, level = int(content_level), int(level)
if content_level == -1:
content_level = level
elif content_level < level:
warnings.warn(
f"The requested level ({content_level}) is higher than the area ({level}). "
f"Fallback to {level}."
)
content_level = level

if content_level > max_level:
warnings.warn(
f"The requested level ({content_level}) is higher than the max level in "
f"this country ({max_level}). Fallback to {max_level}."
)
content_level = max_level

else:
sub_df = df
content_level = 0 if content_level == -1 else content_level

# get the columns name to display
columns = [f"NAME_{content_level}", f"GID_{content_level}"]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ docstring-quotes = "double"
convention = "google"

[tool.codespell]
skip = 'CHANGELOG.md,**/*.json'
skip = 'CHANGELOG.md,**/*.json,**/*.csv'

[tool.mypy]
scripts_are_modules = true
Expand Down
43 changes: 43 additions & 0 deletions test.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pygadm\n",
"\n",
"pygadm.get_names()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
8 changes: 4 additions & 4 deletions tests/test_get_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
import pygadm


def test_empty():
"""Empty request."""
with pytest.raises(Exception):
pygadm.get_names()
def test_empty(dataframe_regression):
"""Empty request which should return the country list."""
df = pygadm.get_names()
dataframe_regression.check(df)


def test_duplicate_input():
Expand Down
Loading