Skip to content

Commit

Permalink
Local file cache (#696)
Browse files Browse the repository at this point in the history
  • Loading branch information
Rlamboll authored Sep 14, 2022
1 parent d2d7a84 commit d7d9b45
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 4 deletions.
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ dependency for better performance.
- [#701](https://github.com/IAMconsortium/pyam/pull/701) Add **xlsxwriter** as dependency to improve `to_excel()` performance
- [#699](https://github.com/IAMconsortium/pyam/pull/699) Add filter options to IIASA API `index()`, `meta()` and `properties()` methods
- [#697](https://github.com/IAMconsortium/pyam/pull/697) Add warning if IIASA API returns empty result
- [#696](https://github.com/IAMconsortium/pyam/pull/696) Add `lazy_read_iiasa()` to load data preferentially from a local file cache
- [#695](https://github.com/IAMconsortium/pyam/pull/695) Remove unused meta levels during initialization
- [#688](https://github.com/IAMconsortium/pyam/pull/688) Remove ixmp as optional dependency
- [#684](https://github.com/IAMconsortium/pyam/pull/684) Use new IIASA-manager API with token refresh
Expand Down
22 changes: 22 additions & 0 deletions doc/source/tutorials/iiasa_dbs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,28 @@
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Loading all of a large database may take a few minutes on some connections. To save time in code that you run repeatedly, you can keep a local copy of the data via the __lazy_read_iiasa__ function. It takes a file location in addition to the connection options we saw above. The first time the code is run, the result is downloaded and stored at that location; on subsequent runs, the data is read from that file instead, unless the database has been updated since the file was written. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lazy_df = pyam.lazy_read_iiasa(\n",
" file=\"./tmp/messageix_co2_coal_data.csv\",\n",
" name=\"iamc15\",\n",
" model='MESSAGEix*', \n",
" variable=['Emissions|CO2', 'Primary Energy|Coal'], \n",
" region='World'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
2 changes: 1 addition & 1 deletion pyam/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pyam.timeseries import *
from pyam.logging import *
from pyam.run_control import *
from pyam.iiasa import read_iiasa # noqa: F401
from pyam.iiasa import read_iiasa, lazy_read_iiasa # noqa: F401
from pyam.datareader import read_worldbank # noqa: F401
from pyam.unfccc import read_unfccc # noqa: F401
from pyam.testing import assert_iamframe_equal # noqa: F401
Expand Down
79 changes: 77 additions & 2 deletions pyam/iiasa.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pathlib import Path
import json
import logging
import os.path
import requests

import httpx
Expand Down Expand Up @@ -575,15 +576,16 @@ def read_iiasa(name, default=True, meta=True, creds=None, base_url=_AUTH_URL, **
----------
name : str
A valid name of an IIASA scenario explorer instance,
see :attr:`pyam.iiasa.Connection.valid_connections`
see :attr:`pyam.iiasa.Connection.valid_connections`. Obtain a list of options
via pyam.iiasa.Connection().valid_connections.
default : bool, optional
Return *only* the default version of each scenario.
Any (`model`, `scenario`) without a default version is omitted.
If :obj:`False`, return all versions.
meta : bool or list of strings, optional
If `True`, include all meta categories & quantitative indicators
(or subset if list is given).
creds : str, :class:`pathlib.Path`, list-like, or dict, optional
creds : str or :class:`pathlib.Path`, optional
| Credentials (username & password) are not required to access
any public Scenario Explorer instances (i.e., with Guest login).
| See :class:`pyam.iiasa.Connection` for details.
Expand All @@ -595,3 +597,76 @@ def read_iiasa(name, default=True, meta=True, creds=None, base_url=_AUTH_URL, **
Arguments for :meth:`pyam.iiasa.Connection.query`
"""
return Connection(name, creds, base_url).query(default=default, meta=meta, **kwargs)


def lazy_read_iiasa(
    file, name, default=True, meta=True, creds=None, base_url=_AUTH_URL, **kwargs
):
    """Read data from a local cache file, falling back to the IIASA database API

    If `file` exists and was last modified after the latest `create_date` and
    `update_date` reported by :meth:`pyam.iiasa.Connection.properties`, the data is
    read from `file` and `kwargs` (if any) are applied as a filter. Otherwise, the
    data is queried from the IIASA scenario explorer database API and written to
    `file`.

    This function does not check that the cached file is a complete instance of the
    database: if the initial download applied a filter, later reads from the cache
    only return data that passes that same filter as well as any additional filter
    you apply. The arguments `default` and `meta` are only used when downloading;
    they are not applied to data read from an existing cache file.

    Parameters
    ----------
    file : str or :class:`pathlib.Path`
        The location to test for valid data and save the data if not up-to-date. Must
        be either xls, xlsx or csv.
    name : str
        A valid name of an IIASA scenario explorer instance,
        see :attr:`pyam.iiasa.Connection.valid_connections`. Obtain a list of options
        via pyam.iiasa.Connection().valid_connections.
    default : bool, optional
        Return *only* the default version of each scenario.
        Any (`model`, `scenario`) without a default version is omitted.
        If :obj:`False`, return all versions.
    meta : bool or list of strings, optional
        If `True`, include all meta categories & quantitative indicators
        (or subset if list is given).
    creds : str or :class:`pathlib.Path`, optional
        | Credentials (username & password) are not required to access
          any public Scenario Explorer instances (i.e., with Guest login).
        | See :class:`pyam.iiasa.Connection` for details.
        | Use :meth:`pyam.iiasa.set_config` to set credentials
          for accessing private/restricted Scenario Explorer instances.
    base_url : str
        Authentication server URL
    kwargs
        Arguments for :meth:`pyam.iiasa.Connection.query`

    Returns
    -------
    :class:`pyam.IamDataFrame`
        The data read from `file` or downloaded from the database.

    Raises
    ------
    AssertionError
        If the suffix of `file` is not one of csv, xlsx or xls.
    """
    file = Path(file)
    # NOTE: kept as an `assert` so that callers see the same exception type as before
    assert file.suffix in [
        ".csv",
        ".xlsx",
        ".xls",
    ], "We will only read and write to csv, xls and xlsx format."

    if file.exists():
        # compare the file's modification time to the latest change in the database
        date_set = pd.to_datetime(file.stat().st_mtime, unit="s")
        version_info = Connection(name, creds, base_url).properties()
        latest_new = np.nanmax(pd.to_datetime(version_info["create_date"]))
        latest_update = np.nanmax(pd.to_datetime(version_info["update_date"]))
        latest = pd.Series([latest_new, latest_update]).max()
        if latest < date_set:
            old_read = IamDataFrame(file)
            if kwargs:
                old_read = old_read.filter(**kwargs)
            logger.info("Database read from file")
            return old_read
        logger.info("Database out of date and will be re-downloaded")

    # If we get here, we need to (re-)download the database. Forward the caller's
    # `meta`, `creds` and `base_url` so that the download uses the same connection
    # settings as the freshness check above.
    new_read = read_iiasa(
        name, meta=meta, default=default, creds=creds, base_url=base_url, **kwargs
    )
    file.parent.mkdir(parents=True, exist_ok=True)
    if file.suffix == ".csv":
        new_read.to_csv(file)
    else:
        new_read.to_excel(file)
    return new_read
30 changes: 29 additions & 1 deletion tests/test_iiasa.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy.testing as npt
import yaml

from pyam import IamDataFrame, iiasa, read_iiasa, META_IDX
from pyam import IamDataFrame, iiasa, lazy_read_iiasa, read_iiasa, META_IDX
from pyam.testing import assert_iamframe_equal

from .conftest import META_COLS, IIASA_UNAVAILABLE, TEST_API, TEST_API_NAME
Expand Down Expand Up @@ -371,3 +371,31 @@ def test_query_empty_response(conn):
"""Check that querying with an empty response returns an empty IamDataFrame"""
# solves https://github.com/IAMconsortium/pyam/issues/676
assert conn.query(model="foo").empty


def test_lazy_read(tmpdir):
    """Check caching, filtering and file formats of `lazy_read_iiasa`"""
    cache_path = tmpdir / "test_database.csv"

    # initial call with a filter, this writes the cache file
    first = lazy_read_iiasa(cache_path, TEST_API, model="model_a")
    cached_mtime = os.path.getmtime(cache_path)
    assert first.model == ["model_a"]

    # the second call is expected to read from the file, so the result still
    # only holds the data of the initial (filtered) download
    second = lazy_read_iiasa(cache_path, TEST_API)
    assert first.data.equals(second.data)

    # a filter that is inconsistent with the cached data returns nothing and leaves
    # the file untouched; the location may be given as a string or a path object
    cache_path = str(cache_path)
    other_model = lazy_read_iiasa(cache_path, TEST_API, model="model_b")
    assert other_model.empty
    assert cached_mtime == os.path.getmtime(cache_path)

    # once the file is deleted, the new filter is applied to a fresh download
    os.remove(cache_path)
    other_model = lazy_read_iiasa(cache_path, TEST_API, model="model_b")
    assert other_model.model == ["model_b"]
    assert os.path.getmtime(cache_path) > cached_mtime

    # the cache file can also be in xlsx or xls format
    from_xlsx = lazy_read_iiasa(
        tmpdir / "test_database.xlsx", TEST_API, model="model_b"
    )
    assert other_model.equals(from_xlsx)
    from_xls = lazy_read_iiasa(tmpdir / "test_database.xls", TEST_API, model="model_b")
    assert from_xls.equals(from_xlsx)

0 comments on commit d7d9b45

Please sign in to comment.