Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add implementation for to_netcdf() #899

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

The next release must be bumped to v3.0.0.

- [#899](https://github.com/IAMconsortium/pyam/pull/899) Add `to_netcdf()` method
- [#896](https://github.com/IAMconsortium/pyam/pull/896) Add `sort_data()` method
- [#896](https://github.com/IAMconsortium/pyam/pull/896) Sort columns of `timeseries()` with mixed time domain
- [#893](https://github.com/IAMconsortium/pyam/pull/893) No sorting of timeseries data on initialization or append
Expand Down
28 changes: 27 additions & 1 deletion pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import pandas as pd
from pandas.api.types import is_integer

from pyam.netcdf import to_xarray

try:
from datapackage import Package

Expand Down Expand Up @@ -2513,7 +2515,7 @@ def to_datapackage(self, path):
Parameters
----------
path : string or path object
any valid string path or :class:`pathlib.Path`
Any valid string path or :class:`pathlib.Path`.
"""
if not HAS_DATAPACKAGE:
raise ImportError("Required package `datapackage` not found!")
Expand All @@ -2533,6 +2535,30 @@ def to_datapackage(self, path):
# return the package (needs to reloaded because `tmp` was deleted)
return Package(path)

def to_netcdf(self, path):
    """Export this object to a NetCDF file

    The object is converted to an :class:`xarray.Dataset` via :meth:`to_xarray`
    and that dataset is then written to `path`.

    Parameters
    ----------
    path : string or path object
        Any valid string path or :class:`pathlib.Path`.

    See Also
    --------
    pyam.read_netcdf
    """
    dataset = self.to_xarray()
    dataset.to_netcdf(path)

def to_xarray(self):
    """Return the timeseries data and meta indicators as an :class:`xarray.Dataset`

    If the time domain of this object is "year", the conversion is applied to
    the result of `swap_year_for_time` rather than to the object itself.

    Returns
    -------
    :class:`xarray.Dataset`
    """
    if self.time_domain == "year":
        # the year domain is swapped to 'time' before building the dataset
        df = swap_year_for_time(self)
    else:
        df = self
    return to_xarray(df._data, df.meta)

def load_meta(self, path, sheet_name="meta", ignore_conflict=False, **kwargs):
"""Load 'meta' indicators from file

Expand Down
55 changes: 52 additions & 3 deletions pyam/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,19 @@
import numpy as np
import pandas as pd

from pyam.index import get_index_levels

try:
import xarray as xr

HAS_XARRAY = True
except ModuleNotFoundError:
xr = None
HAS_XARRAY = False
from pyam.core import IamDataFrame
from pyam.utils import IAMC_IDX, META_IDX

NETCDF_IDX = ["time", "model", "scenario", "region"]


def read_netcdf(path):
"""Read timeseries data and meta indicators from a netCDF file
Expand All @@ -26,11 +29,15 @@
----------
:class:`IamDataFrame`

See Also
--------
pyam.IamDataFrame.to_netcdf
"""
from pyam import IamDataFrame

if not HAS_XARRAY:
raise ModuleNotFoundError("Reading netcdf files requires 'xarray'")
raise ModuleNotFoundError("Reading netcdf files requires 'xarray'.")

Check warning on line 39 in pyam/netcdf.py

View check run for this annotation

Codecov / codecov/patch

pyam/netcdf.py#L39

Added line #L39 was not covered by tests
_ds = xr.open_dataset(path)
NETCDF_IDX = ["time", "model", "scenario", "region"]
_list_variables = [i for i in _ds.to_dict()["data_vars"].keys()]

# Check if the time coordinate is years (integers) or date time-format
Expand Down Expand Up @@ -86,3 +93,45 @@
data,
meta=_ds[_meta].to_dataframe().replace("nan", np.nan) if _meta else None,
)


def to_xarray(data_series: pd.Series, meta: pd.DataFrame):
    """Convert timeseries data and meta indicators to an xarray Dataset

    Every group of the "variable" index level in `data_series` is added as one
    data-variable of the dataset, with its unit and name stored as attributes.
    Every column of `meta` is added as a further data-variable.

    Parameters
    ----------
    data_series : :class:`pandas.Series`
        Timeseries data whose index includes the levels "variable" and "unit".
    meta : :class:`pandas.DataFrame`
        Meta indicators, one column per indicator.
        NOTE(review): assumed to be indexed by `META_IDX`, which is used as the
        dimensions of each meta data-variable below - confirm against callers.

    Returns
    -------
    :class:`xarray.Dataset`

    Raises
    ------
    ModuleNotFoundError
        If the optional dependency 'xarray' is not available.
    ValueError
        If the data of any variable has more than one unit.
    """
    if not HAS_XARRAY:
        # NOTE(review): flagged as not covered by tests in the coverage report
        raise ModuleNotFoundError("Converting to xarray requires 'xarray'.")

    dataset = xr.Dataset()

    # add timeseries data-variables
    for variable, _variable_data in data_series.groupby("variable"):
        unit = get_index_levels(_variable_data, "unit")

        if len(unit) > 1:
            # NOTE(review): flagged as not covered by tests in the coverage report
            # an f-string keeps this a ValueError even if `variable` is not a string
            raise ValueError(
                f"Cannot write to xarray for non-unique units in '{variable}'."
            )

        # the variable and unit are stored as attributes, not as dimensions
        dataset[variable] = xr.DataArray(
            _variable_data.droplevel(["variable", "unit"]).to_xarray(),
        )
        dataset[variable].attrs = {
            "unit": unit[0],
            "long_name": variable,
        }

    # add meta indicators as data-variables
    for meta_indicator, meta_data in meta.items():
        # missing values are written as the string "nan"
        meta_data = meta_data.replace(np.nan, "nan")
        dataset[meta_indicator] = xr.DataArray(
            meta_data.to_xarray(),
            dims=META_IDX,
            name=meta_indicator,
        )

    return dataset
4 changes: 2 additions & 2 deletions pyam/time.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pyam.logging import raise_data_error


def swap_time_for_year(df, inplace, subannual=False):
def swap_time_for_year(df, inplace=False, subannual=False):
"""Internal implementation to swap 'time' domain to 'year' (as int)"""
if not df.time_col == "time":
raise ValueError("Time domain must be datetime to use this method")
Expand Down Expand Up @@ -49,7 +49,7 @@ def swap_time_for_year(df, inplace, subannual=False):
return ret


def swap_year_for_time(df, inplace):
def swap_year_for_time(df, inplace=False):
"""Internal implementation to swap 'year' domain to 'time' (as datetime)"""

if not df.time_col == "year":
Expand Down
7 changes: 4 additions & 3 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ def test_io_datapackage(test_df, tmpdir):
assert_iamframe_equal(test_df, import_df)


def test_io_netcdf(test_df_year):
obs = read_netcdf(TEST_DATA_DIR / "test_df.nc")
assert_iamframe_equal(obs, test_df_year)
def test_io_netcdf(test_df, tmpdir):
    # round-trip: write the object to a NetCDF file, read it back and compare
    target = Path(tmpdir) / "foo.nc"
    test_df.to_netcdf(target)
    roundtrip = read_netcdf(target)
    assert_iamframe_equal(roundtrip, test_df)
Loading