Skip to content

Commit

Permalink
Add implementation for to_netcdf() (#899)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhuppmann authored Dec 19, 2024
1 parent ebcf032 commit 19706ca
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 9 deletions.
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

The next release must be bumped to v3.0.0.

- [#899](https://github.com/IAMconsortium/pyam/pull/899) Add `to_netcdf()` method
- [#896](https://github.com/IAMconsortium/pyam/pull/896) Add `sort_data()` method
- [#896](https://github.com/IAMconsortium/pyam/pull/896) Sort columns of `timeseries()` with mixed time domain
- [#893](https://github.com/IAMconsortium/pyam/pull/893) No sorting of timeseries data on initialization or append
Expand Down
28 changes: 27 additions & 1 deletion pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import pandas as pd
from pandas.api.types import is_integer

from pyam.netcdf import to_xarray

try:
from datapackage import Package

Expand Down Expand Up @@ -2513,7 +2515,7 @@ def to_datapackage(self, path):
Parameters
----------
path : string or path object
any valid string path or :class:`pathlib.Path`
Any valid string path or :class:`pathlib.Path`.
"""
if not HAS_DATAPACKAGE:
raise ImportError("Required package `datapackage` not found!")
Expand All @@ -2533,6 +2535,30 @@ def to_datapackage(self, path):
# return the package (needs to be reloaded because `tmp` was deleted)
return Package(path)

def to_netcdf(self, path):
    """Export this object as a NetCDF file

    Parameters
    ----------
    path : string or path object
        Any valid string path or :class:`pathlib.Path`.

    See Also
    --------
    pyam.read_netcdf
    """
    # convert to an xarray Dataset first, then let it write the file
    dataset = self.to_xarray()
    dataset.to_netcdf(path)

def to_xarray(self):
    """Return the data and meta indicators as an :class:`xarray.Dataset`

    Returns
    -------
    :class:`xarray.Dataset`
    """
    # an object in the 'year' time domain is swapped to 'time' before converting
    if self.time_domain == "year":
        source = swap_year_for_time(self)
    else:
        source = self
    return to_xarray(source._data, source.meta)

def load_meta(self, path, sheet_name="meta", ignore_conflict=False, **kwargs):
"""Load 'meta' indicators from file
Expand Down
55 changes: 52 additions & 3 deletions pyam/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,19 @@
import numpy as np
import pandas as pd

from pyam.index import get_index_levels

try:
import xarray as xr

HAS_XARRAY = True
except ModuleNotFoundError:
xr = None
HAS_XARRAY = False
from pyam.core import IamDataFrame
from pyam.utils import IAMC_IDX, META_IDX

NETCDF_IDX = ["time", "model", "scenario", "region"]


def read_netcdf(path):
"""Read timeseries data and meta indicators from a netCDF file
Expand All @@ -26,11 +29,15 @@ def read_netcdf(path):
----------
:class:`IamDataFrame`
See Also
--------
pyam.IamDataFrame.to_netcdf
"""
from pyam import IamDataFrame

if not HAS_XARRAY:
raise ModuleNotFoundError("Reading netcdf files requires 'xarray'")
raise ModuleNotFoundError("Reading netcdf files requires 'xarray'.")
_ds = xr.open_dataset(path)
NETCDF_IDX = ["time", "model", "scenario", "region"]
_list_variables = [i for i in _ds.to_dict()["data_vars"].keys()]

# Check if the time coordinate is years (integers) or date time-format
Expand Down Expand Up @@ -86,3 +93,45 @@ def read_netcdf(path):
data,
meta=_ds[_meta].to_dataframe().replace("nan", np.nan) if _meta else None,
)


def to_xarray(data_series: pd.Series, meta: pd.DataFrame):
    """Convert timeseries data and meta indicators to an xarray Dataset

    Parameters
    ----------
    data_series : :class:`pandas.Series`
        Timeseries data; the index must include the levels 'variable' and 'unit'.
    meta : :class:`pandas.DataFrame`
        Meta indicators; each column is added as a separate data-variable.

    Returns
    -------
    :class:`xarray.Dataset`

    Raises
    ------
    ModuleNotFoundError
        If the optional dependency 'xarray' is not installed.
    ValueError
        If the data for any variable has more than one unit.
    """
    if not HAS_XARRAY:
        raise ModuleNotFoundError("Converting to xarray requires 'xarray'.")

    dataset = xr.Dataset()

    # add timeseries data-variables, one per entry of the 'variable' index level
    for variable, _variable_data in data_series.groupby("variable"):
        unit = get_index_levels(_variable_data, "unit")

        # the unit is stored as a single attribute of the data-variable,
        # so data with several units per variable cannot be represented
        if len(unit) > 1:
            # f-string formatting also works if the variable label is not a string
            raise ValueError(
                f"Cannot write to xarray for non-unique units in '{variable}'."
            )

        dataset[variable] = xr.DataArray(
            _variable_data.droplevel(["variable", "unit"]).to_xarray(),
        )
        dataset[variable].attrs = {
            "unit": unit[0],
            "long_name": variable,
        }

    # add meta indicators as data-variables
    for meta_indicator, meta_data in meta.items():
        # missing values are written as the string "nan"
        meta_data = meta_data.replace(np.nan, "nan")
        dataset[meta_indicator] = xr.DataArray(
            meta_data.to_xarray(),
            dims=META_IDX,
            name=meta_indicator,
        )

    return dataset
4 changes: 2 additions & 2 deletions pyam/time.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pyam.logging import raise_data_error


def swap_time_for_year(df, inplace, subannual=False):
def swap_time_for_year(df, inplace=False, subannual=False):
"""Internal implementation to swap 'time' domain to 'year' (as int)"""
if not df.time_col == "time":
raise ValueError("Time domain must be datetime to use this method")
Expand Down Expand Up @@ -49,7 +49,7 @@ def swap_time_for_year(df, inplace, subannual=False):
return ret


def swap_year_for_time(df, inplace):
def swap_year_for_time(df, inplace=False):
"""Internal implementation to swap 'year' domain to 'time' (as datetime)"""

if not df.time_col == "year":
Expand Down
7 changes: 4 additions & 3 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ def test_io_datapackage(test_df, tmpdir):
assert_iamframe_equal(test_df, import_df)


def test_io_netcdf(test_df_year):
    # reading the stored NetCDF test file must reproduce the yearly test data
    assert_iamframe_equal(read_netcdf(TEST_DATA_DIR / "test_df.nc"), test_df_year)
def test_io_netcdf(test_df, tmpdir):
    # round-trip: write to a temporary NetCDF file and read it back
    target = Path(tmpdir) / "foo.nc"
    test_df.to_netcdf(target)
    roundtrip = read_netcdf(target)
    assert_iamframe_equal(roundtrip, test_df)

0 comments on commit 19706ca

Please sign in to comment.