From de3ac23c7169fa28b850147e9cb6b0aca3cbab90 Mon Sep 17 00:00:00 2001 From: drs251 Date: Sun, 11 Mar 2018 16:58:46 +0100 Subject: [PATCH 1/4] XArrayInterface improvements: will now attempt to read units and dimension labels for all key and value dimensions, added test to make sure xarray.Datasets and xarray.DataArrays are treated equivalently --- holoviews/core/data/xarray.py | 12 +++++++++- tests/core/data/testdataset.py | 41 ++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/holoviews/core/data/xarray.py b/holoviews/core/data/xarray.py index 92478e40f8..4649eaa159 100644 --- a/holoviews/core/data/xarray.py +++ b/holoviews/core/data/xarray.py @@ -80,6 +80,7 @@ def init(cls, eltype, data, kdims, vdims): cls) vdims = [vdim] data = data.to_dataset(name=vdim.name) + if not isinstance(data, xr.Dataset): if kdims is None: kdims = kdim_param.default @@ -120,8 +121,9 @@ def init(cls, eltype, data, kdims, vdims): for c in data.coords: if c not in kdims and set(data[c].dims) == set(virtual_dims): kdims.append(c) + vdims = [vd if isinstance(vd, Dimension) else Dimension(vd) for vd in vdims] + kdims = [kd if isinstance(kd, Dimension) else Dimension(kd) for kd in kdims] - kdims = [d if isinstance(d, Dimension) else Dimension(d) for d in kdims] not_found = [] for d in kdims: if not any(d.name == k or (isinstance(v, xr.DataArray) and d.name in v.dims) @@ -133,6 +135,14 @@ def init(cls, eltype, data, kdims, vdims): raise DataError("xarray Dataset must define coordinates " "for all defined kdims, %s coordinates not found." % not_found, cls) + + # retrieve units and labels from Dataset: + for d in kdims + vdims: + d.unit = data[d.name].attrs.get('units') + label = data[d.name].attrs.get('long_name') + if label is not None: + d.label = label + return data, {'kdims': kdims, 'vdims': vdims}, {} diff --git a/tests/core/data/testdataset.py b/tests/core/data/testdataset.py index 1d35fa4626..4bf3750f99 100644 --- a/tests/core/data/testdataset.py +++ b/tests/core/data/testdataset.py @@ -1572,6 +1572,47 @@ def test_xarray_dataset_with_scalar_dim_canonicalize(self): expected = np.array([[0, 1], [2, 3], [4, 5]]) self.assertEqual(canonical, expected) + def test_xarray_dataset_names_and_units(self): + import xarray as xr + xs = [0.1, 0.2, 0.3] + ys = [0, 1] + zs = np.array([[0, 1], [2, 3], [4, 5]]) + da = xr.DataArray(zs, coords=[('x_dim', xs), ('y_dim', ys)], name="data_name", dims=['y_dim', 'x_dim']) + da.attrs['long_name'] = "data long name" + da.attrs['units'] = "array_unit" + da.x_dim.attrs['units'] = "x_unit" + da.y_dim.attrs['long_name'] = "y axis long name" + dataset = Dataset(da) + self.assertEqual(dataset.get_dimension("x_dim"), Dimension("x_dim", unit="x_unit")) + self.assertEqual(dataset.get_dimension("y_dim"), Dimension("y_dim", label="y axis long name")) + self.assertEqual(dataset.get_dimension("data_name"), + Dimension("data_name", label="data long name", unit="array_unit")) + + def test_xarray_dataset_dataarray_vs_dataset(self): + import xarray as xr + xs = [0.1, 0.2, 0.3] + ys = [0, 1] + zs = np.array([[0, 1], [2, 3], [4, 5]]) + da = xr.DataArray(zs, coords=[('x_dim', xs), ('y_dim', ys)], name="data_name", dims=['y_dim', 'x_dim']) + da.attrs['long_name'] = "data long name" + da.attrs['units'] = "array_unit" + da.x_dim.attrs['units'] = "x_unit" + da.y_dim.attrs['long_name'] = "y axis long name" + ds = da.to_dataset() + dataset_from_da = Dataset(da) + dataset_from_ds = Dataset(ds) + self.assertEqual(dataset_from_da, dataset_from_ds) + # same with reversed names: + da_rev = xr.DataArray(zs, coords=[('x_dim', xs), ('y_dim', ys)], name="data_name", dims=['x_dim', 'y_dim']) + da_rev.attrs['long_name'] = "data long name" + da_rev.attrs['units'] = "array_unit" + da_rev.x_dim.attrs['units'] = "x_unit" + da_rev.y_dim.attrs['long_name'] = "y axis long name" + ds_rev = da_rev.to_dataset() + dataset_from_da_rev = Dataset(da_rev) + dataset_from_ds_rev = Dataset(ds_rev) + self.assertEqual(dataset_from_da_rev, dataset_from_ds_rev) + def test_dataset_array_init_hm(self): "Tests support for arrays (homogeneous)" raise SkipTest("Not supported") From 182bc6097e0c27538088c11949d1a5e3d24fc8e4 Mon Sep 17 00:00:00 2001 From: drs251 Date: Thu, 15 Mar 2018 01:57:02 +0100 Subject: [PATCH 2/4] Added some documentation on using xarray with Dataset --- examples/user_guide/08-Gridded_Datasets.ipynb | 953 +++++++++++++++++- 1 file changed, 950 insertions(+), 3 deletions(-) diff --git a/examples/user_guide/08-Gridded_Datasets.ipynb b/examples/user_guide/08-Gridded_Datasets.ipynb index 989ecd2bbb..cc7dc9cf30 100644 --- a/examples/user_guide/08-Gridded_Datasets.ipynb +++ b/examples/user_guide/08-Gridded_Datasets.ipynb @@ -9,9 +9,626 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "import xarray as xr\n", "import numpy as np\n", @@ -292,6 +909,323 @@ "heatmap + heatmap.table()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Working with xarray data types\n", + "As demonstrated previously, `Dataset` comes with support for the `xarray` library, which offers a powerful way to work with multi-dimensional, regularly spaced data. In this example, we'll load an example dataset, turn it into a HoloViews `Dataset` and visualize it. First, let's have a look at the xarray dataset's contents:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\n", + "Dimensions: (lat: 25, lon: 53, time: 2920)\n", + "Coordinates:\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 62.5 60.0 57.5 55.0 52.5 ...\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 210.0 212.5 215.0 217.5 ...\n", + " * time (time) datetime64[ns] 2013-01-01 2013-01-01T06:00:00 ...\n", + "Data variables:\n", + " air (time, lat, lon) float32 241.2 242.5 243.5 244.0 244.09999 ...\n", + "Attributes:\n", + " Conventions: COARDS\n", + " title: 4x daily NMC reanalysis (1948)\n", + " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", + " platform: Model\n", + " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly..." + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xr_ds = xr.tutorial.load_dataset(\"air_temperature\")\n", + "xr_ds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is trivial to turn this xarray Dataset into a Holoviews `Dataset` (the same also works for DataArray):" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ":Dataset [lat,lon,time] (air)\n" + ] + } + ], + "source": [ + "hv_ds = hv.Dataset(xr_ds)[:, :, \"2013-01-01\"]\n", + "print(hv_ds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We have used the usual slice notation in order to select one single day in the rather large dataset. Finally, let's visualize the dataset by converting it to a `HoloMap` of `Images` using the `to()` method. We need to specify which of the dataset's key dimensions will be consumed by the images (in this case \"lat\" and \"lon\"), where the remaing key dimensions will be associated with the HoloMap (here: \"time\"). We'll use the slice notation again to clip the longitude." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + "
\n", + " \n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "\t \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + "" + ], + "text/plain": [ + ":HoloMap [time]\n", + " :Image [lon,lat] (air)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%opts Image [colorbar=True]\n", + "%%output size=200\n", + "hv_ds.to(hv.Image, kdims=[\"lon\", \"lat\"])[:, 220:320, :]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note, that this approach immediately converts all available data to images, which will take up a lot of RAM for large datasets. For these situations, consider using a [DynamicMap](./06-Live_Data.ipynb) in conjunction with [xarray's dask support](http://xarray.pydata.org/en/stable/dask.html) instead.\n", + "\n", + "Additional examples of visualizing xarrays in the context of geographical data can be found in the GeoViews documentation: [Gridded Datasets I](http://geo.holoviews.org/Gridded_Datasets_I.html) and\n", + "[Gridded Datasets II](http://geo.holoviews.org/Gridded_Datasets_II.html). These guides also contain useful information on the interaction between xarray data structures and HoloViews Datasets in general." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -399,9 +1333,22 @@ } ], "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", - "pygments_lexer": "ipython3" + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" } }, "nbformat": 4, From 5f6c7d3fb5090db0fdca1b301b02f935b95302d3 Mon Sep 17 00:00:00 2001 From: drs251 Date: Thu, 15 Mar 2018 02:52:14 +0100 Subject: [PATCH 3/4] Cleared notebook outputs --- examples/user_guide/08-Gridded_Datasets.ipynb | 887 +----------------- 1 file changed, 8 insertions(+), 879 deletions(-) diff --git a/examples/user_guide/08-Gridded_Datasets.ipynb b/examples/user_guide/08-Gridded_Datasets.ipynb index cc7dc9cf30..e9b6f60dbe 100644 --- a/examples/user_guide/08-Gridded_Datasets.ipynb +++ b/examples/user_guide/08-Gridded_Datasets.ipynb @@ -9,626 +9,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - " \n", - " \n", - "\n", - "\n", - "
\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "import xarray as xr\n", "import numpy as np\n", @@ -919,33 +302,9 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\n", - "Dimensions: (lat: 25, lon: 53, time: 2920)\n", - "Coordinates:\n", - " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 62.5 60.0 57.5 55.0 52.5 ...\n", - " * lon (lon) float32 200.0 202.5 205.0 207.5 210.0 212.5 215.0 217.5 ...\n", - " * time (time) datetime64[ns] 2013-01-01 2013-01-01T06:00:00 ...\n", - "Data variables:\n", - " air (time, lat, lon) float32 241.2 242.5 243.5 244.0 244.09999 ...\n", - "Attributes:\n", - " Conventions: COARDS\n", - " title: 4x daily NMC reanalysis (1948)\n", - " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", - " platform: Model\n", - " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly..." - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "xr_ds = xr.tutorial.load_dataset(\"air_temperature\")\n", "xr_ds" @@ -960,17 +319,9 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ":Dataset [lat,lon,time] (air)\n" - ] - } - ], + "outputs": [], "source": [ "hv_ds = hv.Dataset(xr_ds)[:, :, \"2013-01-01\"]\n", "print(hv_ds)" @@ -985,231 +336,9 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "
\n", - "
\n", - " \n", - " \n", - " \n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - " \n", - "
\n", - " \n", - "
\n", - "
\n", - " \n", - "
\n", - "
\n", - "
\n", - "
\n", - "\t \n", - " \n", - " \n", - "
\n", - "
\n", - "
\n", - "\n", - "\n", - "" - ], - "text/plain": [ - ":HoloMap [time]\n", - " :Image [lon,lat] (air)" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%opts Image [colorbar=True]\n", "%%output size=200\n", From c3ba75024abf2dd70c7a92565a9998dab3963c46 Mon Sep 17 00:00:00 2001 From: drs251 Date: Thu, 15 Mar 2018 15:59:42 +0100 Subject: [PATCH 4/4] Included dynamic flag for .to() --- examples/user_guide/08-Gridded_Datasets.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/user_guide/08-Gridded_Datasets.ipynb b/examples/user_guide/08-Gridded_Datasets.ipynb index e9b6f60dbe..28147f5d5b 100644 --- a/examples/user_guide/08-Gridded_Datasets.ipynb +++ b/examples/user_guide/08-Gridded_Datasets.ipynb @@ -342,14 +342,14 @@ "source": [ "%%opts Image [colorbar=True]\n", "%%output size=200\n", - "hv_ds.to(hv.Image, kdims=[\"lon\", \"lat\"])[:, 220:320, :]" + "hv_ds.to(hv.Image, kdims=[\"lon\", \"lat\"], dynamic=False)[:, 220:320, :]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Note, that this approach immediately converts all available data to images, which will take up a lot of RAM for large datasets. For these situations, consider using a [DynamicMap](./06-Live_Data.ipynb) in conjunction with [xarray's dask support](http://xarray.pydata.org/en/stable/dask.html) instead.\n", + "Here, we have explicitly specified the default behaviour `dynamic=False`, which returns a HoloMap. Note, that this approach immediately converts all available data to images, which will take up a lot of RAM for large datasets. For these situations, use `dynamic=True` to generate a [DynamicMap](./06-Live_Data.ipynb) instead. Additionally, [xarray features dask support](http://xarray.pydata.org/en/stable/dask.html), which is helpful when dealing with large amounts of data.\n", "\n", "Additional examples of visualizing xarrays in the context of geographical data can be found in the GeoViews documentation: [Gridded Datasets I](http://geo.holoviews.org/Gridded_Datasets_I.html) and\n", "[Gridded Datasets II](http://geo.holoviews.org/Gridded_Datasets_II.html). These guides also contain useful information on the interaction between xarray data structures and HoloViews Datasets in general."