enable loading remote hdf5 files #2782

Merged 12 commits on Mar 16, 2019
43 changes: 24 additions & 19 deletions xarray/backends/api.py
@@ -170,8 +170,8 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True,
         Strings and Path objects are interpreted as a path to a netCDF file
         or an OpenDAP URL and opened with python-netCDF4, unless the filename
         ends with .gz, in which case the file is gunzipped and opened with
-        scipy.io.netcdf (only netCDF3 supported). File-like objects are opened
-        with scipy.io.netcdf (only netCDF3 supported).
+        scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
+        objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
     group : str, optional
         Path to the netCDF4 group in the given file to open (only works for
         netCDF4 files).
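Concretely, the updated docstring covers call patterns like the following sketch ('example.nc' is a placeholder filename, not part of the diff):

import xarray as xr

# A string path (or OpenDAP URL) dispatches on the filename, as before.
ds = xr.open_dataset('example.nc')

# Raw bytes or a file-like object: with this PR, the first bytes of the
# stream pick the engine -- scipy for netCDF3, h5netcdf for netCDF4/HDF5.
with open('example.nc', 'rb') as f:
    ds_from_bytes = xr.open_dataset(f.read())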
@@ -310,17 +310,9 @@ def maybe_decode_store(store, lock=False):
     if isinstance(filename_or_obj, backends.AbstractDataStore):
         store = filename_or_obj
         ds = maybe_decode_store(store)
-    elif isinstance(filename_or_obj, str):
-
-        if (isinstance(filename_or_obj, bytes) and
-                filename_or_obj.startswith(b'\x89HDF')):
-            raise ValueError('cannot read netCDF4/HDF5 file images')
-        elif (isinstance(filename_or_obj, bytes) and
-                filename_or_obj.startswith(b'CDF')):
-            # netCDF3 file images are handled by scipy
-            pass
-        elif isinstance(filename_or_obj, str):
-            filename_or_obj = _normalize_path(filename_or_obj)
+    elif isinstance(filename_or_obj, str):
+        filename_or_obj = _normalize_path(filename_or_obj)
 
         if engine is None:
             engine = _get_default_engine(filename_or_obj,
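The block removed here was unreachable: the inner bytes checks sat inside an isinstance(filename_or_obj, str) branch, most likely a leftover from the Python 2 basestring days. Handling of bytes input moves to the file-like branch below.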
@@ -352,11 +344,24 @@ def maybe_decode_store(store, lock=False):
         with close_on_error(store):
             ds = maybe_decode_store(store)
     else:
-        if engine is not None and engine != 'scipy':
-            raise ValueError('can only read file-like objects with '
-                             "default engine or engine='scipy'")
-        # assume filename_or_obj is a file-like object
-        store = backends.ScipyDataStore(filename_or_obj)
+        if engine not in [None, 'scipy', 'h5netcdf']:
+            raise ValueError('can only read bytes or file-like objects with '
+                             "engine = None, 'scipy', or 'h5netcdf'")
+        else:
+            if isinstance(filename_or_obj, bytes):
+                filename_or_obj = BytesIO(filename_or_obj)
+            # read first bytes of file-like object to determine engine
+            magic_number = filename_or_obj.read(8)
+            filename_or_obj.seek(0)
+            if magic_number.startswith(b'CDF'):
+                store = backends.ScipyDataStore(filename_or_obj,
+                                                **backend_kwargs)
+            elif magic_number.startswith(b'\211HDF\r\n\032\n'):
+                store = backends.H5NetCDFStore(filename_or_obj, group=group,
+                                               lock=lock, **backend_kwargs)
+            else:
+                raise ValueError("byte header doesn't match netCDF3 or "
+                                 "netCDF4/HDF5: {}".format(magic_number))
Member: I suspect this is one of those rare cases where it's best not to report all the details -- most users probably don't know about magic numbers. Maybe something like:

  • "file-like object is not a netCDF file: {}".format(filename_or_obj), or
  • "bytes do not represent in-memory netCDF file: {}. (Pass a string or pathlib.Path object to read a filename from disk.)".format(filename_or_obj[:80] + b'...' if len(filename_or_obj) > 80 else b'')

Contributor Author: went with the first, more concise option

             ds = maybe_decode_store(store)
 
     # Ensure source filename always stored in dataset object (GH issue #2550)
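The dispatch above is easy to factor out and test in isolation. A standalone sketch of the same sniffing logic follows (guess_engine is an illustrative name, not xarray API):

from io import BytesIO

def guess_engine(obj):
    # Wrap raw bytes so both input kinds expose read()/seek().
    if isinstance(obj, bytes):
        obj = BytesIO(obj)
    magic_number = obj.read(8)
    obj.seek(0)
    # netCDF3 streams begin with b'CDF'; HDF5 streams begin with the
    # 8-byte signature b'\x89HDF\r\n\x1a\n' (b'\211HDF\r\n\032\n' in octal).
    if magic_number.startswith(b'CDF'):
        return 'scipy'
    if magic_number.startswith(b'\x89HDF\r\n\x1a\n'):
        return 'h5netcdf'
    raise ValueError('byte header does not match netCDF3 or netCDF4/HDF5')

For example, guess_engine(open('example.nc', 'rb')) would return 'h5netcdf' for a netCDF4 file.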
@@ -383,8 +388,8 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True,
         Strings and Paths are interpreted as a path to a netCDF file or an
         OpenDAP URL and opened with python-netCDF4, unless the filename ends
         with .gz, in which case the file is gunzipped and opened with
-        scipy.io.netcdf (only netCDF3 supported). File-like objects are opened
-        with scipy.io.netcdf (only netCDF3 supported).
+        scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
+        objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
     group : str, optional
         Path to the netCDF4 group in the given file to open (only works for
         netCDF4 files).
32 changes: 32 additions & 0 deletions xarray/tests/test_backends.py
@@ -1955,6 +1955,38 @@ def test_dump_encodings_h5py(self):
assert actual.x.encoding['compression_opts'] is None


# Requires h5py>2.9.0
Member: Can you add a pytest.mark.skipif based on the version number? (The test on Travis-CI is failing on Python 3.5 because it has an old version of h5py installed.)

Contributor Author: I think I did this correctly (added some lines to tests/__init__.py).
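A version gate along the lines requested might look like the sketch below; the helper name and placement are illustrative, since the actual lines landed in tests/__init__.py:

import pytest
from distutils.version import LooseVersion

try:
    import h5py
    has_h5py_29 = LooseVersion(h5py.__version__) >= LooseVersion('2.9.0')
except ImportError:
    has_h5py_29 = False

# Decorate tests that need h5py's file-like object support (h5py >= 2.9).
requires_h5py_29 = pytest.mark.skipif(
    not has_h5py_29, reason='requires h5py >= 2.9.0')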

@requires_h5netcdf
class TestH5NetCDFFileObject(TestH5NetCDFData):
    engine = 'h5netcdf'

    @network
    def test_h5remote(self):
        # alternative: http://era5-pds.s3.amazonaws.com/2008/01/main.nc
        import requests
        url = ('https://www.unidata.ucar.edu/'
               'software/netcdf/examples/test_hgroups.nc')
        print(url)
        bytes = requests.get(url).content
        with xr.open_dataset(bytes) as ds:
            assert len(ds['UTC_time']) == 74
            assert ds['UTC_time'].attrs['name'] == 'time'

    def test_h5bytes(self):
        import h5py
        bio = BytesIO()
        with h5py.File(bio) as ds:
Member: Wouldn't it be nice if we supported writing to file-like objects, too? :)

(But don't do that now, this PR is a nice logical size already.)

Contributor Author: Agreed. Hopefully someone else could pick that up!

            v = np.array(2.0)
            ds['scalar'] = v
        bio.seek(0)
        with xr.open_dataset(bio) as ds:
            v = ds['scalar']
Member: prefer using assert_identical and comparing to another expected dataset object.

Contributor Author (@scottyhq, Mar 6, 2019): I've changed that test to use assert_identical, and am using with raises_regex() to make sure the new error exceptions are hit.

            assert v == np.array(2.0)
            assert v.dtype == 'float64'
            assert v.ndim == 0
            assert list(v.attrs) == []
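For reference, the assert_identical style the reviewer suggested could look roughly like this, reusing bio from the test above (the expected dataset is inferred from the values written there):

from xarray.testing import assert_identical

expected = xr.Dataset({'scalar': ((), 2.0)})  # 0-d float64, no attrs
bio.seek(0)
with xr.open_dataset(bio) as actual:
    assert_identical(expected, actual)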


@requires_h5netcdf
@requires_dask
@pytest.mark.filterwarnings('ignore:deallocating CachingFileManager')