Update signature open_dataset for API v2 #4547

Merged — 54 commits, Nov 6, 2020
Changes from 48 commits

Commits (54)
f961606
add in api.open_dataset dispatching to stub apiv2
aurghs Sep 25, 2020
fb166fa
remove in apiv2 check for input AbstractDataStore
aurghs Sep 25, 2020
0221eec
bugfix typo
aurghs Sep 25, 2020
36a02c7
add kwarg engines in _get_backend_cls needed by apiv2
aurghs Sep 25, 2020
cfb8cb8
add alpha support for h5netcdf
aurghs Sep 25, 2020
4256bc8
style: clean not used code, modify some variable/function name
aurghs Sep 28, 2020
1bc7391
Add ENGINES entry for cfgrib.
Sep 28, 2020
748fe5a
Define function open_backend_dataset_cfgrib() to be used in apiv2.py.
Sep 28, 2020
fb368fe
Apply black to check formatting.
Sep 28, 2020
80e111c
Apply black to check formatting.
Sep 28, 2020
e15ca6b
add dummy zarr apiv2 backend
aurghs Sep 28, 2020
025cc87
Merge branch 'master' into backend-read-refactor
aurghs Sep 28, 2020
4b19399
align apiv2.open_dataset to api.open_dataset
aurghs Sep 28, 2020
572595f
remove unused extra_coords in open_backend_dataset_*
aurghs Sep 29, 2020
d6e632e
Merge remote-tracking branch 'origin/cfgrib_refactor' into backend-re…
aurghs Sep 29, 2020
74aba14
remove extra_coords in open_backend_dataset_cfgrib
aurghs Sep 29, 2020
d6280ec
transform zarr maybe_chunk and get_chunks in classmethod
aurghs Sep 29, 2020
c0e0f34
make alpha zarr apiv2 working
aurghs Sep 29, 2020
6431101
refactor apiv2.open_dataset:
aurghs Sep 29, 2020
50d1ebe
move dataset_from_backend_dataset out of apiv2.open_dataset
aurghs Sep 30, 2020
383d323
remove blank lines
aurghs Sep 30, 2020
457a09c
remove blank lines
aurghs Sep 30, 2020
2803fe3
style
aurghs Sep 30, 2020
08db0bd
Re-write error messages
alexamici Sep 30, 2020
1f11845
Fix code style
alexamici Sep 30, 2020
93303b1
Fix code style
alexamici Sep 30, 2020
bc2fe00
remove unused import
aurghs Sep 30, 2020
d694146
replace warning with ValueError for not supported kwargs in backends
aurghs Oct 8, 2020
56f4d3f
change zarr.ZarStore.get_chunks into a static method
aurghs Oct 8, 2020
df23b18
group `backend_kwargs` and `kwargs` in `extra_tokes` argument in apiv…
aurghs Oct 8, 2020
a04e6ac
remove in open_backend_dayaset_${engine} signature kwarags and the re…
aurghs Oct 8, 2020
de29a4c
black
aurghs Oct 8, 2020
be8c23b
Change signature of open_dataset function in apiv2 to include explici…
Oct 12, 2020
feb486c
Set an alias for chunks='auto'.
Oct 19, 2020
8f6af46
Allign empty rows with previous version.
Oct 19, 2020
c9088d3
reverse changes in chunks management
aurghs Oct 21, 2020
fe8099c
move check on decoders from backends to open_dataset (apiv2)
aurghs Oct 21, 2020
fed8b3e
update documentation
aurghs Oct 22, 2020
6fec3ea
Change signature of open_dataset function in apiv2 to include explici…
Oct 12, 2020
231895e
Set an alias for chunks='auto'.
Oct 19, 2020
b88b567
Allign empty rows with previous version.
Oct 19, 2020
be51bc7
reverse changes in chunks management
aurghs Oct 21, 2020
5aa533d
move check on decoders from backends to open_dataset (apiv2)
aurghs Oct 21, 2020
7e75f1c
update documentation
aurghs Oct 22, 2020
2047d46
Merge branch 'change-signature-open_dataset' of github.com:bopen/xarr…
aurghs Oct 23, 2020
3057abb
change defaut value for decode_cf in open_dataset. The function bahav…
aurghs Oct 27, 2020
842fc29
Review docstring of open_dataset function.
Oct 27, 2020
ff1181c
bugfix typo
aurghs Oct 29, 2020
bdcf0fe
- add check on backends signatures
aurghs Nov 2, 2020
61be8a8
- black isort
aurghs Nov 2, 2020
c0b290a
- add type declaration in plugins.py
aurghs Nov 4, 2020
c217031
Fix the type hint for ENGINES
alexamici Nov 6, 2020
8530ff0
Drop special case and simplify resolve_decoders_kwargs
alexamici Nov 6, 2020
73328ac
isort
alexamici Nov 6, 2020
154 changes: 97 additions & 57 deletions xarray/backends/apiv2.py
@@ -1,3 +1,4 @@
import inspect
import os

from ..core.utils import is_remote_uri
@@ -23,7 +24,7 @@ def dataset_from_backend_dataset(
chunks,
cache,
overwrite_encoded_chunks,
extra_tokens,
**extra_tokens,
):
if not (isinstance(chunks, (int, dict)) or chunks is None):
if chunks != "auto":
@@ -73,17 +74,34 @@ def dataset_from_backend_dataset(
# Ensure source filename always stored in dataset object (GH issue #2550)
if "source" not in ds.encoding:
if isinstance(filename_or_obj, str):
ds.encoding["source"] = filename_or_obj
ds2.encoding["source"] = filename_or_obj

return ds2


def resolve_decoders_kwargs(decode_cf, engine, **decoders):
signature = inspect.signature(ENGINES[engine]).parameters
if decode_cf is False:
for d in decoders:
if d in signature and d != "use_cftime":
Review comment (Member):
Do we need this special case d != "use_cftime"? Does it break any tests if we simply remove it?

(My guess is that the existing code may not bother to set use_cftime = False, but only because the value of use_cftime is ignored if decode_times = False.)

Reply (Collaborator, Author):
I have forgotten to check it. You are right, we can remove it.

decoders[d] = decode_cf
return {k: v for k, v in decoders.items() if v is not None}
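
As the review thread above suggests, the special case can simply be dropped, which is what the later commit "Drop special case and simplify resolve_decoders_kwargs" does. A minimal sketch of the simplified helper, assuming the same ENGINES mapping from plugins.py; this is an illustration, not the exact final code:

import inspect

def resolve_decoders_kwargs(decode_cf, engine, **decoders):
    # Keyword parameters accepted by the selected backend's open function.
    signature = inspect.signature(ENGINES[engine]).parameters
    if decode_cf is False:
        # Disable every decoder the backend supports; no special case for use_cftime.
        for d in decoders:
            if d in signature:
                decoders[d] = False
    # Drop decoders left as None so the backend defaults still apply.
    return {k: v for k, v in decoders.items() if v is not None}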


def open_dataset(
filename_or_obj,
*,
engine=None,
chunks=None,
cache=None,
decode_cf=None,
mask_and_scale=None,
decode_times=None,
decode_timedelta=None,
use_cftime=None,
concat_characters=None,
decode_coords=None,
drop_variables=None,
backend_kwargs=None,
**kwargs,
):
@@ -94,70 +112,50 @@ def open_dataset(
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
ends with .gz, in which case the file is unzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
group : str, optional
Path to the netCDF4 group in the given file to open (only works for
netCDF4 files).
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. mask_and_scale defaults to True except for the
pseudonetcdf backend.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
autoclose : bool, optional
If True, automatically close files to avoid OS Error of too many files
being open. However, this option doesn't work with streams, e.g.,
BytesIO.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
decode_coords : bool, optional
If True, decode the 'coordinates' attribute to identify coordinates in
the resulting dataset.
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib", \
"pseudonetcdf", "zarr"}, optional
engine : str, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4".
"netcdf4". Options are: {"netcdf4", "scipy", "pydap", "h5netcdf",\
"pynio", "cfgrib", "pseudonetcdf", "zarr"}.
chunks : int or dict, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks={}`` loads the dataset with dask using a single
chunk for all arrays. When using ``engine="zarr"``, setting
``chunks='auto'`` will create dask chunks based on the variable's zarr
chunks.
lock : False or lock-like, optional
Resource lock to use when reading data from disk. Only relevant when
using dask or another form of parallelism. By default, appropriate
locks are chosen to safely read and write files with the currently
active dask scheduler.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
If True, cache data is loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
drop_variables: str or iterable, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
backend_kwargs: dict, optional
A dictionary of keyword arguments to pass on to the backend. This
may be useful when backend options would improve performance or
allow user control of dataset processing.
decode_cf : bool, optional
Setting ``decode_cf=False`` will disable ``mask_and_scale``,
``decode_times``, ``decode_timedelta``, ``concat_characters``,
``decode_coords``.
mask_and_scale : bool, optional
If True, array values equal to `_FillValue` are replaced with NA and other
values are scaled according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values, a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. mask_and_scale defaults to True except for the
pseudonetcdf backend. This keyword may not be supported by all the backends.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
This keyword may not be supported by all the backends.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, they remain encoded as numbers.
If None (default), assume the same value of decode_time.
This keyword may not be supported by all the backends.
use_cftime: bool, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
@@ -167,12 +165,38 @@ def open_dataset(
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_time.
raise an error. This keyword may not be supported by all the backends.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
This keyword may not be supported by all the backends.
decode_coords : bool, optional
If True, decode the 'coordinates' attribute to identify coordinates in
the resulting dataset. This keyword may not be supported by all the
backends.
drop_variables: str or iterable, optional
A variable or list of variables to exclude from the dataset parsing.
This may be useful to drop variables with problems or
inconsistent values.
backend_kwargs:
Additional keyword arguments passed on to the engine open function.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:

- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".

- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"pynio", "pseudonetcdf", "cfgrib".

See engine open function for kwargs accepted by each specific engine.


Returns
-------
@@ -202,12 +226,25 @@ def open_dataset(
if engine is None:
engine = _autodetect_engine(filename_or_obj)

decoders = resolve_decoders_kwargs(
decode_cf,
engine=engine,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)

backend_kwargs = backend_kwargs.copy()
overwrite_encoded_chunks = backend_kwargs.pop("overwrite_encoded_chunks", None)

open_backend_dataset = _get_backend_cls(engine, engines=ENGINES)
backend_ds = open_backend_dataset(
filename_or_obj,
drop_variables=drop_variables,
**decoders,
**backend_kwargs,
**{k: v for k, v in kwargs.items() if v is not None},
)
@@ -218,7 +255,10 @@ def open_dataset(
chunks,
cache,
overwrite_encoded_chunks,
{**backend_kwargs, **kwargs},
drop_variables=drop_variables,
**decoders,
**backend_kwargs,
**kwargs,
)

return ds
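
For orientation, a minimal usage sketch of the keyword-only signature documented above. The module path matches this PR's experimental apiv2 module; the file name, group name, and chunk sizes are purely hypothetical, and h5netcdf plus dask are assumed to be installed:

from xarray.backends import apiv2

# Decoder flags are explicit keywords; engine-specific options such as `group`
# travel through **kwargs and are forwarded to the backend open function.
ds = apiv2.open_dataset(
    "example.nc",            # hypothetical NetCDF file
    engine="h5netcdf",
    chunks={"time": 10},     # hypothetical chunking along a "time" dimension
    decode_times=True,
    backend_kwargs={},
    group="forecast",        # hypothetical netCDF4 group, passed via **kwargs
)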
8 changes: 0 additions & 8 deletions xarray/backends/cfgrib_.py
@@ -76,7 +76,6 @@ def get_encoding(self):
def open_backend_dataset_cfgrib(
filename_or_obj,
*,
decode_cf=True,
mask_and_scale=True,
decode_times=None,
concat_characters=None,
@@ -93,13 +92,6 @@ def open_backend_dataset_cfgrib(
time_dims=("time", "step"),
):

if not decode_cf:
mask_and_scale = False
decode_times = False
concat_characters = False
decode_coords = False
decode_timedelta = False

store = CfGribDataStore(
filename_or_obj,
indexpath=indexpath,
8 changes: 0 additions & 8 deletions xarray/backends/h5netcdf_.py
@@ -328,7 +328,6 @@ def close(self, **kwargs):
def open_backend_dataset_h5necdf(
filename_or_obj,
*,
decode_cf=True,
mask_and_scale=True,
decode_times=None,
concat_characters=None,
@@ -343,13 +342,6 @@ def open_backend_dataset_h5necdf(
phony_dims=None,
):

if not decode_cf:
mask_and_scale = False
decode_times = False
concat_characters = False
decode_coords = False
decode_timedelta = False

store = H5NetCDFStore.open(
filename_or_obj,
format=format,
8 changes: 0 additions & 8 deletions xarray/backends/zarr.py
@@ -684,7 +684,6 @@ def open_zarr(

def open_backend_dataset_zarr(
filename_or_obj,
decode_cf=True,
mask_and_scale=True,
decode_times=None,
concat_characters=None,
@@ -700,13 +699,6 @@ def open_backend_dataset_zarr(
chunk_store=None,
):

if not decode_cf:
mask_and_scale = False
decode_times = False
concat_characters = False
decode_coords = False
decode_timedelta = False

store = ZarrStore.open_group(
filename_or_obj,
group=group,
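
All three backend open functions above drop the local `if not decode_cf:` shortcut; it is now resolved once in apiv2.open_dataset via resolve_decoders_kwargs. A hedged illustration of the intended equivalence described in the docstring, with a hypothetical file name:

from xarray.backends import apiv2

# With the shortcut handled centrally, these two calls are meant to be equivalent:
ds_a = apiv2.open_dataset("example.nc", engine="h5netcdf", decode_cf=False, backend_kwargs={})
ds_b = apiv2.open_dataset(
    "example.nc",
    engine="h5netcdf",
    mask_and_scale=False,
    decode_times=False,
    decode_timedelta=False,
    concat_characters=False,
    decode_coords=False,
    backend_kwargs={},
)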