intake · andersy005 · Jun 4, 2022 · Jun 1, 2022 · Jun 1, 2022 · Jun 1, 2022
@@ -96,11 +96,11 @@ The column names can optionally be associated with a controlled vocabulary, such
 
 An assets object describes the columns in the CSV file relevant for opening the actual data files.
 
-| Element            | Type   | Description                                                                                                                        |
-| ------------------ | ------ | ---------------------------------------------------------------------------------------------------------------------------------- |
-| column_name        | string | **REQUIRED.** The name of the column containing the path to the asset. Must be in the header of the CSV file.                      |
-| format             | string | The data format. Valid values are `netcdf` and `zarr`. If specified, it means that all data in the catalog is the same type.       |
-| format_column_name | string | The column name which contains the data format, allowing for variable data types in one catalog. Mutually exclusive with `format`. |
+| Element            | Type   | Description                                                                                                                                                                                                  |
+| ------------------ | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| column_name        | string | **REQUIRED.** The name of the column containing the path to the asset. Must be in the header of the CSV file.                                                                                                |
+| format             | string | The data format. Valid values are `netcdf`, `zarr`, or `reference` ([`kerchunk`](https://github.com/fsspec/kerchunk) reference files). If specified, all data in the catalog is assumed to have this format. |
+| format_column_name | string | The column name which contains the data format, allowing for variable data types in one catalog. Mutually exclusive with `format`.                                                                           |
 
 ### Aggregation Control Object
 

@@ -44,10 +44,19 @@ def _open_dataset(
     requested_variables=None,
     additional_attrs=None,
     expand_dims=None,
+    data_format=None,
 ):
 
     _can_be_local = fsspec.utils.can_be_local(urlpath)
     storage_options = xarray_open_kwargs.get('backend_kwargs', {}).get('storage_options', {})
+
+    # Support kerchunk datasets, setting the file object (fo) and urlpath
+    if data_format == 'reference':
+        if 'storage_options' not in xarray_open_kwargs.keys():
+            xarray_open_kwargs['storage_options'] = {}
+        xarray_open_kwargs['storage_options']['fo'] = urlpath
+        urlpath = 'reference://'
+
     if xarray_open_kwargs['engine'] == 'zarr':
         url = urlpath
     elif _can_be_local:
@@ -220,6 +229,7 @@ def _open_dataset(self):
                         if agg.type.value == 'join_new'
                     },
                     requested_variables=self.requested_variables,
+                    data_format=record['_data_format_'],
                     additional_attrs=record.to_dict(),
                 )
                 for _, record in self.df.iterrows()