
Commit

Fix docs formatting and phrasing for some datasets (kedro-org#107)
* Fix docs formatting and phrasing for some datasets

Signed-off-by: Deepyaman Datta <deepyaman.datta@utexas.edu>

* Manually fix files not resolved with patch command

Signed-off-by: Deepyaman Datta <deepyaman.datta@utexas.edu>

* Apply fix from kedro-org#98

Signed-off-by: Deepyaman Datta <deepyaman.datta@utexas.edu>

---------

Signed-off-by: Deepyaman Datta <deepyaman.datta@utexas.edu>
Signed-off-by: Danny Farah <danny_farah@mckinsey.com>
deepyaman authored and dannyrfar committed Mar 21, 2023
1 parent 084ac0c commit 411b1bc
Showing 30 changed files with 580 additions and 526 deletions.
35 changes: 17 additions & 18 deletions kedro-datasets/kedro_datasets/api/api_dataset.py
@@ -12,27 +12,26 @@ class APIDataSet(AbstractDataSet[None, requests.Response]):
"""``APIDataSet`` loads the data from HTTP(S) APIs.
It uses the python requests library: https://requests.readthedocs.io/en/latest/
Example adding a catalog entry with
`YAML API
<https://kedro.readthedocs.io/en/stable/data/\
data_catalog.html#use-the-data-catalog-with-the-yaml-api>`_:
Example usage for the
`YAML API <https://kedro.readthedocs.io/en/stable/data/\
data_catalog.html#use-the-data-catalog-with-the-yaml-api>`_:
.. code-block:: yaml
>>> usda:
>>> type: api.APIDataSet
>>> url: https://quickstats.nass.usda.gov
>>> params:
>>> key: SOME_TOKEN,
>>> format: JSON,
>>> commodity_desc: CORN,
>>> statisticcat_des: YIELD,
>>> agg_level_desc: STATE,
>>> year: 2000
>>>
Example using Python API:
usda:
type: api.APIDataSet
url: https://quickstats.nass.usda.gov
params:
key: SOME_TOKEN,
format: JSON,
commodity_desc: CORN,
statisticcat_des: YIELD,
agg_level_desc: STATE,
year: 2000
Example usage for the
`Python API <https://kedro.readthedocs.io/en/stable/data/\
data_catalog.html#use-the-data-catalog-with-the-code-api>`_:
::
>>> from kedro_datasets.api import APIDataSet
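For context, a minimal sketch of how the catalog entry above maps onto the Python API (not part of this diff; the token value is a placeholder, and `url`/`params` are assumed to mirror the YAML keys as in the docstring):

    from kedro_datasets.api import APIDataSet

    data_set = APIDataSet(
        url="https://quickstats.nass.usda.gov",
        params={
            "key": "SOME_TOKEN",  # placeholder credential, as in the YAML above
            "format": "JSON",
            "commodity_desc": "CORN",
            "statisticcat_des": "YIELD",
            "agg_level_desc": "STATE",
            "year": 2000,
        },
    )
    response = data_set.load()  # a requests.Response, per the class signature
    records = response.json()   # parse the JSON payload the API returns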
131 changes: 65 additions & 66 deletions kedro-datasets/kedro_datasets/dask/parquet_dataset.py
@@ -15,73 +15,72 @@ class ParquetDataSet(AbstractDataSet[dd.DataFrame, dd.DataFrame]):
  remote data services to handle the corresponding load and save operations:
  https://docs.dask.org/en/latest/how-to/connect-to-remote-data.html

- Example adding a catalog entry with
- `YAML API
- <https://kedro.readthedocs.io/en/stable/data/\
- data_catalog.html#use-the-data-catalog-with-the-yaml-api>`_:
+ Example usage for the
+ `YAML API <https://kedro.readthedocs.io/en/stable/data/\
+ data_catalog.html#use-the-data-catalog-with-the-yaml-api>`_:

  .. code-block:: yaml

-     >>> cars:
-     >>>   type: dask.ParquetDataSet
-     >>>   filepath: s3://bucket_name/path/to/folder
-     >>>   save_args:
-     >>>     compression: GZIP
-     >>>   credentials:
-     >>>     client_kwargs:
-     >>>       aws_access_key_id: YOUR_KEY
-     >>>       aws_secret_access_key: YOUR_SECRET
+     cars:
+       type: dask.ParquetDataSet
+       filepath: s3://bucket_name/path/to/folder
+       save_args:
+         compression: GZIP
+       credentials:
+         client_kwargs:
+           aws_access_key_id: YOUR_KEY
+           aws_secret_access_key: YOUR_SECRET

- Example using Python API (AWS S3):
+ Example usage for the
+ `Python API <https://kedro.readthedocs.io/en/stable/data/\
+ data_catalog.html#use-the-data-catalog-with-the-code-api>`_:
  ::

-     >>> from kedro.extras.datasets.dask import ParquetDataSet
+     >>> from kedro_datasets.dask import ParquetDataSet
      >>> import pandas as pd
      >>> import dask.dataframe as dd
      >>>
      >>> data = pd.DataFrame({'col1': [1, 2], 'col2': [4, 5],
      >>>                      'col3': [[5, 6], [7, 8]]})
      >>> ddf = dd.from_pandas(data, npartitions=2)
      >>>
      >>> data_set = ParquetDataSet(
      >>>     filepath="s3://bucket_name/path/to/folder",
      >>>     credentials={
      >>>         'client_kwargs':{
      >>>             'aws_access_key_id': 'YOUR_KEY',
      >>>             'aws_secret_access_key': 'YOUR SECRET',
      >>>         }
      >>>     },
      >>>     save_args={"compression": "GZIP"}
      >>> )
      >>> data_set.save(ddf)
      >>> reloaded = data_set.load()
      >>>
      >>> assert ddf.compute().equals(reloaded.compute())

- The output schema can also be explicitly specified using Triad's grammar.
- This is processed to map specific columns into pyarrow field types or schema.
-
- References:
- https://triad.readthedocs.io/en/latest/api/triad.collections.html#module-triad.collections.schema
- https://arrow.apache.org/docs/python/api/datatypes.html
+ The output schema can also be explicitly specified using
+ `Triad <https://triad.readthedocs.io/en/latest/api/\
+ triad.collections.html#module-triad.collections.schema>`_.
+ This is processed to map specific columns to
+ `PyArrow field types <https://arrow.apache.org/docs/python/api/\
+ datatypes.html>`_ or schema. For instance:

  .. code-block:: yaml

-     >>> parquet_dataset:
-     >>>   type: dask.ParquetDataSet
-     >>>   filepath: "s3://bucket_name/path/to/folder"
-     >>>   credentials:
-     >>>     client_kwargs:
-     >>>       aws_access_key_id: YOUR_KEY
-     >>>       aws_secret_access_key: "YOUR SECRET"
-     >>>   save_args:
-     >>>     compression: GZIP
-     >>>     schema:
-     >>>       col1: [int32]
-     >>>       col2: [int32]
-     >>>       col3: [[int32]]
+     parquet_dataset:
+       type: dask.ParquetDataSet
+       filepath: "s3://bucket_name/path/to/folder"
+       credentials:
+         client_kwargs:
+           aws_access_key_id: YOUR_KEY
+           aws_secret_access_key: "YOUR SECRET"
+       save_args:
+         compression: GZIP
+         schema:
+           col1: [int32]
+           col2: [int32]
+           col3: [[int32]]

  """

DEFAULT_LOAD_ARGS = {} # type: Dict[str, Any]
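As context for the schema grammar mentioned above, a sketch of how such a mapping resolves to PyArrow types, assuming Triad's `Schema` class and its `pa_schema` property behave as the linked Triad docs describe (this code is not part of the diff):

    from triad import Schema

    # In Triad's grammar, "[int32]" denotes a list-of-int32 field, so this
    # string mirrors the col1/col2/col3 mapping in the YAML entry above.
    schema = Schema("col1:int32,col2:int32,col3:[int32]")
    print(schema.pa_schema)  # the equivalent pyarrow.Schema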
27 changes: 13 additions & 14 deletions kedro-datasets/kedro_datasets/json/json_dataset.py
@@ -20,22 +20,21 @@ class JSONDataSet(AbstractVersionedDataSet[Any, Any]):
"""``JSONDataSet`` loads/saves data from/to a JSON file using an underlying
filesystem (e.g.: local, S3, GCS). It uses native json to handle the JSON file.
Example adding a catalog entry with the ``YAML API``:
Example usage for the
`YAML API <https://kedro.readthedocs.io/en/stable/data/\
data_catalog.html#use-the-data-catalog-with-the-yaml-api>`_:
.. code-block:: yaml
>>> json_dataset:
>>> type: json.JSONDataSet
>>> filepath: data/01_raw/location.json
>>>
>>> cars:
>>> type: json.JSONDataSet
>>> filepath: gcs://your_bucket/cars.json
>>> fs_args:
>>> project: my-project
>>> credentials: my_gcp_credentials
Example using Python API:
cars:
type: json.JSONDataSet
filepath: gcs://your_bucket/cars.json
fs_args:
project: my-project
credentials: my_gcp_credentials
Example usage for the
`Python API <https://kedro.readthedocs.io/en/stable/data/\
data_catalog.html#use-the-data-catalog-with-the-code-api>`_:
::
>>> from kedro_datasets.json import JSONDataSet
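The Python example is truncated above; for context, a minimal sketch of the round trip it demonstrates, using an illustrative local path (not part of this diff):

    from kedro_datasets.json import JSONDataSet

    data = {"col1": [1, 2], "col2": [4, 5]}

    # Save to, then reload from, a local JSON file; any fsspec-supported
    # location (S3, GCS, ...) works the same way via the filepath.
    data_set = JSONDataSet(filepath="test.json")
    data_set.save(data)
    reloaded = data_set.load()
    assert data == reloaded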
26 changes: 11 additions & 15 deletions kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py
@@ -26,21 +26,21 @@ class MatplotlibWriter(
"""``MatplotlibWriter`` saves one or more Matplotlib objects as
image files to an underlying filesystem (e.g. local, S3, GCS).
Example adding a catalog entry with the `YAML API
<https://kedro.readthedocs.io/en/stable/data/\
data_catalog.html#use-the-data-catalog-with-the-yaml-api>`_:
Example usage for the
`YAML API <https://kedro.readthedocs.io/en/stable/data/\
data_catalog.html#use-the-data-catalog-with-the-yaml-api>`_:
.. code-block:: yaml
>>> output_plot:
>>> type: matplotlib.MatplotlibWriter
>>> filepath: data/08_reporting/output_plot.png
>>> save_args:
>>> format: png
>>>
Example using the Python API:
output_plot:
type: matplotlib.MatplotlibWriter
filepath: data/08_reporting/output_plot.png
save_args:
format: png
Example usage for the
`Python API <https://kedro.readthedocs.io/en/stable/data/\
data_catalog.html#use-the-data-catalog-with-the-code-api>`_:
::
>>> import matplotlib.pyplot as plt
@@ -55,7 +55,6 @@ class MatplotlibWriter(
      >>> plot_writer.save(fig)
-
  Example saving a plot as a PDF file:
  ::

      >>> import matplotlib.pyplot as plt
@@ -70,9 +69,7 @@ class MatplotlibWriter(
      >>> plt.close()
      >>> pdf_plot_writer.save(fig)
-
  Example saving multiple plots in a folder, using a dictionary:
-
  ::

      >>> import matplotlib.pyplot as plt
@@ -90,7 +87,6 @@ class MatplotlibWriter(
      >>> dict_plot_writer.save(plots_dict)
-
  Example saving multiple plots in a folder, using a list:
  ::

      >>> import matplotlib.pyplot as plt
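Assembled from the fragments above, a minimal end-to-end sketch of the single-plot case, with an illustrative local output path (not part of this diff):

    import matplotlib.pyplot as plt
    from kedro_datasets.matplotlib import MatplotlibWriter

    fig = plt.figure()
    plt.plot([1, 2, 3], [4, 5, 6])
    plot_writer = MatplotlibWriter(filepath="output_plot.png")
    plt.close()  # close the pyplot window; the Figure object stays usable
    plot_writer.save(fig)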
43 changes: 22 additions & 21 deletions kedro-datasets/kedro_datasets/pandas/csv_dataset.py
@@ -25,30 +25,31 @@ class CSVDataSet(AbstractVersionedDataSet[pd.DataFrame, pd.DataFrame]):
"""``CSVDataSet`` loads/saves data from/to a CSV file using an underlying
filesystem (e.g.: local, S3, GCS). It uses pandas to handle the CSV file.
Example adding a catalog entry with
`YAML API
<https://kedro.readthedocs.io/en/stable/data/\
data_catalog.html#use-the-data-catalog-with-the-yaml-api>`_:
Example usage for the
`YAML API <https://kedro.readthedocs.io/en/stable/data/\
data_catalog.html#use-the-data-catalog-with-the-yaml-api>`_:
.. code-block:: yaml
>>> cars:
>>> type: pandas.CSVDataSet
>>> filepath: data/01_raw/company/cars.csv
>>> load_args:
>>> sep: ","
>>> na_values: ["#NA", NA]
>>> save_args:
>>> index: False
>>> date_format: "%Y-%m-%d %H:%M"
>>> decimal: .
>>>
>>> motorbikes:
>>> type: pandas.CSVDataSet
>>> filepath: s3://your_bucket/data/02_intermediate/company/motorbikes.csv
>>> credentials: dev_s3
Example using Python API:
cars:
type: pandas.CSVDataSet
filepath: data/01_raw/company/cars.csv
load_args:
sep: ","
na_values: ["#NA", NA]
save_args:
index: False
date_format: "%Y-%m-%d %H:%M"
decimal: .
motorbikes:
type: pandas.CSVDataSet
filepath: s3://your_bucket/data/02_intermediate/company/motorbikes.csv
credentials: dev_s3
Example usage for the
`Python API <https://kedro.readthedocs.io/en/stable/data/\
data_catalog.html#use-the-data-catalog-with-the-code-api>`_:
::
>>> from kedro_datasets.pandas import CSVDataSet
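The Python example is truncated above; for context, a minimal sketch of the round trip it demonstrates, using an illustrative local path (not part of this diff):

    import pandas as pd
    from kedro_datasets.pandas import CSVDataSet

    data = pd.DataFrame({"col1": [1, 2], "col2": [4, 5], "col3": [5, 6]})

    data_set = CSVDataSet(filepath="test.csv")
    data_set.save(data)           # writes without the index (the default save_args)
    reloaded = data_set.load()
    assert data.equals(reloaded)  # the round trip preserves the frame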