[ENH] add examples in APIDataSet docstring
Signed-off-by: jmcdonnell <jmcdonnell@fieldbox.ai>
jmcdonnell committed May 11, 2023
1 parent bc528d8 commit 245c63b
Showing 5 changed files with 36 additions and 22 deletions.
52 changes: 36 additions & 16 deletions kedro-datasets/kedro_datasets/api/api_dataset.py
@@ -14,27 +14,18 @@ class APIDataSet(AbstractDataSet[None, requests.Response]):
"""``APIDataSet`` loads the data from HTTP(S) APIs.
It uses the python requests library: https://requests.readthedocs.io/en/latest/
Example usage for the `YAML API <https://kedro.readthedocs.io/en/stable/data/\
data_catalog.html#use-the-data-catalog-with-the-yaml-api>`_:
.. code-block:: yaml
usda:
  type: api.APIDataSet
  url: https://quickstats.nass.usda.gov
  params:
    key: SOME_TOKEN,
    format: JSON,
    commodity_desc: CORN,
    statisticcat_des: YIELD,
    agg_level_desc: STATE,
    year: 2000
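For illustration, an equivalent entry can also be built programmatically with ``DataCatalog.from_config``. This is a minimal sketch, assuming the config dictionary simply mirrors the YAML entry above (the token is a placeholder):

>>> from kedro.io import DataCatalog
>>>
>>> catalog = DataCatalog.from_config({
>>>     "usda": {
>>>         "type": "api.APIDataSet",
>>>         "url": "https://quickstats.nass.usda.gov",
>>>         "params": {"key": "SOME_TOKEN", "format": "JSON"},
>>>     }
>>> })
>>> data = catalog.load("usda")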
Example usage for the `Python API <https://kedro.readthedocs.io/en/stable/data/\
data_catalog.html#use-the-data-catalog-with-the-code-api>`_:
::
>>> from kedro_datasets.api import APIDataSet
>>>
@@ -52,6 +43,35 @@ class APIDataSet(AbstractDataSet[None, requests.Response]):
>>> )
>>> data = data_set.load()
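The collapsed hunk above hides the constructor arguments. A minimal sketch of the full call, reusing the query parameters from the YAML example (the hidden lines may differ), could look like:

>>> data_set = APIDataSet(
>>>     url="https://quickstats.nass.usda.gov",
>>>     params={
>>>         "key": "SOME_TOKEN",
>>>         "format": "JSON",
>>>         "commodity_desc": "CORN",
>>>         "statisticcat_des": "YIELD",
>>>         "agg_level_desc": "STATE",
>>>         "year": 2000,
>>>     },
>>> )
>>> data = data_set.load()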
``APIDataSet`` can also be used to save output to a remote server using
HTTP(S) methods.
>>> import pandas as pd
>>> example_table = pd.DataFrame({"col1": ["val1", "val2"], "col2": ["val3", "val4"]})
Here we initialise our ``APIDataSet`` with the parameters needed to make requests
to the configured remote server.
>>> data_set = APIDataSet(
>>>     url="url_of_remote_server",
>>>     save_args={"method": "POST", "chunk_size": 1},
>>> )
On initialisation, we can specify all the necessary parameters in the ``save_args``
dictionary. The default HTTP(S) method is POST, but all other methods are supported.
Two important parameters to keep in mind are ``timeout`` and ``chunk_size``.
``timeout`` defines how long our program waits for a response after a request;
``chunk_size`` is only used if the input to the save method is a list, and divides
the request into chunks of size ``chunk_size``. For example, here we will send two
requests, each containing one row of our example DataFrame.
>>> data_to_save = example_table.to_dict(orient="records")
>>> data_set.save(data_to_save)
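For reference, ``to_dict(orient="records")`` produces one dictionary per row, so
with ``chunk_size=1`` the two records below are sent as two separate requests:

>>> data_to_save
[{'col1': 'val1', 'col2': 'val3'}, {'col1': 'val2', 'col2': 'val4'}]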
If the data passed to the save method is not a list, ``APIDataSet`` will check
whether it can be loaded as JSON. If it can, the data is sent unchanged in a single
request. Otherwise, the ``_save`` method will try to dump the data to JSON format
and execute the request.
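A rough sketch of that non-list branch, as an illustration of the behaviour
described above rather than the library's actual ``_save`` code:

>>> import json
>>>
>>> def prepare_body(data):
>>>     try:
>>>         json.loads(data)   # already valid JSON: send unchanged
>>>         return data
>>>     except (TypeError, ValueError):
>>>         return json.dumps(data)  # otherwise, dump to JSON first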
"""

DEFAULT_SAVE_ARGS = {
2 changes: 0 additions & 2 deletions kedro-datasets/kedro_datasets/pandas/generic_dataset.py
@@ -181,7 +181,6 @@ def _ensure_file_system_target(self) -> None:
)

def _load(self) -> pd.DataFrame:

self._ensure_file_system_target()

load_path = get_filepath_str(self._get_load_path(), self._protocol)
@@ -196,7 +195,6 @@ def _save(self, data: pd.DataFrame) -> None:
)

def _save(self, data: pd.DataFrame) -> None:

self._ensure_file_system_target()

save_path = get_filepath_str(self._get_save_path(), self._protocol)
1 change: 0 additions & 1 deletion kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py
@@ -126,7 +126,6 @@ def __init__(

# Update properties in load_args and save_args with credentials.
if credentials is not None:

# Check credentials for bad inputs.
for cred_key, cred_value in credentials.items():
if cred_value is None:
2 changes: 0 additions & 2 deletions kedro-datasets/tests/matplotlib/test_matplotlib_writer.py
@@ -170,7 +170,6 @@ def test_dict_save(self, tmp_path, mock_dict_plot, plot_writer, mocked_s3_bucket
plot_writer.save(mock_dict_plot)

for colour in COLOUR_LIST:

download_path = tmp_path / "downloaded_image.png"
actual_filepath = tmp_path / "locally_saved.png"

@@ -361,7 +360,6 @@ def test_list_save(self, tmp_path, mock_list_plot, versioned_plot_writer):
versioned_plot_writer.save(mock_list_plot)

for index in range(5):

test_path = tmp_path / "test_image.png"
versioned_filepath = str(versioned_plot_writer._get_load_path())

1 change: 0 additions & 1 deletion kedro-datasets/tests/polars/test_csv_dataset.py
@@ -77,7 +77,6 @@ def mocked_dataframe():

@pytest.fixture
def mocked_csv_in_s3(mocked_s3_bucket, mocked_dataframe: pl.DataFrame):

binarycsv = mocked_dataframe.write_csv()[:-1]

mocked_s3_bucket.put_object(
