Skip to content

Commit

Permalink
69 improve docs again (#80)
Browse files Browse the repository at this point in the history
* updated parallel/netcdf example and included notebook in sphinx docs

* fixed cli and increase test coverage

* generalized tests
  • Loading branch information
veenstrajelmer authored Mar 13, 2024
1 parent 83ecb7f commit b74dcee
Show file tree
Hide file tree
Showing 13 changed files with 196 additions and 447 deletions.
9 changes: 1 addition & 8 deletions .github/workflows/sphinx-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,8 @@ jobs:
python -m pip install -e .[docs]
- name: Sphinx build
run: |
cp notebooks/measurements.ipynb docs
sphinx-build docs _build
#- name: Deploy to GitHub Pages
# uses: peaceiris/actions-gh-pages@v3
# if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
# with:
# publish_branch: gh-pages
# github_token: ${{ secrets.GITHUB_TOKEN }}
# publish_dir: _build/
# force_orphan: true
- uses: actions/upload-pages-artifact@v1
with:
path: _build/
Expand Down
29 changes: 11 additions & 18 deletions ddlpy/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,9 @@

"""Console script for ddlpy."""
import sys
import io
import logging

import click
import pandas as pd
import dateutil
import ddlpy


Expand All @@ -25,7 +22,11 @@ def cli(verbose, args=None):
# Define a command
# Each command has options which are read from the console.
@cli.command()
@click.argument('output', type=click.STRING )
@click.option(
'--output',
help='output of locations json file',
default='locations.json'
)
@click.option(
'--quantity',
help='Grootheid code',
Expand Down Expand Up @@ -103,13 +104,11 @@ def locations(output,

# Another command to get the measurements from locations
@cli.command()
@click.option(
'--start-date',
help='Start date of the measurements'
@click.argument(
'start-date',
)
@click.option(
'--end-date',
help='End date of the measurements'
@click.argument(
'end-date',
)
@click.option(
'--locations',
Expand All @@ -123,14 +122,8 @@ def measurements(locations, start_date, end_date):
try:
locations_df = pd.read_json(locations, orient='records')
except:
raise ValueError('location file not existing. Create one or specify its name.')

# conver strings to dates
if start_date:
start_date = dateutil.parser.parse(start_date)
if end_date:
end_date = dateutil.parser.parse(end_date)

raise ValueError('locations.json file not found. First run "ddlpy locations"')

for obs in range(locations_df.shape[0]): #goes through rows in table
selected = locations_df.loc[obs]

Expand Down
27 changes: 16 additions & 11 deletions ddlpy/ddlpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,21 @@ def _measurements_slice(location, start_date, end_date):
return df


def _clean_dataframe(measurements):
    """Return a cleaned copy of a raw measurements dataframe.

    Cleaning consists of dropping fully duplicated rows, removing the
    'Tijdstip' column (if present) and sorting the result on its time index.

    Parameters
    ----------
    measurements : pandas.DataFrame
        Raw measurements as returned by the DDL webservice.

    Returns
    -------
    pandas.DataFrame
        Deduplicated, column-pruned and time-sorted dataframe.
    """
    n_before = len(measurements)

    # Deduplicate first: rows that differ only in 'Tijdstip' (e.g. distinct
    # Grootheden/Groeperingen at the same timestep) must not be collapsed,
    # so the column is removed only after drop_duplicates().
    cleaned = measurements.drop_duplicates()
    cleaned = cleaned.drop("Tijdstip", axis=1, errors='ignore')

    # The DDL service returns rows in no guaranteed order; sort on the
    # (time) index so callers get a chronological dataframe.
    cleaned = cleaned.sort_index()

    ndropped = n_before - len(cleaned)
    logger.debug(f"{ndropped} duplicated values dropped")
    return cleaned


def measurements(location, start_date, end_date, clean_df=True):
"""return measurements for the given location and time window (start_date, end_date)"""

Expand Down Expand Up @@ -342,17 +357,7 @@ def measurements(location, start_date, end_date, clean_df=True):
measurements = pd.concat(measurements)

if clean_df:
len_raw = len(measurements)
# drop duplicate rows (preserves e.g. different Grootheden/Groeperingen at same timestep)
measurements = measurements.drop_duplicates()

# remove Tijdstap column, has to be done after drop_duplicates to avoid too much to be dropped
measurements = measurements.drop("Tijdstip", axis=1)

# sort dataframe on time, ddl returns non-sorted data
measurements = measurements.sort_index()
ndropped = len_raw - len(measurements)
logger.debug(f"{ndropped} duplicated values dropped")
measurements = _clean_dataframe(measurements)

return measurements

Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx_mdinclude']
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx_mdinclude', 'nbsphinx']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
Expand Down
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ ddlpy documentation
:hidden:

installation
usage
measurements
modules
contributing
history
7 changes: 0 additions & 7 deletions docs/usage.rst

This file was deleted.

102 changes: 0 additions & 102 deletions examples/1_get_data_from_waterInfo_parallel.py

This file was deleted.

Loading

0 comments on commit b74dcee

Please sign in to comment.