Skip to content

Commit

Permalink
Merge pull request #611 from NatLibFi/autogenerate-commands-wiki
Browse files Browse the repository at this point in the history
Autogenerated CLI commands documentation on ReadTheDocs
  • Loading branch information
juhoinkinen authored Aug 26, 2022
2 parents 576c7b7 + 3da99fa commit 6f9488a
Show file tree
Hide file tree
Showing 7 changed files with 185 additions and 22 deletions.
6 changes: 5 additions & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ python:
- voikko
- nn
- omikuji
- fasttext
- yake
- pycld3
- spacy
- requirements: docs/requirements.txt
- method: setuptools
- method: pip
path: .
system_packages: true
72 changes: 58 additions & 14 deletions annif/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def backend_param_option(f):
return click.option(
'--backend-param', '-b', multiple=True,
help='Override backend parameter of the config file. ' +
'Syntax: "-b <backend>.<parameter>=<value>".')(f)
'Syntax: `-b <backend>.<parameter>=<value>`.')(f)


@cli.command('list-projects')
Expand All @@ -134,6 +134,12 @@ def backend_param_option(f):
def run_list_projects():
"""
List available projects.
\f
Show a list of currently defined projects. Projects are defined in a
configuration file, normally called ``projects.cfg``. See `Project
configuration
<https://github.com/NatLibFi/Annif/wiki/Project-configuration>`_
for details.
"""

template = "{0: <25}{1: <45}{2: <10}{3: <7}"
Expand Down Expand Up @@ -185,6 +191,19 @@ def run_clear_project(project_id):
def run_loadvoc(project_id, force, subjectfile):
"""
Load a vocabulary for a project.
\f
This will load the vocabulary to be used in subject indexing. Note that
although ``PROJECT_ID`` is a parameter of the command, the vocabulary is
shared by all the projects with the same vocab identifier in the project
configuration, and the vocabulary only needs to be loaded for one of those
projects.
If a vocabulary has already been loaded, reinvoking loadvoc with a new
subject file will update Annif’s internal vocabulary: label names are
updated and any subject not appearing in the new subject file is removed.
Note that new subjects will not be suggested before the project is
retrained with the updated vocabulary. The update behavior can be
overridden with the ``--force`` option.
"""
proj = get_project(project_id)
if annif.corpus.SubjectFileSKOS.is_rdf_file(subjectfile):
Expand Down Expand Up @@ -216,6 +235,13 @@ def run_loadvoc(project_id, force, subjectfile):
def run_train(project_id, paths, cached, docs_limit, jobs, backend_param):
"""
Train a project on a collection of documents.
\f
This will train the project using the documents from ``PATHS`` (directories
or possibly gzipped TSV files) in a single batch operation. If ``--cached``
is set, preprocessed training data from the previous run is reused instead
of the input documents; see `Reusing preprocessed training data
<https://github.com/NatLibFi/Annif/wiki/
Reusing-preprocessed-training-data>`_.
"""
proj = get_project(project_id)
backend_params = parse_backend_params(backend_param, proj)
Expand All @@ -241,6 +267,10 @@ def run_train(project_id, paths, cached, docs_limit, jobs, backend_param):
def run_learn(project_id, paths, docs_limit, backend_param):
"""
Further train an existing project on a collection of documents.
\f
Similar to the ``train`` command. This will continue training an already
trained project using the documents given by ``PATHS`` in a single batch
operation. Not supported by all backends.
"""
proj = get_project(project_id)
backend_params = parse_backend_params(backend_param, proj)
Expand All @@ -258,6 +288,9 @@ def run_learn(project_id, paths, docs_limit, backend_param):
def run_suggest(project_id, limit, threshold, backend_param):
"""
Suggest subjects for a single document from standard input.
\f
This will read a text document from standard input and suggest subjects for
it.
"""
project = get_project(project_id)
text = sys.stdin.read()
Expand Down Expand Up @@ -293,7 +326,8 @@ def run_index(project_id, directory, suffix, force,
limit, threshold, backend_param):
"""
Index a directory with documents, suggesting subjects for each document.
Write the results in TSV files with the given suffix.
Write the results in TSV files with the given suffix (``.annif`` by
default).
"""
project = get_project(project_id)
backend_params = parse_backend_params(backend_param, project)
Expand Down Expand Up @@ -370,11 +404,17 @@ def run_eval(
jobs,
backend_param):
"""
Analyze documents and evaluate the result.
Compare the results of automated indexing against a gold standard. The
path may be either a TSV file with short documents or a directory with
documents in separate files.
Suggest subjects for documents and evaluate the results by comparing
against a gold standard.
\f
With this command the documents from ``PATHS`` (directories or possibly
gzipped TSV files) will be assigned subject suggestions and then
statistical measures are calculated that quantify how well the suggested
subjects match the gold-standard subjects in the documents.
Normally the output is the list of the metrics calculated across documents.
If the ``--results-file <FILENAME>`` option is given, the metrics are
calculated separately for each subject, and written to the given file.
"""

project = get_project(project_id)
Expand Down Expand Up @@ -428,12 +468,14 @@ def run_eval(
@common_options
def run_optimize(project_id, paths, docs_limit, backend_param):
"""
Analyze documents, testing multiple limits and thresholds.
Evaluate the analysis results for a directory with documents against a
gold standard given in subject files. Test different limit/threshold
values and report the precision, recall and F-measure of each combination
of settings.
Suggest subjects for documents, testing multiple limits and thresholds.
\f
This command will use different limit (maximum number of subjects) and
score threshold values when assigning subjects to each document given by
``PATHS`` and compare the results against the gold standard subjects in the
documents. The output is a list of parameter combinations and their scores.
From the output, you can determine the optimum limit and threshold
parameters depending on which measure you want to target.
"""
project = get_project(project_id)
backend_params = parse_backend_params(backend_param, project)
Expand Down Expand Up @@ -519,7 +561,9 @@ def run_optimize(project_id, paths, docs_limit, backend_param):
def run_hyperopt(project_id, paths, docs_limit, trials, jobs, metric,
results_file):
"""
Optimize the hyperparameters of a project using a validation corpus.
Optimize the hyperparameters of a project using validation documents from
``PATHS``. Not supported by all backends. Output is a list of trial results
and a report of the best performing parameters.
"""
proj = get_project(project_id)
documents = open_documents(paths, proj.subjects,
Expand Down
3 changes: 2 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
extensions = [
'sphinx.ext.autodoc',
'sphinxcontrib.apidoc',
'sphinx_click',
]

# Add any paths that contain templates here, relative to this directory.
Expand All @@ -50,7 +51,7 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'venv']

apidoc_module_dir = '../annif'
apidoc_output_dir = 'source'
Expand Down
7 changes: 4 additions & 3 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ You are reading documentation for version |version|.


.. toctree::
:maxdepth: 3
:caption: Annif API Reference:
:maxdepth: 1
:caption: Contents:

source/commands
source/annif


Indices and tables
==================
******************

* :ref:`genindex`
* :ref:`modindex`
Expand Down
4 changes: 2 additions & 2 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
sphinx==4.5.*
sphinx-rtd-theme
sphinxcontrib-apidoc==0.3.0
docutils<0.18

sphinx-click
docutils==0.16
113 changes: 113 additions & 0 deletions docs/source/commands.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
############
CLI commands
############

These are the command-line interface commands of Annif, with REST API
equivalents when applicable.

To reference a project, most of the commands take a ``PROJECT_ID`` parameter,
which is an alphanumeric string ``(A-Za-z0-9_-)``. Common options of the
commands are ``--projects`` for setting a (non-default) path to a `project
configuration file
<https://github.com/NatLibFi/Annif/wiki/Project-configuration>`_ and
``--verbosity`` for selecting logging level.

.. contents::
:local:
:backlinks: none

**********************
Project administration
**********************

.. click:: annif.cli:run_loadvoc
:prog: annif loadvoc

**REST equivalent**

N/A

.. click:: annif.cli:run_list_projects
:prog: annif list-projects

**REST equivalent**
::

GET /projects/

.. click:: annif.cli:run_show_project
:prog: annif show-project

**REST equivalent**
::

GET /projects/<PROJECT_ID>

.. click:: annif.cli:run_clear_project
:prog: annif clear-project

**REST equivalent**

N/A

****************************
Subject index administration
****************************

.. click:: annif.cli:run_train
:prog: annif train

**REST equivalent**

N/A

.. click:: annif.cli:run_learn
:prog: annif learn

**REST equivalent**
::

POST /projects/<PROJECT_ID>/learn

.. click:: annif.cli:run_suggest
:prog: annif suggest

**REST equivalent**
::

POST /projects/<PROJECT_ID>/suggest

.. click:: annif.cli:run_eval
:prog: annif eval

**REST equivalent**

N/A

.. click:: annif.cli:run_optimize
:prog: annif optimize

**REST equivalent**

N/A

.. click:: annif.cli:run_index
:prog: annif index

**REST equivalent**

N/A

.. click:: annif.cli:run_hyperopt
:prog: annif hyperopt

**REST equivalent**

N/A

.. click:: flask.cli:run_command
:prog: annif run

**REST equivalent**

N/A
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def read(fname):
'swagger_ui_bundle',
'flask>=1.0.4,<3',
'flask-cors',
'click==8.0.*',
'click==8.1.*',
'click-log',
'joblib==1.1.0',
'nltk',
Expand Down

0 comments on commit 6f9488a

Please sign in to comment.