diff --git a/Makefile b/Makefile
index 6b8827d2..948e15f4 100644
--- a/Makefile
+++ b/Makefile
@@ -10,6 +10,7 @@ pytest:
 
 test-docs:
 	$(DOCTEST) src/malco/*.py src/malco/*/*.py
+	$(DOCTEST) src/malco/runner.py src/malco/run/*.py src/malco/prepare/*.py src/malco/post_process/*.py
 
 %-doctest: %
 	$(DOCTEST) $<
diff --git a/src/malco/analysis/disease_avail_knowledge.py b/src/malco/analysis/disease_avail_knowledge.py
index f6f33a41..8e32d707 100644
--- a/src/malco/analysis/disease_avail_knowledge.py
+++ b/src/malco/analysis/disease_avail_knowledge.py
@@ -4,6 +4,7 @@
 # (1) Parse out disease genes discovered after 2008/9 (First thing in HPOA)
 # Look for a correlation between date annotated and disease correctly diagnosed.
 # Hypothesis: the older the easier to diagnose
+# PNR suggests: for each ppkt we have a date
 # (2) To start, looking at the two broad categories found/not-found, count average number of all links
 # After that, count average number of links of some kind
 # Then, something more graphy, such as, centrality? Maybe need to project out something first to find signal in the noise...
diff --git a/src/malco/prepare/setup_run_pars.py b/src/malco/prepare/setup_run_pars.py
index d27dd0b6..6944ee90 100644
--- a/src/malco/prepare/setup_run_pars.py
+++ b/src/malco/prepare/setup_run_pars.py
@@ -2,8 +2,9 @@
 import csv
 import sys
 
-def import_inputdata(self):
-    """Example inputfile is located in input_dir and named run_parameters.csv
+def import_inputdata(self) -> None:
+    """
+    Example input file is located in ``self.input_dir`` and named run_parameters.csv.
     It should contain something like:
     ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
     "en"
diff --git a/src/malco/run/run.py b/src/malco/run/run.py
index 61870d5f..1ad3e416 100644
--- a/src/malco/run/run.py
+++ b/src/malco/run/run.py
@@ -8,9 +8,25 @@ from malco.run.search_ppkts import search_ppkts
 
 
 def call_ontogpt(
-    lang, raw_results_dir, input_dir, model,
+    lang: str,
+    raw_results_dir: Path,
+    input_dir: Path,
+    model: str,
     modality: typing.Literal['several_languages', 'several_models'],
-):
+) -> None:
+    """
+    Wrapper used for parallel execution of ontogpt.
+
+    Args:
+        lang (str): Two-letter language code, for example "en" for English.
+        raw_results_dir (Path): Path to the raw results directory.
+        input_dir (Path): Path to the input directory containing the prompts.
+        model (str): Name of the model to be run, e.g. "gpt-4-turbo".
+        modality (str): Whether to run several models in English ('several_models') or gpt-4o in several languages ('several_languages').
+
+    Returns:
+        None
+    """
     prompt_dir = f'{input_dir}/prompts/'
     if modality == 'several_languages':
         lang_or_model_dir = lang
diff --git a/src/malco/run/search_ppkts.py b/src/malco/run/search_ppkts.py
index 61496a62..d0cfd560 100644
--- a/src/malco/run/search_ppkts.py
+++ b/src/malco/run/search_ppkts.py
@@ -7,8 +7,12 @@ def search_ppkts(input_dir, prompt_dir, raw_results_dir, lang_or_model):
     """
     Check what ppkts have already been computed in current output dir, for current run parameters.
+
     ontogpt will run every .txt that is in inputdir, we need a tmp inputdir
-    excluding already run cases.
+    excluding already run cases. The source of truth is the results.yaml output by ontogpt:
+    only an extracted_object containing terms is considered successfully run.
+
+    Note that rerunning
     """
     # List of "labels" that are already present in results.yaml iff terms is not None
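For orientation, a minimal sketch of how the newly typed call_ontogpt wrapper might be fanned out in parallel. This is not part of the diff: the language list, placeholder paths, model name, and the use of multiprocessing.Pool are illustrative assumptions; only the call_ontogpt signature is taken from the change above, and the real orchestration lives in src/malco/run/run.py.

import multiprocessing
from pathlib import Path

from malco.run.run import call_ontogpt

# Hypothetical run parameters; in malco these would come from run_parameters.csv.
languages = ["en", "es", "de"]
raw_results_dir = Path("path/to/raw_results")
input_dir = Path("path/to/input_dir")

if __name__ == "__main__":
    # One worker per language, each running ontogpt on that language's prompts.
    with multiprocessing.Pool(processes=len(languages)) as pool:
        pool.starmap(
            call_ontogpt,
            [
                (lang, raw_results_dir, input_dir, "gpt-4o", "several_languages")
                for lang in languages
            ],
        )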