Fix a few documentation and formatting errors
amrit110 committed Oct 1, 2024
1 parent c46dac7 commit 359f860
Showing 9 changed files with 43 additions and 76 deletions.
odyssey/evals/evaluation.py (10 changes: 6 additions & 4 deletions)
@@ -22,11 +22,12 @@ def calculate_metrics(
      y_prob: Union[np.ndarray, torch.Tensor, list],
  ) -> Dict[str, float]:
      """
-     Calculate a variety of performance metrics given true labels, predicted labels, and predicted probabilities.
+     Calculate a variety of performance metrics given true labels, predicted labels,
+     and predicted probabilities.

-     This function computes several commonly used classification metrics to evaluate the performance of a model.
-     It returns a dictionary containing the balanced accuracy, F1 score, precision, recall, AUROC, average precision score,
-     and AUC-PR.
+     This function computes several commonly used classification metrics to evaluate
+     the performance of a model. It returns a dictionary containing the balanced
+     accuracy, F1 score, precision, recall, AUROC, average precision score, and AUC-PR.

      Parameters
      ----------
@@ -41,6 +42,7 @@ def calculate_metrics(
      -------
      Dict[str, float]
          Dictionary containing the following metrics:
+         - "Balanced Accuracy": Balanced accuracy score
          - "F1 Score": F1 score
          - "Precision": Precision score
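For reference, the metrics named in this docstring map onto standard scikit-learn calls. Below is a minimal sketch of how such a helper might compute them, assuming scikit-learn implementations; the function body is not shown in this diff, and the arrays are illustrative only.

```python
import numpy as np
from sklearn.metrics import (
    auc,
    average_precision_score,
    balanced_accuracy_score,
    f1_score,
    precision_recall_curve,
    precision_score,
    recall_score,
    roc_auc_score,
)

# Illustrative inputs; in calculate_metrics these arrive as y_true, y_pred, y_prob.
y_true = np.array([0, 1, 1, 0, 1])
y_pred = np.array([0, 1, 0, 0, 1])
y_prob = np.array([0.2, 0.9, 0.4, 0.1, 0.8])

precision, recall, _ = precision_recall_curve(y_true, y_prob)
metrics = {
    "Balanced Accuracy": balanced_accuracy_score(y_true, y_pred),
    "F1 Score": f1_score(y_true, y_pred),
    "Precision": precision_score(y_true, y_pred),
    "Recall": recall_score(y_true, y_pred),
    "AUROC": roc_auc_score(y_true, y_prob),
    "Average Precision Score": average_precision_score(y_true, y_prob),
    "AUC-PR": auc(recall, precision),  # area under the precision-recall curve
}
```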
odyssey/evals/prediction.py (50 changes: 32 additions & 18 deletions)
@@ -1,4 +1,8 @@
- """Prediction module for loading and running EHR models on patient data, both for clinical predictive tasks and EHR forecasting."""
+ """Prediction module for loading and running EHR models on patient data.
+
+ This module provides functionality to load and run EHR models on patient data,
+ both for clinical predictive tasks and EHR forecasting.
+ """

  from typing import Any, Dict, List, Optional, Tuple, Union

@@ -19,7 +23,8 @@ def load_pretrained_model(
      Load a pretrained model based on the specified model type and tokenizer.

      This function initializes a model of the specified type, loads its pretrained
-     weights from a checkpoint file, and prepares it for inference on the specified device.
+     weights from a checkpoint file, and prepares it for inference on the specified
+     device.

      Parameters
      ----------
@@ -70,30 +75,30 @@ def load_finetuned_model(
      fine_model_config: Optional[Dict[str, Any]] = None,
      device: Optional[torch.device] = None,
  ) -> torch.nn.Module:
-     """Load a finetuned model from model_path using tokenizer information.
+     """
+     Load a finetuned model from model_path using tokenizer information.

-     Return a loaded finetuned model from model_path, using tokenizer information.
      If config arguments are not provided, the default configs built into the
      PyTorch classes are used.

      Parameters
      ----------
-     model_path: str
+     model_path : str
          Path to the finetuned model to load
-     tokenizer: ConceptTokenizer
+     tokenizer : ConceptTokenizer
          Loaded tokenizer object
-     pre_model_config: Dict[str, Any], optional
+     pre_model_config : Dict[str, Any], optional
          Optional config to override default values of a pretrained model
-     fine_model_config: Dict[str, Any], optional
+     fine_model_config : Dict[str, Any], optional
          Optional config to override default values of a finetuned model
-     device: torch.device, optional
+     device : torch.device, optional
          CUDA device. By default, GPU is used

      Returns
      -------
      torch.nn.Module
          Finetuned model loaded from model_path
      """
      # Load GPU or CPU device
      if not device:
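A hedged usage sketch for load_finetuned_model, built only from the parameters and return type documented above. The checkpoint path is illustrative, and the ConceptTokenizer import path and constructor are assumptions not confirmed by this diff.

```python
import torch

from odyssey.data.tokenizer import ConceptTokenizer  # assumed module path
from odyssey.evals.prediction import load_finetuned_model

tokenizer = ConceptTokenizer()  # assumed constructor; real setup may differ

# device is optional; per the docstring, GPU is used by default.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = load_finetuned_model(
    model_path="checkpoints/finetuned.ckpt",  # illustrative path
    tokenizer=tokenizer,
    pre_model_config=None,   # use defaults built into the PyTorch classes
    fine_model_config=None,  # likewise for the finetuned model config
    device=device,
)
```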
@@ -126,15 +131,18 @@ def create_concept_and_id_to_type_mapping(
      """
      Create a mapping from concepts and their IDs to their corresponding type IDs.

-     This function processes pretraining data to build a dictionary that maps each unique concept
-     and its corresponding ID to its type ID, based on the first occurrence of the concept in the data.
+     This function processes pretraining data to build a dictionary that maps each unique
+     concept and its corresponding ID to its type ID, based on the first occurrence of
+     the concept in the data.

      Parameters
      ----------
      pretrain_data : pd.DataFrame
-         A pandas DataFrame containing the pretraining data with columns 'event_tokens_2048' and 'type_tokens_2048'.
+         A pandas DataFrame containing the pretraining data with columns
+         'event_tokens_2048' and 'type_tokens_2048'.
      tokenizer : ConceptTokenizer
-         The tokenizer object used to convert concepts to IDs. Must have a 'token_to_id' method.
+         The tokenizer object used to convert concepts to IDs. Must have a 'token_to_id'
+         method.

      Returns
      -------
@@ -147,7 +155,8 @@ def create_concept_and_id_to_type_mapping(
      for events, types in zip(
          pretrain_data["event_tokens_2048"], pretrain_data["type_tokens_2048"]
      ):
-         # Use numpy's unique function to get unique concepts and their first occurrence index
+         # Use numpy's unique function to get unique concepts and
+         # their first occurrence index
          unique_concepts, first_occurrence = np.unique(events, return_index=True)

          # Map each unique concept and its ID to its corresponding type
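To make the np.unique step concrete, here is a toy sketch of the mapping this function appears to build, under the assumption (taken from the docstring) that both the concept string and its token ID key the same type ID. The event data and the token_to_id stand-in below are illustrative, not from the repository.

```python
import numpy as np

events = np.array(["med_a", "lab_b", "med_a", "proc_c"])
types = np.array([2, 3, 2, 4])  # type IDs aligned with events (illustrative)
token_to_id = {"med_a": 10, "lab_b": 11, "proc_c": 12}  # stand-in tokenizer

concept_and_id_to_type = {}
# return_index gives the position of each concept's first occurrence,
# which selects the type ID recorded when the concept first appears.
unique_concepts, first_occurrence = np.unique(events, return_index=True)
for concept, idx in zip(unique_concepts, first_occurrence):
    type_id = int(types[idx])
    concept_and_id_to_type[concept] = type_id               # concept -> type
    concept_and_id_to_type[token_to_id[concept]] = type_id  # token ID -> type
```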
@@ -162,6 +171,8 @@


  class Forecast:
+     """Forecast token sequences using a pretrained model."""
+
      def __init__(
          self,
          model: torch.nn.Module,
@@ -253,8 +264,8 @@ def prepare_input_data(
          # Map predicted concept IDs to their corresponding type IDs
          predicted_type_ids = torch.tensor(
              [
-                 self.concept_and_id_to_type.get(id.item(), 0)
-                 for id in predicted_ids
+                 self.concept_and_id_to_type.get(id_.item(), 0)
+                 for id_ in predicted_ids
              ],
              device=self.device,
              dtype=tensor.dtype,
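The rename from id to id_ avoids shadowing Python's builtin id(). A self-contained toy run of the lookup-with-default pattern above; the mapping and tensor values are illustrative.

```python
import torch

concept_and_id_to_type = {10: 2, 11: 3}     # illustrative concept-ID -> type-ID
predicted_ids = torch.tensor([10, 99, 11])  # 99 has no entry in the mapping

predicted_type_ids = torch.tensor(
    [concept_and_id_to_type.get(id_.item(), 0) for id_ in predicted_ids]
)
print(predicted_type_ids)  # tensor([2, 0, 3]); unknown IDs fall back to type 0
```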
@@ -324,7 +335,8 @@ def generate_token_sequence(
      num_tokens : int
          The number of tokens to generate.
      cutoff_index : int, optional
-         The index at which to truncate the patient data. If None, it will be calculated.
+         The index at which to truncate the patient data. If None, it will be
+         calculated.

      Returns
      -------
@@ -350,7 +362,9 @@ def generate_token_sequence(
      )

      predicted_ids_list = predicted_ids.cpu().tolist()
-     predicted_tokens = [self.tokenizer.id_to_token(id) for id in predicted_ids_list]
+     predicted_tokens = [
+         self.tokenizer.id_to_token(id_) for id_ in predicted_ids_list
+     ]
      predicted_labels = self.tokenizer.decode_to_labels(predicted_tokens)

      return predicted_ids_list, predicted_tokens, predicted_labels
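Finally, a hedged sketch of calling generate_token_sequence, using only the parameters and return values documented above. The full signature is not shown in this diff, so the forecaster instance, the patient_data argument, and its position are all assumptions.

```python
# forecaster: a Forecast instance; patient_data: one patient's token record.
# Both are assumed to exist; only num_tokens and cutoff_index are documented.
predicted_ids, predicted_tokens, predicted_labels = (
    forecaster.generate_token_sequence(
        patient_data,
        num_tokens=10,      # generate ten new tokens
        cutoff_index=None,  # let the method calculate the truncation point
    )
)
print(predicted_tokens)  # token strings decoded from the generated IDs
print(predicted_labels)  # labels via tokenizer.decode_to_labels(...)
```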
tests/__init__.py (1 change: 1 addition & 0 deletions)
@@ -0,0 +1 @@
+ """Test odyssey."""
tests/odyssey/__init__.py (1 change: 1 addition & 0 deletions)
@@ -0,0 +1 @@
+ """Test odyssey."""
Empty file removed tests/odyssey/data/__init__.py
tests/odyssey/data/mimiciv/test_collect.py (54 changes: 0 additions & 54 deletions)

This file was deleted.

tests/odyssey/data/test_dataset.py (2 changes: 2 additions & 0 deletions)
@@ -23,6 +23,8 @@


  class TestDatasets(unittest.TestCase):
+     """Test dataset classes."""
+
      def setUp(self) -> None:
          """Set up mock data and tokenizer for testing."""
          self.data = pd.DataFrame(
tests/odyssey/utils/__init__.py (1 change: 1 addition & 0 deletions)
@@ -0,0 +1 @@
+ """Test utils."""
