From 72575043728d454ff440f8b770204a649317410b Mon Sep 17 00:00:00 2001
From: Trent Hinkle <trenth12@gmail.com>
Date: Sat, 28 Dec 2024 14:13:32 -0800
Subject: [PATCH]  Updated documentation and Dockerfile

---
 Dockerfile                   |  5 ++-
 docs/advanced.md             | 44 +++++++++++++++++++++--
 docs/index.md                |  7 ++--
 docs/input_format.md         | 12 ++++---
 docs/parameters.md           | 40 ++++++++++-----------
 docs/quickstart.md           | 68 ++++++++++++++++++++++++++++++------
 pyproteininference/reader.py | 16 +++++----
 7 files changed, 144 insertions(+), 48 deletions(-)
diff --git a/Dockerfile b/Dockerfile
index 845c673..dfca780 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -21,4 +21,7 @@ RUN pip install -r requirements.txt
 
 RUN python setup.py install
 
-WORKDIR /
+RUN chmod +x /usr/local/bin/protein_inference_cli.py
+RUN chmod +x /usr/local/bin/protein_inference_heuristic_cli.py
+
+WORKDIR /
\ No newline at end of file
diff --git a/docs/advanced.md b/docs/advanced.md
index 3cd270f..5e5c7db 100644
--- a/docs/advanced.md
+++ b/docs/advanced.md
@@ -105,7 +105,7 @@ Advanced usage flags
 1. `-p` This flag is a True/False on whether to skip appending alternative proteins from the Fasta database digestion. If this flag is left blank, it will not skip appending alternative proteins (recommended). 
 2. `-i` True/False on whether to split the IDs in the Fasta database file. If this is left blank, it will not split IDs in the Fasta database file (recommended).
 
-You can run the tool as follows:
+You can run the tool as follows with separate target and decoy files:
 ```shell
 protein_inference_cli.py \
     -t /path/to/target/file.txt \
@@ -115,6 +115,15 @@ protein_inference_cli.py \
     -o /path/to/output/directory/
 ```
 
+Or from combined files like an mzIdentML file:
+```shell
+protein_inference_cli.py \
+    -f /path/to/target/file.mzid \
+    -db /path/to/database/file.fasta \
+    -y /path/to/parameter/file.yaml \
+    -o /path/to/output/directory/
+```
+
 Running with multiple input target/decoy files:
 ```shell
 protein_inference_cli.py \
@@ -125,6 +134,15 @@ protein_inference_cli.py \
     -o /path/to/output/directory/
 ```
 
+Or from multiple mzIdentML / idXML / pepXML files:
+```shell
+protein_inference_cli.py \
+    -f /path/to/target/file1.mzid /path/to/target/file2.mzid \
+    -db /path/to/database/file.fasta \
+    -y /path/to/parameter/file.yaml \
+    -o /path/to/output/directory/
+```
+
 
 #### Running Within Python
 To run within a python console please see the following example:
@@ -149,7 +167,29 @@ pipeline = ProteinInferencePipeline(parameter_file=yaml_params,
 pipeline.execute()
 ```
 
+Or running mzIdentML files within python:
+```python
+from pyproteininference.pipeline import ProteinInferencePipeline
+
+yaml_params = "/path/to/yaml/params.yaml"
+database = "/path/to/database/file.fasta"
+### combined_files can either be a list of files or one file
+mzid_files = ["/path/to/file1.mzid","/path/to/file2.mzid"]
+### output_directory is the directory the result files are written to
+output_directory_name = "/path/to/output/directory/"
+
+pipeline = ProteinInferencePipeline(parameter_file=yaml_params,
+                                    database_file=database,  
+                                    target_files=None,  
+                                    decoy_files=None,  
+                                    combined_files=mzid_files,  
+                                    output_directory=output_directory_name)  
+# Calling .execute() will initiate the pipeline with the given data                                                               
+pipeline.execute()
+```
+
 ### Running the Heuristic Method
+**NOTE: The Heuristic Method is experimental and has not been extensively tested on multiple datasets yet. Check back for updates on this tool.** <br> <br>
 Py Protein Inference also has a built-in Heuristic that runs through four inference methods (Inclusion, Exclusion, Parsimony, and Peptide Centric) and selects a recommended method for your given dataset. 
 By default, all four result files will be written, and the optimal method will be highlighted to the user.
 The Heuristic method also outputs a density plot that showcases all the inference methods compared to one another to gain further insight. For more information on the Heuristic Method see the [__Heuristic algorithm__](supplementary.md#heuristic-algorithm) section.
@@ -327,6 +367,6 @@ Console Output is as follows and indicates the recommended method at the end:
 Below is an example of a Heuristic Density plot. The plot indicates the distribution of the number of standard deviations 
 from the mean (of identified proteins at a specified FDR) for each inference method for a range of FDRs from 0 to the false discovery rate threshold (100 fdrs are incrementally selected in the range [0, fdr threshold])
 In general, the closer that the peak of a distribution is to 0 the more likely the associated method is to be selected as the recommended method.
-For more information on the specifics of the Heuristic Algorithm see [__Heuristic Algorithm Description__](supplementary.md#heuristic-algorithm-description)
+For more information on the specifics of the Heuristic Algorithm see [__Heuristic Algorithm Description__](supplementary.md#heuristic-algorithm)
 
 ![density](img/swissprot_example_density.png)
diff --git a/docs/index.md b/docs/index.md
index 2f6379e..9068b11 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,10 +1,9 @@
 ## Introduction
 
+**PyProteinInference** is a Python package for running various protein inference algorithms on tandem mass spectrometry search results and generating protein to peptide mappings with protein level false discovery rates.  
 
 For a quick start guide please click [here](quickstart.md).
 
-**PyProteinInference** is a Python package for running various protein inference algorithms on tandem mass spectrometry search results and generating protein to peptide mappings with protein level false discovery rates..  
-
 **Key Features** <br>
 
 * **Protein Inference and Scoring**:
@@ -14,7 +13,7 @@ For a quick start guide please click [here](quickstart.md).
 * **Supported Input Formats**:
     * Search Result File Types: [idXML](input_format.md#idxml), [mzIdentML](input_format.md#mzidentml), or [pepXML](input_format.md#pepxml).  
     * PSM files from [Percolator](https://github.com/percolator/percolator).
-    * Custom tab-delimited files (see [input formats](input_format.md#custom-input)).  
+    * Custom tab-delimited [files](input_format.md#custom-input).  
 * **Output**:
     * User-friendly CSV file containing Proteins, Peptides, q-values, and Protein Scores.  
     * Details on output formats: [supplementary](supplementary.md#export-types).  
@@ -49,5 +48,5 @@ Please see the [__Protein Score Types__](supplementary.md#protein-score-types) s
 ## Using Py Protein Inference
  1. [Yaml Parameter File](parameters.md#yaml-parameter-file-outline)
  2. [Input File Examples](input_format.md#input-file-examples) (idXML, mzIdentML, pepXML, Tab Delimited)
- 3. [Fasta Database](input_format.md#fasta-file-example)
+ 3. [Fasta Database](input_format.md#fasta-file)
  4. [Running Py Protein Inference](advanced.md#running-py-protein-inference)
\ No newline at end of file
diff --git a/docs/input_format.md b/docs/input_format.md
index d4fbd6b..90f869b 100644
--- a/docs/input_format.md
+++ b/docs/input_format.md
@@ -50,17 +50,19 @@ For example, lets say you wanted to use the `Comet:xcorr` value (for Comet Xcorr
 ```
 
 One of the Accession values can also be used. For example, `MS:1001493` actually means `Posterior Error Probability`. <br>
-Since this is the Posterior Error Probability from percolator, the input psm_score would actually be posterior_error_prob. This will be changed in a future release to be the accession `MS:1001493` instead. <br>
+You would simply enter `MS:1001493` as the `psm_score` to use that value as the PSM score.
 
 So to use that value as the PSM score you might have this as your score section for your parameter file:
 
 ```yaml
   score:
     protein_score: multiplicative_log
-    psm_score: posterior_error_prob
+    psm_score: MS:1001493
     psm_score_type: multiplicative
 ```
 
+Remember, `MS:1001493` actually means `Posterior Error Probability`. Please refer to your specific search algorithm's documentation to learn more about what each accession means.
+
 ### mzIdentML
 Lets inspect a standard MSGF+ based mzIdentML file for assistance in running pyProteinInference from a a mzIdentML file. <br>
 
@@ -93,8 +95,8 @@ When inspecting a file you might see a spectrum identification as such:
 </SpectrumIdentificationItem>
 ```
 
-When selecting custom scores to use from your mzIdentML file you can select any of the cvParam options under SpectrumIdentificationItem. For example, we might want to use the MS-GF:PepQValue (MS:1002055). <br>
-However, when selecting custom scores from mzIdentML files you will actually input the `accession` listed above instead of the `name`. <br>
+When selecting custom scores to use from your mzIdentML file you can select any of the `cvParam` options under SpectrumIdentificationItem. For example, we might want to use the `MS-GF:PepQValue` (`MS:1002055`). <br>
+An important note when selecting custom scores from mzIdentML files is that you will actually input the `accession` listed above instead of the `name`. <br>
 So, for selecting `MS-GF:PepQValue`, you would actually input `MS:1002055` into the parameter file (See below). <br>
 Also, given that `MS:1002055` scores are better if they are lower we would insert it as a multiplicative score. <br>
 The `score` section of the parameter file in this case might look like this:
@@ -106,6 +108,8 @@ The `score` section of the parameter file in this case might look like this:
     psm_score_type: multiplicative
 ```
 
+Remember, you can use any `cvParam` accession from your mzIdentML file. Make sure to note if the selected score is better when lower or higher to calculate protein scores correctly.
+
 ### pepXML
 
 Lets next inspect a standard MSFragger Version 4.0 based pepXML file for assistance in running pyProteinInference from a pepXML file
diff --git a/docs/parameters.md b/docs/parameters.md
index 71723f9..2858db9 100644
--- a/docs/parameters.md
+++ b/docs/parameters.md
@@ -2,13 +2,13 @@
 The Yaml Parameter File is the central location for all configurations for a given Protein Inference run and are summarized below:
 Note: These parameters are all optional. Please see the section [Default Parameters](#default-parameters) for more information on defaults.
 ## General
-| Parameter  | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           | Type    |
-|------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|
-| export     | Export Type can be one of: __peptides__, __psms__, __psm_ids__, __long__, __q_value__, __q_value_all__, __q_value_comma_sep__, __leads__, __all__, __comma_sep__. Suggested types are __peptides__, __psms__, and __psm_ids__ as these produce square output. If there are multiple proteins per group the three mentioned types will report the leads only. Other types report on the peptide level with slightly different formats and whether or not to include leads only or all proteins. See [here](supplementary.md#export-types) for an in-depth explanation of Export Types. | String  |
-| fdr        | False Discovery Rate to be marked as significant. Ex. __0.01__ for 1% FDR.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            | Numeric |
-| picker     | __True__/__False__ on whether to run the Protein Picker algorithm. For more info click [here](supplementary.md#protein-picker).                                                                                                                                                                                                                                                                                                                                                                                                                                                       | Bool    |
-| tag        | A String tag that will be written into the result files. Ex. __example_tag__.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | String  |
-| xml_parser | The library to read idXML, mzID, or pepXML files from. Can be either __openms__ or __pyteomics__. Default: __openms__.                                                                                                                                                                                                                                                                                                                                                                                                                                                                | String  |
+| Parameter  | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    | Type    |
+|------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|
+| export     | Export Type can be one of: __peptides__, __psms__, __psm_ids__, __long__, __q_value__, __q_value_all__, __q_value_comma_sep__, __leads__, __all__, __comma_sep__. Suggested types are __peptides__, __psms__, and __psm_ids__ as these produce square output. If there are multiple proteins per group the three mentioned types will report the leads only unless __inference_type__ is __peptide_centric__ which will output a ; separated list of proteins in the group OR if __inference_type__ is __parsimony__ and __grouping_type__ is __parsimonious_grouping__ which will also return a ; separated list of proteins in the parsimony group. Other types report on the peptide level with slightly different formats and whether or not to include leads only or all proteins. See [here](supplementary.md#export-types) for an in-depth explanation of Export Types. | String  |
+| fdr        | False Discovery Rate to be marked as significant. Ex. __0.01__ for 1% FDR.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     | Numeric |
+| picker     | __True__/__False__ on whether to run the Protein Picker algorithm. For more info click [here](supplementary.md#protein-picker).                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                | Bool    |
+| tag        | A String tag that will be written into the result files. Ex. __example_tag__.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  | String  |
+| xml_parser | The library to read idXML, mzID, or pepXML files from. Can be either __openms__ or __pyteomics__. Default: __openms__.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | String  |
 
 ## Data Restriction
 | Parameter | Description |Type|
@@ -20,11 +20,11 @@ Note: These parameters are all optional. Please see the section [Default Paramet
 | max_allowed_alternative_proteins | The maximum number of proteins a peptide is allowed to map to. Default: __50__. | Int |
 
 ## Score
-| Parameter | Description |Type|
-|---|---|---|
-| protein_score | One of any of the following: __multiplicative_log__, __best_peptide_per_protein__, __top_two_combined__, __additive__, __iterative_downweighted_log__, __downweighted_multiplicative_log__, __geometric_mean__. Recommended: __multiplicative_log__. | String |
-| psm_score | PSM score to use for Protein Scoring. If using Percolator output as input this would either be __posterior_error_prob__ or __q-value__. The string typed here should match the column in your input files __EXACTLY__. If using a custom score it will be filtered accordingly with the value in [__custom_restriction__](#data-restriction). | String |
-| psm_score_type | The Type of score that __psm_score__ parameter is. Either __multiplicative__ or __additive__. If a larger psm score is "better" than input additive (i.e. Mascot Ion Score, Xcorr, Percolator Score). If a smaller psm score is "better" than input multiplicative (i.e. Q Value, Posterior Error Probability). See [below](#extra-score-information) for more information.| String |
+| Parameter | Description                                                                                                                                                                                                                                                                                                                                                                 |Type|
+|---|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---|
+| protein_score | One of any of the following: __multiplicative_log__, __best_peptide_per_protein__, __top_two_combined__, __additive__, __iterative_downweighted_log__, __downweighted_multiplicative_log__, __geometric_mean__. Recommended: __multiplicative_log__.                                                                                                                        | String |
+| psm_score | PSM score to use for Protein Scoring. If using Percolator output as input this would either be __posterior_error_prob__ or __q-value__. The string typed here should match the column/attribute in your input files __EXACTLY__. For more info on selecting PSM scores from your input files please see [input file examples](input_format.md#input-file-examples)          | String |
+| psm_score_type | The type of score that __psm_score__ parameter is. Either __multiplicative__ or __additive__. If a larger psm score is "better" than input additive (i.e. Mascot Ion Score, Xcorr, Percolator Score). If a smaller psm score is "better" than input multiplicative (i.e. Q Value, Posterior Error Probability). See [below](#extra-score-information) for more information. | String |
 
 #### Extra Score information
  1. The __protein_score__, __psm_score__, and __psm_score_type__ methods must be compatible.
@@ -32,11 +32,11 @@ Note: These parameters are all optional. Please see the section [Default Paramet
  3. If using a PSM score (__psm_score__ parameter) where the higher the score the better (i.e. Percolator Score, Mascot Ion Score, Xcorr) (Percolator Score is called __psm_score__ - column name) in the tab delimited percolator output. Then __protein_score__ and __psm_score_type__ must both be __additive__.
 
 ## Identifiers
-| Parameter | Description |Type|
-|---|---|---|
-| decoy_symbol | Symbol within Decoy Identifiers to distinguish between targets. (i.e "__##__" or "__decoy___"). This is important for [Protein Picker](supplementary.md#protein-picker) and FDR calculation. | String |
-| isoform_symbol | Symbol that is present in isoform proteins only. (i.e. "__-__"). See [below](#extra-identifier-information) for more information. | String |
-| reviewed_identifier_symbol | Identifier to determine a reviewed vs unreviewed identifier. (i.e. "__sp\|__"). See [below](#extra-identifier-information) for more information.   | String |
+| Parameter | Description                                                                                                                                                                                          |Type|
+|---|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---|
+| decoy_symbol | Symbol within Decoy Identifiers to distinguish between targets. (i.e "##", "decoy_", "rev_", "DECOY_"). This is important for [Protein Picker](supplementary.md#protein-picker) and FDR calculation. | String |
+| isoform_symbol | Symbol that is present in isoform proteins only. (i.e. "__-__"). See [below](#extra-identifier-information) for more information.                                                                    | String |
+| reviewed_identifier_symbol | Identifier to determine a reviewed vs unreviewed identifier. (i.e. "__sp\|__"). See [below](#extra-identifier-information) for more information.                                                     | String |
 
 #### Extra Identifier information
  1. For the __decoy_symbol__ an example of a target protein -> __ex|protein__ and its decoy counterpart could be any of the following: __##ex|##protein__, __##ex|protein__, __decoy_ex|protein__. The decoy symbol just needs to be present within the string to be determined a decoy.
@@ -59,7 +59,7 @@ These parameters are only used if __parsimony__ is selected as __inference_type_
 
 | Parameter | Description |Type|
 |---|---|---|
-| lp_solver | This can be one of: __pulp__ or __None__. This determines which linear program solver is used. Please see [here](supplementary.md#parsimony-dependencies) for more information on lp solvers. Input __None__ if not running __parsimony__. If running __parsimony__ this needs to be set to __pulp__. | String |
+| lp_solver | This can be one of: __pulp__ or __None__. This determines which linear program solver is used. Input __None__ if not running __parsimony__. If running __parsimony__ this needs to be set to __pulp__. | String |
 | shared_peptides | How to assign shared peptides for parsimony. Can be one of: __all__ or __best__. __all__ assigns shared peptides to all possible proteins in the output. __best__ assigns shared peptides to the best scoring protein which is a "winner take all" approach. This is specific to the Parsimony Inference type. | String |
 
 
@@ -95,8 +95,8 @@ parameters:
     isoform_symbol: "-"
     reviewed_identifier_symbol: "sp|"
   inference:
-    inference_type: inclusion
-    grouping_type: subset_peptides
+    inference_type: parsimony
+    grouping_type: parsimonious_grouping
   digest:
     digest_type: trypsin
     missed_cleavages: 3
diff --git a/docs/quickstart.md b/docs/quickstart.md
index f49b63c..c803be8 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -2,7 +2,7 @@
 
 1. __Python 3.9__ or greater.
 2. __Python Packages__:
-   __numpy__, __pyteomics__, __pulp__, __PyYAML__, __matplotlib__. These should be installed automatically during installation.
+   __numpy__, __pyteomics__, __pyopenms__, __pulp__, __PyYAML__, __matplotlib__, __lxml__, __nicegui__, __pywebview__, __tqdm__. These should be installed automatically during installation.
 		
 # Quick Start Guide
 1. Install the package using pip:
@@ -16,26 +16,28 @@
 			-db /path/to/database/file.fasta \
 			-y /path/to/params.yaml
 
-3. Run the standard commandline from a mzid file 
+3. Run the standard commandline from an mzIdentML file 
 
 		protein_inference_cli.py \
 			-f /path/to/target/file1.mzid \
 			-db /path/to/database/file.fasta \
 			-y /path/to/params.yaml
 
-4. Run the standard commandline from a pep.xml file 
+4. Run the standard commandline from a pepXML file 
 
 		protein_inference_cli.py \
-			-f /path/to/target/file1.pep.xml \
+			-f /path/to/target/file1.pepXML \
 			-db /path/to/database/file.fasta \
 			-y /path/to/params.yaml
 
-5. Run the standard commandline tool with tab delimited results directly from percolator to run a particular inference method. By default, peptide centric inference is selected if a parameter file is not specified:
+5. Run the standard commandline tool with tab delimited results directly from percolator
 
 		protein_inference_cli.py \
 			-t /path/to/target/file.txt \
 			-d /path/to/decoy/file.txt \
-			-db /path/to/database/file.fasta 
+			-db /path/to/database/file.fasta \
+			-y /path/to/params.yaml
+
 
 6. Specifying Parameters. 
 The two most common parameters to change are the inference type, and the decoy symbol (for identifying decoy proteins vs target proteins).
@@ -51,7 +53,49 @@ The parameters can be quickly altered by creating a file called params.yaml as f
 	All parameters are optional, so you only need to define the ones you want to alter. Parameters that are not defined are set to default values.
 	See [here](parameters.md#default-parameters) for the default parameters.
 
-7. Run the standard commandline tool again, this time specifying the parameters as above:
+7. Full Parameter Specifications
+See below for a full standard parameter file:
+
+## Default Parameters
+```yaml
+parameters:
+  general:
+    export: peptides
+    fdr: 0.01
+    picker: True
+    tag: example_tag
+    xml_parser: openms
+  data_restriction:
+    pep_restriction: 0.9
+    peptide_length_restriction: 7
+    q_value_restriction: .9
+    custom_restriction: None
+    max_allowed_alternative_proteins: 50
+  score:
+    protein_score: best_peptide_per_protein
+    psm_score: posterior_error_prob
+    psm_score_type: multiplicative
+  identifiers:
+    decoy_symbol: "##"
+    isoform_symbol: "-"
+    reviewed_identifier_symbol: "sp|"
+  inference:
+    inference_type: parsimony
+    grouping_type: parsimonious_grouping
+  digest:
+    digest_type: trypsin
+    missed_cleavages: 3
+  parsimony:
+    lp_solver: pulp
+    shared_peptides: all
+  peptide_centric:
+    max_identifiers: 5
+```
+
+These parameter options are just a suggestion. Please alter these for your specifications. 
+For a full description of each parameter and all options, see the in-depth [parameter file description](parameters.md#yaml-parameter-file-outline).
+
+8. Run the standard commandline tool again, this time specifying the parameters as above:
 		
 		protein_inference_cli.py \
 			-t /path/to/target/file.txt \
@@ -59,10 +103,10 @@ The parameters can be quickly altered by creating a file called params.yaml as f
 			-db /path/to/database/file.fasta \
 			-y /path/to/params.yaml
 
-8. Running with docker
+9. Running with docker
 	
 	- Either Pull the image from docker hub:
-		- `docker pull hinklet/pyproteininference:latest`
+		- `docker pull thinkle12/pyproteininference:latest`
 	- Or Build the image with the following command (After having cloned the repository):
 	  	- `git clone REPOSITORY_URL`
 	  	- `cd pyproteininference`
@@ -76,4 +120,8 @@ The parameters can be quickly altered by creating a file called params.yaml as f
 				-db /data/database.fasta \
 				-y /data/parameters.yaml \
 				-o /data/
-	
\ No newline at end of file
+	
+	- Get the commandline help via docker
+
+			docker run thinkle12/pyproteininference:latest \
+            python /usr/local/bin/protein_inference_cli.py --help
\ No newline at end of file
diff --git a/pyproteininference/reader.py b/pyproteininference/reader.py
index d4cffc6..0c8a276 100644
--- a/pyproteininference/reader.py
+++ b/pyproteininference/reader.py
@@ -38,6 +38,7 @@ def __init__(
             decoy_file (str/list): Path to Decoy PSM result files.
             combined_files (str/list): Path to Combined PSM result files.
             directory (str): Path to directory containing combined PSM result files.
+            top_hit_per_psm_only (bool): If True, only include top hit for each PSM.
 
         """
         self.target_file = target_file
@@ -219,6 +220,7 @@ def __init__(
             decoy_file (str/list): Path to Decoy PSM result files.
             combined_files (str/list): Path to Combined PSM result files.
             directory (str): Path to directory containing combined PSM result files.
+            top_hit_per_psm_only (bool): If True, only include top hit for each PSM.
 
         Returns:
             Reader: [Reader][pyproteininference.reader.Reader] object.
@@ -505,6 +507,7 @@ def __init__(
                 [ProteinInferenceParameter][pyproteininference.parameters.ProteinInferenceParameter] object.
             append_alt_from_db (bool): Whether or not to append alternative proteins found in the database
                 that are not in the input files.
+            top_hit_per_psm_only (bool): If True, only include top hit for each PSM.
 
 
         Returns:
@@ -694,6 +697,7 @@ def __init__(
             decoy_file (str/list): Path to Decoy PSM result files.
             combined_files (str/list): Path to Combined PSM result files.
             directory (str): Path to directory containing combined PSM result files.
+            top_hit_per_psm_only (bool): If True, only include top hit for each PSM.
 
         Returns:
             Reader: [Reader][pyproteininference.reader.Reader] object.
@@ -1017,7 +1021,7 @@ def _find_psms_with_alternative_proteins(self, raw_psms):
 
 class IdXMLReader(Reader):
     """
-    The following class takes a idXML like target file and a idXML like decoy file
+    The following class takes an idXML-like file
     and creates standard [Psm][pyproteininference.physical.Psm] objects.
 
     Attributes:
@@ -1075,13 +1079,13 @@ def __init__(
             decoy_file (str/list): Path to Decoy PSM result files.
             combined_files (str/list): Path to Combined PSM result files.
             directory (str): Path to directory containing combined PSM result files.
+            top_hit_per_psm_only (bool): If True, only include top hit for each PSM.
 
         Returns:
             Reader: [Reader][pyproteininference.reader.Reader] object.
 
         Example:
-            >>> pyproteininference.reader.IdXMLReader(target_file = "example_target.txt",
-            >>>     decoy_file = "example_decoy.txt",
+            >>> pyproteininference.reader.IdXMLReader(combined_files = "example_file.idXML",
             >>>     digest=digest, parameter_file_object=pi_params)
         """
         self.target_file = target_file
@@ -1143,8 +1147,7 @@ def _read_psms_pyteomics(self):
         This method must be ran before initializing [DataStore object][pyproteininference.datastore.DataStore].
 
         Example:
-            >>> reader = pyproteininference.reader.GenericReader(target_file = "example_target.txt",
-            >>>     decoy_file = "example_decoy.txt",
+            >>> reader = pyproteininference.reader.IdXMLReader(combined_files = "example_file.idXML",
             >>>     digest=digest, parameter_file_object=pi_params)
             >>> reader.read_psms()
 
@@ -1300,8 +1303,7 @@ def _read_psms_openms(self):
         This method must be ran before initializing [DataStore object][pyproteininference.datastore.DataStore].
 
         Example:
-            >>> reader = pyproteininference.reader.GenericReader(target_file = "example_target.txt",
-            >>>     decoy_file = "example_decoy.txt",
+            >>> reader = pyproteininference.reader.IdXMLReader(combined_files = "example_file.idXML",
             >>>     digest=digest, parameter_file_object=pi_params)
             >>> reader.read_psms()