diff --git a/pyproteininference/datastore.py b/pyproteininference/datastore.py index 207fe80..eebc8f7 100644 --- a/pyproteininference/datastore.py +++ b/pyproteininference/datastore.py @@ -62,6 +62,8 @@ class DataStore(object): "q-value": "qvalue", "posterior_error_prob": "pepvalue", "posterior_error_probability": "pepvalue", + "MS:1001493": "pepvalue", # Added to make sure custom input for pep/qval accession gets mapped to pep/qval + "MS:1001491": "qvalue", } CUSTOM_SCORE_KEY = "custom_score" @@ -1179,6 +1181,12 @@ def _check_target_decoy_split(self): decoys = [ x for x in self.digest.protein_to_peptide_dictionary.keys() if self.parameter_file_object.decoy_symbol in x ] + if len(decoys) == 0: + raise ValueError( + "No decoy proteins found in digest file with decoy symbol: {}. Please double check your decoy symbol and make sure decoy proteins are present in your input file(s).".format( + self.parameter_file_object.decoy_symbol + ) + ) ratio = float(len(targets)) / float(len(decoys)) logger.info("Number of Target Proteins in Digest: {}".format(len(targets))) logger.info("Number of Decoy Proteins in Digest: {}".format(len(decoys))) diff --git a/pyproteininference/physical.py b/pyproteininference/physical.py index eefa87e..90453e3 100644 --- a/pyproteininference/physical.py +++ b/pyproteininference/physical.py @@ -330,6 +330,7 @@ def assign_main_score(self, score): if score not in self.SCORE_ATTRIBUTE_NAMES: raise ValueError("Scores must either be one of: '{}'".format(", ".join(self.SCORE_ATTRIBUTE_NAMES))) else: + score_attribute = getattr(self, score) self.main_score = getattr(self, score) diff --git a/pyproteininference/pipeline.py b/pyproteininference/pipeline.py index 2cad67f..a397457 100644 --- a/pyproteininference/pipeline.py +++ b/pyproteininference/pipeline.py @@ -210,9 +210,7 @@ def _as_list(x: Union[str, List[str]]) -> List[str]: else ( _as_list(self.decoy_files) if self.decoy_files - else _as_list(self.combined_files) - if self.combined_files - else list() + else _as_list(self.combined_files) if self.combined_files else list() ) ) extensions = set([os.path.splitext(x)[1].lower() for x in input_files]) diff --git a/pyproteininference/reader.py b/pyproteininference/reader.py index 327a5c8..d4cffc6 100644 --- a/pyproteininference/reader.py +++ b/pyproteininference/reader.py @@ -1111,13 +1111,13 @@ def __init__( if self.scoring_variable != self.Q_VALUE and self.scoring_variable != self.POSTERIOR_ERROR_PROB: self.load_custom_score = True logger.info( - "Pulling custom column based on parameter file input for score, Column: {}".format( + "Pulling custom column based on parameter file input for score, Attribute: {}".format( self.scoring_variable ) ) else: logger.info( - "Pulling no custom columns based on parameter file input for score, using standard Column: {}".format( + "Pulling no custom columns based on parameter file input for score, using standard Attribute: {}".format( self.scoring_variable ) ) diff --git a/pyproteininference/scoring.py b/pyproteininference/scoring.py index e5e8176..e61af6d 100644 --- a/pyproteininference/scoring.py +++ b/pyproteininference/scoring.py @@ -120,6 +120,8 @@ def score_psms(self, score_method="multiplicative_log"): >>> score.score_psms(score_method="best_peptide_per_protein") """ + self._validate_scoring_input() + if score_method not in self.SCORE_METHODS: raise ValueError( "score method '{}' is not a proper method. Score method must be one of the following: '{}'".format( @@ -472,3 +474,18 @@ def additive(self): self.data.protein_score = self.ADDITIVE self.data.short_protein_score = self.SHORT_ADDITIVE self.data.scored_proteins = all_scores + + def _validate_scoring_input(self): + validated_psm_scores = all(x.main_score is not None for x in self.data.get_psm_data()) + if validated_psm_scores: + logger.info( + "PSM scores validated. Score: {} read from file correctly for all PSMs".format( + self.data.parameter_file_object.psm_score + ) + ) + else: + raise ValueError( + "PSM scores not validated. Score: {} not read from file correctly for all PSMs".format( + self.data.parameter_file_object.psm_score + ) + )