From 8fd129993a92d85790e038f1271406d18657a014 Mon Sep 17 00:00:00 2001 From: RalfG Date: Thu, 31 Oct 2024 17:27:27 +0100 Subject: [PATCH] Fix peptidoform modification offset and mass (fixes #100) --- .../PXD053286-G1_PTMiprophet.slice.pep.xml | 90 +++++++++++++++++++ psm_utils/io/pepxml.py | 13 ++- tests/test_io/test_pepxml.py | 5 +- 3 files changed, 103 insertions(+), 5 deletions(-) create mode 100644 example_files/PXD053286-G1_PTMiprophet.slice.pep.xml diff --git a/example_files/PXD053286-G1_PTMiprophet.slice.pep.xml b/example_files/PXD053286-G1_PTMiprophet.slice.pep.xml new file mode 100644 index 0000000..daa36d1 --- /dev/null +++ b/example_files/PXD053286-G1_PTMiprophet.slice.pep.xml @@ -0,0 +1,90 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/psm_utils/io/pepxml.py b/psm_utils/io/pepxml.py index 5a84b87..da591ba 100644 --- a/psm_utils/io/pepxml.py +++ b/psm_utils/io/pepxml.py @@ -7,7 +7,7 @@ from pathlib import Path from typing import List, Optional, Union -from pyteomics import pepxml, proforma +from pyteomics import mass, pepxml, proforma from psm_utils.io._base_classes import ReaderBase from psm_utils.peptidoform import Peptidoform @@ -81,13 +81,20 @@ def _parse_peptidoform(peptide: str, modifications: List[dict], charge: Optional n_term = [] c_term = [] for mod in modifications: - mod_tag = proforma.process_tag_tokens(f"{mod['mass']:+}") + # Round mass modification to 6 decimal places, precision from UniMod if mod["position"] == 0: + mod_tag = proforma.process_tag_tokens(f"{mod['mass']:+.6f}") n_term.append(mod_tag) elif mod["position"] == len(peptide) + 1: + mod_tag = proforma.process_tag_tokens(f"{mod['mass']:+.6f}") c_term.append(mod_tag) else: - modifications_dict[mod["position"]].append(mod_tag) + # Convert 1-based to 0-based position + position = mod["position"] - 1 + # Sequence modifications are written as residue mass + modification mass + mod_mass = mod["mass"] - mass.std_aa_mass[peptide[position]] + mod_tag = proforma.process_tag_tokens(f"{mod_mass:+.6f}") + modifications_dict[position].append(mod_tag) sequence = [(aa, modifications_dict[i] or None) for i, aa in enumerate(peptide)] properties = { diff --git a/tests/test_io/test_pepxml.py b/tests/test_io/test_pepxml.py index 1e1d388..c31cd54 100644 --- a/tests/test_io/test_pepxml.py +++ b/tests/test_io/test_pepxml.py @@ -1,5 +1,6 @@ -from psm_utils.peptidoform import Peptidoform from psm_utils.io.pepxml import PepXMLReader +from psm_utils.peptidoform import Peptidoform + class TestPepXMLReader: def test_parse_peptidoform(self): @@ -21,7 +22,7 @@ def test_parse_peptidoform(self): ], "charge": 2, }, - "out": Peptidoform("[+43.017841151532004]-STE[+181.014009]EQNGGGQK/2"), + "out": Peptidoform("[+43.017841]-ST[+79.966331]EEQNGGGQK/2"), }, { "in": {