Add timsTOF2024 model

compomics · Feb 9, 2024 · 362f862 · 362f862
1 parent 6dbb709
commit 362f862
Show file tree

Hide file tree

Showing 3 changed files with 33 additions and 11 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -13,7 +13,7 @@ jobs:
     strategy:
       max-parallel: 4
       matrix:
-        python-version: ['3.7', '3.8', '3.9', '3.10']
+        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
 
     steps:
     - uses: actions/checkout@v2

diff --git a/README.md b/README.md
@@ -325,7 +325,8 @@ next to the predictions for singly charged b- and y-ions.
 | CIDch2 | CID | Linear ion trap | Tryptic digest |
 | Immuno-HCD | HCD | Orbitrap | Immunopeptides |
 | CID-TMT | CID | Linear ion trap | Tryptic digest, TMT-labeled |
-
+| timsTOF2023 | CID | Ion mobility quadrupole time-of-flight | Tryptic and elastase digests, immunopeptides (class I) |
+| timsTOF2024 | CID | Ion mobility quadrupole time-of-flight | Tryptic and elastase digests, immunopeptides (class I & class II) |
 
 ### Models, version numbers, and the train and test datasets used to create each model
 
@@ -342,6 +343,8 @@ next to the predictions for singly charged b- and y-ions.
 | HCD2021 | v20210416 | [Combined dataset] (520 579) | [PXD008034](https://doi.org/10.1016/j.jprot.2017.12.006) (35 269)  | 0.932361
 | Immuno-HCD | v20210316 | [Combined dataset] (460 191) | [PXD005231 (HLA-I)](https://doi.org/10.1101/098780) (46 753) <br>[PXD020011 (HLA-II)](https://doi.org/10.3389/fimmu.2020.01981 ) (23 941) | 0.963736<br>0.942383
 | CID-TMT | v20220104 | [in-house dataset] (72 138) | [PXD005890](https://doi.org/10.1021/acs.jproteome.7b00091) (69 768) | 0.851085
+| timsTOF2023 | v20230912 | [Combined dataset] (234 973) | PXD043026<br>PXD046535<br>PXD046543 (13 012) | 0.892540 (tryptic)<br>0.871258 (elastase)<br>0.899834 (class I)<br>0.635548 (class II)
+| timsTOF2024 | v20240105 | [Combined dataset] (480 024) | PXD043026<br>PXD046535<br>PXD046543<br>PXD038782 (25 265) | 0.883270 (tryptic)<br>0.814374 (elastase)<br>0.887192 (class I)<br>0.847951 (class II)
 
 
 To train custom MS²PIP models, please refer to [Training new MS²PIP models](http://compomics.github.io/projects/ms2pip_c/wiki/Training-new-MS2PIP-models.html) on our Wiki pages.
diff --git a/ms2pip/ms2pipC.py b/ms2pip/ms2pipC.py
@@ -180,7 +180,7 @@
             "model_20220104_CID_TMT_Y.xgboost": "299539179ca55d4ac82e9aed6a4e0bd134a9a41e",
         },
     },
-    "timsTOF": {
+    "timsTOF2023": {
         "id": 12,
         "ion_types": ["B", "Y"],
         "peaks_version": "general",
@@ -194,8 +194,25 @@
             "model_20230912_timsTOF_Y.xgboost": "8edd87e0fba5f338d0a0881b5afbcf2f48ec5268",
         },
     },
+    "timsTOF2024": {
+        "id": 13,
+        "ion_types": ["B", "Y"],
+        "peaks_version": "general",
+        "features_version": "normal",
+        "xgboost_model_files": {
+            "b": "model_20240105_timsTOF_B.xgboost",
+            "y": "model_20240105_timsTOF_Y.xgboost",
+        },
+        "model_hash": {
+            "model_20240105_timsTOF_B.xgboost": "d70e145c15cf2bfa30968077a68409699b2fa541",
+            "model_20240105_timsTOF_Y.xgboost": "3f0414ee1ad7cff739e0d6242e25bfc22b6ebfe5",
+        },
+    },
 }
+
+
 MODELS["HCD"] = MODELS["HCD2021"]
+MODELS["timsTOF"] = MODELS["timsTOF2024"]
 
 
 def process_peptides(worker_num, data, afile, modfile, modfile2, PTMmap, model):
@@ -340,7 +357,9 @@ def process_spectra(
         ces = specdict["ce"]
     else:
         specdict = (
-            data[["spec_id", "peptide", "modifications", "charge"]].set_index("spec_id").to_dict()
+            data[["spec_id", "peptide", "modifications", "charge"]]
+            .set_index("spec_id")
+            .to_dict()
         )
     peptides = specdict["peptide"]
     modifications = specdict["modifications"]
@@ -449,9 +468,7 @@ def process_spectra(
             if "ce" in data.columns:
                 dvectors.append(
                     np.array(
-                        ms2pip_pyx.get_vector_ce(
-                            peptide, modpeptide, charge, colen
-                        ),
+                        ms2pip_pyx.get_vector_ce(peptide, modpeptide, charge, colen),
                         dtype=np.uint16,
                     )
                 )  # SD: added collision energy
@@ -825,7 +842,9 @@ def run(self):
                                 index=True,
                                 lineterminator="\n",
                             )
-                        except TypeError:  # Pandas < 1.5 (Required for Python 3.7 support)
+                        except (
+                            TypeError
+                        ):  # Pandas < 1.5 (Required for Python 3.7 support)
                             correlations.to_csv(
                                 corr_filename,
                                 index=True,
@@ -966,9 +985,9 @@ def _write_vector_file(self, results):
 
             # dtargets is a dict, containing targets for every ion type (keys are int)
             for i, t in dtargets.items():
-                df[
-                    "targets_{}".format(MODELS[self.model]["ion_types"][i])
-                ] = np.concatenate(t, axis=None)
+                df["targets_{}".format(MODELS[self.model]["ion_types"][i])] = (
+                    np.concatenate(t, axis=None)
+                )
             df["psmid"] = psmids
 
             all_results.append(df)