Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spectronaut params #437

Merged
merged 9 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions proteobench/io/params/spectronaut.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import re
import pandas as pd
from proteobench.io.params import ProteoBenchParameters
from pathlib import Path


def clean_text(text):
    """Return *text* with leading/trailing whitespace, colons and commas removed.

    Only the two ends of the string are trimmed; separators that appear
    between other characters are kept as they are.
    """
    edge_separators = r"^[\s:,\t]+|[\s:,\t]+$"
    return re.sub(edge_separators, "", text)


def extract_value(lines, search_term):
    """Return the cleaned value that follows *search_term* on the first matching line.

    The lines are scanned in order. For the first line containing
    *search_term*, the text between its first and (if any) second occurrence
    is passed through ``clean_text`` and returned. ``None`` is returned when
    no line contains *search_term*.
    """
    for candidate in lines:
        if search_term in candidate:
            pieces = candidate.split(search_term)
            return clean_text(pieces[1])
    return None


def extract_mass_tolerance(lines, search_term, system_default="40ppm"):
    """Return the mass tolerance setting that follows *search_term*.

    The lookup itself is delegated to ``extract_value`` so both helpers share
    one implementation of "find the first matching line and clean its value".

    Parameters
    ----------
    lines
        Iterable of already stripped setting lines.
    search_term
        Literal label to look for, e.g. ``"MS1 Mass Tolerance Strategy:"``.
    system_default
        Value reported when the setting reads ``"System Default"``.
        Defaults to ``"40ppm"``, the value this module has always substituted.

    Returns
    -------
    The extracted value, *system_default* when the file says
    ``"System Default"``, or ``None`` when no line contains *search_term*.
    """
    value = extract_value(lines, search_term)
    if value == "System Default":
        return system_default
    return value


def extract_value_regex(lines, search_term):
    """Regex variant of ``extract_value``: *search_term* is a regular expression.

    The first line for which ``re.search(search_term, line)`` succeeds is
    split with ``re.split`` on the same pattern, and the second element of
    that split is cleaned with ``clean_text`` and returned. ``None`` is
    returned when the pattern matches no line.
    """
    for candidate in lines:
        if re.search(search_term, candidate):
            pieces = re.split(search_term, candidate)
            return clean_text(pieces[1])
    return None


def read_spectronaut_settings(file_path) -> ProteoBenchParameters:
    """Parse a Spectronaut settings text export into ``ProteoBenchParameters``.

    Parameters
    ----------
    file_path
        Either a filesystem path (``str`` / ``os.PathLike``) to the settings
        file, or a file-like object exposing ``read()``. ``read()`` may return
        UTF-8 encoded bytes or already decoded text.

    Returns
    -------
    ProteoBenchParameters
        Parameter object filled with the values found in the file. Settings
        whose label is not present in the file are left as ``None``.

    Raises
    ------
    OSError
        If *file_path* is a path that cannot be opened.
    IndexError
        If the file is empty or its first line has fewer than two
        whitespace-separated tokens (the second token is used as the version).
    """
    # Decide up front whether we were handed a path or an open stream instead
    # of using a bare ``except`` as control flow, which hid real I/O errors.
    if hasattr(file_path, "read"):
        raw = file_path.read()
        text = raw.decode("utf-8") if isinstance(raw, (bytes, bytearray)) else raw
        lines = text.splitlines()
    else:
        # Explicit UTF-8: the export contains box-drawing characters, and the
        # stream branch above decodes as UTF-8 as well.
        with open(file_path, encoding="utf-8") as f:
            lines = f.readlines()

    # Remove surrounding whitespace (including trailing newlines) from each line
    lines = [line.strip() for line in lines]

    params = ProteoBenchParameters()
    params.software_name = "Spectronaut"
    # The version is taken as the second token of the first line.
    params.software_version = lines[0].split()[1]
    params.search_engine = "Spectronaut"
    params.search_engine_version = params.software_version

    # Drop the tree-drawing prefix so every line starts with its label.
    lines = [re.sub(r"^[\s│├─└]*", "", line).strip() for line in lines]

    params.ident_fdr_psm = extract_value(lines, "Precursor Qvalue Cutoff:")
    params.ident_fdr_peptide = None
    params.ident_fdr_protein = extract_value(lines, "Protein Qvalue Cutoff (Experiment):")
    params.enable_match_between_runs = None
    params.precursor_mass_tolerance = extract_mass_tolerance(lines, "MS1 Mass Tolerance Strategy:")
    params.fragment_mass_tolerance = extract_mass_tolerance(lines, "MS2 Mass Tolerance Strategy:")
    params.enzyme = extract_value(lines, "Enzymes / Cleavage Rules:")
    params.allowed_miscleavages = extract_value(lines, "Missed Cleavages:")
    params.max_peptide_length = extract_value(lines, "Max Peptide Length:")
    params.min_peptide_length = extract_value(lines, "Min Peptide Length:")
    params.fixed_mods = extract_value(lines, "Fixed Modifications:")
    # Anchored regex so "Max Variable Modifications:" is not matched here.
    params.variable_mods = extract_value_regex(lines, "^Variable Modifications:")
    params.max_mods = extract_value(lines, "Max Variable Modifications:")
    # NOTE(review): min and max charge both read the same "Peptide Charge:"
    # line, so they always receive the same value — confirm this is intended.
    params.min_precursor_charge = extract_value(lines, "Peptide Charge:")
    params.max_precursor_charge = extract_value(lines, "Peptide Charge:")
    params.scan_window = extract_value(lines, "XIC IM Extraction Window:")
    params.quantification_method = extract_value(
        lines, "Quantity MS Level:"
    )  # "Quantity MS Level:" or "Protein LFQ Method:" or "Quantity Type:"
    params.second_pass = extract_value(lines, "directDIA Workflow:")
    params.protein_inference = extract_value(lines, "Inference Algorithm:")  # or Protein Inference Workflow:

    # The label may be absent; avoid calling str methods on None in that case.
    hybrid_library = extract_value(lines, "Hybrid (DDA + DIA) Library")
    params.predictors_library = (
        None if hybrid_library is None else hybrid_library.replace(":", "").strip()
    )

    return params


if __name__ == "__main__":
    # Manual helper: parse each listed settings file, write the parsed
    # parameters to a CSV with the same stem next to it, and print them.
    settings_files = (
        "../../../test/params/spectronaut_Experiment1_ExperimentSetupOverview_BGS_Factory_Settings.txt",
    )

    for settings_file in settings_files:
        parsed = read_spectronaut_settings(settings_file)
        as_series = pd.Series(vars(parsed))
        csv_target = Path(settings_file).with_suffix(".csv")
        as_series.to_csv(csv_target)
        print(parsed)
3 changes: 2 additions & 1 deletion proteobench/modules/quant_base/quant_base_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from proteobench.io.params.alphadia import extract_params as extract_params_alphadia
from proteobench.io.params.alphapept import extract_params as extract_params_alphapept
from proteobench.io.params.diann import extract_params as extract_params_diann
from proteobench.io.params.spectronaut import read_spectronaut_settings as extract_params_spectronaut
from proteobench.io.params.fragger import extract_params as extract_params_fragger
from proteobench.io.params.i2masschroq import (
extract_params as extract_params_i2masschroq,
Expand Down Expand Up @@ -74,7 +75,7 @@ def __init__(self, token: str = None, proteobench_repo_name: str = "", proteobot
"AlphaDIA": extract_params_alphadia,
"FragPipe (DIA-NN quant)": extract_params_fragger,
"MSAID": extract_params_msaid,
# "Spectronaut": extract_params_spectronaut
"Spectronaut": extract_params_spectronaut,
}

def is_implemented(self) -> bool:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
,0
software_name,Spectronaut
software_version,19.2.240905.62635
search_engine,Spectronaut
search_engine_version,19.2.240905.62635
ident_fdr_psm,0.01
ident_fdr_peptide,
ident_fdr_protein,0.01
enable_match_between_runs,
precursor_mass_tolerance,40ppm
fragment_mass_tolerance,40ppm
enzyme,Trypsin/P
allowed_miscleavages,2
min_peptide_length,7
max_peptide_length,52
fixed_mods,Carbamidomethyl (C)
variable_mods,"Acetyl (Protein N-term), Oxidation (M)"
max_mods,5
min_precursor_charge,False
max_precursor_charge,False
scan_window,Dynamic
quantification_method,MS2
second_pass,directDIA+ (Deep)
protein_inference,IDPicker
predictors_library,False
Loading
Loading