-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #437 from Proteobench/spectronaut_params
Spectronaut params
- Loading branch information
Showing 5 changed files with 2,802 additions and 1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import re | ||
import pandas as pd | ||
from proteobench.io.params import ProteoBenchParameters | ||
from pathlib import Path | ||
|
||
|
||
# Compiled once at import time: clean_text is called for every candidate line,
# so the pattern should not be rebuilt per call. ``\s`` already covers tabs.
_EDGE_SEPARATORS = re.compile(r"^[\s:,]+|[\s:,]+$")


def clean_text(text: str) -> str:
    """Strip leading and trailing whitespace, colons and commas from *text*.

    Only the two ends of the string are trimmed; separators that appear in the
    middle of the value are left untouched.

    Parameters
    ----------
    text : str
        Raw text fragment, typically the part of a settings line that follows
        the parameter label.

    Returns
    -------
    str
        The fragment without surrounding whitespace / ``:`` / ``,`` characters.
    """
    return _EDGE_SEPARATORS.sub("", text)
|
||
|
||
def extract_value(lines, search_term):
    """Return the cleaned text that follows ``search_term`` on the first matching line.

    Parameters
    ----------
    lines : iterable of str
        Lines of the settings file.
    search_term : str
        Literal label to look for (plain substring match, not a regex).

    Returns
    -------
    str or None
        The text between the first and (if present) second occurrence of
        ``search_term`` on the first line containing it, passed through
        ``clean_text``; ``None`` when no line contains the label.
    """
    for entry in lines:
        if search_term in entry:
            pieces = entry.split(search_term)
            # pieces[0] is whatever precedes the label; pieces[1] is the value.
            return clean_text(pieces[1])
    return None
|
||
|
||
def extract_mass_tolerance(lines, search_term):
    """Return the mass tolerance reported for ``search_term``.

    The lookup itself is delegated to ``extract_value`` so that both helpers
    share one definition of "first matching line, cleaned value".

    Parameters
    ----------
    lines : iterable of str
        Lines of the settings file.
    search_term : str
        Literal label of the tolerance setting.

    Returns
    -------
    str or None
        ``"40ppm"`` when the file reports ``"System Default"``, otherwise the
        extracted value unchanged; ``None`` when the label is not present.
    """
    value = extract_value(lines, search_term)
    # The file only says "System Default" here; this parser substitutes 40ppm.
    # NOTE(review): confirm 40ppm matches the vendor default for all versions.
    if value == "System Default":
        return "40ppm"
    return value
|
||
|
||
def extract_value_regex(lines, search_term):
    """Return the cleaned text that follows a regex match on the first matching line.

    Parameters
    ----------
    lines : iterable of str
        Lines of the settings file.
    search_term : str or compiled pattern
        Regular expression identifying the parameter label.

    Returns
    -------
    str or None
        The text after the first match of ``search_term`` (up to the next
        match, if any) on the first line where the pattern matches, passed
        through ``clean_text``; ``None`` when no line matches.
    """
    pattern = re.compile(search_term)
    # re.split() interleaves the captured groups of each match into its result,
    # so the text following the first match sits after ``pattern.groups``
    # group slots. Indexing with a plain 1 would return a captured group
    # instead of the value whenever the pattern contains parentheses.
    value_index = pattern.groups + 1
    for line in lines:
        parts = pattern.split(line)
        # A single element means the pattern did not match this line.
        if len(parts) > 1:
            return clean_text(parts[value_index])
    return None
|
||
|
||
def read_spectronaut_settings(file_path) -> ProteoBenchParameters:
    """Parse a Spectronaut experiment-setup overview into ProteoBench parameters.

    Parameters
    ----------
    file_path : str, os.PathLike or file-like object
        Either a path to the settings text file, or an already opened object
        exposing ``read()`` (returning UTF-8 encoded bytes or text).

    Returns
    -------
    ProteoBenchParameters
        Parameter object populated with the values found in the file. Settings
        whose label is not present in the file are left as ``None``.

    Raises
    ------
    OSError
        If ``file_path`` is a path that cannot be opened.
    IndexError
        If the file is empty or its first line has no second
        whitespace-separated token (used as the software version).
    """
    if hasattr(file_path, "read"):
        # Already-opened handle (e.g. an upload buffer): may yield bytes or str.
        raw = file_path.read()
        text = raw.decode("utf-8") if isinstance(raw, (bytes, bytearray)) else raw
        lines = text.splitlines()
    else:
        # Explicit UTF-8: the file contains box-drawing characters, and the
        # platform default encoding is not guaranteed to decode them.
        with open(file_path, encoding="utf-8") as f:
            lines = f.readlines()

    # Remove surrounding whitespace (including newlines) from each line
    lines = [line.strip() for line in lines]

    params = ProteoBenchParameters()
    params.software_name = "Spectronaut"
    # The version is taken from the second token of the first line.
    params.software_version = lines[0].split()[1]
    params.search_engine = "Spectronaut"
    params.search_engine_version = params.software_version

    # Drop the tree-drawing prefix so labels start at column 0; the anchored
    # regex used for "Variable Modifications:" below relies on this.
    lines = [re.sub(r"^[\s│├─└]*", "", line).strip() for line in lines]

    params.ident_fdr_psm = extract_value(lines, "Precursor Qvalue Cutoff:")
    params.ident_fdr_peptide = None
    params.ident_fdr_protein = extract_value(lines, "Protein Qvalue Cutoff (Experiment):")
    params.enable_match_between_runs = None
    params.precursor_mass_tolerance = extract_mass_tolerance(lines, "MS1 Mass Tolerance Strategy:")
    params.fragment_mass_tolerance = extract_mass_tolerance(lines, "MS2 Mass Tolerance Strategy:")
    params.enzyme = extract_value(lines, "Enzymes / Cleavage Rules:")
    params.allowed_miscleavages = extract_value(lines, "Missed Cleavages:")
    params.max_peptide_length = extract_value(lines, "Max Peptide Length:")
    params.min_peptide_length = extract_value(lines, "Min Peptide Length:")
    params.fixed_mods = extract_value(lines, "Fixed Modifications:")
    # Anchored so that "Max Variable Modifications:" is not picked up instead.
    params.variable_mods = extract_value_regex(lines, "^Variable Modifications:")
    params.max_mods = extract_value(lines, "Max Variable Modifications:")
    # Both charge bounds are filled from the same "Peptide Charge:" setting.
    peptide_charge = extract_value(lines, "Peptide Charge:")
    params.min_precursor_charge = peptide_charge
    params.max_precursor_charge = peptide_charge
    params.scan_window = extract_value(lines, "XIC IM Extraction Window:")
    params.quantification_method = extract_value(
        lines, "Quantity MS Level:"
    )  # "Quantity MS Level:" or "Protein LFQ Method:" or "Quantity Type:"
    params.second_pass = extract_value(lines, "directDIA Workflow:")
    params.protein_inference = extract_value(lines, "Inference Algorithm:")  # or Protein Inference Workflow:

    # The label is searched without its trailing colon, so strip a leftover
    # colon from the value. A missing line yields None instead of crashing.
    hybrid_library = extract_value(lines, "Hybrid (DDA + DIA) Library")
    params.predictors_library = (
        hybrid_library.replace(":", "").strip() if hybrid_library is not None else None
    )

    return params
|
||
|
||
if __name__ == "__main__":
    # Ad-hoc driver: parse each listed settings file, write the parsed
    # parameters next to it as a .csv, and print them.
    for settings_file in (
        "../../../test/params/spectronaut_Experiment1_ExperimentSetupOverview_BGS_Factory_Settings.txt",
    ):
        parsed = read_spectronaut_settings(settings_file)
        csv_target = Path(settings_file).with_suffix(".csv")
        pd.Series(parsed.__dict__).to_csv(csv_target)
        print(parsed)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
25 changes: 25 additions & 0 deletions
25
test/params/spectronaut_Experiment1_ExperimentSetupOverview_BGS_Factory_Settings.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
,0 | ||
software_name,Spectronaut | ||
software_version,19.2.240905.62635 | ||
search_engine,Spectronaut | ||
search_engine_version,19.2.240905.62635 | ||
ident_fdr_psm,0.01 | ||
ident_fdr_peptide, | ||
ident_fdr_protein,0.01 | ||
enable_match_between_runs, | ||
precursor_mass_tolerance,40ppm | ||
fragment_mass_tolerance,40ppm | ||
enzyme,Trypsin/P | ||
allowed_miscleavages,2 | ||
min_peptide_length,7 | ||
max_peptide_length,52 | ||
fixed_mods,Carbamidomethyl (C) | ||
variable_mods,"Acetyl (Protein N-term), Oxidation (M)" | ||
max_mods,5 | ||
min_precursor_charge,False | ||
max_precursor_charge,False | ||
scan_window,Dynamic | ||
quantification_method,MS2 | ||
second_pass,directDIA+ (Deep) | ||
protein_inference,IDPicker | ||
predictors_library,False |
Oops, something went wrong.