Skip to content

Commit

Permalink
Merge pull request #302 from Proteobench/fragpipe_parse_win_path_on_unix
Browse files Browse the repository at this point in the history
🐛 Fix FragPipe parameter parsing
  • Loading branch information
RobbinBouwmeester authored May 25, 2024
2 parents 715e7b0 + 568da5b commit 8b1ea25
Show file tree
Hide file tree
Showing 6 changed files with 713 additions and 9 deletions.
16 changes: 13 additions & 3 deletions proteobench/io/params/fragger.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import re
from collections import namedtuple
from io import BytesIO
from pathlib import Path
from pathlib import PureWindowsPath

import pandas as pd

Expand Down Expand Up @@ -79,7 +79,7 @@ def extract_params(file: BytesIO) -> ProteoBenchParameters:
params.search_engine = "MSFragger"

msfragger_executable = fragpipe_params.loc["fragpipe-config.bin-msfragger"]
msfragger_executable = Path(msfragger_executable).name
msfragger_executable = PureWindowsPath(msfragger_executable).name
match = re.search(VERSION_NO_PATTERN, msfragger_executable)

if match:
Expand Down Expand Up @@ -133,9 +133,19 @@ def extract_params(file: BytesIO) -> ProteoBenchParameters:
_, data = read_fragpipe_workflow(f)
df = pd.DataFrame.from_records(data, columns=Parameter._fields).set_index(Parameter._fields[0])
df.to_csv(file.with_suffix(".csv"))
with open(file, "rb") as f:
params = extract_params(f)
pprint(params.__dict__)
series = pd.Series(params.__dict__)
series.to_csv(file.parent / f"{file.stem}_extracted_params.csv")

file = pathlib.Path("../../../test/params/fragpipe_win_paths.workflow")
with open(file, "rb") as f:
_, data = read_fragpipe_workflow(f)
df = pd.DataFrame.from_records(data, columns=Parameter._fields).set_index(Parameter._fields[0])
df.to_csv(file.with_suffix(".csv"))
with open(file, "rb") as f:
params = extract_params(f)
pprint(params.__dict__)
series = pd.Series(params.__dict__)
series.to_csv(file.parent / "fragger_extracted_params.csv")
series.to_csv(file.parent / f"{file.stem}_extracted_params.csv")
File renamed without changes.
328 changes: 328 additions & 0 deletions test/params/fragpipe_win_paths.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,328 @@
name,value,comment
database.db-path,Q\:\\MISC_PERSONAL\\Bart\\Proteobench\\20240417\\2024-04-17-decoys-ProteoBenchFASTA_DDAQuantification.fasta.fas,
crystalc.run-crystalc,false,
database.decoy-tag,rev_,
diann.fragpipe.cmd-opts,,
diann.generate-msstats,true,
diann.heavy,,
diann.library,,
diann.light,,
diann.medium,,
diann.q-value,0.01,
diann.quantification-strategy,3,
diann.quantification-strategy-2,QuantUMS (high accuracy),
diann.run-dia-nn,false,
diann.run-dia-plex,false,
diann.run-specific-protein-q-value,false,
diann.unrelated-runs,false,
diann.use-predicted-spectra,true,
diaumpire.AdjustFragIntensity,true,
diaumpire.BoostComplementaryIon,false,
diaumpire.CorrThreshold,0,
diaumpire.DeltaApex,0.2,
diaumpire.ExportPrecursorPeak,false,
diaumpire.Q1,true,
diaumpire.Q2,true,
diaumpire.Q3,true,
diaumpire.RFmax,500,
diaumpire.RPmax,25,
diaumpire.RTOverlap,0.3,
diaumpire.SE.EstimateBG,false,
diaumpire.SE.IsoPattern,0.3,
diaumpire.SE.MS1PPM,10,
diaumpire.SE.MS2PPM,20,
diaumpire.SE.MS2SN,1.1,
diaumpire.SE.MassDefectFilter,true,
diaumpire.SE.MassDefectOffset,0.1,
diaumpire.SE.NoMissedScan,1,
diaumpire.SE.SN,1.1,
diaumpire.run-diaumpire,false,
fpop.fpop-tmt,false,
fpop.label_control,,
fpop.label_fpop,,
fpop.region_size,1,
fpop.run-fpop,false,
fpop.subtract-control,false,
fragpipe-config.bin-diann,C\:\\Users\\brvpuyve\\Downloads\\FragPipe-21.1\\fragpipe\\tools\\diann\\1.8.2_beta_8\\win\\DiaNN.exe,
fragpipe-config.bin-ionquant,C\:\\Users\\brvpuyve\\Downloads\\FragPipe-21.1\\fragpipe\\tools\\IonQuant-1.10.12.jar,
fragpipe-config.bin-msfragger,C\:\\Users\\brvpuyve\\Downloads\\FragPipe-21.1\\fragpipe\\tools\\MSFragger-4.0\\MSFragger-4.0.jar,
fragpipe-config.bin-philosopher,C\:\\Users\\brvpuyve\\Downloads\\philosopher_v5.1.0_windows_amd64\\philosopher.exe,
fragpipe-config.bin-python,C\:\\Users\\brvpuyve\\Anaconda3\\python.exe,
freequant.mz-tol,10,
freequant.rt-tol,0.4,
freequant.run-freequant,false,
ionquant.excludemods,,
ionquant.heavy,,
ionquant.imtol,0.05,
ionquant.ionfdr,0.01,
ionquant.light,,
ionquant.locprob,0.75,
ionquant.maxlfq,1,
ionquant.mbr,1,
ionquant.mbrimtol,0.05,
ionquant.mbrmincorr,0,
ionquant.mbrrttol,1,
ionquant.mbrtoprun,10,
ionquant.medium,,
ionquant.minfreq,0,
ionquant.minions,2,
ionquant.minisotopes,2,
ionquant.minscans,3,
ionquant.mztol,10,
ionquant.normalization,1,
ionquant.peptidefdr,1,
ionquant.proteinfdr,1,
ionquant.requantify,1,
ionquant.rttol,0.4,
ionquant.run-ionquant,true,
ionquant.tp,0,
ionquant.uniqueness,0,
ionquant.use-labeling,false,
ionquant.use-lfq,true,
ionquant.writeindex,0,
msbooster.predict-rt,true,
msbooster.predict-spectra,true,
msbooster.run-msbooster,true,
msbooster.use-correlated-features,false,
msfragger.Y_type_masses,,
msfragger.activation_types,all,
msfragger.allowed_missed_cleavage_1,2,
msfragger.allowed_missed_cleavage_2,2,
msfragger.calibrate_mass,2,
msfragger.check_spectral_files,true,
msfragger.clip_nTerm_M,true,
msfragger.deisotope,1,
msfragger.delta_mass_exclude_ranges,"(-1.5,3.5)",
msfragger.deneutralloss,1,
msfragger.diagnostic_fragments,,
msfragger.diagnostic_intensity_filter,0,
msfragger.digest_max_length,50,
msfragger.digest_min_length,7,
msfragger.fragment_ion_series,"b,y",
msfragger.fragment_mass_tolerance,0.02,
msfragger.fragment_mass_units,0,
msfragger.group_variable,0,
msfragger.intensity_transform,0,
msfragger.ion_series_definitions,,
msfragger.isotope_error,0/1/2,
msfragger.labile_search_mode,off,
msfragger.localize_delta_mass,false,
msfragger.mass_diff_to_variable_mod,0,
msfragger.mass_offsets,0,
msfragger.mass_offsets_detailed,,
msfragger.max_fragment_charge,2,
msfragger.max_variable_mods_combinations,5000,
msfragger.max_variable_mods_per_peptide,3,
msfragger.min_fragments_modelling,2,
msfragger.min_matched_fragments,4,
msfragger.min_sequence_matches,2,
msfragger.minimum_peaks,15,
msfragger.minimum_ratio,0.01,
msfragger.misc.fragger.clear-mz-hi,0,
msfragger.misc.fragger.clear-mz-lo,0,
msfragger.misc.fragger.digest-mass-hi,5000,
msfragger.misc.fragger.digest-mass-lo,500,
msfragger.misc.fragger.enzyme-dropdown-1,stricttrypsin,
msfragger.misc.fragger.enzyme-dropdown-2,null,
msfragger.misc.fragger.precursor-charge-hi,4,
msfragger.misc.fragger.precursor-charge-lo,1,
msfragger.misc.fragger.remove-precursor-range-hi,1.5,
msfragger.misc.fragger.remove-precursor-range-lo,-1.5,
msfragger.misc.slice-db,1,
msfragger.num_enzyme_termini,2,
msfragger.output_format,pepXML_pin,
msfragger.output_max_expect,50,
msfragger.output_report_topN,1,
msfragger.output_report_topN_dda_plus,5,
msfragger.output_report_topN_dia1,5,
msfragger.override_charge,false,
msfragger.precursor_mass_lower,-20,
msfragger.precursor_mass_mode,selected,
msfragger.precursor_mass_units,1,
msfragger.precursor_mass_upper,20,
msfragger.precursor_true_tolerance,10,
msfragger.precursor_true_units,1,
msfragger.remainder_fragment_masses,,
msfragger.remove_precursor_peak,1,
msfragger.report_alternative_proteins,true,
msfragger.require_precursor,true,
msfragger.restrict_deltamass_to,all,
msfragger.reuse_dia_fragment_peaks,false,
msfragger.run-msfragger,true,
msfragger.search_enzyme_cut_1,KR,
msfragger.search_enzyme_cut_2,,
msfragger.search_enzyme_name_1,stricttrypsin,
msfragger.search_enzyme_name_2,null,
msfragger.search_enzyme_nocut_1,,
msfragger.search_enzyme_nocut_2,,
msfragger.search_enzyme_sense_1,C,
msfragger.search_enzyme_sense_2,C,
msfragger.table.fix-mods,"0.0,C-Term Peptide,true,-1; 0.0,N-Term Peptide,true,-1; 0.0,C-Term Protein,true,-1; 0.0,N-Term Protein,true,-1; 0.0,G (glycine),true,-1; 0.0,A (alanine),true,-1; 0.0,S (serine),true,-1; 0.0,P (proline),true,-1; 0.0,V (valine),true,-1; 0.0,T (threonine),true,-1; 57.02146,C (cysteine),true,-1; 0.0,L (leucine),true,-1; 0.0,I (isoleucine),true,-1; 0.0,N (asparagine),true,-1; 0.0,D (aspartic acid),true,-1; 0.0,Q (glutamine),true,-1; 0.0,K (lysine),true,-1; 0.0,E (glutamic acid),true,-1; 0.0,M (methionine),true,-1; 0.0,H (histidine),true,-1; 0.0,F (phenylalanine),true,-1; 0.0,R (arginine),true,-1; 0.0,Y (tyrosine),true,-1; 0.0,W (tryptophan),true,-1; 0.0,B ,true,-1; 0.0,J,true,-1; 0.0,O,true,-1; 0.0,U,true,-1; 0.0,X,true,-1; 0.0,Z,true,-1",
msfragger.table.var-mods,"15.9949,M,true,3; 42.0106,[^,true,1; 79.96633,STY,false,3; -17.0265,nQnC,false,1; -18.0106,nE,false,1; 4.025107,K,false,2; 6.020129,R,false,2; 8.014199,K,false,2; 10.008269,R,false,2; 0.0,site_10,false,1; 0.0,site_11,false,1; 0.0,site_12,false,1; 0.0,site_13,false,1; 0.0,site_14,false,1; 0.0,site_15,false,1; 0.0,site_16,false,1",
msfragger.track_zero_topN,0,
msfragger.use_all_mods_in_first_search,false,
msfragger.use_detailed_offsets,false,
msfragger.use_topN_peaks,150,
msfragger.write_calibrated_mzml,false,
msfragger.write_uncalibrated_mgf,false,
msfragger.zero_bin_accept_expect,0,
msfragger.zero_bin_mult_expect,1,
opair.activation1,HCD,
opair.activation2,ETD,
opair.filterOxonium,true,
opair.glyco_db,,
opair.max_glycans,4,
opair.max_isotope_error,2,
opair.min_isotope_error,0,
opair.ms1_tol,20,
opair.ms2_tol,20,
opair.oxonium_filtering_file,,
opair.oxonium_minimum_intensity,0.05,
opair.reverse_scan_order,false,
opair.run-opair,false,
opair.single_scan_type,false,
peptide-prophet.cmd-opts,--decoyprobs --ppm --accmass --nonparam --expectscore,
peptide-prophet.combine-pepxml,false,
peptide-prophet.run-peptide-prophet,false,
percolator.cmd-opts,--only-psms --no-terminate --post-processing-tdc,
percolator.keep-tsv-files,false,
percolator.min-prob,0.5,
percolator.run-percolator,true,
phi-report.dont-use-prot-proph-file,false,
phi-report.filter,--sequential --prot 0.01,
phi-report.pep-level-summary,false,
phi-report.philosoher-msstats,false,
phi-report.print-decoys,false,
phi-report.prot-level-summary,false,
phi-report.remove-contaminants,false,
phi-report.run-report,true,
protein-prophet.cmd-opts,--maxppmdiff 2000000,
protein-prophet.run-protein-prophet,true,
ptmprophet.cmdline,"KEEPOLD STATIC EM\=1 NIONS\=b M\:15.9949,n\:42.0106 MINPROB\=0.5",
ptmprophet.run-ptmprophet,false,
ptmshepherd.adv_params,false,
ptmshepherd.annotation-common,false,
ptmshepherd.annotation-custom,false,
ptmshepherd.annotation-glyco,false,
ptmshepherd.annotation-unimod,true,
ptmshepherd.annotation_file,,
ptmshepherd.annotation_tol,0.01,
ptmshepherd.cap_y_ions,,
ptmshepherd.decoy_type,1,
ptmshepherd.diag_ions,,
ptmshepherd.diagmine_diagMinFoldChange,3.0,
ptmshepherd.diagmine_diagMinSpecDiff,00.2,
ptmshepherd.diagmine_fragMinFoldChange,3.0,
ptmshepherd.diagmine_fragMinPropensity,00.1,
ptmshepherd.diagmine_fragMinSpecDiff,00.1,
ptmshepherd.diagmine_minIonsPerSpec,2,
ptmshepherd.diagmine_minPeps,25,
ptmshepherd.diagmine_pepMinFoldChange,3.0,
ptmshepherd.diagmine_pepMinSpecDiff,00.2,
ptmshepherd.glyco_adducts,,
ptmshepherd.glyco_fdr,1.00,
ptmshepherd.glyco_isotope_max,3,
ptmshepherd.glyco_isotope_min,-1,
ptmshepherd.glyco_ppm_tol,50,
ptmshepherd.glycodatabase,,
ptmshepherd.histo_smoothbins,2,
ptmshepherd.iontype_a,false,
ptmshepherd.iontype_b,true,
ptmshepherd.iontype_c,false,
ptmshepherd.iontype_x,false,
ptmshepherd.iontype_y,true,
ptmshepherd.iontype_z,false,
ptmshepherd.localization_allowed_res,,
ptmshepherd.localization_background,4,
ptmshepherd.max_adducts,0,
ptmshepherd.n_glyco,true,
ptmshepherd.normalization-psms,true,
ptmshepherd.normalization-scans,false,
ptmshepherd.output_extended,false,
ptmshepherd.peakpicking_mass_units,0,
ptmshepherd.peakpicking_minPsm,10,
ptmshepherd.peakpicking_promRatio,0.3,
ptmshepherd.peakpicking_width,0.002,
ptmshepherd.precursor_mass_units,0,
ptmshepherd.precursor_tol,0.01,
ptmshepherd.print_decoys,false,
ptmshepherd.prob_dhexOx,"2,0.5,0.1",
ptmshepherd.prob_dhexY,"2,0.5",
ptmshepherd.prob_neuacOx,"2,0.05,0.2",
ptmshepherd.prob_neugcOx,"2,0.05,0.2",
ptmshepherd.prob_phosphoOx,"2,0.05,0.2",
ptmshepherd.prob_regY,"5,0.5",
ptmshepherd.prob_sulfoOx,"2,0.05,0.2",
ptmshepherd.remainder_masses,,
ptmshepherd.remove_glycan_delta_mass,true,
ptmshepherd.run-shepherd,false,
ptmshepherd.run_diagextract_mode,false,
ptmshepherd.run_diagmine_mode,false,
ptmshepherd.run_glyco_mode,false,
ptmshepherd.spectra_maxfragcharge,2,
ptmshepherd.spectra_ppmtol,20,
ptmshepherd.varmod_masses,,
quantitation.run-label-free-quant,true,
run-psm-validation,true,
run-validation-tab,true,
saintexpress.fragpipe.cmd-opts,,
saintexpress.max-replicates,10,
saintexpress.run-saint-express,false,
saintexpress.virtual-controls,100,
speclibgen.easypqp.extras.max_delta_ppm,15,
speclibgen.easypqp.extras.max_delta_unimod,0.02,
speclibgen.easypqp.extras.rt_lowess_fraction,0,
speclibgen.easypqp.fragment.a,false,
speclibgen.easypqp.fragment.b,true,
speclibgen.easypqp.fragment.c,false,
speclibgen.easypqp.fragment.x,false,
speclibgen.easypqp.fragment.y,true,
speclibgen.easypqp.fragment.z,false,
speclibgen.easypqp.im-cal,Automatic selection of a run as reference IM,
speclibgen.easypqp.neutral_loss,false,
speclibgen.easypqp.rt-cal,noiRT,
speclibgen.easypqp.select-file.text,,
speclibgen.easypqp.select-im-file.text,,
speclibgen.keep-intermediate-files,false,
speclibgen.run-speclibgen,false,
tab-run.delete_calibrated_mzml,false,
tab-run.delete_temp_files,false,
tab-run.sub_mzml_prob_threshold,0.5,
tab-run.write_sub_mzml,false,
tmtintegrator.add_Ref,-1,
tmtintegrator.aggregation_method,0,
tmtintegrator.allow_overlabel,true,
tmtintegrator.allow_unlabeled,true,
tmtintegrator.best_psm,true,
tmtintegrator.channel_num,TMT-6,
tmtintegrator.extraction_tool,IonQuant,
tmtintegrator.glyco_qval,-1,
tmtintegrator.groupby,0,
tmtintegrator.log2transformed,true,
tmtintegrator.max_pep_prob_thres,0,
tmtintegrator.min_ntt,0,
tmtintegrator.min_pep_prob,0.9,
tmtintegrator.min_percent,0.05,
tmtintegrator.min_purity,0.5,
tmtintegrator.min_site_prob,-1,
tmtintegrator.mod_tag,none,
tmtintegrator.ms1_int,true,
tmtintegrator.outlier_removal,true,
tmtintegrator.print_RefInt,false,
tmtintegrator.prot_exclude,none,
tmtintegrator.prot_norm,0,
tmtintegrator.psm_norm,false,
tmtintegrator.quant_level,2,
tmtintegrator.ref_tag,Bridge,
tmtintegrator.run-tmtintegrator,false,
tmtintegrator.tolerance,20,
tmtintegrator.top3_pep,true,
tmtintegrator.unique_gene,0,
tmtintegrator.unique_pep,false,
tmtintegrator.use_glycan_composition,false,
workdir,Q\:\\MISC_PERSONAL\\Bart\\Proteobench\\20240417,
workflow.input.data-type.im-ms,false,
workflow.input.data-type.regular-ms,true,
workflow.misc.save-sdrf,true,
workflow.ram,0,
workflow.threads,47,
Loading

0 comments on commit 8b1ea25

Please sign in to comment.