Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dia #375

Merged
merged 15 commits on Sep 4, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
"Modified.Sequence" = "Sequence"
"File.Name" = "Raw file"
"Protein.Names" = "Proteins"
"proforma" = "proforma"
"Precursor.Charge" = "Charge"
"Precursor.Quantity" = "Intensity"

Expand Down Expand Up @@ -30,3 +29,11 @@
[general]
contaminant_flag = "Cont_"
decoy_flag = true

[modification_mapper]
"parse_column" = "Modified.Sequence"
"before_aa" = false
"isalpha" = true
"isupper" = true
"pattern" = "\\[([^]]+)\\]"
"modification_dict" = {"UniMod:35" = "Oxidation", "UniMod:1" = "Acetyl", "UniMod:4" = "Carbamidomethyl"}
7 changes: 0 additions & 7 deletions proteobench/io/parsing/parse_ion.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,6 @@ def load_input_file(input_csv: str, input_format: str) -> pd.DataFrame:
input_data_frame["proforma"] = input_data_frame["Modified sequence"]
elif input_format == "DIA-NN":
input_data_frame = pd.read_csv(input_csv, low_memory=False, sep="\t")
input_data_frame["proforma"] = input_data_frame["modified_sequence"].apply(
lambda x: (
re.sub(r"^\(UniMod:(\d+)\)", r"[UNIMOD:\1]-", x)
if re.match(r"^\(UniMod:\d+\)", x)
else re.sub(r"\(UniMod:(\d+)\)", r"[UNIMOD:\1]", x)
)
)
return input_data_frame


Expand Down
8 changes: 4 additions & 4 deletions proteobench/io/parsing/parse_settings_ion.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@


class ParseSettingsBuilder:
def __init__(self, parse_settings_dir=None, ion_approach="dda"):
def __init__(self, parse_settings_dir=None, acquisition_method="dda"):
if parse_settings_dir is None:
parse_settings_dir = os.path.join(os.path.dirname(__file__), "io_parse_settings")
if ion_approach == "dda":
if acquisition_method == "dda":
self.PARSE_SETTINGS_FILES = {
"MaxQuant": os.path.join(parse_settings_dir, "parse_settings_maxquant.toml"),
"FragPipe": os.path.join(parse_settings_dir, "parse_settings_fragpipe.toml"),
Expand All @@ -26,7 +26,7 @@ def __init__(self, parse_settings_dir=None, ion_approach="dda"):
"Sage": os.path.join(parse_settings_dir, "parse_settings_sage.toml"),
"Custom": os.path.join(parse_settings_dir, "parse_settings_custom.toml"),
}
elif ion_approach == "dia":
elif acquisition_method == "dia":
self.PARSE_SETTINGS_FILES = {
"DIA-NN": os.path.join(parse_settings_dir, "parse_settings_diann.toml"),
# "Skyline": os.path.join(parse_settings_dir, "parse_settings_skyline.toml"),
Expand All @@ -37,7 +37,7 @@ def __init__(self, parse_settings_dir=None, ion_approach="dda"):
"Custom": os.path.join(parse_settings_dir, "parse_settings_custom.toml"),
}
else:
raise ValueError("Invalid ion approach. Please choose either 'dda' or 'dia'.")
raise ValueError("Invalid acquisition mode. Please choose either 'dda' or 'dia'.")
self.PARSE_SETTINGS_FILES_MODULE = os.path.join(parse_settings_dir, "module_settings.toml")
self.INPUT_FORMATS = list(self.PARSE_SETTINGS_FILES.keys())

Expand Down
59 changes: 51 additions & 8 deletions proteobench/modules/dia_quant_ion/dia_quant_ion_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,32 @@


class DIAQuantIonModule(QuantModule):
"""Object is used as a main interface with the Proteobench library within the module."""
"""DIA Quantification Module for Ion level Quantification."""

def __init__(
self,
token,
proteobot_repo_name="Proteobot/Results_quant_ion_DIA",
proteobench_repo_name="Proteobench/Results_quant_ion_DIA",
token: str,
proteobot_repo_name: str = "Proteobot/Results_quant_ion_DIA",
proteobench_repo_name: str = "Proteobench/Results_quant_ion_DIA",
):
"""
DIA Quantification Module for Ion level Quantification.

Parameters
----------
token
GitHub token for the user.
proteobot_repo_name
Name of the repository for pull requests and where new points are added.
proteobench_repo_name
Name of the repository where the benchmarking results will be stored.

Attributes
----------
precursor_name: str
Name of the precursor ion.

"""
super().__init__(token, proteobot_repo_name=proteobot_repo_name, proteobench_repo_name=proteobench_repo_name)
self.precursor_name = "precursor ion"

Expand All @@ -38,9 +56,29 @@ def is_implemented(self) -> bool:
def benchmarking(
self, input_file: str, input_format: str, user_input: dict, all_datapoints, default_cutoff_min_prec: int = 3
) -> tuple[DataFrame, DataFrame, DataFrame]:
"""Main workflow of the module. Used to benchmark workflow results."""
# Parse user config
"""
Main workflow of the module. Used to benchmark workflow results.

Parameters
----------
input_file
Path to the workflow output file.
input_format
Format of the workflow output file.
user_input
User provided parameters for plotting.
all_datapoints
DataFrame containing all datapoints from the proteobench repo.
default_cutoff_min_prec
Minimum number of runs an ion has to be identified in.

Returns
-------
tuple[DataFrame, DataFrame, DataFrame]
Tuple containing the intermediate data structure, all datapoints, and the input DataFrame.
"""

# Parse workflow output file
try:
input_df = load_input_file(input_file, input_format)
except pd.errors.ParserError as e:
Expand All @@ -50,8 +88,9 @@ def benchmarking(
except Exception as e:
raise ParseSettingsError(f"Error parsing the input file: {e}")

# Parse settings file
try:
parse_settings = ParseSettingsBuilder(ion_approach="dia").build_parser(input_format)
parse_settings = ParseSettingsBuilder(acquisition_method="dia").build_parser(input_format)
except KeyError as e:
raise ParseSettingsError(f"Error parsing settings file for parsing, settings seem to be missing: {e}")
except FileNotFoundError as e:
Expand All @@ -66,31 +105,35 @@ def benchmarking(
except Exception as e:
raise ConvertStandardFormatError(f"Error converting to standard format: {e}")

# calculate quantification scores
try:
# Get quantification data
quant_score = QuantScores(
self.precursor_name, parse_settings.species_expected_ratio(), parse_settings.species_dict()
)
except Exception as e:
raise QuantificationError(f"Error generating quantification scores: {e}")

# generate intermediate data structure
try:
intermediate_data_structure = quant_score.generate_intermediate(standard_format, replicate_to_raw)
except Exception as e:
raise IntermediateFormatGenerationError(f"Error generating intermediate data structure: {e}")

# generate current data point
try:
current_datapoint = Datapoint.generate_datapoint(
intermediate_data_structure, input_format, user_input, default_cutoff_min_prec=default_cutoff_min_prec
)
except Exception as e:
raise DatapointGenerationError(f"Error generating datapoint: {e}")

# add current data point to all datapoints
try:
all_datapoints = self.add_current_data_point(all_datapoints, current_datapoint)
except Exception as e:
raise DatapointAppendError(f"Error adding current data point: {e}")

# return intermediate data structure, all datapoints, and input DataFrame
# TODO check why there are NA and inf/-inf values
return (
intermediate_data_structure,
Expand Down
2 changes: 1 addition & 1 deletion webinterface/pages/DIA_Quant_ion.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def __init__(self):
except KeyError:
token = ""
self.ionmodule: DDAQuantIonModule = DDAQuantIonModule(token=token)
self.parsesettingsbuilder = ParseSettingsBuilder()
self.parsesettingsbuilder = ParseSettingsBuilder(acquisition_method="dia")

self.quant_uiobjects = QuantUIObjects(self.variables_dda_quant, self.ionmodule, self.parsesettingsbuilder)

Expand Down
2 changes: 1 addition & 1 deletion webinterface/pages/base_pages/quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from streamlit_extras.let_it_rain import rain

from proteobench.io.parsing.parse_settings_ion import ParseSettingsBuilder
from proteobench.modules.dda_quant_ion.dda_quant_ion_module import QuantIonDDAModule as IonModule
from proteobench.modules.dda_quant_ion.dda_quant_ion_module import DDAQuantIonModule as IonModule
from proteobench.plotting.plot_quant import PlotDataPoint

logger: logging.Logger = logging.getLogger(__name__)
Expand Down
Loading