Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Prod to v0.3.4 - 11/7/2024 #69

Merged
merged 32 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
ebcefec
inverse has/is adduct ids for losses
JeffreyMinucci Oct 3, 2024
eae2a3d
revert previous changes to adduct function
JeffreyMinucci Oct 3, 2024
2162ecc
negate neutral loss deltas in adduct identifier
alexchao32 Oct 7, 2024
8c7f681
prototype improved adduct identifier
JeffreyMinucci Oct 8, 2024
e2112af
fix bug with adduct str type
JeffreyMinucci Oct 8, 2024
00e4a21
apply adduct id function to axis 1
JeffreyMinucci Oct 8, 2024
3c67361
fix info col formatting
JeffreyMinucci Oct 9, 2024
166269c
change some neutral modifications from neg to pos delta; refactor som…
JeffreyMinucci Oct 10, 2024
052c3e8
Merge pull request #68 from quanted/NTAW-595
JeffreyMinucci Oct 10, 2024
0d5b6d2
Correcting neutral loss representation on input page from M-O to M+O
tmferland Oct 17, 2024
8d6b4d9
Merge branch 'dev' of https://github.com/quanted/nta_app into dev
alexchao32 Oct 17, 2024
ade30ce
NTAW-537: Generate percentile scores in MS2 results
Oct 29, 2024
e3ad669
NTAW-537: Generate percentile scores in MS2 results
Oct 29, 2024
d2a1562
NTAW-537: Generate percentile scores in MS2 results
Oct 29, 2024
c94e7f1
NTAW-537: Generate percentile scores in MS2 results
Oct 30, 2024
4ef78a1
NTAW-602: Add quotient and percentile columns from MS2 file into merg…
Oct 31, 2024
47e3e8e
NTAW-602: Add percentile/quotient columns into merged results
Oct 31, 2024
c5ee027
NTAW-602: Add percentile/quotient columns into merged results
Oct 31, 2024
17026d9
NTAW-606: Update percentile and quotient scores to equal zero when ma…
Nov 1, 2024
2f3353c
NTAW-607: Update merge results formatting
Nov 4, 2024
f04d25a
NTAW-607: Update merge results formatting
Nov 4, 2024
646b0a8
NTAW-607: Revert changes to debug
Nov 4, 2024
aadc7a5
NTAW-607: Revert changes to debug
Nov 4, 2024
f0107c1
NTAW-607: Round column values to two decimal places
Nov 4, 2024
2e51155
NTAW-607: Round column values to two decimal places
Nov 4, 2024
ec7af2c
Update MS2 workflow: convert all values to numeric in score columns
Nov 4, 2024
614c6bb
Update MS2 workflow: convert all values to numeric in score columns
Nov 4, 2024
e4492c0
NTAW-607: Update MS2 workflow to round scores to two units (prior to …
Nov 5, 2024
e5e08b6
NTAW-607: Update MS2 workflow to round scores to two units (prior to …
Nov 5, 2024
94ae2ca
NTAW-608: Fix bug with quotient scores of 1 being empty
Nov 5, 2024
7d5ae42
NTAW-608: Fix bug with quotient scores of 1 being empty
Nov 5, 2024
97d1d24
Update NTA_WebApp_Version_History.txt
alexchao32 Nov 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,6 @@ venv.bak/

# copied as a result of development with locally mounted code
collected_static/

# vscode
.vscode/
411 changes: 253 additions & 158 deletions app/feature/feature.py

Large diffs are not rendered by default.

63 changes: 51 additions & 12 deletions app/merge/merge_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,28 +24,52 @@


def process_MS2_data(ms1_data, ms2_data_list, mass_accuracy=10, rt_accuracy=0.2):
# NTAW-158: Adjust sheet names pulled from MS1 results
matched_df = ms1_data if isinstance(ms1_data, pd.DataFrame) else ms1_data["chemical_results"]
# matched_df = ms1_data if isinstance(ms1_data, pd.DataFrame) else ms1_data["dsstox_search"]

matched_df.rename(columns={"DTXCID_INDIVIDUAL_COMPONENT": "DTXCID"}, inplace=True)

for ms2_data in ms2_data_list:
filename = ms2_data["file_name"]
cfmid_df = ms2_data["file_df"]
mass_col, rt_col, score_col = (f"MASS_MGF_{filename}", f"RT_{filename}", f"SUM_SCORE_{filename}")
# mass_col, rt_col, score_col, q_score_col, percentile_col = (f"MASS_MGF_{filename}", f"RT_{filename}", f"SUM_SCORE_{filename}", f"QUOTIENT_SCORE_{filename}", f"PERCENTILE_SCORE_{filename}")
mass_col, rt_col, score_col, q_score_col, percentile_col = (
f"MASS_MGF_{filename}",
f"RT_{filename}",
f"SUM_SCORE_{filename}",
f"QUOTIENT_SCORE_{filename}",
f"PERCENTILE_SCORE_{filename}",
)
# logger.info('mass_col, rt_col, score_col')
# logger.info(mass_col, rt_col, score_col)

# NTAW-158: Adjust columns renamed based on new MS2 column names
# NTAW-158: Grab the neutral mass column from the MS2 data as this is going to be compared to the neutral mass from the MS1 data
cfmid_df.rename(columns={"MASS_NEUTRAL": mass_col, "RT": rt_col, "SUM_SCORE": score_col}, inplace=True)
# cfmid_df.rename(columns={"MASS_MGF": mass_col, "RT": rt_col, "SUM_SCORE": score_col}, inplace=True)
# cfmid_df.rename(columns = {'MASS_MGF': mass_col, 'RT': rt_col, 'SUM_SCORE' : score_col}, inplace = True)
# cfmid_df.rename(columns={"MASS_in_MGF": mass_col, "RT": rt_col, "energy_sum": score_col}, inplace=True)
cfmid_df.rename(
columns={
"MASS_NEUTRAL": mass_col,
"RT": rt_col,
"SUM_SCORE": score_col,
"Q-SCORE": q_score_col,
"PERCENTILE": percentile_col,
},
inplace=True,
)

# # NTAW-607: Convert retention time column units from seconds to minutes
# cfmid_df[rt_col] = cfmid_df[rt_col] / 60

# # NTAW-607: Add units to MS1 retention time column
# matched_df.rename(columns={"Retention_Time": "Retention_Time(min)"}, inplace=True)

matched_df = matched_df.merge(
cfmid_df[["DTXCID", f"MASS_MGF_{filename}", f"RT_{filename}", f"SUM_SCORE_{filename}"]],
cfmid_df[
[
"DTXCID",
f"MASS_MGF_{filename}",
f"RT_{filename}",
f"SUM_SCORE_{filename}",
f"QUOTIENT_SCORE_{filename}",
f"PERCENTILE_SCORE_{filename}",
]
],
how="left",
on="DTXCID",
)
Expand All @@ -57,10 +81,25 @@ def process_MS2_data(ms1_data, ms2_data_list, mass_accuracy=10, rt_accuracy=0.2)
mass_diff + rt_diff if mass_diff <= mass_accuracy and rt_diff <= rt_accuracy else np.nan
for mass_diff, rt_diff in zip(matched_df["mass_diff"], matched_df["rt_diff"])
]
matched_df[[mass_col, rt_col, score_col]] = matched_df[[mass_col, rt_col, score_col]].where(
(matched_df["mass_diff"] < mass_accuracy) & (matched_df["rt_diff"] < rt_accuracy), [np.nan, np.nan, np.nan]
matched_df[[mass_col, rt_col, score_col, q_score_col, percentile_col]] = matched_df[
[mass_col, rt_col, score_col, q_score_col, percentile_col]
].where(
(matched_df["mass_diff"] < mass_accuracy) & (matched_df["rt_diff"] < rt_accuracy),
[np.nan, np.nan, np.nan, np.nan, np.nan],
)

# NTAW-608: Quotient scores of 1 are showing up as empty cells. As a quick fix, fill in empty quotient cells with 1 (where the percentile cell has a value).
matched_df.loc[matched_df[q_score_col].isna() & matched_df[percentile_col].notna(), q_score_col] = 1

# # NTAW-607: Round MS2 retention time, cfmid score columns to two decimal places
# matched_df[f"RT_{filename}"] = matched_df[f"RT_{filename}"].round(2)
# matched_df[score_col] = matched_df[score_col].round(2)
# matched_df[q_score_col] = matched_df[q_score_col].round(2)
# matched_df[percentile_col] = matched_df[percentile_col].round(2)

# # NTAW-607: Round MS1 retention time column to two decimal places
# matched_df["Retention_Time"] = matched_df["Retention_Time"].round(2)

matched_df.drop(columns=["mass_diff", "rt_diff", "sum_diff"], inplace=True)
matched_df["Median_MS2_Mass"] = matched_df[[col for col in matched_df.columns if "MASS_" in col]].apply(
np.median, axis=1
Expand Down
Loading
Loading