From 1e70ff6ea138000d369638965c9cf30099f6f266 Mon Sep 17 00:00:00 2001 From: Leonardo macOS Date: Fri, 6 Sep 2024 17:56:14 +0200 Subject: [PATCH] started working on analysis of found vs not found diseases, very rough --- src/malco/analysis/eval_diagnose_category.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/malco/analysis/eval_diagnose_category.py b/src/malco/analysis/eval_diagnose_category.py index d3db75fb..b00fbc6d 100644 --- a/src/malco/analysis/eval_diagnose_category.py +++ b/src/malco/analysis/eval_diagnose_category.py @@ -80,6 +80,7 @@ def find_category(omim_term, disease_categories, mondo): omim_wo_match = {} for ppkt in ppkts: + breakpoint() # find this phenopackets category from OMIM category_index = find_category(ppkt[1].iloc[0]["correct_term"], dc_list, mondo) if not category_index: @@ -88,7 +89,7 @@ def find_category(omim_term, disease_categories, mondo): omim_wo_match[ppkt[0]] = ppkt[1].iloc[0]["correct_term"] continue #cat_ind = find_cat_index(category) - # is there a true? ppkt is tuple ("filename", dataframe) --> ppkt[1] is a dataframe + # is there a true, i.e. is any "is_correct" entry true? ppkt is a tuple (group key, dataframe), where the group key is whatever has been used for grouping (e.g. "filename" or "label") --> ppkt[1] is a dataframe if not any(ppkt[1]["is_correct"]): # no --> increase incorrect contingency_table.loc[category_index, "incorrect"] += 1