From 3704337a7fc5873357581dbcc1e98c8fdb2fcf1f Mon Sep 17 00:00:00 2001 From: Bishoy Wadie <38420248+Bisho2122@users.noreply.github.com> Date: Wed, 7 Feb 2024 09:44:12 +0100 Subject: [PATCH] Fix FDR formula With the current formulation of the FDR, datasets with high decoy scores can produce FDR values > 1, which causes some annotations to be lost when results are prepared with a cutoff of FDR < 1 (https://github.com/metaspace2020/metaspace/blob/6661211be8c87c2ff617f2cb8716a128bdc3915d/metaspace/engine/sm/engine/annotation_lithops/prepare_results.py#L74). This fixes the FDR calculation by also adding the weighted decoy term to the denominator, i.e. FDR = weighted_decoys / (cumulative_targets + weighted_decoys), so that the FDR always lies in the range [0, 1]. --- metaspace/engine/sm/engine/annotation/fdr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metaspace/engine/sm/engine/annotation/fdr.py b/metaspace/engine/sm/engine/annotation/fdr.py index ff4f3bbee..75588f72f 100644 --- a/metaspace/engine/sm/engine/annotation/fdr.py +++ b/metaspace/engine/sm/engine/annotation/fdr.py @@ -89,7 +89,8 @@ def score_to_fdr_map( cumulative_targets = cumulative_targets + 1 cumulative_decoys = cumulative_decoys + 1 - fdrs = cumulative_decoys / decoy_ratio / cumulative_targets + weighted_decoys = cumulative_decoys / decoy_ratio + fdrs = weighted_decoys / (cumulative_targets + weighted_decoys) fdrs[cumulative_targets == 0] = 1 # Fix NaNs when decoys come before targets if monotonic: