From 3704337a7fc5873357581dbcc1e98c8fdb2fcf1f Mon Sep 17 00:00:00 2001
From: Bishoy Wadie <38420248+Bisho2122@users.noreply.github.com>
Date: Wed, 7 Feb 2024 09:44:12 +0100
Subject: [PATCH] Fix FDR formula

With the current FDR formula, datasets with high decoy scores can produce FDR values > 1. The affected annotations are then lost when results are prepared, because a cutoff of FDR < 1 is applied (https://github.com/metaspace2020/metaspace/blob/6661211be8c87c2ff617f2cb8716a128bdc3915d/metaspace/engine/sm/engine/annotation_lithops/prepare_results.py#L74).

This fixes the FDR calculation by also adding the weighted decoy term to the denominator, which ensures that the FDR always lies in the range [0, 1].
---
 metaspace/engine/sm/engine/annotation/fdr.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/metaspace/engine/sm/engine/annotation/fdr.py b/metaspace/engine/sm/engine/annotation/fdr.py
index ff4f3bbee..75588f72f 100644
--- a/metaspace/engine/sm/engine/annotation/fdr.py
+++ b/metaspace/engine/sm/engine/annotation/fdr.py
@@ -89,7 +89,8 @@ def score_to_fdr_map(
         cumulative_targets = cumulative_targets + 1
         cumulative_decoys = cumulative_decoys + 1
 
-    fdrs = cumulative_decoys / decoy_ratio / cumulative_targets
+    weighted_decoys = cumulative_decoys / decoy_ratio
+    fdrs = weighted_decoys / (cumulative_targets + weighted_decoys)
     fdrs[cumulative_targets == 0] = 1  # Fix NaNs when decoys come before targets
 
     if monotonic: