From 86eb76cd10fa29cc517264d82f1d1cbfcd47102f Mon Sep 17 00:00:00 2001 From: mschubertv Date: Fri, 3 Nov 2023 14:03:50 +0000 Subject: [PATCH] do not remove all rows when one under obs limit --- genesets/test_fet.r | 2 +- genesets/test_lm.r | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/genesets/test_fet.r b/genesets/test_fet.r index f4e265d..a432d8e 100644 --- a/genesets/test_fet.r +++ b/genesets/test_fet.r @@ -40,7 +40,7 @@ test_fet = function(valid, hits, sets, min=2, max=Inf) { setNames(names(sets)) %>% dplyr::bind_rows(.id="label") %>% as_tibble() %>% - na.omit() %>% + filter(size >= min) %>% select(label, size, size_used, everything()) %>% mutate(adj.p = p.adjust(p.value, method="fdr")) %>% arrange(adj.p, p.value) diff --git a/genesets/test_lm.r b/genesets/test_lm.r index bce5489..c885812 100644 --- a/genesets/test_lm.r +++ b/genesets/test_lm.r @@ -19,8 +19,9 @@ test_lm = function(genes, sets, min_n=2, add_means=c(), trim=0, cl=0) { test_one = function(res, set) { dset = res %>% mutate(in_set = !! rlang::sym(label) %in% set + 0) - if (sum(dset$in_set, na.rm=TRUE) < min_n) - return(data.frame(estimate=NA, size=length(set), size_used=NA)) + sz_use = sum(dset$in_set, na.rm=TRUE) + if (sz_use < min_n) + return(data.frame(size=length(set), size_used=sz_use)) sums = dset %>% group_by(in_set) %>% summarize_at(vars(all_of(add_means)), function(x) mean(x, na.rm=TRUE, trim=trim)) %>% @@ -70,7 +71,7 @@ test_lm = function(genes, sets, setNames(names(sets)) %>% dplyr::bind_rows(.id="label") %>% as_tibble() %>% - na.omit() %>% + filter(size_used >= min_n) %>% select(label, size, size_used, !!! rlang::syms(add_means), everything()) %>% mutate(adj.p = p.adjust(p.value, method="fdr")) %>% arrange(adj.p, p.value)