Skip to content

Commit

Permalink
Refactored return data for all methods
Browse files Browse the repository at this point in the history
  • Loading branch information
PauBadiaM committed Jun 20, 2024
1 parent 6fc3916 commit 2092219
Show file tree
Hide file tree
Showing 15 changed files with 85 additions and 104 deletions.
2 changes: 1 addition & 1 deletion decoupler/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Package version string; the single source the tuple below is derived from.
__version__ = '1.6.3' # noqa: F401
# Same version as a tuple of ints (here (1, 6, 3)) for numeric comparisons.
# Every dot-separated component must parse with int(), so a suffix such as 'rc1' would raise ValueError.
__version_info__ = tuple([int(num) for num in __version__.split('.')]) # noqa: F401

from .pre import extract, match, rename_net, get_net_mat, filt_min_n, mask_features # noqa: F401
from .pre import extract, match, rename_net, get_net_mat, filt_min_n, mask_features, return_data # noqa: F401
from .utils import (
melt, show_methods, check_corr, get_toy_data, summarize_acts, assign_groups, dense_run, p_adjust_fdr, shuffle_net,
read_gmt # noqa: F401
Expand Down
9 changes: 2 additions & 7 deletions decoupler/method_aucell.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from numpy.random import default_rng
from tqdm import tqdm

from .pre import extract, rename_net, filt_min_n
from .pre import extract, rename_net, filt_min_n, return_data

from anndata import AnnData
import numba as nb
Expand Down Expand Up @@ -151,9 +151,4 @@ def run_aucell(mat, net, source='source', target='target', n_up=None, min_n=5, s
estimate = pd.DataFrame(estimate, index=r, columns=net.index)
estimate.name = 'aucell_estimate'

# AnnData support
if isinstance(mat, AnnData):
# Update obsm AnnData object
mat.obsm[estimate.name] = estimate
else:
return estimate
return return_data(mat=mat, results=(estimate, ))
15 changes: 2 additions & 13 deletions decoupler/method_gsea.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from numpy.random import default_rng
from scipy.sparse import csr_matrix

from .pre import extract, rename_net, filt_min_n
from .pre import extract, rename_net, filt_min_n, return_data
from .utils import p_adjust_fdr

from anndata import AnnData
Expand Down Expand Up @@ -369,15 +369,4 @@ def run_gsea(mat, net, source='source', target='target', times=1000, batch_size=
pvals = pd.DataFrame(pvals, index=r, columns=net.index)
pvals.name = 'gsea_pvals'

# AnnData support
if isinstance(mat, AnnData):
# Update obsm AnnData object
mat.obsm[estimate.name] = estimate
if norm_e is not None:
mat.obsm[norm_e.name] = norm_e
mat.obsm[pvals.name] = pvals
else:
if pvals is not None:
return estimate, norm_e, pvals
else:
return estimate
return return_data(mat=mat, results=(estimate, norm_e, pvals))
9 changes: 2 additions & 7 deletions decoupler/method_gsva.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from scipy.sparse import csr_matrix
from numpy.random import default_rng

from .pre import extract, rename_net, filt_min_n
from .pre import extract, rename_net, filt_min_n, return_data
from .method_gsea import std

from anndata import AnnData
Expand Down Expand Up @@ -232,9 +232,4 @@ def run_gsva(mat, net, source='source', target='target', kcdf=False, mx_diff=Tru
estimate = pd.DataFrame(estimate, index=r, columns=net.index)
estimate.name = 'gsva_estimate'

# AnnData support
if isinstance(mat, AnnData):
# Update obsm AnnData object
mat.obsm[estimate.name] = estimate
else:
return estimate
return return_data(mat=mat, results=(estimate, ))
9 changes: 2 additions & 7 deletions decoupler/method_mdt.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pandas as pd
from scipy.sparse import csr_matrix

from .pre import extract, match, rename_net, get_net_mat, filt_min_n
from .pre import extract, match, rename_net, get_net_mat, filt_min_n, return_data

from anndata import AnnData
from tqdm import tqdm
Expand Down Expand Up @@ -117,9 +117,4 @@ def run_mdt(mat, net, source='source', target='target', weight='weight', trees=1
estimate = pd.DataFrame(estimate, index=r, columns=sources)
estimate.name = 'mdt_estimate'

# AnnData support
if isinstance(mat, AnnData):
# Update obsm AnnData object
mat.obsm[estimate.name] = estimate
else:
return estimate
return return_data(mat=mat, results=(estimate, ))
10 changes: 2 additions & 8 deletions decoupler/method_mlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pandas as pd
from scipy.sparse import csr_matrix

from .pre import extract, match, rename_net, get_net_mat, filt_min_n
from .pre import extract, match, rename_net, get_net_mat, filt_min_n, return_data

from anndata import AnnData
from scipy import stats
Expand Down Expand Up @@ -131,10 +131,4 @@ def run_mlm(mat, net, source='source', target='target', weight='weight', batch_s
pvals = pd.DataFrame(pvals, index=r, columns=sources)
pvals.name = 'mlm_pvals'

# AnnData support
if isinstance(mat, AnnData):
# Update obsm AnnData object
mat.obsm[estimate.name] = estimate
mat.obsm[pvals.name] = pvals
else:
return estimate, pvals
return return_data(mat=mat, results=(estimate, pvals))
10 changes: 2 additions & 8 deletions decoupler/method_ora.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from scipy.stats import rankdata
from math import log, exp, lgamma

from .pre import extract, rename_net, filt_min_n
from .pre import extract, rename_net, filt_min_n, return_data
from .utils import p_adjust_fdr

from anndata import AnnData
Expand Down Expand Up @@ -315,10 +315,4 @@ def run_ora(mat, net, source='source', target='target', n_up=None, n_bottom=0, n
estimate = pd.DataFrame(-np.log10(pvals), index=r, columns=pvals.columns)
estimate.name = 'ora_estimate'

# AnnData support
if isinstance(mat, AnnData):
# Update obsm AnnData object
mat.obsm[estimate.name] = estimate
mat.obsm[pvals.name] = pvals
else:
return estimate, pvals
return return_data(mat=mat, results=(estimate, pvals))
9 changes: 2 additions & 7 deletions decoupler/method_udt.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from scipy.sparse import csr_matrix
import pandas as pd

from .pre import extract, match, rename_net, get_net_mat, filt_min_n
from .pre import extract, match, rename_net, get_net_mat, filt_min_n, return_data

from anndata import AnnData
from tqdm import tqdm
Expand Down Expand Up @@ -114,9 +114,4 @@ def run_udt(mat, net, source='source', target='target', weight='weight', min_lea
estimate = pd.DataFrame(estimate, index=r, columns=sources)
estimate.name = 'udt_estimate'

# AnnData support
if isinstance(mat, AnnData):
# Update obsm AnnData object
mat.obsm[estimate.name] = estimate
else:
return estimate
return return_data(mat=mat, results=(estimate, ))
10 changes: 2 additions & 8 deletions decoupler/method_ulm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from scipy.stats import t

from .pre import extract, match, rename_net, get_net_mat, filt_min_n
from .pre import extract, match, rename_net, get_net_mat, filt_min_n, return_data

from anndata import AnnData
from tqdm import tqdm
Expand Down Expand Up @@ -124,10 +124,4 @@ def run_ulm(mat, net, source='source', target='target', weight='weight', batch_s
pvals = pd.DataFrame(pvals, index=r, columns=sources)
pvals.name = 'ulm_pvals'

# AnnData support
if isinstance(mat, AnnData):
# Update obsm AnnData object
mat.obsm[estimate.name] = estimate
mat.obsm[pvals.name] = pvals
else:
return estimate, pvals
return return_data(mat=mat, results=(estimate, pvals))
10 changes: 2 additions & 8 deletions decoupler/method_viper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from scipy.stats import rankdata
from scipy.stats import norm

from .pre import extract, match, rename_net, get_net_mat, filt_min_n
from .pre import extract, match, rename_net, get_net_mat, filt_min_n, return_data

from anndata import AnnData
from tqdm import tqdm
Expand Down Expand Up @@ -308,10 +308,4 @@ def run_viper(mat, net, source='source', target='target', weight='weight', pleio
pvals = pd.DataFrame(pvals, index=r, columns=sources)
pvals.name = 'viper_pvals'

# AnnData support
if isinstance(mat, AnnData):
# Update obsm AnnData object
mat.obsm[estimate.name] = estimate
mat.obsm[pvals.name] = pvals
else:
return estimate, pvals
return return_data(mat=mat, results=(estimate, pvals))
16 changes: 2 additions & 14 deletions decoupler/method_wmean.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pandas as pd
from scipy.sparse import csr_matrix

from .pre import extract, match, rename_net, get_net_mat, filt_min_n
from .pre import extract, match, rename_net, get_net_mat, filt_min_n, return_data
from .method_gsea import std

from anndata import AnnData
Expand Down Expand Up @@ -177,16 +177,4 @@ def run_wmean(mat, net, source='source', target='target', weight='weight', times
pvals = pd.DataFrame(pvals, index=r, columns=sources)
pvals.name = 'wmean_pvals'

# AnnData support
if isinstance(mat, AnnData):
# Update obsm AnnData object
mat.obsm[estimate.name] = estimate
if pvals is not None:
mat.obsm[norm.name] = norm
mat.obsm[corr.name] = corr
mat.obsm[pvals.name] = pvals
else:
if pvals is not None:
return estimate, norm, corr, pvals
else:
return estimate
return return_data(mat=mat, results=(estimate, norm, corr, pvals))
16 changes: 2 additions & 14 deletions decoupler/method_wsum.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pandas as pd
from scipy.sparse import csr_matrix

from .pre import extract, match, rename_net, get_net_mat, filt_min_n
from .pre import extract, match, rename_net, get_net_mat, filt_min_n, return_data
from .method_gsea import std

from anndata import AnnData
Expand Down Expand Up @@ -173,16 +173,4 @@ def run_wsum(mat, net, source='source', target='target', weight='weight', times=
pvals = pd.DataFrame(pvals, index=r, columns=sources)
pvals.name = 'wsum_pvals'

# AnnData support
if isinstance(mat, AnnData):
# Update obsm AnnData object
mat.obsm[estimate.name] = estimate
if pvals is not None:
mat.obsm[norm.name] = norm
mat.obsm[corr.name] = corr
mat.obsm[pvals.name] = pvals
else:
if pvals is not None:
return estimate, norm, corr, pvals
else:
return estimate
return return_data(mat=mat, results=(estimate, norm, corr, pvals))
22 changes: 21 additions & 1 deletion decoupler/pre.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np
from scipy.sparse import csr_matrix, issparse
import pandas as pd

import logging
from anndata import AnnData


Expand Down Expand Up @@ -278,3 +278,23 @@ def mask_features(mat, log=False, thr=1, use_raw=False):
else:
raise ValueError("""mat must be a list of [matrix, samples, features], dataframe (samples x features) or an AnnData
instance.""")


def add_to_anndata(mat, results):
    """
    Store every available result in ``mat.obsm``, in place.

    Parameters
    ----------
    mat : AnnData
        Object whose ``obsm`` mapping is updated. Each result is stored under
        its own ``name`` attribute.
    results : iterable
        Result objects exposing a ``name`` attribute. ``None`` entries are
        skipped.

    Returns
    -------
    None
    """

    for result in results:
        # Callers pass None for outputs that were not produced; skip those.
        if result is not None:
            mat.obsm[result.name] = result


def return_data(mat, results):
    """
    Hand back method results in the form that matches the type of ``mat``.

    Parameters
    ----------
    mat : AnnData or any other input
        Input that was given to the method.
    results : tuple
        Result objects; entries may be ``None``. When ``mat`` is an
        ``AnnData``, the first entry must not be ``None`` because its
        ``index`` is used to decide which observations are kept.

    Returns
    -------
    tuple, None or AnnData
        - ``mat`` is not an ``AnnData``: tuple with the non-``None`` results.
        - ``mat`` is an ``AnnData`` with as many observations as
          ``results[0]`` has rows: results are stored in ``mat.obsm`` in
          place and ``None`` is returned.
        - otherwise: a warning is logged and a copy of ``mat`` restricted to
          ``results[0].index`` is returned, with the results stored in its
          ``obsm``. The object passed in is left unchanged.
    """

    # NOTE(review): with a single result this yields a 1-tuple, whereas the
    # single-output methods used to return the bare DataFrame for non-AnnData
    # input -- confirm that this change of return shape is intended.
    if not isinstance(mat, AnnData):
        return tuple(result for result in results if result is not None)

    kept_obs = results[0].index
    if mat.obs_names.size == kept_obs.size:
        add_to_anndata(mat, results)
        return None

    # Sizes differ: obsm entries could not be aligned with the input, so work
    # on a subset copy that only holds the observations present in the results.
    logging.warning('Provided AnnData contains empty observations. Returning repaired object.')
    mat = mat[kept_obs, :].copy()
    add_to_anndata(mat, results)
    return mat
40 changes: 39 additions & 1 deletion decoupler/tests/test_pre.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
import numpy as np
from scipy.sparse import csr_matrix
from anndata import AnnData
from ..pre import check_mat, extract, filt_min_n, match, rename_net, get_net_mat, mask_features
from ..pre import (
check_mat, extract, filt_min_n, match, rename_net, get_net_mat, mask_features,
return_data, add_to_anndata
)


def test_check_mat():
Expand Down Expand Up @@ -101,3 +104,38 @@ def test_mask_features():
mask_features('asdfg')
with pytest.raises(ValueError):
mask_features(adata, use_raw=True)


def test_add_to_anndata():
    """Available results are written to ``obsm``; ``None`` entries add nothing."""
    m = np.array([[1, 0, 2], [1, 0, 3]])
    r = np.array(['S1', 'S2'])
    c = np.array(['G1', 'G2', 'G3'])
    df = pd.DataFrame(m, index=r, columns=c)
    adata = AnnData(df.astype(np.float32))
    estimate = np.array([[1], [4]])
    s = np.array(['S1'])
    estimate = pd.DataFrame(estimate, index=r, columns=s)
    estimate.name = 'estimate'
    # The None entry stands for an output that was not produced.
    add_to_anndata(mat=adata, results=(estimate, None))
    assert 'estimate' in adata.obsm
    # Nothing besides the named result may have been stored.
    assert list(adata.obsm.keys()) == ['estimate']


def test_return_data():
    """Cover the three outcomes of ``return_data``: repaired copy, in-place update, tuple."""
    # Third observation (S3) is all zeros and has no row in the results below.
    m = np.array([[1, 0, 2], [1, 0, 3], [0, 0, 0]])
    r = np.array(['S1', 'S2', 'S3'])
    c = np.array(['G1', 'G2', 'G3'])
    df = pd.DataFrame(m, index=r, columns=c)
    adata = AnnData(df.astype(np.float32))
    estimate = np.array([[1], [4]])
    s = np.array(['S1'])
    estimate = pd.DataFrame(estimate, index=r[:-1], columns=s)
    estimate.name = 'estimate'
    pvals = np.array([[0.4], [0.01]])
    pvals = pd.DataFrame(pvals, index=estimate.index, columns=estimate.columns)
    pvals.name = 'pvals'

    # Observation count differs from the results: a repaired copy is returned.
    ret = return_data(mat=adata, results=(estimate, pvals))
    assert isinstance(ret, AnnData)
    assert list(ret.obs_names) == list(estimate.index)
    assert 'estimate' in ret.obsm
    assert 'pvals' in ret.obsm
    # The object that was passed in must stay untouched.
    assert 'estimate' not in adata.obsm

    # Observation count matches: results are stored in place, nothing returned.
    sub = adata[estimate.index, :].copy()
    ret = return_data(mat=sub, results=(estimate, pvals))
    assert ret is None
    assert 'estimate' in sub.obsm
    assert 'pvals' in sub.obsm

    # Non-AnnData input: results come back as a tuple, in order.
    ret = return_data(mat=df, results=(estimate, pvals))
    assert isinstance(ret, tuple)
    assert len(ret) == 2
    assert ret[0] is estimate
    assert ret[1] is pvals
2 changes: 2 additions & 0 deletions docs/source/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ Bug fixes
- Fixed error in ``get_contrast`` by reverting use of ``copy.deepcopy`` to ``copy``.
- Fixed verbose error regarding the number of unique sources being used in ``benchmark``.
- Added check for minimum version of ``igraph>=0.10.0`` to properly render ``plot_network``.
- Fixed return error of methods triggered when an observation was empty and input was ``AnnData``.

Changes
~~~~~~~
- Resource functions such as ``get_resource`` or ``get_collectri`` now accept different ``genesymbol_resource`` than UniProt for gene translation to other organisms.
- Deprecated ``sklearn`` and switched to ``sklearn`` for ``udt``.

Additions
~~~~~~~~~
Expand Down

0 comments on commit 2092219

Please sign in to comment.