Skip to content

Commit

Permalink
Merge pull request #15 from earmingol/new_version
Browse files Browse the repository at this point in the history
Updates for v0.6.1
  • Loading branch information
earmingol committed Oct 28, 2022
2 parents 14c9e76 + 2656038 commit 97f498b
Show file tree
Hide file tree
Showing 8 changed files with 234 additions and 71 deletions.
2 changes: 1 addition & 1 deletion cell2cell/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
from cell2cell import tensor
from cell2cell import utils

__version__ = "0.6.0"
__version__ = "0.6.1"
2 changes: 1 addition & 1 deletion cell2cell/preprocessing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from cell2cell.preprocessing.cutoffs import (get_constant_cutoff, get_cutoffs, get_global_percentile_cutoffs,
get_local_percentile_cutoffs)
from cell2cell.preprocessing.find_elements import (find_duplicates)
from cell2cell.preprocessing.find_elements import (find_duplicates, get_element_abundances, get_elements_over_fraction)
from cell2cell.preprocessing.gene_ontology import (find_all_children_of_go_term, find_go_terms_from_keyword,
get_genes_from_go_hierarchy, get_genes_from_go_terms)
from cell2cell.preprocessing.integrate_data import (get_thresholded_rnaseq, get_modified_rnaseq, get_ppi_dict_from_go_terms,
Expand Down
52 changes: 50 additions & 2 deletions cell2cell/preprocessing/find_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

from __future__ import absolute_import

from collections import defaultdict
import itertools
from collections import defaultdict, Counter

def find_duplicates(element_list):
'''Function based on: https://stackoverflow.com/a/5419576/12032899
Expand All @@ -25,4 +26,51 @@ def find_duplicates(element_list):

duplicate_dict = {key : locs for key,locs in tally.items()
if len(locs)>1}
return duplicate_dict
return duplicate_dict


def get_element_abundances(element_lists):
    '''Computes the fraction of occurrence of each element
    in a list of lists.

    Parameters
    ----------
    element_lists : list
        List of lists of elements. Elements will be
        counted only once in each of the lists. Any iterable
        of iterables (e.g. a generator of lists) is accepted.

    Returns
    -------
    abundance_dict : dict
        Dictionary containing the number of times that an
        element was present, divided by the total number of
        lists in `element_lists`. Values are floats in (0, 1].
        The dictionary is empty when `element_lists` is empty.
    '''
    # Materialize once: de-duplicates within each list and makes the
    # total well defined even for single-pass iterables.
    unique_per_list = [set(elements) for elements in element_lists]

    # Float denominator guarantees true division regardless of the
    # interpreter's default division semantics.
    total = float(len(unique_per_list))

    counts = Counter(itertools.chain.from_iterable(unique_per_list))
    abundance_dict = {k: v / total for k, v in counts.items()}
    return abundance_dict


def get_elements_over_fraction(abundance_dict, fraction):
    '''Selects the elements whose fraction of occurrence
    reaches a given threshold.

    Parameters
    ----------
    abundance_dict : dict
        Dictionary mapping each element to the number of times
        it was present, divided by the total number of
        possible occurrences.

    fraction : float
        Threshold to filter the elements. An element is kept
        when its abundance is greater than or equal to this value.

    Returns
    -------
    elements : list
        List of elements that met the fraction criteria, in the
        iteration order of `abundance_dict`.
    '''
    elements = []
    for element, abundance in abundance_dict.items():
        # Inclusive comparison: an abundance equal to the threshold passes.
        if abundance >= fraction:
            elements.append(element)
    return elements
16 changes: 12 additions & 4 deletions cell2cell/stats/permutation.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,20 @@ def compute_pvalue_from_dist(obs_value, dist, consider_size=False, comparison='u
P-value obtained from comparing the observed value and values in the
distribution.
'''
# Omit nan values
dist_ = [x for x in dist if ~np.isnan(x)]

# All values in dist are NaNs or obs_value is NaN
if (len(dist_) == 0) | np.isnan(obs_value):
return 1.0

# No NaN values
if comparison == 'lower':
pval = scipy.stats.percentileofscore(dist, obs_value) / 100.0
pval = scipy.stats.percentileofscore(dist_, obs_value) / 100.0
elif comparison == 'upper':
pval = 1.0 - scipy.stats.percentileofscore(dist, obs_value) / 100.0
pval = 1.0 - scipy.stats.percentileofscore(dist_, obs_value) / 100.0
elif comparison == 'different':
percentile = scipy.stats.percentileofscore(dist, obs_value) / 100.0
percentile = scipy.stats.percentileofscore(dist_, obs_value) / 100.0
if percentile <= 0.5:
pval = 2.0 * percentile
else:
Expand All @@ -63,7 +71,7 @@ def compute_pvalue_from_dist(obs_value, dist, consider_size=False, comparison='u
raise NotImplementedError('Comparison {} is not implemented'.format(comparison))

if (consider_size) & (pval == 0.):
pval = 1./(len(dist) + 1e-6)
pval = 1./(len(dist_) + 1e-6)

return pval

Expand Down
63 changes: 31 additions & 32 deletions cell2cell/tensor/external_scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
import pandas as pd

from collections import defaultdict
from cell2cell.preprocessing.find_elements import get_element_abundances, get_elements_over_fraction
from cell2cell.tensor.tensor import PreBuiltTensor


def dataframes_to_tensor(context_df_dict, sender_col, receiver_col, ligand_col, receptor_col, score_col, how='inner',
lr_fill=np.nan, cell_fill=np.nan, lr_sep='^', context_order=None, order_labels=None,
sort_elements=True, device=None):
outer_fraction=0.0, lr_fill=np.nan, cell_fill=np.nan, lr_sep='^', context_order=None,
order_labels=None, sort_elements=True, device=None):
'''Generates an InteractionTensor from a dictionary
containing dataframes for all contexts.
Expand Down Expand Up @@ -55,6 +56,13 @@ def dataframes_to_tensor(context_df_dict, sender_col, receiver_col, ligand_col,
contexts (intersection), while all cell types that are
present across contexts (union).
outer_fraction : float, default=0.0
Threshold to filter the elements when `how` includes any outer option.
Elements with a fraction abundance across contexts (in `context_df_dict`)
at least this threshold will be included. When this value is 0, considers
all elements across the samples. When this value is 1, it acts as using
`how='inner'`.
lr_fill : float, default=numpy.nan
Value to fill communication scores when a ligand-receptor pair is not
present across all contexts.
Expand Down Expand Up @@ -123,41 +131,32 @@ def dataframes_to_tensor(context_df_dict, sender_col, receiver_col, ligand_col,
receiver_dict[k].update(df[receiver_col].unique().tolist())

# Subset LR pairs, sender and receiver cells given parameter 'how'
for i, k in enumerate(context_order):
if i == 0:
inter_lrs = set(lr_dict[k])
inter_senders = set(sender_dict[k])
inter_receivers = set(receiver_dict[k])

union_lrs = set(lr_dict[k])
union_senders = set(sender_dict[k])
union_receivers = set(receiver_dict[k])

else:
inter_lrs = inter_lrs.intersection(set(lr_dict[k]))
inter_senders = inter_senders.intersection(set(sender_dict[k]))
inter_receivers = inter_receivers.intersection(set(receiver_dict[k]))

union_lrs = union_lrs.union(set(lr_dict[k]))
union_senders = union_senders.union(set(sender_dict[k]))
union_receivers = union_receivers.union(set(receiver_dict[k]))
df_lrs = [list(lr_dict[k]) for k in context_order]
df_senders = [list(sender_dict[k]) for k in context_order]
df_receivers = [list(receiver_dict[k]) for k in context_order]

if how == 'inner':
lr_pairs = list(inter_lrs)
sender_cells = list(inter_senders)
receiver_cells = list(inter_receivers)
lr_pairs = list(set.intersection(*map(set, df_lrs)))
sender_cells = list(set.intersection(*map(set, df_senders)))
receiver_cells = list(set.intersection(*map(set, df_receivers)))
elif how == 'outer':
lr_pairs = list(union_lrs)
sender_cells = list(union_senders)
receiver_cells = list(union_receivers)
lr_pairs = get_elements_over_fraction(abundance_dict=get_element_abundances(element_lists=df_lrs),
fraction=outer_fraction)
sender_cells = get_elements_over_fraction(abundance_dict=get_element_abundances(element_lists=df_senders),
fraction=outer_fraction)
receiver_cells = get_elements_over_fraction(abundance_dict=get_element_abundances(element_lists=df_receivers),
fraction=outer_fraction)
elif how == 'outer_lrs':
lr_pairs = list(union_lrs)
sender_cells = list(inter_senders)
receiver_cells = list(inter_receivers)
lr_pairs = get_elements_over_fraction(abundance_dict=get_element_abundances(element_lists=df_lrs),
fraction=outer_fraction)
sender_cells = list(set.intersection(*map(set, df_senders)))
receiver_cells = list(set.intersection(*map(set, df_receivers)))
elif how == 'outer_cells':
lr_pairs = list(inter_lrs)
sender_cells = list(union_senders)
receiver_cells = list(union_receivers)
lr_pairs = list(set.intersection(*map(set, df_lrs)))
sender_cells = get_elements_over_fraction(abundance_dict=get_element_abundances(element_lists=df_senders),
fraction=outer_fraction)
receiver_cells = get_elements_over_fraction(abundance_dict=get_element_abundances(element_lists=df_receivers),
fraction=outer_fraction)
else:
raise ValueError("Not a valid input for parameter 'how'")

Expand Down
Loading

0 comments on commit 97f498b

Please sign in to comment.