Skip to content

Commit

Permalink
Merge pull request #12 from earmingol/new_version
Browse files Browse the repository at this point in the history
Merged v0.6.0, and kept pending tasks for version 0.6.1
  • Loading branch information
earmingol committed Sep 13, 2022
2 parents 40f568e + 2e42717 commit 14c9e76
Show file tree
Hide file tree
Showing 19 changed files with 468 additions and 48 deletions.
2 changes: 1 addition & 1 deletion cell2cell/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
from cell2cell import tensor
from cell2cell import utils

__version__ = "0.5.11"
__version__ = "0.6.0"
14 changes: 12 additions & 2 deletions cell2cell/analysis/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class BulkInteractions:
- 'bray_curtis' : Bray-Curtis-like score.
- 'jaccard' : Jaccard-like score.
- 'count' : Number of LR pairs that the pair of cells use.
- 'icellnet' : Sum of the L-R expression product of a pair of cells
cci_type : str, default='undirected'
Specifies whether computing the cci_score in a directed or undirected
Expand Down Expand Up @@ -93,6 +94,7 @@ class BulkInteractions:
- 'min' : Minimum expression value among all genes.
- 'mean' : Average expression value among all genes.
- 'gmean' : Geometric mean expression value among all genes.
verbose : boolean, default=False
Whether printing or not steps of the analysis.
Expand Down Expand Up @@ -122,13 +124,13 @@ class BulkInteractions:
For example, '&' is the complex_sep for a list of ligand-receptor pairs
where a protein partner could be "CD74&CD44".
complex_agg_method : str
Method to aggregate the expression value of multiple genes in a
complex.
- 'min' : Minimum expression value among all genes.
- 'mean' : Average expression value among all genes.
- 'gmean' : Geometric mean expression value among all genes.
ref_ppi : pandas.DataFrame
Reference list of protein-protein interactions (or ligand-receptor pairs) used
Expand Down Expand Up @@ -164,6 +166,7 @@ class BulkInteractions:
- 'bray_curtis'
- 'jaccard'
- 'count'
- 'icellnet'
- 'cci_type' : is the type of interaction between two cells. If it is
undirected, all ligands and receptors are considered from both cells.
Expand Down Expand Up @@ -274,6 +277,7 @@ def compute_pairwise_cci_scores(self, cci_score=None, use_ppi_score=False, verbo
- 'bray_curtis' : Bray-Curtis-like score.
- 'jaccard' : Jaccard-like score.
- 'count' : Number of LR pairs that the pair of cells use.
- 'icellnet' : Sum of the L-R expression product of a pair of cells
use_ppi_score : boolean, default=False
Whether using a weight of LR pairs specified in the ppi_data
Expand Down Expand Up @@ -422,6 +426,7 @@ class SingleCellInteractions:
- 'bray_curtis' : Bray-Curtis-like score.
- 'jaccard' : Jaccard-like score.
- 'count' : Number of LR pairs that the pair of cells use.
- 'icellnet' : Sum of the L-R expression product of a pair of cells
cci_type : str, default='undirected'
Specifies whether computing the cci_score in a directed or undirected
Expand Down Expand Up @@ -466,6 +471,7 @@ class SingleCellInteractions:
- 'min' : Minimum expression value among all genes.
- 'mean' : Average expression value among all genes.
- 'gmean' : Geometric mean expression value among all genes.
verbose : boolean, default=False
Whether printing or not steps of the analysis.
Expand Down Expand Up @@ -503,6 +509,7 @@ class SingleCellInteractions:
- 'min' : Minimum expression value among all genes.
- 'mean' : Average expression value among all genes.
- 'gmean' : Geometric mean expression value among all genes.
ref_ppi : pandas.DataFrame
Reference list of protein-protein interactions (or ligand-receptor pairs) used
Expand Down Expand Up @@ -538,6 +545,7 @@ class SingleCellInteractions:
- 'bray_curtis'
- 'jaccard'
- 'count'
- 'icellnet'
- 'cci_type' : is the type of interaction between two cells. If it is
undirected, all ligands and receptors are considered from both cells.
Expand Down Expand Up @@ -638,15 +646,16 @@ def __init__(self, rnaseq_data, ppi_data, metadata, interaction_columns=('A', 'B
self.analysis_setup['cci_type'] = cci_type

# Initialize PPI

ppi_data_ = ppi.filter_ppi_by_proteins(ppi_data=ppi_data,
proteins=genes,
complex_sep=complex_sep,
upper_letter_comparison=False,
interaction_columns=interaction_columns)

self.ppi_data = ppi.remove_ppi_bidirectionality(ppi_data=ppi_data_,
interaction_columns=interaction_columns,
verbose=verbose)

if self.analysis_setup['cci_type'] == 'undirected':
self.ref_ppi = self.ppi_data
self.ppi_data = ppi.bidirectional_ppi_for_cci(ppi_data=self.ppi_data,
Expand Down Expand Up @@ -854,6 +863,7 @@ def initialize_interaction_space(rnaseq_data, ppi_data, cutoff_setup, analysis_s
- 'bray_curtis'
- 'jaccard'
- 'count'
- 'icellnet'
- 'cci_type' : is the type of interaction between two cells. If it is
undirected, all ligands and receptors are considered from both cells.
Expand Down
5 changes: 3 additions & 2 deletions cell2cell/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

from __future__ import absolute_import

from cell2cell.core.cci_scores import (compute_braycurtis_like_cci_score, compute_count_score, compute_jaccard_like_cci_score,
matmul_bray_curtis_like, matmul_count_active, matmul_jaccard_like)
from cell2cell.core.cci_scores import (compute_braycurtis_like_cci_score, compute_count_score, compute_icellnet_score,
compute_jaccard_like_cci_score, matmul_bray_curtis_like, matmul_count_active,
matmul_jaccard_like)
from cell2cell.core.cell import (Cell, get_cells_from_rnaseq)
from cell2cell.core.communication_scores import (get_binary_scores, get_continuous_scores, compute_ccc_matrix, aggregate_ccc_matrices)
from cell2cell.core.interaction_space import (generate_interaction_elements, InteractionSpace)
39 changes: 39 additions & 0 deletions cell2cell/core/cci_scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,45 @@ def compute_count_score(cell1, cell2, ppi_score=None):
return cci_score


def compute_icellnet_score(cell1, cell2, ppi_score=None):
    '''Calculates the sum of communication scores
    for the interaction between two cells. Based on ICELLNET.

    Parameters
    ----------
    cell1 : cell2cell.core.cell.Cell
        First cell-type/tissue/sample to compute interaction
        between a pair of them. In a directed interaction,
        this is the sender.

    cell2 : cell2cell.core.cell.Cell
        Second cell-type/tissue/sample to compute interaction
        between a pair of them. In a directed interaction,
        this is the receiver.

    ppi_score : array-like, default=None
        Values multiplied element-wise with the product of column 'A'
        of cell1.weighted_ppi and column 'B' of cell2.weighted_ppi.
        If None, a vector of ones with the length of column 'A' is used,
        so every product contributes with the same weight.

    Returns
    -------
    cci_score : float
        Overall score for the interaction between a pair of
        cell-types/tissues/samples. It is 0.0 when any of the cells
        has no entries or when the sum is not a number.
    '''
    c1 = cell1.weighted_ppi['A'].values
    c2 = cell2.weighted_ppi['B'].values

    # Nothing to multiply when either cell has no entries.
    if (len(c1) == 0) or (len(c2) == 0):
        return 0.0

    if ppi_score is None:
        ppi_score = np.ones(len(c1))

    mult = c1 * c2 * ppi_score
    # NaN products are ignored by nansum instead of propagating.
    cci_score = np.nansum(mult)

    # nansum can still yield NaN (e.g. inf + -inf). An identity test against
    # np.nan never matches a computed value, so np.isnan is required here.
    if np.isnan(cci_score):
        return 0.0
    return cci_score


def matmul_jaccard_like(A_scores, B_scores, ppi_score=None):
'''Computes Jaccard-like scores using matrices of proteins by
cell-types/tissues/samples.
Expand Down
14 changes: 11 additions & 3 deletions cell2cell/core/interaction_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def generate_pairs(cells, cci_type, self_interaction=True, remove_duplicates=Tru
self_interaction : boolean, default=True
Whether considering autocrine interactions (pair A-A, B-B, etc).
remove_duplicates : booleanm default=True
remove_duplicates : boolean, default=True
Whether removing duplicates when a list of cells is passed and names are
duplicated. If False and a list [A, A, B] is passed, pairs could be
[A-A, A-A, A-B, A-A, A-A, A-B, B-A, B-A, B-B] when self_interaction is True
Expand Down Expand Up @@ -110,6 +110,7 @@ def generate_interaction_elements(modified_rnaseq, ppi_data, cci_type='undirecte
- 'min' : Minimum expression value among all genes.
- 'mean' : Average expression value among all genes.
- 'gmean' : Geometric mean expression value among all genes.
interaction_columns : tuple, default=('A', 'B')
Contains the names of the columns where to find the partners in a
Expand Down Expand Up @@ -245,6 +246,7 @@ class InteractionSpace():
- 'bray_curtis'
- 'jaccard'
- 'count'
- 'icellnet'
cci_type : str, default='undirected'
Type of interaction between two cells. If it is undirected, all ligands
Expand Down Expand Up @@ -272,6 +274,7 @@ class InteractionSpace():
- 'min' : Minimum expression value among all genes.
- 'mean' : Average expression value among all genes.
- 'gmean' : Geometric mean expression value among all genes.
interaction_columns : tuple, default=('A', 'B')
Contains the names of the columns where to find the partners in a
Expand Down Expand Up @@ -303,6 +306,7 @@ class InteractionSpace():
- 'bray_curtis'
- 'jaccard'
- 'count'
- 'icellnet'
cci_type : str
Type of interaction between two cells. If it is undirected, all ligands
Expand Down Expand Up @@ -332,7 +336,7 @@ class InteractionSpace():
to store CCI scores(under key 'cci_matrix'). A communication matrix
is also stored in this object when the communication scores are
computed in the InteractionSpace class (under key
'communication_score')
'communication_matrix')
distance_matrix : pandas.DataFrame
Contains distances for each pair of cells, computed from
Expand Down Expand Up @@ -408,6 +412,7 @@ def pair_cci_score(self, cell1, cell2, cci_score='bray_curtis', use_ppi_score=Fa
- 'bray_curtis' : Bray-Curtis-like score
- 'jaccard' : Jaccard-like score
- 'count' : Number of LR pairs that the pair of cells uses
- 'icellnet' : Sum of the L-R expression product of a pair of cells
use_ppi_score : boolean, default=False
Whether using a weight of LR pairs specified in the ppi_data
Expand Down Expand Up @@ -438,6 +443,8 @@ def pair_cci_score(self, cell1, cell2, cci_score='bray_curtis', use_ppi_score=Fa
cci_value = cci_scores.compute_jaccard_like_cci_score(cell1, cell2, ppi_score=ppi_score)
elif cci_score == 'count':
cci_value = cci_scores.compute_count_score(cell1, cell2, ppi_score=ppi_score)
elif cci_score == 'icellnet':
cci_value = cci_scores.compute_icellnet_score(cell1, cell2, ppi_score=ppi_score)
else:
raise NotImplementedError("CCI score {} to compute pairwise cell-interactions is not implemented".format(cci_score))
return cci_value
Expand All @@ -457,6 +464,7 @@ def compute_pairwise_cci_scores(self, cci_score=None, use_ppi_score=False, verbo
- 'bray_curtis' : Bray-Curtis-like score
- 'jaccard' : Jaccard-like score
- 'count' : Number of LR pairs that the pair of cells uses
- 'icellnet' : Sum of the L-R expression product of a pair of cells
use_ppi_score : boolean, default=False
Whether using a weight of LR pairs specified in the ppi_data
Expand Down Expand Up @@ -502,7 +510,7 @@ def compute_pairwise_cci_scores(self, cci_score=None, use_ppi_score=False, verbo
# )

# Generate distance matrix
if cci_score != 'count':
if ~(cci_score in ['count', 'icellnet']):
self.distance_matrix = self.interaction_elements['cci_matrix'].apply(lambda x: 1 - x)
else:
#self.distance_matrix = self.interaction_elements['cci_matrix'].div(self.interaction_elements['cci_matrix'].max().max()).apply(lambda x: 1 - x)
Expand Down
2 changes: 1 addition & 1 deletion cell2cell/external/tensorly_nn_cp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import warnings
import tensorly as tl
from tensorly.random import check_random_state, random_cp
from tensorly.random import random_cp # check_random_state, # check_random_state only available in tensorly 0.5.1
from tensorly.base import unfold
from tensorly.cp_tensor import (CPTensor,
unfolding_dot_khatri_rao, cp_norm,
Expand Down
4 changes: 2 additions & 2 deletions cell2cell/plotting/cci_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,9 @@ def clustermap_cci(interaction_space, method='ward', optimal_leaf=True, metadata
# Drop excluded cells
if excluded_cells is not None:
df = distance_matrix.loc[~distance_matrix.index.isin(excluded_cells),
~distance_matrix.columns.isin(excluded_cells)]
~distance_matrix.columns.isin(excluded_cells)].copy()
else:
df = distance_matrix
df = distance_matrix.copy()

# Check symmetry to get linkage
symmetric = check_symmetry(df)
Expand Down
1 change: 1 addition & 0 deletions cell2cell/preprocessing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from cell2cell.preprocessing.cutoffs import (get_constant_cutoff, get_cutoffs, get_global_percentile_cutoffs,
get_local_percentile_cutoffs)
from cell2cell.preprocessing.find_elements import (find_duplicates)
from cell2cell.preprocessing.gene_ontology import (find_all_children_of_go_term, find_go_terms_from_keyword,
get_genes_from_go_hierarchy, get_genes_from_go_terms)
from cell2cell.preprocessing.integrate_data import (get_thresholded_rnaseq, get_modified_rnaseq, get_ppi_dict_from_go_terms,
Expand Down
28 changes: 28 additions & 0 deletions cell2cell/preprocessing/find_elements.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-

from __future__ import absolute_import

from collections import defaultdict

def find_duplicates(element_list):
    '''Locates the items that appear more than once in a list and
    reports the positions where each of them occurs.
    Adapted from: https://stackoverflow.com/a/5419576/12032899

    Parameters
    ----------
    element_list : list
        Items to inspect.

    Returns
    -------
    duplicate_dict : dict
        Maps every repeated item to the list of indexes where it
        is found. Items occurring only once are left out.
    '''
    # Record every position of every item, in order of appearance.
    positions = {}
    for index, element in enumerate(element_list):
        positions.setdefault(element, []).append(index)

    # Keep only the items seen at two or more positions.
    duplicate_dict = {}
    for element, indexes in positions.items():
        if len(indexes) > 1:
            duplicate_dict[element] = indexes
    return duplicate_dict
5 changes: 3 additions & 2 deletions cell2cell/preprocessing/rnaseq.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def add_complexes_to_expression(rnaseq_data, complexes, agg_method='min'):
- 'min' : Minimum expression value among all genes.
- 'mean' : Average expression value among all genes.
- 'gmean' : Geometric mean expression value among all genes.
Returns
-------
Expand All @@ -180,8 +181,8 @@ def add_complexes_to_expression(rnaseq_data, complexes, agg_method='min'):
tmp_rna.loc[k] = df.min().values.tolist()
elif agg_method == 'mean':
tmp_rna.loc[k] = df.mean().values.tolist()
# elif agg_method == 'gmean':
# tmp_rna.loc[k] = df.gmean().values.tolist() # Not implemented
elif agg_method == 'gmean':
tmp_rna.loc[k] = df.apply(lambda x: np.exp(np.mean(np.log(x)))).values.tolist()
else:
ValueError("{} is not a valid agg_method".format(agg_method))
else:
Expand Down
1 change: 1 addition & 0 deletions cell2cell/tensor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
from cell2cell.tensor.metrics import (correlation_index)
from cell2cell.tensor.tensor import (InteractionTensor, PreBuiltTensor, build_context_ccc_tensor, generate_tensor_metadata,
interactions_to_tensor)
from cell2cell.tensor.tensor_manipulation import (concatenate_interaction_tensors)
from cell2cell.tensor.subset import (subset_tensor, subset_metadata)
Loading

0 comments on commit 14c9e76

Please sign in to comment.