Merge pull request #12 from earmingol/new_version

Merged v0.6.0, and kept pending tasks for version 0.6.1
earmingol · Sep 13, 2022 · 14c9e76 · 14c9e76
2 parents 40f568e + 2e42717
commit 14c9e76
Show file tree

Hide file tree

Showing 19 changed files with 468 additions and 48 deletions.
diff --git a/cell2cell/__init__.py b/cell2cell/__init__.py
@@ -14,4 +14,4 @@
 from cell2cell import tensor
 from cell2cell import utils
 
-__version__ = "0.5.11"
+__version__ = "0.6.0"
diff --git a/cell2cell/analysis/pipelines.py b/cell2cell/analysis/pipelines.py
@@ -62,6 +62,7 @@ class BulkInteractions:
         - 'bray_curtis' : Bray-Curtis-like score.
         - 'jaccard' : Jaccard-like score.
         - 'count' : Number of LR pairs that the pair of cells use.
+        - 'icellnet' : Sum of the L-R expression product of a pair of cells
 
     cci_type : str, default='undirected'
         Specifies whether computing the cci_score in a directed or undirected
@@ -93,6 +94,7 @@ class BulkInteractions:
 
         - 'min' : Minimum expression value among all genes.
         - 'mean' : Average expression value among all genes.
+        - 'gmean' : Geometric mean expression value among all genes.
 
     verbose : boolean, default=False
         Whether printing or not steps of the analysis.
@@ -122,13 +124,13 @@ class BulkInteractions:
         For example, '&' is the complex_sep for a list of ligand-receptor pairs
         where a protein partner could be "CD74&CD44".
 
-
     complex_agg_method : str
         Method to aggregate the expression value of multiple genes in a
         complex.
 
         - 'min' : Minimum expression value among all genes.
         - 'mean' : Average expression value among all genes.
+        - 'gmean' : Geometric mean expression value among all genes.
 
     ref_ppi : pandas.DataFrame
         Reference list of protein-protein interactions (or ligand-receptor pairs) used
@@ -164,6 +166,7 @@ class BulkInteractions:
             - 'bray_curtis'
             - 'jaccard'
             - 'count'
+            - 'icellnet'
 
         - 'cci_type' : is the type of interaction between two cells. If it is
             undirected, all ligands and receptors are considered from both cells.
@@ -274,6 +277,7 @@ def compute_pairwise_cci_scores(self, cci_score=None, use_ppi_score=False, verbo
             - 'bray_curtis' : Bray-Curtis-like score.
             - 'jaccard' : Jaccard-like score.
             - 'count' : Number of LR pairs that the pair of cells use.
+            - 'icellnet' : Sum of the L-R expression product of a pair of cells
 
         use_ppi_score : boolean, default=False
             Whether using a weight of LR pairs specified in the ppi_data
@@ -422,6 +426,7 @@ class SingleCellInteractions:
         - 'bray_curtis' : Bray-Curtis-like score.
         - 'jaccard' : Jaccard-like score.
         - 'count' : Number of LR pairs that the pair of cells use.
+        - 'icellnet' : Sum of the L-R expression product of a pair of cells
 
     cci_type : str, default='undirected'
         Specifies whether computing the cci_score in a directed or undirected
@@ -466,6 +471,7 @@ class SingleCellInteractions:
 
         - 'min' : Minimum expression value among all genes.
         - 'mean' : Average expression value among all genes.
+        - 'gmean' : Geometric mean expression value among all genes.
 
     verbose : boolean, default=False
         Whether printing or not steps of the analysis.
@@ -503,6 +509,7 @@ class SingleCellInteractions:
 
         - 'min' : Minimum expression value among all genes.
         - 'mean' : Average expression value among all genes.
+        - 'gmean' : Geometric mean expression value among all genes.
 
     ref_ppi : pandas.DataFrame
         Reference list of protein-protein interactions (or ligand-receptor pairs) used
@@ -538,6 +545,7 @@ class SingleCellInteractions:
             - 'bray_curtis'
             - 'jaccard'
             - 'count'
+            - 'icellnet'
 
         - 'cci_type' : is the type of interaction between two cells. If it is
             undirected, all ligands and receptors are considered from both cells.
@@ -638,15 +646,16 @@ def __init__(self, rnaseq_data, ppi_data, metadata, interaction_columns=('A', 'B
         self.analysis_setup['cci_type'] = cci_type
 
         # Initialize PPI
-
         ppi_data_ = ppi.filter_ppi_by_proteins(ppi_data=ppi_data,
                                                proteins=genes,
                                                complex_sep=complex_sep,
                                                upper_letter_comparison=False,
                                                interaction_columns=interaction_columns)
+
         self.ppi_data = ppi.remove_ppi_bidirectionality(ppi_data=ppi_data_,
                                                         interaction_columns=interaction_columns,
                                                         verbose=verbose)
+
         if self.analysis_setup['cci_type'] == 'undirected':
             self.ref_ppi = self.ppi_data
             self.ppi_data = ppi.bidirectional_ppi_for_cci(ppi_data=self.ppi_data,
@@ -854,6 +863,7 @@ def initialize_interaction_space(rnaseq_data, ppi_data, cutoff_setup, analysis_s
             - 'bray_curtis'
             - 'jaccard'
             - 'count'
+            - 'icellnet'
 
         - 'cci_type' : is the type of interaction between two cells. If it is
             undirected, all ligands and receptors are considered from both cells.

diff --git a/cell2cell/core/__init__.py b/cell2cell/core/__init__.py
@@ -2,8 +2,9 @@
 
 from __future__ import absolute_import
 
-from cell2cell.core.cci_scores import (compute_braycurtis_like_cci_score, compute_count_score, compute_jaccard_like_cci_score,
-                                       matmul_bray_curtis_like, matmul_count_active, matmul_jaccard_like)
+from cell2cell.core.cci_scores import (compute_braycurtis_like_cci_score, compute_count_score, compute_icellnet_score,
+                                       compute_jaccard_like_cci_score, matmul_bray_curtis_like, matmul_count_active,
+                                       matmul_jaccard_like)
 from cell2cell.core.cell import (Cell, get_cells_from_rnaseq)
 from cell2cell.core.communication_scores import (get_binary_scores, get_continuous_scores, compute_ccc_matrix, aggregate_ccc_matrices)
 from cell2cell.core.interaction_space import (generate_interaction_elements, InteractionSpace)
diff --git a/cell2cell/core/cci_scores.py b/cell2cell/core/cci_scores.py
@@ -139,6 +139,45 @@ def compute_count_score(cell1, cell2, ppi_score=None):
     return cci_score
 
 
+def compute_icellnet_score(cell1, cell2, ppi_score=None):
+    '''Calculates the sum of communication scores
+    for the interaction between two cells. Based on ICELLNET.
+
+    Parameters
+    ----------
+    cell1 : cell2cell.core.cell.Cell
+        First cell-type/tissue/sample to compute interaction
+        between a pair of them. In a directed interaction,
+        this is the sender.
+
+    cell2 : cell2cell.core.cell.Cell
+        Second cell-type/tissue/sample to compute interaction
+        between a pair of them. In a directed interaction,
+        this is the receiver.
+
+    ppi_score : array-like, default=None
+        Weight multiplied into each ligand-receptor product.
+        If None, every pair is given a weight of 1.
+
+    Returns
+    -------
+    cci_score : float
+        Overall score for the interaction between a pair of
+        cell-types/tissues/samples.
+    '''
+    c1 = cell1.weighted_ppi['A'].values
+    c2 = cell2.weighted_ppi['B'].values
+
+    if (len(c1) == 0) or (len(c2) == 0):
+        return 0.0
+
+    if ppi_score is None:
+        ppi_score = np.array([1.0] * len(c1))
+
+    # NaN products are counted as zero by np.nansum.
+    return np.nansum(c1 * c2 * ppi_score)
+
+
 def matmul_jaccard_like(A_scores, B_scores, ppi_score=None):
     '''Computes Jaccard-like scores using matrices of proteins by
     cell-types/tissues/samples.

diff --git a/cell2cell/core/interaction_space.py b/cell2cell/core/interaction_space.py
@@ -30,7 +30,7 @@ def generate_pairs(cells, cci_type, self_interaction=True, remove_duplicates=Tru
     self_interaction : boolean, default=True
         Whether considering autocrine interactions (pair A-A, B-B, etc).
 
-    remove_duplicates : booleanm default=True
+    remove_duplicates : boolean, default=True
         Whether removing duplicates when a list of cells is passed and names are
         duplicated. If False and a list [A, A, B] is passed, pairs could be
         [A-A, A-A, A-B, A-A, A-A, A-B, B-A, B-A, B-B] when self_interaction is True
@@ -110,6 +110,7 @@ def generate_interaction_elements(modified_rnaseq, ppi_data, cci_type='undirecte
 
         - 'min' : Minimum expression value among all genes.
         - 'mean' : Average expression value among all genes.
+        - 'gmean' : Geometric mean expression value among all genes.
 
     interaction_columns : tuple, default=('A', 'B')
         Contains the names of the columns where to find the partners in a
@@ -245,6 +246,7 @@ class InteractionSpace():
         - 'bray_curtis'
         - 'jaccard'
         - 'count'
+        - 'icellnet'
 
     cci_type : str, default='undirected'
         Type of interaction between two cells. If it is undirected, all ligands
@@ -272,6 +274,7 @@ class InteractionSpace():
 
         - 'min' : Minimum expression value among all genes.
         - 'mean' : Average expression value among all genes.
+        - 'gmean' : Geometric mean expression value among all genes.
 
     interaction_columns : tuple, default=('A', 'B')
         Contains the names of the columns where to find the partners in a
@@ -303,6 +306,7 @@ class InteractionSpace():
         - 'bray_curtis'
         - 'jaccard'
         - 'count'
+        - 'icellnet'
 
     cci_type : str
         Type of interaction between two cells. If it is undirected, all ligands
@@ -332,7 +336,7 @@ class InteractionSpace():
         to store CCI scores(under key 'cci_matrix'). A communication matrix
         is also stored in this object when the communication scores are
         computed in the InteractionSpace class (under key
-        'communication_score')
+        'communication_matrix')
 
     distance_matrix : pandas.DataFrame
         Contains distances for each pair of cells, computed from
@@ -408,6 +412,7 @@ def pair_cci_score(self, cell1, cell2, cci_score='bray_curtis', use_ppi_score=Fa
             - 'bray_curtis' : Bray-Curtis-like score
             - 'jaccard' : Jaccard-like score
             - 'count' : Number of LR pairs that the pair of cells uses
+            - 'icellnet' : Sum of the L-R expression product of a pair of cells
 
         use_ppi_score : boolean, default=False
             Whether using a weight of LR pairs specified in the ppi_data
@@ -438,6 +443,8 @@ def pair_cci_score(self, cell1, cell2, cci_score='bray_curtis', use_ppi_score=Fa
             cci_value = cci_scores.compute_jaccard_like_cci_score(cell1, cell2, ppi_score=ppi_score)
         elif cci_score == 'count':
             cci_value = cci_scores.compute_count_score(cell1, cell2, ppi_score=ppi_score)
+        elif cci_score == 'icellnet':
+            cci_value = cci_scores.compute_icellnet_score(cell1, cell2, ppi_score=ppi_score)
         else:
             raise NotImplementedError("CCI score {} to compute pairwise cell-interactions is not implemented".format(cci_score))
         return cci_value
@@ -457,6 +464,7 @@ def compute_pairwise_cci_scores(self, cci_score=None, use_ppi_score=False, verbo
             - 'bray_curtis' : Bray-Curtis-like score
             - 'jaccard' : Jaccard-like score
             - 'count' : Number of LR pairs that the pair of cells uses
+            - 'icellnet' : Sum of the L-R expression product of a pair of cells
 
         use_ppi_score : boolean, default=False
             Whether using a weight of LR pairs specified in the ppi_data
@@ -502,7 +510,7 @@ def compute_pairwise_cci_scores(self, cci_score=None, use_ppi_score=False, verbo
         #                                                        )
 
         # Generate distance matrix
-        if cci_score != 'count':
+        if cci_score not in ['count', 'icellnet']:
             self.distance_matrix = self.interaction_elements['cci_matrix'].apply(lambda x: 1 - x)
         else:
             #self.distance_matrix = self.interaction_elements['cci_matrix'].div(self.interaction_elements['cci_matrix'].max().max()).apply(lambda x: 1 - x)

diff --git a/cell2cell/external/tensorly_nn_cp.py b/cell2cell/external/tensorly_nn_cp.py
@@ -5,7 +5,7 @@
 
 import warnings
 import tensorly as tl
-from tensorly.random import check_random_state, random_cp
+from tensorly.random import random_cp # check_random_state, # check_random_state only available in tensorly 0.5.1
 from tensorly.base import unfold
 from tensorly.cp_tensor import (CPTensor,
                          unfolding_dot_khatri_rao, cp_norm,

diff --git a/cell2cell/plotting/cci_plot.py b/cell2cell/plotting/cci_plot.py
@@ -105,9 +105,9 @@ def clustermap_cci(interaction_space, method='ward', optimal_leaf=True, metadata
     # Drop excluded cells
     if excluded_cells is not None:
         df = distance_matrix.loc[~distance_matrix.index.isin(excluded_cells),
-                                 ~distance_matrix.columns.isin(excluded_cells)]
+                                 ~distance_matrix.columns.isin(excluded_cells)].copy()
     else:
-        df = distance_matrix
+        df = distance_matrix.copy()
 
     # Check symmetry to get linkage
     symmetric = check_symmetry(df)

diff --git a/cell2cell/preprocessing/__init__.py b/cell2cell/preprocessing/__init__.py
@@ -4,6 +4,7 @@
 
 from cell2cell.preprocessing.cutoffs import (get_constant_cutoff, get_cutoffs, get_global_percentile_cutoffs,
                                              get_local_percentile_cutoffs)
+from cell2cell.preprocessing.find_elements import (find_duplicates)
 from cell2cell.preprocessing.gene_ontology import (find_all_children_of_go_term, find_go_terms_from_keyword,
                                                    get_genes_from_go_hierarchy, get_genes_from_go_terms)
 from cell2cell.preprocessing.integrate_data import (get_thresholded_rnaseq, get_modified_rnaseq, get_ppi_dict_from_go_terms,

diff --git a/cell2cell/preprocessing/find_elements.py b/cell2cell/preprocessing/find_elements.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+
+from collections import defaultdict
+
+def find_duplicates(element_list):
+    '''Locates the items that appear more than once in a list.
+    Adapted from: https://stackoverflow.com/a/5419576/12032899
+
+    Parameters
+    ----------
+    element_list : list
+        List of hashable elements to inspect.
+
+    Returns
+    -------
+    duplicate_dict : dict
+        Dictionary with duplicate items. Keys are the repeated items,
+        and values are lists with every index where each one occurs.
+    '''
+    positions = defaultdict(list)
+    for index, element in enumerate(element_list):
+        positions[element].append(index)
+
+    # Items seen only once are not duplicates, so they are dropped.
+    return {element: indexes for element, indexes in positions.items()
+            if len(indexes) > 1}
diff --git a/cell2cell/preprocessing/rnaseq.py b/cell2cell/preprocessing/rnaseq.py
@@ -163,6 +163,7 @@ def add_complexes_to_expression(rnaseq_data, complexes, agg_method='min'):
 
         - 'min' : Minimum expression value among all genes.
         - 'mean' : Average expression value among all genes.
+        - 'gmean' : Geometric mean expression value among all genes.
 
     Returns
     -------
@@ -180,8 +181,8 @@ def add_complexes_to_expression(rnaseq_data, complexes, agg_method='min'):
                 tmp_rna.loc[k] = df.min().values.tolist()
             elif agg_method == 'mean':
                 tmp_rna.loc[k] = df.mean().values.tolist()
-            # elif agg_method == 'gmean':
-            #    tmp_rna.loc[k] = df.gmean().values.tolist() # Not implemented
+            elif agg_method == 'gmean':
+                tmp_rna.loc[k] = df.apply(lambda x: np.exp(np.mean(np.log(x)))).values.tolist()
             else:
                 ValueError("{} is not a valid agg_method".format(agg_method))
         else:

diff --git a/cell2cell/tensor/__init__.py b/cell2cell/tensor/__init__.py
@@ -4,4 +4,5 @@
 from cell2cell.tensor.metrics import (correlation_index)
 from cell2cell.tensor.tensor import (InteractionTensor, PreBuiltTensor, build_context_ccc_tensor, generate_tensor_metadata,
                                      interactions_to_tensor)
+from cell2cell.tensor.tensor_manipulation import (concatenate_interaction_tensors)
 from cell2cell.tensor.subset import (subset_tensor, subset_metadata)