From c3c9332196211481b3a4d7650ae66d28f4a62673 Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Thu, 9 Apr 2020 19:44:06 -0700
Subject: [PATCH 01/19] Renamed to parameters communication_score and cci_score

---
 cell2cell/analysis/pipelines.py           | 111 ++++++++++------------
 cell2cell/core/interaction_space.py       |  12 +--
 cell2cell/preprocessing/integrate_data.py |  44 +++------
 3 files changed, 73 insertions(+), 94 deletions(-)

diff --git a/cell2cell/analysis/pipelines.py b/cell2cell/analysis/pipelines.py
index 804984a..02f4be7 100644
--- a/cell2cell/analysis/pipelines.py
+++ b/cell2cell/analysis/pipelines.py
@@ -13,7 +13,7 @@
 
 
 def core_pipeline(files, rnaseq_data, ppi_data, metadata, meta_setup, cutoff_setup, analysis_setup, excluded_cells=None,
-                  colors=None, use_ppi_score=False, filename_suffix='',verbose=True):
+                  colors=None, metric='bray_curtis', use_ppi_score=False, filename_suffix='',verbose=True):
 
     if excluded_cells is None:
         excluded_cells = []
@@ -25,8 +25,8 @@ def core_pipeline(files, rnaseq_data, ppi_data, metadata, meta_setup, cutoff_set
     interaction_space = ispace.InteractionSpace(rnaseq_data=rnaseq_data,
                                                 ppi_data=bi_ppi_data,
                                                 gene_cutoffs=cutoff_setup,
-                                                score_type=analysis_setup['score_type'],
-                                                score_metric=analysis_setup['score_metric'],
+                                                communication_score=analysis_setup['communication_score'],
+                                                cci_score=analysis_setup['cci_score'],
                                                 verbose=verbose)
 
     interaction_space.compute_pairwise_interactions(use_ppi_score=use_ppi_score,
@@ -62,10 +62,6 @@ def core_pipeline(files, rnaseq_data, ppi_data, metadata, meta_setup, cutoff_set
         use_ppi_score=use_ppi_score
     )
 
-    if use_ppi_score:
-        metric = 'cosine'
-    else:
-        metric = 'jaccard'
     interaction_clustermap = plotting.clustermap_cell_pairs_vs_ppi(interaction_matrix,
                                                                    metric=metric,
                                                                    metadata=metadata,
@@ -74,11 +70,8 @@ def core_pipeline(files, rnaseq_data, ppi_data, metadata, meta_setup, cutoff_set
                                                                    colors=colors,
                                                                    excluded_cells=excluded_cells,
                                                                    title='Active ligand-receptor pairs for interacting cells',
-                                                                   filename=files[
-                                                                                'output_folder'] + 'CCI-Active-LR-pairs{}.png'.format(filename_suffix),
-                                                                   **{'figsize': (20, 40),
-                                                                      'vmin' : 0.0,
-                                                                      'vmax': 1.0}
+                                                                   filename=files['output_folder'] + 'CCI-Active-LR-pairs{}.png'.format(filename_suffix),
+                                                                   **{'figsize': (20, 40)}
                                                                    )
 
     interaction_clustermap.data2d.to_csv(files['output_folder'] + 'CCI-Active-LR-pairs{}.csv'.format(filename_suffix))
@@ -92,8 +85,53 @@ def core_pipeline(files, rnaseq_data, ppi_data, metadata, meta_setup, cutoff_set
     return outputs
 
 
+def ligand_receptor_pipeline(files, rnaseq_setup, ppi_setup, meta_setup, cutoff_setup, analysis_setup, excluded_cells=None,
+                             colors=None,  use_ppi_score=False, filename_suffix='', verbose=True):
+    if excluded_cells is None:
+        excluded_cells = []
+
+    # Check for output directory
+    if not os.path.exists(files['output_folder']):
+        os.makedirs(files['output_folder'])
+
+    # Load Data
+    rnaseq_data = read_data.load_rnaseq(rnaseq_file=files['rnaseq'],
+                                        gene_column=rnaseq_setup['gene_col'],
+                                        drop_nangenes=rnaseq_setup['drop_nangenes'],
+                                        log_transformation=rnaseq_setup['log_transform'],
+                                        format='auto',
+                                        verbose=verbose)
+
+    ppi_data = read_data.load_ppi(ppi_file=files['ppi'],
+                                  interaction_columns=ppi_setup['protein_cols'],
+                                  rnaseq_genes=list(rnaseq_data.index),
+                                  format='auto',
+                                  verbose=verbose)
+
+    meta = read_data.load_metadata(metadata_file=files['metadata'],
+                                   rnaseq_data=rnaseq_data,
+                                   sample_col=meta_setup['sample_col'],
+                                   format='auto',
+                                   verbose=verbose)
+
+    # Run Analysis
+    outputs = core_pipeline(files=files,
+                            rnaseq_data=rnaseq_data,
+                            ppi_data=ppi_data,
+                            metadata=meta,
+                            meta_setup=meta_setup,
+                            cutoff_setup=cutoff_setup,
+                            analysis_setup=analysis_setup,
+                            excluded_cells=excluded_cells,
+                            colors=colors,
+                            use_ppi_score=use_ppi_score,
+                            filename_suffix=filename_suffix,
+                            verbose=verbose)
+    return outputs
+
+
 def heuristic_pipeline(files, rnaseq_setup, ppi_setup, meta_setup, cutoff_setup, go_setup, analysis_setup,
-                       contact_go_terms = None, mediator_go_terms = None, interaction_type='combined',
+                       contact_go_terms = None, mediator_go_terms = None, interaction_type='mediated',
                        excluded_cells=None, colors=None,  use_ppi_score=False, filename_suffix='', verbose=None):
     '''
     This function performs the analysis with the default list of GO terms to filter the proteins in the PPI network.
@@ -134,7 +172,7 @@ def heuristic_pipeline(files, rnaseq_setup, ppi_setup, meta_setup, cutoff_setup,
 
     Returns
     -------
-    subsampling_space : cell2cell.core.SubsamplingSpace
+    subsampling_space : cell2cell.core.SpatialCCI
 
 
     ppi_dict : dict
@@ -221,49 +259,4 @@ def heuristic_pipeline(files, rnaseq_setup, ppi_setup, meta_setup, cutoff_setup,
                             use_ppi_score=use_ppi_score,
                             filename_suffix=filename_suffix,
                             verbose=verbose)
-    return outputs
-
-
-def ligand_receptor_pipeline(files, rnaseq_setup, ppi_setup, meta_setup, cutoff_setup, analysis_setup, excluded_cells=None,
-                             colors=None,  use_ppi_score=False, filename_suffix='', verbose=True):
-    if excluded_cells is None:
-        excluded_cells = []
-
-    # Check for output directory
-    if not os.path.exists(files['output_folder']):
-        os.makedirs(files['output_folder'])
-
-    # Load Data
-    rnaseq_data = read_data.load_rnaseq(rnaseq_file=files['rnaseq'],
-                                        gene_column=rnaseq_setup['gene_col'],
-                                        drop_nangenes=rnaseq_setup['drop_nangenes'],
-                                        log_transformation=rnaseq_setup['log_transform'],
-                                        format='auto',
-                                        verbose=verbose)
-
-    ppi_data = read_data.load_ppi(ppi_file=files['ppi'],
-                                  interaction_columns=ppi_setup['protein_cols'],
-                                  rnaseq_genes=list(rnaseq_data.index),
-                                  format='auto',
-                                  verbose=verbose)
-
-    meta = read_data.load_metadata(metadata_file=files['metadata'],
-                                   rnaseq_data=rnaseq_data,
-                                   sample_col=meta_setup['sample_col'],
-                                   format='auto',
-                                   verbose=verbose)
-
-    # Run Analysis
-    outputs = core_pipeline(files=files,
-                            rnaseq_data=rnaseq_data,
-                            ppi_data=ppi_data,
-                            metadata=meta,
-                            meta_setup=meta_setup,
-                            cutoff_setup=cutoff_setup,
-                            analysis_setup=analysis_setup,
-                            excluded_cells=excluded_cells,
-                            colors=colors,
-                            use_ppi_score=use_ppi_score,
-                            filename_suffix=filename_suffix,
-                            verbose=verbose)
     return outputs
\ No newline at end of file
diff --git a/cell2cell/core/interaction_space.py b/cell2cell/core/interaction_space.py
index 14a5dae..9dd28d7 100644
--- a/cell2cell/core/interaction_space.py
+++ b/cell2cell/core/interaction_space.py
@@ -84,12 +84,12 @@ class InteractionSpace():
 
     '''
 
-    def __init__(self, rnaseq_data, ppi_data, gene_cutoffs, score_type='binary', score_metric='bray_curtis',
-                 cci_matrix_template=None, verbose=True):
+    def __init__(self, rnaseq_data, ppi_data, gene_cutoffs, communication_score='expression_thresholding',
+                 cci_score='bray_curtis', cci_matrix_template=None, verbose=True):
 
-        self.score_type = score_type
-        self.score_metric = score_metric
-        if self.score_type == 'binary':
+        self.communication_score = communication_score
+        self.score_metric = cci_score
+        if self.communication_score == 'expression_thresholding':
             if 'type' in gene_cutoffs.keys():
                 cutoff_values = cutoffs.get_cutoffs(rnaseq_data=rnaseq_data,
                                                     parameters=gene_cutoffs,
@@ -102,7 +102,7 @@ def __init__(self, rnaseq_data, ppi_data, gene_cutoffs, score_type='binary', sco
         self.ppi_data = ppi_data.copy()
 
         self.modified_rnaseq = integrate_data.get_modified_rnaseq(rnaseq_data=rnaseq_data,
-                                                                  score_type=self.score_type,
+                                                                  communication_score=self.communication_score,
                                                                   cutoffs=cutoff_values
                                                                   )
 
diff --git a/cell2cell/preprocessing/integrate_data.py b/cell2cell/preprocessing/integrate_data.py
index 7f14e63..8953acf 100644
--- a/cell2cell/preprocessing/integrate_data.py
+++ b/cell2cell/preprocessing/integrate_data.py
@@ -5,19 +5,19 @@
 from cell2cell.preprocessing import ppi, gene_ontology, rnaseq
 
 ## RNAseq datasets
-def get_modified_rnaseq(rnaseq_data, score_type='binary', **kwargs):
-    if score_type == 'binary':
-        modified_rnaseq = get_binary_rnaseq(rnaseq_data, kwargs['cutoffs'])
-    elif score_type == 'absolute':
+def get_modified_rnaseq(rnaseq_data, communication_score='expression_thresholding', **kwargs):
+    if communication_score == 'expression_thresholding':
+        modified_rnaseq = get_thresholded_rnaseq(rnaseq_data, kwargs['cutoffs'])
+    elif communication_score == 'absolute':
         #modified_rnaseq = rnaseq.divide_expression_by_max(rnaseq_data)
         modified_rnaseq = rnaseq_data.copy()
     else:
         # As other score types are implemented, other elif condition will be included here.
-        raise NotImplementedError("Score type {} to compute pairwise cell-interactions is not implemented".format(score_type))
+        raise NotImplementedError("Score type {} to compute pairwise cell-interactions is not implemented".format(communication_score))
     return modified_rnaseq
 
 
-def get_binary_rnaseq(rnaseq_data, cutoffs):
+def get_thresholded_rnaseq(rnaseq_data, cutoffs):
     binary_rnaseq_data = rnaseq_data.copy()
     columns = list(cutoffs.columns)
     if (len(columns) == 1) and ('value' in columns):
@@ -51,29 +51,15 @@ def get_ppi_dict_from_proteins(ppi_data, contact_proteins, mediator_proteins=Non
                                                   bidirectional=bidirectional,
                                                   verbose=verbose)
     if mediator_proteins is not None:
-        ppi_dict['mediated'] = ppi.filter_ppi_network(ppi_data=ppi_data,
-                                                      contact_proteins=contact_proteins,
-                                                      mediator_proteins=mediator_proteins,
-                                                      interaction_type='mediated',
-                                                      interaction_columns=interaction_columns,
-                                                      bidirectional=bidirectional,
-                                                      verbose=verbose)
-
-        ppi_dict['combined'] = ppi.filter_ppi_network(ppi_data=ppi_data,
-                                                      contact_proteins=contact_proteins,
-                                                      mediator_proteins=mediator_proteins,
-                                                      interaction_type='combined',
-                                                      interaction_columns=interaction_columns,
-                                                      bidirectional=bidirectional,
-                                                      verbose=verbose)
-
-        ppi_dict['complete'] = ppi.filter_ppi_network(ppi_data=ppi_data,
-                                                      contact_proteins=contact_proteins,
-                                                      mediator_proteins=mediator_proteins,
-                                                      interaction_type='complete',
-                                                      interaction_columns=interaction_columns,
-                                                      bidirectional=bidirectional,
-                                                      verbose=verbose)
+        for interaction_type in ['mediated', 'combined', 'complete']:
+            ppi_dict[interaction_type] = ppi.filter_ppi_network(ppi_data=ppi_data,
+                                                          contact_proteins=contact_proteins,
+                                                          mediator_proteins=mediator_proteins,
+                                                          interaction_type=interaction_type,
+                                                          interaction_columns=interaction_columns,
+                                                          bidirectional=bidirectional,
+                                                          verbose=verbose)
+
     return ppi_dict
 
 

From 93e434d421e9a24d6c9220e6103d65964801ba8d Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Thu, 16 Apr 2020 13:06:30 -0700
Subject: [PATCH 02/19] Added CCI scores from matrix multiplication (all cell
 pairs simultaneously)

---
 cell2cell/core/cci_scores.py | 96 ++++++++++++++++++++++++++++++++++--
 1 file changed, 92 insertions(+), 4 deletions(-)

diff --git a/cell2cell/core/cci_scores.py b/cell2cell/core/cci_scores.py
index a8db656..e733c88 100644
--- a/cell2cell/core/cci_scores.py
+++ b/cell2cell/core/cci_scores.py
@@ -3,7 +3,7 @@
 from __future__ import absolute_import
 
 import numpy as np
-import math
+
 
 def compute_jaccard_like_cci_score(cell1, cell2, ppi_score=None):
     '''
@@ -87,7 +87,7 @@ def compute_braycurtis_like_cci_score(cell1, cell2, ppi_score=None):
     return cci_score
 
 
-def compute_dot_product_score(cell1, cell2, ppi_score=None):
+def compute_count_score(cell1, cell2, ppi_score=None):
     '''
     Function that calculates an dot score for the interaction between two cells based on the
     interactions of their proteins with the proteins of the other cell.
@@ -114,8 +114,96 @@ def compute_dot_product_score(cell1, cell2, ppi_score=None):
     if ppi_score is None:
         ppi_score = np.array([1.0] * len(c1))
 
-    cci_score = np.nansum(c1 * c2 * ppi_score)
+    mult = c1 * c2 * ppi_score
+    cci_score = np.nansum(mult != 0) # Count all active pathways (different to zero)
 
     if cci_score is np.nan:
         return 0.0
-    return cci_score
\ No newline at end of file
+    return cci_score
+
+
+def matmul_jaccard_like(A_scores, B_scores, ppi_score=None):
+    '''All cell-pair scores from two matrices. A_scores contains the communication scores for the partners on the left
+     column of the PPI network and B_score contains the same but for the partners in the right column.
+
+     Parameters
+     __________
+     A_scores : array
+        Matrix of size NxM, where N is the number of PPIs and M is the number of individual cells.
+
+    B_scores : array
+        Matrix of size NxM, where N is the number of PPIs and M is the number of individual cells.
+
+    Returns
+    -------
+    jaccard : array
+        Matrix MxM, representing the CCI score for all cell pairs
+    '''
+    if ppi_score is None:
+        ppi_score = np.array([1.0] * A_scores.shape[0])
+    ppi_score = ppi_score.reshape((len(ppi_score), 1))
+
+    numerator = np.matmul(np.multiply(A_scores, ppi_score).transpose(), B_scores)
+
+    A_module = np.sum(np.multiply(np.multiply(A_scores, A_scores), ppi_score), axis=0)
+    B_module = np.sum(np.multiply(np.multiply(B_scores, B_scores), ppi_score), axis=0)
+    denominator = A_module.reshape((A_module.shape[0], 1)) + B_module - numerator
+
+    jaccard = np.divide(numerator, denominator)
+    return jaccard
+
+
+def matmul_bray_curtis_like(A_scores, B_scores, ppi_score=None):
+    '''All cell-pair scores from two matrices. A_scores contains the communication scores for the partners on the left
+     column of the PPI network and B_score contains the same but for the partners in the right column.
+
+     Parameters
+     __________
+     A_scores : array
+        Matrix of size NxM, where N is the number of PPIs and M is the number of individual cells.
+
+    B_scores : array
+        Matrix of size NxM, where N is the number of PPIs and M is the number of individual cells.
+
+    Returns
+    -------
+    bray_curtis : array
+        Matrix MxM, representing the CCI score for all cell pairs
+    '''
+    if ppi_score is None:
+        ppi_score = np.array([1.0] * A_scores.shape[0])
+    ppi_score = ppi_score.reshape((len(ppi_score), 1))
+
+    numerator = np.matmul(np.multiply(A_scores, ppi_score).transpose(), B_scores)
+
+    A_module = np.sum(np.multiply(np.multiply(A_scores, A_scores), ppi_score), axis=0)
+    B_module = np.sum(np.multiply(np.multiply(B_scores, B_scores), ppi_score), axis=0)
+    denominator = A_module.reshape((A_module.shape[0], 1)) + B_module
+
+    bray_curtis = np.divide(2.0*numerator, denominator)
+    return bray_curtis
+
+
+def matmul_count_active(A_scores, B_scores, ppi_score=None):
+    '''All cell-pair scores from two matrices. A_scores contains the communication scores for the partners on the left
+     column of the PPI network and B_score contains the same but for the partners in the right column.
+
+     Parameters
+     __________
+     A_scores : array
+        Matrix of size NxM, where N is the number of PPIs and M is the number of individual cells.
+
+    B_scores : array
+        Matrix of size NxM, where N is the number of PPIs and M is the number of individual cells.
+
+    Returns
+    -------
+    counts : array
+        Matrix MxM, representing the CCI score for all cell pairs
+    '''
+    if ppi_score is None:
+        ppi_score = np.array([1.0] * A_scores.shape[0])
+    ppi_score = ppi_score.reshape((len(ppi_score), 1))
+
+    counts = np.matmul(np.multiply(A_scores, ppi_score).transpose(), B_scores)
+    return counts
\ No newline at end of file

From e5ac1f48ddbb570ccd056fb838ace4d019ca77ff Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Thu, 16 Apr 2020 13:07:27 -0700
Subject: [PATCH 03/19] Added new methods

---
 cell2cell/core/__init__.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/cell2cell/core/__init__.py b/cell2cell/core/__init__.py
index acad71a..190f963 100644
--- a/cell2cell/core/__init__.py
+++ b/cell2cell/core/__init__.py
@@ -2,7 +2,8 @@
 
 from __future__ import absolute_import
 
-from cell2cell.core.cci_scores import (compute_braycurtis_like_cci_score, compute_dot_product_score, compute_jaccard_like_cci_score)
+from cell2cell.core.cci_scores import (compute_braycurtis_like_cci_score, compute_count_score, compute_jaccard_like_cci_score,
+                                       matmul_bray_curtis_like, matmul_count_active, matmul_jaccard_like)
 from cell2cell.core.cell import (Cell, get_cells_from_rnaseq)
 from cell2cell.core.interaction_space import (generate_interaction_elements, InteractionSpace)
-from cell2cell.core.subsampling import (subsampling_operation, SubsamplingSpace)
\ No newline at end of file
+from cell2cell.core.spatial_interactions import (SpatialCCI)
\ No newline at end of file

From 723750108607aac58c286837609538841a2c67c3 Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Thu, 16 Apr 2020 13:07:48 -0700
Subject: [PATCH 04/19] Fixed PPI network template for cells

---
 cell2cell/core/cell.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/cell2cell/core/cell.py b/cell2cell/core/cell.py
index ad25191..60eb395 100644
--- a/cell2cell/core/cell.py
+++ b/cell2cell/core/cell.py
@@ -45,9 +45,7 @@ def __init__(self, sc_rnaseq_data, verbose=True):
         self.rnaseq_data.columns = ['value']
 
         # Binary ppi datasets
-        self.weighted_ppi = pd.DataFrame(columns=['A','score'])
-        # 'B' removed after including bidirectional interactions to use only one column
-        # self.weighted_ppi = pd.DataFrame(columns=['A', 'B', 'score'])
+        self.weighted_ppi = pd.DataFrame(columns=['A', 'B', 'score'])
 
         # Object created
         if verbose:

From fe6f89a2fe03d2f6e1c9caef5249fa20bbcf2382 Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Fri, 17 Apr 2020 18:38:28 -0700
Subject: [PATCH 05/19] Added communication scores

---
 cell2cell/core/communication_scores.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 cell2cell/core/communication_scores.py

diff --git a/cell2cell/core/communication_scores.py b/cell2cell/core/communication_scores.py
new file mode 100644
index 0000000..ffaf4fb
--- /dev/null
+++ b/cell2cell/core/communication_scores.py
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+
+import numpy as np
+
+
+def get_binary_scores(cell1, cell2, ppi_score=None):
+    c1 = cell1.weighted_ppi['A'].values
+    c2 = cell2.weighted_ppi['B'].values
+
+    if (len(c1) == 0) or (len(c2) == 0):
+        return 0.0
+
+    if ppi_score is None:
+        ppi_score = np.array([1.0] * len(c1))
+
+    communication_scores = c1 * c2 * ppi_score
+    return communication_scores
+
+
+def get_continuous_scores(cell1, cell2, ppi_score=None):
+    raise ValueError("Continuous communication scores not implemented yet")
\ No newline at end of file

From 4fe5a8b533d6f128e15661a59cfe4598e43090f6 Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Fri, 17 Apr 2020 18:38:58 -0700
Subject: [PATCH 06/19] Future new features

---
 cell2cell/analysis/pipelines.py        |  58 +++++++------
 cell2cell/core/spatial_interactions.py |  20 +++++
 cell2cell/core/subsampling.py          | 116 -------------------------
 3 files changed, 50 insertions(+), 144 deletions(-)
 create mode 100644 cell2cell/core/spatial_interactions.py
 delete mode 100644 cell2cell/core/subsampling.py

diff --git a/cell2cell/analysis/pipelines.py b/cell2cell/analysis/pipelines.py
index 02f4be7..519c57a 100644
--- a/cell2cell/analysis/pipelines.py
+++ b/cell2cell/analysis/pipelines.py
@@ -4,7 +4,6 @@
 
 import os
 
-from cell2cell.analysis import ppi_enrichment
 from cell2cell.core import interaction_space as ispace
 from cell2cell.datasets import heuristic_data
 from cell2cell.io import read_data
@@ -18,8 +17,12 @@ def core_pipeline(files, rnaseq_data, ppi_data, metadata, meta_setup, cutoff_set
     if excluded_cells is None:
         excluded_cells = []
 
-    bi_ppi_data = ppi.bidirectional_ppi_for_cci(ppi_data=ppi_data,
-                                                verbose=verbose)
+    if analysis_setup['cci_type'] == 'undirected':
+        bi_ppi_data = ppi.bidirectional_ppi_for_cci(ppi_data=ppi_data, verbose=False)
+        ref_ppi = ppi_data
+    else:
+        bi_ppi_data = ppi_data.copy()
+        ref_ppi = None
 
 
     interaction_space = ispace.InteractionSpace(rnaseq_data=rnaseq_data,
@@ -27,10 +30,15 @@ def core_pipeline(files, rnaseq_data, ppi_data, metadata, meta_setup, cutoff_set
                                                 gene_cutoffs=cutoff_setup,
                                                 communication_score=analysis_setup['communication_score'],
                                                 cci_score=analysis_setup['cci_score'],
+                                                cci_type=analysis_setup['cci_type'],
                                                 verbose=verbose)
 
-    interaction_space.compute_pairwise_interactions(use_ppi_score=use_ppi_score,
-                                                    verbose=verbose)
+    interaction_space.compute_pairwise_cci_scores(use_ppi_score=use_ppi_score,
+                                                  verbose=verbose)
+
+    interaction_space.compute_pairwise_communication_scores(ref_ppi_data=ref_ppi,
+                                                            use_ppi_score=use_ppi_score,
+                                                            verbose=verbose)
 
     clustermap = plotting.clustermap_cci(interaction_space,
                                          method='ward',
@@ -54,25 +62,17 @@ def core_pipeline(files, rnaseq_data, ppi_data, metadata, meta_setup, cutoff_set
                                 filename=files['output_folder'] + 'CCI-PCoA-CCI-scores{}.png'.format(filename_suffix),
                                 )
 
-    interaction_matrix, std_interaction_matrix = ppi_enrichment.get_ppi_score_for_cell_pairs(
-        cells=list(interaction_space.distance_matrix.columns),
-        subsampled_interactions=[interaction_space.interaction_elements],
-        ppi_data=bi_ppi_data,
-        ref_ppi_data=ppi_data,
-        use_ppi_score=use_ppi_score
-    )
-
-    interaction_clustermap = plotting.clustermap_cell_pairs_vs_ppi(interaction_matrix,
-                                                                   metric=metric,
-                                                                   metadata=metadata,
-                                                                   sample_col=meta_setup['sample_col'],
-                                                                   group_col=meta_setup['group_col'],
-                                                                   colors=colors,
-                                                                   excluded_cells=excluded_cells,
-                                                                   title='Active ligand-receptor pairs for interacting cells',
-                                                                   filename=files['output_folder'] + 'CCI-Active-LR-pairs{}.png'.format(filename_suffix),
-                                                                   **{'figsize': (20, 40)}
-                                                                   )
+    interaction_clustermap = plotting.clustermap_ccc(interaction_space,
+                                                     metric=metric,
+                                                     metadata=metadata,
+                                                     sample_col=meta_setup['sample_col'],
+                                                     group_col=meta_setup['group_col'],
+                                                     colors=colors,
+                                                     excluded_cells=excluded_cells,
+                                                     title='Active ligand-receptor pairs for interacting cells',
+                                                     filename=files['output_folder'] + 'CCI-Active-LR-pairs{}.png'.format(filename_suffix),
+                                                     **{'figsize': (20, 40)}
+                                                     )
 
     interaction_clustermap.data2d.to_csv(files['output_folder'] + 'CCI-Active-LR-pairs{}.csv'.format(filename_suffix))
 
@@ -91,8 +91,9 @@ def ligand_receptor_pipeline(files, rnaseq_setup, ppi_setup, meta_setup, cutoff_
         excluded_cells = []
 
     # Check for output directory
-    if not os.path.exists(files['output_folder']):
-        os.makedirs(files['output_folder'])
+    if 'output_folder' in files.keys():
+        if not os.path.exists(files['output_folder']):
+            os.makedirs(files['output_folder'])
 
     # Load Data
     rnaseq_data = read_data.load_rnaseq(rnaseq_file=files['rnaseq'],
@@ -183,8 +184,9 @@ def heuristic_pipeline(files, rnaseq_setup, ppi_setup, meta_setup, cutoff_setup,
         excluded_cells = []
 
     # Check for output directory
-    if not os.path.exists(files['output_folder']):
-        os.makedirs(files['output_folder'])
+    if 'output_folder' in files.keys():
+        if not os.path.exists(files['output_folder']):
+            os.makedirs(files['output_folder'])
 
     # Load Data
     rnaseq_data = read_data.load_rnaseq(rnaseq_file=files['rnaseq'],
diff --git a/cell2cell/core/spatial_interactions.py b/cell2cell/core/spatial_interactions.py
new file mode 100644
index 0000000..3594ae7
--- /dev/null
+++ b/cell2cell/core/spatial_interactions.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+
+import numpy as np
+import pandas as pd
+import random
+
+from cell2cell.core import InteractionSpace
+from cell2cell.utils import parallel_computing
+from contextlib import closing
+from multiprocessing import Pool
+
+
+class SpatialCCI:
+
+    def __init__(self, rnaseq_data, ppi_data, gene_cutoffs, spatial_dict, score_type='binary', score_metric='bray_curtis',
+                 n_jobs=1, verbose=True):
+        pass
+        # TODO IMPLEMENT SPATIAL CCI
\ No newline at end of file
diff --git a/cell2cell/core/subsampling.py b/cell2cell/core/subsampling.py
deleted file mode 100644
index f227bff..0000000
--- a/cell2cell/core/subsampling.py
+++ /dev/null
@@ -1,116 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import absolute_import
-
-import numpy as np
-import pandas as pd
-import random
-
-from cell2cell.clustering import compute_avg_cci_matrix
-from cell2cell.core import InteractionSpace
-from cell2cell.utils import parallel_computing
-from contextlib import closing
-from multiprocessing import Pool
-
-
-class SubsamplingSpace:
-
-    def __init__(self, rnaseq_data, ppi_data, gene_cutoffs, score_type='binary', score_metric='bray_curtis',
-                 subsampling_percentage=1.0, iterations=1, initial_seed=None, n_jobs=1, verbose=True):
-
-        if verbose:
-            print("Computing interactions by sub-sampling a"
-                  " {}% of cells in each of the {} iterations".format(int(100*subsampling_percentage), iterations))
-
-        self.cell_ids = list(rnaseq_data.columns)
-
-        data = np.empty((len(self.cell_ids), len(self.cell_ids)))
-        data[:] = np.nan
-        self.cci_matrix_template = pd.DataFrame(data,
-                                                columns=self.cell_ids,
-                                                index=self.cell_ids)
-
-        self.subsampled_interactions = self.subsampling_interactions(rnaseq_data=rnaseq_data,
-                                                                     ppi_data=ppi_data,
-                                                                     gene_cutoffs=gene_cutoffs,
-                                                                     score_type=score_type,
-                                                                     score_metric=score_metric,
-                                                                     subsampling_percentage=subsampling_percentage,
-                                                                     iterations=iterations,
-                                                                     initial_seed=initial_seed,
-                                                                     n_jobs=n_jobs,
-                                                                     verbose=verbose)
-
-        self.average_cci_matrix = compute_avg_cci_matrix(self)
-
-
-    def subsampling_interactions(self, rnaseq_data, ppi_data, gene_cutoffs, score_type='binary',
-                                 score_metric='bray_curtis', subsampling_percentage=1.0, iterations=1, initial_seed=None,
-                                 n_jobs=1, verbose=True):
-        '''
-        This function performs the sub-sampling method by generating a list of cells to consider in each iteration.
-        Then, for each list of cells a InteractionSpace is generated to perform the cell2cell analysis and return the
-        respective CCI Matrix and clusters.
-        '''
-        # Last position for sub-sampling when shuffling
-        last_item = int(len(self.cell_ids) * subsampling_percentage)
-        if last_item == 0:
-            last_item = 1
-
-        inputs = {'cells' : self.cell_ids,
-                  'list_end' : last_item,
-                  'rnaseq' : rnaseq_data,
-                  'ppi' : ppi_data,
-                  'cutoffs' : gene_cutoffs,
-                  'score_type' : score_type,
-                  'score_metric' : score_metric,
-                  'cci_matrix' : self.cci_matrix_template,
-                  'verbose' :verbose
-                  }
-
-        inputs_list = []
-        for i in range(iterations):
-            inputs_ = inputs.copy()
-            if initial_seed is not None:
-                inputs_['seed'] = initial_seed + i
-            else:
-                inputs_['seed'] = None
-            inputs_list.append(inputs_)
-
-        # Parallel computing
-        if n_jobs == 1:
-            results = list(map(parallel_computing.parallel_subsampling_interactions, inputs_list))
-        else:
-            agents = parallel_computing.agents_number(n_jobs)
-            chunksize = 1
-            with closing(Pool(processes=agents)) as pool:
-                results = pool.map(parallel_computing.parallel_subsampling_interactions, inputs_list, chunksize)
-        return results
-
-
-def subsampling_operation(cell_ids, last_item, rnaseq_data, ppi_data, gene_cutoffs, score_type='binary',
-                          score_metric='bray_curtis', cci_matrix_template=None, seed=None, verbose=True):
-    '''
-    Functional unit to perform parallel computing in Sub-sampling Space
-    '''
-    cell_ids_ = cell_ids.copy()
-    if seed is not None:
-        random.seed(seed)
-    random.shuffle(cell_ids_)
-    included_cells = cell_ids_[:last_item]
-
-    interaction_space = InteractionSpace(rnaseq_data=rnaseq_data[included_cells],
-                                         ppi_data=ppi_data,
-                                         gene_cutoffs=gene_cutoffs,
-                                         score_type=score_type,
-                                         cci_matrix_template=cci_matrix_template,
-                                         verbose=verbose)
-
-    interaction_space.compute_pairwise_interactions(score_metric=score_metric,
-                                                    verbose=verbose)
-
-    interaction_elements = dict()
-    interaction_elements['cells'] = included_cells
-    interaction_elements['cci_matrix'] = interaction_space.interaction_elements['cci_matrix']
-    interaction_elements['ppis'] = interaction_space.interaction_elements['ppis']
-    return interaction_elements
\ No newline at end of file

From 35db56f785dbd648894cada17dfd4e87ec9354ea Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Fri, 17 Apr 2020 18:39:25 -0700
Subject: [PATCH 07/19] New format of scores for CCIs and CCC

---
 cell2cell/core/cci_scores.py        |  33 ++++-
 cell2cell/core/interaction_space.py | 191 +++++++++++++++++++++-------
 2 files changed, 179 insertions(+), 45 deletions(-)

diff --git a/cell2cell/core/cci_scores.py b/cell2cell/core/cci_scores.py
index e733c88..23227e0 100644
--- a/cell2cell/core/cci_scores.py
+++ b/cell2cell/core/cci_scores.py
@@ -206,4 +206,35 @@ def matmul_count_active(A_scores, B_scores, ppi_score=None):
     ppi_score = ppi_score.reshape((len(ppi_score), 1))
 
     counts = np.matmul(np.multiply(A_scores, ppi_score).transpose(), B_scores)
-    return counts
\ No newline at end of file
+    return counts
+
+
+def matmul_cosine(A_scores, B_scores, ppi_score=None):
+    '''All cell-pair scores from two matrices. A_scores contains the communication scores for the partners on the left
+     column of the PPI network and B_score contains the same but for the partners in the right column.
+
+     Parameters
+     __________
+     A_scores : array
+        Matrix of size NxM, where N is the number of PPIs and M is the number of individual cells.
+
+    B_scores : array
+        Matrix of size NxM, where N is the number of PPIs and M is the number of individual cells.
+
+    Returns
+    -------
+    counts : array
+        Matrix MxM, representing the CCI score for all cell pairs
+    '''
+    if ppi_score is None:
+        ppi_score = np.array([1.0] * A_scores.shape[0])
+    ppi_score = ppi_score.reshape((len(ppi_score), 1))
+
+    numerator = np.matmul(np.multiply(A_scores, ppi_score).transpose(), B_scores)
+
+    A_module = np.sum(np.multiply(np.multiply(A_scores, A_scores), ppi_score), axis=0) ** 0.5
+    B_module = np.sum(np.multiply(np.multiply(B_scores, B_scores), ppi_score), axis=0) ** 0.5
+    denominator = A_module.reshape((A_module.shape[0], 1)) * B_module
+
+    cosine = np.divide(numerator, denominator)
+    return cosine
\ No newline at end of file
diff --git a/cell2cell/core/interaction_space.py b/cell2cell/core/interaction_space.py
index 9dd28d7..d87d01a 100644
--- a/cell2cell/core/interaction_space.py
+++ b/cell2cell/core/interaction_space.py
@@ -3,14 +3,25 @@
 from __future__ import absolute_import
 
 from cell2cell.preprocessing import integrate_data, cutoffs
-from cell2cell.core import cell, cci_scores
+from cell2cell.core import cell, cci_scores, communication_scores
 
 import itertools
 import numpy as np
 import pandas as pd
 
 
-def generate_interaction_elements(modified_rnaseq, ppi_data, cci_matrix_template=None, verbose=True):
+def generate_pairs(cells, cci_type):
+    if cci_type == 'directed':
+        pairs = list(itertools.combinations(cells + cells, 2)) # Directed
+    elif cci_type == 'undirected':
+        pairs = list(itertools.combinations(cells, 2)) + [(c, c) for c in cells] # Undirected
+    else:
+        raise NotImplementedError("CCI type has to be directed or undirected")
+    pairs = list(set(pairs))  # Remove duplicates
+    return pairs
+
+
+def generate_interaction_elements(modified_rnaseq, ppi_data, cci_type='undirected', cci_matrix_template=None, verbose=True):
     '''Create all elements of pairwise interactions needed to perform the analyses.
     All these variables are used by the class InteractionSpace.
 
@@ -36,38 +47,32 @@ def generate_interaction_elements(modified_rnaseq, ppi_data, cci_matrix_template
     cell_number = len(cell_instances)
 
     # Generate pairwise interactions
-    # pairwise_interactions = list(itertools.combinations(cell_instances + cell_instances, 2))
-    pairwise_interactions = list(itertools.combinations(cell_instances, 2)) + [(c, c) for c in cell_instances]
-    pairwise_interactions = list(set(pairwise_interactions))  # Remove duplicates
+    pairwise_interactions = generate_pairs(cell_instances, cci_type)
 
     # Interaction elements
-    interaction_space = {}
-    interaction_space['pairs'] = pairwise_interactions
-    interaction_space['cells'] = cell.get_cells_from_rnaseq(modified_rnaseq, verbose=verbose)
+    interaction_elements = {}
+    interaction_elements['cell_names'] = cell_instances
+    interaction_elements['pairs'] = pairwise_interactions
+    interaction_elements['cells'] = cell.get_cells_from_rnaseq(modified_rnaseq, verbose=verbose)
 
     # Cell-specific binary ppi
-    for cell_instance in interaction_space['cells'].values():
+    interaction_elements['A_score'] = np.array([], dtype=np.int64)#.reshape(ppi_data.shape[0],0)
+    interaction_elements['B_score'] = np.array([], dtype=np.int64)#.reshape(ppi_data.shape[0],0)
+    for cell_instance in interaction_elements['cells'].values():
         cell_instance.weighted_ppi = integrate_data.get_weighted_ppi(ppi_data=ppi_data,
                                                                      modified_rnaseq_data=cell_instance.rnaseq_data,
                                                                      column='value')
+        interaction_elements['A_score'] = np.hstack([interaction_elements['A_score'], cell_instance.weighted_ppi['A'].values])
+        interaction_elements['B_score'] = np.hstack([interaction_elements['B_score'], cell_instance.weighted_ppi['B'].values])
 
     # Cell-cell interaction matrix
     if cci_matrix_template is None:
-        interaction_space['cci_matrix'] = pd.DataFrame(np.zeros((cell_number, cell_number)),
+        interaction_elements['cci_matrix'] = pd.DataFrame(np.zeros((cell_number, cell_number)),
                                                        columns=cell_instances,
                                                        index=cell_instances)
     else:
-        interaction_space['cci_matrix'] = cci_matrix_template
-
-    # PPIs for each cell
-    empty_ppi = np.empty(ppi_data.shape)
-    empty_ppi[:] = np.nan
-    interaction_space['ppis'] = dict.fromkeys(list(interaction_space['cells'].values()),
-                                              pd.DataFrame(empty_ppi, columns=['A', 'B', 'score']))
-
-    for name, cell_unit in interaction_space['cells'].items():
-        interaction_space['ppis'][name] = cell_unit.weighted_ppi
-    return interaction_space
+        interaction_elements['cci_matrix'] = cci_matrix_template
+    return interaction_elements
 
 
 class InteractionSpace():
@@ -85,10 +90,12 @@ class InteractionSpace():
     '''
 
     def __init__(self, rnaseq_data, ppi_data, gene_cutoffs, communication_score='expression_thresholding',
-                 cci_score='bray_curtis', cci_matrix_template=None, verbose=True):
+                 cci_score='bray_curtis', cci_type='undirected', cci_matrix_template=None, verbose=True):
 
         self.communication_score = communication_score
-        self.score_metric = cci_score
+        self.cci_score = cci_score
+        self.cci_type = cci_type
+
         if self.communication_score == 'expression_thresholding':
             if 'type' in gene_cutoffs.keys():
                 cutoff_values = cutoffs.get_cutoffs(rnaseq_data=rnaseq_data,
@@ -113,7 +120,7 @@ def __init__(self, rnaseq_data, ppi_data, gene_cutoffs, communication_score='exp
 
         self.interaction_elements['ppi_score'] = self.ppi_data['score'].values
 
-    def pairwise_interaction(self, cell1, cell2, score_metric='bray_curtis', use_ppi_score=False, verbose=True):
+    def pair_cci_score(self, cell1, cell2, cci_score='bray_curtis', use_ppi_score=False, verbose=True):
         '''
         Function that performs the interaction analysis of a pair of cells.
 
@@ -139,22 +146,24 @@ def pairwise_interaction(self, cell1, cell2, score_metric='bray_curtis', use_ppi
         '''
 
         if verbose:
-            print("Computing interaction between {} and {}".format(cell1.type, cell2.type))
+            print("Computing interaction score between {} and {}".format(cell1.type, cell2.type))
 
         if use_ppi_score:
             ppi_score = self.ppi_data['score'].values
         else:
             ppi_score = None
         # Calculate cell-cell interaction score
-        if score_metric == 'bray_curtis':
-            cci_score = cci_scores.compute_braycurtis_like_cci_score(cell1, cell2, ppi_score=ppi_score)
-        elif score_metric == 'jaccard':
-            cci_score = cci_scores.compute_jaccard_like_cci_score(cell1, cell2, ppi_score=ppi_score)
+        if cci_score == 'bray_curtis':
+            cci_value = cci_scores.compute_braycurtis_like_cci_score(cell1, cell2, ppi_score=ppi_score)
+        elif cci_score == 'jaccard':
+            cci_value = cci_scores.compute_jaccard_like_cci_score(cell1, cell2, ppi_score=ppi_score)
+        elif cci_score == 'count':
+            cci_value = cci_scores.compute_count_score(cell1, cell2, ppi_score=ppi_score)
         else:
-            raise NotImplementedError("Score metric {} to compute pairwise cell-interactions is not implemented".format(score_metric))
-        return cci_score
+            raise NotImplementedError("CCI score {} to compute pairwise cell-interactions is not implemented".format(cci_score))
+        return cci_value
 
-    def compute_pairwise_interactions(self, score_metric=None, use_ppi_score=False, verbose=True):
+    def compute_pairwise_cci_scores(self, cci_score=None, use_ppi_score=False, verbose=True):
         '''
         Function that computes...
 
@@ -167,26 +176,120 @@ def compute_pairwise_interactions(self, score_metric=None, use_ppi_score=False,
 
 
         '''
-        if score_metric is None:
-            score_metric = self.score_metric
+        if cci_score is None:
+            cci_score = self.cci_score
         else:
-            assert isinstance(score_metric, str)
+            assert isinstance(cci_score, str)
 
         ### Compute pairwise physical interactions
         if verbose:
             print("Computing pairwise interactions")
 
+        # Compute pair by pair
         for pair in self.interaction_elements['pairs']:
             cell1 = self.interaction_elements['cells'][pair[0]]
             cell2 = self.interaction_elements['cells'][pair[1]]
-            cci_score = self.pairwise_interaction(cell1,
-                                                  cell2,
-                                                  score_metric=score_metric,
-                                                  use_ppi_score=use_ppi_score,
-                                                  verbose=verbose)
-            self.interaction_elements['cci_matrix'].loc[pair[0], pair[1]] = cci_score
-            self.interaction_elements['cci_matrix'].loc[pair[1], pair[0]] = cci_score
+            cci_value = self.pair_cci_score(cell1,
+                                            cell2,
+                                            cci_score=cci_score,
+                                            use_ppi_score=use_ppi_score,
+                                            verbose=verbose)
+            self.interaction_elements['cci_matrix'].loc[pair[0], pair[1]] = cci_value
+            if self.cci_type == 'undirected':
+                self.interaction_elements['cci_matrix'].loc[pair[1], pair[0]] = cci_value
+
+        # Compute using matmul -> Too slow and uses a lot of memory TODO: Try to optimize this
+        # if cci_score == 'bray_curtis':
+        #     cci_matrix = cci_scores.matmul_bray_curtis_like(self.interaction_elements['A_score'],
+        #                                                     self.interaction_elements['B_score'])
+        # self.interaction_elements['cci_matrix'] = pd.DataFrame(cci_matrix,
+        #                                                        index=self.interaction_elements['cell_names'],
+        #                                                        columns=self.interaction_elements['cell_names']
+        #                                                        )
 
         # Generate distance matrix
-        self.distance_matrix = self.interaction_elements['cci_matrix'].apply(lambda x: 1 - x)
-        np.fill_diagonal(self.distance_matrix.values, 0.0)  # Make diagonal zero (delete autocrine-interactions)
\ No newline at end of file
+        if cci_score != 'count':
+            self.distance_matrix = self.interaction_elements['cci_matrix'].apply(lambda x: 1 - x)
+        else:
+            self.distance_matrix = self.interaction_elements['cci_matrix'].div(self.interaction_elements['cci_matrix'].max().max()).apply(lambda x: 1 - x)
+        np.fill_diagonal(self.distance_matrix.values, 0.0)  # Make diagonal zero (delete autocrine-interactions)
+
+    def pair_communication_score(self, cell1, cell2, communication_score='expression_thresholding',
+                                 use_ppi_score=False, verbose=True):
+        # TODO: Implement communication scores
+        if verbose:
+            print("Computing communication score between {} and {}".format(cell1.type, cell2.type))
+
+        # Check that new score is the same type as score used to build interaction space (binary or continuous)
+        if (communication_score in ['expression_product', 'coexpression']) \
+                & (self.communication_score in ['expression_thresholding', 'differential_combinations']):
+            raise ValueError('Cannot use {} for this interaction space'.format(communication_score))
+        if (communication_score in ['expression_thresholding', 'differential_combinations']) \
+                & (self.communication_score in ['expression_product', 'expression_correlation']):
+            raise ValueError('Cannot use {} for this interaction space'.format(communication_score))
+
+        if use_ppi_score:
+            ppi_score = self.ppi_data['score'].values
+        else:
+            ppi_score = None
+
+        if (communication_score == 'expression_thresholding') | (communication_score == 'differential_combinations'):
+            communication_value = communication_scores.get_binary_scores(cell1, cell2, ppi_score=ppi_score)
+        elif (communication_score == 'expression_product') | (communication_score == 'expression_correlation'):
+              communication_value = communication_scores.get_continuous_scores(cell1, cell2, ppi_score=ppi_score)
+        else:
+            raise NotImplementedError(
+                "Communication score {} to compute pairwise cell-communication is not implemented".format(communication_score))
+        return communication_value
+
+    def compute_pairwise_communication_scores(self, communication_score=None, use_ppi_score=False, ref_ppi_data=None,
+                                              cells=None, verbose=True):
+        '''
+        Function that computes...
+
+        Parameters
+        ----------
+
+
+        Returns
+        -------
+
+
+        '''
+        if communication_score is None:
+            communication_score = self.communication_score
+        else:
+            assert isinstance(communication_score, str)
+
+        # Labels:
+        if cells is None:
+            cell_pairs = self.interaction_elements['pairs']
+        else:
+            cell_pairs = generate_pairs(cells, self.cci_type)
+        col_labels = ['{};{}'.format(pair[0], pair[1]) for pair in cell_pairs]
+        if ref_ppi_data is None:
+            ref_index = self.ppi_data.apply(lambda row: (row[0], row[1]), axis=1)
+            keep_index = list(range(self.ppi_data.shape[0]))
+        else:
+            ref_index = list(ref_ppi_data.apply(lambda row: (row[0], row[1]), axis=1).values)
+            keep_index = list(pd.merge(self.ppi_data, ref_ppi_data, how='inner').index)
+
+        # DataFrame to Store values
+        communication_matrix = pd.DataFrame(index=ref_index, columns=col_labels)
+
+        ### Compute pairwise physical interactions
+        if verbose:
+            print("Computing pairwise communication")
+
+        for i, pair in enumerate(cell_pairs):
+            cell1 = self.interaction_elements['cells'][pair[0]]
+            cell2 = self.interaction_elements['cells'][pair[1]]
+
+            comm_score = self.pair_communication_score(cell1,
+                                                       cell2,
+                                                       communication_score=communication_score,
+                                                       use_ppi_score=use_ppi_score,
+                                                       verbose=verbose)
+            communication_matrix[col_labels[i]] = comm_score.flatten()[keep_index]
+
+        self.communication_matrix = communication_matrix
\ No newline at end of file

From 77f5d5e31629086f8ce229b465f9f15ba45a6e3f Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Fri, 17 Apr 2020 18:45:06 -0700
Subject: [PATCH 08/19] Place for future features

---
 cell2cell/utils/parallel_computing.py | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/cell2cell/utils/parallel_computing.py b/cell2cell/utils/parallel_computing.py
index 702fe0e..ca9497f 100644
--- a/cell2cell/utils/parallel_computing.py
+++ b/cell2cell/utils/parallel_computing.py
@@ -4,6 +4,7 @@
 
 from multiprocessing import cpu_count
 
+
 # GENERAL
 def agents_number(n_jobs):
     '''
@@ -23,21 +24,16 @@ def agents_number(n_jobs):
         agents = n_jobs
     return agents
 
+
 # CORE FUNCTIONS
-def parallel_subsampling_interactions(inputs):
+def parallel_spatial_ccis(inputs):
     '''
-    Parallel computing in cell2cell2.subsampling.SubsamplingSpace
+    Parallel computing in cell2cell2.spatial_interactions.SpatialCCIs
     '''
-    from cell2cell.core import subsampling_operation
-    results = subsampling_operation(cell_ids=inputs['cells'],
-                                    last_item= inputs['list_end'],
-                                    rnaseq_data=inputs['rnaseq'],
-                                    ppi_data=inputs['ppi'],
-                                    gene_cutoffs=inputs['cutoffs'],
-                                    score_type=inputs['score_type'],
-                                    score_metric=inputs['score_metric'],
-                                    cci_matrix_template=inputs['cci_matrix'],
-                                    seed=inputs['seed'],
-                                    verbose=inputs['verbose'])
-
-    return results
\ No newline at end of file
+    # TODO: Implement this for enabling spatial analysis and compute interactions in parallel
+
+    # from cell2cell.core import spatial_operation
+    #results = spatial_operation()
+
+    # return results
+    pass
\ No newline at end of file

From 4adeb05df1706153e2a68ca7eacc75cdb80b0f5c Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Fri, 17 Apr 2020 18:45:25 -0700
Subject: [PATCH 09/19] Changes in way of subsampling

---
 cell2cell/stats/null_distribution.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/cell2cell/stats/null_distribution.py b/cell2cell/stats/null_distribution.py
index 5a9675d..ba7ae70 100644
--- a/cell2cell/stats/null_distribution.py
+++ b/cell2cell/stats/null_distribution.py
@@ -2,6 +2,7 @@
 
 from __future__ import absolute_import
 
+import random
 import numpy as np
 import pandas as pd
 
@@ -151,7 +152,14 @@ def check_presence_in_dataframe(df, elements, columns=None):
 
 
 def subsample_dataframe(df, n_samples, random_state=None):
-    subsampled_df = df.sample(n_samples, random_state=random_state).reset_index(drop=True)
+    items = list(df.index)
+    if n_samples > len(items):
+        n_samples = len(items)
+    if isinstance(random_state, int):
+        random.seed(random_state)
+    random.shuffle(items)
+
+    subsampled_df = df.loc[items[:n_samples],:]
     return subsampled_df
 
 

From 7e1324a15ca088c8ec0c7725d1c75bace9dae882 Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Fri, 17 Apr 2020 18:45:46 -0700
Subject: [PATCH 10/19] Adapted to new code structure

---
 cell2cell/utils/plotting.py | 200 +++++++++++++++++++-----------------
 1 file changed, 108 insertions(+), 92 deletions(-)

diff --git a/cell2cell/utils/plotting.py b/cell2cell/utils/plotting.py
index 1d275ab..f37f601 100644
--- a/cell2cell/utils/plotting.py
+++ b/cell2cell/utils/plotting.py
@@ -29,7 +29,7 @@ def get_colors_from_labels(labels, cmap='gist_rainbow', factor=1):
     return colors
 
 
-def clustermap_cci(interaction_space, method='ward', optimal_leaf =True, metadata=None, sample_col='#SampleID',
+def clustermap_cci(interaction_space, method='ward', optimal_leaf=True, metadata=None, sample_col='#SampleID',
                    group_col='Groups', meta_cmap='gist_rainbow', colors=None, excluded_cells=None, title='',
                    bar_title='CCI score', cbar_fontsize='12', filename=None, **kwargs):
     if hasattr(interaction_space, 'distance_matrix'):
@@ -50,50 +50,54 @@ def clustermap_cci(interaction_space, method='ward', optimal_leaf =True, metadat
     else:
         df = distance_matrix
 
-    # Compute linkage
-    D = sp.distance.squareform(df)
-    if 'col_cluster' in kwargs.keys():
-        if kwargs['col_cluster']:
-            linkage = hc.linkage(D, method=method, optimal_ordering=optimal_leaf)
+    # Check symmetry to run linkage separately
+    symmetric = (df.values.transpose() == df.values).all()
+    if symmetric:
+        # TODO: implement heatmap when distance matrix is not symmetric
+        # Compute linkage
+        D = sp.distance.squareform(df)
+        if 'col_cluster' in kwargs.keys():
+            if kwargs['col_cluster']:
+                linkage = hc.linkage(D, method=method, optimal_ordering=optimal_leaf)
+            else:
+                linkage = None
         else:
-            linkage = None
-    else:
-        linkage = hc.linkage(D, method=method, optimal_ordering=optimal_leaf)
-
-    # Plot hierarchical clustering
-    hier = sns.clustermap(df,
-                          col_linkage=linkage,
-                          row_linkage=linkage,
-                          **kwargs
-                          )
-    plt.close()
-
-    # Triangular matrix
-    mask = np.zeros((df.shape[0], df.shape[1]))
-    order_map = dict()
-    if linkage is None:
-        ind_order = range(hier.data2d.shape[0])
-    else:
-        ind_order = hier.dendrogram_col.reordered_ind
-
-    for i, ind in enumerate(ind_order):
-        order_map[i] = ind
-        filter_list = [order_map[j] for j in range(i)]
-        mask[ind, filter_list] = 1 #hier.dendrogram_col.reordered_ind[:i]
-
-    # This may not work if we have a perfect interaction between two cells - Intended to avoid diagonal in distance matrix
-    # exclude_diag = distance_matrix.values[distance_matrix.values != 0]
+            linkage = hc.linkage(D, method=method, optimal_ordering=optimal_leaf)
 
-    kwargs_ = kwargs.copy()
+        # Plot hierarchical clustering
+        hier = sns.clustermap(df,
+                              col_linkage=linkage,
+                              row_linkage=linkage,
+                              **kwargs
+                              )
+        plt.close()
 
-    # This one does exclude only diagonal
-    #exclude_diag = df.values[~np.eye(df.values.shape[0], dtype=bool)].reshape(df.values.shape[0], -1)
-    #if 'vmin' not in list(kwargs_.keys()):
-    #    vmin = np.nanmin(exclude_diag)
-    #    kwargs_['vmin'] = vmin
-    #if 'vmax' not in list(kwargs_.keys()):
-    #    vmax = np.nanmax(exclude_diag)
-    #    kwargs_['vmax'] = vmax
+        # Triangular matrix
+        mask = np.zeros((df.shape[0], df.shape[1]))
+        order_map = dict()
+        if linkage is None:
+            ind_order = range(hier.data2d.shape[0])
+        else:
+            ind_order = hier.dendrogram_col.reordered_ind
+
+        for i, ind in enumerate(ind_order):
+            order_map[i] = ind
+            filter_list = [order_map[j] for j in range(i)]
+            mask[ind, filter_list] = 1 #hier.dendrogram_col.reordered_ind[:i]
+
+        kwargs_ = kwargs.copy()
+        # This one does exclude only diagonal
+        #exclude_diag = df.values[~np.eye(df.values.shape[0], dtype=bool)].reshape(df.values.shape[0], -1)
+        #if 'vmin' not in list(kwargs_.keys()):
+        #    vmin = np.nanmin(exclude_diag)
+        #    kwargs_['vmin'] = vmin
+        #if 'vmax' not in list(kwargs_.keys()):
+        #    vmax = np.nanmax(exclude_diag)
+        #    kwargs_['vmax'] = vmax
+    else:
+        linkage = None
+        mask = None
+        kwargs_ = kwargs.copy()
 
     # PLOT CCI MATRIX
     if space_type == 'class':
@@ -121,12 +125,18 @@ def clustermap_cci(interaction_space, method='ward', optimal_leaf =True, metadat
         col_colors.index = meta_.index
         col_colors.name = group_col.capitalize()
 
+        if not symmetric:
+            row_colors = col_colors
+        else:
+            row_colors = None
+
         # Plot hierarchical clustering (triangular)
         hier = sns.clustermap(df,
                               col_linkage=linkage,
                               row_linkage=linkage,
                               mask=mask,
                               col_colors=col_colors,
+                              row_colors=row_colors,
                               **kwargs_
                               )
     else:
@@ -137,50 +147,57 @@ def clustermap_cci(interaction_space, method='ward', optimal_leaf =True, metadat
                               **kwargs_
                               )
 
-    hier.ax_row_dendrogram.set_visible(False)
-    hier.ax_heatmap.tick_params(bottom=False)  # Hide xtick line
-
     # Apply offset transform to all xticklabels.
-    labels_x = hier.ax_heatmap.xaxis.get_majorticklabels()
-    labels_y = hier.ax_heatmap.yaxis.get_majorticklabels()
-
-    label_x1 = labels_x[0]
-    label_y1 = labels_y[0]
-
-    pos_x0 = label_x1.get_transform().transform((0., 0.))
-    xlims = hier.ax_heatmap.get_xlim()
-    xrange = abs(xlims[0] - xlims[1])
-    x_cell_width = xrange / len(df.columns)
-
-    pos_y1 = label_y1.get_transform().transform(label_y1.get_position())
-    ylims = hier.ax_heatmap.get_ylim()
-    yrange = abs(ylims[0] - ylims[1])
-    y_cell_width = yrange / len(df.index)
-
-    dpi_x = hier.fig.dpi_scale_trans.to_values()[0]
-    dpi_y = hier.fig.dpi_scale_trans.to_values()[3]
-
-    for i, label in enumerate(labels_x):
-        if label.get_position()[0] % x_cell_width == 0:
-            pos_x1 = label.get_transform().transform((aux_scaler_x, aux_scaler_x))
-            scaler_x = abs(pos_x1 - pos_x0)
-        else:
-            pos_x1 = label.get_transform().transform((x_cell_width, x_cell_width))
-            scaler_x = abs(pos_x1 - pos_x0)
-
-        aux_scaler_x = label.get_position()[0] - (label.get_position()[0] // x_cell_width) * x_cell_width
-        aux_scaler_y = label.get_position()[1] - (label.get_position()[1] // y_cell_width) * y_cell_width
-
-        new_pos_x = label.get_position()[0] - aux_scaler_x
-        new_pos_y = new_pos_x * yrange / xrange - yrange + y_cell_width
-
-        new_pos_y = label_y1.get_transform().transform((0, new_pos_y + aux_scaler_y)) - pos_y1
-        dx = -0.5 * scaler_x[0] / dpi_x
-        dy = new_pos_y[1] / dpi_y
-        offset = mlp.transforms.ScaledTranslation(dx, dy, hier.fig.dpi_scale_trans)
-        label.set_transform(label.get_transform() + offset)
-
-    hier.ax_heatmap.set_xticklabels(hier.ax_heatmap.xaxis.get_majorticklabels(), rotation=45, ha='right')#, fontsize=9.5)
+    if symmetric:
+        hier.ax_row_dendrogram.set_visible(False)
+        hier.ax_heatmap.tick_params(bottom=False)  # Hide xtick line
+
+        labels_x = hier.ax_heatmap.xaxis.get_majorticklabels()
+        labels_y = hier.ax_heatmap.yaxis.get_majorticklabels()
+
+        label_x1 = labels_x[0]
+        label_y1 = labels_y[0]
+
+        pos_x0 = label_x1.get_transform().transform((0., 0.))
+        xlims = hier.ax_heatmap.get_xlim()
+        xrange = abs(xlims[0] - xlims[1])
+        x_cell_width = xrange / len(df.columns)
+
+        pos_y1 = label_y1.get_transform().transform(label_y1.get_position())
+        ylims = hier.ax_heatmap.get_ylim()
+        yrange = abs(ylims[0] - ylims[1])
+        y_cell_width = yrange / len(df.index)
+
+        dpi_x = hier.fig.dpi_scale_trans.to_values()[0]
+        dpi_y = hier.fig.dpi_scale_trans.to_values()[3]
+
+        for i, label in enumerate(labels_x):
+            if label.get_position()[0] % x_cell_width == 0:
+                pos_x1 = label.get_transform().transform((aux_scaler_x, aux_scaler_x))
+                scaler_x = abs(pos_x1 - pos_x0)
+            else:
+                pos_x1 = label.get_transform().transform((x_cell_width, x_cell_width))
+                scaler_x = abs(pos_x1 - pos_x0)
+
+            aux_scaler_x = label.get_position()[0] - (label.get_position()[0] // x_cell_width) * x_cell_width
+            aux_scaler_y = label.get_position()[1] - (label.get_position()[1] // y_cell_width) * y_cell_width
+
+            new_pos_x = label.get_position()[0] - aux_scaler_x
+            new_pos_y = new_pos_x * yrange / xrange - yrange + y_cell_width
+
+            new_pos_y = label_y1.get_transform().transform((0, new_pos_y + aux_scaler_y)) - pos_y1
+            dx = -0.5 * scaler_x[0] / dpi_x
+            dy = new_pos_y[1] / dpi_y
+            offset = mlp.transforms.ScaledTranslation(dx, dy, hier.fig.dpi_scale_trans)
+            label.set_transform(label.get_transform() + offset)
+
+    if symmetric:
+        rot = 45
+        ha = 'right'
+    else:
+        rot = 90
+        ha = 'center'
+    hier.ax_heatmap.set_xticklabels(hier.ax_heatmap.xaxis.get_majorticklabels(), rotation=rot, ha=ha)#, fontsize=9.5)
     hier.ax_heatmap.set_yticklabels(hier.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, ha='left')  # , fontsize=9.5)
 
     # Title
@@ -263,7 +280,7 @@ def pcoa_biplot(interaction_space, metadata, sample_col='#SampleID', group_col='
     ax.set_zticklabels([])
 
     ax.view_init(view_angles[0], view_angles[1])
-    ax.legend(loc='center left', bbox_to_anchor=(1.15, 0.5),
+    ax.legend(loc='center left', bbox_to_anchor=(1.05, 0.5),
               ncol=2, fancybox=True, shadow=True, fontsize=legend_size)
     plt.title(title, fontsize=16)
 
@@ -278,12 +295,11 @@ def pcoa_biplot(interaction_space, metadata, sample_col='#SampleID', group_col='
     return results
 
 
-
-def clustermap_cell_pairs_vs_ppi(ppi_score_for_cell_pairs, metadata=None, sample_col='#SampleID', group_col='Groups',
-                                 meta_cmap='gist_rainbow', colors=None, cell_labels=('LEFT-CELL','RIGHT-CELL'),
-                                 metric='jaccard', method='ward', optimal_leaf=True, excluded_cells=None, title='',
-                                 cbar_fontsize=12, filename=None, **kwargs):
-    df_ = ppi_score_for_cell_pairs.copy()
+def clustermap_ccc(interaction_space, metadata=None, sample_col='#SampleID', group_col='Groups',
+                   meta_cmap='gist_rainbow', colors=None, cell_labels=('SENDER-CELL','RECEIVER-CELL'),
+                   metric='jaccard', method='ward', optimal_leaf=True, excluded_cells=None, title='',
+                   cbar_fontsize=12, filename=None, **kwargs):
+    df_ = interaction_space.communication_matrix.copy()
 
     if excluded_cells is not None:
         included_cells = []

From d15e8b7087426039247ba813f585f071de8d7860 Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Fri, 17 Apr 2020 18:46:12 -0700
Subject: [PATCH 11/19] Adapted to new code structure

---
 cell2cell/utils/plotting.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cell2cell/utils/plotting.py b/cell2cell/utils/plotting.py
index f37f601..604064b 100644
--- a/cell2cell/utils/plotting.py
+++ b/cell2cell/utils/plotting.py
@@ -53,7 +53,6 @@ def clustermap_cci(interaction_space, method='ward', optimal_leaf=True, metadata
     # Check symmetry to run linkage separately
     symmetric = (df.values.transpose() == df.values).all()
     if symmetric:
-        # TODO: implement heatmap when distance matrix is not symmetric
         # Compute linkage
         D = sp.distance.squareform(df)
         if 'col_cluster' in kwargs.keys():

From ec4c7e6581a2dceb08e4d2d00875e7e63725dad2 Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Fri, 17 Apr 2020 18:46:39 -0700
Subject: [PATCH 12/19] Minor changes

---
 cell2cell/preprocessing/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cell2cell/preprocessing/__init__.py b/cell2cell/preprocessing/__init__.py
index 3d2445e..a04c12f 100644
--- a/cell2cell/preprocessing/__init__.py
+++ b/cell2cell/preprocessing/__init__.py
@@ -6,7 +6,7 @@
                                              get_local_percentile_cutoffs)
 from cell2cell.preprocessing.gene_ontology import (find_all_children_of_go_term, find_go_terms_from_keyword,
                                                    get_genes_from_go_hierarchy, get_genes_from_go_terms)
-from cell2cell.preprocessing.integrate_data import (get_binary_rnaseq, get_modified_rnaseq, get_ppi_dict_from_go_terms,
+from cell2cell.preprocessing.integrate_data import (get_thresholded_rnaseq, get_modified_rnaseq, get_ppi_dict_from_go_terms,
                                                     get_ppi_dict_from_proteins, get_weighted_ppi)
 from cell2cell.preprocessing.ppi import (bidirectional_ppi_for_cci, filter_ppi_by_proteins, filter_ppi_network,
                                          get_all_to_all_ppi, get_filtered_ppi_network, get_one_group_to_other_ppi,

From 41ee81c7bc8e9a5a340616d9fbcd678acccf4242 Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Fri, 17 Apr 2020 18:47:03 -0700
Subject: [PATCH 13/19] New code structure

---
 cell2cell/analysis/ppi_enrichment.py | 105 ---------------------------
 cell2cell/core/__init__.py           |   1 +
 cell2cell/utils/__init__.py          |   4 +-
 cell2cell/utils/networks.py          |   2 +
 4 files changed, 5 insertions(+), 107 deletions(-)
 delete mode 100644 cell2cell/analysis/ppi_enrichment.py

diff --git a/cell2cell/analysis/ppi_enrichment.py b/cell2cell/analysis/ppi_enrichment.py
deleted file mode 100644
index e2a1f77..0000000
--- a/cell2cell/analysis/ppi_enrichment.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import absolute_import
-
-import itertools
-import functools
-import numpy as np
-import pandas as pd
-
-
-def ppi_count_operation(pair, subsampled_interactions, use_ppi_score=False, index=None):
-    # TODO make sure that index from merging ppi_data & ref_ppi_data matches after using .values[index]
-    cell1 = pair[0]
-    cell2 = pair[1]
-
-    if use_ppi_score:
-        assert np.array_equal(subsampled_interactions[0]['ppis'][cell1][['score']].values,
-                              subsampled_interactions[0]['ppis'][cell2][['score']].values)
-
-    if index is None:
-        if use_ppi_score:
-            matrices = [np.multiply(element['ppis'][cell1][['A']].values, element['ppis'][cell2][['B']].values) \
-                        * element['ppis'][cell1][['score']].values for element in subsampled_interactions]
-        else:
-            matrices = [np.multiply(element['ppis'][cell1][['A']].values, element['ppis'][cell2][['B']].values)
-                        for element in subsampled_interactions]
-
-    else:
-        if use_ppi_score:
-            matrices = [
-                np.multiply(element['ppis'][cell1][['A']].values[index], element['ppis'][cell2][['B']].values[index]) \
-                * element['ppis'][cell1][['score']].values[index] for element in subsampled_interactions]
-        else:
-            matrices = [np.multiply(element['ppis'][cell1][['A']].values[index], element['ppis'][cell2][['B']].values[index])
-                        for element in subsampled_interactions]
-    matrices = np.concatenate(matrices, axis=1)
-    return matrices
-
-
-def compute_avg_count_for_ppis(subsampled_interactions, clusters, ppi_data):
-    index = ppi_data.apply(lambda row: tuple(sorted([row[0], row[1]])), axis=1)
-    mean_matrix = pd.DataFrame(columns=list(clusters.keys()),
-                               index=index)
-
-    std_matrix = pd.DataFrame(columns=list(clusters.keys()),
-                               index=index)
-
-    for cluster, cells in clusters.items():
-        cell_pairs = list(itertools.combinations(cells + cells, 2))
-        cell_pairs = list(set(cell_pairs))  # Remove duplicates
-
-        matrices = list(map(functools.partial(ppi_count_operation, subsampled_interactions=subsampled_interactions),
-                            cell_pairs))
-
-        matrices = np.concatenate(matrices, axis=1)
-        mean_column = np.nanmean(matrices, axis=1)
-        std_column = np.nanstd(matrices, axis=1)
-
-        mean_matrix[cluster] = mean_column
-        std_matrix[cluster] = std_column
-
-    mean_matrix = mean_matrix.dropna(how='all')
-    mean_matrix = mean_matrix.fillna(0.0)
-    mean_matrix = mean_matrix.groupby(level=0).sum() / 2.0
-
-    std_matrix = std_matrix.dropna(how='all')
-    std_matrix = std_matrix.fillna(0.0)
-    std_matrix = std_matrix.groupby(level=0).sum() / 2.0
-    return mean_matrix, std_matrix
-
-
-def get_ppi_score_for_cell_pairs(cells, subsampled_interactions, ppi_data, use_ppi_score=False, ref_ppi_data=None):
-    cell_pairs = list(itertools.combinations(cells + cells, 2))
-    cell_pairs = list(set(cell_pairs))
-    col_labels = ['{};{}'.format(pair[0], pair[1]) for pair in cell_pairs]
-    if ref_ppi_data is None:
-        index = ppi_data.apply(lambda row: (row[0], row[1]), axis=1)
-        keep_index=None
-        mean_matrix = pd.DataFrame(index=index, columns=col_labels)
-        std_matrix = pd.DataFrame(index=index, columns=col_labels)
-    else:
-        ref_index = list(ref_ppi_data.apply(lambda row: (row[0], row[1]), axis=1).values)
-        keep_index = list(pd.merge(ppi_data, ref_ppi_data, how='inner').index)
-        mean_matrix = pd.DataFrame(index=ref_index, columns=col_labels)
-        std_matrix = pd.DataFrame(index=ref_index, columns=col_labels)
-
-
-    for i, pair in enumerate(cell_pairs):
-        matrices = ppi_count_operation(pair=pair,
-                                       subsampled_interactions=subsampled_interactions,
-                                       use_ppi_score=use_ppi_score,
-                                       index=keep_index)
-
-        mean_column = np.nanmean(matrices, axis=1)
-        std_column = np.nanstd(matrices, axis=1)
-
-        mean_matrix[col_labels[i]] = mean_column
-        std_matrix[col_labels[i]] = std_column
-
-    mean_matrix = mean_matrix.dropna(how='all')
-    mean_matrix = mean_matrix.fillna(0.0)
-
-    std_matrix = std_matrix.loc[mean_matrix.index, mean_matrix.columns]
-    std_matrix = std_matrix.fillna(0.0)
-    return mean_matrix, std_matrix
\ No newline at end of file
diff --git a/cell2cell/core/__init__.py b/cell2cell/core/__init__.py
index 190f963..fdceb08 100644
--- a/cell2cell/core/__init__.py
+++ b/cell2cell/core/__init__.py
@@ -5,5 +5,6 @@
 from cell2cell.core.cci_scores import (compute_braycurtis_like_cci_score, compute_count_score, compute_jaccard_like_cci_score,
                                        matmul_bray_curtis_like, matmul_count_active, matmul_jaccard_like)
 from cell2cell.core.cell import (Cell, get_cells_from_rnaseq)
+from cell2cell.core.communication_scores import (get_binary_scores, get_continuous_scores)
 from cell2cell.core.interaction_space import (generate_interaction_elements, InteractionSpace)
 from cell2cell.core.spatial_interactions import (SpatialCCI)
\ No newline at end of file
diff --git a/cell2cell/utils/__init__.py b/cell2cell/utils/__init__.py
index 4ba5ad4..a80c98d 100644
--- a/cell2cell/utils/__init__.py
+++ b/cell2cell/utils/__init__.py
@@ -3,5 +3,5 @@
 from __future__ import absolute_import
 
 from cell2cell.utils.networks import (generate_network_from_adjacency, export_network_to_gephi)
-from cell2cell.utils.parallel_computing import (agents_number, parallel_subsampling_interactions)
-from cell2cell.utils.plotting import (clustermap_cci, clustermap_cell_pairs_vs_ppi, get_colors_from_labels, pcoa_biplot)
+from cell2cell.utils.parallel_computing import (agents_number)
+from cell2cell.utils.plotting import (clustermap_cci, clustermap_ccc, get_colors_from_labels, pcoa_biplot)
diff --git a/cell2cell/utils/networks.py b/cell2cell/utils/networks.py
index 0bdcfaa..068a0eb 100644
--- a/cell2cell/utils/networks.py
+++ b/cell2cell/utils/networks.py
@@ -4,6 +4,8 @@
 
 import networkx as nx
 
+# TODO: Add an export function for Cytoscape
+
 def generate_network_from_adjacency(adjacency_matrix, package='networkx'):
     if package == 'networkx':
         network = nx.from_pandas_adjacency(adjacency_matrix)

From 059fc352775ad8391c99c52dc7383a1fddf2208f Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Sun, 26 Apr 2020 00:57:29 -0700
Subject: [PATCH 14/19] Added functions to manipulate dataframes

---
 cell2cell/preprocessing/__init__.py           |  2 +
 .../preprocessing/manipulate_dataframes.py    | 51 +++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 cell2cell/preprocessing/manipulate_dataframes.py

diff --git a/cell2cell/preprocessing/__init__.py b/cell2cell/preprocessing/__init__.py
index a04c12f..41336c1 100644
--- a/cell2cell/preprocessing/__init__.py
+++ b/cell2cell/preprocessing/__init__.py
@@ -8,6 +8,8 @@
                                                    get_genes_from_go_hierarchy, get_genes_from_go_terms)
 from cell2cell.preprocessing.integrate_data import (get_thresholded_rnaseq, get_modified_rnaseq, get_ppi_dict_from_go_terms,
                                                     get_ppi_dict_from_proteins, get_weighted_ppi)
+from cell2cell.preprocessing.manipulate_dataframes import (check_presence_in_dataframe, shuffle_cols_in_df, shuffle_dataframe,
+                                                           subsample_dataframe)
 from cell2cell.preprocessing.ppi import (bidirectional_ppi_for_cci, filter_ppi_by_proteins, filter_ppi_network,
                                          get_all_to_all_ppi, get_filtered_ppi_network, get_one_group_to_other_ppi,
                                          remove_ppi_bidirectionality, simplify_ppi)
diff --git a/cell2cell/preprocessing/manipulate_dataframes.py b/cell2cell/preprocessing/manipulate_dataframes.py
new file mode 100644
index 0000000..c41f0af
--- /dev/null
+++ b/cell2cell/preprocessing/manipulate_dataframes.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+
+import random
+import numpy as np
+import pandas as pd
+
+
+def check_presence_in_dataframe(df, elements, columns=None):
+    if columns is None:
+        columns = list(df.columns)
+    df_elements = pd.Series(np.unique(df[columns].values.flatten()))
+    df_elements = df_elements.loc[df_elements.isin(elements)].values
+    return list(df_elements)
+
+
+def shuffle_cols_in_df(df, columns, shuffling_number=1, random_state=None):
+    df_ = df.copy()
+    if isinstance(columns, str):
+        columns = [columns]
+
+    for col in columns:
+        for i in range(shuffling_number):
+            if random_state is not None:
+                np.random.seed(random_state + i)
+            df_[col] = np.random.permutation(df_[col].values)
+    return df_
+
+
+def shuffle_dataframe(df, shuffling_number=1, axis=0, random_state=None):
+    df_ = df.copy()
+    axis = int(not axis)  # pandas.DataFrame is always 2D
+    for _ in range(shuffling_number):
+        for i, view in enumerate(np.rollaxis(df_.values, axis)):
+            if random_state is not None:
+                np.random.seed(random_state + i)
+            np.random.shuffle(view)
+    return df_
+
+
+def subsample_dataframe(df, n_samples, random_state=None):
+    items = list(df.index)
+    if n_samples > len(items):
+        n_samples = len(items)
+    if isinstance(random_state, int):
+        random.seed(random_state)
+    random.shuffle(items)
+
+    subsampled_df = df.loc[items[:n_samples],:]
+    return subsampled_df

From 55c4d1cef647ec4aa81d123f922c3b3f4fbbd857 Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Sun, 26 Apr 2020 00:57:58 -0700
Subject: [PATCH 15/19] Re-organized functions for permuting PPI lists

---
 cell2cell/stats/__init__.py          |   4 +-
 cell2cell/stats/null_distribution.py | 189 ---------------------------
 cell2cell/stats/permutation.py       | 108 +++++++++++++++
 3 files changed, 109 insertions(+), 192 deletions(-)
 delete mode 100644 cell2cell/stats/null_distribution.py
 create mode 100644 cell2cell/stats/permutation.py

diff --git a/cell2cell/stats/__init__.py b/cell2cell/stats/__init__.py
index 4183d6f..d840bca 100644
--- a/cell2cell/stats/__init__.py
+++ b/cell2cell/stats/__init__.py
@@ -3,6 +3,4 @@
 from __future__ import absolute_import
 
 from cell2cell.stats.enrichment import (hypergeom_representation)
-from cell2cell.stats.null_distribution import (filter_null_ppi_network, filter_random_subsample_ppi_network,
-                                               check_presence_in_dataframe, get_null_ppi_network, random_switching_ppi_labels,
-                                               shuffle_dataframe, subsample_dataframe)
+from cell2cell.stats.permutation import (pvalue_from_dist, random_switching_ppi_labels)
diff --git a/cell2cell/stats/null_distribution.py b/cell2cell/stats/null_distribution.py
deleted file mode 100644
index ba7ae70..0000000
--- a/cell2cell/stats/null_distribution.py
+++ /dev/null
@@ -1,189 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import absolute_import
-
-import random
-import numpy as np
-import pandas as pd
-
-from sklearn.utils import resample, shuffle
-
-from cell2cell.preprocessing import ppi
-
-
-### FUNCTIONS FOR GENERATING DIFFERENT KIND OF NULL DISTIBUTIONS ###
-def filter_null_ppi_network(ppi_data, contact_proteins, mediator_proteins=None, interaction_type='contacts',
-                            interaction_columns=['A', 'B'], randomized_col=0, random_state=None, verbose=True):
-    '''
-    This function
-    '''
-
-    new_ppi_data = get_null_ppi_network(ppi_data=ppi_data,
-                                        contact_proteins=contact_proteins,
-                                        mediator_proteins=mediator_proteins,
-                                        interaction_type=interaction_type,
-                                        interaction_columns=interaction_columns,
-                                        randomized_col=randomized_col,
-                                        random_state=random_state,
-                                        verbose=verbose)
-
-    new_ppi_data = ppi.bidirectional_ppi_for_cci(ppi_data=new_ppi_data,
-                                                 interaction_columns=interaction_columns,
-                                                 verbose=verbose)
-    return new_ppi_data
-
-
-def filter_random_subsample_ppi_network(original_ppi_data, contact_proteins, mediator_proteins=None,
-                                        interaction_type='contacts', random_ppi_data=None, interaction_columns=['A', 'B'],
-                                        random_state=None, verbose=True):
-    '''
-    This function randomly subsample a ppi network, if a random_ppi_data is provided, it is subsampled instead.
-    '''
-
-    new_ppi_data = ppi.get_filtered_ppi_network(ppi_data=original_ppi_data,
-                                                contact_proteins=contact_proteins,
-                                                mediator_proteins=mediator_proteins,
-                                                interaction_type=interaction_type,
-                                                interaction_columns=interaction_columns,
-                                                verbose=verbose)
-
-    if random_ppi_data is not None:
-        new_random = ppi.get_filtered_ppi_network(ppi_data=random_ppi_data,
-                                                 contact_proteins=contact_proteins,
-                                                 mediator_proteins=mediator_proteins,
-                                                 interaction_type=interaction_type,
-                                                 interaction_columns=interaction_columns,
-                                                 verbose=verbose)
-
-        new_ppi_data = subsample_dataframe(df=new_random,
-                                           n_samples=new_ppi_data.shape[0],
-                                           random_state=random_state)
-    else:
-        new_ppi_data = shuffle_dataframe(df=new_ppi_data,
-                                         shuffling_number=1,
-                                         axis=0, random_state=random_state)
-        #new_ppi_data = subsample_dataframe(df=new_ppi_data,
-        #                                   n_samples=new_ppi_data.shape[0],
-        #                                   random_state=random_state)
-
-    new_ppi_data = ppi.bidirectional_ppi_for_cci(ppi_data=new_ppi_data,
-                                                 interaction_columns=interaction_columns,
-                                                 verbose=verbose)
-    return new_ppi_data
-
-
-def get_null_ppi_network(ppi_data, contact_proteins, mediator_proteins=None, interaction_type='contacts',
-                         interaction_columns=['A', 'B'], randomized_col=0, random_state=None, verbose=True):
-    if (mediator_proteins is None) and (interaction_type != 'contacts'):
-        raise ValueError("mediator_proteins cannot be None when interaction_type is not contacts")
-
-    if verbose:
-        print('Filtering PPI interactions by using a list of genes for {} interactions'.format(interaction_type))
-    if interaction_type == 'contacts':
-        new_ppi_data = ppi.get_all_to_all_ppi(ppi_data=ppi_data,
-                                              proteins=contact_proteins,
-                                              interaction_columns=interaction_columns)
-
-        in_contacts = check_presence_in_dataframe(df=new_ppi_data,
-                                                  elements=contact_proteins,
-                                                  columns=interaction_columns)
-
-        new_ppi_data[interaction_columns[randomized_col]] = resample(in_contacts,
-                                                                     n_samples=new_ppi_data.shape[0],
-                                                                     random_state=random_state)
-    elif interaction_type == 'complete':
-        total_proteins = list(set(contact_proteins + mediator_proteins))
-
-        new_ppi_data = ppi.get_all_to_all_ppi(ppi_data=ppi_data,
-                                              proteins=total_proteins,
-                                              interaction_columns=interaction_columns)
-
-        in_total = check_presence_in_dataframe(df=new_ppi_data,
-                                               elements=total_proteins,
-                                               columns=interaction_columns)
-
-        new_ppi_data[interaction_columns[randomized_col]] = resample(in_total,
-                                                                     n_samples=new_ppi_data.shape[0],
-                                                                     random_state=random_state)
-
-    else:
-        # All the following interactions incorporate contacts-mediator interactions
-        mediated = ppi.get_one_group_to_other_ppi(ppi_data=ppi_data,
-                                                  proteins_a=contact_proteins,
-                                                  proteins_b=mediator_proteins,
-                                                  interaction_columns=interaction_columns)
-
-        in_mediated = check_presence_in_dataframe(df=mediated,
-                                                  elements=mediator_proteins,
-                                                  columns=interaction_columns)
-
-        mediated[interaction_columns[randomized_col]] = resample(in_mediated,
-                                                                 n_samples=mediated.shape[0],
-                                                                 random_state=random_state)
-
-        if interaction_type == 'mediated':
-            new_ppi_data = mediated
-        elif interaction_type == 'combined':
-            contacts = ppi.get_all_to_all_ppi(ppi_data=ppi_data,
-                                              proteins=contact_proteins,
-                                              interaction_columns=interaction_columns)
-
-            in_contacts = check_presence_in_dataframe(df=contacts,
-                                                      elements=contact_proteins,
-                                                      columns=interaction_columns)
-
-            contacts[interaction_columns[randomized_col]] = resample(in_contacts,
-                                                                     n_samples=contacts.shape[0],
-                                                                     random_state=random_state)
-
-            new_ppi_data = pd.concat([contacts, mediated], ignore_index=True).drop_duplicates()
-        else:
-            raise NameError('Not valid interaction type to filter the PPI network')
-    new_ppi_data.reset_index(inplace=True, drop=True)
-    return new_ppi_data
-
-
-def check_presence_in_dataframe(df, elements, columns=None):
-    if columns is None:
-        columns = list(df.columns)
-    df_elements = pd.Series(np.unique(df[columns].values.flatten()))
-    df_elements = df_elements.loc[df_elements.isin(elements)].values
-    return list(df_elements)
-
-
-def subsample_dataframe(df, n_samples, random_state=None):
-    items = list(df.index)
-    if n_samples > len(items):
-        n_samples = len(items)
-    if isinstance(random_state, int):
-        random.seed(random_state)
-    random.shuffle(items)
-
-    subsampled_df = df.loc[items[:n_samples],:]
-    return subsampled_df
-
-
-def random_switching_ppi_labels(interaction_type, ppi_dict, genes=None, random_state=None, interaction_columns=['A', 'B']):
-    ppi_dict_ = dict()
-    ppi_dict_[interaction_type] = ppi_dict[interaction_type].copy()
-    if genes is None:
-        genes = list(np.unique(ppi_dict_[interaction_type][interaction_columns].values.flatten()))
-    else:
-        genes = list(set(genes))
-    mapper = dict(zip(genes, shuffle(genes, random_state=random_state)))
-    A = interaction_columns[0]
-    B = interaction_columns[1]
-    ppi_dict_[interaction_type][A] = ppi_dict_[interaction_type][A].apply(lambda x: mapper[x])
-    ppi_dict_[interaction_type][B] = ppi_dict_[interaction_type][B].apply(lambda x: mapper[x])
-    return mapper, ppi_dict_
-
-
-def shuffle_dataframe(df, shuffling_number=1, axis=0, random_state=None):
-    df_ = df.copy()
-    axis = int(not axis)  # pandas.DataFrame is always 2D
-    for _ in range(shuffling_number):
-        for i, view in enumerate(np.rollaxis(df_.values, axis)):
-            if random_state is not None:
-                np.random.seed(random_state + i)
-            np.random.shuffle(view)
-    return df_
\ No newline at end of file
diff --git a/cell2cell/stats/permutation.py b/cell2cell/stats/permutation.py
new file mode 100644
index 0000000..e44aab6
--- /dev/null
+++ b/cell2cell/stats/permutation.py
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+
+import scipy
+import numpy as np
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+from sklearn.utils import shuffle
+
+
+def pvalue_from_dist(obs_value, dist, label='', comparison='upper'):
+    '''
+    This function computes a p-value for an observed value given a simulated or empirical distribution.
+    It plots the distribution and prints the p-value.
+
+    Parameters
+    ----------
+    obs_value : float
+        Observed value used to get a p-value from a distribution.
+
+    dist : array-like
+        Simulated/Empirical distribution used to compare the observed value and get a p-value
+
+    label : str, '' by default
+        Label used for the histogram plot.
+
+    comparison : str, 'upper' by default
+        Type of comparison used to get the p-value. It could be 'lower', 'upper' or 'different'.
+        Lower and upper correspond to one-tailed analysis, while different is for a two-tailed analysis.
+
+    Returns
+    -------
+    fig : matplotlib.pyplot.figure
+        Figure that shows the histogram for dist.
+
+    pval : float
+        P-value obtained from both observed value and pre-computed distribution.
+    '''
+    if comparison == 'lower':
+        pval = scipy.stats.percentileofscore(dist, obs_value) / 100.0
+    elif comparison == 'upper':
+        pval = 1.0 - scipy.stats.percentileofscore(dist, obs_value) / 100.0
+    elif comparison == 'different':
+        percentile = scipy.stats.percentileofscore(dist, obs_value) / 100.0
+        if percentile <= 0.5:
+            pval = 2.0 * percentile
+        else:
+            pval = 2.0 * (1.0 - percentile)
+    else:
+        raise NotImplementedError('Comparison {} is not implemented'.format(comparison))
+
+    print('P-value is: {}'.format(pval))
+
+    with sns.axes_style("darkgrid"):
+        if label == '':
+            if pval > 1. - 1. / len(dist):
+                label_ = 'p-val: >{:g}'.format(float('{:.1g}'.format(1. - 1. / len(dist))))
+            elif pval < 1. / len(dist):
+                label_ = 'p-val: <{:g}'.format(float('{:.1g}'.format(1. / len(dist))))
+            else:
+                label_ = 'p-val: {0:.2E}'.format(pval)
+        else:
+            if pval > 1. - 1. / len(dist):
+                label_ = label + ' - p-val: >{:g}'.format(float('{:.1g}'.format(1. - 1. / len(dist))))
+            elif pval < 1. / len(dist):
+                label_ = label + ' - p-val: <{:g}'.format(float('{:.1g}'.format(1. / len(dist))))
+            else:
+                label_ = label + ' - p-val: {0:.2E}'.format(pval)
+        fig = sns.distplot(dist, hist=True, kde=True, norm_hist=False, rug=False, label=label_)
+        fig.axvline(x=obs_value, color=fig.get_lines()[-1].get_c(), ls='--')
+
+        fig.tick_params(axis='both', which='major', labelsize=16)
+    lgd = plt.legend(loc='center left', bbox_to_anchor=(1.01, 0.5),
+                     ncol=1, fancybox=True, shadow=True, fontsize=14)
+    return fig, pval
+
+
+def random_switching_ppi_labels(ppi_data, genes=None, random_state=None, interaction_columns=['A', 'B'], column='both'):
+    ppi_data_ = ppi_data.copy()
+    A = interaction_columns[0]
+    B = interaction_columns[1]
+    if column == 'both':
+        if genes is None:
+            genes = list(np.unique(ppi_data_[interaction_columns].values.flatten()))
+        else:
+            genes = list(set(genes))
+        mapper = dict(zip(genes, shuffle(genes, random_state=random_state)))
+        ppi_data_[A] = ppi_data_[A].apply(lambda x: mapper[x])
+        ppi_data_[B] = ppi_data_[B].apply(lambda x: mapper[x])
+    elif column == 'first':
+        if genes is None:
+            genes = list(np.unique(ppi_data_[A].values.flatten()))
+        else:
+            genes = list(set(genes))
+        mapper = dict(zip(genes, shuffle(genes, random_state=random_state)))
+        ppi_data_[A] = ppi_data_[A].apply(lambda x: mapper[x])
+    elif column == 'second':
+        if genes is None:
+            genes = list(np.unique(ppi_data_[B].values.flatten()))
+        else:
+            genes = list(set(genes))
+        mapper = dict(zip(genes, shuffle(genes, random_state=random_state)))
+        ppi_data_[B] = ppi_data_[B].apply(lambda x: mapper[x])
+    else: raise ValueError('Not valid option')
+    return ppi_data_
\ No newline at end of file

From 0a5a5f06912d10e73d7957e35a5f0788cf091a68 Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Sun, 26 Apr 2020 00:58:22 -0700
Subject: [PATCH 16/19] Minor changes

---
 cell2cell/utils/plotting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cell2cell/utils/plotting.py b/cell2cell/utils/plotting.py
index 604064b..8e4bef6 100644
--- a/cell2cell/utils/plotting.py
+++ b/cell2cell/utils/plotting.py
@@ -298,7 +298,7 @@ def clustermap_ccc(interaction_space, metadata=None, sample_col='#SampleID', gro
                    meta_cmap='gist_rainbow', colors=None, cell_labels=('SENDER-CELL','RECEIVER-CELL'),
                    metric='jaccard', method='ward', optimal_leaf=True, excluded_cells=None, title='',
                    cbar_fontsize=12, filename=None, **kwargs):
-    df_ = interaction_space.communication_matrix.copy()
+    df_ = interaction_space.interaction_elements['communication_matrix'].copy()
 
     if excluded_cells is not None:
         included_cells = []

From ddcf648f9e78b2121c35c744b8214083e7e8f718 Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Sun, 26 Apr 2020 00:58:35 -0700
Subject: [PATCH 17/19] Minor changes

---
 cell2cell/core/interaction_space.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/cell2cell/core/interaction_space.py b/cell2cell/core/interaction_space.py
index d87d01a..51792c5 100644
--- a/cell2cell/core/interaction_space.py
+++ b/cell2cell/core/interaction_space.py
@@ -243,7 +243,7 @@ def pair_communication_score(self, cell1, cell2, communication_score='expression
         return communication_value
 
     def compute_pairwise_communication_scores(self, communication_score=None, use_ppi_score=False, ref_ppi_data=None,
-                                              cells=None, verbose=True):
+                                              cells=None, cci_type=None, verbose=True):
         '''
         Function that computes...
 
@@ -261,12 +261,20 @@ def compute_pairwise_communication_scores(self, communication_score=None, use_pp
         else:
             assert isinstance(communication_score, str)
 
-        # Labels:
+        # Cells to consider
         if cells is None:
+            cells = self.interaction_elements['cell_names']
+
+        # Labels:
+        if cci_type is None:
             cell_pairs = self.interaction_elements['pairs']
+        elif cci_type != self.cci_type:
+            cell_pairs = generate_pairs(cells, cci_type)
         else:
             cell_pairs = generate_pairs(cells, self.cci_type)
         col_labels = ['{};{}'.format(pair[0], pair[1]) for pair in cell_pairs]
+
+        # Ref PPI data
         if ref_ppi_data is None:
             ref_index = self.ppi_data.apply(lambda row: (row[0], row[1]), axis=1)
             keep_index = list(range(self.ppi_data.shape[0]))
@@ -292,4 +300,4 @@ def compute_pairwise_communication_scores(self, communication_score=None, use_pp
                                                        verbose=verbose)
             communication_matrix[col_labels[i]] = comm_score.flatten()[keep_index]
 
-        self.communication_matrix = communication_matrix
\ No newline at end of file
+        self.interaction_elements['communication_matrix'] = communication_matrix
\ No newline at end of file

From 54b1c8946d2de6bc131aec6ac9747a6941ae8831 Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Sun, 26 Apr 2020 00:59:28 -0700
Subject: [PATCH 18/19] Moved functions to study PPI enrichment into
 Communication Scores in InteractionSpace

---
 cell2cell/analysis/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cell2cell/analysis/__init__.py b/cell2cell/analysis/__init__.py
index fd27f7b..b6c3f85 100644
--- a/cell2cell/analysis/__init__.py
+++ b/cell2cell/analysis/__init__.py
@@ -3,6 +3,5 @@
 from __future__ import absolute_import
 
 from cell2cell.analysis.pipelines import (core_pipeline, heuristic_pipeline, ligand_receptor_pipeline)
-from cell2cell.analysis.ppi_enrichment import (compute_avg_count_for_ppis, ppi_count_operation, get_ppi_score_for_cell_pairs)
 # from cell2cell.analysis.func_enrichment import *
 

From 8a209640a1d261590055d9acfb4c7f799169c415 Mon Sep 17 00:00:00 2001
From: Erick Armingol <earmingol14@gmail.com>
Date: Sun, 26 Apr 2020 01:00:16 -0700
Subject: [PATCH 19/19] Run PCoA only if CCI matrix is symetric ('undirected'
 interactions)

---
 cell2cell/analysis/pipelines.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/cell2cell/analysis/pipelines.py b/cell2cell/analysis/pipelines.py
index 519c57a..d950571 100644
--- a/cell2cell/analysis/pipelines.py
+++ b/cell2cell/analysis/pipelines.py
@@ -52,15 +52,19 @@ def core_pipeline(files, rnaseq_data, ppi_data, metadata, meta_setup, cutoff_set
                                          **{'cmap': 'Blues'}
                                          )
 
-    pcoa = plotting.pcoa_biplot(interaction_space,
-                                excluded_cells=excluded_cells,
-                                metadata=metadata,
-                                sample_col=meta_setup['sample_col'],
-                                group_col=meta_setup['group_col'],
-                                colors=colors,
-                                title='PCoA for cells given their CCI scores',
-                                filename=files['output_folder'] + 'CCI-PCoA-CCI-scores{}.png'.format(filename_suffix),
-                                )
+    # Run PCoA only if CCI matrix is symmetric
+    pcoa_state = False
+    if (interaction_space.interaction_elements['cci_matrix'].values.transpose() == interaction_space.interaction_elements['cci_matrix'].values).all():
+        pcoa = plotting.pcoa_biplot(interaction_space,
+                                    excluded_cells=excluded_cells,
+                                    metadata=metadata,
+                                    sample_col=meta_setup['sample_col'],
+                                    group_col=meta_setup['group_col'],
+                                    colors=colors,
+                                    title='PCoA for cells given their CCI scores',
+                                    filename=files['output_folder'] + 'CCI-PCoA-CCI-scores{}.png'.format(filename_suffix),
+                                    )
+        pcoa_state = True
 
     interaction_clustermap = plotting.clustermap_ccc(interaction_space,
                                                      metric=metric,
@@ -79,7 +83,8 @@ def core_pipeline(files, rnaseq_data, ppi_data, metadata, meta_setup, cutoff_set
     outputs = dict()
     outputs['interaction_space'] = interaction_space
     outputs['clustermap'] = clustermap
-    outputs['pcoa'] = pcoa
+    if pcoa_state:
+        outputs['pcoa'] = pcoa
     outputs['interaction_clustermap'] = interaction_clustermap
     outputs['LR-pairs'] = interaction_clustermap.data2d
     return outputs