Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Developer #238

Merged
merged 4 commits into from
Sep 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def run(self):
# Run the setup
setup(
name="tigramite",
version="5.1.0.3",
version="5.1.0.4",
packages=["tigramite", "tigramite.independence_tests", "tigramite.toymodels"],
license="GNU General Public License v3.0",
description="Tigramite causal discovery for time series",
Expand Down
6 changes: 3 additions & 3 deletions tests/test_independence_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def test_shuffle_sig_parcorr(par_corr, data_sample_a):
# Get the data sample values
array, val, _, xyz, dim, T = data_sample_a
# Get the analytic significance
pval_a = par_corr.get_analytic_significance(value=val, T=T, dim=dim)
pval_a = par_corr.get_analytic_significance(value=val, T=T, dim=dim, xyz=xyz)
# Get the shuffle significance
pval_s = par_corr.get_shuffle_significance(array, xyz, val)
# Adjust p-value for two-sided measures
Expand Down Expand Up @@ -340,7 +340,7 @@ def test_shuffle_sig_gpdc(gpdc, data_sample_b):
array = array[:, :T]
# Get the value of the dependence measurement
val = gpdc.get_dependence_measure(array, xyz)
pval_a = gpdc.get_analytic_significance(value=val, T=T, dim=dim)
pval_a = gpdc.get_analytic_significance(value=val, T=T, dim=dim, xyz=xyz)
pval_s = gpdc.get_shuffle_significance(array, xyz, val)
np.testing.assert_allclose(np.array(pval_a), np.array(pval_s), atol=0.05)

Expand Down Expand Up @@ -448,7 +448,7 @@ def test_shuffle_sig_gpdc_torch(gpdc_torch, data_sample_b):
array = array[:, :T]
# Get the value of the dependence measurement
val = gpdc_torch.get_dependence_measure(array, xyz)
pval_a = gpdc_torch.get_analytic_significance(value=val, T=T, dim=dim)
pval_a = gpdc_torch.get_analytic_significance(value=val, T=T, dim=dim, xyz=xyz)
pval_s = gpdc_torch.get_shuffle_significance(array, xyz, val)
np.testing.assert_allclose(np.array(pval_a), np.array(pval_s), atol=0.05)

Expand Down
5 changes: 4 additions & 1 deletion tigramite/independence_tests/gpdc.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,7 @@ def get_shuffle_significance(self, array, xyz, value,
return pval, null_dist
return pval

def get_analytic_significance(self, value, T, dim):
def get_analytic_significance(self, value, T, dim, xyz):
"""Returns p-value for the distance correlation coefficient.

The null distribution for necessary degrees of freedom (df) is loaded.
Expand All @@ -647,6 +647,9 @@ def get_analytic_significance(self, value, T, dim):
dim : int
Dimensionality, ie, number of features.

xyz : array of ints
XYZ identifier array of shape (dim,).

Returns
-------
pval : float or numpy.nan
Expand Down
5 changes: 4 additions & 1 deletion tigramite/independence_tests/gpdc_torch.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,7 +783,7 @@ def get_shuffle_significance(self, array, xyz, value,
return pval, null_dist
return pval

def get_analytic_significance(self, value, T, dim):
def get_analytic_significance(self, value, T, dim, xyz):
"""Returns p-value for the distance correlation coefficient.

The null distribution for necessary degrees of freedom (df) is loaded.
Expand All @@ -805,6 +805,9 @@ def get_analytic_significance(self, value, T, dim):
dim : int
Dimensionality, ie, number of features.

xyz : array of ints
XYZ identifier array of shape (dim,).

Returns
-------
pval : float or numpy.nan
Expand Down
2 changes: 1 addition & 1 deletion tigramite/independence_tests/independence_tests_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ def get_significance(self, val, array, xyz, T, dim, sig_override=None):
use_sig = sig_override
# Check if we are using the analytic significance
if use_sig == 'analytic':
pval = self.get_analytic_significance(value=val, T=T, dim=dim)
pval = self.get_analytic_significance(value=val, T=T, dim=dim, xyz=xyz)
# Check if we are using the shuffle significance
elif use_sig == 'shuffle_test':
pval = self.get_shuffle_significance(array=array,
Expand Down
5 changes: 4 additions & 1 deletion tigramite/independence_tests/parcorr.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def get_shuffle_significance(self, array, xyz, value,
return pval, null_dist
return pval

def get_analytic_significance(self, value, T, dim):
def get_analytic_significance(self, value, T, dim, xyz):
"""Returns analytic p-value from Student's t-test for the Pearson
correlation coefficient.

Expand All @@ -205,6 +205,9 @@ def get_analytic_significance(self, value, T, dim):
dim : int
Dimensionality, ie, number of features.

xyz : array of ints
XYZ identifier array of shape (dim,).

Returns
-------
pval : float or numpy.nan
Expand Down
76 changes: 67 additions & 9 deletions tigramite/independence_tests/parcorr_mult.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def measure(self):

def __init__(self, correlation_type='max_corr', **kwargs):
self._measure = 'par_corr_mult'
self.two_sided = False
self.two_sided = True
self.residual_based = True

self.correlation_type = correlation_type
Expand Down Expand Up @@ -201,18 +201,24 @@ def mult_corr(self, array, xyz, standardize=True):
y = array[np.where(xyz==1)[0]]

if self.correlation_type == 'max_corr':
# Get maximum correlation value
val = 0.
for x_vals in x:
for y_vals in y:
val_here, _ = stats.pearsonr(x_vals, y_vals)
val = max(val, np.abs(val_here))
# Get (positive or negative) absolute maximum correlation value
corr = np.corrcoef(x, y)[:len(x), len(x):].flatten()
val = corr[np.argmax(np.abs(corr))]

# val = 0.
# for x_vals in x:
# for y_vals in y:
# val_here, _ = stats.pearsonr(x_vals, y_vals)
# val = max(val, np.abs(val_here))

# elif self.correlation_type == 'linear_hsci':
# # For linear kernel and standardized data (centered and divided by std)
# # biased V -statistic of HSIC reduces to sum of squared inner products
# # over all dimensions
# val = ((x.dot(y.T)/float(n))**2).sum()
else:
raise NotImplementedError("Currently only"
"correlation_type == 'max_corr' implemented.")

return val

Expand Down Expand Up @@ -258,15 +264,21 @@ def get_shuffle_significance(self, array, xyz, value,

pval = (null_dist >= np.abs(value)).mean()

# Adjust p-value for two-sided measures
if pval < 1.:
pval *= 2.

# Adjust p-value for dimensions of x and y (conservative Bonferroni-correction)
# pval *= dim_x*dim_y

if return_null_dist:
return pval, null_dist
return pval

def get_analytic_significance(self, value, T, dim):
def get_analytic_significance(self, value, T, dim, xyz):
"""Returns analytic p-value depending on correlation_type.

If the degrees of freedom are less than
Assumes two-sided correlation. If the degrees of freedom are less than
1, numpy.nan is returned.

Parameters
Expand All @@ -280,6 +292,9 @@ def get_analytic_significance(self, value, T, dim):
dim : int
Dimensionality, ie, number of features.

xyz : array of ints
XYZ identifier array of shape (dim,).

Returns
-------
pval : float or numpy.nan
Expand All @@ -288,6 +303,9 @@ def get_analytic_significance(self, value, T, dim):
# Get the number of degrees of freedom
deg_f = T - dim

dim_x = (xyz==0).sum()
dim_y = (xyz==1).sum()

if self.correlation_type == 'max_corr':
if deg_f < 1:
pval = np.nan
Expand All @@ -297,6 +315,12 @@ def get_analytic_significance(self, value, T, dim):
trafo_val = value * np.sqrt(deg_f/(1. - value*value))
# Two sided significance level
pval = stats.t.sf(np.abs(trafo_val), deg_f) * 2
else:
raise NotImplementedError("Currently only"
"correlation_type == 'max_corr' implemented.")

# Adjust p-value for dimensions of x and y (conservative Bonferroni-correction)
pval *= dim_x*dim_y

return pval

Expand All @@ -315,3 +339,37 @@ def get_model_selection_criterion(self, j, parents, tau_max=0):
"""
raise NotImplementedError("Model selection not"+\
" implemented for %s" % self.measure)

if __name__ == '__main__':

    import tigramite
    from tigramite.data_processing import DataFrame
    # import numpy as np
    import timeit

    # Ad-hoc smoke test: estimate how often ParCorrMult (analytic
    # significance by default) rejects the null at the 0.1 level when
    # X and Y are genuinely linearly dependent.
    # np.random.seed(3)
    test = ParCorrMult(
        # significance = 'shuffle_test',
        # sig_samples=1000,
    )

    n_trials = 100
    hits = np.zeros(n_trials)
    for trial in range(n_trials):
        print(trial)
        # Six standard-normal columns; column 2 depends linearly on column 0.
        sample = np.random.randn(100, 6)
        sample[:, 2] += -0.5 * sample[:, 0]
        # data[:,1] += data[:,2]
        test.set_dataframe(DataFrame(sample))

        # run_test returns (value, pval); keep only the p-value.
        p_value = test.run_test(
            X=[(0, 0)],  # , (1,0)],
            Y=[(2, 0)],  # , (3, 0)],
            # Z=[(5,0)]
            Z=[],
        )[1]

        hits[trial] = p_value <= 0.1
    # print(cmi.run_test(X=[(0,0),(1,0)], Y=[(2,0), (3, 0)], Z=[(5,0)]))
    print(hits.mean())
196 changes: 178 additions & 18 deletions tutorials/tigramite_tutorial_multiple_datasets.ipynb

Large diffs are not rendered by default.