Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Developer #238

Merged
merged 4 commits into from
Sep 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def run(self):
# Run the setup
setup(
name="tigramite",
version="5.1.0.3",
version="5.1.0.4",
packages=["tigramite", "tigramite.independence_tests", "tigramite.toymodels"],
license="GNU General Public License v3.0",
description="Tigramite causal discovery for time series",
Expand Down
6 changes: 3 additions & 3 deletions tests/test_independence_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def test_shuffle_sig_parcorr(par_corr, data_sample_a):
# Get the data sample values
array, val, _, xyz, dim, T = data_sample_a
# Get the analytic significance
pval_a = par_corr.get_analytic_significance(value=val, T=T, dim=dim)
pval_a = par_corr.get_analytic_significance(value=val, T=T, dim=dim, xyz=xyz)
# Get the shuffle significance
pval_s = par_corr.get_shuffle_significance(array, xyz, val)
# Adjust p-value for two-sided measures
Expand Down Expand Up @@ -340,7 +340,7 @@ def test_shuffle_sig_gpdc(gpdc, data_sample_b):
array = array[:, :T]
# Get the value of the dependence measurement
val = gpdc.get_dependence_measure(array, xyz)
pval_a = gpdc.get_analytic_significance(value=val, T=T, dim=dim)
pval_a = gpdc.get_analytic_significance(value=val, T=T, dim=dim, xyz=xyz)
pval_s = gpdc.get_shuffle_significance(array, xyz, val)
np.testing.assert_allclose(np.array(pval_a), np.array(pval_s), atol=0.05)

Expand Down Expand Up @@ -448,7 +448,7 @@ def test_shuffle_sig_gpdc_torch(gpdc_torch, data_sample_b):
array = array[:, :T]
# Get the value of the dependence measurement
val = gpdc_torch.get_dependence_measure(array, xyz)
pval_a = gpdc_torch.get_analytic_significance(value=val, T=T, dim=dim)
pval_a = gpdc_torch.get_analytic_significance(value=val, T=T, dim=dim, xyz=xyz)
pval_s = gpdc_torch.get_shuffle_significance(array, xyz, val)
np.testing.assert_allclose(np.array(pval_a), np.array(pval_s), atol=0.05)

Expand Down
5 changes: 4 additions & 1 deletion tigramite/independence_tests/gpdc.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,7 @@ def get_shuffle_significance(self, array, xyz, value,
return pval, null_dist
return pval

def get_analytic_significance(self, value, T, dim):
def get_analytic_significance(self, value, T, dim, xyz):
"""Returns p-value for the distance correlation coefficient.

The null distribution for necessary degrees of freedom (df) is loaded.
Expand All @@ -647,6 +647,9 @@ def get_analytic_significance(self, value, T, dim):
dim : int
Dimensionality, ie, number of features.

xyz : array of ints
XYZ identifier array of shape (dim,).

Returns
-------
pval : float or numpy.nan
Expand Down
5 changes: 4 additions & 1 deletion tigramite/independence_tests/gpdc_torch.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,7 +783,7 @@ def get_shuffle_significance(self, array, xyz, value,
return pval, null_dist
return pval

def get_analytic_significance(self, value, T, dim):
def get_analytic_significance(self, value, T, dim, xyz):
"""Returns p-value for the distance correlation coefficient.

The null distribution for necessary degrees of freedom (df) is loaded.
Expand All @@ -805,6 +805,9 @@ def get_analytic_significance(self, value, T, dim):
dim : int
Dimensionality, ie, number of features.

xyz : array of ints
XYZ identifier array of shape (dim,).

Returns
-------
pval : float or numpy.nan
Expand Down
2 changes: 1 addition & 1 deletion tigramite/independence_tests/independence_tests_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ def get_significance(self, val, array, xyz, T, dim, sig_override=None):
use_sig = sig_override
# Check if we are using the analytic significance
if use_sig == 'analytic':
pval = self.get_analytic_significance(value=val, T=T, dim=dim)
pval = self.get_analytic_significance(value=val, T=T, dim=dim, xyz=xyz)
# Check if we are using the shuffle significance
elif use_sig == 'shuffle_test':
pval = self.get_shuffle_significance(array=array,
Expand Down
5 changes: 4 additions & 1 deletion tigramite/independence_tests/parcorr.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def get_shuffle_significance(self, array, xyz, value,
return pval, null_dist
return pval

def get_analytic_significance(self, value, T, dim):
def get_analytic_significance(self, value, T, dim, xyz):
"""Returns analytic p-value from Student's t-test for the Pearson
correlation coefficient.

Expand All @@ -205,6 +205,9 @@ def get_analytic_significance(self, value, T, dim):
dim : int
Dimensionality, ie, number of features.

xyz : array of ints
XYZ identifier array of shape (dim,).

Returns
-------
pval : float or numpy.nan
Expand Down
76 changes: 67 additions & 9 deletions tigramite/independence_tests/parcorr_mult.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def measure(self):

def __init__(self, correlation_type='max_corr', **kwargs):
self._measure = 'par_corr_mult'
self.two_sided = False
self.two_sided = True
self.residual_based = True

self.correlation_type = correlation_type
Expand Down Expand Up @@ -201,18 +201,24 @@ def mult_corr(self, array, xyz, standardize=True):
y = array[np.where(xyz==1)[0]]

if self.correlation_type == 'max_corr':
# Get maximum correlation value
val = 0.
for x_vals in x:
for y_vals in y:
val_here, _ = stats.pearsonr(x_vals, y_vals)
val = max(val, np.abs(val_here))
# Get (positive or negative) absolute maximum correlation value
corr = np.corrcoef(x, y)[:len(x), len(x):].flatten()
val = corr[np.argmax(np.abs(corr))]

# val = 0.
# for x_vals in x:
# for y_vals in y:
# val_here, _ = stats.pearsonr(x_vals, y_vals)
# val = max(val, np.abs(val_here))

# elif self.correlation_type == 'linear_hsci':
# # For linear kernel and standardized data (centered and divided by std)
# # biased V -statistic of HSIC reduces to sum of squared inner products
# # over all dimensions
# val = ((x.dot(y.T)/float(n))**2).sum()
else:
raise NotImplementedError("Currently only"
"correlation_type == 'max_corr' implemented.")

return val

Expand Down Expand Up @@ -258,15 +264,21 @@ def get_shuffle_significance(self, array, xyz, value,

pval = (null_dist >= np.abs(value)).mean()

# Adjust p-value for two-sided measures
if pval < 1.:
pval *= 2.

# Adjust p-value for dimensions of x and y (conservative Bonferroni-correction)
# pval *= dim_x*dim_y

if return_null_dist:
return pval, null_dist
return pval

def get_analytic_significance(self, value, T, dim):
def get_analytic_significance(self, value, T, dim, xyz):
"""Returns analytic p-value depending on correlation_type.

If the degrees of freedom are less than
Assumes two-sided correlation. If the degrees of freedom are less than
1, numpy.nan is returned.

Parameters
Expand All @@ -280,6 +292,9 @@ def get_analytic_significance(self, value, T, dim):
dim : int
Dimensionality, ie, number of features.

xyz : array of ints
XYZ identifier array of shape (dim,).

Returns
-------
pval : float or numpy.nan
Expand All @@ -288,6 +303,9 @@ def get_analytic_significance(self, value, T, dim):
# Get the number of degrees of freedom
deg_f = T - dim

dim_x = (xyz==0).sum()
dim_y = (xyz==1).sum()

if self.correlation_type == 'max_corr':
if deg_f < 1:
pval = np.nan
Expand All @@ -297,6 +315,12 @@ def get_analytic_significance(self, value, T, dim):
trafo_val = value * np.sqrt(deg_f/(1. - value*value))
# Two sided significance level
pval = stats.t.sf(np.abs(trafo_val), deg_f) * 2
else:
raise NotImplementedError("Currently only"
"correlation_type == 'max_corr' implemented.")

# Adjust p-value for dimensions of x and y (conservative Bonferroni-correction)
pval *= dim_x*dim_y

return pval

Expand All @@ -315,3 +339,37 @@ def get_model_selection_criterion(self, j, parents, tau_max=0):
"""
raise NotImplementedError("Model selection not"+\
" implemented for %s" % self.measure)

if __name__ == '__main__':

    import tigramite
    from tigramite.data_processing import DataFrame
    # import numpy as np
    import timeit

    # Ad-hoc smoke test: estimate how often ParCorrMult (analytic
    # significance by default) rejects the null at the 0.1 level when
    # X and Y are genuinely linearly dependent.
    # np.random.seed(3)
    test = ParCorrMult(
        # significance = 'shuffle_test',
        # sig_samples=1000,
    )

    n_trials = 100
    hits = np.zeros(n_trials)
    for trial in range(n_trials):
        print(trial)
        # Six standard-normal columns; column 2 depends linearly on column 0.
        sample = np.random.randn(100, 6)
        sample[:, 2] += -0.5 * sample[:, 0]
        # data[:,1] += data[:,2]
        test.set_dataframe(DataFrame(sample))

        # run_test returns (value, pval); keep only the p-value.
        p_value = test.run_test(
            X=[(0, 0)],  # , (1,0)],
            Y=[(2, 0)],  # , (3, 0)],
            # Z=[(5,0)]
            Z=[],
        )[1]

        hits[trial] = p_value <= 0.1
    # print(cmi.run_test(X=[(0,0),(1,0)], Y=[(2,0), (3, 0)], Z=[(5,0)]))
    print(hits.mean())
196 changes: 178 additions & 18 deletions tutorials/tigramite_tutorial_multiple_datasets.ipynb

Large diffs are not rendered by default.