Skip to content

Commit

Permalink
Merge pull request #436 from oanaipopescu/update_nov24
Browse files Browse the repository at this point in the history
Update CMIknnMixed Nov24
  • Loading branch information
jakobrunge authored Dec 20, 2024
2 parents 1357b65 + 5d540d1 commit 0e0477d
Show file tree
Hide file tree
Showing 14 changed files with 1,508 additions and 38 deletions.
73 changes: 66 additions & 7 deletions tests/test_independence_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,18 @@
from tigramite.independence_tests.gpdc import GPDC
from tigramite.independence_tests.gpdc_torch import GPDCtorch
from tigramite.independence_tests.cmiknn import CMIknn
from tigramite.independence_tests.cmiknnmixed import CMIknnMixed
from tigramite.independence_tests.cmiknn_mixed import CMIknnMixed
from tigramite.independence_tests.cmisymb import CMIsymb
from tigramite.independence_tests.gsquared import Gsquared
from tigramite.independence_tests.regressionCI import RegressionCI

import tigramite.data_processing as pp
from tigramite.toymodels import structural_causal_processes as toys

from test_pcmci_calculations import a_chain, gen_data_frame
from test_pcmci_calculations import (a_chain, mixed_confounder,
gen_data_frame,
gen_chain_data_frame_mixed,
gen_confounder_data_frame_mixed)

# Pylint settings
# pylint: disable=redefined-outer-name
Expand Down Expand Up @@ -126,11 +129,11 @@ def check_run_test(ind_test, sample):
tau_max=tau_max, alpha_or_thres=alpha_or_thres)

# Get the array the test is running on
array, xyz, _, _ = ind_test._get_array(x_nds, y_nds, z_nds, tau_max)
array, xyz, _, data_type = ind_test._get_array(x_nds, y_nds, z_nds, tau_max)
dim, T = array.shape
# Get the correct dependence measure
val_expt = ind_test.get_dependence_measure(array, xyz)
pval_expt = ind_test._get_p_value(val, array, xyz, T, dim)
val_expt = ind_test.get_dependence_measure(array, xyz, data_type=data_type)
pval_expt = ind_test._get_p_value(val, array, xyz, T, dim, data_type=data_type)
if ind_test.significance == 'fixed_thres':
dependent = val_expt >= alpha_or_thres
pval_expt = 0. if dependent else 1.
Expand All @@ -151,9 +154,9 @@ def check_get_measure(ind_test, sample):
# Run the test
val = ind_test.get_measure(x_nds, y_nds, z_nds, tau_max)
# Get the array the test is running on
array, xyz, _, _ = ind_test._get_array(x_nds, y_nds, z_nds, tau_max)
array, xyz, _, data_type = ind_test._get_array(x_nds, y_nds, z_nds, tau_max)
# Get the correct dependence measure
val_expt = ind_test.get_dependence_measure(array, xyz)
val_expt = ind_test.get_dependence_measure(array, xyz, data_type=data_type)
# Check the values are close
np.testing.assert_allclose(np.array(val), np.array(val_expt), atol=1e-2)

Expand Down Expand Up @@ -857,6 +860,62 @@ def test_cmi_knn(cmi_knn, data_sample_c):
np.array(val_est),
atol=0.02)


# CMIknnMixed TESTING ##############################################################

# Here we only test the main functionality of CMIknnMixed, as the rest of the
# functions are the same as for the continuous CMIknn test

@pytest.fixture()
def cmi_knn_mixed(request):
    """Return the CMIknnMixed instance shared by the CMIknnMixed tests."""
    # Constructor settings: shuffle-test significance with 20 surrogate
    # samples and a block length of 3; knn=0.3 and verbosity=2 are passed
    # through unchanged.
    settings = dict(mask_type=None,
                    significance='shuffle_test',
                    sig_samples=20,
                    sig_blocklength=3,
                    knn=0.3,
                    verbosity=2)
    return CMIknnMixed(**settings)

@pytest.fixture(params=[
    # Each entry is (links_coeffs, time, seed_val): the link dictionary from
    # mixed_confounder(auto_corr, coeff), the total time length, and the
    # random seed
    (mixed_confounder(0.1, 0.9), 1000, 2),
    (mixed_confounder(0.5, 0.6), 1000, 11),
    (mixed_confounder(0.5, 0.6), 1000, 42)])
def data_frame_conf_mixed(request):
    """Generate the mixed confounder sample for the current parameter set."""
    # request.param is the (links_coeffs, time, seed_val) triple from above
    return gen_confounder_data_frame_mixed(*request.param)

@pytest.fixture(params=[
    # Each entry is (links_coeffs, time, seed_val): the link dictionary from
    # a_chain(auto_corr, coeff[, length]), the total time length, and the
    # random seed
    (a_chain(0.1, 0.9), 10, 2),
    (a_chain(0.5, 0.6), 10, 11),
    (a_chain(0.5, 0.6, length=5), 10, 42)])
def data_frame_chain_mixed(request):
    """Generate the chain sample (with data_type) for the current parameter set."""
    # request.param is the (links_coeffs, time, seed_val) triple from above
    return gen_chain_data_frame_mixed(*request.param)

def test_get_measure_cmi_knn_mixed_chain(cmi_knn_mixed, data_frame_chain_mixed):
    """Check get_measure of CMIknnMixed on the chain samples."""
    check_get_measure(ind_test=cmi_knn_mixed, sample=data_frame_chain_mixed)

def test_get_measure_cmi_knn_mixed_confounder(cmi_knn_mixed, data_frame_conf_mixed):
    """Check get_measure of CMIknnMixed on the mixed confounder samples.

    These samples carry a data_type matrix, so this additionally exercises
    the type matrix handling.
    """
    check_get_measure(ind_test=cmi_knn_mixed, sample=data_frame_conf_mixed)

def test_run_test_cmi_knn_mixed(cmi_knn_mixed, data_frame_chain_mixed):
    """Check run_test of CMIknnMixed on the chain samples."""
    check_run_test(ind_test=cmi_knn_mixed, sample=data_frame_chain_mixed)


# CMIsymb TESTING ##############################################################
@pytest.fixture()
def cmi_symb(request):
Expand Down
55 changes: 55 additions & 0 deletions tests/test_pcmci_calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,40 @@ def gen_data_frame(links_coeffs, time, seed_val):
true_parents = _get_parent_graph(links_coeffs)
return pp.DataFrame(data), true_parents


def gen_confounder_data_frame_mixed(links_coeffs, time, seed_val):
    """
    Generate a three-variable mixed-type sample from a confounder model.

    Variable 2 is drawn as a binary (Bernoulli, p=0.5) series. Variables 0
    and 1 each follow their two links in ``links_coeffs``: the first link is
    applied as a linear term, the second shifts the mean of the unit-variance
    Gaussian noise.

    Parameters
    ----------
    links_coeffs : dict
        Link dictionary of the form ``{var: [((parent, lag), coeff), ...]}``
        with exactly two links for variables 0 and 1 (e.g. as produced by
        ``mixed_confounder``). Lags are expected to be -1 or -2, since the
        series is generated starting at t=2.
    time : int
        Number of time steps to generate.
    seed_val : int
        Seed for the random number generator.

    Returns
    -------
    dataframe : tigramite DataFrame
        Data of shape (time, 3) together with its data_type matrix.
    true_parents : result of ``_get_parent_graph(links_coeffs)``
        The parent structure declared by ``links_coeffs``.
    """
    # Set the random seed
    random_state = np.random.default_rng(seed_val)
    data = np.zeros((time, 3))
    # The confounder X2 is a binary variable
    data[:, 2] = random_state.binomial(n=1, p=0.5, size=time)
    for t in range(2, time):
        for var in (0, 1):
            # Take parent index and lag from the link itself, so that the
            # generated data agree with the parents returned below (the
            # columns used to be hard-coded and did not match the links)
            (lin_parent, lin_lag), lin_coeff = links_coeffs[var][0]
            (mean_parent, mean_lag), mean_coeff = links_coeffs[var][1]
            noise_mean = 0.2 + data[t + mean_lag, mean_parent] * mean_coeff
            data[t, var] = (lin_coeff * data[t + lin_lag, lin_parent]
                            + random_state.normal(noise_mean, 1))

    data_type = np.zeros(data.shape, dtype='int')
    # X2 is discrete, encoded as 1 in data_type (0 marks continuous variables)
    data_type[:, 2] = 1

    dataframe = pp.DataFrame(data,
                             data_type=data_type)

    true_parents = _get_parent_graph(links_coeffs)
    return dataframe, true_parents


def gen_chain_data_frame_mixed(links_coeffs, time, seed_val):
    """
    Generate a VAR-process sample wrapped in a DataFrame with a data_type.

    Parameters
    ----------
    links_coeffs : dict
        Link dictionary passed to ``toys.var_process``.
    time : int
        Number of time steps to generate.
    seed_val : int
        Seed for numpy's global random state.

    Returns
    -------
    dataframe : tigramite DataFrame
        Generated data together with an all-zero data_type matrix.
    true_parents : result of ``_get_parent_graph(links_coeffs)``
        The parent structure declared by ``links_coeffs``.
    """
    # Set the random seed
    np.random.seed(seed_val)
    # Generate the data
    data, _ = toys.var_process(links_coeffs, T=time)
    # All entries are 0, i.e. every variable is flagged as continuous under
    # tigramite's data_type convention. The flags are categorical codes, so
    # store them as integers like gen_confounder_data_frame_mixed does.
    data_type = np.zeros(data.shape, dtype='int')
    # Get the true parents
    true_parents = _get_parent_graph(links_coeffs)
    return pp.DataFrame(data, data_type=data_type), true_parents


# TEST LINK GENERATION #########################################################
def a_chain(auto_corr, coeff, length=3):
"""
Expand All @@ -91,6 +125,27 @@ def a_chain(auto_corr, coeff, length=3):
return_links[lnk] = [((lnk, -1), auto_corr), ((lnk-1, -1), coeff)]
return return_links

def mixed_confounder(auto_corr, coeff, length=3):
    """
    Generate a simple confounder process with the given auto-correlations and
    parents with the given coefficient strength.

    The last node (index ``length - 1``) is the confounder and has no
    parents. Every other node has a lag-1 link to itself and a lag-1 link to
    the confounder.

    Parameters
    ----------
    auto_corr: float
        Autocorrelation strength for all nodes
    coeff : float
        Parent strength for all relations
    length : int
        Length of the confounder model.

    Returns
    -------
    return_links : dict
        Dictionary of the form ``{node: [((parent, lag), coeff), ...]}``.
    """
    # The confounder is the last node. It used to be hard-coded as node 2,
    # which is only correct for the default length of 3.
    confounder = length - 1
    return_links = dict()
    return_links[confounder] = []
    for lnk in range(0, confounder):
        return_links[lnk] = [((lnk, -1), auto_corr),
                             ((confounder, -1), coeff)]

    return return_links

# TODO implement common_driver: return two variables commonly driven by N common
# drivers which are random noise, autocorrelation as parameter
# TODO implement independent drivers, autocorrelated noise
Expand Down
5 changes: 3 additions & 2 deletions tigramite/independence_tests/cmiknn.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def _get_nearest_neighbors(self, array, xyz, knn):

return k_xz, k_yz, k_z

def get_dependence_measure(self, array, xyz):
def get_dependence_measure(self, array, xyz, data_type=None):
"""Returns CMI estimate as described in Frenzel and Pompe PRL (2007).
Parameters
Expand Down Expand Up @@ -253,7 +253,8 @@ def get_dependence_measure(self, array, xyz):


def get_shuffle_significance(self, array, xyz, value,
return_null_dist=False):
return_null_dist=False,
data_type=None):
"""Returns p-value for nearest-neighbor shuffle significance test.
For non-empty Z, overwrites get_shuffle_significance from the parent
Expand Down
Loading

0 comments on commit 0e0477d

Please sign in to comment.