Merge pull request #436 from oanaipopescu/update_nov24

Update CMIknnMixed Nov24
jakobrunge · Dec 20, 2024 · 0e0477d · 0e0477d
2 parents 1357b65 + 5d540d1
commit 0e0477d
Show file tree

Hide file tree

Showing 14 changed files with 1,508 additions and 38 deletions.
diff --git a/tests/test_independence_tests.py b/tests/test_independence_tests.py
@@ -12,15 +12,18 @@
 from tigramite.independence_tests.gpdc import GPDC
 from tigramite.independence_tests.gpdc_torch import GPDCtorch
 from tigramite.independence_tests.cmiknn import CMIknn
-from tigramite.independence_tests.cmiknnmixed import CMIknnMixed
+from tigramite.independence_tests.cmiknn_mixed import CMIknnMixed
 from tigramite.independence_tests.cmisymb import CMIsymb
 from tigramite.independence_tests.gsquared import Gsquared
 from tigramite.independence_tests.regressionCI import RegressionCI
 
 import tigramite.data_processing as pp
 from tigramite.toymodels import structural_causal_processes as toys
 
-from test_pcmci_calculations import a_chain, gen_data_frame
+from test_pcmci_calculations import (a_chain, mixed_confounder,
+                                     gen_data_frame, 
+                                     gen_chain_data_frame_mixed, 
+                                     gen_confounder_data_frame_mixed)
 
 # Pylint settings
 # pylint: disable=redefined-outer-name
@@ -126,11 +129,11 @@ def check_run_test(ind_test, sample):
         tau_max=tau_max, alpha_or_thres=alpha_or_thres)
 
     # Get the array the test is running on
-    array, xyz, _, _ = ind_test._get_array(x_nds, y_nds, z_nds, tau_max)
+    array, xyz, _, data_type = ind_test._get_array(x_nds, y_nds, z_nds, tau_max)
     dim, T = array.shape
     # Get the correct dependence measure
-    val_expt = ind_test.get_dependence_measure(array, xyz)
-    pval_expt = ind_test._get_p_value(val, array, xyz, T, dim)
+    val_expt = ind_test.get_dependence_measure(array, xyz, data_type=data_type)
+    pval_expt = ind_test._get_p_value(val, array, xyz, T, dim, data_type=data_type)
     if ind_test.significance == 'fixed_thres':
         dependent = val_expt >= alpha_or_thres
         pval_expt = 0. if dependent else 1.
@@ -151,9 +154,9 @@ def check_get_measure(ind_test, sample):
     # Run the test
     val = ind_test.get_measure(x_nds, y_nds, z_nds, tau_max)
     # Get the array the test is running on
-    array, xyz, _, _ = ind_test._get_array(x_nds, y_nds, z_nds, tau_max)
+    array, xyz, _, data_type = ind_test._get_array(x_nds, y_nds, z_nds, tau_max)
     # Get the correct dependence measure
-    val_expt = ind_test.get_dependence_measure(array, xyz)
+    val_expt = ind_test.get_dependence_measure(array, xyz, data_type=data_type)
     # Check the values are close
     np.testing.assert_allclose(np.array(val), np.array(val_expt), atol=1e-2)
 
@@ -857,6 +860,62 @@ def test_cmi_knn(cmi_knn, data_sample_c):
                                np.array(val_est),
                                atol=0.02)
 
+
+# CMIknnMixed TESTING ##############################################################
+
+# Here we only test the main functionality of CMIknnMixed, as the rest of the 
+# functions are the same as for the continuous CMIknn test
+
+@pytest.fixture()
+def cmi_knn_mixed(request):
+    return CMIknnMixed(mask_type=None,
+                       significance='shuffle_test',
+                       sig_samples=20,
+                       sig_blocklength=3,
+                       knn=0.3,
+                       verbosity=2)
+
+@pytest.fixture(params=[
+    # Generate a test data sample
+    # Parameterize the sample by setting the autocorrelation value, coefficient
+    # value, total time length, and random seed to different numbers
+    # links_coeffs,               time, seed_val
+    (mixed_confounder(0.1, 0.9), 1000, 2),
+    (mixed_confounder(0.5, 0.6), 1000, 11),
+    (mixed_confounder(0.5, 0.6), 1000, 42)])
+def data_frame_conf_mixed(request):
+    # Set the parameters
+    links_coeffs, time, seed_val = request.param
+    # Generate the dataframe
+    return gen_confounder_data_frame_mixed(links_coeffs, time, seed_val)
+
+@pytest.fixture(params=[
+    # Generate a test data sample
+    # Parameterize the sample by setting the autocorrelation value, coefficient
+    # value, total time length, and random seed to different numbers
+    # links_coeffs,               time, seed_val
+    (a_chain(0.1, 0.9), 10, 2),
+    (a_chain(0.5, 0.6), 10, 11),
+    (a_chain(0.5, 0.6, length=5), 10, 42)])
+def data_frame_chain_mixed(request):
+    # Set the parameters
+    links_coeffs, time, seed_val = request.param
+    # Generate the dataframe
+    return gen_chain_data_frame_mixed(links_coeffs, time, seed_val)
+
+def test_get_measure_cmi_knn_mixed_chain(cmi_knn_mixed, data_frame_chain_mixed):
+    # Check the get_measure function
+    check_get_measure(cmi_knn_mixed, data_frame_chain_mixed)
+
+def test_get_measure_cmi_knn_mixed_confounder(cmi_knn_mixed, data_frame_conf_mixed):
+    # Check the get_measure function, additionally check the type matrix
+    check_get_measure(cmi_knn_mixed, data_frame_conf_mixed)
+
+def test_run_test_cmi_knn_mixed(cmi_knn_mixed, data_frame_chain_mixed):
+    # Check the run_test function
+    check_run_test(cmi_knn_mixed, data_frame_chain_mixed)
+
+
 # CMIsymb TESTING ##############################################################
 @pytest.fixture()
 def cmi_symb(request):

diff --git a/tests/test_pcmci_calculations.py b/tests/test_pcmci_calculations.py
@@ -69,6 +69,40 @@ def gen_data_frame(links_coeffs, time, seed_val):
     true_parents = _get_parent_graph(links_coeffs)
     return pp.DataFrame(data), true_parents
 
+
+def gen_confounder_data_frame_mixed(links_coeffs, time, seed_val):
+    # Set the random seed
+    random_state = np.random.default_rng(seed_val)
+    data = np.zeros((time, 3))
+    data[:, 2] = random_state.binomial(n=1, p=0.5, size=time)
+    for t in range(2, time):
+        data[t, 0] = links_coeffs[0][0][1] * data[t + links_coeffs[0][0][0][1], 1] + random_state.normal(
+            0.2 + data[t + links_coeffs[0][1][0][1], 2] * links_coeffs[0][1][1], 1)
+        data[t, 1] = links_coeffs[1][0][1] * data[t + links_coeffs[1][0][0][1], 2] + random_state.normal(
+            0.2 + data[t + links_coeffs[1][1][0][1], 2] * links_coeffs[1][1][1], 1)
+
+    data_type = np.zeros(data.shape, dtype='int')
+    # X2 is discrete (binary), encoded as 1 in data_type
+    data_type[:, 2] = 1
+
+    dataframe = pp.DataFrame(data,
+                             data_type=data_type)
+
+    true_parents = _get_parent_graph(links_coeffs)
+    return dataframe, true_parents
+
+
+def gen_chain_data_frame_mixed(links_coeffs, time, seed_val):
+    # Set the random seed
+    np.random.seed(seed_val)
+    # Generate the data
+    data, _ = toys.var_process(links_coeffs, T=time)
+    data_type = np.zeros(data.shape)
+    # Get the true parents
+    true_parents = _get_parent_graph(links_coeffs)
+    return pp.DataFrame(data, data_type=data_type), true_parents
+
+
 # TEST LINK GENERATION #########################################################
 def a_chain(auto_corr, coeff, length=3):
     """
@@ -91,6 +125,27 @@ def a_chain(auto_corr, coeff, length=3):
         return_links[lnk] = [((lnk, -1), auto_corr), ((lnk-1, -1), coeff)]
     return return_links
 
+def mixed_confounder(auto_corr, coeff, length=3):
+    """
+    Generate a simple confounder process with the given auto-correlations and
+    parents with the given coefficient strength.
+
+    Parameters
+    ----------
+    auto_corr: float
+        Autocorrelation strength for all nodes
+    coeff : float
+        Parent strength for all relations
+    length : int
+        Length of the confounder model.
+    """
+    return_links = dict()
+    return_links[2] = []
+    for lnk in range(0, length - 1):
+        return_links[lnk] = [((lnk, -1), auto_corr), ((2, -1), coeff)]
+
+    return return_links
+
 # TODO implement common_driver: return two variables commonly driven by N common
 # drivers which are random noise, autocorrelation as parameter
 # TODO implement independent drivers, autocorrelated noise

diff --git a/tigramite/independence_tests/cmiknn.py b/tigramite/independence_tests/cmiknn.py
@@ -216,7 +216,7 @@ def _get_nearest_neighbors(self, array, xyz, knn):
 
         return k_xz, k_yz, k_z
 
-    def get_dependence_measure(self, array, xyz):
+    def get_dependence_measure(self, array, xyz, data_type=None):
         """Returns CMI estimate as described in Frenzel and Pompe PRL (2007).
 
         Parameters
@@ -253,7 +253,8 @@ def get_dependence_measure(self, array, xyz):
 
 
     def get_shuffle_significance(self, array, xyz, value,
-                                 return_null_dist=False):
+                                 return_null_dist=False, 
+                                 data_type=None):
         """Returns p-value for nearest-neighbor shuffle significance test.
 
         For non-empty Z, overwrites get_shuffle_significance from the parent