Skip to content

Commit

Permalink
Merge pull request #180 from jakobrunge/developer
Browse files Browse the repository at this point in the history
Developer
  • Loading branch information
jakobrunge authored Mar 9, 2022
2 parents d9d06a1 + 16050c3 commit 99eae95
Show file tree
Hide file tree
Showing 6 changed files with 282 additions and 33 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Tigramite – Causal discovery for time series datasets
# Tigramite – Causal inference and causal discovery for time series datasets
Version 5.0

(Python Package)
Expand Down Expand Up @@ -32,7 +32,7 @@ Further, Tigramite provides several causal discovery methods that can be used un

## General Notes

Tigramite is a causal time series analysis python package. It allows to efficiently estimate causal graphs from high-dimensional time series datasets (causal discovery) and to use these graphs for robust forecasting and the estimation and prediction of direct, total, and mediated effects. Causal discovery is based on linear as well as non-parametric conditional independence tests applicable to discrete or continuously-valued time series. Also includes functions for high-quality plots of the results. You can find a simple GUI version on https://github.com/stbachinger/TigramiteGui. Please cite the following papers depending on which method you use:
Tigramite is a causal time series analysis python package. It allows to efficiently estimate causal graphs from high-dimensional time series datasets (causal discovery) and to use graphs for robust forecasting and the estimation and prediction of direct, total, and mediated effects. Causal discovery is based on linear as well as non-parametric conditional independence tests applicable to discrete or continuously-valued time series. Also includes functions for high-quality plots of the results. You can find a simple GUI version on https://github.com/stbachinger/TigramiteGui. Please cite the following papers depending on which method you use:

- PCMCI: J. Runge, P. Nowack, M. Kretschmer, S. Flaxman, D. Sejdinovic, Detecting and quantifying causal associations in large nonlinear time series datasets. Sci. Adv. 5, eaau4996 (2019). https://advances.sciencemag.org/content/5/11/eaau4996
- PCMCI+: J. Runge (2020): Discovering contemporaneous and lagged causal relations in autocorrelated nonlinear time series datasets. Proceedings of the 36th Conference on Uncertainty in Artificial Intelligence, UAI 2020,Toronto, Canada, 2019, AUAI Press, 2020. http://auai.org/uai2020/proceedings/579_main_paper.pdf
Expand Down
145 changes: 139 additions & 6 deletions tigramite/causal_effects.py
Original file line number Diff line number Diff line change
Expand Up @@ -1870,19 +1870,19 @@ def predict_total_effect(self,
Results from prediction: an array of shape (time, len(Y)).
"""

if intervention_data.shape[1] != len(self.X):
if intervention_data.shape[1] != len(self.listX):
raise ValueError("intervention_data.shape[1] must be len(X).")

if conditions_data is not None:
if conditions_data.shape[1] != len(self.S):
if conditions_data.shape[1] != len(self.listS):
raise ValueError("conditions_data.shape[1] must be len(S).")
if conditions_data.shape[0] != intervention_data.shape[0]:
raise ValueError("conditions_data.shape[0] must match intervention_data.shape[0].")

if self.no_causal_path:
if self.verbosity > 0:
print("No causal path from X to Y exists.")
return np.zeros((len(intervention_data), len(self.Y)))
return np.zeros((len(intervention_data), len(self.listY)))

effect = self.model.get_general_prediction(
intervention_data=intervention_data,
Expand Down Expand Up @@ -2095,11 +2095,11 @@ def predict_wright_effect(self,

intervention_T, lenX = intervention_data.shape

lenY = len(self.Y)
lenY = len(self.listY)

predicted_array = np.zeros((intervention_T, lenY))
pred_dict = {}
for iy, y in enumerate(self.Y):
for iy, y in enumerate(self.listY):
# Print message
if self.verbosity > 1:
print("\n## Predicting target %s" % str(y))
Expand Down Expand Up @@ -2129,4 +2129,137 @@ def predict_wright_effect(self,
X=predictor_array, **pred_params)
predicted_array[index, iy] = predicted_vals.mean()

return predicted_array
return predicted_array

@staticmethod
def get_graph_from_dict(links, tau_max=None):
"""Helper function to convert dictionary of links to graph array format.
Parameters
---------
links : dict
Dictionary of form {0:[((0, -1), coeff, func), ...], 1:[...], ...}.
Also format {0:[(0, -1), ...], 1:[...], ...} is allowed.
tau_max : int or None
Maximum lag. If None, the maximum lag in links is used.
Returns
-------
graph : array of shape (N, N, tau_max+1)
Matrix format of graph with 1 for true links and 0 else.
"""

def _get_minmax_lag(links):
"""Helper function to retrieve tau_min and tau_max from links.
"""

N = len(links)

# Get maximum time lag
min_lag = np.inf
max_lag = 0
for j in range(N):
for link_props in links[j]:
if len(link_props) > 2:
var, lag = link_props[0]
coeff = link_props[1]
# func = link_props[2]
if coeff != 0.:
min_lag = min(min_lag, abs(lag))
max_lag = max(max_lag, abs(lag))
else:
var, lag = link_props
min_lag = min(min_lag, abs(lag))
max_lag = max(max_lag, abs(lag))

return min_lag, max_lag

N = len(links)

# Get maximum time lag
min_lag, max_lag = _get_minmax_lag(links)

# Set maximum lag
if tau_max is None:
tau_max = max_lag
else:
if max_lag > tau_max:
raise ValueError("tau_max is smaller than maximum lag = %d "
"found in links, use tau_max=None or larger "
"value" % max_lag)

graph = np.zeros((N, N, tau_max + 1), dtype='<U3')
for j in links.keys():
for link_props in links[j]:
if len(link_props) > 2:
var, lag = link_props[0]
coeff = link_props[1]
if coeff != 0.:
graph[var, j, abs(lag)] = "-->"
if lag == 0:
graph[j, var, 0] = "<--"
else:
var, lag = link_props
graph[var, j, abs(lag)] = "-->"
if lag == 0:
graph[j, var, 0] = "<--"

return graph

if __name__ == '__main__':

# Consider some toy data
import tigramite
import tigramite.toymodels.structural_causal_processes as toys
import tigramite.data_processing as pp

import sklearn
from sklearn.linear_model import LinearRegression

T = 10000
def lin_f(x): return x
auto_coeff = 0.3
coeff = 2.
links = {
0: [((0, -1), auto_coeff, lin_f)],
1: [((1, -1), auto_coeff, lin_f), ((0, -1), coeff, lin_f)],
2: [((2, -1), auto_coeff, lin_f), ((1, 0), coeff, lin_f)],
}
data, nonstat = toys.structural_causal_process(links, T=T,
noises=None, seed=7)
dataframe = pp.DataFrame(data)

# Construct expert knowledge graph from links here
links = {0: [(0, -1)],
1: [(1, -1), (0, -1)],
2: [(2, -1), (1, 0),],
}
# Use staticmethod to get graph
graph = CausalEffects.get_graph_from_dict(links, tau_max=None)

# We are interested in lagged total effect of X on Y
X = [(0, -1)]
Y = [(2, 0)]

# Initialize class as `stationary_dag`
causal_effects = CausalEffects(graph, graph_type='stationary_dag',
X=X, Y=Y, S=None,
hidden_variables=None,
verbosity=1)

# Optimal adjustment set (is used by default)
print(causal_effects.get_optimal_set())

# Fit causal effect model from observational data
causal_effects.fit_total_effect(
dataframe=dataframe,
# mask_type='y',
estimator=LinearRegression(),
)

# Predict effect of interventions do(X=0.), ..., do(X=1.) in one go
dox_vals = np.linspace(0., 1., 5)
intervention_data = dox_vals.reshape(len(dox_vals), len(X))
pred_Y = causal_effects.predict_total_effect(
intervention_data=intervention_data)
print(pred_Y)
30 changes: 20 additions & 10 deletions tigramite/data_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ def _check_mask(self, mask=None, require_mask=False):
% str(_use_mask.shape))

def construct_array(self, X, Y, Z, tau_max,
extraZ=None,
mask=None,
mask_type=None,
return_cleaned_xyz=False,
Expand All @@ -127,10 +128,10 @@ def construct_array(self, X, Y, Z, tau_max,
Parameters
----------
X, Y, Z : list of tuples
X, Y, Z, extraZ : list of tuples
For a dependence measure I(X;Y|Z), X, Y, Z can be multivariate of
the form [(var1, -lag), (var2, -lag), ...]. At least one varlag in Y
has to be at lag zero.
has to be at lag zero. extraZ is only used in CausalEffects class.
tau_max : int
Maximum time lag. This may be used to make sure that estimates for
different lags in X and Z all have the same sample size.
Expand Down Expand Up @@ -171,16 +172,20 @@ def construct_array(self, X, Y, Z, tau_max,
# Get the length in time and the number of nodes
T, N = self.values.shape

# Remove duplicates in X, Y, Z
if extraZ is None:
extraZ = []
# Remove duplicates in X, Y, Z, extraZ
X = list(OrderedDict.fromkeys(X))
Y = list(OrderedDict.fromkeys(Y))
Z = list(OrderedDict.fromkeys(Z))
extraZ = list(OrderedDict.fromkeys(extraZ))

# If a node in Z occurs already in X or Y, remove it from Z
Z = [node for node in Z if (node not in X) and (node not in Y)]
extraZ = [node for node in extraZ if (node not in X) and (node not in Y) and (node not in Z)]

# Check that all lags are non-positive and indices are in [0,N-1]
XYZ = X + Y + Z
XYZ = X + Y + Z + extraZ
dim = len(XYZ)

# Ensure that XYZ makes sense
Expand All @@ -200,9 +205,10 @@ def construct_array(self, X, Y, Z, tau_max,
# Setup XYZ identifier
index_code = {'x' : 0,
'y' : 1,
'z' : 2}
'z' : 2,
'e' : 3}
xyz = np.array([index_code[name]
for var, name in zip([X, Y, Z], ['x', 'y', 'z'])
for var, name in zip([X, Y, Z, extraZ], ['x', 'y', 'z', 'e'])
for _ in var])

# Setup and fill array with lagged time series
Expand Down Expand Up @@ -250,7 +256,7 @@ def construct_array(self, X, Y, Z, tau_max,
array_mask[i, :] = (_use_mask[self.bootstrap + lag, var] == False)

# Iterate over defined mapping from letter index to number index,
# i.e. 'x' -> 0, 'y' -> 1, 'z'-> 2
# i.e. 'x' -> 0, 'y' -> 1, 'z'-> 2, 'e'-> 3
for idx, cde in index_code.items():
# Check if the letter index is in the mask type
if (mask_type is not None) and (idx in mask_type):
Expand All @@ -268,7 +274,7 @@ def construct_array(self, X, Y, Z, tau_max,

# Print information about the constructed array
if verbosity > 2:
self.print_array_info(array, X, Y, Z, self.missing_flag, mask_type)
self.print_array_info(array, X, Y, Z, self.missing_flag, mask_type, extraZ)

# Return the array and xyz and optionally (X, Y, Z)
if return_cleaned_xyz:
Expand Down Expand Up @@ -310,15 +316,15 @@ def _check_nodes(self, Y, XYZ, N, dim):
# raise ValueError("Y-nodes are %s, " % str(Y) +
# "but one of the Y-nodes must have zero lag")

def print_array_info(self, array, X, Y, Z, missing_flag, mask_type):
def print_array_info(self, array, X, Y, Z, missing_flag, mask_type, extraZ=None):
"""
Print info about the constructed array
Parameters
----------
array : Data array of shape (dim, T)
Data array.
X, Y, Z : list of tuples
X, Y, Z, extraZ : list of tuples
For a dependence measure I(X;Y|Z), Y is of the form [(varY, 0)],
where var specifies the variable index. X typically is of the form
[(varX, -tau)] with tau denoting the time lag and Z can be
Expand All @@ -333,11 +339,15 @@ def print_array_info(self, array, X, Y, Z, missing_flag, mask_type):
measure I(X; Y | Z) the samples should be masked. If None, the mask
is not used. Explained in tutorial on masking and missing values.
"""
if extraZ is None:
extraZ = []
indt = " " * 12
print(indt + "Constructed array of shape %s from"%str(array.shape) +
"\n" + indt + "X = %s" % str(X) +
"\n" + indt + "Y = %s" % str(Y) +
"\n" + indt + "Z = %s" % str(Z))
if extraZ is not None:
print(indt + "extraZ = %s" % str(extraZ))
if self.mask is not None and mask_type is not None:
print(indt+"with masked samples in %s removed" % mask_type)
if self.missing_flag is not None:
Expand Down
10 changes: 6 additions & 4 deletions tigramite/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,9 @@ def get_general_fitted_model(self,
# Construct array of shape (var, time) with first entry being
# a dummy, second is y followed by joint X and Z (ignore the notation in construct_array)
array, xyz = \
self.dataframe.construct_array(X=self.X, Y=[y] + self.Z, Z=self.conditions,
self.dataframe.construct_array(X=self.X, Y=[y], # + self.Z,
Z=self.conditions,
extraZ=self.Z,
tau_max=self.tau_max,
mask_type=self.mask_type,
cut_off=self.cut_off,
Expand All @@ -170,8 +172,8 @@ def get_general_fitted_model(self,
a_model = deepcopy(self.model)

predictor_indices = list(np.where(xyz==0)[0]) \
+ list(np.where(xyz==1)[0][1:]) \
+ list(np.where(xyz==2)[0])
+ list(np.where(xyz==2)[0]) \
+ list(np.where(xyz==3)[0])
predictor_array = array[predictor_indices, :].T
# Target is only first entry of Y, ie [y]
target_array = array[np.where(xyz==1)[0][0], :]
Expand Down Expand Up @@ -255,7 +257,7 @@ def get_general_prediction(self,
conditions_data = a_transform.transform(X=conditions_data)

# Extract observational Z from stored array
z_indices = list(np.where(self.fit_results[y]['xyz']==1)[0][1:])
z_indices = list(np.where(self.fit_results[y]['xyz']==3)[0])
z_array = self.fit_results[y]['observation_array'][z_indices, :].T
Tobs = len(z_array)

Expand Down
Loading

0 comments on commit 99eae95

Please sign in to comment.