-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_simulated_data.py
110 lines (98 loc) · 3.26 KB
/
generate_simulated_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import numpy as np
import os
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort seen with some NumPy/MKL installs -- confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK']='True'
N = 5000 # Number of data points (rows) to simulate
D = 4 # Dimensionality, equivalent to the number of nodes
ETA = 0.4 # Sparsity: the probability of an edge being present
SEED = 0 # Base seed handed to NumPy's global random number generator
DISTRIBUTION = 'gaussian' # Sampling scheme used by simulate_data: 'gaussian' or 'gamma'
def generate_graph(num_nodes, num_data, sparsity, seed=SEED):
    """
    Draw a random directed acyclic graph as a binary adjacency matrix.

    Every entry strictly above the diagonal is set to 1 with probability
    ``sparsity`` and to 0 otherwise. The diagonal and the lower triangle
    stay zero, so node ``j`` can only point to nodes with a larger index.

    Args:
        num_nodes: Number of nodes; the result is ``num_nodes x num_nodes``.
        num_data: Unused. Kept so existing callers keep working.
        sparsity: Probability that any given upper-triangular edge exists.
        seed: Seed applied to NumPy's global random number generator.

    Returns:
        A float ``numpy.ndarray`` of shape ``(num_nodes, num_nodes)``
        containing only 0.0 and 1.0.

    Side effects:
        Reseeds NumPy's global RNG and prints the generated matrix.
    """
    # Reseed first so the same arguments always reproduce the same graph.
    np.random.seed(seed)

    # Size the draw from the requested node count (not a module constant) so
    # the number of sampled entries always matches the upper triangle.
    upper_rows, upper_cols = np.triu_indices(num_nodes, 1)
    entries = np.random.choice(
        [0, 1],
        size=upper_rows.size,
        p=[1 - sparsity, sparsity],
    )

    graph = np.zeros((num_nodes, num_nodes))
    graph[upper_rows, upper_cols] = entries
    print(graph)
    return graph
def create_weights(graph, seed=SEED):
    """
    Assign a random weight to every edge of an adjacency matrix.

    One weight per non-zero entry of ``graph`` is sampled uniformly from
    [0.1, 2) and written to the matching position of a zero matrix.

    Args:
        graph: Square adjacency matrix; any non-zero entry counts as an edge.
        seed: Seed applied to NumPy's global random number generator.

    Returns:
        A float ``numpy.ndarray`` with the same shape as ``graph`` holding
        the sampled weight on each edge and 0.0 everywhere else.

    Side effects:
        Reseeds NumPy's global RNG.
    """
    graph = np.asarray(graph)

    # Count edges from the same non-zero positions that get filled below, so
    # the number of sampled weights matches even when entries are not 0/1.
    edge_positions = np.nonzero(graph)
    num_edges = edge_positions[0].size

    np.random.seed(seed)
    weight_values = np.random.uniform(0.1, 2, num_edges)

    weights = np.zeros(graph.shape)
    weights[edge_positions] = weight_values
    return weights
def simulate_data(weights, num_data, num_nodes, distribution, seed):
    """
    Simulate samples from a weighted graph, one node at a time.

    Nodes are filled in index order (0, 1, ..., num_nodes - 1). A node's
    value depends on the already-filled entries of the same row, combined
    through column ``i`` of ``weights``. Entries that are not filled yet are
    still zero, so only edges from a lower-indexed node to a higher-indexed
    node contribute.

    * ``'gaussian'``: node 0 is ``1 + noise``; every other node is the
      weighted sum of the row plus standard-normal noise.
    * ``'gamma'``: node 0 is Gamma(shape=2, scale=2); every other node is
      Gamma(shape=2, scale=1/s), where ``s`` is the weighted sum of the
      lower-indexed nodes, falling back to scale=2 when ``s`` is zero.

    Args:
        weights: ``(num_nodes, num_nodes)`` array; ``weights[j, i]`` is the
            weight of the edge from node ``j`` to node ``i``.
        num_data: Number of rows (samples) to generate.
        num_nodes: Number of columns (nodes) per sample.
        distribution: Either ``'gaussian'`` or ``'gamma'``.
        seed: Base seed; row ``n`` is generated after seeding NumPy's global
            RNG with ``seed + n``.

    Returns:
        A ``(num_data, num_nodes)`` float array rounded to 3 decimals.

    Raises:
        ValueError: If ``distribution`` is not a supported name.

    Side effects:
        Reseeds NumPy's global RNG once per generated row.
    """
    if distribution not in ('gaussian', 'gamma'):
        # Fail loudly instead of silently returning an all-zero data set.
        raise ValueError(
            "Unsupported distribution {!r}; expected 'gaussian' or "
            "'gamma'".format(distribution)
        )

    data = np.zeros([num_data, num_nodes])
    for n in range(num_data):
        # Seed per row for both distributions so equal arguments always
        # reproduce equal data.
        np.random.seed(seed + n)

        if distribution == 'gaussian':
            noise = np.random.normal(0, 1, num_nodes)
            data[n, 0] = noise[0] + 1
            for i in range(1, num_nodes):
                # Unfilled entries of the row are still 0 and add nothing.
                data[n, i] = np.dot(data[n, :], weights[:, i]) + noise[i]
        else:
            data[n, 0] = np.random.gamma(shape=2, scale=2)
            for i in range(1, num_nodes):
                parent_sum = np.dot(data[n, :i], weights[:i, i])
                # A node without active parents uses the root's scale.
                scale = 1 / parent_sum if parent_sum != 0 else 2
                data[n, i] = np.random.gamma(shape=2, scale=scale)

    return np.around(data, 3)
def main():
    """
    Simulate a data set from a fixed 10-node graph and save it as CSV files.

    Writes four comma-separated files under ``data/simulated_data``: the
    adjacency matrix, the edge weights, the simulated data, and a second
    copy of the simulated data named ``bootstrap_sample_N_<N>.csv``. The
    output directory is created if it does not exist yet.
    """
    # Alternative to the fixed graph below: draw a random graph instead.
    #graph = generate_graph(D, N, ETA, SEED)
    # NOTE(review): this graph has edges from higher- to lower-indexed nodes
    # (e.g. row 9 -> columns 5-8, rows 3 and 4 -> column 1). simulate_data
    # fills nodes in index order, so confirm those edges affect the data as
    # intended.
    graph = np.array([
        [0, 0, 0, 1, 1, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 1, 1, 1, 0],
    ])
    print(graph)

    # The sampled weights are replaced on the next line by a constant weight
    # of 2 per edge. The call is kept because it also reseeds NumPy's global
    # RNG, which later random draws may rely on.
    weights = create_weights(graph, SEED)
    weights = graph * 2
    print(weights)
    print(graph.shape[0])

    data = simulate_data(weights, N, graph.shape[0], DISTRIBUTION, SEED)

    # Save files. Create the target directory first so a fresh checkout does
    # not fail only after the simulation has finished.
    output_dir = "data/simulated_data"
    os.makedirs(output_dir, exist_ok=True)

    # NOTE(review): the file names embed the module constant D rather than
    # graph.shape[0]; with the fixed graph above the two differ. Kept as is
    # so existing readers of these files keep working -- confirm intent.
    run_tag = "seed_{}_N_{}_D_{}.csv".format(SEED, N, D)
    outputs = [
        ("{}_true_graph_{}".format(DISTRIBUTION, run_tag), graph.astype(int)),
        ("{}_true_weights_{}".format(DISTRIBUTION, run_tag), np.around(weights, 3)),
        ("{}_sim_data_{}".format(DISTRIBUTION, run_tag), np.around(data, 3)),
        ("bootstrap_sample_N_{}.csv".format(N), np.around(data, 3)),
    ]
    for file_name, values in outputs:
        np.savetxt(
            "{}/{}".format(output_dir, file_name),
            values,
            delimiter=','
        )
#if __name__ == '__main__':
# main()