-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlowprofool.py
176 lines (128 loc) · 6.07 KB
/
lowprofool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import numpy as np
import pandas as pd
from time import time
from utils.preprocessing import DfInfo
from utils.preprocessing import inverse_dummy
from utils.exceptions import UnsupportedNorm, UnspportedNum
from scipy.stats import pearsonr
from art.attacks.evasion import LowProFool
from art.estimators.classification import SklearnClassifier, KerasClassifier
# from art.estimators.classification.scikitlearn import ScikitlearnDecisionTreeClassifier
# from art.estimators.classification.scikitlearn import ScikitlearnRandomForestClassifier
from art.estimators.classification.scikitlearn import ScikitlearnLogisticRegression
from art.estimators.classification.scikitlearn import ScikitlearnSVC
'''
LowProFool Attack
https://arxiv.org/pdf/1911.03274.pdf
Only works on numerical features and continous features,
all non-ordered categorical features need to be dropped
'''
# Set the desired parameters for the attack
lowprofool_params = {
'verbose': True,
}
BATCH_SIZE = 64
def art_wrap_models(models, feature_range):
'''
Wrap the model to meet the requirements to art.attacks.evasion.LowProFool
'''
return {
'lr': ScikitlearnLogisticRegression(models['lr'], clip_values=feature_range),
'svc': ScikitlearnSVC(models['svc'], clip_values=feature_range),
'nn_2': KerasClassifier(models['nn_2'], clip_values=feature_range),
}
def get_lowprofool_instance(wrapped_models, norm):
'''
'''
adv_instance = {}
for k in wrapped_models.keys():
if norm != "inf" and isinstance(norm, str):
raise UnsupportedNorm()
elif norm == None:
raise UnsupportedNorm()
else:
adv_instance[k] = LowProFool(classifier=wrapped_models[k], norm=norm, **lowprofool_params)
return adv_instance
def calculate_feature_importances(x,y):
pearson_correlations = [pearsonr(x[:, col], y)[0] for col in range(x.shape[1])]
absolutes = np.abs(np.array(pearson_correlations))
importance_vec = absolutes / np.power(np.sum(absolutes ** 2), 0.5)
importance_vec = np.array(importance_vec) / np.sum(importance_vec)
return importance_vec.astype(np.float64)
def generate_lowprofool_result(
df_info: DfInfo,
models,
num_instances,
X_train, y_train,
X_test, y_test,
norm=None,
models_to_run=['svc', 'lr', 'nn_2'],
):
feature_range=(0,1)
print("Feature range:" )
print(feature_range)
wrapped_models = art_wrap_models(models, feature_range)
# importance_vec = calculate_feature_importances(X_train, y_train)
# Get adversarial examples generator instance.
adv_instance = get_lowprofool_instance(wrapped_models, norm=norm)
# Initialise the result dictionary.(It will be the return value.)
results = {}
if isinstance(num_instances, int) and num_instances % BATCH_SIZE == 0:
X_test_re=X_test[0:num_instances]
y_test_re=y_test[0:num_instances]
elif isinstance(num_instances, str) and num_instances == 'all':
X_test_num = len(X_test) - (len(X_test)%BATCH_SIZE)
X_test_re=X_test[0:X_test_num]
y_test_num = len(y_test) - (len(y_test)%BATCH_SIZE)
y_test_re=y_test[0:y_test_num]
else:
raise UnspportedNum()
# y_test_re_ohe = np.zeros((y_test_re.size, 2))
# y_test_re_ohe[np.arange(y_test_re.size), y_test_re] = 1
# Loop through every models (svc, lr, nn_2)
for k in models_to_run:
# Intialise the result for the classifier (predicting model).
results[k] = []
print(f"Finding adversarial examples for {k}")
adv_instance[k].fit_importances(x=X_train, y=y_train)
# Get the prediction from original predictive model in a human-understandable format.
if k == 'nn_2':
prediction = np.argmax(models[k].predict(X_test_re), axis=1).astype(int)
else:
prediction = models[k].predict(X_test_re)
target = 1-prediction
target_ohe = np.zeros((target.size, 2))
target_ohe[np.arange(target.size), target] = 1
start_t = time()
adv = adv_instance[k].generate(X_test_re, target_ohe)
end_t = time()
# Calculate the running time.
running_time = end_t - start_t
# Get the prediction from original predictive model in a human-understandable format.
if k == 'nn_2':
# nn return float [0, 1], so we need to define a threshold for it. (It's usually 0.5 for most of the classifier).
prediction = np.argmax(models[k].predict(X_test_re), axis=1).astype(int)
adv_prediction = np.argmax(models[k].predict(adv), axis=1).astype(int)
else:
# dt and rfc return int {1, 0}, so we don't need to define a threshold to get the final prediction.
prediction = models[k].predict(X_test_re)
adv_prediction = models[k].predict(adv)
# Looping throguh first `num_instances` in the test set.
for idx, instance in enumerate(X_test_re):
example = instance.reshape(1, -1)
adv_example = adv[idx].reshape(1,-1)
adv_example_df = inverse_dummy(pd.DataFrame(adv_example, columns=df_info.ohe_feature_names), df_info.cat_to_ohe_cat)
# Change the found input from ohe format to original format.
input_df = inverse_dummy(pd.DataFrame(example, columns=df_info.ohe_feature_names), df_info.cat_to_ohe_cat)
input_df.loc[0, df_info.target_name] = df_info.target_label_encoder.inverse_transform([prediction[idx]])[0]
results[k].append({
"input": example,
"input_df": input_df,
"adv_example": adv_example,
"adv_example_df": adv_example_df,
"running_time": running_time,
"ground_truth": df_info.target_label_encoder.inverse_transform([y_test[idx]])[0],
"prediction": df_info.target_label_encoder.inverse_transform([prediction[idx]])[0],
"adv_prediction": df_info.target_label_encoder.inverse_transform([adv_prediction[idx]])[0],
})
return results