-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcross_val.py
142 lines (98 loc) · 4.49 KB
/
cross_val.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import numpy as np

import knn
from knn import KNN, accuracy, conf_mat
def NestedCrossVal(X, y, nFolds, listN, distances, mySeed):
    """Nested cross-validation for a KNN classifier.

    The outer loop estimates generalisation accuracy; for every inner fold
    the best (num_neighbors, distance) pair is chosen by comparing the mean
    euclidean accuracy against the mean manhattan accuracy over all
    candidate neighbour counts, then that model is retrained on the whole
    outer-training split and scored on the outer-test split.

    Params
    ------
    - X : array of samples
    - y : array of labels
    - nFolds : int, number of folds used for BOTH the outer and inner loop
    - listN : list of candidate neighbour counts (values of k to try)
    - distances : list  # NOTE(review): currently unused — euclidean and
      manhattan are always compared; kept for interface compatibility
    - mySeed : int, RNG seed used when shuffling indices into folds

    Returns
    ------
    - float : mean outer-fold accuracy, rounded to 6 decimal places
    - float : standard deviation of the outer-fold accuracies, rounded
    - array : confusion matrix of the LAST evaluated fold (as in the
      original implementation — not an aggregate)
    """

    def kfold_indices(data, k):
        """Split `data` into k (train_indices, test_indices) pairs.

        Indices are shuffled once with `mySeed` so folds are reproducible.
        Any remainder beyond k * (len(data) // k) never appears in a test
        fold (it is always part of training).
        """
        fold_size = len(data) // k
        np.random.seed(mySeed)
        indices = np.random.permutation(np.arange(0, len(data), 1))
        folds = []
        for i in range(k):
            test_idx = indices[i * fold_size:(i + 1) * fold_size]
            train_idx = np.concatenate(
                [indices[:i * fold_size], indices[(i + 1) * fold_size:]])
            folds.append((train_idx, test_idx))
        return folds

    mean_scores = []
    confusion_matrix = None  # confusion matrix of the last scored fold
    for outer_train_idx, outer_test_idx in kfold_indices(X, nFolds):
        X_tr, y_tr = X[outer_train_idx], y[outer_train_idx]
        X_te, y_te = X[outer_test_idx], y[outer_test_idx]

        inner_scores = []
        for in_train_idx, in_test_idx in kfold_indices(X_tr, nFolds):
            # Score every candidate k under both distance metrics.
            # (Single loop replaces the two duplicated blocks.)
            scores = {'euclidean': [], 'manhattan': []}
            for k in listN:
                for dist in ('euclidean', 'manhattan'):
                    clf = KNN(num_neighbors=k, distance=dist)
                    clf.fit(X_tr[in_train_idx], y_tr[in_train_idx])
                    pred = clf.predict(X_tr[in_test_idx])
                    score = accuracy(y_tr[in_test_idx], pred)
                    scores[dist].append((float(score), k))

            avg_e = np.mean([s for s, _ in scores['euclidean']])
            avg_m = np.mean([s for s, _ in scores['manhattan']])
            # BUGFIX: the winning neighbour count is the k stored in the
            # (score, k) tuple — the original used list index + 1, which is
            # only correct when listN == [1, 2, 3, ...].
            best_k_e = max(scores['euclidean'], key=lambda t: t[0])[1]
            best_k_m = max(scores['manhattan'], key=lambda t: t[0])[1]

            # Ties favour euclidean, matching the original's `if best == avg_E`.
            if avg_e >= avg_m:
                best_k, best_dist = best_k_e, 'euclidean'
            else:
                best_k, best_dist = best_k_m, 'manhattan'

            # Retrain the selected model on the full outer-training split
            # and evaluate it on the held-out outer-test split.
            best_clf = KNN(num_neighbors=best_k, distance=best_dist)
            best_clf.fit(X_tr, y_tr)
            y_pred = best_clf.predict(X_te)
            fold_acc = accuracy(y_te, y_pred)
            inner_scores.append(fold_acc)
            confusion_matrix = conf_mat(y_te, y_pred)
            # BUGFIX: report the distance that actually won model selection;
            # the original compared k values (max of the two counts) instead.
            print('Final Accuracy:', np.round(fold_acc, 6), best_k, best_dist)

        # Mean accuracy across all inner folds of this outer fold.
        mean_scores.append(np.mean(inner_scores))

    return (np.round(np.mean(mean_scores), 6),
            np.round(np.std(mean_scores), 6),
            confusion_matrix)