-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathCRFBasic.py
executable file
·129 lines (93 loc) · 3.02 KB
/
CRFBasic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 21 12:45:08 2016
@author: rsk
"""
import cPickle
import gzip
import os
import sys
import time
import numpy as np
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import itertools
# Switch the process working directory to a hard-coded absolute path on the
# author's machine; edit this path before running elsewhere.
# The results cell at the end of this script writes under
# os.getcwd() + "/Results", so where output lands depends on this call.
# NOTE(review): presumably this also lets the project-local CRFUtils import
# below resolve when run interactively -- confirm.
os.chdir('/home/rsk/Documents/PyStruct/CRF-MNIST-ImageDenoising/')
from pystruct.models import GraphCRF, LatentNodeCRF
from pystruct.learners import NSlackSSVM, OneSlackSSVM, LatentSSVM
from pystruct.datasets import make_simple_2x2
from pystruct.utils import make_grid_edges, plot_grid
from CRFUtils import *
#%%
"""
Use getDigitData to get the images of one particular digit (here 0) from the
entire MNIST dataset, then corrupt the train and test images with addNoise.
To increase the difficulty of the denoising task, increase the noise
parameter of addNoise.
"""
train, valid, test = getDigitData(0)
trainDirty = addNoise(train, noise=0.05)
testDirty = addNoise(test, noise=0.05)

# Truth labels are derived from the clean (noise-free) images by thresholding:
# a value strictly greater than the threshold becomes 1, everything else 0.
threshold = 0.3
trainLabels = np.array(
    [[1 if value > threshold else 0 for value in image] for image in train]
)
testLabels = np.array(
    [[1 if value > threshold else 0 for value in image] for image in test]
)
#%%
"""
Fit a GraphCRF with an n-slack structural SVM on the noisy training images.
To train a model with only unary potentials, set dist = 0
"""
n_train = 200        # number of noisy training images fed to the learner
n_test = 100         # number of noisy test images used by the evaluation cell
num_iter = 10        # passed to NSlackSSVM as max_iter
C = 0.1              # passed to NSlackSSVM as C
dist = 0             # passed to edges(); 0 means "no edges at all"
diag = 0             # passed to edges() together with dist
inference = "ad3"    # passed to GraphCRF as inference_method

train_images = trainDirty[0:n_train]

if dist == 0:
    # One empty (0, 2) integer edge array per image, i.e. a graph without
    # any pairwise connections. The builtin `int` is used as dtype because
    # the `np.int` alias is deprecated and removed in newer NumPy releases;
    # the resulting dtype is the same.
    G = [np.empty((0, 2), dtype=int) for _ in train_images]
else:
    # edges() is not defined in this file (presumably it comes from
    # CRFUtils). The same edge list is shared by every image.
    # NOTE(review): (28, 28) is presumably the MNIST image grid -- confirm.
    edgeList = edges((28, 28), dist=dist, diag=diag)
    G = [edgeList for _ in train_images]

# NOTE(review): np.vstack presumably turns each flat image into a column of
# per-pixel features -- confirm against getDigitData's output shape.
X_flat = [np.vstack(i) for i in train_images]
Y_flat = np.array(trainLabels[0:n_train])

crf = GraphCRF(inference_method=inference)
svm = NSlackSSVM(model=crf, max_iter=num_iter, C=C, n_jobs=6, verbose=1)

# Each training sample is a (features, edges) pair. The pairs are stored in a
# real list because they are iterated again later for prediction on the
# training set; a one-shot zip iterator would be exhausted after fit().
asdf = list(zip(X_flat, G))
svm.fit(asdf, Y_flat)
#%%
# Build the test-set samples the same way the training samples were built,
# then score the fitted model on both the training and the test subset.
test_images = testDirty[0:n_test]

if dist == 0:
    # Empty edge arrays for the test images. The original line read
    # `G2 = G=[...]`, which also overwrote the training graph list G; only G2
    # is assigned here. `int` replaces the deprecated/removed `np.int` alias.
    G2 = [np.empty((0, 2), dtype=int) for _ in test_images]
else:
    # Reuse the edge list computed in the training cell.
    G2 = [edgeList for _ in test_images]

X_flat2 = [np.vstack(i) for i in test_images]
Y_flat2 = np.array(testLabels[0:n_test])
asdf2 = list(zip(X_flat2, G2))

# Mean of accuracy(prediction, truth) over the training samples.
# accuracy() is not defined in this file (presumably from CRFUtils); the
# messages below call the score DICE. Despite the `err` prefix these
# variables hold that score, and the names are kept because the
# result-saving cell reads them.
predTrain = svm.predict(asdf)
errTrain = sum(accuracy(pred, truth) for pred, truth in zip(predTrain, Y_flat))
errTrain = errTrain / float(len(predTrain))

# Same mean score over the test samples.
predTest = svm.predict(asdf2)
errTest = sum(accuracy(pred, truth) for pred, truth in zip(predTest, Y_flat2))
errTest = errTest / float(len(predTest))

# Parenthesised single-argument form prints the same text under Python 2.
print("The train set DICE is %f" % (errTrain))
print("The test set DICE is %f" % (errTest))
#%%
# Persist this run: append one summary row to Results/results.csv and pickle
# the train/test predictions next to it.
resultsDir = os.path.join(os.getcwd(), "Results")
if not os.path.isdir(resultsDir):
    # open() below would fail if the directory were missing.
    os.makedirs(resultsDir)

# Row layout: num_iter,dist,diag,inference,errTrain,errTest
summary = [num_iter, dist, diag, inference, errTrain, errTest]
with open(os.path.join(resultsDir, "results.csv"), 'a') as resultFile:
    resultFile.write(",".join(str(field) for field in summary) + "\n")

# The number of entries currently in the results directory (counted after
# results.csv has been written) is used as the filename prefix for this run.
nameLen = len(os.listdir(resultsDir))
filename = "_".join([str(nameLen), str(dist), str(diag), inference])

# `with` guarantees each file is closed even if pickling raises.
with open(os.path.join(resultsDir, filename + "_Train.pkl"), 'wb') as predFileTrain:
    cPickle.dump(predTrain, predFileTrain, protocol=cPickle.HIGHEST_PROTOCOL)
with open(os.path.join(resultsDir, filename + "_Test.pkl"), 'wb') as predFileTest:
    cPickle.dump(predTest, predFileTest, protocol=cPickle.HIGHEST_PROTOCOL)