regression.py
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 27 14:46:21 2017
@author: Jordan
July 2017: trying out linear regression models.
Need to improve the abstraction in this code, as in my other code.
Issue: importing the dataprep function from algorithm.py runs the whole
script on import (see the note after this docstring).
"""
from __future__ import division, print_function
print('Start of script')
# Libraries
import numpy as np
import os
#from sklearn import datasets
#from sklearn.model_selection import cross_val_predict
from sklearn import linear_model
import matplotlib.pyplot as plt
from dataprep import dataprep, unison_shuffled_copies
from dataprep import makeFFT, running_mean, calcSma, smoothingPlot, stats
import glob
# Constants and parameters
path = '../Data/july19/static/analysis/'  # Main working directory
#segment = 0.9 # Amount split for training. Not needed when we do LOOCV
seed = 0 # Random seed value
twidth = 2 # This is the number of label "t" columns in the front of the x-matrix
scale_table = 1.333 # This is the scaling factor of table at 805mm sys height. Units are pixels/mm
output_dir = path # Directory for results output
output_file = 'analysis.csv' # Analysis Summary file
output_path = os.path.join(output_dir,output_file)
def model(x_train, x_test, t_train, t_test):
    """Fit linear regression on the training data and report test-set errors."""
    #%% Model
    # Round t values (coordinates) since the coordinate sampling is troublesome
t_train = np.round(t_train,decimals=0)
t_test = np.round(t_test,decimals=0)
regr = linear_model.LinearRegression()
regr.fit(x_train, t_train)
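    # Note: t_train has two coordinate columns, so this is a multi-output fit;
    # scikit-learn fits one coefficient row per target, giving regr.coef_
    # shape (2, n_features).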
#%% Results
# Coefficients of fit
    #print('Coefficients: \n', regr.coef_)
    coeff = regr.coef_  # Fitted coefficients, kept for inspection
    # Mean squared error on the held-out test file
    print('MSE: %.2f' % np.mean((regr.predict(x_test) - t_test) ** 2))
    # score() returns the coefficient of determination R^2, not the variance
    print('R^2: %.2f' % regr.score(x_test, t_test))
    # Euclidean distance error on the system predictions
    t_pred = regr.predict(x_test)
    diff = (t_pred - t_test)**2  # Squared per-axis errors
    diff = np.sqrt(np.sum(diff, axis=1))  # Sum the squared errors and sqrt: Euclidean distance error
    error_mean = np.mean(diff)  # Mean error in pixels
    error_mean = error_mean/scale_table  # Mean error in mm
    print('mean error is', error_mean, 'mm')
    diff_mm = diff/scale_table  # Per-sample error values in mm
    error_min = np.min(diff_mm)
    error_max = np.max(diff_mm)
    error_med = np.median(diff_mm)
    print('min error (mm) is', error_min, 'max error', error_max, 'median', error_med)
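    # Worked example of the px -> mm conversion above: at the table scale of
    # 1.333 px/mm, a Euclidean error of 10 px corresponds to 10/1.333 ≈ 7.5 mm.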
# Histogram of analysis
#np.histogram(diff_mm,bins=10)
plt.figure()
plt.hist(diff_mm,bins='auto')
title = 'Histogram of error (mm)'
plt.title(title)
plt.ylabel('Occurrences')
plt.xlabel('Error value (mm)')
plt.show()
return error_mean, error_min, error_max, error_med
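# Note: the np.mean over the squared-error matrix in model() is equivalent to
# sklearn.metrics.mean_squared_error(t_test, regr.predict(x_test)) with its
# default multioutput='uniform_average'.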
# Functions are in dataprep.py
#%% Main Script
# Load data
# Get all the files in the folder
filelist = glob.glob(path + '*.csv')
numfiles = len(filelist)
seed = numfiles  # Overrides seed = 0 above; seed is currently unused in this script
e_mean = []
e_min = []
e_max = []
e_med = []
for i in range(numfiles):  # Leave-one-out: hold out one file, train on the rest
    x_train = []
    singlepath = filelist[i]  # Path of the held-out test file
    print(os.path.basename(singlepath), 'is the file we test on')
    otherpaths = filelist[:i] + filelist[i+1:]  # Remaining paths form the training set
    x_test = np.genfromtxt(singlepath, delimiter=',')  # The real x data
    time_test = x_test[:,0]  # Time data
    t_test = x_test[:,1:3]  # Coordinate data
    x_test = x_test[:,3:]  # Channel power data
# Try smoothing data
N1 = 300 # Smooth on x values. Smoothing on 50 points gives us 20 ms data
N2 = 300
# newx_test = calcSma(x_test,N1) # Smoothing on x data
# length_new = len(newx_test) # Length of new x matrix. Confirm alignment
# time_test = time_test[N1-1:] # Resize time vector. Note SMA "eats from start" of array
# t_test = t_test[N1-1:] # Resize coordinates also
# smoothingPlot(time_test[1:500],newx_test[1:500,1],N1,N2) # Look at subset of data to see trending
# x_test = newx_test
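    # Alignment note for the smoothing block above: assuming calcSma (defined
    # in dataprep.py) returns an N-point simple moving average, its output is
    # N1-1 samples shorter than its input, which is why time_test and t_test
    # are trimmed with [N1-1:]. E.g. a 5-sample signal smoothed with N1 = 3
    # leaves 5 - (3-1) = 3 samples.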
for apath in otherpaths:
xtemp = np.genfromtxt(apath,delimiter=',')
# Data smoothing on training data
# xtemp_sigdata = xtemp[:,3:] # Grab signal data only
# xtemp_sigdata = calcSma(xtemp_sigdata,N1) # Smooth signal data
# xtemp = xtemp[N1-1:,:] # Resize original array after smoothing
# xtemp[:,3:] = xtemp_sigdata # Put result back into array
#
x_train.append(xtemp)
x_train = np.vstack(x_train)
t_train = x_train[:,1:3]
x_train = x_train[:,3:]
# Run model
error_mean, error_min, error_max, error_med = model(x_train,x_test,t_train,t_test)
e_mean.append(error_mean)
e_min.append(error_min)
e_max.append(error_max)
e_med.append(error_med)
# Summarize results
print(e_mean)
print('Overall mean error is', np.mean(e_mean))
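# The loop above is leave-one-file-out cross-validation done by hand. A
# minimal sketch of the same idea using scikit-learn's built-in machinery
# (cross_val_predict is already imported, commented out, near the top);
# X_all, t_all and groups are assumed names, with groups[i] holding the
# index of the source file for row i of the stacked data:
#
#     from sklearn.model_selection import cross_val_predict, LeaveOneGroupOut
#     t_pred_all = cross_val_predict(linear_model.LinearRegression(),
#                                    X_all, t_all, groups=groups,
#                                    cv=LeaveOneGroupOut())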