-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathprepareData.m
100 lines (78 loc) · 3.1 KB
/
prepareData.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
function [imgDataTrain, labelsTrain, imgDataTest, labelsTest] = prepareData
%PREPAREDATA Load the MNIST training and test sets from IDX files.
%   [imgDataTrain, labelsTrain, imgDataTest, labelsTest] = prepareData
%   returns:
%     imgDataTrain - uint8 array, numRows-by-numCols-by-1-by-numTrainImages
%     labelsTrain  - categorical column vector, one label per training image
%     imgDataTest  - uint8 array, numRows-by-numCols-by-1-by-numTestImages
%     labelsTest   - categorical column vector, one label per test image
%
%   The four uncompressed IDX files are looked up in the legacy hard-coded
%   data folder when that folder exists, otherwise in the "data" folder
%   below the current folder. Any file that is missing is downloaded and
%   unzipped into the chosen folder first.
%
%   Errors are raised (identifiers "prepareData:*") when a file cannot be
%   opened, has the wrong magic number, or is shorter than its header says.
% Copyright 2018 The MathWorks, Inc.

%% Check for the existence of the MNIST files and download them if necessary
% A string array (not a cell array) so that names can be concatenated with +.
files = [ "train-images-idx3-ubyte", ...
          "train-labels-idx1-ubyte", ...
          "t10k-images-idx3-ubyte", ...
          "t10k-labels-idx1-ubyte" ];

% Keep honouring the original author's fixed location when it is present,
% but fall back to ./data so the function also works on other machines and
% actually reads what it downloads.
legacyDir = 'E:\00 PhD\GITcnn\data';%'E:\00 PhD\DataSets\fashion mnist';
if exist(legacyDir, 'dir') == 7
    dataDir = legacyDir;
else
    dataDir = fullfile(pwd, 'data');
end

downloadMissingFiles(dataDir, files);

%% Extract the MNIST images into arrays
disp('Preparing MNIST data...');

imgDataTrain = readIdxImages(fullfile(dataDir, char(files(1))));
labelsTrain  = readIdxLabels(fullfile(dataDir, char(files(2))));
imgDataTest  = readIdxImages(fullfile(dataDir, char(files(3))));
labelsTest   = readIdxLabels(fullfile(dataDir, char(files(4))));

disp('MNIST data preparation complete.');
% img = readMNISTImage(imgDataTrain, 3);
% figure, imshow(img);

function downloadMissingFiles(dataDir, files)
% Download and unzip every file in FILES that is not yet present in DATADIR.
missing = false(size(files));
for k = 1:numel(files)
    missing(k) = exist(fullfile(dataDir, char(files(k))), 'file') ~= 2;
end
if ~any(missing)
    return
end

disp('Downloading files...')
if exist(dataDir, 'dir') ~= 7
    mkdir(dataDir);
end
webPrefix = "http://yann.lecun.com/exdb/mnist/";
webSuffix = ".gz";
for k = find(missing)
    gzName = files(k) + webSuffix;
    gzPath = fullfile(dataDir, char(gzName));
    websave(gzPath, char(webPrefix + gzName));
    % Unzip with explicit paths instead of cd-ing into the folder, so the
    % caller's working directory is never changed, even on error.
    gunzip(gzPath, dataDir);
end
disp('Download complete.')

function fid = openIdxFile(filePath)
% Open FILEPATH for big-endian binary reading; error if it cannot be opened.
[fid, msg] = fopen(filePath, 'r', 'b');
if fid == -1
    error('prepareData:fileOpen', 'Cannot open "%s": %s', filePath, msg);
end

function imgs = readIdxImages(filePath)
% Read an IDX3 image file into a numRows-by-numCols-by-1-by-numImgs uint8 array.
fid = openIdxFile(filePath);
closeFile = onCleanup(@() fclose(fid)); %#ok<NASGU> closes the file on any exit

% Header: magic number, image count, rows, columns (big-endian uint32).
header = fread(fid, 4, 'uint32');
if numel(header) ~= 4 || header(1) ~= 2051
    error('prepareData:badImageFile', ...
        '"%s" is not an IDX3 image file (bad header).', filePath);
end
numImgs = header(2);
numRows = header(3);
numCols = header(4);

numPixels = numImgs * numRows * numCols;
raw = fread(fid, numPixels, 'uint8=>uint8');
if numel(raw) ~= numPixels
    error('prepareData:truncatedFile', ...
        '"%s" holds %d pixel bytes but its header promises %d.', ...
        filePath, numel(raw), numPixels);
end

% IDX stores pixels row by row, MATLAB fills column by column: load each
% image as numCols-by-numRows and swap the first two dimensions. (For
% square images this matches the previous behaviour exactly.)
raw = reshape(raw, [numCols, numRows, numImgs]);
imgs = reshape(permute(raw, [2, 1, 3]), [numRows, numCols, 1, numImgs]);

function labels = readIdxLabels(filePath)
% Read an IDX1 label file into a categorical column vector.
fid = openIdxFile(filePath);
closeFile = onCleanup(@() fclose(fid)); %#ok<NASGU> closes the file on any exit

% Header: magic number, label count (big-endian uint32).
header = fread(fid, 2, 'uint32');
if numel(header) ~= 2 || header(1) ~= 2049
    error('prepareData:badLabelFile', ...
        '"%s" is not an IDX1 label file (bad header).', filePath);
end
numLabels = header(2);

rawLabels = fread(fid, numLabels, 'uint8');
if numel(rawLabels) ~= numLabels
    error('prepareData:truncatedFile', ...
        '"%s" holds %d labels but its header promises %d.', ...
        filePath, numel(rawLabels), numLabels);
end
labels = categorical(rawLabels);