-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 560c324
Showing
16 changed files
with
826 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
% Train a two-layer perceptron on MNIST with minFunc's conjugate-gradient
% solver, then report its accuracy on the MNIST test set.
addpath(genpath('./minFunc_2012'));

% Load MNIST. loadMNISTImages returns a #pixels x #examples matrix.
X = loadMNISTImages('train-images.idx3-ubyte');
labels = loadMNISTLabels('train-labels.idx1-ubyte');

% Transform the labels to one-hot target values: for example k, row
% (labels(k) + 1) is set to 1 and every other row stays 0.
numberOfExamples = size(labels, 1);
Y = zeros(10, numberOfExamples);
Y(sub2ind(size(Y), labels(:)' + 1, 1:numberOfExamples)) = 1;

% loss_func works with one example per row.
X = X';
Y = Y';

[n, m] = size(Y);   % n examples, m output classes
[~, b] = size(X);   % b input dimensions

numberOfHiddenUnits = 10;

% loss_func reads its parameters from a flat vector laid out as
% [W1(:); W2(:)], with W1 of size b x numberOfHiddenUnits and W2 of size
% numberOfHiddenUnits x m, so the starting point is created in that form.
W = rand(numberOfHiddenUnits*(b + m), 1);

maxFunEvals = 100;

fun = @(w)loss_func(Y, X, w, numberOfHiddenUnits);
activationFunction = @logisticSigmoid;

options = [];
options.display = 'none';
options.useMex = 0; % For fair comparison in time
options.maxFunEvals = maxFunEvals;

%% Conjugate gradient
options.Method = 'cg';
[cg_x, cg_f, ~, cg_output] = minFunc(fun, W, options);
fprintf('Conjugate Gradient Objective Function Value: %f\n', cg_f);

% Unpack the solution with the same layout loss_func uses.
W1 = reshape(cg_x(1:numberOfHiddenUnits*b), b, numberOfHiddenUnits);
W2 = reshape(cg_x(numberOfHiddenUnits*b+1:numberOfHiddenUnits*(b+m)), numberOfHiddenUnits, m);

% loss_func evaluates sigmoid(sigmoid((X./n)*(W1./b))*(W2./numberOfHiddenUnits)).
% Fold those scalings into the weights so the validated network is the one
% that was optimised.
% NOTE(review): assumes validateTwoLayerPerceptron computes
% activation(outputWeights*activation(hiddenWeights*x)) -- confirm.
hiddenWeights = (W1 ./ (b*n))';
outputWeights = (W2 ./ numberOfHiddenUnits)';

% Load the validation set.
inputValues = loadMNISTImages('t10k-images.idx3-ubyte');
labels = loadMNISTLabels('t10k-labels.idx1-ubyte');

% Choose decision rule.
fprintf('Validation:\n');

[correctlyClassified, classificationErrors] = validateTwoLayerPerceptron(activationFunction, hiddenWeights, outputWeights, inputValues, labels);

fprintf('Classification errors: %d\n', classificationErrors);
fprintf('Correctly classified: %d\n', correctlyClassified);
52 changes: 52 additions & 0 deletions
52
code_files/applyStochasticSquaredErrorTwoLayerPerceptronMNIST.m
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
function [] = applyStochasticSquaredErrorTwoLayerPerceptronMNIST()
%applyStochasticSquaredErrorTwoLayerPerceptronMNIST Train the two-layer
%perceptron using the MNIST dataset and evaluate its performance.
%
% Loads the MNIST training files, trains with the selected trainer (the
% alternatives are kept as commented-out calls), then prints the number of
% correctly and incorrectly classified images of the MNIST test files.

    % Load MNIST.
    inputValues = loadMNISTImages('train-images.idx3-ubyte');
    labels = loadMNISTLabels('train-labels.idx1-ubyte');

    % Transform the labels to one-hot target values: for example k, row
    % (labels(k) + 1) is set to 1 and every other row stays 0.
    numberOfExamples = size(labels, 1);
    targetValues = zeros(10, numberOfExamples);
    targetValues(sub2ind(size(targetValues), labels(:)' + 1, 1:numberOfExamples)) = 1;

    % Choose form of MLP:
    numberOfHiddenUnits = 100;

    % Choose appropriate parameters.
    learningRate = 0.1;

    % Choose activation function.
    activationFunction = @logisticSigmoid;
    dActivationFunction = @dLogisticSigmoid;

    % Choose batch size and epochs. Remember there are 60k input values.
    batchSize = 500;
    epochs = 1000;

    fprintf('Train twolayer perceptron with %d hidden units.\n', numberOfHiddenUnits);
    % %g rather than %d: the learning rate is not an integer, and fprintf
    % falls back to exponential notation when %d is given a non-integer.
    fprintf('Learning rate: %g.\n', learningRate);

    % [hiddenWeights, outputWeights, error] = trainStochasticSquaredErrorTwoLayerPerceptron(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);
    tic();
    % The third output (training error) is not used here; it is discarded
    % instead of being stored in a variable that shadows the builtin error().
    [hiddenWeights, outputWeights, ~] = trainMomentumSGD(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);
    toc();
    % [hiddenWeights, outputWeights, error] = trainDiagonalQuasiNewton(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);
    % [hiddenWeights, outputWeights, error] = trainAdaGrad(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);
    % [hiddenWeights, outputWeights, error] = trainAdaDelta(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);
    % [hiddenWeights, outputWeights, error] = trainConjugateGradient(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);

    % Load validation set.
    inputValues = loadMNISTImages('t10k-images.idx3-ubyte');
    labels = loadMNISTLabels('t10k-labels.idx1-ubyte');

    % Choose decision rule.
    fprintf('Validation:\n');

    [correctlyClassified, classificationErrors] = validateTwoLayerPerceptron(activationFunction, hiddenWeights, outputWeights, inputValues, labels);

    fprintf('Classification errors: %d\n', classificationErrors);
    fprintf('Correctly classified: %d\n', correctlyClassified);
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
function y = dLogisticSigmoid(x)
% dLogisticSigmoid Derivative of the logistic sigmoid.
%
% INPUT:
% x : Input vector.
%
% OUTPUT:
% y : Output vector where the derivative of the logistic sigmoid was
%     applied element by element.
%

    % Evaluate the sigmoid once and reuse it: s'(x) = s(x).*(1 - s(x)).
    s = logisticSigmoid(x);
    y = s.*(1 - s);
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
function images = loadMNISTImages(filename)
%loadMNISTImages Read an MNIST image file (IDX format, magic number 2051).
%
% INPUT:
% filename : Path of the image file to read.
%
% OUTPUT:
% images : [numRows*numCols] x [number of images] matrix of doubles. Each
%          column holds one image, and pixel values are rescaled from
%          0..255 to [0,1].
%
% Raises an assertion error if the file cannot be opened, the magic number
% is wrong, or the pixel data does not match the header.

    fp = fopen(filename, 'rb');
    assert(fp ~= -1, ['Could not open ', filename, '']);
    % Close the file on every exit path, including the failed assertions
    % below, so that a bad file does not leak an open handle.
    closeFile = onCleanup(@() fclose(fp));

    % Header fields are big-endian 32-bit integers.
    magic = fread(fp, 1, 'int32', 0, 'ieee-be');
    assert(isequal(magic, 2051), ['Bad magic number in ', filename, '']);

    numImages = fread(fp, 1, 'int32', 0, 'ieee-be');
    numRows = fread(fp, 1, 'int32', 0, 'ieee-be');
    numCols = fread(fp, 1, 'int32', 0, 'ieee-be');

    images = fread(fp, inf, 'unsigned char');
    assert(numel(images) == numRows*numCols*numImages, 'Mismatch in pixel count');

    % Pixels are read in the order they appear in the file, so the first
    % reshape yields numCols x numRows slices; the permute swaps the first
    % two dimensions to get numRows x numCols per image.
    images = reshape(images, numCols, numRows, numImages);
    images = permute(images,[2 1 3]);

    % Reshape to #pixels x #examples
    images = reshape(images, size(images, 1) * size(images, 2), size(images, 3));
    % Convert to double and rescale to [0,1]
    images = double(images) / 255;

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
function labels = loadMNISTLabels(filename)
%loadMNISTLabels Read an MNIST label file (IDX format, magic number 2049).
%
% INPUT:
% filename : Path of the label file to read.
%
% OUTPUT:
% labels : [number of MNIST images]x1 matrix containing the labels for the
%          MNIST images.
%
% Raises an assertion error if the file cannot be opened, the magic number
% is wrong, or the number of labels differs from the count in the header.

    fp = fopen(filename, 'rb');
    assert(fp ~= -1, ['Could not open ', filename, '']);
    % Close the file on every exit path, including the failed assertions
    % below, so that a bad file does not leak an open handle.
    closeFile = onCleanup(@() fclose(fp));

    % Header fields are big-endian 32-bit integers.
    magic = fread(fp, 1, 'int32', 0, 'ieee-be');
    assert(isequal(magic, 2049), ['Bad magic number in ', filename, '']);

    numLabels = fread(fp, 1, 'int32', 0, 'ieee-be');

    labels = fread(fp, inf, 'unsigned char');

    assert(size(labels,1) == numLabels, 'Mismatch in label count');

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
function y = logisticSigmoid(x)
% logisticSigmoid Logistic sigmoid activation function, 1/(1 + exp(-x)).
%
% INPUT:
% x : Input values (scalar, vector or matrix).
%
% OUTPUT:
% y : Array of the same size as x, with the logistic sigmoid applied to
%     every element.
%

    denominator = 1 + exp(-x);
    y = 1./denominator;
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
function [f, df] = loss_func(Y, X, w, numberOfHiddenUnits)
% loss_func Mean squared-error loss of a two-layer sigmoid network and its
% gradient with respect to the packed weight vector.
%
% INPUT:
% Y                   : n x m target matrix, one example per row.
% X                   : n x b input matrix, one example per row.
% w                   : Packed weights [W1(:); W2(:)], where W1 is
%                       b x numberOfHiddenUnits and W2 is
%                       numberOfHiddenUnits x m. Any array with that many
%                       elements is accepted and read in linear order.
% numberOfHiddenUnits : Number of hidden units.
%
% OUTPUT:
% f  : 0.5 * (sum of squared errors) / n.
% df : Column vector, gradient of f with respect to w, packed in the same
%      order as w ([dW1(:); dW2(:)]). Only computed when requested.
%
% Before the forward pass W1 is divided by b, W2 by numberOfHiddenUnits and
% X by n; the gradient accounts for the weight rescaling.

    sigmoid = @(Z) 1 ./ (1 + exp(-Z));

    [n, m] = size(Y);
    b = size(X, 2);
    h = numberOfHiddenUnits;

    % Unpack and rescale the parameters.
    w = w(:);
    W1 = reshape(w(1:h*b), b, h) ./ b;
    W2 = reshape(w(h*b+1:h*(b+m)), h, m) ./ h;
    X = X ./ n;

    % Forward pass (computed once and shared by the loss and the gradient).
    A1 = sigmoid(X*W1);
    A2 = sigmoid(A1*W2);

    E = Y - A2;
    f = 0.5*sum(sum(E.*E));
    f = f/n;

    if nargout > 1
        % Backpropagate: D2 and D1 are the derivatives of the summed
        % squared error with respect to the pre-activations of each layer.
        D2 = -E .* A2 .* (1 - A2);
        D1 = (D2 * W2') .* A1 .* (1 - A1);

        % Chain rule through the rescaling W1 = w1/b and W2 = w2/h.
        G1 = (X' * D1) ./ b;
        G2 = (A1' * D2) ./ h;

        % Pack in the same order as w: hidden layer first, then output layer.
        df = [G1(:); G2(:)];
        df = df/n;
    end

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
function [] = saveMNISTImages(images, n, k)
% saveMNISTImages Saves every k-th image of the given image matrix, up to
% n images, as PNG files in the folder 'MNIST'.
%
% INPUT:
% images : Matrix with one 28*28 = 784 pixel image per column.
% n      : Number of images to save.
% k      : Step between saved images; columns k, 2k, ..., n*k are written.
%
% Image number i*k is written to 'MNIST/<i*k>.png'. The folder is created
% if it does not exist yet.

    assert(n*k <= size(images, 2), 'Requested image index exceeds the number of images');

    % Requesting the outputs keeps mkdir quiet when the folder already exists.
    [created, message] = mkdir('MNIST');
    assert(created, ['Could not create folder MNIST: ', message]);

    for i = 1: n
        index = i*k;
        imwrite(reshape(images(:,index), 28, 28), strcat('MNIST/', num2str(index), '.png'));
    end
end
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
function [hiddenWeights, outputWeights, error] = trainAdaDelta(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate)
% trainAdaDelta Creates a two-layer perceptron and trains it with
% per-sample AdaDelta updates on a squared-error objective.
%
% INPUT:
% activationFunction             : Activation function used in both layers.
% dActivationFunction            : Derivative of the activation
%                                  function used in both layers.
% numberOfHiddenUnits            : Number of hidden units.
% inputValues                    : Input values for training, one example
%                                  per column (784 x 60000 for MNIST).
% targetValues                   : Target values for training, one example
%                                  per column (10 x 60000 for MNIST).
% epochs                         : Number of epochs to train.
% batchSize                      : Number of randomly drawn examples per
%                                  epoch; the error is plotted after each
%                                  batchSize examples.
% learningRate                   : Not used by AdaDelta; accepted so that
%                                  all trainers share the same signature.
%
% OUTPUT:
% hiddenWeights                  : Weights of the hidden layer.
% outputWeights                  : Weights of the output layer.
% error                          : Mean error norm over the examples of the
%                                  last epoch (NaN if epochs is 0).
%

    % The number of training vectors.
    trainingSetSize = size(inputValues, 2);

    % Input vector has 784 dimensions.
    inputDimensions = size(inputValues, 1);
    % We have to distinguish 10 digits.
    outputDimensions = size(targetValues, 1);

    % Initialize the weights for the hidden layer and the output layer,
    % scaled down by the number of inputs of each layer.
    hiddenWeights = rand(numberOfHiddenUnits, inputDimensions)./inputDimensions;
    outputWeights = rand(outputDimensions, numberOfHiddenUnits)./numberOfHiddenUnits;

    % AdaDelta terms
    % G terms hold the running RMS of the gradients,
    % deltaX terms hold the running RMS of the updates.
    % Both start at zero, as in the AdaDelta algorithm.
    G_1 = zeros(numberOfHiddenUnits, inputDimensions);
    deltaX_1 = zeros(numberOfHiddenUnits, inputDimensions);
    G_2 = zeros(outputDimensions, numberOfHiddenUnits);
    deltaX_2 = zeros(outputDimensions, numberOfHiddenUnits);
    rho = 0.6;

    % Epsilon terms to avoid "division by zero" problems
    eps_1 = 1/numel(G_1);
    eps_2 = 1/numel(G_2);

    % Indices of the examples drawn in the current epoch.
    n = zeros(1, batchSize);

    % Defined up front so the output exists even when epochs is 0.
    error = NaN;

    figure; hold on;
    xlabel('epoch');
    ylabel('error');

    for t = 1: epochs
        for k = 1: batchSize
            % Select which input vector to train on.
            n(k) = floor(rand(1)*trainingSetSize + 1);

            % Propagate the input vector through the network.
            inputVector = inputValues(:, n(k));
            hiddenActualInput = hiddenWeights*inputVector;
            hiddenOutputVector = activationFunction(hiddenActualInput);
            outputActualInput = outputWeights*hiddenOutputVector;
            outputVector = activationFunction(outputActualInput);

            targetVector = targetValues(:, n(k));

            % Backpropagate the errors. Both deltas use the weights from
            % before this step's update.
            outputDelta = dActivationFunction(outputActualInput).*(outputVector - targetVector);
            hiddenDelta = dActivationFunction(hiddenActualInput).*(outputWeights'*outputDelta);

            g_ow = outputDelta*hiddenOutputVector';
            % Accumulating gradient as a decaying average
            G_2 = sqrt(rho .* G_2.^2 + (1 - rho) .* g_ow.^2);
            % Computing the update for output weights: the gradient scaled
            % by RMS(previous updates) / RMS(accumulated gradients)
            update_2 = - g_ow .* sqrt((deltaX_2.^2 + eps_2)./(G_2.^2 + eps_2));
            % Accumulating updates in momentum-like fashion
            deltaX_2 = sqrt(rho .* deltaX_2.^2 + (1 - rho) .* update_2.^2);
            outputWeights = outputWeights + update_2;

            g_hw = hiddenDelta*inputVector';
            % Accumulating gradient as a decaying average
            G_1 = sqrt(rho .* G_1.^2 + (1 - rho) .* g_hw.^2);
            % Computing the update for hidden weights
            update_1 = - g_hw .* sqrt((deltaX_1.^2 + eps_1)./(G_1.^2 + eps_1));
            % Accumulating updates in momentum-like fashion
            deltaX_1 = sqrt(rho .* deltaX_1.^2 + (1 - rho) .* update_1.^2);
            hiddenWeights = hiddenWeights + update_1;
        end;
        disp(t);

        % Calculate the error for plotting, over the examples of this epoch.
        error = 0;
        for k = 1: batchSize
            inputVector = inputValues(:, n(k));
            targetVector = targetValues(:, n(k));

            error = error + norm(activationFunction(outputWeights*activationFunction(hiddenWeights*inputVector)) - targetVector, 2);
        end;
        error = error/batchSize;

        plot(t, error,'k*');
    end;
end
Oops, something went wrong.