
Commit

added all files
stopdemir authored Mar 6, 2024
0 parents commit 560c324
Showing 16 changed files with 826 additions and 0 deletions.
Binary file added Report Neural Network.pdf
Binary file not shown.
53 changes: 53 additions & 0 deletions code_files/applyNNetMinFunc.m
@@ -0,0 +1,53 @@
addpath(genpath('./minFunc_2012'));


% Load MNIST.
X = loadMNISTImages('train-images.idx3-ubyte');
labels = loadMNISTLabels('train-labels.idx1-ubyte');

% Transform the labels into one-hot target vectors
% (rows index digits 0-9, columns index examples).
Y = zeros(10, size(labels, 1));

for n = 1: size(labels, 1)
Y(labels(n) + 1, n) = 1;
end

X = X';
Y = Y';

[n, m] = size(Y);
[~, b] = size(X);

% Stack both weight matrices into one parameter vector for minFunc:
% the b x h hidden weights W1 first, then the h x m output weights W2,
% matching the order in which loss_func unpacks them.
numberOfHiddenUnits = 10;
W1 = rand(b, numberOfHiddenUnits);
W2 = rand(numberOfHiddenUnits, m);
W = [W1(:); W2(:)];

maxFunEvals = 100;

fun = @(w)loss_func(Y, X, w, numberOfHiddenUnits);
activationFunction = @logisticSigmoid;

options = [];
options.display = 'none';
options.useMex = 0; % Disable MEX routines for a fair timing comparison.
options.maxFunEvals = maxFunEvals;

%% Conjugate gradient
options.Method = 'cg';
[cg_x, cg_f, ~, cg_output] = minFunc(fun, W, options);
fprintf('Conjugate Gradient Objective Function Value: %f\n', cg_f);

% Recover the weight matrices from the optimized parameter vector,
% transposed into the h x 784 and 10 x h layout the validator expects.
hiddenWeights = reshape(cg_x(1:b*numberOfHiddenUnits), b, numberOfHiddenUnits)';
outputWeights = reshape(cg_x(b*numberOfHiddenUnits+1:end), numberOfHiddenUnits, m)';

inputValues = loadMNISTImages('t10k-images.idx3-ubyte');
labels = loadMNISTLabels('t10k-labels.idx1-ubyte');

% Choose decision rule.
fprintf('Validation:\n');

[correctlyClassified, classificationErrors] = validateTwoLayerPerceptron(activationFunction, hiddenWeights, outputWeights, inputValues, labels);

fprintf('Classification errors: %d\n', classificationErrors);
fprintf('Correctly classified: %d\n', correctlyClassified);
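
minFunc exposes several other solvers through the same interface, selected via options.Method. As a minimal sketch (reusing fun, W, and options from above; the lbfgs_* names are illustrative), a second run with L-BFGS for comparison:

% Sketch: compare L-BFGS on the same objective with the same budget.
options.Method = 'lbfgs';
[lbfgs_x, lbfgs_f, ~, lbfgs_output] = minFunc(fun, W, options);
fprintf('L-BFGS Objective Function Value: %f\n', lbfgs_f);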
52 changes: 52 additions & 0 deletions code_files/applyStochasticSquaredErrorTwoLayerPerceptronMNIST.m
@@ -0,0 +1,52 @@
function [] = applyStochasticSquaredErrorTwoLayerPerceptronMNIST()
%applyStochasticSquaredErrorTwoLayerPerceptronMNIST Train the two-layer
%perceptron using the MNIST dataset and evaluate its performance.

% Load MNIST.
inputValues = loadMNISTImages('train-images.idx3-ubyte');
labels = loadMNISTLabels('train-labels.idx1-ubyte');

% Transform the labels into one-hot target vectors
% (rows index digits 0-9, columns index examples).
targetValues = zeros(10, size(labels, 1));
for n = 1: size(labels, 1)
targetValues(labels(n) + 1, n) = 1;
end

% Choose form of MLP:
numberOfHiddenUnits = 100;

% Choose appropriate parameters.
learningRate = 0.1;

% Choose activation function.
activationFunction = @logisticSigmoid;
dActivationFunction = @dLogisticSigmoid;

% Choose batch size and epochs. Remember there are 60k input values.
batchSize = 500;
epochs = 1000;

fprintf('Train two-layer perceptron with %d hidden units.\n', numberOfHiddenUnits);
fprintf('Learning rate: %f.\n', learningRate);

% Select one trainer; the commented alternatives share the same interface.
% [hiddenWeights, outputWeights, error] = trainStochasticSquaredErrorTwoLayerPerceptron(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);
tic();
[hiddenWeights, outputWeights, error] = trainMomentumSGD(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);
toc();
% [hiddenWeights, outputWeights, error] = trainDiagonalQuasiNewton(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);
% [hiddenWeights, outputWeights, error] = trainAdaGrad(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);
% [hiddenWeights, outputWeights, error] = trainAdaDelta(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);
% [hiddenWeights, outputWeights, error] = trainConjugateGradient(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);

% Load validation set.
inputValues = loadMNISTImages('t10k-images.idx3-ubyte');
labels = loadMNISTLabels('t10k-labels.idx1-ubyte');

% Choose decision rule.
fprintf('Validation:\n');

[correctlyClassified, classificationErrors] = validateTwoLayerPerceptron(activationFunction, hiddenWeights, outputWeights, inputValues, labels);

fprintf('Classification errors: %d\n', classificationErrors);
fprintf('Correctly classified: %d\n', correctlyClassified);
end
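
The label-to-target loop above can also be written without a loop. A minimal sketch of an equivalent vectorized construction (assuming labels is the N x 1 vector returned by loadMNISTLabels):

% Vectorized one-hot encoding: rows index digits 0-9, columns index examples.
idx = (1:numel(labels))';
targetValues = full(sparse(labels + 1, idx, 1, 10, numel(labels)));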
12 changes: 12 additions & 0 deletions code_files/dLogisticSigmoid.m
@@ -0,0 +1,12 @@
function y = dLogisticSigmoid(x)
% dLogisticSigmoid Derivative of the logistic sigmoid.
%
% INPUT:
% x : Input vector.
%
% OUTPUT:
% y : Output vector where the derivative of the logistic sigmoid was
% applied element by element.
%
s = logisticSigmoid(x);
y = s.*(1 - s);
end
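
For reference, the identity this function implements: with $\sigma(x) = 1/(1+e^{-x})$,

$$\sigma'(x) = \frac{e^{-x}}{(1+e^{-x})^{2}} = \sigma(x)\bigl(1-\sigma(x)\bigr),$$

which is why the derivative can be evaluated from the sigmoid itself.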
26 changes: 26 additions & 0 deletions code_files/loadMNISTImages.m
@@ -0,0 +1,26 @@
function images = loadMNISTImages(filename)
%loadMNISTImages returns a 784x[number of MNIST images] matrix containing
%the raw MNIST images, one flattened 28x28 image per column, rescaled to [0,1]

fp = fopen(filename, 'rb');
assert(fp ~= -1, ['Could not open ', filename, '']);

magic = fread(fp, 1, 'int32', 0, 'ieee-be');
assert(magic == 2051, ['Bad magic number in ', filename, '']);

numImages = fread(fp, 1, 'int32', 0, 'ieee-be');
numRows = fread(fp, 1, 'int32', 0, 'ieee-be');
numCols = fread(fp, 1, 'int32', 0, 'ieee-be');

images = fread(fp, inf, 'unsigned char');
images = reshape(images, numCols, numRows, numImages);
images = permute(images,[2 1 3]);

fclose(fp);

% Reshape to #pixels x #examples
images = reshape(images, size(images, 1) * size(images, 2), size(images, 3));
% Convert to double and rescale to [0,1]
images = double(images) / 255;

end
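
A quick sanity check on the loader; a minimal sketch (assuming the training file is present and the Image Processing Toolbox's imshow is available) that displays the first digit:

% Columns of `images` are flattened 28x28 digits in [0,1].
images = loadMNISTImages('train-images.idx3-ubyte');
imshow(reshape(images(:, 1), 28, 28));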
19 changes: 19 additions & 0 deletions code_files/loadMNISTLabels.m
@@ -0,0 +1,19 @@
function labels = loadMNISTLabels(filename)
%loadMNISTLabels returns a [number of MNIST images]x1 matrix containing
%the labels for the MNIST images

fp = fopen(filename, 'rb');
assert(fp ~= -1, ['Could not open ', filename, '']);

magic = fread(fp, 1, 'int32', 0, 'ieee-be');
assert(magic == 2049, ['Bad magic number in ', filename, '']);

numLabels = fread(fp, 1, 'int32', 0, 'ieee-be');

labels = fread(fp, inf, 'unsigned char');

assert(size(labels,1) == numLabels, 'Mismatch in label count');

fclose(fp);

end
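
A similar sanity check for the labels; a minimal sketch counting how many examples of each digit the training set contains:

% Count examples per digit (row i holds the count for digit i-1).
labels = loadMNISTLabels('train-labels.idx1-ubyte');
counts = accumarray(labels + 1, 1);
disp(counts');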
13 changes: 13 additions & 0 deletions code_files/logisticSigmoid.m
@@ -0,0 +1,13 @@
function y = logisticSigmoid(x)
% logisticSigmoid Logistic sigmoid activation function
%
% INPUT:
% x : Input vector.
%
% OUTPUT:
% y : Output vector where the logistic sigmoid was applied element by
% element.
%

y = 1./(1 + exp(-x));
end
32 changes: 32 additions & 0 deletions code_files/loss_func.m
@@ -0,0 +1,32 @@
function [f, df] = loss_func(Y, X, w, numberOfHiddenUnits)
% loss_func Mean squared error of a two-layer sigmoid network on (X, Y)
% and its gradient with respect to the stacked parameters w = [W1(:); W2(:)].

function [S] = sigmoid(Z)
S = 1 ./ (1 + exp(-Z));
end

[n, m] = size(Y);
[~, b] = size(X);

% Unpack the parameter vector: W1 (b x h) first, then W2 (h x m).
W1 = reshape(w(1:numberOfHiddenUnits*b), b, numberOfHiddenUnits);
W2 = reshape(w(numberOfHiddenUnits*b+1:numberOfHiddenUnits*(b+m)), numberOfHiddenUnits, m);
% Rescale the weights and inputs to keep the sigmoids away from saturation.
W1 = W1 ./ b;
W2 = W2 ./ size(W2, 1);
X = X ./ n;

% Forward pass.
A1 = sigmoid(X*W1);
A2 = sigmoid(A1*W2);

% Mean squared error over the n examples.
E = Y - A2;
f = 0.5*sum(sum(E.*E));
f = f/n;
ones_2 = ones(size(A2));

% Backward pass: gradients with respect to W2 and W1.
G2 = -A1' * (E .* A2 .* (ones_2 - A2));

ones_1 = ones(size(A1));
G1 = -X' * ((E .* A2 .* (ones_2 - A2)) * W2' .* (A1 .* (ones_1 - A1)));

% Pack the gradient in the same order as w is unpacked: W1 first, then W2.
df = [G1(:); G2(:)];
df = df/n;

end
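
In matrix form, writing $A_1 = \sigma(XW_1)$ and $A_2 = \sigma(A_1 W_2)$ (using the rescaled $W_1$, $W_2$, $X$ from the top of the function), $E = Y - A_2$, and $\odot$ for the elementwise product, the function returns

$$f = \frac{1}{2n}\lVert E \rVert_F^2, \qquad
\nabla_{W_2} f = -\frac{1}{n}\, A_1^{\top}\bigl(E \odot A_2 \odot (1-A_2)\bigr), \qquad
\nabla_{W_1} f = -\frac{1}{n}\, X^{\top}\Bigl(\bigl(E \odot A_2 \odot (1-A_2)\bigr) W_2^{\top} \odot A_1 \odot (1-A_1)\Bigr),$$

stacked as $[\nabla_{W_1}(:);\ \nabla_{W_2}(:)]$ to match the layout of w.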
9 changes: 9 additions & 0 deletions code_files/saveMNISTImages.m
@@ -0,0 +1,9 @@
function [] = saveMNISTImages(images, n, k)
% saveMNISTImages Saves every k-th image of the MNIST training set as a
% PNG in the MNIST/ directory, up to n images in total.

for i = 1: n
imwrite(reshape(images(:,i*k), 28, 28), strcat('MNIST/', num2str(i*k), '.png'));
end
end

117 changes: 117 additions & 0 deletions code_files/trainAdaDelta.m
@@ -0,0 +1,117 @@
function [hiddenWeights, outputWeights, error] = trainAdaDelta(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate)
% trainAdaDelta Creates a two-layer perceptron and trains it on the
% MNIST dataset using AdaDelta updates.
%
% INPUT:
% activationFunction : Activation function used in both layers.
% dActivationFunction : Derivative of the activation
% function used in both layers.
% numberOfHiddenUnits : Number of hidden units.
% inputValues : Input values for training (784 x 60000)
% targetValues : Target values for training (10 x 60000)
% epochs : Number of epochs to train.
% batchSize : Number of randomly sampled images per epoch;
% the error is plotted over this batch.
% learningRate : Unused; AdaDelta adapts its own step sizes.
%
% OUTPUT:
% hiddenWeights : Weights of the hidden layer.
% outputWeights : Weights of the output layer.
% error : Mean error over the last batch of the final epoch.
%

% The number of training vectors.
trainingSetSize = size(inputValues, 2);

% Input vector has 784 dimensions.
inputDimensions = size(inputValues, 1);
% We have to distinguish 10 digits.
outputDimensions = size(targetValues, 1);

% Initialize the weights for the hidden layer and the output layer.
hiddenWeights = rand(numberOfHiddenUnits, inputDimensions);
outputWeights = rand(outputDimensions, numberOfHiddenUnits);

% AdaDelta accumulators:
% G terms hold the running RMS of the gradients,
% deltaX terms hold the running RMS of the updates.
G_1 = rand(numberOfHiddenUnits, inputDimensions);
G_1_next = rand(numberOfHiddenUnits, inputDimensions);
deltaX_1 = rand(numberOfHiddenUnits, inputDimensions);
deltaX_1_next = rand(numberOfHiddenUnits, inputDimensions);
G_2 = rand(outputDimensions, numberOfHiddenUnits);
G_2_next = rand(outputDimensions, numberOfHiddenUnits);
deltaX_2 = rand(outputDimensions, numberOfHiddenUnits);
deltaX_2_next = rand(outputDimensions, numberOfHiddenUnits);
rho = 0.6;

% Initializing epsilon terms to avoid "division by zero" problems
g1_dims = size(G_1);
g2_dims = size(G_2);
eps_1 = repmat(1/(g1_dims(1)*g1_dims(2)), size(G_1));
eps_2 = repmat(1/(g2_dims(1)*g2_dims(2)), size(G_2));

% Rescale the random weights by their fan-in.
hiddenWeights = hiddenWeights./size(hiddenWeights, 2);
outputWeights = outputWeights./size(outputWeights, 2);

n = zeros(batchSize, 1);

figure; hold on;

for t = 1: epochs
for k = 1: batchSize
% Select which input vector to train on.
n(k) = floor(rand(1)*trainingSetSize + 1);

% Propagate the input vector through the network.
inputVector = inputValues(:, n(k));
hiddenActualInput = hiddenWeights*inputVector;
hiddenOutputVector = activationFunction(hiddenActualInput);
outputActualInput = outputWeights*hiddenOutputVector;
outputVector = activationFunction(outputActualInput);

targetVector = targetValues(:, n(k));

% Backpropagate the errors.
outputDelta = dActivationFunction(outputActualInput).*(outputVector - targetVector);
hiddenDelta = dActivationFunction(hiddenActualInput).*(outputWeights'*outputDelta);

g_ow = outputDelta*hiddenOutputVector';
% Running RMS of the output-weight gradients (AdaGrad-like accumulation).
G_2_next = sqrt(rho .* G_2.^2 + (1 - rho) .* g_ow.^2);
% AdaDelta update for the output weights: the gradient is scaled by the
% ratio of the accumulated update RMS to the accumulated gradient RMS.
update_2 = - g_ow .* sqrt((deltaX_2.^2 + eps_2)./(G_2_next.^2 + eps_2));
% Running RMS of the updates (momentum-like accumulation).
deltaX_2_next = sqrt(rho .* deltaX_2.^2 + (1 - rho) .* update_2.^2);
outputWeights = outputWeights + update_2;

g_hw = hiddenDelta*inputVector';
% Running RMS of the hidden-weight gradients (AdaGrad-like accumulation).
G_1_next = sqrt(rho .* G_1.^2 + (1 - rho) .* g_hw.^2);
% AdaDelta update for the hidden weights.
update_1 = - g_hw .* sqrt((deltaX_1.^2 + eps_1)./(G_1_next.^2 + eps_1));
% Running RMS of the updates (momentum-like accumulation).
deltaX_1_next = sqrt(rho .* deltaX_1.^2 + (1 - rho) .* update_1.^2);
hiddenWeights = hiddenWeights + update_1;

G_1 = G_1_next;
G_2 = G_2_next;
deltaX_1 = deltaX_1_next;
deltaX_2 = deltaX_2_next;

end
disp(t);
% Calculate the error for plotting.
error = 0;
for k = 1: batchSize
inputVector = inputValues(:, n(k));
targetVector = targetValues(:, n(k));

error = error + norm(activationFunction(outputWeights*activationFunction(hiddenWeights*inputVector)) - targetVector, 2);
end
error = error/batchSize;

plot(t, error,'k*');
xlabel('epoch');
ylabel('error');
end
end
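
For comparison, the update rule from Zeiler's ADADELTA paper (arXiv:1212.5701), which the loop above implements with the accumulators stored as RMS values (and a per-element $\epsilon$ in place of a scalar):

$$E[g^2]_t = \rho\, E[g^2]_{t-1} + (1-\rho)\, g_t^2, \qquad
\Delta x_t = -\frac{\sqrt{E[\Delta x^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}}\; g_t, \qquad
E[\Delta x^2]_t = \rho\, E[\Delta x^2]_{t-1} + (1-\rho)\, \Delta x_t^2.$$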