added all files

stopdemir · Mar 6, 2024 · 560c324 · 560c324
commit 560c324
Show file tree

Hide file tree

Showing 16 changed files with 826 additions and 0 deletions.
diff --git a/Report Neural Network.pdf b/Report Neural Network.pdf
diff --git a/code_files/applyNNetMinFunc.m b/code_files/applyNNetMinFunc.m
@@ -0,0 +1,53 @@
+% applyNNetMinFunc  Train a two-layer sigmoid network on MNIST with the
+% conjugate-gradient solver of minFunc and report the result on the test set.
+%
+% The parameters are handed to minFunc as ONE column vector laid out exactly
+% the way loss_func unpacks it: first W1 (b x numberOfHiddenUnits), then
+% W2 (numberOfHiddenUnits x m), both in column-major order.
+addpath(genpath('./minFunc_2012'));
+
+% Load MNIST (images: pixels x examples, labels: examples x 1).
+X = loadMNISTImages('train-images.idx3-ubyte');
+labels = loadMNISTLabels('train-labels.idx1-ubyte');
+
+% Transform the labels to one-hot target values: digit d sets row d + 1.
+Y = zeros(10, size(labels, 1));
+for k = 1:size(labels, 1)
+    Y(labels(k) + 1, k) = 1;
+end
+
+% loss_func expects one example per row.
+X = X';
+Y = Y';
+
+[n, m] = size(Y);   % n training examples, m output classes
+b = size(X, 2);     % b input dimensions
+
+numberOfHiddenUnits = 10;
+numW1 = b * numberOfHiddenUnits;   % entries of W1 (b x numberOfHiddenUnits)
+numW2 = numberOfHiddenUnits * m;   % entries of W2 (numberOfHiddenUnits x m)
+
+% The start point must be a column vector: loss_func returns its gradient as
+% a column vector and the solver combines the two element by element.
+W = rand(numW1 + numW2, 1);
+
+maxFunEvals = 100;
+
+fun = @(w)loss_func(Y, X, w, numberOfHiddenUnits);
+activationFunction = @logisticSigmoid;
+
+options = [];
+options.display = 'none';
+options.useMex = 0; % For fair comparison in time
+options.maxFunEvals = maxFunEvals;
+
+%% Conjugate gradient
+options.Method = 'cg';
+[cg_x, cg_f, ~, cg_output] = minFunc(fun, W, options);
+fprintf('Conjugate Gradient Objective Function Value: %f\n', cg_f);
+
+% Unpack the solution with the same layout loss_func uses.
+W1 = reshape(cg_x(1:numW1), b, numberOfHiddenUnits);
+W2 = reshape(cg_x(numW1+1:numW1+numW2), numberOfHiddenUnits, m);
+
+% loss_func evaluates sigmoid(sigmoid((X./n)*(W1./b))*(W2./numberOfHiddenUnits)).
+% Fold those scale factors into the weights so that the unscaled test images
+% are pushed through the same function that was optimized.
+% NOTE(review): assumes validateTwoLayerPerceptron computes
+% activationFunction(outputWeights*activationFunction(hiddenWeights*x)) for
+% each image column x - confirm against that function.
+hiddenWeights = W1' ./ (b * n);
+outputWeights = W2' ./ numberOfHiddenUnits;
+
+% Load validation set.
+inputValues = loadMNISTImages('t10k-images.idx3-ubyte');
+labels = loadMNISTLabels('t10k-labels.idx1-ubyte');
+
+% Choose decision rule.
+fprintf('Validation:\n');
+
+[correctlyClassified, classificationErrors] = validateTwoLayerPerceptron(activationFunction, hiddenWeights, outputWeights, inputValues, labels);
+
+fprintf('Classification errors: %d\n', classificationErrors);
+fprintf('Correctly classified: %d\n', correctlyClassified);
diff --git a/code_files/applyStochasticSquaredErrorTwoLayerPerceptronMNIST.m b/code_files/applyStochasticSquaredErrorTwoLayerPerceptronMNIST.m
@@ -0,0 +1,52 @@
+function [] = applyStochasticSquaredErrorTwoLayerPerceptronMNIST()
+%applyStochasticSquaredErrorTwoLayerPerceptronMNIST Train the two-layer
+%perceptron using the MNIST dataset and evaluate its performance.
+%
+% Loads the MNIST training files from the current folder, trains with
+% trainMomentumSGD (timed with tic/toc), then prints the number of correctly
+% and incorrectly classified images of the MNIST test files.
+
+    % Load MNIST.
+    inputValues = loadMNISTImages('train-images.idx3-ubyte');
+    labels = loadMNISTLabels('train-labels.idx1-ubyte');
+
+    % Transform the labels to one-hot target values: digit d sets row d + 1
+    % of the column belonging to that example.
+    numberOfExamples = size(labels, 1);
+    targetValues = zeros(10, numberOfExamples);
+    targetValues(sub2ind(size(targetValues), labels' + 1, 1:numberOfExamples)) = 1;
+
+    % Choose form of MLP:
+    numberOfHiddenUnits = 100;
+
+    % Choose appropriate parameters.
+    learningRate = 0.1;
+
+    % Choose activation function.
+    activationFunction = @logisticSigmoid;
+    dActivationFunction = @dLogisticSigmoid;
+
+    % Choose batch size and epochs. Remember there are 60k input values.
+    batchSize = 500;
+    epochs = 1000;
+
+    fprintf('Train twolayer perceptron with %d hidden units.\n', numberOfHiddenUnits);
+    % %g, not %d: the learning rate is not an integer.
+    fprintf('Learning rate: %g.\n', learningRate);
+
+    % Alternative trainers with the same signature (enable exactly one):
+    % [hiddenWeights, outputWeights, error] = trainStochasticSquaredErrorTwoLayerPerceptron(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);
+    % [hiddenWeights, outputWeights, error] = trainDiagonalQuasiNewton(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);
+    % [hiddenWeights, outputWeights, error] = trainAdaGrad(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);
+    % [hiddenWeights, outputWeights, error] = trainAdaDelta(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);
+    % [hiddenWeights, outputWeights, error] = trainConjugateGradient(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);
+    tic();
+    % The third output (training error) is not used here; "~" avoids creating
+    % a local variable that shadows the builtin error().
+    [hiddenWeights, outputWeights, ~] = trainMomentumSGD(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate);
+    toc();
+
+    % Load validation set.
+    inputValues = loadMNISTImages('t10k-images.idx3-ubyte');
+    labels = loadMNISTLabels('t10k-labels.idx1-ubyte');
+
+    % Choose decision rule.
+    fprintf('Validation:\n');
+
+    [correctlyClassified, classificationErrors] = validateTwoLayerPerceptron(activationFunction, hiddenWeights, outputWeights, inputValues, labels);
+
+    fprintf('Classification errors: %d\n', classificationErrors);
+    fprintf('Correctly classified: %d\n', correctlyClassified);
+end
diff --git a/code_files/dLogisticSigmoid.m b/code_files/dLogisticSigmoid.m
@@ -0,0 +1,12 @@
+function y = dLogisticSigmoid(x)
+% dLogisticSigmoid First derivative of the logistic sigmoid, element by element.
+%
+% INPUT:
+% x     : Input values.
+%
+% OUTPUT:
+% y     : s.*(1 - s) with s = logisticSigmoid(x), evaluated for every
+% element of x.
+%
+    s = logisticSigmoid(x);
+    y = s.*(1 - s);
+end
diff --git a/code_files/loadMNISTImages.m b/code_files/loadMNISTImages.m
@@ -0,0 +1,26 @@
+function images = loadMNISTImages(filename)
+%loadMNISTImages Read an MNIST IDX3 image file.
+%
+% INPUT:
+% filename : Path of the IDX3 file (big-endian header: magic number 2051,
+%            image count, row count, column count, then one byte per pixel).
+%
+% OUTPUT:
+% images   : [#pixels] x [#images] matrix of doubles in [0,1]. Each column
+%            is one image, flattened in column-major order.
+%
+% Raises an error if the file cannot be opened, the magic number is wrong,
+% or the number of pixel bytes does not match the header.
+
+fp = fopen(filename, 'rb');
+assert(fp ~= -1, ['Could not open ', filename, '']);
+% Close the file on every exit path, including the failing asserts below.
+closeFile = onCleanup(@() fclose(fp));
+
+% isequal also rejects the empty result fread returns for a truncated file.
+magic = fread(fp, 1, 'int32', 0, 'ieee-be');
+assert(isequal(magic, 2051), ['Bad magic number in ', filename, '']);
+
+numImages = fread(fp, 1, 'int32', 0, 'ieee-be');
+numRows = fread(fp, 1, 'int32', 0, 'ieee-be');
+numCols = fread(fp, 1, 'int32', 0, 'ieee-be');
+
+pixels = fread(fp, inf, 'unsigned char');
+assert(isequal(numel(pixels), numImages * numRows * numCols), ['Mismatch in pixel count in ', filename, '']);
+
+% The file stores each image row by row; MATLAB fills arrays column by
+% column, so read as cols x rows and swap the first two dimensions.
+images = reshape(pixels, numCols, numRows, numImages);
+images = permute(images, [2 1 3]);
+
+% Reshape to #pixels x #examples
+images = reshape(images, numRows * numCols, numImages);
+% Convert to double and rescale to [0,1]
+images = double(images) / 255;
+
+end
diff --git a/code_files/loadMNISTLabels.m b/code_files/loadMNISTLabels.m
@@ -0,0 +1,19 @@
+function labels = loadMNISTLabels(filename)
+%loadMNISTLabels Read an MNIST IDX1 label file.
+%
+% INPUT:
+% filename : Path of the IDX1 file (big-endian header: magic number 2049,
+%            label count, then one byte per label).
+%
+% OUTPUT:
+% labels   : [number of labels] x 1 column of doubles holding the byte values.
+%
+% Raises an error if the file cannot be opened, the magic number is wrong,
+% or the number of label bytes does not match the header.
+
+fp = fopen(filename, 'rb');
+assert(fp ~= -1, ['Could not open ', filename, '']);
+% Close the file on every exit path, including the failing asserts below.
+closeFile = onCleanup(@() fclose(fp));
+
+% isequal also rejects the empty result fread returns for a truncated file.
+magic = fread(fp, 1, 'int32', 0, 'ieee-be');
+assert(isequal(magic, 2049), ['Bad magic number in ', filename, '']);
+
+numLabels = fread(fp, 1, 'int32', 0, 'ieee-be');
+
+labels = fread(fp, inf, 'unsigned char');
+
+assert(isequal(size(labels, 1), numLabels), 'Mismatch in label count');
+
+end
diff --git a/code_files/logisticSigmoid.m b/code_files/logisticSigmoid.m
@@ -0,0 +1,13 @@
+function y = logisticSigmoid(x)
+% logisticSigmoid Logistic sigmoid activation function, 1/(1 + e^(-x)).
+%
+% INPUT:
+% x     : Input values.
+%
+% OUTPUT:
+% y     : Result of applying the logistic sigmoid to every element of x.
+%
+
+    decay = exp(-x);
+    y = 1./(1 + decay);
+end
diff --git a/code_files/loss_func.m b/code_files/loss_func.m
@@ -0,0 +1,32 @@
+function [f, df] = loss_func(Y, X, w, numberOfHiddenUnits)
+%loss_func Squared-error loss of a two-layer sigmoid network and its gradient.
+%
+% INPUT:
+% Y                   : Target values, one example per row (n x m).
+% X                   : Input values, one example per row (n x b).
+% w                   : Parameters. The first b*numberOfHiddenUnits entries
+%                       are W1 (b x numberOfHiddenUnits, column-major), the
+%                       next numberOfHiddenUnits*m entries are W2
+%                       (numberOfHiddenUnits x m, column-major).
+% numberOfHiddenUnits : Number of hidden units.
+%
+% OUTPUT:
+% f  : 0.5 * sum of squared errors / n for the network
+%      sigmoid(sigmoid((X./n)*(W1./b))*(W2./numberOfHiddenUnits)).
+% df : Gradient of f with respect to w, as a column vector in the same
+%      order as w ([W1(:); W2(:)]). Only computed when requested.
+
+    [n, m] = size(Y);
+    b = size(X, 2);
+    h = numberOfHiddenUnits;
+
+    % Unpack and rescale the parameters and the inputs.
+    W1 = reshape(w(1:h*b), b, h) ./ b;
+    W2 = reshape(w(h*b+1:h*(b+m)), h, m) ./ h;
+    Xs = X ./ n;
+
+    % Forward pass.
+    A1 = sigmoid(Xs * W1);
+    A2 = sigmoid(A1 * W2);
+
+    E = Y - A2;
+    f = 0.5 * sum(sum(E .* E)) / n;
+
+    if nargout > 1
+        % Backward pass: derivative of the loss w.r.t. each layer's
+        % pre-activation (before the final division by n).
+        D2 = -(E .* A2 .* (1 - A2));           % n x m
+        D1 = (D2 * W2') .* (A1 .* (1 - A1));   % n x h
+
+        % The network uses W1./b and W2./h, so by the chain rule the
+        % gradient w.r.t. the raw entries of w carries the same factors.
+        G1 = (Xs' * D1) ./ b;                  % b x h, matches W1
+        G2 = (A1' * D2) ./ h;                  % h x m, matches W2
+
+        % Same ordering as the parameters in w: W1 first, then W2.
+        df = [G1(:); G2(:)] ./ n;
+    end
+
+end
+
+function S = sigmoid(Z)
+% Element-wise logistic sigmoid.
+    S = 1 ./ (1 + exp(-Z));
+end
diff --git a/code_files/saveMNISTImages.m b/code_files/saveMNISTImages.m
@@ -0,0 +1,9 @@
+function [] = saveMNISTImages(images, n, k)
+% saveMNISTImages Saves every k-th image of the given image matrix as a PNG
+% file, n images in total.
+%
+% INPUT:
+% images : Matrix whose columns are images; each written column is reshaped
+%          to 28 x 28.
+% n      : Number of images to write.
+% k      : Stride between written columns (columns k, 2k, ..., n*k).
+%
+% The files are written to MNIST/<column index>.png relative to the current
+% folder.
+
+    % imwrite cannot write into a folder that does not exist, so create it.
+    if exist(fullfile(pwd, 'MNIST'), 'dir') ~= 7
+        mkdir('MNIST');
+    end
+
+    for i = 1:n
+        column = i * k;
+        imwrite(reshape(images(:, column), 28, 28), strcat('MNIST/', num2str(column), '.png'));
+    end
+end
+
diff --git a/code_files/trainAdaDelta.m b/code_files/trainAdaDelta.m
@@ -0,0 +1,117 @@
+function [hiddenWeights, outputWeights, error] = trainAdaDelta(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate)
+% trainAdaDelta Creates a two-layer perceptron and trains it with per-sample
+% AdaDelta updates on a squared-error objective.
+%
+% INPUT:
+% activationFunction             : Activation function used in both layers.
+% dActivationFunction            : Derivative of the activation
+% function used in both layers.
+% numberOfHiddenUnits            : Number of hidden units.
+% inputValues                    : Input values for training, one example
+% per column (inputDimensions x trainingSetSize).
+% targetValues                   : Target values for training, one example
+% per column (outputDimensions x trainingSetSize).
+% epochs                         : Number of epochs to train.
+% batchSize                      : Number of randomly drawn examples per
+% epoch; the error is plotted after each epoch.
+% learningRate                   : Not used by AdaDelta; accepted so that all
+% training functions share one signature.
+%
+% OUTPUT:
+% hiddenWeights                  : Weights of the hidden layer.
+% outputWeights                  : Weights of the output layer.
+% error                          : Mean Euclidean output error over the
+% examples drawn in the last epoch (NaN if epochs is 0).
+%
+% Opens a new figure and plots the error of every epoch into it.
+
+    % The number of training vectors.
+    trainingSetSize = size(inputValues, 2);
+
+    % Dimensions of the input vectors and of the target vectors.
+    inputDimensions = size(inputValues, 1);
+    outputDimensions = size(targetValues, 1);
+
+    % Initialize the weights for the hidden layer and the output layer,
+    % scaled down by the fan-in of each layer.
+    hiddenWeights = rand(numberOfHiddenUnits, inputDimensions);
+    outputWeights = rand(outputDimensions, numberOfHiddenUnits);
+    hiddenWeights = hiddenWeights./size(hiddenWeights, 2);
+    outputWeights = outputWeights./size(outputWeights, 2);
+
+    % AdaDelta state: running averages of the squared gradients (Eg2_*) and
+    % of the squared updates (Edx2_*). Both start at zero.
+    Eg2_1 = zeros(numberOfHiddenUnits, inputDimensions);
+    Edx2_1 = zeros(numberOfHiddenUnits, inputDimensions);
+    Eg2_2 = zeros(outputDimensions, numberOfHiddenUnits);
+    Edx2_2 = zeros(outputDimensions, numberOfHiddenUnits);
+    rho = 0.6;
+
+    % Epsilon terms to avoid "division by zero" problems and to get the
+    % first update started: one over the number of weights in the layer.
+    eps_1 = 1/(numberOfHiddenUnits*inputDimensions);
+    eps_2 = 1/(outputDimensions*numberOfHiddenUnits);
+
+    % Indices of the examples drawn in the current epoch.
+    n = zeros(batchSize, 1);
+
+    error = NaN;
+
+    figure; hold on;
+
+    for t = 1: epochs
+        for k = 1: batchSize
+            % Select which input vector to train on.
+            n(k) = floor(rand(1)*trainingSetSize + 1);
+
+            % Propagate the input vector through the network.
+            inputVector = inputValues(:, n(k));
+            hiddenActualInput = hiddenWeights*inputVector;
+            hiddenOutputVector = activationFunction(hiddenActualInput);
+            outputActualInput = outputWeights*hiddenOutputVector;
+            outputVector = activationFunction(outputActualInput);
+
+            targetVector = targetValues(:, n(k));
+
+            % Backpropagate the errors (uses the output weights from before
+            % this sample's update).
+            outputDelta = dActivationFunction(outputActualInput).*(outputVector - targetVector);
+            hiddenDelta = dActivationFunction(hiddenActualInput).*(outputWeights'*outputDelta);
+
+            % Output layer: accumulate the squared gradient, scale the
+            % gradient by RMS(previous updates)/RMS(gradients), then
+            % accumulate the squared update.
+            g_ow = outputDelta*hiddenOutputVector';
+            Eg2_2 = rho .* Eg2_2 + (1 - rho) .* g_ow.^2;
+            update_2 = - g_ow .* sqrt((Edx2_2 + eps_2)./(Eg2_2 + eps_2));
+            Edx2_2 = rho .* Edx2_2 + (1 - rho) .* update_2.^2;
+            outputWeights = outputWeights + update_2;
+
+            % Hidden layer: same rule.
+            g_hw = hiddenDelta*inputVector';
+            Eg2_1 = rho .* Eg2_1 + (1 - rho) .* g_hw.^2;
+            update_1 = - g_hw .* sqrt((Edx2_1 + eps_1)./(Eg2_1 + eps_1));
+            Edx2_1 = rho .* Edx2_1 + (1 - rho) .* update_1.^2;
+            hiddenWeights = hiddenWeights + update_1;
+        end;
+        disp(t);
+        % Calculate the error for plotting: mean Euclidean distance between
+        % network output and target over the examples drawn in this epoch.
+        error = 0;
+        for k = 1: batchSize
+            inputVector = inputValues(:, n(k));
+            targetVector = targetValues(:, n(k));
+
+            error = error + norm(activationFunction(outputWeights*activationFunction(hiddenWeights*inputVector)) - targetVector, 2);
+        end;
+        error = error/batchSize;
+
+        plot(t, error,'k*');
+        xlabel('epoch');
+        ylabel('error');
+    end;
+end