From fd6f7f3261a814b748eeb569b76b5e65000f038d Mon Sep 17 00:00:00 2001 From: Abhineet Choudhary Date: Tue, 25 Feb 2020 16:31:43 +0530 Subject: [PATCH 1/6] average callback and optimizers --- .../average_optimizers_callback.ipynb | 459 ++++++++++++++++++ 1 file changed, 459 insertions(+) create mode 100644 docs/tutorials/average_optimizers_callback.ipynb diff --git a/docs/tutorials/average_optimizers_callback.ipynb b/docs/tutorials/average_optimizers_callback.ipynb new file mode 100644 index 0000000000..8d5d28bcde --- /dev/null +++ b/docs/tutorials/average_optimizers_callback.ipynb @@ -0,0 +1,459 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "average_optimizers_callback.ipynb", + "provenance": [], + "collapsed_sections": [ + "Tce3stUlHN0L" + ], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Tce3stUlHN0L" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "metadata": { + "cellView": "both", + "colab_type": "code", + "id": "tuOe1ymfHZPu", + "colab": {} + }, + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "qFdPvlXBOdUN" + }, + "source": [ + "# Title" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "xHxb-dlhMIzW" + }, + "source": [ + "## Overview\n", + "\n", + "This notebook demonstrates how to use Moving Average Optimizer along with the Model Average Checkpoint from tensorflow addons pagkage.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "o2UNySlpXkbl", + "colab_type": "text" + }, + "source": [ + "## Moving Averaging \n", + "\n", + "> The advantage of Moving Averaging is that they are less prone to rampant loss shifts or irregular data representation in the latest batch. It gives a smooothened and a more genral idea of the model training until some point.\n", + "\n", + "## Stocastic Averaging\n", + "\n", + "> Stocastic Weight Averaging converges to wider optimas. By doing so, it resembles geometric ensembeling. SWA is a simple method to improve model performance when used as a wrapper around other optimizers and averaging results from different points of trajectory of the inner optimizer.\n", + "\n", + "## Model Average Checkpoint \n", + "\n", + "> ```callbacks.ModelCheckpoint``` doesn't give you the option to save moving average weights in the middle of traning, which is why Model Average Optimizers required a custom callback. Using the ```update_weights``` parameter, ```ModelAverageCheckpoint``` allows you to:\n", + "1. Assign the moving average weights to the model, and save them.\n", + "2. Keep the old non-averaged weights, but the saved model uses the average weights." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "MUXex9ctTuDB" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "IqR2PQG4ZaZ0", + "colab": {} + }, + "source": [ + "try:\n", + " %tensorflow_version 2.x\n", + "except:\n", + " pass\n", + "\n", + "import tensorflow as tf" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "vc1eVXwjiFle", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!pip install --upgrade tfa-nightly\n", + "\n", + "import tensorflow_addons as tfa" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "4hnJ2rDpI38-", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import tensorflow_datasets as tfds\n", + "import numpy as np\n", + "from matplotlib import pyplot as plt\n", + "import os" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Iox_HZNNYLEB", + "colab_type": "text" + }, + "source": [ + "## Build Model " + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "KtylpxOmceaC", + "colab": {} + }, + "source": [ + "def create_model(opt):\n", + " model = tf.keras.models.Sequential([\n", + " tf.keras.layers.Flatten(), \n", + " tf.keras.layers.Dense(64, activation='relu', name='dense_1'),\n", + " tf.keras.layers.Dense(64, activation='relu', name='dense_2'),\n", + " tf.keras.layers.Dense(10, activation='softmax')\n", + " ])\n", + "\n", + " model.compile(optimizer=opt,\n", + " loss='sparse_categorical_crossentropy',\n", + " metrics=['accuracy'])\n", + "\n", + " return model" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "pwdM2pl3RSPb" + }, + "source": [ + "## Prepare Dataset" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "mMOeXVmbdilM", + "colab": {} + }, + "source": [ + "#Load Fashion MNIST dataset\n", + "train, test = tf.keras.datasets.fashion_mnist.load_data()\n", + "\n", + "images, labels = train\n", + "images = images/255.0\n", + "labels = labels.astype(np.int32)\n", + "\n", + "fmnist_train_ds = tf.data.Dataset.from_tensor_slices((images, labels))\n", + "fmnist_train_ds = fmnist_train_ds.shuffle(5000).batch(32)\n", + "\n", + "test_images, test_labels = test" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iEbhI_eajpJe", + "colab_type": "text" + }, + "source": [ + "We will be comparing three optimizers here:\n", + "\n", + "* Unwrapped SGD\n", + "* SGD with Moving Average\n", + "* SGD with Stochastic Weight Averaging\n", + "\n", + "And see how they perform with the same model." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_Q76K1fNk7Va", + "colab_type": "code", + "colab": {} + }, + "source": [ + "#Optimizers \n", + "sgd = tf.keras.optimizers.SGD(0.01)\n", + "moving_avg_sgd = tfa.optimizers.MovingAverage(sgd)\n", + "stocastic_avg_sgd = tfa.optimizers.SWA(sgd)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nXlMX4p9qHwg", + "colab_type": "text" + }, + "source": [ + "Both ```MovingAverage``` and ```StocasticAverage``` optimers use ```ModelAverageCheckpoint```." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "SnvZjt34qEHY", + "colab_type": "code", + "colab": {} + }, + "source": [ + "#Callback \n", + "checkpoint_path = \"./training/cp-{epoch:04d}.ckpt\"\n", + "checkpoint_dir = os.path.dirname(checkpoint_path)\n", + "\n", + "cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_dir,\n", + " save_weights_only=True,\n", + " verbose=1)\n", + "avg_callback = tfa.callbacks.average_model_checkpoint.AverageModelCheckpoint(filepath=checkpoint_dir, \n", + " update_weights=True)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "uabQmjMtRtzs" + }, + "source": [ + "## Train Model\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SPmifETHmPix", + "colab_type": "text" + }, + "source": [ + "### Vanilla SGD Optimizer " + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Xy8W4LYppadJ", + "colab_type": "code", + "colab": {} + }, + "source": [ + "#Build Model\n", + "model = create_model(sgd)\n", + "\n", + "#Train the network\n", + "model.fit(fmnist_train_ds, epochs=5, callbacks=[cp_callback])" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "uU2iQ6HAZ6-E", + "colab_type": "code", + "colab": {} + }, + "source": [ + "#Evalute results\n", + "model.load_weights(checkpoint_dir)\n", + "loss, accuracy = model.evaluate(test_images, test_labels, batch_size=32, verbose=2)\n", + "print(\"Loss :\", loss)\n", + "print(\"Accuracy :\", accuracy)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lAvhD4unmc6W", + "colab_type": "text" + }, + "source": [ + "### Moving Average SGD" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "--NIjBp-mhVb", + "colab_type": "code", + "colab": {} + }, + "source": [ + "#Build Model\n", + "model = create_model(moving_avg_sgd)\n", + "\n", + "#Train the network\n", + "model.fit(fmnist_train_ds, epochs=5, callbacks=[cp_callback])" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "zRAym9EBmnW9", + "colab_type": "code", + "colab": {} + }, + "source": [ + "#Evalute results\n", + "loss, accuracy = model.evaluate(test_images, test_labels, batch_size=32, verbose=2)\n", + "print(\"Loss :\", loss)\n", + "print(\"Accuracy :\", accuracy)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K98lbU07m_Bk", + "colab_type": "text" + }, + "source": [ + "### Stocastic Weight Average SGD " + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ia7ALKefnXWQ", + "colab_type": "code", + "colab": {} + }, + "source": [ + "#Build Model\n", + "model = create_model(stocastic_avg_sgd)\n", + "\n", + "#Train the network\n", + "model.fit(fmnist_train_ds, epochs=5, callbacks=[cp_callback])" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "EOT2E9NBoeHI", + "colab_type": "code", + "colab": {} + }, + "source": [ + "#Evalute results\n", + "loss, accuracy = model.evaluate(test_images, test_labels, batch_size=32, verbose=2)\n", + "print(\"Loss :\", loss)\n", + "print(\"Accuracy :\", accuracy)" + ], + "execution_count": 0, + "outputs": [] + } + ] +} \ No newline at end of file From f9e9cb650a9a941edcbaebe642679943b9e24efa Mon Sep 17 00:00:00 2001 From: Abhineet Choudhary Date: Tue, 25 Feb 2020 16:40:06 +0530 Subject: [PATCH 2/6] fixed links --- docs/tutorials/average_optimizers_callback.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/tutorials/average_optimizers_callback.ipynb b/docs/tutorials/average_optimizers_callback.ipynb index 8d5d28bcde..bb67855bef 100644 --- a/docs/tutorials/average_optimizers_callback.ipynb +++ b/docs/tutorials/average_optimizers_callback.ipynb @@ -81,10 +81,10 @@ "source": [ "\n", " \n", " \n", "
\n", - " Run in Google Colab\n", + " Run in Google Colab\n", " \n", - " View source on GitHub\n", + " View source on GitHub\n", "
" ] From b0709ac8d1d4c1b03045222601213c1d64c7bc4e Mon Sep 17 00:00:00 2001 From: Abhineet Choudhary Date: Tue, 25 Feb 2020 16:53:21 +0530 Subject: [PATCH 3/6] added SWA --- .../average_optimizers_callback.ipynb | 255 ++++++++++++++++-- 1 file changed, 228 insertions(+), 27 deletions(-) diff --git a/docs/tutorials/average_optimizers_callback.ipynb b/docs/tutorials/average_optimizers_callback.ipynb index bb67855bef..896491b576 100644 --- a/docs/tutorials/average_optimizers_callback.ipynb +++ b/docs/tutorials/average_optimizers_callback.ipynb @@ -81,10 +81,10 @@ "source": [ "\n", " \n", " \n", "
\n", - " Run in Google Colab\n", + " Run in Google Colab\n", " \n", - " View source on GitHub\n", + " View source on GitHub\n", "
" ] @@ -140,7 +140,11 @@ "metadata": { "colab_type": "code", "id": "IqR2PQG4ZaZ0", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "a683980d-3cf0-4da0-e680-ff6f70364e72" }, "source": [ "try:\n", @@ -150,23 +154,44 @@ "\n", "import tensorflow as tf" ], - "execution_count": 0, - "outputs": [] + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "text": [ + "TensorFlow 2.x selected.\n" + ], + "name": "stdout" + } + ] }, { "cell_type": "code", "metadata": { "id": "vc1eVXwjiFle", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 52 + }, + "outputId": "111f2d45-8276-4c73-90c7-c3c5a01b7603" }, "source": [ "!pip install --upgrade tfa-nightly\n", "\n", "import tensorflow_addons as tfa" ], - "execution_count": 0, - "outputs": [] + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Requirement already up-to-date: tfa-nightly in /usr/local/lib/python3.6/dist-packages (0.9.0.dev20200225)\n", + "Requirement already satisfied, skipping upgrade: typeguard>=2.7 in /tensorflow-2.1.0/python3.6 (from tfa-nightly) (2.7.1)\n" + ], + "name": "stdout" + } + ] }, { "cell_type": "code", @@ -340,7 +365,11 @@ "metadata": { "id": "Xy8W4LYppadJ", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 399 + }, + "outputId": "5e049e0f-26ab-48e1-be35-cf4a907fbb49" }, "source": [ "#Build Model\n", @@ -349,25 +378,78 @@ "#Train the network\n", "model.fit(fmnist_train_ds, epochs=5, callbacks=[cp_callback])" ], - "execution_count": 0, - "outputs": [] + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Train for 1875 steps\n", + "Epoch 1/5\n", + "1874/1875 [============================>.] - ETA: 0s - loss: 0.7737 - accuracy: 0.7426\n", + "Epoch 00001: saving model to ./training\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.7737 - accuracy: 0.7426\n", + "Epoch 2/5\n", + "1869/1875 [============================>.] - ETA: 0s - loss: 0.5041 - accuracy: 0.8245\n", + "Epoch 00002: saving model to ./training\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.5037 - accuracy: 0.8247\n", + "Epoch 3/5\n", + "1866/1875 [============================>.] - ETA: 0s - loss: 0.4575 - accuracy: 0.8408\n", + "Epoch 00003: saving model to ./training\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4576 - accuracy: 0.8407\n", + "Epoch 4/5\n", + "1855/1875 [============================>.] - ETA: 0s - loss: 0.4298 - accuracy: 0.8483\n", + "Epoch 00004: saving model to ./training\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4299 - accuracy: 0.8483\n", + "Epoch 5/5\n", + "1860/1875 [============================>.] - ETA: 0s - loss: 0.4117 - accuracy: 0.8553\n", + "Epoch 00005: saving model to ./training\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4113 - accuracy: 0.8554\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 9 + } + ] }, { "cell_type": "code", "metadata": { "id": "uU2iQ6HAZ6-E", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 69 + }, + "outputId": "8268002a-04b2-4de3-8ae1-71023578f81c" }, "source": [ "#Evalute results\n", - "model.load_weights(checkpoint_dir)\n", "loss, accuracy = model.evaluate(test_images, test_labels, batch_size=32, verbose=2)\n", "print(\"Loss :\", loss)\n", "print(\"Accuracy :\", accuracy)" ], - "execution_count": 0, - "outputs": [] + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "text": [ + "10000/10000 - 0s - loss: 93.3245 - accuracy: 0.7870\n", + "Loss : 93.32448555297852\n", + "Accuracy : 0.787\n" + ], + "name": "stdout" + } + ] }, { "cell_type": "markdown", @@ -384,7 +466,11 @@ "metadata": { "id": "--NIjBp-mhVb", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 471 + }, + "outputId": "d21bfc64-c1a6-4031-f8a4-0e633fefc1a6" }, "source": [ "#Build Model\n", @@ -393,15 +479,62 @@ "#Train the network\n", "model.fit(fmnist_train_ds, epochs=5, callbacks=[cp_callback])" ], - "execution_count": 0, - "outputs": [] + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Train for 1875 steps\n", + "Epoch 1/5\n", + "WARNING:tensorflow:From /tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/resource_variable_ops.py:1786: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "If using Keras pass *_constraint arguments to layers.\n", + "1857/1875 [============================>.] - ETA: 0s - loss: 0.7890 - accuracy: 0.7370\n", + "Epoch 00001: saving model to ./training\n", + "1875/1875 [==============================] - 5s 3ms/step - loss: 0.7866 - accuracy: 0.7377\n", + "Epoch 2/5\n", + "1871/1875 [============================>.] - ETA: 0s - loss: 0.5071 - accuracy: 0.8219\n", + "Epoch 00002: saving model to ./training\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.5071 - accuracy: 0.8219\n", + "Epoch 3/5\n", + "1865/1875 [============================>.] - ETA: 0s - loss: 0.4584 - accuracy: 0.8380\n", + "Epoch 00003: saving model to ./training\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4583 - accuracy: 0.8381\n", + "Epoch 4/5\n", + "1867/1875 [============================>.] - ETA: 0s - loss: 0.4309 - accuracy: 0.8487\n", + "Epoch 00004: saving model to ./training\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4304 - accuracy: 0.8489\n", + "Epoch 5/5\n", + "1869/1875 [============================>.] - ETA: 0s - loss: 0.4111 - accuracy: 0.8539\n", + "Epoch 00005: saving model to ./training\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4115 - accuracy: 0.8538\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 11 + } + ] }, { "cell_type": "code", "metadata": { "id": "zRAym9EBmnW9", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 69 + }, + "outputId": "59eeb8cc-6bff-461a-b367-c20d0912532a" }, "source": [ "#Evalute results\n", @@ -409,8 +542,18 @@ "print(\"Loss :\", loss)\n", "print(\"Accuracy :\", accuracy)" ], - "execution_count": 0, - "outputs": [] + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "text": [ + "10000/10000 - 0s - loss: 106.2339 - accuracy: 0.7722\n", + "Loss : 106.23389839782715\n", + "Accuracy : 0.7722\n" + ], + "name": "stdout" + } + ] }, { "cell_type": "markdown", @@ -427,7 +570,11 @@ "metadata": { "id": "Ia7ALKefnXWQ", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 399 + }, + "outputId": "1a6e8c5b-534e-4c2c-8d3e-ea12ee122542" }, "source": [ "#Build Model\n", @@ -436,15 +583,59 @@ "#Train the network\n", "model.fit(fmnist_train_ds, epochs=5, callbacks=[cp_callback])" ], - "execution_count": 0, - "outputs": [] + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Train for 1875 steps\n", + "Epoch 1/5\n", + "1872/1875 [============================>.] - ETA: 0s - loss: 0.7673 - accuracy: 0.7464\n", + "Epoch 00001: saving model to ./training\n", + "1875/1875 [==============================] - 6s 3ms/step - loss: 0.7670 - accuracy: 0.7465\n", + "Epoch 2/5\n", + "1871/1875 [============================>.] - ETA: 0s - loss: 0.5034 - accuracy: 0.8248\n", + "Epoch 00002: saving model to ./training\n", + "1875/1875 [==============================] - 5s 3ms/step - loss: 0.5034 - accuracy: 0.8247\n", + "Epoch 3/5\n", + "1858/1875 [============================>.] - ETA: 0s - loss: 0.4557 - accuracy: 0.8407\n", + "Epoch 00003: saving model to ./training\n", + "1875/1875 [==============================] - 5s 3ms/step - loss: 0.4561 - accuracy: 0.8407\n", + "Epoch 4/5\n", + "1860/1875 [============================>.] - ETA: 0s - loss: 0.4266 - accuracy: 0.8509\n", + "Epoch 00004: saving model to ./training\n", + "1875/1875 [==============================] - 5s 3ms/step - loss: 0.4273 - accuracy: 0.8507\n", + "Epoch 5/5\n", + "1859/1875 [============================>.] - ETA: 0s - loss: 0.4073 - accuracy: 0.8570\n", + "Epoch 00005: saving model to ./training\n", + "1875/1875 [==============================] - 5s 3ms/step - loss: 0.4074 - accuracy: 0.8569\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 13 + } + ] }, { "cell_type": "code", "metadata": { "id": "EOT2E9NBoeHI", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 69 + }, + "outputId": "4821a2b4-8b27-4312-9cd8-ff7961cb84f9" }, "source": [ "#Evalute results\n", @@ -452,8 +643,18 @@ "print(\"Loss :\", loss)\n", "print(\"Accuracy :\", accuracy)" ], - "execution_count": 0, - "outputs": [] + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "text": [ + "10000/10000 - 0s - loss: 85.0133 - accuracy: 0.7989\n", + "Loss : 85.01332214202881\n", + "Accuracy : 0.7989\n" + ], + "name": "stdout" + } + ] } ] } \ No newline at end of file From 6bfed31c923f76feab620a7ab116e6c909f1a3ee Mon Sep 17 00:00:00 2001 From: Abhineet Choudhary Date: Tue, 25 Feb 2020 23:41:32 +0530 Subject: [PATCH 4/6] removed update, changed alias, 4 spaces --- .../average_optimizers_callback.ipynb | 192 ++++++++---------- 1 file changed, 84 insertions(+), 108 deletions(-) diff --git a/docs/tutorials/average_optimizers_callback.ipynb b/docs/tutorials/average_optimizers_callback.ipynb index 896491b576..0942e99911 100644 --- a/docs/tutorials/average_optimizers_callback.ipynb +++ b/docs/tutorials/average_optimizers_callback.ipynb @@ -140,19 +140,20 @@ "metadata": { "colab_type": "code", "id": "IqR2PQG4ZaZ0", + "outputId": "dd92618d-ad8d-4956-d497-387ac6be541d", "colab": { "base_uri": "https://localhost:8080/", "height": 34 - }, - "outputId": "a683980d-3cf0-4da0-e680-ff6f70364e72" + } }, "source": [ "try:\n", - " %tensorflow_version 2.x\n", + " %tensorflow_version 2.x\n", "except:\n", - " pass\n", + " pass\n", "\n", - "import tensorflow as tf" + "import tensorflow as tf\n", + "import tensorflow_addons as tfa" ], "execution_count": 2, "outputs": [ @@ -165,34 +166,6 @@ } ] }, - { - "cell_type": "code", - "metadata": { - "id": "vc1eVXwjiFle", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 52 - }, - "outputId": "111f2d45-8276-4c73-90c7-c3c5a01b7603" - }, - "source": [ - "!pip install --upgrade tfa-nightly\n", - "\n", - "import tensorflow_addons as tfa" - ], - "execution_count": 3, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Requirement already up-to-date: tfa-nightly in /usr/local/lib/python3.6/dist-packages (0.9.0.dev20200225)\n", - "Requirement already satisfied, skipping upgrade: typeguard>=2.7 in /tensorflow-2.1.0/python3.6 (from tfa-nightly) (2.7.1)\n" - ], - "name": "stdout" - } - ] - }, { "cell_type": "code", "metadata": { @@ -228,18 +201,18 @@ }, "source": [ "def create_model(opt):\n", - " model = tf.keras.models.Sequential([\n", - " tf.keras.layers.Flatten(), \n", - " tf.keras.layers.Dense(64, activation='relu', name='dense_1'),\n", - " tf.keras.layers.Dense(64, activation='relu', name='dense_2'),\n", - " tf.keras.layers.Dense(10, activation='softmax')\n", - " ])\n", + " model = tf.keras.models.Sequential([\n", + " tf.keras.layers.Flatten(), \n", + " tf.keras.layers.Dense(64, activation='relu', name='dense_1'),\n", + " tf.keras.layers.Dense(64, activation='relu', name='dense_2'),\n", + " tf.keras.layers.Dense(10, activation='softmax')\n", + " ])\n", "\n", - " model.compile(optimizer=opt,\n", - " loss='sparse_categorical_crossentropy',\n", - " metrics=['accuracy'])\n", + " model.compile(optimizer=opt,\n", + " loss='sparse_categorical_crossentropy',\n", + " metrics=['accuracy'])\n", "\n", - " return model" + " return model" ], "execution_count": 0, "outputs": [] @@ -334,8 +307,8 @@ "cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_dir,\n", " save_weights_only=True,\n", " verbose=1)\n", - "avg_callback = tfa.callbacks.average_model_checkpoint.AverageModelCheckpoint(filepath=checkpoint_dir, \n", - " update_weights=True)" + "avg_callback = tfa.callbacks.AverageModelCheckpoint(filepath=checkpoint_dir, \n", + " update_weights=True)" ], "execution_count": 0, "outputs": [] @@ -365,11 +338,11 @@ "metadata": { "id": "Xy8W4LYppadJ", "colab_type": "code", + "outputId": "8341eba4-e68c-43f0-c514-d5af41226c4d", "colab": { "base_uri": "https://localhost:8080/", "height": 399 - }, - "outputId": "5e049e0f-26ab-48e1-be35-cf4a907fbb49" + } }, "source": [ "#Build Model\n", @@ -378,32 +351,32 @@ "#Train the network\n", "model.fit(fmnist_train_ds, epochs=5, callbacks=[cp_callback])" ], - "execution_count": 9, + "execution_count": 8, "outputs": [ { "output_type": "stream", "text": [ "Train for 1875 steps\n", "Epoch 1/5\n", - "1874/1875 [============================>.] - ETA: 0s - loss: 0.7737 - accuracy: 0.7426\n", + "1868/1875 [============================>.] - ETA: 0s - loss: 0.7708 - accuracy: 0.7461\n", "Epoch 00001: saving model to ./training\n", - "1875/1875 [==============================] - 4s 2ms/step - loss: 0.7737 - accuracy: 0.7426\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.7700 - accuracy: 0.7463\n", "Epoch 2/5\n", - "1869/1875 [============================>.] - ETA: 0s - loss: 0.5041 - accuracy: 0.8245\n", + "1850/1875 [============================>.] - ETA: 0s - loss: 0.5062 - accuracy: 0.8236\n", "Epoch 00002: saving model to ./training\n", - "1875/1875 [==============================] - 4s 2ms/step - loss: 0.5037 - accuracy: 0.8247\n", + "1875/1875 [==============================] - 3s 2ms/step - loss: 0.5058 - accuracy: 0.8238\n", "Epoch 3/5\n", - "1866/1875 [============================>.] - ETA: 0s - loss: 0.4575 - accuracy: 0.8408\n", + "1852/1875 [============================>.] - ETA: 0s - loss: 0.4590 - accuracy: 0.8386\n", "Epoch 00003: saving model to ./training\n", - "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4576 - accuracy: 0.8407\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4590 - accuracy: 0.8385\n", "Epoch 4/5\n", - "1855/1875 [============================>.] - ETA: 0s - loss: 0.4298 - accuracy: 0.8483\n", + "1874/1875 [============================>.] - ETA: 0s - loss: 0.4305 - accuracy: 0.8501\n", "Epoch 00004: saving model to ./training\n", - "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4299 - accuracy: 0.8483\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4306 - accuracy: 0.8500\n", "Epoch 5/5\n", - "1860/1875 [============================>.] - ETA: 0s - loss: 0.4117 - accuracy: 0.8553\n", + "1866/1875 [============================>.] - ETA: 0s - loss: 0.4115 - accuracy: 0.8555\n", "Epoch 00005: saving model to ./training\n", - "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4113 - accuracy: 0.8554\n" + "1875/1875 [==============================] - 3s 2ms/step - loss: 0.4114 - accuracy: 0.8556\n" ], "name": "stdout" }, @@ -411,13 +384,13 @@ "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, "metadata": { "tags": [] }, - "execution_count": 9 + "execution_count": 8 } ] }, @@ -426,26 +399,27 @@ "metadata": { "id": "uU2iQ6HAZ6-E", "colab_type": "code", + "outputId": "07d0b8a1-666a-427b-ca54-54252061dc93", "colab": { "base_uri": "https://localhost:8080/", "height": 69 - }, - "outputId": "8268002a-04b2-4de3-8ae1-71023578f81c" + } }, "source": [ "#Evalute results\n", + "model.load_weights(checkpoint_dir)\n", "loss, accuracy = model.evaluate(test_images, test_labels, batch_size=32, verbose=2)\n", "print(\"Loss :\", loss)\n", "print(\"Accuracy :\", accuracy)" ], - "execution_count": 10, + "execution_count": 9, "outputs": [ { "output_type": "stream", "text": [ - "10000/10000 - 0s - loss: 93.3245 - accuracy: 0.7870\n", - "Loss : 93.32448555297852\n", - "Accuracy : 0.787\n" + "10000/10000 - 0s - loss: 79.5507 - accuracy: 0.8011\n", + "Loss : 79.5506900449872\n", + "Accuracy : 0.8011\n" ], "name": "stdout" } @@ -466,11 +440,11 @@ "metadata": { "id": "--NIjBp-mhVb", "colab_type": "code", + "outputId": "5db6d0e1-7baa-4406-ce20-a3f341e10e9f", "colab": { "base_uri": "https://localhost:8080/", "height": 471 - }, - "outputId": "d21bfc64-c1a6-4031-f8a4-0e633fefc1a6" + } }, "source": [ "#Build Model\n", @@ -479,7 +453,7 @@ "#Train the network\n", "model.fit(fmnist_train_ds, epochs=5, callbacks=[cp_callback])" ], - "execution_count": 11, + "execution_count": 10, "outputs": [ { "output_type": "stream", @@ -489,25 +463,25 @@ "WARNING:tensorflow:From /tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/resource_variable_ops.py:1786: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "If using Keras pass *_constraint arguments to layers.\n", - "1857/1875 [============================>.] - ETA: 0s - loss: 0.7890 - accuracy: 0.7370\n", + "1870/1875 [============================>.] - ETA: 0s - loss: 0.7541 - accuracy: 0.7524\n", "Epoch 00001: saving model to ./training\n", - "1875/1875 [==============================] - 5s 3ms/step - loss: 0.7866 - accuracy: 0.7377\n", + "1875/1875 [==============================] - 5s 2ms/step - loss: 0.7535 - accuracy: 0.7525\n", "Epoch 2/5\n", - "1871/1875 [============================>.] - ETA: 0s - loss: 0.5071 - accuracy: 0.8219\n", + "1867/1875 [============================>.] - ETA: 0s - loss: 0.4944 - accuracy: 0.8290\n", "Epoch 00002: saving model to ./training\n", - "1875/1875 [==============================] - 4s 2ms/step - loss: 0.5071 - accuracy: 0.8219\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4944 - accuracy: 0.8291\n", "Epoch 3/5\n", - "1865/1875 [============================>.] - ETA: 0s - loss: 0.4584 - accuracy: 0.8380\n", + "1873/1875 [============================>.] - ETA: 0s - loss: 0.4485 - accuracy: 0.8429\n", "Epoch 00003: saving model to ./training\n", - "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4583 - accuracy: 0.8381\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4483 - accuracy: 0.8430\n", "Epoch 4/5\n", - "1867/1875 [============================>.] - ETA: 0s - loss: 0.4309 - accuracy: 0.8487\n", + "1869/1875 [============================>.] - ETA: 0s - loss: 0.4233 - accuracy: 0.8519\n", "Epoch 00004: saving model to ./training\n", - "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4304 - accuracy: 0.8489\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4233 - accuracy: 0.8519\n", "Epoch 5/5\n", - "1869/1875 [============================>.] - ETA: 0s - loss: 0.4111 - accuracy: 0.8539\n", + "1868/1875 [============================>.] - ETA: 0s - loss: 0.4020 - accuracy: 0.8590\n", "Epoch 00005: saving model to ./training\n", - "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4115 - accuracy: 0.8538\n" + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4020 - accuracy: 0.8591\n" ], "name": "stdout" }, @@ -515,13 +489,13 @@ "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, "metadata": { "tags": [] }, - "execution_count": 11 + "execution_count": 10 } ] }, @@ -530,26 +504,27 @@ "metadata": { "id": "zRAym9EBmnW9", "colab_type": "code", + "outputId": "42713d82-c2ec-4f7a-8375-07560c7a5733", "colab": { "base_uri": "https://localhost:8080/", "height": 69 - }, - "outputId": "59eeb8cc-6bff-461a-b367-c20d0912532a" + } }, "source": [ "#Evalute results\n", + "model.load_weights(checkpoint_dir)\n", "loss, accuracy = model.evaluate(test_images, test_labels, batch_size=32, verbose=2)\n", "print(\"Loss :\", loss)\n", "print(\"Accuracy :\", accuracy)" ], - "execution_count": 12, + "execution_count": 11, "outputs": [ { "output_type": "stream", "text": [ - "10000/10000 - 0s - loss: 106.2339 - accuracy: 0.7722\n", - "Loss : 106.23389839782715\n", - "Accuracy : 0.7722\n" + "10000/10000 - 0s - loss: 86.9890 - accuracy: 0.7955\n", + "Loss : 86.98903251190185\n", + "Accuracy : 0.7955\n" ], "name": "stdout" } @@ -570,45 +545,46 @@ "metadata": { "id": "Ia7ALKefnXWQ", "colab_type": "code", + "outputId": "97fb462e-2c08-41d5-956d-a2ddb5140ee9", "colab": { "base_uri": "https://localhost:8080/", "height": 399 - }, - "outputId": "1a6e8c5b-534e-4c2c-8d3e-ea12ee122542" + } }, "source": [ "#Build Model\n", + "model.load_weights(checkpoint_dir)\n", "model = create_model(stocastic_avg_sgd)\n", "\n", "#Train the network\n", "model.fit(fmnist_train_ds, epochs=5, callbacks=[cp_callback])" ], - "execution_count": 13, + "execution_count": 12, "outputs": [ { "output_type": "stream", "text": [ "Train for 1875 steps\n", "Epoch 1/5\n", - "1872/1875 [============================>.] - ETA: 0s - loss: 0.7673 - accuracy: 0.7464\n", + "1867/1875 [============================>.] - ETA: 0s - loss: 0.7787 - accuracy: 0.7398\n", "Epoch 00001: saving model to ./training\n", - "1875/1875 [==============================] - 6s 3ms/step - loss: 0.7670 - accuracy: 0.7465\n", + "1875/1875 [==============================] - 6s 3ms/step - loss: 0.7776 - accuracy: 0.7401\n", "Epoch 2/5\n", - "1871/1875 [============================>.] - ETA: 0s - loss: 0.5034 - accuracy: 0.8248\n", + "1867/1875 [============================>.] - ETA: 0s - loss: 0.5027 - accuracy: 0.8231\n", "Epoch 00002: saving model to ./training\n", - "1875/1875 [==============================] - 5s 3ms/step - loss: 0.5034 - accuracy: 0.8247\n", + "1875/1875 [==============================] - 5s 3ms/step - loss: 0.5027 - accuracy: 0.8232\n", "Epoch 3/5\n", - "1858/1875 [============================>.] - ETA: 0s - loss: 0.4557 - accuracy: 0.8407\n", + "1868/1875 [============================>.] - ETA: 0s - loss: 0.4602 - accuracy: 0.8376\n", "Epoch 00003: saving model to ./training\n", - "1875/1875 [==============================] - 5s 3ms/step - loss: 0.4561 - accuracy: 0.8407\n", + "1875/1875 [==============================] - 5s 3ms/step - loss: 0.4598 - accuracy: 0.8378\n", "Epoch 4/5\n", - "1860/1875 [============================>.] - ETA: 0s - loss: 0.4266 - accuracy: 0.8509\n", + "1874/1875 [============================>.] - ETA: 0s - loss: 0.4335 - accuracy: 0.8478\n", "Epoch 00004: saving model to ./training\n", - "1875/1875 [==============================] - 5s 3ms/step - loss: 0.4273 - accuracy: 0.8507\n", + "1875/1875 [==============================] - 5s 3ms/step - loss: 0.4334 - accuracy: 0.8478\n", "Epoch 5/5\n", - "1859/1875 [============================>.] - ETA: 0s - loss: 0.4073 - accuracy: 0.8570\n", + "1858/1875 [============================>.] - ETA: 0s - loss: 0.4157 - accuracy: 0.8542\n", "Epoch 00005: saving model to ./training\n", - "1875/1875 [==============================] - 5s 3ms/step - loss: 0.4074 - accuracy: 0.8569\n" + "1875/1875 [==============================] - 5s 3ms/step - loss: 0.4151 - accuracy: 0.8544\n" ], "name": "stdout" }, @@ -616,13 +592,13 @@ "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, "metadata": { "tags": [] }, - "execution_count": 13 + "execution_count": 12 } ] }, @@ -631,11 +607,11 @@ "metadata": { "id": "EOT2E9NBoeHI", "colab_type": "code", + "outputId": "c328aeb4-df29-44af-cd54-b303b6fb9bba", "colab": { "base_uri": "https://localhost:8080/", "height": 69 - }, - "outputId": "4821a2b4-8b27-4312-9cd8-ff7961cb84f9" + } }, "source": [ "#Evalute results\n", @@ -643,14 +619,14 @@ "print(\"Loss :\", loss)\n", "print(\"Accuracy :\", accuracy)" ], - "execution_count": 14, + "execution_count": 13, "outputs": [ { "output_type": "stream", "text": [ - "10000/10000 - 0s - loss: 85.0133 - accuracy: 0.7989\n", - "Loss : 85.01332214202881\n", - "Accuracy : 0.7989\n" + "10000/10000 - 0s - loss: 85.3121 - accuracy: 0.7944\n", + "Loss : 85.31213686828613\n", + "Accuracy : 0.7944\n" ], "name": "stdout" } From 8397adb2afc7672f74111cb1c32fb0e730c4ff52 Mon Sep 17 00:00:00 2001 From: Abhineet Choudhary Date: Wed, 26 Feb 2020 02:51:09 +0530 Subject: [PATCH 5/6] added _gelu_py, test --- tensorflow_addons/activations/gelu.py | 11 +++++++++++ tensorflow_addons/activations/gelu_test.py | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/tensorflow_addons/activations/gelu.py b/tensorflow_addons/activations/gelu.py index 4525516021..e82c58a03d 100644 --- a/tensorflow_addons/activations/gelu.py +++ b/tensorflow_addons/activations/gelu.py @@ -14,6 +14,7 @@ # ============================================================================== import tensorflow as tf +import math from tensorflow_addons.utils import types from tensorflow_addons.utils.resource_loader import LazySO @@ -49,3 +50,13 @@ def _gelu_grad(op, grad): return _activation_so.ops.addons_gelu_grad( grad, op.inputs[0], op.get_attr("approximate") ) + + +def _gelu_py(x: types.TensorLike, approximate: bool = True) -> tf.Tensor: + x = tf.convert_to_tensor(x) + if approximate: + pi = tf.cast(math.pi, x.dtype) + coeff = tf.cast(0.044715, x.dtype) + return 0.5 * x * (1.0 + tf.tanh(tf.sqrt(2.0 / pi) * (x + coeff * tf.pow(x, 3)))) + else: + return 0.5 * x * (1.0 + tf.math.erf(x / tf.cast(tf.sqrt(2.0), x.dtype))) diff --git a/tensorflow_addons/activations/gelu_test.py b/tensorflow_addons/activations/gelu_test.py index 53c8499eda..da44fb4315 100644 --- a/tensorflow_addons/activations/gelu_test.py +++ b/tensorflow_addons/activations/gelu_test.py @@ -18,6 +18,7 @@ import numpy as np import tensorflow as tf from tensorflow_addons.activations import gelu +from tensorflow_addons.activations.gelu import _gelu_py from tensorflow_addons.utils import test_utils @@ -51,6 +52,25 @@ def test_theoretical_gradients(self, dtype): ) self.assertAllCloseAccordingToType(theoretical, numerical, atol=1e-4) + @parameterized.named_parameters(("float32", np.float32), ("float64", np.float64)) + def test_same_as_py_func(self, dtype): + np.random.seed(100) + for _ in range(20): + self.verify_funcs_are_equivalent(dtype) + + def verify_funcs_are_equivalent(self, dtype): + x_np = np.random.uniform(-10, 10, size=(4, 4)).astype(dtype) + x = tf.convert_to_tensor(x_np) + for approximate in [True, False]: + with tf.GradientTape(persistent=True) as t: + t.watch(x) + y_native = gelu(x, approximate=approximate) + y_py = _gelu_py(x, approximate=approximate) + self.assertAllCloseAccordingToType(y_native, y_py, atol=1e-4) + grad_native = t.gradient(y_native, x) + grad_py = t.gradient(y_py, x) + self.assertAllCloseAccordingToType(grad_native, grad_py, atol=1e-4) + if __name__ == "__main__": tf.test.main() From 6c0a1c7eb91d0b14ca4cbb99016df02b3306b497 Mon Sep 17 00:00:00 2001 From: NoobMaster <39547012+abhichou4@users.noreply.github.com> Date: Wed, 26 Feb 2020 03:03:58 +0530 Subject: [PATCH 6/6] Delete average_optimizers_callback.ipynb --- .../average_optimizers_callback.ipynb | 636 ------------------ 1 file changed, 636 deletions(-) delete mode 100644 docs/tutorials/average_optimizers_callback.ipynb diff --git a/docs/tutorials/average_optimizers_callback.ipynb b/docs/tutorials/average_optimizers_callback.ipynb deleted file mode 100644 index 0942e99911..0000000000 --- a/docs/tutorials/average_optimizers_callback.ipynb +++ /dev/null @@ -1,636 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "average_optimizers_callback.ipynb", - "provenance": [], - "collapsed_sections": [ - "Tce3stUlHN0L" - ], - "toc_visible": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Tce3stUlHN0L" - }, - "source": [ - "##### Copyright 2019 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "metadata": { - "cellView": "both", - "colab_type": "code", - "id": "tuOe1ymfHZPu", - "colab": {} - }, - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "qFdPvlXBOdUN" - }, - "source": [ - "# Title" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "MfBg1C5NB3X0" - }, - "source": [ - "\n", - " \n", - " \n", - "
\n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "xHxb-dlhMIzW" - }, - "source": [ - "## Overview\n", - "\n", - "This notebook demonstrates how to use Moving Average Optimizer along with the Model Average Checkpoint from tensorflow addons pagkage.\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "o2UNySlpXkbl", - "colab_type": "text" - }, - "source": [ - "## Moving Averaging \n", - "\n", - "> The advantage of Moving Averaging is that they are less prone to rampant loss shifts or irregular data representation in the latest batch. It gives a smooothened and a more genral idea of the model training until some point.\n", - "\n", - "## Stocastic Averaging\n", - "\n", - "> Stocastic Weight Averaging converges to wider optimas. By doing so, it resembles geometric ensembeling. SWA is a simple method to improve model performance when used as a wrapper around other optimizers and averaging results from different points of trajectory of the inner optimizer.\n", - "\n", - "## Model Average Checkpoint \n", - "\n", - "> ```callbacks.ModelCheckpoint``` doesn't give you the option to save moving average weights in the middle of traning, which is why Model Average Optimizers required a custom callback. Using the ```update_weights``` parameter, ```ModelAverageCheckpoint``` allows you to:\n", - "1. Assign the moving average weights to the model, and save them.\n", - "2. Keep the old non-averaged weights, but the saved model uses the average weights." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "MUXex9ctTuDB" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "IqR2PQG4ZaZ0", - "outputId": "dd92618d-ad8d-4956-d497-387ac6be541d", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "try:\n", - " %tensorflow_version 2.x\n", - "except:\n", - " pass\n", - "\n", - "import tensorflow as tf\n", - "import tensorflow_addons as tfa" - ], - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "text": [ - "TensorFlow 2.x selected.\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "4hnJ2rDpI38-", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import tensorflow_datasets as tfds\n", - "import numpy as np\n", - "from matplotlib import pyplot as plt\n", - "import os" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Iox_HZNNYLEB", - "colab_type": "text" - }, - "source": [ - "## Build Model " - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "KtylpxOmceaC", - "colab": {} - }, - "source": [ - "def create_model(opt):\n", - " model = tf.keras.models.Sequential([\n", - " tf.keras.layers.Flatten(), \n", - " tf.keras.layers.Dense(64, activation='relu', name='dense_1'),\n", - " tf.keras.layers.Dense(64, activation='relu', name='dense_2'),\n", - " tf.keras.layers.Dense(10, activation='softmax')\n", - " ])\n", - "\n", - " model.compile(optimizer=opt,\n", - " loss='sparse_categorical_crossentropy',\n", - " metrics=['accuracy'])\n", - "\n", - " return model" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "pwdM2pl3RSPb" - }, - "source": [ - "## Prepare Dataset" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "mMOeXVmbdilM", - "colab": {} - }, - "source": [ - "#Load Fashion MNIST dataset\n", - "train, test = tf.keras.datasets.fashion_mnist.load_data()\n", - "\n", - "images, labels = train\n", - "images = images/255.0\n", - "labels = labels.astype(np.int32)\n", - "\n", - "fmnist_train_ds = tf.data.Dataset.from_tensor_slices((images, labels))\n", - "fmnist_train_ds = fmnist_train_ds.shuffle(5000).batch(32)\n", - "\n", - "test_images, test_labels = test" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "iEbhI_eajpJe", - "colab_type": "text" - }, - "source": [ - "We will be comparing three optimizers here:\n", - "\n", - "* Unwrapped SGD\n", - "* SGD with Moving Average\n", - "* SGD with Stochastic Weight Averaging\n", - "\n", - "And see how they perform with the same model." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "_Q76K1fNk7Va", - "colab_type": "code", - "colab": {} - }, - "source": [ - "#Optimizers \n", - "sgd = tf.keras.optimizers.SGD(0.01)\n", - "moving_avg_sgd = tfa.optimizers.MovingAverage(sgd)\n", - "stocastic_avg_sgd = tfa.optimizers.SWA(sgd)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nXlMX4p9qHwg", - "colab_type": "text" - }, - "source": [ - "Both ```MovingAverage``` and ```StocasticAverage``` optimers use ```ModelAverageCheckpoint```." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "SnvZjt34qEHY", - "colab_type": "code", - "colab": {} - }, - "source": [ - "#Callback \n", - "checkpoint_path = \"./training/cp-{epoch:04d}.ckpt\"\n", - "checkpoint_dir = os.path.dirname(checkpoint_path)\n", - "\n", - "cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_dir,\n", - " save_weights_only=True,\n", - " verbose=1)\n", - "avg_callback = tfa.callbacks.AverageModelCheckpoint(filepath=checkpoint_dir, \n", - " update_weights=True)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "uabQmjMtRtzs" - }, - "source": [ - "## Train Model\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SPmifETHmPix", - "colab_type": "text" - }, - "source": [ - "### Vanilla SGD Optimizer " - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Xy8W4LYppadJ", - "colab_type": "code", - "outputId": "8341eba4-e68c-43f0-c514-d5af41226c4d", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 399 - } - }, - "source": [ - "#Build Model\n", - "model = create_model(sgd)\n", - "\n", - "#Train the network\n", - "model.fit(fmnist_train_ds, epochs=5, callbacks=[cp_callback])" - ], - "execution_count": 8, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Train for 1875 steps\n", - "Epoch 1/5\n", - "1868/1875 [============================>.] - ETA: 0s - loss: 0.7708 - accuracy: 0.7461\n", - "Epoch 00001: saving model to ./training\n", - "1875/1875 [==============================] - 4s 2ms/step - loss: 0.7700 - accuracy: 0.7463\n", - "Epoch 2/5\n", - "1850/1875 [============================>.] - ETA: 0s - loss: 0.5062 - accuracy: 0.8236\n", - "Epoch 00002: saving model to ./training\n", - "1875/1875 [==============================] - 3s 2ms/step - loss: 0.5058 - accuracy: 0.8238\n", - "Epoch 3/5\n", - "1852/1875 [============================>.] - ETA: 0s - loss: 0.4590 - accuracy: 0.8386\n", - "Epoch 00003: saving model to ./training\n", - "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4590 - accuracy: 0.8385\n", - "Epoch 4/5\n", - "1874/1875 [============================>.] - ETA: 0s - loss: 0.4305 - accuracy: 0.8501\n", - "Epoch 00004: saving model to ./training\n", - "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4306 - accuracy: 0.8500\n", - "Epoch 5/5\n", - "1866/1875 [============================>.] - ETA: 0s - loss: 0.4115 - accuracy: 0.8555\n", - "Epoch 00005: saving model to ./training\n", - "1875/1875 [==============================] - 3s 2ms/step - loss: 0.4114 - accuracy: 0.8556\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 8 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "uU2iQ6HAZ6-E", - "colab_type": "code", - "outputId": "07d0b8a1-666a-427b-ca54-54252061dc93", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 69 - } - }, - "source": [ - "#Evalute results\n", - "model.load_weights(checkpoint_dir)\n", - "loss, accuracy = model.evaluate(test_images, test_labels, batch_size=32, verbose=2)\n", - "print(\"Loss :\", loss)\n", - "print(\"Accuracy :\", accuracy)" - ], - "execution_count": 9, - "outputs": [ - { - "output_type": "stream", - "text": [ - "10000/10000 - 0s - loss: 79.5507 - accuracy: 0.8011\n", - "Loss : 79.5506900449872\n", - "Accuracy : 0.8011\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lAvhD4unmc6W", - "colab_type": "text" - }, - "source": [ - "### Moving Average SGD" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "--NIjBp-mhVb", - "colab_type": "code", - "outputId": "5db6d0e1-7baa-4406-ce20-a3f341e10e9f", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 471 - } - }, - "source": [ - "#Build Model\n", - "model = create_model(moving_avg_sgd)\n", - "\n", - "#Train the network\n", - "model.fit(fmnist_train_ds, epochs=5, callbacks=[cp_callback])" - ], - "execution_count": 10, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Train for 1875 steps\n", - "Epoch 1/5\n", - "WARNING:tensorflow:From /tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/resource_variable_ops.py:1786: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "If using Keras pass *_constraint arguments to layers.\n", - "1870/1875 [============================>.] - ETA: 0s - loss: 0.7541 - accuracy: 0.7524\n", - "Epoch 00001: saving model to ./training\n", - "1875/1875 [==============================] - 5s 2ms/step - loss: 0.7535 - accuracy: 0.7525\n", - "Epoch 2/5\n", - "1867/1875 [============================>.] - ETA: 0s - loss: 0.4944 - accuracy: 0.8290\n", - "Epoch 00002: saving model to ./training\n", - "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4944 - accuracy: 0.8291\n", - "Epoch 3/5\n", - "1873/1875 [============================>.] - ETA: 0s - loss: 0.4485 - accuracy: 0.8429\n", - "Epoch 00003: saving model to ./training\n", - "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4483 - accuracy: 0.8430\n", - "Epoch 4/5\n", - "1869/1875 [============================>.] - ETA: 0s - loss: 0.4233 - accuracy: 0.8519\n", - "Epoch 00004: saving model to ./training\n", - "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4233 - accuracy: 0.8519\n", - "Epoch 5/5\n", - "1868/1875 [============================>.] - ETA: 0s - loss: 0.4020 - accuracy: 0.8590\n", - "Epoch 00005: saving model to ./training\n", - "1875/1875 [==============================] - 4s 2ms/step - loss: 0.4020 - accuracy: 0.8591\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 10 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "zRAym9EBmnW9", - "colab_type": "code", - "outputId": "42713d82-c2ec-4f7a-8375-07560c7a5733", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 69 - } - }, - "source": [ - "#Evalute results\n", - "model.load_weights(checkpoint_dir)\n", - "loss, accuracy = model.evaluate(test_images, test_labels, batch_size=32, verbose=2)\n", - "print(\"Loss :\", loss)\n", - "print(\"Accuracy :\", accuracy)" - ], - "execution_count": 11, - "outputs": [ - { - "output_type": "stream", - "text": [ - "10000/10000 - 0s - loss: 86.9890 - accuracy: 0.7955\n", - "Loss : 86.98903251190185\n", - "Accuracy : 0.7955\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "K98lbU07m_Bk", - "colab_type": "text" - }, - "source": [ - "### Stocastic Weight Average SGD " - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Ia7ALKefnXWQ", - "colab_type": "code", - "outputId": "97fb462e-2c08-41d5-956d-a2ddb5140ee9", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 399 - } - }, - "source": [ - "#Build Model\n", - "model.load_weights(checkpoint_dir)\n", - "model = create_model(stocastic_avg_sgd)\n", - "\n", - "#Train the network\n", - "model.fit(fmnist_train_ds, epochs=5, callbacks=[cp_callback])" - ], - "execution_count": 12, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Train for 1875 steps\n", - "Epoch 1/5\n", - "1867/1875 [============================>.] - ETA: 0s - loss: 0.7787 - accuracy: 0.7398\n", - "Epoch 00001: saving model to ./training\n", - "1875/1875 [==============================] - 6s 3ms/step - loss: 0.7776 - accuracy: 0.7401\n", - "Epoch 2/5\n", - "1867/1875 [============================>.] - ETA: 0s - loss: 0.5027 - accuracy: 0.8231\n", - "Epoch 00002: saving model to ./training\n", - "1875/1875 [==============================] - 5s 3ms/step - loss: 0.5027 - accuracy: 0.8232\n", - "Epoch 3/5\n", - "1868/1875 [============================>.] - ETA: 0s - loss: 0.4602 - accuracy: 0.8376\n", - "Epoch 00003: saving model to ./training\n", - "1875/1875 [==============================] - 5s 3ms/step - loss: 0.4598 - accuracy: 0.8378\n", - "Epoch 4/5\n", - "1874/1875 [============================>.] - ETA: 0s - loss: 0.4335 - accuracy: 0.8478\n", - "Epoch 00004: saving model to ./training\n", - "1875/1875 [==============================] - 5s 3ms/step - loss: 0.4334 - accuracy: 0.8478\n", - "Epoch 5/5\n", - "1858/1875 [============================>.] - ETA: 0s - loss: 0.4157 - accuracy: 0.8542\n", - "Epoch 00005: saving model to ./training\n", - "1875/1875 [==============================] - 5s 3ms/step - loss: 0.4151 - accuracy: 0.8544\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 12 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "EOT2E9NBoeHI", - "colab_type": "code", - "outputId": "c328aeb4-df29-44af-cd54-b303b6fb9bba", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 69 - } - }, - "source": [ - "#Evalute results\n", - "loss, accuracy = model.evaluate(test_images, test_labels, batch_size=32, verbose=2)\n", - "print(\"Loss :\", loss)\n", - "print(\"Accuracy :\", accuracy)" - ], - "execution_count": 13, - "outputs": [ - { - "output_type": "stream", - "text": [ - "10000/10000 - 0s - loss: 85.3121 - accuracy: 0.7944\n", - "Loss : 85.31213686828613\n", - "Accuracy : 0.7944\n" - ], - "name": "stdout" - } - ] - } - ] -} \ No newline at end of file