diff --git a/MC/README.md b/MC/README.md
index 835789227..2c1a512d7 100644
--- a/MC/README.md
+++ b/MC/README.md
@@ -37,7 +37,7 @@
 
 ### Exercises
 
-- [Get familiar with the Blackjack environment (Blackjack-v0)](Blackjack%20Playground.ipynb)
+- Get familiar with the [Blackjack environment (Blackjack-v0)](Blackjack%20Playground.ipynb)
 - Implement the Monte Carlo Prediction to estimate state-action values
   - [Exercise](MC%20Prediction.ipynb)
   - [Solution](MC%20Prediction%20Solution.ipynb)
diff --git a/TD/Q-Learning Solution.ipynb b/TD/Q-Learning Solution.ipynb
index 5794e20de..4c1c5be2c 100644
--- a/TD/Q-Learning Solution.ipynb
+++ b/TD/Q-Learning Solution.ipynb
@@ -3,9 +3,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "%matplotlib inline\n",
@@ -31,9 +29,7 @@
   {
    "cell_type": "code",
    "execution_count": 15,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "env = CliffWalkingEnv()"
@@ -73,9 +69,7 @@
   {
    "cell_type": "code",
    "execution_count": 17,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def q_learning(env, num_episodes, discount_factor=1.0, alpha=0.5, epsilon=0.1):\n",
@@ -86,7 +80,7 @@
     "    Args:\n",
     "        env: OpenAI environment.\n",
     "        num_episodes: Number of episodes to run for.\n",
-    "        discount_factor: Lambda time discount factor.\n",
+    "        discount_factor: Gamma discount factor.\n",
     "        alpha: TD learning rate.\n",
     "        epsilon: Chance the sample a random action. Float betwen 0 and 1.\n",
     "    \n",
@@ -147,9 +141,7 @@
   {
    "cell_type": "code",
    "execution_count": 18,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -166,9 +158,7 @@
   {
    "cell_type": "code",
    "execution_count": 19,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -231,9 +221,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.1"
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }
diff --git a/TD/Q-Learning.ipynb b/TD/Q-Learning.ipynb
index 724d682ad..4e1396cf6 100644
--- a/TD/Q-Learning.ipynb
+++ b/TD/Q-Learning.ipynb
@@ -3,9 +3,7 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "%matplotlib inline\n",
@@ -30,9 +28,7 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "env = CliffWalkingEnv()"
@@ -72,9 +68,7 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def q_learning(env, num_episodes, discount_factor=1.0, alpha=0.5, epsilon=0.1):\n",
@@ -85,7 +79,7 @@
     "    Args:\n",
     "        env: OpenAI environment.\n",
     "        num_episodes: Number of episodes to run for.\n",
-    "        discount_factor: Lambda time discount factor.\n",
+    "        discount_factor: Gamma discount factor.\n",
     "        alpha: TD learning rate.\n",
     "        epsilon: Chance the sample a random action. Float betwen 0 and 1.\n",
     "    \n",
@@ -121,9 +115,7 @@
   {
    "cell_type": "code",
    "execution_count": 7,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -140,9 +132,7 @@
   {
    "cell_type": "code",
    "execution_count": 8,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -205,9 +195,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.1"
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }
diff --git a/TD/README.md b/TD/README.md
index f0b26aa50..ac2488167 100644
--- a/TD/README.md
+++ b/TD/README.md
@@ -40,11 +40,11 @@
 
 ### Exercises
 
-- [Windy Gridworld Playground](Windy%20Gridworld%20Playground.ipynb)
+- Get familiar with the [Windy Gridworld Playground](Windy%20Gridworld%20Playground.ipynb)
 - Implement SARSA
   - [Exercise](SARSA.ipynb)
   - [Solution](SARSA%20Solution.ipynb)
-- [Cliff Environment Playground](Cliff%20Environment%20Playground.ipynb)
+- Get familiar with the [Cliff Environment Playground](Cliff%20Environment%20Playground.ipynb)
 - Implement Q-Learning in Python
   - [Exercise](Q-Learning.ipynb)
   - [Solution](Q-Learning%20Solution.ipynb)
diff --git a/TD/SARSA Solution.ipynb b/TD/SARSA Solution.ipynb
index feab3db02..df647f193 100644
--- a/TD/SARSA Solution.ipynb
+++ b/TD/SARSA Solution.ipynb
@@ -3,9 +3,7 @@
   {
    "cell_type": "code",
    "execution_count": 19,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "%matplotlib inline\n",
@@ -39,9 +37,7 @@
   {
    "cell_type": "code",
    "execution_count": 20,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "env = WindyGridworldEnv()"
@@ -81,9 +77,7 @@
   {
    "cell_type": "code",
    "execution_count": 22,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def sarsa(env, num_episodes, discount_factor=1.0, alpha=0.5, epsilon=0.1):\n",
@@ -93,7 +87,7 @@
     "    Args:\n",
     "        env: OpenAI environment.\n",
     "        num_episodes: Number of episodes to run for.\n",
-    "        discount_factor: Lambda time discount factor.\n",
+    "        discount_factor: Gamma discount factor.\n",
     "        alpha: TD learning rate.\n",
     "        epsilon: Chance the sample a random action. Float betwen 0 and 1.\n",
     "    \n",
@@ -156,9 +150,7 @@
   {
    "cell_type": "code",
    "execution_count": 23,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -175,9 +167,7 @@
   {
    "cell_type": "code",
    "execution_count": 24,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -217,9 +207,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": []
   }
@@ -240,9 +228,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.1"
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }
diff --git a/TD/SARSA.ipynb b/TD/SARSA.ipynb
index 799915352..8a0344410 100644
--- a/TD/SARSA.ipynb
+++ b/TD/SARSA.ipynb
@@ -3,9 +3,7 @@
   {
    "cell_type": "code",
    "execution_count": 11,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "%matplotlib inline\n",
@@ -30,9 +28,7 @@
   {
    "cell_type": "code",
    "execution_count": 12,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "env = WindyGridworldEnv()"
@@ -72,9 +68,7 @@
   {
    "cell_type": "code",
    "execution_count": 14,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def sarsa(env, num_episodes, discount_factor=1.0, alpha=0.5, epsilon=0.1):\n",
@@ -84,7 +78,7 @@
     "    Args:\n",
     "        env: OpenAI environment.\n",
     "        num_episodes: Number of episodes to run for.\n",
-    "        discount_factor: Lambda time discount factor.\n",
+    "        discount_factor: Gamma discount factor.\n",
     "        alpha: TD learning rate.\n",
     "        epsilon: Chance the sample a random action. Float betwen 0 and 1.\n",
     "    \n",
@@ -121,9 +115,7 @@
   {
    "cell_type": "code",
    "execution_count": 16,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -140,9 +132,7 @@
   {
    "cell_type": "code",
    "execution_count": 17,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -182,9 +172,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": []
   }
@@ -205,9 +193,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.1"
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }