From 82a00b3c3a028f359b28db9007a8b6730338e10d Mon Sep 17 00:00:00 2001
From: "r.aristov"
Date: Sat, 14 Jul 2018 00:36:23 +0300
Subject: [PATCH] Normalized loss in actor-critic and REINFORCE examples. AC
 converges more than 2x faster now, REINFORCE about 10% faster.

---
 reinforcement_learning/actor_critic.py | 1 +
 reinforcement_learning/reinforce.py    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/reinforcement_learning/actor_critic.py b/reinforcement_learning/actor_critic.py
index 659faa0719..559b9ed147 100644
--- a/reinforcement_learning/actor_critic.py
+++ b/reinforcement_learning/actor_critic.py
@@ -79,6 +79,7 @@ def finish_episode():
         value_losses.append(F.smooth_l1_loss(value, torch.tensor([r])))
     optimizer.zero_grad()
     loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum()
+    loss /= rewards.numel()
     loss.backward()
     optimizer.step()
     del model.rewards[:]
diff --git a/reinforcement_learning/reinforce.py b/reinforcement_learning/reinforce.py
index 7982545902..cd168f8833 100644
--- a/reinforcement_learning/reinforce.py
+++ b/reinforcement_learning/reinforce.py
@@ -69,6 +69,7 @@ def finish_episode():
         policy_loss.append(-log_prob * reward)
     optimizer.zero_grad()
     policy_loss = torch.cat(policy_loss).sum()
+    policy_loss /= rewards.numel()
     policy_loss.backward()
     optimizer.step()
     del policy.rewards[:]
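
Note (not part of the patch): a minimal standalone sketch of what the added
division does. The tensors below are hypothetical stand-ins for the per-step
loss terms collected in finish_episode(); they are not from the examples
themselves. Summing per-step losses makes the gradient magnitude grow with
episode length, while dividing by the number of reward steps keeps the update
scale roughly constant across short and long episodes.

    import torch

    # Hypothetical per-step losses from one episode (stand-ins for the
    # -log_prob * reward terms accumulated in the REINFORCE example).
    params = torch.ones(3, requires_grad=True)
    per_step_losses = params * torch.tensor([1.0, 2.0, 3.0])

    # Sum, then normalize by the number of steps, mirroring the patch's
    # "loss /= rewards.numel()" line.
    loss = per_step_losses.sum() / per_step_losses.numel()
    loss.backward()

    # Gradients are tensor([0.3333, 0.6667, 1.0000]) instead of
    # tensor([1., 2., 3.]) without the normalization.
    print(params.grad)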