Commit

* now can choose to log at train_step or frame_step
jingweiz committed Aug 23, 2017
1 parent f62d04d commit dd51b10
Showing 4 changed files with 139 additions and 71 deletions.
3 changes: 3 additions & 0 deletions core/agent.py
@@ -73,6 +73,8 @@ def __init__(self, args, env_prototype, model_prototype, memory_prototype=None):
             self.memory_interval = args.memory_interval
             self.train_interval = args.train_interval
         elif args.agent_type == "a3c":
+            self.enable_log_at_train_step = args.enable_log_at_train_step
+
             self.enable_lstm = args.enable_lstm
             self.enable_continuous = args.enable_continuous
             self.num_processes = args.num_processes
@@ -83,6 +85,7 @@ def __init__(self, args, env_prototype, model_prototype, memory_prototype=None):
         elif args.agent_type == "acer":
             self.enable_bias_correction = args.enable_bias_correction
             self.enable_1st_order_trpo = args.enable_1st_order_trpo
+            self.enable_log_at_train_step = args.enable_log_at_train_step

             self.enable_lstm = args.enable_lstm
             self.enable_continuous = args.enable_continuous
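For context, the new attribute is read straight off args; the options/config module that defines it is not part of this commit. Below is a minimal sketch of how such a switch could be exposed, assuming an argparse-style entry point. The flag name mirrors args.enable_log_at_train_step, but the project's real configuration object may set it differently.

import argparse

parser = argparse.ArgumentParser()
# hypothetical flag mirroring args.enable_log_at_train_step; the real project
# may define it in its own options/params module instead
parser.add_argument("--enable-log-at-train-step", dest="enable_log_at_train_step",
                    action="store_true",
                    help="index eval logs by train_step instead of frame_step")

args = parser.parse_args(["--enable-log-at-train-step"])
print(args.enable_log_at_train_step)  # True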
66 changes: 36 additions & 30 deletions core/agents/a3c_single_process.py
@@ -418,18 +418,42 @@ def _eval_model(self):
         v_loss_avg = self.master.v_loss_avg.value / loss_counter if loss_counter > 0 else 0.
         loss_avg = self.master.loss_avg.value / loss_counter if loss_counter > 0 else 0.
         self.master._reset_training_loggings()
-        self.p_loss_avg_log.append([eval_at_train_step, p_loss_avg])
-        self.v_loss_avg_log.append([eval_at_train_step, v_loss_avg])
-        self.loss_avg_log.append([eval_at_train_step, loss_avg])
-        self.entropy_avg_log.append([eval_at_train_step, np.mean(np.asarray(eval_entropy_log))])
-        self.v_avg_log.append([eval_at_train_step, np.mean(np.asarray(eval_v_log))])
-        self.steps_avg_log.append([eval_at_train_step, np.mean(np.asarray(eval_episode_steps_log))])
-        self.steps_std_log.append([eval_at_train_step, np.std(np.asarray(eval_episode_steps_log))]); del eval_episode_steps_log
-        self.reward_avg_log.append([eval_at_train_step, np.mean(np.asarray(eval_episode_reward_log))])
-        self.reward_std_log.append([eval_at_train_step, np.std(np.asarray(eval_episode_reward_log))]); del eval_episode_reward_log
-        self.nepisodes_log.append([eval_at_train_step, eval_nepisodes])
-        self.nepisodes_solved_log.append([eval_at_train_step, eval_nepisodes_solved])
-        self.repisodes_solved_log.append([eval_at_train_step, (eval_nepisodes_solved/eval_nepisodes) if eval_nepisodes > 0 else 0.])
+        def _log_at_step(eval_at_step):
+            self.p_loss_avg_log.append([eval_at_step, p_loss_avg])
+            self.v_loss_avg_log.append([eval_at_step, v_loss_avg])
+            self.loss_avg_log.append([eval_at_step, loss_avg])
+            self.entropy_avg_log.append([eval_at_step, np.mean(np.asarray(eval_entropy_log))])
+            self.v_avg_log.append([eval_at_step, np.mean(np.asarray(eval_v_log))])
+            self.steps_avg_log.append([eval_at_step, np.mean(np.asarray(eval_episode_steps_log))])
+            self.steps_std_log.append([eval_at_step, np.std(np.asarray(eval_episode_steps_log))])
+            self.reward_avg_log.append([eval_at_step, np.mean(np.asarray(eval_episode_reward_log))])
+            self.reward_std_log.append([eval_at_step, np.std(np.asarray(eval_episode_reward_log))])
+            self.nepisodes_log.append([eval_at_step, eval_nepisodes])
+            self.nepisodes_solved_log.append([eval_at_step, eval_nepisodes_solved])
+            self.repisodes_solved_log.append([eval_at_step, (eval_nepisodes_solved/eval_nepisodes) if eval_nepisodes > 0 else 0.])
+            # logging
+            self.master.logger.warning("Reporting @ Step: " + str(eval_at_step) + " | Elapsed Time: " + str(time.time() - self.start_time))
+            self.master.logger.warning("Iteration: {}; lr: {}".format(eval_at_step, self.master.lr_adjusted.value))
+            self.master.logger.warning("Iteration: {}; p_loss_avg: {}".format(eval_at_step, self.p_loss_avg_log[-1][1]))
+            self.master.logger.warning("Iteration: {}; v_loss_avg: {}".format(eval_at_step, self.v_loss_avg_log[-1][1]))
+            self.master.logger.warning("Iteration: {}; loss_avg: {}".format(eval_at_step, self.loss_avg_log[-1][1]))
+            self.master._reset_training_loggings()
+            self.master.logger.warning("Evaluating @ Step: " + str(eval_at_train_step) + " | (" + str(eval_at_frame_step) + " frames)...")
+            self.master.logger.warning("Evaluation Took: " + str(time.time() - self.last_eval))
+            self.master.logger.warning("Iteration: {}; entropy_avg: {}".format(eval_at_step, self.entropy_avg_log[-1][1]))
+            self.master.logger.warning("Iteration: {}; v_avg: {}".format(eval_at_step, self.v_avg_log[-1][1]))
+            self.master.logger.warning("Iteration: {}; steps_avg: {}".format(eval_at_step, self.steps_avg_log[-1][1]))
+            self.master.logger.warning("Iteration: {}; steps_std: {}".format(eval_at_step, self.steps_std_log[-1][1]))
+            self.master.logger.warning("Iteration: {}; reward_avg: {}".format(eval_at_step, self.reward_avg_log[-1][1]))
+            self.master.logger.warning("Iteration: {}; reward_std: {}".format(eval_at_step, self.reward_std_log[-1][1]))
+            self.master.logger.warning("Iteration: {}; nepisodes: {}".format(eval_at_step, self.nepisodes_log[-1][1]))
+            self.master.logger.warning("Iteration: {}; nepisodes_solved: {}".format(eval_at_step, self.nepisodes_solved_log[-1][1]))
+            self.master.logger.warning("Iteration: {}; repisodes_solved: {}".format(eval_at_step, self.repisodes_solved_log[-1][1]))
+        if self.master.enable_log_at_train_step:
+            _log_at_step(eval_at_train_step)
+        else:
+            _log_at_step(eval_at_frame_step)
+
         # plotting
         if self.master.visualize:
             self.win_p_loss_avg = self.master.vis.scatter(X=np.array(self.p_loss_avg_log), env=self.master.refs, win=self.win_p_loss_avg, opts=dict(title="p_loss_avg"))
@@ -444,24 +468,6 @@ def _eval_model(self):
             self.win_nepisodes = self.master.vis.scatter(X=np.array(self.nepisodes_log), env=self.master.refs, win=self.win_nepisodes, opts=dict(title="nepisodes"))
             self.win_nepisodes_solved = self.master.vis.scatter(X=np.array(self.nepisodes_solved_log), env=self.master.refs, win=self.win_nepisodes_solved, opts=dict(title="nepisodes_solved"))
             self.win_repisodes_solved = self.master.vis.scatter(X=np.array(self.repisodes_solved_log), env=self.master.refs, win=self.win_repisodes_solved, opts=dict(title="repisodes_solved"))
-        # logging
-        self.master.logger.warning("Reporting @ Step: " + str(eval_at_train_step) + " | Elapsed Time: " + str(time.time() - self.start_time))
-        self.master.logger.warning("Iteration: {}; lr: {}".format(eval_at_train_step, self.master.lr_adjusted.value))
-        self.master.logger.warning("Iteration: {}; p_loss_avg: {}".format(eval_at_train_step, self.p_loss_avg_log[-1][1]))
-        self.master.logger.warning("Iteration: {}; v_loss_avg: {}".format(eval_at_train_step, self.v_loss_avg_log[-1][1]))
-        self.master.logger.warning("Iteration: {}; loss_avg: {}".format(eval_at_train_step, self.loss_avg_log[-1][1]))
-        self.master._reset_training_loggings()
-        self.master.logger.warning("Evaluating @ Step: " + str(eval_at_train_step) + " | (" + str(eval_at_frame_step) + " frames)...")
-        self.master.logger.warning("Evaluation Took: " + str(time.time() - self.last_eval))
-        self.master.logger.warning("Iteration: {}; entropy_avg: {}".format(eval_at_train_step, self.entropy_avg_log[-1][1]))
-        self.master.logger.warning("Iteration: {}; v_avg: {}".format(eval_at_train_step, self.v_avg_log[-1][1]))
-        self.master.logger.warning("Iteration: {}; steps_avg: {}".format(eval_at_train_step, self.steps_avg_log[-1][1]))
-        self.master.logger.warning("Iteration: {}; steps_std: {}".format(eval_at_train_step, self.steps_std_log[-1][1]))
-        self.master.logger.warning("Iteration: {}; reward_avg: {}".format(eval_at_train_step, self.reward_avg_log[-1][1]))
-        self.master.logger.warning("Iteration: {}; reward_std: {}".format(eval_at_train_step, self.reward_std_log[-1][1]))
-        self.master.logger.warning("Iteration: {}; nepisodes: {}".format(eval_at_train_step, self.nepisodes_log[-1][1]))
-        self.master.logger.warning("Iteration: {}; nepisodes_solved: {}".format(eval_at_train_step, self.nepisodes_solved_log[-1][1]))
-        self.master.logger.warning("Iteration: {}; repisodes_solved: {}".format(eval_at_train_step, self.repisodes_solved_log[-1][1]))
         self.last_eval = time.time()

         # save model
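The refactor above folds two nearly identical logging blocks into a nested _log_at_step helper and calls it once with whichever counter the enable_log_at_train_step switch selects. A stripped-down sketch of the same pattern, with hypothetical names (evaluate, train_step, frame_step, reward_avg) standing in for the agent's real attributes:

def evaluate(train_step, frame_step, reward_avg, log_at_train_step=True):
    history = []  # [(step, reward_avg), ...]

    def _log_at_step(step):
        # every record is indexed by whichever counter the caller picked
        history.append((step, reward_avg))
        print("Iteration: {}; reward_avg: {}".format(step, reward_avg))

    # choose the x-axis for logs (and plots) once, in a single place
    if log_at_train_step:
        _log_at_step(train_step)
    else:
        _log_at_step(frame_step)
    return history

print(evaluate(train_step=500, frame_step=20000, reward_avg=1.5, log_at_train_step=False))
# prints "Iteration: 20000; reward_avg: 1.5" and returns [(20000, 1.5)]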
16 changes: 8 additions & 8 deletions core/agents/acer.py
@@ -41,17 +41,17 @@ def __init__(self, args, env_prototype, model_prototype, memory_prototype):
         self.on_policy_train_step = mp.Value('l', 0) # global on-policy train step counter
         self.off_policy_train_step = mp.Value('l', 0) # global off-policy train step counter
         # global training stats
-        self.p_loss_avg = mp.Value('d', 0.) # global policy loss
-        self.v_loss_avg = mp.Value('d', 0.) # global value loss
-        self.loss_avg = mp.Value('d', 0.) # global loss
-        self.loss_counter = mp.Value('l', 0) # storing this many losses
+        self.p_loss_avg = mp.Value('d', 0.) # global policy loss
+        self.v_loss_avg = mp.Value('d', 0.) # global value loss
+        self.entropy_loss_avg = mp.Value('d', 0.) # global entropy loss
+        self.loss_counter = mp.Value('l', 0) # storing this many losses
         self._reset_training_loggings()

     def _reset_training_loggings(self):
-        self.p_loss_avg.value = 0.
-        self.v_loss_avg.value = 0.
-        self.loss_avg.value = 0.
-        self.loss_counter.value = 0
+        self.p_loss_avg.value = 0.
+        self.v_loss_avg.value = 0.
+        self.entropy_loss_avg.value = 0.
+        self.loss_counter.value = 0

     def fit_model(self):
         self.jobs = []
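The ACER master keeps its running loss statistics in multiprocessing.Value slots so that worker processes can accumulate into them; this diff only swaps the generic loss_avg slot for entropy_loss_avg. Below is a self-contained illustration of that sharing mechanism; the worker function and the hard-coded loss value are illustrative, not part of the agent's API.

import multiprocessing as mp

def worker(p_loss_avg, loss_counter, loss):
    # accumulate one reported loss into the shared running sum
    with p_loss_avg.get_lock():
        p_loss_avg.value += loss
    with loss_counter.get_lock():
        loss_counter.value += 1

if __name__ == "__main__":
    p_loss_avg = mp.Value('d', 0.)   # 'd' -> shared double
    loss_counter = mp.Value('l', 0)  # 'l' -> shared long
    jobs = [mp.Process(target=worker, args=(p_loss_avg, loss_counter, 0.5)) for _ in range(4)]
    for job in jobs:
        job.start()
    for job in jobs:
        job.join()
    # report the average over however many losses were accumulated
    avg = p_loss_avg.value / loss_counter.value if loss_counter.value > 0 else 0.
    print(avg)  # 0.5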
(diff of the fourth changed file did not load)
