ftr_algo_ppo_cfg.yaml

params:
  seed: ${.config.seed}

  algo:
    name: ppo

  load_checkpoint: ${if:${.config.checkpoint},True,False} # flag which sets whether to load the checkpoint
  load_path: ${.config.checkpoint} # path to the checkpoint to load

  config:
    seed: 40
    name: ftr_algo_ppo
    experiment: ""
    device: cuda:0
    max_iterations: ""
    checkpoint: ""
    test: False

  policy: # only works for MlpPolicy right now
    pi_hid_sizes: [ 512, 512, 512 ]
    vf_hid_sizes: [ 512, 512, 512 ]
    activation: relu # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid

  learn:
    agent_name: ${..config.name}
    full_experiment_name: logs/${resolve_default:${..config.name},${..config.experiment}}
    device: ${..config.device}
    device_name: ${..config.device}
    test: ${..config.test}
    resume: 0
    save_interval: 500 # check for potential saves every this many iterations
    print_log: True

    # rollout params
    max_iterations: ${..config.max_iterations}

    # training params
    cliprange: 0.2
    ent_coef: 0
    nsteps: 8
    noptepochs: 5
    nminibatches: 4 # this is per agent
    max_grad_norm: 1
    optim_stepsize: 3.e-4 # 3e-4 is default for single agent training with constant schedule
    schedule: adaptive # could be adaptive or linear or fixed
    desired_kl: 0.016
    gamma: 0.99
    lam: 0.95
    init_noise_std: 0.8
    log_interval: 1
    asymmetric: False
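
The file relies on OmegaConf-style interpolation: ${.config.seed} and ${..config.name} are relative references into the config block, while "if" and "resolve_default" are custom resolvers that must be registered before the file is loaded. The Python sketch below shows one way to load and resolve such a config; the resolver implementations are assumptions inferred from how the file uses them, not code taken from this repository.

# Minimal loading sketch (assumption, not repository code): register the custom
# OmegaConf resolvers the YAML refers to, then load and resolve the file.
from omegaconf import OmegaConf

# ${if:cond,a,b} -> a when cond is truthy (e.g. a non-empty checkpoint path), else b.
OmegaConf.register_new_resolver("if", lambda cond, a, b: a if cond else b, replace=True)

# ${resolve_default:default,value} -> value unless it is empty, in which case default.
OmegaConf.register_new_resolver(
    "resolve_default",
    lambda default, value: default if value in ("", None) else value,
    replace=True,
)

cfg = OmegaConf.load("ftr_algo_ppo_cfg.yaml")

# Relative interpolations resolve against the enclosing "params" node:
print(cfg.params.seed)                        # 40, via ${.config.seed}
print(cfg.params.load_checkpoint)             # False, because config.checkpoint is ""
print(cfg.params.learn.full_experiment_name)  # logs/ftr_algo_ppo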
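
The learn block mixes rollout settings (nsteps, noptepochs, nminibatches) with a learning-rate schedule driven by desired_kl. The snippet below sketches how such a KL-adaptive schedule is commonly implemented (for example in NVIDIA's rl-pytorch PPO, which uses the same parameter names); the exact thresholds and clamps used by this repository are an assumption.

# Sketch of a KL-adaptive step-size rule matching "schedule: adaptive" and
# "desired_kl" above; thresholds and clamps are assumptions, not repository code.
def adapt_step_size(step_size: float, kl: float, desired_kl: float = 0.016) -> float:
    """Lower the learning rate when the policy update overshoots the KL target,
    raise it when the update stays far below the target."""
    if kl > desired_kl * 2.0:
        return max(1e-5, step_size / 1.5)
    if 0.0 < kl < desired_kl / 2.0:
        return min(1e-2, step_size * 1.5)
    return step_size

# Example, starting from the configured optim_stepsize of 3e-4:
lr = 3e-4
lr = adapt_step_size(lr, kl=0.05)    # KL too high -> lr shrinks to 2e-4
lr = adapt_step_size(lr, kl=0.004)   # KL too low  -> lr grows back to 3e-4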