Skip to content

Latest commit

 

History

History
164 lines (139 loc) · 5.53 KB

3_plume_train.md

File metadata and controls

164 lines (139 loc) · 5.53 KB

Instructions/Scripts to train agents from scratch

Important Notes

  • All development was done on an Ubuntu Linux v20.04 workstation with Intel Core i9-9940X CPU and a TITAN RTX GPU.
  • Each seed takes about 16 hours to train and evaluate, with MLP and RNN models using 1 and 4 cores in parallel respectively.
  • Deep RL experiments, in general, are tough to reproduce. See, for example, Henderson et al., "Deep Reinforcement Learning That Matters" (AAAI 2018). [NOTE: the original reference link was lost in extraction — confirm intended citation.]
  • I have done my best to control for randomness, but there are many sources of stochasticity to control for -- simulation seeds, training initializations, agent network initialization, compounding behavioral differences between agents, etc.
  • However, I have run this code multiple times, and the results are qualitatively the same; quantitatively, there is large variability between agents, but selecting top-5 out of 14 agents for each architecture reduces that variability and gives us a good representative sample of each class. (Experiment with increasing the number of agents trained from 14 to higher for potentially more control on the variability.)

RNN

# --- RNN training: launch 14 independent VRNN agents in the background, ---
# --- throttled to MAXJOBS concurrent jobs, each with a random seed.     ---
NUMPROC=4 # Walle/Weekend
SHAPE="step oob"

BIRTHX="0.3 0.8" #  Original Config
DATASET="constantx5b5 noisy3x5b5"
STEPS="1000000 4000000"
VARX="2.0 0.5"
DMAX="0.8 0.8"
DMIN="0.7 0.4"

ALGO=ppo
HIDDEN=64
DECAY=0.0001

EXPT=ExptMemory$(date '+%Y%m%d')
SAVEDIR=./trained_models/${EXPT}/
mkdir -p "$SAVEDIR"

MAXJOBS=7
# Outer loop is only a run counter: the actual seed is drawn from $RANDOM below.
for _ in $(seq 14); do
  for RNNTYPE in VRNN; do
    # Throttle: block until a background job slot frees up.
    while (( $(jobs -p | wc -l) >= MAXJOBS )); do sleep 10; done
    SEED=$RANDOM  # fresh random seed per run

    # Space-free versions of the multi-valued params, used to build the run tag.
    DATASTRING=${DATASET// /}
    SHAPESTRING=${SHAPE// /}
    BXSTRING=${BIRTHX// /}
    TSTRING=${STEPS// /}
    QVARSTR=${VARX// /}
    DMAXSTR=${DMAX// /}
    DMINSTR=${DMIN// /}

    # Unique run identifier: date + config summary + seed + one random hex byte
    # (the hex byte disambiguates runs started in the same second with equal seeds).
    OUTSUFFIX=$(date '+%Y%m%d')_${RNNTYPE}_${DATASTRING}_${SHAPESTRING}_bx${BXSTRING}_t${TSTRING}_q${QVARSTR}_dmx${DMAXSTR}_dmn${DMINSTR}_h${HIDDEN}_wd${DECAY}_n${NUMPROC}_code${RNNTYPE}_seed${SEED}$(openssl rand -hex 1)
    echo "$OUTSUFFIX"

    # NOTE: multi-valued variables ($DATASET, $STEPS, $BIRTHX, $VARX, $DMAX,
    # $DMIN, $SHAPE) are intentionally UNQUOTED so they word-split into
    # separate CLI arguments. Do not quote them.
    nice python -u main.py --env-name plume \
      --recurrent-policy \
      --dataset $DATASET \
      --num-env-steps $STEPS \
      --birthx $BIRTHX \
      --flipping True \
      --qvar $VARX \
      --save-dir "$SAVEDIR" \
      --log-interval 1 \
      --r_shaping $SHAPE \
      --algo "$ALGO" \
      --seed "$SEED" \
      --squash_action True \
      --diff_max $DMAX \
      --diff_min $DMIN \
      --num-processes "$NUMPROC" \
      --num-mini-batch "$NUMPROC" \
      --odor_scaling True \
      --rnn_type "$RNNTYPE" \
      --hidden_size "$HIDDEN" \
      --weight_decay "$DECAY" \
      --use-gae --num-steps 2048 --lr 3e-4 --entropy-coef 0.005 --value-loss-coef 0.5 --ppo-epoch 10 --gamma 0.99 --gae-lambda 0.95 --use-linear-lr-decay \
      --outsuffix "$OUTSUFFIX" > "${SAVEDIR}/${OUTSUFFIX}.log" 2>&1 &

    echo "Sleeping.."
    sleep 0.5
  done
done

# Stream all training logs (Ctrl-C stops tail only; jobs keep running).
tail -f ${SAVEDIR}/*.log

MLPs — Memory task (each job uses a single process; multiple jobs still run concurrently in the background)

# --- MLP training: sweep frame-stacking depths (02..12) x 14 random seeds, ---
# --- throttled to MAXJOBS concurrent background jobs.                      ---
NUMPROC=1 # Mycroft Weeknight
SHAPE="step oob"
BIRTHX="0.3 1.0"
DATASET="constantx5b5 noisy3x5b5"
VARX="1.0 0.5"
STEPS="100000 2000000"
DMAX="0.8 0.8"
DMIN="0.8 0.4"

# NOTE: this second config set deliberately OVERRIDES the one above
# (single dataset, single curriculum stage). Comment out whichever set
# you do not want.
SHAPE="step oob"
BIRTHX="1.0"
DATASET="noisy3x5b5"
VARX="0.5"
STEPS="2000000"
DMAX="0.8"
DMIN="0.3"

ALGO=ppo
HIDDEN=64
DECAY=0.0001

EXPT=ExptMemory$(date '+%Y%m%d')
SAVEDIR=./trained_models/${EXPT}/
mkdir -p "$SAVEDIR"

MAXJOBS=28
# Outer loop is only a run counter: the actual seed is drawn from $RANDOM below.
for _ in $(seq 14); do
  for STACK in 02 04 06 08 10 12; do
    # Throttle: block until a background job slot frees up.
    while (( $(jobs -p | wc -l) >= MAXJOBS )); do sleep 10; done
    SEED=$RANDOM  # fresh random seed per run

    # Space-free versions of the multi-valued params, used to build the run tag.
    SHAPESTRING=${SHAPE// /}
    DATASTRING=${DATASET// /}
    BXSTRING=${BIRTHX// /}
    TSTRING=${STEPS// /}
    QVARSTR=${VARX// /}
    DMAXSTR=${DMAX// /}
    DMINSTR=${DMIN// /}

    # Unique run identifier: date + config summary + seed + one random hex byte.
    OUTSUFFIX=$(date '+%Y%m%d')_MLP_s${STACK}_${DATASTRING}_${SHAPESTRING}_bx${BXSTRING}_t${TSTRING}_q${QVARSTR}_dmx${DMAXSTR}_dmn${DMINSTR}_wd${DECAY}_n${NUMPROC}_codeMLPs${STACK}_seed${SEED}$(openssl rand -hex 1)
    echo "$OUTSUFFIX"

    # NOTE: multi-valued variables ($DATASET, $STEPS, $BIRTHX, $VARX, $DMAX,
    # $DMIN, $SHAPE) are intentionally UNQUOTED so they word-split into
    # separate CLI arguments. Do not quote them.
    nice python -u main.py --env-name plume \
      --stacking "$STACK" \
      --dataset $DATASET \
      --num-env-steps $STEPS \
      --birthx $BIRTHX \
      --flipping True \
      --qvar $VARX \
      --save-dir "$SAVEDIR" \
      --log-interval 1 \
      --r_shaping $SHAPE \
      --algo "$ALGO" \
      --seed "$SEED" \
      --squash_action True \
      --diff_max $DMAX \
      --diff_min $DMIN \
      --num-processes "$NUMPROC" \
      --num-mini-batch "$NUMPROC" \
      --odor_scaling True \
      --hidden_size "$HIDDEN" \
      --weight_decay "$DECAY" \
      --use-gae --num-steps 2048 --lr 3e-4 --entropy-coef 0.005 --value-loss-coef 0.5 --ppo-epoch 10 --gamma 0.99 --gae-lambda 0.95 --use-linear-lr-decay \
      --outsuffix "$OUTSUFFIX" > "${SAVEDIR}/${OUTSUFFIX}.log" 2>&1 &
  done
done

# Stream all training logs (Ctrl-C stops tail only; jobs keep running).
tail -f ${SAVEDIR}/*.log