config.yml

Job:
    run_mode: "Training"  
    #{Training, Predict, Repeat, CV, Hyperparameter, Ensemble, Analysis}
    Training:
        job_name: "my_train_job"
        reprocess: "False"    
        model: DOS_STO   
        load_model: "False"
        save_model: "True"
        model_path: "my_model.pth"
        write_output: "True"
        parallel: "True"
        #seed=0 means random initalization
        seed: 0        
    Predict:
        job_name: "my_predict_job"
        reprocess: "False"    
        model_path: "my_model.pth"
        write_output: "True"
        seed: 0     
    Repeat:
        job_name: "my_repeat_job"
        reprocess: "False"    
        model: DOS_STO   
        model_path: "my_model.pth"
        write_output: "False"
        parallel: "True"
        seed: 0      
        ###specific options
        #number of repeat trials
        repeat_trials: 5
    CV:
        job_name: "my_CV_job"
        reprocess: "False"    
        model: DOS_STO   
        write_output: "True"
        parallel: "True"
        seed: 0     
        ###specific options
        #number of folds for n-fold CV
        cv_folds: 5    
    Hyperparameter:
        job_name: "my_hyperparameter_job"
        reprocess: "False"    
        model: DOS_STO   
        seed: 0  
        ###specific options   
        hyper_trials: 200
        #number of concurrent trials (can be greater than number of GPUs)
        hyper_concurrency: 8
        #frequency of checkpointing and update (default: 1)
        hyper_iter: 25
        #resume a previous hyperparameter optimization run
        hyper_resume: "True"
        #Verbosity of ray tune output; available: (1, 2, 3)
        hyper_verbosity: 1
        #Delete processed datasets
        hyper_delete_processed: "True"
    Ensemble:
        job_name: "my_ensemble_job"
        reprocess: "False"     
        save_model: "False"
        model_path: "my_model.pth"
        write_output: "Partial"
        parallel: "True"
        seed: 0   
        ###specific options  
        #List of models to use: (Example: "CGCNN_demo,MPNN_demo,SchNet_demo,MEGNet_demo" or "CGCNN_demo,CGCNN_demo,CGCNN_demo,CGCNN_demo")
        ensemble_list: "DOS_STO,DOS_STO,DOS_STO,DOS_STO,DOS_STO"  
    
Processing:
    #Whether to use "inmemory" or "large" format for pytorch-geometric dataset. Reccomend inmemory unless the dataset is too large
    dataset_type: "inmemory"  
    #Path to data files
    data_path: "/data" 
    #Path to target file within data_path
    target_path: "targets.csv"
    #Method of obtaining atom idctionary: available:(provided, default, blank, generated)
    dictionary_source: "default"   
    #Path to atom dictionary file within data_path
    dictionary_path: "atom_dict.json"     
    #Format of data files (limit to those supported by ASE)
    data_format: "vasp"
    #Print out processing info 
    verbose: "True"
    #graph specific settings 
    graph_max_radius : 8
    graph_max_neighbors : 12
    edge_features: "True"
    graph_edge_length : 50 
    #LMBTR specific settings
    LMBTR_descriptor: "True"
    LMBTR_rcut : 8
    LMBTR_grid : 50
    LMBTR_sigma : 0.1
    #SOAP specific settings
    SOAP_descriptor: "True"
    SOAP_rcut : 8
    SOAP_nmax : 6
    SOAP_lmax : 4
    SOAP_sigma : 0.4

Training:     
    #Index of target column in targets.csv
    target_index: -1
    #Loss functions (from pytorch) examples: l1_loss, mse_loss, binary_cross_entropy
    loss: "l1_loss"   
    features_loss: "True"    
    #Ratios for train/val/test split out of a total of 1  
    train_ratio: 0.8
    val_ratio: 0.05
    test_ratio: 0.15
    #Training print out frequency (print per n number of epochs)
    verbosity: 5
     
Models:        
    DOS_bulk:
        model: DOSpredict
        dim1: 370
        dim2: 370
        pre_fc_count: 1
        gc_count: 9
        batch_norm: "True"
        batch_track_stats: "False"
        dropout_rate: 0.1
        epochs: 800
        lr: 0.00034
        batch_size: 140
        optimizer: "AdamW"
        optimizer_args: {"weight_decay":0.1}
        scheduler: "ReduceLROnPlateau"
        scheduler_args: {"mode":"min", "factor":0.8, "patience":40, "min_lr":0.00001, "threshold":0.0002} 
    DOS_surf:
        model: DOSpredict
        dim1: 350
        dim2: 270
        pre_fc_count: 1
        gc_count: 6
        batch_norm: "True"
        batch_track_stats: "False"
        dropout_rate: 0.1
        epochs: 800
        lr: 0.00057
        batch_size: 50
        optimizer: "AdamW"
        optimizer_args: {"weight_decay":0.01}
        scheduler: "ReduceLROnPlateau"
        scheduler_args: {"mode":"min", "factor":0.8, "patience":40, "min_lr":0.00001, "threshold":0.0002} 
    DOS_STO:
        model: DOSpredict
        dim1: 370
        dim2: 370
        pre_fc_count: 1
        gc_count: 9
        batch_norm: "True"
        batch_track_stats: "False"
        dropout_rate: 0.05
        epochs: 2000
        lr: 0.00047
        batch_size: 180
        optimizer: "AdamW"
        optimizer_args: {"weight_decay":0.1}
        scheduler: "ReduceLROnPlateau"
        scheduler_args: {"mode":"min", "factor":0.8, "patience":40, "min_lr":0.00001, "threshold":0.0002}  
    DOS_STO_SOAP:
        model: SOAP_DOS
        dim1: 340
        fc_count: 4
        epochs: 800
        lr: 0.001024
        batch_size: 70
        optimizer: "AdamW"
        optimizer_args: {}
        scheduler: "ReduceLROnPlateau"
        scheduler_args: {"mode":"min", "factor":0.8, "patience":10, "min_lr":0.00001, "threshold":0.0002}   
    DOS_surf_SOAP:
        model: SOAP_DOS
        dim1: 300
        fc_count: 4
        epochs: 800
        lr: 0.00078
        batch_size: 90
        optimizer: "AdamW"
        optimizer_args: {}
        scheduler: "ReduceLROnPlateau"
        scheduler_args: {"mode":"min", "factor":0.8, "patience":10, "min_lr":0.00001, "threshold":0.0002}            
    DOS_bulk_SOAP:
        model: SOAP_DOS
        dim1: 390
        fc_count: 8
        epochs: 800
        lr: 0.000157
        batch_size: 70
        optimizer: "AdamW"
        optimizer_args: {}
        scheduler: "ReduceLROnPlateau"
        scheduler_args: {"mode":"min", "factor":0.8, "patience":10, "min_lr":0.00001, "threshold":0.0002}     
    DOS_STO_LMBTR:
        model: LMBTR_DOS
        dim1: 390
        fc_count: 9
        epochs: 800
        lr: 0.000484
        batch_size: 50
        optimizer: "AdamW"
        optimizer_args: {}
        scheduler: "ReduceLROnPlateau"
        scheduler_args: {"mode":"min", "factor":0.8, "patience":10, "min_lr":0.00001, "threshold":0.0002}   
    DOS_surf_LMBTR:
        model: LMBTR_DOS
        dim1: 360
        fc_count: 5
        epochs: 800
        lr: 0.00175
        batch_size: 70
        optimizer: "AdamW"
        optimizer_args: {}
        scheduler: "ReduceLROnPlateau"
        scheduler_args: {"mode":"min", "factor":0.8, "patience":10, "min_lr":0.00001, "threshold":0.0002}               
    DOS_dummy:
        model: Dummy
        epochs: 1
        lr: 0.001
        batch_size: 64
        optimizer: "AdamW"
        optimizer_args: {}
        scheduler: "ReduceLROnPlateau"
        scheduler_args: {"mode":"min", "factor":0.8, "patience":10, "min_lr":0.00001, "threshold":0.0002}