# sd21-env.sh.example
# Configure these values.
# This is the model type. "lora" is the default; "full" trains the full u-net and possibly the text encoder.
export MODEL_TYPE="lora"
# ControlNet model training is only supported when MODEL_TYPE='full'
# DeepFloyd does not currently support ControlNet model training.
# See this document for more information: https://github.com/bghira/SimpleTuner/blob/main/documentation/CONTROLNET.md
export CONTROLNET=false
# DoRA (weight-decomposed LoRA) enhances LoRA training, but it will run more slowly at the same rank.
# See: https://arxiv.org/abs/2402.09353
# See: https://github.com/huggingface/peft/pull/1474
export USE_DORA=false
# The BitFit freeze strategy freezes everything in the u-net except the biases.
# This may help retain the full model's underlying capabilities. BitFit is currently untested with LoRA and not known to work.
if [[ "$MODEL_TYPE" == "full" ]]; then
# When training a full model, we will rely on BitFit to keep the u-net intact.
export USE_BITFIT=true
elif [[ "$MODEL_TYPE" == "lora" ]]; then
export USE_BITFIT=false
fi
# Valid choices: DEBUG, INFO, WARNING, ERROR
export SIMPLETUNER_LOG_LEVEL=INFO
# Reproducible training.
export TRAINING_SEED=42
# Restart where we left off. Change this to "checkpoint-1234" to start from a specific checkpoint.
export RESUME_CHECKPOINT="latest"
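# For example, to resume from a specific checkpoint:
#export RESUME_CHECKPOINT="checkpoint-1234"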
# How often to checkpoint. Depending on your learning rate, you may wish to change this.
# With a high gradient accumulation setting (e.g. 10 steps), more frequent checkpoints might be preferable at first.
export CHECKPOINTING_STEPS=150
# How many checkpoints to keep.
export CHECKPOINTING_LIMIT=3
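# For illustration: with CHECKPOINTING_STEPS=150 and CHECKPOINTING_LIMIT=3, only the three
# most recent checkpoints (covering the last 450 steps) are kept on disk.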
# The LR scheduler will "peak" at this value after LR_WARMUP_STEPS.
export LEARNING_RATE=1e-6 #@param {type:"number"}
# The polynomial LR scheduler will decay to this LR after peaking at LEARNING_RATE.
export LEARNING_RATE_END=4e-7 #@param {type:"number"}
# Sine is the recommended LR scheduler, but polynomial and constant also work.
export LR_SCHEDULE="sine"
# Max number of steps OR epochs can be used. Not both.
# NOTE: defined here, before LR_WARMUP_STEPS, because the warmup arithmetic below depends on it.
export MAX_NUM_STEPS=30000
# Whether warmup is used depends on whether you train by epochs or by num_steps.
export LR_WARMUP_STEPS=$((MAX_NUM_STEPS / 10))
#export LR_WARMUP_STEPS=0
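# With MAX_NUM_STEPS=30000 as set above, the default warmup evaluates to 3000 steps.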
# Caption dropout can help generalise style or produce overall improvements across the entire model.
# If you are setting out to train SD 2.1 on e.g. photographs to improve its realism, you want this at about 10% (0.1).
# If it is disabled, the training improvements/changes will be more limited to the captions shown.
# Default: 0.1. Use 0 to disable. Highest recommended value: 0.2
export CAPTION_DROPOUT_PROBABILITY=0.1
# How the trainer should locate your captions.
# "filename" will use the image filename, replacing underscores to spaces, and a couple other clean-ups.
# "textfile" will use contents of a .txt file next to the image with the same filename.
export CAPTION_STRATEGY="filename"
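# Illustrative example: with CAPTION_STRATEGY="filename", an image named
# "a_photo_of_a_teddy_bear.png" would yield a caption like "a photo of a teddy bear".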
# Base model configuration.
# Using a Huggingface Hub model:
export MODEL_NAME="stabilityai/stable-diffusion-2-1"
# Using a local path to a huggingface hub model or saved checkpoint:
#export MODEL_NAME="/notebooks/datasets/models/pipeline"
# Use this to append an instance prompt to each caption, useful for adding trigger words.
#export INSTANCE_PROMPT="lotr style "
# Name the project on WandB.
export TRACKER_PROJECT_NAME="simpletuner-sd2x"
export TRACKER_RUN_NAME
TRACKER_RUN_NAME="$(date +%s)"  # Use the current Unix timestamp as the run name.
# Location of training data.
export BASE_DIR="/notebooks/datasets"
export OUTPUT_DIR="${BASE_DIR}/models"
# Set this to "true" to push your model to Hugging Face Hub.
export PUSH_TO_HUB="false"
# If PUSH_TO_HUB and PUSH_CHECKPOINTS are both enabled, every saved checkpoint will be pushed to Hugging Face Hub.
export PUSH_CHECKPOINTS="true"
# This will be the model name for your final hub upload, e.g. "yourusername/yourmodelname"
# It defaults to the wandb project name, but you can override it here:
#export HUB_MODEL_NAME="yourusername/yourmodelname"
# The dataloader configuration. See SimpleTuner's dataloader documentation for the schema.
export DATALOADER_CONFIG="multidatabackend_sd2x.json"
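# A minimal illustrative sketch of a dataloader entry (the "id" and path are hypothetical;
# field names follow SimpleTuner's dataloader examples, so consult the documentation for the
# authoritative schema):
# [
#   {
#     "id": "my-dataset",
#     "type": "local",
#     "instance_data_dir": "/notebooks/datasets/my-dataset",
#     "caption_strategy": "filename",
#     "resolution": 1.0,
#     "resolution_type": "area"
#   }
# ]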
# MAX_NUM_STEPS is set earlier in this file, before LR_WARMUP_STEPS is derived from it.
# 30000 steps will likely overtrain, but that's fine.
export NUM_EPOCHS=0
# Adjust this for your GPU memory size.
export TRAIN_BATCH_SIZE=1
# "area" resolution type means we use megapixel values instead of pixel edge length.
# "pixel" is using pixel edge length on the smaller or square side of the image.
export RESOLUTION_TYPE="area"
export RESOLUTION=1.0 # 1.0 Megapixel training sizes
export MINIMUM_RESOLUTION=$RESOLUTION
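# For illustration: with RESOLUTION_TYPE="area" and RESOLUTION=1.0, images are bucketed to
# roughly one megapixel, e.g. about 1024x1024 for a square image (1024*1024 ≈ 1.05MP).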
# How many decimals to round aspect buckets to.
#export ASPECT_BUCKET_ROUNDING=2
# Validation periodically generates test outputs from the model during training.
export VALIDATION_RESOLUTION=$RESOLUTION # The square resolution of the validation images. Default: $RESOLUTION
export VALIDATION_STEPS=250 # How many steps between each validation run. Default: 250
export VALIDATION_NUM_INFERENCE_STEPS=25 # How many inference steps to do. Default: 25
export VALIDATION_PROMPT="an ethnographic photograph of a teddy bear at a picnic" # What to make for the first/only test image.
export VALIDATION_NEGATIVE_PROMPT="blurry, ugly, cropped, amputated" # What to avoid in the first/only test image.
# These can be left alone.
export VALIDATION_GUIDANCE=7.5
export VALIDATION_GUIDANCE_RESCALE=0.7
# Training schedule options
export VAE_BATCH_SIZE=4 # How many images to batch-encode during VAE caching. Reducing this can resolve an OOM, should one occur during caching.
export GRADIENT_ACCUMULATION_STEPS=1 # Accumulate gradients over this many steps. Default: 1
export TEXT_ENCODER_LIMIT=101 # Train the text encoder for this % of the training process. Currently buggy.
export TEXT_ENCODER_FREEZE_STRATEGY='before' # before, after, between.
export TEXT_ENCODER_FREEZE_BEFORE=22 # Ignored when using 'after' strategy.
export TEXT_ENCODER_FREEZE_AFTER=24 # Ignored when using 'before' strategy.
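# One plausible reading of the strategies (check the trainer's documentation for exact semantics):
# 'before' freezes layers before TEXT_ENCODER_FREEZE_BEFORE, 'after' freezes layers after
# TEXT_ENCODER_FREEZE_AFTER, and 'between' uses both values to bound the frozen range.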
export MIXED_PRECISION="bf16" # bf16 might not be supported on all GPUs; fp32 will be needed for those.
export PURE_BF16=true # Will not use mixed precision, but rather pure bf16 (bf16 requires pytorch 2.3 on MPS.)
export TRAINING_DYNAMO_BACKEND='no' # or 'inductor' if you want to brave PyTorch 2 compile issues
export ALLOW_TF32="true" # Older GPUs (pre-Ampere) don't support TF32; set this to "false" in that case.
# Choices: adamw, adamw8bit, adafactor, dadaptation
export OPTIMIZER="adamw8bit"
# This has to be changed if you're training with multiple GPUs.
export TRAINING_NUM_PROCESSES=1
export TRAINING_NUM_MACHINES=1
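# For example, on a single machine with 8 GPUs (following accelerate's process-count semantics):
#export TRAINING_NUM_PROCESSES=8
#export TRAINING_NUM_MACHINES=1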
# EMA is a strong regularisation method that uses a lot of extra VRAM to hold two copies of the weights.
# This is worthwhile on large training runs, but not so much for smaller training runs.
export USE_EMA=false
export EMA_DECAY=0.999
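# For reference, the standard EMA update applied each step takes the form:
#   ema_weights = EMA_DECAY * ema_weights + (1 - EMA_DECAY) * current_weights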
export TRAINER_EXTRA_ARGS=""
## You may benefit from directing training toward a specific weighted subset of timesteps.
# In this example, we train the final 25% of the timestep schedule with a 3x bias.
#export TRAINER_EXTRA_ARGS="${TRAINER_EXTRA_ARGS} --timestep_bias_strategy=later --timestep_bias_portion=0.25 --timestep_bias_multiplier=3"
# In this example, we train the earliest 25% of the timestep schedule with a 5x bias.
#export TRAINER_EXTRA_ARGS="${TRAINER_EXTRA_ARGS} --timestep_bias_strategy=earlier --timestep_bias_portion=0.25 --timestep_bias_multiplier=5"
# Here, we designate that timesteps 200 to 500 specifically should be prioritised.
#export TRAINER_EXTRA_ARGS="${TRAINER_EXTRA_ARGS} --timestep_bias_strategy=range --timestep_bias_begin=200 --timestep_bias_end=500 --timestep_bias_multiplier=3"
export TOKENIZERS_PARALLELISM=false