# sd21-env.sh.example
# Configure these values.
# This is the model type. "lora" is the default; "full" trains the full u-net and possibly the text encoder.
export MODEL_TYPE="lora"
# ControlNet model training is only supported when MODEL_TYPE='full'
# DeepFloyd does not currently support ControlNet model training.
# See this document for more information: https://github.com/bghira/SimpleTuner/blob/main/documentation/CONTROLNET.md
export CONTROLNET=false
# DoRA (weight-decomposed LoRA) enhances LoRA training, but it will run more slowly at the same rank.
# See: https://arxiv.org/abs/2402.09353
# See: https://github.com/huggingface/peft/pull/1474
export USE_DORA=false
# The BitFit freeze strategy freezes everything in the u-net except the biases.
# This may help retain the full model's underlying capabilities. BitFit is currently untested with LoRA and not known to work.
if [[ "$MODEL_TYPE" == "full" ]]; then
# When training a full model, we will rely on BitFit to keep the u-net intact.
export USE_BITFIT=true
elif [[ "$MODEL_TYPE" == "lora" ]]; then
export USE_BITFIT=false
fi
# Valid choices: DEBUG, INFO, WARNING, ERROR
export SIMPLETUNER_LOG_LEVEL=INFO
# Reproducible training.
export TRAINING_SEED=42
# Restart where we left off. Change this to "checkpoint-1234" to start from a specific checkpoint.
export RESUME_CHECKPOINT="latest"
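# For example, to resume from a specific checkpoint:
#export RESUME_CHECKPOINT="checkpoint-1234"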
# How often to checkpoint. Depending on your learning rate, you may wish to change this.
# With a high gradient accumulation setting (e.g. 10 steps), more frequent checkpoints might be preferable at first.
export CHECKPOINTING_STEPS=150
# How many checkpoints to keep.
export CHECKPOINTING_LIMIT=3
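# For illustration: with CHECKPOINTING_STEPS=150 and CHECKPOINTING_LIMIT=3, only the three
# most recent checkpoints (covering the last 450 steps) are kept on disk.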
# The LR scheduler will "peak" at this value after LR_WARMUP_STEPS.
export LEARNING_RATE=1e-6 #@param {type:"number"}
# The polynomial LR scheduler will decay to this LR after peaking at LEARNING_RATE.
export LEARNING_RATE_END=4e-7 #@param {type:"number"}
# Sine is the recommended LR scheduler, but polynomial and constant also work.
export LR_SCHEDULE="sine"
# Max number of steps OR epochs can be used. Not both.
# NOTE: defined here, before LR_WARMUP_STEPS, because the warmup arithmetic below depends on it.
export MAX_NUM_STEPS=30000
# Whether warmup is used depends on whether you train by epochs or by num_steps.
export LR_WARMUP_STEPS=$((MAX_NUM_STEPS / 10))
#export LR_WARMUP_STEPS=0
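# With MAX_NUM_STEPS=30000 as set above, the default warmup evaluates to 3000 steps.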
# Caption dropout can help generalise style or produce overall improvements across the entire model.
# If you are setting out to train SD 2.1 on e.g. photographs to improve its realism, you want this at about 10% (0.1).
# If it is disabled, the training improvements/changes will be more limited to the captions shown.
# Default: 0.1. Use 0 to disable. Highest recommended value: 0.2
export CAPTION_DROPOUT_PROBABILITY=0.1
# How the trainer should locate your captions.
# "filename" will use the image filename, replacing underscores to spaces, and a couple other clean-ups.
# "textfile" will use contents of a .txt file next to the image with the same filename.
export CAPTION_STRATEGY="filename"
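# Illustrative example: with CAPTION_STRATEGY="filename", an image named
# "a_photo_of_a_teddy_bear.png" would yield a caption like "a photo of a teddy bear".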
# Base model configuration.
# Using a Huggingface Hub model:
export MODEL_NAME="stabilityai/stable-diffusion-2-1"
# Using a local path to a huggingface hub model or saved checkpoint:
#export MODEL_NAME="/notebooks/datasets/models/pipeline"
# Use this to append an instance prompt to each caption, useful for adding trigger words.
#export INSTANCE_PROMPT="lotr style "
# Name the project on WandB.
export TRACKER_PROJECT_NAME="simpletuner-sd2x"
export TRACKER_RUN_NAME
TRACKER_RUN_NAME="$(date +%s)"  # Use the current Unix timestamp as the run name.
# Location of training data.
export BASE_DIR="/notebooks/datasets"
export OUTPUT_DIR="${BASE_DIR}/models"
# Set this to "true" to push your model to Hugging Face Hub.
export PUSH_TO_HUB="false"
# If PUSH_TO_HUB and PUSH_CHECKPOINTS are both enabled, every saved checkpoint will be pushed to Hugging Face Hub.
export PUSH_CHECKPOINTS="true"
# This will be the model name for your final hub upload, e.g. "yourusername/yourmodelname"
# It defaults to the wandb project name, but you can override it here:
#export HUB_MODEL_NAME="yourusername/yourmodelname"
# The dataloader configuration. See SimpleTuner's dataloader documentation for the schema.
export DATALOADER_CONFIG="multidatabackend_sd2x.json"
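# A minimal illustrative sketch of a dataloader entry (the "id" and path are hypothetical;
# field names follow SimpleTuner's dataloader examples, so consult the documentation for the
# authoritative schema):
# [
#   {
#     "id": "my-dataset",
#     "type": "local",
#     "instance_data_dir": "/notebooks/datasets/my-dataset",
#     "caption_strategy": "filename",
#     "resolution": 1.0,
#     "resolution_type": "area"
#   }
# ]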
# MAX_NUM_STEPS is set earlier in this file, before LR_WARMUP_STEPS is derived from it.
# 30000 steps will likely overtrain, but that's fine.
export NUM_EPOCHS=0
# Adjust this for your GPU memory size.
export TRAIN_BATCH_SIZE=1
# "area" resolution type means we use megapixel values instead of pixel edge length.
# "pixel" is using pixel edge length on the smaller or square side of the image.
export RESOLUTION_TYPE="area"
export RESOLUTION=1.0 # 1.0 Megapixel training sizes
export MINIMUM_RESOLUTION=$RESOLUTION
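# For illustration: with RESOLUTION_TYPE="area" and RESOLUTION=1.0, images are bucketed to
# roughly one megapixel, e.g. about 1024x1024 for a square image (1024*1024 ≈ 1.05MP).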
# How many decimals to round aspect buckets to.
#export ASPECT_BUCKET_ROUNDING=2
# Validation periodically generates test outputs from the model during training.
export VALIDATION_RESOLUTION=$RESOLUTION # The square resolution of the validation images. Default: $RESOLUTION
export VALIDATION_STEPS=250 # How many steps between each validation run. Default: 250
export VALIDATION_NUM_INFERENCE_STEPS=25 # How many inference steps to do. Default: 25
export VALIDATION_PROMPT="an ethnographic photograph of a teddy bear at a picnic" # What to make for the first/only test image.
export VALIDATION_NEGATIVE_PROMPT="blurry, ugly, cropped, amputated" # What to avoid in the first/only test image.
# These can be left alone.
export VALIDATION_GUIDANCE=7.5
export VALIDATION_GUIDANCE_RESCALE=0.7
# Training schedule options
export VAE_BATCH_SIZE=4 # How many images to batch-encode during VAE caching. Reducing this can resolve an OOM, should one occur during caching.
export GRADIENT_ACCUMULATION_STEPS=1 # Accumulate gradients over this many steps. Default: 1
export TEXT_ENCODER_LIMIT=101 # Train the text encoder for this % of the training process. Currently buggy.
export TEXT_ENCODER_FREEZE_STRATEGY='before' # before, after, between.
export TEXT_ENCODER_FREEZE_BEFORE=22 # Ignored when using 'after' strategy.
export TEXT_ENCODER_FREEZE_AFTER=24 # Ignored when using 'before' strategy.
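# One plausible reading of the strategies (check the trainer's documentation for exact semantics):
# 'before' freezes layers before TEXT_ENCODER_FREEZE_BEFORE, 'after' freezes layers after
# TEXT_ENCODER_FREEZE_AFTER, and 'between' uses both values to bound the frozen range.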
export MIXED_PRECISION="bf16" # bf16 might not be supported on all GPUs; fp32 will be needed for those.
export PURE_BF16=true # Will not use mixed precision, but rather pure bf16 (bf16 requires pytorch 2.3 on MPS.)
export TRAINING_DYNAMO_BACKEND='no' # or 'inductor' if you want to brave PyTorch 2 compile issues
export ALLOW_TF32="true" # Older GPUs (pre-Ampere) don't support TF32; set this to "false" in that case.
# Choices: adamw, adamw8bit, adafactor, dadaptation
export OPTIMIZER="adamw8bit"
# This has to be changed if you're training with multiple GPUs.
export TRAINING_NUM_PROCESSES=1
export TRAINING_NUM_MACHINES=1
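# For example, on a single machine with 8 GPUs (following accelerate's process-count semantics):
#export TRAINING_NUM_PROCESSES=8
#export TRAINING_NUM_MACHINES=1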
# EMA is a strong regularisation method that uses a lot of extra VRAM to hold two copies of the weights.
# This is worthwhile on large training runs, but not so much for smaller training runs.
export USE_EMA=false
export EMA_DECAY=0.999
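# For reference, the standard EMA update applied each step takes the form:
#   ema_weights = EMA_DECAY * ema_weights + (1 - EMA_DECAY) * current_weights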
export TRAINER_EXTRA_ARGS=""
## You may benefit from directing training toward a specific weighted subset of timesteps.
# In this example, we train the final 25% of the timestep schedule with a 3x bias.
#export TRAINER_EXTRA_ARGS="${TRAINER_EXTRA_ARGS} --timestep_bias_strategy=later --timestep_bias_portion=0.25 --timestep_bias_multiplier=3"
# In this example, we train the earliest 25% of the timestep schedule with a 5x bias.
#export TRAINER_EXTRA_ARGS="${TRAINER_EXTRA_ARGS} --timestep_bias_strategy=earlier --timestep_bias_portion=0.25 --timestep_bias_multiplier=5"
# Here, we designate that timesteps 200 to 500 specifically should be prioritised.
#export TRAINER_EXTRA_ARGS="${TRAINER_EXTRA_ARGS} --timestep_bias_strategy=range --timestep_bias_begin=200 --timestep_bias_end=500 --timestep_bias_multiplier=3"
export TOKENIZERS_PARALLELISM=false