diff --git a/src/scripts/train/OLMo2-ladder.py b/src/scripts/train/OLMo2-ladder.py index 9b21a73c..a8668ed0 100644 --- a/src/scripts/train/OLMo2-ladder.py +++ b/src/scripts/train/OLMo2-ladder.py @@ -20,7 +20,7 @@ class BaselineModelLadder(ModelLadder): MBZ_SIZES: ClassVar[Dict[ModelSize, int]] = { # TODO: may need to tune these # =============================== - ModelSize.size_190M: 16 * 4096, + ModelSize.size_190M: 8 * 4096, ModelSize.size_370M: 16 * 4096, ModelSize.size_600M: 16 * 4096, ModelSize.size_760M: 16 * 4096,