From 896fa5481a8932bcc9c1a1a791a808dd9c6b671a Mon Sep 17 00:00:00 2001 From: Akshita Bhagia Date: Wed, 29 Jan 2025 13:05:13 -0800 Subject: [PATCH] reduce mbz further --- src/scripts/train/OLMo2-ladder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/train/OLMo2-ladder.py b/src/scripts/train/OLMo2-ladder.py index a8668ed08..747af46d0 100644 --- a/src/scripts/train/OLMo2-ladder.py +++ b/src/scripts/train/OLMo2-ladder.py @@ -20,7 +20,7 @@ class BaselineModelLadder(ModelLadder): MBZ_SIZES: ClassVar[Dict[ModelSize, int]] = { # TODO: may need to tune these # =============================== - ModelSize.size_190M: 8 * 4096, + ModelSize.size_190M: 4 * 4096, ModelSize.size_370M: 16 * 4096, ModelSize.size_600M: 16 * 4096, ModelSize.size_760M: 16 * 4096,