From 4e6697f68d33bd4f632384709353643ddf0a52a5 Mon Sep 17 00:00:00 2001
From: autoresearch
Date: Sun, 8 Mar 2026 03:56:11 +0000
Subject: [PATCH] warmdown 0.5 to 0.7 (more cooldown)

---
 train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/train.py b/train.py
index b8018f8..37bdb2c 100644
--- a/train.py
+++ b/train.py
@@ -442,7 +442,7 @@ SCALAR_LR = 0.5 # learning rate for per-layer scalars (Adam)
 WEIGHT_DECAY = 0.2 # cautious weight decay for Muon
 ADAM_BETAS = (0.8, 0.95) # Adam beta1, beta2
 WARMUP_RATIO = 0.0 # fraction of time budget for LR warmup
-WARMDOWN_RATIO = 0.5 # fraction of time budget for LR warmdown
+WARMDOWN_RATIO = 0.7 # fraction of time budget for LR warmdown
 FINAL_LR_FRAC = 0.0 # final LR as fraction of initial
 
 # Model size