From 772dada6cc7c3fd7dbc959d54917b57b379721c4 Mon Sep 17 00:00:00 2001
From: autoresearch
Date: Sun, 8 Mar 2026 04:36:38 +0000
Subject: [PATCH] FINAL_LR_FRAC 0.0 to 0.05 (small LR floor)

---
 train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/train.py b/train.py
index e7ce028..e8792c6 100644
--- a/train.py
+++ b/train.py
@@ -443,7 +443,7 @@ WEIGHT_DECAY = 0.2 # cautious weight decay for Muon
 ADAM_BETAS = (0.8, 0.95) # Adam beta1, beta2
 WARMUP_RATIO = 0.0 # fraction of time budget for LR warmup
 WARMDOWN_RATIO = 0.7 # fraction of time budget for LR warmdown
-FINAL_LR_FRAC = 0.0 # final LR as fraction of initial
+FINAL_LR_FRAC = 0.05 # final LR as fraction of initial
 
 # Model size
 DEPTH = 9 # number of transformer layers