Increase WARMDOWN_RATIO from 0.5 to 0.7 (more cooldown)
This commit is contained in:
@@ -442,7 +442,7 @@ SCALAR_LR = 0.5 # learning rate for per-layer scalars (Adam)
|
||||
WEIGHT_DECAY = 0.2 # cautious weight decay for Muon
|
||||
ADAM_BETAS = (0.8, 0.95) # Adam beta1, beta2
|
||||
WARMUP_RATIO = 0.0 # fraction of time budget for LR warmup
|
||||
-WARMDOWN_RATIO = 0.5 # fraction of time budget for LR warmdown
|
||||
+WARMDOWN_RATIO = 0.7 # fraction of time budget for LR warmdown
|
||||
FINAL_LR_FRAC = 0.0 # final LR as fraction of initial
|
||||
|
||||
# Model size
|
||||
|
||||
Reference in New Issue
Block a user