Skip to content

Commit

Permalink
[Optim] Add an option controlling the optimizer eps
Browse files: browse the repository at this point in the history
  • Loading branch information
yzhangcs committed Mar 3, 2025
1 parent b291ad6 commit 7a5c91f
Show file tree
Hide file tree
Showing 6 changed files with 9 additions and 2 deletions.
4 changes: 2 additions & 2 deletions torchtitan/components/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

import copy
import functools

from typing import Any, Callable, Dict, Generic, List, TypeVar

import torch
Expand All @@ -23,7 +22,6 @@
from torchtitan.components.ft import FTManager, has_torchft
from torchtitan.config_manager import JobConfig


__all__ = [
"OptimizersContainer",
"LRSchedulersContainer",
Expand Down Expand Up @@ -273,6 +271,7 @@ def build_optimizers(
)
name = job_config.optimizer.name
lr = job_config.optimizer.lr
eps = job_config.optimizer.eps

optim_implementation = job_config.optimizer.implementation
assert optim_implementation in ["fused", "foreach", "for-loop"]
Expand All @@ -282,6 +281,7 @@ def build_optimizers(

optimizer_kwargs = {
"lr": lr,
"eps": eps,
"betas": (0.9, 0.95),
"weight_decay": 0.1,
"fused": fused,
Expand Down
3 changes: 3 additions & 0 deletions torchtitan/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,9 @@ def __init__(self):
self.parser.add_argument(
"--optimizer.lr", type=float, default=8e-4, help="Learning rate to use"
)
self.parser.add_argument(
"--optimizer.eps", type=float, default=1e-8, help="Epsilon value to use"
)
self.parser.add_argument(
"--optimizer.implementation",
type=str,
Expand Down
1 change: 1 addition & 0 deletions torchtitan/models/llama/train_configs/debug_model.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ tokenizer_path = "./tests/assets/test_tiktoken.model"
[optimizer]
name = "AdamW"
lr = 8e-4
eps = 1e-8

[training]
batch_size = 8
Expand Down
1 change: 1 addition & 0 deletions torchtitan/models/llama/train_configs/llama3_405b.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ converters = "float8"
[optimizer]
name = "AdamW"
lr = 8e-5
eps = 1e-8

[training]
batch_size = 2
Expand Down
1 change: 1 addition & 0 deletions torchtitan/models/llama/train_configs/llama3_70b.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ tokenizer_path = "./assets/tokenizer/original/tokenizer.model"
[optimizer]
name = "AdamW"
lr = 1.5e-4
eps = 1e-8

[training]
batch_size = 8
Expand Down
1 change: 1 addition & 0 deletions torchtitan/models/llama/train_configs/llama3_8b.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ tokenizer_path = "./assets/tokenizer/original/tokenizer.model"
[optimizer]
name = "AdamW"
lr = 3e-4
eps = 1e-8

[training]
batch_size = 1
Expand Down

0 comments on commit 7a5c91f

Please sign in to comment.