Skip to content

Commit

Permalink
Add new RCPs for v4.0
Browse files Browse the repository at this point in the history
  • Loading branch information
ShriyaPalsamudram authored and hiwotadese committed May 16, 2024
1 parent 267bfc4 commit 7849322
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 1 deletion.
18 changes: 18 additions & 0 deletions mlperf_logging/rcp_checker/training_4.0.0/rcps_gnn.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,24 @@
1.20,1.20,1.15,1.25,1.20,1.15,
1.10,1.15
]
},

"gnn_ref_262144":
{
"Benchmark": "gnn",
"Creator": "NVIDIA",
"When": "Reference RCPs before v4.0",
"Platform": "128xDGX-H100",
"BS": 262144,
"Hyperparams": {
"opt_base_learning_rate": 0.005
},
"Epochs to converge": [
2.40,2.55,2.35,2.45,2.50,2.35,
2.45,2.60,2.35,2.55,2.60,2.40,
2.40,2.30,2.30,2.45,2.60,2.50,
2.75,2.45
]
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -65,5 +65,27 @@
5760,6528,6144,6528,5376,6528,5760,6144,6144,6528,
6144,6144,6144,5760,5760,5760,5760,5760,6144,5760
]
}
},
"llama2_70b_lora_ref_128":
{
"Benchmark": "llama2_70b_lora",
"Creator": "NVIDIA",
"When": "Prior to 4.0 submission",
"Platform": "TBD",
"BS": 128,
"Hyperparams": {
"opt_base_learning_rate": 1e-3,
"opt_max_grad_norm": 0.3,
"opt_learning_rate_warmup_epochs": 0,
"opt_learning_rate_decay_boundary_epochs": [],
"gradient_accumulation_steps": 1,
"lora_r": 16,
"lora_alpha": 32,
"max_steps": 1024
},
"samples to converge": [
11520,13056,10752,12672,12288,11136,10752,13056, 10752,9984,
11136,11136,11136,10752,11520,11136,11136,10752,11136,9984
]
}
}
18 changes: 18 additions & 0 deletions mlperf_logging/rcp_checker/training_4.0.0/rcps_ssd.json
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,24 @@
8, 8, 8, 8, 8, 8, 8, 8, 8, 9]
},

"ssd_ref_2560":
{
"Benchmark": "ssd",
"Creator": "NVIDIA",
"When": "Reference RCPs before v4.0",
"Platform": "20xDGX-H100",
"BS": 2560,
"Hyperparams": {
"opt_base_learning_rate": 0.000145,
"opt_learning_rate_warmup_factor": 1e-3,
"opt_learning_rate_warmup_epochs": 1,
"opt_weight_decay": 0
},
"Epochs to converge": [
8, 8, 8, 8, 8, 9, 9, 9, 9, 9,
9, 9, 9]
},

"ssd_ref_4096":
{
"Benchmark": "ssd",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,27 @@
{

"sd_ref_384":
{
"Benchmark": "stable_diffusion",
"Creator": "NVIDIA",
"When": "Reference RCPs before v4.0",
"Platform": "16xDGX-H100",
"BS": 384,
"Hyperparams": {
"opt_adamw_beta_1": 0.9,
"opt_adamw_beta_2": 0.999,
"opt_adamw_epsilon": 1e-08,
"opt_adamw_weight_decay": 0.01,
"opt_base_learning_rate": 1.25e-7,
"opt_learning_rate_warmup_steps": 1000
},
"Epochs to converge": [
2049024, 2049024, 2049024, 2561280,
2561280, 2561280, 2561280, 2561280,
2561280, 2561280, 2561280, 2561280,
3073536, 3073536, 3073536]
},

"sd_ref_512":
{
"Benchmark": "stable_diffusion",
Expand Down

0 comments on commit 7849322

Please sign in to comment.