Skip to content

Commit

Permalink
Temporarily disable CP tests (#898)
Browse files Browse the repository at this point in the history
PyTorch trunk has a problematic PR that breaks CP. It is better to get
test signals from the other parallelisms than no signals at all.
  • Loading branch information
fegin authored Feb 27, 2025
1 parent 8a92fb6 commit 0047aa2
Showing 1 changed file with 90 additions and 90 deletions.
180 changes: 90 additions & 90 deletions tests/integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,28 +296,28 @@ def build_test_list():
"hsdp",
ngpu=4,
),
OverrideDefinitions(
[
[
"--experimental.context_parallel_degree=4",
"--experimental.context_parallel_rotate_method='allgather'",
]
],
"CP (allgather)",
"cp_allgather",
ngpu=4,
),
OverrideDefinitions(
[
[
"--experimental.context_parallel_degree=4",
"--experimental.context_parallel_rotate_method='alltoall'",
]
],
"CP (alltoall)",
"cp_alltoall",
ngpu=4,
),
# OverrideDefinitions(
# [
# [
# "--experimental.context_parallel_degree=4",
# "--experimental.context_parallel_rotate_method='allgather'",
# ]
# ],
# "CP (allgather)",
# "cp_allgather",
# ngpu=4,
# ),
# OverrideDefinitions(
# [
# [
# "--experimental.context_parallel_degree=4",
# "--experimental.context_parallel_rotate_method='alltoall'",
# ]
# ],
# "CP (alltoall)",
# "cp_alltoall",
# ngpu=4,
# ),
OverrideDefinitions(
[
[
Expand All @@ -330,74 +330,74 @@ def build_test_list():
"hsdp+tp",
ngpu=8,
),
OverrideDefinitions(
[
[
"--training.data_parallel_shard_degree=2",
"--experimental.context_parallel_degree=2",
]
],
"FSDP+CP",
"fsdp+cp",
ngpu=4,
),
OverrideDefinitions(
[
[
"--training.data_parallel_shard_degree=1",
"--training.data_parallel_replicate_degree=2",
"--experimental.context_parallel_degree=2",
]
],
"HSDP+CP (with dp_shard)",
"hsdp+cp_without_dp_shard",
ngpu=4,
),
OverrideDefinitions(
[
[
"--training.data_parallel_shard_degree=2",
"--training.data_parallel_replicate_degree=2",
"--experimental.context_parallel_degree=2",
]
],
"HSDP+CP (without dp_shard)",
"hsdp+cp_with_dp_shard",
ngpu=8,
),
OverrideDefinitions(
[
[
"--training.data_parallel_shard_degree=2",
"--training.tensor_parallel_degree=2",
"--experimental.context_parallel_degree=2",
]
],
"FSDP+TP+CP",
"fsdp+tp+cp",
ngpu=8,
),
OverrideDefinitions(
[
[
"--checkpoint.enable_checkpoint",
"--training.tensor_parallel_degree=2",
"--experimental.context_parallel_degree=2",
"--training.enable_cpu_offload",
"--optimizer.early_step_in_backward",
],
[
"--training.tensor_parallel_degree=2",
"--experimental.context_parallel_degree=2",
"--training.data_parallel_replicate_degree=2",
"--training.enable_cpu_offload",
"--optimizer.early_step_in_backward",
],
],
"Enable CPU Offload, Optimizer in backward with TP, DP, CP",
"cpu_offload+opt_in_bwd+TP+DP+CP",
ngpu=8,
),
# OverrideDefinitions(
# [
# [
# "--training.data_parallel_shard_degree=2",
# "--experimental.context_parallel_degree=2",
# ]
# ],
# "FSDP+CP",
# "fsdp+cp",
# ngpu=4,
# ),
# OverrideDefinitions(
# [
# [
# "--training.data_parallel_shard_degree=1",
# "--training.data_parallel_replicate_degree=2",
# "--experimental.context_parallel_degree=2",
# ]
# ],
# "HSDP+CP (with dp_shard)",
# "hsdp+cp_without_dp_shard",
# ngpu=4,
# ),
# OverrideDefinitions(
# [
# [
# "--training.data_parallel_shard_degree=2",
# "--training.data_parallel_replicate_degree=2",
# "--experimental.context_parallel_degree=2",
# ]
# ],
# "HSDP+CP (without dp_shard)",
# "hsdp+cp_with_dp_shard",
# ngpu=8,
# ),
# OverrideDefinitions(
# [
# [
# "--training.data_parallel_shard_degree=2",
# "--training.tensor_parallel_degree=2",
# "--experimental.context_parallel_degree=2",
# ]
# ],
# "FSDP+TP+CP",
# "fsdp+tp+cp",
# ngpu=8,
# ),
# OverrideDefinitions(
# [
# [
# "--checkpoint.enable_checkpoint",
# "--training.tensor_parallel_degree=2",
# "--experimental.context_parallel_degree=2",
# "--training.enable_cpu_offload",
# "--optimizer.early_step_in_backward",
# ],
# [
# "--training.tensor_parallel_degree=2",
# "--experimental.context_parallel_degree=2",
# "--training.data_parallel_replicate_degree=2",
# "--training.enable_cpu_offload",
# "--optimizer.early_step_in_backward",
# ],
# ],
# "Enable CPU Offload, Optimizer in backward with TP, DP, CP",
# "cpu_offload+opt_in_bwd+TP+DP+CP",
# ngpu=8,
# ),
OverrideDefinitions(
[
[
Expand Down

0 comments on commit 0047aa2

Please sign in to comment.