From fbf5196a381592de26a19d047f39c749abd8256e Mon Sep 17 00:00:00 2001
From: muellerzr
Date: Thu, 28 Apr 2022 15:24:08 -0400
Subject: [PATCH 1/3] Fix config

---
 src/accelerate/commands/config/cluster.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/accelerate/commands/config/cluster.py b/src/accelerate/commands/config/cluster.py
index 69464ea0b46..580439a638a 100644
--- a/src/accelerate/commands/config/cluster.py
+++ b/src/accelerate/commands/config/cluster.py
@@ -99,6 +99,7 @@ def get_cluster_input():
             default=1,
         )
 
+    fsdp_config = None
     if distributed_type in [DistributedType.MULTI_GPU]:
         use_fsdp = _ask_field(
             "Do you want to use FullyShardedDataParallel? [yes/NO]: ",

From b81a7ae4ed72cce27854c79ba5ebfe90abcba460 Mon Sep 17 00:00:00 2001
From: Zachary Mueller
Date: Thu, 28 Apr 2022 15:45:09 -0400
Subject: [PATCH 2/3] Set as dict and simplify

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
---
 src/accelerate/commands/config/cluster.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/accelerate/commands/config/cluster.py b/src/accelerate/commands/config/cluster.py
index 580439a638a..9b0fc73f9db 100644
--- a/src/accelerate/commands/config/cluster.py
+++ b/src/accelerate/commands/config/cluster.py
@@ -99,7 +99,7 @@ def get_cluster_input():
             default=1,
         )
 
-    fsdp_config = None
+    fsdp_config = {}
     if distributed_type in [DistributedType.MULTI_GPU]:
         use_fsdp = _ask_field(
             "Do you want to use FullyShardedDataParallel? [yes/NO]: ",

From 474a2e2947ec32f5a35fb5545e7dcf389addd8a8 Mon Sep 17 00:00:00 2001
From: muellerzr
Date: Thu, 28 Apr 2022 15:47:49 -0400
Subject: [PATCH 3/3] Rm duplicate dict generators

---
 src/accelerate/commands/config/cluster.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/accelerate/commands/config/cluster.py b/src/accelerate/commands/config/cluster.py
index 9b0fc73f9db..f9d35ea48fe 100644
--- a/src/accelerate/commands/config/cluster.py
+++ b/src/accelerate/commands/config/cluster.py
@@ -64,7 +64,7 @@ def get_cluster_input():
     else:
         use_cpu = False
 
-    deepspeed_config = None
+    deepspeed_config = {}
     if distributed_type in [DistributedType.MULTI_GPU, DistributedType.NO]:
         use_deepspeed = _ask_field(
             "Do you want to use DeepSpeed? [yes/NO]: ",
@@ -78,7 +78,6 @@ def get_cluster_input():
                 is_deepspeed_available()
             ), "DeepSpeed is not installed => run `pip3 install deepspeed` or build it from source"
 
-        deepspeed_config = {}
     if distributed_type == DistributedType.DEEPSPEED:
         deepspeed_config["zero_stage"] = _ask_field(
             "What should be your DeepSpeed's ZeRO optimization stage (0, 1, 2, 3)? [2]: ",
@@ -109,7 +108,6 @@ def get_cluster_input():
         )
         if use_fsdp:
             distributed_type = DistributedType.FSDP
-            fsdp_config = {}
     if distributed_type == DistributedType.FSDP:
         fsdp_config["sharding_strategy"] = _ask_field(
             "What should be your sharding strategy ([1] FULL_SHARD, [2] SHARD_GRAD_OP)? [1]: ",
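
Taken together, the series converges on initializing deepspeed_config and fsdp_config as empty dicts before the interactive branches, so both names are always bound to dicts no matter which features the user enables. Below is a minimal, self-contained sketch of that pattern; the function name, arguments, and return shape are simplified stand-ins for illustration, not accelerate's actual get_cluster_input or _ask_field API.

    # Sketch of the initialization pattern the patches arrive at.
    def get_cluster_input_sketch(use_deepspeed: bool, use_fsdp: bool) -> dict:
        # Initialize both configs as empty dicts up front. Starting from None
        # (as in PATCH 1/3) would make the later item assignments fail with
        # "TypeError: 'NoneType' object does not support item assignment",
        # and defining them only inside their feature branches would leave
        # the names unbound on the branch not taken.
        deepspeed_config = {}
        fsdp_config = {}

        if use_deepspeed:
            deepspeed_config["zero_stage"] = 2  # stand-in for _ask_field(...)
        if use_fsdp:
            fsdp_config["sharding_strategy"] = 1  # stand-in for _ask_field(...)

        # Both keys exist and are dicts regardless of the answers given.
        return {"deepspeed_config": deepspeed_config, "fsdp_config": fsdp_config}

    print(get_cluster_input_sketch(use_deepspeed=False, use_fsdp=True))
    # -> {'deepspeed_config': {}, 'fsdp_config': {'sharding_strategy': 1}}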