
Commit afd23fd

Full finetune FSDP2 recipe (#1287)
1 parent: 3955b33

File tree

21 files changed: +196 -178 lines

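For readers new to FSDP2: the FSDP1 recipe selected a wrapping policy through the `memory_efficient_fsdp_wrap` flag, while FSDP2's composable `fully_shard` is applied module by module, which is why that flag disappears from every config below. A minimal sketch of the FSDP2 call pattern, assuming PyTorch >= 2.4; `model.layers` is an illustrative transformer-block list, not torchtune's actual recipe code:

```python
# A minimal sketch of the FSDP2 call pattern, not torchtune's recipe code.
# Assumes PyTorch >= 2.4, where the composable fully_shard API is available.
import torch
from torch.distributed._composable.fsdp import MixedPrecisionPolicy, fully_shard

def shard_model(model: torch.nn.Module) -> torch.nn.Module:
    # bf16 compute with fp32 gradient reduction, matching the configs' dtype: bf16.
    mp = MixedPrecisionPolicy(param_dtype=torch.bfloat16, reduce_dtype=torch.float32)
    # FSDP2 is applied per submodule instead of via an FSDP1 auto-wrap policy,
    # so there is no memory_efficient_fsdp_wrap knob left to set.
    for block in model.layers:  # hypothetical list of transformer blocks
        fully_shard(block, mp_policy=mp)
    fully_shard(model, mp_policy=mp)  # root call shards the remaining parameters
    return model
```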

recipes/configs/gemma/2B_full.yaml

Lines changed: 0 additions & 1 deletion
@@ -59,7 +59,6 @@ device: cuda
 
 # Memory management
 enable_activation_checkpointing: True
-memory_efficient_fsdp_wrap: False
 
 # Reduced precision
 dtype: bf16

recipes/configs/gemma/7B_full.yaml

Lines changed: 0 additions & 1 deletion
@@ -61,7 +61,6 @@ device: cuda
 
 # Memory management
 enable_activation_checkpointing: True
-memory_efficient_fsdp_wrap: False
 
 # Reduced precision
 dtype: bf16

recipes/configs/llama2/7B_full.yaml

Lines changed: 0 additions & 1 deletion
@@ -62,7 +62,6 @@ device: cuda
 
 # Memory management
 enable_activation_checkpointing: True
-memory_efficient_fsdp_wrap: False
 
 # Reduced precision
 dtype: bf16

recipes/configs/llama3/70B_full.yaml

Lines changed: 1 addition & 1 deletion
@@ -94,7 +94,7 @@ device: cuda
 
 # Memory management
 enable_activation_checkpointing: True
-memory_efficient_fsdp_wrap: True
+custom_sharded_layers: ['tok_embeddings', 'output']
 fsdp_cpu_offload: True
 
 # Reduced precision
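In the Llama3 configs the flag is replaced rather than dropped: `custom_sharded_layers` names modules (the token embedding and output projection) that become their own FSDP2 sharding units on top of the per-block ones, so their large weight matrices are gathered and freed independently. A hedged sketch of how such a list could be applied; `apply_custom_sharding` and the module layout are illustrative assumptions:

```python
# Illustrative sketch of honoring custom_sharded_layers; names are assumptions.
from torch.distributed._composable.fsdp import fully_shard

def apply_custom_sharding(model, custom_sharded_layers):
    # Shard each listed module (e.g. ['tok_embeddings', 'output']) as its own
    # FSDP2 unit so its parameters are gathered and freed independently of
    # the transformer blocks.
    for name, module in model.named_modules():
        if name in custom_sharded_layers:
            fully_shard(module)
    for block in model.layers:  # per-block sharding as in the earlier sketch
        fully_shard(block)
    fully_shard(model)  # root module last, picking up anything unsharded
```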

recipes/configs/llama3/8B_full.yaml

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ device: cuda
 
 # Memory management
 enable_activation_checkpointing: True
-memory_efficient_fsdp_wrap: True
+custom_sharded_layers: ['tok_embeddings', 'output']
 
 # Reduced precision
 dtype: bf16

recipes/configs/llama3_1/70B_full.yaml

Lines changed: 1 addition & 1 deletion
@@ -94,7 +94,7 @@ device: cuda
 
 # Memory management
 enable_activation_checkpointing: True
-memory_efficient_fsdp_wrap: True
+custom_sharded_layers: ['tok_embeddings', 'output']
 fsdp_cpu_offload: True
 
 # Reduced precision
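Both 70B configs also keep `fsdp_cpu_offload: True`. Under FSDP2 this plausibly maps to an offload policy passed to `fully_shard`; a sketch assuming PyTorch >= 2.4's `CPUOffloadPolicy`, not necessarily what the recipe does:

```python
# Sketch only: one plausible FSDP2 translation of fsdp_cpu_offload: True.
from torch.distributed._composable.fsdp import (
    CPUOffloadPolicy,
    OffloadPolicy,
    fully_shard,
)

def shard_with_offload(model, fsdp_cpu_offload: bool):
    # CPUOffloadPolicy keeps sharded parameters, gradients, and optimizer
    # state in CPU memory, trading step time for GPU memory headroom.
    policy = CPUOffloadPolicy() if fsdp_cpu_offload else OffloadPolicy()
    for block in model.layers:  # hypothetical block list, as above
        fully_shard(block, offload_policy=policy)
    fully_shard(model, offload_policy=policy)
```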

recipes/configs/llama3_1/8B_full.yaml

Lines changed: 1 addition & 1 deletion
@@ -67,7 +67,7 @@ device: cuda
 
 # Memory management
 enable_activation_checkpointing: True
-memory_efficient_fsdp_wrap: True
+custom_sharded_layers: ['tok_embeddings', 'output']
 
 # Reduced precision
 dtype: bf16

recipes/configs/mistral/7B_full.yaml

Lines changed: 0 additions & 1 deletion
@@ -64,7 +64,6 @@ device: cuda
 
 # Memory management
 enable_activation_checkpointing: True
-memory_efficient_fsdp_wrap: False
 
 # Reduced precision
 dtype: bf16

recipes/configs/phi3/mini_full.yaml

Lines changed: 0 additions & 1 deletion
@@ -61,7 +61,6 @@ device: cuda
 
 # Memory management
 enable_activation_checkpointing: True
-memory_efficient_fsdp_wrap: False
 dtype: bf16
 
 # Logging

recipes/configs/qwen2/0.5B_full.yaml

Lines changed: 0 additions & 1 deletion
@@ -61,7 +61,6 @@ device: cuda
 
 # Memory management
 enable_activation_checkpointing: False
-memory_efficient_fsdp_wrap: False
 
 # Reduced precision
 dtype: bf16
