
Commit c9b9c4e
Only add clip_grad_norm to ones that support it
1 parent: 377c60d
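
For context: clip_grad_norm caps the global L2 norm of the gradients before each optimizer step, and the null default leaves clipping disabled, so existing behavior is unchanged. A minimal sketch of how a recipe can honor the key (hypothetical code, not torchtune's actual recipe; train_step and the batch keys are illustrative):

import torch

def train_step(model, batch, loss_fn, optimizer, clip_grad_norm=None):
    # `clip_grad_norm=None` mirrors the YAML default `clip_grad_norm: null`.
    optimizer.zero_grad()
    loss = loss_fn(model(batch["tokens"]), batch["labels"])
    loss.backward()
    if clip_grad_norm is not None:
        # Cap the global gradient norm only when the config enables it.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=float(clip_grad_norm))
    optimizer.step()
    return loss.detach()

Setting the key to a number (e.g. clip_grad_norm: 1.0) turns clipping on without touching the recipe code.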

File tree: 115 files changed, +115 −0 lines (some diffs are hidden by default; only a subset appears below)


recipes/configs/code_llama2/7B_full_low_memory.yaml

Lines changed: 1 addition & 0 deletions
@@ -64,6 +64,7 @@ optimizer:
 optimizer_in_bwd: True # True saves memory. Requires gradient_accumulation_steps=1
 loss:
   _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+clip_grad_norm: null
 compile: False # torch.compile the model + loss, True increases speed + decreases memory
 
 # Training env
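
A note on the default: YAML null loads as Python None, so the new key is inert until a user overrides it. A quick check, assuming the config is loaded with OmegaConf (the library torchtune's YAML configs target):

from omegaconf import OmegaConf

cfg = OmegaConf.create("clip_grad_norm: null")
assert cfg.clip_grad_norm is None  # null arrives as None: clipping off

# A dotlist override, the same key=value style used on the command line,
# enables clipping without editing the file.
cfg = OmegaConf.merge(cfg, OmegaConf.from_dotlist(["clip_grad_norm=1.0"]))
assert cfg.clip_grad_norm == 1.0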

recipes/configs/code_llama2/7B_lora_single_device.yaml

Lines changed: 1 addition & 0 deletions
@@ -72,6 +72,7 @@ lr_scheduler:
   num_warmup_steps: 100
 loss:
   _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+clip_grad_norm: null
 compile: False # torch.compile the model + loss, True increases speed + decreases memory
 
 # Training env

recipes/configs/code_llama2/7B_qlora_single_device.yaml

Lines changed: 1 addition & 0 deletions
@@ -71,6 +71,7 @@ lr_scheduler:
   num_warmup_steps: 100
 loss:
   _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+clip_grad_norm: null
 compile: False # torch.compile the model + loss, True increases speed + decreases memory
 
 # Training env

recipes/configs/gemma/2B_full.yaml

Lines changed: 1 addition & 0 deletions
@@ -57,6 +57,7 @@ loss:
   _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1 # Use to increase effective batch size
+clip_grad_norm: null
 compile: False # torch.compile the model + loss, True increases speed + decreases memory
 optimizer_in_bwd: False # True saves memory. Requires gradient_accumulation_steps=1
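
This config can accept clipping because it steps the optimizer after backward (optimizer_in_bwd: False). When the optimizer step is fused into the backward pass instead, each parameter's gradient is applied and freed before a global norm could ever be computed, so the two options conflict. A hypothetical guard a recipe might use (validate_clipping is illustrative, not torchtune's API):

def validate_clipping(clip_grad_norm, optimizer_in_bwd):
    # Global-norm clipping needs every grad alive at the same time;
    # stepping the optimizer inside backward frees grads one by one.
    if clip_grad_norm is not None and optimizer_in_bwd:
        raise ValueError("clip_grad_norm cannot be used with optimizer_in_bwd=True")

Note that the first diff above adds the key (as null) even to a config with optimizer_in_bwd: True; under a guard like this that is harmless, since null never triggers clipping.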

recipes/configs/gemma/2B_lora.yaml

Lines changed: 1 addition & 0 deletions
@@ -69,6 +69,7 @@ batch_size: 4
 epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1 # Use to increase effective batch size
+clip_grad_norm: null
 compile: False # torch.compile the model + loss, True increases speed + decreases memory
 
 # Training env

recipes/configs/gemma/2B_lora_single_device.yaml

Lines changed: 1 addition & 0 deletions
@@ -68,6 +68,7 @@ batch_size: 4
 epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 8 # Use to increase effective batch size
+clip_grad_norm: null
 compile: False # torch.compile the model + loss, True increases speed + decreases memory
 
 # Training env

recipes/configs/gemma/2B_qlora_single_device.yaml

Lines changed: 1 addition & 0 deletions
@@ -68,6 +68,7 @@ batch_size: 4
 epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 8 # Use to increase effective batch size
+clip_grad_norm: null
 compile: False # torch.compile the model + loss, True increases speed + decreases memory
 
 # Training env

recipes/configs/gemma/7B_full.yaml

Lines changed: 1 addition & 0 deletions
@@ -59,6 +59,7 @@ loss:
   _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1 # Use to increase effective batch size
+clip_grad_norm: null
 compile: False # torch.compile the model + loss, True increases speed + decreases memory
 optimizer_in_bwd: False # True saves memory. Requires gradient_accumulation_steps=1

recipes/configs/gemma/7B_lora.yaml

Lines changed: 1 addition & 0 deletions
@@ -71,6 +71,7 @@ batch_size: 4
 epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1 # Use to increase effective batch size
+clip_grad_norm: null
 compile: False # torch.compile the model + loss, True increases speed + decreases memory
 
 # Training env

recipes/configs/gemma/7B_lora_single_device.yaml

Lines changed: 1 addition & 0 deletions
@@ -70,6 +70,7 @@ batch_size: 8
 epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 8 # Use to increase effective batch size
+clip_grad_norm: null
 compile: False # torch.compile the model + loss, True increases speed + decreases memory
 
 # Training env
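
Several of these configs pair the new key with gradient_accumulation_steps greater than 1. In that case clipping belongs once per optimizer step, after the last micro-batch's backward, rather than after every backward. A sketch under those assumptions (batches, accum_steps, and the other names are illustrative, not torchtune's recipe code):

import torch

for step, batch in enumerate(batches):
    loss = loss_fn(model(batch["tokens"]), batch["labels"]) / accum_steps
    loss.backward()  # gradients accumulate across micro-batches
    if (step + 1) % accum_steps == 0:
        if clip_grad_norm is not None:
            # Clip the accumulated gradients exactly once per optimizer step.
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad_norm)
        optimizer.step()
        optimizer.zero_grad()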

0 commit comments
