Skip to content

Commit 053d0ae

Browse files
authored
Update some config default values (#711)
1 parent 4f758c0 commit 053d0ae

File tree

10 files changed

+15
-12
lines changed

10 files changed

+15
-12
lines changed

recipes/configs/llama2/13B_lora.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,7 @@ loss:
7070
# Training
7171
epochs: 1
7272
max_steps_per_epoch: null
73+
gradient_accumulation_steps: 1
7374

7475
# Logging
7576
output_dir: /tmp/lora_finetune_output

recipes/configs/llama2/7B_lora.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,13 @@
55
# this run:
66
# tune download meta-llama/Llama-2-7b --hf-token <HF_TOKEN> --output-dir /tmp/llama2
77
#
8-
# To launch on 4 devices, run the following command from root:
9-
# tune run --nnodes 1 --nproc_per_node 4 lora_finetune_distributed --config llama2/7B_lora
8+
# To launch on 2 devices, run the following command from root:
9+
# tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora
1010
#
1111
# You can add specific overrides through the command line. For example
1212
# to override the checkpointer directory while launching training
1313
# you can run:
14-
# tune run --nnodes 1 --nproc_per_node 4 lora_finetune_distributed --config llama2/7B_lora checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
14+
# tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
1515
#
1616
# This config works best when the model is being fine-tuned on 2+ GPUs.
1717
# For single device LoRA finetuning please use 7B_lora_single_device.yaml
@@ -67,7 +67,7 @@ loss:
6767
# Training
6868
epochs: 1
6969
max_steps_per_epoch: null
70-
gradient_accumulation_steps: 1
70+
gradient_accumulation_steps: 32
7171

7272
# Logging
7373
output_dir: /tmp/lora_finetune_output

recipes/configs/llama2/7B_lora_single_device.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ loss:
6565
# Training
6666
epochs: 1
6767
max_steps_per_epoch: null
68-
gradient_accumulation_steps: 1
68+
gradient_accumulation_steps: 64
6969
compile: False
7070

7171
# Logging

recipes/configs/llama2/7B_qlora_single_device.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ loss:
6464
# Training
6565
epochs: 1
6666
max_steps_per_epoch: null
67-
gradient_accumulation_steps: 1
67+
gradient_accumulation_steps: 16
6868
# Note: compile for QLoRA is only supported on nightly
6969
# PyTorch (>= 2.4.0.dev20240408)
7070
compile: False

recipes/configs/mistral/7B_full_low_memory.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ optimizer:
5656
loss:
5757
_component_: torch.nn.CrossEntropyLoss
5858
max_steps_per_epoch: null
59-
gradient_accumulation_steps: 1
59+
gradient_accumulation_steps: 4
6060
optimizer_in_bwd: True
6161

6262
# Training env

recipes/configs/mistral/7B_lora.yaml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,13 +9,13 @@
99
# this run:
1010
# tune download mistralai/Mistral-7B-v0.1 --hf-token <HF_TOKEN> --output-dir /tmp/Mistral-7B-v0.1
1111
#
12-
# Run this config on 4 GPUs using the following:
13-
# tune run --nnodes 1 --nproc_per_node 4 lora_finetune_distributed --config mistral/7B_lora
12+
# Run this config on 2 GPUs using the following:
13+
# tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config mistral/7B_lora
1414
#
1515
# You can add specific overrides through the command line. For example
1616
# to override the checkpointer directory while launching training
1717
# you can run:
18-
# tune run --nnodes 1 --nproc_per_node 4 lora_finetune_distributed --config mistral/7B_lora checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
18+
# tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config mistral/7B_lora checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
1919
#
2020
# This config works best when the model is being fine-tuned on 2+ GPUs.
2121
# For single device LoRA finetuning please use 7B_lora_single_device.yaml

recipes/configs/mistral/7B_lora_single_device.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ loss:
6767
batch_size: 4
6868
epochs: 3
6969
max_steps_per_epoch: null
70-
gradient_accumulation_steps: 1
70+
gradient_accumulation_steps: 4
7171
compile: False
7272

7373
# Training env

recipes/configs/mistral/7B_qlora_single_device.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ loss:
6868
batch_size: 4
6969
epochs: 3
7070
max_steps_per_epoch: null
71-
gradient_accumulation_steps: 1
71+
gradient_accumulation_steps: 4
7272
compile: False
7373

7474
# Training env

tests/recipes/test_lora_finetune_distributed.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@ def _get_test_config_overrides(self):
4141
"max_steps_per_epoch=2",
4242
"optimizer.lr=2e-5",
4343
"log_every_n_steps=1",
44+
"gradient_accumulation_steps=1",
4445
] + dummy_alpaca_dataset_config()
4546

4647
def _fetch_expected_loss_values(self):

tests/recipes/test_lora_finetune_single_device.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@ def _get_test_config_overrides(self, dtype_str: str = "fp32"):
4242
"max_steps_per_epoch=2",
4343
"optimizer.lr=2e-5",
4444
"log_every_n_steps=1",
45+
"gradient_accumulation_steps=1",
4546
] + dummy_alpaca_dataset_config()
4647

4748
def _fetch_expected_loss_values(self):

0 commit comments

Comments
 (0)