# Config for multi-device LoRA finetuning in lora_finetune_distributed.py
# using a Llama3 70B model
#
# This config assumes that you've run the following command before launching
# this run:
#   tune download meta-llama/Meta-Llama-3-70b --hf-token <TOKEN> --output-dir /tmp/Meta-Llama-3-70b --ignore-patterns "original/consolidated*"
#
# This config needs 8 GPUs to run
#   tune run --nproc_per_node 8 lora_finetune_distributed --config recipes/configs/llama3/70B_lora.yaml
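#
# Individual values in this config can also be overridden from the command
# line by appending key=value pairs after the config flag (the values below
# are examples only):
#   tune run --nproc_per_node 8 lora_finetune_distributed --config recipes/configs/llama3/70B_lora.yaml batch_size=1 lora_rank=8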
#

# Model Arguments
model:
  _component_: torchtune.models.llama3.lora_llama3_70b
  lora_attn_modules: ['q_proj', 'k_proj', 'v_proj']
  apply_lora_to_mlp: False
  apply_lora_to_output: False
  lora_rank: 16
  lora_alpha: 32
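# LoRA adapters are attached only to the attention projections listed in
# lora_attn_modules; with apply_lora_to_mlp and apply_lora_to_output set to
# False, the MLP and final output projections stay frozen. In the standard
# LoRA formulation the low-rank update is scaled by lora_alpha / lora_rank,
# i.e. 32 / 16 = 2 here.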

tokenizer:
  _component_: torchtune.models.llama3.llama3_tokenizer
  path: /tmp/Meta-Llama-3-70b/original/tokenizer.model
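# Note: the download command above ignores only original/consolidated*, so
# the original/tokenizer.model file referenced here is still fetched.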

checkpointer:
  _component_: torchtune.utils.FullModelHFCheckpointer
  checkpoint_dir: /tmp/Meta-Llama-3-70b
  checkpoint_files: [
    model-00001-of-00030.safetensors,
    model-00002-of-00030.safetensors,
    model-00003-of-00030.safetensors,
    model-00004-of-00030.safetensors,
    model-00005-of-00030.safetensors,
    model-00006-of-00030.safetensors,
    model-00007-of-00030.safetensors,
    model-00008-of-00030.safetensors,
    model-00009-of-00030.safetensors,
    model-00010-of-00030.safetensors,
    model-00011-of-00030.safetensors,
    model-00012-of-00030.safetensors,
    model-00013-of-00030.safetensors,
    model-00014-of-00030.safetensors,
    model-00015-of-00030.safetensors,
    model-00016-of-00030.safetensors,
    model-00017-of-00030.safetensors,
    model-00018-of-00030.safetensors,
    model-00019-of-00030.safetensors,
    model-00020-of-00030.safetensors,
    model-00021-of-00030.safetensors,
    model-00022-of-00030.safetensors,
    model-00023-of-00030.safetensors,
    model-00024-of-00030.safetensors,
    model-00025-of-00030.safetensors,
    model-00026-of-00030.safetensors,
    model-00027-of-00030.safetensors,
    model-00028-of-00030.safetensors,
    model-00029-of-00030.safetensors,
    model-00030-of-00030.safetensors,
  ]
  recipe_checkpoint: null
  output_dir: /tmp/Meta-Llama-3-70b
  model_type: LLAMA3
resume_from_checkpoint: False
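# The HF checkpointer loads the sharded safetensors files listed above and
# writes checkpoints back out in the same Hugging Face format under
# output_dir. To continue an interrupted run, point recipe_checkpoint at the
# saved recipe state and set resume_from_checkpoint to True.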

# Dataset and Sampler
dataset:
  _component_: torchtune.datasets.alpaca_dataset
  train_on_input: True
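# With train_on_input: True, the prompt tokens contribute to the loss along
# with the response tokens; set it to False to compute loss on responses only.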
seed: null
shuffle: True
batch_size: 2

# Optimizer and Scheduler
optimizer:
  _component_: torch.optim.AdamW
  weight_decay: 0.01
  lr: 3e-4
lr_scheduler:
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
  num_warmup_steps: 100
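# The learning rate ramps up linearly over the first 100 steps and then
# follows a cosine decay for the remainder of training.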

loss:
  _component_: torch.nn.CrossEntropyLoss

# Training
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 1
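# With batch_size 2 per device, 8 GPUs, and no gradient accumulation, each
# optimizer step sees 2 * 8 * 1 = 16 samples.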

# Logging
output_dir: /tmp/lora_finetune_output
metric_logger:
  _component_: torchtune.utils.metric_logging.DiskLogger
  log_dir: ${output_dir}
log_every_n_steps: null
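# DiskLogger writes metrics to plain-text log files under ${output_dir};
# swap in a different metric_logger component (e.g. WandBLogger) to log
# elsewhere.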

# Environment
device: cuda
dtype: bf16
enable_activation_checkpointing: True
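# bf16 halves parameter and activation memory relative to fp32, and activation
# checkpointing recomputes activations in the backward pass, trading extra
# compute for lower peak memory; both help fit the 70B model on 8 GPUs.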