
Commit ea2ad10

Merge branch 'main' of github.com:pytorch/torchtune into pytorch-main

2 parents: b1e0666 + aa8f365


43 files changed: +814 −508 lines

docs/source/deep_dives/checkpointer.rst

Lines changed: 166 additions & 134 deletions (large diff not rendered)

docs/source/tutorials/e2e_flow.rst

Lines changed: 332 additions & 269 deletions (large diff not rendered)

recipes/configs/eleuther_evaluation.yaml

Lines changed: 3 additions & 1 deletion

@@ -3,6 +3,8 @@
 # To launch, run the following command from root torchtune directory:
 #   tune run eleuther_eval --config eleuther_evaluation tasks=["truthfulqa_mc2","hellaswag"]
 
+output_dir: ./ # Not needed
+
 # Model Arguments
 model:
   _component_: torchtune.models.llama2.llama2_7b
@@ -14,7 +16,7 @@ checkpointer:
     pytorch_model-00001-of-00002.bin,
     pytorch_model-00002-of-00002.bin,
   ]
-  output_dir: /tmp/Llama-2-7b-hf
+  output_dir: ${output_dir}
   model_type: LLAMA2
 
 # Tokenizer
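
This diff (and the similar ones below) hoists output_dir to a top-level key and points the checkpointer at it via ${output_dir} interpolation, so one value, or one CLI override, controls every output path in the config. A minimal sketch of the pattern, with hypothetical values that are not from this commit:

# Sketch of the interpolation pattern (hypothetical paths):
output_dir: /tmp/eval_run        # single source of truth
checkpointer:
  output_dir: ${output_dir}      # resolves to /tmp/eval_run
# One CLI override then updates everything, assuming torchtune's
# standard key=value overrides:
#   tune run eleuther_eval --config eleuther_evaluation output_dir=/tmp/eval_run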

recipes/configs/gemma/evaluation.yaml

Lines changed: 3 additions & 1 deletion

@@ -3,6 +3,8 @@
 # To launch, run the following command:
 #   tune run eleuther_eval --config gemma/evaluation
 
+output_dir: ./ # Not needed
+
 # Model Arguments
 model:
   _component_: torchtune.models.gemma.gemma_2b
@@ -15,7 +17,7 @@ checkpointer:
     model-00001-of-00002.safetensors,
     model-00002-of-00002.safetensors,
   ]
-  output_dir: ./ # Not needed
+  output_dir: ${output_dir}
   model_type: GEMMA
 
 # Tokenizer

recipes/configs/generation.yaml

Lines changed: 3 additions & 1 deletion

@@ -3,6 +3,8 @@
 # To launch, run the following command from root torchtune directory:
 #   tune run generate --config generation
 
+output_dir: ./ # Not needed
+
 # Model arguments
 model:
   _component_: torchtune.models.llama2.llama2_7b
@@ -14,7 +16,7 @@ checkpointer:
     pytorch_model-00001-of-00002.bin,
     pytorch_model-00002-of-00002.bin,
   ]
-  output_dir: /tmp/Llama-2-7b-hf/
+  output_dir: ${output_dir}
   model_type: LLAMA2
 
 device: cuda

recipes/configs/llama2/7B_lora_dpo.yaml

Lines changed: 1 addition & 1 deletion

@@ -32,7 +32,7 @@ model:
 tokenizer:
   _component_: torchtune.models.llama2.llama2_tokenizer
   path: /tmp/Llama-2-7b-hf/tokenizer.model
-  max_seq_len: 1024
+  max_seq_len: 1024 # higher increases memory
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer

recipes/configs/llama2/7B_lora_dpo_single_device.yaml

Lines changed: 1 addition & 1 deletion

@@ -31,7 +31,7 @@ model:
 tokenizer:
   _component_: torchtune.models.llama2.llama2_tokenizer
   path: /tmp/Llama-2-7b-hf/tokenizer.model
-  max_seq_len: 1024
+  max_seq_len: 1024 # higher increases memory
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
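
The new comment flags a real cost: activation memory in attention grows with sequence length, so max_seq_len is one of the first knobs to turn when a DPO run hits out-of-memory. A sketch of a per-run override, assuming torchtune's standard key=value CLI overrides (the value 512 is a hypothetical choice):

# Halve the cap for a memory-constrained run, without editing the config:
#   tune run lora_dpo_single_device --config llama2/7B_lora_dpo_single_device \
#     tokenizer.max_seq_len=512
tokenizer:
  _component_: torchtune.models.llama2.llama2_tokenizer
  path: /tmp/Llama-2-7b-hf/tokenizer.model
  max_seq_len: 512 # samples longer than this are truncated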

recipes/configs/llama2/generation_v2.yaml

Lines changed: 3 additions & 1 deletion

@@ -6,6 +6,8 @@
 # To launch, run the following command:
 #   tune run dev/generate_v2 --config llama2/generation_v2
 
+output_dir: ./ # Not needed
+
 # Model arguments
 model:
   _component_: torchtune.models.llama2.llama2_7b
@@ -24,7 +26,7 @@ checkpointer:
     pytorch_model-00001-of-00002.bin,
     pytorch_model-00002-of-00002.bin
   ]
-  output_dir: ./
+  output_dir: ${output_dir}
   model_type: LLAMA2
 
 # Device

recipes/configs/llama3/8B_qat_lora.yaml

Lines changed: 4 additions & 5 deletions

@@ -83,6 +83,10 @@ dtype: bf16
 enable_activation_checkpointing: False # True reduces memory
 enable_activation_offloading: False # True reduces memory
 
+# QAT arguments
+quantizer:
+  _component_: torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
+  groupsize: 256
 
 # Profiler (disabled)
 profiler:
@@ -108,8 +112,3 @@ profiler:
   warmup_steps: 3
   active_steps: 2
   num_cycles: 1
-
-# QAT arguments
-quantizer:
-  _component_: torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
-  groupsize: 256
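
This hunk only moves the quantizer block above the profiler section; the settings are unchanged. The block configures quantization-aware training: during fine-tuning, fake-quantization simulates int8 dynamic activations and int4 group-wise weights so the model learns weights that survive the eventual quantized deployment. groupsize is the number of weight elements sharing one int4 scale. A hedged variant, assuming the quantizer accepts other torchao-supported group sizes:

# Sketch: smaller groups give finer-grained scales that track the weights
# more closely, at the cost of storing more scale parameters
# (assumes 128 is a supported groupsize).
quantizer:
  _component_: torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
  groupsize: 128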

recipes/configs/llama3_1/8B_lora_dpo.yaml

Lines changed: 1 addition & 1 deletion

@@ -32,7 +32,7 @@ model:
 tokenizer:
   _component_: torchtune.models.llama3.llama3_tokenizer
   path: /tmp/Meta-Llama-3.1-8B-Instruct/original/tokenizer.model
-  max_seq_len: null
+  max_seq_len: 1024 # higher increases memory
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
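
Unlike the Llama 2 DPO configs above, this diff changes behavior, not just a comment: max_seq_len: null meant no length cap, while 1024 truncates longer preference pairs. If your data needs full-length samples, the old behavior can be restored per run; a sketch, assuming key=value CLI overrides and that lora_dpo_distributed is the matching recipe name:

# Restore untruncated samples (higher memory), without editing the config:
#   tune run lora_dpo_distributed --config llama3_1/8B_lora_dpo \
#     tokenizer.max_seq_len=null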
