Commit 0f77094

merge

2 parents: ad68c62 + 00bbd53

File tree: 60 files changed, +1519 −425 lines


.github/workflows/build_linux_wheels.yaml

Lines changed: 0 additions & 2 deletions
@@ -1,11 +1,9 @@
 name: Build Linux Wheels
 
 on:
-  pull_request:
   push:
     branches:
       - nightly
-      - main
       - release/*
     tags:
       # NOTE: Binary build pipelines should only get triggered on release candidate builds

docs/source/api_ref_data.rst

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@ and models.
     StackExchangedPairedTemplate
     PromptTemplate
     PromptTemplateInterface
+    ChatMLTemplate
 
     ChatFormat
     ChatMLFormat

docs/source/api_ref_models.rst

Lines changed: 2 additions & 0 deletions
@@ -104,6 +104,7 @@ To download the Llama2-70B model:
     llama2.llama2_reward_7b
     llama2.lora_llama2_reward_7b
     llama2.qlora_llama2_reward_7b
+    llama2.Llama2ChatTemplate
 
 
 code llama
@@ -212,6 +213,7 @@ To download the Mistral 7B v0.1 model:
     mistral.qlora_mistral_reward_7b
     mistral.mistral_tokenizer
     mistral.MistralTokenizer
+    mistral.MistralChatTemplate
 
 
 gemma

docs/source/api_ref_modules.rst

Lines changed: 2 additions & 0 deletions
@@ -86,6 +86,7 @@ Loss
     loss.DPOLoss
     loss.RSOLoss
     loss.IPOLoss
+    loss.SimPOLoss
 
 
 Vision Transforms
@@ -98,6 +99,7 @@ Functions used for preprocessing images.
 
     transforms.Transform
     transforms.get_canvas_best_fit
+    transforms.get_inscribed_size
     transforms.resize_with_pad
     transforms.tile_crop
     transforms.find_supported_resolutions
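
For orientation on the new loss.SimPOLoss entry, below is a reference-only sketch of the SimPO objective as published (Meng et al., 2024): a reference-free, length-normalized preference loss with a target margin. torchtune's actual class signature is not shown in this diff; the argument names and default values here are illustrative.

# Illustrative sketch of the SimPO objective behind the new loss.SimPOLoss entry.
# Inputs are assumed to be length-normalized (average per-token) log-probabilities
# of the chosen and rejected responses under the policy; beta and gamma defaults
# are illustrative, not torchtune's.
import torch
import torch.nn.functional as F

def simpo_loss(policy_chosen_logps: torch.Tensor,
               policy_rejected_logps: torch.Tensor,
               beta: float = 2.0,
               gamma: float = 0.5) -> torch.Tensor:
    # Reference-free reward r(y) = beta * avg_logp(y); gamma is the target margin
    # between chosen and rejected rewards.
    logits = beta * (policy_chosen_logps - policy_rejected_logps) - gamma
    return -F.logsigmoid(logits).mean()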

docs/source/tune_cli.rst

Lines changed: 1 addition & 1 deletion
@@ -92,7 +92,7 @@ with matching names. By default we ignore safetensor files, but if you want to i
 
 .. code-block:: bash
 
-    $ tune download meta-llama/Meta-Llama-3-8B-Instruct --hf-token <TOKEN> --ignore-patterns ""
+    $ tune download meta-llama/Meta-Llama-3-8B-Instruct --hf-token <TOKEN> --ignore-patterns None
     Successfully downloaded model repo and wrote to the following locations:
     ./model/config.json
     ./model/README.md
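
For readers who prefer to script the download, here is a rough Python equivalent of the updated command, assuming the CLI ultimately performs a Hugging Face Hub snapshot download; the local_dir value is an illustrative choice, not dictated by the docs.

# Rough Python equivalent of `tune download ... --ignore-patterns None`, assuming
# the CLI wraps a Hugging Face Hub snapshot download. ignore_patterns=None skips
# nothing, so safetensor files are included as well.
from huggingface_hub import snapshot_download

path = snapshot_download(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    local_dir="./model",     # illustrative; matches the ./model/... paths shown above
    ignore_patterns=None,    # download every file in the repo
    token="<TOKEN>",         # gated repo: requires a Hugging Face access token
)
print(f"Downloaded to {path}")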

docs/source/tutorials/e2e_flow.rst

Lines changed: 2 additions & 2 deletions
@@ -327,8 +327,8 @@ To quantize the fine-tuned model after installing torchao we can run the followi
     # we also support `int8_weight_only()` and `int8_dynamic_activation_int8_weight()`, see
     # https://github.com/pytorch/ao/tree/main/torchao/quantization#other-available-quantization-techniques
     # for a full list of techniques that we support
-    from torchao.quantization.quant_api import quantize\_, int4_weight_only
-    quantize\_(model, int4_weight_only())
+    from torchao.quantization.quant_api import quantize_, int4_weight_only
+    quantize_(model, int4_weight_only())
 
 After quantization, we rely on torch.compile for speedups. For more details, please see `this example usage <https://github.com/pytorch/ao/blob/main/torchao/quantization/README.md#quantization-flow-example>`_.
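
As a self-contained companion to the corrected snippet above, the sketch below runs the same torchao flow end to end; the toy module, device, and dtype are illustrative stand-ins for the fine-tuned Llama model used in the tutorial.

# Self-contained sketch of the int4 weight-only quantization step documented above.
# The small nn.Sequential stands in for the fine-tuned Llama model; int4 weight-only
# quantization generally targets bfloat16 weights on a CUDA device.
import torch
import torch.nn as nn
from torchao.quantization.quant_api import quantize_, int4_weight_only

model = nn.Sequential(
    nn.Linear(1024, 1024),
    nn.ReLU(),
    nn.Linear(1024, 1024),
).to(device="cuda", dtype=torch.bfloat16)

quantize_(model, int4_weight_only())       # swaps Linear weights for int4 in place

# As the tutorial notes, speedups after quantization come from torch.compile.
model = torch.compile(model, mode="max-autotune")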

docs/source/tutorials/llama3.rst

Lines changed: 2 additions & 2 deletions
@@ -247,8 +247,8 @@ To quantize the fine-tuned model after installing torchao we can run the followi
     # we also support `int8_weight_only()` and `int8_dynamic_activation_int8_weight()`, see
     # https://github.com/pytorch/ao/tree/main/torchao/quantization#other-available-quantization-techniques
     # for a full list of techniques that we support
-    from torchao.quantization.quant_api import quantize\_, int4_weight_only
-    quantize\_(model, int4_weight_only())
+    from torchao.quantization.quant_api import quantize_, int4_weight_only
+    quantize_(model, int4_weight_only())
 
 After quantization, we rely on torch.compile for speedups. For more details, please see `this example usage <https://github.com/pytorch/ao/blob/main/torchao/quantization/README.md#quantization-flow-example>`_.

recipes/configs/llama2/7B_lora_dpo_single_device.yaml

Lines changed: 1 addition & 0 deletions
@@ -70,6 +70,7 @@ loss:
 epochs: 1
 max_steps_per_epoch: 1000
 gradient_accumulation_steps: 16
+compile: False
 
 # Logging
 output_dir: /tmp/lora_dpo_output/

recipes/configs/qwen2/0.5B_full.yaml

Lines changed: 2 additions & 2 deletions
@@ -3,7 +3,7 @@
 #
 # This config assumes that you've run the following command before launching
 # this run:
-# tune download Qwen/Qwen2-0.5B-Instruct --output-dir /tmp/Qwen2-0.5B-Instruct --ignore-patterns ""
+# tune download Qwen/Qwen2-0.5B-Instruct --output-dir /tmp/Qwen2-0.5B-Instruct --ignore-patterns None
 #
 # To launch on 4 devices, run the following command from root:
 # tune run --nnodes 1 --nproc_per_node 4 full_finetune_distributed --config qwen2/0.5B_full
@@ -49,7 +49,7 @@ batch_size: 2
 epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
-  lr: 5e-6
+  lr: 2e-5
 loss:
   _component_: torch.nn.CrossEntropyLoss
 max_steps_per_epoch: null

recipes/configs/qwen2/0.5B_full_single_device.yaml

Lines changed: 7 additions & 9 deletions
@@ -3,11 +3,7 @@
 #
 # This config assumes that you've run the following command before launching
 # this run:
-# tune download Qwen/Qwen2-0.5B-Instruct --output-dir /tmp/Qwen2-0.5B-Instruct --ignore-patterns ""
-#
-# The default config uses an optimizer from bitsandbytes. If you do not have it installed,
-# you can install it with
-# pip install bitsandbytes
+# tune download Qwen/Qwen2-0.5B-Instruct --output-dir /tmp/Qwen2-0.5B-Instruct --ignore-patterns None
 #
 # To launch on a single device, run the following command from root:
 # tune run full_finetune_single_device --config qwen2/0.5B_full_single_device
@@ -50,13 +46,15 @@ resume_from_checkpoint: False
 batch_size: 2
 epochs: 1
 optimizer:
-  _component_: bitsandbytes.optim.PagedAdamW
-  lr: 5e-6
-optimizer_in_bwd: True
+  _component_: torch.optim.AdamW
+  lr: 2e-5
+
 loss:
   _component_: torch.nn.CrossEntropyLoss
+optimizer_in_bwd: False
+
 max_steps_per_epoch: null
-gradient_accumulation_steps: 16
+gradient_accumulation_steps: 8
 compile: False
 
 # Training environment
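
To make the optimizer change concrete, the sketch below shows roughly how a `_component_`-style block like the one above is turned into an object: the dotted path is imported and the remaining keys are passed as keyword arguments. The helper is a simplified stand-in, not torchtune's actual config API, and the tiny model is a placeholder for Qwen2-0.5B.

# Simplified stand-in for config-driven instantiation of the optimizer block above.
import importlib
import torch.nn as nn

def instantiate(cfg: dict):
    # Split "torch.optim.AdamW" into module path and class name, import, and
    # return the class plus the remaining config keys as kwargs.
    module_path, _, name = cfg["_component_"].rpartition(".")
    cls = getattr(importlib.import_module(module_path), name)
    kwargs = {k: v for k, v in cfg.items() if k != "_component_"}
    return cls, kwargs

model = nn.Linear(8, 8)  # placeholder for the Qwen2-0.5B model
optim_cfg = {"_component_": "torch.optim.AdamW", "lr": 2e-5}
cls, kwargs = instantiate(optim_cfg)
optimizer = cls(model.parameters(), **kwargs)  # AdamW with lr=2e-5, as in the new config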

0 commit comments
