Commit 825aa92

Profiler v2 (meta-pytorch#1089)

jeromeku authored and RdoubleA committed
Co-authored-by: RdoubleA <[email protected]>
1 parent: 20c25da

22 files changed: 1,116 additions and 77 deletions

docs/source/api_ref_utilities.rst
Lines changed: 1 addition & 1 deletion

@@ -87,7 +87,7 @@ of your finetuning job.
 
     get_memory_stats
     log_memory_stats
-    profiler
+    setup_torch_profiler
 
 .. _metric_logging_label:
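The docs index change simply tracks the rename from torchtune.utils.profiler to torchtune.utils.setup_torch_profiler. In the recipe configs below, the profiler section is a component config, so a recipe would presumably build it through torchtune's config instantiation. A hedged sketch, assuming config.instantiate resolves `_component_` and forwards the remaining YAML keys as keyword arguments; what setup_torch_profiler returns is not shown in this diff:

    from omegaconf import OmegaConf
    from torchtune import config

    # Load one of the recipe configs changed in this commit; cfg.profiler is
    # the YAML section shown in the hunks below.
    cfg = OmegaConf.load("recipes/configs/llama2/7B_lora_single_device.yaml")

    # Assumption: instantiate() calls setup_torch_profiler with the section's
    # remaining keys (enabled, cpu, cuda, wait_steps, ...) as kwargs.
    profiler = config.instantiate(cfg.profiler)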

recipes/configs/code_llama2/7B_lora_single_device.yaml
Lines changed: 21 additions & 2 deletions

@@ -83,6 +83,25 @@ log_every_n_steps: 1
 log_peak_memory_stats: False
 
 profiler:
-  _component_: torchtune.utils.profiler
+  _component_: torchtune.utils.setup_torch_profiler
   enabled: False
-  output_dir: ${output_dir}/torchtune_perf_tracing.json
+
+  #Output directory of trace artifacts
+  output_dir: ${output_dir}/profiling_outputs
+
+  #`torch.profiler.ProfilerActivity` types to trace
+  cpu: True
+  cuda: True
+
+  #trace options passed to `torch.profiler.profile`
+  profile_memory: False
+  with_stack: False
+  record_shapes: True
+  with_flops: False
+
+  # `torch.profiler.schedule` options:
+  # wait_steps -> wait, warmup_steps -> warmup, active_steps -> active, num_cycles -> repeat
+  wait_steps: 5
+  warmup_steps: 5
+  active_steps: 2
+  num_cycles: 1
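The new keys map nearly one-to-one onto the stock PyTorch profiler. A minimal sketch of the profiler this config presumably describes, written against the public torch.profiler API rather than torchtune's actual setup_torch_profiler implementation; the output_dir value below is a stand-in for the config's ${output_dir}/profiling_outputs:

    from torch.profiler import (
        ProfilerActivity,
        profile,
        schedule,
        tensorboard_trace_handler,
    )

    # cpu: True / cuda: True -> ProfilerActivity types to trace
    activities = [ProfilerActivity.CPU, ProfilerActivity.CUDA]

    # Stand-in for the config's ${output_dir}/profiling_outputs
    output_dir = "profiling_outputs"

    profiler = profile(
        activities=activities,
        # Schedule mapping per the config comments:
        # wait_steps -> wait, warmup_steps -> warmup,
        # active_steps -> active, num_cycles -> repeat
        schedule=schedule(wait=5, warmup=5, active=2, repeat=1),
        # Trace options passed straight through from the config
        profile_memory=False,
        with_stack=False,
        record_shapes=True,
        with_flops=False,
        # Write each completed trace into output_dir
        on_trace_ready=tensorboard_trace_handler(output_dir),
    )

The same block, with the same defaults, is added to each of the remaining configs below.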

recipes/configs/code_llama2/7B_qlora_single_device.yaml
Lines changed: 21 additions & 2 deletions

@@ -86,6 +86,25 @@ log_peak_memory_stats: False
 # Show case the usage of pytorch profiler
 # Set enabled to False as it's only needed for debugging training
 profiler:
-  _component_: torchtune.utils.profiler
+  _component_: torchtune.utils.setup_torch_profiler
   enabled: False
-  output_dir: ${output_dir}/torchtune_perf_tracing.json
+
+  #Output directory of trace artifacts
+  output_dir: ${output_dir}/profiling_outputs
+
+  #`torch.profiler.ProfilerActivity` types to trace
+  cpu: True
+  cuda: True
+
+  #trace options passed to `torch.profiler.profile`
+  profile_memory: False
+  with_stack: False
+  record_shapes: True
+  with_flops: False
+
+  # `torch.profiler.schedule` options:
+  # wait_steps -> wait, warmup_steps -> warmup, active_steps -> active, num_cycles -> repeat
+  wait_steps: 5
+  warmup_steps: 5
+  active_steps: 2
+  num_cycles: 1

recipes/configs/gemma/2B_lora_single_device.yaml
Lines changed: 21 additions & 2 deletions

@@ -84,6 +84,25 @@ log_peak_memory_stats: False
 # Show case the usage of pytorch profiler
 # Set enabled to False as it's only needed for debugging training
 profiler:
-  _component_: torchtune.utils.profiler
+  _component_: torchtune.utils.setup_torch_profiler
   enabled: False
-  output_dir: /tmp/alpaca-gemma-finetune/torchtune_perf_tracing.json
+
+  #Output directory of trace artifacts
+  output_dir: ${output_dir}/profiling_outputs
+
+  #`torch.profiler.ProfilerActivity` types to trace
+  cpu: True
+  cuda: True
+
+  #trace options passed to `torch.profiler.profile`
+  profile_memory: False
+  with_stack: False
+  record_shapes: True
+  with_flops: False
+
+  # `torch.profiler.schedule` options:
+  # wait_steps -> wait, warmup_steps -> warmup, active_steps -> active, num_cycles -> repeat
+  wait_steps: 5
+  warmup_steps: 5
+  active_steps: 2
+  num_cycles: 1

recipes/configs/gemma/2B_qlora_single_device.yaml
Lines changed: 21 additions & 2 deletions

@@ -84,6 +84,25 @@ log_peak_memory_stats: False
 # Show case the usage of pytorch profiler
 # Set enabled to False as it's only needed for debugging training
 profiler:
-  _component_: torchtune.utils.profiler
+  _component_: torchtune.utils.setup_torch_profiler
   enabled: False
-  output_dir: /tmp/alpaca-gemma-finetune/torchtune_perf_tracing.json
+
+  #Output directory of trace artifacts
+  output_dir: ${output_dir}/profiling_outputs
+
+  #`torch.profiler.ProfilerActivity` types to trace
+  cpu: True
+  cuda: True
+
+  #trace options passed to `torch.profiler.profile`
+  profile_memory: False
+  with_stack: False
+  record_shapes: True
+  with_flops: False
+
+  # `torch.profiler.schedule` options:
+  # wait_steps -> wait, warmup_steps -> warmup, active_steps -> active, num_cycles -> repeat
+  wait_steps: 5
+  warmup_steps: 5
+  active_steps: 2
+  num_cycles: 1

recipes/configs/gemma/7B_lora_single_device.yaml
Lines changed: 21 additions & 2 deletions

@@ -86,6 +86,25 @@ log_peak_memory_stats: False
 # Show case the usage of pytorch profiler
 # Set enabled to False as it's only needed for debugging training
 profiler:
-  _component_: torchtune.utils.profiler
+  _component_: torchtune.utils.setup_torch_profiler
   enabled: False
-  output_dir: /tmp/alpaca-gemma-finetune/torchtune_perf_tracing.json
+
+  #Output directory of trace artifacts
+  output_dir: ${output_dir}/profiling_outputs
+
+  #`torch.profiler.ProfilerActivity` types to trace
+  cpu: True
+  cuda: True
+
+  #trace options passed to `torch.profiler.profile`
+  profile_memory: False
+  with_stack: False
+  record_shapes: True
+  with_flops: False
+
+  # `torch.profiler.schedule` options:
+  # wait_steps -> wait, warmup_steps -> warmup, active_steps -> active, num_cycles -> repeat
+  wait_steps: 5
+  warmup_steps: 5
+  active_steps: 2
+  num_cycles: 1

recipes/configs/gemma/7B_qlora_single_device.yaml
Lines changed: 21 additions & 2 deletions

@@ -86,6 +86,25 @@ log_peak_memory_stats: False
 # Show case the usage of pytorch profiler
 # Set enabled to False as it's only needed for debugging training
 profiler:
-  _component_: torchtune.utils.profiler
+  _component_: torchtune.utils.setup_torch_profiler
   enabled: False
-  output_dir: /tmp/alpaca-gemma-finetune/torchtune_perf_tracing.json
+
+  #Output directory of trace artifacts
+  output_dir: ${output_dir}/profiling_outputs
+
+  #`torch.profiler.ProfilerActivity` types to trace
+  cpu: True
+  cuda: True
+
+  #trace options passed to `torch.profiler.profile`
+  profile_memory: False
+  with_stack: False
+  record_shapes: True
+  with_flops: False
+
+  # `torch.profiler.schedule` options:
+  # wait_steps -> wait, warmup_steps -> warmup, active_steps -> active, num_cycles -> repeat
+  wait_steps: 5
+  warmup_steps: 5
+  active_steps: 2
+  num_cycles: 1

recipes/configs/llama2/13B_qlora_single_device.yaml
Lines changed: 21 additions & 2 deletions

@@ -83,6 +83,25 @@ enable_activation_checkpointing: True
 # Show case the usage of pytorch profiler
 # Set enabled to False as it's only needed for debugging training
 profiler:
-  _component_: torchtune.utils.profiler
+  _component_: torchtune.utils.setup_torch_profiler
   enabled: False
-  output_dir: ${output_dir}/torchtune_perf_tracing.json
+
+  #Output directory of trace artifacts
+  output_dir: ${output_dir}/profiling_outputs
+
+  #`torch.profiler.ProfilerActivity` types to trace
+  cpu: True
+  cuda: True
+
+  #trace options passed to `torch.profiler.profile`
+  profile_memory: False
+  with_stack: False
+  record_shapes: True
+  with_flops: False
+
+  # `torch.profiler.schedule` options:
+  # wait_steps -> wait, warmup_steps -> warmup, active_steps -> active, num_cycles -> repeat
+  wait_steps: 5
+  warmup_steps: 5
+  active_steps: 2
+  num_cycles: 1

recipes/configs/llama2/7B_lora.yaml
Lines changed: 27 additions & 0 deletions

@@ -80,3 +80,30 @@ log_peak_memory_stats: False
 device: cuda
 dtype: bf16
 enable_activation_checkpointing: False
+
+# Show case the usage of pytorch profiler
+# Set enabled to False as it's only needed for debugging training
+profiler:
+  _component_: torchtune.utils.setup_torch_profiler
+
+  enabled: False
+
+  #Output directory of trace artifacts
+  output_dir: ${output_dir}/profiling_outputs
+
+  #`torch.profiler.ProfilerActivity` types to trace
+  cpu: True
+  cuda: True
+
+  #trace options passed to `torch.profiler.profile`
+  profile_memory: False
+  with_stack: False
+  record_shapes: True
+  with_flops: False
+
+  # `torch.profiler.schedule` options:
+  # wait_steps -> wait, warmup_steps -> warmup, active_steps -> active, num_cycles -> repeat
+  wait_steps: 5
+  warmup_steps: 5
+  active_steps: 2
+  num_cycles: 1
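With the default values, one schedule cycle is wait (steps 0-4), then warmup (steps 5-9), then active tracing (steps 10-11); num_cycles: 1 means the cycle is not repeated. The schedule only advances if the profiler is stepped once per training iteration, roughly as in the sketch below, which reuses the `profiler` object from the earlier torch.profiler example; `train_step` is a hypothetical stand-in for the recipe's real iteration:

    def train_step():
        pass  # hypothetical stand-in for forward/backward/optimizer work

    # Stepping the profiler once per iteration drives the wait/warmup/active
    # schedule; with wait=5, warmup=5, active=2, repeat=1, only steps 10-11
    # are actually traced.
    with profiler:
        for step in range(12):
            train_step()
            profiler.step()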

recipes/configs/llama2/7B_lora_single_device.yaml
Lines changed: 21 additions & 2 deletions

@@ -83,6 +83,25 @@ enable_activation_checkpointing: True
 # Show case the usage of pytorch profiler
 # Set enabled to False as it's only needed for debugging training
 profiler:
-  _component_: torchtune.utils.profiler
+  _component_: torchtune.utils.setup_torch_profiler
   enabled: False
-  output_dir: ${output_dir}/torchtune_perf_tracing.json
+
+  #Output directory of trace artifacts
+  output_dir: ${output_dir}/profiling_outputs
+
+  #`torch.profiler.ProfilerActivity` types to trace
+  cpu: True
+  cuda: True
+
+  #trace options passed to `torch.profiler.profile`
+  profile_memory: False
+  with_stack: False
+  record_shapes: True
+  with_flops: False
+
+  # `torch.profiler.schedule` options:
+  # wait_steps -> wait, warmup_steps -> warmup, active_steps -> active, num_cycles -> repeat
+  wait_steps: 5
+  warmup_steps: 5
+  active_steps: 2
+  num_cycles: 1
