|
30 | 30 | from torchtune.training.checkpointing._utils import ( |
31 | 31 | ADAPTER_CONFIG_FNAME, |
32 | 32 | ADAPTER_MODEL_FNAME, |
33 | | - BASE_MODEL_DIRNAME, |
34 | 33 | copy_files, |
35 | 34 | get_adapter_checkpoint_path, |
36 | 35 | get_model_checkpoint_path, |
@@ -180,14 +179,6 @@ def __init__( |
180 | 179 | self._output_dir = Path(output_dir) |
181 | 180 | self._output_dir.mkdir(parents=True, exist_ok=True) |
182 | 181 |
|
183 | | - # save all files in input_dir, except model weights and mapping, to output_dir |
184 | | - # this is useful to preserve the tokenizer, configs, license, etc. |
185 | | - copy_files( |
186 | | - self._checkpoint_dir, |
187 | | - Path.joinpath(self._output_dir, BASE_MODEL_DIRNAME), |
188 | | - ignore_suffixes=SUFFIXES_TO_NOT_COPY, |
189 | | - ) |
190 | | - |
191 | 182 | # resume from adapter_model ckpt |
192 | 183 | self._adapter_checkpoint = get_adapter_checkpoint_path( |
193 | 184 | output_dir=self._output_dir, |
@@ -331,6 +322,14 @@ def save_checkpoint( |
331 | 322 | "Adapter checkpoint not found in state_dict. Please ensure that the state_dict contains adapter weights." |
332 | 323 | ) |
333 | 324 |
|
| 325 | + # Save all files in ckpt_dir, except model weights and mapping, to output_dir/epoch_{epoch} |
| 326 | + # So it's easy to run inference with the model using this epoch's checkpoint |
| 327 | + copy_files( |
| 328 | + self._checkpoint_dir, |
| 329 | + Path.joinpath(self._output_dir, f"epoch_{epoch}"), |
| 330 | + ignore_suffixes=SUFFIXES_TO_NOT_COPY, |
| 331 | + ) |
| 332 | + |
334 | 333 | # If the recipe state needs to be output, first remove the model state dict |
335 | 334 | if intermediate_checkpoint: |
336 | 335 | _ = state_dict.pop(training.MODEL_KEY, None) |
@@ -435,14 +434,6 @@ def __init__( |
435 | 434 | Path.joinpath(self._checkpoint_dir, "config.json").read_text() |
436 | 435 | ) |
437 | 436 |
|
438 | | - # save all files in input_dir, except model weights and mapping, to output_dir |
439 | | - # this is useful to preserve the tokenizer, configs, license, etc. |
440 | | - copy_files( |
441 | | - self._checkpoint_dir, |
442 | | - Path.joinpath(self._output_dir, BASE_MODEL_DIRNAME), |
443 | | - ignore_suffixes=SUFFIXES_TO_NOT_COPY, |
444 | | - ) |
445 | | - |
446 | 437 | # repo_id is necessary for when saving an adapter config, so its compatible with HF. |
447 | 438 | # This json file is produced and saved in the download step. |
448 | 439 | # contents are {"repo_id": "some_model/some_model_version"} |
@@ -873,6 +864,14 @@ def save_checkpoint( |
873 | 864 | f"saved to {output_path}" |
874 | 865 | ) |
875 | 866 |
|
| 867 | + # Save all files in ckpt_dir, except model weights and mapping, to output_dir/epoch_{epoch} |
| 868 | + # So it's easy to run inference with the model using this epoch's checkpoint |
| 869 | + copy_files( |
| 870 | + self._checkpoint_dir, |
| 871 | + Path.joinpath(self._output_dir, f"epoch_{epoch}"), |
| 872 | + ignore_suffixes=SUFFIXES_TO_NOT_COPY, |
| 873 | + ) |
| 874 | + |
876 | 875 | # If the recipe state needs to be output, first remove the model state dict |
877 | 876 | # and if it exists, remove the adapter state dict as well |
878 | 877 | if intermediate_checkpoint: |
@@ -966,14 +965,6 @@ def __init__( |
966 | 965 | self._output_dir = Path(output_dir) |
967 | 966 | self._output_dir.mkdir(parents=True, exist_ok=True) |
968 | 967 |
|
969 | | - # save all files in input_dir, except model weights and mapping, to output_dir |
970 | | - # this is useful to preserve the tokenizer, configs, license, etc. |
971 | | - copy_files( |
972 | | - self._checkpoint_dir, |
973 | | - Path.joinpath(self._output_dir, BASE_MODEL_DIRNAME), |
974 | | - ignore_suffixes=SUFFIXES_TO_NOT_COPY, |
975 | | - ) |
976 | | - |
977 | 968 | # resume from adapter_model ckpt |
978 | 969 | self._adapter_checkpoint = get_adapter_checkpoint_path( |
979 | 970 | output_dir=self._output_dir, |
@@ -1126,6 +1117,14 @@ def save_checkpoint( |
1126 | 1117 | "Adapter checkpoint not found in state_dict. Please ensure that the state_dict contains adapter weights." |
1127 | 1118 | ) |
1128 | 1119 |
|
| 1120 | + # Save all files in ckpt_dir, except model weights and mapping, to output_dir/epoch_{epoch} |
| 1121 | + # So it's easy to run inference with the model using this epoch's checkpoint |
| 1122 | + copy_files( |
| 1123 | + self._checkpoint_dir, |
| 1124 | + Path.joinpath(self._output_dir, f"epoch_{epoch}"), |
| 1125 | + ignore_suffixes=SUFFIXES_TO_NOT_COPY, |
| 1126 | + ) |
| 1127 | + |
1129 | 1128 | # If the recipe state needs to be output, first remove the model state dict |
1130 | 1129 | # and if it exists, remove the adapter state dict as well |
1131 | 1130 | if intermediate_checkpoint: |
|
0 commit comments