Commit def4340 ("fix")
Author: Kartikay Khandelwal
1 parent: 61638bb

2 files changed: 9 additions, 3 deletions


recipes/configs/generate.yaml

Lines changed: 2 additions & 3 deletions
@@ -11,9 +11,8 @@ checkpointer:
     pytorch_model-00002-of-00003.bin,
     pytorch_model-00003-of-00003.bin
   ]
-  recipe_checkpoint: null
   output_dir: /tmp/Llama-2-13b-hf/
-  model_type: MISTRAL
+  model_type: LLAMA2
 
 device: cuda
 dtype: bf16
@@ -22,7 +21,7 @@ seed: 1234
 
 # Tokenizer arguments
 tokenizer:
-  _component_: torchtune.models.mistral.mistral_tokenizer
+  _component_: torchtune.models.llama2.llama2_tokenizer
   path: /tmp/Llama-2-13b-hf/tokenizer.model
 
 # Generation arguments; defaults taken from gpt-fast
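The tokenizer change above is wired up through the config's `_component_` field, a dotted import path that gets resolved to a callable at runtime. As a rough sketch of that dotted-path pattern (this is not torchtune's actual instantiation code; `instantiate_component` and the `collections.Counter` demo below are illustrative assumptions):

```python
from importlib import import_module


def instantiate_component(dotted_path: str, **kwargs):
    """Resolve a dotted path like the config's `_component_` value and
    call it with the remaining keyword arguments.

    Simplified sketch only; torchtune's real config instantiation is
    more involved (nested configs, positional args, etc.).
    """
    module_path, _, attr_name = dotted_path.rpartition(".")
    module = import_module(module_path)
    return getattr(module, attr_name)(**kwargs)


# Demo with a stdlib path; a path such as
# torchtune.models.llama2.llama2_tokenizer would resolve the same way.
counter = instantiate_component("collections.Counter", a=1, b=2)
```

Under this scheme, pointing `_component_` at a Mistral tokenizer while `model_type` says `LLAMA2` (or vice versa) fails only at runtime, which is why the commit fixes both fields together.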

recipes/generate.py

Lines changed: 7 additions & 0 deletions
@@ -18,6 +18,13 @@
 
 
 class InferenceRecipe:
+    """
+    Recipe for generating tokens from a dense Transformer-based LLM.
+
+    Currently this recipe supports single-GPU generation only. Speculative
+    decoding is not supported.
+    """
+
     def __init__(self, cfg: DictConfig) -> None:
         self._device = utils.get_device(device=cfg.device)
         self._dtype = utils.get_dtype(dtype=cfg.dtype)

0 commit comments
