Commit d7c0682

switch to 1k instead of 4k
1 parent ed78541 commit d7c0682

File tree: 2 files changed, +4 −4 lines changed

recipes/configs/alpaca_llama2_finetune.yaml
Lines changed: 2 additions & 2 deletions

@@ -1,6 +1,6 @@
 # Dataset and Dataloader
-dataset: slimorca
-seed: 10
+dataset: alpaca
+seed: null
 shuffle: True
 
 # Model Arguments
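
For reference, a minimal sketch of what the updated YAML keys parse to on the Python side, assuming the file is read with PyYAML's yaml.safe_load; the actual torchtune recipe may load its configs differently:

    import yaml  # PyYAML; assumed here for illustration only

    with open("recipes/configs/alpaca_llama2_finetune.yaml") as f:
        cfg = yaml.safe_load(f)

    print(cfg["dataset"])  # "alpaca"  (was "slimorca")
    print(cfg["seed"])     # None      -- YAML "null" parses to Python None
    print(cfg["shuffle"])  # True

With seed set to null, a recipe would typically treat the run as unseeded unless a value is supplied explicitly; the exact handling depends on the recipe code.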

torchtune/datasets/slimorca.py
Lines changed: 2 additions & 2 deletions

@@ -36,7 +36,7 @@ class SlimOrcaDataset(Dataset):
 
     Keyword Arguments:
         max_token_length (int): Maximum number of tokens in the returned.
-            Default is 4096.
+            Default is 1024.
 
     Data input format:
         [ { "from": "system", "value": "You are an AI assistant. You will be
@@ -63,7 +63,7 @@ class SlimOrcaDataset(Dataset):
     def __init__(self, tokenizer: Tokenizer, **kwargs) -> None:
         self._data = load_dataset("Open-Orca/SlimOrca-Dedup", split="train")
         self._tokenizer = tokenizer
-        self._max_token_length = kwargs.get("max_token_length", 4096)
+        self._max_token_length = kwargs.get("max_token_length", 1024)
         if self._max_token_length < 4:
             # Input token needs to have 1 bos, 1 eos,
             # and 1 token from prompt, 1 from label
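
A minimal, self-contained sketch of the constructor pattern this hunk touches: max_token_length is pulled from **kwargs with the new 1024 default, and the existing < 4 guard reflects the minimum of 1 bos, 1 eos, 1 prompt token, and 1 label token. The helper name and the ValueError are assumptions for illustration; only the kwargs.get(...) default and the < 4 check come from the diff:

    def _resolve_max_token_length(**kwargs) -> int:
        # New default is 1024 (previously 4096); callers can still override it.
        max_token_length = kwargs.get("max_token_length", 1024)
        if max_token_length < 4:
            # Input needs at least 1 bos, 1 eos, 1 prompt token, and 1 label token.
            raise ValueError(
                f"max_token_length must be at least 4, got {max_token_length}"
            )
        return max_token_length

    print(_resolve_max_token_length())                       # 1024 (new default)
    print(_resolve_max_token_length(max_token_length=4096))  # explicit override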

0 commit comments