Skip to content

Commit 6f01c17

Browse files
committed
Update tests
1 parent 0e0ccf0 commit 6f01c17

File tree

2 files changed

+7
-6
lines changed

2 files changed

+7
-6
lines changed

tests/torchtune/models/qwen2/test_qwen2_tokenizer.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ def test_tokenize_messages(self, messages):
5959
1506, 416, 114, 128, 1429, 182, 253, 82, 120, 163, 330, 105, 262, 13, 223, 1791, 155, 1551,
6060
171, 1951, 628, 296, 64, 237, 886, 1390, 130, 883, 1678, 447, 306, 279, 113, 11, 215, 785,
6161
215, 1951, 628, 378, 101, 66, 72, 593, 98, 984, 208, 1580, 167, 510, 737, 318, 1278, 13,
62-
2000] # noqa
62+
2002] # noqa
6363
# fmt: on
6464

6565
expected_mask = [True] * 67 + [False] * 121
@@ -69,8 +69,9 @@ def test_tokenize_messages(self, messages):
6969
formatted_messages = tokenizer.decode(tokens)
7070
expected_formatted_messages = (
7171
f"<|im_start|>user\n{messages[0].text_content}<|im_end|>\n"
72-
f"<|im_start|>assistant\n{messages[1].text_content}<|endoftext|>"
72+
f"<|im_start|>assistant\n{messages[1].text_content}<|im_end|>"
7373
)
74+
7475
assert expected_formatted_messages == formatted_messages
7576

7677
def test_tokenize_messages_gt_max_seq_len(self, messages):

tests/torchtune/models/qwen2_5/test_tokenizer.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ def test_tokenize_messages(self):
2929
expected_tokens = [
3030
151644, 82, 88, 479, 94, 56, 119, 230, 98, 374, 494, 1318, 249, 13, 151645, 94, 151644, 273, 105, 94,
3131
38, 229, 362, 98, 1695, 310, 1305, 165, 128, 432, 43, 44, 82, 13, 151645, 94, 151644, 397, 251, 249, 94,
32-
151643,
32+
151645,
3333
] # noqa
3434
# fmt: on
3535

@@ -39,7 +39,7 @@ def test_tokenize_messages(self):
3939
"<|im_start|>user\n"
4040
"Give me a short introduction to LLMs.<|im_end|>\n"
4141
"<|im_start|>assistant\n"
42-
"<|endoftext|>"
42+
"<|im_end|>"
4343
)
4444
_test_tokenize_messages(
4545
tokenizer,
@@ -62,7 +62,7 @@ def test_tool_call(self):
6262
151644, 82, 88, 479, 94, 64, 151645, 94, 151644, 273, 105, 94, 65, 151645, 94, 151644, 397, 251, 249,
6363
94, 151657, 94, 83, 269, 107, 330, 94, 151658, 151645, 94, 151644, 273, 105, 94, 27, 83, 1364,
6464
62, 237, 79, 102, 182, 29, 94, 83, 269, 706, 102, 182, 94, 1932, 83, 1364, 62, 237, 79, 102,
65-
182, 29, 151645, 94, 151644, 397, 251, 249, 94, 151643,
65+
182, 29, 151645, 94, 151644, 397, 251, 249, 94, 151645,
6666
] # noqa
6767
# fmt: on
6868

@@ -80,7 +80,7 @@ def test_tool_call(self):
8080
"test response\n"
8181
"</tool_response><|im_end|>\n"
8282
"<|im_start|>assistant\n"
83-
"<|endoftext|>"
83+
"<|im_end|>"
8484
)
8585
_test_tokenize_messages(
8686
tokenizer,

0 commit comments

Comments
 (0)