@@ -29,7 +29,7 @@ def test_tokenize_messages(self):
2929 expected_tokens = [
3030 151644 , 82 , 88 , 479 , 94 , 56 , 119 , 230 , 98 , 374 , 494 , 1318 , 249 , 13 , 151645 , 94 , 151644 , 273 , 105 , 94 ,
3131 38 , 229 , 362 , 98 , 1695 , 310 , 1305 , 165 , 128 , 432 , 43 , 44 , 82 , 13 , 151645 , 94 , 151644 , 397 , 251 , 249 , 94 ,
32- 151643 ,
32+ 151645 ,
3333 ] # noqa
3434 # fmt: on
3535
@@ -39,7 +39,7 @@ def test_tokenize_messages(self):
3939 "<|im_start|>user\n "
4040 "Give me a short introduction to LLMs.<|im_end|>\n "
4141 "<|im_start|>assistant\n "
42- "<|endoftext |>"
42+ "<|im_end |>"
4343 )
4444 _test_tokenize_messages (
4545 tokenizer ,
@@ -62,7 +62,7 @@ def test_tool_call(self):
6262 151644 , 82 , 88 , 479 , 94 , 64 , 151645 , 94 , 151644 , 273 , 105 , 94 , 65 , 151645 , 94 , 151644 , 397 , 251 , 249 ,
6363 94 , 151657 , 94 , 83 , 269 , 107 , 330 , 94 , 151658 , 151645 , 94 , 151644 , 273 , 105 , 94 , 27 , 83 , 1364 ,
6464 62 , 237 , 79 , 102 , 182 , 29 , 94 , 83 , 269 , 706 , 102 , 182 , 94 , 1932 , 83 , 1364 , 62 , 237 , 79 , 102 ,
65- 182 , 29 , 151645 , 94 , 151644 , 397 , 251 , 249 , 94 , 151643 ,
65+ 182 , 29 , 151645 , 94 , 151644 , 397 , 251 , 249 , 94 , 151645 ,
6666 ] # noqa
6767 # fmt: on
6868
@@ -80,7 +80,7 @@ def test_tool_call(self):
8080 "test response\n "
8181 "</tool_response><|im_end|>\n "
8282 "<|im_start|>assistant\n "
83- "<|endoftext |>"
83+ "<|im_end |>"
8484 )
8585 _test_tokenize_messages (
8686 tokenizer ,
0 commit comments