We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 21c4172, commit b8c8237 (Copy full SHA for b8c8237)
pkg/llms_from_scratch/tests/test_qwen3.py
@@ -279,3 +279,11 @@ def test_tokenizer_equivalence():
279
280
assert tokenizer_ref.eos_token_id == tokenizer.eos_token_id
281
assert tokenizer_ref.pad_token_id == tokenizer.pad_token_id
282
+
283
+ assert tokenizer.encode("<|endoftext|>") == [tokenizer._special_to_id["<|endoftext|>"]]
284
+ assert tokenizer.encode("<|im_end|>") == [tokenizer._special_to_id["<|im_end|>"]]
285
286
+ expected_eos_token = "<|im_end|>" if "Base" not in repo_id else "<|endoftext|>"
287
+ expected_pad_token = "<|endoftext|>"
288
+ assert tokenizer.decode([tokenizer.eos_token_id]) == expected_eos_token
289
+ assert tokenizer.decode([tokenizer.pad_token_id]) == expected_pad_token
0 commit comments