18
18
19
19
import importlib
20
20
import pytest
21
- import tiktoken
22
21
import torch
23
22
import torch .nn as nn
24
23
@@ -102,8 +101,8 @@ class RoPEConfig:
102
101
103
102
@pytest .fixture (scope = "session" )
104
103
def qwen3_weights_path (tmp_path_factory ):
105
- """Creates and saves a deterministic Llama3 model for testing."""
106
- path = tmp_path_factory .mktemp ("models" ) / "llama3_test_weights .pt"
104
+ """Creates and saves a deterministic model for testing."""
105
+ path = tmp_path_factory .mktemp ("models" ) / "qwen3_test_weights .pt"
107
106
108
107
if not path .exists ():
109
108
torch .manual_seed (123 )
@@ -122,26 +121,33 @@ def test_model_variants(ModelClass, qwen3_weights_path, generate_fn):
122
121
model .load_state_dict (torch .load (qwen3_weights_path ))
123
122
model .eval ()
124
123
125
- start_context = "Llamas eat"
124
+ tokenizer = Qwen3Tokenizer (
125
+ tokenizer_file_path = "tokenizer-base.json" ,
126
+ repo_id = "rasbt/qwen3-from-scratch" ,
127
+ add_generation_prompt = False ,
128
+ add_thinking = False
129
+ )
126
130
127
- tokenizer = tiktoken . get_encoding ( "gpt2" )
128
- encoded = tokenizer .encode (start_context )
129
- encoded_tensor = torch .tensor (encoded ). unsqueeze ( 0 )
131
+ prompt = "Give me a short introduction to large language models."
132
+ input_token_ids = tokenizer .encode (prompt )
133
+ input_token_ids = torch .tensor ([ input_token_ids ] )
130
134
131
135
print (f"\n { 50 * '=' } \n { 22 * ' ' } IN\n { 50 * '=' } " )
132
- print ("\n Input text:" , start_context )
133
- print ("Encoded input text:" , encoded )
134
- print ("encoded_tensor.shape:" , encoded_tensor .shape )
136
+ print ("\n Input text:" , prompt )
137
+ print ("Encoded input text:" , input_token_ids )
138
+ print ("encoded_tensor.shape:" , input_token_ids .shape )
135
139
136
140
out = generate_text_simple (
137
141
model = model ,
138
- idx = encoded_tensor ,
142
+ idx = input_token_ids ,
139
143
max_new_tokens = 5 ,
140
144
context_size = QWEN_CONFIG_06_B ["context_length" ]
141
145
)
142
146
print ("Encoded output text:" , out )
143
147
expect = torch .tensor ([
144
- [43 , 2543 , 292 , 4483 , 115206 , 459 , 43010 , 104223 , 55553 ]
148
+ [151644 , 872 , 198 , 35127 , 752 , 264 , 2805 , 16800 , 311 ,
149
+ 3460 , 4128 , 4119 , 13 , 151645 , 198 , 112120 , 83942 , 60483 ,
150
+ 102652 , 7414 ]
145
151
])
146
152
assert torch .equal (expect , out )
147
153
0 commit comments