|
13 | 13 | Qwen3Tokenizer
|
14 | 14 | )
|
15 | 15 | from llms_from_scratch.kv_cache.qwen3 import Qwen3Model as Qwen3ModelKV
|
| 16 | +from llms_from_scratch.kv_cache.utils import KVCache |
16 | 17 | from llms_from_scratch.kv_cache.generate import generate_text_simple as generate_text_simple_cached
|
17 | 18 |
|
18 | 19 | from llms_from_scratch.kv_cache_batched.qwen3 import Qwen3Model as Qwen3ModelKVBatched
|
@@ -50,6 +51,118 @@ def extra_repr(self):
|
50 | 51 | transformers_installed = importlib.util.find_spec("transformers") is not None
|
51 | 52 |
|
52 | 53 |
|
| 54 | +@pytest.fixture |
| 55 | +def dummy_input(): |
| 56 | + torch.manual_seed(123) |
| 57 | + return torch.randint(0, 100, (1, 8)) # batch size 1, seq length 8 |
| 58 | + |
| 59 | + |
| 60 | +@pytest.fixture |
| 61 | +def dummy_cfg_base(): |
| 62 | + return { |
| 63 | + "vocab_size": 100, |
| 64 | + "emb_dim": 32, |
| 65 | + "hidden_dim": 64, |
| 66 | + "n_layers": 2, |
| 67 | + "n_heads": 4, |
| 68 | + "head_dim": 8, |
| 69 | + "n_kv_groups": 1, |
| 70 | + "qk_norm": False, |
| 71 | + "dtype": torch.float32, |
| 72 | + "rope_base": 10000, |
| 73 | + "context_length": 64, |
| 74 | + "num_experts": 0, |
| 75 | + } |
| 76 | + |
| 77 | + |
| 78 | +@pytest.fixture |
| 79 | +def dummy_cfg_moe(dummy_cfg_base): |
| 80 | + cfg = dummy_cfg_base.copy() |
| 81 | + cfg.update({ |
| 82 | + "num_experts": 4, |
| 83 | + "num_experts_per_tok": 2, |
| 84 | + "moe_intermediate_size": 64, |
| 85 | + }) |
| 86 | + return cfg |
| 87 | + |
| 88 | + |
| 89 | +def test_dummy_qwen3_forward(dummy_cfg_base, dummy_input): |
| 90 | + model = Qwen3Model(dummy_cfg_base) |
| 91 | + out = model(dummy_input) |
| 92 | + assert out.shape == (1, dummy_input.size(1), dummy_cfg_base["vocab_size"]), \ |
| 93 | + f"Expected shape (1, seq_len, vocab_size), got {out.shape}" |
| 94 | + |
| 95 | + |
| 96 | +def test_dummy_qwen3_moe_forward(dummy_cfg_moe, dummy_input): |
| 97 | + model = Qwen3Model(dummy_cfg_moe) |
| 98 | + out = model(dummy_input) |
| 99 | + assert out.shape == (1, dummy_input.size(1), dummy_cfg_moe["vocab_size"]), \ |
| 100 | + f"Expected shape (1, seq_len, vocab_size), got {out.shape}" |
| 101 | + assert any(hasattr(block.ff, 'gate') for block in model.trf_blocks), \ |
| 102 | + "Expected MoEFeedForward in at least one transformer block" |
| 103 | + |
| 104 | + |
| 105 | +def test_qwen3_base_kvcache_equivalence(dummy_cfg_base): |
| 106 | + model_regular = Qwen3Model(dummy_cfg_base) |
| 107 | + model_regular.eval() |
| 108 | + |
| 109 | + model_kv = Qwen3ModelKV(dummy_cfg_base) |
| 110 | + model_kv.eval() |
| 111 | + model_kv.load_state_dict(model_regular.state_dict()) # ensure same weights |
| 112 | + |
| 113 | + model_kv.reset_kv_cache() |
| 114 | + cache = KVCache(n_layers=dummy_cfg_base["n_layers"]) |
| 115 | + |
| 116 | + torch.manual_seed(123) |
| 117 | + input_ids = torch.randint(0, dummy_cfg_base["vocab_size"], (1, 6)) # batch_size=1, seq_len=6 |
| 118 | + |
| 119 | + # full-sequence output |
| 120 | + out_full = model_regular(input_ids) |
| 121 | + |
| 122 | + # stepwise with KV cache |
| 123 | + logits_stepwise = [] |
| 124 | + for t in range(input_ids.size(1)): |
| 125 | + input_token = input_ids[:, t:t + 1] # shape (1,1) |
| 126 | + logits = model_kv(input_token, cache=cache) |
| 127 | + logits_stepwise.append(logits) |
| 128 | + |
| 129 | + out_kv = torch.cat(logits_stepwise, dim=1) |
| 130 | + |
| 131 | + assert out_full.shape == out_kv.shape, f"Shape mismatch: {out_full.shape} vs {out_kv.shape}" |
| 132 | + assert torch.allclose(out_full, out_kv, atol=1e-5, rtol=1e-3) |
| 133 | + |
| 134 | + |
| 135 | +@pytest.mark.parametrize("cfg_name", ["dummy_cfg_base", "dummy_cfg_moe"]) |
| 136 | +def test_qwen3_moe_kvcache_equivalence(cfg_name, request): |
| 137 | +    cfg = request.getfixturevalue(cfg_name)  # resolve the config fixture by name |
| 138 | +    model_regular = Qwen3Model(cfg) |
| 139 | +    model_regular.eval() |
| 140 | + |
| 141 | +    torch.manual_seed(123) |
| 142 | +    input_ids = torch.randint(0, cfg["vocab_size"], (1, 6))  # batch_size=1, seq_len=6 |
| 143 | + |
| 144 | +    # Full-sequence forward pass without KV cache |
| 145 | +    out_full = model_regular(input_ids) |
| 146 | + |
| 147 | +    # Now with KV cache |
| 148 | +    model_kv = Qwen3ModelKV(cfg) |
| 149 | +    model_kv.eval() |
| 150 | +    model_kv.load_state_dict(model_regular.state_dict())  # ensure same weights |
| 151 | +    model_kv.reset_kv_cache() |
| 152 | +    cache = KVCache(n_layers=cfg["n_layers"]) |
| 153 | + |
| 154 | +    logits_stepwise = [] |
| 155 | +    for t in range(input_ids.size(1)): |
| 156 | +        input_token = input_ids[:, t:t+1]  # shape (1, 1) |
| 157 | +        logits = model_kv(input_token, cache=cache) |
| 158 | +        logits_stepwise.append(logits) |
| 159 | + |
| 160 | +    # Concatenate all stepwise outputs |
| 161 | +    out_kv = torch.cat(logits_stepwise, dim=1) |
| 162 | + |
| 163 | +    assert torch.allclose(out_full, out_kv, atol=1e-5, rtol=1e-3) |
| 164 | + |
| 165 | + |
53 | 166 | @pytest.mark.skipif(not transformers_installed, reason="transformers not installed")
|
54 | 167 | def test_rope():
|
55 | 168 |
|
|