rasbt · rasbt · Sep 11, 2025 · Sep 11, 2025
diff --git a/ch05/11_qwen3/standalone-qwen3-moe-plus-kvcache.ipynb b/ch05/11_qwen3/standalone-qwen3-moe-plus-kvcache.ipynb
@@ -496,15 +496,13 @@
     "        # Shape (1, 1, num_tokens, num_tokens) to broadcast across batch and heads\n",
     "        mask = mask[None, None, :, :]\n",
     "\n",
-    "        next_cache = []\n",
     "        for i, block in enumerate(self.trf_blocks):\n",
     "            blk_cache = cache.get(i) if cache else None\n",
     "            x, new_blk_cache = block(x, mask, self.cos, self.sin,\n",
     "                                     start_pos=pos_start,\n",
     "                                     cache=blk_cache)\n",
     "            if cache is not None:\n",
     "                cache.update(i, new_blk_cache)\n",
-    "            next_cache.append(new_blk_cache)\n",
     "\n",
     "        x = self.final_norm(x)\n",
     "        logits = self.out_head(x.to(self.cfg[\"dtype\"]))\n",

diff --git a/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb b/ch05/11_qwen3/standalone-qwen3-plus-kvcache.ipynb
@@ -422,15 +422,13 @@
     "        # Shape (1, 1, num_tokens, num_tokens) to broadcast across batch and heads\n",
     "        mask = mask[None, None, :, :]\n",
     "\n",
-    "        next_cache = []\n",
     "        for i, block in enumerate(self.trf_blocks):\n",
     "            blk_cache = cache.get(i) if cache else None\n",
     "            x, new_blk_cache = block(x, mask, self.cos, self.sin,\n",
     "                                     start_pos=pos_start,\n",
     "                                     cache=blk_cache)\n",
     "            if cache is not None:\n",
     "                cache.update(i, new_blk_cache)\n",
-    "            next_cache.append(new_blk_cache)\n",
     "\n",
     "        x = self.final_norm(x)\n",
     "        logits = self.out_head(x.to(self.cfg[\"dtype\"]))\n",

diff --git a/pkg/llms_from_scratch/kv_cache/gpt2.py b/pkg/llms_from_scratch/kv_cache/gpt2.py
@@ -177,13 +177,11 @@ def forward(self, in_idx, use_cache=False, cache=None):
         else:
             start_pos = 0
 
-        next_cache = []
         for i, block in enumerate(self.trf_blocks):
             blk_cache = cache.get(i) if cache else None
             x, new_cache = block(x, use_cache=use_cache, start_pos=start_pos, cache=blk_cache)
             if cache:
                 cache.update(i, new_cache)
-            next_cache.append(new_cache)
 
         x = self.final_norm(x)
         logits = self.out_head(x)

diff --git a/pkg/llms_from_scratch/kv_cache/llama3.py b/pkg/llms_from_scratch/kv_cache/llama3.py
@@ -97,15 +97,13 @@ def forward(self, in_idx, cache=None):
         # Shape (1, 1, num_tokens, num_tokens) to broadcast across batch and heads
         mask = mask[None, None, :, :]
 
-        next_cache = []
         for i, block in enumerate(self.trf_blocks):
             blk_cache = cache.get(i) if cache else None
             x, new_blk_cache = block(x, mask, self.cos, self.sin,
                                      start_pos=pos_start,
                                      cache=blk_cache)
             if cache is not None:
                 cache.update(i, new_blk_cache)
-            next_cache.append(new_blk_cache)
 
         x = self.final_norm(x)
         logits = self.out_head(x.to(self.cfg["dtype"]))

diff --git a/pkg/llms_from_scratch/kv_cache/qwen3.py b/pkg/llms_from_scratch/kv_cache/qwen3.py
@@ -65,15 +65,13 @@ def forward(self, in_idx, cache=None):
         # Shape (1, 1, num_tokens, num_tokens) to broadcast across batch and heads
         mask = mask[None, None, :, :]
 
-        next_cache = []
         for i, block in enumerate(self.trf_blocks):
             blk_cache = cache.get(i) if cache else None
             x, new_blk_cache = block(x, mask, self.cos, self.sin,
                                      start_pos=pos_start,
                                      cache=blk_cache)
             if cache is not None:
                 cache.update(i, new_blk_cache)
-            next_cache.append(new_blk_cache)
 
         x = self.final_norm(x)
         logits = self.out_head(x.to(self.cfg["dtype"]))