Update logic to disable quantizers

ajrasane · ajrasane · commit 841067464461 · 2025-12-17T08:22:08.000Z
Signed-off-by: ajrasane <131806219+ajrasane@users.noreply.github.com>
diff --git a/examples/llm_ptq/example_utils.py b/examples/llm_ptq/example_utils.py
@@ -174,6 +174,21 @@ def build_quant_cfg(
             quant_cfg["quant_cfg"]["*image*"] = {"enable": False}
             quant_cfg["quant_cfg"]["*vision*"] = {"enable": False}
 
+        # Qwen3 specific quantizer disabling patterns (thinker.model.layers only)
+        if "qkv_disabled" in qformat:
+            quant_cfg = copy.deepcopy(quant_cfg)  # Don't modify global config
+            for proj in ["q_proj", "k_proj", "v_proj"]:
+                quant_cfg["quant_cfg"][f"*thinker.model.layers.*.self_attn.{proj}*"] = {
+                    "enable": False
+                }
+        if "qkvo_disabled" in qformat:
+            if "qkv_disabled" not in qformat:  # Avoid double deepcopy
+                quant_cfg = copy.deepcopy(quant_cfg)
+            for proj in ["o_proj"]:
+                quant_cfg["quant_cfg"][f"*thinker.model.layers.*.self_attn.{proj}*"] = {
+                    "enable": False
+                }
+
     return quant_cfg
 
 
diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py
@@ -298,12 +298,8 @@ def main(args):
             use_seq_device_map=args.use_seq_device_map,
             attn_implementation=args.attn_implementation,
         )
-    else:
-        assert args.qformat in QUANT_CFG_CHOICES, (
-            f"Quantization format is not supported for low memory mode. Supported formats: {QUANT_CFG_CHOICES.keys()}"
-        )
-        quant_cfg = QUANT_CFG_CHOICES[args.qformat]
 
+        quant_cfg = QUANT_CFG_CHOICES[args.qformat]
         # Qwen3 specific quantizer disabling patterns (thinker.model.layers only)
         if "qkv_disabled" in args.qformat:
             # Disable q_proj, k_proj, v_proj quantizers
@@ -325,6 +321,11 @@ def main(args):
                 quant_cfg["quant_cfg"][f"*thinker.model.layers.{i}.*"] = {"enable": False}
             for i in range(total_layers - n_layers_to_disable, total_layers):
                 quant_cfg["quant_cfg"][f"*thinker.model.layers.{i}.*"] = {"enable": False}
+    else:
+        assert args.qformat in QUANT_CFG_CHOICES, (
+            f"Quantization format is not supported for low memory mode. Supported formats: {QUANT_CFG_CHOICES.keys()}"
+        )
+        quant_cfg = QUANT_CFG_CHOICES[args.qformat]
 
         if args.kv_cache_qformat != "none":
             quant_cfg = mtq.utils.update_quant_cfg_with_kv_cache_quant(