Skip to content

Commit 8410674

Browse files
committed
Update logic to disable quantizers
Signed-off-by: ajrasane <[email protected]>
1 parent e4b374a commit 8410674

File tree

2 files changed

+21
-5
lines changed

2 files changed

+21
-5
lines changed

examples/llm_ptq/example_utils.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,21 @@ def build_quant_cfg(
         quant_cfg["quant_cfg"]["*image*"] = {"enable": False}
         quant_cfg["quant_cfg"]["*vision*"] = {"enable": False}

+    # Qwen3 specific quantizer disabling patterns (thinker.model.layers only)
+    if "qkv_disabled" in qformat:
+        quant_cfg = copy.deepcopy(quant_cfg)  # Don't modify global config
+        for proj in ["q_proj", "k_proj", "v_proj"]:
+            quant_cfg["quant_cfg"][f"*thinker.model.layers.*.self_attn.{proj}*"] = {
+                "enable": False
+            }
+    if "qkvo_disabled" in qformat:
+        if "qkv_disabled" not in qformat:  # Avoid double deepcopy
+            quant_cfg = copy.deepcopy(quant_cfg)
+        for proj in ["o_proj"]:
+            quant_cfg["quant_cfg"][f"*thinker.model.layers.*.self_attn.{proj}*"] = {
+                "enable": False
+            }
+
     return quant_cfg

(Note: indentation reconstructed to standard 4-space Python style; the scraped page had lost the original columns and fused the old/new diff line numbers into the text.)
178193

179194

examples/llm_ptq/hf_ptq.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -298,12 +298,8 @@ def main(args):
             use_seq_device_map=args.use_seq_device_map,
             attn_implementation=args.attn_implementation,
         )
-    else:
-        assert args.qformat in QUANT_CFG_CHOICES, (
-            f"Quantization format is not supported for low memory mode. Supported formats: {QUANT_CFG_CHOICES.keys()}"
-        )
-        quant_cfg = QUANT_CFG_CHOICES[args.qformat]

+    quant_cfg = QUANT_CFG_CHOICES[args.qformat]
     # Qwen3 specific quantizer disabling patterns (thinker.model.layers only)
     if "qkv_disabled" in args.qformat:
         # Disable q_proj, k_proj, v_proj quantizers
@@ -325,6 +321,11 @@ def main(args):
             quant_cfg["quant_cfg"][f"*thinker.model.layers.{i}.*"] = {"enable": False}
         for i in range(total_layers - n_layers_to_disable, total_layers):
             quant_cfg["quant_cfg"][f"*thinker.model.layers.{i}.*"] = {"enable": False}
+    else:
+        assert args.qformat in QUANT_CFG_CHOICES, (
+            f"Quantization format is not supported for low memory mode. Supported formats: {QUANT_CFG_CHOICES.keys()}"
+        )
+        quant_cfg = QUANT_CFG_CHOICES[args.qformat]

     if args.kv_cache_qformat != "none":
         quant_cfg = mtq.utils.update_quant_cfg_with_kv_cache_quant(

(Note: indentation reconstructed to standard 4-space Python style; the scraped page had lost the original columns and fused the old/new diff line numbers into the text.)

0 commit comments

Comments
 (0)