File tree Expand file tree Collapse file tree 2 files changed +21
-5
lines changed
Expand file tree Collapse file tree 2 files changed +21
-5
lines changed Original file line number Diff line number Diff line change @@ -174,6 +174,21 @@ def build_quant_cfg(
174174 quant_cfg ["quant_cfg" ]["*image*" ] = {"enable" : False }
175175 quant_cfg ["quant_cfg" ]["*vision*" ] = {"enable" : False }
176176
177+ # Qwen3 specific quantizer disabling patterns (thinker.model.layers only)
178+ if "qkv_disabled" in qformat :
179+ quant_cfg = copy .deepcopy (quant_cfg ) # Don't modify global config
180+ for proj in ["q_proj" , "k_proj" , "v_proj" ]:
181+ quant_cfg ["quant_cfg" ][f"*thinker.model.layers.*.self_attn.{ proj } *" ] = {
182+ "enable" : False
183+ }
184+ if "qkvo_disabled" in qformat :
185+ if "qkv_disabled" not in qformat : # Avoid double deepcopy
186+ quant_cfg = copy .deepcopy (quant_cfg )
187+ for proj in ["o_proj" ]:
188+ quant_cfg ["quant_cfg" ][f"*thinker.model.layers.*.self_attn.{ proj } *" ] = {
189+ "enable" : False
190+ }
191+
177192 return quant_cfg
178193
179194
Original file line number Diff line number Diff line change @@ -298,12 +298,8 @@ def main(args):
298298 use_seq_device_map = args .use_seq_device_map ,
299299 attn_implementation = args .attn_implementation ,
300300 )
301- else :
302- assert args .qformat in QUANT_CFG_CHOICES , (
303- f"Quantization format is not supported for low memory mode. Supported formats: { QUANT_CFG_CHOICES .keys ()} "
304- )
305- quant_cfg = QUANT_CFG_CHOICES [args .qformat ]
306301
302+ quant_cfg = QUANT_CFG_CHOICES [args .qformat ]
307303 # Qwen3 specific quantizer disabling patterns (thinker.model.layers only)
308304 if "qkv_disabled" in args .qformat :
309305 # Disable q_proj, k_proj, v_proj quantizers
@@ -325,6 +321,11 @@ def main(args):
325321 quant_cfg ["quant_cfg" ][f"*thinker.model.layers.{ i } .*" ] = {"enable" : False }
326322 for i in range (total_layers - n_layers_to_disable , total_layers ):
327323 quant_cfg ["quant_cfg" ][f"*thinker.model.layers.{ i } .*" ] = {"enable" : False }
324+ else :
325+ assert args .qformat in QUANT_CFG_CHOICES , (
326+ f"Quantization format is not supported for low memory mode. Supported formats: { QUANT_CFG_CHOICES .keys ()} "
327+ )
328+ quant_cfg = QUANT_CFG_CHOICES [args .qformat ]
328329
329330 if args .kv_cache_qformat != "none" :
330331 quant_cfg = mtq .utils .update_quant_cfg_with_kv_cache_quant (
You can’t perform that action at this time.
0 commit comments