File tree: 1 file changed (+6 / −1 lines) — tensorrt_llm/_torch/modules/fused_moe

Original file line number | Diff line number | Diff line change
55from ...distributed .ops import reducescatter
66from ...model_config import ModelConfig
7- from ...utils import Fp4QuantizedTensor
7+ from ...utils import Fp4QuantizedTensor , get_sm_version
88from .interface import MoE , MoEWeightLoadingMode
99from .quantization import (DeepSeekFP8BlockScalesFusedMoEMethod ,
1010 NVFP4TRTLLMGenFusedMoEMethod )
@@ -68,6 +68,11 @@ def __init__(
6868 weight_loading_mode = weight_loading_mode ,
6969 )
7070
71+ sm_version = get_sm_version ()
72+ if sm_version >= 120 :
73+ raise NotImplementedError (
74+ "TRTLLMGenFusedMoE does not support SM120 and above." )
75+
7176 assert not self .smart_router , "Smart router is not supported in TRTLLMGenFusedMoE."
7277
7378 self .num_slots = self .num_experts
You can’t perform that action at this time.
0 commit comments