
Commit f4463a5

remove unused ENUM
Signed-off-by: Frida Hou <[email protected]>
1 parent 40ef068 commit f4463a5

3 files changed: 1 addition, 23 deletions

tensorrt_llm/_torch/auto_deploy/custom_ops/torch_quant.py

Lines changed: 0 additions & 9 deletions
@@ -8,14 +8,6 @@
 
 from .quant import QUANT_LINEAR_OPS, QUANT_OPS
 
-# ===== Enums =====
-FORMAT_FP8 = 0
-FORMAT_NVFP4 = 1
-
-# scale layouts
-PER_TENSOR = 0
-PER_CHANNEL_OUT = 1
-
 # FP4 tables (E2M1)
 e2m1_bounds = torch.tensor([0.25, 0.75, 1.25, 1.75, 2.5, 3.5, 5])
 e2m1_values = torch.tensor([0, 0.5, 1, 1.5, 2, 3, 4, 6, 0, -0.5, -1, -1.5, -2, -3, -4, -6])
@@ -179,7 +171,6 @@ def torch_fake_quant_fp8_linear(
     For FP8:
     - input_scale[0] and weight_scale[0] are required (amax/448 style)
     - input_zp / weight_zp ignored
-    - supports PER_TENSOR and PER_CHANNEL_OUT for weights
     """
     if weight_quantized.dtype != torch.float8_e4m3fn:
         raise TypeError("FP8 path requires weight_quantized.dtype == float8_e4m3fn")
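The surviving docstring still states the FP8 contract: input_scale[0] and weight_scale[0] are required in the amax/448 style and the zero-point arguments are ignored. Purely as an illustration of that scaling convention (a hypothetical helper, not the actual torch_fake_quant_fp8_linear implementation), a per-tensor FP8 round trip could look like this:

import torch

FP8_E4M3_MAX = 448.0  # largest finite magnitude of torch.float8_e4m3fn

def fp8_per_tensor_roundtrip(x: torch.Tensor):
    """Hypothetical sketch of the amax/448 scale convention named in the docstring."""
    scale = x.abs().amax().float() / FP8_E4M3_MAX   # the "amax/448 style" scale
    x_fp8 = (x / scale).to(torch.float8_e4m3fn)     # quantize to E4M3
    return x_fp8, x_fp8.to(x.dtype) * scale         # dequantized fake-quant result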

tensorrt_llm/_torch/auto_deploy/utils/quantization_utils.py

Lines changed: 0 additions & 9 deletions
@@ -27,10 +27,6 @@
 except ImportError:
     float4_sf_dtype = None
 
-# TODO: put the ENUMs in the same place and import it
-FORMAT_FP8 = 0
-FORMAT_NVFP4 = 1
-
 
 def modelopt_fp4_scale_to_cutlass_fp4_scale(modelopt_scale: torch.Tensor) -> torch.Tensor:
     """Converts the modelopt FP4 per-block weight scale to the cutlass format (padded and swizzled)."""
@@ -185,14 +181,11 @@ def default_scales(original_weight_shape: Tuple) -> Dict[str, torch.Tensor]:
     def build_custom_kwargs_for_linear(
         scale_getattrs: Dict[str, Node],
     ) -> Dict[str, object]:
-        # FP8 custom op contract:
-        # input_scale=[tensor], weight_scale=[tensor], input_zp=[], weight_zp=[], format_type=FORMAT_FP8
         return dict(
             input_scale=[scale_getattrs["input_scale"]],
             weight_scale=[scale_getattrs["weight_scale"]],
             input_zp=[],
             weight_zp=[],
-            # format_type=FORMAT_FP8,
         )
 
     @staticmethod
@@ -280,15 +273,13 @@ def build_custom_kwargs_for_linear(
             weight_scale=[weight_scale_cutlass_uint8, alpha_fused],
             input_zp=[],
             weight_zp=[],
-            format_type=FORMAT_NVFP4
         )
         """
         return dict(
             input_scale=[scale_getattrs["input_scale"]],
             weight_scale=[scale_getattrs["weight_scale"], scale_getattrs["alpha"]],
             input_zp=[],
             weight_zp=[],
-            # format_type=FORMAT_NVFP4,
         )
 
     @staticmethod
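With format_type dropped, both kwargs builders now hand the custom ops only scale and zero-point lists. Purely as an illustration of the resulting shapes (plain strings stand in for the torch.fx get_attr Nodes the real code wires in), the two contracts look roughly like this:

# Hypothetical illustration of the kwargs produced by the two
# build_custom_kwargs_for_linear variants after this change.
fp8_kwargs = dict(
    input_scale=["input_scale_node"],     # [tensor]: amax/448-style per-tensor scale
    weight_scale=["weight_scale_node"],   # [tensor]
    input_zp=[],                          # zero points unused
    weight_zp=[],
)

nvfp4_kwargs = dict(
    input_scale=["input_scale_node"],
    weight_scale=["weight_scale_node", "alpha_node"],  # cutlass uint8 block scales + fused alpha
    input_zp=[],
    weight_zp=[],
)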

tests/unittest/_torch/auto_deploy/unit/singlegpu/custom_ops/test_quant.py

Lines changed: 1 addition & 5 deletions
@@ -8,10 +8,6 @@
 
 torch.manual_seed(0)
 
-scaling_vector_size = 16
-FORMAT_FP8 = 0
-FORMAT_NVFP4 = 1
-
 SCALING_VECTOR_SIZE = 16  # NVFP4 block size along K
 
 
@@ -51,7 +47,7 @@ def test_fp4_linear():
     weight_scale_2 = fp4_global_scale(weight)
 
     weight_fp4, weight_scale = torch.ops.trtllm.fp4_quantize(
-        weight, weight_scale_2, scaling_vector_size, False
+        weight, weight_scale_2, SCALING_VECTOR_SIZE, False
     )
 
     output_fp4_gemm = torch.ops.auto_deploy.torch_quant_fp4_linear(
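The test keeps only the upper-case SCALING_VECTOR_SIZE constant (16, the NVFP4 block size along K) and drops its lowercase duplicate. As a loose, illustration-only reference for what one scale per 16-wide block along K means (torch.ops.trtllm.fp4_quantize itself returns packed FP4 data and scales in its own layout), a plain-PyTorch sketch:

import torch

SCALING_VECTOR_SIZE = 16  # NVFP4 block size along K

def per_block_amax(weight: torch.Tensor) -> torch.Tensor:
    """Illustrative only: one amax per 16-wide block along K (the last dim)."""
    n, k = weight.shape
    assert k % SCALING_VECTOR_SIZE == 0
    blocks = weight.reshape(n, k // SCALING_VECTOR_SIZE, SCALING_VECTOR_SIZE)
    return blocks.abs().amax(dim=-1)  # shape [N, K / 16]

w = torch.randn(128, 64)
print(per_block_amax(w).shape)  # torch.Size([128, 4])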

0 commit comments
