[Bugfix] Remove dead block_quant_to_tensor_quant function (#31294)

Co-authored-by: yurekami <yurekami@users.noreply.github.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Author: ゆり (committed by GitHub)
Date: 2025-12-25 01:22:48 +08:00
Parent: 5d93089686
Commit: 506eb0f454

@@ -15,10 +15,7 @@ from vllm import _custom_ops as ops
 from vllm._aiter_ops import rocm_aiter_ops
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
-from vllm.model_executor.layers.quantization.utils.quant_utils import (
-    GroupShape,
-    group_broadcast,
-)
+from vllm.model_executor.layers.quantization.utils.quant_utils import GroupShape
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
     CUTLASS_BLOCK_FP8_SUPPORTED,
 )
@@ -463,21 +460,6 @@ def input_to_float8(
     return x_scl_sat.to(dtype).contiguous(), scale.float().reciprocal()
-
-
-def block_quant_to_tensor_quant(
-    x_q_block: torch.Tensor,
-    x_s: torch.Tensor,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    """This function converts block-wise quantization to tensor-wise
-    quantization. The inputs are block-wise quantization tensor `x_q_block`,
-    block-wise quantization scale and the block size.
-    The outputs are tensor-wise quantization tensor and tensor-wise
-    quantization scale. Note only float8 is supported for now.
-    """
-    x_dq_block = group_broadcast(x_q_block, x_s)
-    x_q_tensor, scale = input_to_float8(x_dq_block, dtype=x_q_block.dtype)
-    return x_q_tensor, scale
 
 
 @triton.jit
 def _per_token_group_quant_fp8(
     # Pointers to inputs and output
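
Note on the removed helper, for anyone auditing this cleanup: it dequantized a block-quantized float8 tensor (one scale per block, broadcast via `group_broadcast`) and then requantized the result with a single per-tensor scale through `input_to_float8`. The sketch below re-derives that conversion as standalone code for the 2-D case; `broadcast_block_scales` and the e4m3 requantization step are illustrative stand-ins, not vLLM's actual `group_broadcast` / `input_to_float8` implementations.

```python
import torch


def broadcast_block_scales(x_s: torch.Tensor, shape: torch.Size) -> torch.Tensor:
    """Expand a (blocks_m, blocks_n) grid of scales so that every element
    of an (m, n) tensor is covered by the scale of its enclosing block."""
    block_m = -(-shape[0] // x_s.shape[0])  # block height (ceil-divide)
    block_n = -(-shape[1] // x_s.shape[1])  # block width
    expanded = x_s.repeat_interleave(block_m, dim=0).repeat_interleave(block_n, dim=1)
    return expanded[: shape[0], : shape[1]]


def block_quant_to_tensor_quant_sketch(
    x_q_block: torch.Tensor,  # float8 values, quantized block-wise
    x_s: torch.Tensor,        # one float32 scale per block
) -> tuple[torch.Tensor, torch.Tensor]:
    # 1. Dequantize: multiply every element by the scale of its block.
    x_dq = x_q_block.to(torch.float32) * broadcast_block_scales(x_s, x_q_block.shape)
    # 2. Requantize tensor-wise: a single scale derived from the global absmax.
    finfo = torch.finfo(x_q_block.dtype)
    scale = finfo.max / x_dq.abs().max().clamp(min=1e-12)
    x_q = (x_dq * scale).clamp(finfo.min, finfo.max).to(x_q_block.dtype)
    # Return the *dequantization* scale (the reciprocal), matching the
    # convention of input_to_float8 earlier in this file.
    return x_q, scale.reciprocal()
```

The deleted function reused `group_broadcast` and `input_to_float8` from the surrounding module rather than inlining these steps; since it had no callers left, both the function and the now-unused `group_broadcast` import could go.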