From 506eb0f45454a07a21bf6d8731475be2a279277b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E3=82=86=E3=82=8A?=
Date: Thu, 25 Dec 2025 01:22:48 +0800
Subject: [PATCH] [Bugfix] Remove dead `block_quant_to_tensor_quant` function (#31294)

Co-authored-by: yurekami
Co-authored-by: Claude Opus 4.5
---
 .../layers/quantization/utils/fp8_utils.py | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
index 8e4dde324f397..de6a1e8c1aa7d 100644
--- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -15,10 +15,7 @@ from vllm import _custom_ops as ops
 from vllm._aiter_ops import rocm_aiter_ops
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
-from vllm.model_executor.layers.quantization.utils.quant_utils import (
-    GroupShape,
-    group_broadcast,
-)
+from vllm.model_executor.layers.quantization.utils.quant_utils import GroupShape
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
     CUTLASS_BLOCK_FP8_SUPPORTED,
 )
@@ -463,21 +460,6 @@ def input_to_float8(
     return x_scl_sat.to(dtype).contiguous(), scale.float().reciprocal()
 
 
-def block_quant_to_tensor_quant(
-    x_q_block: torch.Tensor,
-    x_s: torch.Tensor,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    """This function converts block-wise quantization to tensor-wise
-    quantization. The inputs are block-wise quantization tensor `x_q_block`,
-    block-wise quantization scale and the block size.
-    The outputs are tensor-wise quantization tensor and tensor-wise
-    quantization scale. Note only float8 is supported for now.
-    """
-    x_dq_block = group_broadcast(x_q_block, x_s)
-    x_q_tensor, scale = input_to_float8(x_dq_block, dtype=x_q_block.dtype)
-    return x_q_tensor, scale
-
-
 @triton.jit
 def _per_token_group_quant_fp8(
     # Pointers to inputs and output
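-- 
Out-of-tree callers of the removed helper can reproduce the block-to-tensor
requantization with the utilities this patch keeps. Below is a minimal
sketch, not part of the patch: the helper name `requant_block_to_tensor` is
invented for illustration, and it assumes `group_broadcast(scales, shape)`
from `quant_utils` broadcasts per-block scales up to a full tensor shape;
`input_to_float8` and its (tensor, reciprocal scale) return convention are
taken from the context lines above.

import torch

from vllm.model_executor.layers.quantization.utils.fp8_utils import (
    input_to_float8,
)
from vllm.model_executor.layers.quantization.utils.quant_utils import (
    group_broadcast,
)


def requant_block_to_tensor(
    x_q_block: torch.Tensor,  # block-wise quantized FP8 tensor
    x_s: torch.Tensor,  # per-block (dequantization) scales
) -> tuple[torch.Tensor, torch.Tensor]:
    # Dequantize: expand the per-block scales to the full tensor shape
    # (assumed group_broadcast semantics) and undo the block-wise scaling.
    x_dq = x_q_block.to(torch.float32) * group_broadcast(x_s, x_q_block.shape)
    # Re-quantize with a single per-tensor scale; input_to_float8 returns
    # the FP8 tensor plus the reciprocal (dequantization) scale.
    return input_to_float8(x_dq, dtype=x_q_block.dtype)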