diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
index c093a9bfc4a60..20e7b444856e3 100644
--- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -378,8 +378,6 @@ def per_token_group_quant_fp8(
         is supported for now.
         column_major_scales: Outputs scales in column major.
         out_q: Optional output tensor. If not provided, function will create.
-        tuple[torch.Tensor, torch.Tensor]: The quantized tensor and the
-        scaling factor for quantization.
     Returns:
         tuple[torch.Tensor, torch.Tensor]: The quantized tensor and the
         scaling factor.