From e0e58f9729e739d857a5ed0d11fc80ea9aa21087 Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Mon, 28 Jul 2025 15:55:48 -0400
Subject: [PATCH] [Bug] Enforce contiguous input for
 `dynamic_scaled_fp8_quant` and `static_scaled_fp8_quant` (#21773)

Signed-off-by: yewentao256
---
 vllm/_custom_ops.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py
index cf296a3b534bc..35345b1be01c2 100644
--- a/vllm/_custom_ops.py
+++ b/vllm/_custom_ops.py
@@ -1282,10 +1282,11 @@ def scaled_fp8_quant(
                 output, input.contiguous(), scale, scale_ub)
         else:
             scale = torch.zeros(1, device=input.device, dtype=torch.float32)
-            torch.ops._C.dynamic_scaled_fp8_quant(output, input, scale)
+            torch.ops._C.dynamic_scaled_fp8_quant(output, input.contiguous(),
+                                                  scale)
     else:
         assert scale.numel() == 1, f"{scale.shape}"
-        torch.ops._C.static_scaled_fp8_quant(output, input, scale)
+        torch.ops._C.static_scaled_fp8_quant(output, input.contiguous(), scale)
 
     return output, scale
 
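
Context for the change: the patch title says these kernels require contiguous
input, so passing a non-contiguous view (e.g. a transpose or a strided slice)
presumably lets the kernel walk the buffer with the wrong layout. The
per-token path above already guarded its call with `input.contiguous()`; this
patch applies the same guard to the dynamic and static per-tensor call sites.

The snippet below is a minimal standalone sketch in plain PyTorch (no vllm
compiled ops needed) of how a non-contiguous view arises and what
`.contiguous()` does; the tensor shapes are illustrative only, not taken from
the patch:

    import torch

    # A transpose is a view: it shares storage with `x` but swaps the
    # strides, so elements are no longer adjacent in row-major order.
    x = torch.randn(4, 8)
    x_t = x.t()                    # shape (8, 4), stride (1, 8)
    print(x_t.is_contiguous())     # False -> a kernel that indexes the
                                   # buffer row-major would read the
                                   # wrong elements

    # .contiguous() copies the view into a fresh row-major buffer; it
    # returns the input tensor unchanged when it is already contiguous,
    # so the guard costs nothing on the common (contiguous) path.
    x_c = x_t.contiguous()
    print(x_c.is_contiguous())     # True
    print(x_c.stride())            # (4, 1) -> standard row-major strides
    assert torch.equal(x_c, x_t)   # same values, new memory layout

Because `.contiguous()` is a no-op for already-contiguous tensors, adding it
at the call site is a cheap way to make the kernels' layout assumption hold
unconditionally rather than relying on every caller to pass contiguous input.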