[Bug] Enforce contiguous input for dynamic_scaled_fp8_quant and static_scaled_fp8_quant (#21773)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
commit e0e58f9729
parent b361f14e39
@@ -1282,10 +1282,11 @@ def scaled_fp8_quant(
                 output, input.contiguous(), scale, scale_ub)
         else:
             scale = torch.zeros(1, device=input.device, dtype=torch.float32)
-            torch.ops._C.dynamic_scaled_fp8_quant(output, input, scale)
+            torch.ops._C.dynamic_scaled_fp8_quant(output, input.contiguous(),
+                                                  scale)
     else:
         assert scale.numel() == 1, f"{scale.shape}"
-        torch.ops._C.static_scaled_fp8_quant(output, input, scale)
+        torch.ops._C.static_scaled_fp8_quant(output, input.contiguous(), scale)
 
     return output, scale
 
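For context on why the fix is needed, here is a minimal sketch (illustrative, not vLLM code): transposing or slicing a PyTorch tensor yields a view whose strides are no longer row-major, so a kernel that indexes the underlying storage as a flat contiguous buffer would read elements in the wrong order. Tensor.contiguous() materializes a row-major copy only when needed and returns the tensor itself when it is already contiguous, so the common path pays nothing extra:

    # Minimal sketch (not vLLM code) of the contiguity issue the commit fixes.
    import torch

    x = torch.randn(4, 8)
    view = x.t()                      # transpose: same storage, swapped strides

    assert not view.is_contiguous()   # a kernel reading view's storage as a
                                      # flat row-major (8, 4) buffer would get
                                      # elements in the wrong order

    fixed = view.contiguous()         # materializes a row-major copy
    assert fixed.is_contiguous()
    assert torch.equal(fixed, view)   # same values, safe memory layout

    y = torch.randn(4, 8)
    assert y.contiguous() is y        # no-op for already-contiguous tensors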