[bugfix][quantization] Fix fp8 per_tensor scale shape (#30257)
Signed-off-by: Haoyang Li <lihaoyang0109@gmail.com>
commit 03416eada6
parent c72ea10723
@@ -1726,7 +1726,7 @@ def scaled_fp8_quant(
                 output, input, scale, scale_ub
             )
         else:
-            scale = torch.empty((1, 1), device=input.device, dtype=torch.float32)
+            scale = torch.empty(1, device=input.device, dtype=torch.float32)
             torch.ops._C.dynamic_scaled_fp8_quant(output, input, scale)
     else:
         assert scale.numel() == 1, f"{scale.shape}"
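For reference, below is a minimal, self-contained sketch of dynamic per-tensor FP8 quantization in plain PyTorch. It is not the fused torch.ops._C.dynamic_scaled_fp8_quant kernel, and the helper name dynamic_per_tensor_fp8_quant is made up for illustration. It mirrors the allocation on the new side of the hunk: the per-tensor scale is a single-element 1-D tensor, which also satisfies the scale.numel() == 1 assertion on the static path.

# Minimal sketch of dynamic per-tensor FP8 quantization (illustration only,
# not the fused vLLM CUDA kernel). The helper name is hypothetical.
import torch

def dynamic_per_tensor_fp8_quant(x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
    finfo = torch.finfo(torch.float8_e4m3fn)
    # One scale for the whole tensor, shape (1,), matching the
    # torch.empty(1, ...) allocation on the new side of this diff.
    scale = (x.abs().max().float() / finfo.max).clamp(min=1e-12).reshape(1)
    q = (x.float() / scale).clamp(finfo.min, finfo.max).to(torch.float8_e4m3fn)
    return q, scale

x = torch.randn(4, 8, dtype=torch.float16)
q, scale = dynamic_per_tensor_fp8_quant(x)
assert scale.shape == (1,) and scale.numel() == 1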