From 03416eada6c01770fb71c3d988fc3c74958d8f5e Mon Sep 17 00:00:00 2001
From: haoyangli-amd
Date: Tue, 9 Dec 2025 19:28:50 +0800
Subject: [PATCH] [bugfix][quantization] Fix fp8 per_tensor scale shape
 (#30257)

Signed-off-by: Haoyang Li
---
 vllm/_custom_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py
index 6bbfe11b6e925..6d862c5812560 100644
--- a/vllm/_custom_ops.py
+++ b/vllm/_custom_ops.py
@@ -1726,7 +1726,7 @@ def scaled_fp8_quant(
                 output, input, scale, scale_ub
             )
         else:
-            scale = torch.empty((1, 1), device=input.device, dtype=torch.float32)
+            scale = torch.empty(1, device=input.device, dtype=torch.float32)
             torch.ops._C.dynamic_scaled_fp8_quant(output, input, scale)
     else:
         assert scale.numel() == 1, f"{scale.shape}"
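
---
Note (commentary, not part of the applied patch): a minimal sketch of
the shape change, using only the public torch API. Per-tensor dynamic
quantization produces a single scale value; the fix allocates it as a
1-D tensor of shape [1] rather than a 2-D tensor of shape [1, 1].
Both satisfy numel() == 1, so the assert in the static-scale branch is
unaffected, but downstream code that treats per-tensor scales as 1-D
now sees the expected shape. The consumer at the end of the sketch is
hypothetical and not taken from vLLM.

    import torch

    # Before the fix: scale allocated as a 2-D tensor of shape [1, 1].
    old_scale = torch.empty((1, 1), dtype=torch.float32)
    # After the fix: scale allocated as a 1-D tensor of shape [1].
    new_scale = torch.empty(1, dtype=torch.float32)

    print(old_scale.shape)  # torch.Size([1, 1])
    print(new_scale.shape)  # torch.Size([1])
    print(old_scale.numel() == 1, new_scale.numel() == 1)  # True True

    # Hypothetical consumer: concatenating per-tensor scales from
    # several layers into one flat 1-D tensor. With [1]-shaped scales
    # the result has shape [2]; with [1, 1]-shaped scales it would
    # come out as shape [2, 1] instead.
    scales = [torch.tensor([0.5]), torch.tensor([0.25])]
    print(torch.cat(scales).shape)  # torch.Size([2])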