From 03416eada6c01770fb71c3d988fc3c74958d8f5e Mon Sep 17 00:00:00 2001
From: haoyangli-amd
Date: Tue, 9 Dec 2025 19:28:50 +0800
Subject: [PATCH] [bugfix][quantization] Fix fp8 per_tensor scale shape
 (#30257)

Signed-off-by: Haoyang Li
---
 vllm/_custom_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py
index 6bbfe11b6e925..6d862c5812560 100644
--- a/vllm/_custom_ops.py
+++ b/vllm/_custom_ops.py
@@ -1726,7 +1726,7 @@ def scaled_fp8_quant(
                 output, input, scale, scale_ub
             )
         else:
-            scale = torch.empty((1, 1), device=input.device, dtype=torch.float32)
+            scale = torch.empty(1, device=input.device, dtype=torch.float32)
             torch.ops._C.dynamic_scaled_fp8_quant(output, input, scale)
     else:
         assert scale.numel() == 1, f"{scale.shape}"
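
---
Note (commentary, not part of the applied patch): a minimal sketch of
the shape change, using only the public torch API. Per-tensor dynamic
quantization produces a single scale value; the fix allocates it as a
1-D tensor of shape [1] rather than a 2-D tensor of shape [1, 1].
Both satisfy numel() == 1, so the assert in the static-scale branch is
unaffected, but downstream code that treats per-tensor scales as 1-D
now sees the expected shape. The consumer at the end of the sketch is
hypothetical and not taken from vLLM.

    import torch

    # Before the fix: scale allocated as a 2-D tensor of shape [1, 1].
    old_scale = torch.empty((1, 1), dtype=torch.float32)
    # After the fix: scale allocated as a 1-D tensor of shape [1].
    new_scale = torch.empty(1, dtype=torch.float32)

    print(old_scale.shape)  # torch.Size([1, 1])
    print(new_scale.shape)  # torch.Size([1])
    print(old_scale.numel() == 1, new_scale.numel() == 1)  # True True

    # Hypothetical consumer: concatenating per-tensor scales from
    # several layers into one flat 1-D tensor. With [1]-shaped scales
    # the result has shape [2]; with [1, 1]-shaped scales it would
    # come out as shape [2, 1] instead.
    scales = [torch.tensor([0.5]), torch.tensor([0.25])]
    print(torch.cat(scales).shape)  # torch.Size([2])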