From 8b8c209e352899c870fe348013a99a91262bf1e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eldar=20Kurti=C4=87?= Date: Wed, 25 Jun 2025 21:08:03 +0200 Subject: [PATCH] static_scaled_fp8_quant should not run when scale.numel is not 1 (#20076) --- vllm/_custom_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py index b16fef8714193..8ebe694eefd0e 100644 --- a/vllm/_custom_ops.py +++ b/vllm/_custom_ops.py @@ -1276,7 +1276,7 @@ def scaled_fp8_quant( torch.ops._C.dynamic_scaled_fp8_quant(output, input, scale) else: # num_token_padding not implemented for this case - assert (scale.numel() == 1 or num_token_padding is None) + assert (scale.numel() == 1 and num_token_padding is None) torch.ops._C.static_scaled_fp8_quant(output, input, scale) return output, scale