From 8b8c209e352899c870fe348013a99a91262bf1e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eldar=20Kurti=C4=87?= <eldarkurtic314@gmail.com>
Date: Wed, 25 Jun 2025 21:08:03 +0200
Subject: [PATCH] static_scaled_fp8_quant should not run when scale.numel is
 not 1 (#20076)

---
 vllm/_custom_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py
index b16fef8714193..8ebe694eefd0e 100644
--- a/vllm/_custom_ops.py
+++ b/vllm/_custom_ops.py
@@ -1276,7 +1276,7 @@ def scaled_fp8_quant(
             torch.ops._C.dynamic_scaled_fp8_quant(output, input, scale)
     else:
         # num_token_padding not implemented for this case
-        assert (scale.numel() == 1 or num_token_padding is None)
+        assert (scale.numel() == 1 and num_token_padding is None)
         torch.ops._C.static_scaled_fp8_quant(output, input, scale)
 
     return output, scale