From a29e62ea3452bc6b1d4f3eeac2dc9a6b30357c4d Mon Sep 17 00:00:00 2001
From: Michael Goin
Date: Sat, 28 Jun 2025 14:48:13 +0900
Subject: [PATCH] Fix num_token_padding support for static per-tensor
 scaled_fp8_quant (#20188)

Signed-off-by: mgoin
---
 vllm/_custom_ops.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py
index 215f35bad34d9..51900de1cc099 100644
--- a/vllm/_custom_ops.py
+++ b/vllm/_custom_ops.py
@@ -1274,8 +1274,7 @@ def scaled_fp8_quant(
             scale = torch.zeros(1, device=input.device, dtype=torch.float32)
             torch.ops._C.dynamic_scaled_fp8_quant(output, input, scale)
     else:
-        # num_token_padding not implemented for this case
-        assert (scale.numel() == 1 and num_token_padding is None)
+        assert scale.numel() == 1
         torch.ops._C.static_scaled_fp8_quant(output, input, scale)
 
     return output, scale
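
The change removes the assertion that rejected num_token_padding when a static
per-tensor scale is supplied, so the static path can now return an output
allocated with the padded token dimension, as the dynamic path already does.
Below is a minimal usage sketch, not part of the patch: it assumes a CUDA
device with FP8 support, the keyword names visible in the hunk above
(scale, num_token_padding), and that the surrounding function allocates the
output with at least num_token_padding rows; the shapes and values are
illustrative only.

    # Hedged sketch of the call pattern this patch is intended to allow.
    import torch
    from vllm import _custom_ops as ops

    # 5 tokens of a 16-wide activation, quantized with a fixed per-tensor scale.
    x = torch.randn(5, 16, dtype=torch.float16, device="cuda")
    static_scale = torch.tensor([0.5], dtype=torch.float32, device="cuda")

    # Before this patch the assert rejected this combination; after it, the
    # output tensor is expected to be padded along the token dimension.
    out, scale = ops.scaled_fp8_quant(x, scale=static_scale, num_token_padding=8)
    assert out.shape[0] == 8   # padded to num_token_padding (assumed behavior)
    assert scale.numel() == 1  # the static per-tensor scale is passed through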