From cbfcff373b2d45b918f2d1e5eb721504a2cb7bcf Mon Sep 17 00:00:00 2001
From: vllmellm
Date: Tue, 18 Nov 2025 07:22:05 +0000
Subject: [PATCH] add dynamic per tensor fallback

Signed-off-by: vllmellm
---
 vllm/model_executor/layers/quantization/fp8.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
index 67e5b65de6010..225ed9499fd4d 100644
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -77,6 +77,7 @@ from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import (
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     GroupShape,
     is_layer_skipped,
+    kFp8DynamicTensorSym,
     kFp8DynamicTokenSym,
     kFp8StaticTensorSym,
 )
@@ -381,9 +382,12 @@ class Fp8LinearMethod(LinearMethodBase):
         if not self.act_q_static and cutlass_fp8_supported():
             self.act_q_group_shape = GroupShape.PER_TOKEN
             self.activation_quant_key = kFp8DynamicTokenSym
-        else:
+        elif self.act_q_static:
             self.act_q_group_shape = GroupShape.PER_TENSOR
             self.activation_quant_key = kFp8StaticTensorSym
+        else:
+            self.act_q_group_shape = GroupShape.PER_TENSOR
+            self.activation_quant_key = kFp8DynamicTensorSym
 
         if self.block_quant:
             assert not self.act_q_static