From 523dd610cbcf7b5494535f56f2422ede1f797a3c Mon Sep 17 00:00:00 2001
From: c0de128 <kevin.mckay@outlook.com>
Date: Sun, 21 Dec 2025 21:06:01 -0600
Subject: [PATCH] Fix dtype check in get_fp8_min_max helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address review feedback: only apply the 224.0 override when both of the
following conditions hold:
1. The platform supports fnuz (current_platform.is_fp8_fnuz() is true)
2. The requested dtype is actually torch.float8_e4m3fnuz

This prevents incorrect min/max values when a non-fnuz dtype is
explicitly passed on a platform that supports fnuz.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
Signed-off-by: c0de128 <kevin.mckay@outlook.com>
---
 vllm/model_executor/layers/quantization/utils/quant_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/utils/quant_utils.py b/vllm/model_executor/layers/quantization/utils/quant_utils.py
index 9829972cc3657..d2eb768ede9a3 100644
--- a/vllm/model_executor/layers/quantization/utils/quant_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/quant_utils.py
@@ -36,7 +36,8 @@ def get_fp8_min_max(dtype: torch.dtype | None = None) -> tuple[float, float]:
     if dtype is None:
         dtype = FP8_DTYPE
     finfo = torch.finfo(dtype)
-    if current_platform.is_fp8_fnuz():
+    # Only apply the 224.0 override for the actual fnuz dtype on fnuz platform
+    if current_platform.is_fp8_fnuz() and dtype == torch.float8_e4m3fnuz:
         return -224.0, 224.0
     return finfo.min, finfo.max