From 523dd610cbcf7b5494535f56f2422ede1f797a3c Mon Sep 17 00:00:00 2001
From: c0de128
Date: Sun, 21 Dec 2025 21:06:01 -0600
Subject: [PATCH] Fix dtype check in get_fp8_min_max helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address review feedback: Only apply the 224.0 override when both:
1. Platform supports fnuz (is_fp8_fnuz())
2. The dtype is actually torch.float8_e4m3fnuz

This prevents incorrect min/max values when a non-fnuz dtype is explicitly
passed on a platform that supports fnuz.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5
Signed-off-by: c0de128
---
 vllm/model_executor/layers/quantization/utils/quant_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/utils/quant_utils.py b/vllm/model_executor/layers/quantization/utils/quant_utils.py
index 9829972cc3657..d2eb768ede9a3 100644
--- a/vllm/model_executor/layers/quantization/utils/quant_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/quant_utils.py
@@ -36,7 +36,8 @@ def get_fp8_min_max(dtype: torch.dtype | None = None) -> tuple[float, float]:
     if dtype is None:
         dtype = FP8_DTYPE
     finfo = torch.finfo(dtype)
-    if current_platform.is_fp8_fnuz():
+    # Only apply the 224.0 override for the actual fnuz dtype on fnuz platform
+    if current_platform.is_fp8_fnuz() and dtype == torch.float8_e4m3fnuz:
         return -224.0, 224.0
     return finfo.min, finfo.max
 