From 26fd8ca33c18f668c95e4328fba735c4881e59c0 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith
Date: Fri, 20 Jun 2025 14:40:21 +0000
Subject: [PATCH] fixes

Signed-off-by: Tyler Michael Smith

---
 vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py
index 758cd7c56f71f..a92125a6faba9 100644
--- a/vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py
+++ b/vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py
@@ -105,7 +105,7 @@ def silu_mul_fp8_quant_deep_gemm(
     y: torch.Tensor,  # (E, T, 2*H) float32
     tokens_per_expert: torch.Tensor,  # (E,) number of valid tokens per expert
     group_size: int = 128,
-    eps: float = 1e-6,
+    eps: float = 1e-10,
 ):
     """Quantize silu(y[..., :H]) * y[..., H:] to FP8 with group per-token scales
 
@@ -152,7 +152,7 @@ def silu_mul_fp8_quant_deep_gemm(
 
     f_info = torch.finfo(fp8_dtype)
     fp8_max = f_info.max
-    fp8_min = -f_info.max
+    fp8_min = f_info.min
 
     _silu_mul_fp8_quant_deep_gemm[grid](
         y,