From 13e211bbbc1ddc248e0bca6c889784bfa78e6aa2 Mon Sep 17 00:00:00 2001
From: Matthew Bonanni
Date: Fri, 3 Oct 2025 13:35:17 -0400
Subject: [PATCH] Avoid division by zero in cache DS MLA kernel (#26174)

Signed-off-by: Matthew Bonanni
Signed-off-by: yewentao256
---
 csrc/cache_kernels.cu | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/csrc/cache_kernels.cu b/csrc/cache_kernels.cu
index 9a9fb8724c1e..0e1b358fdf62 100644
--- a/csrc/cache_kernels.cu
+++ b/csrc/cache_kernels.cu
@@ -16,7 +16,7 @@
 
 #include
 #include
-#include // FLT_MIN
+#include
 
 #ifdef USE_ROCM
   #include
@@ -479,6 +479,7 @@ __global__ void concat_and_cache_ds_mla_kernel(
 
   // Compute the scale for the tile
   float tile_scale = max_abs / 448.f;
+  tile_scale = fmaxf(tile_scale, FLT_MIN);
 
   // The first lane of each half-warp writes the scale to kv_cache
   if ((lane_idx == 0) || (lane_idx == 16)) {