From 206ab1f0df8286bb5453625bf6bb6e62dfa4dab7 Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Wed, 1 Oct 2025 00:30:36 +0800
Subject: [PATCH] [bugfix][deepseek] fix flashmla kernel selection (#25956)

Signed-off-by: youkaichao
Signed-off-by: yewentao256
---
 vllm/attention/ops/flashmla.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/attention/ops/flashmla.py b/vllm/attention/ops/flashmla.py
index 3cc0e4adfa0a0..9654f9f6775ae 100644
--- a/vllm/attention/ops/flashmla.py
+++ b/vllm/attention/ops/flashmla.py
@@ -136,7 +136,7 @@ def flash_mla_with_kvcache(
             descale_k is None
         ), "descale_q and descale_k should be both None or both not None"
 
-    if (descale_q is not None) and (descale_k is not None):
+    if indices is None and q.element_size() == 1:
         out, softmax_lse = torch.ops._flashmla_extension_C.fwd_kvcache_mla_fp8(
             q, k_cache, head_dim_v, cache_seqlens, block_table, softmax_scale,
             causal, tile_scheduler_metadata, num_splits, descale_q, descale_k)
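
Note: the FP8 kernel is now selected from the query tensor itself (a
one-byte element size, i.e. an FP8 dtype) plus the absence of sparse
indices, rather than from whether descale_q/descale_k happened to be
passed. A minimal sketch of that dtype check, assuming PyTorch's
float8 dtypes are available; the tensor shapes below are arbitrary
placeholders, not the shapes flash_mla_with_kvcache expects:

    import torch

    # element_size() returns the per-element byte width. FP8 dtypes
    # such as torch.float8_e4m3fn occupy one byte, so the check
    # `q.element_size() == 1` matches FP8 queries even when the
    # caller omits descale_q/descale_k.
    q_fp8 = torch.empty(2, 128, 576, dtype=torch.float8_e4m3fn)
    q_bf16 = torch.empty(2, 128, 576, dtype=torch.bfloat16)

    assert q_fp8.element_size() == 1   # would route to fwd_kvcache_mla_fp8
    assert q_bf16.element_size() == 2  # would route to the regular kernel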