From d698bb382db95af6b8836936eb0dfae71c791d11 Mon Sep 17 00:00:00 2001
From: Jingchun Gao <63247409+gjc0824@users.noreply.github.com>
Date: Fri, 5 Dec 2025 13:54:31 +0800
Subject: [PATCH] [Bugfix] Correct num_q_heads on DCP for Flashinfer backends (#29487)

Signed-off-by: Jingchun Gao
Signed-off-by: Jingchun Gao <63247409+gjc0824@users.noreply.github.com>
Co-authored-by: Jingchun Gao
---
 vllm/v1/attention/backends/flashinfer.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py
index 69a6a5e5fae82..3d9640a2d4024 100755
--- a/vllm/v1/attention/backends/flashinfer.py
+++ b/vllm/v1/attention/backends/flashinfer.py
@@ -482,9 +482,8 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
         self.dcp_rank = 0
         self.dcp_kv_cache_interleave_size = 1
 
-        self.num_qo_heads = (
-            self.model_config.get_num_attention_heads(self.vllm_config.parallel_config)
-            * self.dcp_world_size
+        self.num_qo_heads = self.model_config.get_num_attention_heads(
+            self.vllm_config.parallel_config
         )
         self.num_kv_heads = self.kv_cache_spec.num_kv_heads
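
Note (not part of the patch itself): the hunk above drops the `* self.dcp_world_size` factor so that the query/output head count handed to the FlashInfer planner stays at the per-tensor-parallel-rank value. A minimal arithmetic sketch of that intent follows; the concrete numbers (32 total heads, TP=4, DCP=2) are made up for illustration and are not taken from the patch or from vLLM's config objects.

    # Hypothetical head-count arithmetic illustrating the fix.
    total_attention_heads = 32   # example value, not from the patch
    tensor_parallel_size = 4     # example value
    dcp_world_size = 2           # example value

    # model_config.get_num_attention_heads(parallel_config) is assumed to
    # return the per-TP-rank head count, i.e. 32 // 4 = 8 query heads.
    num_qo_heads = total_attention_heads // tensor_parallel_size      # 8

    # The pre-patch code additionally multiplied by dcp_world_size,
    # reporting 16 query heads instead of the 8 this rank actually runs.
    buggy_num_qo_heads = num_qo_heads * dcp_world_size                # 16

    assert num_qo_heads == 8 and buggy_num_qo_heads == 16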