From d698bb382db95af6b8836936eb0dfae71c791d11 Mon Sep 17 00:00:00 2001
From: Jingchun Gao <63247409+gjc0824@users.noreply.github.com>
Date: Fri, 5 Dec 2025 13:54:31 +0800
Subject: [PATCH] [Bugfix] Correct num_q_heads on DCP for Flashinfer backends (#29487)

Signed-off-by: Jingchun Gao
Signed-off-by: Jingchun Gao <63247409+gjc0824@users.noreply.github.com>
Co-authored-by: Jingchun Gao
---
 vllm/v1/attention/backends/flashinfer.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py
index 69a6a5e5fae82..3d9640a2d4024 100755
--- a/vllm/v1/attention/backends/flashinfer.py
+++ b/vllm/v1/attention/backends/flashinfer.py
@@ -482,9 +482,8 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
         self.dcp_rank = 0
         self.dcp_kv_cache_interleave_size = 1
 
-        self.num_qo_heads = (
-            self.model_config.get_num_attention_heads(self.vllm_config.parallel_config)
-            * self.dcp_world_size
+        self.num_qo_heads = self.model_config.get_num_attention_heads(
+            self.vllm_config.parallel_config
         )
         self.num_kv_heads = self.kv_cache_spec.num_kv_heads
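
Note (not part of the patch itself): the hunk above drops the `* self.dcp_world_size` factor so that the query/output head count handed to the FlashInfer planner stays at the per-tensor-parallel-rank value. A minimal arithmetic sketch of that intent follows; the concrete numbers (32 total heads, TP=4, DCP=2) are made up for illustration and are not taken from the patch or from vLLM's config objects.

    # Hypothetical head-count arithmetic illustrating the fix.
    total_attention_heads = 32   # example value, not from the patch
    tensor_parallel_size = 4     # example value
    dcp_world_size = 2           # example value

    # model_config.get_num_attention_heads(parallel_config) is assumed to
    # return the per-TP-rank head count, i.e. 32 // 4 = 8 query heads.
    num_qo_heads = total_attention_heads // tensor_parallel_size      # 8

    # The pre-patch code additionally multiplied by dcp_world_size,
    # reporting 16 query heads instead of the 8 this rank actually runs.
    buggy_num_qo_heads = num_qo_heads * dcp_world_size                # 16

    assert num_qo_heads == 8 and buggy_num_qo_heads == 16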