diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py
index 69a6a5e5fae82..3d9640a2d4024 100755
--- a/vllm/v1/attention/backends/flashinfer.py
+++ b/vllm/v1/attention/backends/flashinfer.py
@@ -482,9 +482,8 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
         self.dcp_rank = 0
         self.dcp_kv_cache_interleave_size = 1
 
-        self.num_qo_heads = (
-            self.model_config.get_num_attention_heads(self.vllm_config.parallel_config)
-            * self.dcp_world_size
+        self.num_qo_heads = self.model_config.get_num_attention_heads(
+            self.vllm_config.parallel_config
         )
         self.num_kv_heads = self.kv_cache_spec.num_kv_heads
 