mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-06 16:49:10 +08:00
[Bugfix] Correct num_q_heads on DCP for Flashinfer backends (#29487)
Signed-off-by: Jingchun Gao <gaojingchun1@huawei.com>
Signed-off-by: Jingchun Gao <63247409+gjc0824@users.noreply.github.com>
Co-authored-by: Jingchun Gao <gaojingchun1@huawei.com>
This commit is contained in:
parent
2c22c4ca2d
commit
d698bb382d
@@ -482,9 +482,8 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
|
|||||||
self.dcp_rank = 0
|
self.dcp_rank = 0
|
||||||
self.dcp_kv_cache_interleave_size = 1
|
self.dcp_kv_cache_interleave_size = 1
|
||||||
|
|
||||||
self.num_qo_heads = (
|
self.num_qo_heads = self.model_config.get_num_attention_heads(
|
||||||
self.model_config.get_num_attention_heads(self.vllm_config.parallel_config)
|
self.vllm_config.parallel_config
|
||||||
* self.dcp_world_size
|
|
||||||
)
|
)
|
||||||
|
|
||||||
self.num_kv_heads = self.kv_cache_spec.num_kv_heads
|
self.num_kv_heads = self.kv_cache_spec.num_kv_heads
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user