[DCP][Bugfix][CI] Fix accuracy issue of DCP when using FLASH_ATTN_MLA (#30309)
Signed-off-by: FENP <yuanyongjie.yyj@antgroup.com>
parent 9c32df6101
commit 67475a6e81
@@ -123,8 +123,11 @@ class CPTestSettings:
 CP_TEXT_GENERATION_MODELS = {
     "deepseek-ai/DeepSeek-V2-Lite-Chat": [
         CPTestSettings.detailed(dcp_multipliers=[1]),
         CPTestSettings.detailed(
-            dcp_multipliers=[0.5, 1], cp_kv_cache_interleave_size=64
+            dcp_multipliers=[0.5],
+            cp_kv_cache_interleave_size=64,
+            attn_backend="FLASHMLA",
         ),
     ],
     "Qwen/Qwen2.5-1.5B-Instruct": [
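The new test case pins attn_backend="FLASHMLA" together with cp_kv_cache_interleave_size=64. As a toy sketch only (not vLLM code; rank_for_token and its round-robin-by-block assignment are assumptions for illustration), the snippet below shows how an interleave size could map token positions to decode-context-parallel (DCP) ranks, which is the knob this test exercises.

# Toy illustration: with interleave size 1 each consecutive token goes to the
# next DCP rank; with size 64 tokens are handed out in contiguous blocks of 64.
def rank_for_token(token_idx: int, dcp_world_size: int, interleave_size: int) -> int:
    # Assumed round-robin-by-block assignment; the real layout lives in
    # vLLM's KV-cache management and may differ in detail.
    block_idx = token_idx // interleave_size
    return block_idx % dcp_world_size

assert rank_for_token(0, dcp_world_size=2, interleave_size=1) == 0
assert rank_for_token(1, dcp_world_size=2, interleave_size=1) == 1
assert rank_for_token(63, dcp_world_size=2, interleave_size=64) == 0
assert rank_for_token(64, dcp_world_size=2, interleave_size=64) == 1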
@@ -105,13 +105,14 @@ class FlashAttnMLAMetadataBuilder(MLACommonMetadataBuilder[FlashAttnMLAMetadata]
         vllm_config: VllmConfig,
         device: torch.device,
     ):
+        interleave_size = vllm_config.parallel_config.cp_kv_cache_interleave_size
         super().__init__(
             kv_cache_spec,
             layer_names,
             vllm_config,
             device,
             FlashAttnMLAMetadata,
-            supports_dcp_with_varlen=True,
+            supports_dcp_with_varlen=(interleave_size == 1),
         )
         self.max_num_splits = 0  # No upper bound on the number of splits.
         self.fa_aot_schedule = get_flash_attn_version() == 3
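Taken together, the builder now advertises the varlen DCP path only when the KV-cache interleave size is 1, and the test above covers the non-trivial interleave case on an MLA backend. Below is a minimal sketch of that gating pattern using hypothetical names (ToyMetadataBuilder, plan_attention), not vLLM's real classes or call sites.

# Sketch of gating a capability flag on a config value, as the diff does with
# supports_dcp_with_varlen=(interleave_size == 1).
class ToyMetadataBuilder:
    def __init__(self, interleave_size: int) -> None:
        # The varlen DCP fast path is only claimed for interleave size 1.
        self.supports_dcp_with_varlen = interleave_size == 1

def plan_attention(builder: ToyMetadataBuilder, use_dcp: bool) -> str:
    # Callers that see the flag unset take the regular (fallback) path.
    if use_dcp and builder.supports_dcp_with_varlen:
        return "varlen-dcp"
    return "fallback"

assert plan_attention(ToyMetadataBuilder(interleave_size=1), use_dcp=True) == "varlen-dcp"
assert plan_attention(ToyMetadataBuilder(interleave_size=64), use_dcp=True) == "fallback"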