[DCP][Bugfix][CI] Fix accuracy issue of DCP when using FLASH_ATTN_MLA (#30309)

Signed-off-by: FENP <yuanyongjie.yyj@antgroup.com>
This commit is contained in:
Jaya Yuan 2025-12-09 16:22:14 +08:00 committed by GitHub
parent 9c32df6101
commit 67475a6e81
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 6 additions and 2 deletions

View File

@@ -123,8 +123,11 @@ class CPTestSettings:
CP_TEXT_GENERATION_MODELS = {
"deepseek-ai/DeepSeek-V2-Lite-Chat": [
CPTestSettings.detailed(dcp_multipliers=[1]),
CPTestSettings.detailed(
dcp_multipliers=[0.5, 1], cp_kv_cache_interleave_size=64
dcp_multipliers=[0.5],
cp_kv_cache_interleave_size=64,
attn_backend="FLASHMLA",
),
],
"Qwen/Qwen2.5-1.5B-Instruct": [

View File

@@ -105,13 +105,14 @@ class FlashAttnMLAMetadataBuilder(MLACommonMetadataBuilder[FlashAttnMLAMetadata]
vllm_config: VllmConfig,
device: torch.device,
):
interleave_size = vllm_config.parallel_config.cp_kv_cache_interleave_size
super().__init__(
kv_cache_spec,
layer_names,
vllm_config,
device,
FlashAttnMLAMetadata,
supports_dcp_with_varlen=True,
supports_dcp_with_varlen=(interleave_size == 1),
)
self.max_num_splits = 0 # No upper bound on the number of splits.
self.fa_aot_schedule = get_flash_attn_version() == 3