[Bug] Fix Long Context OOM Issue (#25290)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
Wentao Ye 2025-09-22 22:04:15 -04:00 committed by GitHub
parent c625f9043c
commit 4741239db7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -481,7 +481,7 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
# which would result in up-projected context being
# 2*(192*128)*(64*1024) = 3gb
# (assuming 192 QK head dim, 128 heads, and fp16)
- 128 * 1024)
+ 64 * 1024)
assert self.chunked_prefill_workspace_size >= \
scheduler_config.max_num_seqs * cache_config.block_size
if self.dcp_world_size > 1: