mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 20:15:01 +08:00
[Bug] Fix Long Context OOM Issue (#25290)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
c625f9043c
commit
4741239db7
@ -481,7 +481,7 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
|
||||
# which would result in up-projected context being
|
||||
# 2*(192*128)*(64*1024) = 3gb
|
||||
# (assuming 192 QK head dim, 128 heads, and fp16)
|
||||
128 * 1024)
|
||||
64 * 1024)
|
||||
assert self.chunked_prefill_workspace_size >= \
|
||||
scheduler_config.max_num_seqs * cache_config.block_size
|
||||
if self.dcp_world_size > 1:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user