mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 01:49:19 +08:00
[Bug] Fix Long Context OOM Issue (#25290)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
c625f9043c
commit
4741239db7
@ -481,7 +481,7 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
|
|||||||
# which would result in up-projected context being
|
# which would result in up-projected context being
|
||||||
# 2*(192*128)*(64*1024) = 3gb
|
# 2*(192*128)*(64*1024) = 3gb
|
||||||
# (assuming 192 QK head dim, 128 heads, and fp16)
|
# (assuming 192 QK head dim, 128 heads, and fp16)
|
||||||
128 * 1024)
|
64 * 1024)
|
||||||
assert self.chunked_prefill_workspace_size >= \
|
assert self.chunked_prefill_workspace_size >= \
|
||||||
scheduler_config.max_num_seqs * cache_config.block_size
|
scheduler_config.max_num_seqs * cache_config.block_size
|
||||||
if self.dcp_world_size > 1:
|
if self.dcp_world_size > 1:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user