mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-28 23:37:16 +08:00
Disable chunked prefill and/or prefix caching when MLA is enabled (#12642)
From @mgoin in https://github.com/vllm-project/vllm/pull/12638 — I cannot push to that branch, so I opened this new PR to unblock the release. --------- Signed-off-by: mgoin <michael@neuralmagic.com> Signed-off-by: simon-mo <simon.mo@hey.com> Co-authored-by: mgoin <michael@neuralmagic.com>
This commit is contained in:
parent
1e3698393f
commit
4f4d427ac2
@@ -3252,6 +3252,16 @@ class VllmConfig:
|
||||
|
||||
current_platform.check_and_update_config(self)
|
||||
|
||||
# If MLA is enabled, force disable chunked prefill and prefix caching
|
||||
if self.model_config and self.model_config.use_mla:
|
||||
logger.info("MLA is enabled; forcing chunked prefill and prefix "
|
||||
"caching to be disabled.")
|
||||
self.scheduler_config.enable_chunked_prefill = False
|
||||
self.scheduler_config.chunked_prefill_enabled = False
|
||||
|
||||
if self.cache_config is not None:
|
||||
self.cache_config.enable_prefix_caching = False
|
||||
|
||||
if not self.instance_id:
|
||||
self.instance_id = random_uuid()[:5]
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user