mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-12 02:35:52 +08:00
[V1] Enable APC (automatic prefix caching) by default only for text models (#10148)
Signed-off-by: Roger Wang <ywang@roblox.com>
This commit is contained in:
parent
1ff4aed5bd
commit
208ce622c7
@ -65,7 +65,10 @@ class LLMEngine:
|
|||||||
elif usage_context == UsageContext.OPENAI_API_SERVER:
|
elif usage_context == UsageContext.OPENAI_API_SERVER:
|
||||||
scheduler_config.max_num_seqs = 1024
|
scheduler_config.max_num_seqs = 1024
|
||||||
scheduler_config.max_num_batched_tokens = 2048
|
scheduler_config.max_num_batched_tokens = 2048
|
||||||
cache_config.enable_prefix_caching = True
|
|
||||||
|
# TODO (ywang96): Enable APC by default when VLM supports it.
|
||||||
|
if not model_config.is_multimodal_model:
|
||||||
|
cache_config.enable_prefix_caching = True
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Initializing an LLM engine (v%s) with config: "
|
"Initializing an LLM engine (v%s) with config: "
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user