[V1] Enable APC by default only for text models (#10148)

Signed-off-by: Roger Wang <ywang@roblox.com>
2025-12-12 02:35:52 +08:00 · 2024-11-08 06:39:41 -08:00 · 2024-11-08 06:39:41 -08:00 · 208ce622c7
commit 208ce622c7
parent 1ff4aed5bd
1 changed file with 4 additions and 1 deletion
--- a/vllm/v1/engine/llm_engine.py
+++ b/vllm/v1/engine/llm_engine.py
@@ -65,7 +65,10 @@ class LLMEngine:
        elif usage_context == UsageContext.OPENAI_API_SERVER:
            scheduler_config.max_num_seqs = 1024
            scheduler_config.max_num_batched_tokens = 2048
-        cache_config.enable_prefix_caching = True
+
+        # TODO (ywang96): Enable APC by default when VLM supports it.
+        if not model_config.is_multimodal_model:
+            cache_config.enable_prefix_caching = True
        logger.info(
            "Initializing an LLM engine (v%s) with config: "