[CI Failure] fix_test_auto_prefix_cache_support (#26053)

Signed-off-by: Huamin Li <3ericli@gmail.com>
This commit is contained in:
Huamin Li 2025-10-04 02:44:49 -07:00 committed by GitHub
parent 7c2e91c4e0
commit 7d6b03381e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 14 additions and 7 deletions

View File

@@ -396,10 +396,17 @@ class VllmConfig:
"try setting 'VLLM_WORKER_MULTIPROC_METHOD' " "try setting 'VLLM_WORKER_MULTIPROC_METHOD' "
"to 'spawn'.") "to 'spawn'.")
# Disable prefix caching only if chunked prefill is explicitly disabled # Final off-switch for CP/APC:
# (and not merely unset) # Disable for (a) collected blockers, (b) encoder-decoder, or
if (self.scheduler_config.chunked_prefill_enabled is False # (c) explicit CP=False when APC wasn't requested.
or disable_chunked_prefill_reasons): # Do NOT disable merely because the resolved CP flag is False.
apc_requested = (self.cache_config is not None
and self.cache_config.enable_prefix_caching)
if (disable_chunked_prefill_reasons
or (self.model_config is not None
and self.model_config.is_encoder_decoder)
or (self.scheduler_config.enable_chunked_prefill is False
and not apc_requested)):
for reason in disable_chunked_prefill_reasons: for reason in disable_chunked_prefill_reasons:
logger.info(reason) logger.info(reason)
self.scheduler_config.chunked_prefill_enabled = False self.scheduler_config.chunked_prefill_enabled = False