From e7ef61c1f039a8eac98602a9e5ab7517027e7278 Mon Sep 17 00:00:00 2001 From: majianpeng Date: Wed, 14 May 2025 14:43:44 +0800 Subject: [PATCH] [Bugfix][Example] make lmcache v0 work. (#18051) Signed-off-by: Ma, Jianpeng --- examples/lmcache/cpu_offload_lmcache.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/lmcache/cpu_offload_lmcache.py b/examples/lmcache/cpu_offload_lmcache.py index bf191960b0803..eedb47dfc12e5 100644 --- a/examples/lmcache/cpu_offload_lmcache.py +++ b/examples/lmcache/cpu_offload_lmcache.py @@ -34,7 +34,7 @@ from vllm.config import KVTransferConfig from vllm.engine.arg_utils import EngineArgs -def setup_environment_variables(): +def setup_environment_variables(vllm_version: str): # LMCache-related environment variables # Use experimental features in LMCache os.environ["LMCACHE_USE_EXPERIMENTAL"] = "True" @@ -44,6 +44,8 @@ def setup_environment_variables(): os.environ["LMCACHE_LOCAL_CPU"] = "True" # Set local CPU memory limit to 5.0 GB os.environ["LMCACHE_MAX_LOCAL_CPU_SIZE"] = "5.0" + if vllm_version == "v0": + os.environ["VLLM_USE_V1"] = "0" @contextlib.contextmanager @@ -120,7 +122,7 @@ def main(): lmcache_connector = "LMCacheConnectorV1" model = "meta-llama/Meta-Llama-3.1-8B-Instruct" - setup_environment_variables() + setup_environment_variables(args.version) with build_llm_with_lmcache(lmcache_connector, model, args.version) as llm: