mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 07:04:53 +08:00
[Bugfix][Example] Make the LMCache example work with vLLM v0. (#18051)
Signed-off-by: Ma, Jianpeng <jianpeng.ma@intel.com>
This commit is contained in:
parent
d4154c35a2
commit
e7ef61c1f0
@ -34,7 +34,7 @@ from vllm.config import KVTransferConfig
|
|||||||
from vllm.engine.arg_utils import EngineArgs
|
from vllm.engine.arg_utils import EngineArgs
|
||||||
|
|
||||||
|
|
||||||
def setup_environment_variables():
|
def setup_environment_variables(vllm_version: str):
|
||||||
# LMCache-related environment variables
|
# LMCache-related environment variables
|
||||||
# Use experimental features in LMCache
|
# Use experimental features in LMCache
|
||||||
os.environ["LMCACHE_USE_EXPERIMENTAL"] = "True"
|
os.environ["LMCACHE_USE_EXPERIMENTAL"] = "True"
|
||||||
@ -44,6 +44,8 @@ def setup_environment_variables():
|
|||||||
os.environ["LMCACHE_LOCAL_CPU"] = "True"
|
os.environ["LMCACHE_LOCAL_CPU"] = "True"
|
||||||
# Set local CPU memory limit to 5.0 GB
|
# Set local CPU memory limit to 5.0 GB
|
||||||
os.environ["LMCACHE_MAX_LOCAL_CPU_SIZE"] = "5.0"
|
os.environ["LMCACHE_MAX_LOCAL_CPU_SIZE"] = "5.0"
|
||||||
|
if vllm_version == "v0":
|
||||||
|
os.environ["VLLM_USE_V1"] = "0"
|
||||||
|
|
||||||
|
|
||||||
@contextlib.contextmanager
|
@contextlib.contextmanager
|
||||||
@ -120,7 +122,7 @@ def main():
|
|||||||
lmcache_connector = "LMCacheConnectorV1"
|
lmcache_connector = "LMCacheConnectorV1"
|
||||||
model = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
model = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
||||||
|
|
||||||
setup_environment_variables()
|
setup_environment_variables(args.version)
|
||||||
|
|
||||||
with build_llm_with_lmcache(lmcache_connector, model, args.version) as llm:
|
with build_llm_with_lmcache(lmcache_connector, model, args.version) as llm:
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user