Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-14 07:15:01 +08:00)
[Bugfix] Fix logic for choosing default prefix caching setting (#29393)
Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
This commit is contained in:
parent 51fc9e017a
commit 516c3f7847
@@ -277,8 +277,9 @@ def test_prefix_cache_default():
|
|||||||
parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
|
parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
|
||||||
args = parser.parse_args([])
|
args = parser.parse_args([])
|
||||||
|
|
||||||
|
# should be None by default (depends on model).
|
||||||
engine_args = EngineArgs.from_cli_args(args=args)
|
engine_args = EngineArgs.from_cli_args(args=args)
|
||||||
assert engine_args.enable_prefix_caching, "prefix caching should default to on."
|
assert engine_args.enable_prefix_caching is None
|
||||||
|
|
||||||
# with flag to turn it on.
|
# with flag to turn it on.
|
||||||
args = parser.parse_args(["--enable-prefix-caching"])
|
args = parser.parse_args(["--enable-prefix-caching"])
|
||||||
|
|||||||
@@ -880,7 +880,11 @@ class EngineArgs:
|
|||||||
"--num-gpu-blocks-override", **cache_kwargs["num_gpu_blocks_override"]
|
"--num-gpu-blocks-override", **cache_kwargs["num_gpu_blocks_override"]
|
||||||
)
|
)
|
||||||
cache_group.add_argument(
|
cache_group.add_argument(
|
||||||
"--enable-prefix-caching", **cache_kwargs["enable_prefix_caching"]
|
"--enable-prefix-caching",
|
||||||
|
**{
|
||||||
|
**cache_kwargs["enable_prefix_caching"],
|
||||||
|
"default": None,
|
||||||
|
},
|
||||||
)
|
)
|
||||||
cache_group.add_argument(
|
cache_group.add_argument(
|
||||||
"--prefix-caching-hash-algo", **cache_kwargs["prefix_caching_hash_algo"]
|
"--prefix-caching-hash-algo", **cache_kwargs["prefix_caching_hash_algo"]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user