[Bugfix] Fix logic for choosing default prefix caching setting (#29393)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
This commit is contained in:
Thomas Parnell 2025-11-25 15:05:10 +01:00 committed by GitHub
parent 51fc9e017a
commit 516c3f7847
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 7 additions and 2 deletions

View File

@ -277,8 +277,9 @@ def test_prefix_cache_default():
parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
args = parser.parse_args([])
# should be None by default (depends on model).
engine_args = EngineArgs.from_cli_args(args=args)
assert engine_args.enable_prefix_caching, "prefix caching should default to on."
assert engine_args.enable_prefix_caching is None
# with flag to turn it on.
args = parser.parse_args(["--enable-prefix-caching"])

View File

@ -880,7 +880,11 @@ class EngineArgs:
"--num-gpu-blocks-override", **cache_kwargs["num_gpu_blocks_override"]
)
cache_group.add_argument(
"--enable-prefix-caching", **cache_kwargs["enable_prefix_caching"]
"--enable-prefix-caching",
**{
**cache_kwargs["enable_prefix_caching"],
"default": None,
},
)
cache_group.add_argument(
"--prefix-caching-hash-algo", **cache_kwargs["prefix_caching_hash_algo"]