mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-07 17:42:21 +08:00
[Bug][CLI] Allow users to disable prefix caching explicitly (#10724)
Signed-off-by: rickyx <rickyx@anyscale.com>
This commit is contained in:
parent
278be671a3
commit
d9b4b3f069
@ -59,6 +59,25 @@ def test_compilation_config():
|
|||||||
assert args.compilation_config.level == 3
|
assert args.compilation_config.level == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_prefix_cache_default():
|
||||||
|
parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
|
||||||
|
args = parser.parse_args([])
|
||||||
|
|
||||||
|
engine_args = EngineArgs.from_cli_args(args=args)
|
||||||
|
assert (not engine_args.enable_prefix_caching
|
||||||
|
), "prefix caching defaults to off."
|
||||||
|
|
||||||
|
# with flag to turn it on.
|
||||||
|
args = parser.parse_args(["--enable-prefix-caching"])
|
||||||
|
engine_args = EngineArgs.from_cli_args(args=args)
|
||||||
|
assert engine_args.enable_prefix_caching
|
||||||
|
|
||||||
|
# with disable flag to turn it off.
|
||||||
|
args = parser.parse_args(["--no-enable-prefix-caching"])
|
||||||
|
engine_args = EngineArgs.from_cli_args(args=args)
|
||||||
|
assert not engine_args.enable_prefix_caching
|
||||||
|
|
||||||
|
|
||||||
def test_valid_pooling_config():
|
def test_valid_pooling_config():
|
||||||
parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
|
parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
|
||||||
args = parser.parse_args([
|
args = parser.parse_args([
|
||||||
|
|||||||
@ -4,6 +4,7 @@ from vllm import envs
|
|||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig
|
||||||
from vllm.engine.arg_utils import EngineArgs
|
from vllm.engine.arg_utils import EngineArgs
|
||||||
from vllm.usage.usage_lib import UsageContext
|
from vllm.usage.usage_lib import UsageContext
|
||||||
|
from vllm.utils import FlexibleArgumentParser
|
||||||
|
|
||||||
if not envs.VLLM_USE_V1:
|
if not envs.VLLM_USE_V1:
|
||||||
pytest.skip(
|
pytest.skip(
|
||||||
@ -12,6 +13,24 @@ if not envs.VLLM_USE_V1:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_prefix_caching_from_cli():
|
||||||
|
parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
|
||||||
|
args = parser.parse_args([])
|
||||||
|
engine_args = EngineArgs.from_cli_args(args=args)
|
||||||
|
assert (engine_args.enable_prefix_caching
|
||||||
|
), "V1 turns on prefix caching by default."
|
||||||
|
|
||||||
|
# Turning it off is possible with the flag.
|
||||||
|
args = parser.parse_args(["--no-enable-prefix-caching"])
|
||||||
|
engine_args = EngineArgs.from_cli_args(args=args)
|
||||||
|
assert not engine_args.enable_prefix_caching
|
||||||
|
|
||||||
|
# Turn it on with flag.
|
||||||
|
args = parser.parse_args(["--enable-prefix-caching"])
|
||||||
|
engine_args = EngineArgs.from_cli_args(args=args)
|
||||||
|
assert engine_args.enable_prefix_caching
|
||||||
|
|
||||||
|
|
||||||
def test_defaults():
|
def test_defaults():
|
||||||
engine_args = EngineArgs(model="facebook/opt-125m")
|
engine_args = EngineArgs(model="facebook/opt-125m")
|
||||||
|
|
||||||
|
|||||||
@ -416,9 +416,13 @@ class EngineArgs:
|
|||||||
'tokens. This is ignored on neuron devices and '
|
'tokens. This is ignored on neuron devices and '
|
||||||
'set to max-model-len')
|
'set to max-model-len')
|
||||||
|
|
||||||
parser.add_argument('--enable-prefix-caching',
|
parser.add_argument(
|
||||||
action='store_true',
|
"--enable-prefix-caching",
|
||||||
help='Enables automatic prefix caching.')
|
action=argparse.BooleanOptionalAction,
|
||||||
|
default=EngineArgs.enable_prefix_caching,
|
||||||
|
help="Enables automatic prefix caching. "
|
||||||
|
"Use --no-enable-prefix-caching to disable explicitly.",
|
||||||
|
)
|
||||||
parser.add_argument('--disable-sliding-window',
|
parser.add_argument('--disable-sliding-window',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help='Disables sliding window, '
|
help='Disables sliding window, '
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user