mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 04:26:00 +08:00
Disable prefix cache by default for benchmark (#18639)
Signed-off-by: cascade812 <cascade812@outlook.com>
This commit is contained in:
parent
06a0338015
commit
aaa4ac1c95
@ -189,5 +189,8 @@ if __name__ == "__main__":
|
||||
)
|
||||
|
||||
parser = EngineArgs.add_cli_args(parser)
|
||||
# V1 enables prefix caching by default which skews the latency
|
||||
# numbers. We need to disable prefix caching by default.
|
||||
parser.set_defaults(enable_prefix_caching=False)
|
||||
args = parser.parse_args()
|
||||
main(args)
|
||||
|
||||
@ -80,6 +80,9 @@ def add_cli_args(parser: argparse.ArgumentParser):
|
||||
)
|
||||
|
||||
parser = EngineArgs.add_cli_args(parser)
|
||||
# V1 enables prefix caching by default which skews the latency
|
||||
# numbers. We need to disable prefix caching by default.
|
||||
parser.set_defaults(enable_prefix_caching=False)
|
||||
|
||||
|
||||
def main(args: argparse.Namespace):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user