From ac3ed5a8159ed014d2fc975782c1276119f690f9 Mon Sep 17 00:00:00 2001 From: Mandy Li Date: Thu, 16 Oct 2025 12:10:57 -0700 Subject: [PATCH] Support block size of 256 used by Intel HPU (#26883) Signed-off-by: mandy-li --- vllm/config/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/config/cache.py b/vllm/config/cache.py index 04b1e7bf2ac1d..c01643fbff25f 100644 --- a/vllm/config/cache.py +++ b/vllm/config/cache.py @@ -19,7 +19,7 @@ else: logger = init_logger(__name__) -BlockSize = Literal[1, 8, 16, 32, 64, 128] +BlockSize = Literal[1, 8, 16, 32, 64, 128, 256] CacheDType = Literal["auto", "bfloat16", "fp8", "fp8_e4m3", "fp8_e5m2", "fp8_inc"] MambaDType = Literal["auto", "float32"] PrefixCachingHashAlgo = Literal["sha256", "sha256_cbor"]