mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-23 21:54:36 +08:00
Support block size of 256 used by Intel HPU (#26883)
Signed-off-by: mandy-li <mandy.j.li@intel.com>
This commit is contained in:
parent
e6ba2000ae
commit
ac3ed5a815
@ -19,7 +19,7 @@ else:
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
BlockSize = Literal[1, 8, 16, 32, 64, 128]
|
||||
BlockSize = Literal[1, 8, 16, 32, 64, 128, 256]
|
||||
CacheDType = Literal["auto", "bfloat16", "fp8", "fp8_e4m3", "fp8_e5m2", "fp8_inc"]
|
||||
MambaDType = Literal["auto", "float32"]
|
||||
PrefixCachingHashAlgo = Literal["sha256", "sha256_cbor"]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user