mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 05:34:55 +08:00
[BugFix] Disable fp8 kv-cache by default for DeepSeek V3.2 (#27121)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
parent
e133d6d218
commit
c2bba69065
@ -481,12 +481,9 @@ class DeepseekV32ForCausalLM(VerifyAndUpdateConfig):
|
||||
is_v32 = hasattr(hf_config, "index_topk")
|
||||
assert is_v32
|
||||
|
||||
# For DeepSeekV3.2, we use a custom fp8 format as default (i.e.
|
||||
# "auto")
|
||||
# For DeepSeekV3.2, a custom fp8 format is used when fp8 kv-cache is enabled.
|
||||
cache_config = vllm_config.cache_config
|
||||
if cache_config.cache_dtype == "auto" or cache_config.cache_dtype.startswith(
|
||||
"fp8"
|
||||
):
|
||||
if cache_config.cache_dtype.startswith("fp8"):
|
||||
cache_config.cache_dtype = "fp8_ds_mla"
|
||||
logger.info("Using custom fp8 kv-cache format for DeepSeekV3.2")
|
||||
if cache_config.cache_dtype == "bfloat16":
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user