[BugFix] Disable fp8 kv-cache by default for DeepSeek V3.2 (#27121)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Lucas Wilkinson 2025-10-18 18:05:23 -04:00 committed by GitHub
parent e133d6d218
commit c2bba69065
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -481,12 +481,9 @@ class DeepseekV32ForCausalLM(VerifyAndUpdateConfig):
is_v32 = hasattr(hf_config, "index_topk")
assert is_v32
- # For DeepSeekV3.2, we use a custom fp8 format as default (i.e.
- # "auto")
+ # For DeepSeekV3.2, a custom fp8 format is used when fp8 kv-cache is enabled.
cache_config = vllm_config.cache_config
- if cache_config.cache_dtype == "auto" or cache_config.cache_dtype.startswith(
- "fp8"
- ):
+ if cache_config.cache_dtype.startswith("fp8"):
cache_config.cache_dtype = "fp8_ds_mla"
logger.info("Using custom fp8 kv-cache format for DeepSeekV3.2")
if cache_config.cache_dtype == "bfloat16":