mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-31 13:07:11 +08:00
[BugFix] Disable fp8 kv-cache by default for DeepSeek V3.2 (#27121)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
parent
e133d6d218
commit
c2bba69065
@@ -481,12 +481,9 @@ class DeepseekV32ForCausalLM(VerifyAndUpdateConfig):
|
|||||||
is_v32 = hasattr(hf_config, "index_topk")
|
is_v32 = hasattr(hf_config, "index_topk")
|
||||||
assert is_v32
|
assert is_v32
|
||||||
|
|
||||||
# For DeepSeekV3.2, we use a custom fp8 format as default (i.e.
|
# For DeepSeekV3.2, a custom fp8 format is used when fp8 kv-cache is enabled.
|
||||||
# "auto")
|
|
||||||
cache_config = vllm_config.cache_config
|
cache_config = vllm_config.cache_config
|
||||||
if cache_config.cache_dtype == "auto" or cache_config.cache_dtype.startswith(
|
if cache_config.cache_dtype.startswith("fp8"):
|
||||||
"fp8"
|
|
||||||
):
|
|
||||||
cache_config.cache_dtype = "fp8_ds_mla"
|
cache_config.cache_dtype = "fp8_ds_mla"
|
||||||
logger.info("Using custom fp8 kv-cache format for DeepSeekV3.2")
|
logger.info("Using custom fp8 kv-cache format for DeepSeekV3.2")
|
||||||
if cache_config.cache_dtype == "bfloat16":
|
if cache_config.cache_dtype == "bfloat16":
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user