mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-27 07:07:52 +08:00
[BugFix] Fix MLA + V1 + TP==1 causing reinitialization of cuda context (#14910)
This commit is contained in:
parent
7f6c5ee06c
commit
1e799b7ec1
@@ -152,7 +152,7 @@ class CudaPlatformBase(Platform):
|
|||||||
# here
|
# here
|
||||||
use_flashmla = (envs.VLLM_ATTENTION_BACKEND is None \
|
use_flashmla = (envs.VLLM_ATTENTION_BACKEND is None \
|
||||||
or envs.VLLM_ATTENTION_BACKEND == "FLASHMLA")
|
or envs.VLLM_ATTENTION_BACKEND == "FLASHMLA")
|
||||||
from vllm.attention.backends.flashmla import is_flashmla_supported
|
from vllm.attention.ops.flashmla import is_flashmla_supported
|
||||||
if use_flashmla and is_flashmla_supported()[0] \
|
if use_flashmla and is_flashmla_supported()[0] \
|
||||||
and cache_config.block_size != 64:
|
and cache_config.block_size != 64:
|
||||||
cache_config.block_size = 64
|
cache_config.block_size = 64
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user