diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index 3897584307e91..8a53337ebc087 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -152,7 +152,7 @@ class CudaPlatformBase(Platform):
                 # here
                 use_flashmla = (envs.VLLM_ATTENTION_BACKEND is None \
                     or envs.VLLM_ATTENTION_BACKEND == "FLASHMLA")
-                from vllm.attention.backends.flashmla import is_flashmla_supported
+                from vllm.attention.ops.flashmla import is_flashmla_supported
                 if use_flashmla and is_flashmla_supported()[0] \
                     and cache_config.block_size != 64:
                     cache_config.block_size = 64