diff --git a/vllm/config.py b/vllm/config.py
index 7cdc7b1adf9ab..ebe46c4bec5b1 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -748,8 +748,6 @@ class ModelConfig:
     def get_head_size(self) -> int:
         # TODO remove hard code
         if self.is_deepseek_mla:
-            # FlashAttention supports only head_size 32, 64, 128, 256,
-            # we need to pad head_size 192 to 256
             if self.should_use_mla:
                 return self.hf_text_config.kv_lora_rank
             else:
@@ -974,7 +972,7 @@ class ModelConfig:
     @property
     def should_use_mla(self) -> bool:
         use_mla = (self.is_deepseek_mla and not self.disable_mla
-                   and not envs.VLLM_DISABLE_MLA)
+                   and not envs.VLLM_MLA_DISABLE)
         return use_mla
 
     def supported_runner_types(self) -> Set[RunnerType]:
diff --git a/vllm/envs.py b/vllm/envs.py
index c8b7340c0d251..2a18e3b9bc51d 100644
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -77,6 +77,7 @@ if TYPE_CHECKING:
     V_SCALE_CONSTANT: int = 100
     VLLM_SERVER_DEV_MODE: bool = False
     VLLM_V1_OUTPUT_PROC_CHUNK_SIZE: int = 128
+    VLLM_MLA_DISABLE: bool = False
     VLLM_MLA_PERFORM_MATRIX_ABSORPTION: bool = True
 
 
@@ -302,10 +303,6 @@ environment_variables: Dict[str, Callable[[], Any]] = {
     "VLLM_FLASHINFER_FORCE_TENSOR_CORES":
     lambda: bool(int(os.getenv("VLLM_FLASHINFER_FORCE_TENSOR_CORES", "0"))),
 
-    # If set, vLLM will disable the MLA attention optimizations.
-    "VLLM_DISABLE_MLA":
-    lambda: bool(int(os.getenv("VLLM_DISABLE_MLA", "0"))),
-
     # Pipeline stage partition strategy
     "VLLM_PP_LAYER_PARTITION":
     lambda: os.getenv("VLLM_PP_LAYER_PARTITION", None),
@@ -512,6 +509,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
     "VLLM_V1_OUTPUT_PROC_CHUNK_SIZE":
     lambda: int(os.getenv("VLLM_V1_OUTPUT_PROC_CHUNK_SIZE", "128")),
 
+    # If set to 1, vLLM will disable the MLA attention optimizations.
+    "VLLM_MLA_DISABLE":
+    lambda: bool(int(os.getenv("VLLM_MLA_DISABLE", "0"))),
+
     # Flag that can control whether or not we perform matrix-absorption for MLA
     # decode, i.e. absorb W_UK into W_Q/W_UK and W_UV into W_O, absorbing the
     # matrices reduces the runtime FLOPs needed to compute MLA but requires