diff --git a/vllm/envs.py b/vllm/envs.py
index 56558548d398..2ac457419a72 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -116,7 +116,7 @@ if TYPE_CHECKING:
     VLLM_ROCM_USE_AITER_TRITON_ROPE: bool = False
     VLLM_ROCM_USE_AITER_FP8BMM: bool = True
     VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION: bool = False
-    VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS: bool = True
+    VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS: bool = False
     VLLM_ROCM_USE_AITER_TRITON_GEMM: bool = True
     VLLM_ROCM_USE_SKINNY_GEMM: bool = True
     VLLM_ROCM_FP8_PADDING: bool = True
@@ -969,9 +969,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
         in ("true", "1")
     ),
     # Whether to use aiter fusion shared experts ops.
-    # By default is enabled.
+    # By default is disabled.
     "VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS": lambda: (
-        os.getenv("VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS", "True").lower()
+        os.getenv("VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS", "False").lower()
         in ("true", "1")
     ),
     # Whether to use aiter triton kernels for gemm ops.
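
For reference, a minimal standalone sketch of the env-flag pattern this diff changes: the flag is truthy only for the case-insensitive strings "true" or "1", and its default string is now "False". The helper name below is illustrative, not part of vLLM.

    import os

    def _aiter_fusion_shared_experts_enabled() -> bool:
        # Mirrors the lambda in vllm/envs.py after this change:
        # parse the env var as a bool, defaulting to disabled.
        return (
            os.getenv("VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS", "False").lower()
            in ("true", "1")
        )

    if __name__ == "__main__":
        # Prints False unless the env var is set to "true"/"1" (any case).
        print(_aiter_fusion_shared_experts_enabled())

With this default flip, users who want the aiter fusion shared experts ops must now opt in explicitly, e.g. by exporting VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS=1 before launching.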