From 98deac3879860b829dd9a30b19bbb2adb9c96e7f Mon Sep 17 00:00:00 2001 From: Chi Zhang Date: Wed, 13 Aug 2025 20:27:25 +0800 Subject: [PATCH] [FEATURE] support custom vllm tuned config path for fused moe triton kernels (#22791) Signed-off-by: Chi Zhang --- vllm/envs.py | 6 ++++ .../layers/fused_moe/fused_moe.py | 28 +++++++++++++------ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index 931edcfa7f1e..e7796aa73df4 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -158,6 +158,7 @@ if TYPE_CHECKING: VLLM_USE_TRTLLM_ATTENTION: Optional[str] = None VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8: bool = False VLLM_USE_FLASHINFER_MOE_MXFP4_BF16: bool = False + VLLM_TUNED_CONFIG_FOLDER: Optional[str] = None def get_default_cache_root(): @@ -1120,6 +1121,11 @@ environment_variables: dict[str, Callable[[], Any]] = { # never removed from memory until the server terminates. "VLLM_ENABLE_RESPONSES_API_STORE": lambda: bool(int(os.getenv("VLLM_ENABLE_RESPONSES_API_STORE", "0"))), + + # Allows vllm to find tuned config under customized folder + "VLLM_TUNED_CONFIG_FOLDER": + lambda: os.getenv("VLLM_TUNED_CONFIG_FOLDER", None), + } # --8<-- [end:env-vars-definition] diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index ad094c37f947..98087a35e15c 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -701,20 +701,32 @@ def get_moe_configs( block_shape = [block_n, block_k] if block_n and block_k else None json_file_name = get_config_file_name(E, N, dtype, block_shape) - config_file_path = os.path.join( + config_file_paths = [] + + # note that we prioritize user defined config + user_defined_config_folder = envs.VLLM_TUNED_CONFIG_FOLDER + if user_defined_config_folder is not None: + user_defined_config_file_path = os.path.join( + user_defined_config_folder, json_file_name) + config_file_paths.append(user_defined_config_file_path) + + default_config_file_path = os.path.join( os.path.dirname(os.path.realpath(__file__)), "configs", json_file_name) - if os.path.exists(config_file_path): - with open(config_file_path) as f: - logger.info("Using configuration from %s for MoE layer.", - config_file_path) - # If a configuration has been found, return it - return {int(key): val for key, val in json.load(f).items()} + config_file_paths.append(default_config_file_path) + + for config_file_path in config_file_paths: + if os.path.exists(config_file_path): + with open(config_file_path) as f: + logger.info("Using configuration from %s for MoE layer.", + config_file_path) + # If a configuration has been found, return it + return {int(key): val for key, val in json.load(f).items()} # If no optimized configuration is available, we will use the default # configuration logger.warning( ("Using default MoE config. Performance might be sub-optimal! " - "Config file not found at %s"), config_file_path) + "Config file not found at %s"), config_file_paths) return None