mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 14:35:00 +08:00
[FEATURE] support custom vllm tuned config path for fused moe triton kernels (#22791)
Signed-off-by: Chi Zhang <zhangchi.usc1992@bytedance.com>
This commit is contained in:
parent
653124bd46
commit
98deac3879
@ -158,6 +158,7 @@ if TYPE_CHECKING:
|
|||||||
VLLM_USE_TRTLLM_ATTENTION: Optional[str] = None
|
VLLM_USE_TRTLLM_ATTENTION: Optional[str] = None
|
||||||
VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8: bool = False
|
VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8: bool = False
|
||||||
VLLM_USE_FLASHINFER_MOE_MXFP4_BF16: bool = False
|
VLLM_USE_FLASHINFER_MOE_MXFP4_BF16: bool = False
|
||||||
|
VLLM_TUNED_CONFIG_FOLDER: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
def get_default_cache_root():
|
def get_default_cache_root():
|
||||||
@ -1120,6 +1121,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
|||||||
# never removed from memory until the server terminates.
|
# never removed from memory until the server terminates.
|
||||||
"VLLM_ENABLE_RESPONSES_API_STORE":
|
"VLLM_ENABLE_RESPONSES_API_STORE":
|
||||||
lambda: bool(int(os.getenv("VLLM_ENABLE_RESPONSES_API_STORE", "0"))),
|
lambda: bool(int(os.getenv("VLLM_ENABLE_RESPONSES_API_STORE", "0"))),
|
||||||
|
|
||||||
|
# Folder where vLLM first looks for tuned fused-MoE kernel config files, before falling back to its bundled configs
|
||||||
|
"VLLM_TUNED_CONFIG_FOLDER":
|
||||||
|
lambda: os.getenv("VLLM_TUNED_CONFIG_FOLDER", None),
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# --8<-- [end:env-vars-definition]
|
# --8<-- [end:env-vars-definition]
|
||||||
|
|||||||
@ -701,20 +701,32 @@ def get_moe_configs(
|
|||||||
block_shape = [block_n, block_k] if block_n and block_k else None
|
block_shape = [block_n, block_k] if block_n and block_k else None
|
||||||
json_file_name = get_config_file_name(E, N, dtype, block_shape)
|
json_file_name = get_config_file_name(E, N, dtype, block_shape)
|
||||||
|
|
||||||
config_file_path = os.path.join(
|
config_file_paths = []
|
||||||
|
|
||||||
|
# Note: the user-defined config folder is searched first, so it takes priority over the default configs
|
||||||
|
user_defined_config_folder = envs.VLLM_TUNED_CONFIG_FOLDER
|
||||||
|
if user_defined_config_folder is not None:
|
||||||
|
user_defined_config_file_path = os.path.join(
|
||||||
|
user_defined_config_folder, json_file_name)
|
||||||
|
config_file_paths.append(user_defined_config_file_path)
|
||||||
|
|
||||||
|
default_config_file_path = os.path.join(
|
||||||
os.path.dirname(os.path.realpath(__file__)), "configs", json_file_name)
|
os.path.dirname(os.path.realpath(__file__)), "configs", json_file_name)
|
||||||
if os.path.exists(config_file_path):
|
config_file_paths.append(default_config_file_path)
|
||||||
with open(config_file_path) as f:
|
|
||||||
logger.info("Using configuration from %s for MoE layer.",
|
for config_file_path in config_file_paths:
|
||||||
config_file_path)
|
if os.path.exists(config_file_path):
|
||||||
# If a configuration has been found, return it
|
with open(config_file_path) as f:
|
||||||
return {int(key): val for key, val in json.load(f).items()}
|
logger.info("Using configuration from %s for MoE layer.",
|
||||||
|
config_file_path)
|
||||||
|
# If a configuration has been found, return it
|
||||||
|
return {int(key): val for key, val in json.load(f).items()}
|
||||||
|
|
||||||
# If no optimized configuration is available, we will use the default
|
# If no optimized configuration is available, we will use the default
|
||||||
# configuration
|
# configuration
|
||||||
logger.warning(
|
logger.warning(
|
||||||
("Using default MoE config. Performance might be sub-optimal! "
|
("Using default MoE config. Performance might be sub-optimal! "
|
||||||
"Config file not found at %s"), config_file_path)
|
"Config file not found at %s"), config_file_paths)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user