From 145c2ff648ad0a300f880ac38811d0d8a2eb3e79 Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com> Date: Fri, 31 Jan 2025 18:28:47 -0500 Subject: [PATCH] [Bugfix] Revert MoE Triton Config Default (#12629) SUMMARY: * previous PR for pulling in block configs also changed defaults (https://github.com/vllm-project/vllm/pull/11589/files) for FP8 * this broke L4 MoE since there was not enough SHM for the default configuration * this reverts the non-block example to the default Signed-off-by: rshaw@neuralmagic.com --- .../layers/fused_moe/fused_moe.py | 41 +++++-------------- 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index 39607dc4ca11e..c966be99ed24f 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -660,36 +660,17 @@ def get_default_config( is_marlin: bool, block_shape: Optional[List[int]] = None, ) -> Dict[str, int]: - if dtype == "fp8_w8a8": - if block_shape is None: - config = { - "BLOCK_SIZE_M": 128, - "BLOCK_SIZE_N": 256, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 32, - "num_warps": 8, - "num_stages": 4, - } - if M <= E: - config = { - "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 4, - "num_stages": 4, - } - else: - # Block-wise quant: BLOCK_SIZE_N must be divisible by block_shape[0] - # BLOCK_SIZE_K must be divisible by block_shape[1] - config = { - "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": block_shape[0], - "BLOCK_SIZE_K": block_shape[1], - "GROUP_SIZE_M": 32, - "num_warps": 4, - "num_stages": 3, - } + if dtype == "fp8_w8a8" and block_shape is not None: + # Block-wise quant: BLOCK_SIZE_N must be divisible by block_shape[0] + # BLOCK_SIZE_K must be divisible by block_shape[1] + config = { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": block_shape[0], + "BLOCK_SIZE_K": block_shape[1], + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 3, + } else: config = { "BLOCK_SIZE_M": 64,