From 3de2ed767f64be006586b4c97e1f6524a75b4748 Mon Sep 17 00:00:00 2001
From: Ming Yang
Date: Thu, 10 Jul 2025 12:55:22 -0700
Subject: [PATCH] [Bugfix] Remove assertion of expert_map being None (#20714)

Signed-off-by: Ming Yang
Signed-off-by: Ming Yang
---
 .../layers/fused_moe/pplx_prepare_finalize.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py
index c84f28d0874d..1ce47e3eeca3 100644
--- a/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py
+++ b/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py
@@ -6,11 +6,14 @@
 import pplx_kernels as pplx
 import torch
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
 from vllm.model_executor.layers.fused_moe.utils import (
     _validate_scale_shape, moe_kernel_quantize_input)
 from vllm.utils import cdiv, round_up
 
+logger = init_logger(__name__)
+
 
 def pplx_hidden_dim_scale_bytes(
     max_num_tokens: int,
@@ -101,9 +104,15 @@ class PplxPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
         hidden_dim = a1.size(-1)  # K
 
         assert topk_ids.size(0) == num_tokens
-        assert expert_map is None, """with expert map, -1 id is used for
-            non-local token; this causes error when casting ids to the
-            topk_indices_dtype() uint32"""
+        # expert_map should be None because, with an expert map, the -1 id used
+        # for non-local tokens causes an error when casting ids to the
+        # topk_indices_dtype() int32.
+        #
+        if expert_map is not None:
+            logger.warning_once(
+                "The PPLX backend does not support expert mapping. "
+                "The provided `expert_map` will be ignored.")
+            expert_map = None  # noqa: F841
 
         # Is this always going to be a1.device?
         device = a1.device
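
As an illustration of the behavioral change (not part of the patch), below is a minimal, self-contained Python sketch. The `_StubLogger` and `prepare_stub` names are hypothetical stand-ins; in vLLM the real pieces are the PplxPrepareAndFinalize.prepare path and the logger returned by init_logger, whose warning_once is assumed to emit a given message only once.

# Sketch only: mimics the new guard that replaces the old assertion.
from typing import Optional

class _StubLogger:
    """Hypothetical stand-in for vLLM's logger with warning_once behavior."""

    def __init__(self) -> None:
        self._seen: set = set()

    def warning_once(self, msg: str) -> None:
        # Emit each distinct warning only the first time it is seen.
        if msg not in self._seen:
            self._seen.add(msg)
            print(f"WARNING: {msg}")

logger = _StubLogger()

def prepare_stub(expert_map: Optional[list]) -> None:
    # Old behavior: `assert expert_map is None, ...` raised whenever a map was
    # passed. New behavior: warn once and drop the map, because the -1 ids used
    # for non-local tokens break the cast to the int32 topk index dtype.
    if expert_map is not None:
        logger.warning_once(
            "The PPLX backend does not support expert mapping. "
            "The provided `expert_map` will be ignored.")
        expert_map = None
    # ... proceed with expert_map guaranteed to be None ...

prepare_stub([0, 1, -1, 2])  # logs the warning once
prepare_stub([0, 1, -1, 2])  # silent on the second call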