[Bugfix] Remove assertion of expert_map being None (#20714)

Signed-off-by: Ming Yang <yming@meta.com>
Signed-off-by: Ming Yang <minos.future@gmail.com>
2025-12-10 05:25:00 +08:00 · 2025-07-10 12:55:22 -07:00 · 2025-07-10 12:55:22 -07:00 · 3de2ed767f
commit 3de2ed767f
parent 299252ea82
1 changed files with 12 additions and 3 deletions
--- a/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py
+++ b/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py
@@ -6,11 +6,14 @@ import pplx_kernels as pplx
 import torch

 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
 from vllm.model_executor.layers.fused_moe.utils import (
    _validate_scale_shape, moe_kernel_quantize_input)
 from vllm.utils import cdiv, round_up

+logger = init_logger(__name__)
+

 def pplx_hidden_dim_scale_bytes(
    max_num_tokens: int,
@@ -101,9 +104,15 @@ class PplxPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
        hidden_dim = a1.size(-1)  # K

        assert topk_ids.size(0) == num_tokens
-        assert expert_map is None, """with expert map, -1 id is used for
-            non-local token; this causes error when casting ids to the
-            topk_indices_dtype() uint32"""
+        # expert_map should be None because with expert map, -1 id is used for
+        # non-local token; this causes error when casting ids to the
+        # topk_indices_dtype() int32
+        #
+        if expert_map is not None:
+            logger.warn_once(
+                "The PPLX backend does not support expert mapping. "
+                "The provided `expert_map` will be ignored.")
+        expert_map = None  #noqa: F841

        # Is this always going to be a1.device?
        device = a1.device