[Bugfix] Remove assertion of expert_map being None (#20714)

Signed-off-by: Ming Yang <yming@meta.com>
Signed-off-by: Ming Yang <minos.future@gmail.com>
Ming Yang 2025-07-10 12:55:22 -07:00 committed by GitHub
parent 299252ea82
commit 3de2ed767f

@@ -6,11 +6,14 @@ import pplx_kernels as pplx
 import torch
 
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
 from vllm.model_executor.layers.fused_moe.utils import (
     _validate_scale_shape, moe_kernel_quantize_input)
 from vllm.utils import cdiv, round_up
 
+logger = init_logger(__name__)
+
 
 def pplx_hidden_dim_scale_bytes(
     max_num_tokens: int,
@@ -101,9 +104,15 @@ class PplxPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
         hidden_dim = a1.size(-1)  # K
 
         assert topk_ids.size(0) == num_tokens
-        assert expert_map is None, """with expert map, -1 id is used for
-            non-local token; this causes error when casting ids to the
-            topk_indices_dtype() uint32"""
+        # expert_map should be None because with expert map, -1 id is used for
+        # non-local token; this causes error when casting ids to the
+        # topk_indices_dtype() int32
+        #
+        if expert_map is not None:
+            logger.warn_once(
+                "The PPLX backend does not support expert mapping. "
+                "The provided `expert_map` will be ignored.")
+            expert_map = None  #noqa: F841
 
         # Is this always going to be a1.device?
         device = a1.device
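
For context, the change above follows a "warn once and ignore" pattern: instead of asserting that an unsupported argument was never passed, the backend logs a single warning and proceeds as if the argument were absent. Below is a minimal, self-contained sketch of that pattern; it is illustrative only, not vLLM's actual implementation, and the function and flag names (prepare, _warned_expert_map) are made up for this example.

import warnings

_warned_expert_map = False  # module-level flag so the warning fires only once


def prepare(tokens, expert_map=None):
    """Hypothetical prepare step for a backend that cannot honor expert_map."""
    global _warned_expert_map
    if expert_map is not None:
        if not _warned_expert_map:
            warnings.warn("expert_map is not supported by this backend; "
                          "it will be ignored.")
            _warned_expert_map = True
        expert_map = None  # drop the unsupported argument and continue
    # ... proceed exactly as if no expert map had been provided ...
    return tokens

Compared with the removed assertion, this keeps existing callers working (they no longer crash when an expert map is configured) while still surfacing, once per process, that the mapping is silently dropped.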