mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 05:25:00 +08:00
[Bugfix] Remove assertion of expert_map being None (#20714)
Signed-off-by: Ming Yang <yming@meta.com> Signed-off-by: Ming Yang <minos.future@gmail.com>
This commit is contained in:
parent
299252ea82
commit
3de2ed767f
@ -6,11 +6,14 @@ import pplx_kernels as pplx
|
||||
import torch
|
||||
|
||||
import vllm.model_executor.layers.fused_moe.modular_kernel as mk
|
||||
from vllm.logger import init_logger
|
||||
from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
|
||||
from vllm.model_executor.layers.fused_moe.utils import (
|
||||
_validate_scale_shape, moe_kernel_quantize_input)
|
||||
from vllm.utils import cdiv, round_up
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
def pplx_hidden_dim_scale_bytes(
|
||||
max_num_tokens: int,
|
||||
@ -101,9 +104,15 @@ class PplxPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
|
||||
hidden_dim = a1.size(-1) # K
|
||||
|
||||
assert topk_ids.size(0) == num_tokens
|
||||
assert expert_map is None, """with expert map, -1 id is used for
|
||||
non-local token; this causes error when casting ids to the
|
||||
topk_indices_dtype() uint32"""
|
||||
# expert_map should be None because with expert map, -1 id is used for
|
||||
# non-local token; this causes error when casting ids to the
|
||||
# topk_indices_dtype() uint32
|
||||
#
|
||||
if expert_map is not None:
|
||||
logger.warn_once(
|
||||
"The PPLX backend does not support expert mapping. "
|
||||
"The provided `expert_map` will be ignored.")
|
||||
expert_map = None #noqa: F841
|
||||
|
||||
# Is this always going to be a1.device?
|
||||
device = a1.device
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user