[Bugfix] Remove assertion of expert_map being None (#20714)
Signed-off-by: Ming Yang <yming@meta.com>
Signed-off-by: Ming Yang <minos.future@gmail.com>
commit 3de2ed767f
parent 299252ea82
@@ -6,11 +6,14 @@ import pplx_kernels as pplx
 import torch
 
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
 from vllm.model_executor.layers.fused_moe.utils import (
     _validate_scale_shape, moe_kernel_quantize_input)
 from vllm.utils import cdiv, round_up
 
+logger = init_logger(__name__)
+
 
 def pplx_hidden_dim_scale_bytes(
     max_num_tokens: int,
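The module-level logger introduced above is what the second hunk uses to downgrade the old hard assertion to a one-time warning. As a rough illustration of the warn-once idea only (not vLLM's actual init_logger/warn_once implementation, which lives in vllm.logger), a deduplicating wrapper could look like this:

import logging

_warned: set[str] = set()

def warn_once(log: logging.Logger, msg: str) -> None:
    # Emit a given message only the first time it is seen, so a warning
    # raised on every forward pass does not flood the log.
    if msg not in _warned:
        _warned.add(msg)
        log.warning(msg)

log = logging.getLogger(__name__)
warn_once(log, "The PPLX backend does not support expert mapping. "
               "The provided `expert_map` will be ignored.")
warn_once(log, "The PPLX backend does not support expert mapping. "
               "The provided `expert_map` will be ignored.")  # suppressed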
@@ -101,9 +104,15 @@ class PplxPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
         hidden_dim = a1.size(-1)  # K
 
         assert topk_ids.size(0) == num_tokens
-        assert expert_map is None, """with expert map, -1 id is used for
-            non-local token; this causes error when casting ids to the
-            topk_indices_dtype() uint32"""
+        # expert_map should be None because with expert map, -1 id is used for
+        # non-local token; this causes error when casting ids to the
+        # topk_indices_dtype() int32
+        #
+        if expert_map is not None:
+            logger.warn_once(
+                "The PPLX backend does not support expert mapping. "
+                "The provided `expert_map` will be ignored.")
+            expert_map = None  #noqa: F841
 
         # Is this always going to be a1.device?
         device = a1.device
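The comment retained in the hunk above refers to the -1 sentinel that an expert_map assigns to tokens routed to non-local experts; reinterpreting that sentinel as an unsigned 32-bit index wraps around to a huge out-of-range value, which is why the ids cannot simply be cast when a map is present. A minimal standalone sketch of the wrap-around (using numpy for illustration; the real code operates on torch tensors and the exact dtype comes from topk_indices_dtype()):

import numpy as np

# topk ids as produced with an expert_map: -1 marks a non-local expert.
topk_ids = np.array([3, -1, 7], dtype=np.int64)

# Casting to an unsigned 32-bit dtype silently turns the -1 sentinel into
# 2**32 - 1, an out-of-range expert index, rather than raising an error.
as_uint32 = topk_ids.astype(np.uint32)
print(as_uint32)  # [3 4294967295 7]
assert as_uint32[1] == 2**32 - 1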