mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-09 10:47:06 +08:00
updated
Signed-off-by: Robert Shaw <robshaw@redhat.com>
This commit is contained in:
parent
5d93089686
commit
3de8a858b3
@ -325,8 +325,11 @@ class AiterExperts(mk.FusedMoEPermuteExpertsUnpermute):
|
||||
expert_tokens_meta: mk.ExpertTokensMetadata | None,
|
||||
apply_router_weight_on_input: bool,
|
||||
):
|
||||
# TODO(rob): rocm_aiter_fused_experts uses self.quant_config's
|
||||
# a_scales for static quantization. Update this to fit better
|
||||
# with the interface once all quant integrations are complete.
|
||||
assert a1q_scale is None
|
||||
assert a2_scale is None
|
||||
assert a2_scale == self.quant_config.a2_scale
|
||||
assert expert_tokens_meta is None
|
||||
|
||||
result = rocm_aiter_fused_experts(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user