mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-09 09:07:03 +08:00
Merge 3de8a858b332e4ad57a4d395304295472fba7fad into 254f6b986720c92ddf97fbb1a6a6465da8e87e29
This commit is contained in:
commit
c299410288
@ -325,8 +325,11 @@ class AiterExperts(mk.FusedMoEPermuteExpertsUnpermute):
|
||||
expert_tokens_meta: mk.ExpertTokensMetadata | None,
|
||||
apply_router_weight_on_input: bool,
|
||||
):
|
||||
# TODO(rob): rocm_aiter_fused_experts uses self.quant_config's
|
||||
# a_scales for static quantization. Update this to fit better
|
||||
# with the interface once all quant integrations are complete.
|
||||
assert a1q_scale is None
|
||||
assert a2_scale is None
|
||||
assert a2_scale == self.quant_config.a2_scale
|
||||
assert expert_tokens_meta is None
|
||||
|
||||
result = rocm_aiter_fused_experts(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user