mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-16 12:29:07 +08:00
update triton experts
Signed-off-by: Yongye Zhu <zyy1102000@gmail.com>
This commit is contained in:
parent
fb567f60d0
commit
4f69e85bd9
@@ -706,6 +706,10 @@ def invoke_fused_moe_triton_kernel(
|
|||||||
block_shape: list[int] | None = None,
|
block_shape: list[int] | None = None,
|
||||||
B_bias: torch.Tensor | None = None,
|
B_bias: torch.Tensor | None = None,
|
||||||
):
|
):
|
||||||
|
assert topk_weights is not None or not mul_routed_weight
|
||||||
|
assert topk_weights is None or topk_weights.stride(1) == 1
|
||||||
|
assert sorted_token_ids.stride(0) == 1
|
||||||
|
|
||||||
if use_fp8_w8a8 or use_int8_w8a8:
|
if use_fp8_w8a8 or use_int8_w8a8:
|
||||||
assert B_scale is not None
|
assert B_scale is not None
|
||||||
assert block_shape is None or triton.cdiv(
|
assert block_shape is None or triton.cdiv(
|
||||||
@@ -2335,13 +2339,12 @@ class TritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
|
|||||||
topk_ids, config["BLOCK_SIZE_M"], global_num_experts, expert_map
|
topk_ids, config["BLOCK_SIZE_M"], global_num_experts, expert_map
|
||||||
)
|
)
|
||||||
|
|
||||||
dispatch_fused_moe_kernel(
|
invoke_fused_moe_triton_kernel(
|
||||||
hidden_states,
|
hidden_states,
|
||||||
w1,
|
w1,
|
||||||
intermediate_cache1,
|
intermediate_cache1,
|
||||||
a1q_scale,
|
a1q_scale,
|
||||||
self.w1_scale,
|
self.w1_scale,
|
||||||
self.w1_zp,
|
|
||||||
None, # topk_weights
|
None, # topk_weights
|
||||||
sorted_token_ids,
|
sorted_token_ids,
|
||||||
expert_ids,
|
expert_ids,
|
||||||
@@ -2373,13 +2376,12 @@ class TritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
|
|||||||
self.block_shape,
|
self.block_shape,
|
||||||
)
|
)
|
||||||
|
|
||||||
dispatch_fused_moe_kernel(
|
invoke_fused_moe_triton_kernel(
|
||||||
qintermediate_cache2,
|
qintermediate_cache2,
|
||||||
w2,
|
w2,
|
||||||
intermediate_cache3,
|
intermediate_cache3,
|
||||||
a2q_scale,
|
a2q_scale,
|
||||||
self.w2_scale,
|
self.w2_scale,
|
||||||
self.w2_zp,
|
|
||||||
topk_weights,
|
topk_weights,
|
||||||
sorted_token_ids,
|
sorted_token_ids,
|
||||||
expert_ids,
|
expert_ids,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user