mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-01 18:17:05 +08:00
Update unquantized_fused_moe_method.py
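Change how the unquantized MoE weights are assigned when using AITER on ROCm: copy the processed weights into the existing tensors in place (`.data.copy_(...)`) instead of rebinding `.data`, which makes reloading the weights safer. This fixes the random outputs seen after wake-up and weight reloading in reinforcement learning.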
This commit is contained in:
parent
3ce791ac77
commit
cc7d87c6dd
@ -211,16 +211,16 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
|
|||||||
super().process_weights_after_loading(layer)
|
super().process_weights_after_loading(layer)
|
||||||
|
|
||||||
# Padding the weight for better performance on ROCm
|
# Padding the weight for better performance on ROCm
|
||||||
layer.w13_weight.data = self._maybe_pad_weight(layer.w13_weight.data)
|
layer.w13_weight.data.copy_(self._maybe_pad_weight(layer.w13_weight.data))
|
||||||
layer.w2_weight.data = self._maybe_pad_weight(layer.w2_weight.data)
|
layer.w2_weight.data.copy_(self._maybe_pad_weight(layer.w2_weight.data))
|
||||||
|
|
||||||
if self.rocm_aiter_moe_enabled:
|
if self.rocm_aiter_moe_enabled:
|
||||||
shuffled_w13, shuffled_w2 = rocm_aiter_ops.shuffle_weights(
|
shuffled_w13, shuffled_w2 = rocm_aiter_ops.shuffle_weights(
|
||||||
layer.w13_weight.data, layer.w2_weight.data
|
layer.w13_weight.data, layer.w2_weight.data
|
||||||
)
|
)
|
||||||
|
|
||||||
layer.w13_weight.data = shuffled_w13
|
layer.w13_weight.data.copy_(shuffled_w13)
|
||||||
layer.w2_weight.data = shuffled_w2
|
layer.w2_weight.data.copy_(shuffled_w2)
|
||||||
|
|
||||||
if self.flashinfer_cutlass_moe_enabled:
|
if self.flashinfer_cutlass_moe_enabled:
|
||||||
# Swap halves to arrange as [w3; w1] (kernel expectation)
|
# Swap halves to arrange as [w3; w1] (kernel expectation)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user