From cc7d87c6dde84c9d86b247c575cd58a50711fed9 Mon Sep 17 00:00:00 2001
From: Zhaodong Bing <45478848+aaab8b@users.noreply.github.com>
Date: Wed, 24 Dec 2025 11:22:37 +0800
Subject: [PATCH] Update unquantized_fused_moe_method.py

Change the assignment of unquantized MoE weights when using AIter on
ROCm to an in-place copy, making it safe to reload the weights. This
fixes the random-output issue seen after wake-up and weight reloading
in reinforcement learning.
---
 .../layers/fused_moe/unquantized_fused_moe_method.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/unquantized_fused_moe_method.py b/vllm/model_executor/layers/fused_moe/unquantized_fused_moe_method.py
index 82dbccf3fa9da..f977c01e55d51 100644
--- a/vllm/model_executor/layers/fused_moe/unquantized_fused_moe_method.py
+++ b/vllm/model_executor/layers/fused_moe/unquantized_fused_moe_method.py
@@ -211,16 +211,16 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
         super().process_weights_after_loading(layer)
 
         # Padding the weight for better performance on ROCm
-        layer.w13_weight.data = self._maybe_pad_weight(layer.w13_weight.data)
-        layer.w2_weight.data = self._maybe_pad_weight(layer.w2_weight.data)
+        layer.w13_weight.data.copy_(self._maybe_pad_weight(layer.w13_weight.data))
+        layer.w2_weight.data.copy_(self._maybe_pad_weight(layer.w2_weight.data))
 
         if self.rocm_aiter_moe_enabled:
             shuffled_w13, shuffled_w2 = rocm_aiter_ops.shuffle_weights(
                 layer.w13_weight.data, layer.w2_weight.data
             )
 
-            layer.w13_weight.data = shuffled_w13
-            layer.w2_weight.data = shuffled_w2
+            layer.w13_weight.data.copy_(shuffled_w13)
+            layer.w2_weight.data.copy_(shuffled_w2)
 
         if self.flashinfer_cutlass_moe_enabled:
             # Swap halves to arrange as [w3; w1] (kernel expectation)