From 5264015d74f2e0213a1e7d51041a558d7ea580e8 Mon Sep 17 00:00:00 2001 From: Jinghui Zhang Date: Thu, 28 Aug 2025 22:54:12 -0700 Subject: [PATCH] [BugFix][AMD][Deepseek] fix a dtype mismatch error for deepseek running on AMD (#23864) Signed-off-by: Jinghui Zhang --- .../layers/fused_moe/rocm_aiter_fused_moe.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py b/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py index 93e20c3477bbe..b838fd798bbc0 100644 --- a/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py @@ -279,7 +279,7 @@ def rocm_aiter_grouped_topk( if e_score_correction_bias is not None: torch.ops.vllm.rocm_aiter_biased_grouped_topk( gating_output, - e_score_correction_bias, + e_score_correction_bias.to(gating_output.dtype), topk_weights, topk_ids, num_expert_group, @@ -409,15 +409,15 @@ def shuffle_weights( *tensors: torch.Tensor, layout: tuple[int, int] = (16, 16) ) -> tuple[torch.Tensor, ...]: """ - Applies shuffle_weight function from AITER to each + Applies shuffle_weight function from AITER to each input tensor and returns them. - + Rearranges (shuffles) the input tensor/s into a specified block layout for optimized computation. Args: *tensors: Variable number of torch.Tensor objects. - layout: A pair of integers specifying the + layout: A pair of integers specifying the block sizes used to divide the tensors during shuffling. Default is (16, 16).