From ccc00515fde6954a617aea98a927b751d8082946 Mon Sep 17 00:00:00 2001
From: Zhonghua Deng
Date: Mon, 24 Feb 2025 23:37:32 +0800
Subject: [PATCH] [BugFix] Illegal memory access for MoE On H20 (#13693)

---
 vllm/model_executor/layers/fused_moe/fused_moe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index 4cab72a29da4a..1ddc3ce6f8954 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1271,7 +1271,7 @@ def fused_experts_impl(hidden_states: torch.Tensor,
             # so the cache size and config are already set correctly and
             # do not need to be adjusted.
             intermediate_cache1 = intermediate_cache1[:tokens_in_chunk]
-            intermediate_cache2 = intermediate_cache2[:tokens_in_chunk]
+            intermediate_cache2 = intermediate_cache2[:tokens_in_chunk * topk_ids.shape[1]]
             intermediate_cache3 = intermediate_cache3[:tokens_in_chunk]
 
             config = get_config_func(tokens_in_chunk)
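
Illustration (not part of the patch): assuming the buffer layout in
fused_experts_impl, intermediate_cache2 is allocated flat with one row per
(token, expert) pair, i.e. M * topk rows, while intermediate_cache1 and
intermediate_cache3 keep topk as a separate dimension. Slicing cache2 by
tokens_in_chunk alone therefore keeps topk times too few rows for the final,
smaller chunk, and the downstream kernel writes past the view. A minimal
Python sketch with hypothetical toy shapes (M, topk, N, tokens_in_chunk are
made-up values, not taken from the code):

# Sketch of the slicing bug fixed above. Shapes are assumed toy values;
# the layout mirrors fused_experts_impl, where cache1/cache3 keep topk as
# a separate dim but cache2 is flattened over (token, expert) pairs.
import torch

M, topk, N = 8, 2, 16           # tokens, experts per token, hidden size
tokens_in_chunk = 5             # a final chunk smaller than CHUNK_SIZE

intermediate_cache1 = torch.empty(M, topk, N)
intermediate_cache2 = torch.empty(M * topk, N // 2)   # flattened rows
intermediate_cache3 = torch.empty(M, topk, N)

# cache1/cache3: slicing dim 0 by the token count alone is enough,
# because topk lives in dim 1 and is untouched by the slice.
assert intermediate_cache1[:tokens_in_chunk].shape == (tokens_in_chunk, topk, N)

# cache2 before the fix: the slice keeps only tokens_in_chunk of the
# tokens_in_chunk * topk rows the chunk actually needs, so kernels fed
# this view can access memory past its end.
buggy = intermediate_cache2[:tokens_in_chunk]
assert buggy.shape[0] < tokens_in_chunk * topk

# cache2 after the fix: scale the slice by the number of experts
# selected per token (topk_ids.shape[1] in the real code).
fixed = intermediate_cache2[:tokens_in_chunk * topk]
assert fixed.shape[0] == tokens_in_chunk * topk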