From c4d62618ca7cf8507c1e357fa1180fb162c670fa Mon Sep 17 00:00:00 2001
From: yuttian1
Date: Sat, 6 Dec 2025 12:54:38 +0800
Subject: [PATCH] Fix AWQ MoE marlin check issue in marlin_utils.py for AMD backend (#30102)

Signed-off-by: yuttian1
---
 vllm/model_executor/layers/quantization/utils/marlin_utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vllm/model_executor/layers/quantization/utils/marlin_utils.py b/vllm/model_executor/layers/quantization/utils/marlin_utils.py
index 14337ee1d7bee..072b46f055210 100644
--- a/vllm/model_executor/layers/quantization/utils/marlin_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/marlin_utils.py
@@ -179,6 +179,8 @@ def check_marlin_supports_shape(
 
 
 def check_marlin_supports_layer(layer: LinearBase, group_size: int) -> bool:
+    if current_platform.is_rocm():
+        return False
     output_size_per_partition = (
         getattr(layer, "output_size_per_partition", None) or layer.output_size
     )
@@ -195,6 +197,8 @@ def check_marlin_supports_layer(layer: LinearBase, group_size: int) -> bool:
 
 
 def check_moe_marlin_supports_layer(layer: LinearBase, group_size: int) -> bool:
+    if current_platform.is_rocm():
+        return False
     hidden_size = layer.hidden_size
     intermediate_size_per_partition = layer.intermediate_size_per_partition
     # apply_router_weight_on_input is not supported for moe marlin