diff --git a/vllm/model_executor/layers/quantization/utils/marlin_utils.py b/vllm/model_executor/layers/quantization/utils/marlin_utils.py
index 14337ee1d7bee..072b46f055210 100644
--- a/vllm/model_executor/layers/quantization/utils/marlin_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/marlin_utils.py
@@ -179,6 +179,8 @@ def check_marlin_supports_shape(
 
 
 def check_marlin_supports_layer(layer: LinearBase, group_size: int) -> bool:
+    if current_platform.is_rocm():
+        return False
     output_size_per_partition = (
         getattr(layer, "output_size_per_partition", None) or layer.output_size
     )
@@ -195,6 +197,8 @@ def check_marlin_supports_layer(layer: LinearBase, group_size: int) -> bool:
 
 
 def check_moe_marlin_supports_layer(layer: LinearBase, group_size: int) -> bool:
+    if current_platform.is_rocm():
+        return False
     hidden_size = layer.hidden_size
     intermediate_size_per_partition = layer.intermediate_size_per_partition
     # apply_router_weight_on_input is not supported for moe marlin