From ba09652de28dcf43fc0b55b50f400eac768e800b Mon Sep 17 00:00:00 2001
From: JartX
Date: Tue, 21 Oct 2025 16:43:23 +0200
Subject: [PATCH] [ROCM] Enable CompressedTensorsWNA16 (#27187)

Signed-off-by: JartX
---
 .../compressed_tensors/compressed_tensors_moe.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
index 3b82f8a98bbd6..5488b65c6214f 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -142,7 +142,10 @@ class CompressedTensorsMoEMethod(FusedMoEMethodBase):
             # group_size=None means channelwise
             group_size = weight_quant.group_size or -1
             # Prefer to use the MarlinMoE kernel when it is supported.
-            if not check_moe_marlin_supports_layer(layer, group_size):
+            if (
+                not check_moe_marlin_supports_layer(layer, group_size)
+                or current_platform.is_rocm()
+            ):
                 if (
                     weight_quant.strategy == QuantizationStrategy.GROUP
                     and weight_quant.actorder