mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-20 06:41:19 +08:00
[ROCM] Enable CompressedTensorsWNA16 (#27187)
Signed-off-by: JartX <sagformas@epdcenter.es>
This commit is contained in:
parent
bd66b8529b
commit
ba09652de2
@@ -142,7 +142,10 @@ class CompressedTensorsMoEMethod(FusedMoEMethodBase):
|
||||
# group_size=None means channelwise
|
||||
group_size = weight_quant.group_size or -1
|
||||
# Prefer to use the MarlinMoE kernel when it is supported.
|
||||
- if not check_moe_marlin_supports_layer(layer, group_size):
|
||||
+ if (
|
||||
+ not check_moe_marlin_supports_layer(layer, group_size)
|
||||
+ or current_platform.is_rocm()
|
||||
+ ):
|
||||
if (
|
||||
weight_quant.strategy == QuantizationStrategy.GROUP
|
||||
and weight_quant.actorder
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user