mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-25 00:57:54 +08:00
[ROCM] Enable CompressedTensorsWNA16 (#27187)
Signed-off-by: JartX <sagformas@epdcenter.es>
This commit is contained in:
parent
bd66b8529b
commit
ba09652de2
@@ -142,7 +142,10 @@ class CompressedTensorsMoEMethod(FusedMoEMethodBase):
|
|||||||
# group_size=None means channelwise
|
# group_size=None means channelwise
|
||||||
group_size = weight_quant.group_size or -1
|
group_size = weight_quant.group_size or -1
|
||||||
# Prefer to use the MarlinMoE kernel when it is supported.
|
# Prefer to use the MarlinMoE kernel when it is supported.
|
||||||
if not check_moe_marlin_supports_layer(layer, group_size):
|
if (
|
||||||
|
not check_moe_marlin_supports_layer(layer, group_size)
|
||||||
|
or current_platform.is_rocm()
|
||||||
|
):
|
||||||
if (
|
if (
|
||||||
weight_quant.strategy == QuantizationStrategy.GROUP
|
weight_quant.strategy == QuantizationStrategy.GROUP
|
||||||
and weight_quant.actorder
|
and weight_quant.actorder
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user