Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-20 02:25:24 +08:00)
[ROCm][MXFP4] Infer w4a4 quant method in rocm aiter fused moe (#29775)
Signed-off-by: ZhiweiYan-96 <zhiwei.yan@amd.com>
This commit is contained in:
Parent: b73b158ab0
Commit: 3628bcaaf2
@ -345,6 +345,10 @@ class FusedMoEQuantConfig:
|
|||||||
def use_mxfp4_w4a16(self) -> bool:
|
def use_mxfp4_w4a16(self) -> bool:
|
||||||
return self._a1.dtype is None and self._w1.dtype == "mxfp4"
|
return self._a1.dtype is None and self._w1.dtype == "mxfp4"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def use_mxfp4_w4a4(self) -> bool:
|
||||||
|
return self._a1.dtype == "mxfp4" and self._w1.dtype == "mxfp4"
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def use_nvfp4_w4a4(self) -> bool:
|
def use_nvfp4_w4a4(self) -> bool:
|
||||||
return self.quant_dtype == "nvfp4"
|
return self.quant_dtype == "nvfp4"
|
||||||
|
|||||||
@ -221,8 +221,8 @@ def rocm_aiter_fused_experts(
|
|||||||
|
|
||||||
else:
|
else:
|
||||||
quant_method = QuantMethod.NO.value
|
quant_method = QuantMethod.NO.value
|
||||||
# quark moe for mxfp4 w_dtype
|
# quark moe for mxfp4 w_dtype mxfp4 a_dtype
|
||||||
if quant_config.use_mxfp4_w4a16:
|
if quant_config.use_mxfp4_w4a4:
|
||||||
quant_method = QuantMethod.BLOCK_1X32.value
|
quant_method = QuantMethod.BLOCK_1X32.value
|
||||||
# w8a8 block-scaled
|
# w8a8 block-scaled
|
||||||
if quant_config.block_shape is not None and quant_config.use_fp8_w8a8:
|
if quant_config.block_shape is not None and quant_config.use_fp8_w8a8:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user