[ROCm][MXFP4] Infer w4a4 quant method in rocm aiter fused moe (#29775)

Signed-off-by: ZhiweiYan-96 <zhiwei.yan@amd.com>
This commit is contained in:
Zhiwei 2025-12-05 19:01:16 +08:00 committed by GitHub
parent b73b158ab0
commit 3628bcaaf2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 6 additions and 2 deletions

View File

@ -345,6 +345,10 @@ class FusedMoEQuantConfig:
def use_mxfp4_w4a16(self) -> bool:
return self._a1.dtype is None and self._w1.dtype == "mxfp4"
# True only when both self._a1.dtype and self._w1.dtype equal "mxfp4".
# Presumably _a1 / _w1 are the activation and weight quantization
# descriptors (the "a4" / "w4" in the name) -- confirm against the class
# definition. Contrast with use_mxfp4_w4a16, which instead requires
# self._a1.dtype to be None.
@property
def use_mxfp4_w4a4(self) -> bool:
return self._a1.dtype == "mxfp4" and self._w1.dtype == "mxfp4"
# True when self.quant_dtype equals the string "nvfp4".
# NOTE(review): unlike the mxfp4 checks, this reads quant_dtype rather than
# _a1 / _w1 directly; how quant_dtype is derived is not visible here.
@property
def use_nvfp4_w4a4(self) -> bool:
return self.quant_dtype == "nvfp4"

View File

@ -221,8 +221,8 @@ def rocm_aiter_fused_experts(
else:
quant_method = QuantMethod.NO.value
# quark moe for mxfp4 w_dtype
if quant_config.use_mxfp4_w4a16:
# quark moe for mxfp4 w_dtype mxfp4 a_dtype
if quant_config.use_mxfp4_w4a4:
quant_method = QuantMethod.BLOCK_1X32.value
# w8a8 block-scaled
if quant_config.block_shape is not None and quant_config.use_fp8_w8a8: