mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 12:45:01 +08:00
[ROCm][MXFP4] Infer w4a4 quant method in rocm aiter fused moe (#29775)
Signed-off-by: ZhiweiYan-96 <zhiwei.yan@amd.com>
This commit is contained in:
parent
b73b158ab0
commit
3628bcaaf2
@ -345,6 +345,10 @@ class FusedMoEQuantConfig:
|
||||
def use_mxfp4_w4a16(self) -> bool:
|
||||
return self._a1.dtype is None and self._w1.dtype == "mxfp4"
|
||||
|
||||
@property
def use_mxfp4_w4a4(self) -> bool:
    """Report whether both ``_a1`` and ``_w1`` are configured as "mxfp4".

    Returns:
        True only when ``self._a1.dtype`` and ``self._w1.dtype`` both
        compare equal to the string ``"mxfp4"``.

    NOTE(review): ``_a1`` / ``_w1`` presumably describe the first-layer
    activation and weight quantization ("w4a4" = 4-bit weights, 4-bit
    activations) -- confirm against the class definition.
    """
    # Check the ``_a1`` side first and bail out early, so ``_w1`` is only
    # inspected when the first comparison already holds.
    if not (self._a1.dtype == "mxfp4"):
        return False
    return self._w1.dtype == "mxfp4"
|
||||
|
||||
@property
def use_nvfp4_w4a4(self) -> bool:
    """Report whether ``self.quant_dtype`` equals the string "nvfp4".

    Returns:
        The result of comparing ``self.quant_dtype`` with ``"nvfp4"``.

    NOTE(review): the name suggests an NVFP4 scheme with 4-bit weights and
    4-bit activations; only the ``quant_dtype`` comparison is visible
    here -- confirm how ``quant_dtype`` is derived.
    """
    configured_dtype = self.quant_dtype
    return configured_dtype == "nvfp4"
|
||||
|
||||
@ -221,8 +221,8 @@ def rocm_aiter_fused_experts(
|
||||
|
||||
else:
|
||||
quant_method = QuantMethod.NO.value
|
||||
# quark moe for mxfp4 w_dtype
|
||||
if quant_config.use_mxfp4_w4a16:
|
||||
# quark moe for mxfp4 w_dtype mxfp4 a_dtype
|
||||
if quant_config.use_mxfp4_w4a4:
|
||||
quant_method = QuantMethod.BLOCK_1X32.value
|
||||
# w8a8 block-scaled
|
||||
if quant_config.block_shape is not None and quant_config.use_fp8_w8a8:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user