mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-25 15:11:20 +08:00
[Misc][DP] Guard mxfp4 implementation selection (#27484)
Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com> Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
This commit is contained in:
parent
52efc34ebf
commit
269c4db0a4
@ -794,7 +794,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
"Incompatible Mxfp4 backend for EP batched experts format"
|
f"Incompatible Mxfp4 backend ({self.mxfp4_backend}) for "
|
||||||
|
"EP batched experts format"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
assert self.moe_quant_config is not None
|
assert self.moe_quant_config is not None
|
||||||
@ -813,8 +814,12 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
|
|||||||
return TrtLlmGenExperts(self.moe, self.moe_quant_config, **kwargs)
|
return TrtLlmGenExperts(self.moe, self.moe_quant_config, **kwargs)
|
||||||
elif self.mxfp4_backend == Mxfp4Backend.MARLIN:
|
elif self.mxfp4_backend == Mxfp4Backend.MARLIN:
|
||||||
return MarlinExperts(self.moe_quant_config)
|
return MarlinExperts(self.moe_quant_config)
|
||||||
else:
|
elif self.mxfp4_backend == Mxfp4Backend.TRITON:
|
||||||
return OAITritonExperts(self.moe_quant_config)
|
return OAITritonExperts(self.moe_quant_config)
|
||||||
|
else:
|
||||||
|
raise NotImplementedError(
|
||||||
|
f"Incompatible Mxfp4 backend ({self.mxfp4_backend}) for EP"
|
||||||
|
)
|
||||||
|
|
||||||
def _route_and_experts(
|
def _route_and_experts(
|
||||||
self,
|
self,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user