mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-21 06:59:09 +08:00
[Misc][DP] Guard mxfp4 implementation selection (#27484)
Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
This commit is contained in:
parent
52efc34ebf
commit
269c4db0a4
@@ -794,7 +794,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
-"Incompatible Mxfp4 backend for EP batched experts format"
|
||||
+f"Incompatible Mxfp4 backend ({self.mxfp4_backend}) for "
|
||||
+"EP batched experts format"
|
||||
)
|
||||
else:
|
||||
assert self.moe_quant_config is not None
|
||||
@@ -813,8 +814,12 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
|
||||
return TrtLlmGenExperts(self.moe, self.moe_quant_config, **kwargs)
|
||||
elif self.mxfp4_backend == Mxfp4Backend.MARLIN:
|
||||
return MarlinExperts(self.moe_quant_config)
|
||||
-else:
|
||||
+elif self.mxfp4_backend == Mxfp4Backend.TRITON:
|
||||
return OAITritonExperts(self.moe_quant_config)
|
||||
+else:
|
||||
+raise NotImplementedError(
|
||||
+f"Incompatible Mxfp4 backend ({self.mxfp4_backend}) for EP"
|
||||
+)
|
||||
|
||||
def _route_and_experts(
|
||||
self,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user