Fix incompatibility with non-CUDA platforms for nvfp4 (#23478)

Signed-off-by: Lu Fang <fanglu@fb.com>
Co-authored-by: Lucia (Lu) Fang <fanglu@meta.com>
2025-12-14 00:05:36 +08:00 · 2025-08-24 15:35:41 -07:00 · 2025-08-24 15:35:41 -07:00 · c7fc6b1354
commit c7fc6b1354
parent ad78868450
1 changed file with 3 additions and 1 deletion
--- a/vllm/compilation/fusion.py
+++ b/vllm/compilation/fusion.py
@@ -47,8 +47,10 @@ QUANT_OPS: dict[QuantKey, OpOverload] = {
    torch.ops._C.dynamic_scaled_fp8_quant.default,  # noqa: E501
    kFp8DynamicTokenSym:
    torch.ops._C.dynamic_per_token_scaled_fp8_quant.default,  # noqa: E501
    kNvfp4Quant: torch.ops._C.scaled_fp4_quant.default,  # noqa: E501
 }
 if current_platform.is_cuda() and hasattr(torch.ops._C, "scaled_fp4_quant"):
    QUANT_OPS[
        kNvfp4Quant] = torch.ops._C.scaled_fp4_quant.default  # noqa: E501
 class FusedRMSQuantKey(NamedTuple):