mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 00:05:36 +08:00
fix incompatibility with non-CUDA platforms for nvfp4 (#23478)
Signed-off-by: Lu Fang <fanglu@fb.com> Co-authored-by: Lucia (Lu) Fang <fanglu@meta.com>
This commit is contained in:
parent
ad78868450
commit
c7fc6b1354
@ -47,8 +47,10 @@ QUANT_OPS: dict[QuantKey, OpOverload] = {
|
|||||||
torch.ops._C.dynamic_scaled_fp8_quant.default, # noqa: E501
|
torch.ops._C.dynamic_scaled_fp8_quant.default, # noqa: E501
|
||||||
kFp8DynamicTokenSym:
|
kFp8DynamicTokenSym:
|
||||||
torch.ops._C.dynamic_per_token_scaled_fp8_quant.default, # noqa: E501
|
torch.ops._C.dynamic_per_token_scaled_fp8_quant.default, # noqa: E501
|
||||||
kNvfp4Quant: torch.ops._C.scaled_fp4_quant.default, # noqa: E501
|
|
||||||
}
|
}
|
||||||
|
if current_platform.is_cuda() and hasattr(torch.ops._C, "scaled_fp4_quant"):
|
||||||
|
QUANT_OPS[
|
||||||
|
kNvfp4Quant] = torch.ops._C.scaled_fp4_quant.default # noqa: E501
|
||||||
|
|
||||||
|
|
||||||
class FusedRMSQuantKey(NamedTuple):
|
class FusedRMSQuantKey(NamedTuple):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user