mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-06 03:57:02 +08:00
Fix flash_infer bug: missing out dtype
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
This commit is contained in:
parent
7794009661
commit
a8010c7b1c
@@ -17,7 +17,7 @@ def apply_weights_fp8(
|
||||
x_s: torch.Tensor,
|
||||
bias: torch.Tensor,
|
||||
x_s_ub: torch.Tensor | None,
|
||||
- maybe_out_dtype: torch.dtype | None,
|
||||
+ maybe_out_dtype: torch.dtype | None = None,
|
||||
) -> torch.Tensor:
|
||||
# ops.scaled_fp8_quant supports both dynamic and static quant.
|
||||
# If dynamic, layer.input_scale is None and x_s computed from x.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user