diff --git a/vllm/model_executor/layers/quantization/kernels/scaled_mm/utils.py b/vllm/model_executor/layers/quantization/kernels/scaled_mm/utils.py index 62bbacbc782cd..e5ab5ad4d47cf 100644 --- a/vllm/model_executor/layers/quantization/kernels/scaled_mm/utils.py +++ b/vllm/model_executor/layers/quantization/kernels/scaled_mm/utils.py @@ -17,7 +17,7 @@ def apply_weights_fp8( x_s: torch.Tensor, bias: torch.Tensor, x_s_ub: torch.Tensor | None, - maybe_out_dtype: torch.dtype | None, + maybe_out_dtype: torch.dtype | None = None, ) -> torch.Tensor: # ops.scaled_fp8_quant supports both dynamic and static quant. # If dynamic, layer.input_scale is None and x_s computed from x.