optional input scales

Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
2026-06-01 19:27:08 +08:00 · 2025-11-12 07:43:16 +00:00 · 2025-11-12 07:43:16 +00:00 · 65ecf487ad
commit 65ecf487ad
parent 4c596a0b1e
1 changed file with 3 additions and 2 deletions
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1325,7 +1325,8 @@ class TestFP8Layer(torch.nn.Module):
        weight_quant_key (QuantKey): Key for weight quantization configuration.
        weight (torch.Tensor): Weight tensor for linear transformation.
        weight_scale (torch.Tensor): Per-tensor or per-group scale for weights.
-        input_scale (torch.Tensor): Scale tensor for input quantization.
+        input_scale (torch.Tensor, optional): Scale tensor for input quantization.
+            Defaults to None.
        out_dtype (torch.dtype, optional): Output tensor data type.
            Defaults to torch.get_default_dtype().
    """
@@ -1336,7 +1337,7 @@ class TestFP8Layer(torch.nn.Module):
        weight_quant_key: QuantKey,
        weight: torch.Tensor,
        weight_scale: torch.Tensor,
-        input_scale: torch.Tensor,
+        input_scale: torch.Tensor | None = None,
        out_dtype: torch.dtype | None = None,
        force_kernel: FP8ScaledMMLinearKernel | None = None,
    ):