add minimal documentation for torch scaled mm base class

Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
2026-06-10 11:55:43 +08:00 · 2025-11-07 07:30:57 +00:00 · 2025-11-07 07:30:57 +00:00 · 56a05cd818
commit 56a05cd818
parent 7fb465744c
1 changed files with 8 additions and 0 deletions
--- a/vllm/model_executor/layers/quantization/kernels/scaled_mm/pytorch.py
+++ b/vllm/model_executor/layers/quantization/kernels/scaled_mm/pytorch.py
@@ -133,6 +133,14 @@ def torch_channelwise_w8a8_scaled_mm(


 class TorchScaledMMLinearKernel(FP8ScaledMMLinearKernel):
+    """
+    Base class for FP8 linear kernels using Torch.
+    Each subclass represents a kernel variant for
+    specific device capabilities and torch versions,
+    so we split them up and implement
+    get_min_capability() separately for each.
+    """
+
    def get_ouput_padding(self) -> int | None:
        vllm_config = get_current_vllm_config().compilation_config
        pad_output = vllm_config.mode < CompilationMode.VLLM_COMPILE