[Doc] fix arg docstring in linear layers (#18410)

Signed-off-by: giantcroc <1204449533@qq.com>
2026-01-29 18:57:13 +08:00 · 2025-05-21 21:45:57 +08:00 · 2025-05-21 21:45:57 +08:00 · c154d89306
commit c154d89306
parent eca18691d2
1 changed file with 9 additions and 0 deletions
--- a/vllm/model_executor/layers/linear.py
+++ b/vllm/model_executor/layers/linear.py
@@ -261,6 +261,7 @@ class ReplicatedLinear(LinearBase):
        quant_config: Quantization configure.
        prefix: The name of the layer in the state dict, including all parents
                        (e.g. model.layers.0.qkv_proj)
+        return_bias: If true, return bias together with outputs in forward pass.
    """

    def __init__(
@@ -523,6 +524,7 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
        quant_config: Quantization configure.
        prefix: The name of the layer in the state dict, including all parents
                        (e.g. model.layers.0.qkv_proj)
+        return_bias: If true, return bias together with outputs in forward pass.
    """

    def __init__(
@@ -805,6 +807,7 @@ class QKVParallelLinear(ColumnParallelLinear):
        quant_config: Quantization configure.
        prefix: The name of the layer in the state dict, including all parents
                        (e.g. model.layers.0.qkv_proj)
+        return_bias: If true, return bias together with outputs in forward pass.
    """

    def __init__(
@@ -1155,7 +1158,13 @@ class RowParallelLinear(LinearBase):
                       bias can be fused with other element-wise operations.
                       We skip adding bias but instead return it.
        params_dtype: Data type for the parameters.
+        reduce_results: If true, call all-reduce on output and make Y available
+                       to all GPUs; otherwise, every GPU will have its own
+                       output, which is Y = X_iA_i.
        quant_config: Quantization configure.
+        prefix: The name of the layer in the state dict, including all parents
+                        (e.g. model.layers.0.down_proj)
+        return_bias: If true, return bias together with outputs in forward pass.
    """

    def __init__(