mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-15 11:36:20 +08:00
[Misc] Fix arg names (#5524)
This commit is contained in:
parent
703475f6c2
commit
d74674bbd9
@ -165,7 +165,7 @@ if __name__ == '__main__':
|
|||||||
choices=["v1", "v2"],
|
choices=["v1", "v2"],
|
||||||
default="v2")
|
default="v2")
|
||||||
parser.add_argument("--batch-size", type=int, default=8)
|
parser.add_argument("--batch-size", type=int, default=8)
|
||||||
parser.add_argument("--seq_len", type=int, default=4096)
|
parser.add_argument("--seq-len", type=int, default=4096)
|
||||||
parser.add_argument("--num-query-heads", type=int, default=64)
|
parser.add_argument("--num-query-heads", type=int, default=64)
|
||||||
parser.add_argument("--num-kv-heads", type=int, default=8)
|
parser.add_argument("--num-kv-heads", type=int, default=8)
|
||||||
parser.add_argument("--head-size",
|
parser.add_argument("--head-size",
|
||||||
|
|||||||
@ -17,7 +17,7 @@ def main():
|
|||||||
type=int,
|
type=int,
|
||||||
default=0,
|
default=0,
|
||||||
help='known good models by index, [0-4]')
|
help='known good models by index, [0-4]')
|
||||||
parser.add_argument('--tensor_parallel_size',
|
parser.add_argument('--tensor-parallel-size',
|
||||||
'-t',
|
'-t',
|
||||||
type=int,
|
type=int,
|
||||||
default=1,
|
default=1,
|
||||||
|
|||||||
@ -327,7 +327,7 @@ if __name__ == "__main__":
|
|||||||
"--quantization-param-path <filename>). This is only used "
|
"--quantization-param-path <filename>). This is only used "
|
||||||
"if the KV cache dtype is FP8 and on ROCm (AMD GPU).")
|
"if the KV cache dtype is FP8 and on ROCm (AMD GPU).")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--quantized_model",
|
"--quantized-model",
|
||||||
help="Specify the directory containing a single quantized HF model. "
|
help="Specify the directory containing a single quantized HF model. "
|
||||||
"It is expected that the quantization format is FP8_E4M3, for use "
|
"It is expected that the quantization format is FP8_E4M3, for use "
|
||||||
"on ROCm (AMD GPU).",
|
"on ROCm (AMD GPU).",
|
||||||
@ -339,18 +339,18 @@ if __name__ == "__main__":
|
|||||||
choices=["auto", "safetensors", "npz", "pt"],
|
choices=["auto", "safetensors", "npz", "pt"],
|
||||||
default="auto")
|
default="auto")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--output_dir",
|
"--output-dir",
|
||||||
help="Optionally specify the output directory. By default the "
|
help="Optionally specify the output directory. By default the "
|
||||||
"KV cache scaling factors will be saved in the model directory, "
|
"KV cache scaling factors will be saved in the model directory, "
|
||||||
"however you can override this behavior here.",
|
"however you can override this behavior here.",
|
||||||
default=None)
|
default=None)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--output_name",
|
"--output-name",
|
||||||
help="Optionally specify the output filename.",
|
help="Optionally specify the output filename.",
|
||||||
# TODO: Change this once additional scaling factors are enabled
|
# TODO: Change this once additional scaling factors are enabled
|
||||||
default="kv_cache_scales.json")
|
default="kv_cache_scales.json")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--tp_size",
|
"--tp-size",
|
||||||
help="Optionally specify the tensor-parallel (TP) size that the "
|
help="Optionally specify the tensor-parallel (TP) size that the "
|
||||||
"quantized model should correspond to. If specified, during KV "
|
"quantized model should correspond to. If specified, during KV "
|
||||||
"cache scaling factor extraction the observed TP size will be "
|
"cache scaling factor extraction the observed TP size will be "
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user