mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 23:25:34 +08:00
[Minor] Add more detailed explanation on quantization argument (#2145)
This commit is contained in:
parent
3a765bd5e1
commit
30fb0956df
@@ -183,7 +183,12 @@ class EngineArgs:
|
|||||||
type=str,
|
type=str,
|
||||||
choices=['awq', 'gptq', 'squeezellm', None],
|
choices=['awq', 'gptq', 'squeezellm', None],
|
||||||
default=None,
|
default=None,
|
||||||
help='Method used to quantize the weights')
|
help='Method used to quantize the weights. If '
|
||||||
|
'None, we first check the `quantization_config` '
|
||||||
|
'attribute in the model config file. If that is '
|
||||||
|
'None, we assume the model weights are not '
|
||||||
|
'quantized and use `dtype` to determine the data '
|
||||||
|
'type of the weights.')
|
||||||
parser.add_argument('--enforce-eager',
|
parser.add_argument('--enforce-eager',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help='Always use eager-mode PyTorch. If False, '
|
help='Always use eager-mode PyTorch. If False, '
|
||||||
|
|||||||
@@ -38,9 +38,10 @@ class LLM:
|
|||||||
However, if the `torch_dtype` in the config is `float32`, we will
|
However, if the `torch_dtype` in the config is `float32`, we will
|
||||||
use `float16` instead.
|
use `float16` instead.
|
||||||
quantization: The method used to quantize the model weights. Currently,
|
quantization: The method used to quantize the model weights. Currently,
|
||||||
we support "awq", "gptq" and "squeezellm". If None, we assume the
|
we support "awq", "gptq" and "squeezellm". If None, we first check
|
||||||
model weights are not quantized and use `dtype` to determine the
|
the `quantization_config` attribute in the model config file. If
|
||||||
data type of the weights.
|
that is None, we assume the model weights are not quantized and use
|
||||||
|
`dtype` to determine the data type of the weights.
|
||||||
revision: The specific model version to use. It can be a branch name,
|
revision: The specific model version to use. It can be a branch name,
|
||||||
a tag name, or a commit id.
|
a tag name, or a commit id.
|
||||||
tokenizer_revision: The specific tokenizer version to use. It can be a
|
tokenizer_revision: The specific tokenizer version to use. It can be a
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user