mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 09:25:44 +08:00
Added support for quantization in the LLM module (#1080)
This commit is contained in:
parent
90979c38f8
commit
fbe66e1d0b
@ -38,6 +38,9 @@ class LLM:
|
||||
However, if the `torch_dtype` in the config is `float32`, we will
|
||||
use `float16` instead.
|
||||
seed: The seed to initialize the random number generator for sampling.
|
||||
quantization: The method used to quantize the model weights. Currently,
|
||||
we support "awq". If None, we assume the model weights are not
|
||||
quantized and use `dtype` to determine the data type of the weights.
|
||||
revision: The specific model version to use. It can be a branch name,
|
||||
a tag name, or a commit id.
|
||||
"""
|
||||
@ -51,6 +54,7 @@ class LLM:
|
||||
tensor_parallel_size: int = 1,
|
||||
dtype: str = "auto",
|
||||
seed: int = 0,
|
||||
quantization: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
if "disable_log_stats" not in kwargs:
|
||||
@ -63,6 +67,7 @@ class LLM:
|
||||
tensor_parallel_size=tensor_parallel_size,
|
||||
dtype=dtype,
|
||||
seed=seed,
|
||||
quantization=quantization,
|
||||
**kwargs,
|
||||
)
|
||||
self.llm_engine = LLMEngine.from_engine_args(engine_args)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user