mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-17 11:35:49 +08:00
Skip device and quant Pydantic validation to make plugin device work (#18843)
Signed-off-by: Yikun Jiang <yikunkero@gmail.com>
This commit is contained in:
parent
1661a9c28f
commit
3c49dbdd03
@ -304,7 +304,7 @@ class ModelConfig:
|
|||||||
- 25.6k -> 25,600"""
|
- 25.6k -> 25,600"""
|
||||||
spec_target_max_model_len: Optional[int] = None
|
spec_target_max_model_len: Optional[int] = None
|
||||||
"""Specify the maximum length for spec decoding draft models."""
|
"""Specify the maximum length for spec decoding draft models."""
|
||||||
quantization: Optional[QuantizationMethods] = None
|
quantization: SkipValidation[Optional[QuantizationMethods]] = None
|
||||||
"""Method used to quantize the weights. If `None`, we first check the
|
"""Method used to quantize the weights. If `None`, we first check the
|
||||||
`quantization_config` attribute in the model config file. If that is
|
`quantization_config` attribute in the model config file. If that is
|
||||||
`None`, we assume the model weights are not quantized and use `dtype` to
|
`None`, we assume the model weights are not quantized and use `dtype` to
|
||||||
@ -2231,7 +2231,7 @@ Device = Literal["auto", "cuda", "neuron", "cpu", "tpu", "xpu", "hpu"]
|
|||||||
class DeviceConfig:
|
class DeviceConfig:
|
||||||
"""Configuration for the device to use for vLLM execution."""
|
"""Configuration for the device to use for vLLM execution."""
|
||||||
|
|
||||||
device: Union[Device, torch.device] = "auto"
|
device: SkipValidation[Union[Device, torch.device]] = "auto"
|
||||||
"""Device type for vLLM execution.
|
"""Device type for vLLM execution.
|
||||||
This parameter is deprecated and will be
|
This parameter is deprecated and will be
|
||||||
removed in a future release.
|
removed in a future release.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user