diff --git a/vllm/model_executor/models/baichuan.py b/vllm/model_executor/models/baichuan.py index b613b70a7564a..2e51b9c9c0c73 100644 --- a/vllm/model_executor/models/baichuan.py +++ b/vllm/model_executor/models/baichuan.py @@ -46,7 +46,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors -from .interfaces import SupportsLoRA, SupportsPP +from .interfaces import SupportsLoRA, SupportsPP, SupportsQuant from .utils import (is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers) @@ -334,7 +334,8 @@ class BaiChuanModel(nn.Module): return hidden_states -class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA, SupportsPP): +class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA, SupportsPP, + SupportsQuant): packed_modules_mapping = { "W_pack": ["W_pack"], "gate_up_proj": [