diff --git a/vllm/model_executor/models/arctic.py b/vllm/model_executor/models/arctic.py index d015682aab47f..27df448e63f72 100644 --- a/vllm/model_executor/models/arctic.py +++ b/vllm/model_executor/models/arctic.py @@ -33,7 +33,7 @@ from vllm.model_executor.utils import set_weight_attrs from vllm.sequence import IntermediateTensors from vllm.transformers_utils.configs.arctic import ArcticConfig -from .interfaces import SupportsPP +from .interfaces import SupportsPP, SupportsQuant from .utils import (extract_layer_index, is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, maybe_prefix) @@ -423,7 +423,8 @@ class ArcticModel(nn.Module): return hidden_states -class ArcticForCausalLM(nn.Module, SupportsPP): +class ArcticForCausalLM(nn.Module, SupportsPP, SupportsQuant): + packed_modules_mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]} def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): super().__init__()