From 00294e1bc63de233966c5b99c3118ebece442611 Mon Sep 17 00:00:00 2001 From: Kyle Sayers Date: Tue, 18 Feb 2025 00:35:09 -0500 Subject: [PATCH] [Quant] Arctic SupportsQuant (#13366) --- vllm/model_executor/models/arctic.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/models/arctic.py b/vllm/model_executor/models/arctic.py index d015682aab47f..27df448e63f72 100644 --- a/vllm/model_executor/models/arctic.py +++ b/vllm/model_executor/models/arctic.py @@ -33,7 +33,7 @@ from vllm.model_executor.utils import set_weight_attrs from vllm.sequence import IntermediateTensors from vllm.transformers_utils.configs.arctic import ArcticConfig -from .interfaces import SupportsPP +from .interfaces import SupportsPP, SupportsQuant from .utils import (extract_layer_index, is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, maybe_prefix) @@ -423,7 +423,8 @@ class ArcticModel(nn.Module): return hidden_states -class ArcticForCausalLM(nn.Module, SupportsPP): +class ArcticForCausalLM(nn.Module, SupportsPP, SupportsQuant): + packed_modules_mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]} def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): super().__init__()