diff --git a/vllm/model_executor/models/bamba.py b/vllm/model_executor/models/bamba.py index ec62e41d59f0f..61b68125e07e0 100644 --- a/vllm/model_executor/models/bamba.py +++ b/vllm/model_executor/models/bamba.py @@ -33,7 +33,7 @@ from vllm.sequence import IntermediateTensors from vllm.utils import LayerBlockType from .interfaces import (HasInnerState, IsHybrid, SupportsLoRA, SupportsPP, - SupportsV0Only) + SupportsQuant, SupportsV0Only) from .utils import (is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, maybe_prefix) @@ -367,7 +367,7 @@ class BambaModel(nn.Module): class BambaForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP, - IsHybrid, SupportsV0Only): + IsHybrid, SupportsV0Only, SupportsQuant): packed_modules_mapping = { "qkv_proj": [ "q_proj",