mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-12 12:25:40 +08:00
[Misc]Add BNB quantization for PaliGemmaForConditionalGeneration (#12237)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
parent
96912550c8
commit
1f1542afa9
@ -136,6 +136,17 @@ class PaliGemmaMultiModalProjector(nn.Module):
|
|||||||
@INPUT_REGISTRY.register_input_processor(input_processor_for_paligemma)
|
@INPUT_REGISTRY.register_input_processor(input_processor_for_paligemma)
|
||||||
class PaliGemmaForConditionalGeneration(nn.Module, SupportsMultiModal,
|
class PaliGemmaForConditionalGeneration(nn.Module, SupportsMultiModal,
|
||||||
SupportsPP):
|
SupportsPP):
|
||||||
|
packed_modules_mapping = {
|
||||||
|
"qkv_proj": [
|
||||||
|
"q_proj",
|
||||||
|
"k_proj",
|
||||||
|
"v_proj",
|
||||||
|
],
|
||||||
|
"gate_up_proj": [
|
||||||
|
"gate_proj",
|
||||||
|
"up_proj",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
|
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|||||||
@ -344,10 +344,16 @@ class SiglipMLP(nn.Module):
|
|||||||
|
|
||||||
self.config = config
|
self.config = config
|
||||||
self.activation_fn = get_act_fn(config.hidden_act)
|
self.activation_fn = get_act_fn(config.hidden_act)
|
||||||
|
# Special handling for BNB quantization
|
||||||
# For quantization, we require the hidden size to be a multiple of 64
|
if quant_config and quant_config.get_name() == "bitsandbytes":
|
||||||
quantizable = (config.hidden_size % 64 == 0
|
quantizable = True
|
||||||
and config.intermediate_size % 64 == 0)
|
else:
|
||||||
|
# For other quantization, we require both the hidden size and the
|
||||||
|
# intermediate size to be multiples of 64
|
||||||
|
quantizable = (
|
||||||
|
config.hidden_size % 64 == 0
|
||||||
|
and config.intermediate_size % 64 == 0
|
||||||
|
)
|
||||||
self.fc1 = ColumnParallelLinear(
|
self.fc1 = ColumnParallelLinear(
|
||||||
config.hidden_size,
|
config.hidden_size,
|
||||||
config.intermediate_size,
|
config.intermediate_size,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user