mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-14 22:44:32 +08:00
[Bugfix] Fix Minicpm-O-int4 GPTQ model inference (#17397)
Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
parent
08e15defa9
commit
2fa2a50bf9
@ -28,12 +28,16 @@ from typing import (Any, Callable, Literal, Optional, Set, Tuple, TypedDict,
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from transformers import BatchFeature
|
from transformers import BatchFeature, PretrainedConfig
|
||||||
from transformers.modeling_outputs import BaseModelOutputWithPast
|
from transformers.modeling_outputs import BaseModelOutputWithPast
|
||||||
from transformers.models.whisper.modeling_whisper import (
|
from transformers.models.whisper.modeling_whisper import (
|
||||||
ACT2FN, WHISPER_ATTENTION_CLASSES, WhisperConfig, WhisperEncoder)
|
ACT2FN, WHISPER_ATTENTION_CLASSES, WhisperConfig, WhisperEncoder)
|
||||||
|
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig
|
||||||
|
from vllm.model_executor.layers.quantization import QuantizationConfig
|
||||||
|
from vllm.model_executor.layers.quantization.gptq import GPTQConfig
|
||||||
|
from vllm.model_executor.layers.quantization.gptq_marlin import (
|
||||||
|
GPTQMarlinConfig)
|
||||||
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalKwargs
|
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalKwargs
|
||||||
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
|
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
|
||||||
NestedTensors)
|
NestedTensors)
|
||||||
@ -512,6 +516,36 @@ class MiniCPMO(MiniCPMV2_6):
|
|||||||
|
|
||||||
self.audio_token_id = None
|
self.audio_token_id = None
|
||||||
|
|
||||||
|
def _maybe_ignore_quant_config(
    self,
    quant_config: Optional[QuantizationConfig],
) -> Optional[QuantizationConfig]:
    """Discard GPTQ-style quantization configs; pass anything else through.

    GPTQ configs do not have a list of ignored modules, however AutoGPTQ
    seems to avoid vision encoder sections for some models.
    See: https://huggingface.co/openbmb/MiniCPM-o-2_6-int4

    Args:
        quant_config: Quantization config intended for a submodule, or
            ``None`` when no quantization is configured.

    Returns:
        ``None`` when ``quant_config`` is a ``GPTQConfig`` or
        ``GPTQMarlinConfig`` instance; otherwise ``quant_config`` itself,
        unchanged (which may already be ``None``).
    """
    if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)):
        return None
    return quant_config
|
||||||
|
|
||||||
|
def init_vision_module(
    self,
    config: PretrainedConfig,
    quant_config: Optional[QuantizationConfig] = None,
    prefix: str = "",
) -> nn.Module:
    """Build the vision module through the parent implementation.

    The incoming ``quant_config`` is first filtered by
    ``_maybe_ignore_quant_config`` and the result is what the parent
    class receives.

    Args:
        config: Model configuration forwarded to the parent.
        quant_config: Requested quantization config, if any.
        prefix: Name prefix forwarded to the parent.

    Returns:
        The module returned by the parent's ``init_vision_module``.
    """
    # The MiniCPMO GPTQ model leaves the vpm unquantized.
    vpm_quant_config = self._maybe_ignore_quant_config(quant_config)
    return super().init_vision_module(config, vpm_quant_config, prefix)
|
||||||
|
|
||||||
|
def init_resampler(
    self,
    embed_dim: int,
    vision_dim: int,
    quant_config: Optional[QuantizationConfig] = None,
    prefix: str = "",
) -> nn.Module:
    """Build the resampler through the parent implementation.

    The incoming ``quant_config`` is first filtered by
    ``_maybe_ignore_quant_config`` and the result is what the parent
    class receives.

    Args:
        embed_dim: Forwarded unchanged to the parent.
        vision_dim: Forwarded unchanged to the parent.
        quant_config: Requested quantization config, if any.
        prefix: Name prefix forwarded to the parent.

    Returns:
        The module returned by the parent's ``init_resampler``.
    """
    # The MiniCPMO GPTQ model leaves the resampler unquantized.
    resampler_quant_config = self._maybe_ignore_quant_config(quant_config)
    return super().init_resampler(
        embed_dim,
        vision_dim,
        resampler_quant_config,
        prefix,
    )
|
||||||
|
|
||||||
def init_audio_module(self, *, vllm_config: VllmConfig, prefix: str = ""):
|
def init_audio_module(self, *, vllm_config: VllmConfig, prefix: str = ""):
|
||||||
# Do not use parameters temporarily
|
# Do not use parameters temporarily
|
||||||
audio_config = self.config.audio_config
|
audio_config = self.config.audio_config
|
||||||
|
|||||||
@ -1181,7 +1181,7 @@ class MiniCPMV2_6(MiniCPMVBaseModel, SupportsLoRA):
|
|||||||
def init_vision_module(
|
def init_vision_module(
|
||||||
self,
|
self,
|
||||||
config: PretrainedConfig,
|
config: PretrainedConfig,
|
||||||
quant_config: Optional[QuantizationConfig],
|
quant_config: Optional[QuantizationConfig] = None,
|
||||||
prefix: str = "",
|
prefix: str = "",
|
||||||
) -> nn.Module:
|
) -> nn.Module:
|
||||||
model = Idefics2VisionTransformer(config.vision_config,
|
model = Idefics2VisionTransformer(config.vision_config,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user