[Bugfix] Fix qwen2.5-vl image processor (#13286)

This commit is contained in:
Isotr0py 2025-02-15 19:00:11 +08:00 committed by GitHub
parent 067fa2255b
commit 7fdaaf48ef
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 17 additions and 6 deletions

View File

@ -33,10 +33,11 @@ import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from einops import rearrange from einops import rearrange
from transformers import BatchFeature from transformers import BatchFeature
from transformers.models.qwen2_5_vl import (Qwen2_5_VLImageProcessor, from transformers.models.qwen2_5_vl import Qwen2_5_VLProcessor
Qwen2_5_VLProcessor)
from transformers.models.qwen2_5_vl.configuration_qwen2_5_vl import ( from transformers.models.qwen2_5_vl.configuration_qwen2_5_vl import (
Qwen2_5_VLConfig, Qwen2_5_VLVisionConfig) Qwen2_5_VLConfig, Qwen2_5_VLVisionConfig)
from transformers.models.qwen2_vl import (Qwen2VLImageProcessor,
Qwen2VLImageProcessorFast)
from vllm.attention import AttentionMetadata from vllm.attention import AttentionMetadata
from vllm.config import VllmConfig from vllm.config import VllmConfig
@ -693,7 +694,8 @@ class Qwen2_5_VLProcessingInfo(Qwen2VLProcessingInfo):
) -> Qwen2_5_VLProcessor: ) -> Qwen2_5_VLProcessor:
hf_processor = self.ctx.get_hf_processor(Qwen2_5_VLProcessor) hf_processor = self.ctx.get_hf_processor(Qwen2_5_VLProcessor)
image_processor = hf_processor.image_processor # type: ignore image_processor = hf_processor.image_processor # type: ignore
assert isinstance(image_processor, Qwen2_5_VLImageProcessor) assert isinstance(image_processor,
(Qwen2VLImageProcessor, Qwen2VLImageProcessorFast))
if min_pixels: if min_pixels:
image_processor.min_pixels = min_pixels image_processor.min_pixels = min_pixels
@ -713,14 +715,15 @@ class Qwen2_5_VLProcessingInfo(Qwen2VLProcessingInfo):
min_pixels: Optional[int] = None, min_pixels: Optional[int] = None,
max_pixels: Optional[int] = None, max_pixels: Optional[int] = None,
fps: Optional[float] = 2.0, fps: Optional[float] = 2.0,
) -> Qwen2_5_VLImageProcessor: ) -> Union[Qwen2VLImageProcessor, Qwen2VLImageProcessorFast]:
hf_processor = self.get_hf_processor( hf_processor = self.get_hf_processor(
min_pixels=min_pixels, min_pixels=min_pixels,
max_pixels=max_pixels, max_pixels=max_pixels,
fps=fps, fps=fps,
) )
image_processor = hf_processor.image_processor # type: ignore image_processor = hf_processor.image_processor # type: ignore
assert isinstance(image_processor, Qwen2_5_VLImageProcessor) assert isinstance(image_processor,
(Qwen2VLImageProcessor, Qwen2VLImageProcessorFast))
return image_processor return image_processor

View File

@ -31,7 +31,9 @@ import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from einops import rearrange, repeat from einops import rearrange, repeat
from packaging.version import Version
from transformers import BatchFeature from transformers import BatchFeature
from transformers import __version__ as TRANSFORMERS_VERSION
from transformers.models.qwen2_vl import (Qwen2VLImageProcessor, from transformers.models.qwen2_vl import (Qwen2VLImageProcessor,
Qwen2VLProcessor) Qwen2VLProcessor)
from transformers.models.qwen2_vl.configuration_qwen2_vl import ( from transformers.models.qwen2_vl.configuration_qwen2_vl import (
@ -746,7 +748,13 @@ class Qwen2VLProcessingInfo(BaseProcessingInfo):
hf_processor = self.get_hf_processor(min_pixels=min_pixels, hf_processor = self.get_hf_processor(min_pixels=min_pixels,
max_pixels=max_pixels) max_pixels=max_pixels)
image_processor = hf_processor.image_processor # type: ignore image_processor = hf_processor.image_processor # type: ignore
assert isinstance(image_processor, Qwen2VLImageProcessor) if Version(TRANSFORMERS_VERSION) >= Version("4.49"):
from transformers.models.qwen2_vl import Qwen2VLImageProcessorFast
assert isinstance(
image_processor,
(Qwen2VLImageProcessor, Qwen2VLImageProcessorFast))
else:
assert isinstance(image_processor, Qwen2VLImageProcessor)
return image_processor return image_processor
def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]: def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]: