mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 04:15:01 +08:00
[Bugfix] Follow-up fix on MediaWithBytes (#29951)
Signed-off-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
parent
42c1949643
commit
787b84a9fc
@@ -21,6 +21,8 @@ class MediaWithBytes(Generic[_T]):
|
||||
|
||||
The wrapper delegates attribute access to the underlying media object,
|
||||
making it behave transparently like the wrapped type (e.g., PIL.Image).
|
||||
|
||||
+NOTE: Currently, this wrapper is used only for the image modality.
|
||||
"""
|
||||
|
||||
media: _T
|
||||
|
||||
@@ -32,6 +32,7 @@ if TYPE_CHECKING:
|
||||
from PIL.Image import Image
|
||||
from transformers.feature_extraction_utils import BatchFeature
|
||||
|
||||
+from .base import MediaWithBytes
|
||||
from .processing import MultiModalHashes
|
||||
|
||||
else:
|
||||
@@ -59,7 +60,7 @@ Represents a single audio
|
||||
item, which can be passed to a HuggingFace `AudioProcessor`.
|
||||
"""
|
||||
|
||||
-ImageItem: TypeAlias = Union[HfImageItem, "torch.Tensor"]
|
||||
+ImageItem: TypeAlias = Union[HfImageItem, "torch.Tensor", "MediaWithBytes[HfImageItem]"]
|
||||
"""
|
||||
A `transformers.image_utils.ImageInput` representing a single image
|
||||
item, which can be passed to a HuggingFace `ImageProcessor`.
|
||||
|
||||
@@ -484,7 +484,7 @@ class MultiModalDataParser:
|
||||
return ImageEmbeddingItems(data)
|
||||
|
||||
if (
|
||||
-isinstance(data, PILImage.Image)
|
||||
+isinstance(data, (PILImage.Image, MediaWithBytes))
|
||||
or isinstance(data, (np.ndarray, torch.Tensor))
|
||||
and data.ndim == 3
|
||||
):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user