[Bugfix] Follow-up fix on MediaWithBytes (#29951)

Signed-off-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
Roger Wang 2025-12-03 02:42:49 -08:00 committed by GitHub
parent 42c1949643
commit 787b84a9fc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 5 additions and 2 deletions

View File

@@ -21,6 +21,8 @@ class MediaWithBytes(Generic[_T]):
The wrapper delegates attribute access to the underlying media object, The wrapper delegates attribute access to the underlying media object,
making it behave transparently like the wrapped type (e.g., PIL.Image). making it behave transparently like the wrapped type (e.g., PIL.Image).
NOTE: Currently, this wrapper is used only for the image modality.
""" """
media: _T media: _T

View File

@@ -32,6 +32,7 @@ if TYPE_CHECKING:
from PIL.Image import Image from PIL.Image import Image
from transformers.feature_extraction_utils import BatchFeature from transformers.feature_extraction_utils import BatchFeature
from .base import MediaWithBytes
from .processing import MultiModalHashes from .processing import MultiModalHashes
else: else:
@@ -59,7 +60,7 @@ Represents a single audio
item, which can be passed to a HuggingFace `AudioProcessor`. item, which can be passed to a HuggingFace `AudioProcessor`.
""" """
ImageItem: TypeAlias = Union[HfImageItem, "torch.Tensor"] ImageItem: TypeAlias = Union[HfImageItem, "torch.Tensor", "MediaWithBytes[HfImageItem]"]
""" """
A `transformers.image_utils.ImageInput` representing a single image A `transformers.image_utils.ImageInput` representing a single image
item, which can be passed to a HuggingFace `ImageProcessor`. item, which can be passed to a HuggingFace `ImageProcessor`.

View File

@@ -484,7 +484,7 @@ class MultiModalDataParser:
return ImageEmbeddingItems(data) return ImageEmbeddingItems(data)
if ( if (
isinstance(data, PILImage.Image) isinstance(data, (PILImage.Image, MediaWithBytes))
or isinstance(data, (np.ndarray, torch.Tensor)) or isinstance(data, (np.ndarray, torch.Tensor))
and data.ndim == 3 and data.ndim == 3
): ):