mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 20:28:42 +08:00
feat(multimodal): Add customizable background color for RGBA to RGB conversion (#22052)
Signed-off-by: Jinheng Li <ahengljh@gmail.com> Co-authored-by: Jinheng Li <ahengljh@gmail.com>
This commit is contained in:
parent
f81c1bb055
commit
0a6d305e0f
@ -172,6 +172,36 @@ Multi-image input can be extended to perform video captioning. We show this with
|
||||
print(generated_text)
|
||||
```
|
||||
|
||||
#### Custom RGBA Background Color
|
||||
|
||||
When loading RGBA images (images with transparency), vLLM converts them to RGB format. By default, transparent pixels are replaced with a white background. You can customize this background color using the `rgba_background_color` parameter in `media_io_kwargs`.
|
||||
|
||||
??? code
|
||||
|
||||
```python
|
||||
from vllm import LLM
|
||||
|
||||
# Default white background (no configuration needed)
|
||||
llm = LLM(model="llava-hf/llava-1.5-7b-hf")
|
||||
|
||||
# Custom black background for dark theme
|
||||
llm = LLM(
|
||||
model="llava-hf/llava-1.5-7b-hf",
|
||||
media_io_kwargs={"image": {"rgba_background_color": [0, 0, 0]}}
|
||||
)
|
||||
|
||||
# Custom brand color background (e.g., blue)
|
||||
llm = LLM(
|
||||
model="llava-hf/llava-1.5-7b-hf",
|
||||
media_io_kwargs={"image": {"rgba_background_color": [0, 0, 255]}}
|
||||
)
|
||||
```
|
||||
|
||||
!!! note
|
||||
- The `rgba_background_color` parameter accepts an RGB color as a list `[R, G, B]` or tuple `(R, G, B)` of exactly three integers, each in the range 0-255
|
||||
- This setting only affects RGBA images with transparency; RGB images are unchanged
|
||||
- If not specified, the default white background `(255, 255, 255)` is used for backward compatibility
|
||||
|
||||
### Video Inputs
|
||||
|
||||
You can pass a list of NumPy arrays directly to the `'video'` field of the multi-modal dictionary
|
||||
@ -478,6 +508,20 @@ Full example: <gh-file:examples/online_serving/openai_chat_completion_client_for
|
||||
export VLLM_VIDEO_FETCH_TIMEOUT=<timeout>
|
||||
```
|
||||
|
||||
#### Custom RGBA Background Color
|
||||
|
||||
To use a custom background color for RGBA images, pass the `rgba_background_color` parameter via `--media-io-kwargs`:
|
||||
|
||||
```bash
|
||||
# Example: Black background for dark theme
|
||||
vllm serve llava-hf/llava-1.5-7b-hf \
|
||||
--media-io-kwargs '{"image": {"rgba_background_color": [0, 0, 0]}}'
|
||||
|
||||
# Example: Custom gray background
|
||||
vllm serve llava-hf/llava-1.5-7b-hf \
|
||||
--media-io-kwargs '{"image": {"rgba_background_color": [128, 128, 128]}}'
|
||||
```
|
||||
|
||||
### Audio Inputs
|
||||
|
||||
Audio input is supported according to [OpenAI Audio API](https://platform.openai.com/docs/guides/audio?audio-generation-quickstart-example=audio-in).
|
||||
|
||||
@ -3,9 +3,10 @@
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from PIL import Image, ImageChops
|
||||
|
||||
from vllm.multimodal.image import convert_image_mode
|
||||
from vllm.multimodal.image import ImageMediaIO, convert_image_mode
|
||||
|
||||
ASSETS_DIR = Path(__file__).parent / "assets"
|
||||
assert ASSETS_DIR.exists()
|
||||
@ -35,3 +36,115 @@ def test_rgba_to_rgb():
|
||||
assert converted_image_numpy[i][j][0] == 255
|
||||
assert converted_image_numpy[i][j][1] == 255
|
||||
assert converted_image_numpy[i][j][2] == 255
|
||||
|
||||
|
||||
def test_rgba_to_rgb_custom_background(tmp_path):
    """Check that ImageMediaIO fills transparent RGBA pixels with the
    configured background color when loading an image."""

    def rgb_at(pixels, row, col):
        # Collapse the three per-channel checks into one tuple comparison.
        return tuple(int(channel) for channel in pixels[row][col][:3])

    # Opaque red 10x10 canvas whose top-left 5x5 quadrant is fully
    # transparent.
    source = Image.new("RGBA", (10, 10), (255, 0, 0, 255))
    for x in range(5):
        for y in range(5):
            source.putpixel((x, y), (0, 0, 0, 0))

    # Persist the fixture so both the file and the bytes loaders can be
    # exercised.
    png_path = tmp_path / "test_rgba.png"
    source.save(png_path)

    # Case 1: no kwargs -> white background (backward compatibility).
    default_pixels = np.array(ImageMediaIO().load_file(png_path))
    assert rgb_at(default_pixels, 0, 0) == (255, 255, 255)  # transparent
    assert rgb_at(default_pixels, 5, 5) == (255, 0, 0)  # opaque stays red

    # Case 2: black background supplied as a tuple.
    black_loader = ImageMediaIO(rgba_background_color=(0, 0, 0))
    black_pixels = np.array(black_loader.load_file(png_path))
    assert rgb_at(black_pixels, 0, 0) == (0, 0, 0)  # transparent
    assert rgb_at(black_pixels, 5, 5) == (255, 0, 0)  # opaque stays red

    # Case 3: blue background supplied as a list.
    blue_loader = ImageMediaIO(rgba_background_color=[0, 0, 255])
    blue_pixels = np.array(blue_loader.load_file(png_path))
    assert rgb_at(blue_pixels, 0, 0) == (0, 0, 255)  # transparent

    # Case 4: green background through the load_bytes entry point.
    raw_png = png_path.read_bytes()
    green_loader = ImageMediaIO(rgba_background_color=(0, 255, 0))
    green_pixels = np.array(green_loader.load_bytes(raw_png))
    assert rgb_at(green_pixels, 0, 0) == (0, 255, 0)  # transparent
|
||||
|
||||
|
||||
def test_rgba_background_color_validation():
    """Check that ImageMediaIO rejects malformed rgba_background_color
    values with a ValueError and accepts well-formed ones."""
    expected_message = "rgba_background_color must be a list or tuple"

    rejected_values = (
        # Invalid types
        "255,255,255",
        255,
        # Wrong number of elements
        (255, 255),
        (255, 255, 255, 255),
        # Non-integer components
        (255.0, 255.0, 255.0),
        (255, "255", 255),
        # Components outside [0, 255]
        (256, 255, 255),
        (255, -1, 255),
    )
    for bad_value in rejected_values:
        with pytest.raises(ValueError, match=expected_message):
            ImageMediaIO(rgba_background_color=bad_value)

    # Well-formed tuples and lists must construct without raising.
    accepted_values = ((0, 0, 0), [255, 255, 255], (128, 128, 128))
    for good_value in accepted_values:
        ImageMediaIO(rgba_background_color=good_value)
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
import pybase64
|
||||
import torch
|
||||
@ -23,9 +24,10 @@ def rescale_image_size(image: Image.Image,
|
||||
return image
|
||||
|
||||
|
||||
# TODO: Support customizable background color to fill in.
|
||||
def rgba_to_rgb(
|
||||
image: Image.Image, background_color=(255, 255, 255)) -> Image.Image:
|
||||
image: Image.Image,
|
||||
background_color: Union[tuple[int, int, int], list[int]] = (255, 255, 255)
|
||||
) -> Image.Image:
|
||||
"""Convert an RGBA image to RGB with filled background color."""
|
||||
assert image.mode == "RGBA"
|
||||
converted = Image.new("RGB", image.size, background_color)
|
||||
@ -55,10 +57,35 @@ class ImageMediaIO(MediaIO[Image.Image]):
|
||||
# for flexible control.
|
||||
self.kwargs = kwargs
|
||||
|
||||
# Extract RGBA background color from kwargs if provided
|
||||
# Default to white background for backward compatibility
|
||||
rgba_bg = kwargs.get('rgba_background_color', (255, 255, 255))
|
||||
# Convert list to tuple for consistency
|
||||
if isinstance(rgba_bg, list):
|
||||
rgba_bg = tuple(rgba_bg)
|
||||
|
||||
# Validate rgba_background_color format
|
||||
if not (isinstance(rgba_bg, tuple) and len(rgba_bg) == 3
|
||||
and all(isinstance(c, int) and 0 <= c <= 255
|
||||
for c in rgba_bg)):
|
||||
raise ValueError(
|
||||
"rgba_background_color must be a list or tuple of 3 integers "
|
||||
"in the range [0, 255].")
|
||||
self.rgba_background_color = rgba_bg
|
||||
|
||||
def _convert_image_mode(self, image: Image.Image) -> Image.Image:
    """Return `image` in this loader's `image_mode`.

    RGBA -> RGB conversions are filled with `self.rgba_background_color`;
    every other conversion is delegated to `convert_image_mode`.
    """
    target_mode = self.image_mode

    # Already in the requested mode: hand back the same object untouched.
    if image.mode == target_mode:
        return image

    # Only the RGBA -> RGB path needs the configurable background fill.
    if image.mode == "RGBA" and target_mode == "RGB":
        return rgba_to_rgb(image, self.rgba_background_color)

    return convert_image_mode(image, target_mode)
|
||||
|
||||
def load_bytes(self, data: bytes) -> Image.Image:
    """Decode an image from raw bytes and convert it to `self.image_mode`.

    Args:
        data: Encoded image file contents (e.g. the bytes of a PNG).

    Returns:
        The decoded image, converted via `self._convert_image_mode` so
        that RGBA -> RGB conversion honors `self.rgba_background_color`.
    """
    image = Image.open(BytesIO(data))
    # Image.open is lazy; force the pixel data to be decoded now.
    image.load()
    # Use the instance-level conversion only: an earlier
    # `return convert_image_mode(...)` here would bypass the configured
    # background color and leave this line unreachable.
    return self._convert_image_mode(image)
|
||||
|
||||
def load_base64(self, media_type: str, data: str) -> Image.Image:
|
||||
return self.load_bytes(pybase64.b64decode(data, validate=True))
|
||||
@ -66,7 +93,7 @@ class ImageMediaIO(MediaIO[Image.Image]):
|
||||
def load_file(self, filepath: Path) -> Image.Image:
    """Load an image from disk and convert it to `self.image_mode`.

    Args:
        filepath: Path of the image file to open.

    Returns:
        The decoded image, converted via `self._convert_image_mode` so
        that RGBA -> RGB conversion honors `self.rgba_background_color`.
    """
    image = Image.open(filepath)
    # Image.open is lazy; force the pixel data to be decoded now.
    image.load()
    # Use the instance-level conversion only: an earlier
    # `return convert_image_mode(...)` here would bypass the configured
    # background color and leave this line unreachable.
    return self._convert_image_mode(image)
|
||||
|
||||
def encode_base64(
|
||||
self,
|
||||
@ -77,7 +104,7 @@ class ImageMediaIO(MediaIO[Image.Image]):
|
||||
image = media
|
||||
|
||||
with BytesIO() as buffer:
|
||||
image = convert_image_mode(image, self.image_mode)
|
||||
image = self._convert_image_mode(image)
|
||||
image.save(buffer, image_format)
|
||||
data = buffer.getvalue()
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user