feat(multimodal): Add customizable background color for RGBA to RGB conversion (#22052)

Signed-off-by: Jinheng Li <ahengljh@gmail.com>
Co-authored-by: Jinheng Li <ahengljh@gmail.com>
This commit is contained in:
Gamhang 2025-08-01 21:07:33 +08:00 committed by GitHub
parent f81c1bb055
commit 0a6d305e0f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 190 additions and 6 deletions

View File

@ -172,6 +172,36 @@ Multi-image input can be extended to perform video captioning. We show this with
print(generated_text)
```
#### Custom RGBA Background Color
When loading RGBA images (images with transparency), vLLM converts them to RGB format. By default, transparent pixels are replaced with a white background. You can customize this background color using the `rgba_background_color` parameter in `media_io_kwargs`.
??? code
```python
from vllm import LLM
# Default white background (no configuration needed)
llm = LLM(model="llava-hf/llava-1.5-7b-hf")
# Custom black background for dark theme
llm = LLM(
model="llava-hf/llava-1.5-7b-hf",
media_io_kwargs={"image": {"rgba_background_color": [0, 0, 0]}}
)
# Custom brand color background (e.g., blue)
llm = LLM(
model="llava-hf/llava-1.5-7b-hf",
media_io_kwargs={"image": {"rgba_background_color": [0, 0, 255]}}
)
```
!!! note
- The `rgba_background_color` parameter accepts RGB values as a list `[R, G, B]` or tuple `(R, G, B)`, where each value is an integer in the range 0-255; any other value raises a `ValueError`
- This setting only affects RGBA images with transparency; RGB images are unchanged
- If not specified, the default white background `(255, 255, 255)` is used for backward compatibility
### Video Inputs
You can pass a list of NumPy arrays directly to the `'video'` field of the multi-modal dictionary
@ -478,6 +508,20 @@ Full example: <gh-file:examples/online_serving/openai_chat_completion_client_for
export VLLM_VIDEO_FETCH_TIMEOUT=<timeout>
```
#### Custom RGBA Background Color
To use a custom background color for RGBA images, pass the `rgba_background_color` parameter via `--media-io-kwargs`:
```bash
# Example: Black background for dark theme
vllm serve llava-hf/llava-1.5-7b-hf \
--media-io-kwargs '{"image": {"rgba_background_color": [0, 0, 0]}}'
# Example: Custom gray background
vllm serve llava-hf/llava-1.5-7b-hf \
--media-io-kwargs '{"image": {"rgba_background_color": [128, 128, 128]}}'
```
### Audio Inputs
Audio input is supported according to [OpenAI Audio API](https://platform.openai.com/docs/guides/audio?audio-generation-quickstart-example=audio-in).

View File

@ -3,9 +3,10 @@
from pathlib import Path
import numpy as np
import pytest
from PIL import Image, ImageChops
from vllm.multimodal.image import convert_image_mode
from vllm.multimodal.image import ImageMediaIO, convert_image_mode
ASSETS_DIR = Path(__file__).parent / "assets"
assert ASSETS_DIR.exists()
@ -35,3 +36,115 @@ def test_rgba_to_rgb():
assert converted_image_numpy[i][j][0] == 255
assert converted_image_numpy[i][j][1] == 255
assert converted_image_numpy[i][j][2] == 255
def test_rgba_to_rgb_custom_background(tmp_path):
    """Check that ImageMediaIO fills transparent pixels with the chosen color.

    A 10x10 opaque red RGBA image whose top-left 5x5 quadrant is fully
    transparent is saved under ``tmp_path`` and reloaded through several
    ``ImageMediaIO`` instances, each configured with a different
    ``rgba_background_color``.
    """
    white, black = [255, 255, 255], [0, 0, 0]
    red, green, blue = [255, 0, 0], [0, 255, 0], [0, 0, 255]

    def rgb_at(loaded, row, col):
        """Return the first three channel values of one pixel as a list."""
        return np.array(loaded)[row][col][:3].tolist()

    # Build the fixture: opaque red everywhere, then clear the top-left
    # quadrant to fully transparent in a single fill.
    fixture = Image.new("RGBA", (10, 10), (255, 0, 0, 255))
    fixture.paste((0, 0, 0, 0), (0, 0, 5, 5))
    png_path = tmp_path / "test_rgba.png"
    fixture.save(png_path)

    # Case 1: no kwargs -> white background (backward compatibility).
    loaded_default = ImageMediaIO().load_file(png_path)
    assert rgb_at(loaded_default, 0, 0) == white  # transparent region
    assert rgb_at(loaded_default, 5, 5) == red  # opaque region untouched

    # Case 2: black background given as a tuple.
    loaded_black = ImageMediaIO(
        rgba_background_color=(0, 0, 0)).load_file(png_path)
    assert rgb_at(loaded_black, 0, 0) == black
    assert rgb_at(loaded_black, 5, 5) == red

    # Case 3: blue background given as a list.
    loaded_blue = ImageMediaIO(
        rgba_background_color=[0, 0, 255]).load_file(png_path)
    assert rgb_at(loaded_blue, 0, 0) == blue

    # Case 4: the load_bytes entry point applies the background as well.
    green_io = ImageMediaIO(rgba_background_color=(0, 255, 0))
    loaded_green = green_io.load_bytes(png_path.read_bytes())
    assert rgb_at(loaded_green, 0, 0) == green
def test_rgba_background_color_validation():
    """Check that ImageMediaIO rejects malformed rgba_background_color values.

    Every rejected value must raise ``ValueError`` with the documented
    message prefix; the accepted values must construct without raising.
    """
    expected_message = "rgba_background_color must be a list or tuple"

    rejected_values = (
        # Wrong container type
        "255,255,255",
        255,
        # Wrong number of elements
        (255, 255),
        (255, 255, 255, 255),
        # Non-integer components
        (255.0, 255.0, 255.0),
        (255, "255", 255),
        # Components outside [0, 255]
        (256, 255, 255),
        (255, -1, 255),
    )
    for bad_value in rejected_values:
        with pytest.raises(ValueError, match=expected_message):
            ImageMediaIO(rgba_background_color=bad_value)

    # Well-formed tuples and lists are accepted silently.
    for good_value in ((0, 0, 0), [255, 255, 255], (128, 128, 128)):
        ImageMediaIO(rgba_background_color=good_value)

View File

@ -3,6 +3,7 @@
from io import BytesIO
from pathlib import Path
from typing import Union
import pybase64
import torch
@ -23,9 +24,10 @@ def rescale_image_size(image: Image.Image,
return image
# TODO: Support customizable background color to fill in.
def rgba_to_rgb(
image: Image.Image, background_color=(255, 255, 255)) -> Image.Image:
image: Image.Image,
background_color: Union[tuple[int, int, int], list[int]] = (255, 255, 255)
) -> Image.Image:
"""Convert an RGBA image to RGB with filled background color."""
assert image.mode == "RGBA"
converted = Image.new("RGB", image.size, background_color)
@ -55,10 +57,35 @@ class ImageMediaIO(MediaIO[Image.Image]):
# for flexible control.
self.kwargs = kwargs
# Extract RGBA background color from kwargs if provided
# Default to white background for backward compatibility
rgba_bg = kwargs.get('rgba_background_color', (255, 255, 255))
# Convert list to tuple for consistency
if isinstance(rgba_bg, list):
rgba_bg = tuple(rgba_bg)
# Validate rgba_background_color format
if not (isinstance(rgba_bg, tuple) and len(rgba_bg) == 3
and all(isinstance(c, int) and 0 <= c <= 255
for c in rgba_bg)):
raise ValueError(
"rgba_background_color must be a list or tuple of 3 integers "
"in the range [0, 255].")
self.rgba_background_color = rgba_bg
def _convert_image_mode(self, image: Image.Image) -> Image.Image:
    """Return ``image`` in ``self.image_mode``.

    RGBA -> RGB conversion is routed through ``rgba_to_rgb`` so that
    ``self.rgba_background_color`` is used as the fill; every other
    conversion is delegated to the module-level ``convert_image_mode``.
    An image already in the target mode is returned unchanged.
    """
    target_mode = self.image_mode

    # Nothing to do when the image is already in the requested mode.
    if image.mode == target_mode:
        return image

    # Only this pair needs the configurable background color.
    if image.mode == "RGBA" and target_mode == "RGB":
        return rgba_to_rgb(image, self.rgba_background_color)

    return convert_image_mode(image, target_mode)
def load_bytes(self, data: bytes) -> Image.Image:
    """Decode an image from raw bytes and convert it to ``self.image_mode``.

    Args:
        data: Encoded image file contents (any format PIL can open).

    Returns:
        The decoded image after ``self._convert_image_mode`` has been
        applied, so RGBA inputs use ``self.rgba_background_color``.
    """
    image = Image.open(BytesIO(data))
    # Force the pixel data to be read now, while the in-memory buffer is
    # still alive, rather than relying on PIL's deferred loading.
    image.load()
    # The stale ``return convert_image_mode(...)`` that preceded this line
    # made it unreachable and ignored the configured background color.
    return self._convert_image_mode(image)
def load_base64(self, media_type: str, data: str) -> Image.Image:
    """Decode a base64-encoded image payload and load it via ``load_bytes``.

    ``media_type`` is accepted but not consulted here. The payload is
    decoded with ``validate=True``.
    """
    raw_bytes = pybase64.b64decode(data, validate=True)
    return self.load_bytes(raw_bytes)
@ -66,7 +93,7 @@ class ImageMediaIO(MediaIO[Image.Image]):
def load_file(self, filepath: Path) -> Image.Image:
    """Open an image file and convert it to ``self.image_mode``.

    Args:
        filepath: Location of the image file on disk.

    Returns:
        The decoded image after ``self._convert_image_mode`` has been
        applied, so RGBA inputs use ``self.rgba_background_color``.
    """
    image = Image.open(filepath)
    # Force the pixel data to be read now instead of relying on PIL's
    # deferred loading.
    image.load()
    # The stale ``return convert_image_mode(...)`` that preceded this line
    # made it unreachable and ignored the configured background color.
    return self._convert_image_mode(image)
def encode_base64(
self,
@ -77,7 +104,7 @@ class ImageMediaIO(MediaIO[Image.Image]):
image = media
with BytesIO() as buffer:
image = convert_image_mode(image, self.image_mode)
image = self._convert_image_mode(image)
image.save(buffer, image_format)
data = buffer.getvalue()