diff --git a/docs/features/multimodal_inputs.md b/docs/features/multimodal_inputs.md index b8677f11a1d3c..cdd32924b5668 100644 --- a/docs/features/multimodal_inputs.md +++ b/docs/features/multimodal_inputs.md @@ -172,6 +172,36 @@ Multi-image input can be extended to perform video captioning. We show this with print(generated_text) ``` +#### Custom RGBA Background Color + +When loading RGBA images (images with transparency), vLLM converts them to RGB format. By default, transparent pixels are replaced with a white background. You can customize this background color using the `rgba_background_color` parameter in `media_io_kwargs`. + +??? code + + ```python + from vllm import LLM + + # Default white background (no configuration needed) + llm = LLM(model="llava-hf/llava-1.5-7b-hf") + + # Custom black background for dark theme + llm = LLM( + model="llava-hf/llava-1.5-7b-hf", + media_io_kwargs={"image": {"rgba_background_color": [0, 0, 0]}} + ) + + # Custom brand color background (e.g., blue) + llm = LLM( + model="llava-hf/llava-1.5-7b-hf", + media_io_kwargs={"image": {"rgba_background_color": [0, 0, 255]}} + ) + ``` + +!!! 
note + - The `rgba_background_color` parameter accepts RGB values as a list `[R, G, B]` or tuple `(R, G, B)` where each value is an integer in the range 0-255 + - This setting only affects RGBA images with transparency; RGB images are unchanged + - If not specified, the default white background `(255, 255, 255)` is used for backward compatibility + ### Video Inputs You can pass a list of NumPy arrays directly to the `'video'` field of the multi-modal dictionary @@ -478,6 +508,20 @@ Full example: ``` +#### Custom RGBA Background Color + +To use a custom background color for RGBA images, pass the `rgba_background_color` parameter via `--media-io-kwargs`: + +```bash +# Example: Black background for dark theme +vllm serve llava-hf/llava-1.5-7b-hf \ + --media-io-kwargs '{"image": {"rgba_background_color": [0, 0, 0]}}' + +# Example: Custom gray background +vllm serve llava-hf/llava-1.5-7b-hf \ + --media-io-kwargs '{"image": {"rgba_background_color": [128, 128, 128]}}' +``` + ### Audio Inputs Audio input is supported according to [OpenAI Audio API](https://platform.openai.com/docs/guides/audio?audio-generation-quickstart-example=audio-in). 
diff --git a/tests/multimodal/test_image.py b/tests/multimodal/test_image.py index cfd44351a6d1f..271a85f1195ec 100644 --- a/tests/multimodal/test_image.py +++ b/tests/multimodal/test_image.py @@ -3,9 +3,10 @@ from pathlib import Path import numpy as np +import pytest from PIL import Image, ImageChops -from vllm.multimodal.image import convert_image_mode +from vllm.multimodal.image import ImageMediaIO, convert_image_mode ASSETS_DIR = Path(__file__).parent / "assets" assert ASSETS_DIR.exists() @@ -35,3 +36,115 @@ def test_rgba_to_rgb(): assert converted_image_numpy[i][j][0] == 255 assert converted_image_numpy[i][j][1] == 255 assert converted_image_numpy[i][j][2] == 255 + + +def test_rgba_to_rgb_custom_background(tmp_path): + """Test RGBA to RGB conversion with custom background colors.""" + # Create a simple RGBA image with transparent and opaque pixels + rgba_image = Image.new("RGBA", (10, 10), + (255, 0, 0, 255)) # Red with full opacity + + # Make top-left quadrant transparent + for i in range(5): + for j in range(5): + rgba_image.putpixel((i, j), (0, 0, 0, 0)) # Fully transparent + + # Save the test image to tmp_path + test_image_path = tmp_path / "test_rgba.png" + rgba_image.save(test_image_path) + + # Test 1: Default white background (backward compatibility) + image_io_default = ImageMediaIO() + converted_default = image_io_default.load_file(test_image_path) + default_numpy = np.array(converted_default) + + # Check transparent pixels are white + assert default_numpy[0][0][0] == 255 # R + assert default_numpy[0][0][1] == 255 # G + assert default_numpy[0][0][2] == 255 # B + # Check opaque pixels remain red + assert default_numpy[5][5][0] == 255 # R + assert default_numpy[5][5][1] == 0 # G + assert default_numpy[5][5][2] == 0 # B + + # Test 2: Custom black background via kwargs + image_io_black = ImageMediaIO(rgba_background_color=(0, 0, 0)) + converted_black = image_io_black.load_file(test_image_path) + black_numpy = np.array(converted_black) + + # Check 
transparent pixels are black + assert black_numpy[0][0][0] == 0 # R + assert black_numpy[0][0][1] == 0 # G + assert black_numpy[0][0][2] == 0 # B + # Check opaque pixels remain red + assert black_numpy[5][5][0] == 255 # R + assert black_numpy[5][5][1] == 0 # G + assert black_numpy[5][5][2] == 0 # B + + # Test 3: Custom blue background via kwargs (as list) + image_io_blue = ImageMediaIO(rgba_background_color=[0, 0, 255]) + converted_blue = image_io_blue.load_file(test_image_path) + blue_numpy = np.array(converted_blue) + + # Check transparent pixels are blue + assert blue_numpy[0][0][0] == 0 # R + assert blue_numpy[0][0][1] == 0 # G + assert blue_numpy[0][0][2] == 255 # B + + # Test 4: Test with load_bytes method + with open(test_image_path, 'rb') as f: + image_data = f.read() + + image_io_green = ImageMediaIO(rgba_background_color=(0, 255, 0)) + converted_green = image_io_green.load_bytes(image_data) + green_numpy = np.array(converted_green) + + # Check transparent pixels are green + assert green_numpy[0][0][0] == 0 # R + assert green_numpy[0][0][1] == 255 # G + assert green_numpy[0][0][2] == 0 # B + + +def test_rgba_background_color_validation(): + """Test that invalid rgba_background_color values are properly rejected.""" + + # Test invalid types + with pytest.raises(ValueError, + match="rgba_background_color must be a list or tuple"): + ImageMediaIO(rgba_background_color="255,255,255") + + with pytest.raises(ValueError, + match="rgba_background_color must be a list or tuple"): + ImageMediaIO(rgba_background_color=255) + + # Test wrong number of elements + with pytest.raises(ValueError, + match="rgba_background_color must be a list or tuple"): + ImageMediaIO(rgba_background_color=(255, 255)) + + with pytest.raises(ValueError, + match="rgba_background_color must be a list or tuple"): + ImageMediaIO(rgba_background_color=(255, 255, 255, 255)) + + # Test non-integer values + with pytest.raises(ValueError, + match="rgba_background_color must be a list or tuple"): + 
ImageMediaIO(rgba_background_color=(255.0, 255.0, 255.0)) + + with pytest.raises(ValueError, + match="rgba_background_color must be a list or tuple"): + ImageMediaIO(rgba_background_color=(255, "255", 255)) + + # Test out of range values + with pytest.raises(ValueError, + match="rgba_background_color must be a list or tuple"): + ImageMediaIO(rgba_background_color=(256, 255, 255)) + + with pytest.raises(ValueError, + match="rgba_background_color must be a list or tuple"): + ImageMediaIO(rgba_background_color=(255, -1, 255)) + + # Test that valid values work + ImageMediaIO(rgba_background_color=(0, 0, 0)) # Should not raise + ImageMediaIO(rgba_background_color=[255, 255, 255]) # Should not raise + ImageMediaIO(rgba_background_color=(128, 128, 128)) # Should not raise diff --git a/vllm/multimodal/image.py b/vllm/multimodal/image.py index a0448a80ac7c2..1006c1ce4b241 100644 --- a/vllm/multimodal/image.py +++ b/vllm/multimodal/image.py @@ -3,6 +3,7 @@ from io import BytesIO from pathlib import Path +from typing import Union import pybase64 import torch @@ -23,9 +24,10 @@ def rescale_image_size(image: Image.Image, return image -# TODO: Support customizable background color to fill in. def rgba_to_rgb( - image: Image.Image, background_color=(255, 255, 255)) -> Image.Image: + image: Image.Image, + background_color: Union[tuple[int, int, int], list[int]] = (255, 255, 255) +) -> Image.Image: """Convert an RGBA image to RGB with filled background color.""" assert image.mode == "RGBA" converted = Image.new("RGB", image.size, background_color) @@ -55,10 +57,35 @@ class ImageMediaIO(MediaIO[Image.Image]): # for flexible control. 
self.kwargs = kwargs + # Extract RGBA background color from kwargs if provided + # Default to white background for backward compatibility + rgba_bg = kwargs.get('rgba_background_color', (255, 255, 255)) + # Convert list to tuple for consistency + if isinstance(rgba_bg, list): + rgba_bg = tuple(rgba_bg) + + # Validate rgba_background_color format + if not (isinstance(rgba_bg, tuple) and len(rgba_bg) == 3 + and all(isinstance(c, int) and 0 <= c <= 255 + for c in rgba_bg)): + raise ValueError( + "rgba_background_color must be a list or tuple of 3 integers " + "in the range [0, 255].") + self.rgba_background_color = rgba_bg + + def _convert_image_mode(self, image: Image.Image) -> Image.Image: + """Convert image mode with custom background color.""" + if image.mode == self.image_mode: + return image + elif image.mode == "RGBA" and self.image_mode == "RGB": + return rgba_to_rgb(image, self.rgba_background_color) + else: + return convert_image_mode(image, self.image_mode) + def load_bytes(self, data: bytes) -> Image.Image: image = Image.open(BytesIO(data)) image.load() - return convert_image_mode(image, self.image_mode) + return self._convert_image_mode(image) def load_base64(self, media_type: str, data: str) -> Image.Image: return self.load_bytes(pybase64.b64decode(data, validate=True)) @@ -66,7 +93,7 @@ class ImageMediaIO(MediaIO[Image.Image]): def load_file(self, filepath: Path) -> Image.Image: image = Image.open(filepath) image.load() - return convert_image_mode(image, self.image_mode) + return self._convert_image_mode(image) def encode_base64( self, @@ -77,7 +104,7 @@ class ImageMediaIO(MediaIO[Image.Image]): image = media with BytesIO() as buffer: - image = convert_image_mode(image, self.image_mode) + image = self._convert_image_mode(image) image.save(buffer, image_format) data = buffer.getvalue()