feat(multimodal): Add customizable background color for RGBA to RGB conversion (#22052)

Signed-off-by: Jinheng Li <ahengljh@gmail.com> Co-authored-by: Jinheng Li <ahengljh@gmail.com>
2026-01-28 05:07:14 +08:00 · 2025-08-01 21:07:33 +08:00 · 2025-08-01 21:07:33 +08:00 · 0a6d305e0f
commit 0a6d305e0f
parent f81c1bb055
3 changed files with 190 additions and 6 deletions
--- a/docs/features/multimodal_inputs.md
+++ b/docs/features/multimodal_inputs.md
@ -172,6 +172,36 @@ Multi-image input can be extended to perform video captioning. We show this with
        print(generated_text)
    ```

+#### Custom RGBA Background Color
+
+When loading RGBA images (images with transparency), vLLM converts them to RGB format. By default, transparent pixels are replaced with a white background. You can customize this background color using the `rgba_background_color` parameter in `media_io_kwargs`.
+
+??? code
+
+    ```python
+    from vllm import LLM
+    
+    # Default white background (no configuration needed)
+    llm = LLM(model="llava-hf/llava-1.5-7b-hf")
+    
+    # Custom black background for dark theme
+    llm = LLM(
+        model="llava-hf/llava-1.5-7b-hf",
+        media_io_kwargs={"image": {"rgba_background_color": [0, 0, 0]}}
+    )
+    
+    # Custom brand color background (e.g., blue)
+    llm = LLM(
+        model="llava-hf/llava-1.5-7b-hf", 
+        media_io_kwargs={"image": {"rgba_background_color": [0, 0, 255]}}
+    )
+    ```
+
+!!! note
+    - `rgba_background_color` accepts RGB values as a list `[R, G, B]` or tuple `(R, G, B)`, where each value is an integer from 0 to 255
+    - This setting only affects RGBA images with transparency; RGB images are unchanged
+    - If not specified, the default white background `(255, 255, 255)` is used for backward compatibility
+
 ### Video Inputs

 You can pass a list of NumPy arrays directly to the `'video'` field of the multi-modal dictionary
@ -478,6 +508,20 @@ Full example: <gh-file:examples/online_serving/openai_chat_completion_client_for
    export VLLM_VIDEO_FETCH_TIMEOUT=<timeout>
    ```

+#### Custom RGBA Background Color
+
+To use a custom background color for RGBA images, pass the `rgba_background_color` parameter via `--media-io-kwargs`:
+
+```bash
+# Example: Black background for dark theme
+vllm serve llava-hf/llava-1.5-7b-hf \
+  --media-io-kwargs '{"image": {"rgba_background_color": [0, 0, 0]}}'
+
+# Example: Custom gray background
+vllm serve llava-hf/llava-1.5-7b-hf \
+  --media-io-kwargs '{"image": {"rgba_background_color": [128, 128, 128]}}'
+```
+
 ### Audio Inputs

 Audio input is supported according to [OpenAI Audio API](https://platform.openai.com/docs/guides/audio?audio-generation-quickstart-example=audio-in).
--- a/tests/multimodal/test_image.py
+++ b/tests/multimodal/test_image.py
@ -3,9 +3,10 @@
 from pathlib import Path

 import numpy as np
+import pytest
 from PIL import Image, ImageChops

-from vllm.multimodal.image import convert_image_mode
+from vllm.multimodal.image import ImageMediaIO, convert_image_mode

 ASSETS_DIR = Path(__file__).parent / "assets"
 assert ASSETS_DIR.exists()
@ -35,3 +36,115 @@ def test_rgba_to_rgb():
                assert converted_image_numpy[i][j][0] == 255
                assert converted_image_numpy[i][j][1] == 255
                assert converted_image_numpy[i][j][2] == 255
+
+
+def test_rgba_to_rgb_custom_background(tmp_path):
+    """Test that ImageMediaIO fills transparent pixels with rgba_background_color."""
+    # Build a 10x10 RGBA image that starts out opaque red everywhere
+    rgba_image = Image.new("RGBA", (10, 10),
+                           (255, 0, 0, 255))  # Red with full opacity
+
+    # Make the top-left 5x5 quadrant fully transparent
+    for i in range(5):
+        for j in range(5):
+            rgba_image.putpixel((i, j), (0, 0, 0, 0))  # Fully transparent
+
+    # Write the image to a PNG under tmp_path so it can be loaded from disk
+    test_image_path = tmp_path / "test_rgba.png"
+    rgba_image.save(test_image_path)
+
+    # Test 1: Default white background (backward compatibility)
+    image_io_default = ImageMediaIO()
+    converted_default = image_io_default.load_file(test_image_path)
+    default_numpy = np.array(converted_default)
+
+    # Pixel (0, 0) lies in the transparent quadrant: expect the white fill
+    assert default_numpy[0][0][0] == 255  # R
+    assert default_numpy[0][0][1] == 255  # G
+    assert default_numpy[0][0][2] == 255  # B
+    # Pixel (5, 5) is outside the transparent quadrant: expect the original red
+    assert default_numpy[5][5][0] == 255  # R
+    assert default_numpy[5][5][1] == 0  # G
+    assert default_numpy[5][5][2] == 0  # B
+
+    # Test 2: Custom black background passed as a tuple keyword argument
+    image_io_black = ImageMediaIO(rgba_background_color=(0, 0, 0))
+    converted_black = image_io_black.load_file(test_image_path)
+    black_numpy = np.array(converted_black)
+
+    # Transparent pixel (0, 0) must now be filled with black
+    assert black_numpy[0][0][0] == 0  # R
+    assert black_numpy[0][0][1] == 0  # G
+    assert black_numpy[0][0][2] == 0  # B
+    # Opaque pixel (5, 5) must be unaffected by the background color
+    assert black_numpy[5][5][0] == 255  # R
+    assert black_numpy[5][5][1] == 0  # G
+    assert black_numpy[5][5][2] == 0  # B
+
+    # Test 3: Custom blue background passed as a list instead of a tuple
+    image_io_blue = ImageMediaIO(rgba_background_color=[0, 0, 255])
+    converted_blue = image_io_blue.load_file(test_image_path)
+    blue_numpy = np.array(converted_blue)
+
+    # Transparent pixel (0, 0) must be filled with blue
+    assert blue_numpy[0][0][0] == 0  # R
+    assert blue_numpy[0][0][1] == 0  # G
+    assert blue_numpy[0][0][2] == 255  # B
+
+    # Test 4: Same check through load_bytes, using the raw PNG file contents
+    with open(test_image_path, 'rb') as f:
+        image_data = f.read()
+
+    image_io_green = ImageMediaIO(rgba_background_color=(0, 255, 0))
+    converted_green = image_io_green.load_bytes(image_data)
+    green_numpy = np.array(converted_green)
+
+    # Transparent pixel (0, 0) must be filled with green
+    assert green_numpy[0][0][0] == 0  # R
+    assert green_numpy[0][0][1] == 255  # G
+    assert green_numpy[0][0][2] == 0  # B
+
+
+def test_rgba_background_color_validation():
+    """Test that ImageMediaIO rejects malformed rgba_background_color values."""
+
+    # Neither a list nor a tuple: a comma-separated string and a bare int
+    with pytest.raises(ValueError,
+                       match="rgba_background_color must be a list or tuple"):
+        ImageMediaIO(rgba_background_color="255,255,255")
+
+    with pytest.raises(ValueError,
+                       match="rgba_background_color must be a list or tuple"):
+        ImageMediaIO(rgba_background_color=255)
+
+    # Wrong length: fewer or more than 3 components
+    with pytest.raises(ValueError,
+                       match="rgba_background_color must be a list or tuple"):
+        ImageMediaIO(rgba_background_color=(255, 255))
+
+    with pytest.raises(ValueError,
+                       match="rgba_background_color must be a list or tuple"):
+        ImageMediaIO(rgba_background_color=(255, 255, 255, 255))
+
+    # Components that are not ints: floats and a numeric string
+    with pytest.raises(ValueError,
+                       match="rgba_background_color must be a list or tuple"):
+        ImageMediaIO(rgba_background_color=(255.0, 255.0, 255.0))
+
+    with pytest.raises(ValueError,
+                       match="rgba_background_color must be a list or tuple"):
+        ImageMediaIO(rgba_background_color=(255, "255", 255))
+
+    # Components outside the [0, 255] range: one too large, one negative
+    with pytest.raises(ValueError,
+                       match="rgba_background_color must be a list or tuple"):
+        ImageMediaIO(rgba_background_color=(256, 255, 255))
+
+    with pytest.raises(ValueError,
+                       match="rgba_background_color must be a list or tuple"):
+        ImageMediaIO(rgba_background_color=(255, -1, 255))
+
+    # Well-formed tuples and lists must construct without raising
+    ImageMediaIO(rgba_background_color=(0, 0, 0))  # Should not raise
+    ImageMediaIO(rgba_background_color=[255, 255, 255])  # Should not raise
+    ImageMediaIO(rgba_background_color=(128, 128, 128))  # Should not raise
--- a/vllm/multimodal/image.py
+++ b/vllm/multimodal/image.py
@ -3,6 +3,7 @@

 from io import BytesIO
 from pathlib import Path
+from typing import Union

 import pybase64
 import torch
@ -23,9 +24,10 @@ def rescale_image_size(image: Image.Image,
    return image


-# TODO: Support customizable background color to fill in.
 def rgba_to_rgb(
-    image: Image.Image, background_color=(255, 255, 255)) -> Image.Image:
+    image: Image.Image,
+    background_color: Union[tuple[int, int, int], list[int]] = (255, 255, 255)
+) -> Image.Image:
    """Convert an RGBA image to RGB with filled background color."""
    assert image.mode == "RGBA"
    converted = Image.new("RGB", image.size, background_color)
@ -55,10 +57,35 @@ class ImageMediaIO(MediaIO[Image.Image]):
        # for flexible control.
        self.kwargs = kwargs

+        # Extract RGBA background color from kwargs if provided
+        # Default to white background for backward compatibility
+        rgba_bg = kwargs.get('rgba_background_color', (255, 255, 255))
+        # Convert list to tuple for consistency
+        if isinstance(rgba_bg, list):
+            rgba_bg = tuple(rgba_bg)
+
+        # Validate rgba_background_color format
+        if not (isinstance(rgba_bg, tuple) and len(rgba_bg) == 3
+                and all(isinstance(c, int) and 0 <= c <= 255
+                        for c in rgba_bg)):
+            raise ValueError(
+                "rgba_background_color must be a list or tuple of 3 integers "
+                "in the range [0, 255].")
+        self.rgba_background_color = rgba_bg
+
+    def _convert_image_mode(self, image: Image.Image) -> Image.Image:
+        """Convert image to self.image_mode, using self.rgba_background_color for RGBA to RGB."""
+        if image.mode == self.image_mode:
+            return image  # already in the target mode: returned as-is
+        elif image.mode == "RGBA" and self.image_mode == "RGB":
+            return rgba_to_rgb(image, self.rgba_background_color)  # fill with the configured color
+        else:
+            return convert_image_mode(image, self.image_mode)  # every other conversion: shared helper
+
    def load_bytes(self, data: bytes) -> Image.Image:
        image = Image.open(BytesIO(data))
        image.load()
-        return convert_image_mode(image, self.image_mode)
+        return self._convert_image_mode(image)  # instance helper applies self.rgba_background_color

    def load_base64(self, media_type: str, data: str) -> Image.Image:
        return self.load_bytes(pybase64.b64decode(data, validate=True))
@ -66,7 +93,7 @@ class ImageMediaIO(MediaIO[Image.Image]):
    def load_file(self, filepath: Path) -> Image.Image:
        image = Image.open(filepath)
        image.load()
-        return convert_image_mode(image, self.image_mode)
+        return self._convert_image_mode(image)  # instance helper applies self.rgba_background_color

    def encode_base64(
        self,
@ -77,7 +104,7 @@ class ImageMediaIO(MediaIO[Image.Image]):
        image = media

        with BytesIO() as buffer:
-            image = convert_image_mode(image, self.image_mode)
+            image = self._convert_image_mode(image)
            image.save(buffer, image_format)
            data = buffer.getvalue()