reformat commits

2026-06-13 04:37:13 +08:00 · 2025-12-11 17:32:08 +02:00 · 2025-12-11 17:32:08 +02:00 · 4e558858b8
commit 4e558858b8
parent 1bceb28678
1 changed files with 35 additions and 21 deletions
--- a/vllm/model_executor/models/nano_nemotron_vl.py
+++ b/vllm/model_executor/models/nano_nemotron_vl.py
@ -7,16 +7,16 @@
 #     LICENSE is in root directory.
 # --------------------------------------------------------
 import copy
 import math
 import random
 from dataclasses import dataclass
 import copy
 import warnings
 from abc import ABC, abstractmethod
-from collections.abc import Iterable, Mapping, Sequence, Callable
+from collections.abc import Callable, Iterable, Mapping, Sequence
 from dataclasses import dataclass
 from typing import Annotated, Any, Literal, TypeAlias, TypeVar
 import einops
 import numpy.typing as npt
 import regex as re
 import torch
@ -24,7 +24,6 @@ import torch.nn as nn
 import torchvision.transforms as T
 from PIL import Image
 from transformers import BatchFeature, PretrainedConfig, TensorType
 import einops
 from vllm.config import VllmConfig
 from vllm.config.multimodal import BaseDummyOptions, VideoDummyOptions
@ -181,7 +180,8 @@ class DynamicResolutionImageTilingStrategy:
            thumbnail_area = self._thumbnail_size * self._thumbnail_size
            area_ratio = resized_area / thumbnail_area
-            # Only add thumbnail if resized image area is less than threshold % of thumbnail area
+            # Only add thumbnail if resized image area is less than threshold % of
            # thumbnail area
            if area_ratio < self._thumbnail_area_threshold:
                thumbnail_img = params.image.resize(
                    (self._thumbnail_size, self._thumbnail_size)
@ -198,11 +198,11 @@ class DynamicResolutionImageTilingStrategy:
        tiling_augment_prob: float = 0.4,
    ) -> DynamicResolutionParams:
        """Process a single media item and return its parameters.
        Args:
            media: The media item to process
            num_tokens_available: Number of tokens available for this media
-            data_augment: Whether to apply data augmentation to the image. Defaults to False.
+            data_augment: Whether to apply data augmentation to the image. Defaults to
            False.
        Returns:
            DynamicResolutionParams for the media
        """
@ -222,7 +222,8 @@ class DynamicResolutionImageTilingStrategy:
        target_patch_height = math.floor(factor * closest_patch_height)
        target_patch_width = math.floor(factor * closest_patch_width)
-        # We only consider self._min_num_patches if it is greater than current_num_tokens_available.
+        # We only consider self._min_num_patches if it is greater than
        # current_num_tokens_available.
        if (
            current_num_tokens_available > self._min_num_patches
            and target_patch_height * target_patch_width < self._min_num_patches
@ -244,7 +245,8 @@ class DynamicResolutionImageTilingStrategy:
                new_patch_width = math.ceil(up_factor * target_patch_width)
                if new_patch_height * new_patch_width > current_num_tokens_available:
-                    # If only one side can be min_side, make as big as possible at native aspect ratio while staying below max_patches
+                    # If only one side can be min_side, make as big as possible at
                    # native aspect ratio while staying below max_patches
                    if (
                        max(current_num_tokens_available // new_patch_width, 1)
                        * self._patch_size
@ -271,7 +273,8 @@ class DynamicResolutionImageTilingStrategy:
                new_patch_width = math.ceil(up_factor * target_patch_width)
                if new_patch_height * new_patch_width > current_num_tokens_available:
-                    # If only one side can be min_side, make as big as possible at native aspect ratio while staying below max_patches
+                    # If only one side can be min_side, make as big as possible at
                    # native aspect ratio while staying below max_patches
                    if (
                        max(current_num_tokens_available // new_patch_height, 1)
                        * self._patch_size
@ -355,7 +358,8 @@ class DynamicResolutionImageTilingStrategy:
            thumbnail_area = self._thumbnail_size * self._thumbnail_size
            area_ratio = resized_area / thumbnail_area
-            # Only add thumbnail if resized image area is less than threshold % of thumbnail area
+            # Only add thumbnail if resized image area is less than threshold % of
            # thumbnail area
            if area_ratio < self._thumbnail_area_threshold:
                num_tiles += 1  # Add 1 for thumbnail
                # Add embeddings for thumbnail (thumbnail_size x thumbnail_size)
@ -435,7 +439,8 @@ class DynamicResolutionImageTilingStrategy:
            media_list: List of media items to process
            num_tokens_available: Total number of tokens available across all media
            max_num_tiles: Maximum number of tiles (unused in this implementation)
-            data_augment: Whether to apply data augmentation to the image. Defaults to False.
+            data_augment: Whether to apply data augmentation to the image. Defaults to
            False.
        Returns:
            List of ImageTilingParams for each media item
        """
@ -444,19 +449,21 @@ class DynamicResolutionImageTilingStrategy:
            * (4 if self._pixel_shuffle else 1)
            * (4 if self._conv_merging else 1)
        )
-        # When the number of available token is too small, allow self._min_num_patches per media and
+        # When the number of available token is too small, allow self._min_num_patches
-        # let the sample be truncated.
+        # per media and let the sample be truncated.
        num_tokens_available = max(
            num_tokens_available, self._min_num_patches * len(media_list)
        )
-        # Clip the number of tokens available per media to be between min and max patches.
+        # Clip the number of tokens available per media to be between min and max
        # patches.
        num_tokens_available_per_media = [
            max(min(num_tokens_available, self._max_num_patches), self._min_num_patches)
            for _ in range(len(media_list))
        ]
-        # In theory this could be a while True loop, but in case the process_media method slightly
+        # In theory this could be a while True loop, but in case the process_media
        # method slightly
        # changes, I want to make sure we don't get stuck in an infinite loop.
        for _ in range(10):
            # Step 1: Process each media with current token budget
@ -496,8 +503,8 @@ class DynamicResolutionImageTilingStrategy:
                    for i in range(len(num_tokens_available_per_media))
                ]
            )
-            # If there was not scaling down, we're stuck just use min_num_patches per media, else
+            # If there was not scaling down, we're stuck just use min_num_patches per
-            # try with the scaled down num_tokens_available_per_media.
+            # media, else try with the scaled down num_tokens_available_per_media.
            if not scaled_down:
                num_tokens_available_per_media = [self._min_num_patches] * len(
                    media_list
@ -558,8 +565,15 @@ class DynamicResolutionImageTilingStrategy:
            )
    def __str__(self):
-        return f"DynamicResolutionImageTransform(vision_model_type={self._vision_model_type}, min_num_patches={self._min_num_patches}, patch_size={self._patch_size}, pixel_shuffle={self._pixel_shuffle}, conv_merging={self._conv_merging}, use_thumbnail={self._use_thumbnail}, thumbnail_size={self._thumbnail_size}, thumbnail_area_threshold={self._thumbnail_area_threshold})"
+        return f"DynamicResolutionImageTransform(\
-
+            vision_model_type={self._vision_model_type}, \
            min_num_patches={self._min_num_patches}, \
            patch_size={self._patch_size}, \
            pixel_shuffle={self._pixel_shuffle}, \
            conv_merging={self._conv_merging}, \
            use_thumbnail={self._use_thumbnail}, \
            thumbnail_size={self._thumbnail_size}, \
            thumbnail_area_threshold={self._thumbnail_area_threshold})"
 image_tiling_strategy = DynamicResolutionImageTilingStrategy(