ruff format

Signed-off-by: Vladimir Anisimov <vanisimov@nvidia.com>
2026-05-29 02:47:03 +08:00 · 2025-12-24 02:32:12 -08:00 · 2025-12-24 02:32:12 -08:00 · 2c92ed30cd
commit 2c92ed30cd
parent 2817110aa3
2 changed files with 28 additions and 26 deletions
--- a/tests/v1/core/test_kv_cache_utils.py
+++ b/tests/v1/core/test_kv_cache_utils.py
@ -2005,4 +2005,3 @@ class TestFindBestGroupSize:
        result = kv_cache_utils._find_best_group_size(same_type_layers, vllm_config)
        # group_size >= 3 would add > 10% overhead, so fallback to 1
        assert result == 1
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@ -941,10 +941,11 @@ def is_kv_cache_type_attention_free(kv_cache_spec: dict[str, KVCacheSpec]) -> bo
 def _find_best_group_size(
-        same_type_layers: dict["KVCacheSpec", list[str]],
+    same_type_layers: dict["KVCacheSpec", list[str]],
-        vllm_config: "VllmConfig",
+    vllm_config: "VllmConfig",
-        min_preferred_group_size: int = 3,
+    min_preferred_group_size: int = 3,
-        overhead_threshold: float = 0.10) -> int:
+    overhead_threshold: float = 0.10,
 ) -> int:
    """
    Find the optimal group size that minimizes padding memory, preferring
    larger group sizes.
@ -1003,8 +1004,7 @@ def _find_best_group_size(
        Prefers larger group sizes when padding is equal.
        Key: (padding_memory, -group_size) so larger group_size wins ties.
        """
-        return min(range(start, end + 1),
+        return min(range(start, end + 1), key=lambda gs: (calc_padding_memory(gs), -gs))
                   key=lambda gs: (calc_padding_memory(gs), -gs))
    # Calculate baseline: optimal group size with no minimum constraint
    baseline_group_size = find_best_in_range(1, max_layers)
@ -1020,8 +1020,11 @@ def _find_best_group_size(
    # Check if enforcing the minimum preference adds too much overhead
    # Overhead is measured relative to total memory
-    overhead = (preferred_padding - baseline_padding) / total_base_memory \
+    overhead = (
-        if total_base_memory > 0 else 0.0
+        (preferred_padding - baseline_padding) / total_base_memory
        if total_base_memory > 0
        else 0.0
    )
    if overhead > overhead_threshold:
        # Fallback to baseline (allowing smaller group sizes)