ruff format

Signed-off-by: Vladimir Anisimov <vanisimov@nvidia.com>
This commit is contained in:
Vladimir Anisimov 2025-12-24 02:32:12 -08:00
parent 2817110aa3
commit 2c92ed30cd
2 changed files with 28 additions and 26 deletions

View File

@@ -2005,4 +2005,3 @@ class TestFindBestGroupSize:
result = kv_cache_utils._find_best_group_size(same_type_layers, vllm_config) result = kv_cache_utils._find_best_group_size(same_type_layers, vllm_config)
# group_size >= 3 would add > 10% overhead, so fallback to 1 # group_size >= 3 would add > 10% overhead, so fallback to 1
assert result == 1 assert result == 1

View File

@@ -941,10 +941,11 @@ def is_kv_cache_type_attention_free(kv_cache_spec: dict[str, KVCacheSpec]) -> bo
def _find_best_group_size( def _find_best_group_size(
same_type_layers: dict["KVCacheSpec", list[str]], same_type_layers: dict["KVCacheSpec", list[str]],
vllm_config: "VllmConfig", vllm_config: "VllmConfig",
min_preferred_group_size: int = 3, min_preferred_group_size: int = 3,
overhead_threshold: float = 0.10) -> int: overhead_threshold: float = 0.10,
) -> int:
""" """
Find the optimal group size that minimizes padding memory, preferring Find the optimal group size that minimizes padding memory, preferring
larger group sizes. larger group sizes.
@@ -1003,8 +1004,7 @@ def _find_best_group_size(
Prefers larger group sizes when padding is equal. Prefers larger group sizes when padding is equal.
Key: (padding_memory, -group_size) so larger group_size wins ties. Key: (padding_memory, -group_size) so larger group_size wins ties.
""" """
return min(range(start, end + 1), return min(range(start, end + 1), key=lambda gs: (calc_padding_memory(gs), -gs))
key=lambda gs: (calc_padding_memory(gs), -gs))
# Calculate baseline: optimal group size with no minimum constraint # Calculate baseline: optimal group size with no minimum constraint
baseline_group_size = find_best_in_range(1, max_layers) baseline_group_size = find_best_in_range(1, max_layers)
@@ -1020,8 +1020,11 @@ def _find_best_group_size(
# Check if enforcing the minimum preference adds too much overhead # Check if enforcing the minimum preference adds too much overhead
# Overhead is measured relative to total memory # Overhead is measured relative to total memory
overhead = (preferred_padding - baseline_padding) / total_base_memory \ overhead = (
if total_base_memory > 0 else 0.0 (preferred_padding - baseline_padding) / total_base_memory
if total_base_memory > 0
else 0.0
)
if overhead > overhead_threshold: if overhead > overhead_threshold:
# Fallback to baseline (allowing smaller group sizes) # Fallback to baseline (allowing smaller group sizes)