mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-29 02:47:03 +08:00
ruff format
Signed-off-by: Vladimir Anisimov <vanisimov@nvidia.com>
This commit is contained in:
parent
2817110aa3
commit
2c92ed30cd
@ -2005,4 +2005,3 @@ class TestFindBestGroupSize:
|
|||||||
result = kv_cache_utils._find_best_group_size(same_type_layers, vllm_config)
|
result = kv_cache_utils._find_best_group_size(same_type_layers, vllm_config)
|
||||||
# group_size >= 3 would add > 10% overhead, so fallback to 1
|
# group_size >= 3 would add > 10% overhead, so fallback to 1
|
||||||
assert result == 1
|
assert result == 1
|
||||||
|
|
||||||
|
|||||||
@ -941,10 +941,11 @@ def is_kv_cache_type_attention_free(kv_cache_spec: dict[str, KVCacheSpec]) -> bo
|
|||||||
|
|
||||||
|
|
||||||
def _find_best_group_size(
|
def _find_best_group_size(
|
||||||
same_type_layers: dict["KVCacheSpec", list[str]],
|
same_type_layers: dict["KVCacheSpec", list[str]],
|
||||||
vllm_config: "VllmConfig",
|
vllm_config: "VllmConfig",
|
||||||
min_preferred_group_size: int = 3,
|
min_preferred_group_size: int = 3,
|
||||||
overhead_threshold: float = 0.10) -> int:
|
overhead_threshold: float = 0.10,
|
||||||
|
) -> int:
|
||||||
"""
|
"""
|
||||||
Find the optimal group size that minimizes padding memory, preferring
|
Find the optimal group size that minimizes padding memory, preferring
|
||||||
larger group sizes.
|
larger group sizes.
|
||||||
@ -1003,8 +1004,7 @@ def _find_best_group_size(
|
|||||||
Prefers larger group sizes when padding is equal.
|
Prefers larger group sizes when padding is equal.
|
||||||
Key: (padding_memory, -group_size) so larger group_size wins ties.
|
Key: (padding_memory, -group_size) so larger group_size wins ties.
|
||||||
"""
|
"""
|
||||||
return min(range(start, end + 1),
|
return min(range(start, end + 1), key=lambda gs: (calc_padding_memory(gs), -gs))
|
||||||
key=lambda gs: (calc_padding_memory(gs), -gs))
|
|
||||||
|
|
||||||
# Calculate baseline: optimal group size with no minimum constraint
|
# Calculate baseline: optimal group size with no minimum constraint
|
||||||
baseline_group_size = find_best_in_range(1, max_layers)
|
baseline_group_size = find_best_in_range(1, max_layers)
|
||||||
@ -1020,8 +1020,11 @@ def _find_best_group_size(
|
|||||||
|
|
||||||
# Check if enforcing the minimum preference adds too much overhead
|
# Check if enforcing the minimum preference adds too much overhead
|
||||||
# Overhead is measured relative to total memory
|
# Overhead is measured relative to total memory
|
||||||
overhead = (preferred_padding - baseline_padding) / total_base_memory \
|
overhead = (
|
||||||
if total_base_memory > 0 else 0.0
|
(preferred_padding - baseline_padding) / total_base_memory
|
||||||
|
if total_base_memory > 0
|
||||||
|
else 0.0
|
||||||
|
)
|
||||||
|
|
||||||
if overhead > overhead_threshold:
|
if overhead > overhead_threshold:
|
||||||
# Fallback to baseline (allowing smaller group sizes)
|
# Fallback to baseline (allowing smaller group sizes)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user