[CI/Build] Fix some V1 tests not being run (#25569)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent d3c732e985
commit 129a643b4c
@@ -300,10 +300,12 @@ steps:
   - pytest -v -s v1/spec_decode
   - pytest -v -s v1/kv_connector/unit
   - pytest -v -s v1/metrics
+  - pytest -v -s v1/test_kv_sharing.py
+  - pytest -v -s v1/test_metrics_reader.py
+  - pytest -v -s v1/test_oracle.py
+  - pytest -v -s v1/test_request.py
   - pytest -v -s v1/test_serial_utils.py
   - pytest -v -s v1/test_utils.py
-  - pytest -v -s v1/test_oracle.py
-  - pytest -v -s v1/test_metrics_reader.py
   # Integration test for streaming correctness (requires special branch).
   - pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
   - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
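The pipeline hunk above is the actual CI fix: v1/test_kv_sharing.py and v1/test_request.py were listed nowhere in the pipeline commands, so they were silently never run, while v1/test_oracle.py and v1/test_metrics_reader.py are moved to keep the command list sorted. A rough sketch of a checker for this class of omission follows; the repo layout and pipeline path are assumptions, and this script is not part of the commit:

import re
from pathlib import Path

# Hypothetical checker: report test files under tests/v1 that the
# Buildkite pipeline never references, either directly or via a
# parent directory such as "v1/kv_connector/unit".
pipeline = Path(".buildkite/test-pipeline.yaml").read_text()
tokens = set(re.split(r"\s+", pipeline))

for test_file in sorted(Path("tests/v1").rglob("test_*.py")):
    rel = test_file.relative_to("tests")
    # Covered if the file itself or any ancestor directory is a token.
    covered = any(str(p) in tokens for p in [rel, *rel.parents])
    if not covered:
        print(f"not referenced by the pipeline: {rel}")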
@@ -1,17 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from unittest.mock import Mock
-
 import torch
 
-from vllm.v1.attention.backends.flash_attn import (
-    FlashAttentionBackend, FlashAttentionMetadataBuilder)
-from vllm.v1.attention.backends.flex_attention import (
-    FlexAttentionBackend, FlexAttentionMetadataBuilder)
 from vllm.v1.kv_cache_interface import FullAttentionSpec, KVCacheGroupSpec
-from vllm.v1.worker.utils import (AttentionGroup,
-                                  initialize_kv_cache_for_kv_sharing)
+from vllm.v1.worker.utils import add_kv_sharing_layers_to_kv_cache_groups
 
 
 def new_kv_cache_spec():
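The remaining hunks migrate the tests from the removed initialize_kv_cache_for_kv_sharing helper to the new add_kv_sharing_layers_to_kv_cache_groups, which no longer takes kv_caches or attn_groups. As a minimal sketch of what the assertions below imply the new helper does — an inference from the tests, not the actual vLLM implementation:

# Sketch only, inferred from the test assertions: each layer that
# reuses another layer's KV cache joins that target layer's group.
def add_kv_sharing_layers_to_kv_cache_groups(
    shared_kv_cache_layers,  # dict: sharing layer -> target layer name
    kv_cache_groups,         # list of KVCacheGroupSpec
):
    # Map every layer that owns a KV cache to its group.
    layer_to_group = {
        layer_name: group
        for group in kv_cache_groups
        for layer_name in group.layer_names
    }
    for layer_name, target_layer_name in shared_kv_cache_layers.items():
        layer_to_group[target_layer_name].layer_names.append(layer_name)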
@@ -37,56 +30,17 @@ def test_initialize_kv_cache_for_kv_sharing_different_attn_groups():
                          new_kv_cache_spec()),
     ]
 
-    attn_groups = [
-        # KV cache group 0 has two attention groups
-        [
-            AttentionGroup(
-                backend=FlashAttentionBackend,
-                metadata_builder=Mock(spec=FlashAttentionMetadataBuilder),
-                layer_names=["model.layers.0"],
-            ),
-            AttentionGroup(
-                backend=FlexAttentionBackend,
-                metadata_builder=Mock(spec=FlexAttentionMetadataBuilder),
-                layer_names=["model.layers.1"],
-            ),
-        ],
-    ]
-
-    # Only layers 0 and 1 will have KV caches allocated
-    kv_caches = {
-        "model.layers.0": torch.zeros(1, 2, 3),
-        "model.layers.1": torch.ones(1, 2, 3),
-    }
-
-    initialize_kv_cache_for_kv_sharing(
+    add_kv_sharing_layers_to_kv_cache_groups(
         shared_kv_cache_layers=shared_kv_cache_layers,
         kv_cache_groups=kv_cache_groups,
-        kv_caches=kv_caches,
-        attn_groups=attn_groups,
     )
 
-    # Check that the KV caches were shared correctly
-    assert kv_caches["model.layers.2"].data_ptr(
-    ) == kv_caches["model.layers.0"].data_ptr()
-    assert kv_caches["model.layers.3"].data_ptr(
-    ) == kv_caches["model.layers.1"].data_ptr()
-
     # Check that the layers were added to the correct KV cache group
     assert len(kv_cache_groups) == 1
     assert kv_cache_groups[0].layer_names == [
         "model.layers.0", "model.layers.1", "model.layers.2", "model.layers.3"
     ]
 
-    # Check that the layers were added to the attention groups
-    assert len(attn_groups) == 1 and len(attn_groups[0]) == 2
-    assert attn_groups[0][0].layer_names == [
-        "model.layers.0", "model.layers.2"
-    ]
-    assert attn_groups[0][1].layer_names == [
-        "model.layers.1", "model.layers.3"
-    ]
-
 
 def test_initialize_kv_cache_for_kv_sharing_same_attn_groups():
     """
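The deleted assertions used torch.Tensor.data_ptr() to prove that two layer names aliased one allocation; the new helper only edits group membership and never touches tensors, so those checks no longer apply. A minimal standalone illustration of the aliasing check the old test performed (not from the diff):

import torch

kv_cache = torch.zeros(1, 2, 3)
shared_view = kv_cache  # aliasing, as KV sharing did for layer tensors
# Two names for the same tensor share one storage pointer.
assert shared_view.data_ptr() == kv_cache.data_ptr()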
@@ -103,48 +57,17 @@ def test_initialize_kv_cache_for_kv_sharing_same_attn_groups():
                          new_kv_cache_spec()),
     ]
 
-    attn_groups = [
-        # KV cache group 0 has a single attention group
-        # as all layers have the same flash attention backend
-        [
-            AttentionGroup(
-                backend=FlashAttentionBackend,
-                metadata_builder=Mock(spec=FlashAttentionMetadataBuilder),
-                layer_names=["model.layers.0", "model.layers.1"],
-            ),
-        ],
-    ]
-
-    kv_caches = {
-        "model.layers.0": torch.zeros(1, 2, 3),
-        "model.layers.1": torch.ones(1, 2, 3),
-    }
-
-    initialize_kv_cache_for_kv_sharing(
+    add_kv_sharing_layers_to_kv_cache_groups(
         shared_kv_cache_layers=shared_kv_cache_layers,
         kv_cache_groups=kv_cache_groups,
-        kv_caches=kv_caches,
-        attn_groups=attn_groups,
     )
 
-    # Check that the KV caches were shared correctly
-    assert kv_caches["model.layers.2"].data_ptr(
-    ) == kv_caches["model.layers.0"].data_ptr()
-    assert kv_caches["model.layers.3"].data_ptr(
-    ) == kv_caches["model.layers.1"].data_ptr()
-
     # Check that the layers were added to the correct KV cache group
     assert len(kv_cache_groups) == 1
     assert kv_cache_groups[0].layer_names == [
         "model.layers.0", "model.layers.1", "model.layers.2", "model.layers.3"
     ]
 
-    # Check that the layers were added to the attention groups
-    assert len(attn_groups) == 1 and len(attn_groups[0]) == 1
-    assert attn_groups[0][0].layer_names == [
-        "model.layers.0", "model.layers.1", "model.layers.2", "model.layers.3"
-    ]
-
 
 def test_initialize_kv_cache_for_kv_sharing_no_attn_groups():
     """
@@ -162,23 +85,11 @@ def test_initialize_kv_cache_for_kv_sharing_no_attn_groups():
         KVCacheGroupSpec(["model.layers.1"], new_kv_cache_spec()),
     ]
 
-    kv_caches = {
-        "model.layers.0": torch.zeros(1, 2, 3),
-        "model.layers.1": torch.ones(1, 2, 3),
-    }
-
-    initialize_kv_cache_for_kv_sharing(
+    add_kv_sharing_layers_to_kv_cache_groups(
         shared_kv_cache_layers=shared_kv_cache_layers,
         kv_cache_groups=kv_cache_groups,
-        kv_caches=kv_caches,
     )
 
-    # Check that the KV caches were shared correctly
-    assert kv_caches["model.layers.2"].data_ptr(
-    ) == kv_caches["model.layers.0"].data_ptr()
-    assert kv_caches["model.layers.3"].data_ptr(
-    ) == kv_caches["model.layers.1"].data_ptr()
-
     # Check that the layers were added to the correct KV cache group
     assert len(kv_cache_groups) == 2
     assert kv_cache_groups[0].layer_names == [
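The page truncates before the final assertions of this last test, but the removed data_ptr checks in all three tests imply a common fixture: layer 2 shares layer 0's KV cache and layer 3 shares layer 1's. A presumed reconstruction of that setup (hypothetical, not shown in these hunks):

# Presumed shared fixture, inferred from the removed data_ptr assertions.
shared_kv_cache_layers = {
    "model.layers.2": "model.layers.0",  # layer 2 reuses layer 0's KV cache
    "model.layers.3": "model.layers.1",  # layer 3 reuses layer 1's KV cache
}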