[CI/Build] Enable phi2 lora test (#20540)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
parent: 6e2c19ce22
commit: 2e610deb72
@@ -1,8 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import pytest
-
 import vllm
 from vllm.lora.request import LoRARequest
 
@@ -49,9 +47,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
     return generated_texts
 
 
-# Skipping for V1 for now as we are hitting,
-# "Head size 80 is not supported by FlashAttention." error.
-@pytest.mark.skip(reason="Head size 80 is not supported by FlashAttention")
 def test_phi2_lora(phi2_lora_files):
     # We enable enforce_eager=True here to reduce VRAM usage for lora-test CI,
     # Otherwise, the lora-test will fail due to CUDA OOM.
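The hunk above only shows the `@pytest.mark.skip` decorator being removed; the body of `test_phi2_lora` is not part of this diff. For orientation, the following is a minimal, hypothetical sketch of how such a vLLM LoRA test is typically wired up, assuming `microsoft/phi-2` as the base model and a `phi2_lora_files` pytest fixture that yields the adapter path (the fixture name and the `do_sample` helper come from the file; the model name, `max_lora_rank`, prompts, and assertions below are illustrative placeholders, not taken from the commit).

    # Minimal sketch, not the actual test body from the commit.
    import vllm
    from vllm.lora.request import LoRARequest


    def test_phi2_lora(phi2_lora_files):
        # enforce_eager=True disables CUDA graph capture, which lowers VRAM
        # usage enough for the lora-test CI job to avoid CUDA OOM.
        llm = vllm.LLM(
            model="microsoft/phi-2",   # assumed base model for the phi2 adapter
            enable_lora=True,
            max_lora_rank=64,          # assumed; must cover the adapter's rank
            enforce_eager=True,
        )
        prompts = ["Write a short greeting."]
        params = vllm.SamplingParams(temperature=0, max_tokens=32)
        # The adapter is attached per request via a LoRARequest
        # (name, integer id, path to the adapter weights).
        outputs = llm.generate(
            prompts,
            params,
            lora_request=LoRARequest("phi2-lora", 1, phi2_lora_files),
        )
        generated = [o.outputs[0].text for o in outputs]
        assert all(isinstance(text, str) and text for text in generated)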