mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-08 02:09:08 +08:00
[CI/Build] Cleanup LoRA test (#26752)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
parent
c715ba3735
commit
fdd32750f0
@ -58,7 +58,6 @@ def test_chatglm3_lora(chatglm3_lora_files):
|
|||||||
max_loras=4,
|
max_loras=4,
|
||||||
max_lora_rank=64,
|
max_lora_rank=64,
|
||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
enable_chunked_prefill=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
output1 = do_sample(llm, chatglm3_lora_files, lora_id=1)
|
output1 = do_sample(llm, chatglm3_lora_files, lora_id=1)
|
||||||
@ -70,7 +69,6 @@ def test_chatglm3_lora(chatglm3_lora_files):
|
|||||||
|
|
||||||
|
|
||||||
@multi_gpu_test(num_gpus=4)
|
@multi_gpu_test(num_gpus=4)
|
||||||
@create_new_process_for_each_test()
|
|
||||||
def test_chatglm3_lora_tp4(chatglm3_lora_files):
|
def test_chatglm3_lora_tp4(chatglm3_lora_files):
|
||||||
llm = vllm.LLM(
|
llm = vllm.LLM(
|
||||||
MODEL_PATH,
|
MODEL_PATH,
|
||||||
@ -81,7 +79,6 @@ def test_chatglm3_lora_tp4(chatglm3_lora_files):
|
|||||||
tensor_parallel_size=4,
|
tensor_parallel_size=4,
|
||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
fully_sharded_loras=False,
|
fully_sharded_loras=False,
|
||||||
enable_chunked_prefill=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
output1 = do_sample(llm, chatglm3_lora_files, lora_id=1)
|
output1 = do_sample(llm, chatglm3_lora_files, lora_id=1)
|
||||||
@ -93,7 +90,6 @@ def test_chatglm3_lora_tp4(chatglm3_lora_files):
|
|||||||
|
|
||||||
|
|
||||||
@multi_gpu_test(num_gpus=4)
|
@multi_gpu_test(num_gpus=4)
|
||||||
@create_new_process_for_each_test()
|
|
||||||
def test_chatglm3_lora_tp4_fully_sharded_loras(chatglm3_lora_files):
|
def test_chatglm3_lora_tp4_fully_sharded_loras(chatglm3_lora_files):
|
||||||
# https://github.com/NVIDIA/nccl/issues/1790, set a lower value for
|
# https://github.com/NVIDIA/nccl/issues/1790, set a lower value for
|
||||||
# gpu_memory_utilization here because NCCL >= 2.26.3 seems to use
|
# gpu_memory_utilization here because NCCL >= 2.26.3 seems to use
|
||||||
@ -107,7 +103,6 @@ def test_chatglm3_lora_tp4_fully_sharded_loras(chatglm3_lora_files):
|
|||||||
tensor_parallel_size=4,
|
tensor_parallel_size=4,
|
||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
fully_sharded_loras=True,
|
fully_sharded_loras=True,
|
||||||
enable_chunked_prefill=True,
|
|
||||||
gpu_memory_utilization=0.85,
|
gpu_memory_utilization=0.85,
|
||||||
)
|
)
|
||||||
output1 = do_sample(llm, chatglm3_lora_files, lora_id=1)
|
output1 = do_sample(llm, chatglm3_lora_files, lora_id=1)
|
||||||
|
|||||||
@ -113,7 +113,6 @@ def test_llama_lora(sql_lora_files):
|
|||||||
|
|
||||||
|
|
||||||
@multi_gpu_test(num_gpus=4)
|
@multi_gpu_test(num_gpus=4)
|
||||||
@create_new_process_for_each_test()
|
|
||||||
def test_llama_lora_tp4(sql_lora_files):
|
def test_llama_lora_tp4(sql_lora_files):
|
||||||
llm = vllm.LLM(
|
llm = vllm.LLM(
|
||||||
MODEL_PATH,
|
MODEL_PATH,
|
||||||
@ -127,7 +126,6 @@ def test_llama_lora_tp4(sql_lora_files):
|
|||||||
|
|
||||||
|
|
||||||
@multi_gpu_test(num_gpus=4)
|
@multi_gpu_test(num_gpus=4)
|
||||||
@create_new_process_for_each_test()
|
|
||||||
def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
|
def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
|
||||||
llm = vllm.LLM(
|
llm = vllm.LLM(
|
||||||
MODEL_PATH,
|
MODEL_PATH,
|
||||||
@ -142,7 +140,6 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
|
|||||||
|
|
||||||
|
|
||||||
@multi_gpu_test(num_gpus=2)
|
@multi_gpu_test(num_gpus=2)
|
||||||
@create_new_process_for_each_test()
|
|
||||||
def test_tp2_serialize_and_deserialize_lora(
|
def test_tp2_serialize_and_deserialize_lora(
|
||||||
tmp_path, sql_lora_files, sql_lora_huggingface_id
|
tmp_path, sql_lora_files, sql_lora_huggingface_id
|
||||||
):
|
):
|
||||||
|
|||||||
@ -8,7 +8,7 @@ from vllm.assets.image import ImageAsset
|
|||||||
from vllm.lora.request import LoRARequest
|
from vllm.lora.request import LoRARequest
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
|
|
||||||
from ..utils import create_new_process_for_each_test
|
from ..utils import multi_gpu_test
|
||||||
|
|
||||||
MODEL_PATH = "openbmb/MiniCPM-Llama3-V-2_5"
|
MODEL_PATH = "openbmb/MiniCPM-Llama3-V-2_5"
|
||||||
|
|
||||||
@ -88,7 +88,7 @@ def test_minicpmv_lora(minicpmv_lora_files):
|
|||||||
current_platform.is_rocm(),
|
current_platform.is_rocm(),
|
||||||
reason="MiniCPM-V dependency xformers incompatible with ROCm",
|
reason="MiniCPM-V dependency xformers incompatible with ROCm",
|
||||||
)
|
)
|
||||||
@create_new_process_for_each_test()
|
@multi_gpu_test(num_gpus=4)
|
||||||
def test_minicpmv_tp4_wo_fully_sharded_loras(minicpmv_lora_files):
|
def test_minicpmv_tp4_wo_fully_sharded_loras(minicpmv_lora_files):
|
||||||
llm = vllm.LLM(
|
llm = vllm.LLM(
|
||||||
MODEL_PATH,
|
MODEL_PATH,
|
||||||
@ -112,7 +112,7 @@ def test_minicpmv_tp4_wo_fully_sharded_loras(minicpmv_lora_files):
|
|||||||
current_platform.is_rocm(),
|
current_platform.is_rocm(),
|
||||||
reason="MiniCPM-V dependency xformers incompatible with ROCm",
|
reason="MiniCPM-V dependency xformers incompatible with ROCm",
|
||||||
)
|
)
|
||||||
@create_new_process_for_each_test()
|
@multi_gpu_test(num_gpus=4)
|
||||||
def test_minicpmv_tp4_fully_sharded_loras(minicpmv_lora_files):
|
def test_minicpmv_tp4_fully_sharded_loras(minicpmv_lora_files):
|
||||||
llm = vllm.LLM(
|
llm = vllm.LLM(
|
||||||
MODEL_PATH,
|
MODEL_PATH,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user