Mirror of https://git.datalinker.icu/vllm-project/vllm.git, synced 2025-12-10 12:05:48 +08:00
[CI/Build] Delete ultravox LoRA test (#14730)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
parent 55211b01e8
commit bd44b812cb
@@ -1,131 +0,0 @@
# SPDX-License-Identifier: Apache-2.0

import shutil
from os import path
from tempfile import TemporaryDirectory

import pytest
import torch
from huggingface_hub import snapshot_download
from safetensors.torch import load_file, save_file
from transformers import AutoTokenizer

from vllm.lora.request import LoRARequest

from ..models.utils import check_outputs_equal

ULTRAVOX_MODEL_NAME = "fixie-ai/ultravox-v0_3"
LLMA_MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"

VLLM_PLACEHOLDER = "<|reserved_special_token_0|>"

PROMPT = "Tell me about a Fool's mate move in 20 words. Provide the moves!"

@pytest.fixture(autouse=True)
def v1(run_with_both_engines_lora):
    # Simple autouse wrapper to run both engines for each test.
    # This can be promoted up to conftest.py to run for every
    # test in a package.
    pass


def llama3_1_8b_chess_lora_path():
    return snapshot_download(
        repo_id="mkopecki/chess-lora-adapter-llama-3.1-8b")


# We can't use the Llama LoRA adapter without module name transformation,
# because Ultravox nests the language model.
def transform_module_names_for_ultravox(state_dict):
    transformed_state_dict = {}
    for key, value in state_dict.items():
        new_key = key.replace("base_model.model",
                              "base_model.model.language_model")
        transformed_state_dict[new_key] = value
    return transformed_state_dict
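
# Illustrative example (not part of the original test): assuming a standard
# PEFT checkpoint, a key such as
#   base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight
# would be remapped to
#   base_model.model.language_model.model.layers.0.self_attn.q_proj.lora_A.weight
# so it resolves against the Llama backbone nested inside Ultravox.
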
def mk_llama3_1_8b_ultravox_chess_lora(source_repo, target_path):
    tensor_file = "adapter_model.safetensors"
    state_dict = load_file(path.join(source_repo, tensor_file))
    transformed_state_dict = transform_module_names_for_ultravox(state_dict)

    save_file(transformed_state_dict, path.join(target_path, tensor_file))

    config_file = "adapter_config.json"
    shutil.copyfile(path.join(source_repo, config_file),
                    path.join(target_path, config_file))
    return target_path
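
# Resulting adapter directory (illustrative sketch):
#   target_path/
#     adapter_model.safetensors  # tensors with remapped module names
#     adapter_config.json        # copied unchanged from the source adapter
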
def _get_prompt(audio_count, question, placeholder, model_name) -> str:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    placeholder = f"{placeholder}\n" * audio_count

    return tokenizer.apply_chat_template([{
        'role': 'user',
        'content': f"{placeholder}{question}"
    }],
                                         tokenize=False,
                                         add_generation_prompt=True)
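
# For audio_count=0 the placeholder is dropped entirely, so the prompt is just
# the question wrapped in the model's chat template, roughly (exact markup
# depends on the tokenizer's template; shown here for the Llama 3.1 template):
#   <|begin_of_text|><|start_header_id|>user<|end_header_id|>
#   Tell me about a Fool's mate move in 20 words. Provide the moves!
#   <|eot_id|><|start_header_id|>assistant<|end_header_id|>
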
def test_ultravox_lora(vllm_runner):
    """
    TODO: Train an Ultravox LoRA instead of using a Llama LoRA.
    """
    # Workaround to prevent a device mismatch in Whisper.
    # Can be removed once it is fixed upstream in transformers:
    # https://github.com/huggingface/transformers/pull/35866
    torch.set_default_device("cpu")

    llama3_1_8b_chess_lora = llama3_1_8b_chess_lora_path()
    with TemporaryDirectory() as temp_ultravox_lora_dir:
        llama3_1_8b_ultravox_chess_lora = mk_llama3_1_8b_ultravox_chess_lora(
            llama3_1_8b_chess_lora, temp_ultravox_lora_dir)
        with vllm_runner(
                ULTRAVOX_MODEL_NAME,
                enforce_eager=True,
                max_num_seqs=2,
                enable_lora=True,
                max_loras=1,
                max_lora_rank=128,
                dtype="bfloat16",
                max_model_len=1024,
        ) as vllm_model:
            ultravox_outputs: list[tuple[
                list[int], str]] = vllm_model.generate_greedy(
                    [
                        _get_prompt(0, PROMPT, VLLM_PLACEHOLDER,
                                    ULTRAVOX_MODEL_NAME)
                    ],
                    256,
                    lora_request=LoRARequest(str(1), 1,
                                             llama3_1_8b_ultravox_chess_lora),
                )

    # Run Llama with the same LoRA adapter to compare its outputs with the
    # Ultravox outputs above.
    with vllm_runner(
            LLMA_MODEL_NAME,
            enforce_eager=True,
            max_num_seqs=2,
            enable_lora=True,
            max_loras=1,
            max_lora_rank=128,
            dtype="bfloat16",
            max_model_len=1024,
    ) as vllm_model:
        llama_outputs: list[tuple[list[int], str]] = (
            vllm_model.generate_greedy(
                [_get_prompt(0, PROMPT, VLLM_PLACEHOLDER, LLMA_MODEL_NAME)],
                256,
                lora_request=LoRARequest(str(1), 1, llama3_1_8b_chess_lora),
            ))

    check_outputs_equal(
        outputs_0_lst=ultravox_outputs,
        outputs_1_lst=llama_outputs,
        name_0="ultravox",
        name_1="llama",
    )
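
For context, the standalone equivalent of what vllm_runner does in this test is vLLM's offline LLM API. Below is a minimal sketch, assuming the transformed adapter has been written to a local directory; the path and adapter name are hypothetical, and the prompt should already be chat-template formatted as _get_prompt does above.

from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# Load Ultravox with LoRA support enabled, mirroring the test's settings.
llm = LLM(model="fixie-ai/ultravox-v0_3",
          enable_lora=True,
          max_lora_rank=128,
          enforce_eager=True,
          max_model_len=1024)

# Greedy decoding, comparable to generate_greedy in the test harness.
outputs = llm.generate(
    ["<formatted chat prompt here>"],
    SamplingParams(temperature=0.0, max_tokens=256),
    # "chess" and the path are hypothetical placeholders.
    lora_request=LoRARequest("chess", 1, "/tmp/ultravox-chess-lora"),
)
print(outputs[0].outputs[0].text)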