[CI/Build] Further decouple HuggingFace implementation from ours during tests (#4166)

Cyrus Leung 2024-05-15 14:38:40 +08:00 committed by GitHub
parent 65bf2ac165
commit e9cdd2b1e2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@@ -1,19 +1,21 @@
 import contextlib
 import gc
 import os
-from typing import List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple

 import pytest
 import torch
 from PIL import Image
-from transformers import (AutoModelForCausalLM, AutoProcessor,
-                          LlavaForConditionalGeneration)
+from transformers import (AutoModelForCausalLM, AutoProcessor, AutoTokenizer,
+                          LlavaConfig, LlavaForConditionalGeneration)

 from vllm import LLM, SamplingParams
 from vllm.config import TokenizerPoolConfig, VisionLanguageConfig
 from vllm.distributed import destroy_model_parallel
+from vllm.logger import init_logger
 from vllm.sequence import MultiModalData
-from vllm.transformers_utils.tokenizer import get_tokenizer

+logger = init_logger(__name__)
+
 _TEST_DIR = os.path.dirname(__file__)
 _TEST_PROMPTS = [os.path.join(_TEST_DIR, "prompts", "example.txt")]
@@ -129,9 +131,7 @@ _STR_DTYPE_TO_TORCH_DTYPE = {
     "float": torch.float,
 }

-_VISION_LANGUAGE_MODELS = {
-    "llava-hf/llava-1.5-7b-hf": LlavaForConditionalGeneration,
-}
+AutoModelForCausalLM.register(LlavaConfig, LlavaForConditionalGeneration)

 _EMBEDDING_MODELS = [
     "intfloat/e5-mistral-7b-instruct",
@@ -143,23 +143,14 @@ class HfRunner:
     def __init__(
         self,
         model_name: str,
-        tokenizer_name: Optional[str] = None,
         dtype: str = "half",
     ) -> None:
         assert dtype in _STR_DTYPE_TO_TORCH_DTYPE
         torch_dtype = _STR_DTYPE_TO_TORCH_DTYPE[dtype]
+
         self.model_name = model_name
-        if model_name in _VISION_LANGUAGE_MODELS:
-            self.model = _VISION_LANGUAGE_MODELS[model_name].from_pretrained(
-                model_name,
-                torch_dtype=torch_dtype,
-                trust_remote_code=True,
-            ).cuda()
-            self.processor = AutoProcessor.from_pretrained(
-                model_name,
-                torch_dtype=torch_dtype,
-            )
-        elif model_name in _EMBEDDING_MODELS:
+
+        if model_name in _EMBEDDING_MODELS:
             # Lazy init required for AMD CI
             from sentence_transformers import SentenceTransformer
             self.model = SentenceTransformer(
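
With the vision-language branch removed, every checkpoint goes through the
same constructor. A sketch of the resulting usage, assuming the HfRunner
defined here (the text model name is illustrative):

    # Text-only and vision-language checkpoints now share one constructor;
    # the auto-class registration above handles the LLaVA case.
    text_runner = HfRunner("facebook/opt-125m")
    vlm_runner = HfRunner("llava-hf/llava-1.5-7b-hf")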
@@ -172,10 +163,24 @@ class HfRunner:
                 torch_dtype=torch_dtype,
                 trust_remote_code=True,
             ).cuda()
-            self.processor = None
-        if tokenizer_name is None:
-            tokenizer_name = model_name
-        self.tokenizer = get_tokenizer(tokenizer_name, trust_remote_code=True)
+
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            torch_dtype=torch_dtype,
+            trust_remote_code=True,
+        )
+
+        try:
+            self.processor = AutoProcessor.from_pretrained(
+                model_name,
+                torch_dtype=torch_dtype,
+                trust_remote_code=True,
+            )
+        except Exception:
+            logger.warning(
+                "Unable to auto-load processor from HuggingFace for "
+                "model %s. Using tokenizer instead.", model_name)
+            self.processor = self.tokenizer

     def generate(
         self,
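
For text-only checkpoints the AutoProcessor lookup can fail, so the
tokenizer stands in for the processor. This works because a tokenizer
accepts the same text= and return_tensors= keywords that generate() passes
below. A rough sketch of the equivalence (model name illustrative):

    from transformers import AutoTokenizer

    # The tokenizer doubles as the "processor" for text-only models: the
    # same call signature yields ready-to-use model inputs.
    tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
    inputs = tokenizer(text="Hello, world!", return_tensors="pt")
    print(sorted(inputs.keys()))  # ['attention_mask', 'input_ids']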
@@ -187,19 +192,19 @@ class HfRunner:
         if images:
             assert len(prompts) == len(images)
         for i, prompt in enumerate(prompts):
-            if self.model_name not in _VISION_LANGUAGE_MODELS:
-                input_ids = self.tokenizer(prompt,
-                                           return_tensors="pt").input_ids
-                inputs = {"input_ids": input_ids.cuda()}
-            else:
-                image = images[i] if images else None
-                inputs = self.processor(text=prompt,
-                                        images=image,
-                                        return_tensors="pt")
-                inputs = {
-                    key: value.cuda() if value is not None else None
-                    for key, value in inputs.items()
-                }
+            processor_kwargs: Dict[str, Any] = {
+                "text": prompt,
+                "return_tensors": "pt",
+            }
+            if images is not None and images[i] is not None:
+                processor_kwargs["images"] = images[i]
+
+            inputs = self.processor(**processor_kwargs)
+            inputs = {
+                key: value.cuda() if value is not None else None
+                for key, value in inputs.items()
+            }
+
             output_ids = self.model.generate(
                 **inputs,
                 use_cache=True,
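
The rewritten loop builds a single kwargs dict and lets the processor sort
out the modalities, so text-only and multimodal prompts share one code path.
A sketch of what the processor produces for a LLaVA-style input (the
checkpoint and image path are illustrative):

    from PIL import Image
    from transformers import AutoProcessor

    processor = AutoProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf")
    inputs = processor(text="USER: <image>\nWhat is shown? ASSISTANT:",
                       images=Image.open("example.jpg"),
                       return_tensors="pt")
    # Alongside input_ids and attention_mask, the processor emits
    # pixel_values; the dict comprehension above moves every tensor to the
    # GPU before it is forwarded to model.generate(**inputs).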