[Bugfix][Perf] Revert applying HF processor on text-only inputs for multimodal models (#28858)

Signed-off-by: Roger Wang <hey@rogerw.io>

parent 64e39d667c
commit 7f064491f8
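In effect, the revert changes prompt routing in InputPreprocessor: a text-only prompt no longer passes through the Hugging Face processor just because the model is multimodal; only prompts that actually carry multi_modal_data take that path. A minimal standalone sketch of the before/after dispatch, assuming hypothetical helper names route_before/route_after (illustrative Python only, not vLLM's actual API):

    def route_before(parsed: dict, is_multimodal_model: bool) -> str:
        # Pre-revert routing: every prompt to a multimodal model went
        # through the HF processor, even with no media attached.
        if is_multimodal_model:
            return "_process_multimodal"
        return "token_inputs"

    def route_after(parsed: dict, is_multimodal_model: bool) -> str:
        # Post-revert routing: only prompts that actually carry
        # multi_modal_data pay the HF processor cost.
        if parsed.get("multi_modal_data"):
            return "_process_multimodal"
        return "token_inputs"

    text_only = {"prompt": "Describe the weather."}
    assert route_before(text_only, is_multimodal_model=True) == "_process_multimodal"
    assert route_after(text_only, is_multimodal_model=True) == "token_inputs"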
@@ -86,34 +86,6 @@ def test_zip_enc_dec_prompts(mm_processor_kwargs, expected_mm_kwargs):
     assert zipped["mm_processor_kwargs"] == exp_kwargs
 
 
-@pytest.mark.parametrize(
-    "model_id",
-    [
-        "facebook/opt-125m",
-    ],
-)
-@pytest.mark.parametrize(
-    "prompt",
-    [
-        {
-            "prompt": "",
-            "multi_modal_data": {"dummy": []},
-        },
-        {
-            "prompt_token_ids": [],
-            "multi_modal_data": {"dummy": []},
-        },
-    ],
-)
-def test_preprocessor_text_no_mm_inputs(model_id, prompt):
-    model_config = ModelConfig(model=model_id)
-    tokenizer = init_tokenizer_from_configs(model_config)
-    input_preprocessor = InputPreprocessor(model_config, tokenizer)
-
-    with pytest.raises(ValueError, match="does not support multimodal inputs"):
-        input_preprocessor.preprocess(prompt)
-
-
 @pytest.mark.parametrize(
     "model_id",
     [
@@ -127,6 +99,13 @@ def test_preprocessor_text_no_mm_inputs(model_id, prompt):
         {"prompt_token_ids": []},
     ],
 )
+@pytest.mark.skip(
+    reason=(
+        "Applying huggingface processor on text inputs results in "
+        "significant performance regression for multimodal models. "
+        "See https://github.com/vllm-project/vllm/issues/26320"
+    )
+)
 def test_preprocessor_always_mm_code_path(model_id, prompt):
     model_config = ModelConfig(model=model_id)
     tokenizer = init_tokenizer_from_configs(model_config)
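Note that this test is kept but marked skipped rather than deleted, so it can be re-enabled if the regression in issue #26320 is resolved; skipped tests still appear in pytest reports. Assuming the test lives in a file like tests/test_inputs.py (the file path is not shown in this extraction), an invocation with -rs would surface the skip reason:

    pytest tests/test_inputs.py -k test_preprocessor_always_mm_code_path -rs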
@@ -348,18 +348,15 @@ class InputPreprocessor:
         )
 
         inputs: TokenInputs | MultiModalInputs
-        if self.model_config.is_multimodal_model:
+        if multi_modal_data := parsed_content.get("multi_modal_data"):
             inputs = self._process_multimodal(
                 prompt_token_ids,
-                parsed_content.get("multi_modal_data") or {},
+                multi_modal_data,
                 parsed_content.get("mm_processor_kwargs") or {},
                 tokenization_kwargs=tokenization_kwargs,
                 mm_uuids=mm_uuids,
             )
         else:
-            if parsed_content.get("multi_modal_data"):
-                raise ValueError("This model does not support multimodal inputs")
-
             inputs = token_inputs(prompt_token_ids)
 
         if cache_salt := parsed_content.get("cache_salt"):
@@ -377,18 +374,15 @@ class InputPreprocessor:
         prompt_text = parsed_content["prompt"]
 
         inputs: TokenInputs | MultiModalInputs
-        if self.model_config.is_multimodal_model:
+        if multi_modal_data := parsed_content.get("multi_modal_data"):
             inputs = self._process_multimodal(
                 prompt_text,
-                parsed_content.get("multi_modal_data") or {},
+                multi_modal_data,
                 parsed_content.get("mm_processor_kwargs") or {},
                 tokenization_kwargs=tokenization_kwargs,
                 mm_uuids=mm_uuids,
             )
         else:
-            if parsed_content.get("multi_modal_data"):
-                raise ValueError("This model does not support multimodal inputs")
-
             prompt_token_ids = self._tokenize_prompt(
                 prompt_text,
                 tokenization_kwargs=tokenization_kwargs,
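Both preprocessor hunks make the same two changes: the branch condition now keys on whether the parsed prompt actually contains multi_modal_data, bound once via an assignment expression, rather than on whether the model is multimodal; and the explicit ValueError guard (together with its test removed above) is dropped. A minimal illustration of the assignment-expression guard, independent of vLLM:

    parsed_content = {"prompt": "hello"}  # no "multi_modal_data" key present

    if multi_modal_data := parsed_content.get("multi_modal_data"):
        branch = "multimodal"  # .get() returned a truthy value, now bound locally
    else:
        branch = "text-only"  # .get() returned None (or an empty container)

    assert branch == "text-only"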