[bugfix] Missing cached item in beam search (#27874)
Signed-off-by: fake0fan <645327136@qq.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>

parent 70bfbd7b16
commit 103a468bbf
@@ -345,22 +345,7 @@ class OpenAIServing:
         if is_explicit_encoder_decoder_prompt(prompt):
             raise NotImplementedError
-        else:
-            processed_inputs = processor.input_preprocessor._prompt_to_llm_inputs(
-                prompt
-            )
-
-        if processed_inputs["type"] == "embeds":
-            raise NotImplementedError
-
-        # This is a workaround to fix multimodal beam search; this is a
-        # bandaid fix for 2 small problems:
-        # 1. Multi_modal_data on the processed_inputs currently resolves to
-        # `None`.
-        # 2. preprocessing above expands the multimodal placeholders. However,
-        # this happens again in generation, so the double expansion causes
-        # a mismatch.
-        # TODO - would be ideal to handle this more gracefully.
 
         prompt_text: str | None
         prompt_token_ids: list[int]
         multi_modal_data: MultiModalDataDict | None
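
To make the removal concrete, here is a minimal, self-contained sketch of the read pattern the fix switches to. It is not taken from the commit: extract_cached_fields and demo_prompt are hypothetical names, and the prompt.get("prompt") key is an assumption. Rather than re-running _prompt_to_llm_inputs, beam search pulls the already-cached fields off the prompt dict, matching the prompt.get(...) calls in the second hunk below, so multimodal placeholders are expanded only once, during generation.

from typing import Any

def extract_cached_fields(
    prompt: dict[str, Any],
) -> tuple[str | None, list[int], Any]:
    # Read the fields an earlier pipeline stage already cached on the
    # prompt dict instead of invoking the input preprocessor again, which
    # returned multi_modal_data as None (the "missing cached item") and
    # expanded multimodal placeholders a second time.
    prompt_text: str | None = prompt.get("prompt")
    prompt_token_ids: list[int] = prompt.get("prompt_token_ids", [])
    multi_modal_data = prompt.get("multi_modal_data")
    return prompt_text, prompt_token_ids, multi_modal_data

# Hypothetical prompt dict shaped like the cached item beam search receives.
demo_prompt = {
    "prompt": "USER: <image>\nWhat is shown here? ASSISTANT:",
    "prompt_token_ids": [1, 3148, 1001, 29901],
    "multi_modal_data": {"image": "<decoded image object>"},
}
print(extract_cached_fields(demo_prompt))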
@@ -373,9 +358,16 @@ class OpenAIServing:
         prompt_token_ids = prompt.get("prompt_token_ids", [])  # type: ignore
         multi_modal_data = prompt.get("multi_modal_data")  # type: ignore
 
-        mm_processor_kwargs: dict[str, Any] | None = processed_inputs.get(
-            "mm_processor_kwargs"
-        )  # type: ignore
+        mm_processor_kwargs: dict[str, Any] | None = None
+        # This is a workaround to fix multimodal beam search; this is a
+        # bandaid fix for 2 small problems:
+        # 1. Multi_modal_data on the processed_inputs currently resolves to
+        # `None`.
+        # 2. preprocessing above expands the multimodal placeholders. However,
+        # this happens again in generation, so the double expansion causes
+        # a mismatch.
+        # TODO - would be ideal to handle this more gracefully.
 
         tokenized_length = len(prompt_token_ids)
 
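
For context on where these components flow next, a hedged sketch follows. vllm's TokensPrompt (a TypedDict in vllm.inputs) accepts prompt_token_ids, multi_modal_data, and mm_processor_kwargs keys; the helper build_beam_search_prompt below is hypothetical and not part of this commit, but it illustrates why leaving mm_processor_kwargs as None and handing generation the unexpanded token ids avoids the double placeholder expansion described in the comment.

from typing import Any

from vllm.inputs import TokensPrompt
from vllm.multimodal import MultiModalDataDict

def build_beam_search_prompt(
    prompt_token_ids: list[int],
    multi_modal_data: MultiModalDataDict | None = None,
    mm_processor_kwargs: dict[str, Any] | None = None,
) -> TokensPrompt:
    # Pass the raw token ids plus the original multi_modal_data so that
    # generation expands the multimodal placeholders exactly once.
    engine_prompt = TokensPrompt(prompt_token_ids=prompt_token_ids)
    if multi_modal_data is not None:
        engine_prompt["multi_modal_data"] = multi_modal_data
    if mm_processor_kwargs is not None:
        engine_prompt["mm_processor_kwargs"] = mm_processor_kwargs
    return engine_prompt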