revert lora_kwargs change

Signed-off-by: bk-201 <joy25810@foxmail.com>

parent 20402090b8
commit 81b5ace128
@@ -246,7 +246,6 @@ class InputPreprocessor:
         tokenization_kwargs: dict[str, Any] | None = None,
         *,
         mm_uuids: MultiModalUUIDDict | None = None,
-        lora_kwargs: dict[str, Any] | None = None,
     ) -> MultiModalInputs:
         """
         Apply the model's multi-modal processor to a multi-modal prompt,
@@ -263,7 +262,6 @@ class InputPreprocessor:
             hf_processor_mm_kwargs=mm_processor_kwargs,
             tokenization_kwargs=tokenization_kwargs,
             mm_uuids=mm_uuids,
-            lora_kwargs=lora_kwargs,
         )
         mm_hashes = mm_input["mm_hashes"]
 
@@ -361,7 +359,6 @@ class InputPreprocessor:
         tokenization_kwargs: dict[str, Any] | None = None,
         *,
         mm_uuids: MultiModalUUIDDict | None = None,
-        lora_kwargs: dict[str, Any] | None = None,
     ) -> TokenInputs | MultiModalInputs:
         prompt_text = parsed_content["prompt"]
 
@@ -373,7 +370,6 @@ class InputPreprocessor:
                 parsed_content.get("mm_processor_kwargs") or {},
                 tokenization_kwargs=tokenization_kwargs,
                 mm_uuids=mm_uuids,
-                lora_kwargs=lora_kwargs,
             )
         else:
             prompt_token_ids = self._tokenize_prompt(
@@ -393,7 +389,6 @@ class InputPreprocessor:
         tokenization_kwargs: dict[str, Any] | None = None,
         *,
         mm_uuids: MultiModalUUIDDict | None = None,
-        lora_kwargs: dict[str, Any] | None = None,
     ) -> SingletonInputs:
         """
         Extract the singleton inputs from a prompt.
@@ -420,7 +415,6 @@ class InputPreprocessor:
                 parsed["content"],
                 tokenization_kwargs=tokenization_kwargs,
                 mm_uuids=mm_uuids,
-                lora_kwargs=lora_kwargs,
             )
         if parsed["type"] == "str":
             return self._process_text(
@@ -632,7 +626,6 @@ class InputPreprocessor:
         tokenization_kwargs: dict[str, Any] | None = None,
         *,
         mm_uuids: MultiModalUUIDDict | None = None,
-        lora_kwargs: dict[str, Any] | None = None,
     ) -> DecoderOnlyInputs:
         """
         For decoder-only models:
@@ -652,7 +645,6 @@ class InputPreprocessor:
             prompt,
             tokenization_kwargs=tokenization_kwargs,
             mm_uuids=mm_uuids,
-            lora_kwargs=lora_kwargs,
         )
 
         return self._build_decoder_only_llm_inputs(prompt_comps)
@@ -663,7 +655,6 @@ class InputPreprocessor:
         tokenization_kwargs: dict[str, Any] | None = None,
         *,
         mm_uuids: MultiModalUUIDDict | None = None,
-        lora_kwargs: dict[str, Any] | None = None,
     ) -> ProcessorInputs:
         if self.model_config.is_encoder_decoder:
             # Encoder-decoder model requires special mapping of
@@ -685,7 +676,6 @@ class InputPreprocessor:
             cast(SingletonPrompt, prompt),
             tokenization_kwargs=tokenization_kwargs,
             mm_uuids=mm_uuids,
-            lora_kwargs=lora_kwargs,
         )
 
     def preprocess(
@@ -694,14 +684,12 @@ class InputPreprocessor:
         tokenization_kwargs: dict[str, Any] | None = None,
         *,
         mm_uuids: MultiModalUUIDDict | None = None,
-        lora_kwargs: dict[str, Any] | None = None,
     ) -> ProcessorInputs:
         """Preprocess the input prompt."""
         res = self._preprocess(
             prompt,
             tokenization_kwargs,
             mm_uuids=mm_uuids,
-            lora_kwargs=lora_kwargs,
         )
 
         if self.mm_processor_cache and self.mm_cache_stats is not None:
@@ -1672,7 +1672,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
         tokenization_kwargs: Mapping[str, object],
         *,
         mm_uuids: MultiModalUUIDDict | None = None,
-        lora_kwargs: dict[str, Any] | None = None,
     ) -> MultiModalHashes:
         """Create MM hashes to be returned.
 
@@ -1684,7 +1683,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
 
         hashes: MultiModalHashes = {}
         mm_uuids = mm_uuids or {}
-        lora_kwargs = lora_kwargs or {}
 
         for modality, items in mm_items.items():
             if modality in mm_uuids:
@@ -1705,7 +1703,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
                     item_uuid is None
                     or hf_processor_mm_kwargs
                     or tokenization_kwargs
-                    or lora_kwargs
                 ):
                     # NOTE: use provided hash string to hash with kwargs
                     # if available for better performance.
@@ -1716,7 +1713,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
                             **{modality: item},
                             **hf_processor_mm_kwargs,
                             **tokenization_kwargs,
-                            **lora_kwargs,
                         )
                     )
                 else:
@@ -1729,7 +1725,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
                     **{modality: item},
                     **hf_processor_mm_kwargs,
                     **tokenization_kwargs,
-                    **lora_kwargs,
                 )
                 for item in items
             ]
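
These two hunks are where lora_kwargs last took effect: whether an item arrives with a caller-supplied UUID (hashed together with any kwargs, per the NOTE above) or is content-hashed from scratch, every extra kwargs mapping is splatted into the digest. The sketch below illustrates the idea with a generic hash_kwargs helper; it is a stand-in under stated assumptions, not vLLM's actual MultiModalHasher, and min_pixels is just an illustrative kwarg. What it demonstrates is why kwargs must feed the hash at all: identical media processed under different kwargs must land on different cache keys.

import hashlib
import pickle


def hash_kwargs(**kwargs: object) -> str:
    """Digest a flat kwargs mapping into a stable hex string."""
    h = hashlib.sha256()
    # Sort keys so dict insertion order cannot change the digest.
    for key in sorted(kwargs):
        h.update(key.encode("utf-8"))
        h.update(pickle.dumps(kwargs[key]))
    return h.hexdigest()


# Same image bytes, different processor kwargs -> different cache keys.
image = b"\x89PNG..."
key_a = hash_kwargs(image=image, min_pixels=256 * 28 * 28)
key_b = hash_kwargs(image=image, min_pixels=512 * 28 * 28)
assert key_a != key_b

Removing **lora_kwargs from these call sites means LoRA identity no longer contributes to the key, which is exactly what this revert intends.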
@@ -1888,7 +1883,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
         tokenization_kwargs: Mapping[str, object],
         *,
         mm_uuids: MultiModalUUIDDict | None = None,
-        lora_kwargs: dict[str, Any] | None = None,
     ) -> tuple[list[int], MultiModalProcessingInfo, bool]:
         """
         Apply the HF processor on the full prompt text,
@@ -1911,7 +1905,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
             hf_processor_mm_kwargs,
             tokenization_kwargs,
             mm_uuids=mm_uuids,
-            lora_kwargs=lora_kwargs,
         )
 
         mm_is_cached, mm_missing_data_items = self._get_cache_missing_items(
@@ -2122,7 +2115,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
         tokenization_kwargs: Mapping[str, object] | None = None,
         *,
         mm_uuids: MultiModalUUIDDict | None = None,
-        lora_kwargs: dict[str, Any] | None = None,
     ) -> MultiModalInputs:
         """
         Process multi-modal inputs to be used in vLLM.
@@ -2152,7 +2144,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
             hf_processor_mm_kwargs,
             tokenization_kwargs=tokenization_kwargs,
             mm_uuids=mm_uuids,
-            lora_kwargs=lora_kwargs,
         )
 
         # NOTE: tokenization_kwargs are not required to init processor
@@ -2233,7 +2224,6 @@ class EncDecMultiModalProcessor(BaseMultiModalProcessor[_I]):
         tokenization_kwargs: Mapping[str, object] | None = None,
         *,
         mm_uuids: MultiModalUUIDDict | None = None,
-        lora_kwargs: dict[str, Any] | None = None,
     ) -> MultiModalEncDecInputs:
         """
         Process multi-modal inputs to be used in vLLM.
@@ -2249,7 +2239,6 @@ class EncDecMultiModalProcessor(BaseMultiModalProcessor[_I]):
             hf_processor_mm_kwargs,
             tokenization_kwargs,
             mm_uuids=mm_uuids,
-            lora_kwargs=lora_kwargs,
         )
 
         return self._get_enc_dec_inputs(
@@ -5,8 +5,6 @@ import time
 from collections.abc import Mapping
 from typing import Any, Literal, cast
 
-import msgspec
-
 from vllm.config import VllmConfig
 from vllm.inputs import ProcessorInputs, PromptType, SingletonInputs
 from vllm.inputs.parse import split_enc_dec_inputs
@@ -460,17 +458,6 @@ class InputProcessor:
         else:
             mm_uuids = None
 
-        # When enable_tower_connector_lora is True, multi-modal embeddings
-        # vary depending on the LoRA request. Therefore, the mm_hash must be
-        # generated based on the LoRA request to prevent incorrect cache hits.
-        lora_config = self.lora_config
-        lora_kwargs = (
-            msgspec.structs.asdict(lora_request)
-            if lora_request and lora_config and lora_config.enable_tower_connector_lora
-            else {}
-        )
-        lora_kwargs = {k: v for k, v in lora_kwargs.items() if v is not None}
-
         # Process inputs, which includes:
         # 1. Tokenize text prompt, with LoRA request if one exists.
         # 2. For multimodal models with a merged preprocessor, preprocess
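
The block deleted here was the heart of the reverted change: when lora_config.enable_tower_connector_lora was set, the incoming LoRA request was flattened into a plain dict and stripped of None fields, so that LoRA identity perturbed the mm_hash (in that mode, multi-modal embeddings vary per adapter, and a pure content hash would alias across adapters). Below is a minimal sketch of the flatten-and-filter pattern; LoRARequestStub and its fields are hypothetical stand-ins, not vLLM's actual LoRARequest.

import msgspec


class LoRARequestStub(msgspec.Struct):
    # Hypothetical stand-in for vLLM's LoRARequest.
    lora_name: str
    lora_int_id: int
    lora_path: str | None = None


req = LoRARequestStub(lora_name="tower-adapter", lora_int_id=1)

# msgspec.structs.asdict flattens a Struct into a plain dict; dropping
# None values keeps unset fields from polluting the hash input.
lora_kwargs = msgspec.structs.asdict(req)
lora_kwargs = {k: v for k, v in lora_kwargs.items() if v is not None}
print(lora_kwargs)  # {'lora_name': 'tower-adapter', 'lora_int_id': 1}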
@@ -479,7 +466,6 @@ class InputProcessor:
             prompt,
             tokenization_kwargs=tokenization_kwargs,
             mm_uuids=mm_uuids,
-            lora_kwargs=lora_kwargs,
         )
         from vllm.platforms import current_platform
 
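With this last hunk applied, the preprocessing entry point is back to its pre-change shape: tokenization_kwargs, then keyword-only mm_uuids, and no lora_kwargs anywhere in the chain. A hedged caller-side sketch; input_preprocessor, prompt, and all argument values are illustrative, not taken from the diff.

# Post-revert call shape: no lora_kwargs parameter.
processor_inputs = input_preprocessor.preprocess(
    prompt,
    tokenization_kwargs={"truncation": True, "max_length": 1024},
    mm_uuids={"image": ["img-0"]},
)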