revert lora_kwargs change

Signed-off-by: bk-201 <joy25810@foxmail.com>
bk-201 2025-12-21 04:14:11 +00:00
parent 20402090b8
commit 81b5ace128
3 changed files with 0 additions and 37 deletions


@@ -246,7 +246,6 @@ class InputPreprocessor:
tokenization_kwargs: dict[str, Any] | None = None,
*,
mm_uuids: MultiModalUUIDDict | None = None,
-lora_kwargs: dict[str, Any] | None = None,
) -> MultiModalInputs:
"""
Apply the model's multi-modal processor to a multi-modal prompt,
@@ -263,7 +262,6 @@ class InputPreprocessor:
hf_processor_mm_kwargs=mm_processor_kwargs,
tokenization_kwargs=tokenization_kwargs,
mm_uuids=mm_uuids,
-lora_kwargs=lora_kwargs,
)
mm_hashes = mm_input["mm_hashes"]
@@ -361,7 +359,6 @@ class InputPreprocessor:
tokenization_kwargs: dict[str, Any] | None = None,
*,
mm_uuids: MultiModalUUIDDict | None = None,
-lora_kwargs: dict[str, Any] | None = None,
) -> TokenInputs | MultiModalInputs:
prompt_text = parsed_content["prompt"]
@@ -373,7 +370,6 @@ class InputPreprocessor:
parsed_content.get("mm_processor_kwargs") or {},
tokenization_kwargs=tokenization_kwargs,
mm_uuids=mm_uuids,
-lora_kwargs=lora_kwargs,
)
else:
prompt_token_ids = self._tokenize_prompt(
@@ -393,7 +389,6 @@ class InputPreprocessor:
tokenization_kwargs: dict[str, Any] | None = None,
*,
mm_uuids: MultiModalUUIDDict | None = None,
-lora_kwargs: dict[str, Any] | None = None,
) -> SingletonInputs:
"""
Extract the singleton inputs from a prompt.
@@ -420,7 +415,6 @@ class InputPreprocessor:
parsed["content"],
tokenization_kwargs=tokenization_kwargs,
mm_uuids=mm_uuids,
-lora_kwargs=lora_kwargs,
)
if parsed["type"] == "str":
return self._process_text(
@@ -632,7 +626,6 @@ class InputPreprocessor:
tokenization_kwargs: dict[str, Any] | None = None,
*,
mm_uuids: MultiModalUUIDDict | None = None,
-lora_kwargs: dict[str, Any] | None = None,
) -> DecoderOnlyInputs:
"""
For decoder-only models:
@@ -652,7 +645,6 @@ class InputPreprocessor:
prompt,
tokenization_kwargs=tokenization_kwargs,
mm_uuids=mm_uuids,
-lora_kwargs=lora_kwargs,
)
return self._build_decoder_only_llm_inputs(prompt_comps)
@@ -663,7 +655,6 @@ class InputPreprocessor:
tokenization_kwargs: dict[str, Any] | None = None,
*,
mm_uuids: MultiModalUUIDDict | None = None,
-lora_kwargs: dict[str, Any] | None = None,
) -> ProcessorInputs:
if self.model_config.is_encoder_decoder:
# Encoder-decoder model requires special mapping of
@@ -685,7 +676,6 @@ class InputPreprocessor:
cast(SingletonPrompt, prompt),
tokenization_kwargs=tokenization_kwargs,
mm_uuids=mm_uuids,
-lora_kwargs=lora_kwargs,
)
def preprocess(
@@ -694,14 +684,12 @@ class InputPreprocessor:
tokenization_kwargs: dict[str, Any] | None = None,
*,
mm_uuids: MultiModalUUIDDict | None = None,
-lora_kwargs: dict[str, Any] | None = None,
) -> ProcessorInputs:
"""Preprocess the input prompt."""
res = self._preprocess(
prompt,
tokenization_kwargs,
mm_uuids=mm_uuids,
-lora_kwargs=lora_kwargs,
)
if self.mm_processor_cache and self.mm_cache_stats is not None:
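
The lora_kwargs parameter threaded through InputPreprocessor above existed only to reach the multimodal hasher. As the comment deleted in the last file of this commit explains, with enable_tower_connector_lora enabled the processed multi-modal embeddings vary with the active LoRA, so the cache key had to include the LoRA request. A minimal sketch of that cache-key idea, assuming a hashlib/pickle-based key (mm_cache_key is a hypothetical helper, not vLLM's hashing code):

import hashlib
import pickle


def mm_cache_key(item_bytes: bytes, **kwargs: object) -> str:
    # Fold every kwarg that can change the processed output (processor
    # kwargs, tokenization kwargs, and -- in the change being reverted --
    # LoRA request fields) into the cache key.
    h = hashlib.sha256(item_bytes)
    for key in sorted(kwargs):  # sorted for a deterministic key order
        h.update(key.encode())
        h.update(pickle.dumps(kwargs[key]))
    return h.hexdigest()


# Same image, different LoRA -> different keys, so no stale cache hit.
img = b"\x89PNG\r\n..."
assert mm_cache_key(img, lora_name="a") != mm_cache_key(img, lora_name="b")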


@@ -1672,7 +1672,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
tokenization_kwargs: Mapping[str, object],
*,
mm_uuids: MultiModalUUIDDict | None = None,
-lora_kwargs: dict[str, Any] | None = None,
) -> MultiModalHashes:
"""Create MM hashes to be returned.
@@ -1684,7 +1683,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
hashes: MultiModalHashes = {}
mm_uuids = mm_uuids or {}
-lora_kwargs = lora_kwargs or {}
for modality, items in mm_items.items():
if modality in mm_uuids:
@@ -1705,7 +1703,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
item_uuid is None
or hf_processor_mm_kwargs
or tokenization_kwargs
-or lora_kwargs
):
# NOTE: use provided hash string to hash with kwargs
# if available for better performance.
@@ -1716,7 +1713,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
**{modality: item},
**hf_processor_mm_kwargs,
**tokenization_kwargs,
-**lora_kwargs,
)
)
else:
@@ -1729,7 +1725,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
**{modality: item},
**hf_processor_mm_kwargs,
**tokenization_kwargs,
-**lora_kwargs,
)
for item in items
]
@@ -1888,7 +1883,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
tokenization_kwargs: Mapping[str, object],
*,
mm_uuids: MultiModalUUIDDict | None = None,
-lora_kwargs: dict[str, Any] | None = None,
) -> tuple[list[int], MultiModalProcessingInfo, bool]:
"""
Apply the HF processor on the full prompt text,
@@ -1911,7 +1905,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
hf_processor_mm_kwargs,
tokenization_kwargs,
mm_uuids=mm_uuids,
-lora_kwargs=lora_kwargs,
)
mm_is_cached, mm_missing_data_items = self._get_cache_missing_items(
@@ -2122,7 +2115,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
tokenization_kwargs: Mapping[str, object] | None = None,
*,
mm_uuids: MultiModalUUIDDict | None = None,
-lora_kwargs: dict[str, Any] | None = None,
) -> MultiModalInputs:
"""
Process multi-modal inputs to be used in vLLM.
@@ -2152,7 +2144,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
hf_processor_mm_kwargs,
tokenization_kwargs=tokenization_kwargs,
mm_uuids=mm_uuids,
-lora_kwargs=lora_kwargs,
)
# NOTE: tokenization_kwargs are not required to init processor
@@ -2233,7 +2224,6 @@ class EncDecMultiModalProcessor(BaseMultiModalProcessor[_I]):
tokenization_kwargs: Mapping[str, object] | None = None,
*,
mm_uuids: MultiModalUUIDDict | None = None,
-lora_kwargs: dict[str, Any] | None = None,
) -> MultiModalEncDecInputs:
"""
Process multi-modal inputs to be used in vLLM.
@@ -2249,7 +2239,6 @@ class EncDecMultiModalProcessor(BaseMultiModalProcessor[_I]):
hf_processor_mm_kwargs,
tokenization_kwargs,
mm_uuids=mm_uuids,
-lora_kwargs=lora_kwargs,
)
return self._get_enc_dec_inputs(
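
For context on the hunks above: _hash_mm_items reuses a caller-supplied UUID as an item's hash only when no kwargs could alter the processed result; otherwise it rehashes with the kwargs merged in, and this revert merely shrinks that kwarg set. A simplified, self-contained sketch of that decision (all names here are hypothetical, and the real code also folds the provided UUID into the rehash as a fast path):

import hashlib
import json


def item_hash(item: str, **kwargs: object) -> str:
    # Hash the item together with any kwargs that affect processing.
    payload = json.dumps({"item": item, **kwargs}, sort_keys=True)
    return hashlib.sha256(payload.encode()).hexdigest()


def select_hash(
    item_uuid: str | None,
    item: str,
    hf_processor_mm_kwargs: dict[str, object],
    tokenization_kwargs: dict[str, object],
) -> str:
    if item_uuid is None or hf_processor_mm_kwargs or tokenization_kwargs:
        # Kwargs can alter the processed result: hash them in with the item.
        return item_hash(item, **hf_processor_mm_kwargs, **tokenization_kwargs)
    # No kwargs in play: the caller-supplied UUID is already a stable key.
    return item_uuid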


@@ -5,8 +5,6 @@ import time
from collections.abc import Mapping
from typing import Any, Literal, cast
-import msgspec
from vllm.config import VllmConfig
from vllm.inputs import ProcessorInputs, PromptType, SingletonInputs
from vllm.inputs.parse import split_enc_dec_inputs
@@ -460,17 +458,6 @@ class InputProcessor:
else:
mm_uuids = None
-# When enable_tower_connector_lora is True, multi-modal embeddings
-# vary depending on the LoRA request. Therefore, the mm_hash must be
-# generated based on the LoRA request to prevent incorrect cache hits.
-lora_config = self.lora_config
-lora_kwargs = (
-    msgspec.structs.asdict(lora_request)
-    if lora_request and lora_config and lora_config.enable_tower_connector_lora
-    else {}
-)
-lora_kwargs = {k: v for k, v in lora_kwargs.items() if v is not None}
# Process inputs, which includes:
# 1. Tokenize text prompt, with LoRA request if one exists.
# 2. For multimodal models with a merged preprocessor, preprocess
@@ -479,7 +466,6 @@ class InputProcessor:
prompt,
tokenization_kwargs=tokenization_kwargs,
mm_uuids=mm_uuids,
-lora_kwargs=lora_kwargs,
)
from vllm.platforms import current_platform
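
The deleted block above was the only user of msgspec in this file, which is why the import goes away with it. A standalone illustration of the msgspec.structs.asdict call it relied on (this LoRARequest is a simplified stand-in, not vLLM's actual class):

import msgspec


class LoRARequest(msgspec.Struct):  # simplified stand-in for vLLM's class
    lora_name: str
    lora_int_id: int
    lora_path: str | None = None


req = LoRARequest(lora_name="my-adapter", lora_int_id=1)
lora_kwargs = msgspec.structs.asdict(req)
# The deleted code also dropped None values so they don't perturb hashes.
lora_kwargs = {k: v for k, v in lora_kwargs.items() if v is not None}
print(lora_kwargs)  # {'lora_name': 'my-adapter', 'lora_int_id': 1}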