vllm/vllm/inputs/data.py
Russell Bryant e489ad7a21
[Misc] Add SPDX-License-Identifier headers to python source files (#12628)
- **Add SPDX license headers to python source files**
- **Check for SPDX headers using pre-commit**

commit 9d7ef44c3cfb72ca4c32e1c677d99259d10d4745
Author: Russell Bryant <rbryant@redhat.com>
Date:   Fri Jan 31 14:18:24 2025 -0500

    Add SPDX license headers to python source files
    
    This commit adds SPDX license headers to python source files as
    recommended to the project by the Linux Foundation. These headers
    provide a concise way that is both human and machine readable for
    communicating license information for each source file. It helps
    avoid any ambiguity about the license of the code and can also be
    easily used by tools to help manage license compliance.

    The Linux Foundation runs license scans against the codebase to help
    ensure we are in compliance with the licenses of the code we use,
    including dependencies. Having these headers in place helps that tool
    do its job.
    
    More information can be found on the SPDX site:
    
    - https://spdx.dev/learn/handling-license-info/
    
    Signed-off-by: Russell Bryant <rbryant@redhat.com>

commit 5a1cf1cb3b80759131c73f6a9dddebccac039dea
Author: Russell Bryant <rbryant@redhat.com>
Date:   Fri Jan 31 14:36:32 2025 -0500

    Check for SPDX headers using pre-commit
    
    Signed-off-by: Russell Bryant <rbryant@redhat.com>

---------

Signed-off-by: Russell Bryant <rbryant@redhat.com>

# SPDX-License-Identifier: Apache-2.0

from dataclasses import dataclass
from functools import cached_property
from typing import (TYPE_CHECKING, Any, Dict, Generic, Iterable, List, Literal,
                    Optional, Tuple, Union, cast)

import torch
from typing_extensions import NotRequired, TypedDict, TypeVar, assert_never

if TYPE_CHECKING:
    from vllm.multimodal import (MultiModalDataDict, MultiModalKwargs,
                                 MultiModalPlaceholderDict)
    from vllm.multimodal.inputs import MultiModalInputs


class TextPrompt(TypedDict):
    """Schema for a text prompt."""

    prompt: str
    """The input text to be tokenized before passing to the model."""

    multi_modal_data: NotRequired["MultiModalDataDict"]
    """
    Optional multi-modal data to pass to the model,
    if the model supports it.
    """

    mm_processor_kwargs: NotRequired[Dict[str, Any]]
    """
    Optional multi-modal processor kwargs to be forwarded to the
    multimodal input mapper & processor. Note that if multiple modalities
    have registered mappers etc for the model being considered, we attempt
    to pass the mm_processor_kwargs to each of them.
    """


class TokensPrompt(TypedDict):
    """Schema for a tokenized prompt."""

    prompt_token_ids: List[int]
    """A list of token IDs to pass to the model."""

    token_type_ids: NotRequired[List[int]]
    """A list of token type IDs to pass to the cross encoder model."""

    multi_modal_data: NotRequired["MultiModalDataDict"]
    """
    Optional multi-modal data to pass to the model,
    if the model supports it.
    """

    mm_processor_kwargs: NotRequired[Dict[str, Any]]
    """
    Optional multi-modal processor kwargs to be forwarded to the
    multimodal input mapper & processor. Note that if multiple modalities
    have registered mappers etc for the model being considered, we attempt
    to pass the mm_processor_kwargs to each of them.
    """


SingletonPrompt = Union[str, TextPrompt, TokensPrompt]
"""
Set of possible schemas for a single prompt:

- A text prompt (:class:`str` or :class:`TextPrompt`)
- A tokenized prompt (:class:`TokensPrompt`)

Note that "singleton" is as opposed to a data structure
which encapsulates multiple prompts, i.e. of the sort
which may be utilized for encoder/decoder models when
the user desires to express both the encoder & decoder
prompts explicitly, i.e. :class:`ExplicitEncoderDecoderPrompt`

A prompt of type :class:`SingletonPrompt` may be employed
as (1) input to a decoder-only model, (2) input to
the encoder of an encoder/decoder model, in the scenario
where the decoder-prompt is not specified explicitly, or
(3) as a member of a larger data structure encapsulating
more than one prompt, i.e. :class:`ExplicitEncoderDecoderPrompt`
"""

_T1_co = TypeVar("_T1_co",
                 bound=SingletonPrompt,
                 default=SingletonPrompt,
                 covariant=True)
_T2_co = TypeVar("_T2_co",
                 bound=SingletonPrompt,
                 default=SingletonPrompt,
                 covariant=True)


# TODO: Make fields ReadOnly once mypy supports it
class ExplicitEncoderDecoderPrompt(TypedDict, Generic[_T1_co, _T2_co]):
    """
    Represents an encoder/decoder model input prompt,
    comprising an explicit encoder prompt and a decoder prompt.

    The encoder and decoder prompts, respectively, may be formatted
    according to any of the :class:`SingletonPrompt` schemas,
    and are not required to have the same schema.

    Only the encoder prompt may have multi-modal data. mm_processor_kwargs
    should be at the top-level, and should not be set in the encoder/decoder
    prompts, since they are agnostic to the encoder/decoder.

    Note that an :class:`ExplicitEncoderDecoderPrompt` may not
    be used as an input to a decoder-only model,
    and that the :code:`encoder_prompt` and :code:`decoder_prompt`
    fields of this data structure themselves must be
    :class:`SingletonPrompt` instances.
    """

    encoder_prompt: _T1_co

    decoder_prompt: Optional[_T2_co]

    mm_processor_kwargs: NotRequired[Dict[str, Any]]
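
# Example (illustrative): an explicit encoder/decoder prompt for an
# encoder/decoder model such as BART, leaving the decoder prompt to the
# engine by passing None.
#
#     enc_dec_prompt: ExplicitEncoderDecoderPrompt = {
#         "encoder_prompt": "The tower is 324 metres tall.",
#         "decoder_prompt": None,
#     }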

PromptType = Union[SingletonPrompt, ExplicitEncoderDecoderPrompt]
"""
Set of possible schemas for an LLM input, including
both decoder-only and encoder/decoder input types:

- A text prompt (:class:`str` or :class:`TextPrompt`)
- A tokenized prompt (:class:`TokensPrompt`)
- A single data structure containing both an encoder and a decoder prompt
  (:class:`ExplicitEncoderDecoderPrompt`)
"""


class TokenInputs(TypedDict):
    """Represents token-based inputs."""

    type: Literal["token"]
    """The type of inputs."""

    prompt_token_ids: List[int]
    """The token IDs of the prompt."""

    token_type_ids: NotRequired[List[int]]
    """The token type IDs of the prompt."""

    prompt: NotRequired[str]
    """
    The original prompt text corresponding to the token IDs, if available.
    """

    multi_modal_data: NotRequired["MultiModalDataDict"]
    """
    Optional multi-modal data to pass to the model,
    if the model supports it.
    """

    multi_modal_inputs: NotRequired["MultiModalKwargs"]
    """
    Optional multi-modal inputs to pass to the model,
    if the model supports it.
    """

    multi_modal_placeholders: NotRequired["MultiModalPlaceholderDict"]
    """
    Placeholder ranges for the multi-modal data.
    """

    multi_modal_hashes: NotRequired[List[str]]
    """
    The hashes of the multi-modal data.
    """

    mm_processor_kwargs: NotRequired[Dict[str, Any]]
    """
    Optional multi-modal processor kwargs to be forwarded to the
    multimodal input mapper & processor. Note that if multiple modalities
    have registered mappers etc for the model being considered, we attempt
    to pass the mm_processor_kwargs to each of them.
    """


def token_inputs(
    prompt_token_ids: List[int],
    token_type_ids: Optional[List[int]] = None,
    prompt: Optional[str] = None,
    multi_modal_data: Optional["MultiModalDataDict"] = None,
    multi_modal_inputs: Optional["MultiModalKwargs"] = None,
    multi_modal_hashes: Optional[List[str]] = None,
    multi_modal_placeholders: Optional["MultiModalPlaceholderDict"] = None,
    mm_processor_kwargs: Optional[Dict[str, Any]] = None,
) -> TokenInputs:
    """Construct :class:`TokenInputs` from optional values."""
    inputs = TokenInputs(type="token", prompt_token_ids=prompt_token_ids)

    if prompt is not None:
        inputs["prompt"] = prompt
    if token_type_ids is not None:
        inputs["token_type_ids"] = token_type_ids
    if multi_modal_data is not None:
        inputs["multi_modal_data"] = multi_modal_data
    if multi_modal_inputs is not None:
        inputs["multi_modal_inputs"] = multi_modal_inputs
    if multi_modal_hashes is not None:
        inputs["multi_modal_hashes"] = multi_modal_hashes
    if multi_modal_placeholders is not None:
        inputs["multi_modal_placeholders"] = multi_modal_placeholders
    if mm_processor_kwargs is not None:
        inputs["mm_processor_kwargs"] = mm_processor_kwargs

    return inputs
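
# Example (illustrative): optional keys are only present when a value was
# given, so a simple membership test distinguishes text-backed inputs.
#
#     inputs = token_inputs(prompt_token_ids=[1, 2, 3], prompt="abc")
#     assert inputs["type"] == "token"
#     assert "token_type_ids" not in inputs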

DecoderOnlyInputs = Union[TokenInputs, "MultiModalInputs"]
"""
The inputs in :class:`~vllm.LLMEngine` before they are
passed to the model executor.

This specifies the data required for decoder-only models.
"""


class EncoderDecoderInputs(TypedDict):
    """
    The inputs in :class:`~vllm.LLMEngine` before they are
    passed to the model executor.

    This specifies the required data for encoder-decoder models.
    """

    encoder: Union[TokenInputs, "MultiModalInputs"]
    """The inputs for the encoder portion."""

    decoder: Union[TokenInputs, "MultiModalInputs"]
    """The inputs for the decoder portion."""


SingletonInputs = Union[TokenInputs, "MultiModalInputs"]
"""
A processed :class:`SingletonPrompt` which can be passed to
:class:`vllm.sequence.Sequence`.
"""


@dataclass
class SingletonInputsAdapter:
    """
    Unified interface to access the components of :class:`SingletonInputs`.
    """
    inputs: SingletonInputs

    @cached_property
    def prompt(self) -> Optional[str]:
        inputs = self.inputs

        if inputs["type"] == "token" or inputs["type"] == "multimodal":
            return inputs.get("prompt")

        assert_never(inputs)  # type: ignore[arg-type]

    @cached_property
    def prompt_token_ids(self) -> List[int]:
        inputs = self.inputs

        if inputs["type"] == "token" or inputs["type"] == "multimodal":
            return inputs.get("prompt_token_ids", [])

        assert_never(inputs)  # type: ignore[arg-type]

    @cached_property
    def token_type_ids(self) -> List[int]:
        inputs = self.inputs

        if inputs["type"] == "token" or inputs["type"] == "multimodal":
            return inputs.get("token_type_ids", [])

        assert_never(inputs)  # type: ignore[arg-type]

    @cached_property
    def prompt_embeds(self) -> Optional[torch.Tensor]:
        inputs = self.inputs

        if inputs["type"] == "token" or inputs["type"] == "multimodal":
            return None

        assert_never(inputs)  # type: ignore[arg-type]

    @cached_property
    def multi_modal_data(self) -> "MultiModalDataDict":
        inputs = self.inputs

        if inputs["type"] == "token":
            return inputs.get("multi_modal_data", {})

        if inputs["type"] == "multimodal":
            return inputs.get("mm_kwargs", {})

        assert_never(inputs)  # type: ignore[arg-type]

    @cached_property
    def multi_modal_inputs(self) -> Union[Dict, "MultiModalKwargs"]:
        inputs = self.inputs

        if inputs["type"] == "token":
            return inputs.get("multi_modal_inputs", {})

        if inputs["type"] == "multimodal":
            return inputs.get("mm_kwargs", {})

        assert_never(inputs)  # type: ignore[arg-type]

    @cached_property
    def multi_modal_hashes(self) -> List[str]:
        inputs = self.inputs

        if inputs["type"] == "token":
            return inputs.get("multi_modal_hashes", [])

        if inputs["type"] == "multimodal":
            # only the case when we use MultiModalInputs
            return inputs.get("mm_hashes", [])  # type: ignore[return-value]

        assert_never(inputs)  # type: ignore[arg-type]

    @cached_property
    def multi_modal_placeholders(self) -> "MultiModalPlaceholderDict":
        inputs = self.inputs

        if inputs["type"] == "token":
            return inputs.get("multi_modal_placeholders", {})

        if inputs["type"] == "multimodal":
            return inputs.get("mm_placeholders", {})

        assert_never(inputs)  # type: ignore[arg-type]

    @cached_property
    def mm_processor_kwargs(self) -> Dict[str, Any]:
        inputs = self.inputs

        if inputs["type"] == "token":
            return inputs.get("mm_processor_kwargs", {})

        if inputs["type"] == "multimodal":
            return {}

        assert_never(inputs)  # type: ignore[arg-type]
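
# Example (illustrative): the adapter hides whether the processed inputs are
# token-based ("token") or processor-based ("multimodal"), so callers can use
# one accessor for both.
#
#     adapter = SingletonInputsAdapter(token_inputs(prompt_token_ids=[1, 2]))
#     assert adapter.prompt_token_ids == [1, 2]
#     assert adapter.multi_modal_data == {}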

ProcessorInputs = Union[DecoderOnlyInputs, EncoderDecoderInputs]
"""
The inputs to :data:`vllm.inputs.InputProcessor`.
"""

_T1 = TypeVar("_T1", bound=SingletonPrompt, default=SingletonPrompt)
_T2 = TypeVar("_T2", bound=SingletonPrompt, default=SingletonPrompt)


def build_explicit_enc_dec_prompt(
    encoder_prompt: _T1,
    decoder_prompt: Optional[_T2],
    mm_processor_kwargs: Optional[Dict[str, Any]] = None,
) -> ExplicitEncoderDecoderPrompt[_T1, _T2]:
    if mm_processor_kwargs is None:
        mm_processor_kwargs = {}
    return ExplicitEncoderDecoderPrompt(
        encoder_prompt=encoder_prompt,
        decoder_prompt=decoder_prompt,
        mm_processor_kwargs=mm_processor_kwargs)
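
# Example (illustrative): equivalent to writing the TypedDict by hand, but
# with mm_processor_kwargs defaulted to an empty dict.
#
#     enc_dec = build_explicit_enc_dec_prompt(
#         encoder_prompt="The tower is 324 metres tall.",
#         decoder_prompt=None,
#     )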


def zip_enc_dec_prompts(
    enc_prompts: Iterable[_T1],
    dec_prompts: Iterable[Optional[_T2]],
    mm_processor_kwargs: Optional[Union[Iterable[Dict[str, Any]],
                                        Dict[str, Any]]] = None,
) -> List[ExplicitEncoderDecoderPrompt[_T1, _T2]]:
    """
    Zip encoder and decoder prompts together into a list of
    :class:`ExplicitEncoderDecoderPrompt` instances.

    ``mm_processor_kwargs`` may also be provided; if a dict is passed, the same
    dictionary will be used for every encoder/decoder prompt. If an iterable is
    provided, it will be zipped with the encoder/decoder prompts.
    """
    if mm_processor_kwargs is None:
        mm_processor_kwargs = cast(Dict[str, Any], {})
    if isinstance(mm_processor_kwargs, dict):
        return [
            build_explicit_enc_dec_prompt(
                encoder_prompt, decoder_prompt,
                cast(Dict[str, Any], mm_processor_kwargs))
            for (encoder_prompt,
                 decoder_prompt) in zip(enc_prompts, dec_prompts)
        ]
    return [
        build_explicit_enc_dec_prompt(encoder_prompt, decoder_prompt,
                                      mm_proc_kwargs)
        for (encoder_prompt, decoder_prompt, mm_proc_kwargs
             ) in zip(enc_prompts, dec_prompts, mm_processor_kwargs)
    ]
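
# Example (illustrative): zipping two encoder prompts with their decoder
# prompts; the second decoder prompt is omitted by passing None.
#
#     prompts = zip_enc_dec_prompts(
#         ["An encoder prompt", "Another encoder prompt"],
#         ["A decoder prompt", None],
#     )
#     assert prompts[0]["encoder_prompt"] == "An encoder prompt"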


def to_enc_dec_tuple_list(
    enc_dec_prompts: Iterable[ExplicitEncoderDecoderPrompt[_T1, _T2]],
) -> List[Tuple[_T1, Optional[_T2]]]:
    return [(enc_dec_prompt["encoder_prompt"],
             enc_dec_prompt["decoder_prompt"])
            for enc_dec_prompt in enc_dec_prompts]
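
# Example (illustrative): recovering the (encoder, decoder) pairs from the
# prompts built in the example above.
#
#     pairs = to_enc_dec_tuple_list(prompts)
#     assert pairs[1] == ("Another encoder prompt", None)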