mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-02 00:17:54 +08:00
Add input metadata
This commit is contained in:
parent
608f74ffe5
commit
7b6844e590
@ -1,7 +1,8 @@
|
|||||||
from cacheflow.worker.models.model_utils import get_model
|
from cacheflow.models.input_metadata import InputMetadata
|
||||||
|
from cacheflow.models.model_utils import get_model
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'get_model',
|
'get_model',
|
||||||
|
'InputMetadata',
|
||||||
]
|
]
|
||||||
|
|||||||
25
cacheflow/models/input_metadata.py
Normal file
25
cacheflow/models/input_metadata.py
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
from typing import List
|
||||||
|
|
||||||
|
import torch
|
||||||
|
|
||||||
|
|
||||||
|
class InputMetadata:
    """Container for the per-batch metadata passed alongside model inputs.

    The constructor stores its arguments as attributes and derives a few
    sizes from them.

    Args:
        prompt_lens: One integer per prompt; its length becomes
            ``num_prompts``.
        slot_mapping: Tensor stored as ``slot_mapping`` (and, for backward
            compatibility, as ``prompt_block_table``).
            NOTE(review): presumably maps tokens to KV-cache slots --
            confirm against the caller.
        context_lens: Tensor whose first dimension becomes
            ``num_generation_tokens``.
        max_context_len: Stored as-is.
            NOTE(review): presumably the maximum of ``context_lens`` --
            this is not checked here.
        block_tables: Tensor with at least two dimensions. Its first
            dimension must equal ``context_lens.shape[0]`` and its second
            dimension becomes ``max_num_blocks_per_seq``.

    Raises:
        AssertionError: If ``block_tables.shape[0]`` differs from
            ``context_lens.shape[0]``.
    """

    def __init__(
        self,
        prompt_lens: List[int],
        slot_mapping: torch.Tensor,
        context_lens: torch.Tensor,
        max_context_len: int,
        block_tables: torch.Tensor,
    ) -> None:
        self.prompt_lens = prompt_lens
        # Expose the tensor under the same name as the constructor argument.
        self.slot_mapping = slot_mapping
        # Original attribute name, kept as an alias so that existing readers
        # of `prompt_block_table` keep working.
        self.prompt_block_table = slot_mapping
        self.context_lens = context_lens
        self.max_context_len = max_context_len
        self.block_tables = block_tables

        # Sizes derived from the stored inputs.
        self.num_prompts = len(prompt_lens)
        self.num_generation_tokens = context_lens.shape[0]
        self.max_num_blocks_per_seq = block_tables.shape[1]
        # `context_lens` and `block_tables` must agree on their first
        # dimension.
        assert self.num_generation_tokens == block_tables.shape[0], (
            "context_lens and block_tables disagree on their first dimension: "
            f"{self.num_generation_tokens} != {block_tables.shape[0]}"
        )
|
||||||
Loading…
x
Reference in New Issue
Block a user