mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-23 21:34:55 +08:00
Add input metadata
This commit is contained in:
parent
608f74ffe5
commit
7b6844e590
@ -1,7 +1,8 @@
|
||||
from cacheflow.worker.models.model_utils import get_model
|
||||
from cacheflow.models.input_metadata import InputMetadata
|
||||
from cacheflow.models.model_utils import get_model
|
||||
|
||||
|
||||
# Public API of the package: the model constructor and the per-step
# input-metadata container added by this commit.
__all__ = ['get_model', 'InputMetadata']
|
||||
|
||||
25
cacheflow/models/input_metadata.py
Normal file
25
cacheflow/models/input_metadata.py
Normal file
@ -0,0 +1,25 @@
|
||||
from typing import List
|
||||
|
||||
import torch
|
||||
|
||||
|
||||
class InputMetadata:
    """Per-step batch metadata passed from the worker to the model.

    Bundles the prompt lengths, slot mapping, context lengths and block
    tables for one execution step, and precomputes the batch counts the
    model needs (number of prompts, number of generation tokens, and the
    widest block table row).
    """

    def __init__(
        self,
        prompt_lens: List[int],        # token count of each prompt sequence
        slot_mapping: torch.Tensor,    # stored as `prompt_block_table` below
        context_lens: torch.Tensor,    # one context length per generation seq
        max_context_len: int,          # max over `context_lens`, presumably — TODO confirm at caller
        block_tables: torch.Tensor,    # assumes shape (num_generation_seqs, max_blocks) — verify
    ) -> None:
        self.prompt_lens = prompt_lens
        # NOTE(review): the parameter is called `slot_mapping` but is exposed
        # under the attribute name `prompt_block_table`; readers depend on the
        # attribute name, so it is kept as-is.
        self.prompt_block_table = slot_mapping
        self.context_lens = context_lens
        self.max_context_len = max_context_len
        self.block_tables = block_tables

        # Derived batch counts, computed once here so the model does not
        # re-derive them on every use.
        self.num_prompts = len(prompt_lens)
        self.num_generation_tokens = context_lens.shape[0]
        self.max_num_blocks_per_seq = block_tables.shape[1]
        # Every generation sequence must have exactly one block-table row.
        assert self.num_generation_tokens == block_tables.shape[0]
|
||||
Loading…
x
Reference in New Issue
Block a user