mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-12 16:47:04 +08:00
40 lines
965 B
Python
40 lines
965 B
Python
from dataclasses import dataclass
|
|
from typing import Dict, List, Optional
|
|
|
|
import torch
|
|
|
|
|
|
@dataclass
class SamplerOutput:
    """Per-step sampler results for a batch of requests.

    Holds the sampled token ids plus optional logprob tensors. The
    logprob fields are None when logprobs were not computed for this
    step.
    """

    # [num_reqs]
    # One sampled token id per request in the batch.
    sampled_token_ids: List[int]

    # [num_reqs, max_num_logprobs + 1]
    # Token ids corresponding to the returned logprobs.
    # NOTE(review): presumably the "+ 1" slot holds the sampled token's
    # own id/logprob in addition to the top-k — confirm against the
    # sampler implementation.
    logprob_token_ids: Optional[torch.Tensor]
    # [num_reqs, max_num_logprobs + 1]
    # Logprob values aligned element-wise with logprob_token_ids.
    logprobs: Optional[torch.Tensor]

    # TODO: Support prompt logprobs.
    # Placeholders for prompt-token logprobs; not yet populated.
    prompt_logprob_token_ids: Optional[torch.Tensor]
    prompt_logprobs: Optional[torch.Tensor]
|
|
|
|
|
|
# ModelRunnerOutput is serialized and sent to the scheduler process.
# This is expensive for torch.Tensor so prefer to use List instead.
@dataclass
class ModelRunnerOutput:
    """Per-step model-runner results, shipped to the scheduler process.

    Because this object crosses a process boundary, plain Python lists
    are preferred over tensors for cheap serialization (see the note
    above); only the optional logprob payloads remain tensors, kept on
    CPU as the ``_cpu`` suffix indicates.
    """

    # [num_reqs]
    # Request ids for this batch, in batch order.
    req_ids: List[str]
    # req_id -> index
    # Maps each request id to its row/position in the batched outputs.
    req_id_to_index: Dict[str, int]

    # [num_reqs]
    # One sampled token id per request, aligned with req_ids.
    sampled_token_ids: List[int]

    # [num_reqs, max_num_logprobs + 1]
    # CPU tensor of token ids for the returned logprobs; None when
    # logprobs were not requested.
    logprob_token_ids_cpu: Optional[torch.Tensor]
    # [num_reqs, max_num_logprobs + 1]
    # CPU tensor of logprob values aligned with logprob_token_ids_cpu.
    logprobs_cpu: Optional[torch.Tensor]
|