mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-13 07:59:08 +08:00
MAX_SPEC_LEN
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
parent
22771e5d83
commit
ba1a58f51b
@@ -22,6 +22,8 @@ from vllm.v1.sample.logits_processor import LogitsProcessors
|
|||||||
from vllm.v1.sample.metadata import SamplingMetadata
|
from vllm.v1.sample.metadata import SamplingMetadata
|
||||||
from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
|
from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
|
||||||
|
|
||||||
|
_MAX_SPEC_LEN = 32
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class RequestData:
|
class RequestData:
|
||||||
@@ -323,7 +325,7 @@ class RequestState:
|
|||||||
logits_indices,
|
logits_indices,
|
||||||
target_logits_indices,
|
target_logits_indices,
|
||||||
bonus_logits_indices,
|
bonus_logits_indices,
|
||||||
BLOCK_SIZE=triton.next_power_of_2(32 + 1),
|
BLOCK_SIZE=triton.next_power_of_2(_MAX_SPEC_LEN + 1),
|
||||||
)
|
)
|
||||||
|
|
||||||
draft_token_ids = input_ids[logits_indices]
|
draft_token_ids = input_ids[logits_indices]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user