vllm/vllm/v1/worker/ubatch_utils.py
Lucas Wilkinson cc1dc7ed6d
[Core/DBO][2/N] Dual-Batch Overlap add DeepEP High Throughput support and Prefill support (#24845)
Signed-off-by: Sage Moore <sage@neuralmagic.com>
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Sage Moore <sage@neuralmagic.com>
Co-authored-by: yewentao256 <zhyanwentao@126.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
2025-09-23 16:02:10 +00:00

28 lines
790 B
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from dataclasses import dataclass
from typing_extensions import TypeAlias
@dataclass
class UBatchSlice:
    """A pair of slices, one over requests and one over tokens.

    NOTE(review): presumably both slices describe the same micro-batch
    ("ubatch") of a larger batch -- confirm against the callers. The
    methods below read ``start``/``stop`` directly, so both bounds are
    expected to be concrete integers rather than ``None``.
    """

    # Range of requests covered by this slice pair.
    request_slice: slice
    # Range of tokens covered by this slice pair.
    token_slice: slice

    def is_empty(self) -> bool:
        """Return True if either slice has identical start and stop."""
        requests, tokens = self.request_slice, self.token_slice
        return requests.start == requests.stop or tokens.start == tokens.stop

    @property
    def num_tokens(self) -> int:
        """Width of the token slice, computed as ``stop - start``."""
        tokens = self.token_slice
        first, last = tokens.start, tokens.stop
        return last - first
# Convenience alias for a list of UBatchSlice objects.
UBatchSlices: TypeAlias = list[UBatchSlice]
def is_second_ubatch_empty(orig_num_tokens_per_ubatch: int,
                           padded_num_tokens_per_ubatch: int) -> bool:
    """Report whether the padded size is at least twice the original size.

    NOTE(review): judging by the name, a True result is taken to mean the
    second micro-batch would hold no real tokens after padding -- confirm
    against the callers.

    Args:
        orig_num_tokens_per_ubatch: Token count per micro-batch before
            padding.
        padded_num_tokens_per_ubatch: Token count per micro-batch after
            padding.

    Returns:
        True when ``padded_num_tokens_per_ubatch`` is greater than or equal
        to ``2 * orig_num_tokens_per_ubatch``; False otherwise.
    """
    doubled_original = 2 * orig_num_tokens_per_ubatch
    return padded_num_tokens_per_ubatch >= doubled_original