vllm/vllm/v1/worker/ubatch_utils.py
Sage Moore 567939953b
[Core/DBO][1/N] Add Dual-Batch Overlap mechanism to VLLM (#23693)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: Sage Moore <sage@neuralmagic.com>
Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
Co-authored-by: Lucas Wilkinson <lwilkins@redhat.com>
Co-authored-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
Co-authored-by: yewentao256 <zhyanwentao@126.com>
Co-authored-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
2025-09-16 12:21:48 -04:00

20 lines
515 B
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from dataclasses import dataclass
from typing_extensions import TypeAlias
@dataclass
class UBatchSlice:
request_slice: slice
token_slice: slice
UBatchSlices: TypeAlias = list[UBatchSlice]
def is_second_ubatch_empty(orig_num_tokens_per_ubatch: int,
padded_num_tokens_per_ubatch: int) -> bool:
return padded_num_tokens_per_ubatch >= 2 * orig_num_tokens_per_ubatch