mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-01 19:07:05 +08:00
[Misc] Fix import (#20233)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
parent
5a52f389dd
commit
19108ef311
@@ -45,7 +45,7 @@ from vllm.sequence import IntermediateTensors
|
|||||||
from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler,
|
from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler,
|
||||||
GiB_bytes, LazyLoader, async_tensor_h2d, cdiv,
|
GiB_bytes, LazyLoader, async_tensor_h2d, cdiv,
|
||||||
check_use_alibi, get_dtype_size,
|
check_use_alibi, get_dtype_size,
|
||||||
is_pin_memory_available)
|
is_pin_memory_available, round_up)
|
||||||
from vllm.v1.attention.backends.mamba_attn import Mamba2AttentionBackend
|
from vllm.v1.attention.backends.mamba_attn import Mamba2AttentionBackend
|
||||||
from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder,
|
from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder,
|
||||||
CommonAttentionMetadata)
|
CommonAttentionMetadata)
|
||||||
@@ -1308,7 +1308,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
|
|||||||
tp_size = self.vllm_config.parallel_config.tensor_parallel_size
|
tp_size = self.vllm_config.parallel_config.tensor_parallel_size
|
||||||
if self.compilation_config.pass_config. \
|
if self.compilation_config.pass_config. \
|
||||||
enable_sequence_parallelism and tp_size > 1:
|
enable_sequence_parallelism and tp_size > 1:
|
||||||
from vllm.utils import round_up
|
|
||||||
num_input_tokens = round_up(num_scheduled_tokens, tp_size)
|
num_input_tokens = round_up(num_scheduled_tokens, tp_size)
|
||||||
else:
|
else:
|
||||||
num_input_tokens = num_scheduled_tokens
|
num_input_tokens = num_scheduled_tokens
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user